Ejemplo n.º 1
0
    async def flow_vs_intensity_violin(experiment, intensity=130):
        """Build a violin figure comparing "Dark" and "Light" flow values.

        The flow samples are fetched with ``flow_vs_intensity_data`` and
        arranged into a long-form DataFrame with one row per sample: the
        value goes in ``Flow`` and the group it came from in ``Category``.

        Args:
            experiment: Forwarded unchanged to ``flow_vs_intensity_data``.
            intensity: Forwarded unchanged to ``flow_vs_intensity_data``.

        Returns:
            The figure produced by ``ff.create_violin`` (no title, no rug plot).
        """
        flows_by_group = await flow_vs_intensity_data(experiment, intensity)
        dark_flows = flows_by_group["Dark"]
        light_flows = flows_by_group["Light"]

        # One category label per sample, in the same order as the values.
        frame = pd.DataFrame({
            "Flow": dark_flows + light_flows,
            "Category": (["Dark"] * len(dark_flows) +
                         ["Light"] * len(light_flows)),
        })
        return ff.create_violin(frame,
                                data_header="Flow",
                                group_header="Category",
                                title=None,
                                rugplot=False)
Ejemplo n.º 2
0
    async def flow_vs_category_violin(experiment):
        """Build a violin figure of flow values grouped by category.

        For each category index, the flow samples are fetched with
        ``flow_vs_category_data`` and tagged with the category's display
        name. The result is a long-form DataFrame (``Flow`` / ``Category``)
        handed to ``ff.create_violin``.

        Args:
            experiment: Forwarded unchanged to ``flow_vs_category_data``.

        Returns:
            The figure produced by ``ff.create_violin`` (no title, no rug plot).
        """
        # Position in this list is the category index passed to the data
        # helper; the list is the single source of truth for both the number
        # of categories and their display names.
        category_names = ["Undefined", "Unknown", "Bitumen", "Sand", "Bubble"]

        flows = None
        labels = []
        for category, name in enumerate(category_names):
            category_flows = await flow_vs_category_data(experiment, category)
            # Concatenate with "+" (never in place) so the helper's return
            # value is not mutated.
            flows = category_flows if flows is None else flows + category_flows
            labels.extend([name] * len(category_flows))

        df = pd.DataFrame(dict(Flow=flows, Category=labels))
        return ff.create_violin(df,
                                data_header="Flow",
                                group_header="Category",
                                title=None,
                                rugplot=False)
Ejemplo n.º 3
0
    def compare_class_clusters_violin(self):
        """Plot cluster labels grouped by class as a set of violins.

        Reads ``self.cluster_results.labels_`` (cluster label per sample) and
        ``self.df_class[self.target_key]`` (class value per sample), puts
        them side by side in a DataFrame and draws one violin of the
        ``Cluster`` values per distinct ``Class`` value.

        Returns:
            Whatever ``py.iplot`` returns for the figure.
        """
        class_values = self.df_class[self.target_key]

        # The earlier class -> integer mapping was computed but never used,
        # so it has been dropped; the plot only needs the raw class values.
        df = pd.DataFrame()
        df['Class'] = class_values
        df['Cluster'] = np.array(self.cluster_results.labels_)

        fig = ff.create_violin(df,
                               data_header='Cluster',
                               group_header='Class',
                               height=500,
                               width=800)
        return py.iplot(fig, filename='Multiple Violins')
Ejemplo n.º 4
0
 def create_violin(*args, **kwargs):
     """Deprecated shim that forwards to ``plotly.figure_factory.create_violin``.

     Calls ``FigureFactory._deprecated('create_violin')`` first, then passes
     all positional and keyword arguments through unchanged and returns the
     result.
     """
     FigureFactory._deprecated('create_violin')
     # Imported at call time, after the deprecation hook has run.
     import plotly.figure_factory as figure_factory
     return figure_factory.create_violin(*args, **kwargs)
Ejemplo n.º 5
0
# Exploratory plotting script. It expects a DataFrame named `df` to exist
# already, with at least the columns 'KnownSex', 'Head', 'Mass' and 'Skull'.

# NOTE: the result of head() is discarded here; it only displays something
# when it is the last expression of a notebook cell.
df.head()
print(df)

from plotly.offline import iplot
import plotly.figure_factory as ff

# Scatter-plot matrix of the measurements, indexed by 'KnownSex'.
figure = ff.create_scatterplotmatrix(df[['KnownSex', 'Head', 'Mass', 'Skull']],
                                     index='KnownSex',
                                     height=800,
                                     width=800)
iplot(figure)

# Pairwise correlations between the columns of df.
corrs = df.corr()

# Annotated heatmap of the correlations, rounded to two decimals.
# The figure is built but its display call below is disabled.
figure = ff.create_annotated_heatmap(z=corrs.round(2).values,
                                     x=list(corrs.columns),
                                     y=list(corrs.index),
                                     showscale=True)
#iplot(figure)

# Same scatter-plot matrix as above, with histograms on the diagonal.
# Also built without being displayed.
figure = ff.create_scatterplotmatrix(df[['KnownSex', 'Head', 'Mass', 'Skull']],
                                     diag='histogram',
                                     index='KnownSex',
                                     height=800,
                                     width=800)
#iplot(figure)

# Violin plot of 'Mass' with one violin per 'KnownSex' value.
iplot(ff.create_violin(df, data_header='Mass', group_header='KnownSex'))
# Dendrogram built from the 'Mass' and 'Skull' columns.
figure = ff.create_dendrogram(df[['Mass', 'Skull']])
iplot(figure)
Ejemplo n.º 6
0
 def create_violin(*args, **kwargs):
     """Deprecated shim that forwards to ``plotly.figure_factory.create_violin``.

     Calls ``FigureFactory._deprecated('create_violin')`` first, then passes
     all positional and keyword arguments through unchanged and returns the
     result.
     """
     FigureFactory._deprecated('create_violin')
     # Imported at call time, after the deprecation hook has run.
     import plotly.figure_factory as figure_factory
     return figure_factory.create_violin(*args, **kwargs)
Ejemplo n.º 7
0
    def Plot(self):
        """Plot sentiment score against observed star rating as violins.

        Each row of ``self.data`` is read positionally: index 0 is the title,
        1 the observed stars, 2 the measured stars and 4 the sentiment score.
        A subplot figure (bar charts, box plots, difference counts) is also
        assembled, but only the violin figure is currently rendered through
        ``pltly.plot``.

        The median sentiment score of every observed-star group is passed to
        ``create_violin`` as ``group_stats``. The medians are computed for
        the groups that actually occur in the data rather than for a fixed
        list of ratings. The fixed list raised ``KeyError`` when a rating was
        absent, and ``create_violin`` requires every group in the data to be
        a key of ``group_stats``.
        """
        freqs = self.Frequencies(counts=True, percents=True)
        df = pandas.DataFrame({"title": [x[0] for x in self.data],
                               "observed stars": [x[1] for x in self.data],
                               "sentiment score": [x[4] for x in self.data]})

        # Group by a plain column name (not a one-element list) so the group
        # keys are scalars and match the values in the column.
        group_stats = {
            stars: numpy.median(scores)
            for stars, scores in df.groupby("observed stars")["sentiment score"]
        }

        box0 = dict(type="box", y=[x[2] for x in self.data], boxmean="sd",
                    name="Measured Stars", marker={"color": "#9d87ed"},
                    xaxis="x1", yaxis="y2")
        box1 = dict(type="box", y=df["observed stars"], boxmean="sd",
                    name="Observed Stars", marker={"color": "#87aaed"},
                    xaxis="x1", yaxis="y2")

        obsbars = dict(type="bar", x=list(freqs["Observed_Perc"].keys()),
                       y=list(freqs["Observed_Perc"].values()),
                       name='Observed Star Ratings',
                       xaxis="x2", yaxis="y1", marker={"color": "#87aaed"})
        measbars = dict(type="bar", x=list(freqs["Measured_Perc"].keys()),
                        y=list(freqs["Measured_Perc"].values()),
                        name='Measured Star Ratings', xaxis="x1", yaxis="y1",
                        marker={"color": "#9d87ed"})

        # How often each (measured - observed) difference occurs.
        diffs = dict(Counter(float(row[2]) - float(row[1])
                             for row in self.data))

        diffbars = graphs.Bar(x=list(diffs.keys()),
                              y=list(diffs.values()),
                              marker=dict(color="#ed8787"))

        fig = tools.make_subplots(rows=4, cols=2,
                                  specs=[[{}, {'rowspan': 2}],
                                         [{}, None],
                                         [{'rowspan': 2, 'colspan': 2}, None],
                                         [None, None]], print_grid=False,
                                  subplot_titles=('Measured Star Rating Groups',
                                                  'Measured & Observed Stars Box Plot with Median',
                                                  'Observed Star Rating Groups',
                                                  'Difference Counts (Measured - Observed)'))

        fig.append_trace(obsbars, 1, 1)
        fig.append_trace(measbars, 2, 1)
        fig.append_trace(box0, 1, 2)
        fig.append_trace(box1, 1, 2)
        fig.append_trace(diffbars, 3, 1)
        fig["layout"].update(showlegend=False, title="Observed v Measured Stars")

        violin = figfactory.create_violin(df, data_header="sentiment score",
                                          group_header="observed stars",
                                          height=500, width=800,
                                          group_stats=group_stats,
                                          use_colorscale=True,
                                          title="Sentiment Score to Observed Stars")

        # NOTE: `fig` is fully built above but intentionally not rendered;
        # call pltly.plot(fig) here to show the subplot figure as well.
        pltly.plot(violin)
Ejemplo n.º 8
0
def generate_distribution_plot(plot_data, nbins=20, ptype='hist'):
    """Render a distribution plot of per-sample values as an HTML ``div``.

    Args:
        plot_data: Mapping of sample name -> mapping of data-type key ->
            value. Values that are missing for a sample, or that cannot be
            converted to ``float``, are skipped.
        nbins: Passed as ``nbinsx`` to each histogram (``ptype='hist'`` only).
        ptype: One of ``'hist'``, ``'boxplot'``, ``'dotplot'`` or
            ``'violin'``.

    Returns:
        The string returned by ``py.plot(..., output_type='div')``, or the
        string ``'Error - unrecognised plot type: <ptype>'`` when ``ptype``
        is not supported.
    """
    # Validate first so an unknown type is reported even when plot_data
    # contains no data types. Previously the check only ran inside the
    # per-data-type loop.
    if ptype not in ('hist', 'boxplot', 'dotplot', 'violin'):
        return 'Error - unrecognised plot type: {}'.format(ptype)

    dtypes = set()
    for s_name in plot_data:
        dtypes.update(plot_data[s_name])

    traces = []
    # Long-form columns for the violin plot: one row per value.
    violin_values = []
    violin_groups = []
    for dtype in sorted(dtypes):
        pdata = []
        for s_name in plot_data:
            try:
                pdata.append(float(plot_data[s_name][dtype]))
            except (KeyError, TypeError, ValueError, OverflowError):
                # Best effort: this sample has no usable value for dtype.
                continue
        series_name = "{} ({})".format(dtype, len(pdata))
        if ptype == 'hist':
            traces.append(
                go.Histogram(
                    x=pdata,
                    opacity=0.75,
                    nbinsx=nbins,
                    name=series_name
                )
            )
        elif ptype == 'boxplot':
            traces.append(
                go.Box(
                    y=pdata,
                    name=series_name
                )
            )
        elif ptype == 'dotplot':
            # A box trace with the box itself hidden, leaving only the
            # jittered points.
            traces.append(
                go.Box(
                    y=pdata,
                    name=series_name,
                    boxpoints='all',
                    jitter=0.5,
                    pointpos=0,
                    line=dict(width=0),
                    whiskerwidth=0,
                    fillcolor='rgba(0, 0, 0, 0)'
                )
            )
        else:  # 'violin'
            violin_values.extend(pdata)
            violin_groups.extend([series_name] * len(pdata))

    layout = {}
    if ptype == 'hist':
        layout = go.Layout(
            barmode='overlay',
            showlegend=True,
            xaxis=dict(
                title="Data"
            ),
            yaxis=dict(
                title="Sample Count"
                # TODO - integers only
            )
        )

    if ptype == 'violin':
        # create_violin documents a list of numbers or a DataFrame as input.
        # The previous code handed it a plain dict of lists, so a long-form
        # DataFrame with a group column is used instead.
        # NOTE(review): plotly estimates a density per group, so a group
        # with a single value may still be rejected - confirm with real data.
        import pandas as pd
        frame = pd.DataFrame({'Data': violin_values, 'Series': violin_groups})
        figure = ff.create_violin(frame,
                                  data_header='Data',
                                  group_header='Series')
    else:
        figure = go.Figure(data=traces, layout=layout)

    plot_div = py.plot(
        figure,
        output_type='div',
        show_link=False,
        config=dict(
            modeBarButtonsToRemove=[
                'sendDataToCloud',
                'resetScale2d',
                'hoverClosestCartesian',
                'hoverCompareCartesian',
                'toggleSpikelines'
            ],
            displaylogo=False
        )
    )
    return plot_div
Ejemplo n.º 9
0
# Script fragment: `mean`, `st_dev` and `data` are defined earlier in the
# script, outside this excerpt.
print("The mean is %r" % mean)
print("The standard deviation is %r" % st_dev)

# Remaining summary statistics of the 'alcohol' column.
median = np.median(data['alcohol'])
maximum = np.max(data['alcohol'])
minimum = np.min(data['alcohol'])

print("The median is %r" % median)
print("The maximum is %r" % maximum)
print("The minimum is %r" % minimum)

# --------------------------------------------------
# Violin plot of the 'alcohol' values, written to an HTML file.
y = data['alcohol'].values.tolist()

fig = FF.create_violin(y, title='Violin Plot', colors='#604d9e')
py.offline.plot(fig, filename='alcohol-violin-visual.html')

# --------------------------------------------------
# Box plot of the same values, showing every point alongside the box.
# https://plot.ly/python/box-plots/
y = data['alcohol'].values.tolist()

trace = go.Box(
    y=y,
    name='Box Plot',
    boxpoints='all',
    jitter=0.3,
    marker=dict(color='rgb(214,12,140)', ),
)

layout = go.Layout(
Ejemplo n.º 10
0
def plot_a_graph(g,
                 secondary_edges=None,
                 title=None,
                 smart_sizing=True,
                 smart_shaping=True,
                 smart_layout=False,
                 mark_root=False,
                 interactive_mode_flag=True,
                 save_image_path=None):
    """Draw a networkx graph with plotly, optionally on a time axis.

    Per-node attributes read by this function: ``pos`` (computed with a
    spring layout when the first node lacks it), ``support``, ``score``,
    ``ts_n_dates``, ``ts_start_epoch``, ``ts_end_epoch``, ``article_epochs``
    (only with ``smart_layout`` / ``smart_sizing``) and, optionally,
    ``level`` and ``redundant_nodes``. Every edge must carry a ``weight``.

    Args:
        g: The graph to draw.
        secondary_edges: Optional second edge source, drawn dashed in cyan
            through ``create_edge_trace_for_plotly``.
        title: Figure title; defaults to ``'StoryGraph<br>'``.
        smart_sizing: Draw each node as a layout shape or violin outline
            spanning its start/end epochs instead of as a fixed marker.
        smart_shaping: With ``smart_sizing``, nodes with at least three
            distinct article epochs get a violin outline; without it, the
            marker size scales with ``support``.
        smart_layout: Use a date x-axis and place each node at the mean of
            its article epochs.
        mark_root: Colour nodes whose ``level`` is 0 with the root colour.
        interactive_mode_flag: Use ``plotly.offline.iplot`` when true,
            otherwise ``plotly.offline.plot`` with ``output_type='div'``.
        save_image_path: When set, additionally write the figure (title
            removed) to this path with ``image='svg'``.

    Returns:
        The value returned by ``iplot`` / ``plot`` for the figure.
    """
    if title is None:
        title = 'StoryGraph<br>'
    node_color = 'rgba(152, 76, 0, .1)'
    root_color = 'rgba(0, 152, 0, .15)'
    if 'pos' not in list(g.nodes(data=True))[0][1].keys():
        nx.set_node_attributes(
            g,
            name='pos',
            values=nx.spring_layout(
                g,
                iterations=20,  # default: 50
                k=0.9,  # Optimal distance between nodes. If None the distance is set to 1/sqrt(n)
            ))
    # The figure grows by one "page" of height per 100 nodes.
    page_factor = np.ceil(len(list(g.nodes())) / 100)

    edge_traces = []
    node_violin_traces = []
    middle_node_trace = go.Scatter(x=[],
                                   y=[],
                                   text=[],
                                   mode='markers',
                                   hoverinfo='text',
                                   marker=go.scatter.Marker(opacity=0, ),
                                   textposition='top right')
    edge_weights = [
        e[2]['weight'] for e in g.edges(data=True) if e[2]['weight'] > 0
    ]
    if edge_weights:
        min_edge_weight = min(edge_weights)
        max_edge_weight = max(edge_weights)
        if max_edge_weight == min_edge_weight:  # corner-case
            min_edge_weight = 0
    else:
        # No positive-weight edges (e.g. a single-node graph): min()/max()
        # would raise on the empty list.
        # NOTE(review): 0..1 is a placeholder range - confirm it suits
        # create_edge_trace_for_plotly when only secondary edges exist.
        min_edge_weight, max_edge_weight = 0, 1
    primary_edge_traces, middle_node_trace = create_edge_trace_for_plotly(
        g,
        middle_node_trace=middle_node_trace,
        min_edge_weight=min_edge_weight,
        max_edge_weight=max_edge_weight,
        color='#888',
        dash=None,
        smart_layout=smart_layout)
    edge_traces.extend(primary_edge_traces)

    if secondary_edges is not None:
        secondary_edge_traces, middle_node_trace = create_edge_trace_for_plotly(
            secondary_edges,
            middle_node_trace=middle_node_trace,
            min_edge_weight=min_edge_weight,
            max_edge_weight=max_edge_weight,
            color='#0ff',
            dash='dash',
            smart_layout=smart_layout)
        edge_traces.extend(secondary_edge_traces)

    node_trace = go.Scatter(x=[],
                            y=[],
                            text=[],
                            hovertext=[],
                            mode='markers+text',
                            textposition='middle center',
                            textfont={"size": []},
                            hoverinfo='text',
                            marker=go.scatter.Marker(
                                colorscale='YlGnBu',
                                reversescale=True,
                                color=[],
                                size=[],
                            ))

    layout = go.Layout(
        autosize=False,
        height=(720 if interactive_mode_flag else 960) * page_factor,
        width=1320 if interactive_mode_flag else 1760,
        title=title,
        titlefont=dict(size=16),
        font=dict(size=18),  # 24 for saving the fig
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=10, r=10, t=60 + 20 * title.count("<br>")),
        xaxis=go.layout.XAxis(showgrid=False,
                              zeroline=False,
                              showticklabels=False),
        yaxis=go.layout.YAxis(showgrid=False,
                              zeroline=False,
                              showticklabels=False),
        shapes=[])
    if smart_layout:
        layout['xaxis'] = go.layout.XAxis(type="date",
                                          showgrid=True,
                                          ticklen=5)
        layout['margin'] = dict(b=60,
                                l=5,
                                r=5,
                                t=60 + 20 * title.count("<br>"))

    for node_name, node_data in g.nodes(data=True):
        x, y = node_data['pos']
        # 'level' is optional (the hover text below also treats it so); a
        # node without it is simply not a root.
        is_root = mark_root and node_data.get('level') == 0
        fill_color = root_color if is_root else node_color
        if smart_layout:
            # Epochs are in seconds; the date axis works in milliseconds.
            x = np.mean(node_data['article_epochs']) * 1000
        if smart_sizing:
            if (not smart_shaping) or (len(set(node_data['article_epochs'])) <
                                       3):
                # Too few distinct dates for a density outline: draw an
                # ellipse spanning the node's time range.
                half_height = max(10, 4 * np.sqrt(node_data['support'])) / 2
                layout['shapes'] += ({
                    'type': 'circle',
                    'xref': 'x',
                    'yref': 'y',
                    'x0': node_data['ts_start_epoch'] * 1000,
                    'y0': y - half_height,
                    'x1': node_data['ts_end_epoch'] * 1000,
                    'y1': y + half_height,
                    'fillcolor': fill_color,
                    'line': {
                        'width': 0.1,
                    },
                }, )
            else:
                # https://github.com/plotly/plotly.py/blob/master/plotly/figure_factory/_violin.py
                if len(node_data['article_epochs']) > 1:
                    start_epoch = node_data['ts_start_epoch']
                    epoch_span = node_data['ts_end_epoch'] - start_epoch
                    # Density of the article dates, normalised to the
                    # node's own time range.
                    node_violin_trace = ff.create_violin(
                        [(article_epoch - start_epoch) / epoch_span
                         for article_epoch in node_data['article_epochs']],
                        rugplot=False)['data']
                    # the first two Scatter objects are the top/bottom halves
                    node_violin_trace = node_violin_trace[:2]
                    first_trace_flag = True  # dirty trick! modified this after switching to plotly 3.3
                    for violin_trace in node_violin_trace:
                        # Swap the axes so the violin lies along the time
                        # axis: density becomes the vertical extent around
                        # y, the normalised date becomes the x position.
                        temp = np.copy(violin_trace['x'])
                        violin_trace['x'] = 1000 * (
                            epoch_span * np.copy(violin_trace['y']) +
                            start_epoch)
                        violin_trace['y'] = y + 2 * np.sqrt(
                            node_data['support']) * temp
                        if 'box' in violin_trace:
                            violin_trace['box'] = {'visible': False}
                        if 'scalemode' in violin_trace:
                            violin_trace['scalemode'] = 'count'
                        if 'meanline' in violin_trace:
                            violin_trace['meanline'] = {'visible': False}
                        if 'opacity' in violin_trace:
                            violin_trace['opacity'] = 0.05
                        if 'fill' in violin_trace:
                            if first_trace_flag:
                                violin_trace['fill'] = 'toself'
                                first_trace_flag = False
                            else:
                                violin_trace['fill'] = 'tonexty'
                        if 'text' in violin_trace:
                            violin_trace['text'] = None
                        if 'line' in violin_trace:
                            violin_trace['line']['color'] = 'rgba(0,0,0,0)'
                        if 'marker' in violin_trace:
                            violin_trace['marker']['color'] = 'rgba(0,0,0,0)'
                        if 'fillcolor' in violin_trace:
                            violin_trace['fillcolor'] = fill_color
                    node_violin_traces.extend(node_violin_trace)
        else:
            node_trace['marker']['color'] += (fill_color, )
            if smart_shaping:
                node_trace['marker']['size'] += (max(
                    50, 20 * np.sqrt(node_data['support'])), )
            else:
                node_trace['marker']['size'] += (50, )
        node_trace['x'] += (x, )
        node_trace['y'] += (y, )
        node_trace['hovertext'] += (
            "<br>".join([
                "%d article%s" % (node_data['support'],
                                  "s" if node_data['support'] > 1 else ""),
                "query relevance: %.4f" % node_data['score'],
                ("level: %d" %
                 node_data['level']) if 'level' in node_data.keys() else "",
                # Plural only above one, matching the article count above
                # (the previous "> 0" test produced "1 days").
                "%d day%s" % (node_data['ts_n_dates'],
                              "s" if node_data['ts_n_dates'] > 1 else ""),
                "%s - %s" %
                (time.strftime('%d.%m.%Y',
                               time.localtime(node_data['ts_start_epoch'])),
                 time.strftime('%d.%m.%Y',
                               time.localtime(node_data['ts_end_epoch']))),
                ("redundant tags: %s" % node_data['redundant_nodes'])
                if 'redundant_nodes' in node_data.keys() else ""
            ]), )
        node_trace['text'] += ("%s" % node_name, )
        node_trace['textfont']['size'] += (max(
            12, 4 * np.log2(node_data['support'])), )

    # Violin outlines go first in the trace list, then edges, then nodes.
    fig = go.Figure(data=[
        *node_violin_traces, *edge_traces, node_trace, middle_node_trace
    ],
                    layout=layout)
    if interactive_mode_flag:
        div = plotly.offline.iplot(
            fig,
            image_height=720 * page_factor,
            image_width=1320,
        )
    else:
        div = plotly.offline.plot(
            fig,
            auto_open=False,
            output_type='div',
            image_height=960 * page_factor,
            image_width=1760,
        )
    # save the plot(s)
    if save_image_path is not None:
        # remove the title
        fig['layout']['title'] = None
        fig['layout']['margin'] = dict(b=60, l=40, r=40, t=60)
        plotly.offline.plot(
            fig,
            filename=save_image_path,
            image='svg',
            auto_open=False,
            image_height=480,  # 960,
            image_width=1200,  # 1760
        )
    return div