Example #1
0
    def make_sankey(self, theme):
        """Build an energy Sankey figure from the module-level ``data`` payload.

        NOTE(review): ``theme`` is accepted but never used, and the figure is
        driven entirely by the global ``data`` dict — confirm with callers.
        """
        node_payload = data['data'][0]['node']
        link_payload = data['data'][0]['link']

        sankey = go.Sankey(
            valueformat=".0f",
            valuesuffix="TWh",
            # Nodes: labels and colors come straight from the payload.
            node=dict(
                pad=15,
                thickness=15,
                line=dict(color="black", width=0.5),
                label=node_payload['label'],
                color=node_payload['color'],
            ),
            # Links connecting the nodes above.
            link=dict(
                source=link_payload['source'],
                target=link_payload['target'],
                value=link_payload['value'],
                label=link_payload['label'],
                color=link_payload['color'],
            ),
        )
        return go.Figure(data=[sankey])
Example #2
0
def sankey_plot(corr_mat, parties_apr, parties_sep):
    """Show a Sankey diagram of vote flows between the two 2019 elections.

    Node indices ``0 .. len(parties_apr)-1`` are the April parties and the
    remaining indices the September parties; ``corr_mat`` supplies the flow
    magnitude for every (April, September) pair.
    """
    n_apr = len(parties_apr)
    n_sep = len(parties_sep)
    # Cartesian product of April-party indices with September-party indices.
    source, target = np.meshgrid(
        np.arange(0, n_apr),
        np.arange(n_apr, n_apr + n_sep))
    source = source.flatten()
    target = target.flatten()

    node_spec = dict(
        thickness=12,
        label=list(parties_apr) + list(parties_sep),
    )
    link_spec = dict(
        source=source,
        target=target,
        value=corr_mat.flatten(),
    )
    fig = go.Figure(data=[go.Sankey(node=node_spec, link=link_spec)])
    fig.update_layout(
        title_text=
        "Votes Moving: September 2019 Elections ---> April 2019 Elections")
    fig.show()
Example #3
0
def update_sankey_plot(data):
    """Return an energy-forecast Sankey figure built from ``data``.

    ``data`` is expected to carry plotly-style node/link specs under
    ``data['data'][0]`` — presumably loaded from the plotly energy-sankey
    sample JSON; verify against the caller.
    """
    node_info = data['data'][0]['node']
    link_info = data['data'][0]['link']

    sankey = go.Sankey(
        valueformat=".0f",
        valuesuffix="TWh",
        # Nodes taken verbatim from the payload.
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=node_info['label'],
            color=node_info['color'],
        ),
        # Links taken verbatim from the payload (link colors unused here).
        link=dict(
            source=link_info['source'],
            target=link_info['target'],
            value=link_info['value'],
            label=link_info['label'],
        ),
    )
    fig = go.Figure(data=[sankey])

    fig.update_layout(
        title_text=
        "Energy forecast for 2050<br>Source: Department of Energy & Climate Change, Tom Counsell via <a href='https://bost.ocks.org/mike/sankey/'>Mike Bostock</a>",
        font_size=10)
    return fig
def update_Sankey(xaxis_column1_name):
    """Build a Sankey of crime data: day-of-week -> district -> chosen column.

    Relies on the module-level ``df`` and ``getData`` helper; the node label
    list concatenates all weekdays, all districts, then every distinct value
    of the user-selected column.
    """
    dff = df[xaxis_column1_name]
    weekdays = list(df.DayOfWeek.unique())
    districts = list(df.PdDistrict.unique())
    attr_values = list(df[xaxis_column1_name].unique())
    labels = weekdays + districts + attr_values

    node_spec = dict(
        pad=15,
        thickness=15,
        line=dict(color='blue', width=0.8),
        label=labels,
        # Fixed palette; recycled by plotly if there are more nodes.
        color=[
            'rgb(50, 168, 160)', 'yellow', 'green', 'purple', 'white',
            'orange', 'rgb(131, 207, 56)', 'rgb(43, 196, 156)',
            'rgb(28, 92, 156)', 'rgb(11, 120, 230)', 'rgb(232, 39, 120)',
            'rgb(186, 103, 48)', 'rgb(23, 156, 76)', 'rgb(55, 21, 176)',
            'rgb(100, 156, 123)', 'rgb(31, 66, 110)', 'rgb(186, 150, 48)',
            'rgb(131, 207, 56)', 'rgb(232, 39, 120)'
        ])
    link_spec = getData(labels, dff, weekdays, xaxis_column1_name)
    return go.Figure(data=[go.Sankey(node=node_spec, link=link_spec)])
Example #5
0
def format_data(nodes):
    """Convert a tree of move nodes into a ``go.Sankey`` trace.

    Each node contributes a label, a color and a ``[game string, score%]``
    customdata entry; every non-root edge (parent -> node) becomes a link
    weighted by the node's visit count and colored by its score.

    Parameters
    ----------
    nodes : iterable of node objects exposing ``move``, ``color``, ``idx``,
        ``visits``, ``parent``, ``game_string()`` and ``score()``.

    Returns
    -------
    go.Sankey
    """
    label, color_node, source, target, value, color_link, customdata = [], [], [], [], [], [], []

    for node in nodes:
        label.append(node.move)
        color_node.append(color_codes[node.color])
        customdata.append([node.game_string(), node.score() * 100])
        # The root's pseudo-parent has move 'root'; everything else links to
        # its parent. NOTE(review): assumes every node has a non-None parent.
        if node.parent.move != 'root':
            source.append(node.parent.idx)
            target.append(node.idx)
            value.append(node.visits)
            color_link.append(score2color(node.score()))

    node = dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=label,
        color=color_node,
        customdata=customdata,
        # BUG FIX: '%{value:.f}' is not a valid d3-format spec (precision
        # digits are required after '.'); '.0f' renders whole game counts.
        hovertemplate=
        "%{customdata[0]}<br />You scored %{customdata[1]:.0f}% in %{value:.0f} games<extra></extra>"
    )

    link = dict(
        source=source,
        target=target,
        value=value,
        color=color_link,
        # Same '.f' -> '.0f' fix as the node hovertemplate above.
        hovertemplate=
        "%{target.label}<br />%{value:.0f} of %{source.value:.0f} games<extra></extra>"
    )

    data = go.Sankey(node=node, link=link)

    return data
def get_pooling_sankey_diagram(_df, _name, _unit=1000.0):
    """Build, save and return a Sankey diagram of ride-hail pooling outcomes.

    Parameters
    ----------
    _df : mapping-like (e.g. pandas Series) with ride-hail summary fields
        such as ``ride_hail_requests``, ``solo_trips``,
        ``unmatched_pool_requests`` etc.
    _name : str
        Output directory; the figure is written to
        ``{_name}/pooling-metrics-sankey.png``.
    _unit : float, default 1000.0
        Divisor used to express request counts in thousands ("K").

    Returns
    -------
    fig : plotly Figure (also written to disk as a PNG).
    """
    pool_tot_share = _df["multi_passengers_trips_per_ride_hail_trips"]
    pool_share = _df["multi_passengers_trips_per_pool_trips"]
    solo_share = (_df["solo_trips"] +
                  _df["one_passenger_pool_trips"]) / _df["ride_hail_requests"]
    unmatched_share = (
        _df["unmatched_pool_requests"] +
        _df["unmatched_solo_requests"]) / _df["ride_hail_requests"]
    labels = [
        "pool requests: {:.1f}K".format(_df["ride_hail_pool_requests"] /
                                        _unit),
        "solo requests: {:.1f}K".format(_df["ride_hail_solo_requests"] /
                                        _unit),
        "pool: {:.1%} ({:.1%})".format(pool_tot_share, pool_share),
        "solo: {:.1%}".format(solo_share),
        "unmatched: {:.1%}".format(unmatched_share)
    ]
    fig = go.Figure(data=[
        go.Sankey(
            # Define nodes: 0/1 are pool/solo requests, 2/3/4 the outcomes.
            node=dict(pad=15,
                      thickness=15,
                      line=dict(color="black", width=0.5),
                      label=labels),
            # Add links from request types to their outcomes.
            link=dict(source=[0, 0, 0, 1, 1],
                      target=[2, 3, 4, 3, 4],
                      value=[
                          _df["multi_passenger_pool_trips"],
                          _df["one_passenger_pool_trips"],
                          _df["unmatched_pool_requests"], _df["solo_trips"],
                          _df["unmatched_solo_requests"]
                      ]))
    ])
    fig.update_layout(title_text="Sankey Diagram For Pooling", font_size=10)
    fig.write_image("{}/pooling-metrics-sankey.png".format(_name))
    # FIX: return the figure (previously implicitly returned None) so callers
    # can display or customize it; consistent with the other figure builders.
    return fig
def update_Sankey(selected_poke_attr, selectedData):
    """Build a pokemon Sankey restricted to the current scatter selection.

    When ``selectedData`` is provided, only the selected rows of the
    module-level ``df`` are used; node labels concatenate legendary flags,
    colors and the distinct values of the chosen attribute.
    """
    if selectedData:
        picked = [point['pointIndex'] for point in selectedData['points']]
        dff = df.iloc[picked, :]
    else:
        dff = df

    legendary_vals = list(dff.isLegendary.unique())
    color_vals = list(dff.Color.unique())
    attr_vals = list(dff[selected_poke_attr].unique())
    labels = legendary_vals + color_vals + attr_vals

    node_spec = dict(
        pad=15,
        thickness=15,
        line=dict(color='blue', width=0.8),
        label=labels,
        # Fixed palette; recycled by plotly if there are more nodes.
        color=['rgb(50, 168, 160)', 'yellow', 'green',
               'purple', 'white', 'orange', 'rgb(131, 207, 56)', 'rgb(43, 196, 156)', 'rgb(28, 92, 156)', 'rgb(11, 120, 230)', 'rgb(232, 39, 120)', 'rgb(186, 103, 48)', 'rgb(23, 156, 76)', 'rgb(55, 21, 176)', 'rgb(100, 156, 123)', 'rgb(31, 66, 110)', 'rgb(186, 150, 48)', 'rgb(131, 207, 56)', 'rgb(232, 39, 120)'])
    link_spec = getData(labels, color_vals, dff, legendary_vals, selected_poke_attr)
    return go.Figure(data=[go.Sankey(node=node_spec, link=link_spec)])
Example #8
0
    def fig(self):
        """Assemble and return the cash-flow Sankey figure.

        ``_calc_edge_values`` is expected to populate ``self.label``,
        ``self.source``, ``self.target`` and ``self.value`` as a side effect.
        """
        grouped = self._create_grouped_df()
        self._calc_edge_values(grouped)

        sankey = go.Sankey(
            arrangement='perpendicular',
            valuesuffix='€',
            valueformat=".2f",
            node=dict(
                pad=15,
                thickness=15,
                line=dict(color="black", width=0.5),
                label=self.label,
                color="blue",
            ),
            link=dict(
                source=self.source,
                target=self.target,
                value=self.value,
            ),
        )
        figure = go.Figure(layout=dict(height=800), data=[sankey])
        figure.update_layout(font_size=10, margin=dict(l=20, r=20, t=10, b=10))
        return figure
Example #9
0
        value_list.append(links_dict[(s, t)])
    elif args.method == "networkx":
        source_list.append(s)
        target_list.append(t)
    else:
        print "Invalid method: {}".format(method)

# Render the graph with plotly: label_list/source_list/target_list/value_list
# are accumulated by the (off-screen) loop above this block.
if args.method == "plotly":
    import plotly.graph_objects as go
    fig = go.Figure(data=[
        go.Sankey(
            valueformat=".0f",
            valuesuffix="s",
            # Define nodes
            node=dict(pad=15,
                      thickness=15,
                      line=dict(color="black", width=0.5),
                      label=label_list),
            # Add links
            link=dict(source=source_list, target=target_list,
                      value=value_list))
    ])

    fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
    fig.show()

elif args.method == "networkx":

    import networkx as nx
    from networkx.drawing.nx_agraph import graphviz_layout, to_agraph
Example #10
0
    def clf_model_retrieval(self, metrics=None):
        """This function implements classification model retrieval visualization.

        Builds a Sankey diagram tracing pipeline clusters through
        Dataset -> low-dim encoder -> high-dim encoder -> winsorization ->
        scaler -> metric level.

        Parameters
        ----------
        metrics : str, default = None
            Value in ["accuracy","precision","recall"]. Any other value is a
            silent no-op (original behaviour preserved).

        Example
        -------

        .. [] https://Optimal-Flow.readthedocs.io/en/latest/demos.html#pipeline-cluster-traversal-experiments-model-retrieval-diagram-using-autoviz

        Notes
        -----
        Fixes vs. the previous version:
        * the "recall" branch referenced bare ``dyna_report`` instead of
          ``self.dyna_report`` (NameError);
        * the three metric branches were near-identical copies and are now a
          single parameterized path;
        * ``Series.append`` (removed in pandas 2.0) replaced by ``pd.concat``.
        """
        columns = [
            "Dataset", "Encode_low_dimension", "Encode_high_dimension",
            "Winsorize", "Scale"
        ]
        df_pp = pd.DataFrame(columns=columns)

        # Parse each pipeline's preprocessing description string into one row.
        for i in list(self.DICT_PREPROCESSING.keys()):
            s = self.DICT_PREPROCESSING[i]
            ext = re.search("Encoded Features:(.*)']", s).group(1)
            # Low-dimension encoder: onehot takes precedence over label
            # (matches the original elif-chain ordering).
            if "onehot_" in ext:
                low_dim = 'Low Dim_Onehot'
            elif "Label_" in ext:
                low_dim = 'Low Dim_Label'
            else:
                low_dim = 'Low Dim_No Encoder'
            # High-dimension encoder: frequency takes precedence over mean.
            if "Frequency_" in ext:
                high_dim = 'High Dim_Frequency'
            elif "Mean_" in ext:
                high_dim = 'High Dim_Mean'
            else:
                high_dim = 'High Dim_No Encoder'
            df_pp.loc[len(df_pp)] = [
                i,
                low_dim,
                high_dim,
                re.search('winsor_(.*)-Scaler', s).group(1),
                re.search('-Scaler_(.*)-- ', s).group(1),
            ]

        metric_columns = {
            "accuracy": "Accuracy",
            "precision": "Precision",
            "recall": "Recall",
        }
        if metrics not in metric_columns:
            # Unknown metric: keep the original silent no-op behaviour.
            return
        metric_col = metric_columns[metrics]

        # BUG FIX: the recall branch previously used bare ``dyna_report``.
        df_report = df_pp.merge(self.dyna_report[['Dataset', metric_col]],
                                how='left',
                                on='Dataset')
        bins = [0, 0.70, 0.90, 1]
        labels = [
            "Low " + metric_col, "High " + metric_col, "Top " + metric_col
        ]
        df_report['Level'] = pd.cut(df_report[metric_col],
                                    bins=bins,
                                    labels=labels)
        df_report['cnt'] = 1
        df_report.loc[df_report['Scale'] == 'None', 'Scale'] = "No Scaler"
        df_report['Scale'] = 'Scale_' + df_report['Scale']
        df_report['Winsorize'] = 'Winsorize_' + df_report['Winsorize']

        def _edges(group_cols, antecedent, consequent, drop_na=False):
            # Count pipelines flowing from ``antecedent`` to ``consequent``.
            out = df_report.groupby(
                group_cols, as_index=False)['cnt'].count().rename(
                    {
                        "cnt": "Total",
                        antecedent: "antecedentIndex",
                        consequent: "consequentIndex"
                    },
                    axis=1)[['antecedentIndex', 'consequentIndex', 'Total']]
            # 'Level' is categorical and may contain NaN -> drop those edges.
            return out.dropna() if drop_na else out

        integrated_df = pd.concat([
            _edges(['Encode_low_dimension', 'Dataset'],
                   'Dataset', 'Encode_low_dimension'),
            _edges(['Encode_low_dimension', 'Encode_high_dimension'],
                   'Encode_low_dimension', 'Encode_high_dimension'),
            _edges(['Encode_high_dimension', 'Winsorize'],
                   'Encode_high_dimension', 'Winsorize'),
            _edges(['Winsorize', 'Scale'], 'Winsorize', 'Scale'),
            _edges(['Scale', 'Level'], 'Scale', 'Level', drop_na=True),
        ], axis=0)

        # Assign every distinct label a node number in order of appearance.
        # (pd.concat replaces the removed Series.append.)
        label_series = pd.concat([
            integrated_df['antecedentIndex'],
            integrated_df['consequentIndex']
        ]).drop_duplicates()
        label_df = pd.DataFrame({"label": label_series})
        label_df['Number'] = range(len(label_df))
        label_list = list(label_df.label)

        def _node_numbers(col):
            # Map each label occurrence in ``col`` to its node number.
            merged = pd.DataFrame(integrated_df[col]).merge(
                label_df, left_on=[col], right_on=['label'], how='left')
            return list(merged['Number'])

        source_list = _node_numbers('antecedentIndex')
        target_list = _node_numbers('consequentIndex')
        value_list = [int(i) for i in list(integrated_df.Total)]

        fig = go.Figure(data=[
            go.Sankey(node=dict(pad=15,
                                thickness=10,
                                line=dict(color='rgb(25,100,90)',
                                          width=0.5),
                                label=label_list,
                                color='rgb(71,172,55)'),
                      link=dict(source=source_list,
                                target=target_list,
                                value=value_list))
        ])

        fig.update_layout(
            title=
            f'Pipeline Cluster Traversal Experiments - autoViz {metrics} Retrieval Diagram <a href="https://www.linkedin.com/in/lei-tony-dong/"> ©Tony Dong</a>',
            font_size=8)
        plot(fig)
        fig.show()
Example #11
0
    def _update_execution_plot(self):
        # type: () -> ()
        """
        Update sankey diagram of the current pipeline.

        Walks ``self._nodes`` in dependency order (a node is emitted only
        after all of its parents), builds the sankey node/link arrays plus a
        parameter table, and reports the combined figure to the task logger.
        """
        sankey_node = dict(
            label=[],
            color=[],
            hovertemplate='%{label}<extra></extra>',
        )
        sankey_link = dict(
            source=[],
            target=[],
            value=[],
            hovertemplate='%{target.label}<extra></extra>',
        )
        visited = []
        node_params = []
        nodes = list(self._nodes.values())
        while nodes:
            next_nodes = []
            for node in nodes:
                # Defer nodes whose parents have not been emitted yet.
                if not all(p in visited for p in node.parents or []):
                    next_nodes.append(node)
                    continue
                visited.append(node.name)
                idx = len(visited) - 1
                parents = [visited.index(p) for p in node.parents or []]
                # BUG FIX: the ``or {}`` fallback previously applied to the
                # (always-None) return value of list.append, so a None could
                # be stored; parenthesize so it applies to the params value.
                node_params.append(
                    (node.job.task_parameter_override if node.job else node.parameters) or {})
                sankey_node['label'].append(
                    '{}<br />'.format(node.name) +
                    '<br />'.join('{}: {}'.format(k, v) for k, v in (node.parameters or {}).items()))
                # Color encodes state: executed -> blue (or red if the job
                # failed); otherwise green when a job exists, else pending.
                sankey_node['color'].append(
                    ("blue" if not node.job or not node.job.is_failed() else "red")
                    if node.executed is not None else ("green" if node.job else "lightsteelblue"))

                for p in parents:
                    sankey_link['source'].append(p)
                    sankey_link['target'].append(idx)
                    sankey_link['value'].append(1)

            nodes = next_nodes

        # make sure we have no independent (unconnected) nodes
        for i in [n for n in range(len(visited)) if n not in sankey_link['source'] and n not in sankey_link['target']]:
            sankey_link['source'].append(i)
            sankey_link['target'].append(i)
            sankey_link['value'].append(0.1)

        # BUG FIX: specs previously declared (table, sankey) while the traces
        # were added as (sankey row=1, table row=2); plotly rejects a trace
        # whose type does not match its subplot spec, so reorder the specs to
        # match the add_trace calls below.
        fig = make_subplots(
            rows=2, cols=1,
            shared_xaxes=True,
            vertical_spacing=0.03,
            specs=[[{"type": "sankey"}],
                   [{"type": "table"}], ]
        )
        # noinspection PyUnresolvedReferences
        fig.add_trace(
            go.Sankey(
                node=sankey_node, link=sankey_link, textfont=dict(color='rgba(0,0,0,0)', size=1)
            ),
            row=1, col=1
        )
        # noinspection PyUnresolvedReferences
        fig.add_trace(
            go.Table(
                header=dict(
                    values=["Pipeline Step", "Task ID", "Parameters"],
                    align="left",
                ),
                cells=dict(
                    values=[visited,
                            [self._nodes[v].executed or (self._nodes[v].job.task_id() if self._nodes[v].job else '')
                             for v in visited],
                            [str(p) for p in node_params]],
                    align="left")
            ),
            row=2, col=1
        )

        self._task.get_logger().report_plotly(
            title='Pipeline', series='execution flow', iteration=0, figure=fig)
Example #12
0
def sankey(df, mapping):
    """Plot a sankey diagram

    It is currently only possible to create this diagram for single years.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`
        Data to be plotted
    mapping : dict
        Assigns the source and target component of a variable

        .. code-block:: python

            {
                variable: (source, target),
            }

    Returns
    -------
    fig : :class:`plotly.graph_objects.Figure`
    """
    # Check for duplicates: every index level except `variable` must be
    # unique, otherwise the flows in the diagram would be ambiguous.
    for col in [name for name in df._data.index.names if name != "variable"]:
        levels = get_index_levels(df._data, col)
        if len(levels) > 1:
            raise ValueError(f"Non-unique values in column {col}: {levels}")

    # Concatenate the data with source and target columns
    _df = pd.DataFrame.from_dict(
        mapping, orient="index", columns=["source", "target"]
    ).merge(df._data, how="left", left_index=True, right_on="variable")
    # Map each distinct source/target label to an integer node index.
    # `pd.concat` replaces `Series.append`, which was removed in pandas 2.0;
    # `unique()` gives a deterministic first-seen node ordering (the previous
    # `set(...)` ordering varied between runs).
    label_mapping = {
        label: i
        for i, label in enumerate(
            pd.concat([_df["source"], _df["target"]]).unique()
        )
    }
    _df.replace(label_mapping, inplace=True)
    # The duplicate check above guarantees a single level for each of these.
    region = get_index_levels(_df, "region")[0]
    unit = get_index_levels(_df, "unit")[0]
    year = get_index_levels(_df, "year")[0]
    fig = go.Figure(data=[
        go.Sankey(
            valuesuffix=unit,
            node=dict(
                pad=15,
                thickness=10,
                line=dict(color="black", width=0.5),
                label=pd.Series(list(label_mapping)),
                hovertemplate="%{label}: %{value}<extra></extra>",
                color="blue",
            ),
            link=dict(
                source=_df.source,
                target=_df.target,
                value=_df.value,
                hovertemplate='"%{source.label}" to "%{target.label}": \
                %{value}<extra></extra>',
            ),
        )
    ])
    fig.update_layout(title_text=f"region: {region}, year: {year}",
                      font_size=10)
    return fig
Example #13
0
def Creating_tracking_and_analyses(Sample = 100, ITN = '5306'):
    """Build Sankey, box-plot and histogram figures for one chemical's end-of-life flows.

    Reads ``EoL_database_for_MC.csv`` from this module's directory, filters it
    to the chemical identified by *ITN* (SRS chemical ID), runs *Sample* Monte
    Carlo draws over the RETDF release quantities, and writes to the same
    directory:

    - ``Percentages_{ITN}_1..6.csv`` -- flow percentages for each Sankey level
    - ``Label_names_{ITN}_1..7.csv`` -- long-name to short-label mappings
    - ``Sankey_{ITN}.pdf``           -- six-level Sankey diagram
    - ``Box_{ITN}.pdf``              -- release distributions per compartment
    - ``Histogram_{ITN}.pdf``        -- release-code histograms per compartment

    Parameters
    ----------
    Sample : int
        Number of Monte Carlo columns generated per RETDF facility.
    ITN : str
        SRS chemical ID used to select the rows to analyse.

    NOTE(review): depends on module-level helpers ``Cal``, ``Release_code``
    and ``values`` defined elsewhere in this file -- confirm their contracts
    before refactoring.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__)) # Current directory
    # Columns read from the CSV (one row = one generator-to-RETDF transfer).
    columns = ['Generator primary NAICS name', 'SRS chemical ID',\
           'Generator condition of use', 'Quantity transferred by generator', 'EoL activity category under TSCA', \
           'EoL activity category under waste management hierarchy', 'RETDF TRIF ID', 'RETDF primary NAICS name', \
           'Maximum amount of chemical present at RETDF', 'Total chemical generated as waste by RETDF', \
           'Environmental compartment', 'RETDF chemical flow releases to the compartment', 'RETDF total chemical release']
    # NOTE(review): `type` shadows the builtin of the same name; harmless
    # within this function, but rename on the next substantive edit.
    type = {'Generator primary NAICS name':'str',
            'SRS chemical ID': 'str',
            'Generator condition of use':'str',
            'Quantity transferred by generator':'float',
            'EoL activity category under TSCA': 'str',
            'EoL activity category under waste management hierarchy':'str',
            'RETDF TRIF ID':'str',
            'RETDF primary NAICS name':'str',
            'Maximum amount of chemical present at RETDF':'int',
            'Total chemical generated as waste by RETDF': 'float',
            'Environmental compartment':'str',
            'RETDF chemical flow releases to the compartment':'float',
            'RETDF total chemical release': 'float'}
    df = pd.read_csv(dir_path + '/EoL_database_for_MC.csv', sep = ',', usecols = columns, low_memory = False, dtype = type, header = 0)
    # Keep only the rows for the requested chemical.
    df_chem = df.loc[df['SRS chemical ID'] == ITN]
    df_sankey = df_chem[['Generator primary NAICS name',
                    'Generator condition of use',
                    'Quantity transferred by generator',
                    'EoL activity category under TSCA',
                    'EoL activity category under waste management hierarchy',
                    'RETDF TRIF ID',
                    'Maximum amount of chemical present at RETDF',
                    'Total chemical generated as waste by RETDF',
                    'RETDF total chemical release',
                    'RETDF primary NAICS name',
                    'Environmental compartment',
                    'RETDF chemical flow releases to the compartment']]
    # Each "level" below computes the percentage split of transferred quantity
    # between two adjacent Sankey node layers.
    # NOTE(review): df1/df2/... are column subsets of df_sankey; assigning the
    # 'Proportion' column may trigger pandas' SettingWithCopyWarning.
    # First level (GiS -> CoU)
    df1 = df_sankey[['Generator primary NAICS name', 'Generator condition of use', 'Quantity transferred by generator']]
    Total_1 = df1['Quantity transferred by generator'].sum()
    df1['Proportion'] = df1['Quantity transferred by generator'].apply(lambda x: 100*x/Total_1)
    group1 = df1.groupby(['Generator primary NAICS name', 'Generator condition of use'], as_index = False).sum()

    # Second level (CoU -> RETDFiS)
    df2 = df_sankey[['Generator condition of use', 'Quantity transferred by generator',
                    'RETDF primary NAICS name']]
    Total_2 = df2['Quantity transferred by generator'].sum()
    df2['Proportion'] = df2['Quantity transferred by generator'].apply(lambda x: 100*x/Total_2)
    group2 = df2.groupby(['Generator condition of use', 'RETDF primary NAICS name'], as_index = False).sum()

    # Third level (RETDFiS -> WMH) and Fourth level  (RETDFiS -> EoL)
    # Energy recovery / Recycling flow straight to the waste-management layer
    # (group3); everything else goes through the EoL-activity layer (group4).
    df3 = df_sankey[['Quantity transferred by generator', 'EoL activity category under TSCA',
                     'RETDF primary NAICS name']]
    Total_3 = df3['Quantity transferred by generator'].sum()
    df3['Proportion'] = df3['Quantity transferred by generator'].apply(lambda x: 100*x/Total_3)
    group_aux = df3.groupby(['RETDF primary NAICS name', 'EoL activity category under TSCA'], as_index = False).sum()
    group3 =  group_aux.loc[group_aux['EoL activity category under TSCA'].isin(['Energy recovery', 'Recycling'])]
    group4 =  group_aux.loc[~group_aux['EoL activity category under TSCA'].isin(['Energy recovery', 'Recycling'])]

    # Fifth level (EoL -> WMH)
    # NOTE(review): proportions here are computed against Total_3 (the
    # third-level total), apparently so this subset stays on the same scale --
    # confirm this is intentional.
    df4 = df_sankey[['Quantity transferred by generator', 'EoL activity category under TSCA',
                     'EoL activity category under waste management hierarchy']]
    df4 =  df4.loc[~df4['EoL activity category under TSCA'].isin(['Energy recovery', 'Recycling'])]
    df4['Proportion'] = df4['Quantity transferred by generator'].apply(lambda x: 100*x/Total_3)
    group5 = df4.groupby(['EoL activity category under TSCA', 'EoL activity category under waste management hierarchy'], as_index = False).sum()

    # Sixth level (WMH -> EC)
    df5 = df_sankey[['Quantity transferred by generator', 'EoL activity category under waste management hierarchy', 'RETDF TRIF ID', 'Maximum amount of chemical present at RETDF', \
                 'Total chemical generated as waste by RETDF', 'Environmental compartment', 'RETDF chemical flow releases to the compartment', 'RETDF total chemical release']]
    n_cols = df5.shape[1]
    # One Monte Carlo draw set per distinct RETDF facility; `Cal` (defined
    # elsewhere in this file) produces the Sample sampled columns.
    df_handler_facility = df5[['RETDF TRIF ID', 'Maximum amount of chemical present at RETDF', \
                                'Total chemical generated as waste by RETDF', 'RETDF total chemical release']] \
                            .drop_duplicates(keep = 'first')
    columns_maximum = ['INV MAXIMUM QUANTITY ON-SITE ' + str(N + 1) for N in range(Sample)]
    df_handler_facility = df_handler_facility.merge(\
                        df_handler_facility.apply(lambda s: pd.Series(Cal(columns_maximum,
                                                                          s['Maximum amount of chemical present at RETDF'],
                                                                          s['Total chemical generated as waste by RETDF'],
                                                                          s['RETDF total chemical release'])), axis = 1),
                        left_index = True, right_index = True)
    # Broadcast the per-facility draws back onto every transfer row.
    df5 = pd.merge(df5, df_handler_facility, how = 'left',
               on = ['RETDF TRIF ID', 'Maximum amount of chemical present at RETDF',
                    'Total chemical generated as waste by RETDF', 'RETDF total chemical release'])
    # Scale each draw by the compartment release fraction and the quantity
    # transferred, then relabel the sampled columns as release columns.
    df5_aux = df5.iloc[:,n_cols:].multiply(df5['RETDF chemical flow releases to the compartment'], axis = 'index')
    df5_aux = df5_aux.multiply(df5['Quantity transferred by generator'], axis = 'index')
    columns_flow = {'INV MAXIMUM QUANTITY ON-SITE ' + str(N + 1): \
                     'RELEASE TO COMPARTMENT ' + str(N + 1) for N in range(Sample)}
    df5_aux.rename(columns = columns_flow, inplace = True)
    df5 = pd.concat([df5.iloc[:,0:n_cols], df5_aux], axis =  1)
    del df5_aux
    # Per-row summary statistics across the Sample draw columns.
    df5['STD RETDF chemical flow releases to the compartment'] = df5.iloc[:,n_cols:Sample + n_cols].std(axis = 1)
    df5['MEAN RETDF chemical flow releases to the compartment'] = df5.iloc[:,n_cols:Sample + n_cols].mean(axis = 1)
    df6 = df5[['EoL activity category under waste management hierarchy', 'Environmental compartment', 'MEAN RETDF chemical flow releases to the compartment']]
    # The non-released remainder of each management category is attributed to
    # a synthetic "compartment" (Recycled/Discarded/Treated/Energy).
    # NOTE(review): the 0.25 factor on the summed quantity is an undocumented
    # constant -- confirm its source before changing.
    func = {'Quantity transferred by generator': lambda x: 0.25*x.sum(),
            'MEAN RETDF chemical flow releases to the compartment': lambda x: x.sum()}
    df6_aux = df5[['EoL activity category under waste management hierarchy', 'Quantity transferred by generator', 'MEAN RETDF chemical flow releases to the compartment']]
    df6_aux = df6_aux.groupby('EoL activity category under waste management hierarchy', as_index = False).agg(func)
    df6_aux['MEAN RETDF chemical flow releases to the compartment'] = df6_aux.apply(lambda x: x['Quantity transferred by generator'] - x['MEAN RETDF chemical flow releases to the compartment'], axis = 1)
    df6_aux['Environmental compartment'] = None
    lb_wm = {'Recycling': 'Recycled', 'Disposal': 'Discarded', 'Treatment': 'Treated', 'Energy recovery': 'Energy'}
    for key, value in lb_wm.items():
        df6_aux.loc[df6_aux['EoL activity category under waste management hierarchy'] == key, 'Environmental compartment']  = value
    df6_aux = df6_aux[['EoL activity category under waste management hierarchy', 'Environmental compartment', 'MEAN RETDF chemical flow releases to the compartment']]
    df6 = pd.concat([df6, df6_aux], axis =  0, ignore_index = True)
    Total_5 = df6['MEAN RETDF chemical flow releases to the compartment'].sum()
    df6['Proportion'] = df6['MEAN RETDF chemical flow releases to the compartment'].apply(lambda x: 100*x/Total_5)
    group6 = df6.groupby(['EoL activity category under waste management hierarchy', 'Environmental compartment'], as_index = False).sum()

    # Generating labels for Sankey diagram
    # Long names are replaced by short numbered codes per layer (GiS-1, ...).
    GiS = {val: 'GiS-' + str(idx + 1) for idx, val \
        in enumerate(list(group1['Generator primary NAICS name'].unique()))}
    CoU = {val: 'CoU-' + str(idx + 1) for idx, val \
        in enumerate(list(group1['Generator condition of use'].unique()))}
    RETDFiS = {val: 'RETDFiS-' + str(idx + 1) for idx, val \
        in enumerate(list(group2['RETDF primary NAICS name'].unique()))}
    EoL = {val: 'EoL-' + str(idx + 1) for idx, val \
        in enumerate(list(group4['EoL activity category under TSCA'].unique()))}
    WMH = {val: 'WMH-' + str(idx + 1) for idx, val \
        in enumerate(list(group6['EoL activity category under waste management hierarchy'].unique()))}
    EC = {val: 'EC-' + str(idx + 1) for idx, val \
        in enumerate(list(group6['Environmental compartment'].unique()))}

    # Saving percentages
    i = 0
    for gr in [group1, group2, group3, group4, group5, group6]:
        i = i + 1
        gr.to_csv(dir_path + '/Percentages_{}_'.format(ITN) + str(i) + '.csv', sep = ',', index = False)

    # Saving label names
    TRI = {'Added as a formulation component': 'TRIU-1',
        'Used as a chemical processing aid': 'TRIU-2',
        'Repackaging': 'TRIU-3',
        'Ancillary or other use': 'TRIU-4',
        'Produce the chemical': 'TRIU-5',
        'Used as a reactant': 'TRIU-6',
        'As a process impurity': 'TRIU-7',
        'Used as a manufacturing aid': 'TRIU-8',
        'Import the chemical': 'TRIU-9',
        'Used as an article component': 'TRIU-10'}
    # A condition-of-use key may combine several TRI uses joined by ' + ';
    # translate each component to its TRIU code.
    CoU_aux = {}
    for key, value in CoU.items():
        CoU_aux.update({value: ' + '.join(TRI[e] for e in key.split(' + '))})
    j = 0
    for l in [GiS, TRI, CoU_aux, RETDFiS, EoL, WMH, EC]:
        j = j + 1
        df_aux = pd.DataFrame({'Col 1': list(l.keys()), 'Col 2': list(l.values())})
        df_aux.to_csv(dir_path + '/Label_names_{}_'.format(ITN) + str(j) + '.csv', sep = ',', index = False)

    # Levels and colors
    # One fixed colour per Sankey layer; `levels` is the flat node list whose
    # positions are the node indices used for the links below.
    level_1 = list(GiS.values())
    colors_1 = ['#ff5050' for i in range(len(level_1))]
    level_2 = list(CoU.values())
    colors_2 = ['#0066cc' for i in range(len(level_2))]
    level_3 = list(RETDFiS.values())
    colors_3 = ['#009933' for i in range(len(level_3))]
    level_4 = list(EoL.values())
    colors_4 = ['#ff944d' for i in range(len(level_4))]
    level_5 = list(WMH.values())
    colors_5 = ['#ffcc66' for i in range(len(level_5))]
    level_6 = list(EC.values())
    colors_6 = ['#6666ff' for i in range(len(level_6))]
    levels = level_1 + level_2 + level_3 + level_4 + level_5 + level_6
    colors = colors_1 + colors_2 + colors_3 + colors_4 + colors_5 + colors_6

    # Assemble the Sankey links: one (source, target, value) triple per
    # grouped flow, across all six levels.
    Sources = []
    Targets = []
    Values = []

    for index, row in group1.iterrows():
        Sources.append(levels.index(GiS[row['Generator primary NAICS name']]))
        Targets.append(levels.index(CoU[row['Generator condition of use']]))
        Values.append(row['Proportion'])

    for index, row in group2.iterrows():
        Sources.append(levels.index(CoU[row['Generator condition of use']]))
        Targets.append(levels.index(RETDFiS[row['RETDF primary NAICS name']]))
        Values.append(row['Proportion'])

    for index, row in group3.iterrows():
        Targets.append(levels.index(WMH[row['EoL activity category under TSCA']]))
        Sources.append(levels.index(RETDFiS[row['RETDF primary NAICS name']]))
        Values.append(row['Proportion'])

    for index, row in group4.iterrows():
        Sources.append(levels.index(RETDFiS[row['RETDF primary NAICS name']]))
        Targets.append(levels.index(EoL[row['EoL activity category under TSCA']]))
        Values.append(row['Proportion'])

    for index, row in group5.iterrows():
        Sources.append(levels.index(EoL[row['EoL activity category under TSCA']]))
        Targets.append(levels.index(WMH[row['EoL activity category under waste management hierarchy']]))
        Values.append(row['Proportion'])

    for index, row in group6.iterrows():
        Sources.append(levels.index(WMH[row['EoL activity category under waste management hierarchy']]))
        Targets.append(levels.index(EC[row['Environmental compartment']]))
        Values.append(row['Proportion'])

    # Sankey diagram
    fig1 = go.Figure(data=[go.Sankey(
            node = dict(
                pad = 35,
                thickness = 5,
                line = dict(
                    color = "black",
                    width = 0),
                label = levels,
                color = colors
                ),
            link = dict(
                source = Sources,
                target = Targets,
                value = Values)
                )])
    fig1.update_layout(plot_bgcolor = '#e8e8e8',
                       paper_bgcolor = '#e8e8e8',
                       width=1000,
                       height=900)
    fig1.write_image(dir_path + '/Sankey_{}.pdf'.format(ITN))

    # Box-plot data: keep management/compartment columns plus the Sample draw
    # columns, restricted to compartments/categories with any non-zero draw.
    df_box = df5.iloc[:,[1,5] + list(range(n_cols,Sample + n_cols))]
    EC_non_cero = list(df_box.loc[~(df_box.iloc[:,2:] == 0.0).all(axis = 1), 'Environmental compartment'].unique())
    WMH_non_cero = list(df_box.loc[~(df_box.iloc[:,2:] == 0.0).all(axis = 1), 'EoL activity category under waste management hierarchy'].unique())

    # Flatten the draw matrix per (compartment, management) pair; the 2.20462
    # factor converts kg to lb.
    df_compartments = {}
    for compartment in EC_non_cero:
        df_compartment = pd.DataFrame(columns = ['Management', 'Flow_log', 'Flow'])
        for management in WMH_non_cero:
            df_EC_WM = df_box.loc[(df_box['Environmental compartment'] == compartment) & \
                              (df_box['EoL activity category under waste management hierarchy'] == management)]
            n_times = df_EC_WM.shape[0]*Sample
            col = [2.20462*l[0] for l in np.reshape(df_EC_WM.iloc[:,2:].to_numpy(), (n_times, 1))]
            aux = pd.DataFrame(columns = ['Management', 'Flow'])
            aux['Flow'] = pd.Series(col)
            aux['Flow_log']  = np.log(aux['Flow'])
            aux['Management'] = '<b>' + management + '</b>'
            df_compartment = pd.concat([df_compartment, aux], ignore_index = True, axis = 0)
        df_compartments.update({compartment:df_compartment})

    # Box
    color_box = ['#009933', '#ffcc66', '#ff944d', '#ff5050']
    fig2 = go.Figure()

    for idx, compartment in enumerate(EC_non_cero):
        fig2.add_trace(go.Box(
            y = list(df_compartments[compartment]['Flow_log']),
            x = list(df_compartments[compartment]['Management']),
            name = compartment.capitalize(),
            boxmean = True,
            whiskerwidth = 0.1,
            #notchwidth = 0.1,
            marker = dict(
            color = color_box[idx]
                    )
                    ))

    # Dotted horizontal guide lines at log(0.5), log(10.5), log(499.5),
    # log(999.5) lb/yr reference release levels.
    fig2.update_layout(xaxis = dict(title = '<b>Waste management</b>',
                                    zeroline = False),
                       yaxis = dict(title = '<b>Release, log(lb/yr)</b>',
                                    zeroline = False),
                       boxmode='group',
                       paper_bgcolor = '#f5f5f5',
                       plot_bgcolor = '#e8e8e8',
                       width = 1500,
                       height = 1000,
                       shapes = [
                       go.layout.Shape(
                                        type = 'line',
                                        x0 = -0.5,
                                        y0 = np.log(0.5),
                                        x1 = 3.5,
                                        y1 = np.log(0.5),
                                        line = dict(
                                            color = '#6666ff',
                                            width = 2,
                                            dash = 'dot',
                                        ),
                                        ),
                        go.layout.Shape(
                                         type = 'line',
                                         x0 = -0.5,
                                         y0 = np.log(10.5),
                                         x1 = 3.5,
                                         y1 = np.log(10.5),
                                         line = dict(
                                             color = '#6666ff',
                                             width = 2,
                                             dash = 'dot',
                                         ),
                                         ),
                        go.layout.Shape(
                                         type = 'line',
                                         x0 = -0.5,
                                         y0 = np.log(499.5),
                                         x1 = 3.5,
                                         y1 = np.log(499.5),
                                         line = dict(
                                             color = '#6666ff',
                                             width = 2,
                                             dash = 'dot',
                                         ),
                                         ),
                        go.layout.Shape(
                                         type = 'line',
                                         x0 = -0.5,
                                         y0 = np.log(999.5),
                                         x1 = 3.5,
                                         y1 = np.log(999.5),
                                         line = dict(
                                             color = '#6666ff',
                                             width = 2,
                                             dash = 'dot',
                                         ),
                                         )
                       ],
                        legend = go.layout.Legend(
                                        bgcolor = 'White',
                                        bordercolor = '#6666ff',
                                        borderwidth = 1
                            )
                )
    fig2.update_xaxes(title_font=dict(size=18))
    fig2.update_yaxes(title_font=dict(size=20))
    fig2.write_image(dir_path + '/Box_{}.pdf'.format(ITN))

    # Histogram
    # `Release_code` and `values` (module-level helpers) bin each flow into an
    # ordered release-code category for plotting.
    df_histogram = pd.DataFrame(columns = ['Management', 'Flow', 'Environmental compartment'])
    for compartment in EC_non_cero:
        df_histogram_aux = df_compartments[compartment][['Management', 'Flow']]
        df_histogram_aux['Management'] = df_histogram_aux['Management'].apply(lambda x: x.replace('<b>','').replace('</b>',''))
        df_histogram_aux['Environmental compartment'] = compartment
        df_histogram = pd.concat([df_histogram, df_histogram_aux], axis = 0)

    df_histogram['Relese code'] = df_histogram.apply(lambda x: Release_code(x['Flow']), axis = 1)
    df_histogram['Order'] = df_histogram['Relese code'].apply(lambda x: values(x))
    df_histogram.sort_values(by=['Order'], ascending = True, inplace = True)

    # One histogram trace per (compartment, management); the legend is shown
    # only for the first compartment's traces to avoid duplicate entries.
    color_box = ['#009933', '#ffcc66', '#ff944d', '#ff5050']
    trace = []
    for compartment in EC_non_cero:
        for idx, management in enumerate(WMH_non_cero):
            data = list(df_histogram.loc[(df_histogram['Environmental compartment'] == compartment) & \
                                         (df_histogram['Management'] == management), 'Relese code'])
            if len(trace) < len(WMH_non_cero):
                trace.append(go.Histogram(histnorm = 'probability density',
                                        x = data,
                                        name = management,
                                        marker_color = color_box[idx],
                                        opacity = 0.75,
                                        autobinx = False))
            else:
                trace.append(go.Histogram(histnorm = 'probability density',
                                        x = data,
                                        marker_color = color_box[idx],
                                        opacity = 0.75,
                                        showlegend = False))

    # Layout: one subplot per compartment -- a single column for up to three
    # compartments, a 2x2 grid for four.
    n_EC = len(EC_non_cero)
    n_WMH = len(WMH_non_cero)
    titles = tuple(ec.capitalize() for ec in EC_non_cero)
    if n_EC < 4:
        fig3 = make_subplots(rows = n_EC, cols = 1,
                            shared_xaxes = True,
                            subplot_titles = titles)
        n_trace = 0
        row = 0
        for tr in trace:
            n_trace =  n_trace + 1
            if (n_trace - 1) % n_WMH == 0:
                row = row + 1
            fig3.append_trace(tr, row, 1)
    else:
        fig3 = make_subplots(rows = 2, cols = 2,
                            shared_xaxes = True,
                            subplot_titles = titles)
        n_trace = 0
        row = 1
        col = 1
        n_fig = 0
        for tr in trace:
            n_trace += 1
            if (n_trace - 1) % n_WMH == 0:
                n_fig += 1
                if n_fig == 1:
                    row = 1
                    col = 1
                elif n_fig == 2:
                    row = 1
                    col = 2
                elif n_fig == 3:
                    row = 2
                    col = 1
                else:
                    row = 2
                    col = 2
            fig3.append_trace(tr, row, col)

    fig3.update_layout(paper_bgcolor = '#f5f5f5',
                        plot_bgcolor = '#e8e8e8',
                        legend = go.layout.Legend(
                                        bgcolor = 'White',
                                        bordercolor = '#6666ff',
                                        borderwidth = 1))
    fig3.write_image(dir_path + '/Histogram_{}.pdf'.format(ITN))
Example #14
0
    axis=1)

# Movement counts become the Sankey link widths.
movments.rename(columns={'size': 'value'}, inplace=True)
# One distinct palette colour per node (Israeli cities + world locations).
pal = sns.color_palette('Set2', len(world) + len(isr_cities)).as_hex()

# NOTE(review): node colouring keys off location names; the integer indices in
# movments['source'] / movments['target'] must match the order of the layer_*
# keys below -- verify against the (off-screen) code that builds them.
all_names = list(isr_cities) + world
label_2_color = {}
for i, name in enumerate(all_names):
    label_2_color[name] = pal[i]

# Node labels: the three Sankey layers concatenated in order.
label_list = list(layer_1.keys()) + list(layer_2.keys()) + list(layer_3.keys())

fig = go.Figure(data=[
    go.Sankey(
        node=dict(pad=15,
                  thickness=20,
                  line=dict(color="black", width=0.5),
                  label=list(layer_1.keys()) + list(layer_2.keys()) +
                  list(layer_3.keys()),
                  color=[label_2_color[n] for n in label_list]),
        link=dict(
            source=movments[
                'source'],  # indices correspond to labels, eg A1, A2, A2, B1, ...
            target=movments['target'],
            value=movments['value'],
            #color = [new_pal[i] for i in g['source']]
        ))
])

#fig.write_image(r"/Users/daniellemiller/Google Drive/covid19/paper/COVID19_phylodynamics/figures/transmission_patterns.png", scale=10, width=1000)
fig.show()
Example #15
0
# Recolour the pre-built Sankey spec in `data`, then render it.
sankey_spec = data['data'][0]

# Replace the named colour "magenta" with an explicit rgba value so link
# colours can be derived from node colours by a string substitution below.
node_colors = []
for color in sankey_spec['node']['color']:
    node_colors.append('rgba(255,0,255, 0.8)' if color == "magenta" else color)
sankey_spec['node']['color'] = node_colors

# Each link inherits its source node's colour at the configured opacity.
link_colors = []
for src in sankey_spec['link']['source']:
    link_colors.append(sankey_spec['node']['color'][src].replace("0.8", str(opacity)))
sankey_spec['link']['color'] = link_colors

node_spec = dict(pad=15,
                 thickness=15,
                 line=dict(color="black", width=0.5),
                 label=sankey_spec['node']['label'],
                 color=sankey_spec['node']['color'])
link_spec = dict(source=sankey_spec['link']['source'],
                 target=sankey_spec['link']['target'],
                 value=sankey_spec['link']['value'],
                 label=sankey_spec['link']['label'],
                 color=sankey_spec['link']['color'])

fig = go.Figure(data=[go.Sankey(valueformat=".0f",
                                node=node_spec,
                                link=link_spec)])

fig.update_layout(title_text="Imagined CreAItures", font_size=14)

fig.show()
Example #16
0
def new_cnx_convergence(kw_pairs, cat, year, mode):
    """Track how the graph distance of keyword pairs evolves until they connect.

    For every year chunk in ``YEARS``, the shortest-path distance of each pair
    in *kw_pairs* is computed on that year's graph; the year-to-year distance
    transitions are then aggregated and saved as a Sankey diagram (HTML) under
    ``PLOT_DIR``.

    Parameters
    ----------
    kw_pairs : sequence of 2-tuples
        Keyword pairs whose distances are observed.
    cat, year, mode
        Used to format the graph input path and the output file name.
    """

    # Distance codes per pair, one entry per year chunk:
    #   0             -> at least one keyword not yet in graph
    #   1             -> both present but unconnected
    #   2 + d         -> shortest path of length d (d < THRESHOLD)
    #   2 + THRESHOLD -> path length >= THRESHOLD
    pair_dists = [list() for _ in range(len(kw_pairs))]

    # construct list of labels: one block of `distances` labels per year
    distances = ["not in graph", "unconnected"
                 ] + [str(i)
                      for i in range(THRESHOLD)] + [">=" + str(THRESHOLD)]
    # add years for readability
    labels = [d + " : " + str(y) for y in YEARS for d in distances]

    # BUG FIX: the original reused `i` as the index of BOTH this loop and the
    # inner pair loop, so the label offset `i * len(distances) + d` was
    # computed from the *pair* index instead of the *year* index.  Distinct
    # loop variables restore the intended
    # "year_block * len(distances) + distance" indexing into `labels`.
    for year_idx, y_chunk in enumerate(YEARS):
        graph = build_graph(GRAPH_FILES.format(cat, y_chunk))
        for pair_idx, pair in enumerate(kw_pairs):
            n1, n2 = pair
            # all distances for all years are consecutively numbered -> to match a label in 'labels'
            if n1 not in graph or n2 not in graph:
                d = 0
            elif not nx.has_path(graph, n1, n2):
                d = 1
            else:
                path = nx.shortest_path_length(graph, n1, n2)
                if path < THRESHOLD:
                    d = 2 + path  # first two indices are taken by 'not in graph' and 'unconnected'
                else:
                    d = 2 + THRESHOLD
            # Save index that matches the right label in 'labels'
            pair_dists[pair_idx].append(year_idx * len(distances) + d)

    # convert the results for plotly sankey diagrams

    # source contains start distance (label); target contains goal distance
    # (label); value contains the number of links that changed distance from
    # source to target in a specific year -> data is always saved as a triple
    dist_changes = {"source": list(), "target": list(), "value": list()}
    # saves position of source-target pairs in dist_changes
    transfer_indices = dict()
    for pair_dist in pair_dists:
        for step in range(len(pair_dist) - 1):
            source_target = (pair_dist[step], pair_dist[step + 1])

            if source_target not in transfer_indices:
                # get next free position
                transfer_indices[source_target] = len(transfer_indices)
                dist_changes["source"].append(source_target[0])
                dist_changes["target"].append(source_target[1])
                dist_changes["value"].append(0)

            dist_changes["value"][transfer_indices[source_target]] += 1

    # Save results
    fig = go.Figure(data=[
        go.Sankey(
            node=dict(pad=15,
                      thickness=20,
                      line=dict(color="black", width=0.5),
                      label=labels,
                      color="blue"),
            link=dict(
                source=dist_changes[
                    "source"],  # indices correspond to labels, eg A1, A2, A2, B1, ...
                target=dist_changes["target"],
                value=dist_changes["value"]))
    ])

    fig.update_layout(
        title_text=
        "Distance of {} sampled keyword pairs that get connected in {}".format(
            len(kw_pairs), year),
        font_size=10)
    plotly.io.write_html(fig,
                         PLOT_DIR.format(
                             "pairs_before_cnx_{}_{}_{}.html".format(
                                 cat, year, mode)),
                         include_plotlyjs="cdn")
    print("Saved as " + PLOT_DIR.format(
        "pairs_before_cnx_{}_{}_{}.html".format(cat, year, mode)))
Example #17
0
# Build Sankey links between consecutive columns of `imp_dfs_cl`: each unique
# value in one column connects to each unique value in the next column,
# weighted by the summed `to_group` quantity of the rows matching both.
# NOTE(review): repeated `list(nodes['label'])` and boolean-mask lookups make
# this O(n^2) per column pair -- acceptable for small data, confirm scale.
for i, col in enumerate(cols):
    # The last column has no successor, so it produces no links.
    if i != len(cols) - 1:
        n_col = cols[i + 1]
        for col_un in imp_dfs_cl[col].unique():
            for n_col_un in imp_dfs_cl[n_col].unique():
                # Only link values present as nodes; node index = row index.
                if (col_un in list(nodes['label'])) and (n_col_un in list(nodes['label'])):
                    source.append(nodes[nodes['label'] == col_un].index[0])
                    target.append(nodes[nodes['label'] == n_col_un].index[0])
                    values.append(imp_dfs_cl.loc[(imp_dfs_cl[col] == col_un) & (imp_dfs_cl[n_col] == n_col_un)][to_group].sum())

print(f"> Renaming columns")
# Apply display names to node labels; node indices are unaffected.
nodes = nodes.replace(rename_mask)

print(f"> Creating sankey diagram with {len(source)} sources, {len(target)} targets and {len(values)} values")
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=nodes['label'],  # ["A1", "A2", "B1", "B2", "C1", "C2"],
        color='#245C7C'
    ),
    link=dict(
        source=source,
        target=target,
        value=values
    ))])
fig.update_layout(title_text=f"Summed {to_group.title()}", font_size=10)
# Write an interactive HTML version of the diagram.
offline.plot(fig, filename='Content/sankey.html')
print("Finished")
Example #18
0
        names.append(s.replace('MRS_TX_3', 'mRS_90days'))
    else:
        names.append(s)

# Sankey links: MRS_TX_1 categories (scores 0-6) flowing into the two
# MRS_TX_3 categories (0/1).  Columns are one-hot indicators (== 1 when the
# category applies), so each link value is a patient count.
source = []
target = []
value = []


# NOTE(review): link indices come from `label` while the displayed node
# labels are `names`; the two lists must be positionally aligned -- confirm
# against the (off-screen) code that builds them.
for i in ['0', '1', '2', '3', '4', '5', '6']:
    for j in ['0', '1']:
        source.append(label.index('MRS_TX_1_'+i))
        target.append(label.index('MRS_TX_3_'+j))
        value.append(data[(data['MRS_TX_1_'+i] == 1) & (data['MRS_TX_3_'+j] == 1)].shape[0])


fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = names
    ),
    link = dict(
      source = source, # indices correspond to labels, eg A1, A2, A2, B1, ...
      target = target,
      value = value
  ))])

fig.update_layout(title_text="", font_size=10)
fig.show()
    def sankey_diagram(self, output_file):
        """Write a Sankey diagram of patient pathways to ``output_file``.

        Builds sets of patient identifiers (``name_index``) for each stage of
        the hospital pathway from ``self._df`` and renders the flows between
        stages with plotly. ``output_file`` goes to ``fig.write_image``, so it
        should be an image path (requires a plotly image backend).
        """
        # Node order matters: the link endpoints below are indices into this list.
        nodes = [
            "Hospitalisation", "ICU Admission", "Non-ICU Wards",
            "Poor Outcomes", "Mild Outcomes", "Death", "Discharged"
        ]
        # All hospitalised patients; subsets are carved out with set algebra.
        hos = set(self._df['name_index'].tolist())
        # Any ICU stay is indicated by a non-null days_in_icu.
        hos2icu = set(
            self._df[self._df['days_in_icu'].notnull()]['name_index'].tolist())
        hos2nonicu = hos - hos2icu
        # poor_prognosis > 0 flags a poor outcome; death > 0 flags death.
        poor_outcomes = set(
            self._df[self._df['poor_prognosis'] > 0]['name_index'].tolist())
        # NOTE(review): mild_outcomes is computed but never used below;
        # the mild flows are derived per-ward (icu2mild / nonicu2mild) instead.
        mild_outcomes = hos - poor_outcomes
        deaths = set(self._df[self._df['death'] > 0]['name_index'].tolist())
        # hos2poor = poor_outcomes - hos2icu
        # hos2death = deaths - poor_outcomes - hos2icu
        # hos2dis = hos - hos2death - hos2poor - hos2icu

        # ICU patients split into death (without a poor outcome), poor, mild.
        # `-` binds tighter than `&`, so this is deaths & (hos2icu - poor_outcomes),
        # which equals (deaths & hos2icu) - poor_outcomes.
        icu2death = deaths & hos2icu - poor_outcomes
        icu2poor = poor_outcomes & hos2icu
        icu2mild = hos2icu - icu2poor - icu2death

        # Same three-way split for patients who never entered the ICU.
        nonicu2poor = poor_outcomes - hos2icu
        nonicu2death = deaths - poor_outcomes - hos2icu
        nonicu2mild = hos - nonicu2death - nonicu2poor - hos2icu

        mild2dis = icu2mild | nonicu2mild

        poor2death = poor_outcomes & deaths
        poor2dis = poor_outcomes - poor2death

        # Each link: s = source label, t = target label, n = patient count.
        links = [{
            "s": "Hospitalisation",
            "t": "ICU Admission",
            "n": len(hos2icu)
        }, {
            "s": "Hospitalisation",
            "t": "Non-ICU Wards",
            "n": len(hos2nonicu)
        }, {
            "s": "Non-ICU Wards",
            "t": "Poor Outcomes",
            "n": len(nonicu2poor)
        }, {
            "s": "Non-ICU Wards",
            "t": "Death",
            "n": len(nonicu2death)
        }, {
            "s": "Non-ICU Wards",
            "t": "Mild Outcomes",
            "n": len(nonicu2mild)
        }, {
            "s": "ICU Admission",
            "t": "Poor Outcomes",
            "n": len(icu2poor)
        }, {
            "s": "ICU Admission",
            "t": "Death",
            "n": len(icu2death)
        }, {
            "s": "ICU Admission",
            "t": "Mild Outcomes",
            "n": len(icu2mild)
        }, {
            "s": "Mild Outcomes",
            "t": "Discharged",
            "n": len(mild2dis)
        }, {
            "s": "Poor Outcomes",
            "t": "Death",
            "n": len(poor2death)
        }, {
            "s": "Poor Outcomes",
            "t": "Discharged",
            "n": len(poor2dis)
        }]
        # Translucent link colours, keyed by destination kind.
        colors = {
            "toICU": "rgba(245, 182, 66, 0.2)",
            "toPoor": "rgba(240, 151, 139, 0.2)",
            "toNonICU": "rgba(124, 159, 191, 0.2)",
            "toDeath": "rgba(105, 81, 78, 0.2)",
            "toMild": "rgba(124, 191, 144, 0.2)",
            "toDischarge": "rgba(124, 191, 144, 0.2)",
            "toDischarge2": "rgba(54, 153, 131, 0.2)"
        }
        # Flatten `links` into the parallel source/target/value lists plotly wants.
        s = []
        t = []
        v = []
        for l in links:
            s.append(nodes.index(l['s']))
            t.append(nodes.index(l['t']))
            v.append(l['n'])
        # The link colour list is positional and must follow the order of `links`.
        fig = go.Figure(data=[
            go.Sankey(node=dict(pad=15,
                                thickness=20,
                                line=dict(color="black", width=0.5),
                                label=nodes,
                                color=[
                                    "#f5bc42", "#f57b42", '#425b96', "#f55742",
                                    "#829bba", "#61352f", "#37b05b"
                                ]),
                      link=dict(source=s,
                                target=t,
                                value=v,
                                color=[
                                    colors['toICU'], colors['toNonICU'],
                                    colors['toPoor'], colors['toDeath'],
                                    colors['toDischarge'], colors['toPoor'],
                                    colors['toDeath'], colors['toMild'],
                                    colors['toDischarge'], colors['toDeath'],
                                    colors['toDischarge2']
                                ]))
        ])

        fig.update_layout(title_text="Pathways of China Cohort", font_size=10)
        # fig.show()
        fig.write_image(output_file)
Example #20
0
def plot_sankey(all_results,
                live=False,
                filename="communities_sankey.html",
                time_index=None):
    """Plot a Sankey diagram of communities across time (plotly only).

    Args:
        all_results (dict): results from run function; mutated in place by
            adding the "community_id_reduced" key
        live (bool): if True, interactive figure will appear in browser
        filename (str): filename to save the plot
        time_index (list): optional indices selecting which time steps to plot
    """
    import plotly.graph_objects as go
    from plotly.offline import plot as _plot

    # Restrict to the requested time steps (all of them by default).
    if time_index:
        all_results["community_id_reduced"] = [
            all_results["community_id"][idx] for idx in time_index
        ]
    else:
        all_results["community_id_reduced"] = all_results["community_id"]

    reduced = all_results["community_id_reduced"]
    sources, targets, link_values = [], [], []
    offset = 0  # running node-index offset: each time step gets its own nodes

    for step in range(len(reduced) - 1):
        memberships_now = np.array(reduced[step])
        memberships_next = np.array(reduced[step + 1])
        ids_now = set(memberships_now)
        ids_next = set(memberships_next)
        for src in ids_now:
            for dst in ids_next:
                # Number of nodes moving from community `src` to `dst`.
                flow = sum(memberships_next[memberships_now == src] == dst)
                if flow > 0:
                    link_values.append(flow)
                    sources.append(src + offset)
                    targets.append(dst + len(ids_now) + offset)
        offset += len(ids_now)

    fig = go.Figure(
        data=[
            go.Sankey(
                node=dict(
                    pad=1,
                    thickness=1,
                    line=dict(color="black", width=0.0),
                ),
                link=dict(source=sources, target=targets, value=link_values),
            )
        ],
        layout=go.Layout(autosize=True),
    )

    _plot(fig, filename=filename)

    if live:
        fig.show()
Example #21
0
def plot_factor_diagram(factor):
    """Show a Sankey diagram of the dependency graph rooted at ``factor``.

    Walks the factor tree depth-first: every distinct factor instance becomes
    one node (deduplicated by ``id``), and every ``inputs``/``mask`` edge
    becomes one link whose width is the parent's window length (``win``).
    """
    import plotly.graph_objects as go
    from .factor import BaseFactor, CustomFactor
    from .datafactor import DataFactor

    # Node colour palette, cycled over however many nodes the graph yields.
    # NOTE(review): one entry is plain "magenta" where the pattern suggests
    # "rgba(140, 86, 75, 0.8)" -- looks like a leftover/typo; preserved as-is.
    color = [
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "rgba(140, 86, 75, 0.8)",
        "rgba(227, 119, 194, 0.8)", "rgba(127, 127, 127, 0.8)",
        "rgba(188, 189, 34, 0.8)", "rgba(23, 190, 207, 0.8)",
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "rgba(140, 86, 75, 0.8)",
        "rgba(227, 119, 194, 0.8)", "rgba(127, 127, 127, 0.8)",
        "rgba(188, 189, 34, 0.8)", "rgba(23, 190, 207, 0.8)",
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "rgba(140, 86, 75, 0.8)",
        "rgba(227, 119, 194, 0.8)", "rgba(127, 127, 127, 0.8)",
        "rgba(188, 189, 34, 0.8)", "rgba(23, 190, 207, 0.8)",
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "magenta", "rgba(227, 119, 194, 0.8)",
        "rgba(127, 127, 127, 0.8)", "rgba(188, 189, 34, 0.8)",
        "rgba(23, 190, 207, 0.8)", "rgba(31, 119, 180, 0.8)",
        "rgba(255, 127, 14, 0.8)", "rgba(44, 160, 44, 0.8)",
        "rgba(214, 39, 40, 0.8)", "rgba(148, 103, 189, 0.8)",
        "rgba(140, 86, 75, 0.8)", "rgba(227, 119, 194, 0.8)",
        "rgba(127, 127, 127, 0.8)"
    ]

    factor_id = dict()  # id(instance) -> node index; dedupes shared sub-factors
    label = []          # node labels, one per distinct factor
    source = []         # link source node indices
    target = []         # link target node indices
    value = []          # link widths (the parent's window size)
    line_label = []     # link labels: 'inputs' or 'mask'

    def add_node(this, parent_label_id, parent_label, parent_win):
        # Register `this` as a node (once) and link it to its parent.
        class_id = id(this)

        if class_id in factor_id:
            # Already seen: reuse the existing node index.
            this_label_id = factor_id[class_id]
        else:
            this_label_id = len(label)
            # DataFactor nodes are labelled by their input column name,
            # everything else by its class name.
            if isinstance(this, DataFactor):
                label.append(this.inputs[0])
            else:
                label.append(type(this).__name__)

        # The root call passes parent_label_id=None and adds no link.
        if parent_label_id is not None:
            source.append(parent_label_id)
            target.append(this_label_id)
            value.append(parent_win)
            line_label.append(parent_label)

        # A previously-seen node keeps its existing outgoing edges; stop here.
        if class_id in factor_id:
            return

        if isinstance(this, CustomFactor):
            this_win = this.win
        else:
            this_win = 1

        factor_id[class_id] = this_label_id
        # Recurse into upstream factors and the optional mask.
        if isinstance(this, CustomFactor):
            if this.inputs:
                for upstream in this.inputs:
                    if isinstance(upstream, BaseFactor):
                        add_node(upstream, this_label_id, 'inputs', this_win)

            if this._mask is not None:
                add_node(this._mask, this_label_id, 'mask', this_win)

    add_node(factor, None, None, None)

    fig = go.Figure(data=[
        go.Sankey(
            valueformat=".0f",
            valuesuffix="win",
            node=dict(pad=15,
                      thickness=15,
                      line=dict(color="black", width=0.5),
                      label=label,
                      color=list(islice(cycle(color), len(label)))),
            # Add links
            link=dict(
                source=source, target=target, value=value, label=line_label))
    ])

    fig.update_layout(title_text="Factor Diagram")
    fig.show()
# Second-stage state counts, ordered by state code.
e2 = df['estado2'].value_counts().sort_index().values

# Normalise both stages to percentages so link widths are comparable.
e1 = e1 / e1.sum() * 100
e2 = e2 / e2.sum() * 100

# Concatenate flattened (axis=None) and convert to plain Python floats.
values = np.concatenate((e1, e2), axis=None).tolist()


node_spec = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=["necesita aprobación", "no necesita aprobación", "obtuvo aprobación", "no obtuvo aprobación", "oportunidad ganada", "oportunidad perdida"],
)
link_spec = dict(
    # indices into the label list above
    source=[0, 0, 1, 2, 2, 3, 3],
    target=[2, 3, 3, 4, 5, 4, 5],
    value=values,
)

fig = go.Figure(data=[go.Sankey(node=node_spec, link=link_spec)])

fig.update_layout(
    title={
        'text': "Analisis de aprobación de precio total de oportunidades",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
Example #23
0
                max_len = max(map(len, next_level_numbers))
                node_label.append(f'{label}{"X" * max_len}')
                if max_len < 2:
                    continue
                if next_level_numbers and parent_node_id is not None:
                    links_source.append(parent_node_id)
                    link_target.append(node_id)
                    link_value.append(len(next_level_numbers))
                add_to_sankey(next_level_numbers, parent_node_id=node_id)

        add_to_sankey(numbers=dns)
        fig = go.Figure(data=[
            go.Sankey(node=dict(pad=15,
                                thickness=20,
                                line=dict(color='black', width=0.5),
                                label=node_label,
                                color='blue'),
                      link=dict(source=links_source,
                                target=link_target,
                                value=link_value))
        ])
        fig.update_layout(title_text='Sankey', font_size=10)
        fig.show()

    @menu_register('Find intra-site dialing translation patterns')
    def menu_find_intrasite_translation_patterns(self):
        tps = self.proxy.translation_pattern.list
        print(f'Found {len(tps)} translation patterns')
        tp_by_len: Dict[int, List[TranslationPattern]] = defaultdict(list)
        names = []
        parents = []
        for tp in tps:
]
# Source node legend (indices into `label`, defined above): 0=AI, 1=ML,
# 4..9 = application areas (fraud detection, recommendation, clustering, NLP,
# image recognition, medical diagnosis), 18..22 = arbitrage / forecasting /
# predictive maintenance / epidemiology buckets -- presumably; confirm
# against the `label` list.
source = [
    0, 0, 0, 1, 1, 1, 1, 1, 1, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7,
    8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8,
    9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 3, 3, 3, 21, 21,
    21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 22, 22
]
target = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15,
    15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 19, 19,
    19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 20, 21, 22, 10, 11, 12, 15, 16, 17,
    18, 10, 11, 12, 15, 16, 17, 18, 11, 18
]
value = [
    20, 0, 20, 6, 6, 3, 1, 2, 2, 0.8, 1.45, .6, 0, .4, .33, 0.8, 1.45, 0, 0, 0,
    .33, .2, 1.45, 0, 0, 0, 0, 0.8, 0, .6, 0, 0, 0, 0.8, 0, .6, .9, .4, 0, 0.8,
    0, .6, 0, .4, .33, 0.8, 1.45, 0, 0, .4, 0, 0.4, 0, .3, .1, .2, .15, 0.4, 0,
    .3, .1, .2, .15, 0.2, 0.2, 0, 0, 0, .33, 6, 8, 6, 8 / 7, 8 / 7, 8 / 7,
    8 / 7, 8 / 7, 8 / 7, 8 / 7, 6 / 7, 6 / 7, 6 / 7, 6 / 7, 6 / 7, 6 / 7,
    6 / 7, 3, 3
]
# Assemble the trace: the node/link dicts feed straight into go.Sankey.
sankey_trace = go.Sankey(
    link=dict(source=source, target=target, value=value),
    node=dict(label=label, pad=50, thickness=5),
)
fig = go.Figure(sankey_trace)
plot(fig)
Example #25
0
    # Sankey Diagram
    html.Div([
        html.H1(children='Sankey Diagram for Pokemon Dataset',
                style={'textAlign': 'center'}),
        dcc.Graph(
            id='snakey_diagram',
            figure=go.Figure(data=[
                go.Sankey(
                    valueformat=".0f",
                    valuesuffix=" pokemon",
                    # Define nodes
                    node=dict(
                        pad=15,
                        thickness=15,
                        #line = dict(color = "black", width = 0.5),
                        label=cat_list,
                        color='darkcyan'),
                    # Add links
                    link=dict(source=sources,
                              target=targets,
                              value=values,
                              color=flow_color
                              #label =  data['data'][0]['link']['label']
                              ))
            ])),
    ])
])


# update the k-means graph
@app.callback(Output('kmeans_with_k', 'figure'),
Example #26
0
                     - not providing an indication of both number of subjects and number of ROIs.
                  """]

# PRISMA-style literature-screening flow rendered as a Sankey diagram.
# Node x/y positions are fixed manually; "freeform" lets the user drag nodes.
fig1 = go.Figure(data=[go.Sankey(
    arrangement = "freeform",
    node = dict(
      pad = 80,
      thickness = 10,
      line = dict(color = "black", width = 0.5),
      label = ["Main records identified (database searching)",
               "Additional records (reviews)",
               "Records screened",
               "Records excluded",
               "Full-text articles assessed for eligibility",
               "Full-text articles excluded",
               "Studied included in the literature overview",
               "Studies included in the meta-analysis"],
      # Normalised [0,1] node coordinates; indices match `label` above.
      x = [0, 0, 0.4, 0.6, 0.5, 0.8, 0.7, 1],
      y = [0, 0, 0.5, 0.8, 0.15, 0.05, 0.4, 0.6],
      hovertemplate = "%{label}<extra>%{value}</extra>",
      # Blue = sources, red = exclusions, green = inclusions.
      color = ["darkblue","darkblue","darkblue","darkred","darkgreen","darkred","darkgreen","darkgreen"]
    ),
    link = dict(
      source = [0, 1, 2, 2, 4, 4, 6],
      target = [2, 2, 3, 4, 5, 6, 7],
      # Article counts at each screening step.
      value = [688, 1, 597, 92, 34, 58, 43],
      # Per-link hover text supplied by `screening_info` (defined above).
      customdata = screening_info,
      hovertemplate = "%{customdata}",
  ))])

fig1.update_layout(title = dict(text="Figure 1 - Review methodology"),
                   width=650,
Example #27
0
def SANK(data):
  """Build an interactive Sankey diagram (HTML div) from the request `data`.

  If no genes are selected, flows are built from the observation annotations;
  otherwise gene expression is binned (`sankBin` bins) and concatenated with
  the annotations. Returns a base64-encoded image when `imgSave` is requested,
  otherwise an HTML div string with interactive layout controls.
  """
  updateGene(data)
  if len(data['genes'])==0:
    # No genes selected: categorical columns come from the observations only.
    # Values are prefixed with their column name so labels are globally unique.
    tmp, D = getObs(data)
    D = D.apply(lambda x:x.apply(lambda y:x.name+":"+y))
  else:
    # Genes selected: bin each gene's expression into `sankBin` intervals and
    # label each bin as "<gene>:<low>_<high>", alongside the annotations.
    adata = createData(data)
    D = pd.concat([adata.obs.apply(lambda x:x.apply(lambda y:x.name+":"+y)),
                   adata.to_df().apply(lambda x:pd.cut(x,int(data['sankBin'])).apply(lambda y:x.name+":"+'%.1f_%.1f'%(y.left,y.right)))],
                  axis=1,sort=False)
  D = D.astype('str').astype('category')
  if 'name_0' in D.columns:
    del D['name_0']

  # Colour palettes: alternate Set1/Set3 for small categoricals (<10 levels),
  # viridis for columns with many levels.
  colName =['Set1','Set3','viridis']
  labels = []   # one entry per category level, across all columns
  cols = []     # node colours, parallel to `labels`
  colindex = 0
  for gID in D.columns:
    gNames = list(D[gID].unique())
    labels += gNames
    if len(gNames) <10:
      cols += sns.color_palette(colName[colindex%2],len(gNames)).as_hex()
      colindex += 1
    else:
      cols += sns.color_palette(colName[2],len(gNames)).as_hex()

  # Build links between each pair of adjacent columns in the requested order:
  # one link per co-occurring (source level, target level) pair, weighted by
  # its row count.
  sIDs =[]
  dIDs =[]
  v=[]
  Dnames = data['sankOrder']#list(D.columns)
  #maxGrp = 0
  #ppr.pprint(Dnames)
  for i in range(len(Dnames)-1):
    oneName = Dnames[i:i+2]
    #maxGrp = max(maxGrp,len(D[oneName[0]].unique()))
    summaryOne = D.groupby(oneName).size().reset_index(name='Count')
    summaryOne=summaryOne[summaryOne['Count']>0]
    sIDs += list(summaryOne[oneName[0]].apply(lambda x: labels.index(x)))
    dIDs += list(summaryOne[oneName[1]].apply(lambda x: labels.index(x)))
    v += list(summaryOne['Count'])

  # Raw trace dict; accepted by go.Sankey(...) below.
  data_trace = dict(
    type='sankey',
    domain=dict(x=[0,1],y=[0,1]),
    orientation='h',
    valueformat = ".0f",
    node = dict(
      pad = 10,
      thickness = 15,
      line = dict(
        color = "black",
        width = 0.5
      ),
      label =  labels,
      color =  cols
    ),
    link = dict(
      source = sIDs,
      target = dIDs,
      value = v
    )
  )
  ## if the image is requested
  if 'imgSave' in data.keys():
    # Static export path: render to the requested image format and return it
    # base64-encoded for embedding.
    layout = dict(
      font = dict(size=int(data['figOpt']['fontsize'])),
      height= int(data['imgH']),
      width = int(data['imgW'])*D.shape[1]
    )
    fig = go.Figure(data=[go.Sankey(data_trace)],layout=layout)
    img = plotIO.to_image(fig,data['imgSave'])
    return base64.encodebytes(img).decode('utf-8')

  # Interactive path: attach restyle buttons for thickness, padding,
  # arrangement and orientation, then return the figure as an HTML div.
  layout = dict(
    font = dict(size=int(data['figOpt']['fontsize'])),
    height= int(data['imgH']),
    width = int(data['imgW'])*D.shape[1],
    updatemenus= [
            dict(
                y=0.9,
                buttons=[
                    dict(
                        label='Thick',
                        method='restyle',
                        args=['node.thickness', 15]
                    ),
                    dict(
                        label='Thin',
                        method='restyle',
                        args=['node.thickness', 8]
                    )      
                ]
            ),
            dict(
                y=0.8,
                buttons=[
                    dict(
                        label='Small gap',
                        method='restyle',
                        args=['node.pad', 15]
                    ),
                    dict(
                        label='Large gap',
                        method='restyle',
                        args=['node.pad', 20]
                    )
                ]
            ),
            dict(
                y=0.7,
                buttons=[
                    dict(
                        label='Snap',
                        method='restyle',
                        args=['arrangement', 'snap']
                    ),
                    dict(
                        label='Perpendicular',
                        method='restyle',
                        args=['arrangement', 'perpendicular']
                    ),
                    dict(
                        label='Freeform',
                        method='restyle',
                        args=['arrangement', 'freeform']
                    ),
                    dict(
                        label='Fixed',
                        method='restyle',
                        args=['arrangement', 'fixed']
                    )       
                ]
            ),
            dict(
                y=0.6,
                buttons=[             
                    dict(
                        label='Horizontal',
                        method='restyle',
                        args=['orientation','h']#{,'height':700,'width':250*D.shape[1]}
                    ),
                    dict(
                        label='Vertical',
                        method='restyle',
                        args=['orientation','v']#{'orientation': 'v','height':250*D.shape[1],'width':700}
                    )
                ]
            
            )
        ]    
  )
  fig = go.Figure(data=[go.Sankey(data_trace)],layout=layout)
  div = plotIO.to_html(fig)
  return div#[div.find('<div>'):(div.find('</div>')+6)]
Example #28
0
def sankey():
    """Render a Sankey diagram of state transitions produced by node_trace().

    node_trace() is expected to return (sources, targets, values); the
    indices refer to the node list below (one root node "0" plus five
    states repeated for five time steps) -- confirm against node_trace().
    """
    result = node_trace()

    # One root node "0" followed by the five states repeated for five steps.
    state_names = ["state1", "state2", "state3", "state4", "state5"]
    labels = ["0"] + state_names * 5

    # Per-state node colours (alpha 0.8); the root node is black.
    # Plotly pairs "label" and "color" positionally, so the lists must align.
    node_palette = [
        "rgba(82,12,172,0.8)",
        "rgba(190,190,190,0.8)",
        "rgba(6,130,196,0.8)",
        "rgba(30,122,51,0.8)",
        "rgba(156,9,13,0.8)",
    ]
    node_colors = ["rgba(0,0,0,0.8)"] + node_palette * 5

    # Link colours: for each of the 4 transitions, 5 outgoing links per source
    # state, coloured by the source state (red keeps alpha 0.8, others 0.5).
    link_palette = [
        "rgba(82,12,172,0.5)",
        "rgba(190,190,190,0.5)",
        "rgba(6,130,196,0.5)",
        "rgba(30,122,51,0.5)",
        "rgba(156,9,13,0.8)",
    ]
    link_colors = [shade for shade in link_palette for _ in range(5)] * 4

    fig = go.Figure(data=[
        go.Sankey(
            valueformat=".0f",
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=labels,
                color=node_colors,
            ),
            link=dict(
                source=result[0],  # indices correspond to `labels`
                target=result[1],
                value=result[2],
                color=link_colors,
            ),
        )
    ])

    fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
    fig.show()
Example #29
0
def _log_plot(log_data, savetofile, checkpoint_lines: List[str], pipeline_sections):
    """Render two artifacts from a run log.

    1. A matplotlib table of "important" checkpoint times, saved to
       *savetofile* (only when at least one checkpoint matched).
    2. A plotly Sankey diagram of nested pipeline sections, written next to
       *savetofile* with a ``_sankey`` suffix and an ``.html`` extension.

    Parameters
    ----------
    log_data : sequence of (line, file_line, time) triples, ordered by time.
    savetofile : project File-like object exposing ``.abspath`` — TODO confirm.
    checkpoint_lines : substrings that mark a log line as important.
    pipeline_sections : mapping of section name -> dict with 'start'/'end'
        timestamps and 'thread'/'process'/'pid' identifiers — presumably
        produced by the logging pipeline; verify against caller.
    """
    import matplotlib.pyplot as plt  # 1 SECOND IMPORT

    # Collect the checkpoint label and timestamp of every matching log line.
    important_text = []
    important_time = []
    for _, file_line, t in log_data:
        for cl in checkpoint_lines:
            if cl in file_line:
                important_text.append(cl)
                important_time.append(t)

    @dataclass
    class LoggedPipelineSection:
        start: float
        end: float
        thread: str
        process: str
        pid: str
        label: str
        # subsections: Optional[List] = None  # does not do anything yet
        index: int = -1  # MUST SET LATER
        source: Optional[int] = None  # index of the enclosing section, might set later
        sourceSec: Optional["LoggedPipelineSection"] = None  # the enclosing section itself
        x: int = 0  # nesting depth; later normalized to [0, 1] for the sankey

        # set later
        time_amount: Optional[float] = None
        time_rel: Optional[float] = None
        time_amount_rel: Optional[float] = None
        y_center: Optional[float] = None

        color: str = 'orange'

    # Only sections that both started and finished can be drawn.
    loggedSections = []
    for sec, v in listitems(pipeline_sections):
        if v['start'] and v['end']:
            loggedSections.append(LoggedPipelineSection(
                start=v['start'],
                end=v['end'],
                label=sec,
                thread=v['thread'],
                pid=v['pid'],
                process=v['process']
            ))

    important_text = [shorten_str(s, 20) for s in important_text]

    fig, axs = plt.subplots(nrows=1)
    table_ax = axs
    table_ax.set_axis_off()

    important_time = [round(t, 2) for t in important_time]
    if important_time:
        table_ax.table(
            cellText=[[str(t)] for t in important_time],
            rowLabels=important_text,
            colLabels=['time'],
            rowColours=["palegreen"] * (len(important_text) + 1),
            colColours=["palegreen"] * 2,
            colWidths=[0.5, 0.5],
            cellLoc='center',
            loc='center'
        )
        table_ax.set_title('Important Logs', fontweight="bold")

    # The first logged section is treated as spanning the whole run; durations
    # and vertical positions are expressed relative to it.
    loggedSectionsTotal = loggedSections[0].end - loggedSections[0].start
    for i, sec in enum(loggedSections):
        sec.time_amount = sec.end - sec.start
        # no need for time_rel?
        sec.time_amount_rel = sec.time_amount / loggedSectionsTotal
        sec.y_center = (((sec.end - (sec.time_amount / 2)) - loggedSections[0].start) / loggedSectionsTotal)
        sec.index = i
    loggedSections[0].y_center = 0.5

    # Link each section to its tightest enclosing section: among the sections
    # that start before it AND end after it, pick the latest-starting one.
    for sec in loggedSections:
        started_before = [other for other in loggedSections if sec.start > other.start]
        enclosing = []
        for cand in started_before:
            if sec.end < cand.end:
                enclosing.append(cand)
            elif sec.start > cand.end:  # fixed: compared against stale loop var before
                pass
                # OVERLAP!
                # assert sec.start > cand.end  # throws error if there is overlap but not nesting
        if enclosing:
            parent = max(enclosing, key=lambda x: x.start)
            sec.source = parent.index
            sec.sourceSec = parent

    def count_recurse(sec):
        # Nesting depth = number of ancestors above this section.
        if sec.sourceSec:
            return 1 + count_recurse(sec.sourceSec)
        else:
            return 0
    for sec in loggedSections:
        sec.x = count_recurse(sec)

    if important_text:
        plt.savefig(savetofile.abspath)

    plt.clf()

    # Normalize nesting depth to [0, 1]; `or 1` guards against a flat
    # hierarchy (every depth 0), which previously divided by zero.
    maxX = max([sec.x for sec in loggedSections]) or 1
    xstep = 1 / maxX
    for sec in loggedSections:
        sec.x = sec.x / maxX

    labels = [sec.label for sec in loggedSections]
    # Durations of all non-root sections, in list order. NOTE(review): the
    # link targets below assume these are exactly indices 1..n-1 — confirm
    # that pipeline_sections is ordered root-first.
    values = [sec.time_amount for sec in loggedSections if sec.source is not None]

    # Append each section's formatted duration to its label.
    for i in itr(labels):
        if i > 0:
            labels[i] = labels[i] + f' ({format_sec_dur(values[i - 1])})'
    labels[0] = labels[0] + f' ({format_sec_dur(loggedSections[0].time_amount)})'

    # Single jitter pass (the original while-loop only ever ran once): nudge
    # at most one section that shares a column with a section from a different
    # thread/process/pid, and recolor it so the split is visible.
    jitter_step = xstep / 10
    for sec, other in unique_pairs(loggedSections):
        if sec.x == other.x:
            if sec.thread != other.thread or sec.process != other.process or sec.pid != other.pid:
                other.color = 'blue'
                other.x += jitter_step
                break

    import plotly.graph_objects as go
    fig = go.Figure(data=[go.Sankey(

        # arrangement="fixed", # no cutoff, but overlap
        arrangement="snap",  # no overlap, but cutoff
        # arrangement = "perpendicular", # overlap and cutoff (less of both)
        # arrangement="freeform",# both overlap and cutoff

        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            y=[sec.y_center for sec in loggedSections],
            x=(arr([sec.x for sec in loggedSections]) * 1.0).tolist(),
            color=[sec.color for sec in loggedSections]
        ),
        link=dict(
            source=[sec.source for sec in loggedSections if sec.source is not None],
            target=list(range(1, (len(loggedSections)))),
            value=values
        ))])

    fig.update_layout(
        font_size=20,
    )

    html = _get_fig(fig, full_html=True, include_plotlyjs=True)
    File(savetofile).res_pre_ext("_sankey").resrepext('html').write(html)
Example #30
0
# Save the log(p)-h diagram built earlier in the script (state plot of the
# R410A cycle) — `diagram` is created above this fragment.
diagram.save('R410A_logph.svg')

# %% exergy analysis

# Run the exergy analysis on the network: fuel exergy streams (power input and
# geothermal heat) vs. product exergy (heat delivered to the consumer).
# NOTE(review): `ExergyAnalysis` presumably comes from TESPy — confirm import.
ean = ExergyAnalysis(network=nw, E_F=[power, heat_geo], E_P=[heat_cons])
# `pamb`/`Tamb` (ambient pressure/temperature) are defined earlier in the script.
ean.analyse(pamb, Tamb)
print("\n##### EXERGY ANALYSIS #####\n")
ean.print_results()

# create sankey diagram
# `generate_plotly_sankey_input` returns a plotly-ready link dict and node
# label list, which are passed straight into go.Sankey below.
links, nodes = ean.generate_plotly_sankey_input()
fig = go.Figure(
    go.Sankey(arrangement="snap",
              node={
                  "label": nodes,
                  'pad': 11,
                  'color': 'orange'
              },
              link=links))
# Write the interactive diagram to disk (plotly's offline `plot` helper).
plot(fig, filename='R410A_sankey.html')

# %% plot exergy destruction

# create data for bar chart
comps = ['E_F']
E_F = ean.network_data.E_F
# top bar
E_D = [0]  # no exergy destruction in the top bar
E_P = [E_F]  # add E_F as the top bar
for comp in ean.component_data.index:
    # only plot components with exergy destruction > 1 W