def combine_two_heatmaps_xaxis(plot1, plot2, plotname):
    """Put two heatmap plots next to each other in a 1x2 panel and write it out.

    The traces of ``plot1`` are attached to the left subplot, the traces of
    ``plot2`` to the right one.  The colorbar settings are written onto the
    traces of the input plots themselves.

    :param plot1:       plot with ``'data'`` and ``'layout'`` entries (left panel)
    :param plot2:       plot with ``'data'`` and ``'layout'`` entries (right panel)
    :param plotname:    output file handed to ``plotly_plot``
    :return:            None
    """
    panel = tools.make_subplots(rows=1, cols=2, print_grid=False)

    # (source plot, subplot column, colorbar settings in assignment order)
    placements = (
        (plot1, 1, (('x', 0.4), ('thickness', 20))),
        (plot2, 2, (('x', 0.95), ('xpad', 50), ('thickness', 20))),
    )
    for source, column, colorbar_settings in placements:
        for heatmap in source['data']:
            for option, value in colorbar_settings:
                heatmap['colorbar'][option] = value
            panel.append_trace(heatmap, 1, column)

    layout = panel['layout']
    layout['title'] = ""

    # left panel: take over the axes of plot1 but blank the titles
    layout['xaxis1'].update(plot1['layout']['xaxis'])
    layout['xaxis1']['title'] = ""
    layout['xaxis1']['domain'] = [0, 0.4]
    layout['yaxis1'].update(plot1['layout']['yaxis'])
    layout['yaxis1']['title'] = ""

    # right panel: axes of plot2, y axis on the right and scaled with x2
    layout['xaxis2'].update(plot2['layout']['xaxis'])
    layout['yaxis2'].update(plot2['layout']['yaxis'])
    layout['xaxis2']['domain'] = [0.55, 0.95]
    layout['yaxis2']['side'] = 'right'
    layout['yaxis2']['scaleanchor'] = 'x2'
    for axis in ('xaxis2', 'yaxis2'):
        layout[axis]['title'] = ""

    layout['font']['size'] = 18
    layout['hovermode'] = 'closest'
    layout['margin']['t'] = 10

    plotly_plot(panel, filename=plotname, auto_open=False)
def plot_freq_abs_vs_distance(distances_ab, abs, seq_sep, distance_definition, plot_dir):
    """Plot normalised distance histograms, one line per entry of ``abs``.

    Distances are binned with ``np.digitize`` on the edges
    ``np.arange(2, 50, 0.5)``.  The value plotted at ``bins[i]`` is the share
    of distances with ``bins[i-1] <= d < bins[i]`` (``d < bins[0]`` for
    ``i == 0``).  Distances at or above the last edge are neither counted nor
    part of the normalisation.

    :param distances_ab:        nested mapping ``distances_ab[seq_sep][ab]`` ->
                                sequence of distances
    :param abs:                 iterable of keys ``ab`` to plot (the name shadows
                                the builtin; kept for interface compatibility)
    :param seq_sep:             key selecting the sequence separation
    :param distance_definition: string used as prefix of the output file name
    :param plot_dir:            directory the HTML file is written to
    :return:                    None
    """
    bins = np.arange(2, 50, 0.5)
    n_bins = len(bins)
    data = []

    for ab in abs:
        distances = distances_ab[seq_sep][ab]

        # Bin every distance once and count per bin, instead of re-running
        # np.digitize for each of the bins.  Index ``n_bins`` (values beyond
        # the last edge) is cut off, as in the per-bin loop it replaces.
        bin_index = np.digitize(np.ravel(distances), bins)
        counts = np.bincount(bin_index, minlength=n_bins)[:n_bins]
        p_r_ab = counts / float(np.sum(counts))

        data.append(
            go.Scatter(
                x=bins,
                y=p_r_ab,
                mode='lines',
                name=str(ab) + "(" + str(len(distances)) + ")"
            )
        )

    layout = go.Layout(
        title="",
        xaxis=dict(title="distance bins"),
        yaxis=dict(title="frequency at seq sep " + str(seq_sep))
    )

    fig = go.Figure(data=data, layout=layout)

    plot_file = plot_dir + "/" + distance_definition + "_frequency_seqsep" + str(seq_sep) + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_freq_abs_vs_distance(distances_ab, abs, seq_sep, distance_definition,
                              plot_dir):
    """Plot the binned distance frequencies for every ``ab`` in ``abs``.

    The bin edges are ``np.arange(2, 50, 0.5)``.  For each ``ab`` the number
    of distances falling into ``[bins[i-1], bins[i])`` is plotted at
    ``bins[i]`` (index 0 collects everything below the first edge), divided by
    the total number of counted distances.  Distances beyond the last edge are
    ignored.

    :param distances_ab:        ``distances_ab[seq_sep][ab]`` is a sequence of
                                distances
    :param abs:                 iterable of ``ab`` keys (shadows the builtin
                                ``abs``; name kept so callers keep working)
    :param seq_sep:             sequence separation key
    :param distance_definition: prefix of the output file name
    :param plot_dir:            output directory
    :return:                    None
    """
    bins = np.arange(2, 50, 0.5)
    nr_bins = len(bins)
    data = []

    for ab in abs:
        values = distances_ab[seq_sep][ab]

        # One digitize call per ``ab`` rather than one per bin; bincount
        # yields the same per-bin counts.  The overflow bin (index nr_bins)
        # is dropped, exactly as the former loop never looked at it.
        counts = np.bincount(np.digitize(np.ravel(values), bins),
                             minlength=nr_bins)[:nr_bins]
        p_r_ab = counts / float(np.sum(counts))

        label = str(ab) + "(" + str(len(values)) + ")"
        data.append(go.Scatter(x=bins, y=p_r_ab, mode='lines', name=label))

    layout = go.Layout(title="",
                       xaxis=dict(title="distance bins"),
                       yaxis=dict(title="frequency at seq sep " +
                                  str(seq_sep)))

    fig = go.Figure(data=data, layout=layout)

    plot_file = plot_dir + "/" + distance_definition + "_frequency_seqsep" + str(
        seq_sep) + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_boxplot_correlation(pearson_r, proteins, plot_file):
    """Write a single box plot of Pearson correlation values.

    :param pearson_r:   sequence of Pearson correlation values
    :param proteins:    text attached to the box trace (presumably one protein
                        name per value in ``pearson_r`` - TODO confirm)
    :param plot_file:   output file handed to ``plotly_plot``
    :return:            None
    """
    box = go.Box(
        y=pearson_r,
        name="APC vs Entropy correction",
        showlegend=False,
        boxmean=False,
        # plotly enumeration values are lower case ('Outliers' is not valid)
        boxpoints='outliers',
        text=proteins
    )

    layout = go.Layout(
        font=dict(size=24),
        margin=dict(t=10),
        yaxis=dict(range=[0, 1], title="Pearson correlation"),
        # figure dimensions are numbers, not strings
        width=500,
        height=400
    )

    plot = {"data": [box], "layout": layout}

    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
Esempio n. 5
0
def plot_1d_coupling_profile(couplings_per_bin, plot_dir, ab):
    """Plot the distribution of couplings for ``ab``, one curve per distance bin.

    :param couplings_per_bin:   mapping bin name -> dict with the entries
                                ``'lower'``, ``'upper'`` and ``'couplings'``
    :param plot_dir:            directory the HTML file is written to
    :param ab:                  string label of the coupling; used in the axis
                                titles and in the file name
    :return:                    None
    """
    # Bins in reverse-sorted order of their names.  ``items()`` replaces the
    # Python-2-only ``iteritems()`` and the ordering is computed only once.
    ordered_bins = [
        bindict
        for _, bindict in sorted(couplings_per_bin.items(), reverse=True)
    ]

    group_labels = [
        str(bindict['lower']) + "Å < ΔCβ  < " + str(bindict['upper']) + "Å"
        for bindict in ordered_bins
    ]
    hist_data = [bindict['couplings'] for bindict in ordered_bins]

    # average number of couplings per bin, rounded to hundreds (file name only)
    nr_datapoints = int(
        np.round(np.mean([len(x) for x in hist_data]), decimals=-2))

    # density curves only: neither histogram bars nor rug
    fig = ff.create_distplot(hist_data,
                             group_labels,
                             show_hist=False,
                             show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = "couplings w_ij(" + ab + ")"
    fig['layout']['xaxis']['range'] = [-1, 1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings for " + ab
    fig['layout']['margin']['t'] = 10

    plot_name = plot_dir + "/1d_coupling_profile_" + ab + "_avgdatapoints" + str(
        nr_datapoints) + ".html"
    plotly_plot(fig, filename=plot_name, auto_open=False)
Esempio n. 6
0
def plot_boxplot_correlation(pearson_r_pll, pearson_r_pcd, plot_file):
    """Write two box plots of Pearson correlations, one per inference method.

    :param pearson_r_pll:   correlation values shown as "pseudo-likelihood"
    :param pearson_r_pcd:   correlation values shown as
                            "persistent contrastive divergence"
    :param plot_file:       output file handed to ``plotly_plot``
    :return:                None
    """
    labelled_values = (
        ("pseudo-likelihood", pearson_r_pll),
        ("persistent contrastive divergence", pearson_r_pcd),
    )

    # both boxes share every setting except data and label
    boxes = [
        go.Box(y=values,
               name=label,
               showlegend=False,
               boxmean=False,
               boxpoints='outliers')
        for label, values in labelled_values
    ]

    layout = go.Layout(
        title="Correlation between APC and Entropy Correction",
        font=dict(size=24),
        margin=dict(t=50),
        yaxis=dict(range=[0, 1], title="Pearson correlation"),
        width=900,
        height=450)

    plotly_plot({"data": boxes, "layout": layout},
                filename=plot_file,
                auto_open=False,
                show_link=False)
Esempio n. 7
0
def plot_metrics(log_metric_dict, metric, plot_out):
    """Plot one line per logged run for a metric over the iterations.

    Colour and dash style of a line are derived from substrings of its key.

    :param log_metric_dict: mapping with an ``'order'`` entry (sequence of keys
                            giving the plotting order) and, for every such key,
                            the sequence of metric values.  The mapping is only
                            read, not modified.
    :param metric:          title of the y axis
    :param plot_out:        output file handed to ``plotly_plot``
    :return:                None
    """
    # (substring of the key, value); when several match, the last one wins
    color_rules = (
        ("1dv1A03", "rgb(153, 00, 00)"),
        ("1c5aA00", "rgb(65, 105, 225)"),
    )
    dash_rules = (
        ("1e-2.opt", "dot"),
        ("1e-4.opt", "dash"),
        ("0.opt", "solid"),
    )

    data = []

    # Read the order without pop(): popping removed the entry from the
    # caller's dict, so a second call on the same dict raised KeyError.
    for key in log_metric_dict['order']:

        # function-call form works on Python 2 and 3 alike
        print(key)

        values = log_metric_dict[key]

        line = dict(width=4)
        for pattern, color in color_rules:
            if pattern in key:
                line['color'] = color
        for pattern, dash in dash_rules:
            if pattern in key:
                line['dash'] = dash

        data.append(
            go.Scatter(
                # materialise the range, as the other plots in this file do
                x=list(range(1, len(values) + 1)),
                y=values,
                name=key,
                line=line
            )
        )

    layout = go.Layout(
        title="",
        margin=dict(t=10),
        xaxis=dict(
            range=[0, 2500],
            title="iterations"),
        yaxis=dict(
            range=[-1.5, 4],
            type="log",
            title=metric,
            exponentformat="e"
        ),
        font=dict(size=18)
    )

    fig = go.Figure(data=data, layout=layout)

    plotly_plot(fig, filename=plot_out, auto_open=False)
Esempio n. 8
0
def plot_alignment(aa_counts_single, title, plot_file, freq=True):
    """Write a stacked bar chart of the amino acid distribution per position.

    :param aa_counts_single:    2-D array indexed ``[position, amino acid]``;
                                columns 0..19 are plotted.  The array is not
                                modified.
    :param title:               plot title
    :param plot_file:           output file handed to ``plotly_plot``
    :param freq:                if True, divide all counts by the sum of the
                                first row before plotting
    :return:                    None
    """
    # normalisation constant: total count at the first position
    Neff = np.sum(aa_counts_single[0, :])
    L = aa_counts_single.shape[0]

    if freq:
        # Out-of-place true division: the in-place ``/=`` changed the caller's
        # array and cannot be applied to integer count arrays.
        aa_counts_single = aa_counts_single / float(Neff)

    positions = list(range(1, L + 1))

    # one bar series per amino acid; stacked per position via barmode
    data = [
        go.Bar(x=positions,
               y=aa_counts_single[:, aa].tolist(),
               showlegend=True,
               name=io.AMINO_ACIDS[aa])
        for aa in range(20)
    ]

    layout = go.Layout(barmode='stack',
                       title=title,
                       xaxis=dict(title="Alignment Position"),
                       yaxis=dict(title="Amino Acid Distribution",
                                  exponentformat='e',
                                  # enumeration values are lower case
                                  showexponent='all'),
                       font=dict(size=18))

    plot = {'data': data, 'layout': layout}

    plotly_plot(plot, filename=plot_file, auto_open=False, link_text='')
Esempio n. 9
0
    def plot_progress(self):
        """Write the optimization log as a line plot, one trace per metric.

        Does nothing when ``self.plotfile`` is None.  Reads ``self.plotfile``,
        ``self.title`` and ``self.optimization_log`` (mapping metric name ->
        sequence of values).  All traces start hidden ("legendonly").

        :return: None
        """
        if self.plotfile is None:
            return

        # text before the first "." of the file name is used as protein name
        protein = os.path.basename(self.plotfile).split(".")[0]
        title = "Optimization Log for {0} ".format(protein)
        title += self.title

        data = [
            go.Scatter(x=list(range(1, len(metric) + 1)),
                       y=metric,
                       mode='lines',
                       visible="legendonly",
                       name=name)
            for name, metric in self.optimization_log.items()
        ]

        # enumeration values are lower case: 'all', not 'All'
        axis_format = dict(exponentformat="e", showexponent='all')

        plot = {
            "data":
            data,
            "layout":
            go.Layout(title=title,
                      xaxis1=dict(title="iteration", **axis_format),
                      yaxis1=dict(title="metric", **axis_format),
                      font=dict(size=18),
                      titlefont=dict(size=14))
        }

        plotly_plot(plot, filename=self.plotfile, auto_open=False)
def plot_1d_coupling_profile(couplings_per_bin, plot_dir, ab):
    """Plot density curves of the couplings for ``ab``, one per distance bin.

    :param couplings_per_bin:   mapping bin name -> dict with the entries
                                ``'lower'``, ``'upper'`` and ``'couplings'``
    :param plot_dir:            output directory
    :param ab:                  string label of the coupling (axis titles and
                                file name)
    :return:                    None
    """
    # Walk the bins in reverse-sorted order of their names.  Sorting the keys
    # works on Python 2 and 3 (``iteritems()`` exists only on Python 2) and
    # is done once for labels and data.
    bins_in_order = [
        couplings_per_bin[binname]
        for binname in sorted(couplings_per_bin, reverse=True)
    ]

    group_labels = [str(bindict['lower']) + "Å < ΔCβ  < " + str(bindict['upper']) + "Å" for bindict in bins_in_order]
    hist_data = [bindict['couplings'] for bindict in bins_in_order]

    # mean number of couplings per bin, rounded to hundreds; only used in the file name
    nr_datapoints = int(np.round(np.mean([len(x) for x in hist_data]), decimals=-2))

    # density curves only: histogram bars and rug are switched off
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = "couplings w_ij(" + ab + ")"
    fig['layout']['xaxis']['range'] = [-1, 1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings for " + ab
    fig['layout']['margin']['t'] = 10

    plot_name = plot_dir + "/1d_coupling_profile_" + ab + "_avgdatapoints" + str(nr_datapoints) + ".html"
    plotly_plot(fig, filename=plot_name, auto_open=False)
def combine_two_heatmaps_xaxis(plot1, plot2, plotname):
    """Arrange two heatmap plots in one row and write the combined figure.

    ``plot1`` fills the left subplot and ``plot2`` the right one.  The
    colorbar position and thickness are set on the traces of the plots that
    are passed in.

    :param plot1:       plot with ``'data'`` and ``'layout'`` entries (left)
    :param plot2:       plot with ``'data'`` and ``'layout'`` entries (right)
    :param plotname:    output file handed to ``plotly_plot``
    :return:            None
    """
    fig = tools.make_subplots(rows=1, cols=2, print_grid=False)

    def add_to_column(heatmaps, column, colorbar_options):
        # set the colorbar options of each trace, then attach it to the subplot
        for heatmap in heatmaps:
            for option, value in colorbar_options:
                heatmap['colorbar'][option] = value
            fig.append_trace(heatmap, 1, column)

    add_to_column(plot1['data'], 1, [('x', 0.4), ('thickness', 20)])
    add_to_column(plot2['data'], 2,
                  [('x', 0.95), ('xpad', 50), ('thickness', 20)])

    figure_layout = fig['layout']
    figure_layout['title'] = ""

    # first subplot: axes copied from plot1, titles blanked
    figure_layout['xaxis1'].update(plot1['layout']['xaxis'])
    figure_layout['xaxis1']['title'] = ""
    figure_layout['xaxis1']['domain'] = [0, 0.4]
    figure_layout['yaxis1'].update(plot1['layout']['yaxis'])
    figure_layout['yaxis1']['title'] = ""

    # second subplot: axes copied from plot2; its y axis sits on the right
    # and is scale-anchored to x2
    figure_layout['xaxis2'].update(plot2['layout']['xaxis'])
    figure_layout['yaxis2'].update(plot2['layout']['yaxis'])
    figure_layout['xaxis2']['domain'] = [0.55, 0.95]
    figure_layout['yaxis2']['side'] = 'right'
    figure_layout['yaxis2']['scaleanchor'] = 'x2'
    figure_layout['xaxis2']['title'] = ""
    figure_layout['yaxis2']['title'] = ""

    figure_layout['font']['size'] = 18
    figure_layout['hovermode'] = 'closest'
    figure_layout['margin']['t'] = 10

    plotly_plot(fig, filename=plotname, auto_open=False)
def write_ccmgen_benchmark_figure(fig, title, plot_file, height=400, width=400):
    """Restyle an existing benchmark figure in place and write it to file.

    Trace names are shortened to the text after the last "-" and before the
    first "(".  Legend, axes, margins, title and size of ``fig`` are
    overwritten.

    :param fig:         figure with ``'data'`` and ``'layout'`` entries
    :param title:       figure title
    :param plot_file:   output file handed to ``plotly_plot``
    :param height:      figure height
    :param width:       figure width
    :return:            None
    """
    for curve in fig['data']:
        after_last_dash = curve['name'].rsplit("-", 1)[-1]
        curve['name'] = after_last_dash.partition("(")[0]

    layout = fig['layout']
    layout['font']['size'] = 18
    layout['hovermode'] = 'closest'
    layout['title'] = title
    layout['margin']['b'] = 45
    layout['margin']['t'] = 50
    layout['legend'] = dict(orientation="v", x=0.65, y=1.0)
    layout['xaxis'] = dict(title="#predicted contacts / protein length")
    layout['yaxis'] = dict(title="mean precision over proteins",
                           range=[0, 0.8])
    layout['height'] = height
    layout['width'] = width

    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)
Esempio n. 13
0
def plot_runtime(plot_data, plot_file):
    """Write one box plot of runtimes per method on a logarithmic y axis.

    :param plot_data:   mapping method name -> sequence of runtimes (the axis
                        is labelled "runtime in min")
    :param plot_file:   output file handed to ``plotly_plot``
    :return:            None
    """
    data = [
        go.Box(y=runtimes,
               boxmean=True,
               # enumeration values are lower case ('Outliers' is not valid)
               boxpoints='outliers',
               name=method,
               marker=dict(opacity=1),
               hoverinfo='all',
               orientation='v',
               showlegend=False)
        for method, runtimes in plot_data.items()
    ]

    # fixed tick positions, labelled with the plain numbers
    ticks = [1, 10, 100, 500, 1000, 5000, 10000]

    plot = {
        "data":
        data,
        "layout":
        go.Layout(yaxis=dict(title="runtime in min",
                             type='log',
                             exponentformat='none',
                             showexponent='none',
                             tickmode="array",
                             tickvals=ticks,
                             ticktext=list(ticks)),
                  font=dict(size=18),
                  width=800,
                  height=500,
                  margin=dict(t=10))
    }

    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_boxplot_correlation(pearson_r, proteins, plot_file):
    """Write one box plot summarising Pearson correlation values.

    :param pearson_r:   sequence of Pearson correlation values
    :param proteins:    text attached to the box trace (presumably the protein
                        belonging to each value - TODO confirm)
    :param plot_file:   output file handed to ``plotly_plot``
    :return:            None
    """
    data = [
        go.Box(y=pearson_r,
               name="APC vs Entropy correction",
               showlegend=False,
               boxmean=False,
               # lower-case enumeration value; 'Outliers' is not valid
               boxpoints='outliers',
               text=proteins)
    ]

    plot = {
        "data":
        data,
        "layout":
        go.Layout(font=dict(size=24),
                  margin=dict(t=10),
                  yaxis=dict(range=[0, 1], title="Pearson correlation"),
                  # numeric dimensions instead of the strings "500" / "400"
                  width=500,
                  height=400)
    }

    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_boxplot_scores(protein, method_1, method_2, braw_1, braw_2, plot_dir,l2norm=False, apc=False):
    """Write box plots comparing the residue-pair scores of two methods.

    :param protein:     protein name; used as title and in the file name
    :param method_1:    label of the first method
    :param method_2:    label of the second method
    :param braw_1:      object with ``ncol`` and ``x_pair`` for method 1
    :param braw_2:      object with ``x_pair`` for method 2
    :param plot_dir:    output directory
    :param l2norm:      if True, compare the matrices returned by
                        ``b.compute_l2norm_from_braw``; otherwise the raw
                        ``x_pair[i, j, :20, :20]`` entries
    :param apc:         passed on to ``b.compute_l2norm_from_braw``
    :return:            None
    """
    L = braw_1.ncol
    # all pairs i < j
    rows, cols = np.triu_indices(L, k=1)

    title = protein
    plot_out = plot_dir + "/boxplot_for_" + protein + "_method1_" + method_1 + "_method2_" + method_2 + "_score.html"

    if l2norm:
        mat_1 = b.compute_l2norm_from_braw(braw_1, apc)
        mat_2 = b.compute_l2norm_from_braw(braw_2, apc)
        score_1 = mat_1[rows, cols]
        score_2 = mat_2[rows, cols]
        plot_out = plot_out.replace(".html", "_l2norm_apc" + str(apc) + ".html")
    else:
        # presumably the first 20 states are the amino acids without gap - TODO confirm
        score_1 = braw_1.x_pair[rows, cols, :20, :20].flatten()
        score_2 = braw_2.x_pair[rows, cols, :20, :20].flatten()

    data = [
        go.Box(
            y=scores,
            name=method,
            showlegend=False,
            boxmean='sd',
            boxpoints=False
        )
        for method, scores in ((method_1, score_1), (method_2, score_2))
    ]

    plot = {
        "data": data,
        "layout": go.Layout(
            title=title,
            font=dict(size=18),
            yaxis1=dict(
                title="score for residue pair",
                exponentformat="e",
                # enumeration values are lower case: 'all', not 'All'
                showexponent='all',
                scaleratio=1.0,
                scaleanchor='x'
            ),
            xaxis1=dict(
                exponentformat="e",
                showexponent='all',
                scaleratio=1.0,
                scaleanchor='y'
            )
        )
    }

    plotly_plot(plot, filename=plot_out, auto_open=False)
Esempio n. 16
0
def plot_pll_vs_pcd_benchmark_figure(subplots,
                                     plot_dir,
                                     height=500,
                                     width=500):
    """Combine the PCD and pLL benchmark curves in one figure and write it.

    For each method a black copy of its first trace is added in legend group
    'method' (named 'PCD' / 'pLL'), followed by all traces of the method in
    legend group 'correction'.  pLL traces are drawn dotted.  The traces in
    ``subplots`` are renamed and restyled in place.

    :param subplots:    mapping with the entries
                        'persistent contrastive divergence' and
                        'pseudo-likelihood maximization', each holding a plot
                        with a ``'data'`` list of traces
    :param plot_dir:    output directory
    :param height:      figure height
    :param width:       figure width
    :return:            path of the written HTML file
    """
    data = []

    # --- PCD ---------------------------------------------------------------
    pcd_traces = subplots['persistent contrastive divergence']['data']

    # Deep copy: a shallow copy shares the nested 'line' mapping with the
    # original trace, so painting the copy black also recoloured the first
    # real trace.
    method_trace = copy.deepcopy(pcd_traces[0])
    method_trace['legendgroup'] = 'method'
    method_trace['name'] = 'PCD'
    method_trace['line']['color'] = 'black'
    data.append(method_trace)

    for trace in pcd_traces:
        # keep the text after the last "-" and before the first "("
        trace['name'] = trace['name'].split("-")[-1].split("(")[0]
        trace['legendgroup'] = 'correction'
        data.append(trace)

    # --- pLL ---------------------------------------------------------------
    pll_traces = subplots['pseudo-likelihood maximization']['data']

    method_trace = copy.deepcopy(pll_traces[0])
    method_trace['legendgroup'] = 'method'
    method_trace['name'] = 'pLL'
    method_trace['line']['color'] = 'black'
    method_trace['line']['dash'] = 'dot'
    method_trace['showlegend'] = True
    data.append(method_trace)

    for trace in pll_traces:
        trace['name'] = trace['name'].split("-")[-1].split("(")[0]
        trace['legendgroup'] = 'correction'
        # the PCD traces above already provide the legend entries
        trace['showlegend'] = False
        trace['line']['dash'] = 'dot'
        data.append(trace)

    layout = go.Layout(
        font=dict(size=18),
        hovermode='closest',
        title="",
        margin=dict(t=10),
        legend=dict(orientation="v", x=1.01, y=1.0),
        yaxis=dict(title="Mean Precision over Proteins", range=[0, 1]),
        xaxis=dict(title="#predicted contacts / protein length"),
        height=height,
        width=width)

    fig = go.Figure(data=data, layout=layout)

    plot_file = plot_dir + "/" + "ccmgen_benchmark_figure_pll_vs_pcd.html"
    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)
    return plot_file
def plot_pdb_uniprot_fct(data_dict, seq_dict, plot_dir=None):
    """Plot the total number of entries over time for several databases.

    Every frame in ``seq_dict`` is drawn as a dotted line and the
    'PDB-Protein' frame from ``data_dict`` as a solid line; the y axis is
    logarithmic.  Rows dated before 1996-01-01 are left out.

    Side effects: the 'Date' column of each input frame is converted to
    datetimes in place, and ``data_dict`` receives the filtered frames.

    :param data_dict:   mapping name -> DataFrame; must contain 'PDB-Protein'
                        with the columns 'Date' (parsed with format "%Y") and
                        'Total'
    :param seq_dict:    mapping name -> DataFrame with the columns 'Date' and
                        'Total'
    :param plot_dir:    output directory; if None the plot is returned instead
    :return:            the plot dictionary if ``plot_dir`` is None, else None
    """
    cutoff = '1996-01-01'
    data = []

    # ``items()`` instead of the Python-2-only ``iteritems()``
    for name, df in seq_dict.items():
        df['Date'] = pd.to_datetime(df['Date'])
        # Keep every row that is not dated before the cutoff.  A boolean mask
        # does not depend on the index being 0..n-1, unlike the former
        # ``df.index[<labels>]`` lookup.
        data_dict[name] = df[~(df['Date'] < cutoff)].copy()
        data.append(
            go.Scatter(
                x=data_dict[name].Date,
                y=data_dict[name].Total,
                showlegend=True,
                name=name,
                line=dict(
                    width=4,
                    dash='dot'
                )
            )
        )

    pdb = data_dict['PDB-Protein']
    pdb['Date'] = pd.to_datetime(pdb['Date'], format="%Y")
    pdb = pdb[~(pdb['Date'] < cutoff)].copy()
    # Row 0 is moved to today's date (presumably it holds the current,
    # still incomplete year - TODO confirm).  ``.loc`` writes into the frame
    # itself; the chained ``['Date'][0] = ...`` form may write to a temporary.
    if 0 in pdb.index:
        pdb.loc[0, 'Date'] = pd.to_datetime('today')
    data_dict['PDB-Protein'] = pdb

    data.append(
        go.Scatter(
            x=pdb.Date,
            y=pdb.Total,
            showlegend=True,
            name='PDB-Protein',
            line=dict(
                width=4
            )
        )
    )

    plot = {
        "data": data,
        "layout": go.Layout(
            legend=dict(x=.05, y=1.0),
            title="",
            xaxis=dict(
                title="Year",
                range=['01-01-1999', '01-01-2017']
            ),
            yaxis=dict(
                title="Total number of Entries",
                type="log"
            ),
            font=dict(size=18)
        )
    }

    if plot_dir is not None:
        plot_file = plot_dir + "/pdb_uniprot.html"
        plotly_plot(plot, filename=plot_file, auto_open=False)
    else:
        return plot
def plot_boxplot_correlation(stats_dict, method_1, method_2, keys_list, plot_dir):
    """Write box plots of comparative statistics between two methods.

    :param stats_dict:  mapping protein -> mapping of statistics.  Read here:
                        'pearson', 'spearmanrho', 'kendalltau' and
                        'kolmogorov-smirnov' (pairs of statistic and p-value),
                        'linreg' (5-tuple starting with slope and intercept)
                        and 'Neff'
    :param method_1:    name of the first method (file name only)
    :param method_2:    name of the second method (file name only)
    :param keys_list:   names of the columns to draw, one box each
    :param plot_dir:    output directory
    :return:            None
    """
    # one row per protein, one column per statistic
    df = pd.DataFrame(stats_dict).transpose()

    # (source column of pairs, column for the statistic, column for the p-value)
    # 'kolmogorov-smirnov' is replaced by its own statistic; the pairs are
    # read before the column is overwritten.
    pair_columns = (
        ('pearson', 'Pearson r', 'Pearson pvalue'),
        ('spearmanrho', 'Spearman rho', 'Spearman pvalue'),
        ('kendalltau', 'Kendalls tau', 'Kendalls pvalue'),
        ('kolmogorov-smirnov', 'kolmogorov-smirnov', 'kolmogorov-smirnov pvalue'),
    )
    for source, stat_column, pvalue_column in pair_columns:
        pairs = df[source].tolist()
        df[pvalue_column] = [pvalue for _, pvalue in pairs]
        df[stat_column] = [stat for stat, _ in pairs]

    fits = df['linreg'].tolist()
    df['linear fit slope'] = [slope for slope, intercept, rvalue, pvalue, stderr in fits]
    df['linear fit intercept'] = [intercept for slope, intercept, rvalue, pvalue, stderr in fits]

    df['protein'] = df.index
    df['Neff'] = [int(x) for x in df.Neff.tolist()]

    data = [
        go.Box(
            y=df[key],
            name=key,
            text=df['protein'],
            showlegend=False,
            boxmean=False,
            # enumeration values are lower case ('Outliers' is not valid)
            boxpoints='outliers'
        )
        for key in keys_list
    ]

    plot = {
        "data": data,
        "layout": go.Layout(
            margin=dict(t=10),
            font=dict(size=18),
            yaxis1=dict(
                title="statistics value",
                exponentformat="e",
                showexponent='all',
                range=[0, 1]
            )
        )
    }

    plot_out = plot_dir + "/comparative_statistics_boxplot_for_" + method_1.replace(" ", "_") + "_" + method_2.replace(" ", "_") + "_l2norm_APC_scores.html"
    plotly_plot(plot, filename=plot_out, auto_open=False, show_link=False)
def plot_percentage_gaps_per_position(alignment, plot_file=None):
    """Plot the gap fraction and the normalised entropy per alignment position.

    :param alignment:   alignment; ``len(alignment)`` is taken as the number of
                        sequences and ``len(alignment[0])`` as the length
    :param plot_file:   output file; if None the plot is returned instead
    :return:            the plot dictionary if ``plot_file`` is None, else None
    """
    N = float(len(alignment))
    L = len(alignment[0])

    weighting = SequenceWeights(False, 0.8)
    weights = weighting.weights_simple(alignment)

    # compute counts and frequencies
    pseudocounts = PseudoCounts(alignment, weights)
    pseudocounts.calculate_frequencies(
        'uniform_pseudocounts', 1, 1, remove_gaps=False
    )
    single_counts = pseudocounts.counts[0]

    # share of column 20 (treated as the gap state here) in each row sum
    gaps = single_counts[:, 20] / single_counts.sum(1)

    # entropy per position, scaled so that the largest value is 1
    entropy_per_position = scipy.stats.entropy(single_counts.transpose(), base=2)
    entropy_per_position /= np.max(entropy_per_position)

    positions = list(range(1, L + 1))

    # 'lines' must be lower case; "Lines" is not a valid scatter mode
    data = [
        go.Scatter(
            x=positions,
            y=gaps,
            name="percentage of gaps",
            mode="lines",
            line=dict(width=3)
        ),
        go.Scatter(
            x=positions,
            y=entropy_per_position,
            name="relative Entropy",
            mode="lines",
            line=dict(width=3)
        )
    ]

    layout = {
        'title': "Percentage of gaps and Entropy in alignment <br> N=" + str(N) + ", L=" + str(L),
        'xaxis': {'title': "Alignment Position"},
        'yaxis': {'title': "Percentage of Gaps/Entropy"},
        'font': {'size': 18}
    }

    plot = {'data': data, 'layout': layout}
    if plot_file is None:
        return plot
    else:
        plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_density(protein, bqij_data, plot_dir):
    """Write a density plot and an overlaid histogram of q_ijab values.

    Two files are written to ``plot_dir``:
    ``<protein>_distribution_qijab.html`` and
    ``<protein>_histogram_qijab.html``.

    :param protein:     protein name, used in the file names
    :param bqij_data:   mapping with the entry ``'L'`` and, for every other
                        key (one group each), a file that ``io.read_qij`` can
                        read
    :param plot_dir:    output directory
    :return:            None
    """
    group_labels = [key for key in sorted(bqij_data.keys()) if key != "L"]
    L = bqij_data['L']

    # index pairs i < j, the same for every group
    upper_triangle = np.triu_indices(n=L, k=1)

    hist_data = []
    data = []
    for group in group_labels:
        Nij, qij = io.read_qij(bqij_data[group], L)

        data_group = qij[upper_triangle].flatten()
        hist_data.append(data_group)

        data.append(
            go.Histogram(
                x=data_group,
                histnorm='probability',
                name=group,
                xbins=dict(
                    start=-0.1,
                    end=1,
                    size=0.005
                ),
                opacity=0.75
            )
        )

    # density curves only: histogram bars and rug are switched off
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)
    fig['layout']['font'] = dict(size=18)
    fig['layout']['xaxis']['title'] = "q_ijab"
    plot_file = plot_dir + "/" + protein + "_distribution_qijab" + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)

    # overlaid histograms; enumeration values are lower case ('all', not 'All')
    plot_file = plot_dir + "/" + protein + "_histogram_qijab" + ".html"
    layout = go.Layout(
        barmode='overlay',
        xaxis=dict(
            title="q_ijab",
            exponentformat="e",
            showexponent='all'
        ),
        yaxis=dict(
            exponentformat="e",
            showexponent='all'
        ),
        font=dict(size=18)
    )
    fig = go.Figure(data=data, layout=layout)
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_alignment_entropy(alignment_file, plot_dir=None):
    """Plot the base-2 entropy of the amino acid frequencies per position.

    Frequencies are plain relative counts (no pseudocounts).  Row 0 of the
    frequency table is dropped as the gap state before the entropy is
    computed.

    :param alignment_file:  alignment file read with ``io.read_alignment``;
                            the text before the first "." of its name is used
                            as protein name
    :param plot_dir:        output directory; if None the plot is returned
    :return:                the plot dictionary if ``plot_dir`` is None, else
                            None
    """
    # read alignment
    protein = os.path.basename(alignment_file).split(".")[0]
    alignment = io.read_alignment(alignment_file)
    N = float(len(alignment))
    L = len(alignment[0])

    # one row per alignment position
    columns = alignment.transpose()

    # amino acid frequencies per position; states are used as row indices
    aa_freq_per_pos = np.zeros((21, L))
    for position in range(L):
        # ``items()`` instead of the Python-2-only ``iteritems()``
        for aa, counts in Counter(columns[position]).items():
            aa_freq_per_pos[aa, position] = counts / N

    # remove gaps (row 0), then one row per position again
    aa_freq_per_pos = aa_freq_per_pos[1:].transpose()

    entropy_per_position = [
        entropy(aa_freq_per_pos[pos], base=2) for pos in range(L)
    ]

    # The trace was labelled "percentage of gaps" although it shows the
    # entropy; mode must be lower case ("Lines" is not a valid value).
    data = [
        go.Scatter(x=list(range(L)),
                   y=entropy_per_position,
                   name="entropy",
                   mode="lines")
    ]

    layout = {
        'title':
        "Entropy (base 2) in alignment of " + str(protein) + "<br> N=" +
        str(N) + ", L=" + str(L),
        'xaxis': {
            'title': "Alignment Position"
        },
        'yaxis': {
            'title': "Entropy "
        },
        'font': {
            'size': 18
        }
    }

    plot = {'data': data, 'layout': layout}
    if plot_dir is None:
        return plot
    else:
        plot_file = plot_dir + "/alignment_entropy_" + protein + ".html"
        plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_gradient_ab_trace(gradient_df, ab_list, colors, plot_out=None):
    """Plot gradient traces per component with a drop-down menu over ``ab``.

    For every ``ab`` one (initially hidden) line per column of
    ``gradient_df`` is added; the drop-down button of an ``ab`` makes exactly
    its block of lines visible.

    :param gradient_df: DataFrame with one row per iteration.  Column names
                        end in ``_<component number>``; each cell is indexed
                        with ``ab``
    :param ab_list:     list of ``ab`` indices, also used as index into ``AB``
                        for the labels
    :param colors:      colours indexed by component number
    :param plot_out:    output file; if None the plot is returned instead
    :return:            the plot dictionary if ``plot_out`` is None, else None
    """
    plot = {'data': [],
            'layout': {}
            }

    # set up drop down menu
    buttons = []
    plot['layout']['updatemenus'] = [{'xanchor': 'left',
                                      'yanchor': 'bottom',
                                      'x': 1.02,
                                      'y': 0.2,
                                      'buttons': buttons,
                                      'active': 0,
                                      }]

    parameters = gradient_df.columns.tolist()
    nr_components = len(parameters)
    nr_ab = len(ab_list)
    iterations = list(range(1, len(gradient_df) + 1))

    # enumerate gives the block position directly; ``ab_list.index(ab)`` was
    # a linear search per button and returned the first match for duplicates
    for position, ab in enumerate(ab_list):

        for parameter in parameters:
            component = int(parameter.split("_")[-1])

            plot['data'].append(
                go.Scatter(
                    x=iterations,
                    y=gradient_df[parameter].apply(lambda x, ab=ab: x[ab]).tolist(),
                    mode='lines',
                    line=dict(
                        color=colors[component]
                    ),
                    name="component " + str(component) + " " + parameter + " (" + AB[ab] + ")",
                    showlegend=True,
                    visible=False
                )
            )

        # show the block of traces of this ab, hide the blocks before and after
        visible = ([False] * nr_components * position
                   + [True] * nr_components
                   + [False] * nr_components * (nr_ab - position - 1))
        buttons.append(
            {
                'args': ['visible', visible],
                'label': AB[ab],
                'method': 'restyle'
            }
        )

    # text before the first "_" of the first column name
    parameter_name = parameters[0].split("_")[0]
    plot['layout']['xaxis1'] = {'title': 'iteration'}
    plot['layout']['yaxis1'] = {'title': "gradient for " + parameter_name}
    plot['layout']['title'] = "gradient trace for " + parameter_name

    if plot_out is not None:
        plotly_plot(plot, filename=plot_out, auto_open=False)
    else:
        return plot
Esempio n. 23
0
def plot_boxplot_correlation(stats_dict, keys_list, plot_file):
    """Write one box plot per requested statistic, computed over all proteins.

    :param stats_dict:  mapping protein -> mapping of statistics.  Read here:
                        'pearson', 'spearmanrho', 'kendalltau' and
                        'kolmogorov-smirnov' (pairs of statistic and p-value)
                        and 'linreg' (5-tuple starting with slope, intercept)
    :param keys_list:   names of the columns to draw
    :param plot_file:   output file handed to ``plotly_plot``
    :return:            None
    """
    # proteins as rows, statistics as columns
    df = pd.DataFrame(stats_dict).transpose()

    # (column holding the pairs, target for the statistic, target for p-value)
    split_rules = [
        ('pearson', 'Pearson r', 'Pearson pvalue'),
        ('spearmanrho', 'Spearman rho', 'Spearman pvalue'),
        ('kendalltau', 'Kendalls tau', 'Kendalls pvalue'),
        ('kolmogorov-smirnov', 'kolmogorov-smirnov',
         'kolmogorov-smirnov pvalue'),
    ]
    for source, stat_name, pvalue_name in split_rules:
        # read the pairs first: for kolmogorov-smirnov the source column is
        # replaced by the statistic afterwards
        pairs = df[source].tolist()
        df[stat_name] = [x for x, y in pairs]
        df[pvalue_name] = [y for x, y in pairs]

    regressions = df['linreg'].tolist()
    df['linear fit slope'] = [
        slope for slope, intercept, rvalue, pvalue, stderr in regressions
    ]
    df['linear fit intercept'] = [
        intercept for slope, intercept, rvalue, pvalue, stderr in regressions
    ]

    df['protein'] = df.index

    boxes = [
        go.Box(y=df[key],
               name=key,
               text=df['protein'],
               showlegend=False,
               boxmean=False,
               boxpoints='outliers') for key in keys_list
    ]

    layout = go.Layout(margin=dict(t=10),
                       font=dict(size=18),
                       yaxis1=dict(title="statistics value",
                                   exponentformat="e",
                                   showexponent='all',
                                   range=[0, 1]),
                       width=800,
                       height=500)

    plotly_plot({"data": boxes, "layout": layout},
                filename=plot_file,
                auto_open=False,
                show_link=False)
def plot_scatter(apc, ec, text, plot_file):
    """Write a scatter plot of two score sets with diagonal and Pearson r.

    :param apc:         values for the x axis ("Average Product Correction")
    :param ec:          values for the y axis ("Entropy Correction")
    :param text:        text attached to the markers
    :param plot_file:   output file handed to ``plotly_plot``
    :return:            None
    """
    scatter_data = go.Scatter(x=apc,
                              y=ec,
                              mode='markers',
                              marker=dict(color="black"),
                              text=text,
                              showlegend=False)

    # diagonal from the origin to the largest value of either input
    upper = np.max(list(apc) + list(ec))
    diagonal = go.Scatter(x=[0, upper],
                          y=[0, upper],
                          mode="lines",
                          line=dict(color="darkgrey", width=4, dash="dot"),
                          showlegend=False)

    pearson_r = pearsonr(apc, ec)

    # diagonal first so that the markers are drawn on top of it
    data = [diagonal, scatter_data]

    plot = {
        "data":
        data,
        "layout":
        go.Layout(font=dict(size=24),
                  yaxis=dict(title="Entropy Correction",
                             exponentformat="e",
                             # enumeration values are lower case
                             showexponent='all',
                             scaleratio=1,
                             scaleanchor='x'),
                  xaxis=dict(title="Average Product Correction",
                             exponentformat="e",
                             showexponent='all',
                             scaleratio=1,
                             scaleanchor='y'),
                  annotations=go.Annotations([
                      go.Annotation(x=0.05,
                                    y=0.95,
                                    showarrow=False,
                                    text='Pearson r = {0}'.format(
                                        np.round(pearson_r[0], decimals=3)),
                                    font=dict(color="black", size=24),
                                    xref='paper',
                                    yref='paper')
                  ]),
                  margin=dict(t=10),
                  # numeric dimensions instead of the strings "550" / "500"
                  width=550,
                  height=500)
    }

    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_convergence_trace_plotly(negll_trace_df, name, plot_title, plot_out=None):
    """
    Define a line plot in plotly dictionary style, one line per column and pass.
    Either plot it or return the dictionary.

    :param negll_trace_df:  Pandas Dataframe with columns: pass, step, col1, col2
    :param name:            List of column names for plotting, e.g [col1, col2]
    :param plot_title:      title
    :param plot_out:        Path to HTML output file; when None nothing is
                            written and the plot dictionary is returned
    :return:                plot dictionary if plot_out is None, otherwise None
    """

    # Sorting the distinct passes makes the trace (and legend) order
    # reproducible; iterating a bare set gives no ordering guarantee.
    passes = sorted(set(negll_trace_df['pass']))

    data = []
    for trace in name:
        for iteration in passes:
            # Select the rows of this pass once and reuse them for x and y.
            rows = negll_trace_df[negll_trace_df['pass'] == iteration]
            data.append(
                go.Scatter(
                    x=rows['step'].tolist(),
                    y=rows[trace].tolist(),
                    mode='lines',
                    name=trace + ' pass ' + str(iteration),
                    connectgaps=True,
                    showlegend=True,
                    line=dict(width=4)
                )
            )

    plot = {
        "data": data,
        "layout": go.Layout(
            title=plot_title,
            xaxis1=dict(title="step",
                        exponentformat="e",
                        showexponent='all'),
            yaxis1=dict(title="negative log likelihood",
                        exponentformat="e",
                        showexponent='all'),
            font=dict(size=18),
        )
    }

    if plot_out is not None:
        plotly_plot(plot, filename=plot_out, auto_open=False)
    else:
        return plot
def plot_projection_on_two_components_gapstructure(plot_dict, plot_out):
    """
    Plot the "Pfam" entries of a 2D projection, coloured by their gap fraction.

    Entries of plot_dict['data'] whose 'name' is not "Pfam" are skipped.

    :param plot_dict:   dict whose 'data' entry is an iterable of dicts with
                        keys 'name', 'x', 'y', 'seq', 'N' and 'L'; positions
                        equal to 20 in a sequence are counted as gaps
    :param plot_out:    output path handed to plotly_plot
    :return:            None
    """
    data = []
    for plot_data in plot_dict['data']:
        if plot_data['name'] != "Pfam":
            continue

        n_seqs = plot_data['N']
        seq_len = plot_data['L']
        sequences = plot_data['seq']

        # fraction of positions equal to 20 in every sequence
        percent_gaps = [np.count_nonzero(seq == 20) / float(seq_len) for seq in sequences]

        # hover text: sequence number, gap fraction and the sequence itself,
        # with a line break inserted in front of every 50th residue
        text = []
        for n in range(n_seqs):
            letters = []
            for l in range(seq_len):
                letter = io.AMINO_ACIDS[sequences[n][l]]
                if (l + 1) % 50 == 0:
                    letter = "<br>" + letter
                letters.append(letter)
            text.append(
                "seq no " + str(n + 1)
                + "<br>fraction of gaps: " + str(np.round(percent_gaps[n], decimals=3))
                + "<br>" + "".join(letters)
            )

        data.append(
            go.Scatter(
                x=plot_data['x'],
                y=plot_data['y'],
                name=plot_data['name'],
                mode='markers',
                marker=dict(
                    color=percent_gaps,
                    # plain dict instead of the deprecated go.ColorBar wrapper
                    colorbar=dict(title='Fraction of Gaps'),
                    colorscale='Bluered'
                ),
                text=text,
                showlegend=False
            )
        )

    plot = {
        "data": data,
        "layout": go.Layout(
            font=dict(size=18),
            title="",
            margin=dict(t=10),
            hovermode='closest',
            yaxis=dict(
                title="principal component 2",
                exponentformat="e",
                showexponent='all'
            ),
            xaxis=dict(
                title="principal component 1",
                exponentformat="e",
                showexponent='all'
            )
        )
    }

    plotly_plot(plot, filename=plot_out, auto_open=False)
def plot_log_observed_expected_at_abs(distances_ab, abs, seq_sep, distance_definition, plot_dir):
    """
    Plot log(observed / expected) distance frequencies for selected keys.

    The expected frequency per bin is taken from the 'all' entry of
    distances_ab[seq_sep]; the observed frequency from each entry listed in
    ``abs``. One line is drawn per entry.

    :param distances_ab:        nested mapping, distances_ab[seq_sep][key] is
                                a sequence of distances; must contain 'all'
    :param abs:                 iterable of keys (strings) to plot
    :param seq_sep:             key into distances_ab, also used in the file name
    :param distance_definition: prefix of the output file name
    :param plot_dir:            directory the HTML file is written to
    :return:                    None
    """

    bins = np.arange(2, 50, 0.5)
    n_bins = len(bins)

    def binned_frequencies(distances):
        # np.digitize yields indices 0..n_bins; exactly as before, only the
        # indices 0..n_bins-1 are kept (values >= bins[-1] are dropped).
        # np.bincount counts all bins in one pass instead of rescanning the
        # data once per bin.
        bin_index = np.ravel(np.digitize(np.asarray(distances), bins))
        counts = np.bincount(bin_index, minlength=n_bins + 1)[:n_bins]
        return counts / float(np.sum(counts))

    # expected nr of pairs:
    #   frequency of pairs observed at this distance in PDB
    p_r = binned_frequencies(distances_ab[seq_sep]['all'])

    data = []
    for ab in abs:
        p_r_ab = binned_frequencies(distances_ab[seq_sep][ab])

        # Empty bins give 0/0 or log(0); the resulting nan/inf values are
        # kept as before, only the numpy warnings are silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
            log_ratio = np.log(p_r_ab / p_r)

        data.append(
            go.Scatter(
                x=bins,
                y=log_ratio,
                mode='lines',
                name=ab + " ("+str(len(distances_ab[seq_sep][ab]))+")"
            )
        )

    layout = go.Layout(
        title="",
        xaxis=dict(
            title="distance bins"
        ),
        yaxis=dict(
            title="log ratio observed vs expected"
        )
    )

    fig = go.Figure(data=data,
                    layout=layout)

    plot_file = plot_dir + "/" + distance_definition + "_logratio_seqsep" + str(seq_sep) + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_projection_on_two_components(plot_dict, title, plot_out):
    """
    Plot every data set of a 2D projection as a semi-transparent marker trace.

    :param plot_dict:   dict whose 'data' entry is an iterable of dicts with
                        keys 'x', 'y', 'name', 'seq', 'N' and 'L'
    :param title:       plot title; an empty string reduces the top margin
    :param plot_out:    output path handed to plotly_plot
    :return:            None
    """

    data = []

    for plot_data in plot_dict['data']:
        n_seqs = plot_data['N']
        seq_len = plot_data['L']

        # hover text: sequence number followed by the sequence, with a line
        # break inserted in front of every 50th residue
        text = []
        for n in range(n_seqs):
            letters = []
            for l in range(seq_len):
                letter = io.AMINO_ACIDS[plot_data['seq'][n][l]]
                if (l + 1) % 50 == 0:
                    letter = "<br>" + letter
                letters.append(letter)
            text.append("seq no " + str(n + 1) + "<br>" + "".join(letters))

        data.append(
            go.Scatter(
                x=plot_data['x'],
                y=plot_data['y'],
                name=plot_data['name'],
                mode='markers',
                opacity=0.5,
                text=text,
                showlegend=True
            )
        )

    plot = {
        "data": data,
        "layout": go.Layout(
            font=dict(size=18),
            title=title,
            titlefont=dict(size=12),
            legend=dict(orientation="v"),
            hovermode='closest',
            yaxis=dict(
                title="principal component 2",
                exponentformat="e",
                # 'all' is the valid spelling of this enumerated value
                showexponent='all'
            ),
            xaxis=dict(
                title="principal component 1",
                exponentformat="e",
                showexponent='all'
            )
        )
    }

    # without a title the default top margin is wasted space
    if title == "":
        plot['layout']['margin']['t'] = 10

    plotly_plot(plot, filename=plot_out, auto_open=False)
Esempio n. 29
0
def plot_boxplot(statistics_dict, property, plot_file):
    """
    Draw one box (with all points shown) per topology for a given statistic.

    :param statistics_dict: mapping topology -> dict holding the requested
                            property plus 'protein', 'target neff' and
                            'sample neff' sequences used for the hover text
    :param property:        key of the statistic to plot; "neff_difference"
                            and "mutation_rate" additionally set a y-axis title
    :param plot_file:       output path handed to plotly_plot
    :return:                None
    """

    # y-axis titles for the statistics that have one
    yaxis_titles = {
        "neff_difference": "Pfam Neff - synthetic Neff",
        "mutation_rate": "mutation rate",
    }

    boxes = []
    for topology in sorted(statistics_dict.keys()):
        stats = statistics_dict[topology]

        values = stats[property]
        proteins = stats['protein']
        target_neff = stats['target neff']
        sample_neff = stats['sample neff']

        hover_text = []
        for i in range(len(values)):
            hover_text.append(
                "{0}<br>target neff:{1}<br>sample neff:{2}".format(
                    proteins[i], target_neff[i], sample_neff[i]))

        boxes.append(
            go.Box(y=values,
                   boxmean=True,
                   pointpos=1.8,
                   jitter=0.4,
                   boxpoints='all',
                   name=topology,
                   marker=dict(opacity=1),
                   text=hover_text,
                   hoverinfo='all',
                   orientation='v',
                   showlegend=False))

    layout = go.Layout(yaxis=dict(exponentformat='e', showexponent='all'),
                       margin=dict(t=10),
                       font=dict(size=18),
                       width=800,
                       height=500)
    plot = {"data": boxes, "layout": layout}

    if property in yaxis_titles:
        plot['layout']['yaxis']['title'] = yaxis_titles[property]

    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_distance_distribution(distances_ab, ab, distance_definition, log, plot_dir):
    """
    Plot one density curve of the distances of ``ab`` per sequence separation.

    :param distances_ab:        mapping seq_sep -> mapping key -> distances;
                                NaN distances are dropped
    :param ab:                  key to plot; either 'all' or a string whose
                                characters 0 and 2 name the two residues
    :param distance_definition: used in axis titles and the file name
    :param log:                 if true, plot log-distances with custom ticks
                                and append "_log" to the file name
    :param plot_dir:            directory the HTML file is written to
    :return:                    None
    """

    group_labels = []
    hist_data = []
    # .items() exists on Python 2 and 3 alike; the mapping is sorted once and
    # labels and data are built from the same pass.
    for seq_sep, values in sorted(distances_ab.items()):
        group_labels.append("sequence separation " + str(seq_sep))

        not_nan = ~np.isnan(values[ab])
        distances = np.array(values[ab])
        if log:
            distances = np.log(distances)
        hist_data.append(distances[not_nan])

    # Create distplot with custom bin_size
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2
        # hover text always shows the untransformed distance
        cb_distances = np.exp(trace['x']) if log else trace['x']
        trace['text'] = ['Cb distance: ' + str(x) for x in cb_distances]
        trace['hoverinfo'] = "text"

    # only index into ab when it actually names a residue pair
    if ab == 'all':
        residues = "residue pair"
    else:
        residues = ab[0] + " and " + ab[2]

    fig['layout']['font'] = dict(size = 16)
    fig['layout']['xaxis']['title'] = distance_definition + " distance between " + residues
    fig['layout']['xaxis']['showspikes'] = True
    fig['layout']['yaxis']['title'] = "Distribution of " + residues + " distances ("+distance_definition+")"
    fig['layout']['yaxis']['showspikes'] = True
    fig['layout']['xaxis']['range'] = [3,100]
    fig['layout']['xaxis']['tickangle'] = 0
    fig['layout']['margin']['t'] = 10

    # the file name carries the mean number of data points per curve
    plot_file = plot_dir + "/" + distance_definition + "_distribution_" + ab + "_data" + str(int(np.mean([len(h) for h in hist_data])))+".html"

    if log:
        # label the log axis with the untransformed distances
        tick_labels = [3,4,5,6,8,10,12,15,20,30,40,50,70,80]
        fig['layout']['xaxis']['tickmode'] = "array"
        fig['layout']['xaxis']['ticktext'] = tick_labels
        fig['layout']['xaxis']['tickvals'] = np.log(tick_labels)
        fig['layout']['xaxis']['range'] = np.log([3,100])
        plot_file = plot_file.replace(".html","_log.html")

    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_learning_rate_schedules(dict_of_schedules, alpha_0, plot_out):
    """
    Plot learning-rate schedules and write a titled and an untitled version.

    Schedules share a colour and legend group; the decay rates within one
    schedule are told apart by the dash style.

    :param dict_of_schedules:   mapping schedule name -> dict with an 'order'
                                entry (iterable of decay-rate keys) and, per
                                decay-rate key, the sequence of learning rates
    :param alpha_0:             initial learning rate, shown in the title and
                                used in the file names
    :param plot_out:            directory the two HTML files are written to
    :return:                    None
    """

    linetype=['dash', 'dot', 'longdash', 'dashdot']
    color=cl.scales['4']['qual']['Set1']

    data = []
    for id_schedule, name in enumerate(sorted(dict_of_schedules.keys())):
        schedule = dict_of_schedules[name]
        # Read 'order' instead of popping it: the caller's dict is left
        # intact, so the function can be called again with the same input.
        for id_rate, decay_rate in enumerate(schedule['order']):
            rates = schedule[decay_rate]
            data.append(
                go.Scatter(
                    # an explicit list is accepted by plotly on Python 2 and 3
                    x = list(range(1, len(rates)+1)),
                    y = rates,
                    name = name + " (" + str(decay_rate) + ")",
                    legendgroup = name,
                    line=dict(
                        width=4,
                        # cycle through styles/colours rather than raising
                        # IndexError when there are more than four entries
                        dash=linetype[id_rate % len(linetype)],
                        color=color[id_schedule % len(color)]
                    )
                )
            )

    layout = go.Layout(
        title="Comparison of learning rate schedules <br> alpha0={0}".format(alpha_0),
        font=dict(size=18),
        yaxis=dict(
            exponentformat="e",
            showexponent='all',
            title="learning rate"
        ),
        xaxis=dict(
            title="iteration"
        )

    )

    plot=go.Figure(data=data, layout=layout)
    plot_file = plot_out + "/learning_rate_schedules_alpha0"+str(alpha_0)+".html"
    plotly_plot(plot, filename=plot_file, auto_open=False)

    # second version of the same figure without title and with a small top margin
    plot['layout']['title']=""
    plot['layout']['margin']['t']=10
    plot_file = plot_out + "/learning_rate_schedules_alpha0"+str(alpha_0)+"_notitle.html"
    plotly_plot(plot, filename=plot_file, auto_open=False)
Esempio n. 32
0
 def plot(self):
     """
     Pass ``self.figure_or_data`` to plotly_plot and return its result.

     The call requests ``output_type='div'`` with ``show_link=False`` and
     ``include_plotlyjs=False``.
     """
     options = dict(
         show_link=False,
         output_type='div',
         include_plotlyjs=False,
     )
     return plotly_plot(self.figure_or_data, **options)
Esempio n. 33
0
 def plot(self):
     """
     Return what plotly_plot produces for this object's ``figure_or_data``.

     plotly_plot is called with ``output_type='div'``, ``show_link=False``
     and ``include_plotlyjs=False``.
     """
     rendered = plotly_plot(self.figure_or_data,
                            show_link=False,
                            output_type='div',
                            include_plotlyjs=False)
     return rendered
Esempio n. 34
0
def plot_ccmgen_benchmark_figure(fig, title, plot_file, height=350, width=400):
    """
    Apply the common benchmark layout to ``fig`` and write it to a file.

    The layout of the passed figure is modified in place.

    :param fig:         figure whose 'layout' entry is updated
    :param title:       plot title
    :param plot_file:   output path handed to plotly_plot
    :param height:      figure height
    :param width:       figure width
    :return:            None
    """
    layout = fig['layout']

    # title, text and hover behaviour
    layout['title'] = title
    layout['font']['size'] = 18
    layout['hovermode'] = 'closest'

    # figure geometry
    layout['height'] = height
    layout['width'] = width
    layout['margin']['t'] = 50
    layout['margin']['b'] = 45

    # legend placement and the fixed precision axis
    layout['legend'] = {'orientation': "v", 'x': 0.65, 'y': 1.0}
    layout['yaxis'] = {
        'title': "mean precision over proteins",
        'range': [0, 1]
    }

    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)
Esempio n. 35
0
def plot_pdb_uniprot_fct(data_dict, seq_dict, plot_dir=None):
    """
    Plot the 'Total' column against 'Date' for several data frames.

    Every frame of seq_dict is drawn as a dotted line; the 'PDB-Protein'
    frame of data_dict is drawn as a solid line. The y axis is logarithmic.

    Both arguments are modified: the 'Date' column of every seq_dict frame is
    converted in place, and data_dict receives one filtered frame per
    seq_dict name and a filtered 'PDB-Protein' frame.

    :param data_dict:   dict of data frames; must contain 'PDB-Protein' with
                        columns 'Date' (parsed with format "%Y") and 'Total'
    :param seq_dict:    dict name -> data frame with columns 'Date', 'Total'
    :param plot_dir:    directory for "pdb_uniprot.html"; when None nothing
                        is written and the plot dictionary is returned
    :return:            plot dictionary if plot_dir is None, otherwise None
    """

    data = []

    # NOTE(review): iteritems() exists only on Python 2 dicts.
    for name, df in seq_dict.iteritems():
        df['Date'] = pd.to_datetime(df['Date'])
        # Drop the rows dated before 1996. The labels of those rows are used
        # as positions into df.index -- presumably the frames carry a default
        # integer index where both coincide; verify against the callers.
        data_dict[name] = df.drop(
            df.index[df[df['Date'] < '1996-01-01'].index])
        data.append(
            go.Scatter(x=data_dict[name].Date,
                       y=data_dict[name].Total,
                       showlegend=True,
                       name=name,
                       line=dict(width=4, dash='dot')))

    data_dict['PDB-Protein']['Date'] = pd.to_datetime(
        data_dict['PDB-Protein']['Date'], format="%Y")
    # same pre-1996 filter as above, applied to the PDB frame
    data_dict['PDB-Protein'] = data_dict['PDB-Protein'].drop(
        data_dict['PDB-Protein'].index[data_dict['PDB-Protein'][
            data_dict['PDB-Protein']['Date'] < '1996-01-01'].index])
    # Entry 0 of the Date column is set to today's date.
    # NOTE(review): this is chained indexing; whether the assignment reaches
    # the frame depends on pandas' copy semantics -- confirm.
    data_dict['PDB-Protein']['Date'][0] = pd.to_datetime('today')
    data.append(
        go.Scatter(x=data_dict['PDB-Protein'].Date,
                   y=data_dict['PDB-Protein'].Total,
                   showlegend=True,
                   name='PDB-Protein',
                   line=dict(width=4)))

    plot = {
        "data":
        data,
        "layout":
        go.Layout(
            legend=dict(x=.05, y=1.0),
            title=
            "",  # Yearly Growth of Structures in PDB by Experimental Method",
            # the visible x range starts in 1999 although data from 1996 on is kept
            xaxis=dict(title="Year", range=['01-01-1999', '01-01-2017']),
            yaxis=dict(title="Total number of Entries", type="log"),
            font=dict(size=18))
    }

    if plot_dir is not None:
        plot_file = plot_dir + "/pdb_uniprot.html"
        plotly_plot(plot, filename=plot_file, auto_open=False)
    else:
        return plot
Esempio n. 36
0
def plot_percentage_gaps_per_position(single_freq, plot_file=None):
    """
    Plot the gap fraction and the normalised entropy per alignment position.

    :param single_freq: 2D array with one row per alignment position; column
                        20 is read as the gap entry and each row is
                        normalised by its sum
    :param plot_file:   output path handed to plotly_plot; when None nothing
                        is written and the plot dictionary is returned
    :return:            plot dictionary if plot_file is None, otherwise None
    """

    L = single_freq.shape[0]
    positions = list(range(1, L + 1))

    #compute percentage of gaps
    gaps = single_freq[:, 20] / single_freq.sum(1)

    #entropy per position, scaled by its maximum over all positions
    entropy_per_position = scipy.stats.entropy(single_freq.transpose(),base=2)
    entropy_per_position /= np.max(entropy_per_position)

    #create plot
    # "lines" (lower case) is the valid mode flag; the capitalised spelling
    # is not a recognised value.
    data = [
        go.Scatter(
            x=positions,
            y=gaps,
            name = "percentage of gaps",
            mode="lines",
            line=dict(width=3)
        ),
        go.Scatter(
            x=positions,
            y=entropy_per_position,
            name = "relative Entropy",
            mode="lines",
            line=dict(width=3)
        ),
    ]

    layout = {
        'title':"Percentage of gaps and Entropy in alignment",
        'xaxis':{'title':"Alignment Position"},
        'yaxis':{'title':"Percentage of Gaps/Entropy"},
        'font':{'size':18}
    }

    plot = {'data': data, 'layout': layout}
    if plot_file is None:
        return plot
    else:
        plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_log_observed_expected_at_abs(distances_ab, abs, seq_sep,
                                      distance_definition, plot_dir):
    """
    Plot log(observed / expected) distance frequencies for selected keys.

    The expected frequency per bin comes from the 'all' entry of
    distances_ab[seq_sep], the observed frequency from each entry listed in
    ``abs``; one line is drawn per entry.

    :param distances_ab:        nested mapping, distances_ab[seq_sep][key] is
                                a sequence of distances; must contain 'all'
    :param abs:                 iterable of keys (strings) to plot
    :param seq_sep:             key into distances_ab, also used in the file name
    :param distance_definition: prefix of the output file name
    :param plot_dir:            directory the HTML file is written to
    :return:                    None
    """

    bins = np.arange(2, 50, 0.5)
    n_bins = len(bins)

    def binned_frequencies(distances):
        # np.digitize yields indices 0..n_bins; as before only the indices
        # 0..n_bins-1 are kept (values >= bins[-1] are dropped). One
        # np.bincount pass replaces rescanning the data once per bin.
        bin_index = np.ravel(np.digitize(np.asarray(distances), bins))
        counts = np.bincount(bin_index, minlength=n_bins + 1)[:n_bins]
        return counts / float(np.sum(counts))

    # expected nr of pairs:
    #   frequency of pairs observed at this distance in PDB
    p_r = binned_frequencies(distances_ab[seq_sep]['all'])

    data = []
    for ab in abs:
        p_r_ab = binned_frequencies(distances_ab[seq_sep][ab])

        # Empty bins yield 0/0 or log(0); the nan/inf values are kept as
        # before, only the numpy warnings are silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
            log_ratio = np.log(p_r_ab / p_r)

        data.append(
            go.Scatter(x=bins,
                       y=log_ratio,
                       mode='lines',
                       name=ab + " (" + str(len(distances_ab[seq_sep][ab])) +
                       ")"))

    layout = go.Layout(title="",
                       xaxis=dict(title="distance bins"),
                       yaxis=dict(title="log ratio observed vs expected"))

    fig = go.Figure(data=data, layout=layout)

    plot_file = plot_dir + "/" + distance_definition + "_logratio_seqsep" + str(
        seq_sep) + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_scatter(statistics_dict, key, plot_out):
    """
    Scatter a statistic against the target Neff, one marker trace per method.

    :param statistics_dict: mapping method -> dict holding the requested key
                            plus 'protein', 'target neff' and 'sample neff'
    :param key:             statistic plotted on the y axis;
                            "neff_difference" and "mutation_rate" additionally
                            set a plot title and y-axis title
    :param plot_out:        output path handed to plotly_plot
    :return:                None
    """
    methods = sorted(statistics_dict.keys())

    data = []

    for method in methods:
        values = statistics_dict[method][key]
        proteins = statistics_dict[method]['protein']
        target_neff = statistics_dict[method]['target neff']
        sample_neff = statistics_dict[method]['sample neff']

        # hover text: protein name with both Neff values
        hover_text = [
            proteins[i] + "<br>target neff: " + str(target_neff[i]) + "<br>sample neff: " + str(sample_neff[i])
            for i in range(len(values))
        ]

        data.append(
            go.Scatter(
                x=target_neff,
                y=values,
                name=method,
                mode="markers",
                text=hover_text,
            )
        )

    plot = {
        "data": data,
        "layout": go.Layout(
            yaxis=dict(
                exponentformat='e',
                # 'all' is the valid spelling of this enumerated value
                showexponent='all'
            ),
            xaxis=dict(title="Target Neff"),
            font=dict(size=18)
        )
    }

    if key == "neff_difference":
        plot['layout']['title'] = "Difference in target and sampled Neff"
        plot['layout']['yaxis']['title'] = "target - sampled neff"
    if key == "mutation_rate":
        plot['layout']['title'] = "Mutation rate used for Sampling"
        plot['layout']['yaxis']['title'] = "mutation rate"

    plotly_plot(plot, filename=plot_out, auto_open=False)
def plot_percentage_gaps_per_position(alignment, plot_file=None):
    """
    Plot gap and entropy values per alignment position.

    The values come from ali_ut.compute_gaps_per_position and
    ali_ut.compute_entropy_per_position.

    :param alignment:   sequence of sequences; len(alignment) is reported as
                        N and len(alignment[0]) as L in the title
    :param plot_file:   output path handed to plotly_plot; when None nothing
                        is written and the plot dictionary is returned
    :return:            plot dictionary if plot_file is None, otherwise None
    """

    # N is only displayed, so keep it an integer ("N=100", not "N=100.0")
    N = len(alignment)
    L = len(alignment[0])
    positions = list(range(1, L + 1))

    gaps = ali_ut.compute_gaps_per_position(alignment)
    entropy_per_position = ali_ut.compute_entropy_per_position(alignment)

    #create plot
    # "lines" (lower case) is the valid mode flag; the capitalised spelling
    # is not a recognised value.
    data = [
        go.Scatter(x=positions,
                   y=gaps,
                   name="percentage of gaps",
                   mode="lines",
                   line=dict(width=3)),
        go.Scatter(x=positions,
                   y=entropy_per_position,
                   name="relative Entropy",
                   mode="lines",
                   line=dict(width=3)),
    ]

    layout = {
        'title':
        "Percentage of gaps and Entropy in alignment <br> N=" + str(N) +
        ", L=" + str(L),
        'xaxis': {
            'title': "Alignment Position"
        },
        'yaxis': {
            'title': "Percentage of Gaps/Entropy"
        },
        'font': {
            'size': 18
        }
    }

    plot = {'data': data, 'layout': layout}
    if plot_file is None:
        return plot
    else:
        plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_coupling_vs_distance_distribution(couplings_per_bin, plot_dir, ab, abs=False):
    """
    Draw grouped box plots of couplings per distance bin, one group per method.

    :param couplings_per_bin:   mapping method -> mapping distance bin ->
                                sequence of couplings; must not be empty
    :param plot_dir:            directory the HTML file is written to
    :param ab:                  label used in the title and the file name
    :param abs:                 if true, plot absolute values and append
                                "_abs" to the file name
    :return:                    None
    """

    # A real list, so that methods[0] also works where keys() is a view.
    methods = list(couplings_per_bin.keys())

    data = []
    for method in methods:
        per_bin = couplings_per_bin[method]

        x = []
        y = []
        for distance_bin in sorted(per_bin.keys()):
            couplings = per_bin[distance_bin]
            x.extend([distance_bin] * len(couplings))
            y.extend(np.abs(couplings) if abs else couplings)

        data.append(
            go.Box(
                y=y,
                x=x,
                name=method
            )
        )

    # Tick positions and the couplings-per-bin estimate in the title are both
    # derived from the first method only.
    first_method_bins = sorted(couplings_per_bin[methods[0]].keys())
    # explicit floor division keeps the integer estimate on Python 3 as well
    approx_per_bin = len(data[0]['x']) // len(first_method_bins)

    layout = go.Layout(
        title='Distribution of couplings for wij('+ab+") <br> ~" + str(approx_per_bin) +" couplings per bin " ,
        yaxis=dict(
            zeroline=False
        ),
        xaxis=dict(
            title="Cbeta distance bins",
            tickvals=first_method_bins
        ),
        font = dict(size = 18),
        boxmode='group'
    )

    fig = go.Figure(data=data, layout=layout)

    plot_name = plot_dir + "/coupling_distribution_"+ ab
    if abs:
        plot_name = plot_name + "_abs"
    plotly_plot(fig, filename=plot_name+".html", auto_open=False)
def plot_boxplot_correlation_alignment_statistics_pll_vs_pcd(data_dict, plot_dir):
    """
    Draw grouped box plots for the two entries of data_dict.

    Note that each entry's 'x' values are plotted on the y axis and its 'y'
    values on the x axis.

    :param data_dict:   dict with the keys 'pseudo-likelihood' and
                        'contrastive divergence', each a dict with 'x' and 'y'
    :param plot_dir:    directory the HTML file is written to
    :return:            None
    """

    # (key in data_dict, legend label)
    series = (
        ('pseudo-likelihood', "pseudo-likelihood"),
        ('contrastive divergence', "persistent contrastive divergence"),
    )

    data = []
    for key, label in series:
        data.append(
            go.Box(
                y=data_dict[key]['x'],
                x=data_dict[key]['y'],
                boxpoints='outliers',
                name=label,
                hoverinfo='all',
                orientation="v",
                showlegend=True
            )
        )

    # horizontal legend, centred above the plot area
    legend = dict(orientation="h", xanchor="center", x=0.5, y=1.2)

    layout = go.Layout(
        title="",
        margin=dict(t=10),
        legend=legend,
        yaxis=dict(title="Pearson's r", range=[0,1]),
        font=dict(size=18),
        boxmode='group'
    )

    fig = go.Figure(data=data, layout=layout)

    plot_out = plot_dir+"/boxplot_pearson_correlation_coeff_empirical_vs_model_statistics.html"
    plotly_plot(fig, filename=plot_out, auto_open=False, show_link=False)
def plot_meta_property_vs_method(method_numit, axis_title, sorted_methods, plot_dir):
    """
    Draw one box per method on a logarithmic y axis.

    :param method_numit:    mapping method name -> sequence of values
    :param axis_title:      y-axis title, also used (blanks replaced by
                            underscores) in the output file name
    :param sorted_methods:  preferred order of the boxes; names missing from
                            method_numit are ignored, methods not listed are
                            appended afterwards. May be None.
    :param plot_dir:        directory the HTML file is written to
    :return:                None
    """

    plot_name = plot_dir+"/distribution_"+ axis_title.replace(" ", "_") + "_against_methods.html"

    # Honour the requested order (the parameter used to be ignored) while
    # still drawing every method exactly once.
    if sorted_methods is None:
        sorted_methods = []
    ordered_methods = []
    for method in list(sorted_methods) + list(method_numit):
        if method in method_numit and method not in ordered_methods:
            ordered_methods.append(method)

    data = []
    for method in ordered_methods:
        values = method_numit[method]

        box = go.Box(
            y=values,
            boxmean=True,
            # lower case is the valid spelling of this enumerated value
            boxpoints='outliers',
            name=method,
            marker=dict(opacity=1),
            hoverinfo='all',
            orientation='v',
            showlegend=False
        )

        data.append(box)

    plot = {
        "data": data,
        "layout": go.Layout(
            yaxis=dict(
                title=axis_title,
                type='log',
                exponentformat='none',
                showexponent='none',
                # fixed, plainly labelled ticks on the log axis
                tickmode="array",
                tickvals=[1, 10, 100, 500],
                ticktext=[1, 10, 100, 500]
            ),
            font=dict(size=18)
        )
    }

    plotly_plot(plot, filename=plot_name, auto_open=False, show_link=False)
Esempio n. 43
0
def plot_density(protein, bqij_data, plot_dir):
    """
    Write a density plot and an overlaid histogram of the values per group.

    For every group the file named in bqij_data is read with io.read_qij and
    the entries above the diagonal of its second return value are flattened
    and plotted.

    :param protein:     name used as prefix of both output files
    :param bqij_data:   dict with the key 'L' plus one file path per group
    :param plot_dir:    directory the two HTML files are written to
    :return:            None
    """

    L = bqij_data['L']
    group_labels = [key for key in sorted(bqij_data.keys()) if key != "L"]

    # indices of the strictly upper triangle, identical for every group
    upper_triangle = np.triu_indices(n=L, k=1)

    hist_data = []
    data = []
    for group in group_labels:
        # the first return value of read_qij is not needed here
        _, qij = io.read_qij(bqij_data[group], L)

        data_group = qij[upper_triangle].flatten()

        hist_data.append(data_group)

        data.append(
            go.Histogram(x=data_group,
                         histnorm='probability',
                         name=group,
                         xbins=dict(start=-0.1, end=1, size=0.005),
                         opacity=0.75))

    # Create distplot with custom bin_size
    fig = ff.create_distplot(hist_data,
                             group_labels,
                             show_hist=False,
                             show_rug=False)
    fig['layout']['font'] = dict(size=18)
    fig['layout']['xaxis']['title'] = "q_ijab"
    plot_file = plot_dir + "/" + protein + "_distribution_qijab" + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)

    #create histogram
    plot_file = plot_dir + "/" + protein + "_histogram_qijab" + ".html"
    # 'all' is the valid spelling of the showexponent value
    layout = go.Layout(barmode='overlay',
                       xaxis=dict(title="q_ijab",
                                  exponentformat="e",
                                  showexponent='all'),
                       yaxis=dict(exponentformat="e", showexponent='all'),
                       font=dict(size=18))
    fig = go.Figure(data=data, layout=layout)
    plotly_plot(fig, filename=plot_file, auto_open=False)
Esempio n. 44
0
def combine_plots(plot_pdb, plot_pdb_uniprot, plot_dir):
    """
    Put two plots side by side in one figure and write it to a file.

    :param plot_pdb:            plot for the left panel; its 'data' traces
                                and 'layout' x/y axis settings are used
    :param plot_pdb_uniprot:    plot for the right panel, used the same way
    :param plot_dir:            directory for "pdb_uniprot_stats.html"
    :return:                    None
    """

    #combine plots in a panel
    fig = tools.make_subplots(rows=1, cols=2, print_grid=False)
    panels = (plot_pdb, plot_pdb_uniprot)

    # traces first: column 1 takes plot_pdb, column 2 plot_pdb_uniprot
    for column, panel in enumerate(panels, 1):
        for trace in panel['data']:
            fig.append_trace(trace, 1, column)

    # then carry over the axis settings of each panel
    for column, panel in enumerate(panels, 1):
        fig['layout']['xaxis' + str(column)].update(panel['layout']['xaxis'])
        fig['layout']['yaxis' + str(column)].update(panel['layout']['yaxis'])

    fig['layout']['font']['size'] = 18
    fig['layout']['margin']['t'] = 10

    plotname = plot_dir + "/pdb_uniprot_stats.html"
    plotly_plot(fig, filename=plotname, auto_open=False)
Esempio n. 45
0
def plot_learning_rate_schedules(dict_of_schedules, alpha_0, plot_out):
    """
    Plot learning-rate schedules and write a titled and an untitled version.

    Schedules share a colour and legend group; the decay rates within one
    schedule are told apart by the dash style.

    :param dict_of_schedules:   mapping schedule name -> dict with an 'order'
                                entry (iterable of decay-rate keys) and, per
                                decay-rate key, the sequence of learning rates
    :param alpha_0:             initial learning rate, shown in the title and
                                used in the file names
    :param plot_out:            directory the two HTML files are written to
    :return:                    None
    """

    linetype = ['dash', 'dot', 'longdash', 'dashdot']
    color = cl.scales['4']['qual']['Set1']

    data = []
    for id_schedule, name in enumerate(sorted(dict_of_schedules.keys())):
        schedule = dict_of_schedules[name]
        # Read 'order' instead of popping it: the caller's dict is left
        # intact, so the function can be called again with the same input.
        for id_rate, decay_rate in enumerate(schedule['order']):
            rates = schedule[decay_rate]
            data.append(
                go.Scatter(
                    # an explicit list is accepted by plotly on Python 2 and 3
                    x=list(range(1, len(rates) + 1)),
                    y=rates,
                    name=name + " (" + str(decay_rate) + ")",
                    legendgroup=name,
                    # cycle through styles/colours rather than raising
                    # IndexError when there are more than four entries
                    line=dict(width=4,
                              dash=linetype[id_rate % len(linetype)],
                              color=color[id_schedule % len(color)])))

    layout = go.Layout(
        title="Comparison of learning rate schedules <br> alpha0={0}".format(
            alpha_0),
        font=dict(size=18),
        yaxis=dict(exponentformat="e",
                   showexponent='all',
                   title="learning rate"),
        xaxis=dict(title="iteration"))

    plot = go.Figure(data=data, layout=layout)
    plot_file = plot_out + "/learning_rate_schedules_alpha0" + str(
        alpha_0) + ".html"
    plotly_plot(plot, filename=plot_file, auto_open=False)

    # second version of the same figure without title and with a small top margin
    plot['layout']['title'] = ""
    plot['layout']['margin']['t'] = 10
    plot_file = plot_out + "/learning_rate_schedules_alpha0" + str(
        alpha_0) + "_notitle.html"
    plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_meta_property_vs_method(method_numit, axis_title, sorted_methods,
                                 plot_dir):
    """
    Draw one box per method on a logarithmic y axis.

    :param method_numit:    mapping method name -> sequence of values
    :param axis_title:      y-axis title, also used (blanks replaced by
                            underscores) in the output file name
    :param sorted_methods:  preferred order of the boxes; names missing from
                            method_numit are ignored, methods not listed are
                            appended afterwards. May be None.
    :param plot_dir:        directory the HTML file is written to
    :return:                None
    """

    plot_name = plot_dir + "/distribution_" + axis_title.replace(
        " ", "_") + "_against_methods.html"

    # Honour the requested order (the parameter used to be ignored) while
    # still drawing every method exactly once.
    if sorted_methods is None:
        sorted_methods = []
    ordered_methods = []
    for method in list(sorted_methods) + list(method_numit):
        if method in method_numit and method not in ordered_methods:
            ordered_methods.append(method)

    data = []
    for method in ordered_methods:
        values = method_numit[method]

        # lower-case 'outliers' is the valid spelling of the boxpoints value
        box = go.Box(y=values,
                     boxmean=True,
                     boxpoints='outliers',
                     name=method,
                     marker=dict(opacity=1),
                     hoverinfo='all',
                     orientation='v',
                     showlegend=False)

        data.append(box)

    plot = {
        "data":
        data,
        "layout":
        go.Layout(
            yaxis=dict(
                title=axis_title,
                type='log',
                exponentformat='none',
                showexponent='none',
                # fixed, plainly labelled ticks on the log axis
                tickmode="array",
                tickvals=[1, 10, 100, 500],
                ticktext=[1, 10, 100, 500]),
            font=dict(size=18))
    }

    plotly_plot(plot, filename=plot_name, auto_open=False, show_link=False)
def combine_plots(plot_pdb, plot_pdb_uniprot, plot_dir):
    """
    Arrange two plots next to each other and write the combined figure.

    :param plot_pdb:            plot shown in the first column; its 'data'
                                traces and x/y axis layout are copied
    :param plot_pdb_uniprot:    plot shown in the second column
    :param plot_dir:            directory for "pdb_uniprot_stats.html"
    :return:                    None
    """

    #combine plots in a panel
    fig = tools.make_subplots(rows=1, cols=2, print_grid=False)

    # (source plot, subplot column, x-axis key, y-axis key)
    panels = [
        (plot_pdb, 1, 'xaxis1', 'yaxis1'),
        (plot_pdb_uniprot, 2, 'xaxis2', 'yaxis2'),
    ]

    for source, column, _, _ in panels:
        for trace in source['data']:
            fig.append_trace(trace, 1, column)

    combined_layout = fig['layout']
    for source, _, xaxis_key, yaxis_key in panels:
        combined_layout[xaxis_key].update(source['layout']['xaxis'])
        combined_layout[yaxis_key].update(source['layout']['yaxis'])

    combined_layout['font']['size'] = 18
    combined_layout['margin']['t'] = 10

    plotly_plot(fig,
                filename=plot_dir + "/pdb_uniprot_stats.html",
                auto_open=False)
def plot_amino_acid_distribution_per_position(aa_counts_single, title, plot_file=None, freq=True):
    """
    Plot a stacked bar chart of the amino acid counts per alignment position.

    :param aa_counts_single:    2D array, one row per position; columns 0-19
                                are plotted (one bar series each). The array
                                is not modified.
    :param title:               plot title
    :param plot_file:           output path handed to plotly_plot; when None
                                nothing is written and the plot is returned
    :param freq:                if true, divide all counts by the sum of the
                                first row before plotting
    :return:                    plot dictionary if plot_file is None,
                                otherwise None
    """

    # normalisation constant: total count at the first position
    Neff = np.sum(aa_counts_single[0,:])
    L = aa_counts_single.shape[0]
    positions = list(range(1, L + 1))

    if freq:
        # Out-of-place true division: the caller's array stays untouched (an
        # in-place "/=" rescaled it on every call) and integer counts are
        # handled as well.
        aa_counts_single = aa_counts_single / float(Neff)

    #add bar for each amino acid for each position
    data = [
        go.Bar(
            x=positions,
            y=aa_counts_single[:, aa].tolist(),
            showlegend=True,
            name=io.AMINO_ACIDS[aa]
        )
        for aa in range(20)
    ]

    layout = go.Layout(
        barmode='stack',
        title=title,
        xaxis=dict(title="Alignment Position"),
        yaxis=dict(
            title="Amino Acid Distribution",
            exponentformat='e',
            # 'all' is the valid spelling of this enumerated value
            showexponent='all'),
        font=dict(size=18)
    )

    plot = {'data': data, 'layout': layout}

    if plot_file is None:
        return plot
    else:
        plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_1d_coupling_profile(couplings_per_pair, lower_cb_distance, upper_cb_distance, plot_file ):
    """
    Plot one density curve of couplings per key of couplings_per_pair.

    :param couplings_per_pair:  mapping label (string) -> sequence of
                                couplings; the legend shows the label with
                                the number of couplings in parentheses
    :param lower_cb_distance:   lower distance bound, shown in the x-axis title
    :param upper_cb_distance:   upper distance bound, shown in the x-axis title
    :param plot_file:           output path handed to plotly_plot
    :return:                    None
    """

    # Build labels and data in a single pass so they are guaranteed to line
    # up, and hand real lists (not dict views) to create_distplot.
    group_labels = []
    hist_data = []
    for key, couplings in couplings_per_pair.items():
        group_labels.append(key + "("+str(len(couplings))+")")
        hist_data.append(couplings)

    # Create distplot with custom bin_size
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size = 16)
    fig['layout']['xaxis']['title'] = "couplings w_ijab for residue pairs ij at {0}Å < ΔCβ  < {1}Å".format(lower_cb_distance, upper_cb_distance)
    fig['layout']['xaxis']['range'] = [-1,1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings "
    fig['layout']['margin']['t'] = 10

    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_boxplot_all_stats(stats_df, plot_out=None):
    """Draw one box plot per statistic column of ``stats_df``, side by side.

    Args:
        stats_df: Table with a "protein" column (used as hover text for the
            individual points) plus one column per statistic.
        plot_out: Output HTML path. If None, the figure is returned instead.

    Returns:
        The figure if ``plot_out`` is None, otherwise None.
    """

    protein_labels = stats_df['protein']

    # Every column except "protein" becomes its own panel.
    stat_columns = stats_df.keys().tolist()
    stat_columns.remove("protein")
    n_panels = len(stat_columns)

    fig = tools.make_subplots(rows=1, cols=n_panels)

    for column_nr, stat in enumerate(stat_columns, start=1):
        box = go.Box(
            y=stats_df[stat],
            name=stat,
            hoverinfo='all',
            orientation="v",
            showlegend=False,
            boxpoints="all",
            jitter=0.5,
            pointpos=2,
            text=protein_labels
        )
        fig.append_trace(box, 1, column_nr)

    # Panel width is fixed, so the figure grows with the number of statistics.
    fig['layout'].update(
        font=dict(size=18),
        hovermode='closest',
        title="Dataset Statistics",
        width=300 * n_panels,
        height=500
    )

    if plot_out is None:
        return fig

    plotly_plot(fig, filename=plot_out, auto_open=False, link_text='')
def with_jax(fig, filename):
    """Write ``fig`` to ``filename`` as an HTML page that also loads MathJax.

    The figure is rendered as an HTML div by ``plotly_plot`` and embedded in a
    minimal page whose head pulls MathJax 2.7.1 from the cdnjs CDN.
    """

    div_markup = plotly_plot(fig, output_type = 'div')

    page = """
    <head>
    <script type="text/javascript" async
      src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG">
    </script>
    </head>
    <body>
    {plot_div:s}
    </body>""".format(plot_div=div_markup)

    with open(filename, 'w') as html_file:
        html_file.write(page)
def _stat_marker_trace(x_values, y_values, name, hover_text):
    """Build the semi-transparent black marker trace used in each statistics panel."""
    return go.Scattergl(
        x=x_values,
        y=y_values,
        mode='markers',
        name=name,
        text=hover_text,
        marker=dict(color='black'),
        opacity=0.1,
        showlegend=False
    )


def _stat_diagonal_trace(x_values, y_values):
    """Build the y = x reference line spanning the joint value range of both series."""
    # Take min/max over both series explicitly. Relying on `+` to concatenate
    # only works for lists; for numpy arrays it adds element-wise.
    lower = np.min([np.min(x_values), np.min(y_values)])
    upper = np.max([np.max(x_values), np.max(y_values)])
    return go.Scattergl(
        x=[lower, upper],
        y=[lower, upper],
        mode="lines",
        showlegend=False,
        marker=dict(color='rgb(153, 204, 255)')
    )


def plot_empirical_vs_model_statistics(
        single_freq_observed, single_freq_sampled,
        pairwise_freq_observed, pairwise_freq_sampled,
        title, plot_out=None, log=False, width=1500):
    """Scatter observed against sampled alignment statistics in three panels.

    The panels show (1) single-site amino acid frequencies, (2) pairwise amino
    acid frequencies and (3) covariances
    ``pair[i, j, a, b] - single[i, a] * single[j, b]`` for all position pairs
    i < j. Each panel carries a diagonal reference line and the Pearson
    correlation between observed and sampled values.

    Args:
        single_freq_observed: numpy array indexed [position, amino acid],
            statistics of the observed sequences.
        single_freq_sampled: Same layout, statistics of the sampled sequences.
        pairwise_freq_observed: numpy array indexed
            [position i, position j, amino acid a, amino acid b].
        pairwise_freq_sampled: Same layout, statistics of the sampled sequences.
        title: Figure title; an empty string gives a more compact figure
            without a title.
        plot_out: Output HTML path. If None, the figure is returned instead.
        log: If True, single and pairwise frequencies are plotted as natural
            logarithms with hand-placed tick labels. Covariances always stay
            on a linear scale.
        width: Figure width in pixels; the height is derived from it.

    Returns:
        The figure if ``plot_out`` is None, otherwise None.
    """

    L = single_freq_observed.shape[0]
    # Position pairs i < j in row-major order (i ascending, then j).
    pair_rows, pair_cols = np.triu_indices(L, k=1)

    def pairwise_values(pairwise_freq):
        # Flattened in the order (pair, a, b).
        return pairwise_freq[pair_rows, pair_cols, :, :].flatten()

    def covariance_values(pairwise_freq, single_freq):
        # Vectorised form of
        #   pair[i, j, a, b] - single[i, a] * single[j, b]
        # over the first 20 amino acids, in the same (pair, a, b) order.
        expected = (single_freq[pair_rows][:, :20, None] *
                    single_freq[pair_cols][:, None, :20])
        return (pairwise_freq[pair_rows, pair_cols, :20, :20] - expected).flatten().tolist()

    ## compute data
    x_single = single_freq_observed.flatten()
    y_single = single_freq_sampled.flatten()
    pair_freq_observed = pairwise_values(pairwise_freq_observed)
    pair_freq_sampled = pairwise_values(pairwise_freq_sampled)
    if log:
        x_single = np.log(x_single)
        y_single = np.log(y_single)
        pair_freq_observed = np.log(pair_freq_observed)
        pair_freq_sampled = np.log(pair_freq_sampled)
    x_single = x_single.tolist()
    y_single = y_single.tolist()
    pair_freq_observed = pair_freq_observed.tolist()
    pair_freq_sampled = pair_freq_sampled.tolist()

    cov_observed = covariance_values(pairwise_freq_observed, single_freq_observed)
    cov_sampled = covariance_values(pairwise_freq_sampled, single_freq_sampled)

    ## hover texts
    single_annotation = [
        "position: {0}<br>amino acid: {1}".format(i+1, io.AMINO_ACIDS[a])
        for i in range(L) for a in range(20)
    ]
    pair_annotation = [
        "position: {0}-{1}<br>amino acid: {2}-{3}".format(i+1, j+1, io.AMINO_ACIDS[a], io.AMINO_ACIDS[b])
        for i in range(L-1) for j in range(i+1, L) for a in range(20) for b in range(20)
    ]

    ## marker traces and their correlations
    trace_single_frequencies = _stat_marker_trace(
        x_single, y_single, 'single frequencies', single_annotation)
    pearson_corr_single = np.corrcoef(x_single, y_single)[0, 1]

    trace_pairwise_frequencies = _stat_marker_trace(
        pair_freq_observed, pair_freq_sampled, 'pairwise frequencies', pair_annotation)
    pearson_corr_pair = np.corrcoef(pair_freq_observed, pair_freq_sampled)[0, 1]

    trace_cov = _stat_marker_trace(
        cov_observed, cov_sampled, 'covariances', pair_annotation)
    pearson_corr_cov = np.corrcoef(cov_observed, cov_sampled)[0, 1]

    ## diagonal reference lines
    diagonal_single = _stat_diagonal_trace(x_single, y_single)
    diagonal_pair = _stat_diagonal_trace(pair_freq_observed, pair_freq_sampled)
    diagonal_cov = _stat_diagonal_trace(cov_observed, cov_sampled)

    ## define subplots
    fig = tools.make_subplots(
        rows=1,
        cols=3,
        subplot_titles=["single site amino acid frequencies", "pairwise amino acid frequencies", "covariances"],
        horizontal_spacing = 0.05
    )

    ## add traces as subplots
    fig.append_trace(trace_single_frequencies, 1, 1)
    fig.append_trace(diagonal_single, 1, 1)
    fig.append_trace(trace_pairwise_frequencies, 1, 2)
    fig.append_trace(diagonal_pair, 1, 2)
    fig.append_trace(trace_cov, 1, 3)
    fig.append_trace(diagonal_cov, 1, 3)

    # increase size of subplot titles (the first three annotations)
    for panel in range(3):
        fig['layout']['annotations'][panel]['font']['size'] = 20

    # add text to plot: Pearson correlation coefficient, one label per panel
    fig['layout']['annotations'].extend(
        [
            dict(
                x=x_position,
                y=0.04,
                xanchor="left",
                xref='paper',
                yref='paper',
                text='Pearson r = ' + str(np.round(pearson_corr, decimals=3)),
                bgcolor="white",
                showarrow=False
            )
            for x_position, pearson_corr in [
                (0.13, pearson_corr_single),
                (0.48, pearson_corr_pair),
                (0.85, pearson_corr_cov)
            ]
        ]
    )

    #define layout
    fig['layout'].update(
        font = dict(size=20),
        hovermode = 'closest',
        width=width
    )

    if title == "":
        fig['layout']['margin']['t']= 40
        fig['layout']['height'] = width/3
    else:
        # extra head room for the title
        fig['layout']['margin']['t'] = 120
        fig['layout']['title'] = title
        fig['layout']['titlefont']['size'] =12
        fig['layout']['height'] = width/3+100

    #specify axis layout details shared by all three panels
    for panel in ('1', '2', '3'):
        fig['layout']['yaxis' + panel].update(
            exponentformat="e",
            showexponent='All',
            scaleanchor="x" + panel,
            scaleratio=1
        )
        fig['layout']['xaxis' + panel].update(
            exponentformat="e",
            showexponent='All',
            scaleanchor="y" + panel,
            scaleratio=1
        )
    # axis titles are only set once per direction
    fig['layout']['yaxis1'].update(title="statistics from MCMC sample")
    fig['layout']['xaxis2'].update(title="statistics from natural sequences")
    fig['layout']['xaxis1'].update(showspikes=True)

    # only the two frequency panels get a fixed range / log ticks
    frequency_axes = ('xaxis1', 'yaxis1', 'xaxis2', 'yaxis2')
    if log:
        # Data are natural logs on a linear axis, so ticks are placed at
        # log(value) and labelled with the untransformed value.
        tick_values = [1e-10, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
        for axis in frequency_axes:
            fig['layout'][axis]['zeroline'] = False
            fig['layout'][axis]['range'] = np.log([5e-5, 2]).tolist()
            fig['layout'][axis]['ticktext'] = ["{:.0e}".format(value) for value in tick_values]
            fig['layout'][axis]['tickvals'] = np.log(tick_values).tolist()
    else:
        for axis in frequency_axes:
            fig['layout'][axis]['range'] = [0,1]

    if plot_out is not None:
        plotly_plot(fig, filename=plot_out, auto_open=False, link_text='')
    else:
        return fig
def plot_ccmgen_noise_quant_figure(subplots, plot_dir, height=500, width=500):
    """Plot noise curves derived from precision traces of two topologies.

    Three curves are drawn, each a difference of precision curves:

    * "entropy noise star":   ec - noapc within 'star topology'
    * "entropy noise binary": ec - noapc within 'binary topology'
    * "phylogenetic noise":   ec ('star topology') - ec ('binary topology')

    Within a topology, the precision curve of a kind is taken from the last
    trace whose name contains 'noapc' / 'ec'. The x values come from the last
    trace of 'star topology'.

    Args:
        subplots: Mapping with the keys 'star topology' and 'binary topology';
            each value holds a 'data' sequence of traces with 'name', 'x', 'y'.
        plot_dir: Directory the HTML file is written to.
        height: Figure height in pixels.
        width: Figure width in pixels.

    Returns:
        Path of the written HTML file.

    Raises:
        ValueError: If a topology has no trace whose name contains 'noapc'
            or no trace whose name contains 'ec'.
    """

    def precision_curves(topology):
        # Returns (noapc precision, ec precision, x of the last trace).
        noapc = None
        ec = None
        x_values = None
        for trace in subplots[topology]['data']:
            if 'noapc' in trace['name']:
                noapc = trace['y']
            if 'ec' in trace['name']:
                ec = trace['y']
            x_values = trace['x']
        if noapc is None or ec is None:
            raise ValueError(
                "'{0}' needs traces named with 'noapc' and 'ec' to quantify noise".format(topology))
        # Convert to float arrays so the curves can be subtracted even when
        # the traces store plain lists or tuples.
        return np.asarray(noapc, dtype=float), np.asarray(ec, dtype=float), x_values

    def noise_trace(values, name):
        return go.Scatter(
            x = x,
            y = values,
            name=name,
            line=dict(width=4)
        )

    precision_noapc_star, precision_ec_star, x = precision_curves('star topology')
    precision_noapc_binary, precision_ec_binary, _ = precision_curves('binary topology')

    entropy_noise_star_trace = noise_trace(
        precision_ec_star - precision_noapc_star, "entropy noise star")
    entropy_noise_binary_trace = noise_trace(
        precision_ec_binary - precision_noapc_binary, "entropy noise binary")
    phylogenetic_noise_trace = noise_trace(
        precision_ec_star - precision_ec_binary, "phylogenetic noise")

    data = [
        entropy_noise_binary_trace,
        entropy_noise_star_trace,
        phylogenetic_noise_trace
    ]

    fig = go.Figure(
        data=data,
        layout=go.Layout(
            title="quantification of noise",
            font=dict(size=18),
            margin=dict(b=45, t=50),
            xaxis=dict(
                title="#predicted contacts / protein length",
                showspikes=True
            ),
            yaxis=dict(
                title="fraction of noise",
                range=[0,0.8],
                showspikes=True
            ),
            legend=dict(
                orientation="v",
                x=0.15, y=1.0
            ),
            width=width,
            height=height
        )
    )

    plot_file = plot_dir+"/"+"ccmgen_noise_quant_figure.html"
    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)
    return plot_file
def plot_pll_vs_pcd_benchmark_figure(subplots, plot_dir, height=500, width=500):
    """Overlay the precision curves of PCD and pLL in a single figure.

    All traces of 'persistent contrastive divergence' are drawn with their
    existing line style, all traces of 'pseudo-likelihood maximization' with
    dotted lines. For each method an additional black copy of its first trace
    is added (named 'PCD' / 'pLL', legend group 'method') so that the legend
    can tell the two methods apart.

    Note: the traces inside ``subplots`` are modified in place (name,
    legendgroup, and for pLL also showlegend and line dash).

    Args:
        subplots: Mapping with the keys 'persistent contrastive divergence'
            and 'pseudo-likelihood maximization'; each value holds a 'data'
            sequence of traces.
        plot_dir: Directory the HTML file is written to.
        height: Figure height in pixels.
        width: Figure width in pixels.

    Returns:
        Path of the written HTML file.
    """

    data = []

    #add PCD traces
    # Deep copy: the legend entry restyles the nested 'line' settings, which a
    # shallow copy would share with (and thereby recolour) the first real trace.
    pcd_legend_entry = copy.deepcopy(subplots['persistent contrastive divergence']['data'][0])
    pcd_legend_entry['legendgroup'] = 'method'
    pcd_legend_entry['name'] = 'PCD'
    pcd_legend_entry['line']['color'] = 'black'
    data.append(pcd_legend_entry)

    for trace in subplots['persistent contrastive divergence']['data']:
        # keep only the part after the last "-" and before the first "("
        trace['name'] = trace['name'].split("-")[-1].split("(")[0]
        trace['legendgroup']='correction'
        data.append(trace)

    #add pLL traces
    pll_legend_entry = copy.deepcopy(subplots['pseudo-likelihood maximization']['data'][0])
    pll_legend_entry['legendgroup'] = 'method'
    pll_legend_entry['name'] = 'pLL'
    pll_legend_entry['line']['color'] = 'black'
    pll_legend_entry['line']['dash'] = 'dot'
    pll_legend_entry['showlegend'] = True
    data.append(pll_legend_entry)

    for trace in subplots['pseudo-likelihood maximization']['data']:
        trace['name'] = trace['name'].split("-")[-1].split("(")[0]
        trace['legendgroup'] = 'correction'
        # hidden from the legend; only the PCD traces are listed per correction
        trace['showlegend'] = False
        trace['line']['dash'] = 'dot'
        data.append(trace)

    layout=go.Layout(
        font = dict(size=18),
        hovermode = 'closest',
        title = "",
        margin=dict(t=10),
        legend=dict(
            orientation="v",
            x=1.01, y=1.0
        ),
        yaxis=dict(
            title="Mean Precision over Proteins",
            range=[0,1]
        ),
        xaxis=dict(
            title="#predicted contacts / protein length"
        ),
        height=height,
        width=width
    )

    fig = go.Figure(data=data, layout=layout)

    plot_file = plot_dir+"/"+"ccmgen_benchmark_figure_pll_vs_pcd.html"
    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)
    return plot_file
def plot_ccmgen_benchmark_figure(subplots, plot_dir, height=500, width=1500):
    """Plot the precision curves of the star and binary topology side by side.

    Note: the traces inside ``subplots`` are modified in place (name,
    showlegend, legendgroup, text, hoverinfo).

    Args:
        subplots: Mapping with the keys 'star topology' and 'binary topology';
            each value holds a 'data' sequence of traces with 'name', 'x', 'y'.
        plot_dir: Directory the HTML file is written to.
        height: Figure height in pixels.
        width: Figure width in pixels.

    Returns:
        Path of the written HTML file.
    """

    # One panel per entry, in this order.
    titles=['star topology', 'binary topology']

    ## define subplot grid
    fig = tools.make_subplots(
        rows=1,
        cols=len(titles),
        subplot_titles=titles,
        horizontal_spacing = 0.05,
        print_grid=False
    )

    ## add traces as subplots
    for col, topology in enumerate(titles, start=1):
        for trace in subplots[topology]['data']:
            # keep only the part after the last "-" and before the first "("
            trace['name'] = trace['name'].split("-")[-1].split("(")[0]
            # list every curve once in the legend: only in the first panel
            trace['showlegend'] = (col == 1)
            trace['legendgroup']= 'correction'
            trace['text'] = [
                "{0} ({1}) <br>x: {2} <br>y: {3}".format(
                    topology, trace['name'], x_value, np.round(y_value, decimals=3))
                for x_value, y_value in zip(trace['x'], trace['y'])
            ]
            trace['hoverinfo'] = 'text'
            fig.append_trace(trace, 1, col)

    #increase subplot title font size
    for subtitle in fig['layout']['annotations']:
        subtitle['font']['size'] = 22
        subtitle['y'] = 1.03

    #add centered x-axis title
    fig['layout']['annotations'].append(
        go.Annotation(
            text="#predicted contacts / protein length",
            x=0.5, y=-0.15,
            xref = 'paper',
            yref = 'paper',
            showarrow =  False,
            font = dict(size = 22)
        )
    )

    #define layout
    fig['layout'].update(
        font = dict(size=18),
        hovermode = 'closest',
        title = "",
        margin=dict(t=40),
        legend=dict(
            orientation="v",
            x=1.0, y=1.0
        ),
        yaxis1=dict(
            title="Mean Precision over Proteins"
        ),
        height=height,
        width=width
    )

    # Same axis settings for every panel; stop at the last existing panel so
    # that no axis without a subplot is touched.
    for i in range(1, len(titles) + 1):
        fig['layout']['yaxis'+str(i)].update(
            range=[0,1],
            zeroline=False,
            tickvals=[0.1, 0.3, 0.5, 0.7, 0.9],
            showspikes=True
        )
        fig['layout']['xaxis'+str(i)].update(
            range=[0,1],
            zeroline=False,
            tickvals=[0.1, 0.3, 0.5, 0.7, 0.9],
            showspikes=True
        )

    plot_file = plot_dir+"/"+"ccmgen_benchmark_figure.html"
    plotly_plot(fig, filename=plot_file, auto_open=False, link_text='')
    return plot_file