예제 #1
0
def plot_hypothesis(hypothesis, file_name):
    """Render hypothesis-test results as bar charts and save them as PDF pages.

    For every (bin type, score) combination two plots are produced:

    * the mean score of the two compared datasets for each bin, and
    * the p-value for each bin, coloured by significance at the 95% level
      (``p <= 0.05``).

    Parameters
    ----------
    hypothesis : mapping
        ``hypothesis[bin_type][score]`` is an iterable of single-entry
        mappings ``{bin_name: result}``.  Each ``result`` must expose the
        attributes ``p1``, ``p2``, ``mean1``, ``mean2``, ``t`` and ``p``.
    file_name : str
        Forwarded unchanged to ``save_as_pdf_pages``.

    Returns
    -------
    None
    """
    bin_types = list(hypothesis)
    # The score names are read from the first bin type and then used to
    # index every bin type.
    scores = list(hypothesis[bin_types[0]])
    plots = []
    for bin_type, score in product(bin_types, scores):
        mean_name = "Mean: " + score
        mean_rows = []
        test_rows = []
        for bin_ in hypothesis[bin_type][score]:
            bin_name, h = list(bin_.items())[0]
            for dataset, mean in ((h.p1, h.mean1), (h.p2, h.mean2)):
                mean_rows.append({
                    "Bin": bin_name,
                    'Dataset': dataset,
                    mean_name: str(round(float(mean), 3))
                })
            p_value = h.p
            test_rows.append({
                "Bin": bin_name,
                't-statistic': str(round(h.t, 3)),
                'p-value': str(p_value),
                '95% Confidence':
                "Significant" if p_value <= 0.05 else "Not Significant"
            })

        # Build each frame once instead of appending row by row:
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # frame on every call.  dtype=object keeps every column discrete
        # for plotting, as the frames grown from empty object columns were.
        df = pd.DataFrame(mean_rows,
                          columns=["Bin", "Dataset", mean_name],
                          dtype=object)
        df2 = pd.DataFrame(
            test_rows,
            columns=["Bin", "t-statistic", 'p-value', '95% Confidence'],
            dtype=object)

        plots.append(
            (ggplot(df, aes(x='Bin', y=mean_name, fill='Dataset')) +
             geom_col(stat='identity', position='dodge') +
             ggtitle("{0} bin distribution| {1}\nBin's Average Scores".format(
                 bin_type, score))))
        plots.append(
            (ggplot(df2, aes(x='Bin', y='p-value', fill='95% Confidence')) +
             geom_col(stat='identity', width=0.2) + ggtitle(
                 "{0} bin distribution| {1}\nBin's 95% Confidence Level Test".
                 format(bin_type, score)) +
             scale_fill_manual(values={
                 'Significant': "#214517",
                 'Not Significant': '#c62f2d'
             })))
    save_as_pdf_pages(plots, file_name)
예제 #2
0
    def plot_range_comparison(self,
                              xlabel: str = '',
                              figsize: Tuple[int] = (7, 3),
                              add_text_label: bool = True,
                              **kwargs):
        """Draw a column chart of comparison counts per range category.

        The data comes from ``self.get_ranges_df(**kwargs)``; bars are
        filled by the ``direction`` column.

        Parameters
        ----------
        xlabel : str
            Label for the x axis.
        figsize : tuple
            Figure size handed to the plotnine theme.
        add_text_label : bool
            When true, the ``label`` column is drawn as text for the
            'Positive' and 'Negative' rows that have a count above zero.
        **kwargs
            Forwarded to ``self.get_ranges_df``.

        Returns
        -------
        The assembled plotnine plot.
        """
        ranges = self.get_ranges_df(**kwargs)

        rotated_ticks = p9.element_text(rotation=45)
        chart = (
            p9.ggplot(ranges)
            + p9.aes('cat_value', 'counts', fill='direction')
            + p9.geom_col(alpha=.8)
            + p9.theme(figure_size=figsize, axis_text_x=rotated_ticks)
            + p9.scale_fill_manual(['#3f7f93', '#da3b46', '#4d4a4a'])
            + p9.labs(x=xlabel, y='Number of Comparisons', fill='R')
        )

        if not add_text_label:
            return chart

        # (direction, vertical position expression, text colour)
        label_specs = (
            ('Positive', 'n + max(n) * .15', '#3f7f93'),
            ('Negative', 'n + max(n) * .05', '#da3b46'),
        )
        for direction, y_expr, colour in label_specs:
            rows = ranges.loc[ranges.direction == direction].loc[
                ranges.counts > 0]
            if rows.size > 0:
                chart += p9.geom_text(
                    p9.aes(label='label', x='cat_value', y=y_expr),
                    inherit_aes=False,
                    size=9,
                    data=rows,
                    color=colour)

        return chart
예제 #3
0
def plot_bargraph(count_plot_df, plot_df):
    """
    Build a horizontal bar graph of lemma counts, one facet per repository.

    Arguments:
        count_plot_df - dataframe with "lemma", "count" and "repository"
                        columns; "count" is cast to int before plotting
        plot_df - dataframe with "lemma" and "odds_ratio" columns; lemmas are
                  laid out along the axis in ascending odds-ratio order

    Returns the plotnine plot object.
    """
    counts = count_plot_df.astype({"count": int})
    lemma_order = plot_df.sort_values("odds_ratio",
                                      ascending=True).lemma.tolist()

    bars = p9.geom_col(position=p9.position_dodge(width=0.5), fill="#253494")
    facets = p9.facet_wrap("repository", scales='free_x')
    base_theme = p9.theme_seaborn(context='paper',
                                  style="ticks",
                                  font="Arial",
                                  font_scale=0.95)
    # figure_size corresponds to 640 x 480
    sizing = p9.theme(
        figure_size=(6.66, 5),
        strip_background=p9.element_rect(fill="white"),
        strip_text=p9.element_text(size=12),
        axis_title=p9.element_text(size=12),
        axis_text_x=p9.element_text(size=10),
    )

    graph = (
        p9.ggplot(counts, p9.aes(x="lemma", y="count"))
        + bars
        + p9.coord_flip()
        + facets
        + p9.scale_x_discrete(limits=lemma_order)
        + p9.scale_y_continuous(labels=custom_format('{:,.0g}'))
        + p9.labs(x=None)
        + base_theme
        + sizing
    )
    return graph
예제 #4
0
    def plot_zmw_stats(self, **kwargs):
        """Stacked bar graph of ZMW stats, one bar per run.

        Note
        ----
        Raises an error if :meth:`Summaries.has_zmw_stats` is not `True`.

        Parameters
        ----------
        ``**kwargs`` : dict
            Keyword arguments passed to :meth:`Summaries.zmw_stats`.

        Returns
        -------
        plotnine.ggplot.ggplot
            Bars of `number` per `name`, stacked and filled by `status`.

        """
        df = self.zmw_stats(**kwargs)

        # Figure width grows with the number of distinct runs.
        n_names = len(df['name'].unique())
        x_tick_text = p9.element_text(angle=90, vjust=1, hjust=0.5)

        p = (
            p9.ggplot(df, p9.aes(x='name', y='number', fill='status'))
            + p9.geom_col(position=p9.position_stack(reverse=True), width=0.8)
            + p9.theme(axis_text_x=x_tick_text,
                       figure_size=(0.4 * n_names, 2.5))
            + p9.ylab('number of ZMWs')
            + p9.xlab('')
        )

        # Use the palette (minus its first entry) only when it has more
        # entries than there are statuses.
        n_statuses = len(df['status'].unique())
        if n_statuses < len(CBPALETTE):
            p = p + p9.scale_fill_manual(CBPALETTE[1:])

        return p
예제 #5
0
def test_col():
    # The black outline reveals the bar edges and the stacking
    # that is going on.
    base = ggplot(df)
    outlined_bars = geom_col(aes('x', 'z', fill='factor(z)'), color='black')
    p = base + outlined_bars

    assert p + _theme == 'col'
예제 #6
0
def frequency_TL(Data):
    """Plot the number of records per calendar day and save it as a JPEG.

    Rows of ``Data`` are grouped by the calendar date of the ``date``
    column.  Every day between the first and last date is put on the
    x axis, so days without records show up as gaps.  The plot is written
    to ``pdf/iteration/TL_4.jpeg``.

    Parameters
    ----------
    Data : pandas.DataFrame
        Needs a datetime column ``date``.  After grouping, the first
        remaining column is expected to be ``"Unnamed: 0"``; its per-day
        count is used as ``n``.
        NOTE: a ``date_4`` column is added to ``Data`` in place.

    Returns
    -------
    None
        Progress messages are printed.  When there is nothing to plot a
        message is printed instead of raising.
    """
    from datetime import timedelta

    done_message = '=================================frequency_TL DONE ============================='

    print('======= Creating frequency_TL =======')

    # Without this guard min()/max() below raise on empty input, so the
    # "no data found" message could never be reached.
    if Data.empty:
        print('Plot not created; no data found.')
        print(done_message)
        return None

    # Filtering
    Data['date_4'] = Data['date'].dt.date
    tl4 = Data.groupby("date_4", sort=False, as_index=False).count()
    tl4 = tl4.iloc[:, 0:2]
    tl4 = tl4.rename(columns={"Unnamed: 0": "n"})

    # All dates missing: groupby drops them and nothing is left to plot.
    if tl4.empty:
        print('Plot not created; no data found.')
        print(done_message)
        return None

    sdate = min(tl4["date_4"])  # start date
    edate = max(tl4["date_4"])  # end date
    delta = edate - sdate       # as timedelta

    # One row per calendar day in the covered span, so that days without
    # records survive the merge.
    all_days = [sdate + timedelta(days=i) for i in range(delta.days + 1)]
    DF = pd.DataFrame({'date_4': all_days})
    data_with_missing_times = pd.merge(DF, tl4, on='date_4', how='outer')

    # Wider tick spacing for longer spans (> 5 years, > 3 years, otherwise).
    if delta.days > 1825:
        datebreaks = '18 months'
    elif delta.days > 1095:
        datebreaks = '12 months'
    else:
        datebreaks = '6 months'

    # Creating and saving TL_4
    plot = (p9.ggplot(data=data_with_missing_times,
                      mapping=p9.aes(x='date_4', y='n'))
            + p9.geom_col(fill='red')
            + p9.theme_classic()
            + p9.theme(axis_text=p9.element_text(size=40),
                       axis_title=p9.element_text(size=40, face='bold'))
            + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
            + p9.labs(x='', y='')
            )

    # `plot=plot` is not passed: save() is already called on the plot and
    # forwards unknown keywords to matplotlib's savefig.
    plot.save(filename='TL_4.jpeg',
              path="pdf/iteration/",
              width=25, height=5,
              dpi=320)

    print(done_message)
    return None
def image_histogram():
    """Show an image, its grayscale version and its equalized version,
    and plot the intensity histograms of the two grayscale images.

    Reads ``image.jpg`` from the working directory, opens three OpenCV
    windows, draws both histograms in one plot, and then waits until
    'q' is pressed before closing the OpenCV windows.

    Raises
    ------
    FileNotFoundError
        If ``image.jpg`` cannot be read.
    """
    # read the image first: cv2.imread returns None instead of raising
    # when the file is missing or unreadable
    image = cv2.imread(r"image.jpg")
    if image is None:
        raise FileNotFoundError("could not read image file 'image.jpg'")

    # create windows
    cv2.namedWindow('image', cv2.WINDOW_NORMAL)
    cv2.namedWindow('image_bw', cv2.WINDOW_NORMAL)
    cv2.namedWindow('image_bw_eq', cv2.WINDOW_NORMAL)

    # work with image
    image_bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_bw_eq = cv2.equalizeHist(image_bw)

    # display images
    cv2.imshow('image', image)
    cv2.imshow('image_bw', image_bw)
    cv2.imshow('image_bw_eq', image_bw_eq)

    # calculate histogram
    # The upper bound of the range is exclusive, so it has to be 256 for
    # pixels with value 255 to be counted (and for 256 unit-wide bins).
    hist_bw = cv2.calcHist([image_bw], [0], None, [256], [0, 256])
    hist_bw_eq = cv2.calcHist([image_bw_eq], [0], None, [256], [0, 256])
    np_hist_x = np.arange(len(hist_bw))
    d = {
        'np_hist_x': np_hist_x,
        'hist_bw': hist_bw.flatten(),
        'hist_bw_eq': hist_bw_eq.flatten()
    }
    df = pd.DataFrame(data=d)

    # plot histogram: original in red, equalized in green, overlaid
    pn_handle = (
        pn.ggplot(df)
        + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw'),
                      color=None, fill='red', alpha=0.5)
        + pn.ylab('occurences')
        + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw_eq'),
                      color=None, fill='green', alpha=0.5)
        + pn.ggtitle('Histograms of bw images')
    )
    pn_handle.draw()
    plt.show()

    # keep the OpenCV windows responsive until 'q' is pressed
    while True:
        pressed_key = cv2.waitKey(16)
        if pressed_key == ord('q'):
            break

    # cleanup opencv
    cv2.destroyAllWindows()
예제 #8
0
def test_ordinal_scale():
    # x is an ordered categorical, mapped to fill
    ordered_letters = pd.Categorical(list('abcd'), ordered=True)
    df = pd.DataFrame({'x': ordered_letters, 'y': [1, 2, 3, 4]})

    mapping = aes('x', 'y', color='-y', fill='x')
    p = ggplot(df) + mapping + geom_col(size=4) + _theme

    assert p + _theme == 'ordinal_scale'
예제 #9
0
def test_stack_negative():
    df = df1.copy()
    y_col = df.columns.get_loc('y')
    # Negate y in the first and the last row.
    for row in (0, len(df) - 1):
        df.iloc[row, y_col] *= -1

    bars = geom_col(aes('factor(x)', 'y', fill='factor(y)'),
                    position='stack')
    labels = geom_text(aes('factor(x)', 'y', label='y'),
                       position=position_stack(vjust=0.5))
    p = ggplot(df) + bars + labels

    assert p + _theme == 'stack-negative'
예제 #10
0
def test_stack_negative():
    df = df1.copy()
    y_position = df.columns.get_loc('y')
    first_row, last_row = 0, len(df) - 1
    # Flip the sign of y at both ends of the frame.
    df.iloc[first_row, y_position] *= -1
    df.iloc[last_row, y_position] *= -1

    x_y = ('factor(x)', 'y')
    p = ggplot(df)
    p = p + geom_col(aes(*x_y, fill='factor(y)'), position='stack')
    p = p + geom_text(aes(*x_y, label='y'),
                      position=position_stack(vjust=0.5))

    assert p + _theme == 'stack-negative'
예제 #11
0
    def plot_consecutive(self, alpha: float = .05, **kwargs):
        """Column chart of the longest run of significant steps per lag.

        Rows of ``self.sdc_df`` with ``p_value < alpha`` are kept.  Within
        each ``lag`` they are sorted by ``start_1`` and split into runs in
        which ``start_1`` increases by exactly 1; the size of the largest
        run is plotted against the lag.

        ``**kwargs`` are forwarded to ``p9.theme``.
        """

        def longest_run(lag_rows):
            # A new run starts wherever start_1 is not previous + 1.
            ordered = lag_rows.sort_values('start_1')
            run_id = (ordered.start_1 != ordered.start_1.shift(1) + 1).cumsum()
            return ordered.assign(group=run_id).groupby(['group']).size().max()

        sdc = self.sdc_df
        significant = sdc.loc[sdc.p_value < alpha]
        per_lag = significant.groupby('lag', as_index=True).apply(longest_run)
        summary = per_lag.rename('Max Consecutive steps').reset_index()

        f = (p9.ggplot(summary)
             + p9.aes('lag', 'Max Consecutive steps')
             + p9.geom_col()
             + p9.theme(**kwargs)
             + p9.labs(x='Lag [days]'))

        return f
예제 #12
0
def plot_result_stats(results, title):
    """Plot the summary statistics of ``results`` as dodged, labelled bars.

    ``results.describe()`` is reshaped to long form (columns ``metric``,
    ``group``, ``value``); the "count", "min" and "max" rows are dropped
    and every remaining statistic becomes one bar labelled with its value
    rounded to two decimals.  The y axis is limited to [0, 1.0].
    """
    long_names = {"level_0": "metric", "level_1": "group", 0: "value"}
    stats = results.describe().unstack().reset_index()
    stats = stats.rename(columns=long_names)

    hidden_groups = ["count", "min", "max"]
    stats = stats[~stats["group"].isin(hidden_groups)]
    stats["value_presentation"] = stats["value"].round(2)

    value_labels = p9.geom_text(p9.aes(label="value_presentation"),
                                position=p9.position_dodge(width=0.9),
                                va="bottom")
    plot = (
        p9.ggplot(stats)
        + p9.aes("metric", "value", fill="group")
        + p9.geom_col(position="dodge")
        + p9.theme_bw()
        + p9.coord_cartesian(ylim=[0, 1.0])
        + p9.ggtitle(title)
        + value_labels
    )
    return plot
예제 #13
0
def plot_num(df):
  """Plot one proportion histogram per column, laid out as facets.

  Parameters
  ----------
  df : pandas.DataFrame
      Needs a ``col_name`` column and a ``hist`` column whose entries are
      dataframes with ``value`` and ``prop`` columns.

  Returns
  -------
  The plotnine plot, faceted by column name in three columns.
  """
  # Tag every histogram with the column it belongs to.  Pairing the two
  # columns with zip is positional, so it works for any index of ``df``
  # (looking up ``col_name`` by a 0..n-1 counter does not), and assign()
  # returns new frames instead of writing into the caller's nested frames.
  tagged = [
      hist.assign(groups=col_name)
      for hist, col_name in zip(df['hist'], df['col_name'])
  ]
  z = pd.concat(tagged)
  # generate the plot
  ggplt = (
      p9.ggplot(z, p9.aes(x='value', y='prop', group='groups'))
      + p9.geom_col()
      + p9.guides(fill=False)
      + p9.ylab('Proportion')
      + p9.xlab('')
      + p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1))
      + p9.facet_wrap(facets=['groups'], ncol=3, scales='free')
  )
  # return the plot object
  return ggplt
예제 #14
0
파일: plot.py 프로젝트: NPSDC/qb
def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot protobowl outcome counts of the three buzzers and save a PDF.

    For each of the RNN, MLP and Threshold buzzers the pickled frame
    ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` is loaded and its rows
    are counted per (Possibility, Outcome) pair.  The counts are drawn as
    horizontal stacked bars, one facet row per model, and written to
    ``output/buzzer/<fold>_protobowl.pdf``.
    """
    # (sub-directory of output/buzzer, label used in the plot)
    buzzers = (
        ("RNNBuzzer", "RNN"),
        ("MLPBuzzer", "MLP"),
        ("ThresholdBuzzer", "Threshold"),
    )

    frames = []
    for buzzer_dir, model_name in buzzers:
        pkl_path = "output/buzzer/{}/{}_protobowl.pkl".format(buzzer_dir, fold)
        # NOTE(security): pickle.load can execute arbitrary code; only
        # load files produced by this project.
        with open(pkl_path, "rb") as pkl_file:  # closes the handle
            outcomes = pickle.load(pkl_file)
        counts = outcomes.groupby(["Possibility", "Outcome"])
        counts = counts.size().reset_index().rename(columns={0: "Count"})
        counts["Model"] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat keeps the same
    # row order (RNN, MLP, Threshold) and renumbers the index.
    df = pd.concat(frames, ignore_index=True)

    # Fixed category orders control stacking order and facet order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df["Outcome"] = df["Outcome"].astype(outcome_type)
    model_type = CategoricalDtype(categories=["Threshold", "MLP", "RNN"])
    df["Model"] = df["Model"].astype(model_type)

    p = (
        ggplot(df)
        + geom_col(aes(x="Possibility", y="Count", fill="Outcome"), width=0.7)
        + facet_grid("Model ~")
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=0.17)
        + scale_fill_brewer(type="div", palette=7)
    )

    figure_dir = "output/buzzer/{}_protobowl.pdf".format(fold)
    p.save(figure_dir)
예제 #15
0
def cell_division(adata):
    """ Plots the reference variable along the principal circle edges to
    visualize the moment of cell division.

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.celldiv_moment`.

    Returns
    ------------
    A plotnine plot to help visualize the moment of cell division and
    direction of the cell cycle: points, a connecting path and a dashed
    linear fit of `mean_var`, plus bars of `diff_var` per edge.

    If method = 'counts' when tl.celldiv_moment was run,
    cell division is defined by the largest drop in total_counts. The changes
    are represented by the bars at the bottom, and the suggested moment of
    cell division is marked in red. The cell cycle should follow an
    incremental increase in total counts until around the moment of cell
    division.

    Alternatively, if method='g2m' in tl.celldiv_moment, the G2-M signature
    dynamics are used to define the moment of cell division.

    Note: a boolean `cell_div` column is written into
    `adata.uns['princirc_gr']['edges']`.
    """
    moment = adata.uns['scycle']['cell_div_moment']
    ref_var = moment['ref_var']
    edge_to_0 = moment['cell_div_edge'][0]

    edges = adata.uns['princirc_gr']['edges']
    at_division = edges['e1'] == edge_to_0
    edges['cell_div'] = at_division

    # mean_var at the division edge, used for the red marker
    cell_div_count = edges[at_division]['mean_var']

    division_marker = annotate("point", x=edge_to_0, y=cell_div_count,
                               color='red', size=2)
    trend = geom_smooth(aes(y='mean_var'), method='lm', linetype='dashed')
    change_bars = geom_col(aes(y='diff_var', fill='cell_div'))
    bar_colours = scale_fill_manual(values=['darkgrey', 'red'], guide=False)

    cell_div_plot = (
        ggplot(edges, aes('e1', 'mean_var'))
        + geom_point(aes(y='mean_var'), size=2)
        + geom_path(aes(y='mean_var'))
        + trend
        + division_marker
        + labs(x='Edge position', y=ref_var)
        + change_bars
        + bar_colours
        + theme_std
    )

    return cell_div_plot
예제 #16
0
파일: plot.py 프로젝트: Pinafore/qb
def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot protobowl outcome counts of the three buzzers and save a PDF.

    For each of the RNN, MLP and Threshold buzzers the pickled frame
    ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` is loaded and its rows
    are counted per (Possibility, Outcome) pair.  The counts are drawn as
    horizontal stacked bars, one facet row per model, and written to
    ``output/buzzer/<fold>_protobowl.pdf``.
    """
    # (sub-directory of output/buzzer, label used in the plot)
    buzzers = (
        ('RNNBuzzer', 'RNN'),
        ('MLPBuzzer', 'MLP'),
        ('ThresholdBuzzer', 'Threshold'),
    )

    frames = []
    for buzzer_dir, model_name in buzzers:
        pkl_path = 'output/buzzer/{}/{}_protobowl.pkl'.format(buzzer_dir, fold)
        # NOTE(security): pickle.load can execute arbitrary code; only
        # load files produced by this project.
        with open(pkl_path, 'rb') as pkl_file:  # closes the handle
            outcomes = pickle.load(pkl_file)
        counts = outcomes.groupby(['Possibility', 'Outcome'])
        counts = counts.size().reset_index().rename(columns={0: 'Count'})
        counts['Model'] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat keeps the same
    # row order (RNN, MLP, Threshold) and renumbers the index.
    df = pd.concat(frames, ignore_index=True)

    # Fixed category orders control stacking order and facet order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df['Outcome'] = df['Outcome'].astype(outcome_type)
    model_type = CategoricalDtype(
        categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (
        ggplot(df)
        + geom_col(aes(x='Possibility', y='Count', fill='Outcome'),
                   width=0.7)
        + facet_grid('Model ~')
        + coord_flip()
        + theme_fs()
        + theme(aspect_ratio=0.17)
        + scale_fill_brewer(type='div', palette=7)
    )

    figure_dir = 'output/buzzer/{}_protobowl.pdf'.format(fold)
    p.save(figure_dir)
예제 #17
0
def protobowl(fold=BUZZER_DEV_FOLD):
    """Plot protobowl outcome counts of the three buzzers and save a PDF.

    For each of the RNN, MLP and Threshold buzzers the pickled frame
    ``output/buzzer/<Buzzer>/<fold>_protobowl.pkl`` is loaded and its rows
    are counted per (Possibility, Outcome) pair.  The counts are drawn as
    horizontal stacked bars, one facet row per model, and written to
    ``output/buzzer/<fold>_protobowl.pdf``.
    """
    # (sub-directory of output/buzzer, label used in the plot)
    buzzers = (
        ('RNNBuzzer', 'RNN'),
        ('MLPBuzzer', 'MLP'),
        ('ThresholdBuzzer', 'Threshold'),
    )

    frames = []
    for buzzer_dir, model_name in buzzers:
        pkl_path = 'output/buzzer/{}/{}_protobowl.pkl'.format(buzzer_dir, fold)
        # NOTE(security): pickle.load can execute arbitrary code; only
        # load files produced by this project.
        with open(pkl_path, 'rb') as pkl_file:  # closes the handle
            outcomes = pickle.load(pkl_file)
        counts = outcomes.groupby(['Possibility', 'Outcome'])
        counts = counts.size().reset_index().rename(columns={0: 'Count'})
        counts['Model'] = model_name
        frames.append(counts)

    # DataFrame.append was removed in pandas 2.0; concat keeps the same
    # row order (RNN, MLP, Threshold) and renumbers the index.
    df = pd.concat(frames, ignore_index=True)

    # Fixed category orders control stacking order and facet order.
    outcome_type = CategoricalDtype(categories=[15, 10, 5, 0, -5, -10, -15])
    df['Outcome'] = df['Outcome'].astype(outcome_type)
    model_type = CategoricalDtype(categories=['Threshold', 'MLP', 'RNN'])
    df['Model'] = df['Model'].astype(model_type)

    p = (ggplot(df) +
         geom_col(aes(x='Possibility', y='Count', fill='Outcome'), width=0.7) +
         facet_grid('Model ~') + coord_flip() + theme_fs() +
         theme(aspect_ratio=0.17) + scale_fill_brewer(type='div', palette=7))

    figure_dir = 'output/buzzer/{}_protobowl.pdf'.format(fold)
    p.save(figure_dir)
def plot_acf(data_in, figure_size=(15, 5)):
    """
    Plot an autocorrelation function as columns over the lag.

    A solid horizontal line is drawn at 0 and red dashed lines at +0.05
    and -0.05.

    Parameters
    ----------
    data_in : pd.DataFrame
        Dataframe with `lag` and `autocorrelation` columns
    figure_size : tuple, default = (15,5)
        Optional figure size; note that it is stored in the global
        plotnine options, not only applied to this plot

    Returns
    -------
    pn.ggplot:
        Plotnine ggplot object containing the autocorrelation plot
    """
    pn.options.figure_size = figure_size

    plot_out = pn.ggplot(pn.aes(x='lag', y='autocorrelation'), data=data_in)

    # (yintercept, extra styling) for each horizontal reference line
    reference_lines = (
        (0, {}),
        (0.05, {'color': 'red', 'linetype': 'dashed'}),
        (-0.05, {'color': 'red', 'linetype': 'dashed'}),
    )
    for level, styling in reference_lines:
        plot_out = plot_out + pn.geom_hline(pn.aes(yintercept=level),
                                            **styling)

    plot_out = plot_out + pn.geom_col()
    return plot_out
    (temp_df["time_to_published"].dt.total_seconds() / 60 / 60 / 24).max())
category_half_life

# In[14]:

# Horizontal bar chart of the half-life per preprint category with error
# bars from the half_life_ci_l / half_life_ci_u columns.  Rows whose
# category is 'none' are excluded, and the three half-life columns are
# converted to timedeltas in days before plotting.
# NOTE(review): assumes the half_life_* columns hold numbers of days —
# confirm against the cell that builds category_half_life.
g = (p9.ggplot(
    category_half_life.query("category!='none'").assign(
        half_life_time=lambda x: pd.to_timedelta(x.half_life_time, "D"),
        half_life_ci_l=lambda x: pd.to_timedelta(x.half_life_ci_l, "D"),
        half_life_ci_u=lambda x: pd.to_timedelta(x.half_life_ci_u, "D"),
    ),
    p9.aes(x="category",
           y="half_life_time",
           ymin="half_life_ci_l",
           ymax="half_life_ci_u"),
) + p9.geom_col(fill="#1f78b4") + p9.geom_errorbar() + p9.scale_x_discrete(
    # Category order: sorted by half_life_time ascending, then reversed.
    limits=(category_half_life.query("category!='none'").sort_values(
        "half_life_time").category.tolist()[::-1]), ) +
     p9.scale_y_timedelta(labels=timedelta_format("d")) + p9.coord_flip() +
     p9.labs(
         x="Preprint Categories",
         y="Time Until 50% of Preprints are Published",
         title="Preprint Category Half-Life",
     ) + p9.theme_seaborn(context="paper", style="white", font_scale=1.2) +
     p9.theme(axis_ticks_minor_x=p9.element_blank(), ))
# Write the figure as both SVG and PNG, then display it.
g.save("output/preprint_category_halflife.svg", dpi=250)
g.save("output/preprint_category_halflife.png", dpi=250)
print(g)

# Take home Results:
#     1. The average amount of time for half of all preprints to be published is 348 days (~1 year)
예제 #20
0
    datarows.append({
        "edges":
        df.query("pred > @optimal_threshold").hetionet.value_counts()[0],
        "in_hetionet":
        "Novel",
        "relation":
        rel
    })
edges_df = pd.DataFrame.from_records(datarows)
edges_df

# In[11]:

import math
g = (p9.ggplot(edges_df, p9.aes(x="relation", y="edges", fill="in_hetionet")) +
     p9.geom_col(position="dodge") +
     p9.scale_fill_manual(values={
         "Existing": color_map["Existing"],
         "Novel": color_map["Novel"]
     }) + p9.geom_text(p9.aes(label=(
         edges_df.apply(lambda x: f"{x['edges']}\n({x['recall']*100:.0f}%)"
                        if not math.isnan(x['recall']) else f"{x['edges']}",
                        axis=1))),
                       position=p9.position_dodge(width=0.9),
                       size=9,
                       va="bottom") + p9.scale_y_log10() +
     p9.labs(y="# of Edges",
             x="Relation Type",
             title="Reconstructing Edges in Hetionet") +
     p9.guides(fill=p9.guide_legend(title="In Hetionet?")) + p9.theme(
         axis_text_y=p9.element_blank(),
    int(grouped_candidates_pred_df.hetionet.value_counts()[1]),
    "relation":
    "DaG"
})
# Add the "Novel" row for the DaG relation: the value_counts entry with
# label 0 of `hetionet` among candidates whose pred_max exceeds 0.5.
# NOTE(review): presumably hetionet == 0 means "not in Hetionet" — confirm.
datarows.append({
    "edges": (grouped_candidates_pred_df.query(
        "pred_max > 0.5").hetionet.value_counts()[0]),
    "in_hetionet":
    "Novel",
    "relation":
    "DaG"
})
# One dataframe row per collected record.
edges_df = pd.DataFrame.from_records(datarows)
edges_df

# In[20]:

# Dodged bars of edge counts per relation on a log10 y axis.  Each bar is
# labelled with its edge count, followed by the recall as a percentage
# when the row's `recall` value is not NaN.
g = (p9.ggplot(edges_df, p9.aes(x="relation", y="edges", fill="in_hetionet")) +
     p9.geom_col(position="dodge") + p9.geom_text(p9.aes(label=(
         edges_df.apply(lambda x: f"{x['edges']} ({x['recall']*100:.0f}%)"
                        if not math.isnan(x['recall']) else f"{x['edges']}",
                        axis=1))),
                                                  position=p9.position_dodge(
                                                      width=1),
                                                  size=9,
                                                  va="bottom") +
     # Hide the y tick labels, the major ticks and all rect elements.
     p9.scale_y_log10() + p9.theme(axis_text_y=p9.element_blank(),
                                   axis_ticks_major=p9.element_blank(),
                                   rect=p9.element_blank()))
print(g)
예제 #22
0
파일: utils.py 프로젝트: cookesd/mcda
def plot_alt_benefit(plot_df,
                     title='Benefit by Alternative',
                     which='both',
                     sensitivity=False,
                     legend=True):
    '''Builds a stacked bar chart of the alternative benefits
    @ param plot_df: The df containing benefits for each alt by the criteria
    and total benefit. Columns used: 'Alternative', 'Benefit', 'Criterion',
    'type', 'print_value' (and 'Criterion Weight' when sensitivity is set)
    @ param title: The title for the graph
    @ param which: which parts to plot. Acceptable values are
    'total' for just total value.
    'criteria' for just criteria level stacked bars
    'both' for total and criteria. The graphs will be faceted in this case
    @ param sensitivity: if True, additionally facet by 'Criterion Weight'
    @ param legend: if False, the fill legend is hidden

    Returns the ggplot graph to be displayed elsewhere, or None (after
    printing a message) when `which` is not an accepted value'''

    _facet = which == 'both'
    if which == 'total':
        plot_df = plot_df.loc[plot_df['type'] == 'Total Value']
    elif which == 'criteria':
        plot_df = plot_df.loc[plot_df['type'] == 'Weighted Criterion Value']
    elif which != 'both':
        print(
            which,
            'is not an approved value for which.\n Enter "total", "criteria", or "both"'
        )
        return None

    # The two variants differ only in the options that hide the legend.
    # show_legend is a layer parameter, so it goes to geom_col itself and
    # not into aes(), where it would be treated as an aesthetic mapping.
    if legend:
        bar_options = {}
        scale_options = {}
    else:
        bar_options = {'show_legend': False}
        scale_options = {'guide': False}

    g = (
        p9.ggplot(plot_df,
                  p9.aes(x='Alternative', y='Benefit', fill='Criterion'))
        # makes stacked bar plot
        + p9.geom_col(stat='identity',
                      position=p9.position_stack(vjust=.5),
                      **bar_options)
        # changes the color palette to one for qualitative scales
        + p9.scale_fill_brewer(type='qual', palette='Paired',
                               **scale_options)
    )
    if not legend:
        # 'none' is the theme value that removes the legend.
        g = g + p9.theme(legend_position='none')

    g = (
        g
        + p9.geom_text(p9.aes(label='print_value'),
                       position=p9.position_stack(vjust=.5),
                       size=6,
                       hjust='center')  # adds weighted value to bars
        + p9.ggtitle(title)  # makes the title
        + p9.theme(axis_text_x=p9.element_text(
            rotation=45, hjust=1))  # rotates x axis labels
    )

    # Adds the facet if required
    if sensitivity:
        if _facet:
            return g + p9.facet_grid('type~Criterion Weight')
        return g + p9.facet_grid('Criterion Weight~')
    if _facet:
        return g + p9.facet_grid('~type')
    return g
예제 #23
0
def test_labels_series():
    # Aesthetics given as Series should be labelled with the Series names.
    series_mapping = aes(x=df.x, y=df.y)
    p = ggplot(df, series_mapping) + geom_col()
    expected_labels = {'x': 'x', 'y': 'y'}
    assert p.labels == expected_labels
예제 #24
0
    def plot(self):
        """Build the figure for ``self.figtype`` with plotnine and save it.

        The data in ``self.data`` is turned into a dataframe with columns
        ``self.datacols``.  By default the first column is mapped to y and
        the second to x; a third column, when present, is mapped to
        fill/color.  If ``self.savedata`` is set the dataframe is also
        written to ``<outprefix>.csv``.  Nothing is plotted when the
        dataframe has no rows.

        Raises:
            ValueError: if ``self.figtype`` is not a known figure type.
        """
        df = pandas.DataFrame(
            self.data,
            columns=self.datacols,
        )
        # NOTE(review): make_unique presumably de-duplicates the column
        # names; capture_c_msg looks like it captures messages emitted
        # while doing so — confirm against their definitions.
        with capture_c_msg("datar", prefix=f"[r]{self.title}[/r]: "):
            df.columns = make_unique(df.columns.tolist())

        if self.savedata:
            datafile = self.outprefix + ".csv"
            logger.info(
                "[r]%s[/r]: Saving data to: %r",
                self.title,
                datafile,
                extra={"markup": True},
            )
            df.to_csv(datafile, index=False)

        # Nothing to draw for an empty dataframe.
        if df.shape[0] == 0:
            logger.warning("No data points to plot")
            return

        aes_for_geom_fill = None
        aes_for_geom_color = None
        # Default theme tweak: rotated x tick labels.  Some figure types
        # below replace it or reset it to None.
        theme_elems = p9.theme(axis_text_x=p9.element_text(angle=60, hjust=2))
        # A third column, when present, drives fill/color.
        if df.shape[1] > 2:
            aes_for_geom_fill = p9.aes(fill=df.columns[2])
            aes_for_geom_color = p9.aes(color=df.columns[2])
        # Base plot: first column on y, second column on x.
        plt = p9.ggplot(df, p9.aes(y=df.columns[0], x=df.columns[1]))
        if self.figtype == "scatter":
            plt = plt + p9.geom_point(aes_for_geom_color)
            theme_elems = None
        elif self.figtype == "line":
            # No geom is added for "line"; only the base plot is saved.
            pass
        elif self.figtype == "bar":
            plt = plt + p9.geom_bar(p9.aes(fill=df.columns[0]))
        elif self.figtype == "col":
            plt = plt + p9.geom_col(aes_for_geom_fill)
        elif self.figtype == "pie":
            # Pie charts fall back to bar charts.
            logger.warning("Pie chart is not support by plotnine yet, "
                           "plotting bar chart instead.")
            col0 = df.iloc[:, 0]
            if df.shape[1] > 2:
                # With a third column: bars of the first column per value
                # of the third column, filled by the third column.
                plt = plt + p9.geom_bar(
                    p9.aes(x=df.columns[2], y=col0.name, fill=df.columns[2]),
                    stat="identity"
                    # aes_for_geom_fill,
                    # x=df.Group,
                    # y=col0,
                    # label=paste0(round_(100 * col0 / sum_(col0), 1), "%"),
                    # show_legend=False,
                    # position=p9.position_adjust_text(),
                )
            else:
                # NOTE(review): factor/rev/unique/as_character/levels/
                # cumsum/paste0 are not defined in this block; they look
                # like R-style helpers (datar).  Whether map/sum/round are
                # the builtins here is not visible — confirm, since
                # `sums / 2` would not work on a builtin map object.
                col0 = factor(col0, levels=rev(unique(as_character(col0))))
                fills = rev(levels(col0))
                sums = map(lambda x: sum(col0 == x), fills)
                print(col0)
                print(fills)
                # Bar chart filled by the first column with percentage
                # labels placed at the middle of each segment.
                plt = (p9.ggplot(df, p9.aes(x=df.columns[1])) +
                       p9.geom_bar(p9.aes(fill=df.columns[0])) + p9.geom_label(
                           x=1,
                           y=cumsum(sums) - sums / 2,
                           label=paste0(round(sums / sum(sums) * 100, 1), "%"),
                           show_legend=False,
                       ))
                theme_elems = p9.theme(
                    axis_title_x=p9.element_blank(),
                    axis_title_y=p9.element_blank(),
                    axis_text_y=p9.element_blank(),
                )
        elif self.figtype == "violin":
            plt = plt + p9.geom_violin(aes_for_geom_fill)
        elif self.figtype == "boxplot":
            plt = plt + p9.geom_boxplot(aes_for_geom_fill)
        elif self.figtype in ("histogram", "density"):
            # Distribution plots put the first column on x.  A second
            # column named "ONE" means "no grouping": no fill mapping and
            # the legend is hidden.
            plt = p9.ggplot(df, p9.aes(x=df.columns[0]))
            geom = getattr(p9, f"geom_{self.figtype}")
            if df.columns[1] != "ONE":
                plt = plt + geom(p9.aes(fill=df.columns[1]), alpha=0.6)
                theme_elems = None
            else:
                plt = plt + geom(alpha=0.6)
                theme_elems = p9.theme(legend_position="none")
        elif self.figtype == "freqpoly":
            # Same x mapping and "ONE" convention as histogram/density.
            plt = p9.ggplot(df, p9.aes(x=df.columns[0]))
            if df.columns[1] != "ONE":
                plt = plt + p9.geom_freqpoly(p9.aes(fill=df.columns[1]))
            else:
                plt = plt + p9.geom_freqpoly()
            theme_elems = None
        else:
            raise ValueError(f"Unknown figure type: {self.figtype}")

        plt = plt + p9.ggtitle(self.title)
        self.save_plot(plt, theme_elems)
예제 #25
0
    def barchart_make(roi, df, list_rois, config, ylimit, save_function,
                      find_ylim_function):
        """Build, and usually save, the bar chart for one region of interest.

        The rows of ``df`` whose 'index' equals ``list_rois[roi]`` are
        sorted by the configured x-axis column and drawn as dodged bars of
        'Mean' with error bars of +/- 'Conf_Int_95'.

        When ``ylimit`` is truthy it is applied as the upper y limit and
        '_same_ylim' is appended to the ROI name.  When ``ylimit`` is 0 and
        ``config.use_same_axis_limits`` is 'Same limits' or 'Create both',
        ``find_ylim_function`` is asked for the y limit of the figure.  In
        'Same limits' mode with ``ylimit == 0`` the figure is not saved.

        Returns the value from ``find_ylim_function``, or 0 if it was not
        called.
        """
        thisroi = list_rois[roi]
        x_axis = config.single_roi_fig_x_axis

        # Rows of this ROI, ordered along the x axis, with a fresh index.
        current_df = df.loc[df['index'] == thisroi]
        current_df = current_df.sort_values([x_axis]).reset_index(drop=True)
        # Categorical in order of appearance keeps the sorted bar order.
        current_df[x_axis] = pd.Categorical(
            current_df[x_axis], categories=current_df[x_axis].unique())

        mapping = pltn.aes(
            x=x_axis,
            y='Mean',
            ymin="Mean-Conf_Int_95",
            ymax="Mean+Conf_Int_95",
            fill='factor({colour})'.format(
                colour=config.single_roi_fig_colour))
        bars = pltn.geom_col(
            position=pltn.position_dodge(preserve='single', width=0.8),
            width=0.8,
            na_rm=True)
        error_bars = pltn.geom_errorbar(
            size=1,
            position=pltn.position_dodge(preserve='single', width=0.8))
        axis_labels = pltn.labs(x=config.single_roi_fig_label_x,
                                y=config.single_roi_fig_label_y,
                                fill=config.single_roi_fig_label_fill)
        styling = pltn.theme(
            panel_grid_major_x=pltn.element_line(alpha=0),
            axis_title_x=pltn.element_text(
                weight='bold', color='black', size=20),
            axis_title_y=pltn.element_text(
                weight='bold', color='black', size=20),
            axis_text_y=pltn.element_text(size=20, color='black'),
            legend_title=pltn.element_text(size=20, color='black'),
            legend_text=pltn.element_text(size=18, color='black'),
            subplots_adjust={'right': 0.85},
            legend_position=(0.9, 0.8),
            dpi=config.plot_dpi)
        # The x tick labels are blanked; the x values are drawn as text
        # at y = -.7 instead.
        x_value_text = pltn.geom_text(pltn.aes(y=-.7, label=x_axis),
                                      color='black',
                                      size=20,
                                      va='top')

        figure = (
            pltn.ggplot(current_df, mapping)
            + pltn.theme_538()
            + bars
            + error_bars
            + axis_labels
            + pltn.scale_x_discrete(labels=[])
            + styling
            + x_value_text
            + pltn.scale_fill_manual(
                values=config.colorblind_friendly_plot_colours)
        )

        if ylimit:
            # Set y limit of figure (used to make it the same for every barchart)
            figure += pltn.ylim(None, ylimit)
            thisroi += '_same_ylim'

        no_limit_given = ylimit == 0

        returned_ylim = 0
        if no_limit_given and config.use_same_axis_limits in ('Same limits',
                                                              'Create both'):
            returned_ylim = find_ylim_function(thisroi, figure, 'yaxis')

        # In 'Same limits' mode the first pass only collects the limit.
        if no_limit_given and config.use_same_axis_limits == 'Same limits':
            return returned_ylim

        folder = 'Different_yaxis' if no_limit_given else 'Same_yaxis'
        save_function(figure, thisroi, config, folder, 'barchart')

        return returned_ylim
예제 #26
0
def _count_by_intensity(frame):
    """Count rows per Intensity level, keeping the first two result columns."""
    counts = frame.groupby("Intensity", sort=True, as_index=False).count()
    # Column 0 is Intensity, column 1 is the first counted column. It is
    # renamed to "n" only when it is called "Unnamed: 0" (presumably the index
    # column of a CSV export -- TODO confirm against the loader).
    return counts.iloc[:, 0:2].rename(columns={"Unnamed: 0": "n"})


def _intensity_bar_chart(counts):
    """Build the red bar chart of ``n`` against ``Intensity`` (x axis 1-10)."""
    ticks = list(range(1, 11))
    return (p9.ggplot(data=counts, mapping=p9.aes(x='Intensity', y='n'))
            + p9.geom_col(fill='red')
            + p9.theme_classic()
            + p9.theme(axis_text=p9.element_text(size=40),
                       axis_title=p9.element_text(size=40, face='bold'))
            + p9.coord_cartesian(xlim=(1, 10))
            + p9.scale_x_continuous(labels=ticks, breaks=ticks)
            + p9.labs(x='', y='No. of attacks'))


def intensity_graph(Data, Data_m):
    """Save bar charts of the number of rows per Intensity level.

    Rows are kept when ``Group == "sy"`` and ``Intensity`` is not NA. The
    chart for ``Data_m`` is written to ``pdf/iteration/Graph_1.jpeg`` and the
    chart for ``Data`` to ``pdf/iteration/Graph_ALL_1.jpeg``; a chart is only
    written when its filtered frame has at least one row.

    Args:
        Data: DataFrame with ``Intensity`` and ``Group`` columns.
        Data_m: DataFrame with the same columns (per the original comment,
            the monthly subset -- TODO confirm with the caller).

    Returns:
        None. Progress and warnings are reported with ``print``.
    """
    print('======= Creating intensity_graph =======')

    # Nothing can be plotted when every Intensity value is missing. Return
    # here: the filtered frames and plots are never built in this case, so
    # continuing to the save step would fail with a NameError.
    if pd.isna(Data.Intensity).all():
        print("WARNING: All values for Intensity are NA's")
        print('=================================intensity_graph DONE =============================')
        return None

    # Keep the "sy" rows that have a recorded Intensity.
    Data_m_int = Data_m[(Data_m.Group == "sy") & pd.notna(Data_m.Intensity)]
    Data_all_int = Data[(Data.Group == "sy") & pd.notna(Data.Intensity)]

    # NOTE(review): `plot=` is forwarded by save() as an extra keyword;
    # confirm the installed plotnine/matplotlib versions accept it.
    if len(Data_m_int) > 0:
        plot_month = _intensity_bar_chart(_count_by_intensity(Data_m_int))
        plot_month.save(filename='Graph_1.jpeg',
                        plot=plot_month,
                        path="pdf/iteration/",
                        width=25, height=5,
                        dpi=320)
    else:
        print('Plot not created; no data found.')

    if len(Data_all_int) > 0:
        plot = _intensity_bar_chart(_count_by_intensity(Data_all_int))
        plot.save(filename='Graph_ALL_1.jpeg',
                  plot=plot,
                  path="pdf/iteration/",
                  width=25, height=5,
                  dpi=320)
    else:
        print('Plot not created; no data found.')

    print('=================================intensity_graph DONE =============================')
    return None
예제 #27
0
def test_reorder():
    """Bars positioned and filled by ``reorder(x, y)``.

    The themed plot is asserted equal to ``'reorder'`` (presumably an
    image-baseline comparison supplied by the test harness).
    """
    ordering = 'reorder(x, y)'
    plot = ggplot(df, aes(ordering, 'y', fill=ordering)) + geom_col()
    themed = plot + _theme
    assert themed == 'reorder'
예제 #28
0
def test_labels_lists():
    """Aesthetics supplied as plain lists leave the x and y labels as None."""
    xs, ys = [1, 2, 3], [1, 2, 3]
    plot = ggplot(df, aes(x=xs, y=ys)) + geom_col()
    expected = {'x': None, 'y': None}
    assert plot.labels == expected
예제 #29
0
    ["is_same_paper_1", "is_same_paper_2", "is_same_paper_3"]].mode(axis=1))))
final_annotated_df.head()

# In[6]:

# Mean of final_same_paper per distance_bin, exposed as "frac_correct".
binned_stats_df = (final_annotated_df.groupby(
    "distance_bin").final_same_paper.mean().to_frame().rename(
        index=str, columns={
            "final_same_paper": "frac_correct"
        }).reset_index())
binned_stats_df

# In[7]:

# labs() names aesthetics, not screen positions: coord_flip() draws the x
# aesthetic (distance_bin) on the vertical axis, but its title must still be
# given as `x`. The two titles were previously swapped.
g = (p9.ggplot(binned_stats_df, p9.aes(x="distance_bin", y="frac_correct")) +
     p9.geom_col(fill="#a6cee3") + p9.coord_flip() +
     p9.labs(x="Euclidean Distance Bins", y="Fraction Correct") +
     p9.theme_seaborn(
         context="paper", style="ticks", font="Arial", font_scale=1.5))
g.save("output/figures/distance_bin_accuracy.svg")
g.save("output/figures/distance_bin_accuracy.png", dpi=250)
print(g)

# # Logistic Regression Performance

# In[8]:

# Load the 300-dimension bioRxiv document embeddings, indexed by document.
biorxiv_embed_df = (pd.read_csv(Path("../word_vector_experiment/output/") /
                                "word2vec_output/" /
                                "biorxiv_all_articles_300.tsv.xz",
                                sep="\t").set_index("document"))
예제 #30
0
def test_reorder_index():
    """Bars reordered by the dataframe index via ``reorder(x, x.index)``.

    The dataframe is created already ordered by the y variable, so
    reordering x by its index should order it according to y as well.
    """
    x_mapping = 'reorder(x, x.index)'
    plot = ggplot(df, aes(x_mapping, 'y')) + geom_col()
    themed = plot + _theme
    assert themed == 'reorder_index'
예제 #31
0
# (index, value) of the entry in the searched C grid that equals the
# selected C.
best_result = next(
    (idx, c_value) for idx, c_value in enumerate(model.Cs_)
    if c_value == model.C_)
print(best_result)

print("Best CV Fold")
print(model.scores_["polka"][:, best_result[0]])
model.scores_["polka"][:, best_result[0]].mean()

# One row per coefficient, numbered 1..50.
model_weights_df = pd.DataFrame.from_dict({
    "weight": model.coef_[0],
    "pc": list(range(1, 51)),
})
# Categorical so that scale_x_discrete can control the display order.
model_weights_df["pc"] = pd.Categorical(model_weights_df["pc"])
model_weights_df.head()

g = (p9.ggplot(model_weights_df, p9.aes(x="pc", y="weight")) +
     p9.geom_col(position=p9.position_dodge(width=5), fill="#253494") +
     p9.coord_flip() +
     # Descending limits (50 down to 1).
     p9.scale_x_discrete(limits=list(range(50, 0, -1))) +
     p9.theme_seaborn(
         context="paper", style="ticks", font_scale=1.1, font="Arial") +
     p9.theme(figure_size=(10, 8)) + p9.labs(title="Regression Model Weights",
                                             x="Principal Component",
                                             y="Model Weight"))
# g.save("output/figures/pca_log_regression_weights.svg")
# g.save("output/figures/pca_log_regression_weights.png", dpi=250)
print(g)

# Reorder the first two axes of the coefficient paths recorded for "polka".
fold_features = model.coefs_paths_["polka"].transpose(1, 0, 2)
model_performance_df = pd.DataFrame.from_dict({
    "feat_num": ((fold_features.astype(bool).sum(axis=1)) > 0).sum(axis=1),
    "C":
예제 #32
0
    items = {}
    data = {}
    all_models = set()
    all_tasks = set()
    data = {}
    with gzip.open(options.input) as ifd:
        for row in csv.DictReader(ifd, delimiter="\t"):
            for k, v in row.items():
                data[k] = data.get(k, [])
                data[k].append(v)
    
    for k in data.keys():
        floats = [maybe_float(x) for x in data[k]]
        if all([re.match(r"^\d+$", x) for x in data[k]]):
            data[k] = [int(x) for x in data[k]]
        elif all(floats):
            data[k] = floats

    df = pandas.DataFrame(data)
    #print df
    x = (ggplot(df, aes("factor(%s)" % (options.x), options.y, color="factor(%s)" % (options.color)))) + \
        ggtitle(options.title.strip("'")) + \
        ylab(options.ylabel.strip("'")) + \
        xlab(options.xlabel.strip("'")) + \
        labs(color=options.color_label.strip("'")) + \
        geom_col(show_legend=False) + \
        lims(y=(0.0, 1.0))
    x.save(options.output)

    #theme(legend_title=element_text("")) + \