Esempio n. 1
0
def test_legend_fill_ratio():
    """Points plus an 'lm' smooth of ``y_noisy``, colored by ``x<0.5``."""
    base = ggplot(df_linear, aes('x', color='x<0.5'))
    points = geom_point(aes(y='y_noisy'))
    fit = geom_smooth(aes(y='y_noisy'), method='lm', size=0.5, span=.3)
    p = base + points + fit

    assert p == 'legend_fill_ratio'
def test_step():
    """Two thick step layers: the default one and a red 'vh' one at y+2."""
    default_steps = geom_step(aes(y='y'), size=4)
    shifted_steps = geom_step(
        aes(y='y+2'), color='red', direction='vh', size=4)
    p = ggplot(df, aes('x')) + default_steps + shifted_steps

    assert p == 'step'
Esempio n. 3
0
def test_non_linear_smooth_no_ci():
    """Points plus a blue 'loess' smooth drawn with ``se=False``."""
    base = ggplot(df_linear, aes('x'))
    points = geom_point(aes(y='y_noisy'))
    curve = geom_smooth(
        aes(y='y_noisy'), method='loess', span=.3, color='blue', se=False)
    p = base + points + curve

    assert p == 'non_linear_smooth_no_ci'
Esempio n. 4
0
def test_linear_smooth():
    """Points plus a blue 'lm' smooth of ``y_noisy``."""
    base = ggplot(df_linear, aes('x'))
    points = geom_point(aes(y='y_noisy'))
    line = geom_smooth(aes(y='y_noisy'), method='lm', span=.3, color='blue')
    p = base + points + line

    assert p == 'linear_smooth'
Esempio n. 5
0
def plot():
    """Save per-user protobowl summary plots as PDFs.

    Loads the records with ``load_protobowl()``, averages them per ``uid``
    and writes four PDFs into ``output/protobowl/``: a scatter of users and
    three density histograms (log record count, accuracy, buzzing
    position). Progress is reported with ``print``.
    """
    outdir = 'output/protobowl/'
    pathlib.Path(outdir).mkdir(parents=True, exist_ok=True)

    records = load_protobowl()
    # Anything that is not literally True counts as a wrong answer.
    records.result = records.result.apply(lambda x: x is True)
    records['log_n_records'] = records.user_n_records.apply(np.log)

    user_stat = records.groupby('uid').agg(np.mean)
    print('{} users'.format(len(user_stat)))
    print('{} records'.format(len(records)))
    max_color = user_stat.log_n_records.max()
    # Alpha is the log record count relative to its maximum.
    relative_log = user_stat.log_n_records.apply(lambda x: x / max_color)
    user_stat['alpha'] = pd.Series(relative_log, index=user_stat.index)

    # 2D user plot
    user_mapping = aes(x='relative_position', y='result',
                       size='user_n_records', color='log_n_records',
                       alpha='alpha')
    hidden_legends = {'color': False, 'alpha': False, 'size': False}
    p0 = (ggplot(user_stat)
          + geom_point(user_mapping, show_legend=hidden_legends)
          + scale_color_gradient(high='#e31a1c', low='#ffffcc')
          + labs(x='Average buzzing position', y='Accuracy')
          + theme(aspect_ratio=1))
    p0.save(os.path.join(outdir, 'protobowl_users.pdf'))
    print('p0 done')

    # The three density histograms differ only in column, colors, axis
    # label and file name:
    # (tag, column, outline color, fill color, x label, file name)
    histogram_specs = [
        ('p1', 'log_n_records', '#e6550d', '#fee6ce',
         'Log number of records', 'protobowl_hist.pdf'),
        ('p2', 'result', '#31a354', '#e5f5e0',
         'Accuracy', 'protobowl_acc.pdf'),
        ('p3', 'relative_position', '#3182bd', '#deebf7',
         'Average buzzing position', 'protobowl_pos.pdf'),
    ]
    for tag, column, outline, fill, x_label, filename in histogram_specs:
        hist = (ggplot(user_stat, aes(x=column, y='..density..'))
                + geom_histogram(color=outline, fill=fill)
                + geom_density()
                + labs(x=x_label, y='Density')
                + theme(aspect_ratio=0.3))
        hist.save(os.path.join(outdir, filename))
        print('{} done'.format(tag))
def test_arrow():
    """Three thick paths, each with a different arrow specification."""
    closed_both = geom_path(size=2, arrow=arrow(ends='both', type='closed'))
    wide_first = geom_path(
        aes(y='y+2'), color='red', size=2,
        arrow=arrow(angle=60, length=1, ends='first'))
    plain = geom_path(
        aes(y='y+4'), color='blue', size=2, arrow=arrow(length=1))
    p = ggplot(df, aes('x', 'y')) + closed_both + wide_first + plain

    assert p == 'arrow'
Esempio n. 7
0
def test_quantiles_width_dodge():
    """Violins with drawn quantiles, a narrower width, and a fill mapping."""
    with_quantiles = geom_violin(
        aes(y='y'), draw_quantiles=[.25, .75], size=2)
    narrow = geom_violin(aes(y='y+25'), color='green', width=0.5, size=2)
    filled = geom_violin(aes(y='y+50', fill='factor(y%2)'), size=2)
    margins = theme(subplots_adjust={'right': 0.85})
    p = ggplot(df, aes('x')) + with_quantiles + narrow + filled + margins
    assert p == 'quantiles_width_dodge'
Esempio n. 8
0
def test_aesthetics():
    """Point layers at x columns 'b'..'i', each using a different aesthetic."""
    # Column 'a' varies; columns 'b'..'i' hold the constants 2..9.
    columns = {'a': range(5)}
    for value, name in enumerate('bcdefghi', start=2):
        columns[name] = value
    df = pd.DataFrame(columns)

    p = ggplot(df, aes(y='a'))
    p = p + geom_point(aes(x='b'))
    p = p + geom_point(aes(x='c', size='a'))
    p = p + geom_point(aes(x='d', alpha='a'), size=10, show_legend=False)
    p = p + geom_point(
        aes(x='e', shape='factor(a)'), size=10, show_legend=False)
    p = p + geom_point(
        aes(x='f', color='factor(a)'), size=10, show_legend=False)
    p = p + geom_point(
        aes(x='g', fill='a'), stroke=0, size=10, show_legend=False)
    p = p + geom_point(
        aes(x='h', stroke='a'), fill='white', color='green', size=10)
    p = p + geom_point(
        aes(x='i', shape='factor(a)'),
        fill='brown', stroke=2, size=10, show_legend=False)
    p = p + theme(subplots_adjust={'right': 0.85})

    assert p == 'aesthetics'
Esempio n. 9
0
def test_arrow():
    """Segments with arrows at the default end, the first end, and both."""
    base = ggplot(df, aes('x', 'y', xend='xend', yend='yend'))
    default_end = geom_segment(
        aes('x+2', xend='xend+2'), arrow=arrow(), size=2)
    first_end = geom_segment(
        aes('x+4', xend='xend+4'), arrow=arrow(ends='first'), size=2)
    both_ends = geom_segment(
        aes('x+6', xend='xend+6'), arrow=arrow(ends='both'), size=2)
    p = base + default_end + first_end + both_ends

    assert p == 'arrow'
Esempio n. 10
0
def test_aesthetics():
    """Segments mapped in turn to color, linetype, size and alpha."""
    p = ggplot(df, aes('x', 'y', xend='xend', yend='yend'))
    p = p + geom_segment(size=2)
    # Positive slope segments
    p = p + geom_segment(aes(yend='yend+1', color='factor(z)'), size=2)
    p = p + geom_segment(aes(yend='yend+2', linetype='factor(z)'), size=2)
    p = p + geom_segment(aes(yend='yend+3', size='z'), show_legend=False)
    p = p + geom_segment(
        aes(yend='yend+4', alpha='z'), size=2, show_legend=False)

    themed = p + _theme
    assert themed == 'aesthetics'
Esempio n. 11
0
def test_aesthetics():
    """Paths exercising alpha, linetype, size and color."""
    plain = geom_path(size=4)
    faded = geom_path(aes(y='y+2', alpha='x'), size=4, show_legend=False)
    dashed = geom_path(
        aes(y='y+4'), size=4, linetype='dashed', show_legend=False)
    sized = geom_path(aes(y='y+6', size='x'), color='red', show_legend=False)
    colored = geom_path(aes(y='y+8', color='x'), size=4)
    p = ggplot(df, aes('x', 'y')) + plain + faded + dashed + sized + colored

    assert p == 'aesthetics'
Esempio n. 12
0
def test_tile_aesthetics():
    """Unit tiles exercising alpha, fill, color and linetype."""
    p = ggplot(df, aes('x', 'y', width=1, height=1))
    p = p + geom_tile()
    p = p + geom_tile(aes(y='y+2', alpha='z'), show_legend=False)
    p = p + geom_tile(aes(y='y+4', fill='factor(z)'))
    p = p + geom_tile(aes(y='y+6', color='factor(z+1)'), size=2)
    p = p + geom_tile(
        aes(y='y+8', linetype='factor(z+2)'), color='yellow', size=2)
    p = p + _theme

    assert p == 'tile-aesthetics'
Esempio n. 13
0
def test_rect_nofill():
    """Rectangles with fill given as None, 'None', '' and 'gray'."""
    base = ggplot(df) + aes(
        xmin='xmin', xmax='xmax', ymin='ymin', ymax='ymax')
    fill_is_none = geom_rect(color='red', fill=None, size=2)
    fill_is_none_text = geom_rect(
        aes(ymin='ymin+2', ymax='ymax+2'), color='blue', fill='None', size=2)
    fill_is_empty = geom_rect(
        aes(ymin='ymin+4', ymax='ymax+4'), color='green', fill='', size=2)
    fill_is_gray = geom_rect(
        aes(ymin='ymin+6', ymax='ymax+6'),
        color='yellow', fill='gray', size=2)
    p = (base + fill_is_none + fill_is_none_text
         + fill_is_empty + fill_is_gray)

    assert p == 'rect-nofill'
Esempio n. 14
0
def test_no_fill():
    """Points with fill given as None, 'none', '' and 'gray'."""
    df = pd.DataFrame({'x': range(5), 'y': range(5)})
    common = {'size': 5, 'stroke': 1.5}

    p = ggplot(df, aes('x', 'y'))
    p = p + geom_point(color='red', fill=None, **common)
    p = p + geom_point(aes(y='y+1'), color='blue', fill='none', **common)
    p = p + geom_point(aes(y='y+2'), color='green', fill='', **common)
    p = p + geom_point(aes(y='y+3'), color='yellow', fill='gray', **common)

    assert p == 'no_fill'
Esempio n. 15
0
def test_stack_negative():
    """Stacked columns and centered labels after negating two y values."""
    df = df1.copy()
    y_position = df.columns.get_loc('y')
    # Flip the sign of y in the first and the last row.
    for row in (0, len(df) - 1):
        df.iloc[row, y_position] *= -1

    bars = geom_col(
        aes('factor(x)', 'y', fill='factor(y)'), position='stack')
    labels = geom_text(
        aes('factor(x)', 'y', label='y'),
        position=position_stack(vjust=0.5))
    p = ggplot(df) + bars + labels

    themed = p + _theme
    assert themed == 'stack-negative'
Esempio n. 16
0
def test_line():
    """Path, line and step layers drawn from data whose x is reversed."""
    # geom_path plots in given order. geom_line &
    # geom_step sort by x before plotting
    reversed_x = df.copy()
    reversed_x['x'] = df['x'].values[::-1]

    path = geom_path(aes(y='y'), size=4)
    line = geom_line(aes(y='y+2'), color='blue', size=4)
    step = geom_step(aes(y='y+4'), color='red', size=4)
    p = ggplot(reversed_x, aes('x')) + path + line + step

    assert p == 'path_line_step'
Esempio n. 17
0
 def plot_char_percent_vs_accuracy_smooth(self, category=False):
     """Return a smoothed plot of 'correct' against 'char_percent'.

     With a truthy ``category`` the curves are colored by 'category_jmlr'
     and the default smoother is used; otherwise a single 'mavg' smooth
     is drawn. Data comes from ``self.char_plot_df``.
     """
     base = ggplot(self.char_plot_df)
     if not category:
         mapping = aes(x='char_percent', y='correct')
         return base + mapping + geom_smooth(method='mavg')
     mapping = aes(x='char_percent', y='correct', color='category_jmlr')
     return base + mapping + geom_smooth()
Esempio n. 18
0
 def plot_char_percent_vs_accuracy_histogram(self, category=False):
     """Return a histogram of 'char_percent' filled by 'Outcome'.

     With a truthy ``category`` the plot is additionally facet-wrapped by
     'category_jmlr'. Data comes from ``self.char_plot_df``.
     """
     chart = ggplot(self.char_plot_df)
     if category:
         chart = chart + facet_wrap('category_jmlr')
     mapping = aes(x='char_percent', fill='Outcome')
     return chart + mapping + geom_histogram(binwidth=.05)
Esempio n. 19
0
 def plot_compare_accuracy(self, expo=False):
     """Return a bar chart of 'accuracy' per 'guesser', faceted by 'position'.

     With a truthy ``expo`` the bars are filled by 'Dataset', dodged, and
     the axes get explicit labels. Data comes from ``self.acc_df``.
     """
     faceted = ggplot(self.acc_df) + facet_wrap('position')
     if not expo:
         bars = geom_bar(stat='identity')
         return faceted + aes(x='guesser', y='accuracy') + bars
     mapping = aes(x='guesser', y='accuracy', fill='Dataset')
     bars = geom_bar(stat='identity', position='dodge')
     return (faceted + mapping + bars
             + xlab('Guessing Model') + ylab('Accuracy'))
Esempio n. 20
0
def test_limits():
    """Cosine drawn over the default range and over ``xlim=(10, 20)``."""
    default_range = stat_function(
        fun=np.cos, size=2, color='blue', arrow=arrow(ends='first'))
    custom_range = stat_function(
        fun=np.cos, xlim=(10, 20), size=2,
        color='red', arrow=arrow(ends='last'))
    p = ggplot(df, aes('x')) + default_range + custom_range
    assert p == 'limits'
Esempio n. 21
0
def test_continuous_x():
    """Loess smooth fitted on the data without its first and last 3 rows."""
    row_count = len(df_continuous_x)
    interior = df_continuous_x[3:row_count-3]
    smooth = geom_smooth(
        interior, method='loess', color='blue', fullrange=False)
    p = ggplot(df_continuous_x, aes('x', 'y')) + geom_point() + smooth
    assert p == 'continuous_x'
Esempio n. 22
0
def test_stat_parameter_sharing():
    """Check that 'weight' reaches both the geom and its stat.

    ``weight`` is both a stat parameter of ``stat_abc`` and a required
    aesthetic of ``geom_abc``. Whether it is set manually or mapped, it
    must show up on the geom side and in the stat's params.

    NOTE: This test may need to be modified when the geom & stat
    internals change.
    """
    class stat_abc(stat):
        DEFAULT_PARAMS = {'geom': 'point', 'position': 'identity',
                          'weight': 1}
        REQUIRED_AES = {'x'}
        CREATES = {'y'}

        @classmethod
        def compute_panel(cls, data, scales, **params):
            return data

    class geom_abc(geom):
        DEFAULT_PARAMS = {'stat': stat_abc, 'position': 'identity'}
        REQUIRED_AES = {'x', 'weight'}

        @staticmethod
        def draw(pinfo, panel_params, coord, ax, **kwargs):
            pass

    # Manual setting: recorded as an aesthetic parameter and a stat param.
    manual = geom_abc(weight=4)
    assert 'weight' in manual.aes_params
    assert 'weight' in manual._stat.params

    # Mapped aesthetic: recorded in the mapping and still a stat param.
    mapped = geom_abc(aes(weight='mpg'))
    assert 'weight' in mapped.mapping
    assert 'weight' in mapped._stat.params
Esempio n. 23
0
def test_expand_limits():
    """Points over 5..10 with ``expand_limits(y=(0, None))`` applied."""
    values = range(5, 11)
    df = pd.DataFrame({'x': values, 'y': values})
    base = ggplot(aes('x', 'y'), data=df)
    p = base + geom_point() + expand_limits(y=(0, None))
    assert p == 'expand_limits'
Esempio n. 24
0
def test_bool_mapping():
    """Points whose y column holds booleans."""
    df = pd.DataFrame({'x': [1, 2, 3], 'y': [True, False, False]})
    base = ggplot(df, aes('x', 'y'))
    p = base + geom_point()
    assert p == 'bool_mapping'
Esempio n. 25
0
def test_normal_with_line():
    """QQ points and QQ line for the 'x' sample of ``df_normal``."""
    base = ggplot(df_normal, aes(sample='x'))
    p = base + geom_qq() + geom_qq_line()
    # Roughly a straight line of points through the origin
    assert p == 'normal_with_line'
Esempio n. 26
0
def test_aesthetics():
    """Rugs on different sides mapped to alpha, linetype, color and size."""
    p = ggplot(df)
    p = p + geom_rug(aes('x', 'y'), size=2)
    p = p + geom_rug(aes('x+2*n', 'y+2*n', alpha='z'), size=2, sides='tr')
    p = p + geom_rug(
        aes('x+4*n', 'y+4*n', linetype='factor(z)'), size=2, sides='t')
    p = p + geom_rug(
        aes('x+6*n', 'y+6*n', color='factor(z)'), size=2, sides='b')
    p = p + geom_rug(aes('x+8*n', 'y+8*n', size='z'), sides='tblr')

    # Small displacement in y-axis text on Python 2, hence the tolerance.
    expected = ('aesthetics', {'tol': 4}) if six.PY2 else 'aesthetics'
    assert p + _theme == expected
Esempio n. 27
0
def test_summary_functions():
    """``stat_summary`` with mean, min and max as the summary functions."""
    summary = stat_summary(
        fun_y=np.mean, fun_ymin=np.min, fun_ymax=np.max, size=2)
    p = ggplot(df, aes('x', 'y')) + summary

    assert p == 'summary_functions'
Esempio n. 28
0
def test_hull():
    """mtcars 'wt' vs 'mpg' points with hulls, colored by ``factor(cyl)``."""
    mapping = aes('wt', 'mpg', color='factor(cyl)')
    p = ggplot(mtcars) + mapping + geom_point() + stat_hull(size=1)

    themed = p + _theme
    assert themed == 'hull'
Esempio n. 29
0
def test_ribbon_facetting():
    """Ribbon filled by ``factor(z)`` and facet-wrapped on ``z``."""
    mapping = aes('x', ymin='ymin', ymax='ymax', fill='factor(z)')
    p = ggplot(df, mapping) + geom_ribbon() + facet_wrap('~ z')

    themed = p + _theme
    assert themed == 'ribbon_facetting'
Esempio n. 30
0
def test_discrete_x():
    """``stat_summary_bin`` over 'xd', drawn with the 'bar' geom."""
    summary = stat_summary_bin(
        fun_y=np.mean, fun_ymin=np.min, fun_ymax=np.max, geom='bar')
    p = ggplot(df, aes('xd', 'y')) + summary

    assert p == 'discrete_x'
Esempio n. 31
0
def analyze_thermal_values(thermal_array, mask, histplot=False):
    """Summarize the thermal values inside a mask and record them.

    Computes a 256-bin histogram of ``thermal_array`` restricted to
    ``mask`` (bin range 0 to the array maximum), expresses it as a
    percentage of the non-zero mask pixels, and computes the max, min,
    mean and median of the masked values. All of these are stored with
    ``outputs.add_observation``. Optionally builds a line plot of the
    histogram.

    Inputs:
    thermal_array = numpy array of thermal values
    mask          = Binary mask made from selected contours
    histplot      = if True plots histogram of intensity values

    Returns:
    analysis_img  = histogram plot, or None when histplot is not True

    :param thermal_array: numpy.ndarray
    :param mask: numpy.ndarray
    :param histplot: bool
    :return analysis_img: ggplot
    """
    max_value = np.amax(thermal_array)
    # Calculate histogram
    hist_thermal = [
        float(i[0]) for i in cv2.calcHist([np.float32(thermal_array)], [0],
                                          mask, [256], [0, max_value])
    ]
    # Label each bin with its lower edge: 0, w, 2w, ... (256 labels).
    bin_width = max_value / 256.
    b = 0
    bin_labels = [float(b)]
    for i in range(255):
        b += bin_width
        bin_labels.append(b)

    # Silence debug output while thresholding the mask. The try/finally
    # guarantees the caller's debug mode is restored even if
    # binary_threshold raises.
    debug = params.debug
    params.debug = None
    try:
        # apply plant shaped mask to image
        mask1 = binary_threshold(mask, 0, 255, 'light')
    finally:
        params.debug = debug

    mask1 = (mask1 / 255)
    masked_thermal = thermal_array[np.where(mask > 0)]

    # Histogram counts as a percentage of the non-zero mask pixels.
    pixels = cv2.countNonZero(mask1)
    hist_percent = [(p / float(pixels)) * 100 for p in hist_thermal]

    maxtemp = np.amax(masked_thermal)
    mintemp = np.amin(masked_thermal)
    avgtemp = np.average(masked_thermal)
    mediantemp = np.median(masked_thermal)

    # Store data into outputs class
    outputs.add_observation(variable='max_temp',
                            trait='maximum temperature',
                            method='plantcv.plantcv.analyze_thermal_values',
                            scale='degrees',
                            datatype=float,
                            value=maxtemp,
                            label='degrees')
    outputs.add_observation(variable='min_temp',
                            trait='minimum temperature',
                            method='plantcv.plantcv.analyze_thermal_values',
                            scale='degrees',
                            datatype=float,
                            value=mintemp,
                            label='degrees')
    outputs.add_observation(variable='mean_temp',
                            trait='mean temperature',
                            method='plantcv.plantcv.analyze_thermal_values',
                            scale='degrees',
                            datatype=float,
                            value=avgtemp,
                            label='degrees')
    outputs.add_observation(variable='median_temp',
                            trait='median temperature',
                            method='plantcv.plantcv.analyze_thermal_values',
                            scale='degrees',
                            datatype=float,
                            value=mediantemp,
                            label='degrees')
    outputs.add_observation(variable='thermal_frequencies',
                            trait='thermal frequencies',
                            method='plantcv.plantcv.analyze_thermal_values',
                            scale='frequency',
                            datatype=list,
                            value=hist_percent,
                            label=bin_labels)
    analysis_img = None

    # Identity check on purpose: only the literal True enables plotting.
    if histplot is True:
        params.device += 1

        dataset = pd.DataFrame({
            'Temperature C': bin_labels,
            'Proportion of pixels (%)': hist_percent
        })
        fig_hist = (ggplot(data=dataset,
                           mapping=aes(x='Temperature C',
                                       y='Proportion of pixels (%)')) +
                    geom_line(color='green'))

        analysis_img = fig_hist
        # "print" saves the figure to the debug directory; "plot" prints it.
        if params.debug == "print":
            fig_hist.save(os.path.join(
                params.debug_outdir,
                str(params.device) + '_therm_histogram.png'),
                          verbose=False)
        elif params.debug == "plot":
            print(fig_hist)

    return analysis_img
def test_no_missing_values():
    """Thick line through the 'y2' column of ``df_missing``."""
    line = geom_line(aes(y='y2'), size=2)
    p = ggplot(df_missing, aes(x='x')) + line

    assert p == 'no_missing_values'
def test_step():
    """Default steps plus red 'vh' steps shifted up by 2."""
    p = ggplot(df, aes('x'))
    p = p + geom_step(aes(y='y'), size=4)
    p = p + geom_step(aes(y='y+2'), color='red', direction='vh', size=4)

    assert p == 'step'
Esempio n. 34
0
def test_annotation_stripes_coord_flip():
    """Stripes with ``fill_range='no'`` under flipped coordinates."""
    stripes = annotation_stripes(fill_range='no')
    points = geom_point(aes('factor(x)', 'y'))
    boundaries = geom_vline(xintercept=[0.5, 1.5, 2.5, 3.5])
    p = ggplot(df) + stripes + points + boundaries + coord_flip()

    assert p == 'annotation_stripes_coord_flip'
Esempio n. 35
0
def test_annotation_stripes_single_stripe():
    """Two-color stripes when every row has the same x value (10)."""
    constant_x = df.assign(x=10)
    stripes = annotation_stripes(fill=["#FF0000", "#00FF00"])
    p = ggplot(constant_x) + stripes + geom_point(aes('factor(x)', 'y'))

    assert p == 'annotation_stripes_single_stripe'
Esempio n. 36
0
def test_linear_smooth_no_ci():
    """Points plus a blue 'lm' smooth drawn with ``se=False``."""
    points = geom_point(aes(y='y_noisy'))
    line = geom_smooth(
        aes(y='y_noisy'), method='lm', span=.3, color='blue', se=False)
    p = ggplot(df_linear, aes('x')) + points + line

    assert p == 'linear_smooth_no_ci'
Esempio n. 37
0
from plotnine.data import mpg
from plotnine import ggplot, aes, facet_grid, labs, geom_point, stat_smooth

# Scatter plot of "hwy" against "displ" from the mpg data set, faceted with
# the formula "year~class", with a linear-model ('lm') smooth on top.
# The plot object is passed to print().
print(ggplot(mpg)
      + facet_grid(facets="year~class")
      + aes(x="displ", y="hwy")
      + labs(
          x="Engine Size",
          y="Miles per Gallon",
          title="Miles per Gallon for Each Year and Vehicle Class")
      + geom_point()
      + stat_smooth(method='lm'))
Esempio n. 38
0
def test_legend_fill_ratio():
    """Points plus a thin 'lm' smooth, both colored by ``x<0.5``."""
    p = ggplot(df_linear, aes('x', color='x<0.5'))
    p = p + geom_point(aes(y='y_noisy'))
    p = p + geom_smooth(aes(y='y_noisy'), method='lm', size=0.5, span=.3)

    assert p == 'legend_fill_ratio'
Esempio n. 39
0
 def test_lm_weights(self):
     """'lm' smooth with a formula and a ``weight`` aesthetic of 'x.abs()'."""
     weighting = aes(weight='x.abs()')
     fit = stat_smooth(
         method='lm', formula='y ~ np.sin(x)', fill='red', se=True)
     p = self.p + weighting + fit
     assert p == 'lm_formula_weights'
Esempio n. 40
0
def test_sorts_by_x():
    """``geom_smooth(stat='identity')`` on data whose x is out of order."""
    df = pd.DataFrame({'x': [5, 0, 1, 2, 3, 4], 'y': range(6)})
    smooth = geom_smooth(stat='identity')
    p = ggplot(df, aes('x', 'y')) + smooth

    assert p == 'sorts_by_x'
Esempio n. 41
0
 def test_mavg(self):
     """Draw a 'mavg' smooth of ``y_noisy`` with a window of 10."""
     smoother = geom_smooth(
         aes(y='y_noisy'), method='mavg', method_args={'window': 10})
     p = self.p + smoother
     p.draw_test()
Esempio n. 42
0
 def test_lowess(self):
     """Drawing a 'lowess' smooth must emit a ``PlotnineWarning``."""
     smoother = geom_smooth(aes(y='y_noisy'), method='lowess')
     p = self.p + smoother
     with pytest.warns(PlotnineWarning):
         p.draw_test()
Esempio n. 43
0
# Drop rows with missing values from both frames, in place.
SDRsuper.dropna(inplace=True)
SDRsub.dropna(inplace=True)

# Add level column
SDRsuper.insert(0, 'Level', 'super')
SDRsub.insert(0, 'Level', 'sub')

# Stack both frames into one ('sub' rows first, then 'super').
SDRall = pd.concat([SDRsub, SDRsuper])

#%% SDRsuper and SDRsub violin plot + boxplot + lines

# =============================================================================
# Simple violin plot:
# =============================================================================

# Bare expression: the plot object is built but not assigned or saved here.
(ggplot(SDRall) + aes(y='value', x='Level', fill='Level') +
 geom_violin(scale="width"))

# =============================================================================
# Next level violin plots
# =============================================================================

# Offset applied to x positions by the after_scale expression further down.
shift = 0.1


def alt_sign(x):
    """Return -1 raised to the power ``x``: +1 for even x, -1 for odd x."""
    return pow(-1, x)


# Map x to 'Level', then after scaling move each position by
# shift * alt_sign(x).
m1 = aes(x=stage('Level', after_scale='x+shift*alt_sign(x)'))  # shift outward
Esempio n. 44
0
def test_non_linear_smooth():
    """Points plus a blue 'loess' smooth of ``y_noisy``."""
    points = geom_point(aes(y='y_noisy'))
    curve = geom_smooth(
        aes(y='y_noisy'), method='loess', span=.3, color='blue')
    p = ggplot(df_linear, aes('x')) + points + curve

    assert p == 'non_linear_smooth'
from plotnine.data import economics

from plotnine import ggplot, aes, geom_line, labs

# Line plot of "uempmed" against "date" from the economics data set,
# with explicit axis labels.
g = (ggplot(economics) + aes(x="date", y="uempmed") + geom_line() +
     labs(x="date", y="median duration of unemployment"))

# The plot object is passed to print().
print(g)
Esempio n. 46
0
def test_discrete_x():
    """Points plus a blue default smooth over ``df_discrete_x``."""
    base = ggplot(df_discrete_x, aes('x', 'y'))
    p = base + geom_point() + geom_smooth(color='blue')

    assert p == 'discrete_x'
Esempio n. 47
0
def test_annotation_stripes_fill_range_cycle():
    """Stripes with ``fill_range='cycle'`` behind points and vlines."""
    stripes = annotation_stripes(fill_range='cycle')
    points = geom_point(aes('factor(x)', 'y'))
    boundaries = geom_vline(xintercept=[0.5, 1.5, 2.5, 3.5])
    p = ggplot(df) + stripes + points + boundaries

    assert p == 'annotation_stripes_fill_range_cycle'
Esempio n. 48
0
#### Getting set up ####

%matplotlib inline
import plotnine as p9
import pandas as pd

# read in filtered datasets
birth_reduced = pd.read_csv("data/birth_reduced.csv")
smoke_complete = pd.read_csv("data/smoke_complete.csv")

#### create a simple ggplot ####
# bind data to new plot
# specify aesthetic: mapping data to plot
# layers: ways (shapes) through which data are represented
# Bare expression: the plot is built but not assigned to a name.
(p9.ggplot(data=smoke_complete,
           mapping=p9.aes(x="age_at_diagnosis", y="cigarettes_per_day"))
    + p9.geom_point()
    )

# Silence all warnings from this point on (FutureWarning is not fatal).
import warnings
warnings.simplefilter("ignore")
# To hide the warnings for the plot above as well, move the two lines
# above into a new cell at the top of the notebook and re-run the plot.

# Create object to hold plot framework
smoke_plot = p9.ggplot(data=smoke_complete,
                         mapping=p9.aes(x="age_at_diagnosis", y="cigarettes_per_day"))

# Draw the plot
smoke_plot + p9.geom_point()
Esempio n. 49
0
def test_annotation_stripes_continuous_scale():
    """Default stripes with a continuous x mapping."""
    points = geom_point(aes('x', 'y'))
    boundaries = geom_vline(xintercept=[0.5, 1.5, 2.5, 3.5])
    p = ggplot(df) + annotation_stripes() + points + boundaries

    assert p == 'annotation_stripes_continuous_scale'
Esempio n. 50
0
    geom_bar,
    geom_text,
    geom_line,
    ggplot,
    ggtitle,
    theme,
    theme_classic,
    xlab,
    ylab,
)


# Line plot of "precision" against "recall" from pr_curves_df.
plt = (
    ggplot(
        data=pr_curves_df,
        mapping=aes(x="recall", y="precision",),  # "factor(species, ordered=False)",
    )
    + geom_line()
)
#%%

# Line plot of "true_positives_ratio" against "false_positive_rate"
# from the same frame.
plt2 = (
    ggplot(
        data=pr_curves_df,
        mapping=aes(
            x="false_positive_rate", y="true_positives_ratio",
        ),  # "factor(species, ordered=False)",
    )
    + geom_line()
)
Esempio n. 51
0
def test_discrete_x_fullrange():
    """Points plus a blue smooth drawn with ``fullrange=True``."""
    base = ggplot(df_discrete_x, aes('x', 'y'))
    smooth = geom_smooth(color='blue', fullrange=True)
    p = base + geom_point() + smooth

    assert p == 'discrete_x_fullrange'
from plotnine.data import mpg
from plotnine import ggplot, aes, geom_bar

# Bar chart over the "class" column of the mpg data set, passed to print().
print(ggplot(mpg) + aes(x="class") + geom_bar())
Esempio n. 53
0
# res = aci.groupby(["plot"], as_index=False).apply(check_dates, site_data)
# res = res.groupby(["site", "julian"], as_index=False).agg({"ACI": ["mean", "std"], "lat": "mean", "lon": "mean"})
# res.columns = pd.Index(join_tuple(i, "_") for i in res.columns)
# print(aci.loc[aci["site"] == "Igloolik"])
# print(aci)
res
# res.to_feather("data_glm.feather")


def label_x(dates):
    """Turn day offsets from 2018-01-01 into "DD-MM" labels.

    Each item of ``dates`` is added as a number of days to 1 January 2018
    and formatted with "%d-%m". The resulting list is printed and returned.
    """
    labels = []
    for offset in dates:
        day = datetime.datetime(2018, 1, 1) + datetime.timedelta(offset)
        labels.append(day.strftime("%d-%m"))
    print(labels)
    return labels


# Plot 'value' against 'julian' day, colored and faceted by 'type', with
# points and a moving-average smooth; x tick labels come from label_x.
# The figure is saved straight to a PNG file.
(ggplot(data=res, mapping=aes(x='julian', y='value', colour='type')) +
    xlab("Day")
    + ylab("Mean number of detected songs")
    + facet_grid("type~", scales="free")
 + # + geom_line()
 # + facet_wrap("type", nrow=2, ncol=1)
    geom_point()
  # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
    + geom_smooth(method="mavg", se=False, method_args={"window": 4, "center": True, "min_periods": 1})
        + scale_colour_manual(values=cbbPalette, guide=False)
    + scale_x_continuous(labels=label_x)).save("figs/song_events_aci_BARROW_mean_smoothed2.png", height=10, width=16, dpi=150)

(ggplot(data=res, mapping=aes(x='julian', y='n_events_sum', colour='site')) +
    xlab("Day")
    + ylab("Total number of detected songs")
 + # + facet_grid("site~", scales="free")
Esempio n. 54
0
def main():
    """Run CLI.

    Parses the command line, loads the AnnData file (and optionally a
    tab-delimited PCs file), computes the neighbor graph unless one is
    already stored, clusters the cells with leiden or louvain, and writes
    three outputs to the current working directory: a gzipped TSV of
    cluster assignments, the clustered AnnData file, and a dot plot of
    cell counts per sample and cluster.

    Raises:
        Exception: if ``--number_pcs`` exceeds the number of available PCs
            or ``--cluster_method`` is not 'leiden' or 'louvain'.
    """
    parser = argparse.ArgumentParser(description="""
            Read AnnData object and PCs file. Clusters the data. Saves an
            AnnData object with clusters in the clusters slot, a clusters
            file, and QC plots.
            """)

    parser.add_argument(
        '-v',
        '--version',
        action='version',
        version='%(prog)s {version}'.format(version=__version__))

    parser.add_argument('-h5',
                        '--h5_anndata',
                        action='store',
                        dest='h5',
                        required=True,
                        help='H5 AnnData file.')

    parser.add_argument(
        '-pc',
        '--tsv_pcs',
        action='store',
        dest='pc',
        default='',
        help='Tab-delimited file of PCs for each cell. First column is\
            cell_barcode. Subsequent columns are PCs. If "", uses pca\
            slot in AnnData.\
            (default: "")')

    parser.add_argument(
        '-cm',
        '--cluster_method',
        action='store',
        dest='cm',
        default='leiden',
        help='Clustering method. Valid options: [leiden|louvain].\
            (default: %(default)s)')

    parser.add_argument('-npc',
                        '--number_pcs',
                        action='store',
                        dest='npc',
                        default=0,
                        type=int,
                        help='Number of PCs to use.\
            (default: maximum number in tsv_pcs file)')

    parser.add_argument('-r',
                        '--resolution',
                        action='store',
                        dest='r',
                        default=1.0,
                        type=float,
                        help='Resolution.\
            (default: %(default)s)')

    parser.add_argument(
        '-nn',
        '--number_neighbors',
        action='store',
        dest='number_neighbors',
        default=25,
        type=int,
        help='Number of neighbors. If <= 0, sets to the number of unique\
            "experiment_id".\
            (default: %(default)s)')

    parser.add_argument(
        '--force_recalculate_neighbors',
        action='store_true',
        dest='calculate_neighbors',
        default=False,
        help='Calculate neighbor graph even if it already exists in the\
            AnnData (which it my do so if you already ran BBKNN).\
            (default: %(default)s)')

    parser.add_argument('-ncpu',
                        '--number_cpu',
                        action='store',
                        dest='ncpu',
                        default=4,
                        type=int,
                        help='Number of CPUs to use.\
            (default: %(default)s)')

    parser.add_argument(
        '-of',
        '--output_file',
        action='store',
        dest='of',
        default='',
        help='Basename of output files, assuming output in current working \
            directory.\
            (default: <h5_anndata>-<tsv_pcs>-clustered)')

    options = parser.parse_args()

    # Fixed settings.
    verbose = True

    # Scanpy settings
    sc.settings.figdir = os.getcwd()  # figure output directory to match base.
    sc.settings.n_jobs = options.ncpu  # number CPUs
    # sc.settings.max_memory = 500  # in Gb
    # sc.set_figure_params(dpi_save = 300)

    # Get the out file base.
    # NOTE(review): str.rstrip removes a trailing *set of characters*, not
    # a suffix, so e.g. 'pcs.tsv.gz' is reduced to 'pc'. Left unchanged so
    # that default output names stay stable; confirm before fixing.
    out_file_base = options.of
    if out_file_base == '':
        out_file_base = '{}-{}-clustered'.format(
            os.path.basename(options.h5.rstrip('h5ad').rstrip('.')),
            os.path.basename(options.pc.rstrip('tsv.gz').rstrip('.')))

    # Load the AnnData file.
    adata = sc.read_h5ad(filename=options.h5)

    # Load the PCs.
    if options.pc == '':
        df_pca = pd.DataFrame(
            data=adata.obsm['X_pca'],
            index=adata.obs.index,
            columns=[
                'PC{}'.format(x)
                for x in range(1, adata.obsm['X_pca'].shape[1] + 1)
            ])
    else:
        df_pca = pd.read_csv(options.pc, sep='\t', index_col='cell_barcode')

    # Check that nPCs is valid.
    n_pcs = options.npc
    if n_pcs == 0:
        n_pcs = len(df_pca.columns)
    elif n_pcs > len(df_pca.columns):
        raise Exception(
            '--number_pcs ({}) is > than n_pcs in --tsv_pcs ({}).'.format(
                n_pcs, len(df_pca.columns)))
    if verbose:
        print('Using {} PCs.'.format(n_pcs))

    # Add the reduced dimensions to the AnnData object.
    adata.obsm['X_pca'] = df_pca.loc[adata.obs.index, :].values.copy()

    # Check if BBKNN
    # Calculate neighbors for on the specified PCs.
    # By default saved to adata.uns['neighbors']
    #
    # First, however, check to see if adata.uns['neighbors'] already exists
    # ...and unless the user tells us not to, use that slot, not calculating
    # neighbors. This default behaviour is to accommodate the instance when
    # bbknn has been run on the data.
    if 'neighbors' not in adata.uns or options.calculate_neighbors:
        number_neighbors = options.number_neighbors
        if number_neighbors <= 0:
            number_neighbors = len(adata.obs['experiment_id'].cat.categories)
        # Pass the resolved value, not the raw option, so that the
        # "<= 0" fallback above actually takes effect.
        sc.pp.neighbors(adata,
                        use_rep='X_pca',
                        n_neighbors=number_neighbors,
                        n_pcs=n_pcs,
                        copy=False,
                        random_state=0)
    else:
        warnings.warn('WARNING: found neighbors slot in adata.uns. {}'.format(
            'Not calculating neighbors (ignoring n_neighbors parameter).'))
        # If we are using the pre-calculated neighbors drop npcs note.
        if 'n_pcs' in adata.uns['neighbors']['params']:
            n_pcs = adata.uns['neighbors']['params']['n_pcs']

    # Run the clustering, choosing either leiden or louvain algorithms
    cluster_method = options.cm
    cluster_resolution = options.r
    if cluster_method == 'leiden':
        sc.tl.leiden(adata,
                     resolution=cluster_resolution,
                     key_added=cluster_method,
                     copy=False,
                     random_state=0)
    elif cluster_method == 'louvain':
        sc.tl.louvain(adata,
                      flavor='vtraag',
                      resolution=cluster_resolution,
                      key_added=cluster_method,
                      copy=False,
                      random_state=0)
    else:
        raise Exception('Invalid --cluster_method: {}.'.format(cluster_method))
    # Also save the clusters to the same spot so we know where they will be.
    adata.uns['cluster'] = adata.uns[cluster_method]
    adata.uns['cluster']['params']['method'] = cluster_method
    adata.obs['cluster'] = adata.obs[cluster_method]

    # Print the final number of clusters discovered
    if verbose:
        print('{} clusters identified'.format(
            adata.obs[cluster_method].drop_duplicates().shape[0]))

    # Save the clustered data to a data frame.
    cell_clustering_df = adata.obs[[cluster_method]].copy()
    cell_clustering_df.columns = ['cluster']
    cell_clustering_df['cluster_method'] = cluster_method
    cell_clustering_df['cluster_resolution'] = cluster_resolution
    cell_clustering_df.to_csv('{}.tsv.gz'.format(out_file_base),
                              sep='\t',
                              index=True,
                              quoting=csv.QUOTE_NONNUMERIC,
                              index_label='cell_barcode',
                              na_rep='',
                              compression='gzip')

    adata.write('{}.h5ad'.format(out_file_base), compression='gzip')

    # Save dotplot of number of cells for each sample in each cluster
    df = adata.obs[['experiment_id', 'cluster']]
    df = df.groupby(['cluster',
                     'experiment_id']).size().reset_index(name='nr_cells')
    gplt = plt9.ggplot(df, plt9.aes(x='experiment_id', y='cluster'))
    gplt = gplt + plt9.geom_point(plt9.aes(size='nr_cells', color='nr_cells'))
    gplt = gplt + plt9.theme(axis_text_x=plt9.element_text(angle=90))

    gplt.save('dotplot_sample-{}.png'.format(out_file_base),
              dpi=300,
              width=4,
              height=4)
def test_missing_values():
    """Check a line plot of ``df_missing`` against the name 'missing_values'.

    The comparison itself must emit a ``UserWarning``; it runs inside
    ``pytest.warns`` so the test fails if no such warning is raised.
    """
    base = ggplot(df_missing, aes(x='x'))
    line_layer = geom_line(aes(y='y1'), size=2)
    plot = base + line_layer

    with pytest.warns(UserWarning):
        assert plot == 'missing_values'
Esempio n. 56
0
        # Build a Modl miner on X, passing DEFAULT_QUERIED_ATOMS as
        # multiple_overlap_max_number_of_combinations and N_THREADS as
        # nb_threads, then run the search and stop the clock.
        combi_miner = Modl(
            X,
            multiple_overlap_max_number_of_combinations=DEFAULT_QUERIED_ATOMS,
            nb_threads=N_THREADS)
        modl_interesting_combis = combi_miner.find_interesting_combinations()
        stop_time = time.time()

        # Record one benchmark row: the set count and the elapsed time.
        # NOTE(review): `size` and `start_time` are assigned above this
        # excerpt -- confirm they belong to the current iteration.
        # NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat
        # is the documented replacement if this must run on newer pandas.
        df_bench = df_bench.append(
            {
                'nb_sets': size,
                'time': stop_time - start_time
            },
            ignore_index=True)

# Plot run time against the number of sets and save the figure.
df_bench['nb_sets'] = df_bench['nb_sets'].astype(int)
p = ggplot(df_bench) + aes('nb_sets', 'time')
p = p + geom_point() + geom_smooth(span=.3)
p = p + scale_x_continuous()
p = p + xlab("Number of sets") + ylab("Time (seconds)")
p.save(filename=OUTPUT_ROOT + "scaling_fig1")

# --- Benchmark: number of queried words ---
# Start from an empty results table with one row per (step, time) measurement.
df_bench = pd.DataFrame(columns=['step', 'time'])

# Input data generated with the default number of sets.
X = test_data_for_modl(
    nflags=NFLAGS, number_of_sets=DEFAULT_N_SETS, noise=NOISE)

# Time one run per value in STEPS, repeated once per element of REPEATS.
# NOTE(review): the rest of the loop body is not visible in this excerpt.
for _ in REPEATS:
    for step in STEPS:

        # Start the clock for this run.
        start_time = time.time()
Esempio n. 57
0
def test_aes_inheritance():
    """Expect PlotnineError when geom_hline has no yintercept of its own.

    The only ``yintercept`` mapping is the plot-level one given to
    ``ggplot``; ``geom_hline`` receives just ``size``. Building and drawing
    the plot inside ``pytest.raises`` must raise ``PlotnineError``.
    """
    with pytest.raises(PlotnineError):
        base = ggplot(df, aes('x', 'y', yintercept='yintercept'))
        plot = base + geom_point() + geom_hline(size=2)
        plot.draw_test()
Esempio n. 58
0
def test_aes_overwrite():
    """Expect PlotnineWarning when geom_hline gets an aes mapping together
    with an explicit ``yintercept`` parameter."""
    with pytest.warns(PlotnineWarning):
        color_mapping = aes(color='y')
        geom_hline(color_mapping, yintercept=2)
Esempio n. 59
0
 def test_gls(self):
     """Add a 'gls' smoother on ``y_noisy`` to ``self.p`` and draw the result."""
     base = self.p
     smoother = geom_smooth(aes(y='y_noisy'), method='gls')
     (base + smoother).draw_test()
Esempio n. 60
0
def plot_portfolio(portfolio_df, figure_size=(12, 4), line_size=1.5, date_text_size=7):
    """
    Given a daily snapshot of virtual purchases plot both overall and per-stock
    performance.

    Args:
        portfolio_df: DataFrame of the snapshot. The columns read here are
            ``date``, ``stock``, ``stock_profit``, ``stock_worth``,
            ``portfolio_cost``, ``portfolio_worth`` and ``portfolio_profit``.
            The caller's frame is left unmodified.
        figure_size: ``figure_size`` passed to the theme of every plot.
        line_size: line width of the overall performance plot.
        date_text_size: font size of the rotated date labels on the x axis.

    Returns:
        Tuple ``(overall_figure, stock_figure, profit_contributors)``; each
        element is whatever ``plot_as_inline_html_data`` returns for the
        corresponding plot.
    """
    assert portfolio_df is not None
    # Parse dates on a copy so the caller's "date" column is not overwritten.
    portfolio_df = portfolio_df.assign(date=pd.to_datetime(portfolio_df["date"]))

    # Label every stock by the sign of its mean profit over the whole period.
    avg_profit_over_period = (
        portfolio_df.filter(items=["stock", "stock_profit"]).groupby("stock").mean()
    )
    avg_profit_over_period["contribution"] = [
        "positive" if profit >= 0.0 else "negative"
        for profit in avg_profit_over_period.stock_profit
    ]
    # Drop the averaged column so the merge does not clash with the per-day
    # stock_profit values.
    avg_profit_over_period = avg_profit_over_period.drop("stock_profit", axis="columns")
    portfolio_df = portfolio_df.merge(
        avg_profit_over_period, left_on="stock", right_index=True, how="inner"
    )

    # 1. overall performance: one facet per portfolio-level field
    overall_df = portfolio_df.filter(
        items=["portfolio_cost", "portfolio_worth", "portfolio_profit", "date"]
    ).melt(id_vars=["date"], var_name="field")
    plot = (
        p9.ggplot(overall_df, p9.aes("date", "value", group="field", color="field"))
        + p9.labs(x="", y="$ AUD")
        + p9.geom_line(size=line_size)
        + p9.facet_wrap("~ field", nrow=3, ncol=1, scales="free_y")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=date_text_size),
            figure_size=figure_size,
            legend_position="none",
        )
    )
    overall_figure = plot_as_inline_html_data(plot)

    melted_df = portfolio_df.filter(
        items=["stock", "date", "stock_profit", "stock_worth", "contribution"]
    ).melt(id_vars=["date", "stock", "contribution"], var_name="field")

    # Only the profit on the most recent date is plotted for contributors.
    # Take an explicit copy so the column assignment below does not write
    # into a view of melted_df.
    latest_date = melted_df["date"].max()
    is_latest_profit = (melted_df["date"] == latest_date) & (
        melted_df["field"] == "stock_profit"
    )
    latest_profit_df = melted_df[is_latest_profit].copy()
    # Here "contribution" reflects the latest profit, not the period average.
    latest_profit_df["contribution"] = [
        "positive" if profit >= 0.0 else "negative"
        for profit in latest_profit_df["value"]
    ]

    # 2. plot contributors ie. winners and losers
    plot = (
        p9.ggplot(latest_profit_df, p9.aes("stock", "value", fill="stock"))
        + p9.geom_bar(stat="identity")
        + p9.labs(x="", y="$ AUD")
        + p9.facet_grid("contribution ~ field", scales="free_y")
        + p9.theme(legend_position="none", figure_size=figure_size)
    )
    profit_contributors = plot_as_inline_html_data(plot)

    # 3. per purchased stock performance (fixed 1.0 line width, not line_size)
    plot = (
        p9.ggplot(melted_df, p9.aes("date", "value", group="stock", colour="stock"))
        + p9.xlab("")
        + p9.geom_line(size=1.0)
        + p9.facet_grid("field ~ contribution", scales="free_y")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=date_text_size),
            figure_size=figure_size,
            panel_spacing=0.5,  # more space between plots to avoid tick mark overlap
            subplots_adjust={"right": 0.8},
        )
    )
    stock_figure = plot_as_inline_html_data(plot)
    return overall_figure, stock_figure, profit_contributors