コード例 #1
0
def main():
    """Simulate a price series and save three line charts.

    Calls ``load_params("./params.txt")`` and ``load_data`` for 2017-12-20,
    runs ``simulate`` on the first loaded series and writes the observed
    series (``hoge.png``), the simulated series (``hoge2.png``) and their
    difference (``hoge3.png``).
    """
    params = load_params("./params.txt")
    test_data = load_data(datetime.date(2017, 12, 20))
    observed = test_data[0]
    sim_data = simulate(observed, params)

    # Observation three steps ahead minus the simulated value; the last
    # three slots keep their initial 0.
    dif_data = [0] * len(observed)
    for idx in range(len(dif_data) - 3):
        dif_data[idx] = observed[idx + 3] - sim_data[idx]

    sim_span = range(12, len(sim_data) - 12)
    observed_df = pandas.DataFrame({
        't': range(6, len(observed)),
        'price': observed[6:],
    })
    simulated_df = pandas.DataFrame({'t': sim_span, 'price': sim_data[12:-12]})
    difference_df = pandas.DataFrame({'t': sim_span, 'price': dif_data[12:-12]})

    def line_chart(frame, **line_kwargs):
        # Same x/y mapping for every chart; only the data and line colour vary.
        return ggplot.ggplot(ggplot.aes(x='t', y='price'), data=frame) \
            + ggplot.geom_line(**line_kwargs)

    # Build all three charts before saving any of them.
    charts = [
        (line_chart(observed_df), 'hoge.png'),
        (line_chart(simulated_df, color='blue'), 'hoge2.png'),
        (line_chart(difference_df, color='blue'), 'hoge3.png'),
    ]
    for chart, path in charts:
        chart.save(path)
コード例 #2
0
    def plot(self, what='cumulative_payouts', include_ci=True):
        """Build a line plot of the simulation summary.

        :param what: ``'cumulative_payouts'``, ``'avg_accuracy'`` or ``'all'``
            (both outcomes stacked into one faceted plot).
        :param include_ci: forwarded to the per-outcome helpers; for ``'all'``
            it also adds a shaded ``ymin``/``ymax`` area.
        :return: the ggplot object with a ``geom_line`` layer added.
        :raises ValueError: if ``what`` is not one of the three options.
        """
        # Reject bad options up front, before any import or summary work.
        if what not in ('cumulative_payouts', 'avg_accuracy', 'all'):
            raise ValueError('%s is not a valid option' % what)

        import ggplot as gg  # This is hacky ... need to DRY out the imports

        if what == 'cumulative_payouts':
            plt = self._plot_cumulative_payouts(include_ci=include_ci)
        elif what == 'avg_accuracy':
            plt = self._plot_avg_accuracy(include_ci=include_ci)
        else:  # what == 'all'
            import pandas as pd

            # Compute the summary once and share it between both helpers.
            summary = self.summary()
            p1 = self._plot_cumulative_payouts(include_ci=include_ci, summary=summary)
            p2 = self._plot_avg_accuracy(include_ci=include_ci, summary=summary)
            d1 = p1.data
            d2 = p2.data
            # Give both frames a common y column plus a facet label.
            d1['Outcome'] = d1['AverageCumulativePayout']
            d2['Outcome'] = d2['AverageAccuracy']
            d1['Plot'] = 'Cumulative Payouts'
            d2['Plot'] = 'Average Accuracy'
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent that works on old and new versions alike.
            df = pd.concat([d1, d2], ignore_index=True)

            if include_ci:
                plt = gg.ggplot(gg.aes(x='Round', y='Outcome', ymin='ymin', ymax='ymax'), data=df) + \
                    gg.geom_area(alpha=0.5)
            else:
                plt = gg.ggplot(gg.aes(x='Round', y='Outcome'), data=df)

            plt += gg.facet_grid('Plot', scales='free')

        return plt + gg.geom_line()
コード例 #3
0
ファイル: util_ggplot_trend.py プロジェクト: wingr/pywing
def plot_trend_season(dates, ndf_domain, x, x_trend, season, my_domain):
    """Build a faceted chart of observed volume, overall trend and seasonality.

    ``ndf_domain`` supplies the 'Date'/'Volume' rows (its 'Date' column is
    replaced by ``dates``); ``x``, ``x_trend`` and ``season`` are only used
    for their lengths, which decide how many consecutive rows get each
    series label. ``my_domain`` is interpolated into the title.

    Returns the ggplot object.
    """
    # ---------------------- Prepare Data Frame ----------------------- #
    df_domain = pd.DataFrame(ndf_domain, columns=['Date', 'Volume'])
    df_domain['Date'] = dates

    # One label per row, in the order the three series are stacked.
    # List repetition replaces the Python-2-only xrange comprehensions.
    labels = (['Observed Volume'] * len(x)
              + ['Overall Trend'] * len(x_trend)
              + ['Repeat Sending Trend'] * len(season))
    col3 = pd.DataFrame(labels)

    df_plot = pd.concat((df_domain, col3), axis=1)
    df_plot.columns = ['Date', 'Volume', 'Data']

    # ---------------------- Plot Decomposition ----------------------- #
    p = (
        ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot)
        + ggplot.geom_line(color='blue', size=2)
        + ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week")
        + ggplot.xlab("Week (Marked on Mondays)")
        + ggplot.ylab("Message Vol")
        + ggplot.ggtitle("%s Message Volume by Week" % my_domain)
        + ggplot.facet_grid('Data', scales='free_y')
        + ggplot.theme_seaborn()
    )

    return p
コード例 #4
0
ファイル: plotting_helper.py プロジェクト: namangupta9/thesis
def plot_matches(df_in,
                 date,
                 filename_out,
                 x_var='date_time',
                 y_var="shorthand_search_vol"):
    """
    Draw one line per ``match_id`` and write the figure to ``filename_out``.

    A ``date_time`` column is written into ``df_in`` itself (the caller's
    frame is modified): ``date`` is joined with the ``time`` column and the
    result parsed, with unparseable values coerced to ``NaT``.
    Nothing is returned.
    """
    # timestamp column used by the default x axis
    df_in['date_time'] = date + " " + df_in['time'].astype(str)
    df_in['date_time'] = pd.to_datetime(df_in['date_time'],
                                        errors="coerce",
                                        infer_datetime_format=True)

    # small monospace type on the stock gray theme
    style = theme_gray()
    style._rcParams['font.size'] = 8
    style._rcParams['font.family'] = 'monospace'

    chart = (
        ggplot(aes(x=x_var, y=y_var, group="match_id", color="match_id"),
               data=df_in)
        + geom_line(size=2)
        + labs(x="time (gmt)", y="search volume (scaled to 100)")
        + scale_x_date(labels=date_format("%H:%M:%S"),
                       date_breaks="30 minutes")
        + style
    )

    chart.save(filename_out, width=16, height=8)
コード例 #5
0
def plot_line(X,y,title=None,labelx=None,labely=None,save=False, colors=None):
    '''
    Build a ggplot chart with one layer per column of ``y`` plotted against ``X``.

    X      - x values, stored as column 'X'; must expose ``.columns``
             because the colour check reads ``X.columns.values``.
    y      - DataFrame; each column becomes its own layer.
    title  - chart title; when saving it is also used (spaces removed,
             '.' replaced by '-') as the name of the .pdf file.
    labelx, labely - axis labels.
    save   - if true, write the chart to a .pdf file and return None;
             otherwise return the plot object.
    colors - colours indexed per column of ``y``; defaults to the names in
             matplotlib's BASE_COLORS and CSS4_COLORS.
    '''
    df = pandas.DataFrame()

    # Always define the output name: previously save=True without a title
    # failed on an unbound ``img_title``.
    if title is not None:
        img_title = title.replace(" ","").replace(".","-") + ".pdf"
    else:
        img_title = "plot.pdf"

    df['X'] = X
    for i in range(y.shape[1]):
        df[str(i)] = y.iloc[:,i].values

    if colors is None:
        colors = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS).keys())

    # The last row is dropped before plotting.
    df = df.iloc[0:df.shape[0]-1, :]
    p = ggplot(df, aes(x='X'))

    for i in range(y.shape[1]):
        # NOTE(review): lines are drawn unless ``colors`` names a column of
        # X, in which case points are drawn instead; confirm intent.
        if colors not in X.columns.values:
            p = p + geom_line(aes(y=str(i),color = colors[i]))
        else:
            p = p + geom_point(aes(y=str(i),color = colors))

    p = p + xlab(labelx) + ylab(labely) + ggtitle(title)

    if save:
        p.save(img_title)
    else:
        return p
コード例 #6
0
def plotSetOfArrays(arrays, names, fileName):
    """Plot several score arrays against a shared 0..1 'noise' axis.

    A ggplot line+point chart of all arrays is saved to ``./IMG/<fileName>``,
    then the first column of up to five arrays is drawn with the
    module-level ``figure``/``line``/``show`` helpers into
    ``iou_scores.html`` (presumably the legacy bokeh.plotting API; confirm).

    :param arrays: sequence of 2-D arrays with the same number of rows as
        ``arrays[0]``; their columns are placed side by side.
    :param names: list of column names, one per concatenated column.
    :param fileName: file name of the ggplot image inside ``./IMG/``.
    """
    n_rows = arrays[0].shape[0]
    IDS = np.linspace(0, 1, n_rows)
    A = np.concatenate([IDS.reshape(n_rows, 1)] + list(arrays), axis=1)
    Data = pd.DataFrame(A, columns=['noise'] + names)
    Melted = pd.melt(Data, id_vars=['noise'])

    pv = ggplot.ggplot(ggplot.aes(x='noise', y='value', colour='variable'),
                       data=Melted) + ggplot.geom_line() + ggplot.geom_point()
    ggplot.ggsave(pv, './IMG/' + fileName)

    output_file("iou_scores.html", title="correlation.py example")

    figure(tools="pan,wheel_zoom,box_zoom,reset,previewsave")
    hold()
    # zip stops at the shortest input: at most five series are drawn (as
    # before), and fewer than five arrays no longer raises IndexError.
    palette = ['#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', '#fb9a99']
    for arr, name, color in zip(arrays, names, palette):
        line(IDS, arr[:, 0], color=color, legend=name)

    curplot().title = "Minimum IOU"
    grid().grid_line_alpha = 0.3
    show()
コード例 #7
0
 def plot_roc(self, experiment_type, to_plot):
     """Save a TPR-against-FPR line chart, one colour per parameter value.

     The ``parameter`` column of ``to_plot`` is overwritten with its string
     form so the colour scheme is categorical. The figure is written to
     ``results_path + experiment_type + "_" + mode + ".png"``.
     """
     to_plot.loc[:, "parameter"] = list(map(str, to_plot.loc[:, "parameter"]))
     chart = (
         gg.ggplot(data=to_plot,
                   aesthetics=gg.aes(x="FPR", y="TPR", colour="parameter"))
         + gg.geom_line(gg.aes(x="FPR", y="TPR", colour="parameter"))
         + gg.ggtitle(experiment_type)
         + gg.xlab("FPR")
         + gg.ylab("TPR")
     )
     out_file = self.results_path + experiment_type + "_" + self.mode + ".png"
     gg.ggsave(filename=out_file, plot=chart)
コード例 #8
0
def plot_cost_history(alpha, cost_history):
   """Return a point-and-line chart of the cost at each iteration.

   The title reports ``alpha`` with three decimals.
   """
   cost_df = pandas.DataFrame({
      'Cost_History': cost_history,
      'Iteration': range(len(cost_history)),
   })
   chart = gp.ggplot(cost_df, gp.aes('Iteration', 'Cost_History'))
   chart = chart + gp.geom_point()
   chart = chart + gp.geom_line()
   chart = chart + gp.ggtitle('Cost History for alpha = %.3f' % alpha)
   return chart
コード例 #9
0
def plot_weather_data(df):
	"""Return a line chart of EXITSn_hourly summed per DATEn value.

	The DATEn column of the caller's ``df`` is replaced by its datetime form.
	"""
	df.DATEn = pd.to_datetime(df.DATEn)
	per_date = df.groupby('DATEn', as_index=False).sum()
	per_date.index.name = 'DATEn'
	return (
		gp.ggplot(per_date, gp.aes(x='DATEn', y='EXITSn_hourly'))
		+ gp.geom_line()
		+ gp.ggtitle('Subway Ridership by Day')
		+ gp.xlab('Date')
		+ gp.ylab('Exits')
	)
コード例 #10
0
def plot_weather_data(df):  # older version
	"""Return a line chart of EXITSn_hourly summed per DATEn value.

	The DATEn column of the caller's ``df`` is replaced by its datetime form.
	"""
	df.DATEn = pd.to_datetime(df.DATEn)
	grouped = df.groupby('DATEn', as_index=False).sum()
	grouped.index.name = 'DATEn'

	# The labels now describe what is plotted (exits per date); the previous
	# text spoke of hours, entries and rain, none of which appear in the chart.
	p_title = 'Subway Ridership by Day'
	p_xlab = 'Date'
	p_ylab = 'Exits'
	plot = gp.ggplot(grouped, gp.aes(x='DATEn', y='EXITSn_hourly')) + gp.geom_line() + gp.ggtitle(p_title) + gp.xlab(p_xlab) + gp.ylab(p_ylab)
	return plot
コード例 #11
0
def lineplot_compare(filename):
    """Return a point-and-line chart of HR per yearID, coloured by teamID.

    ``filename`` is a csv file read with pandas; it must provide the
    'yearID', 'HR' and 'teamID' columns.
    """
    df = pd.read_csv(filename)
    gg = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR", color="teamID"))
        + gp.geom_point()
        + gp.geom_line()
        + gp.ggtitle("Homeruns by Year by Team")
        # x carries yearID and y carries HR; the labels used to be swapped.
        + gp.xlab("Year")
        + gp.ylab("Homeruns")
    )
    return gg
コード例 #12
0
def lineplot(hr_year_csv):
    """Return a red point-and-line chart of HR per yearID.

    ``hr_year_csv`` is a csv file read with pandas; it must provide the
    'yearID' and 'HR' columns.
    """
    df = pandas.read_csv(hr_year_csv)
    gg = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR"))
        + gp.geom_point(color="red")
        + gp.geom_line(color="red")
        + gp.ggtitle("Homeruns by Year")
        # x carries yearID and y carries HR; the labels used to be swapped.
        + gp.xlab("Year")
        + gp.ylab("Homeruns")
    )
    return gg
コード例 #13
0
 def show_price(self):
     """Print a line chart of ``self.price`` against its time-step index."""
     price_table = pd.DataFrame(
         {'time_step': range(len(self.price)), 'price': self.price},
         columns=['time_step', 'price'])
     chart = gp.ggplot(gp.aes(x='time_step', y='price'), data=price_table)
     chart = chart + gp.geom_line()
     # x axis spans the whole series
     chart = chart + gp.xlim(0, len(self.price))
     chart = chart + gp.ggtitle('Price trend')
     print(chart)
コード例 #14
0
 def show_asset(self):
     """Print a line chart of ``self.asset_history`` against its time-step index."""
     asset_table = pd.DataFrame(
         {'time_step': range(len(self.asset_history)),
          'asset': self.asset_history},
         columns=['time_step', 'asset'])
     chart = gp.ggplot(gp.aes(x='time_step', y='asset'), data=asset_table)
     chart = chart + gp.geom_line()
     # x axis spans the whole history
     chart = chart + gp.xlim(0, len(self.asset_history))
     chart = chart + gp.ggtitle('Asset trend')
     print(chart)
コード例 #15
0
ファイル: plot.py プロジェクト: caomw/RecipeWatch
def plotHistogramMeans(hist,fileName):
  """Save one small line chart per row of ``hist`` to ``./IMG/<fileName>``.

  ``hist`` is a 2-D array with one row per cluster. Each row is tagged with
  its row index as 'ID', the table is melted to long form, and the values
  are drawn against the column index, faceted by 'ID'.
  """
  num_clust = hist.shape[0]
  # Column vector of row ids (np.mat is gone from NumPy 2.0).
  IDS = np.arange(num_clust).reshape(num_clust,1)

  histD = np.concatenate((IDS,hist),axis=1)

  # list(range(...)) keeps the concatenation valid on Python 3 as well.
  Data = pd.DataFrame(histD,columns = ['ID']+list(range(0,hist.shape[1])))
  Melted = pd.melt(Data,id_vars=['ID'])
  pv =  ggplot.ggplot( ggplot.aes(x='variable',y='value'),data=Melted) +  ggplot.geom_line()  + ggplot.facet_wrap("ID")
  print("Saving mean histograms")
  ggplot.ggsave(pv,'./IMG/'+fileName)
0
 def plot(self):
     """Stack all trajectories into one frame and build a line chart from it.

     Each trajectory's ``to_np_array()`` rows get the trajectory name
     appended as an extra column; the stacked rows become the columns
     'ep', 'value' and 'type'.

     NOTE(review): in the visible code the chart is built but neither
     returned, printed nor saved.
     """
     pieces = []
     for traj in self.trajs:
         rec = traj.to_np_array()
         # column vector holding the trajectory name once per row
         names = np.array([traj.name] * rec.shape[0])[:, np.newaxis]
         pieces.append(np.concatenate([rec, names], axis=1))
     stacked = np.concatenate(pieces, axis=0)
     df = pd.DataFrame(data=stacked, columns=['ep', 'value', 'type'])
     p = gp.ggplot(gp.aes(x='ep', y='value', color='type'), data=df)
     p = p + gp.geom_line()
     p = p + gp.ggtitle(self.title)
コード例 #17
0
def plotHistogramMeans(hist, fileName):
    """Save one small line chart per row of ``hist`` to ``./IMG/<fileName>``.

    ``hist`` is a 2-D array with one row per cluster. Each row is tagged
    with its row index as 'ID', the table is melted to long form, and the
    values are drawn against the column index, faceted by 'ID'.
    """
    num_clust = hist.shape[0]
    # Column vector of row ids (np.mat is gone from NumPy 2.0).
    IDS = np.arange(num_clust).reshape(num_clust, 1)

    histD = np.concatenate((IDS, hist), axis=1)

    # list(range(...)) keeps the concatenation valid on Python 3 as well.
    Data = pd.DataFrame(histD,
                        columns=['ID'] + list(range(0, hist.shape[1])))
    Melted = pd.melt(Data, id_vars=['ID'])
    pv = ggplot.ggplot(
        ggplot.aes(x='variable', y='value'),
        data=Melted) + ggplot.geom_line() + ggplot.facet_wrap("ID")
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
コード例 #18
0
def lineplot_compare(filename):  # Cleaner version with string vars
    """Return a point-and-line chart of HR per yearID, coloured by teamID.

    ``filename`` is a csv file read with pandas; it must provide the
    'yearID', 'HR' and 'teamID' columns.
    """
    df = pd.read_csv(filename)
    p_title = "Homeruns by Year by Team"
    # x carries yearID and y carries HR; the two label values used to be
    # swapped.
    p_xlab = "Year"
    p_ylab = "Homeruns"
    gg = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR", color="teamID"))
        + gp.geom_point()
        + gp.geom_line()
        + gp.ggtitle(p_title)
        + gp.xlab(p_xlab)
        + gp.ylab(p_ylab)
    )
    return gg
コード例 #19
0
    def line_chart(self, conn, column1, column2, table_chosen, title):
        """Fetch two columns of a table and print them as a line chart.

        ``column1`` goes on the x axis and ``column2`` on the y axis. Before
        the chart, the current time and the time elapsed since ``a`` are
        printed.
        """
        data_df = dfile.double_selector(conn=conn,
                                        table=table_chosen,
                                        col1=column1,
                                        col2=column2)

        line_plot = ggplot(aes(y=column2, x=column1), data=data_df)
        line_plot = line_plot + geom_line()
        line_plot = line_plot + theme_gray()
        line_plot = line_plot + labs(title=title)

        finished_at = datetime.datetime.now()
        print(finished_at)
        # NOTE(review): `a` is not defined in this method; presumably a
        # start timestamp set elsewhere in the module. Confirm.
        print(finished_at - a)
        print(line_plot)
コード例 #20
0
    def _plot_cumulative_payouts(self, include_ci=True, summary=None):
        """Plot the average cumulative payout per round.

        :param include_ci: when true, add ``ymin``/``ymax`` columns at
            1.96 standard deviations around the average and shade that band.
        :param summary: precomputed summary; ``self.summary()`` is called
            when it is None.
        :return: the ggplot object with a line layer added.
        """
        import ggplot as gg
        if summary is None:
            summary = self.summary()

        df = pd.DataFrame({
            'AverageCumulativePayout': summary['CumulativePayout']['Avg'],
            'Std': summary['CumulativePayout']['Std'],
            'Round': range(self.n_rounds),
        })

        if not include_ci:
            chart = gg.ggplot(gg.aes(x='Round', y='AverageCumulativePayout'), data=df)
            return chart + gg.geom_line()

        df['ymin'] = df.AverageCumulativePayout - 1.96 * df.Std
        df['ymax'] = df.AverageCumulativePayout + 1.96 * df.Std
        band_mapping = gg.aes(x='Round', y='AverageCumulativePayout',
                              ymin='ymin', ymax='ymax')
        chart = gg.ggplot(band_mapping, data=df) + gg.geom_area(alpha=0.5)
        return chart + gg.geom_line()
コード例 #21
0
def googletrend_command(delta_t, threshold=0.0, inverse=False):
    """the command to run google trend algorithm.

    Runs the classifier for every threshold from 0 in steps of 0.1 and, for
    each, every integer delta_t from 1 up to (but excluding) ``delta_t``,
    then saves a ROC chart (one colour per threshold plus a y=x baseline)
    to ``./plots/googletrend[_inverse].jpg``.

    :param delta_t:   the upper bound for original delta_t parameter
    :param threshold: upper bound for the threshold of differentiating two classes
    :param inverse:   whether to inverse the classifier
    """
    ## handle filepath and title based on parameter inverse
    filename = "googletrend"
    titlename = "ROC of google trend classifier"
    if inverse:
        filename += "_inverse"
        titlename += " (inverse version)"
    filepath = "./plots/%s.jpg" % filename
    ## generate data first
    data = googletrend.preprocess()
    ## store classifier evaluation metrics into dict
    output = {'tpr': [], 'fpr': [], 'plot': []}
    for thre in np.arange(0, threshold + 0.1, 0.1):
        # print() with one pre-formatted string behaves the same on
        # Python 2 and 3.
        print("==> threshold: %f, inverse: %s" % (thre, inverse))
        # range instead of the Python-2-only xrange
        for i in range(1, int(delta_t)):
            googletrend.algorithm(data, i, thre, inverse)
            tp_rate, fp_rate = googletrend.evaluate(data)
            output['tpr'].append(tp_rate)
            output['fpr'].append(fp_rate)
            output['plot'].append('thre_' + str(thre))
    ## plot ROC graph
    ## add a y=x baseline for comparison
    output['tpr'].extend([0.0, 1.0])
    output['fpr'].extend([0.0, 1.0])
    output['plot'].extend(['baseline', 'baseline'])
    df = pd.DataFrame(output)
    graph = (
        gg.ggplot(df, gg.aes('fpr', 'tpr', color='plot'))
        + gg.theme_seaborn()
        + gg.ggtitle(titlename)
        + gg.xlab("FPR")
        + gg.ylab("TPR")
        + gg.xlim(0.0, 1.0)
        + gg.ylim(0.0, 1.0)
        + gg.geom_point()
        + gg.geom_line()
    )
    gg.ggsave(plot=graph, filename=filepath, width=6, height=6, dpi=100)
コード例 #22
0
def plot_predictions(date_times, actual_values, predictions, match_id,
                     feature_set_in, filename):
    """
    Plot actual and predicted search volume over time and save the figure.

    Two series are stacked into one frame and told apart by ``match_id``:
    the actual values, labelled ``"actual" + match_id``, and the
    predictions, labelled ``"predictedby_" + str(feature_set_in) + match_id``.
    Timestamps that cannot be parsed are coerced to ``NaT``. The chart is
    written to ``filename``; nothing is returned.
    """
    def series_frame(values, label):
        # One frame per series; columns are filled in the same order.
        frame = pd.DataFrame()
        frame['date_time'] = pd.to_datetime(date_times,
                                            errors="coerce",
                                            infer_datetime_format=True)
        frame['search_vol'] = values
        frame['match_id'] = label
        return frame

    actual_df = series_frame(actual_values, "actual" + match_id)
    predict_df = series_frame(
        list(predictions),
        "predictedby_" + str(feature_set_in) + match_id)

    plotting_df = pd.concat([actual_df, predict_df], axis=0, ignore_index=True)

    # small monospace type on the stock gray theme
    style = theme_gray()
    style._rcParams['font.size'] = 8
    style._rcParams['font.family'] = 'monospace'

    chart = (
        ggplot(aes(x='date_time',
                   y='search_vol',
                   group="match_id",
                   color="match_id"),
               data=plotting_df)
        + geom_line(size=2)
        + labs(x="time (gmt)", y="search volume (scaled to 100)")
        + scale_x_date(labels=date_format("%H:%M:%S"),
                       date_breaks="30 minutes")
        + style
    )

    chart.save(filename, width=16, height=8)
コード例 #23
0
def graph1(score_data):
    """ Average score as time goes on;
        Creates and returns graph 1, a line graph.

        ``score_data`` is a list of rows whose first row holds the column
        names; ``find_time_stamp`` gives the position of the date column in
        that header row. Returns the ggplot object. """

    date_column = score_data[0][find_time_stamp(score_data)]

    data = DataFrame(score_data[1:], columns=score_data[0])

    # Get all columns that are numerical
    # questions so we know what to graph
    num_questions = data.select_dtypes(include=['int64']).columns.values

    # Melt data so that each question is in a separate row
    new_data = pd.melt(data,
                       id_vars=date_column,
                       value_vars=num_questions,
                       var_name="Question",
                       value_name="Score")

    # Convert date string into an actual date type
    new_data[date_column] = pd.to_datetime(new_data[date_column],
                                           format="%m/%d/%Y")

    # Group all rows with same date and question, and then take the average.
    new_data = new_data.groupby([date_column, 'Question']).mean().reset_index()
    new_data['All'] = "Individual Questions"

    # Average over all questions per date. The frame now also holds the
    # string columns 'Question' and 'All'; numeric_only=True skips them
    # (pandas >= 2.0 raises on them otherwise; older versions dropped them).
    new_data2 = new_data.groupby(date_column).mean(numeric_only=True).reset_index()
    new_data2['Question'] = "All Questions"
    new_data2['All'] = "Average of All Questions"

    new_data = pd.concat([new_data, new_data2])

    # Dates become integers so they fit the continuous x scale used below.
    new_data[date_column] = new_data[date_column].astype('int64')

    # Create time graph with separate lines for each question
    ret = ggplot.ggplot(ggplot.aes(x=date_column, y="Score", colour="Question"), new_data) +\
        ggplot.geom_point() +\
        ggplot.geom_line() +\
        ggplot.facet_grid("All") +\
        ggplot.scale_x_continuous(labels=[""], breaks=0) +\
        ggplot.labs(x="Time", y="Average Question Score") +\
        ggplot.ggtitle("Question Scores Over Time")
    return ret
コード例 #24
0
ファイル: app.py プロジェクト: ekalosak/audio-in
def main(log):
    """Record audio until Ctrl-C, then chart the peak of every chunk.

    Opens a PyAudio input stream with the module-level FORMAT, CHANNELS,
    SAMPLE_RATE and CHUNK_SIZE settings and stores the maximum sample of
    each chunk read. On KeyboardInterrupt the stream is closed, a ggplot
    line chart of the maxima is built, a pdb session is opened so the chart
    can be inspected, and the process exits with status 1.

    :param log: logger-like object; only ``debug`` is used.
    """
    log.debug('initializing app')
    p = pyaudio.PyAudio()

    # Open audio input stream
    stream = p.open(format = FORMAT,
        channels = CHANNELS,
        rate = SAMPLE_RATE,
        input = True,
        frames_per_buffer = CHUNK_SIZE)

    log.debug('opened stream <{}>'.format(stream))
    log.debug('reading audio input at rate <{}>'.format(SAMPLE_RATE))

    recorded = []

    # The try wraps the whole loop so an interrupt arriving between reads
    # is handled too.
    loops = 0
    try:
        while True:
            loops += 1
            if loops % 25 == 0:
                log.debug('recorded <{}> loops'.format(loops))

            # Decode a chunk of audio data from the stream.
            # assumes FORMAT is 32-bit float; TODO confirm
            data = stream.read(CHUNK_SIZE)
            # frombuffer replaces fromstring, whose binary mode and the
            # 'Float32' dtype alias are gone from current NumPy.
            decoded = np.frombuffer(data, dtype=np.float32)
            recorded.append(decoded.max())

    # On <C-c>, plot max of recorded data
    except KeyboardInterrupt:
        log.debug('closing stream and ending PyAudio')
        stream.close()
        p.terminate()
        df = pd.DataFrame(columns = ['mx', 'time'])
        df['mx'] = recorded
        df['time'] = range(len(recorded))
        plt = ggplot.ggplot(ggplot.aes(x='time', y='mx'), data=df) +\
                    ggplot.geom_line()
        # Debugger stop so `plt` can be inspected interactively.
        pdb.set_trace()
        log.debug('quitting')
        sys.exit(1)
コード例 #25
0
    def _plot_avg_accuracy(self, include_ci=True, summary=None):
        import ggplot as gg
        if summary is None:
            summary = self.summary()

        df = pd.DataFrame({'AverageAccuracy': summary['Accuracy']['Avg'], 'Round': range(self.n_rounds)})

        if include_ci:
            from scipy import stats
            succ = df.AverageAccuracy * self.n_sim
            fail = self.n_sim - succ
            interval = stats.beta(succ + 1, fail + 1).interval(0.95)

            df['ymin'] = interval[0]
            df['ymax'] = interval[1]
            plt = gg.ggplot(gg.aes(x='Round', y='AverageAccuracy', ymin='ymin', ymax='ymax'), data=df) + \
                gg.geom_area(alpha=0.5)
        else:
            plt = gg.ggplot(gg.aes(x='Round', y='AverageAccuracy'), data=df)

        return plt + gg.geom_line()
コード例 #26
0
ファイル: output.py プロジェクト: DimeZilla/github_repo_DZ
def data_output(data, chart_title):
    """Ask on the console how to show ``data`` and print it that way.

    An answer starting with "t" prints ``data`` as is; one starting with
    "l" or "g" prints a ggplot chart of 'Value' against 'Month, Year' titled
    ``chart_title``. The answer is matched case-insensitively after
    stripping whitespace; anything else (including an empty answer) prints
    nothing further.
    """
    print("Good News! You're data has been returned. I'm happy to show it to you.")
    print("Just tell me how you want it - Table or Line Graph?")

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    choice = read_line("Choose table or line > ").strip().lower()

    if choice.startswith("t"):
        print("Ok, here's your data.")
        print(data)
    elif choice.startswith(("l", "g")):
        import ggplot as gg

        # scale_x_date used to sit on its own line after the chain had
        # already ended, so it was evaluated and thrown away; it is now
        # part of the plot.
        plot = (
            gg.ggplot(gg.aes(x='Month, Year', y='Value'), data=data)
            + gg.geom_point(color='black')
            + gg.geom_line(color='green')
            + gg.ggtitle(chart_title)
            + gg.xlab("Month, Year")
            + gg.ylab("Value")
            + gg.scale_x_date(breaks=gg.date_breaks('1 month'),
                              labels=gg.date_format("%B"))
        )

        print(plot + gg.theme_xkcd())
			
コード例 #27
0
def lineplot(hr_year_csv):
    """Chart home runs per year as red points joined by a red line.

    ``hr_year_csv`` is a csv file with two columns: 'HR' (the number of
    homerun hits) and 'yearID' (the year in which the homeruns were hit).
    It is loaded into a pandas dataframe and drawn with ggplot.

    Sample data: https://www.dropbox.com/s/awgdal71hc1u06d/hr_year.csv
    ggplot docs: https://github.com/yhat/ggplot/
    """
    df = pandas.read_csv(hr_year_csv)
    chart = gp.ggplot(df, gp.aes('yearID', 'HR'))
    chart = chart + gp.geom_point(color='red')
    chart = chart + gp.geom_line(color='red')
    return chart
def lineplot(hr_year_csv):
    """Chart home runs per year as red points joined by a red line.

    ``hr_year_csv`` is a csv file with the columns 'yearID' and 'HR' (the
    number of HR hit in Major League baseball in each year).

    Sample data: https://www.dropbox.com/s/awgdal71hc1u06d/hr_year.csv
    """
    # Read the file that was passed in; the hard-coded 'hr_year.csv' used
    # before ignored the argument.
    df = pd.read_csv(hr_year_csv)
    gg = gp.ggplot(df, gp.aes('yearID', 'HR'))
    gg += gp.geom_point(color='red')
    gg += gp.geom_line(color='red')
    gg += gp.ggtitle('Total HRs by Year')
    gg += gp.xlab('Year')
    gg += gp.ylab('HR')

    return gg
def lineplot(hr_year_csv):
    """Chart home runs per year as red points joined by a red line.

    ``hr_year_csv`` is a csv file with the columns 'yearID' and 'HR' (the
    number of HR hit in Major League baseball in each year).

    Sample data: https://www.dropbox.com/s/awgdal71hc1u06d/hr_year.csv
    """
    # Read the file that was passed in; the hard-coded 'hr_year.csv' used
    # before ignored the argument.
    df = pd.read_csv(hr_year_csv)
    gg  = gp.ggplot(df, gp.aes('yearID', 'HR'))
    gg += gp.geom_point(color='red')
    gg += gp.geom_line(color='red')
    gg += gp.ggtitle('Total HRs by Year')
    gg += gp.xlab('Year')
    gg += gp.ylab('HR')

    return gg
コード例 #30
0
def lineplot_compare(hr_by_team_year_sf_la_csv):
    """Compare the yearly home runs of the teams in one coloured chart.

    ``hr_by_team_year_sf_la_csv`` is a csv file with three columns,
    yearID, HR and teamID, representing the total number of HR hit each
    year by the SF Giants and LA Dodgers.

    To differentiate between multiple categories on the same plot, color is
    passed to ``aes`` with the other arguments rather than to the geometry
    functions, e.g. ``ggplot(data, aes(xvar, yvar, color=category_var))``.

    Sample data:
    https://www.dropbox.com/s/wn43cngo2wdle2b/hr_by_team_year_sf_la.csv
    """
    df = pandas.read_csv(hr_by_team_year_sf_la_csv)
    chart = gp.ggplot(df, gp.aes('yearID', 'HR', color='teamID'))
    chart = chart + gp.geom_point()
    chart = chart + gp.geom_line()
    return chart
コード例 #31
0
ファイル: crm.py プロジェクト: brockk/clintrials
    def plot_toxicity_probabilities(self, chart_title=None, use_ggplot=False):
        """ Plot prior and posterior dose-toxicity curves.

        :param chart_title: optional chart title. Default is fairly verbose
        :type chart_title: str
        :param use_ggplot: True to use ggplot, else matplotlib
        :type use_ggplot: bool
        :return: the ggplot object when ``use_ggplot`` is true. The matplotlib
            branch draws on the current pyplot figure and has no return
            statement in the visible code, so it yields None.

        """

        # A falsy title (None or "") is replaced by the default caption.
        if not chart_title:
            chart_title = "Prior (dashed) and posterior (solid) dose-toxicity curves"
            chart_title = chart_title + "\n"

        if use_ggplot:
            from ggplot import (ggplot, ggtitle, geom_line, geom_hline, aes, ylim)
            import numpy as np
            import pandas as pd
            data = pd.DataFrame({'Dose level': self.dose_levels(),
                                 'Prior': self.prior,
                                 'Posterior': self.prob_tox(),
                                 #                      'Lower': crm.get_tox_prob_quantile(0.05),
                                 #                      'Upper': crm.get_tox_prob_quantile(0.95)
                                 })
            # Long format: one row per (dose level, curve type) pair.
            var_name = 'Type'
            value_name = 'Probability of toxicity'
            melted_data = pd.melt(data, id_vars='Dose level', var_name=var_name, value_name=value_name)
            # melted_data['LineType'] =  np.where(melted_data.Type=='Posterior', '--', np.where(melted_data.Type=='Prior', '-', '..'))
            # melted_data['LineType'] =  np.where(melted_data.Type=='Posterior', '--', np.where(melted_data.Type=='Prior', '-', '..'))
            # melted_data['Col'] =  np.where(melted_data.Type=='Posterior', 'green', np.where(melted_data.Type=='Prior', 'blue', 'yellow'))
            # np.where(melted_data.Type=='Posterior', '--', '-')

            # One line per curve type, plus a horizontal line at self.target.
            p = ggplot(melted_data, aes(x='Dose level', y=value_name, linetype=var_name)) + geom_line() \
                + ggtitle(chart_title) + ylim(0, 1) + geom_hline(yintercept=self.target, color='black')
            # Can add confidence intervals once I work out linetype=??? in ggplot

            return p
        else:
            import matplotlib.pyplot as plt
            import numpy as np
            dl = self.dose_levels()
            prior_tox = self.prior
            post_tox = self.prob_tox()
            post_tox_lower = self.get_tox_prob_quantile(0.05)
            post_tox_upper = self.get_tox_prob_quantile(0.95)
            # Prior dashed, posterior solid, 0.05/0.95 quantiles dash-dot.
            plt.plot(dl, prior_tox, '--', c='black')
            plt.plot(dl, post_tox, '-', c='black')
            plt.plot(dl, post_tox_lower, '-.', c='black')
            plt.plot(dl, post_tox_upper, '-.', c='black')
            # Markers: crosses on the prior, circles on the posterior.
            plt.scatter(dl, prior_tox, marker='x', s=300, facecolors='none', edgecolors='k')
            plt.scatter(dl, post_tox, marker='o', s=300, facecolors='none', edgecolors='k')
            # Horizontal line at self.target.
            plt.axhline(self.target)
            plt.ylim(0, 1)
            plt.xlim(np.min(dl), np.max(dl))
            plt.xticks(dl)
            plt.ylabel('Probability of toxicity')
            plt.xlabel('Dose level')
            plt.title(chart_title)

            # Resize the current figure to a golden-ratio aspect (12 in wide).
            # NOTE(review): `p` is not returned in the visible code, although
            # the docstring historically promised a plot; confirm intent.
            p = plt.gcf()
            phi = (np.sqrt(5) + 1) / 2.
            p.set_size_inches(12, 12 / phi)
コード例 #32
0
# -*- coding: utf-8 -*-
from ggplot import ggplot, aes, geom_point, geom_line, ggtitle, xlab, ylab

# NOTE(review): `data` is an empty placeholder here; presumably it should be
# a pandas DataFrame with 'yearID' and 'HR' columns. Confirm.
data = []
xvar = 'X'
yvar = 'Y'

# Red points joined by a red line: HR per year.
# print(...) with a single argument behaves the same on Python 2 and 3; the
# bare print statement used before is a syntax error on Python 3.
print(
    ggplot(data, aes(x='yearID', y='HR'))
    + geom_point(color='red')
    + geom_line(color='red')
    + ggtitle('Number of HR by year')
    + xlab('Year')
    + ylab('Number of HR')
)
コード例 #33
0
# plot with ggplot

import ggplot as gg

# ggplot needs data to be in Pandas

# Wide table: one row per epoch with the training and validation accuracy.
# `accuracies` and `accuracies_val` come from earlier cells.
data = pd.DataFrame(
        {"train": accuracies, 
         "validation": accuracies_val,
         "epoch": range(len(accuracies)),
         })
# Long format: one row per (epoch, variable) pair, as the colour mapping needs.
data_melted = data.melt(id_vars="epoch")

# Points and lines, one colour per series ("train" / "validation").
p = gg.ggplot(data_melted, gg.aes(x="epoch", y="value", color="variable"))
p = p + gg.geom_point() + gg.geom_line()
# Bare expression: in a notebook/interactive cell this displays the plot.
p

#%%
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, f1_score

#%%
# Evaluate model (last epoch)
# Keeps element 1 of what model.evaluate returns.
# NOTE(review): presumably that is the accuracy metric (loss first); confirm.
accuracy_FINAL = model.evaluate(x_test, y_test)[1]

#%%

# Test accuracy with sklearn

# Raw model outputs for the test set, then the index of the largest value
# along the last axis of each prediction.
y_predicted = model.predict(x_test)
y_predicted_argmax = np.asarray( y_predicted.argmax(axis=-1) )
コード例 #34
0
ファイル: main.py プロジェクト: aragilar/odes
# Benchmark every (method, model, rtol) combination: record the wall-clock
# time of each run and its largest relative difference from the gold standard.
data = []
for method in methods:
    for model in models:
        for rtol in rtols:
            # end='' keeps the line open; the result is appended further down.
            print('method: {} model: {} rtol: {}'.format(method.name, model.name, rtol), end='')

            # Run
            tic = time.time()
            result = method(model, rtol)
            toc = time.time() - tic  # elapsed seconds for this run

            # Compare to gold standard
            standard = gold_standards[model.name]
            diff = result - standard.values
            # NOTE(review): this is the maximum of the *signed* relative
            # differences, so negative deviations never count; confirm
            # whether an absolute value was intended.
            max_rel_diff = np.max(diff/standard.max)

            # Append to table
            record = (method.name, model.name, rtol, max_rel_diff, toc)
            print(' err: {} toc: {}'.format(max_rel_diff, toc))
            data.append(record)


# One row per run; `data` is rebound from the list of tuples to the frame.
data = DataFrame(data, columns=['method', 'model', 'rtol', 'err', 'time'])

# Error on x, time on y, both on log scales, one colour per method.
print(gg.ggplot(data, gg.aes(x='err', y='time', color='method'))
      + gg.geom_point(size=60.0)
      + gg.geom_line()
      + gg.scale_x_log()
      + gg.scale_y_log()
      + gg.xlim(1e-10, 1e-2))
コード例 #35
0
ファイル: plot.py プロジェクト: caomw/RecipeWatch
def plotSetOfArrays(arrays,names,fileName):
  """Plot several score arrays against a shared 0..1 'noise' axis.

  A ggplot line+point chart of all arrays is saved to ``./IMG/<fileName>``,
  then the first column of up to five arrays is drawn with the module-level
  ``figure``/``line``/``show`` helpers into ``iou_scores.html`` (presumably
  the legacy bokeh.plotting API; confirm).

  :param arrays: sequence of 2-D arrays with the same number of rows as
      ``arrays[0]``; their columns are placed side by side.
  :param names: list of column names, one per concatenated column.
  :param fileName: file name of the ggplot image inside ``./IMG/``.
  """
  n_rows = arrays[0].shape[0]
  IDS = np.linspace(0,1,n_rows)
  A = np.concatenate([IDS.reshape(n_rows,1)] + list(arrays), axis=1)
  Data = pd.DataFrame(A,columns = ['noise']+names)
  Melted = pd.melt(Data,id_vars=['noise'])

  pv = ggplot.ggplot(ggplot.aes(x='noise', y='value', colour='variable'), data=Melted) +  ggplot.geom_line() + ggplot.geom_point()
  ggplot.ggsave(pv,'./IMG/'+fileName)

  output_file("iou_scores.html", title="correlation.py example")

  figure(tools="pan,wheel_zoom,box_zoom,reset,previewsave")
  hold()
  # zip stops at the shortest input: at most five series are drawn (as
  # before), and fewer than five arrays no longer raises IndexError.
  palette = ['#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', '#fb9a99']
  for arr, name, color in zip(arrays, names, palette):
    line(IDS, arr[:,0], color=color, legend=name)

  curplot().title = "Minimum IOU"
  grid().grid_line_alpha=0.3
  show()
コード例 #36
0
def plot_transmission_results(tx_results, percentage_decline, save_path,
                              path_names):
    """Save ggplot line charts summarising transmission-analysis results.

    Four groups of figures are written into ``save_path``: percentage
    decline per variable, transmissions per run column, runs A/B/C compared
    per variable, and runs B/C compared with a loess smoother.

    Args:
        tx_results: Mapping keyed by results-folder name. Each value is read
            as ``value['monthly'][run]['transmissions']``, of which the first
            ``t`` entries are used. The mapping is deep-copied before use.
        percentage_decline: Mapping from results-folder name to its
            percentage-decline value. Keys are bucketed by whether they
            contain 'HIV+', 'HIV-' or 'Incidence'.
        save_path: Folder into which the figures are saved.
        path_names: Mapping whose 'transmission' entry is the directory that
            holds 'Input files/transmission_rate_multiplier_required_inputs.xlsx'.

    Returns:
        None.
    """

    #%% what are inputs?

    # transmission results
    # There'll be a folder called 'Runs prepared for ...'
    # all the folders inside that folder will have a CEPAC results folder.
    # tx_data is a dictionary and will have two keys, 'monthly' and 'popstats'
    # 'monthly' key will only have primary transmissions data
    tx_data = deepcopy(tx_results)
    # t: number of time steps kept per run; total_var * total_val is used to
    # size the plotting frames (3 variables x 4 values each).
    t = 120
    total_var = 3
    total_val = 4
    # percentage decline
    # this is also a dictionary of percentage decline values for each folder
    # having cepac results

    # save_path: exact folder where you want to save your images

    # path_names will have paths to transmissions and sensitivity directories

    #%% plot percentage decline

    # generate an environment object first
    # let's go for a line plot
    data_plot = pd.DataFrame(
        columns=['x', 'Percentage decline', 'Transmissions', 'Variable'],
        index=range(0, total_var * total_val))
    data_in = pd.read_excel(
        os.path.join(path_names['transmission'], 'Input files',
                     'transmission_rate_multiplier_required_inputs.xlsx'))
    # Input-sheet column names and the short labels used in the plots; the
    # two lists are index-aligned, as is base_val.
    col = [
        'Incidence rate per 100 PY specific to high-risk group 1',
        'HIV uninfected individuals in high-risk group 1',
        'HIV infected individuals in high-risk group 1'
    ]
    col_adj = ['Incidence', 'Uninfected', 'Infected']
    # Round the incidence column so it can be compared with base_val[0].
    data_in[col[0]] = data_in[col[0]].round(1)
    base_val = [np.float64(0.9), 2960000, 136400]
    # Collect the percentage-decline values per input column, in the
    # iteration order of percentage_decline.
    y1_values = {col[0]: [], col[1]: [], col[2]: []}
    for var in percentage_decline:
        if 'HIV+' in var:
            y1_values[col[2]].append(percentage_decline[var])
        elif 'HIV-' in var:
            y1_values[col[1]].append(percentage_decline[var])
        elif 'Incidence' in var:
            y1_values[col[0]].append(percentage_decline[var])

    for i in range(len(col)):
        # First row of the input sheet where this column differs from its
        # base value.
        idx = data_in.loc[data_in.loc[:, col[i]] != base_val[i],
                          col[i]].index.values[0]
        # .loc slices include both ends, so four rows are filled here, shifted
        # up by one relative to the input sheet.
        data_plot.loc[idx - 1:idx + 3 - 1, 'x'] = data_in.loc[idx:idx + 3,
                                                              col[i]].values
        data_plot.loc[idx - 1:idx + 3 - 1, 'Variable'] = col_adj[i]
        data_plot.loc[idx - 1:idx + 3 - 1,
                      'Percentage decline'] = y1_values[col[i]]

    # plot (rows with a percentage decline above 200 are left out)
    df_float = data_plot.loc[data_plot.loc[:, 'Percentage decline'] <= 200, :]
    (ggplot(aes(x='x', y='Percentage decline'), df_float) + geom_line() +
     facet_wrap('Variable', scales='free')).save(
         os.path.join(save_path, 'Percentage decline'))
    del df_float

    #%% visualizing transmissions
    # index = range(time * number of values for each variable * number of variables)
    def set_abc(run, var_idx, var_name, var_value_idx):
        """Fill one t-row slab of data_plot_tx for a single run.

        Writes the variable label, the variable value and the first t
        transmission values of ``run`` into rows
        ``(var_idx - 1) * t`` .. ``(var_idx - 1) * t + t - 1``.

        Reads ``var`` from the enclosing scope (the loop variable of the
        ``for var in tx_data`` loop below), not from its arguments.
        """
        # NOTE(review): with var_idx == 0 the slab starts at label -t, which
        # is not in the 0-based index; presumably the first key of tx_data is
        # expected not to match any branch of the calling loop - confirm.

        # set variable names
        data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                         'Variable'] = var_name

        # set variable value
        data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                         'Value'] = data_plot.loc[
                             data_plot.loc[:, 'Variable'] == var_name,
                             'x'].values[var_value_idx]

        # The run name selects which of the three transmission columns is
        # filled.
        if 'RunA' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunA tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values
        elif 'RunB' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunB tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values
        elif 'RunC' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunC tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values

    data_plot_tx = pd.DataFrame(
        index=range(t * total_var * total_val),
        columns=['Variable', 'Value', 'RunA tx', 'RunB tx', 'RunC tx'])
    # var_idx counts every key of tx_data (matching or not); var_val_idx
    # counts, per variable, how many of its values have been seen so far
    # (order: Incidence, Uninfected, Infected).
    var_idx = -1
    var_val_idx = [-1, -1, -1]
    for var in tx_data:
        var_idx += 1
        if 'HIV+' in var:
            var_val_idx[2] += 1
            var_name = col_adj[2]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[2])
        elif 'HIV-' in var:
            var_val_idx[1] += 1
            var_name = col_adj[1]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[1])
        elif 'Incidence' in var:
            var_val_idx[0] += 1
            var_name = col_adj[0]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[0])
        else:
            continue

    # Add a time column that cycles 0 .. t-1 down the rows.
    data_plot_tx['t'] = 0
    t_float = -1
    for row in data_plot_tx.index:
        if t_float == t - 1:
            t_float = -1
        t_float += 1
        data_plot_tx.loc[row, 't'] = t_float

    #%% plots for individual runs
    run_col = ['RunA tx', 'RunB tx', 'RunC tx']
    inci = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Incidence', :]
    inf = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Infected', :]
    uninf = data_plot_tx.loc[data_plot_tx.loc[:,
                                              'Variable'] == 'Uninfected', :]
    # For each run column: one overview figure plus one figure per variable.
    for i in run_col:
        (ggplot(aes(x='t', y=i, color='Value'), data_plot_tx) + geom_line() +
         facet_wrap('Variable', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_transmissions for all variable all values')))
        (ggplot(aes(x='t', y=i), inci) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_plots for individual values of incidence')))
        (ggplot(aes(x='t', y=i), inf) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i +
                     r'_plots for individual values of infected population')))
        (ggplot(aes(x='t', y=i), uninf) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i +
                     '_plots for individual values of uninfected population')))

    #%% compare runs ABC
    # Build, per variable, a long-form frame with one t-row slab for every
    # (value, run column) pair.
    data_plot_abc = {}
    for var in col_adj:
        float_df = pd.DataFrame(index=range(0, t * total_var * total_val),
                                columns=['t', 'Value', 'Transmissions', 'Run'])
        insert_idx = -1
        for val in data_plot.loc[data_plot.loc[:, 'Variable'] == var, 'x']:
            var_df = data_plot_tx.loc[data_plot_tx.loc[:,
                                                       'Variable'] == var, :]
            var_df = var_df.reset_index(drop=True)
            var_val_df = var_df.loc[var_df.loc[:, 'Value'] == val, :]
            var_val_df = var_val_df.reset_index(drop=True)
            for c in ['RunA tx', 'RunB tx', 'RunC tx']:
                insert_idx += 1
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Run'] = c
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Transmissions'] = var_val_df.loc[:, c].values
                # NOTE(review): repeats the 'Run' assignment made two
                # statements above.
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Run'] = c
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Value'] = val
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             't'] = np.arange(t)
        # The NaN-free copy is kept for the B/C comparison below; the figure
        # saved here is drawn from the full float_df.
        data_plot_abc[var] = float_df.dropna()
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line() + facet_wrap('Value', scales='free') + ggtitle(var)).save(
             os.path.join(
                 save_path,
                 str(var + '_comparison of transmissions in runs ABC')))

    #%% compare runs BC
    # Same data without run A, drawn as faint raw lines plus a loess smoother.
    for var in data_plot_abc:
        float_df = data_plot_abc[var].loc[
            data_plot_abc[var].loc[:, 'Run'] != 'RunA tx', :]
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line(alpha=0.2) + facet_wrap('Value', scales='free') +
         stat_smooth(method='loess', se=False) + ggtitle(var)).save(
             os.path.join(save_path,
                          str(var +
                              '_comparison of transmissions in runs BC')))

    return
コード例 #37
0
from ggplot import aes, geom_line, ggplot, meat
import matplotlib.pyplot as plt

from bokeh import mpl
from bokeh.plotting import output_file, show

# Line chart of the 'beef' column of the bundled `meat` data set over 'date'.
beef_mapping = aes(x='date', y='beef')
beef_chart = ggplot(beef_mapping, data=meat) + geom_line()
beef_chart.make()

plt.title("Line ggplot-based plot in Bokeh.")

# Name the HTML output, convert the matplotlib figure and hand it to show().
output_file("ggplot_line.html", title="ggplot_line.py example")
bokeh_figure = mpl.to_bokeh()
show(bokeh_figure)
コード例 #38
0
print(kValues)
print("\nModelių objektai")
print(models)
print("\nS_k reikšmės")
print(WSSSEs)
print("\nRezultatų eilutės su K ir S_k reikšmėmis")
print(rowsWSSSSE)
print("\npyspark.sql.DataFrame turinys su rezultatais")
WSSSEDF.show()


# Galime nubraižyti $S_K$ priklausomybės nuo $K$ grafiką.

# In[26]:

gg.ggplot(gg.aes(x="K", y="WSSSE"), data=WSSSEDF.toPandas()) + gg.geom_line()


# Aiškiai matome, kad $K = 2$ nėra optimali reikšmė.

# Nubraižome grafiką su reikšmėmis $K > 2$

# In[27]:

gg.ggplot(gg.aes(x="K", y="WSSSE"), data=WSSSEDF.where(WSSSEDF["K"] > 2).toPandas()) + gg.geom_line()


# Su $K = 4$ matomas ženklus pagerėjimas. $K=5$ taip pat suteikia pastebimą pagerėjimą. Sprendžiant tikrą uždavinį tokiu atveju interpretuotume klasterių centrus su $K = 4$ ir $K = 5$, tuomet parinktume prasmingesnę $K$ reikšmę.

# $K = 4$ centrai
コード例 #39
0
    from 
    (
    select pod_id_location, segid, med, p25,p75, p05, p95 from cte2
    where n_passes >=100
    union all
    select distinct 0 as pod_id_location, segid, med_mob as med, p25_mob as p25, p75_mob as p75, p05_mob as p05, p95_mob as p95
    from cte2
    where n_passes >= 100
    )
""")
# Submit the query in the EU location and load the result into a DataFrame.
qry_job = bqclient.query(
    qry_str, location='EU', job_config=bigquery.QueryJobConfig())
df = qry_job.to_dataframe()

# Long form: one row per (pod, segment) and percentile statistic; also keep a
# CSV copy next to the charts.
df_long = df.melt(
    id_vars=['pod_id_location', 'segid', 'pod_idx'],
    value_vars=['p05', 'p25', 'med', 'p75', 'p95'],
    var_name='yparam',
    value_name='value',
)
df_long.to_csv(r'.\charts\subsetdistribs.csv')

#plots
#plt1.save(filename = r'.\charts\ulezpodts.png', width=None, height=None, dpi=200)
# Points and lines per statistic, built up layer by layer.
plt2 = gg.ggplot(df_long, gg.aes(x='pod_idx', y='value', color='yparam'))
plt2 = plt2 + gg.geom_point()
plt2 = plt2 + gg.geom_line()
plt2 = plt2 + gg.xlab('pod/segment')
plt2 = plt2 + gg.ylab('NO2 (as % of median)')
plt2 = plt2 + gg.theme_bw()
plt2.save(
    filename=r'.\charts\ulezsubsetvar.png', width=None, height=None, dpi=200)
コード例 #40
0
    # For every time point, compute the two transition probabilities returned
    # by get_weibull() and store them as two consecutive rows of plot_prob,
    # labelled 'FS' and 'PK'.
    # NOTE(review): `s`, `time`, `input_par`, `plot_prob`, `collect_prob`,
    # `row_idx` and `plot_num` are set up outside this fragment - presumably
    # by an enclosing loop over shape values; confirm.
    for t_float in time:
        tp_FS, tp_PK = get_weibull(t=t_float,
                                   coverage=input_par['uptake'],
                                   duration=input_par['duration'],
                                   shape=s)
        plot_prob.loc[row_idx, 'Monthly transition probability'] = tp_FS
        plot_prob.loc[row_idx + 1, 'Monthly transition probability'] = tp_PK
        plot_prob.loc[row_idx, 'time'] = t_float
        plot_prob.loc[row_idx + 1, 'time'] = t_float
        plot_prob.loc[row_idx, 'Formula'] = 'FS'
        plot_prob.loc[row_idx + 1, 'Formula'] = 'PK'

        # Two rows were written for this time point.
        row_idx += 2

    # collect: keep both probability series under keys that include the shape
    collect_prob['FS ' + str(s)] = plot_prob.loc[
        plot_prob.loc[:, 'Formula'] == 'FS',
        'Monthly transition probability'].values
    collect_prob['PK ' + str(s)] = plot_prob.loc[
        plot_prob.loc[:, 'Formula'] == 'PK',
        'Monthly transition probability'].values

    # plot: probability against time, one line per formula
    x = ggplot(aes(
        x='time', y='Monthly transition probability', color='Formula'),
               data=plot_prob) + geom_line()
    #name = r'Shape: ' + str(s)# + r', Coverage/Uptake = ' + str(input_par['uptake']*100) + r', Coverage time = ' + str(input_par['duration']) + '.jpg'
    # The file name is 'Weibull' followed by a running number.
    x.save('Weibull' + str(plot_num))

    plot_num += 1
コード例 #41
0
ファイル: core.py プロジェクト: TimZehta/rpgdice
def main():
    global args, ruleset
    # Arguments Parser
    argparser, subparser = parser_setup()
    register_rules(subparser)
    args = argparser.parse_args()
    rulemod = sys.modules["rpgdice.rulesets.%s" % args.ruleset]
    rulemod.prepare(args, srand)

    if args.debug:
        print "DEBUG: args", args
        print

    results = list()
    pool = multiprocessing.Pool()
    try:
        for result in pool.map(rulemod.simulate_rolls, rulemod.variables):
            results.extend(result)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        sys.exit(130)
    if args.debug:
        print "DEBUG: results:"
        pprint(results)
        print

    conf = dict()
    conf = {"vlab": "Variables", "xlab": "Outcome", "ylab": "Probability %"}
    for item in conf:
        try:
            conf[item] = getattr(rulemod, item)
        except:
            pass

    columns = ("Graph", conf["vlab"], conf["xlab"], "Count", conf["ylab"])
    data = pandas.DataFrame.from_records(results, columns=columns)

    # Create and save graphs
    for gkey in rulemod.graphs:
        # Graph Defaults
        graph_conf = conf.copy()
        graph_conf["file_prefix"] = "%s%02d" % (args.ruleset, gkey)
        graph_conf["file_suffix"] = str()
        # colors
        colors_lower = ["#ff0000", "#cc0000", "#993300", "#666600"]
        colors_upper = ["#006666", "#003399", "#0000cc", "#0000ff"]
        colors_mid = ["#000000"]
        color_count = len(rulemod.variables) - 1
        if color_count % 2 == 0:
            lower_slice = (color_count / 2) * -1
            upper_slice = color_count / 2
        else:
            lower_slice = ((color_count - 1) / 2) * -1
            upper_slice = (color_count + 1) / 2
        graph_conf["color_list"] = colors_lower[lower_slice:] + colors_mid + colors_upper[0:upper_slice]

        # graph_conf from graph
        graph_items = (
            "color_list",
            "file_prefix",
            "file_suffix",
            "graph_type",
            "limits",
            "x_breaks",
            "x_labels",
            "title",
            "vlab",
            "xlab",
            "ylab",
        )
        for item in graph_items:
            try:
                graph_conf[item] = rulemod.graphs[gkey][item]
            except:
                try:
                    graph_conf[item] = getattr(rulemod, item)
                except:
                    if item not in graph_conf:
                        graph_conf[item] = None
        if args.debug:
            print "DEBUG: graph_conf:"
            pprint(graph_conf)
            print

        # plot_data
        plot_data = data.copy()
        plot_data = plot_data[plot_data["Graph"] == gkey]
        plot_data.rename(
            columns={
                conf["vlab"]: graph_conf["vlab"],
                conf["xlab"]: graph_conf["xlab"],
                conf["ylab"]: graph_conf["ylab"],
            },
            inplace=True,
        )
        plot_data.index = range(1, len(plot_data) + 1)
        if args.debug:
            print "DEBUG: plot_data:"
            pprint(plot_data)
            print

        # Create plot
        if args.graph:
            plot = (
                ggplot.ggplot(
                    ggplot.aes(x=graph_conf["xlab"], y=graph_conf["ylab"], color=graph_conf["vlab"]), data=plot_data
                )
                + ggplot.ggtitle(graph_conf["title"])
                + ggplot.theme_gray()
                + ggplot.scale_colour_manual(values=graph_conf["color_list"])
            )
            plot.rcParams["font.family"] = "monospace"
            if graph_conf["x_breaks"] and graph_conf["x_labels"]:
                plot += ggplot.scale_x_discrete(breaks=graph_conf["x_breaks"], labels=graph_conf["x_labels"])
            if graph_conf["limits"]:
                plot += ggplot.ylim(graph_conf["limits"][0], graph_conf["limits"][1])
            if graph_conf["graph_type"] == "bars":
                plot += ggplot.geom_line(size=20)
                text_data = plot_data[plot_data["Count"] > 0]
                text_data.index = range(0, len(text_data))
                outcomes = dict(text_data[graph_conf["xlab"]])
                percents = dict(text_data[graph_conf["ylab"]])
                for k in outcomes:
                    percent = "%4.1f%%" % percents[k]
                    x = outcomes[k]
                    y = percents[k] + 4
                    color = graph_conf["color_list"][k]
                    plot += ggplot.geom_text(label=[percent], x=[x, x + 1], y=[y, y - 1], color=color)
            else:
                plot += ggplot.geom_line()
                plot += ggplot.geom_point(alpha=0.3, size=50)
            if hasattr(rulemod, "update_plot"):
                plot = rulemod.update_plot(gkey, graph_conf, plot, plot_data)
            if args.dumpsave:
                filename = "/dev/null"
            else:
                filename = "%s%s.png" % (graph_conf["file_prefix"], graph_conf["file_suffix"])
            ggplot.ggsave(filename, plot, format="png", dpi=300)

    return 0
コード例 #42
0
def plot_after_transmission_results(data, path_names):
    """Plot life months per scenario against each varied input value.

    Reads the transmission input workbook below ``path_names['transmission']``,
    pairs every entry of ``data`` whose key contains 'HIV+', 'HIV-' or
    'Incidence' with the matching input value, and saves one faceted line
    chart into '<transmission>/Input files/Plots for final runs'.

    Args:
        data: Mapping keyed by run name; each value provides a 'popstats'
            table with 'LMs_' and 'RUN_NAME_' columns.
        path_names: Mapping with a 'transmission' directory entry.
    """
    # import input data for transmission analysis
    var_and_val = pd.DataFrame(columns=['x', 'Variable'], index=range(0, 12))
    plot_lm = pd.DataFrame(
        columns=['x', 'Life Months', 'Scenario', 'Variable'],
        index=range(0, 24))
    workbook = os.path.join(path_names['transmission'], 'Input files',
                            'transmission_rate_multiplier_required_inputs.xlsx')
    data_in = pd.read_excel(workbook)

    # Index-aligned: input-sheet column, its base value, its plot label.
    col = [
        'Yearly incidence in MSM',
        'Number of HIV uninfected individuals (HRG size)',
        'Number of HIV infected individuals in primary cohort at t=0'
    ]
    base_val = [0.009, 2960000, 136400]
    col_adj = ['Incidence', 'Uninfected', 'Infected']

    for column, base, label in zip(col, base_val, col_adj):
        # First sheet row where the column departs from its base value.
        differs = data_in.loc[:, column] != base
        start = data_in.loc[differs, column].index.values[0]
        target_rows = slice(start - 1, start + 3 - 1)
        var_and_val.loc[target_rows, 'x'] = data_in.loc[start:start + 3,
                                                        column].values
        var_and_val.loc[target_rows, 'Variable'] = label

    # Substring of the run name -> position in col_adj, tested in this order.
    markers = (('HIV+', 2), ('HIV-', 1), ('Incidence', 0))
    seen = [-1, -1, -1]
    # NOTE(review): the first pass addresses row labels -2..-1, which are not
    # in plot_lm's 0-based index; presumably the first key of `data` matches
    # no marker - confirm.
    first_row = -2
    for var in data:
        for marker, pos in markers:
            if marker not in var:
                continue
            seen[pos] += 1
            pair = slice(first_row, first_row + 1)
            of_kind = var_and_val['Variable'] == col_adj[pos]
            plot_lm.loc[pair, 'x'] = var_and_val.loc[
                of_kind, 'x'].values[seen[pos]]
            plot_lm.loc[pair, 'Variable'] = var_and_val.loc[
                of_kind, 'Variable'].values[seen[pos]]
            stats = data[var]['popstats']
            plot_lm.loc[pair, 'Life Months'] = stats.loc[:, 'LMs_'].values
            plot_lm.loc[pair, 'Scenario'] = stats.loc[:, 'RUN_NAME_'].values
            break

        # Every key advances the write position, matched or not.
        first_row += 2

    # plot
    save_path = os.path.join(path_names['transmission'], r'Input files',
                             r'Plots for final runs')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    chart = (ggplot(aes(x='x', y='Life Months', color='Scenario'), plot_lm) +
             geom_line() + facet_wrap('Variable', scales='free'))
    chart.save(os.path.join(save_path, 'Comparison of '))
コード例 #43
0
ファイル: ALSmethod.py プロジェクト: jingyu0803/RegLiner
def graph(y):
    """Return a point-and-line ggplot of ``y`` against its element index.

    The x axis column is named 'iteration' and the y axis column 'RMSE'.
    """
    frame = pd.DataFrame({'iteration': list(range(len(y))), 'RMSE': y})
    mapping = gg.aes(x='iteration', y='RMSE')
    return gg.ggplot(mapping, data=frame) + gg.geom_point() + gg.geom_line()
コード例 #44
0
ファイル: demdp03.py プロジェクト: lnsongxf/CompEcon-python
# Report the mean and the standard deviation of the 'profit' column and of
# the indicator "i == 'active'", each formatted with the template `f`.
print(f.format('Profit Contribution', data['profit'].mean()))
print(f.format('Activity', (data['i'] == 'active').mean()))
print('\nErgodic Standard Deviations\n')
print(f.format('Profit Contribution', data['profit'].std()))
print(f.format('Activity', (data['i'] == 'active').std()))

# Plot Simulated and Expected Continuous State Path
# data2: mean profit per 'time' value; 'time' is copied back from the index
# into a regular column so it can be used as an aesthetic.
data2 = data[['time', 'profit']].groupby('time').mean()
data2['time'] = data2.index

print(data2)
print(data2.columns)

# Mean profit path only.
ppp = ggplot(aes('time','profit'),
             data=data2) + \
      geom_line()

print(ppp)

# Points for the rows with _rep < 3, overlaid with the mean path.
ppp = ggplot(aes('time','profit','_rep'),
             data=data[data._rep <3]) + \
      geom_point() + \
      geom_line(aes('time','profit'), data=data2)

print(ppp)

# Same comparison through demo.qplot, drawing those rows as lines.
print(
    demo.qplot('time', 'profit', '_rep', data=data[data._rep < 3], geom='line')
    + geom_line(aes('time', 'profit'), data=data2))
'''
subdata = data[data['_rep'] < 3][['time', 'profit', '_rep']]
コード例 #45
0
# Stack three groups of statistic columns (brks[:5], brks[5:10], brks[10:])
# into one long table with a 'method' label per group, then plot them.
# NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the
# documented replacement if this script is run on a current pandas.
#total-based
dftmp = df[['n_sub']+brks[:5]].melt(id_vars=['n_sub'],value_vars=brks[:5], var_name = 'stat',value_name = 'value')
dftmp['method']=['(Total-Expected Total)/Expected Total']*dftmp['n_sub'].size
df_stacked = dftmp
#enhancement-based
dftmp = df[['n_sub']+brks[5:10]].melt(id_vars=['n_sub'],value_vars=brks[5:10], var_name = 'stat',value_name = 'value')
dftmp['method']=['(Enhanc-Expected Enhanc)/Expected Enhanc']*dftmp['n_sub'].size
df_stacked = df_stacked.append(dftmp)
#enhancements + full sample background
dftmp = df[['n_sub']+brks[10:]].melt(id_vars=['n_sub'],value_vars=brks[10:], var_name = 'stat',value_name = 'value')
dftmp['method']=['(Enhanc+Expected Backgr-Expected Total)/Expected Total']*dftmp['n_sub'].size
df_stacked = df_stacked.append(dftmp)
# The percentile label is built from characters 1-2 of the statistic name.
df_stacked['percentile']=['{0}th%'.format(a[1:3]) for a in df_stacked['stat']]
#plots
#compare all 3 (one facet per method)
plt1 = gg.ggplot(df_stacked, gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.facet_wrap('method')+gg.ggtitle('Bias comparison {0}'.format(title))
plt1.save(filename = r'..\charts\drivebias_laqn_{0}.png'.format(species), width=None, height=None, dpi=300)

#plot total alone for presentation
plt2 = gg.ggplot(df_stacked[df_stacked['method']=='(Total-Expected Total)/Expected Total'], gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.ylim(-100,100)+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.ggtitle('Bias comparison {0}'.format(title))
# Black-and-white theme with the font size raised to 16 via its rcParams.
t = gg.theme_bw()
t._rcParams['font.size']=16
plt2 = plt2+t
plt2.save(filename = r'..\charts\drivebias_laqn_{0}_total.png'.format(species), width=None, height=None, dpi=300)

#plot enhancement alone for presentation
plt3 = gg.ggplot(df_stacked[df_stacked['method']=='(Enhanc+Expected Backgr-Expected Total)/Expected Total'], gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.ylim(-100,100)+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.ggtitle('Bias comparison {0}'.format(title))
t = gg.theme_bw()
t._rcParams['font.size']=16
plt3 = plt3+t
plt3.save(filename = r'..\charts\drivebias_laqn_{0}_enhanc.png'.format(species), width=None, height=None, dpi=300)
# Show the first rows of the loaded review table.
print("#######################################")
print("打印所挖掘的文本文件 text-movie.xls 前几行")
print(df.head())

#text = df.comments.iloc[0]   sentiment experiment on a single review; the iloc index selects which review, numbered from 0
#s = SnowNLP(text)
#
#print(s.sentiments)

def get_sentiment_cn(text):
    """Return the ``sentiments`` value SnowNLP computes for ``text``."""
    return SnowNLP(text).sentiments


# Score every review and store the result in a new 'sentiment' column.
df["sentiment"] = df.comments.apply(get_sentiment_cn)
print("#######################################")
print("打印所挖掘的文本文件 text-movie.xls 部分影评及其情感分析值")
print(df)

# Summary statistics: mean and median of the sentiment scores.
print("#######################################")
print("重要信息")
print("所有影评的平均值为:", df.sentiment.mean())
print("所有影评的中位数为:", df.sentiment.median())

# Sentiment over time as points plus a blue line, with ISO-style date labels.
# NOTE(review): the plot object is built but neither assigned, printed nor
# saved here - confirm it is meant to be displayed by a notebook.
ggplot.ggplot(ggplot.aes(x="date", y="sentiment"),
              data=df) + ggplot.geom_point() + ggplot.geom_line(
                  color='blue') + ggplot.scale_x_date(
                      labels=ggplot.date_format("%Y-%m-%d"))

# First five rows after sorting by sentiment in ascending order.
df.sort_values(['sentiment'])[:5]
コード例 #47
0
"""Plot target variable as time series."""

import get_data
from ggplot import aes, geom_line, facet_wrap, ggplot


if __name__ == "__main__":
    # Load the data, draw one faint line per 'date' group in a separate
    # panel for each 'name', and save the figure as a PDF.
    frame = get_data.get_all_data()
    mapping = aes('datetime', 'cap', group='date')
    chart = ggplot(frame, mapping) + geom_line(alpha=0.2) + facet_wrap('name')
    chart.save('../output/time_series.pdf')
コード例 #48
0
ファイル: ggplotSample.py プロジェクト: xxwei/TraderCode
# -*- coding:utf-8 -*-
# Prepare the data
import ggplot as gp # I would rather not use "import *"
import pandas as pd
meat = gp.meat


# Beef over time three ways: points only, line only, then both layers.
p=gp.ggplot(gp.aes(x='date',y='beef'),data=meat)+gp.geom_point(color='red')+gp.ggtitle(u'散点图')
print (p)
p=gp.ggplot(gp.aes(x='date',y='beef'),data=meat)+gp.geom_line(color='blue')+gp.ggtitle(u'折线图')
print (p)
p=gp.ggplot(gp.aes(x='date',y='beef'),data=meat)+gp.geom_point(color='red')+gp.geom_line(color='blue')+gp.ggtitle(u'散点图+折线图')
print (p)

# Gather the variables to be plotted into a single column
meat_lng = pd.melt(meat[['date','beef','pork','broilers']],id_vars='date')
# meat_lng contains date, value (the column of variable values) and variable (the column of variable names)
p = gp.ggplot(gp.aes(x='date',y='value',colour='variable'),data=meat_lng)+\
    gp.geom_point()+gp.geom_line()
print (p)




# Same long-form data, one panel per variable.
meat_lng = pd.melt(meat[['date','beef','pork','broilers']],id_vars='date')
p = gp.ggplot(gp.aes(x='date',y='value',colour='variable'),data=meat_lng)+gp.geom_point()+gp.facet_wrap('variable')
print (p)

# Histogram of the beef column.
p = gp.ggplot(gp.aes(x='beef'),data=meat)+gp.geom_histogram()
print (p)
コード例 #49
0
# Benchmark every (method, model, rtol) combination: time the call, measure
# its worst deviation from the stored gold standard, and collect one record
# per run.
data = []
for method in methods:
    for model in models:
        for rtol in rtols:
            print('method: {} model: {} rtol: {}'.format(
                method.name, model.name, rtol),
                  end='')

            # Run
            tic = time.time()
            result = method(model, rtol)
            toc = time.time() - tic

            # Compare to gold standard
            standard = gold_standards[model.name]
            diff = result - standard.values
            # Use the magnitude of the deviation: a signed maximum would miss
            # large negative errors and could yield a negative 'err', which
            # cannot be drawn on the log-scaled x axis below.
            max_rel_diff = np.max(np.abs(diff) / standard.max)

            # Append to table
            record = (method.name, model.name, rtol, max_rel_diff, toc)
            print(' err: {} toc: {}'.format(max_rel_diff, toc))
            data.append(record)

# Turn the collected records into a table, one row per run.
data = DataFrame(data, columns=['method', 'model', 'rtol', 'err', 'time'])

# Error-versus-runtime chart on log-log axes, one colour per method.
print(
    gg.ggplot(data, gg.aes(x='err', y='time', color='method')) +
    gg.geom_point(size=60.0) + gg.geom_line() + gg.scale_x_log() +
    gg.scale_y_log() + gg.xlim(1e-10, 1e-2))
コード例 #50
0
ファイル: plot.py プロジェクト: crackerboy/kotori
    def render_png(self, buffer):
        """
        Render timeseries plots as PNG images.
        """

        bucket = self.bucket

        import matplotlib.font_manager
        matplotlib.font_manager.findSystemFonts(fontpaths=None, fontext='ttf')

        import matplotlib
        try:
            matplotlib.use('agg')
        except:
            pass

        import matplotlib.pyplot as plt

        df = self.dataframe
        #df = df.set_index(['time'])

        # Compute datetime range boundaries
        datetime_min = min(df.time)
        datetime_max = max(df.time)
        datetime_delta = datetime_max - datetime_min
        #xmin = pd.to_datetime('2016-05-01')
        #xmax = pd.to_datetime('2016-08-01')

        renderer = bucket.tdata.get('renderer', 'matplotlib')
        if renderer == 'matplotlib':

            # Bring DataFrame into appropriate format
            df = dataframe_index_and_sort(df, 'time')

            # Propagate non-null values forward or backward, otherwise
            # matplotlib would not plot the sparse data frame properly.
            # With time series data, using pad/ffill is extremely common so that the “last known value” is available at every time point.
            # http://pandas.pydata.org/pandas-docs/stable/missing_data.html#filling-missing-values-fillna
            df.fillna(method='pad', inplace=True)

            # Make plots of DataFrame using matplotlib / pylab.
            # http://matplotlib.org/
            # http://pandas.pydata.org/pandas-docs/version/0.13.1/visualization.html
            # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.plot.html
            # https://markthegraph.blogspot.de/2015/05/plotting-time-series-dataframes-in.html

            if 'style' in bucket.tdata and bucket.tdata.style:
                try:
                    plt.style.use(bucket.tdata.style)
                except Exception:
                    error_message = u'# Unknown style "{style_name}", available styles: {available}'.format(
                        style_name=bucket.tdata.style,
                        available=plt.style.available)
                    log.error(error_message)
                    return self.request.error_response(bucket, error_message)

            # Basic plotting
            #df.plot()
            #plt.savefig(buffer)

            # Advanced plotting
            ax = df.plot()
            fig = ax.get_figure()

            # Figure heading
            title = fig.suptitle(bucket.title.human, fontsize=12)
            #fig.tight_layout(pad=1.5)

            # Axis and tick labels
            ax.set_xlabel('Time')
            ax.set_ylabel('Value')
            ax.tick_params(axis='x', labelsize='smaller')

            # Grid and legend
            # http://matplotlib.org/users/legend_guide.html
            # http://matplotlib.org/examples/pylab_examples/legend_demo3.html
            ax.grid(True)

            legend_params = dict(ncol=1,
                                 loc='center left',
                                 bbox_to_anchor=(1, 0.5),
                                 fontsize='small',
                                 shadow=True,
                                 fancybox=True)
            legend = ax.legend(**legend_params)  # title='Origin'
            #ax.legend(**legend_params) # title='Origin'

            # Sort list of legend labels
            # http://stackoverflow.com/questions/22263807/how-is-order-of-items-in-matplotlib-legend-determined/27512450#27512450

            # Axis formatting
            #ax.xaxis_date()
            #ax.autoscale_view()

            # Compute appropriate locator and formatter
            locator, formatter = matplotlib_locator_formatter(datetime_delta,
                                                              span=1)

            #ax.xaxis.set_major_locator(locator)
            ax.xaxis.set_major_formatter(formatter)

            # Figure formatting
            fig.autofmt_xdate()

            # http://stackoverflow.com/questions/10101700/moving-matplotlib-legend-outside-of-the-axis-makes-it-cutoff-by-the-figure-box/10154763#10154763
            fig.savefig(buffer,
                        bbox_extra_artists=(title, legend),
                        bbox_inches='tight')

            # TODO: Add annotations
            """
            # https://stackoverflow.com/questions/11067368/annotate-time-series-plot-in-matplotlib
            # https://stackoverflow.com/questions/17891493/annotating-points-from-a-pandas-dataframe-in-matplotlib-plot
            import matplotlib.dates as mdates
            fig = plot.draw()
            ax = fig.axes[0]
            ax.annotate('Test', (mdates.date2num(x[1]), y[1]), xytext=(15, 15),
                textcoords='offset points', arrowprops=dict(arrowstyle='-|>'))

            """

        elif renderer == 'ggplot':

            # https://yhat.github.io/ggplot/notebook.html?page=build/docs/examples/Multiple%20Line%20Plot.html
            # https://stackoverflow.com/questions/23541497/is-there-a-way-to-plot-a-pandas-series-in-ggplot
            # https://stackoverflow.com/questions/24478925/is-it-possible-to-plot-multiline-chart-on-python-ggplot/24479513#24479513

            # https://github.com/yhat/ggplot/blob/master/docs/how-to/Building%20Faceted%20(or%20Trellised)%20Plots.ipynb
            # https://github.com/yhat/ggplot/blob/master/docs/how-to/Annotating%20Plots%20-%20Titles%20and%20Labels.ipynb
            # https://github.com/yhat/ggplot/blob/master/docs/how-to/How%20to%20make%20xkcd%20style%20graphs.ipynb

            from ggplot import ggplot, aes, qplot, geom_line, geom_text, ggtitle, stat_smooth, scale_x_date, date_format, date_breaks
            from ggplot import theme_538, theme_bw, theme_gray, theme_xkcd

            # https://stackoverflow.com/questions/24478925/is-it-possible-to-plot-multiline-chart-on-python-ggplot/24479513#24479513
            # https://stackoverflow.com/questions/23541497/is-there-a-way-to-plot-a-pandas-series-in-ggplot

            # Convert DataFrame from wide to long format, retaining "time" as visible column
            df = dataframe_wide_to_long_indexed(df, 'time')
            dataframe_index_to_column(df, 'time')

            # Compute appropriate locator and formatter
            locator, formatter = matplotlib_locator_formatter(datetime_delta,
                                                              span=2)

            plot = ggplot(df, aes(x='time', y='value', color='variable'))\
                   + geom_line()\
                   + scale_x_date(limits=(datetime_min, datetime_max), breaks=locator, labels=formatter)\
                   + ggtitle(bucket.title.human)

            # Axis labels
            plot.xlab = 'Time'
            plot.ylab = 'Value'

            # Labs
            #+ stat_smooth(colour='blue', span=0.2) \
            #+ geom_text(aes(x='x', y='y'), label='hello world')
            #+ scale_x_date(limits=(xmin, xmax), breaks=date_breaks('1 hour'), labels=date_format('%Y-%m-%d\n%H:%M'))

            theme_name = bucket.tdata.get('theme')
            # TODO: Switching themes will leak some matplotlib/pyplot properties, postpone to future versions
            if theme_name:
                if isinstance(theme_name, float):
                    theme_name = str(int(theme_name))
                try:
                    theme = eval('theme_' + theme_name)
                    plot += theme()
                except Exception:
                    error_message = u'# Unknown theme "{theme_name}"'.format(
                        theme_name=theme_name)
                    log.error(error_message)
                    return self.request.error_response(bucket, error_message)

            plot.save(buffer)

            # Attempt to reset global matplotlib parameters to get rid of xkcd theme style
            """
            import matplotlib as mpl
            #mpl.rcParams = mpl.rc_params()
            #del mpl.rcParams['path.sketch']
            #del mpl.rcParams['path.effects']
            #mpl.rcParams = mpl.defaultParams.copy()
            #mpl.rcParams.clear()
            #mpl.rcdefaults()
            #mpl.rcParams = mpl.rcParamsOrig
            if 'axes.prop_cycle' in mpl.rcParams:
                del mpl.rcParams['axes.prop_cycle']
            mpl.rcParams.update({'path.sketch': None, 'path.effects': []})
            mpl.rcParams.update(mpl.rc_params())
            """

        elif renderer == 'seaborn':

            # TODO: We don't do statistical plotting yet.

            # https://stanford.edu/~mwaskom/software/seaborn/examples/timeseries_from_dataframe.html
            # https://stanford.edu/~mwaskom/software/seaborn/generated/seaborn.tsplot.html
            import seaborn as sns
            sns.set(style="darkgrid")
            #sns.tsplot(data=gammas, time="timepoint", unit="subject", condition="ROI", value="BOLD signal")
            #print dir(df)
            #df['time'] = pandas.to_datetime(df['time'])
            #df = df.set_index(df.time)
            pprint(df)
            sns.tsplot(data=df, time="time")
            #sns.tsplot(data=df)
            plt.savefig(buffer)

        else:
            error_message = u'# Unknown renderer "{renderer_name}"'.format(
                renderer_name=renderer)
            log.error(error_message)
            return self.request.error_response(bucket, error_message)
コード例 #51
0
ファイル: trends.py プロジェクト: hychoi05/gtrends-beta
def quarterly_queries(keywords, category, cookies, session, domain, throttle, filing_date, ggplot, month_offset=(-12, 12), trends_url=DEFAULT_TRENDS_URL):
	"""Query interest data quarter by quarter around a date and merge it with the overall trend.

	The period spanning ``month_offset[0]`` to ``month_offset[1]`` months
	around ``filing_date`` is cut into three-month windows that are queried
	one by one. One further query covers the whole period; its interpolated
	values are multiplied by the per-quarter change factors to produce the
	merged series.

	Args:
		keywords: sequence of keyword objects. ``keywords[0]`` supplies
			``title``, ``keyword`` and ``desc`` and receives a
			``querycounts`` attribute as a side effect.
		category: passed through to ``_query_parameters``.
		cookies: passed through to ``_get_response``.
		session: passed through to ``_get_response``.
		domain: substituted into ``trends_url``.
		throttle: passed to ``throttle_rate`` before every quarterly query.
		filing_date: reference date the queried period is centred on.
		ggplot: when truthy, additionally build and print a ggplot chart of
			the weekly, merged and daily series.
		month_offset: ``(months back, months forward)`` to query. Only
			indexed, never mutated.
		trends_url: URL template with a ``{domain}`` placeholder.

	Returns:
		A list whose first element is the heading
		``["Date", keywords[0].title]`` followed by ``[date_string, value]``
		rows.

	Raises:
		UnboundLocalError: if ``ggplot`` is truthy but the whole-period query
			returned at most one row, so there is no series to plot.
	"""

	def fetch(period_start, period_end):
		# One request/parse/validate round trip for the given window.
		response_args = {'url': trends_url.format(domain=domain),
						'params': _query_parameters(period_start, period_end, keywords, category),
						'cookies': cookies,
						'session': session}
		return _check_data(keywords,
						_process_response(
							_get_response(**response_args)))

	aw_range = arrow.Arrow.range
	begin_period = aget(filing_date).replace(months=month_offset[0])
	ended_period = aget(filing_date).replace(months=month_offset[1])

	# Set up date ranges to iterate queries across
	start_range = aw_range('month', YYYY_MM(begin_period),
									YYYY_MM(ended_period))
	ended_range = aw_range('month', YYYY_MM(begin_period).replace(months=3),
									YYYY_MM(ended_period).replace(months=3))

	# Keep every third month so that each (start, end) pair spans a quarter.
	start_range = [r.datetime for r in start_range][::3]
	ended_range = [r.datetime for r in ended_range][::3]

	# Fix last date if incomplete quarter (offset -1 week from today)
	last_week = arrow.utcnow().replace(weeks=-1).datetime
	start_range = [d for d in start_range if d < last_week]
	ended_range = [d for d in ended_range if d < last_week]
	if len(ended_range) < len(start_range):
		ended_range += [last_week]

	# Iterate attention queries through each quarter.
	# Every start is already earlier than last_week thanks to the filter above.
	all_data = []
	missing_queries = []    # use this to scale IoT later.
	for start, end in zip(start_range, ended_range):
		print("Querying period: {s} ~ {e}".format(s=start.date(),
												  e=end.date()))
		throttle_rate(throttle)

		query_data = fetch(start, end)

		if all(int(vals)==0 for date,vals in query_data):
			# Nothing but zeros came back: substitute one zero row per day.
			query_data = [[date, '0'] for date in aw_range('day', start, end)]
			missing_queries.append('missing')
		elif len(query_data[0][0]) > 10:
			# Date labels longer than 10 characters are treated as weekly rows.
			missing_queries.append('weekly')
		else:
			missing_queries.append('daily')

		try:
			if not aligned_weekly(query_data, all_data):
				## Workaround: shift filing date
				q1 = weekly_date(all_data[-1][-1][0])
				q2 = weekly_date(query_data[0][0])

				if q1 < q2:
					start = arrow.get(start).replace(months=-1)
					## Do a new 4month query, overlap/replace previous month.
					query_data = fetch(start, end)
					if all_data[:-1] != []:
						q2 = weekly_date(query_data[0][0], 'start')
						all_data[-1] = [d for d in all_data[-1] if q2 > weekly_date(d[0])]

				elif q1 >= q2:
					# if q1 > 1st date in query_data, remove the first few entries
					query_data = [d for d in query_data if q1 < weekly_date(d[0])]

		except IndexError:
			# Nothing to align against (e.g. all_data is still empty); keep
			# the window unchanged.
			pass
		except Exception as err:
			# Best effort: keep the unaligned window and report the problem
			# rather than aborting the run or opening a debugger.
			print("Warning: could not align period {s} ~ {e}: {err!r}".format(
				s=start, e=end, err=err))

		finally:
			all_data.append(query_data)

	# Get overall long-term trend data across entire queried period
	s = begin_period.replace(weeks=-2).datetime
	e1 = arrow.get(ended_range[-1]).replace(months=+1).datetime
	e2 = arrow.utcnow().replace(weeks=-1).datetime
	e = min(e1,e2)
	print("\n=> Merging with overall period: {s} ~ {e}".format(s=s.date(), e=e.date()))

	query_data = fetch(s, e)

	if len(query_data) > 1:
		# compute changes in IoI (interest over time) per quarter
		# and merged quarters together after interpolating data
		# with daily data.
		# We cannot mix quarters as Google normalizes each query
		all_ioi_delta = []
		qdat_interp = []
		for quarter_data in all_data:
			if quarter_data != []:
				quarter_data = [x for x in quarter_data if x[1] != '']
				all_ioi_delta += list(zip(*change_in_ioi(*zip(*quarter_data))))

				if ggplot:
					# for plotting only
					qdat_interp += interpolate_ioi(*zip(*quarter_data))[1]

		qdate = [date for date, delta in all_ioi_delta]
		delta_ioi = [delta for date, delta in all_ioi_delta]
		# Long labels keep only their last 10 characters.
		ydate = [date[-10:] if len(date) > 10 else date for date, ioi in query_data]
		try:
			yIoI  = [float(ioi) for date, ioi in query_data]
		except (TypeError, ValueError):
			# Presumably the trailing row is non-numeric; retry without it.
			# NOTE(review): ydate keeps that row's date - confirm that
			# interpolate_ioi tolerates the length mismatch.
			yIoI = [float(ioi) for date, ioi in query_data[:-1]]
		ydate, yIoI = interpolate_ioi(ydate, yIoI)

		# match quarterly and yearly dates and get correct delta IoI
		common_dates = set(ydate) & set(qdate)    # set: O(1) membership below
		common_date = sorted(common_dates)

		delta_ioi = [delta for date, delta in zip(qdate, delta_ioi)
					if date in common_dates]
		y_ioi = [y for x,y in zip(ydate, yIoI) if x in common_dates]

		# calculate daily %change in IoI and adjust weekly values
		adj_IoI = [ioi*mult for ioi,mult in zip(y_ioi, delta_ioi)]

		adj_all_data = [[str(date.date()), round(ioi, 2)] for date,ioi in zip(common_date, adj_IoI)]
	else:
		# No usable whole-period data: return the interpolated quarterly
		# values as integers.
		adj_all_data = [[str(date.date()), int(zero)] for date, zero in zip(*interpolate_ioi(*zip(*sum(all_data,[]))))]

	heading = ["Date", keywords[0].title]
	# Record, per quarter start date, which kind of data the query returned.
	querycounts = list(zip((d.date() for d in start_range), missing_queries))
	keywords[0].querycounts = querycounts

	if not ggplot:
		return [heading] + adj_all_data

	## GGplot Only
	else:
		# GGPLOT MERGED GTRENDS PLOTS:
		import pandas as pd
		# This import rebinds the local name ``ggplot`` (the flag parameter)
		# to the plotting class; the flag is not read after this point.
		from ggplot import ggplot, geom_line, ggtitle, ggsave, scale_colour_manual, ylab, xlab, aes
		try:
			ydat = pd.DataFrame(list(zip(common_date, y_ioi)), columns=["Date", 'Weekly series'])
			mdat = pd.DataFrame(list(zip(common_date, adj_IoI)), columns=['Date', 'Merged series'])
			# NOTE(review): qdat_interp is not filtered to common_date -
			# confirm that it lines up with these dates.
			qdat = pd.DataFrame(list(zip(common_date, qdat_interp)), columns=['Date', 'Daily series'])
			ddat = ydat.merge(mdat, on='Date').merge(qdat,on='Date')
			ddat['Date'] = list(map(pd.to_datetime, ddat['Date']))

			ydat['Date'] = list(map(pd.to_datetime, ydat['Date']))
			mdat['Date'] = list(map(pd.to_datetime, mdat['Date']))
			qdat['Date'] = list(map(pd.to_datetime, qdat['Date']))
		except UnboundLocalError:
			# The series are only assigned when the whole-period query
			# returned more than one row.
			raise UnboundLocalError("No Interest-over-time to plot")

		colors = [
				'#77bde0', # blue
				'#b47bc6',   # purple
				'#d55f5f'    # red
				]

		entity_type = keywords[0].desc

		g = ggplot(aes(x='Date', y='Daily series' ), data=ddat) + \
			geom_line(aes(x='Date', y='Daily series'), data=qdat, alpha=0.5, color=colors[0]) + \
			geom_line(aes(x='Date', y='Merged series'), data=mdat, alpha=0.9, color=colors[1]) + \
			geom_line(aes(x='Date', y='Weekly series'), data=ydat, alpha=0.5, color=colors[2], size=1.5) + \
			ggtitle("Interest over time for '{}' ({})".format(keywords[0].keyword, entity_type)) + \
			ylab("Interest Over Time") + xlab("Date")

		print(g)
		# ggsave(BASEDIR + "/iot_{}.png".format(keywords[0].keyword), width=15, height=5)
		return [heading] + adj_all_data
コード例 #52
0
    # Unconditional branch (`if True`): fill the row of `plot_coverage` at
    # label `t_float - 1 + t_max - 1` with the values of the 'Static' model.
    if True:
        # Both probability columns of that row get the fixed value 0.15; the
        # trailing comment keeps the get_weibull call that is not used here.
        plot_coverage.loc[
            t_float - 1 + t_max - 1,
            'Transition Probability'], plot_coverage.loc[
                t_float - 1 + t_max - 1,
                'Cumulative Probability'] = 0.15, 0.15  #get_weibull(t = t_float, coverage = input_par['uptake'], duration = 1)
        plot_coverage.loc[t_float - 1 + t_max - 1, 'Model'] = 'Static'
        # The same row records `t_float` as its simulation month.
        plot_coverage.loc[t_float - 1 + t_max - 1,
                          'Simulation month'] = t_float

# Plot the transition and cumulative probabilities held in `plot_coverage`.
# `save_dir` is the directory containing this script; it is not used in the
# statements visible here.
save_dir = os.path.dirname(os.path.abspath(__file__))
# Line chart of 'Transition Probability' per 'Simulation month', one colour
# per 'Model'.
gg_trans_p = ggplot(
    aes(x='Simulation month', y='Transition Probability', color='Model'),
    data=plot_coverage
) + geom_line() + ggtitle(
    'Weibull transition probabilities for PrEP uptake \n(Shape = 2, Coverage/Uptake = 15%, Target horizon for coverage/uptake = 30 months)'
)  #\
#geom_vline(aes(xintercept = input_par['duration']), linetype = 'dashed', color = 'gray') + scale_x_continuous(breaks = sort([min(plot_coverage['Simulation month']), max(plot_coverage['Simulation month'])], length.out=5), input_par['duration']) +\
#geom_hline(aes(yintercept = input_par['uptake']), linetype = 'dashed', color = 'gray') + scale_y_continuous(breaks = sort(c(seq(min(plot_coverage['Transition Probability']), max(plot_coverage['Transition Probability']), length.out=5), input_par['uptake']))) +\

# Line chart of 'Cumulative Probability', one colour per 'Model'.
# NOTE(review): x is mapped to a column named 'index' here, whereas the plot
# above uses 'Simulation month' - confirm that `plot_coverage` has an 'index'
# column.
gg_cumul_p = ggplot(aes(x='index', y='Cumulative Probability', color='Model'),
                    data=plot_coverage) + geom_line()
# Both plots are saved under their title; the file names carry no extension.
gg_trans_p.save(filename='Weibull transition probabilities for PrEP uptake')
gg_cumul_p.save(filename='Weibull cumulative probabilities for PrEP uptake')
#%% get variation of tx rate
# Single target point: coverage 0.1 paired with a time of 30.
x_target_cov = np.array([0.1])
y_target_time = np.array([30])

# get_threshold_crossing returns two values, unpacked into `res_dict` and `pop`.
res_dict, pop = get_threshold_crossing(x_target_cov, y_target_time,
                                       input_par['sus_to_inf'])
def ggplot_img(xt):
    """Plot the sequence ``xt`` against its position as a black line and print the plot.

    ``xt`` must support ``len()``; it is wrapped in a DataFrame with the
    columns ``n`` (0 .. len(xt) - 1) and ``xt`` (the values).
    """
    # The parameter name is rebound to the DataFrame that feeds the plot.
    xt = pd.DataFrame({'n': range(len(xt)), 'xt': xt})
    p = gp.ggplot(gp.aes(x='n', y='xt'), data=xt) + gp.geom_line(color='black')
    # Printing the plot object is what displays it.
    print(p)