def plot(self, what='cumulative_payouts', include_ci=True):
    """Plot simulation results as a ggplot line chart.

    Args:
        what: ``'cumulative_payouts'``, ``'avg_accuracy'`` or ``'all'``
            (both outcomes stacked into one frame and faceted by plot name).
        include_ci: when True, a semi-transparent area between the ``ymin``
            and ``ymax`` columns is drawn under the line.

    Returns:
        The ggplot object with a ``geom_line`` layer added.

    Raises:
        ValueError: if ``what`` is not one of the supported options.
    """
    # Fail fast on a bad option, before importing or computing anything.
    if what not in ('cumulative_payouts', 'avg_accuracy', 'all'):
        raise ValueError('%s is not a valid option' % what)

    import ggplot as gg  # This is hacky ... need to DRY out the imports

    if what == 'cumulative_payouts':
        plt = self._plot_cumulative_payouts(include_ci=include_ci)
    elif what == 'avg_accuracy':
        plt = self._plot_avg_accuracy(include_ci=include_ci)
    else:  # what == 'all'
        import pandas as pd

        # Compute the summary once and share it between both sub-plots.
        summary = self.summary()
        p1 = self._plot_cumulative_payouts(include_ci=include_ci,
                                           summary=summary)
        p2 = self._plot_avg_accuracy(include_ci=include_ci, summary=summary)
        d1 = p1.data
        d2 = p2.data
        # Give both frames a common outcome column plus a facet label.
        d1['Outcome'] = d1['AverageCumulativePayout']
        d2['Outcome'] = d2['AverageAccuracy']
        d1['Plot'] = 'Cumulative Payouts'
        d2['Plot'] = 'Average Accuracy'
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        df = pd.concat([d1, d2], ignore_index=True)

        if include_ci:
            plt = gg.ggplot(gg.aes(x='Round', y='Outcome', ymin='ymin',
                                   ymax='ymax'),
                            data=df) + gg.geom_area(alpha=0.5)
        else:
            plt = gg.ggplot(gg.aes(x='Round', y='Outcome'), data=df)
        plt += gg.facet_grid('Plot', scales='free')

    return plt + gg.geom_line()
def plot_line(X, y, title=None, labelx=None, labely=None, save=False,
              colors=None):
    '''
    Build a line plot of every column of ``y`` against ``X``; optionally
    save it to a .pdf file instead of returning it.

    X      - values placed in the 'X' column of the plotting frame
    y      - DataFrame; each column becomes one series (read via ``y.iloc``)
    title  - plot title; also used to derive the output file name
             (spaces removed, '.' replaced by '-', '.pdf' appended)
    labelx, labely - axis labels
    save   - when True the plot is written to disk and nothing is returned
    colors - colour names; defaults to matplotlib's base + CSS4 colour names

    Returns the plot when ``save`` is False, otherwise None.
    Raises ValueError when ``save`` is True but no ``title`` was given,
    because the file name is derived from the title.
    '''
    if save and title is None:
        raise ValueError(
            "a title is required when save=True: "
            "the output file name is derived from it")

    df = pandas.DataFrame()
    if title is not None:
        img_title = title.replace(" ", "").replace(".", "-") + ".pdf"

    df['X'] = X
    n_series = y.shape[1]
    for i in range(n_series):
        df[str(i)] = y.iloc[:, i].values

    if colors is None:
        colors = list(
            dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS).keys())

    # The last row is dropped before plotting.
    df = df.iloc[0:df.shape[0] - 1, :]

    p = ggplot(df, aes(x='X'))
    for i in range(n_series):
        # NOTE(review): this membership test reads X.columns, so X is
        # presumably a DataFrame here -- confirm the intended condition.
        if colors not in X.columns.values:
            p = p + geom_line(aes(y=str(i), color=colors[i]))
        else:
            p = p + geom_point(aes(y=str(i), color=colors))
    p = p + xlab(labelx) + ylab(labely) + ggtitle(title)

    if save:
        p.save(img_title)
    else:
        return p
def main():
    """Simulate prices for 2017-12-20 and save three line charts.

    Writes ``hoge.png`` (observed series), ``hoge2.png`` (simulated series)
    and ``hoge3.png`` (observed minus simulated, with the observed series
    read three steps ahead).
    """
    params = load_params("./params.txt")
    test_data = load_data(datetime.date(2017, 12, 20))
    observed = test_data[0]
    sim_data = simulate(observed, params)

    n_obs = len(observed)
    # Difference against the observation three steps ahead; the trailing
    # entries that have no such observation stay at 0.
    dif_data = [observed[i + 3] - sim_data[i] for i in range(n_obs - 3)]
    dif_data += [0] * (n_obs - len(dif_data))

    observed_frame = pandas.DataFrame({
        't': range(6, n_obs),
        'price': observed[6:]
    })
    simulated_frame = pandas.DataFrame({
        't': range(12, len(sim_data) - 12),
        'price': sim_data[12:-12]
    })
    difference_frame = pandas.DataFrame({
        't': range(12, len(sim_data) - 12),
        'price': dif_data[12:-12]
    })

    mapping_args = {'x': 't', 'y': 'price'}
    observed_plot = ggplot.ggplot(ggplot.aes(**mapping_args),
                                  data=observed_frame)
    observed_plot = observed_plot + ggplot.geom_line()
    simulated_plot = ggplot.ggplot(ggplot.aes(**mapping_args),
                                   data=simulated_frame)
    simulated_plot = simulated_plot + ggplot.geom_line(color='blue')
    difference_plot = ggplot.ggplot(ggplot.aes(**mapping_args),
                                    data=difference_frame)
    difference_plot = difference_plot + ggplot.geom_line(color='blue')

    for chart, target in ((observed_plot, 'hoge.png'),
                          (simulated_plot, 'hoge2.png'),
                          (difference_plot, 'hoge3.png')):
        chart.save(target)
def plot_roc(self, experiment_type, to_plot):
    """Save a TPR-vs-FPR line plot with one coloured line per parameter.

    The ``parameter`` column of ``to_plot`` is overwritten in place with its
    string form. The image is written to
    ``self.results_path + experiment_type + "_" + self.mode + ".png"``.
    """
    # Strings make ggplot use a categorical colour scheme.
    to_plot.loc[:, "parameter"] = list(map(str, to_plot.loc[:, "parameter"]))

    base = gg.ggplot(data=to_plot,
                     aesthetics=gg.aes(x="FPR", y="TPR", colour="parameter"))
    curve = gg.geom_line(gg.aes(x="FPR", y="TPR", colour="parameter"))
    p = base + curve
    p = p + gg.ggtitle(experiment_type)
    p = p + gg.xlab("FPR")
    p = p + gg.ylab("TPR")

    out_file = self.results_path + experiment_type + "_" + self.mode + ".png"
    gg.ggsave(filename=out_file, plot=p)
def plot_outcomes(self, chart_title=None, use_ggplot=False):
    """ Plot the doses given to observed patients and their toxicity outcomes.

    :param chart_title: optional chart title; a verbose default is used when
                        omitted. A newline is appended to the title.
    :type chart_title: str
    :param use_ggplot: True to use ggplot, else matplotlib
    :type use_ggplot: bool
    :return: the ggplot object when use_ggplot is True; the matplotlib path
             draws on the current figure, resizes it and returns None.
             Nothing is drawn (and None is returned) when there are no
             patients.

    """
    if not chart_title:
        chart_title = "Each point represents a patient\nA circle indicates no toxicity, a cross toxicity"
    chart_title = chart_title + "\n"

    # Nothing to draw until at least one patient has been observed.
    if not self.size() > 0:
        return None

    if use_ggplot:
        from ggplot import (ggplot, ggtitle, geom_text, aes, ylim)
        import numpy as np
        import pandas as pd

        patient_number = range(1, self.size()+1)
        # 'X' marks a toxicity, 'O' marks none.
        symbol = np.where(self.toxicities(), 'X', 'O')
        data = pd.DataFrame({'Patient number': patient_number,
                             'Dose level': self.doses(),
                             'DLT': self.toxicities(),
                             'Symbol': symbol})

        p = ggplot(data, aes(x='Patient number', y='Dose level', label='Symbol'))
        p = p + ggtitle(chart_title)
        p = p + geom_text(aes(size=20, vjust=-0.07))
        p = p + ylim(1, 5)
        return p

    import matplotlib.pyplot as plt
    import numpy as np

    patient_number = np.arange(1, self.size()+1)
    doses_given = np.array(self.doses())
    tox_loc = np.array(self.toxicities()).astype('bool')
    no_tox_loc = ~tox_loc

    # Crosses for patients with toxicity, circles for those without.
    for mask, marker in ((tox_loc, 'x'), (no_tox_loc, 'o')):
        if sum(mask):
            plt.scatter(patient_number[mask], doses_given[mask], marker=marker,
                        s=300, facecolors='none', edgecolors='k')

    plt.title(chart_title)
    plt.ylabel('Dose level')
    plt.xlabel('Patient number')
    plt.yticks(self.dose_levels())

    # Size the current figure to golden-ratio proportions.
    p = plt.gcf()
    phi = (np.sqrt(5)+1)/2.
    p.set_size_inches(12, 12/phi)
def bar_chart(self, conn, column1, column2, table_chosen, title):
    """Print a bar chart with ``column1`` on x and ``column2`` as bar weight.

    The two columns are fetched from ``table_chosen`` through
    ``dfile.double_selector`` using the connection ``conn``.
    """
    # A bar graph only ever needs two columns.
    frame = dfile.double_selector(conn=conn, table=table_chosen,
                                  col1=column1, col2=column2)
    chart = ggplot(aes(x=column1, weight=column2), data=frame)
    chart = chart + geom_bar()
    chart = chart + labs(title=title)
    print(chart)
def plot_update_frequency(result):
    """Plot the weekly number of changes as points with a smoothed trend.

    Args:
        result: iterable of query rows; each row provides
            ``row['_id']['timestamp']`` and ``row['count']``.

    Returns:
        A ggplot object titled 'OpenStreetMaps Denver-Boulder Changes per
        Week'.
    """
    import pandas as pd
    import ggplot

    # Turn the query results into a time series of changes.
    # Timestamp.to_datetime() was removed from pandas; to_pydatetime() is
    # the supported spelling.
    dates = [pd.Timestamp(res['_id']['timestamp']).to_pydatetime()
             for res in result]
    counts = [res['count'] for res in result]

    ts = pd.DataFrame(counts, index=dates, columns=['changes'])
    # resample(..., how='sum') was removed; aggregate on the resampler.
    ts = ts.resample('W').sum()
    ts.index.names = ['date']

    # Plot the time series of changes.
    p = ggplot.ggplot(ts, ggplot.aes(x=ts.index, y=ts['changes'])) + \
        ggplot.geom_point(color='blue') + \
        ggplot.xlab('Period') + \
        ggplot.ylab('Changes') + \
        ggplot.geom_smooth() + \
        ggplot.ylim(low=0) + \
        ggplot.scale_x_date(breaks=ggplot.date_breaks("12 months"),
                            labels=ggplot.date_format('%Y-%m')) + \
        ggplot.ggtitle('OpenStreetMaps Denver-Boulder\nChanges per Week')
    return p
def two_var_intr_effects(self, target, vars, nval=100, plot=True):
    """
    Compute two-variable interaction effects between a target and others.

    If the R session has no ``null.models`` object yet, ten null models are
    generated first.

    Args:
        target - Variable identifier (column name or number) specifying the
                 target variable
        vars - List of variable identifiers (column names or numbers)
               specifying other selected variables. Must not contain target
        nval - Number of evaluation points used for calculation.
        plot - Determines whether or not to plot results.

    Returns:
        Pandas dataframe of interaction effects, indexed by the (converted)
        variable identifiers.
    """
    # Ask R whether null.models have already been generated.
    null_models_exist = robjects.r(
        """ function(){ if(exists("null.models")){ return(T) } else { return(F) } } """)
    if not null_models_exist()[0]:
        self.logger.info(
            'Null models not generated, generating null models (n=10)')
        self._generate_interaction_null_models(10, quiet=False)

    twovarint_call = robjects.r(
        """ function(target, vars, nval){ interactions <- twovarint(tvar=target, vars=vars, null.models, nval=nval, plot=F) } """)

    def _as_r_identifier(identifier):
        # Names pass through; numeric identifiers are incremented by one.
        if type(identifier) is str:
            return identifier
        return identifier + 1

    target = _as_r_identifier(target)
    vars = [_as_r_identifier(var) for var in vars]

    r_interact = twovarint_call(target, robjects.Vector(np.array(vars)), nval)
    interact = pd.DataFrame(
        {
            'interact_str': list(r_interact[0]),
            'exp_null_int': list(r_interact[1]),
            'std_null_int': list(r_interact[2])
        },
        index=vars)

    if plot:
        # Long format: one row per (variable, measure) for a filled bar chart.
        int_effects = interact.reset_index().rename(
            columns={'index': 'vars'})
        int_effects_m = pd.melt(
            int_effects,
            id_vars='vars',
            value_vars=['interact_str', 'exp_null_int'])
        p = gg.ggplot(gg.aes(x='vars', fill='variable', weight='value'),
                      data=int_effects_m)
        p = p + gg.geom_bar()
        p = p + gg.labs(
            title='Two-var interaction effects - {}'.format(target))
        print(p)

    return interact
def test_ndim_2_facet_wrap_subplots(self):
    """facet_wrap('cut', 'clarity') on diamonds yields a 7x6 subplot grid."""
    base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
    p = base + gg.facet_wrap('cut', 'clarity')
    _fig, axes = p.make_facets()
    n_rows, n_cols = axes.shape
    self.assertEqual(n_rows, 7)
    self.assertEqual(n_cols, 6)
def test_ndim_2_facet_wrap(self):
    """facet_wrap('cut', 'clarity') reports nrow=7, ncol=6 and ndim=40."""
    base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
    p = base + gg.facet_wrap('cut', 'clarity')
    n_rows, n_cols = p.facets.nrow, p.facets.ncol
    self.assertEqual(n_rows, 7)
    self.assertEqual(n_cols, 6)
    self.assertEqual(p.facets.ndim, 40)
def scatter(x, y, filename=""):
    """Draw a scatter plot of ``y`` against ``x``.

    Args:
        x, y: sequences of values; each is wrapped in a ``pd.Series``.
        filename: base name of the output image. When empty the plot is
            printed instead; otherwise it is saved as
            ``graphs/scatter/<filename>.png``.
    """
    df = pd.DataFrame({
        'x': pd.Series(x),
        'y': pd.Series(y)
    })
    p = gg.ggplot(gg.aes(x='x', y='y'), data=df) + gg.geom_point()
    if filename == "":
        # Function-call form works on both Python 2 and Python 3.
        print(p)
    else:
        gg.ggsave(filename="graphs/scatter/" + filename + ".png", plot=p)
def displacement_plot(centered, limits=None, style=None):
    u"""Draws nice displacement plots using ggplot2.

    params:
    centered (pd.DataFrame): needs cX, cY, Object, Frame columns,
                             probably produced by calling center() above.
                             Its 'Object' column is converted to strings
                             in place.
    limits (real): Sets the limits of the scales to a square window showing
                   ±limits on each axis.
    style (Iterable): Collection of strings. The only value acted on is
                      'no-terminal-dot' (which does not label the end of
                      tracks which terminate early). theme_bw is currently
                      always applied.

    Returns:
    g (gg.ggplot): Plot object
    """
    style = {} if style is None else style
    centered['Object'] = centered['Object'].map(str)
    # DataFrame.sort was removed from pandas; sort_values is the replacement.
    centered = centered.sort_values(['Frame', 'Object'])

    g = (gg.ggplot(centered, gg.aes(x='cX', y='cY', color='Object')) +
         gg.geom_path(size=0.3))
    g += gg.theme_bw()  # if 'theme-bw' in style else gg.theme_seaborn()

    if limits:
        g = g + gg.ylim(-limits, limits) + gg.xlim(-limits, limits)

    if 'no-terminal-dot' not in style:
        # Mark the last point of every track that ends before the final frame.
        max_frame = centered['Frame'].max()
        endframe = centered.groupby('Object')['Frame'].max()
        endframe = endframe[endframe != max_frame].reset_index()
        endframe = endframe.merge(centered, on=['Object', 'Frame'])
        # we should check if endframe is empty before adding it:
        # https://github.com/yhat/ggplot/issues/425
        if not endframe.empty:
            g += gg.geom_point(data=endframe, color='black', size=1)

    return g
def t_sne_visualize(generated, n_sne, epoch):
    """Project generated samples to 2-D with t-SNE and save a scatter plot.

    Args:
        generated: object whose ``.data.numpy()`` yields the samples; each
            sample is flattened to 28*28*3 values after dividing by 255.
        n_sne: accepted for interface compatibility but currently ignored;
            the sample count is fixed at 49 to match the 49 labels built
            below (7 labels repeated 7 times).
        epoch: used in the output file name ``tsne<epoch>.png``.
    """
    n_label = 7
    n_sne = 49  # NOTE(review): overrides the argument -- confirm intent

    X_sample = generated.data.numpy() / 255
    y_sample = list(range(n_label)) * n_label
    X_sample = np.reshape(np.ravel(X_sample),
                          (X_sample.shape[0], 28 * 28 * 3))

    feat_cols = ['pixel' + str(i) for i in range(X_sample.shape[1])]
    df = pd.DataFrame(X_sample, columns=feat_cols)
    df['label'] = y_sample
    df['label'] = df['label'].apply(lambda i: str(i))

    # Random row order; kept as integer labels so .loc matches the index
    # directly (the former empty-range concatenate turned them into floats).
    rndperm = np.random.permutation(df.shape[0])
    chosen = rndperm[:n_sne]

    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    print('INITIALIZED')
    tsne_results = tsne.fit_transform(df.loc[chosen, feat_cols].values)
    print('AFTER FITTING')

    df_tsne = df.loc[chosen, :].copy()
    df_tsne['x-tsne'] = tsne_results[:, 0]
    df_tsne['y-tsne'] = tsne_results[:, 1]

    chart = ggplot(df_tsne, aes(x='x-tsne', y='y-tsne', color='label')) \
        + geom_point(size=70, alpha=0.7) \
        + ggtitle("tSNE dimensions colored by digit")
    chart.save("tsne" + str(epoch) + ".png")
    return
def graph3(score_data):
    """
    Build graph 3: a box plot of question scores per date.

    ``score_data`` is a table whose first row holds the column names and
    whose remaining rows hold the data; the date column is located with
    ``find_time_stamp``. Returns the ggplot box plot.
    """
    header, rows = score_data[0], score_data[1:]
    date_column = header[find_time_stamp(score_data)]
    data = DataFrame(rows, columns=header)

    # Numerical questions are the int64 columns.
    numeric_questions = data.select_dtypes(include=['int64']).columns.values

    # Long format: one row per (date, question) score.
    long_scores = pd.melt(data, id_vars=[date_column, "Name"],
                          value_vars=numeric_questions,
                          var_name="Question", value_name="Score")
    # The name is not needed for the plot.
    long_scores = long_scores.drop('Name', axis=1)

    # Parse the date strings into real dates.
    long_scores[date_column] = pd.to_datetime(long_scores[date_column],
                                              format="%m/%d/%Y")

    box_plot = ggplot.ggplot(ggplot.aes(x=date_column, y='Score'), long_scores)
    box_plot = box_plot + ggplot.geom_boxplot()
    box_plot = box_plot + ggplot.ggtitle(
        "Distribution of Question Scores over Time")
    return box_plot
def density_plot(by='dpsi_zscore', categorical=True):
    """Kernel density plot of the ``by`` field of the module-level variants.

    With ``categorical`` the variants are grouped into the 'increase',
    'decrease' and 'constant' sets; otherwise they are grouped by their
    'change' value (in ``aaa_changes`` order), keeping only groups with more
    than one value.
    """
    if categorical:
        labelled_groups = (
            ('muts increasing AAA', 'increase'),
            ('muts decreasing AAA', 'decrease'),
            ('muts not changing AAA length', 'constant'),
        )
        data_dict = {}
        for label, group in labelled_groups:
            data_dict[label] = np.array([x[by] for x in variants[group]])
    else:
        data_dict = OrderedDict()
        for change in aaa_changes:
            values = [x[by] for x in variants['all'] if x['change'] == change]
            if len(values) > 1:
                data_dict[change] = np.array(values)

    frame = prepare_data_frame(data_dict)
    plot = ggplot(aes(x='value', colour='variable', fill='variable'),
                  data=frame)
    plot = plot + ggtitle(
        'Impact of variants affecting poly AAA sequences on %s' % by)
    plot = plot + xlab(by)
    plot = plot + ylab('Kernel density estimate')
    plot = plot + geom_density(alpha=0.6)
    return plot
def scatter(x, y, filename=""):
    """Draw a scatter plot of ``y`` against ``x``.

    Args:
        x, y: sequences of values; each is wrapped in a ``pd.Series``.
        filename: base name of the output image. When empty the plot is
            printed instead; otherwise it is saved as
            ``graphs/scatter/<filename>.png``.
    """
    df = pd.DataFrame({'x': pd.Series(x), 'y': pd.Series(y)})
    p = gg.ggplot(gg.aes(x='x', y='y'), data=df) + gg.geom_point()
    if filename == "":
        # Function-call form works on both Python 2 and Python 3.
        print(p)
    else:
        gg.ggsave(filename="graphs/scatter/" + filename + ".png", plot=p)
def plotSetOfArrays(arrays, names, fileName):
    """Plot several column arrays against a shared 0..1 'noise' axis.

    A ggplot line+point chart of all arrays is saved to ``./IMG/<fileName>``,
    then the first column of each array (up to five, one per palette colour)
    is drawn as a Bokeh line chart written to ``iou_scores.html``.

    Args:
        arrays: 2-D arrays with the same number of rows; their columns are
            concatenated side by side.
        names: one column name per concatenated column.
        fileName: image file name inside ``./IMG/``.
    """
    n_rows = arrays[0].shape[0]
    IDS = np.linspace(0, 1, n_rows)
    A = np.concatenate([IDS.reshape(n_rows, 1)] + list(arrays), axis=1)

    Data = pd.DataFrame(A, columns=['noise'] + list(names))
    Melted = pd.melt(Data, id_vars=['noise'])
    pv = ggplot.ggplot(ggplot.aes(x='noise', y='value', colour='variable'),
                       data=Melted) + ggplot.geom_line() + ggplot.geom_point()
    ggplot.ggsave(pv, './IMG/' + fileName)

    output_file("iou_scores.html", title="correlation.py example")
    figure(tools="pan,wheel_zoom,box_zoom,reset,previewsave")
    hold()
    # One line per array; zip stops at the shortest input, so fewer than
    # five arrays no longer raises IndexError and extras are not drawn.
    palette = ('#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', '#fb9a99')
    for color, array, name in zip(palette, arrays, names):
        line(IDS, array[:, 0], color=color, legend=name)
    curplot().title = "Minimum IOU"
    grid().grid_line_alpha = 0.3
    show()
def _post_density_plot(self, func=None, x_name='', plot_title='',
                       include_doses=None, boot_samps=1000):
    """Density plot of a per-dose quantity resampled from ``self.pds``.

    For every included dose, ``func(x, samp)`` is evaluated on the scaled
    dose ``x`` and the stored samples, then ``boot_samps`` values are drawn
    with replacement using the normalised sample weights.

    :param func: callable ``(x, samp)``; defaults to ``self.metric`` applied
                 to the efficacy and toxicity probabilities
    :param x_name: name of the plotted column / x aesthetic
    :param plot_title: chart title
    :param include_doses: 1-based dose indices to include; all by default
    :param boot_samps: number of resampled values per dose
    :return: ggplot density plot filled by dose

    NOTE(review): ``self.pds._probs`` is normalised in place here -- confirm
    that callers expect the stored array to change.
    """
    from ggplot import aes, ggplot, geom_density, ggtitle
    import pandas as pd

    if include_doses is None:
        include_doses = range(1, self.num_doses + 1)

    def _utility(x, samp):
        tox_probs = _pi_T(x, mu=samp[:, 0], beta=samp[:, 1])
        eff_probs = _pi_E(x, mu=samp[:, 2], beta1=samp[:, 3],
                          beta2=samp[:, 4])
        return self.metric(eff_probs, tox_probs)

    evaluate = _utility if func is None else func

    samp = self.pds._samp
    p = self.pds._probs
    p /= p.sum()

    x_boot, dose_indices = [], []
    for dose_index, x in enumerate(self.scaled_doses(), 1):
        if dose_index not in include_doses:
            continue
        values = evaluate(x, samp)
        x_boot.extend(np.random.choice(values, size=boot_samps,
                                       replace=True, p=p))
        dose_indices.extend(np.repeat(dose_index, boot_samps))

    df = pd.DataFrame({x_name: x_boot, 'Dose': dose_indices})
    chart = ggplot(aes(x=x_name, fill='Dose'), data=df)
    chart = chart + geom_density(alpha=0.6)
    chart = chart + ggtitle(plot_title)
    return chart
def production_envelope(self, dataframe, grid=None, width=None, height=None,
                        title=None, points=None, points_colors=None,
                        palette=None, x_axis_label=None, y_axis_label=None):
    """Area plot of a production envelope (``lb``..``ub`` against ``value``).

    Args:
        dataframe: frame with ``lb``, ``ub``, ``value`` and ``strain``
            columns; one colour is requested per unique strain.
        palette: palette name; falls back to the ``palette`` option.
        title: optional chart title.
        x_axis_label, y_axis_label: optional axis names.
        grid, width, height, points, points_colors: accepted for interface
            compatibility; not used by this implementation.

    Returns:
        The ggplot object.

    NOTE(review): the previous body added ``geom_area()`` to a bare ``aes``
    (no plot was ever created), passed a scale object as the colour
    aesthetic and used ``geom_tile`` for the title. Colouring by ``strain``
    with a manual scale is the presumed intent -- confirm against a render.
    """
    palette = self.get_option('palette') if palette is None else palette
    colors = self._palette(palette, len(dataframe.strain.unique()))

    plot = ggplot(dataframe,
                  aes(ymin="lb", ymax="ub", x="value", color="strain"))
    plot = plot + geom_area() + scale_colour_manual(values=colors)

    if title:
        plot += ggtitle(title)
    if x_axis_label:
        plot += scale_x_continuous(name=x_axis_label)
    if y_axis_label:
        plot += scale_y_continuous(name=y_axis_label)
    return plot
def t_sne_visualize(latent_vectors, labels, epoch):
    """Project latent vectors to 2-D with t-SNE and save a scatter plot.

    Args:
        latent_vectors: object whose ``.data.numpy()`` yields a 2-D array
            (one row per sample); values are divided by 255.
        labels: one label per row; converted to strings for colouring.
        epoch: used in the output file name.

    At most 1000 randomly chosen rows are embedded. The image is written
    under ``<args.dataset>tsne-vae/2d-vec-miss<args.remove_label>/``.
    """
    print(latent_vectors.shape)
    X_sample = latent_vectors.data.numpy() / 255
    feat_cols = ['pixel' + str(i) for i in range(X_sample.shape[1])]
    nsne = 1000

    df = pd.DataFrame(X_sample, columns=feat_cols)
    df['label'] = labels
    df['label'] = df['label'].apply(lambda i: str(i))

    # Random row order; kept as integer labels so .loc matches the index
    # directly (the former empty-range concatenate turned them into floats).
    rndperm = np.random.permutation(df.shape[0])
    chosen = rndperm[:nsne]

    tsne = TSNE(n_components=2, verbose=1, perplexity=30)
    print('INITIALIZED')
    tsne_results = tsne.fit_transform(df.loc[chosen, feat_cols].values)
    print('AFTER FITTING')

    df_tsne = df.loc[chosen, :].copy()
    df_tsne['x-tsne'] = tsne_results[:, 0]
    df_tsne['y-tsne'] = tsne_results[:, 1]

    chart = ggplot(df_tsne, aes(x='x-tsne', y='y-tsne', color='label')) \
        + geom_point(size=70, alpha=0.7) \
        + ggtitle("tSNE dimensions colored by digit")
    chart.save(
        str(args.dataset) + "tsne-vae/2d-vec-miss" + str(args.remove_label) +
        "/tsne" + str(epoch) + ".png")
    return
def plot_trend_season(dates, ndf_domain, x, x_trend, season, my_domain):
    """Plot observed volume, overall trend and repeat-sending trend by week.

    Args:
        dates: values written into the 'Date' column.
        ndf_domain: two-column data ('Date', 'Volume') for all three series
            stacked in the order observed, trend, season.
        x, x_trend, season: the three series; only their lengths are used,
            to label the stacked rows.
        my_domain: domain name used in the chart title.

    Returns:
        A ggplot object faceted by series.
    """
    # ---------------------- Prepare Data Frame ----------------------- #
    df_domain = pd.DataFrame(ndf_domain, columns=['Date', 'Volume'])
    df_domain['Date'] = dates

    # List repetition replaces the Python-2-only xrange comprehensions.
    labels = (['Observed Volume'] * len(x) +
              ['Overall Trend'] * len(x_trend) +
              ['Repeat Sending Trend'] * len(season))
    col3 = pd.DataFrame(labels)

    df_plot = pd.concat((df_domain, col3), axis=1)
    df_plot.columns = ['Date', 'Volume', 'Data']

    # ---------------------- Plot Decomposition ----------------------- #
    p = ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot) + \
        ggplot.geom_line(color='blue', size=2) + \
        ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week") + \
        ggplot.xlab("Week (Marked on Mondays)") + \
        ggplot.ylab("Message Vol") + \
        ggplot.ggtitle("%s Message Volume by Week" % my_domain) + \
        ggplot.facet_grid('Data', scales='free_y') + \
        ggplot.theme_seaborn()

    return p
def length_dist(self, pat_out="genes_lengths.png"):
    '''Plots the density of the lengths of the items in self.num with
    ggplot and saves the figure to pat_out.'''
    lengths = np.array(list(map(len, self.num)))
    frame = pd.DataFrame({"record_length": lengths})
    density = ggplot(frame, aes(x="record_length"))
    density = density + geom_density()
    density.save(pat_out)
def plot_matches(df_in, date, filename_out, x_var='date_time',
                 y_var="shorthand_search_vol"):
    """
    Draw one line per match of ``y_var`` over ``x_var`` and save it.

    ``df_in`` is expected to be filtered already; a ``date_time`` column is
    added to it in place by combining ``date`` with its ``time`` column.
    The figure is written to ``filename_out`` at 16x8.
    """
    # Combine the date with each row's time, then parse; unparseable values
    # are coerced rather than raising.
    df_in['date_time'] = date + " " + df_in['time'].astype(str)
    df_in['date_time'] = pd.to_datetime(df_in['date_time'],
                                        errors="coerce",
                                        infer_datetime_format=True)

    # Plot layers: lines per match, axis labels, time-of-day x scale.
    chart = ggplot(aes(x=x_var, y=y_var, group="match_id", color="match_id"),
                   data=df_in)
    chart = chart + geom_line(size=2)
    chart = chart + labs(x="time (gmt)", y="search volume (scaled to 100)")
    # chart += ggtitle("man. city (h) vs. chelsea (a)\naug. 8 '16, etihad stadium")
    chart = chart + scale_x_date(labels=date_format("%H:%M:%S"),
                                 date_breaks="30 minutes")

    # Gray theme with a small monospace font.
    gray = theme_gray()
    for key, value in (('font.size', 8), ('font.family', 'monospace')):
        gray._rcParams[key] = value
    chart = chart + gray

    chart.save(filename_out, width=16, height=8)
def main(file_path):
    """Print a per-participant histogram of messages from a pickled chat.

    ``file_path`` must point to an existing ``.pkl`` file holding a dict
    with ``conversation_name`` and ``messages`` entries; otherwise an error
    is logged and nothing is plotted.
    """
    # Validate raw data path
    path_exists = os.path.exists(file_path)
    if not path_exists:
        LOG_ERROR('Could not find file: {}'.format(file_path))
        return

    # Validate raw data file type
    is_pickle = file_path.endswith('.pkl')
    if not is_pickle:
        LOG_ERROR('File path must be a pickle file')
        return

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this on files from a trusted source.
    with open(file_path, 'rb') as f:
        LOG_INFO('Parsing pickle file: {}'.format(file_path))
        conversation = pickle.load(f)

    name = conversation['conversation_name']
    LOG_INFO('Found conversation: {}'.format(name))

    def _to_ordinal(stamp):
        return datetime.datetime.fromtimestamp(float(stamp)).toordinal()

    df = pd.DataFrame(conversation['messages'])
    df.columns = ['Timestamp', 'Type', 'Participant']
    # df['Datetime'] = pd.to_datetime(df['Timestamp'])
    df['Datetime'] = df['Timestamp'].apply(_to_ordinal)

    histogram = ggplot.ggplot(df, ggplot.aes(x='Datetime', fill='Participant'))
    histogram = histogram + ggplot.geom_histogram(alpha=0.6, binwidth=2)
    histogram = histogram + ggplot.scale_x_date(labels='%b %Y')
    histogram = histogram + ggplot.ggtitle(conversation['conversation_name'])
    histogram = histogram + ggplot.ylab('Number of messages')
    histogram = histogram + ggplot.xlab('Date')

    print(histogram)
def extra(dataframe):
    """Show an xkcd-styled path plot of Speed over Time (y axis reversed).

    Sets matplotlib's global ``figure.figsize`` to "18, 4" before plotting.
    """
    mpl.rcParams["figure.figsize"] = "18, 4"
    plot = ggplot.ggplot(dataframe, ggplot.aes(x='Time', y='Speed'))
    plot = plot + ggplot.geom_path(color='lightblue', size=5)
    plot = plot + ggplot.ggtitle('Ports & Speeds')
    plot = plot + ggplot.scale_y_reverse()
    plot = plot + ggplot.theme_xkcd()
    plot.show()
def eval(df_in, predicted, method):
    """Print a confusion matrix, accuracy, precision and recall.

    Args:
        df_in: DataFrame with the prediction column, a ``donation_flag``
            truth column and ``donation_count`` / ``m_since_donation``
            columns for the scatter plot. ``Correct`` and ``Class`` columns
            are added to it in place.
        predicted: name of the 0/1 prediction column.
        method: label printed before the results.

    Rows whose prediction is neither 0 nor 1 keep the initial
    'True Positive' class.
    """
    print(method)
    from ggplot import ggplot, aes, geom_point

    df = df_in
    df['Correct'] = df[predicted] == df['donation_flag']

    said_yes = df[predicted] == 1
    said_no = df[predicted] == 0
    correct = df['Correct']

    # .loc writes to the frame itself; the former chained assignment
    # (df['Class'][mask] = ...) may silently write to a temporary copy.
    df['Class'] = 'True Positive'
    df.loc[said_yes & ~correct, 'Class'] = 'False Positive'
    df.loc[said_no & correct, 'Class'] = 'True Negative'
    df.loc[said_no & ~correct, 'Class'] = 'False Negative'

    class_counts = df['Class'].value_counts()
    TP = int(class_counts.get('True Positive', 0))
    FP = int(class_counts.get('False Positive', 0))
    TN = int(class_counts.get('True Negative', 0))
    FN = int(class_counts.get('False Negative', 0))

    print(ggplot(df, aes(x='donation_count', y='m_since_donation',
                         color='Class')) + geom_point())

    # Columns are the predicted class, rows the true class.
    confusion = pd.DataFrame({'Positive': [FP, TP], 'Negative': [TN, FN]},
                             index=['TrueNeg', 'TruePos'])
    accuracy = float(TP + TN) / float(TP + TN + FP + FN)
    precision = float(TP) / float(TP + FP)
    recall = float(TP) / float(TP + FN)

    print(confusion)
    print('accuracy = ' + str(accuracy))
    print('precision = ' + str(precision))
    print('recall = ' + str(recall))
    print('Done')
def _plot_cumulative_payouts(self, include_ci=True, summary=None):
    """Line plot of the average cumulative payout per round.

    Args:
        include_ci: when True, add a shaded band of +/- 1.96 standard
            deviations around the average.
        summary: precomputed ``self.summary()`` result; computed on demand
            when omitted.

    Returns:
        ggplot object with a ``geom_line`` layer.
    """
    import ggplot as gg

    if summary is None:
        summary = self.summary()

    df = pd.DataFrame({
        'AverageCumulativePayout': summary['CumulativePayout']['Avg'],
        'Std': summary['CumulativePayout']['Std'],
        'Round': range(self.n_rounds),
    })

    if not include_ci:
        plt = gg.ggplot(gg.aes(x='Round', y='AverageCumulativePayout'),
                        data=df)
        return plt + gg.geom_line()

    half_width = 1.96 * df.Std
    df['ymin'] = df.AverageCumulativePayout - half_width
    df['ymax'] = df.AverageCumulativePayout + half_width
    plt = gg.ggplot(gg.aes(x='Round', y='AverageCumulativePayout',
                           ymin='ymin', ymax='ymax'),
                    data=df)
    plt = plt + gg.geom_area(alpha=0.5)
    return plt + gg.geom_line()
def plot_cost_history(alpha, cost_history):
    """Return a point-and-line plot of cost against iteration number.

    ``alpha`` only appears in the title, formatted to three decimals.
    """
    iterations = range(len(cost_history))
    cost_df = pandas.DataFrame({
        'Cost_History': cost_history,
        'Iteration': iterations
    })
    chart = gp.ggplot(cost_df, gp.aes('Iteration', 'Cost_History'))
    chart = chart + gp.geom_point()
    chart = chart + gp.geom_line()
    chart = chart + gp.ggtitle('Cost History for alpha = %.3f' % alpha)
    return chart
def render(data, bin_width, plot_density=False):
    """Print a per-conversation chart of message times.

    With ``plot_density`` a density curve per conversation is drawn;
    otherwise a histogram with bins of ``bin_width``, filled by
    conversation.
    """
    if not plot_density:
        plot = ggplot.ggplot(
            data, ggplot.aes(x='datetime', fill='conversationWithName'))
        plot = plot + ggplot.geom_histogram(alpha=0.6, binwidth=bin_width)
        plot = plot + ggplot.scale_x_date(labels='%b %Y', breaks='6 months')
        plot = plot + ggplot.ggtitle('Message Breakdown')
        plot = plot + ggplot.ylab('Number of Messages')
        plot = plot + ggplot.xlab('Date')
    else:
        plot = ggplot.ggplot(
            data, ggplot.aes(x='datetime', color='conversationWithName'))
        plot = plot + ggplot.geom_density()
        plot = plot + ggplot.scale_x_date(labels='%b %Y')
        plot = plot + ggplot.ggtitle('Conversation Densities')
        plot = plot + ggplot.ylab('Density')
        plot = plot + ggplot.xlab('Date')

    print(plot)
def plot(self, inputs):
    """Scatter plot of ``inputs.yvar`` against ``inputs.xvar`` for one year.

    Returns None when ``inputs.year`` does not occur in ``self.dat.Year`` or
    when either variable is not a column of ``self.dat``. Optional layers:
    point labels taken from ``self.ID_col`` (``inputs.shownames``) and a red
    linear fit (``inputs.linear``).
    """
    year_known = inputs.year in self.dat.Year.values
    if not year_known:
        return
    if not (inputs.xvar in self.dat and inputs.yvar in self.dat):
        return

    year_rows = self.dat[self.dat.Year == inputs.year]
    chart = ggplot(year_rows, aes(x=inputs.xvar, y=inputs.yvar))
    chart = chart + geom_point()
    if inputs.shownames:
        chart = chart + geom_text(aes(label=self.ID_col), vjust=1, hjust=1)
    if inputs.linear:
        chart = chart + stat_smooth(color="red", method="lm")
    return chart
def plot_sfs(self, pat_out):
    """Print the frequency table of ``self.sfs`` and save it as a bar chart.

    The first entry of ``self.sfs`` is skipped; the remaining entries are
    paired with frequencies 1, 2, ... and the chart is saved to ``pat_out``.
    """
    df = pd.DataFrame({
        "freq": list(range(1, len(self.sfs))),
        "sfs": np.array(self.sfs[1:])
    })
    # Function-call form works on both Python 2 and Python 3.
    print(df)
    pl = ggplot(df, aes(x="freq", weight="sfs")) + geom_bar()
    pl.save(pat_out)
def plot_bin_dists(df, bin_def="distance_bin <= 500"):
    """Histogram of ``R2`` per distance bin for rows matching ``bin_def``.

    ``bin_def`` is a ``DataFrame.query`` expression and doubles as the chart
    title. Matplotlib's global figure size is changed as a side effect.
    """
    plt.rcParams['figure.figsize'] = np.array([16, 12]) * 0.65
    selected = df.query(bin_def)
    chart = gp.ggplot(gp.aes(x='R2'), data=selected)
    chart = chart + gp.geom_histogram(fill='coral')
    chart = chart + gp.facet_wrap("distance_bin")
    chart = chart + gp.theme_seaborn(context='talk')
    chart = chart + gp.ggtitle(bin_def)
    return chart
def histogram(self, dataframe, bins=100, width=None, height=None,
              palette=None, title='Histogram', values=None, groups=None,
              legend=True):
    """Filled histogram of the ``values`` column, coloured by ``groups``.

    ``palette`` falls back to the ``palette`` default option. ``width``,
    ``height`` and ``legend`` are accepted but not used here.
    """
    if palette is None:
        palette = self.__default_options__.get('palette', None)

    chart = ggplot(dataframe, aes(x=values, fill=groups, color=groups))
    chart = chart + geom_histogram(alpha=0.6, breaks=bins, position="fill")
    chart = chart + self._palette(palette)
    chart = chart + ggtitle(title)
    chart = chart + scale_y_continuous(name="Count (%s)" % values)
    return chart
def signature_data_plot(sd):
    """Scatter of ``not_exp`` against ``set_exp`` coloured by ``pearson_r``.

    Both axes get a log scale followed by fixed continuous limits; colour
    runs from yellow (low) to red (high).
    """
    import ggplot as gg

    mapping = gg.aes(x='set_exp', y='not_exp', color='pearson_r')
    chart = gg.ggplot(mapping, data=sd)
    chart = chart + gg.geom_point(size=15)
    chart = chart + gg.scale_color_gradient(low='yellow', high='red')
    chart = chart + gg.scale_x_log()
    chart = chart + gg.scale_x_continuous(limits=(0.5, 10000))
    chart = chart + gg.scale_y_log()
    chart = chart + gg.scale_y_continuous(limits=(0.05, 10000))
    return chart
def plot_deg_distrib(G):
    """Print overlaid histograms of the in- and out-degree distributions.

    The degrees come from ``wa.degree_distribution(G)``; its third result
    (total degree) is not plotted.
    """
    in_deg, out_deg, _deg = wa.degree_distribution(G)
    in_out = {
        'in_deg': pd.Series(in_deg),
        'out_deg': pd.Series(out_deg)
    }
    # Long format: 'variable' holds in_deg/out_deg, 'value' the degree.
    df = pd.melt(pd.DataFrame(in_out))
    # Plot the melted frame built above (previously an undefined `df2`).
    p = gg.ggplot(gg.aes(x='value', color='variable', fill='variable'),
                  data=df) + gg.geom_histogram(alpha=0.6, binwidth=1)
    print(p)
def plotAverageLatency(self):
    """Show a bar chart of each device's average latency.

    Devices are numbered from 1 in the order of ``self.data``.
    """
    averages = [device.averageLatency() for device in self.data]
    device_numbers = range(1, len(averages) + 1)
    dataframe = pandas.DataFrame({"device": device_numbers,
                                  "average": averages})

    chart = ggplot.ggplot(ggplot.aes(x="device", weight="average"), dataframe)
    chart = chart + ggplot.labs(title="Average Latency Per Device")
    chart = chart + ggplot.ylab("Average Latency (ms)")
    chart = chart + ggplot.xlab("Device Number")
    chart = chart + ggplot.geom_bar(stat="identity")
    chart.show()
def plotAverageLatency(self):
    """Show a bar chart of each device's average latency.

    Devices are numbered from 1 in the order of ``self.data``.
    """
    averages = [device.averageLatency() for device in self.data]
    device_numbers = range(1, len(averages) + 1)
    dataframe = pandas.DataFrame({"device": device_numbers,
                                  "average": averages})

    chart = ggplot.ggplot(ggplot.aes(x="device", weight="average"), dataframe)
    chart = chart + ggplot.labs(title="Average Latency Per Device")
    chart = chart + ggplot.ylab("Average Latency (ms)")
    chart = chart + ggplot.xlab("Device Number")
    chart = chart + ggplot.geom_bar(stat="identity")
    chart.show()
def plot_weather_data(df):
    """Line plot of summed ``EXITSn_hourly`` per ``DATEn`` date.

    The ``DATEn`` column of ``df`` is converted to datetimes in place.

    NOTE(review): the title and axis labels previously described hours and
    entries, while the chart shows dates on x and exits on y; they now
    describe the plotted columns.
    """
    # older version
    df.DATEn = pd.to_datetime(df.DATEn)
    grouped = df.groupby('DATEn', as_index=False).sum()
    grouped.index.name = 'DATEn'

    p_title = 'Subway Ridership by Day'
    p_xlab = 'Date'
    p_ylab = 'Exits'

    plot = gp.ggplot(grouped, gp.aes(x='DATEn', y='EXITSn_hourly')) + \
        gp.geom_line() + gp.ggtitle(p_title) + gp.xlab(p_xlab) + \
        gp.ylab(p_ylab)
    return plot
def plot_deg_distrib(G):
    """Print overlaid histograms of the in- and out-degree distributions.

    The degrees come from ``wa.degree_distribution(G)``; its third result
    (total degree) is not plotted.
    """
    in_deg, out_deg, _deg = wa.degree_distribution(G)
    in_out = {'in_deg': pd.Series(in_deg), 'out_deg': pd.Series(out_deg)}
    # Long format: 'variable' holds in_deg/out_deg, 'value' the degree.
    df = pd.melt(pd.DataFrame(in_out))
    # Plot the melted frame built above (previously an undefined `df2`).
    p = gg.ggplot(gg.aes(x='value', color='variable', fill='variable'),
                  data=df) + gg.geom_histogram(alpha=0.6, binwidth=1)
    print(p)
def plot_weather_data(df):
    """Line plot of summed ``EXITSn_hourly`` per ``DATEn`` date.

    The ``DATEn`` column of ``df`` is converted to datetimes in place.
    """
    df.DATEn = pd.to_datetime(df.DATEn)
    daily = df.groupby('DATEn', as_index=False).sum()
    daily.index.name = 'DATEn'

    return (gp.ggplot(daily, gp.aes(x='DATEn', y='EXITSn_hourly'))
            + gp.geom_line()
            + gp.ggtitle('Subway Ridership by Day')
            + gp.xlab('Date')
            + gp.ylab('Exits'))
def area_chart(self, conn, column1, column2, table_chosen, title):
    """Print an area chart over ``column2`` between user-entered bounds.

    The two columns are fetched through ``dfile.double_selector``; the lower
    and upper y bounds are read interactively as floats.
    """
    frame = dfile.double_selector(conn=conn, table=table_chosen,
                                  col1=column1, col2=column2)
    lower = float(input("Enter the minimum value that should be plotted: "))
    upper = float(input("Enter the maximum value that should be plotted: "))

    chart = ggplot(aes(x=column2, ymin=lower, ymax=upper), data=frame)
    chart = chart + geom_area()
    chart = chart + theme_gray()
    chart = chart + labs(title=title)
    print(chart)
def lineplot(hr_year_csv):
    """Plot home runs per year as red points joined by a red line.

    Args:
        hr_year_csv: path to a CSV file with ``yearID`` and ``HR`` columns.

    Returns:
        The ggplot object.
    """
    df = pandas.read_csv(hr_year_csv)
    # yearID is on x and HR on y; the axis labels were previously swapped.
    chart = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR")) +
        gp.geom_point(color="red") +
        gp.geom_line(color="red") +
        gp.ggtitle("Homeruns by Year") +
        gp.xlab("Year") +
        gp.ylab("Homeruns")
    )
    return chart
def plot(self):
    """Print a scatter of the data points plus the cluster centres.

    The centres are appended as extra rows to a copy of ``self.data`` and
    labelled "center"; data rows are labelled with ``self.class_label``.
    """
    plot_df = self.data.copy()
    for k in range(self.num_clusters):
        # Append at the next row position (assumes the copied frame has a
        # default 0..n-1 index -- TODO confirm).
        plot_df.loc[plot_df.shape[0]] = self.cluster_centers[k]
    plot_df["class_label"] = (list(self.class_label) +
                              self.num_clusters * ["center"])

    p = gg.ggplot(plot_df, gg.aes(x="x1", y="x2", colour="class_label")) + \
        gg.geom_point() + gg.ggtitle("EM cluster assignments")
    # Function-call form works on both Python 2 and Python 3.
    print(p)
def lineplot_compare(filename):
    """Plot home runs per year with one coloured line per team.

    Args:
        filename: path to a CSV file with ``yearID``, ``HR`` and ``teamID``
            columns.

    Returns:
        The ggplot object.
    """
    df = pd.read_csv(filename)
    # yearID is on x and HR on y; the axis labels were previously swapped.
    chart = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR", color="teamID")) +
        gp.geom_point() +
        gp.geom_line() +
        gp.ggtitle("Homeruns by Year by Team") +
        gp.xlab("Year") +
        gp.ylab("Homeruns")
    )
    return chart
def visualize_segmentation(X, var):
    '''
    Returns a ggplot scatter of X[var] against the index of X, with points
    coloured by the 'segmento' column (as strings).
    '''
    frame = pandas.DataFrame(index=X.index)
    frame['fecha'] = X.index.values
    frame[var] = X[var]
    frame['Segmento'] = X['segmento'].astype(str)

    chart = ggplot(aes(x="fecha", y=var, color="Segmento"), frame)
    chart = chart + geom_point()
    chart = chart + xlab("Fecha")
    chart = chart + ylab(var)
    chart = chart + ggtitle("Segmentacion de la variable \"" + var + "\"")
    chart = chart + theme(axis_text_x=element_text(color=[0, 0, 0, 0]))
    return chart
def heatmap(self, dataframe, y=None, x=None, values=None, width=None,
            height=None, max_color=None, min_color=None, mid_color=None,
            title='Heatmap'):
    """Tile plot of ``values`` over the ``x`` and ``y`` columns.

    Args:
        dataframe: data to plot.
        x, y, values: column names for the axes and the tile fill.
        max_color, min_color, mid_color: gradient end and middle colours;
            each falls back to the default option of the same name.
        title: chart title (previously accepted but never applied).
        width, height: accepted for interface compatibility; not used here.

    Returns:
        The ggplot object.
    """
    defaults = self.__default_options__
    max_color = defaults.get('max_color', None) if max_color is None else max_color
    min_color = defaults.get('min_color', None) if min_color is None else min_color
    mid_color = defaults.get('mid_color', None) if mid_color is None else mid_color

    palette = gradient(min_color, mid_color, max_color)
    return ggplot(dataframe, aes(x=x, y=y, fill=values)) + \
        geom_tile() + \
        self._palette(palette, "div") + \
        ggtitle(title)
def _ggplot(df, out_file):
    """Bar plot of value.floor per caller, faceted by variant type/category.

    Uses the ggplot wrapper on top of matplotlib and saves to ``out_file``.
    The ``variant.type``, ``category`` and ``caller`` columns of ``df`` are
    relabelled in place first (unknown callers become None).

    XXX Not yet functional
    """
    import ggplot as gg

    relabel = (
        ("variant.type", lambda key: vtype_labels[key]),
        ("category", lambda key: cat_labels[key]),
        ("caller", lambda key: caller_labels.get(key, None)),
    )
    for column, lookup in relabel:
        df[column] = [lookup(value) for value in df[column]]

    p = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    p = p + gg.geom_bar()
    p = p + gg.facet_wrap("variant.type", "category")
    p = p + gg.theme_seaborn()
    gg.ggsave(p, out_file)
def flux_variability_analysis(self, dataframe, grid=None, width=None,
                              height=None, title=None, palette=None,
                              x_axis_label=None, y_axis_label=None):
    """Placeholder: returns only an ``aes`` built from ``dataframe``.

    No plot object is created and every other argument is ignored.
    """
    mapping = aes(data=dataframe)
    return mapping
def prob231cd_recover(initialization):
    """Reload pickled k-means results, re-save the plot, summarise objectives.

    Reads ``results/prob231cd<initialization>.pkl`` -- a sequence of
    (plot dataframe, k-means call objects, number of trials) -- writes the
    scatter plot next to it as ``.png`` and returns the mean, standard
    deviation and minimum of the ``obj`` attribute over the trials.

    NOTE(review): unpickling executes arbitrary code; only load result
    files produced by this project.
    """
    filename = "results/prob231cd" + initialization
    # Context manager so the pickle file handle is always closed.
    with open(filename + ".pkl", "rb") as handle:
        tuple_in = pkl.load(handle)
    prob231c_plot_df = tuple_in[0]
    kmcalls = tuple_in[1]
    num_trials = tuple_in[2]

    p = gg.ggplot(prob231c_plot_df, gg.aes(x="x1", y="x2", colour="data")) + \
        gg.geom_point() + gg.ggtitle(initialization + " initialization")
    gg.ggsave(filename + ".png", plot=p)

    obj = [kmcalls[i].obj for i in range(num_trials)]
    obj_stats = {"mean": np.mean(obj), "sd": np.std(obj), "min": np.min(obj)}
    return obj_stats
def plotHistogramMeans(hist, fileName):
    """Save one line plot per row of ``hist``, faceted by row number.

    Args:
        hist: 2-D array; each row is one histogram (one facet).
        fileName: image file name inside ``./IMG/``.
    """
    num_clust = hist.shape[0]
    # Column of row ids; a plain ndarray replaces np.mat, which NumPy 2
    # no longer provides.
    IDS = np.arange(num_clust).reshape(num_clust, 1)
    histD = np.concatenate((IDS, hist), axis=1)

    # list(...) is required on Python 3, where range is not a list.
    Data = pd.DataFrame(histD, columns=['ID'] + list(range(0, hist.shape[1])))
    Melted = pd.melt(Data, id_vars=['ID'])

    pv = ggplot.ggplot(ggplot.aes(x='variable', y='value'), data=Melted) + \
        ggplot.geom_line() + ggplot.facet_wrap("ID")
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
def scatter(self, dataframe, x=None, y=None, width=None, height=None,
            color=None, title='Scatter', xaxis_label=None, yaxis_label=None):
    """Scatter plot of column ``y`` against column ``x``.

    Args:
        dataframe: data to plot.
        x, y: column names for the axes.
        color: point colour; falls back to the ``palette`` default option.
        title: chart title.
        xaxis_label, yaxis_label: optional axis names.
        width, height: accepted for interface compatibility; not used here.

    Returns:
        The ggplot object.
    """
    color = self.__default_options__.get('palette', None) if color is None else color

    chart = ggplot(dataframe, aes(x, y)) + \
        geom_point(color=color, alpha=0.6) + ggtitle(title)
    if xaxis_label:
        chart += scale_x_continuous(name=xaxis_label)
    if yaxis_label:
        # Previously labelled the y axis with xaxis_label (copy-paste slip).
        chart += scale_y_continuous(name=yaxis_label)
    return chart
def _plot_avg_accuracy(self, include_ci=True, summary=None):
    """Line plot of the average accuracy per round.

    Args:
        include_ci: when True, add a shaded 95% band taken from a Beta
            distribution with (successes + 1, failures + 1) parameters,
            where successes = average accuracy * ``self.n_sim``.
        summary: precomputed ``self.summary()`` result; computed on demand
            when omitted.

    Returns:
        ggplot object with a ``geom_line`` layer.
    """
    import ggplot as gg

    summary = self.summary() if summary is None else summary
    df = pd.DataFrame({
        'AverageAccuracy': summary['Accuracy']['Avg'],
        'Round': range(self.n_rounds),
    })

    if not include_ci:
        plt = gg.ggplot(gg.aes(x='Round', y='AverageAccuracy'), data=df)
        return plt + gg.geom_line()

    from scipy import stats

    succ = df.AverageAccuracy * self.n_sim
    fail = self.n_sim - succ
    interval = stats.beta(succ + 1, fail + 1).interval(0.95)
    for column, bound in (('ymin', 0), ('ymax', 1)):
        df[column] = interval[bound]

    plt = gg.ggplot(gg.aes(x='Round', y='AverageAccuracy',
                           ymin='ymin', ymax='ymax'),
                    data=df)
    plt = plt + gg.geom_area(alpha=0.5)
    return plt + gg.geom_line()
def prob231b(initialization="regular"):
    """Run k-means for several cluster counts and save one plot per run.

    For k in 2, 3, 5, 10, 15, 20 a ``KmeansCall`` is run on the module-level
    ``features_only`` data and its labelled points are saved to
    ``results/k=<k>_<datestring>.png``.
    """
    kmcalls = []
    for num_clusters in [2, 3, 5, 10, 15, 20]:
        call = KmeansCall(features_only, num_clusters, initialization)
        kmcalls.append(call)
        call.run_kmeans(verbose=False)

        df_to_plot = call.data.copy()
        df_to_plot["class_label"] = list(call.class_label)

        p = gg.ggplot(df_to_plot, gg.aes(x="x1", y="x2", colour="class_label"))
        p = p + gg.geom_point()
        p = p + gg.ggtitle("Synth. data, k=" + str(num_clusters))

        metadata = "k=" + str(num_clusters) + "_" + datestring
        gg.ggsave(filename="results/" + metadata + ".png", plot=p)
def lineplot_compare(filename):  # Cleaner version with string vars
    """Plot home runs per year with one coloured line per team.

    Args:
        filename: path to a CSV file with ``yearID``, ``HR`` and ``teamID``
            columns.

    Returns:
        The ggplot object.
    """
    df = pd.read_csv(filename)
    p_title = "Homeruns by Year by Team"
    # yearID is on x and HR on y; the axis labels were previously swapped.
    p_xlab = "Year"
    p_ylab = "Homeruns"
    chart = (
        gp.ggplot(df, gp.aes(x="yearID", y="HR", color="teamID")) +
        gp.geom_point() +
        gp.geom_line() +
        gp.ggtitle(p_title) +
        gp.xlab(p_xlab) +
        gp.ylab(p_ylab)
    )
    return chart
def main(log):
    """Record audio until Ctrl-C, then plot the per-chunk maximum sample.

    Audio is read in chunks of ``CHUNK_SIZE`` frames; the maximum sample of
    every chunk is kept. On ``KeyboardInterrupt`` the stream is closed, a
    line plot of the maxima is built and a pdb session is opened so it can
    be inspected; the process then exits with status 1.

    Args:
        log: logger used for debug messages.
    """
    log.debug('initializing app')
    p = pyaudio.PyAudio()

    # Open audio input stream
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=SAMPLE_RATE,
                    input=True,
                    frames_per_buffer=CHUNK_SIZE)
    log.debug('opened stream <{}>'.format(stream))
    log.debug('reading audio input at rate <{}>'.format(SAMPLE_RATE))

    recorded = []
    loops = 0
    # The try wraps the whole loop so Ctrl-C is handled wherever it lands,
    # not only while a chunk is being read.
    try:
        while True:
            loops += 1
            if loops % 25 == 0:
                log.debug('recorded <{}> loops'.format(loops))

            # Decode chunks of audio data from the stream.
            # np.frombuffer replaces the deprecated binary-mode np.fromstring.
            data = stream.read(CHUNK_SIZE)
            decoded = np.frombuffer(data, dtype=np.float32)
            recorded.append(decoded.max())
    # On <C-c>, plot max of recorded data
    except KeyboardInterrupt:
        log.debug('closing stream and ending PyAudio')
        stream.close()
        p.terminate()

        df = pd.DataFrame(columns=['mx', 'time'])
        df['mx'] = recorded
        df['time'] = range(len(recorded))
        plt = ggplot.ggplot(ggplot.aes(x='time', y='mx'), data=df) +\
            ggplot.geom_line()
        # Drop into the debugger so `plt` can be inspected interactively.
        pdb.set_trace()

        log.debug('quitting')
        sys.exit(1)
def prob231g():
    """Run EM with three clusters and save its diagnostics.

    Writes under ``results/prob231g``: the log-likelihood curve
    (``_loglike.png``), the cluster-assignment scatter (``_clusters.png``)
    and the pickled ``EMCall`` object (``_a.pkl``).
    """
    filename = "results/prob231g"
    num_clusters_231g = 3
    emcall = EMCall(features_only, labels_only, num_clusters_231g)
    emcall.run_em()

    plt.plot(emcall.log_likelihood_record)
    plt.title("Likelihood over EM iterations")
    plt.savefig(filename + "_loglike.png")

    prob231g_plot_df = emcall.data.copy()
    prob231g_plot_df["class_label"] = list(emcall.class_label)
    p = gg.ggplot(prob231g_plot_df,
                  gg.aes(x="x1", y="x2", colour="class_label")) + \
        gg.geom_point() + gg.ggtitle("EM cluster assignments")
    gg.ggsave(filename + "_clusters.png", plot=p)

    # Context manager so the pickle is flushed and the handle closed.
    with open(filename + "_a.pkl", "wb") as handle:
        pkl.dump(obj=emcall, file=handle)
    print("Done with 231g.")
    return
def data_output(data, chart_title):
    """Ask the user how to view ``data`` and show it as a table or a chart.

    An answer starting with "t" prints the data; one starting with "l" or
    "g" (any case) prints a point-and-line chart of the 'Value' column
    against the 'Month, Year' column with an xkcd theme. Any other answer,
    including an empty one, shows nothing.

    Args:
        data: DataFrame with 'Month, Year' and 'Value' columns.
        chart_title: title of the chart.
    """
    # raw_input exists only on Python 2; input is its Python 3 equivalent.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    print("Good News! You're data has been returned. I'm happy to show it to you.")
    print("Just tell me how you want it - Table or Line Graph?")

    answer = read_line("Choose table or line > ")
    # Slicing instead of indexing so an empty answer does not raise.
    first = answer.strip().lower()[:1]

    if first == "t":
        print("Ok, here's your data.")
        print(data)
    elif first in ("l", "g"):
        import ggplot as gg

        plot = gg.ggplot(gg.aes(x='Month, Year', y='Value'), data=data) + \
            gg.geom_point(color='black') + \
            gg.geom_line(color='green') + \
            gg.ggtitle(chart_title) + \
            gg.xlab("Month, Year") + \
            gg.ylab("Value")
        # NOTE(review): a monthly scale_x_date (breaks '1 month', labels
        # "%B") used to be built here but was never added to the plot, so it
        # had no effect. Confirm whether it should be attached.
        print(plot + gg.theme_xkcd())