def test_results_withlabels(self):
    """Check that labels are attached to the stats only when requested.

    With ``labels`` given, every returned stats dict must carry the
    matching label; without ``labels`` no dict may contain a ``"label"``
    key.
    """
    labels = ["Test1", 2, "ardvark", 4]
    results = cbook.boxplot_stats(self.data, labels=labels)
    # zip() stops at the shorter input, so pin the count first; otherwise
    # a too-short result list would let the loop below pass vacuously.
    assert len(results) == len(labels)
    for lab, res in zip(labels, results):
        assert res["label"] == lab

    results = cbook.boxplot_stats(self.data)
    for res in results:
        assert "label" not in res
def test_results_withlabels(self):
    """Check that labels are attached to the stats only when requested.

    With ``labels`` given, every returned stats dict must carry the
    matching label; without ``labels`` no dict may contain a ``'label'``
    key.
    """
    labels = ['Test1', 2, 'ardvark', 4]
    results = cbook.boxplot_stats(self.data, labels=labels)
    # zip() stops at the shorter input, so pin the count first; otherwise
    # a too-short result list would let the loop below pass vacuously.
    assert len(results) == len(labels)
    for lab, res in zip(labels, results):
        assert res['label'] == lab

    results = cbook.boxplot_stats(self.data)
    for res in results:
        assert 'label' not in res
def test_boxplot_stats_autorange_false(self):
    """Compare whiskers and fliers with ``autorange`` switched off and on.

    The sample is 140 zeros framed by a single -25 and a single 25.
    """
    sample = np.hstack([-25, np.zeros(shape=140), 25])
    stats_off = cbook.boxplot_stats(sample, autorange=False)[0]
    stats_on = cbook.boxplot_stats(sample, autorange=True)[0]

    # autorange off: whiskers sit at 0 and both extremes are fliers.
    assert stats_off['whislo'] == 0
    assert stats_off['whishi'] == 0
    assert_array_almost_equal(stats_off['fliers'], [-25, 25])

    # autorange on: whiskers reach the extremes and no fliers remain.
    assert stats_on['whislo'] == -25
    assert stats_on['whishi'] == 25
    assert_array_almost_equal(stats_on['fliers'], [])
def setup(self):
    """Build the seeded log-normal fixture and the reference statistics.

    The random seed is fixed so the hard-coded reference values below stay
    comparable with what ``cbook.boxplot_stats`` returns for ``self.data``.
    """
    np.random.seed(937)
    self.nrows, self.ncols = 37, 4
    shape = (self.nrows, self.ncols)
    self.data = np.random.lognormal(size=shape, mean=1.5, sigma=1.75)

    # Names of the entries a stats dict is compared against.
    self.known_keys = sorted(
        [
            "mean", "med", "q1", "q3", "iqr", "cilo", "cihi",
            "whislo", "whishi", "fliers", "label",
        ]
    )
    self.std_results = cbook.boxplot_stats(self.data)

    # Reference values for the default (non-bootstrapped) call.
    self.known_nonbootstrapped_res = {
        "cihi": 6.8161283264444847,
        "cilo": -0.1489815330368689,
        "iqr": 13.492709959447094,
        "mean": 13.00447442387868,
        "med": 3.3335733967038079,
        "fliers": np.array(
            [92.55467075, 87.03819018, 42.23204914, 39.29390996]
        ),
        "q1": 1.3597529879465153,
        "q3": 14.85246294739361,
        "whishi": 27.899688243699629,
        "whislo": 0.042143774965502923,
    }

    # Reference confidence limits for bootstrap=10000.
    self.known_bootstrapped_ci = {
        "cihi": 8.939577523357828,
        "cilo": 1.8692703958676578,
    }

    # Reference values for whis=3.
    self.known_whis3_res = {
        "whishi": 42.232049135969874,
        "whislo": 0.042143774965502923,
        "fliers": np.array([92.55467075, 87.03819018]),
    }

    # Reference whiskers for percentile-based and full-range whiskers.
    self.known_res_percentiles = {
        "whislo": 0.1933685896907924,
        "whishi": 42.232049135969874,
    }
    self.known_res_range = {
        "whislo": 0.042143774965502923,
        "whishi": 92.554670752188699,
    }
def test_results_bootstrapped(self):
    """Bootstrapped confidence limits of the first dataset match the reference."""
    first = cbook.boxplot_stats(self.data, bootstrap=10000)[0]
    for name, expected in self.known_bootstrapped_ci.items():
        assert_approx_equal(first[name], expected)
def test_results_whiskers_percentiles(self):
    """Whiskers at the 5th/95th percentiles match the reference values."""
    first = cbook.boxplot_stats(self.data, whis=[5, 95])[0]
    for name, expected in self.known_res_percentiles.items():
        # Array-valued "fliers" need the array comparison; everything else
        # is a scalar.
        check = (
            assert_array_almost_equal if name == "fliers"
            else assert_approx_equal
        )
        check(first[name], expected)
def setup(self):
    """Build the seeded log-normal fixture and the reference statistics.

    The random seed is fixed so the hard-coded reference values below stay
    comparable with what ``cbook.boxplot_stats`` returns for ``self.data``.
    """
    np.random.seed(937)
    self.nrows, self.ncols = 37, 4
    shape = (self.nrows, self.ncols)
    self.data = np.random.lognormal(size=shape, mean=1.5, sigma=1.75)

    # Names of the entries a stats dict is compared against.
    self.known_keys = sorted([
        'mean', 'med', 'q1', 'q3', 'iqr', 'cilo', 'cihi',
        'whislo', 'whishi', 'fliers', 'label',
    ])
    self.std_results = cbook.boxplot_stats(self.data)

    # Reference values for the default (non-bootstrapped) call.
    self.known_nonbootstrapped_res = {
        'cihi': 6.8161283264444847,
        'cilo': -0.1489815330368689,
        'iqr': 13.492709959447094,
        'mean': 13.00447442387868,
        'med': 3.3335733967038079,
        'fliers': np.array(
            [92.55467075, 87.03819018, 42.23204914, 39.29390996]
        ),
        'q1': 1.3597529879465153,
        'q3': 14.85246294739361,
        'whishi': 27.899688243699629,
        'whislo': 0.042143774965502923,
        'label': 1,
    }

    # Reference confidence limits for bootstrap=10000.
    self.known_bootstrapped_ci = {
        'cihi': 8.939577523357828,
        'cilo': 1.8692703958676578,
    }

    # Reference values for whis=3.
    self.known_whis3_res = {
        'whishi': 42.232049135969874,
        'whislo': 0.042143774965502923,
        'fliers': np.array([92.55467075, 87.03819018]),
    }

    # Expected label of the first dataset when labels are supplied.
    self.known_res_with_labels = {'label': 'Test1'}

    # Reference whiskers for percentile-based and full-range whiskers.
    self.known_res_percentiles = {
        'whislo': 0.1933685896907924,
        'whishi': 42.232049135969874,
    }
    self.known_res_range = {
        'whislo': 0.042143774965502923,
        'whishi': 92.554670752188699,
    }
def compute_boxplot(self, series):
    """Return box-plot statistics for the non-null values of a pandas Series.

    The result is the first stats dict from matplotlib's ``boxplot_stats``
    with two changes: a ``'count'`` entry holding the number of non-null
    values, and ``'fliers'`` flattened to a ``"|"``-separated string.
    An empty dict is returned when no non-null values remain.
    """
    from matplotlib.cbook import boxplot_stats

    present = series[series.notnull()]
    n_values = len(present.values)
    if n_values == 0:
        return {}

    stats = boxplot_stats(list(present.values))[0]
    stats['count'] = n_values
    stats['fliers'] = "|".join(str(flier) for flier in stats['fliers'])
    return stats
def test_results_whiskers_range(self):
    """Whiskers spanning the whole data range match the reference values.

    The full range is requested as the 0th/100th percentiles,
    ``whis=[0, 100]``, instead of the string ``'range'``: Matplotlib
    deprecated the string spelling (3.2) in favour of the percentile pair,
    which selects the same minimum/maximum whiskers.
    """
    results = cbook.boxplot_stats(self.data, whis=[0, 100])
    res = results[0]
    for key, expected in self.known_res_range.items():
        # Array-valued 'fliers' need the array comparison; everything else
        # is a scalar.
        if key != 'fliers':
            assert_statement = assert_approx_equal
        else:
            assert_statement = assert_array_almost_equal
        assert_statement(res[key], expected)
def median_confidence_intervals(data):
    """Return the medians of ``data`` and their confidence-interval offsets.

    Statistics come from ``cbook.boxplot_stats(data)``; one entry is
    produced per stats dict it returns.

    Returns a 3-tuple ``(medians, lower, upper)`` where ``medians`` is a
    list, ``lower`` is ``median - cilo`` and ``upper`` is ``cihi - median``
    (both NumPy arrays). For empty input the placeholder
    ``([0], [0], [0])`` is returned.
    """
    # ``not data`` raises for multi-element NumPy arrays and is True for
    # ``np.array([0])``; use len() where the input is sized and fall back
    # to plain truthiness for unsized inputs such as None.
    try:
        is_empty = len(data) == 0
    except TypeError:
        is_empty = not data
    if is_empty:
        return [0], [0], [0]

    bxpstats = cbook.boxplot_stats(data)
    medians = [stat['med'] for stat in bxpstats]
    lower_limits = np.array([stat['cilo'] for stat in bxpstats])
    upper_limits = np.array([stat['cihi'] for stat in bxpstats])
    # list (op) ndarray broadcasts, so both offsets come back as arrays.
    return medians, medians - lower_limits, upper_limits - medians
def plt1(rpt, key='REL', log=True):
    """Plot a supervised-learning report as one box per method group.

    ``rpt`` is either a report object exposing a ``bmk`` table or a path
    ending in ``'pgz'``, which is loaded with ``lpz``. Rows of ``bmk`` whose
    ``key`` column equals ``key`` (excluding method ``'nul'``) are grouped
    and their ``val`` columns drawn with ``Axes.bxp``. With ``log`` true the
    y axis is logarithmic. Returns ``(rpt, plt)``.
    """
    sim_cols = ['fam', 'frq', 'mdl', 'nxp']
    net_cols = ['gtp', 'xtp', 'nwk']
    mtd_cols = ['mtd', 'par']

    # Load the report from file if a path was given.
    if isinstance(rpt, str) and rpt.endswith('pgz'):
        rpt = lpz(rpt)
    bmk = rpt.bmk

    # Title: the simulation settings of the first benchmark record.
    first_sim = bmk.iloc[0][sim_cols]
    ttl = ', '.join('{}={}'.format(k, v) for k, v in first_sim.items())

    group_cols = net_cols + mtd_cols
    err = bmk[bmk.key == key].loc[:, net_cols + mtd_cols + ['val']]
    err = err[err.mtd != 'nul']

    # One column of values and one label per group.
    columns, labels = [], []
    for group_key, frame in err.groupby(group_cols):
        # NOTE(review): this tests whether the literal 'nnt' is one of the
        # group key values -- confirm that is the intended condition.
        if 'nnt' in group_key:
            template = "{nwk:>10}.{mtd}"
        else:
            template = "{par:>10}.{mtd}"
        label = template.format(**frame.iloc[0])
        columns.append(np.array(frame.val))
        labels.append(label)
    stats = cbook.boxplot_stats(np.array(columns).T, labels=labels)

    plt.close('all')
    plt.title(ttl)
    ax = plt.axes()
    if log:
        ax.set_yscale('log')
    ax.bxp(stats)

    # Dashed reference line at y = 1 across the current x bounds.
    x0, x1 = ax.get_xbound()
    ax.plot(np.linspace(x0, x1, 10), np.ones(10),
            linestyle='--', color='red', linewidth=.5)

    for tick in ax.get_xticklabels():
        tick.set_rotation(90)
    return rpt, plt
def median_confidence_intervals(data: list):
    """
    Compute the median and the median 95% confidence intervals for the data.

    Statistics come from ``cbook.boxplot_stats(data)``; one entry is
    produced per stats dict it returns. Sized inputs other than lists
    (e.g. NumPy arrays) are accepted as well.

    :param data: the data whose statistics are to be calculated
    :return: the medians, the low confidence intervals, and the high
        confidence intervals (as offsets from the medians); the placeholder
        ``([0], [0], [0])`` for empty input
    """
    # ``not data`` raises for multi-element NumPy arrays and is True for
    # ``np.array([0])``; use len() where the input is sized and fall back
    # to plain truthiness for unsized inputs such as None.
    try:
        is_empty = len(data) == 0
    except TypeError:
        is_empty = not data
    if is_empty:
        return [0], [0], [0]

    bxpstats = cbook.boxplot_stats(data)
    medians = [stat['med'] for stat in bxpstats]
    lower_limits = np.array([stat['cilo'] for stat in bxpstats])
    upper_limits = np.array([stat['cihi'] for stat in bxpstats])
    # list (op) ndarray broadcasts, so both offsets come back as arrays.
    return medians, medians - lower_limits, upper_limits - medians
def plot_boxplots(vectors, axs, s, plot_colors, pos_color, neg_color):
    """Draw paired "early"/"late" box plots for row ``s`` and link the points.

    ``vectors[s, 0, :]`` is drawn on ``axs[s, 0]`` (early) and
    ``vectors[s, 1, :]`` on ``axs[s, 1]`` (late). Points that are fliers in
    either box plot are hidden; the remaining points are scattered with a
    small random x jitter and each early/late pair is joined by a dotted
    line coloured ``pos_color`` (late >= early), ``neg_color`` (late lower)
    or black (equal). Two solid lines join the outer top and bottom corners
    of the two axes, and three text boxes on the early axes report summary
    statistics computed from the full (unfiltered) data.

    ``plot_colors`` supplies one colour per point, in data order.
    """
    y_early = vectors[s, 0, :]
    x_early = np.random.normal(1, 0.02, len(y_early))
    y_late = vectors[s, 1, :]
    x_late = np.random.normal(1, 0.02, len(y_late))
    axs_early = axs[s, 0]
    axs_late = axs[s, 1]

    # Summary statistics use every point, fliers included.
    adaptive_changes = sum(
        1 for early, late in zip(y_early, y_late) if early > late
    )
    adaptive_change_ratio = np.round(
        float(adaptive_changes) / len(x_early), 3)
    mean_diff = np.round(np.mean(y_late) - np.mean(y_early), 3)
    median_diff = np.round(np.median(y_late) - np.median(y_early), 3)

    mean_props = {"marker": "s", "markerfacecolor": "black",
                  "markeredgecolor": "black"}
    axs_early.boxplot(y_early, showmeans=True, meanprops=mean_props,
                      showfliers=False)
    early_fliers = boxplot_stats(y_early)[0]['fliers']
    axs_late.boxplot(y_late, showmeans=True, meanprops=dict(mean_props),
                     showfliers=False)
    late_fliers = boxplot_stats(y_late)[0]['fliers']

    # Drop every pair in which either value is a flier of its own box plot.
    outlier_idxs = [
        i for i in range(len(y_late))
        if y_late[i] in late_fliers or y_early[i] in early_fliers
    ]
    outlier_mask = np.ones(len(y_late), dtype=bool)
    outlier_mask[outlier_idxs] = 0
    x_early = x_early[outlier_mask]
    y_early = y_early[outlier_mask]
    x_late = x_late[outlier_mask]
    y_late = y_late[outlier_mask]
    plot_colors = np.array(plot_colors)[outlier_mask]

    axs_early.scatter(x_early, y_early, marker='.', c=plot_colors)
    axs_late.scatter(x_late, y_late, marker='.', c=plot_colors)

    # Dotted connector between each early point and its late counterpart.
    xy_early = np.column_stack((x_early, y_early))
    xy_late = np.column_stack((x_late, y_late))
    for xy_early_point, xy_late_point in zip(xy_early, xy_late):
        if xy_late_point[1] < xy_early_point[1]:
            c = neg_color
        elif xy_late_point[1] == xy_early_point[1]:
            c = 'black'
        else:
            c = pos_color
        con = ConnectionPatch(xyA=xy_late_point, xyB=xy_early_point,
                              coordsA='data', coordsB='data',
                              axesA=axs_late, axesB=axs_early,
                              linewidth=0.5, linestyle='dotted', color=c)
        axs_late.add_artist(con)

    # Each axes' limits must come from that same axes: the corner points
    # below are interpreted in the data coordinates of axesB (early) and
    # axesA (late) respectively. The previous code read the early y limits
    # from the late axes and the late x limits from the early axes.
    early_xlim = axs_early.axes.get_xlim()
    early_ylim = axs_early.axes.get_ylim()
    late_xlim = axs_late.axes.get_xlim()
    late_ylim = axs_late.axes.get_ylim()
    xy_top = np.array([[early_xlim[0], early_ylim[1]],
                       [late_xlim[1], late_ylim[1]]])
    xy_bottom = np.array([[early_xlim[0], early_ylim[0]],
                          [late_xlim[1], late_ylim[0]]])
    for corners in (xy_top, xy_bottom):
        frame_line = ConnectionPatch(xyA=corners[1, :], xyB=corners[0, :],
                                     coordsA='data', coordsB='data',
                                     axesA=axs_late, axesB=axs_early,
                                     linewidth=0.7)
        axs_late.add_artist(frame_line)

    # Summary boxes, positioned in axes-fraction coordinates.
    summaries = (
        (0.2, 0.9, "Adaptive Change \nRatio: " + str(adaptive_change_ratio)),
        (0.2, 0.5, "Mean Difference: \n" + str(mean_diff)),
        (0.21, 0.1, "Median Difference: \n" + str(median_diff)),
    )
    for x_pos, y_pos, message in summaries:
        axs_early.text(x_pos, y_pos, message, ha='center', va='center',
                       color='k', fontsize='medium', fontweight='semibold',
                       transform=axs_early.transAxes,
                       bbox=dict(facecolor='none', edgecolor='k', pad=3))
def test_results_whiskers_percentiles(self):
    """Whiskers at the 5th/95th percentiles match the reference values."""
    first = cbook.boxplot_stats(self.data, whis=[5, 95])[0]
    expected = self.known_res_percentiles
    for name in expected:
        assert_array_almost_equal(first[name], expected[name])
def test_results_whiskers_float(self):
    """Whiskers and fliers for ``whis=3`` match the reference values."""
    first = cbook.boxplot_stats(self.data, whis=3)[0]
    expected = self.known_whis3_res
    for name in expected:
        assert_array_almost_equal(first[name], expected[name])
A good general reference on boxplots and their history can be found here: http://vita.had.co.nz/papers/boxplots.pdf """ import numpy as np import matplotlib.pyplot as plt import matplotlib.cbook as cbook # fake data np.random.seed(19680801) data = np.random.lognormal(size=(37, 4), mean=1.5, sigma=1.75) labels = list('ABCD') # compute the boxplot stats stats = cbook.boxplot_stats(data, labels=labels, bootstrap=10000) ############################################################################### # After we've computed the stats, we can go through and change anything. # Just to prove it, I'll set the median of each set to the median of all # the data, and double the means for n in range(len(stats)): stats[n]['med'] = np.median(data) stats[n]['mean'] *= 2 print(list(stats[0])) fs = 10 # fontsize ###############################################################################
print('upper whisk group one: ', upper_whisk_group_one) print('lower whisk group one: ', lower_whisk_group_one) ''' Q 2(c): Boxplot of x ''' sns.boxplot(col, orient='vertical', color='yellow') plt.show() ''' Q 2(d): boxplots for each group and overall boxplot ''' # splitting the overall data into groups overall_data = file.copy() overall_data['group'] = 'all data' group_zero = file.loc[file['group'] == 0, :] group_one = file.loc[file['group'] == 1, :] # combining the data segments for the box plot combined = pd.concat([overall_data, group_zero, group_one], axis=0) sns.boxplot(x=combined['group'], y=combined['x']) plt.show() # obtaining the outlier values stats = boxplot_stats(overall_data['x']) print('outliers for overall data: ', stats[0]['fliers']) stats = boxplot_stats(group_one['x']) print('outliers for group one: ', stats[0]['fliers']) stats = boxplot_stats(group_zero['x']) print('outliers for group zero: ', stats[0]['fliers'])
def boxplt(dataset):
    """Prepare data for a box plot.

    Drops the ``'class'`` column from ``dataset`` and returns the list of
    stats dicts that ``cbook.boxplot_stats`` computes for the remaining
    columns.
    """
    # ``drop('class', 1)`` (positional axis) and ``DataFrame.as_matrix()``
    # are no longer available in current pandas; ``drop(columns=...)`` and
    # ``.values`` are the equivalent spellings.
    features = dataset.drop(columns='class')
    return cbook.boxplot_stats(features.values)
def remove_outliers(x, of):
    """Return the rows of ``x`` whose ``of`` column is not a box-plot flier.

    The whisker limits come from ``boxplot_stats(x[of])``. Rows with a value
    between the two whiskers, endpoints included, are kept.
    """
    stat = boxplot_stats(x[of])[0]
    low, high = stat["whislo"], stat["whishi"]
    # The whisker ends are themselves observed, non-outlier data points
    # (fliers lie strictly beyond them), so the comparison must be
    # inclusive; strict inequalities would drop valid rows.
    return x.loc[(x[of] >= low) & (x[of] <= high)]
def test_label_error(self):
    """Call ``boxplot_stats`` with only two labels for ``self.data``.

    NOTE(review): presumably this call is expected to raise because of the
    label count; nothing in this block asserts that -- confirm whether a
    decorator outside this view does.
    """
    cbook.boxplot_stats(self.data, labels=[1, 2])
def test_bad_dims(self):
    """Call ``boxplot_stats`` with three-dimensional input.

    NOTE(review): presumably this call is expected to raise because of the
    input's dimensionality; nothing in this block asserts that -- confirm
    whether a decorator outside this view does.
    """
    cube = np.random.normal(size=(34, 34, 34))
    cbook.boxplot_stats(cube)
def test_label_error(self):
    """Call ``boxplot_stats`` with only two labels for ``self.data``.

    NOTE(review): presumably this call is expected to raise because of the
    label count; nothing in this block asserts that -- confirm whether a
    decorator outside this view does.
    """
    cbook.boxplot_stats(self.data, labels=[1, 2])
def test_results_bootstrapped(self):
    """Bootstrapped confidence limits of the first dataset match the reference."""
    first = cbook.boxplot_stats(self.data, bootstrap=10000)[0]
    for name, expected in self.known_bootstrapped_ci.items():
        assert_approx_equal(first[name], expected)
def main(): parser = argparse.ArgumentParser() parser.add_argument('sequence_name', help='dataset sequence name') parser.add_argument( '--diff_list', help= 'string - name of diff_list file (needed for Graph 1, output of depth_map.py)' ) parser.add_argument( '--graph_depths', help= 'directory - graph files (needed for Graphs 2-5, output of depth_map.py)' ) parser.add_argument( '--x_axis_spacing', help='integer - separation among ticks in the x axis (for readability))' ) args = parser.parse_args() make_fig_1(args) # depth vs errors (amount, mean) # load all graph_depth*.npy files graph_depth_dir = "./depth_info/" if args.graph_depths: graph_depth_dir = args.graph_depths x_axis_spacing = 5 if args.x_axis_spacing and args.x_axis_spacing >= 1: x_axis_spacing = int(float(args.x_axis_spacing)) print "Using graph depth dir: ", graph_depth_dir npys = glob.glob(graph_depth_dir + 'graph_depth*.npy') if len(npys) <= 0: print "No data to collect.." return bins = np.load(graph_depth_dir + 'bins.npy') print bins graph_depth = [[] for i in range(len(bins))] fig, ax = plt.subplots(1, 1) bxpstats = list() graphs = [] for npy in npys: g = np.load(npy)[1] if len(g) > 0: graphs.append(g) means = np.zeros(len(bins)) medians = np.zeros(len(bins)) maxs = np.zeros(len(bins)) for i in range(len(bins)): graph_depth = [] for j in range(len(npys)): if i < len(graphs[j]): if len(graphs[j][i]) > 0: graph_depth.extend(graphs[j][i]) if len(graph_depth) > 0: means[i] = np.mean(graph_depth) medians[i] = np.median(graph_depth) maxs[i] = np.max(graph_depth) bxpstats.extend(cbook.boxplot_stats(np.ravel(graph_depth))) else: bxpstats.extend(cbook.boxplot_stats(np.ravel([0]))) print "ITEM : ", i, len(graph_depth) ax.bxp(bxpstats, showfliers=False) bins_str = map( lambda x: str(int(bins[x])) if x % x_axis_spacing == 0 else '', range(len(bins))) # bins-bins[0]+1 since it can start at any number plt.xticks(bins - bins[0] + 1, bins_str) plt.xlabel(utf8("Distance to the camera (depth, m)")) plt.ylabel("Error 
(m)") plt.savefig(args.sequence_name + "2.png") # mean plt.figure(3) plt.plot(bins, means) plt.xlabel(utf8("Distance to the camera (depth, m)")) plt.ylabel("Error - mean (m)") plt.savefig(args.sequence_name + "3.png") plt.figure(4) plt.plot(bins, medians) plt.xlabel(utf8("Distance to the camera (depth, m)")) plt.ylabel("Error - median (m)") plt.savefig(args.sequence_name + "4.png") np.save("depth_info/medians" + args.sequence_name + ".npy", medians) plt.figure(5) plt.plot(bins, maxs) plt.xlabel(utf8("Distance to the camera (depth, m)")) plt.ylabel(utf8("Error - máximo (m)")) plt.savefig(args.sequence_name + "5.png") print "" print "Saved " + args.sequence_name + "{1-5}.png files"
def season_boxplot(reload_data=False):
    """Draw per-season box plots for every pollutant and dump the stats.

    For each entry of ``pollutants`` the data dict is loaded (and rebuilt
    first when ``reload_data`` is true), box-plot statistics are computed
    per station and season for the raw and the log-transformed values, and
    both are drawn on a two-row figure saved as
    ``<pollutant>_log_box_fliers.png``. The statistics, with the
    ``"fliers"`` entries removed, are written to two JSON files.
    """
    with open("log_sigma3_trim_bundarys_list.json", "r") as f:
        bundarys_list = json.load(f)

    def bxp_style(color):
        # Fresh keyword dict per call so the two axes never share props.
        return dict(
            widths=box_width,
            showfliers=True,
            boxprops={"color": color},
            whiskerprops={"color": color},
            capprops={"color": color},
            medianprops={"color": color},
            flierprops={"color": color, "marker": "+"},
        )

    for plt_i in range(len(pollutants)):
        pollutant = pollutants[plt_i]
        if reload_data:
            get_data_dict(plt_i, bundarys_list[pollutant], method="clip")
        data_dict = load_data_dict()

        # Total number of samples over all stations, seasons and inner keys.
        c = sum(
            len(data_dict[i][j][k])
            for i in stations
            for j in range(len(seasons))
            for k in data_dict[i][j]
        )
        print(
            "Size of {} data: {}MB\nCount of data: {}".format(
                pollutant, sys.getsizeof(data_dict) / 1024 ** 2, c
            )
        )

        fig, axs = plt.subplots(2, 1, figsize=[10, 8])
        box_width = 0.2
        offest = 0.05
        positions = np.linspace(0, 3, 4)
        box_data = []
        box_log_data = []
        for i_index, station in enumerate(stations):
            box_stats = []
            box_stats_log = []
            for j in range(len(seasons)):
                # Pool every inner series of this station/season.
                all_years = []
                for k in data_dict[station][j]:
                    all_years += data_dict[station][j][k]
                box_stats += cbook.boxplot_stats(all_years)
                box_stats_log += cbook.boxplot_stats(np.log(all_years))

            box_color = box_colors[i_index]
            # Shift each station's group of boxes sideways.
            shifted = positions + i_index * (box_width + offest)
            for axis, stats in ((axs[0], box_stats), (axs[1], box_stats_log)):
                _ = axis.bxp(stats, positions=shifted, **bxp_style(box_color))
            box_data.append(box_stats)
            box_log_data.append(box_stats_log)

        patches = [
            mpatches.Patch(color=box_colors[i], label=station_names[i])
            for i in range(len(stations))
        ]
        axs[0].set_ylabel(pollutant_labels[plt_i])
        axs[1].set_ylabel(pollutant_log_labels[plt_i])
        scale_ls = positions + box_width + offest
        axs[0].set_xticks([])
        axs[1].set_xticks(scale_ls)
        axs[1].set_xticklabels(seasons)
        axs[0].legend(handles=patches, bbox_to_anchor=(1.1, 1.3), ncol=3)
        filename = "{}_log_box_fliers.png".format(pollutant)
        fig.savefig(filename, dpi=300, bbox_inches="tight")

        # Remove the flier entries before dumping the stats to JSON.
        for per_station in box_data + box_log_data:
            for stats in per_station:
                stats.pop("fliers")
        with open("{}_box_data.json".format(pollutant), "w") as f:
            json.dump(box_data, f)
        with open("{}_box_log_data.json".format(pollutant), "w") as f:
            json.dump(box_log_data, f)
def test_results_withlabels(self):
    """The first dataset's stats carry the first supplied label."""
    first = cbook.boxplot_stats(self.data, labels=['Test1', 2, 3, 4])[0]
    for name, expected in self.known_res_with_labels.items():
        assert_equal(first[name], expected)
def test_results_bootstrapped(self):
    """Bootstrapped confidence limits of the first dataset match the reference."""
    first = cbook.boxplot_stats(self.data, bootstrap=10000)[0]
    expected = self.known_bootstrapped_ci
    for name in expected:
        assert_approx_equal(first[name], expected[name])
def test_bad_dims(self):
    """Call ``boxplot_stats`` with three-dimensional input.

    NOTE(review): presumably this call is expected to raise because of the
    input's dimensionality; nothing in this block asserts that -- confirm
    whether a decorator outside this view does.
    """
    cube = np.random.normal(size=(34, 34, 34))
    cbook.boxplot_stats(cube)
def test_results_whiskers_float(self):
    """Whiskers and fliers for ``whis=3`` match the reference values."""
    first = cbook.boxplot_stats(self.data, whis=3)[0]
    expected = self.known_whis3_res
    for name in expected:
        assert_array_almost_equal(first[name], expected[name])
def plotlyGCbias(file_name, frequencies, reads_per_gc, region_size):
    """Write a two-panel plotly figure of GC bias to ``file_name``.

    Top panel: one box per GC bin (bins between 0.2 and 0.7) built from the
    matplotlib box-plot statistics of the read counts. Bottom panel:
    ``log2`` of the third column of ``frequencies`` against an evenly spaced
    GC fraction. The file is written without being opened.
    """
    import plotly.offline as py
    import plotly.graph_objs as go
    import matplotlib.cbook as cbook

    def panel_title(text, y):
        # Centred caption placed in paper coordinates above a panel.
        return {
            'yanchor': 'bottom',
            'xref': 'paper',
            'xanchor': 'center',
            'yref': 'paper',
            'text': text,
            'y': y,
            'x': 0.5,
            'font': {'size': 16},
            'showarrow': False,
        }

    fig = go.Figure()
    layout = fig['layout']
    layout['xaxis1'] = dict(domain=[0.0, 1.0], anchor="y1",
                            title="GC fraction")
    layout['yaxis1'] = dict(domain=[0.55, 1.0], anchor="x1",
                            title="Number of reads")
    layout['xaxis2'] = dict(domain=[0.0, 1.0], anchor="y2",
                            title="GC fraction", range=[0.2, 0.7])
    layout['yaxis2'] = dict(domain=[0.0, 0.45], anchor="x2",
                            title="log2(observed/expected)")

    annos = [
        panel_title("reads per {} base region".format(region_size), 1.0),
        panel_title("normalized observed/expected read counts", 0.5),
    ]

    # prepare data for boxplot
    reads, GC = reads_per_gc.T
    reads_per_gc, bin_labels = bin_by(reads, GC, nbins=100)
    to_keep = [idx for idx, x in enumerate(bin_labels) if 0.2 <= x <= 0.7]
    reads_per_gc = [reads_per_gc[x] for x in to_keep]
    bin_labels = [bin_labels[x] for x in to_keep]

    # Produce the same box plot as matplotlib while vastly reducing the
    # output file size: each bin is replaced by nine values derived from
    # its box-plot statistics.
    bins = []
    for b in reads_per_gc:
        s = cbook.boxplot_stats(b)[0]
        bins.append([
            s['whislo'],
            s['q1'], s['q1'],
            s['med'], s['med'], s['med'],
            s['q3'], s['q3'],
            s['whishi'],
        ])

    # top plot
    data = [
        go.Box(x=label, y=values, xaxis='x1', yaxis='y1',
               boxpoints='outliers', showlegend=False,
               name="{}".format(label),
               line=dict(color='rgb(107,174,214)'))
        for label, values in zip(bin_labels, bins)
    ]

    # bottom plot
    gc_fraction = np.linspace(0, 1, frequencies.shape[0])
    data.append(go.Scatter(x=gc_fraction, y=np.log2(frequencies[:, 2]),
                           xaxis='x2', yaxis='y2', showlegend=False,
                           line=dict(color='rgb(107,174,214)')))

    fig['data'] = data
    fig['layout']['annotations'] = annos
    py.plot(fig, filename=file_name, auto_open=False)
def test_results_whiskers_range(self):
    """Whiskers at the 0th/100th percentiles match the reference values."""
    first = cbook.boxplot_stats(self.data, whis=[0, 100])[0]
    expected = self.known_res_range
    for name in expected:
        assert_array_almost_equal(first[name], expected[name])
def main(): parser = argparse.ArgumentParser() parser.add_argument('sequence_name', help='dataset sequence name') parser.add_argument('--diff_list', help='string - name of diff_list file (needed for Graph 1, output of depth_map.py)') parser.add_argument('--graph_depths', help='directory - graph files (needed for Graphs 2-5, output of depth_map.py)') parser.add_argument('--x_axis_spacing', help='integer - separation among ticks in the x axis (for readability))') args = parser.parse_args() make_fig_1(args) # depth vs errors (amount, mean) # load all graph_depth*.npy files graph_depth_dir = "./depth_info/" if args.graph_depths: graph_depth_dir = args.graph_depths x_axis_spacing = 5 if args.x_axis_spacing and args.x_axis_spacing >= 1: x_axis_spacing = int(float(args.x_axis_spacing)) print "Using graph depth dir: ", graph_depth_dir npys = glob.glob(graph_depth_dir+'graph_depth*.npy') if len(npys)<=0 : print "No data to collect.." return bins = np.load(graph_depth_dir+'bins.npy') print bins graph_depth = [[] for i in range(len(bins))] fig, ax = plt.subplots(1,1) bxpstats = list() graphs = [] for npy in npys: g = np.load(npy)[1] if len(g) > 0: graphs.append(g) means = np.zeros(len(bins)) medians = np.zeros(len(bins)) maxs = np.zeros(len(bins)) for i in range(len(bins)): graph_depth = [] for j in range(len(npys)): if i < len(graphs[j]): if len(graphs[j][i]) > 0: graph_depth.extend(graphs[j][i]) if len(graph_depth) > 0: means[i] = np.mean(graph_depth) medians[i] = np.median(graph_depth) maxs[i] = np.max(graph_depth) bxpstats.extend(cbook.boxplot_stats(np.ravel(graph_depth))) else: bxpstats.extend(cbook.boxplot_stats(np.ravel([0]))) print "ITEM : " , i, len(graph_depth) ax.bxp(bxpstats, showfliers=False) bins_str = map(lambda x: str(int(bins[x])) if x % x_axis_spacing == 0 else '', range(len(bins))) # bins-bins[0]+1 since it can start at any number plt.xticks(bins-bins[0]+1, bins_str) plt.xlabel(utf8("Distance to the camera (depth, m)")) plt.ylabel("Error (m)") 
plt.savefig(args.sequence_name+"2.png") # mean plt.figure(3) plt.plot(bins, means) plt.xlabel(utf8("Distance to the camera (depth, m)")) plt.ylabel("Error - mean (m)") plt.savefig(args.sequence_name+"3.png") plt.figure(4) plt.plot(bins, medians) plt.xlabel(utf8("Distance to the camera (depth, m)")) plt.ylabel("Error - median (m)") plt.savefig(args.sequence_name+"4.png") np.save("depth_info/medians"+args.sequence_name+".npy", medians) plt.figure(5) plt.plot(bins, maxs) plt.xlabel(utf8("Distance to the camera (depth, m)")) plt.ylabel(utf8("Error - máximo (m)")) plt.savefig(args.sequence_name+"5.png") print "" print "Saved " + args.sequence_name + "{1-5}.png files"
def test_label_error(self):
    """``boxplot_stats`` raises ``ValueError`` for the two-label call."""
    with pytest.raises(ValueError):
        cbook.boxplot_stats(self.data, labels=[1, 2])
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cbook

# Box plots of one log-normal sample on a log-scaled axis: statistics of the
# raw values next to statistics computed in log space and mapped back.
np.random.seed(0)
fig, ax = plt.subplots(figsize=(4, 6))
ax.set_yscale('log')
data = np.random.lognormal(-1.75, 2.75, size=37)

stats = cbook.boxplot_stats(data, labels=['arithmetic'])
logstats = cbook.boxplot_stats(np.log(data), labels=['log-transformed'])

# Map every numeric entry of the log-space stats back with exp(); the label
# is left untouched.
for lsdict in logstats:
    lsdict.update({
        key: np.exp(value)
        for key, value in lsdict.items()
        if key != 'label'
    })

stats += logstats
ax.bxp(stats)
fig.show()
st.markdown("# Missing Values") st.write(full_df.isnull().any()) st.markdown("# Boxplots and Histograms") st.markdown("## Drop useless columns") drop_cols = [ "LocID", "Country", "Time", "MidPeriod", "Code", "Unnamed: 0", "country", "year", "ranking" ] st.write(drop_cols) small_df = full_df.drop(columns=drop_cols) st.markdown("## Plot those with more than 3 outliers") plot_cols = [ column for column in small_df.columns if len([ y for stat in boxplot_stats(small_df[column]) for y in stat['fliers'] ]) > 3 ] st.write(plot_cols) _, axes = plt.subplots(nrows=len(plot_cols), ncols=2, figsize=(10, 150)) for i, column in enumerate(plot_cols): small_df.boxplot(column=column, ax=axes[i][0]) small_df.hist(column=column, ax=axes[i][1]) st.pyplot() st.markdown("## Print rows of max outliers") max_indices = small_df[plot_cols].idxmax(axis=0) for column in [ "Deaths", "DeathsMale", "DeathsFemale", "CNMR", "GrowthRate", "RelMigrations", "change_from_previous_year"
def test_results_bootstrapped(self):
    """Bootstrapped confidence limits of the first dataset match the reference."""
    first = cbook.boxplot_stats(self.data, bootstrap=10000)[0]
    expected = self.known_bootstrapped_ci
    for name in expected:
        assert_approx_equal(first[name], expected[name])
A good general reference on boxplots and their history can be found here: http://vita.had.co.nz/papers/boxplots.pdf """ import numpy as np import matplotlib.pyplot as plt import matplotlib.cbook as cbook # fake data np.random.seed(19680801) data = np.random.lognormal(size=(37, 4), mean=1.5, sigma=1.75) labels = list('ABCD') # compute the boxplot stats stats = cbook.boxplot_stats(data, labels=labels, bootstrap=10000) ############################################################################### # After we've computed the stats, we can go through and change anything. # Just to prove it, I'll set the median of each set to the median of all # the data, and double the means for n in range(len(stats)): stats[n]['med'] = np.median(data) stats[n]['mean'] *= 2 print(list(stats[0])) fs = 10 # fontsize ###############################################################################
def test_results_whiskers_range(self):
    """Whiskers spanning the whole data range match the reference values.

    The full range is requested as the 0th/100th percentiles,
    ``whis=[0, 100]``, instead of the string ``'range'``: Matplotlib
    deprecated the string spelling (3.2) in favour of the percentile pair,
    which selects the same minimum/maximum whiskers.
    """
    results = cbook.boxplot_stats(self.data, whis=[0, 100])
    res = results[0]
    for key, value in self.known_res_range.items():
        assert_array_almost_equal(res[key], value)
wcls = sumbrief[sumbrief['Experiment'].str.contains('_WCL')] wcl = wcls[~wcls['Experiment'].str.contains('_WCLP')] wclp = wcls[wcls['Experiment'].str.contains('_WCLP')] ubs = sumbrief[sumbrief['Experiment'].str.contains('_Ub')] ub = ubs[~ubs['Experiment'].str.contains('_UbP')] ubp = ubs[ubs['Experiment'].str.contains('_UbP')] #print wcl #print wclp #print ub #print ubp # compute the boxplot stats ubstats = cbook.boxplot_stats(ub[["MS/MS Identified"]].values, whis='range', bootstrap=None, labels=None) ubpstats = cbook.boxplot_stats(ubp[["MS/MS Identified"]].values, whis='range', bootstrap=None, labels=None) wclstats = cbook.boxplot_stats(wcl[["MS/MS Identified"]].values, whis='range', bootstrap=None, labels=None) wclpstats = cbook.boxplot_stats(wclp[["MS/MS Identified"]].values, whis='range', bootstrap=None, labels=None) fs = 10 # fontsize # demonstrate how to toggle the display of different elements: fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(4,4)) axes[0, 0].bxp(ubstats) axes[0, 0].set_title('ub', fontsize=fs) axes[0, 1].bxp(ubpstats) axes[0, 1].set_title('ubp', fontsize=fs) axes[1, 0].bxp(wclstats)
def test_results_whiskers_percentiles(self):
    """Whiskers at the 5th/95th percentiles match the reference values."""
    first = cbook.boxplot_stats(self.data, whis=[5, 95])[0]
    expected = self.known_res_percentiles
    for name in expected:
        assert_array_almost_equal(first[name], expected[name])
# flist.sort() t_start.append(dt.datetime.strptime(name[:15], '%Y%m%d_%H%M%S')) t_end.append(dt.datetime.strptime(name[17:32], '%Y%m%d_%H%M%S')) if file_paths.startswith(('/data/stor/basic_data/tri_data/rink/proc_data/d0728/INT/REC/')): with open(file_paths,'rb') as f: temp= f.read() pha_= np.fromfile(file_paths, dtype='>f') pha_[pha_==0] = np.nan pha_rectangle = np.reshape(pha_, (1559,845)) vlos = (-0.0175*pha_rectangle)/(4* 3.14159*(2.5/1440)) #LOS Speeds gla.append(vlos[673,233]) rock0.append(vlos[832,328]) #rock velocities #Atmosphere rockfav = np.array(vlos[790:815,340:365]) #rock square for noise analysis unravel = rockfav.ravel() stats['C'] = cbook.boxplot_stats(unravel, labels='C')[0] iqrstats.append(stats['C']) median = st.median(unravel) medians.append(median) means.append(np.mean(unravel)) true_mean = np.mean(unravel) +2*(np.std(unravel)/np.sqrt(400)) t_mean.append(true_mean) snratio= unravel/np.std(unravel) snratios.append(snratio) q1.append(np.percentile(unravel, 25, interpolation = 'midpoint')) q2.append(np.percentile(unravel, 50, interpolation = 'midpoint')) q3.append(np.percentile(unravel, 75, interpolation = 'midpoint')) IQR = np.percentile(unravel, 75, interpolation = 'midpoint') - np.percentile(unravel, 25, interpolation = 'midpoint') iqrs.append(IQR) inter_quart = iqr(unravel) inter.append(inter_quart)
def test_bad_dims(self):
    """``boxplot_stats`` raises ``ValueError`` for three-dimensional input."""
    cube = np.random.normal(size=(34, 34, 34))
    with pytest.raises(ValueError):
        cbook.boxplot_stats(cube)
def test_label_error(self):
    """``boxplot_stats`` raises ``ValueError`` for the two-label call."""
    with pytest.raises(ValueError):
        cbook.boxplot_stats(self.data, labels=[1, 2])
def test_bad_dims(self):
    """``boxplot_stats`` raises ``ValueError`` for three-dimensional input."""
    cube = np.random.normal(size=(34, 34, 34))
    with pytest.raises(ValueError):
        cbook.boxplot_stats(cube)
def figure1b(drip_boot):
    """Save three box-plot figures of average DRIP-seq read counts.

    For ``'control'`` and every entry of ``samples``, each row of
    ``drip_boot[sample]`` is averaged over three column windows (60:180,
    60:120 and 120:180). The per-sample box-plot statistics of those row
    means are drawn, one box per sample, into ``./figure/fig1b.png``,
    ``fig1b_ho.png`` and ``fig1b_cd.png``.
    """
    import matplotlib.cbook as cbook

    ylim_range = (0.29, 0.51)
    sample_names = ['control'] + samples
    graph_data = []
    graph_ho_data = []
    graph_cd_data = []
    for sample in sample_names:
        nrow, ncol = drip_boot[sample].shape
        sample_data = []
        sample_ho_data = []
        sample_cd_data = []
        for i in range(nrow):
            row = drip_boot[sample][i, :]
            assert len(row) == ncol, len(row)
            sample_data.append((drip_boot[sample][i, 60:180]).mean())
            sample_ho_data.append((drip_boot[sample][i, 60:120]).mean())
            sample_cd_data.append((drip_boot[sample][i, 120:180]).mean())
        assert len(sample_data) == nrow
        graph_data.append(cbook.boxplot_stats(sample_data)[0])
        graph_ho_data.append(cbook.boxplot_stats(sample_ho_data)[0])
        graph_cd_data.append(cbook.boxplot_stats(sample_cd_data)[0])

    y_axis_formatter = matplotlib.ticker.ScalarFormatter(useOffset=True,
                                                         useMathText=True,
                                                         useLocale=None)
    y_axis_formatter.set_powerlimits((-1, 1))
    y_axis_formatter.set_scientific(True)

    tick_labels = [sample_dict[i].replace(' ', '\n') for i in sample_names]
    panels = (
        (graph_data, 'Average DRIP-seq readcount in 12kb window',
         'fig1b.png'),
        (graph_ho_data, 'Average DRIP-seq readcount in 6kb HO region',
         'fig1b_ho.png'),
        (graph_cd_data, 'Average DRIP-seq readcount in 6kb CD region',
         'fig1b_cd.png'),
    )
    for stats, title, out_name in panels:
        _save_bxp_panel(stats, tick_labels, title, y_axis_formatter,
                        ylim_range, out_name)


def _save_bxp_panel(stats, tick_labels, title, y_axis_formatter, ylim_range,
                    out_name):
    """Draw one box-plot panel from precomputed stats and save it in ./figure."""
    ax = plt.axes()
    ax.bxp(stats, widths=0.3, showfliers=False)
    # One tick per box (bxp places boxes at 1..N by default) instead of a
    # hard-coded [1, 2, 3, 4, 5], so the number of samples may vary.
    plt.xticks(list(range(1, len(tick_labels) + 1)), tick_labels,
               fontsize=12)
    plt.title(title, fontsize=12)
    plt.ylabel('average DRIP-seq readcount', fontsize=12)
    plt.ylim(ylim_range)
    ax.yaxis.set_major_formatter(y_axis_formatter)
    ax.yaxis.set_offset_position('left')
    out_fig_route = os.path.join('.', 'figure', out_name)
    plt.savefig(out_fig_route)
    plt.close()