def plot_box(data, labels, fname):
    """Draw a box plot of *data* on a new figure and save it to *fname*.

    Args:
        data: Dataset(s), forwarded unchanged to ``plt.boxplot``.
        labels: Tick labels applied to the x axis of the figure's axes.
        fname: Output target, forwarded to ``savefig``.
    """
    fig = plt.figure()
    try:
        plt.boxplot(data)
        plt.setp(fig.get_axes(), xticklabels=labels)
        fig.savefig(fname)
    finally:
        # Close exactly the figure created here, even when plotting or saving
        # raises, so failed calls do not accumulate open figures.
        plt.close(fig)
def statistics_charts(self):
    """Yield ``(name, html)`` for every chart described in ``self.stats_charts``.

    Each chart is a mapping with the keys ``"type"``, ``"data"`` and
    ``"name"``.  Supported types are ``"plot"``, ``"timeline"``, ``"bars"``
    and ``"boxplot"``.  Nothing is yielded when ``plt`` is ``None``.

    Raises:
        Exception: If a chart has an unknown ``"type"``.
    """
    if plt is None:
        return
    for chart in self.stats_charts:
        kind = chart["type"]
        if kind == "plot":
            fig = plt.figure(figsize=(8, 2))
            for xdata, ydata, label in chart["data"]:
                plt.plot(xdata, ydata, "-", label=label)
            plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
        elif kind == "timeline":
            fig = plt.figure(figsize=(16, 2))
            for row, (starts, stops, label) in enumerate(chart["data"]):
                # One horizontal row of segments per data series.
                plt.hlines([row] * len(starts), starts, stops, label=label)
            plt.ylim(-1, len(chart["data"]))
        elif kind == "bars":
            fig = plt.figure(figsize=(16, 4))
            plt.bar(range(len(chart["data"])), chart["data"])
        elif kind == "boxplot":
            fig = plt.figure(figsize=(16, 4))
            plt.boxplot(chart["data"])
        else:
            raise Exception("Unknown chart")
        try:
            png = serialize_fig(fig)
        finally:
            # A figure is created per chart; release it once it has been
            # serialized so long reports do not pile up open figures.
            plt.close(fig)
        yield chart["name"], html_embed_img(png)
def regional_boxplot(self, folder):
    """Create box plots of the income per person per region and save them.

    The plot is written to ``<folder>/income_boxplot_<year>.pdf`` and carries
    dashed reference lines for ``self.global_mean`` and
    ``self.global_median``.

    Args:
        folder: Name of the output folder.

    Raises:
        ValueError: If *folder* is not a string.
    """
    # Validate first so bad input fails before any plotting work starts.
    if not isinstance(folder, str):
        raise ValueError("expected string for foldername")

    import matplotlib.pyplot as plt

    self.regional_income()
    incomes = []
    label = []
    for region in self.region_list:
        incomes.append(self.region_income[region])
        label.append(region)

    # Two extra points so the reference lines span past the outermost boxes.
    line_points = len(self.region_list) + 2

    plt.close()
    plt.figure(figsize=(14, 7))
    plt.boxplot(incomes, labels=label)
    plt.plot([self.global_mean] * line_points, "r--", label="Global Mean")
    plt.plot([self.global_median] * line_points, "g--", label="Global Median")
    plt.xlabel("Region")
    plt.ylabel("Income per person")
    plt.title("Boxplots of the Income per person for each region for the Year " + str(self.year))
    plt.legend()
    plt.savefig(folder + "/income_boxplot_" + str(self.year) + ".pdf")
    plt.close()
def plot_difficulties(difficulties, bins=10):
    """Print per-group statistics, then show a box plot and a histogram.

    ``difficulties`` is indexed with ``2 * y + c`` for each combination of
    ``y`` and ``c`` in {0, 1}.  ``bins`` is forwarded to ``plt.hist``.
    """
    # Data
    plot_data = []
    names = []
    for group_index in range(4):
        # group_index == 2 * y_true + c_val
        y_true, c_val = divmod(group_index, 2)
        diff_yc = difficulties[group_index]
        plot_data.append(diff_yc)
        names.append('y=%d, c=%d' % (y_true, c_val))
        print("y=%d, c=%d, mean=%.5f, std=%.5f"
              % (y_true, c_val, np.mean(diff_yc), np.std(diff_yc)))

    # Boxplots
    _, box_axes = plt.subplots()
    plt.boxplot(plot_data)
    plt.setp(box_axes, xticklabels=names)
    box_axes.set_ylim([-.01, 1.01])
    box_axes.set_ylabel('Difficulty')
    plt.show()

    # Histogram
    _, hist_axes = plt.subplots()
    plt.yscale('log', nonposy='clip')
    plt.hist(plot_data, label=names, bins=bins)
    plt.legend()
    hist_axes.set_xlabel('Difficulty')
    hist_axes.set_ylabel('Count (log-scale)')
    plt.show()
def plot(lookup):
    """Show one box per key of *lookup*, ordered by sorted key.

    Outlier markers are suppressed (``sym=''``) and the sorted keys are used
    as x tick labels.
    """
    ordered_keys = sorted(lookup.keys())
    series = [lookup[key] for key in ordered_keys]
    plt.boxplot(series, sym='')
    plt.setp(plt.gca(), 'xticklabels', ordered_keys)
    plt.show()
def bivariate_analysis_cont_catg(cont_catg_list, df, target_name, sub_len, COUNTER, PLOT_ROW_SIZE, PLOT_COLUMNS_SIZE):
    """Draw one box-plot subplot per column: target values grouped by column class.

    Rows containing any missing value are dropped before plotting.  Each
    subplot is titled with the column's mean and standard deviation and
    labelled with the result of ``evaluate_anova`` for the column against the
    target.

    Args:
        cont_catg_list: Candidate column names; filtered through
            ``clean_str_list`` before use.
        df: Source DataFrame.
        target_name: Name of the target column.
        sub_len: Unused by this function.
        COUNTER: Index of the first subplot to draw into.
        PLOT_ROW_SIZE: Number of subplot rows.
        PLOT_COLUMNS_SIZE: Number of subplot columns.

    Returns:
        Tuple ``(plt, COUNTER)`` with COUNTER advanced past the drawn subplots.

    Raises:
        ValueError: If ``clean_str_list`` rejects the target column while
            there are columns to plot.
    """
    clean_cont_catg_list = clean_str_list(df, cont_catg_list)
    if len(clean_str_list(df, [target_name])) == 0 and len(cont_catg_list) > 0:
        raise ValueError("You seem to have a target variable with string values.")
    clean_df = df.dropna()
    for col in clean_cont_catg_list:
        col_classes = clean_df[col].unique()
        summary = clean_df[col].describe()
        # Look the statistics up by label: positional integer access on a
        # label-indexed Series is deprecated in pandas.
        mean = summary["mean"]
        std = summary["std"]
        plt.subplot(PLOT_ROW_SIZE, PLOT_COLUMNS_SIZE, COUNTER)
        plt.title("mean " + str(np.float32(mean)) + " std " + str(np.float32(std)), fontsize=10)
        # One array of target values per distinct class of the column.
        x = [np.array(clean_df[clean_df[col] == i][target_name]) for i in col_classes]
        y = np.float32(clean_df[target_name])
        f_value, p_val = evaluate_anova(np.array(clean_df[col]).reshape(-1, 1), y)
        plt.xlabel(col + "\n f_value: " + str(np.float32(f_value[0])) + " / p_val: " + str(p_val[0]), fontsize=10)
        plt.ylabel(target_name, fontsize=10)
        plt.boxplot(x)
        print (col + " vs " + target_name + " plotted....")
        COUNTER += 1
    return plt, COUNTER
def bivariate_analysis_catg_cont(catg_cont_list, df, target_name, sub_len, COUNTER, PLOT_ROW_SIZE, PLOT_COLUMNS_SIZE):
    """Draw one box-plot subplot per column: column values grouped by target class.

    Rows containing any missing value are dropped before plotting.  Each
    subplot is titled with the column's mean and standard deviation and
    labelled with the result of ``evaluate_anova`` for the column against the
    target.

    Args:
        catg_cont_list: Column names to plot (used as given).
        df: Source DataFrame.
        target_name: Name of the target column.
        sub_len: Unused by this function.
        COUNTER: Index of the first subplot to draw into.
        PLOT_ROW_SIZE: Number of subplot rows.
        PLOT_COLUMNS_SIZE: Number of subplot columns.

    Returns:
        Tuple ``(plt, COUNTER)`` with COUNTER advanced past the drawn subplots.
    """
    # No need to remove string variables as they are handled by the chi2
    # function of sklearn.
    clean_catg_cont_list = catg_cont_list
    clean_df = df.dropna()
    for col in clean_catg_cont_list:
        # Take the classes from the cleaned frame, matching the rows that are
        # actually plotted; classes from the raw frame can include NaN or
        # values whose rows were dropped, which produce empty boxes.
        col_classes = clean_df[target_name].unique()
        summary = clean_df[col].describe()
        # Look the statistics up by label: positional integer access on a
        # label-indexed Series is deprecated in pandas.
        mean = summary["mean"]
        std = summary["std"]
        plt.subplot(PLOT_ROW_SIZE, PLOT_COLUMNS_SIZE, COUNTER)
        plt.title("mean " + str(np.float32(mean)) + " std " + str(np.float32(std)), fontsize=10)
        # One array of column values per distinct target class.
        x = [np.array(clean_df[clean_df[target_name] == i][col]) for i in col_classes]
        y = clean_df[target_name]
        f_value, p_val = evaluate_anova(np.array(clean_df[col]).reshape(-1, 1), y)
        plt.xlabel(target_name + "\n f_value: " + str(np.float32(f_value[0])) + " / p_val: " + str(p_val[0]), fontsize=10)
        plt.ylabel(col, fontsize=10)
        plt.boxplot(x)
        print (col + " vs " + target_name + " plotted....")
        COUNTER += 1
    return plt, COUNTER
def descriptive_stats(array, verbose=True, label='', mean=False, plot=False):
    """Simple statistics from vector.

    Args:
        array: Values to summarise.
        verbose: When true, print the statistics on one line.
        label: Text printed verbatim in front of the statistics.
        mean: When true, also compute and report the mean.
        plot: When true, draw a horizontal box plot of *array*.

    Returns:
        ``(min, 1st quartile, median, 3rd quartile, max)``, or
        ``(min, 1st quartile, mean, median, 3rd quartile, max)`` when *mean*
        is true.
    """
    mini = np.min(array)
    first_qu = np.percentile(array, 25)
    median = np.median(array)
    third_qu = np.percentile(array, 75)
    maxi = np.max(array)

    if mean:
        mean_ = np.mean(array)
        stats = (mini, first_qu, mean_, median, third_qu, maxi)
        template = ('min={:.1f} / 1st QU={:.1f} / ave={:.1f} / med={:.1f} / '
                    '3rd QU={:.1f} / max={:.1f}')
    else:
        stats = (mini, first_qu, median, third_qu, maxi)
        template = ('min={:.1f} / 1st QU={:.1f} / med={:.1f} / 3rd QU={:.1f} '
                    '/ max={:.1f}')

    if verbose:
        # Format only the fixed template and prepend the label afterwards, so
        # braces inside a caller-supplied label are not treated as format
        # fields.
        print(label + template.format(*stats))

    if plot:
        boxplot(array, vert=False, meanline=mean, showfliers=True, sym='.')

    return stats
def visualize_performance(self):
    """Plot the intra/inter comparison-score distributions.

    Calls ``self._common_visualize_performance`` with +1 labels for the intra
    scores and -1 labels for the inter scores, saves a box plot to
    ``comparison_score_distribution.pdf`` and then draws the two normalised
    50-bin histograms on a second figure, which is left open as the current
    figure.
    """
    intra = self._intra
    inter = self._inter

    labels = [1] * len(intra) + [-1] * len(inter)
    scores = intra + inter
    self._common_visualize_performance(labels, scores)

    plt.figure()
    plt.boxplot([intra, inter])
    plt.xticks([1, 2], ['intra', 'inter'])
    plt.title('Distribution of scores')
    plt.savefig('comparison_score_distribution.pdf')

    plt.figure()
    # Use the builtin min/max to combine the two extrema: the second
    # positional argument of np.min / np.max is ``axis``, not another value.
    start = min(np.min(intra), np.min(inter))
    end = max(np.max(intra), np.max(inter))
    # Both histograms share the same range so their bins line up.
    intra_hist, intra_bin = np.histogram(intra, 50, (start, end))
    inter_hist, inter_bin = np.histogram(inter, 50, (start, end))

    plt.plot(intra_bin[:-1], intra_hist / float(intra_hist.sum()),
             label='intra', color='blue')
    plt.plot(inter_bin[:-1], inter_hist / float(inter_hist.sum()),
             label='inter', color='red')
    plt.legend()
    plt.xlabel('Comparison scores')
    plt.ylabel('Probability')
    plt.title('Score distribution')
def seperate(R, P):
    """Show box plots of the ``R`` values of "off" versus "on" motifs.

    Every key of *P* is reduced to the part before its first underscore.
    When that prefix is a key of *R*, the last of the seven values stored in
    *P* decides the group: above 0.9999 the prefix counts as "on" and the
    largest ``R`` value seen for it is kept; otherwise it counts as "off"
    and the smallest is kept.

    Args:
        R: Mapping from motif prefix to a comparable value.
        P: Mapping from motif name to a 7-item sequence.
    """
    ON = {}
    OFF = {}
    for motif in P:
        M = motif.split("_")[0]
        if M not in R:
            continue
        _, _, _, _, _, _, pv = P[motif]
        cov = R[M]
        if pv > 0.9999:
            ON[M] = max(ON.get(M, cov), cov)
        else:
            OFF[M] = min(OFF.get(M, cov), cov)
    # Materialise the dict views: on Python 3 ``dict.values()`` is not a
    # sequence and boxplot cannot treat it as a dataset.
    plt.boxplot((list(OFF.values()), list(ON.values())))
    plt.show()
def handle(self, *args, **options):
    """Print grouped complexity values, then plot those of ``django-1.0.1``.

    The box plot is saved to ``z.png`` and then shown.
    """
    versions = models.SourceLine.objects.filter(
        project__startswith='django-').order_by(
        'project').values_list(
        'project', 'progradon__complexity')
    # NOTE(review): rows are ordered by project but grouped on column 1 (the
    # complexity) while the loop variable is named ``vers`` -- confirm whether
    # itemgetter(0) was intended.
    for vers, complexity_iter in itertools.groupby(
            versions, key=operator.itemgetter(1)):
        print('%s :' % (vers,))
        print('- ' + ', '.join(str(x) for x in complexity_iter))

    data = models.SourceLine.objects.filter(
        project='django-1.0.1').values_list(
        'progradon__complexity', flat=True)
    plt.boxplot(data)
    # Save before showing: once show() returns the figure may already be
    # gone, so saving afterwards can write an empty image.
    plt.savefig('z.png')
    plt.show()
def createBoxPlot(table, title=None, xlab=None, yLab=None, dest="show"):
    """Draw one box per distinct x value found in *table*.

    Args:
        table: Mapping whose values are iterables of ``(x, y)`` pairs.
        title: Optional plot title.
        xlab: Optional x-axis label.
        yLab: Optional y-axis label.
        dest: ``"none"`` to do nothing, ``"show"`` to display the plot, or
            any other value to save the plot to that path.

    Raises:
        ValueError: If *table* contains no data points.
    """
    if dest == "none":
        return

    # Group the y values by x in a single pass, keeping their original order.
    grouped = {}
    for key in table:
        for x, y in table[key]:
            grouped.setdefault(x, []).append(y)
    if not grouped:
        raise ValueError("createBoxPlot: table contains no data points")

    listX = sorted(grouped)
    plotData = [grouped[x] for x in listX]

    plt.figure("box")
    plt.boxplot(plotData)
    # Boxes are drawn at positions 1..n, so the labels must go there too.
    plt.xticks(range(1, len(listX) + 1), listX)
    if title:
        plt.title(title)
    if xlab:
        plt.xlabel(xlab)
    if yLab:
        plt.ylabel(yLab)

    if dest == "show":
        # show() does not take a figure name; the "box" figure is current.
        plt.show()
    else:
        plt.savefig(dest, bbox_inches='tight')
    plt.clf()
    plt.close("box")
def create_boxplot(data, save_dir, correct_entropy=1):
    """Save a box plot of *data* with a reference line and return its name.

    Args:
        data: Non-empty collection of entropy values for the lines added by
            the mutant files.
        save_dir: Directory to save the plot in, not including the name of
            the plot itself.
        correct_entropy: The entropy of the lines added by the repair
            program; drawn as a horizontal green line.

    Returns:
        The randomly generated file name (without the ``.png`` extension).
    """
    print("CREATE BOXPLOT")
    print(data)
    assert len(data) > 0

    # Draw on a dedicated figure so repeated calls do not stack their boxes
    # on whatever figure happens to be current.
    fig = plt.figure()
    try:
        # plot mutant entropy
        plt.boxplot(data)
        # plot correct entropy; plot() returns a list holding the one line
        repaired_line, = plt.plot([0, 2], [correct_entropy, correct_entropy],
                                  color="g")
        # label the repaired program
        plt.legend([repaired_line], ["repaired program"])
        # annotate the plot
        plt.ylabel("Entropy (bits)")
        plt.title("Entropy of lines added in mutant programs")
        # generate a random number as the name of the plot
        # (sys.maxsize exists on both Python 2 and 3, sys.maxint does not)
        name = str(random.randint(0, sys.maxsize))
        out_path = os.path.join(save_dir, name + ".png")
        # NOTE(review): 0 is not a documented bbox_inches value; kept as in
        # the original -- presumably meant to disable tight cropping.
        plt.savefig(out_path, bbox_inches=0)
    finally:
        plt.close(fig)
    print(out_path)
    return name
def boxplot(datadict, name):
    """Write per-column mean/std as JSON and save a box plot with overlays.

    All arrays in *datadict* are concatenated along the first axis.  The
    column means and standard deviations go to
    ``RESULTS_FOLDER/result_<name>.json`` and the plot to
    ``RESULTS_FOLDER/boxplot_<name>.png``.

    Args:
        datadict: Mapping whose values are 2-D arrays with the same number of
            columns.  The "strategy" curve indexes column 9, so at least ten
            columns are required.
        name: Suffix used in both output file names.
    """
    # list() is required on Python 3, where dict.values() is a view that
    # np.concatenate does not accept.
    data = np.concatenate(list(datadict.values()))
    xdata = np.arange(data.shape[1]) + 1
    ydata = np.average(data, axis=0)
    std = np.std(data, axis=0)
    minerr = ydata - std
    maxerr = ydata + std

    with open(RESULTS_FOLDER + "/result_%s.json" % name, "w") as f:
        j = {
            "relative_cost": list(ydata),
            "std": list(std)
        }
        json.dump(j, f, indent=1)

    # Hold the value of column 9 constant over the first nine columns, then
    # follow mean + std from column 9 onwards.
    strategy = np.concatenate((np.repeat(np.array(maxerr[9]), 9), maxerr[9:]))

    fig = plt.figure()
    try:
        plt.plot(xdata, ydata, label="mean")
        plt.plot(xdata, minerr, label="mean - std")
        plt.plot(xdata, maxerr, label="mean + std")
        plt.plot(xdata, strategy, lw=3, ls="--", c="black", label="strategy")
        plt.boxplot(data)
        plt.legend()
        plt.axis([0, 30, 0, maxerr[0]])
        plt.savefig('%s/boxplot_%s.png' % (RESULTS_FOLDER, name))
    finally:
        # The figure only exists to be written to disk; release it.
        plt.close(fig)
def stats_fn(data_frame, boxplot_root="/home/omohamme/INRIA/experiments/moop_sim_comparison/boxplots/"):
    """Write pairwise Wilcoxon tests to a text file and save a box plot.

    For every unordered pair of keys in *data_frame* the Wilcoxon result and
    both medians are written to ``Stat_tests_<scene[:-4]>.txt`` in the
    current directory.  A box plot with one box per key is then saved as
    ``<scene>_.png`` inside ``<boxplot_root>/<scene[:-4]>/``.

    Args:
        data_frame: Mapping from algorithm name to its sequence of samples.
        boxplot_root: Directory that holds the per-scene plot folders.
            Defaults to the location that was previously hard-coded.
    """
    global scene
    scene_stem = scene[:-4]
    names = list(data_frame)

    with open("Stat_tests_" + scene_stem + ".txt", "w") as stat_file:
        # Visit each unordered pair exactly once, in key order.
        for position, algorithm in enumerate(names):
            for algorithm2 in names[position + 1:]:
                statistical_significance = stats.wilcoxon(
                    data_frame[algorithm], data_frame[algorithm2])
                stat_file.write("%s  VS  %s  --> %s\n" % (
                    algorithm, algorithm2, statistical_significance))
                stat_file.write("%s  median =  %s\n" % (
                    algorithm, np.median(data_frame[algorithm])))
                stat_file.write("%s  median =  %s\n" % (
                    algorithm2, np.median(data_frame[algorithm2])))
                stat_file.write("----------------------------------------------------------\n")

    # This part is for drawing the different boxplots.  The figure is saved
    # by full path rather than by changing the working directory, so a
    # failure cannot leave the process in another directory.
    figure_name = scene + "_.png"
    fig = plt.figure(figsize=(15.0, 11.0))
    try:
        plt.boxplot([data_frame[key] for key in names])
        plt.xticks(range(1, len(names) + 1), names)
        plt.title(figure_name)
        plt.savefig(os.path.join(boxplot_root, scene_stem, figure_name))
    finally:
        plt.close(fig)
def plot_importances(forest, cov_dir, basename, X_names):
    """Dump per-tree feature importances and plot their distribution.

    The estimator is taken from the first step of *forest*.  The raw
    per-tree importances are written to
    ``<cov_dir>/feature-importance-<basename>.dat``, a ranking is printed,
    and a horizontal box plot is saved as
    ``feature_importance-<basename>.pdf`` under ``plot_dir``.

    ``plot_dir`` is not a parameter of this function; it is resolved from the
    enclosing (module) scope.
    """
    est = forest.steps[0][1]
    importances = est.feature_importances_
    # Feature indices from most to least important.
    indices = np.argsort(importances)[::-1]

    import_dist = np.array([tree.feature_importances_
                            for tree in est.estimators_])
    dat_path = os.path.join(cov_dir, 'feature-importance-' + basename + '.dat')
    np.savetxt(dat_path, import_dist, header=" ".join(X_names), comments='')

    # Columns ordered from least to most important, for plotting.
    ascending = indices[::-1]
    import_dist = import_dist[:, ascending]

    print("Feature ranking:")
    for rank in range(1, len(X_names) + 1):
        feature = indices[rank - 1]
        print("%d. %s (%.3g)" % (rank, X_names[feature], importances[feature]))
    print()

    mpl.rc('text', usetex='false')
    plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.boxplot(import_dist, vert=0)
    plt.yticks(range(1, 1 + len(X_names)), np.array(X_names)[ascending])
    plt.xlabel("Feature importance")
    plt.tight_layout()
    pdf_path = os.path.join(plot_dir, 'feature_importance-' + basename + '.pdf')
    plt.savefig(pdf_path)
    plt.close()
def __create_num_threads_vs_jct_graph(num_threads_to_jcts, output_dir, phase):
    """
    Create a graph of num threads per disk vs. JCT for the specified phase, which must be either
    "write" or "read". num_threads_to_jcts should be a dictionary of the form:
      { num threads : ( list of write JCTs, list of read JCTs ) }

    The graph is saved as "<phase>_phase_num_threads_vs_jct.pdf" in output_dir.
    """
    assert phase in ["write", "read"]

    # Build a list of lists of JCTs, sorted by num threads per disk.
    sorted_num_threads = sorted(num_threads_to_jcts)
    all_jcts = []
    for num_threads in sorted_num_threads:
        write_jcts, read_jcts = num_threads_to_jcts[num_threads]
        all_jcts.append(write_jcts if phase == "write" else read_jcts)

    # One empty tick on either side of the boxes.
    num_ticks = len(num_threads_to_jcts) + 2
    xmax = num_ticks - 1
    max_jct = max([jct for jcts in all_jcts for jct in jcts])
    ymax = max_jct * 1.1

    pyplot.title("Num threads per disk vs. JCT ({} phase)".format(phase))
    pyplot.xlabel("Num threads per disk")
    pyplot.ylabel("JCT (s)")
    # Positional arguments work on every matplotlib version; the b=, xmin=,
    # xmax=, ymin= and ymax= keywords were removed from recent releases.
    pyplot.grid(True)
    pyplot.xlim(0, xmax)
    pyplot.ylim(0, ymax)

    # Whiskers span the full data range (0th to 100th percentile).
    pyplot.boxplot(all_jcts, whis=[0, 100])

    # Replace the visually-correct x-axis values with the numerically correct values.
    pyplot.xticks(range(num_ticks), [""] + sorted_num_threads + [""])

    # Save the graph as a PDF.
    output_filepath = path.join(output_dir, "{}_phase_num_threads_vs_jct.pdf".format(phase))
    with backend_pdf.PdfPages(output_filepath) as pdf:
        pdf.savefig()

    pyplot.close()
def _parse_statistics(lines):
    """Group the bracketed lines of *lines* into records of five value lists."""
    records = []
    record = []
    for line in lines:
        if "[" not in line:
            continue
        values = []
        for token in line.split(" "):
            token = (token.replace("[", "").replace("]", "")
                     .replace(",", "").replace("\n", ""))
            try:
                values.append(int(token))
            except ValueError:
                # Not an integer literal: must be a float.
                values.append(float(token))
        record.append(values)
        if len(record) == 5:
            records.append(record)
            record = []
    # A trailing group of fewer than five lines is discarded.
    return records


def main():
    """Plot time and kill statistics per game speed from ``statistics.txt``.

    Every five consecutive bracketed lines of the file form one record.
    List 4 of each record is plotted as mean +/- std to ``time_speed.png``;
    lists 1 and 2 are plotted as box plots to ``kill_bot1_speed.png`` and
    ``kill_bot2_speed.png``.
    """
    with open("statistics.txt", "r") as statistics_file:
        records = _parse_statistics(statistics_file)

    mean_time = [np.mean(record[4]) for record in records]
    std_time = [np.std(record[4]) for record in records]

    positions = [i + 1 for i in range(len(records))]
    # The same "1x", "2x", ... labels on all three plots; the time plot used
    # to start at "0x" while both box plots started at "1x".
    labels = [str(position) + "x" for position in positions]

    plt.figure(figsize=(16, 9))
    plt.ylabel("Time")
    plt.xlabel("Game Speed")
    plt.xlim(0, len(mean_time) + 1)
    plt.errorbar(positions, mean_time, yerr=std_time)
    plt.xticks(positions, labels)
    plt.savefig("time_speed.png", bbox_inches='tight', dpi=200)
    plt.close()

    for column, filename in ((1, "kill_bot1_speed.png"),
                             (2, "kill_bot2_speed.png")):
        plt.figure(figsize=(16, 9))
        plt.ylim(0, 40)
        plt.boxplot([record[column] for record in records], labels=labels)
        plt.savefig(filename, bbox_inches='tight', dpi=200)
        plt.close()
def plot_htseqcount_dist(htseqfile, plot):
    '''Run from htseq_out folder

    Summarise the counts returned by ``list_htseq_counts(htseqfile)``.

    When *plot* is ``'y'`` three images are written to the current
    directory: ``loghist.png``, ``hist.png`` and ``boxplot.png``.

    Returns a dict with the median, maximum and minimum of the counts
    (``med``, ``max``, ``min``) and of their ``log2(count + 1)`` values
    (``logmed``, ``logmax``, ``logmin``).
    '''
    counts = list_htseq_counts(htseqfile)
    logcounts = [np.log2(c + 1) for c in counts]

    if plot == 'y':
        plt.hist(logcounts, bins=1000, color='b')
        plt.ylim(0, 500)
        plt.savefig('loghist.png')
        plt.close()

        plt.hist(counts, bins=10000, color='b')
        plt.xlim(0, 100000)
        plt.ylim(0, 4000)
        plt.savefig('hist.png')
        plt.close()

        plt.boxplot(counts)
        plt.ylim(0, 10000)
        plt.savefig('boxplot.png')
        # Close this figure as well, like the two histograms above, so later
        # plots do not draw on top of the box plot.
        plt.close()

    d = {}
    d['med'] = np.median(counts)
    d['logmed'] = np.median(logcounts)
    d['max'] = np.max(counts)
    d['logmax'] = np.max(logcounts)
    d['min'] = np.min(counts)
    d['logmin'] = np.min(logcounts)
    return d
def plot(y_label, key):
    """Box-plot the eight module-level score arrays selected by *key*.

    ``key == "base"`` selects the plain score arrays and ``key == "random"``
    the ``*_random`` ones; any other key selects nothing.  Each array is
    flattened and drawn as one box, labelled ARI, AMI, H, C, V, P, R, F1.
    """
    x_ticks = ["ARI", "AMI", "H", "C", "V", "P", "R", "F1"]

    # The score arrays are only looked up in the branch that needs them.
    if key == "base":
        score_sets = [
            adjusted_rand_scores,
            adjusted_mutual_info_scores,
            homogeneity_scores,
            completeness_scores,
            v_measures_scores,
            pairwise_precision,
            pairwise_recall,
            pairwise_f1,
        ]
    elif key == "random":
        score_sets = [
            adjusted_rand_scores_random,
            adjusted_mutual_info_scores_random,
            homogeneity_scores_random,
            completeness_scores_random,
            v_measures_scores_random,
            pairwise_precision_random,
            pairwise_recall_random,
            pairwise_f1_random,
        ]
    else:
        score_sets = []

    y = np.array([scores.flatten() for scores in score_sets])
    print(y.shape)
    plt.boxplot(y.T)
    plt.xticks(np.arange(1, len(x_ticks) + 1, 1), x_ticks)
    plt.xlabel("measures")
    plt.ylabel(y_label)
def dictPlot(d, barchart=True, label='run', **plotargs):
    """Plot an n to many mapping.

    Args:
        d: Mapping from x value to a sequence of y values.
        barchart: When false, draw one box per key instead of bars.
        label: Legend label for the scattered points.
        **plotargs: Extra keyword arguments for the scatter ``plt.plot`` calls.
    """
    xvals = sorted(d.keys())
    yvals = [d[x] for x in xvals]
    ticklabels = [str(x) for x in xvals]

    # Use box plot unless bar chart was specified
    if not barchart:
        plt.boxplot(yvals)
        # Label after drawing: boxplot installs its own ticks at 1..n, which
        # would otherwise replace the key labels.
        plt.xticks(range(1, len(yvals) + 1), ticklabels)
        return

    ind = np.arange(len(yvals))
    width = .35
    indcenter = ind + .5 * width
    plt.xticks(indcenter, ticklabels)
    if not yvals:
        # Nothing to draw for an empty mapping.
        return

    # If we only have one run, just plot bars
    if not all(len(y) > 1 for y in yvals):
        plt.bar(ind, [y[0] for y in yvals], width)
        return

    # Otherwise, scatter plot points
    for x, y in zip(indcenter, yvals):
        plt.plot([x] * len(y), y, 'o', **plotargs)
    # Re-plot a single point with a label so the legend gets exactly one
    # entry for the scattered runs.
    plt.plot(indcenter[0], yvals[0][0], 'o', label=label, **plotargs)
    # And plot a bar chart of the means
    means = [sum(y) / len(y) for y in yvals]
    plt.bar(ind, means, width, label='mean')
def make_error_boxplot(expected_files, observed_files, names):
    """Build a two-panel figure of absolute and relative errors.

    Based on http://matplotlib.org/examples/pylab_examples/boxplot_demo2.html

    Args:
        expected_files: Files holding the expected values.
        observed_files: Files holding the observed values, paired with
            *expected_files* by position.
        names: One name per file pair; ``extract_number(name)`` is used as
            the x tick label of the lower panel.

    Returns:
        The figure, or ``None`` if ``get_file_error`` raises ``TypeError``
        for any pair.
    """
    errors, relative_errors = [], []
    for expected_file, observed_file in zip(expected_files, observed_files):
        try:
            _, _, error, relative_error = \
                get_file_error(expected_file, observed_file)
        except TypeError:
            return None
        errors.append(error)
        relative_errors.append(relative_error)

    fig = plt.figure(figsize=(6, 4))

    ax = plt.subplot(2, 1, 1)
    plt.boxplot(errors)
    plt.xticks([])
    ax.set_title("Errors")
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)

    ax = plt.subplot(2, 1, 2)
    plt.boxplot(relative_errors)
    ticks = [x + 1 for x in range(len(names))]
    names = [extract_number(name) for name in names]
    # Apply the tick labels that were previously computed but never used.
    plt.xticks(ticks, names)
    ax.set_title("Relative errors")
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)

    return fig
def plot(revisions, benchmarks, subdir='.', baseurl='https://github.com/idaholab/moose/commit/'):
    """Save an SVG box plot of benchmark run times, one box per revision.

    Horizontal reference lines mark the middle element of the first
    benchmark's sorted runs and +/-1% and +/-5% of it.  Each tick label is
    the first seven characters of the revision and links to
    ``baseurl`` joined with that label.  The file is written to
    ``<subdir>/<benchmarks[0].name>.svg``.
    """
    pairs = list(zip(revisions, benchmarks))
    data = [bench.realruns for _, bench in pairs]
    short_revs = [rev[:7] for rev, _ in pairs]

    first_runs = sorted(data[0])
    median = first_runs[int(len(data[0]) / 2)]

    reference_lines = [
        (median * 1.05, dict(linestyle='--', linewidth=2, color='red', alpha=.5, label='+5%')),
        (median * 1.01, dict(linestyle=':', linewidth=2, color='red', label='+1%')),
        (median, dict(dashes=[48, 4, 12, 4], color='black', alpha=.5)),
        (median * .99, dict(linestyle=':', linewidth=2, color='green', label='-1%')),
        (median * .95, dict(linestyle='--', linewidth=2, color='green', alpha=.5, label='-5%')),
    ]
    for level, line_style in reference_lines:
        plt.axhline(y=level, **line_style)

    plt.boxplot(data, labels=short_revs, whis=1.5)
    plt.xticks(rotation=90)
    plt.ylabel('Time (seconds)')

    fig = plt.gcf()
    ax = fig.axes[0]
    # Turn every tick label into a link to its commit.
    for tick_label in ax.get_xticklabels():
        tick_label.set_url(urlparse.urljoin(baseurl, tick_label.get_text()))

    ax.legend(loc='upper right')
    fig.subplots_adjust(bottom=.15)
    fig.savefig(os.path.join(subdir, benchmarks[0].name + '.svg'))
    plt.clf()
def sale_price_per_sq_foot_boxplot(self, groupby, title):
    """Return an SVG box plot of sale price per square foot by group.

    *groupby* names the grouping column of ``self.data`` and *title* is the
    plot title.  Boxes are ordered as ``value_counts()`` orders the levels.
    """
    price_col = "sale_price_per_sqft"

    fig = init_fig()
    # Extra-wide figure for this plot.
    fig.set_size_inches(10, 4)

    # Only the two needed columns, without missing values.
    data = self.data[[groupby, price_col]].dropna()

    # One Series of prices per level of the groupby variable.
    values = data[groupby].value_counts().index
    groups = [data.loc[data[groupby] == value, price_col] for value in values]

    # An empty symbol hides the outliers, which would otherwise distort the
    # axis scale.
    plt.boxplot(groups, notch=0, sym="")
    plt.ylabel("Sale Price per Sq. Ft.")
    plt.title(title)
    plt.xticks(np.arange(len(values)) + 1, values)

    return fig_to_svg(fig)
def distance_distribution_plot(learner, box_kwargs=None, **kwargs):
    """
    Box-plots the event distance distributions of the training and
    validation scores of a learner on a log-scaled y axis.

    Args:
        learner: the learner object to use
        box_kwargs: keyword arguments for plt.boxplot; defaults to whiskers
            at the 5th and 95th percentiles
        kwargs: passed to event_distance_distribution (ie: to_true=T/F)
    """
    train_scores = learner._scores_by_params(train=True)
    valid_scores = learner._scores_by_params(train=False)
    if box_kwargs is None:
        box_kwargs = dict(whis=[5, 95])
    name = learner.description.lower()
    learner.param_values()

    distributions = [Learning.event_distance_distribution(scores, **kwargs)
                     for scores in (train_scores, valid_scores)]
    # Materialise both distributions before drawing either of them.
    to_plot = [list(distribution) for distribution in distributions]
    for values in to_plot:
        plt.boxplot(x=values, **box_kwargs)

    plt.gca().set_yscale('log')
    PlotUtilities.lazyLabel("Tuning parameter", "Distance Distribution (idx)",
                            "Event distributions for {:s}".format(name),
                            frameon=False)
def make_plot_lfw_reorder_other(save=False):
    """Box-plot ``1 - loss`` per operation order for the LFW random search.

    Finished jobs (state 2, preproc size 250) of the experiment are grouped
    by ``spec.order`` and arranged in the order of ``params.order_choices``.
    One box is drawn per order, with the per-order means overlaid.

    Args:
        save: Unused by this function.
    """
    import matplotlib.pyplot as plt

    conn = pm.Connection()
    db = conn['hyperopt']
    Jobs = db['jobs']
    exp_key = 'thor_model_exploration.model_exploration_bandits.LFWBanditModelExplorationOther/hyperopt.Random'
    H = Jobs.group(['spec.order'],
                   {'exp_key': exp_key, 'state': 2, 'spec.preproc.size.0': 250},
                   {'losses': []},
                   'function(d, o){o.losses.push(d.result.loss);}')

    # Re-order the groups to follow params.order_choices.
    order_choices = params.order_choices
    ords = pluck(H, 'spec.order')
    reinds = [ords.index(_o) for _o in order_choices]
    H = [H[_r] for _r in reinds]

    od = {'lpool': 'p', 'activ': 'a', 'lnorm': 'n'}
    order_labels = [','.join([od[b] for b in Before]) + '|' + ','.join([od[b] for b in After])
                    for (Before, After) in order_choices]

    fig = plt.figure(figsize=(18, 8))
    plt.boxplot([1 - np.array(h['losses']) for h in H])
    means = [1 - np.array(h['losses']).mean() for h in H]
    positions = range(1, len(H) + 1)
    plt.plot(positions, means, color='green')
    plt.scatter(positions, means)
    # One tick per label: boxes and labels both follow order_choices, which
    # may be shorter than the list of orders found in the database.
    plt.xticks(range(1, len(order_labels) + 1), order_labels, rotation=60)

    plt.ylabel('Absolute performance')
    plt.xlabel('Architecture tag')
def boxplot_by_pft(var, timestep, cmtnum, stages, ref_veg_map, ref_run_status):
    '''
    Work in progress...

    Stitches the stages for ``var``, masks the data by community type and
    by failed run status, averages over axes 2 and 3, and shows one box per
    PFT (ten boxes, labelled "PFT 0" to "PFT 9").
    '''
    data, units = stitch_stages(var, timestep, stages)
    print("data size: %s" % (data.size,))
    print(data.shape)

    d2 = data

    d3 = mask_by_cmt(d2, cmtnum, ref_veg_map)
    print("data size after masking cmt: %s" % (d3.count(),))

    d3 = mask_by_failed_run_status(d3, ref_run_status)
    print("data count after masking run status: %s" % (d3.count(),))

    # NOTE(review): presumably axes 2 and 3 are the spatial dimensions --
    # confirm against stitch_stages.
    pft0avg = np.ma.average(d3, axis=(2, 3))

    plt.boxplot(
        pft0avg,
        labels=["PFT {}".format(i) for i in range(0, 10)],
        # Whiskers over the full data range; the 'range' string form was
        # removed from matplotlib, the percentile pair is its equivalent.
        whis=(0, 100),
        showfliers=False,
        patch_artist=True,
        boxprops=dict(color='blue', alpha=0.25),
        whiskerprops=dict(color='red'),
        capprops=dict(color='blue'),
    )
    plt.ylabel(units)
    plt.show(block=True)
def _format_hours(hours):
    """Render a fractional hour count as '<whole hours>h <minutes>min'."""
    return str(math.floor(hours)) + "h " + str(int((hours % 1.0) * 60)) + "min"


def plot(work_time_deltas_hours):
    """Show a box plot of daily working hours with a summary text box.

    Overtime is the sum of each day's hours above 8.75 (8 hours plus an
    assumed 45 minute break).

    Args:
        work_time_deltas_hours: Non-empty sequence of hours worked per day.
    """
    # 45 minutes break is assumed
    work_overtime = sum([w - 8.75 for w in work_time_deltas_hours])

    mean_hours = numpy.mean(work_time_deltas_hours)
    min_hours = numpy.min(work_time_deltas_hours)
    max_hours = numpy.max(work_time_deltas_hours)
    median_hours = numpy.median(work_time_deltas_hours)
    days = len(work_time_deltas_hours)

    plt.boxplot(work_time_deltas_hours)
    plt.ylabel("Working Hours")
    # Three ticks need three labels; blank strings keep the x axis unlabelled.
    plt.xticks([0, 1, 2], ["", "", ""])
    # One y tick every quarter of an hour.
    yvalues = numpy.arange(numpy.floor(min_hours), numpy.ceil(max_hours), 0.25)
    plt.yticks(yvalues, [_format_hours(x) for x in yvalues], rotation=0)

    # Debug
    print("Mean: " + str(mean_hours))
    print("Min: " + str(min_hours))
    print("Max: " + str(max_hours))
    print("Median: " + str(median_hours))
    print("Work overtime: " + str(work_overtime))
    print("Days tracked: " + str(days))

    summary = ("Mean: " + _format_hours(mean_hours) +
               "\nMax: " + _format_hours(max_hours) +
               "\nMin: " + _format_hours(min_hours) +
               "\nMedian: " + _format_hours(median_hours) +
               "\nOvertime: " + _format_hours(work_overtime) +
               "\nDays: " + str(days))
    plt.text(1.35, 10, summary,
             bbox=dict(boxstyle='round', facecolor='white', alpha=0.5))
    plt.title("Working Hours Boxplot")
    plt.show()
def main(): data = [] data_month = [] # Post to database con = mdb.connect(host='192.168.1.143', db='monitor', user='******') #Format of data structure #[mm][dd][data] #mm: This is the month of the dataset. Keep in mind that it is indexed from zero. So August (8) is actually 7. #dd: This is the day within the month. #data: This is an array of the the data from the day. Each datapoint is a tuple of (datetime, value). with con: cur = con.cursor() #cur.execute("SELECT temp_actual FROM sensor1 GROUP BY HOUR(datetime) LIMIT 0, 30") for m in range(1,12): for d in range(1,31): cur.execute("SELECT datetime,temp_actual FROM sensor1 WHERE DAY(datetime) = %i AND MONTH(datetime) = %i" %(d,m)) data_month.append(np.array(cur.fetchall())) data.append(data_month) data_month = [] con.close() plt.boxplot(data[7-1][11][:,1]) plt.show() '''
def nrgserrs_0n(save=False):
    """Box-plot the free energies loaded from the ``errfe0_*1.txt`` files.

    The matching files are ordered by ``move_ten``; each one supplies a free
    energy row and an error row.  With ``save=True`` the figure is written
    to ``FE_boxplots0n1c`` and the per-file statistics to
    ``errfe_stats_0n.csv``.  The statistics are printed in either case.
    """
    fnames_0 = move_ten(glob('errfe0_*1.txt'))

    out_file = []
    control_FEs = []
    for pc, control_set in enumerate(fnames_0):
        FE, err = numpy.loadtxt(control_set)
        control_FEs.append(FE)
        out_file.append([pc, numpy.mean(FE), 1 - numpy.mean(err)])

    fig = plt.figure(figsize=(8, 4))
    plt.boxplot(control_FEs)
    plt.title('F by % of corruption')
    plt.ylabel('free energy')
    percent_labels = [str(percent) + '%' for percent in range(0, 11)]
    plt.xticks(range(1, 12), percent_labels)

    if not save:
        plt.tight_layout()
    else:
        fig.savefig('FE_boxplots0n1c', bbox_inches='tight')
        numpy.savetxt('errfe_stats_0n.csv', out_file, delimiter=',',
                      fmt='%1.0f,%1.2f,%1.2f', header='pc,Fmean,er',
                      comments='')
    print(numpy.array(out_file))
min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=5, min_weight_fraction_leaf=0.0, n_estimators=800, n_jobs=None, oob_score=False, random_state=None, verbose=0, warm_start=False) models.append(('LassoReg', Lasso(alpha=0.1))) models.append(('SVM', svReg)) models.append(('LinearReg', LinearRegression())) models.append(('randForest', randForReg)) mas = make_scorer(mean_absolute_error, greater_is_better=False); # evaluate each model in turn results = [] names = [] scoring = 'accuracy' for name, model in models: kfold = KFold(n_splits=10, random_state=7) cv_results = cross_val_score(model, x_train, y_train, cv=kfold, n_jobs=4) results.append(cv_results) names.append(name) msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()) print(msg) # boxplot algorithm comparison fig = pyplot.figure() fig.suptitle('Classification Algorithm Comparison') ax = fig.add_subplot(111) pyplot.boxplot(results) ax.set_xticklabels(names) pyplot.grid() pyplot.show()
# Evaluate each model with 10-fold cross-validation.
for name, model in models:
    # shuffle=True is required for random_state to take effect; recent
    # scikit-learn versions raise ValueError when random_state is set
    # without it.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
print('\n'.join(map(str, results)))

# Select Best Model
# Compare Algorithms
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()

# 6. Make Predictions
# Make predictions on validation dataset
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
predictions = knn.predict(X_validation)
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))

svc = SVC(gamma='auto')
svc.fit(X_train, Y_train)
predictions = svc.predict(X_validation)
# plot histogram of distances
# Convert every distance array by dividing it by my_dpmm.
centers_distances = centers_distances / my_dpmm
centers_distances_min = centers_distances_min / my_dpmm
edges_distances = edges_distances / my_dpmm
edges_distances_min = edges_distances_min / my_dpmm

# Zero entries are left out of the averages and of the box plot.
nonzero_centers = centers_distances[np.nonzero(centers_distances)]
nonzero_edges = edges_distances[np.nonzero(edges_distances)]

centers_distances_avg = np.mean(nonzero_centers)
centers_distances_min_avg = centers_distances_min.mean()
edges_distances_avg = np.mean(nonzero_edges)
edges_distances_min_avg = edges_distances_min.mean()

hist_dist_fig, hist_dist_ax = plt.subplots()
bp = plt.boxplot(
    (nonzero_centers, centers_distances_min,
     nonzero_edges, edges_distances_min),
    notch=0)
for artist_group in ('boxes', 'whiskers'):
    plt.setp(bp[artist_group], color='black')
plt.setp(bp['fliers'], color='red', marker='+')
hist_dist_ax.yaxis.grid(True,
                        linestyle='-',
                        which='major',
                        color='lightgrey',
                        alpha=0.5)

# Keep the grid behind the plot objects.
hist_dist_ax.set_axisbelow(True)
hist_dist_ax.set_title('Recap of distances between objects (mm)')
hist_dist_ax.set_xlabel('Type')
def case5_single_boxplot():
    """Show a single box plot of a fixed 13-value sample."""
    samples = [1, 5, 5.1, 5.1, 5.5, 5.4, 5.5, 5.4, 5.6, 5.7, 6., 6.1, 9]
    plt.boxplot(samples)
    plt.show()
plt.ylabel('Steps')
plt.title('Steps by Tasks')
plt.xticks(index + bar_width, ('open map', 'mcs', 'climate', 'bluetooth audio', 'park'))
plt.legend()
# plt.show()


def _draw_hr_panel(position, values, y_text, x_text):
    """Draw one heart-rate box plot in cell *position* of the 2x5 grid."""
    plt.subplot(2, 5, position)
    plt.ylabel(y_text)
    plt.xlabel(x_text)
    plt.boxplot(values, 0, 'gD')


# plot HR
data = np.array(Variables.end2end_map_hr)
plt.figure('HR Value')
_draw_hr_panel(1, data, 'Heart Rate', 'Open Map Task End2End')

data = np.array(Variables.end2end_mcs_hr)
_draw_hr_panel(2, data, 'Heart Rate', 'MCS Task End2End')

data = np.array(Variables.end2end_climate_hr)
_draw_hr_panel(3, data, 'Heart Value', 'Climate Task End2End')

data = np.array(Variables.end2end_bt_hr)
plt.subplot(2, 5, 4)
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(gamma='auto')))

# evaluate each model in turn
results = []
names = []
for name, model in models:
    # shuffle=True is required for random_state to take effect; recent
    # scikit-learn versions raise ValueError when random_state is set
    # without it.
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

# Compare Algorithms
pyplot.boxplot(results, labels=names)
pyplot.title('Algorithm Comparison')
pyplot.show()

# Make predictions on validation dataset
model = SVC(gamma='auto')
model.fit(X_train, Y_train)
predictions = model.predict(X_validation)

# Evaluate predictions
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))
expoBoot = expo[bootstrapIndices] gumbBoot = gumb[bootstrapIndices] lognBoot = logn[bootstrapIndices] triaBoot = tria[bootstrapIndices] data = [ norm, normBoot, logn, lognBoot, expo, expoBoot, gumb, gumbBoot, tria, triaBoot ] fig = plt.figure(figsize=(10, 6)) fig.canvas.set_window_title('A Boxplot Example') ax1 = fig.add_subplot(111) plt.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25) bp = plt.boxplot(data, notch=0, sym='+', vert=1, whis=1.5) plt.setp(bp['boxes'], color='black') plt.setp(bp['whiskers'], color='black') plt.setp(bp['fliers'], color='red', marker='+') # Add a horizontal grid to the plot, but make it very light in color # so we can use it for reading data values but not be distracting ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5) # Hide these grid behind plot objects ax1.set_axisbelow(True) ax1.set_title(
import numpy as np
import matplotlib.pyplot as plt

# 29 sample values to summarise in a single box plot.
data_to_plot = np.array([
    157, 159, 161, 164, 165, 166, 167, 167, 167, 168,
    169, 170, 170, 170, 171, 171, 172, 172, 172, 172,
    173, 173, 175, 175, 177, 178, 178, 179, 185,
])

# Axis limits requested before drawing: [xmin, xmax, ymin, ymax].
axis_limits = [0, 1, 155, 190]

plt.figure(num=1, figsize=(5, 6))
plt.subplot(1, 1, 1)
plt.axis(axis_limits)
plt.boxplot(data_to_plot, showfliers=True)
plt.show()
def plotCF(x, y, labels, numberOfRuns):
    """Draw the "correct frames received (%)" box plot and save it.

    ``y`` is walked as a five-level nested mapping
    (message length -> offset -> delay -> frame length -> interval); every
    leaf is indexed with ``[1]`` and ``[2]``.  Each leaf contributes one box
    holding the single value ``leaf[2] / leaf[1] * 100``, or ``0`` when
    ``leaf[1]`` is zero.  Boxes appear in the iteration order of the
    mappings.

    The figure is written to the module-level ``cfResultsFile``.  The
    parameters ``x``, ``labels`` and ``numberOfRuns`` are accepted for
    interface compatibility but are not read.
    """
    print("plotBF()")
    fig = plt.figure(1, frameon=True)
    fig.subplots_adjust(bottom=0.2)
    ax = plt.subplot(111)
    ax.yaxis.grid()

    # One single-value sample per (messageLength, ..., interval) leaf.
    data = []
    for messageLength in y:
        perOffset = y[messageLength]
        for offset in perOffset:
            perDelay = perOffset[offset]
            for delay in perDelay:
                perFrameLength = perDelay[delay]
                for frameLength in perFrameLength:
                    perInterval = perFrameLength[frameLength]
                    for interval in perInterval:
                        leaf = perInterval[interval]
                        if leaf[1] == 0:
                            # Avoid dividing by zero.
                            data.append([0])
                        else:
                            data.append(
                                [(float(leaf[2]) / float(leaf[1])) * 100])

    medianpointprops = dict(marker='', linestyle='-', color='red')
    bp = plt.boxplot(data, sym='+', vert=1, whis=1.5, patch_artist=True,
                     medianprops=medianpointprops)

    # One fill colour per group of eight boxes (FL=7, FL=14, FL=28).
    # colors = ['#3D9970', '#FF9136', '#FFC51B']
    colors = ['white', 'white', 'white']
    for k, patch in enumerate(bp['boxes']):
        if k <= 7:
            group = 0
        elif k <= 15:
            group = 1
        else:
            group = 2
        patch.set_facecolor(colors[group])
    # These apply to all whiskers/fliers at once, so one call is enough.
    plt.setp(bp['whiskers'], color='black')
    plt.setp(bp['fliers'], color='blue')

    plt.ylabel('Correct frames received (%)')
    ax.text(3, 110.0, u'FL=7')
    ax.text(11, 110.0, u'FL=14')
    ax.text(19, 110.0, u'FL=28')

    # Vertical separators every eight boxes, starting at x = 8.5.
    offset = 8.5
    for _ in range(1, 25):
        plt.plot([offset, offset], [-1, 100], color='#000000')
        offset += 8

    # 24 ticks labelled 30..100 three times over.  The old code aliased the
    # label list (``y = x``) before extending it, which produced 32 labels
    # for 24 ticks and relied on ``range`` returning a list.
    tickMarks = list(range(1, 25))
    intervalLabels = list(range(30, 110, 10)) * 3
    plt.xticks(tickMarks, tuple(intervalLabels))
    plt.tick_params(axis='both', which='major', labelsize=5)
    # ax.set_ylim([0, 26])
    plt.xlabel("Time Interval [ms]")

    # Shrink the axes so the group captions above the plot stay visible.
    box = ax.get_position()
    ax.set_position([
        box.x0 * 0.9, box.y0 + box.height * 0.20, box.width * 1.0,
        box.height * 0.80
    ])
    ax.yaxis.grid(True, linestyle='-', which='major', color='grey')
    ax.set_axisbelow(True)
    plt.savefig(cfResultsFile)
    print("boxplot data: ")
# Histogram of residual sugar.
fig = plt.hist(df['residual sugar'], bins=bin_edges)
# add plot labels: the histogram puts the variable on x and the bin counts
# on y (the two labels used to be swapped).
plt.xlabel('residual sugar')
plt.ylabel('count')
plt.show()

# create scatterplot
fig = plt.scatter(df['pH'], df['residual sugar'])
# add plot labels
plt.xlabel('pH')
plt.ylabel('residual sugar')
plt.show()

# Box plot of alcohol with a fixed y range and no x ticks.
plt.boxplot(df['alcohol'])
plt.ylim([8, 16])
plt.ylabel('alcohol')
fig = plt.gca()
fig.axes.get_xaxis().set_ticks([])
plt.show()

#gen random num
print("random num", np.random.uniform(0, 10))

#create array of random nums 100 nums of 1-10
observations = np.random.uniform(0, 10, 100)
print(observations)
fig = plt.hist(observations, bins=bin_edges)
def drawBox(heights):
    """Display ``heights`` as one box plot labelled 'Heights'."""
    samples = [heights]
    tick_names = ['Heights']
    pyplot.boxplot(samples, labels=tick_names)
    pyplot.title('Heights of Students')
    pyplot.show()
# create the modeling pipeline pipeline = Pipeline(steps=[('i', SimpleImputer( strategy=s)), ('m', RandomForestClassifier())]) # evaluate the model cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1) scores = cross_val_score(pipeline, X, y, scoring='accuracy', cv=cv, n_jobs=-1) # store results results.append(scores) print('>%s %.3f (%.3f)' % (s, mean(scores), std(scores))) # plot model performance for comparison pyplot.boxplot(results, labels=strategies, showmeans=True) pyplot.show() # most frequent is the best candidate, showing the least variance # split train, test sets X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25) # define pipeline pipe = Pipeline(steps=[('i', SimpleImputer( strategy='most_frequent')), ( 'scaler', StandardScaler()), ('rf', RandomForestClassifier())]) # set parameters for grid search params = { 'rf__n_estimators': [10, 25, 50, 100, 250, 500], 'rf__max_depth': [10, 25, 50],
# print " "+clf_name+":" # prediction stage start_time = time.time() for ind in range(num_layers): clf.fit(temp) # print "LSH Forest:" # clf.display() # CONSOLE OUTPUT # print "-------------" # evaluation stage y_pred = clf.decision_function(temp).ravel() if (ind != 0): for row in temp: row.pop(0) if (ind < num_layers - 1): y_pred = y_pred.tolist() temp = np.c_[y_pred, temp].tolist() train_time = time.time() - start_time test_time = time.time() - start_time - train_time auc = roc_auc_score(ground_truth, -1.0 * y_pred) results.append(auc * 100) # print "AUC: ", auc # print "Training time: ", train_time # print "Testing time: ", test_time with open("deep2.csv", "w") as filerw: resultWriter = csv.writer(filerw) resultWriter.writerow(results) filerw.close() mpl.boxplot(results) mpl.show() print results
def _plotEOMPanel(ax, runData, numberOfRuns, captionXs, xlabel):
    """Draw one end-of-message-error box-plot panel on ``ax``.

    ``ax`` must be the current axes.  ``runData`` is walked as a five-level
    nested mapping; each leaf contributes one box holding the single value
    ``leaf[0] / len(numberOfRuns) * 100``.  ``captionXs`` gives the x
    positions of the three 'FL=' captions.
    """
    ax.yaxis.grid()
    runCount = len(numberOfRuns)

    data = []
    for messageLength in runData:
        perOffset = runData[messageLength]
        for offset in perOffset:
            perDelay = perOffset[offset]
            for delay in perDelay:
                perFrameLength = perDelay[delay]
                for frameLength in perFrameLength:
                    perInterval = perFrameLength[frameLength]
                    for interval in perInterval:
                        data.append([
                            float(perInterval[interval][0]) / runCount * 100
                        ])

    # medianpointprops = dict(marker='', linestyle='-', color='red')
    bp = plt.boxplot(data, patch_artist=True)

    # One fill colour per group of eight boxes (FL=7, FL=14, FL=28).
    # colors = ['#3D9970', '#FF9136', '#FFC51B']
    colors = ['white', 'white', 'white']
    for k, patch in enumerate(bp['boxes']):
        if k <= 7:
            group = 0
        elif k <= 15:
            group = 1
        else:
            group = 2
        patch.set_facecolor(colors[group])

    plt.ylabel('End of message errors (%)')
    for captionX, caption in zip(captionXs, (u'FL=7', u'FL=14', u'FL=28')):
        ax.text(captionX, 110.0, caption)

    # Vertical separators every eight boxes, starting at x = 8.5.
    offset = 8.5
    for _ in range(1, 25):
        plt.plot([offset, offset], [-1, 100], color='#000000')
        offset += 8

    # 24 ticks labelled 30..100 three times over.  The old code aliased the
    # label list before extending it, which produced 32 labels for 24 ticks
    # and relied on ``range`` returning a list.
    tickMarks = list(range(1, 25))
    intervalLabels = list(range(30, 110, 10)) * 3
    plt.xticks(tickMarks, tuple(intervalLabels))
    plt.xlabel(xlabel)


def plotEOM(noLoadData, withLoadData, numberOfRuns):
    """Plot end-of-message error percentages without and with load.

    Draws the ``noLoadData`` panel in the left half of figure 1 and saves
    the figure to the module-level ``eomResultsFile``.  Unless the
    module-level ``delayToPlot[0]`` equals ``u'3.0'``, the ``withLoadData``
    panel is then drawn in the right half and the figure is saved again to
    the same file.  Figure size comes from the module-level ``width`` and
    ``height``.
    """
    print("plotEOM()")
    plt.figure(1, figsize=(width, height), frameon=False)
    fig = plt.figure(1, frameon=True)
    fig.subplots_adjust(bottom=0.2)

    # (a) WITHOUT LOAD
    ax = plt.subplot(1, 2, 1)
    _plotEOMPanel(ax, noLoadData, numberOfRuns, (3, 11, 19),
                  "Time Interval (ms)\n(a) End of message errors without load")
    box = ax.get_position()
    ax.set_position([box.x0 * .5, box.y0, box.width * 1.22,
                     box.height * 0.95])
    ax.set_axisbelow(True)
    plt.savefig(eomResultsFile)

    # WITH LOAD
    if delayToPlot[0] == u'3.0':
        return
    ax = plt.subplot(1, 2, 2)
    _plotEOMPanel(ax, withLoadData, numberOfRuns, (3.1, 11.1, 19.1),
                  "Time Interval (ms)\n(b) With Load")
    box = ax.get_position()
    ax.set_position(
        [box.x0 + 0.01, box.y0, box.width * 1.22, box.height * 0.95])
    ax.set_axisbelow(True)
    # plt.tight_layout()
    plt.savefig(eomResultsFile)
def main():
    """Build per-grader box-plot statistics and hypergeometric CDFs.

    Loads ``config``, ``groups_map`` and ``data`` via ``load_files()``,
    collects every grade above ``config['useAbove']`` per grader (plus a
    combined "ALL" bucket), draws a box plot to obtain whisker/quartile
    values, optionally saves the plot to ``output_box_plot.png``, and writes
    the resulting table either through ``create_xlsx`` or to
    ``<config['output']['filename']>.csv``.

    Exits the process via ``sys.exit`` when matplotlib rejects the grade
    data with a ``ValueError``.
    """
    config, groups_map, data = load_files()

    # Convert the 'data' df into a 2d array of grades given by each grader
    grades = {}
    combined = "ALL"
    data_cols = config['dataHeaders']['data']
    group_col = config['dataHeaders']['group']
    group_col_list = config['groupHeaders']['group']
    use_threshold = config['useAbove']
    exclude = config['excludeGraders'] if 'excludeGraders' in config else []

    for index1, row in data.iterrows():
        group_num = str(int(row.at[group_col]))
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for index2, item in row.filter(items=data_cols).items():
            if group_num in group_col_list:
                grader = groups_map.at[index2, group_num]
                # print(group_num, "\t", index2, "\t", grader)
                # print(item)
                if item > use_threshold and grader not in exclude:
                    grades.setdefault(combined, []).append(item)
                    grades.setdefault(grader, []).append(item)

    # Create box plot
    grades_values = list(grades.values())
    grades_keys = list(grades.keys())
    try:
        box_values = plt.boxplot(grades_values, labels=grades_keys)
    except ValueError as e:
        print('Keys: %a' % grades_keys, file=sys.stderr)
        print('Values: %a' % grades_values, file=sys.stderr)
        sys.exit('EXCEPTION: ValueError! ' + str(e))

    # Retrieve data from the box plot
    res = {key: [v.get_ydata() for v in value]
           for key, value in box_values.items()}

    # EXTRACT TABLE DATA
    # Whisker lines alternate lower, upper, lower, upper, ...
    whiskers_min = [min(item) for item in res['whiskers'][::2]]   # Lower Whisker
    whiskers_Q1 = [max(item) for item in res['whiskers'][::2]]    # Q1
    whiskers_Q3 = [min(item) for item in res['whiskers'][1::2]]   # Q3
    whiskers_max = [max(item) for item in res['whiskers'][1::2]]  # Upper Whisker
    medians = [item[0] for item in res['medians']]                # Q2

    # ALTERNATIVE METHODS, EQUIVALENT DATA
    # caps_lower = [item[0] for item in res['caps'][::2]]  # Lower Whisker
    # caps_upper = [item[0] for item in res['caps'][1::2]]  # Upper Whisker
    # boxes_lower = [min(item) for item in res['boxes']]  # Q1
    # boxes_upper = [max(item) for item in res['boxes']]  # Q3
    # print(res['fliers'])  # outliers
    # print(res['means'])  # EMPTY ARRAY!

    # Format and display box plot
    if config['output']['pyplot']:  # TODO CONFIG FLAGS
        # Resize the figure that already holds the box plot.  Calling
        # plt.figure(figsize=...) here would open a new, empty figure and
        # savefig() would then write a blank image.
        plt.gcf().set_size_inches(12, 4)
        plt.grid(axis='y')
        plt.yticks(np.arange(use_threshold, 1.1, step=0.05))
        try:
            plt.savefig('output_box_plot.png', dpi=200)
        except IOError as e:
            print(e, file=sys.stderr)
            print("ERROR: FAILED TO SAVE PYPLOT FIGURE! "
                  "Please make sure the file is not already open",
                  file=sys.stderr)
        # plt.show()

    # Calculate hypergeometric CDFs
    h_cuts = [whiskers_Q1, medians, whiskers_Q3]
    M = len(grades[combined])  # total overall
    N = [len(graded) for graded in grades_values]  # number the grader completed

    # CALCULATE n,x using >= cut value to find chance of getting as extreme
    # or more extreme
    # number of total satisfying. (Matrix len(grades) by len(h_cuts)
    n = [[sum([1 for item in grades[combined] if item >= val])
          for val in cut] for cut in h_cuts]
    # number of selected satisfying. (Matrix len(grades) by len(h_cuts)
    x = [[sum([1 for item in gr if item >= val])
          for val, gr in zip(cut, grades_values)] for cut in h_cuts]
    # Prob to get sample LESS 'extreme' (with more selected<cut).
    # (Matrix len(grades) by len(h_cuts)
    h_cuts_probs = [[hypergeom.cdf(c_x, M, c_n, c_N)
                     for c_x, c_n, c_N in zip(cut_x, cut_n, N)]
                    for cut, cut_n, cut_x in zip(h_cuts, n, x)]

    # One row per statistic; first cell is the row label.
    data_out = [
        ["Labels"] + grades_keys,
        ["whisker_min"] + whiskers_min,
        ["Q1"] + whiskers_Q1,
        ["Q2"] + medians,
        ["Q3"] + whiskers_Q3,
        ["whisker_max"] + whiskers_max,
        ["M"] + [M for _ in range(len(N))],
        ["N"] + N,
        ["n_Q1"] + n[0],
        ["n_Q2"] + n[1],
        ["n_Q3"] + n[2],
        ["x_Q1"] + x[0],
        ["x_Q2"] + x[1],
        ["x_Q3"] + x[2],
        ["Fx(x)_Q1"] + h_cuts_probs[0],
        ["Fx(x)_Q2"] + h_cuts_probs[1],
        ["Fx(x)_Q3"] + h_cuts_probs[2]
    ]

    if config['output']['format'] == 'xlsx':
        create_xlsx(config, data_out, grades_keys, grades_values,
                    [whiskers_Q1, medians, whiskers_Q3])
    else:
        with open(config['output']['filename'] + '.csv', 'w+') as csv_outfile:
            csv_w = csv.writer(csv_outfile, delimiter=',', lineterminator='\n')
            csv_w.writerows(data_out)
def makesingleboxplot(thisdirname, subdirname, thisfilename):
    """Split a results file per node, tabulate it and draw a box plot.

    Every line of ``thisdirname/thisfilename`` is split on single spaces
    into seven fields; the sixth is the node name and the remainder starts
    with the measured value, optionally followed by its units.  The value
    is appended to the per-node file ``thisdirname/<nodename>`` (append
    mode, so repeated runs accumulate).  The first line also determines the
    node-name pattern (prefix plus zero-padded number) and the number
    format used for the summary table.

    Afterwards each existing per-node file is loaded, a summary row
    (node, samples, mean, std, min, max) is written to
    ``thisdirname/FinalDataTable.txt``, and a box plot ordered by mean is
    saved to ``thisdirname/<subdirname>Boxplot.png``.

    Returns the number of nodes plotted, or 0 when no per-node file exists.
    """
    maxnodeint = 1
    firsttime = 0
    firstdigit = 1
    segname = []
    ylabelunits = []
    tablefmtstring = "%d %d "
    thisdatafile = thisdirname + "/" + thisfilename

    # 'with' guarantees the input file is closed (it used to be leaked).
    with open(thisdatafile) as datafile:
        for line in datafile:
            tud, jud, tid, mname, pname2, nodename, trest = line.split(' ', 6)
            nfile = thisdirname + "/" + ''.join(nodename)

            if firsttime < 1:
                # Derive the node-name pattern, e.g. "wf%03d", from the
                # first node name: prefix up to the first digit, then a
                # zero-padded integer as wide as the rest of the name.
                firstdigit = re.search(r"\d", ''.join(nodename)).start()
                firsttime = 1
                for i in range(0, firstdigit):
                    segname.append(nodename[i])
                segname.append('%')
                segname.append('0')
                nlen = len(nodename) - firstdigit
                segname.append("%d" % nlen)
                segname.append('d')

                # Derive units and number format from the first value.
                valstring = trest.split()[0]
                if len(trest.split()) > 1:
                    ylabelunits = trest.split()[1]
                ivallen = len(valstring)
                idecpoint = valstring.find('.')
                iseform = str(valstring).find('e')
                decdigits = 2
                if idecpoint > -1:
                    decdigits = ivallen - idecpoint + 1
                if iseform > -1:
                    # NOTE(review): unlike the fixed-point format below this
                    # one does not end in "\n", so table rows are not
                    # newline-terminated for exponent-form input - confirm
                    # whether that is intended.
                    vfmtstring = "%%.%de %%.%de %%.%de %%.%de " % \
                        (decdigits, decdigits, decdigits - 2, decdigits - 2)
                else:
                    vfmtstring = "%%.%df %%.%df %%.%df %%.%df\n" % \
                        (decdigits, decdigits, decdigits - 2, decdigits - 2)
                tablefmtstring = tablefmtstring + vfmtstring

            # Numeric part of the node name.
            sepdigit = re.search(r"\D", ''.join(nodename[firstdigit:]))
            if sepdigit:
                sepdigit = sepdigit.start()
                # NOTE(review): the "- 1" drops the digit just before the
                # first non-digit character - confirm against real names.
                tnode = int(nodename[firstdigit:firstdigit + sepdigit - 1])
            else:
                tnode = int(nodename[firstdigit:])
            if tnode > maxnodeint:
                maxnodeint = tnode

            # Append this line's value to the per-node file.
            with open(nfile, "a") as nodefile:
                nodefile.write(trest.split()[0] + "\n")

    # Begin processing a list of files in the current directory.
    # (A former os.system("cd ...") call was dropped: it only changed the
    # directory of a throw-away subshell and all paths below are explicit.)
    fstring = ''.join(segname)  # "wf%03d"
    maxnodenum = maxnodeint + 1  # 620
    if len(ylabelunits) < 1:
        ytitle = "Performance (MB/sec)"
    else:
        ytitle = "Performance " + "(" + str(ylabelunits) + ")"
    ctitle = subdirname + " Performance Boxplot"

    dlist = []
    nnum = []
    for x in range(1, maxnodenum):
        nodepath = thisdirname + "/" + (fstring % (x))
        if os.path.isfile(nodepath):
            dlist.append(loadtxt(nodepath))
            nnum.append(x)

    dmeans = [d.mean() for d in dlist]
    dmins = [d.min() for d in dlist]
    dmaxs = [d.max() for d in dlist]
    dsamps = [d.size for d in dlist]
    dstds = [d.std() for d in dlist]

    # Summary table: node, samples, mean, std, min, max.
    tablename = thisdirname + "/FinalDataTable.txt"
    with open(tablename, "w") as tablefile:
        for row in zip(nnum, dsamps, dmeans, dstds, dmins, dmaxs):
            tablefile.write(tablefmtstring % row)

    # Tick labels in the same order as the boxes (ascending mean); the
    # leading '' labels position 0, boxes start at position 1.
    nodemeans = sorted(zip(nnum, dmeans), key=lambda pair: pair[1])
    xtickmarks = ['']
    for nodeid, _ in nodemeans:
        xtickmarks.append(fstring % nodeid)
    dlist.sort(key=lambda a: a.mean())

    # Bail out before creating a figure that would never be drawn on.
    if len(dlist) < 1:
        return 0

    plt.figure()
    plt.boxplot(dlist)
    plt.xlabel('Node')
    plt.ylabel(ytitle)
    plt.title(ctitle)

    nnodes = len(dlist)
    # Ten inches per hundred nodes, never narrower than ten inches.
    adjustedwidth = nnodes / 100.0 * 10.0
    if adjustedwidth < 10.0:
        adjustedwidth = 10.0
    plt.xticks(range(len(xtickmarks)), xtickmarks, rotation=90, fontsize=6)
    fig = matplotlib.pyplot.gcf()
    fig.set_size_inches(adjustedwidth, 7.0)
    plotname = thisdirname + "/" + subdirname + "Boxplot.png"
    plt.savefig(plotname, dpi=150)
    return nnodes
def main():
    """Box-plot every column of ``x_data`` plus ``y_data`` as a last column.

    Data comes from ``get_data()``; ``y_data`` is reshaped into a single
    column and appended to ``x_data`` along axis 1 before plotting.
    """
    features, target = get_data()
    plt.figure()
    target_column = target.reshape((len(target), 1))
    stacked = np.concatenate((features, target_column), axis=1)
    plt.boxplot(stacked)
    plt.show()
# ===== Libraries for data handling/reading and for plotting =====
import pandas as pd
import matplotlib.pyplot as plt

# ===== Load the dataset and drop the rows with missing values (NaN) =====
base = pd.read_csv('credit-data.csv').dropna()

# ===== Age outliers: box plot of the third column =====
plt.boxplot(base.iloc[:, 2], showfliers=True)

# ===== Capture the outliers =====
age_is_outlier = base['age'] < -20
outliers_age = base[age_is_outlier]

# ===== Loan outliers (loan holds the debt): box plot of the fourth column =====
plt.boxplot(base.iloc[:, 3])
loan_is_outlier = base['loan'] > 13400
outliers_loan = base[loan_is_outlier]
# The table above shows how many earthquakes occur at each magnitude.

# In[100]:

# Histogram of magnitudes in ten unit-wide bins covering 0..10.
n, bins, patches = plt.hist(e_data["Magnitude"], range=(0, 10), bins=10)
plt.xlabel("Earthquake Magnitudes")
plt.ylabel("Number of Occurences")
plt.title("Overview of earthquake magnitudes")

# Print the bin counts from magnitude 5 upwards.
print("Magnitude Number of Occurence")
for i in range(5, len(n)):
    print("{}-{} {}".format(i, i + 1, n[i]))

# In[101]:

plt.boxplot(e_data["Magnitude"])
plt.show()

# In[102]:

# Events with magnitude 8 or above.
strong_mask = e_data["Magnitude"] >= 8
highly_affected = e_data[strong_mask]

# In[103]:

print(highly_affected.shape)

# In[106]:

e_data["Month"] = e_data['Date'].dt.month

# In[107]:
def plot(self, filename, output_text):
    """Write the EKF report to ``filename`` as a multi-page PDF.

    Pages, in order: position, orientation, feature counts, Mahalanobis
    distance box plot, velocity, acceleration, angle vs. angular velocity,
    bias terms, covariance, Mahalanobis histogram, visual-landmark heat
    map, optical-flow feature age, optical-flow heat map, and a final page
    containing ``output_text``.

    Reads ``self.ekf``, ``self.gt``, ``self.vl``, ``self.of``,
    ``self.mahal``, ``self.vl_heatmap`` and ``self.of_heatmap``.
    Every figure is closed after it has been saved.
    """
    colors = ['r', 'b', 'g']
    ekf = self.ekf
    gt = self.gt
    vl = self.vl
    of = self.of
    with PdfPages(filename) as pdf:
        # positions
        plt.figure()
        max_y = 10

        def clip_list(array):
            # Clamp every value to [-max_y, max_y] so outliers do not
            # stretch the position plot.
            return [max(-max_y, min(max_y, a)) for a in array]

        plt.plot(ekf['t'], clip_list(ekf['x']), colors[0], linewidth=0.5,
                 label='EKF Pos. (X)')
        plt.plot(ekf['t'], clip_list(ekf['y']), colors[1], linewidth=0.5,
                 label='EKF Pos. (Y)')
        plt.plot(ekf['t'], clip_list(ekf['z']), colors[2], linewidth=0.5,
                 label='EKF Pos. (Z)')
        # Shaded band of +/- the matching covariance column per axis.
        for key, cov, color in zip(('x', 'y', 'z'),
                                   ('cov_13', 'cov_14', 'cov_15'), colors):
            plt.fill_between(ekf['t'],
                             clip_list(ekf[key]) - ekf[cov],
                             clip_list(ekf[key]) + ekf[cov],
                             facecolor=color, alpha=0.5)
        # Freeze the limits so ground truth/observations do not rescale.
        plt.autoscale(False)
        plt.plot(gt['t'], clip_list(gt['x']), color=colors[0], linewidth=0.5,
                 dashes=(1, 1), label='Ground Truth (X)')
        plt.plot(gt['t'], clip_list(gt['y']), color=colors[1], linewidth=0.5,
                 dashes=(1, 1))
        plt.plot(gt['t'], clip_list(gt['z']), color=colors[2], linewidth=0.5,
                 dashes=(1, 1))
        plt.plot(vl['t'], clip_list(vl['x']), color=colors[0],
                 linestyle='None', marker='o', markersize=2,
                 label='Observation (X)')
        plt.plot(vl['t'], clip_list(vl['y']), color=colors[1],
                 linestyle='None', marker='o', markersize=2)
        plt.plot(vl['t'], clip_list(vl['z']), color=colors[2],
                 linestyle='None', marker='o', markersize=2)
        plt.xlabel('Time (s)')
        plt.ylabel('Position (m)')
        plt.title('Position')
        plt.legend(prop={'size': 6})
        plt.ylim(-max_y + 0.2, max_y + 0.2)
        pdf.savefig()
        # This page used to be the only one that left its figure open.
        plt.close()

        # angles
        plt.figure()
        plt.plot(ekf['t'], ekf['angle1'], colors[0], linewidth=0.5,
                 label='EKF')
        plt.plot(ekf['t'], ekf['angle2'], colors[1], linewidth=0.5)
        plt.plot(ekf['t'], ekf['angle3'], colors[2], linewidth=0.5)
        for key, cov, color in zip(('angle1', 'angle2', 'angle3'),
                                   ('cov_1', 'cov_2', 'cov_3'), colors):
            plt.fill_between(ekf['t'], ekf[key] - ekf[cov],
                             ekf[key] + ekf[cov],
                             facecolor=color, alpha=0.5)
        plt.autoscale(False)
        plt.plot(gt['t'], gt['angle1'], color=colors[0], linewidth=0.25,
                 dashes=(1, 1), label='Grount Truth')
        plt.plot(gt['t'], gt['angle2'], color=colors[1], linewidth=0.25,
                 dashes=(1, 1))
        plt.plot(gt['t'], gt['angle3'], color=colors[2], linewidth=0.25,
                 dashes=(1, 1))
        plt.plot(vl['t'], vl['angle1'], color=colors[0], linestyle='None',
                 marker='o', markersize=2, label='Observation')
        plt.plot(vl['t'], vl['angle2'], color=colors[1], linestyle='None',
                 marker='o', markersize=2)
        plt.plot(vl['t'], vl['angle3'], color=colors[2], linestyle='None',
                 marker='o', markersize=2)
        plt.xlabel('Time (s)')
        # Raw string: '\c' is not a valid escape sequence (same text).
        plt.ylabel(r'Angle ($^\circ$)')
        plt.title('Orientation')
        plt.legend(prop={'size': 6})
        pdf.savefig()
        plt.close()

        # feature counts
        plt.figure()
        plt.plot(ekf['t'], ekf['ml_count'], linestyle='None', marker='x',
                 markersize=6, color='#FF0000',
                 label='Integrated VL Features')
        plt.plot(ekf['t'], ekf['of_count'], marker='|', markersize=2,
                 color='#0000FF', label='Integrated OF Features')
        plt.plot(vl['t'], vl['count'], linestyle='None', marker='.',
                 markersize=2, color='#B300FF',
                 label='Observed VL Features (at Reg. Time)')
        plt.plot(of['t'], of['count'], linestyle='None', marker=',',
                 markersize=2, color='#00FFb3',
                 label='Observed OF Features (at Reg. Time)')
        plt.xlabel('Time (s)')
        plt.ylabel('Number of Features')
        plt.title('EKF Features')
        plt.legend(prop={'size': 6})
        pdf.savefig()
        plt.close()

        # mahalnobis distance
        plt.figure()
        if len(self.mahal['boxes']) > 0:
            # NOTE(review): newer matplotlib releases spell this keyword
            # 'manage_ticks' - confirm against the supported version.
            boxes = plt.boxplot(self.mahal['boxes'],
                                positions=self.mahal['times'], widths=0.2,
                                manage_xticks=False, patch_artist=True)
            plt.setp(boxes['whiskers'], color='Black', linewidth=0.25)
            plt.setp(boxes['caps'], color='Black', linewidth=0.25)
            plt.setp(boxes['medians'], color='Black', linewidth=0.25)
            plt.setp(boxes['fliers'], color='r', marker='x', markersize=1)
            plt.setp(boxes['boxes'], color='Black', facecolor='SkyBlue',
                     linewidth=0.25)
        plt.title('VL Features Mahalnobis Distances')
        plt.xlabel('Time (s)')
        plt.ylabel('Mahalnobis Distance')
        pdf.savefig()
        plt.close()

        # linear velocity and acceleration
        plt.figure()
        plt.plot(ekf['t'], ekf['vx'], color=colors[0], linewidth=0.5,
                 label='Velocity')
        plt.plot(ekf['t'], ekf['vy'], color=colors[1], linewidth=0.5)
        plt.plot(ekf['t'], ekf['vz'], color=colors[2], linewidth=0.5)
        for key, cov, color in zip(('vx', 'vy', 'vz'),
                                   ('cov_7', 'cov_8', 'cov_9'), colors):
            plt.fill_between(ekf['t'], ekf[key] - ekf[cov],
                             ekf[key] + ekf[cov],
                             facecolor=color, alpha=0.5)
        plt.title('Velocity')
        plt.xlabel('Time (s)')
        plt.ylabel('Velocity (m/s)')
        plt.ylim(-0.5, 0.5)
        plt.legend(prop={'size': 6})
        pdf.savefig()
        plt.close()

        plt.figure()
        plt.plot(ekf['t'], ekf['ax'], color=colors[0], dashes=(1, 1),
                 linewidth=0.5, label='Acceleration')
        plt.plot(ekf['t'], ekf['ay'], color=colors[1], dashes=(1, 1),
                 linewidth=0.5)
        plt.plot(ekf['t'], ekf['az'], color=colors[2], dashes=(1, 1),
                 linewidth=0.5)
        plt.title('Acceleration')
        plt.xlabel('Time (s)')
        plt.ylabel('Acceleration (m/s$^2$)')
        plt.legend(prop={'size': 6})
        pdf.savefig()
        plt.close()

        # angle and angular velocity (twin y axes)
        plt.figure()
        ax = plt.gca()
        ax.plot(ekf['t'], ekf['angle1'], colors[0], linewidth=0.5,
                label='Angle')
        ax.plot(ekf['t'], ekf['angle2'], colors[1], linewidth=0.5)
        ax.plot(ekf['t'], ekf['angle3'], colors[2], linewidth=0.5)
        ax2 = ax.twinx()
        ax2.plot(ekf['t'], ekf['ox'], color=colors[0], linewidth=0.5,
                 dashes=(1, 1), label='Angular Velocity')
        ax2.plot(ekf['t'], ekf['oy'], color=colors[1], linewidth=0.5,
                 dashes=(1, 1))
        ax2.plot(ekf['t'], ekf['oz'], color=colors[2], linewidth=0.5,
                 dashes=(1, 1))
        ax.set_title('Angular Velocity')
        ax.set_xlabel('Time (s)')
        ax.set_ylabel(r'Angle ($^\circ$)')
        ax2.set_ylabel(r'Angular Velocity ($^\circ$/s)')
        # Merge the handles of both axes into a single legend.
        lines, labels = ax.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax.legend(lines + lines2, labels + labels2, prop={'size': 6})
        pdf.savefig()
        plt.close()

        # bias (twin y axes)
        plt.figure()
        ax = plt.gca()
        ax.plot(ekf['t'], ekf['abx'], colors[0], linewidth=0.5,
                label='Accelerometer Bias')
        ax.plot(ekf['t'], ekf['aby'], colors[1], linewidth=0.5)
        ax.plot(ekf['t'], ekf['abz'], colors[2], linewidth=0.5)
        for key, cov, color in zip(('abx', 'aby', 'abz'),
                                   ('cov_10', 'cov_11', 'cov_12'), colors):
            ax.fill_between(ekf['t'], ekf[key] - ekf[cov],
                            ekf[key] + ekf[cov],
                            facecolor=color, alpha=0.5)
        ax2 = ax.twinx()
        ax2.plot(ekf['t'], ekf['gbx'], color=colors[0], linewidth=0.5,
                 dashes=(1, 1), label='Gyrometer Bias')
        ax2.plot(ekf['t'], ekf['gby'], color=colors[1], linewidth=0.5,
                 dashes=(1, 1))
        ax2.plot(ekf['t'], ekf['gbz'], color=colors[2], linewidth=0.5,
                 dashes=(1, 1))
        for key, cov, color in zip(('gbx', 'gby', 'gbz'),
                                   ('cov_4', 'cov_5', 'cov_6'), colors):
            ax2.fill_between(ekf['t'], ekf[key] - ekf[cov],
                             ekf[key] + ekf[cov],
                             facecolor=color, alpha=0.5)
        ax.set_title('Bias Terms')
        ax.set_xlabel('Time (s)')
        ax.set_ylabel('Accelerometer Bias (m/s$^2$)')
        ax2.set_ylabel(r'Gyrometer Bias ($^\circ$/s)')
        lines, labels = ax.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax.legend(lines + lines2, labels + labels2, prop={'size': 6})
        pdf.savefig()
        plt.close()

        # covariance
        plt.figure()
        plt.plot(ekf['t'],
                 covariance_map(self.ekf, 'cov_13', 'cov_14', 'cov_15'),
                 colors[0], linewidth=0.5, label='Position Covariance')
        plt.plot(ekf['t'],
                 covariance_map(self.ekf, 'cov_7', 'cov_8', 'cov_9'),
                 colors[1], linewidth=0.5, label='Velocity Covariance')
        plt.plot(ekf['t'],
                 covariance_map(self.ekf, 'cov_1', 'cov_2', 'cov_3'),
                 colors[2], linewidth=0.5, label='Orientation Covariance')
        plt.title('Std. Deviation')
        plt.xlabel('Time (s)')
        plt.ylabel('Covariance')
        plt.legend(prop={'size': 6})
        pdf.savefig()
        plt.close()

        # mahalnobis distance histogram (all boxes flattened)
        plt.figure()
        # NOTE(review): newer matplotlib releases replaced 'normed' with
        # 'density' - confirm against the supported version.
        plt.hist(
            [item for sublist in self.mahal['boxes'] for item in sublist],
            bins=200, range=(0, 50), normed=True)
        plt.xlabel('Mahalnobis Distance')
        plt.ylabel('pdf')
        plt.title('Mahalnobis Distance Histogram')
        pdf.savefig()
        plt.close()

        plt.figure()
        plt.imshow(np.transpose(self.vl_heatmap), cmap='hot',
                   interpolation='nearest', vmin=0,
                   vmax=np.amax(self.vl_heatmap))
        plt.title('Visual Landmarks Density')
        pdf.savefig()
        plt.close()

        plt.figure()
        plt.plot(of['t'], of['oldest'], color=colors[0], linewidth=0.5,
                 label='Oldest')
        plt.plot(of['t'], of['median'], color=colors[1], linewidth=0.5,
                 label='Median')
        plt.plot(of['t'], of['youngest'], color=colors[2], linewidth=0.5,
                 label='Youngest')
        plt.xlabel('Time (s)')
        plt.ylabel('Optical Flow Feature Age (s)')
        plt.title('Optical Flow Feature Age')
        plt.legend(prop={'size': 6})
        pdf.savefig()
        plt.close()

        plt.figure()
        # Scale the colour map by this heat map's own maximum; it used to
        # borrow the visual-landmark maximum.
        plt.imshow(np.transpose(self.of_heatmap), cmap='hot',
                   interpolation='nearest', vmin=0,
                   vmax=np.amax(self.of_heatmap))
        plt.title('Optical Flow Density')
        pdf.savefig()
        plt.close()

        # Final page: free-form text summary.
        plt.figure()
        plt.axis('off')
        plt.text(0.0, 0.5, output_text)
        pdf.savefig()
        plt.close()
import matplotlib.pyplot as plt figure1 = [k[1] for k in train_X] figure2 = [k[2] for k in train_X] figu1=[] figu2=[] for k in train_X: if k[1]>40 or k[2]>40: continue else: figu1.append(k[1]) figu2.append(k[2]) #labels = ["Intercolumnar distance", "Upper margin", "Lower margin", "Exploitation", # "Row number", "Modular ratio", "Interlinear spacing", # "Weight", "Peak number", "mr/is"] labels=["Upper margin", "Lower margin"] plt.boxplot([figu1, figu2], labels=labels, sym ="o", whis = 1.5) plt.show() figure7 = [] figure8 = [] for k in train_X: if k[7]>10 or k[8]>10: continue else: figure7.append(k[7]) figure8.append(k[8]) yy=[] figure=[k[0] for k in train_X] for i in range(len(data)-1):
def reportPlotAllProjectBreaksDistribution(o_names, p_names, path):
    """Summarise and plot breaks-per-year for every project.

    For each index ``i`` the file
    ``path/o_names[i]/p_names[i]/inactivity_interval_list.csv`` is read.
    Rows with more than three cells contribute one breaks-per-year value:
    (number of cells after the first, minus two) divided by the
    second-to-last cell (a day count) expressed in years.  Per-project
    statistics are written to ``path/breaks_stats_all.csv`` and a box plot
    of all projects is saved to ``path/BreaksDistribution``.
    """
    import matplotlib.pyplot as plt
    import numpy
    import csv
    import pandas

    stat_columns = [
        'project', 'mean', 'st_dev', 'var', 'median', 'breaks_devlife_corr'
    ]
    breaks_stats = pandas.DataFrame(columns=stat_columns)
    projects_counts = []

    for chosen_project, project_name in enumerate(o_names):  # FROM 0 TO n-1
        main_project = p_names[chosen_project]
        breaks_lifetime = pandas.DataFrame(columns=['BpY', 'life'])

        # Read Breaks Table
        table_path = (path + '/' + project_name + '/' + main_project +
                      '/inactivity_interval_list.csv')
        with open(table_path, 'r') as f:  # opens PW file
            breaks_list = [[str(cell) for cell in rec]
                           for rec in csv.reader(f, delimiter=',')]

        counts_perYear = []
        for row in breaks_list:
            num_breaks = len(row[1:]) - 2
            if num_breaks <= 0:
                continue
            num_days = int(row[-2])
            years = num_days / 365
            BpY = num_breaks / years
            counts_perYear.append(BpY)
            add(breaks_lifetime, [BpY, num_days])

        projects_counts.append(counts_perYear)
        correlation = numpy.corrcoef(breaks_lifetime['BpY'],
                                     breaks_lifetime['life'])[1][0]
        add(breaks_stats, [
            project_name,
            numpy.mean(counts_perYear),
            numpy.std(counts_perYear),
            numpy.var(counts_perYear),
            numpy.median(counts_perYear),
            correlation
        ])

    breaks_stats.to_csv(path + '/breaks_stats_all.csv', sep=';', na_rep='NA',
                        header=True, index=False, mode='w', encoding='utf-8',
                        quoting=None, quotechar='"', line_terminator='\n',
                        decimal='.')

    # The 'framework' project is shown under the name 'laravel'.
    labels = ['laravel' if name == 'framework' else name for name in p_names]

    plt.clf()
    # Boxes and their labels are both drawn in reverse project order.
    projects_counts.reverse()
    plt.boxplot(projects_counts)
    labels.reverse()
    tick_positions = numpy.arange(1, len(p_names) + 1)
    plt.xticks(tick_positions, labels, rotation=20)
    # Pad margins so that markers don't get clipped by the axes:
    plt.margins(0.2)
    # Tweak spacing to prevent clipping of tick-label:
    plt.subplots_adjust(bottom=0.15)
    plt.grid(False)
    plt.ylabel("Pauses per Year")
    plt.savefig(path + "/BreaksDistribution", dpi=600)
    plt.clf()
print("Median API: ", median1)
print("Mode API: ", mode1)
print("Range: ", range1)
print("Standard Deviation API: ", standard_deviation)
print("Variance API: ", variance, sep = "\n")
print("Percentile API: ", percentile)

"""# Data Visualization <br> Histogram plotting """

dataset.hist(xlabelsize= 10, ylabelsize= 10, figsize = (10,10))

"""# Box Plot visualization <br> 1. Visual representation of numerical data through their quartiles. <br> 2. Used to detect outliers in dataset<br> 3. Summarizes data using 25th, 50th, and 75th percentile """

# One box plot per column, for column positions 1..3.
data = np.array(dataset)
for i in range(1, 4):
    plt.boxplot(np.array(data[:, i], dtype='float'))
    plt.show()

# DataFrame.ix was removed in pandas 1.0.  With an integer slice it selected
# by position (assuming the column labels are not integers), which is what
# iloc does explicitly.
sns.boxplot(data=dataset.iloc[:, 1:5])
sns.boxplot(x=dataset['species'], y=dataset['sepal_length'])
# LoanAmount: replace missing values with the column median.  The result is
# assigned back because an in-place fillna on a selected column is a chained
# operation that is not guaranteed to modify the frame itself.
credit_risk_data['LoanAmount'] = credit_risk_data['LoanAmount'].fillna(
    credit_risk_data['LoanAmount'].median())

# In[484]:

# Loan_Amount_Term
credit_risk_data['Loan_Amount_Term'] = credit_risk_data[
    'Loan_Amount_Term'].fillna(credit_risk_data['Loan_Amount_Term'].median())

# In[485]:

# Handling outliers

# In[486]:

plt.boxplot(credit_risk_data['ApplicantIncome'])

# In[487]:

columns = [
    'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term'
]
upper = []
lower = []
values = []
for col in columns:
    # Bounds at 1.5 * IQR below the first / above the third quartile.
    q1, q3 = np.percentile(credit_risk_data[col], [25, 75])
    IQR = q3 - q1
    lower_bound = q1 - (1.5 * IQR)
    upper_bound = q3 + (1.5 * IQR)
def plotAttribute(cur, planners, attribute, typename):
    """Create a plot for a particular attribute.

    The plot includes data for all planners that have data for this
    attribute. The kind of plot depends on ``typename``:

    * ``'ENUM'``    -- stacked bars with the percentage of runs per enum value,
    * ``'BOOLEAN'`` -- one bar per planner with the percentage of true values,
    * anything else -- one box plot per planner.

    When any planner has runs where the attribute is NULL, the number of such
    runs is written above each planner.

    Args:
        cur: database cursor; queried through ``execute``, ``fetchall`` and
            ``fetchone``.
        planners: sequence of records where item 0 is the planner id matched
            against ``runs.plannerid`` and item 1 is the label shown on the
            x axis.
        attribute: name of a column of the ``runs`` table. It is interpolated
            into the SQL text, so it must come from the database schema and
            never from untrusted input.
        typename: type of the attribute (see above).
    """
    labels = []
    measurements = []
    nanCounts = []
    if typename == 'ENUM':
        cur.execute('SELECT description FROM enums where name IS "%s"' % attribute)
        descriptions = [t[0] for t in cur.fetchall()]
        numValues = len(descriptions)
    for planner in planners:
        cur.execute('SELECT %s FROM runs WHERE plannerid = %s AND %s IS NOT NULL'
                    % (attribute, planner[0], attribute))
        measurement = [t[0] for t in cur.fetchall() if t[0] is not None]
        if measurement:
            # Count the runs of this planner that have no value at all.
            cur.execute('SELECT count(*) FROM runs WHERE plannerid = %s AND %s IS NULL'
                        % (planner[0], attribute))
            nanCounts.append(cur.fetchone()[0])
            labels.append(planner[1])
            if typename == 'ENUM':
                # Convert raw enum values into a percentage per enum index.
                scale = 100. / len(measurement)
                measurements.append(
                    [measurement.count(i) * scale for i in range(numValues)])
            else:
                measurements.append(measurement)

    if not measurements:
        print('Skipping "%s": no available measurements' % attribute)
        return

    plt.clf()
    ax = plt.gca()
    if typename == 'ENUM':
        width = .5
        # After transposing: one row per enum value, one column per planner.
        measurements = np.transpose(np.vstack(measurements))
        colsum = np.sum(measurements, axis=1)
        # Only draw enum values that occur for at least one planner.
        rows = np.where(colsum != 0)[0]
        heights = np.zeros((1, measurements.shape[1]))
        ind = range(measurements.shape[1])
        for i in rows:
            plt.bar(ind, measurements[i], width, bottom=heights[0],
                    color=matplotlib.cm.hot(int(floor(i * 256 / numValues))),
                    label=descriptions[i])
            heights = heights + measurements[i]
        plt.xticks([x + width / 2. for x in ind], labels, rotation=30)
        ax.set_ylabel(attribute.replace('_', ' ') + ' (%)')
        # Shrink the axes to make room for the legend on the right.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        props = matplotlib.font_manager.FontProperties()
        props.set_size('small')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), prop=props)
    elif typename == 'BOOLEAN':
        width = .5
        measurementsPercentage = [sum(m) * 100. / len(m) for m in measurements]
        ind = range(len(measurements))
        plt.bar(ind, measurementsPercentage, width)
        plt.xticks([x + width / 2. for x in ind], labels, rotation=30)
        ax.set_ylabel(attribute.replace('_', ' ') + ' (%)')
    else:
        # The `bootstrap` argument is only passed for matplotlib >= 1.
        if int(matplotlibversion.split('.')[0]) < 1:
            plt.boxplot(measurements, notch=0, sym='k+', vert=1, whis=1.5)
        else:
            plt.boxplot(measurements, notch=0, sym='k+', vert=1, whis=1.5,
                        bootstrap=1000)
        ax.set_ylabel(attribute.replace('_', ' '))
        xtickNames = plt.setp(ax, xticklabels=labels)
        plt.setp(xtickNames, rotation=25)
    ax.set_xlabel('Motion planning algorithm')
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
                  alpha=0.5)
    if max(nanCounts) > 0:
        # Annotate every planner with its number of NULL runs, near the top.
        maxy = max(max(y) for y in measurements)
        for i in range(len(labels)):
            # `width` is only evaluated (and only defined) for BOOLEAN here.
            x = i + width / 2 if typename == 'BOOLEAN' else i + 1
            ax.text(x, .95 * maxy, str(nanCounts[i]),
                    horizontalalignment='center', size='small')
    plt.show()
import matplotlib.pyplot as plt
import random

# Ten random integers between 0 and 10000 (inclusive), shown as a box plot.
vetor = [random.randint(0, 10000) for _ in range(10)]

plt.boxplot(vetor)
plt.show()
# NOTE(review): the statements down to the second separator line use
# per-model values (cv_prec, cv_recall, cv_f1, msg, model); presumably they
# belong to the body of a loop over models whose header is outside this
# excerpt -- confirm the indentation against the full file.
# Record the mean cross-validation scores.
precision.append(cv_prec.mean())
recall.append(cv_recall.mean())
f1.append(cv_f1.mean())
print('----------------------------------------')
print(msg)
# Confusion matrix built from cross-validated predictions.
Y_pred = cross_val_predict(model, X_input, Y_output, cv=N)
conf_mat = confusion_matrix(Y_output, Y_pred)
print(conf_mat)
print('----------------------------------------')
# boxplot for accuracy comparison
graph = plt.figure()
graph.suptitle('Accuracy Comparison')
ax = graph.add_subplot(111)
plt.boxplot(accuracyresults)
ax.set_xticklabels(names)
y_pos = np.arange(len(accuracy))
# bar chart accuracy comparison
graph2 = plt.figure()
graph2.suptitle('Accuracy Comparison')
ax2 = graph2.add_subplot(111)
plt.bar(y_pos, accuracy, align='center', alpha=0.5)
plt.xticks(y_pos, names)
plt.show()
# Removing unwanted characters: turn the list's string form into a plain
# comma-separated string by stripping the brackets and quotes.
names = str(names)
names = names.replace('[', '').replace(']', '').replace("'", "")
# Read the data. The open file handle is passed straight to pandas: handing
# read_json a literal JSON string is deprecated in recent pandas versions.
with open(filename, 'r') as f:
    data = pd.read_json(f)

# Show the number of null values per axis.
null_counts = data.isnull().sum()
print(null_counts)
if null_counts.sum() != 0:
    print("存在空值,需要进行处理")

# Draw a box plot for each of the first three columns (one per axis) to spot
# unusual coordinates.
for axis_index in range(3):
    plt.subplot(131 + axis_index)
    plt.boxplot(data[data.columns[axis_index]])
plt.show()

# Estimate the answering time of this questionnaire as a first validity check.
# NOTE(review): rows / 5 / 60 presumably assumes 5 samples per second and
# yields minutes -- confirm the sampling rate.
answer_time = data.shape[0] / 5 / 60
if answer_time > 90 or answer_time < 10:
    print("答题时长异常(少于10min或大于90min),建议删除该数据")

# Compute the variance of each axis; a very small variance suggests the phone
# was lying on a table instead of being held in the hand.
if any(data[data.columns[axis_index]].var() < 0.001 for axis_index in range(3)):
    print("可能手机放在平面,未拿在手上,建议删除")
def _pairwise_pvalues(groups, ndigits):
    """Return rounded t-test p-values for every ordered pair of *groups*.

    The result is a flat list in row-major order (first group against all
    groups, then the second, ...). A pair whose test raises is reported with
    a p-value of 1.0.
    """
    pvalues = []
    for first in groups:
        for second in groups:
            try:
                _, p = ttest_ind(first, second)
            except Exception:
                p = 1.00
            pvalues.append(round(p, ndigits))
    return pvalues


def _styled_boxplot(groups):
    """Draw *groups* as grey filled boxes with black outlines on the current axes."""
    plot = plt.boxplot(groups, whis=1, patch_artist=True)
    plt.setp(plot['boxes'], color='Black', linewidth=2, facecolor='lightgray')
    plt.setp(plot['whiskers'], color='black', linewidth=1)
    plt.setp(plot['caps'], color='black', linewidth=1)
    plt.setp(plot['fliers'], color='white', marker='o', markersize=4)
    plt.setp(plot['medians'], color='Black', linewidth=2)
    return plot


def etandeamain():
    """Compare ET and EA values of frequent vs. infrequent positions per cancer type.

    For each cancer type in a fixed list, the function

    1. collects the ET values of the positions returned by
       ``getlistofsigpos`` (first list: "frequent", second: "infrequent")
       and compares them with all ET values,
    2. collects the EA values of the mutations at those positions and
       compares them with all mutations and with a random control built from
       ``readinEA`` restricted to the positions in ``obtainhighcov``,
    3. saves one ET and one EA box plot under
       ``../Paper/PotentialFigures/EAET/`` and writes the EA groups (one
       tab-separated line per group) to
       ``../ImagesOlfactory/<type>/EAforboxplot_<type>.txt``.

    The plot titles show the p-values of the last group against every group.
    """
    EAdict = readinEA()
    highcovlist = obtainhighcov()
    etdict = obtainETscores()

    # The file list is only printed for reference; the analysis runs on the
    # fixed list below.
    with open('../DATA/Mutations/filelist.txt') as cancerfile:
        cancerlist = []
        for line in cancerfile:
            cancer = line.strip('\n')
            if cancer != '':
                cancerlist.append(cancer)
    print(cancerlist)
    cancerlist = ['COAD', 'HNSC', 'LUAD', 'LUSC', 'SKCM', 'STAD', 'UCEC']

    etallarray = [float(etdict[i]) for i in etdict]

    for cancertype in cancerlist:
        print('cancertype', cancertype)
        eamutationdict, eamutlist = acquiremutations(cancertype)
        listofsigpos, lowsigpos = getlistofsigpos(cancertype)

        earandarray = []
        eahyperarray = []
        eahypoarray = []
        eaallarray = eamutlist

        # Random control: every EA value available at high-coverage positions.
        # NOTE(review): the original if-bodies here and below started with a
        # no-op '' statement; the loops are assumed to be nested inside the
        # `if` -- confirm against the original indentation.
        for pos in EAdict:
            position = int(pos[1:])
            # Value is unused, but the lookup is kept so that a position
            # without an ET score still fails loudly, as before.
            etvalue = etdict[position]
            if position in highcovlist:
                for j in EAdict[pos]:
                    if j != '-':
                        earandarray.append(float(j))

        print('listofsigpos', listofsigpos)
        print('lowsigpos', lowsigpos)

        # EA values of the mutations at frequent / infrequent positions.
        # Fix: the infrequent branch used to test `eahyperarray == []` and
        # then overwrite `eahypoarray`, dropping previously collected values.
        for pos in eamutationdict:
            if pos in listofsigpos:
                eahyperarray.extend(float(ea) for ea in eamutationdict[pos])
            if pos in lowsigpos:
                eahypoarray.extend(float(ea) for ea in eamutationdict[pos])

        ethyperarray = [float(etdict[pos]) for pos in listofsigpos if pos in etdict]
        ethypoarray = [float(etdict[pos]) for pos in lowsigpos if pos in etdict]

        # --- ET comparison -------------------------------------------------
        ettuple = [ethyperarray, ethypoarray, etallarray]
        etplist = _pairwise_pvalues(ettuple, 3)
        _styled_boxplot(ettuple)
        plt.title('ET comparison of %s \n%s' % (cancertype, etplist[-3:]))
        plt.xticks(
            [1, 2, 3],
            ['Frequent Positions', 'Infrequent Positions', 'Random Control'])
        plt.savefig('../Paper/PotentialFigures/EAET/ET_%s.png' % (cancertype))
        plt.close()

        # --- EA comparison -------------------------------------------------
        eatuple = [eahyperarray, eahypoarray, eaallarray, earandarray]
        ea_path = '../ImagesOlfactory/%s/EAforboxplot_%s.txt' % (cancertype,
                                                                 cancertype)
        with open(ea_path, 'w') as eaoutfile:
            for group in eatuple:
                for value in group:
                    eaoutfile.write(str(value))
                    eaoutfile.write('\t')
                eaoutfile.write('\n')

        eaplist = _pairwise_pvalues(eatuple, 5)
        _styled_boxplot(eatuple)
        plt.title('EA comparison of %s\n%s' % (cancertype, eaplist[-4:]))
        plt.xticks([1, 2, 3, 4], [
            'Frequent Positions', 'Infrequent Positions', 'All Mutations',
            'Random Control'
        ])
        plt.ylim(0, 100)
        plt.savefig('../Paper/PotentialFigures/EAET/EA_%s.png' % (cancertype))
        plt.close()
# Exercise 4.2.3
# Box plot of the four iris attributes.
from matplotlib.pyplot import boxplot, xticks, ylabel, title, show

# requires data from exercise 4.2.1 (provides X and attributeNames)
from ex4_2_1 import *

# One tick per attribute; box plots are drawn at positions 1..4.
_tick_positions = range(1, 5)

boxplot(X)
xticks(_tick_positions, attributeNames)
ylabel('cm')
title('Fisher\'s Iris data set - boxplot')
show()

print('Ran Exercise 4.2.3')