def main():
    criticidad = {'sup': 0, 'med': 0, 'inf': 0}
    promedios = []
    for i in range(EXPERIMENTOS):
        tiempo = 0
        for j in range(CORRIDAS):
            # break the eggs + scramble the eggs
            sup = np.random.uniform(2, 4) + np.random.exponential(4)
            # add the time to finish cooking the eggs
            sup += np.random.uniform(2, 4)
            med = np.random.uniform(6, 12)  # make toast + butter the toast
            inf = np.random.uniform(6, 12)  # fry the bacon
            valores = {'sup': sup, 'med': med, 'inf': inf}
            maximo = max(valores.values())
            k_maximo = kmaximo(valores, maximo)
            valores[k_maximo] += 1
            criticidad[k_maximo] += 1
            tiempo += valores[k_maximo]
        promedios.append(tiempo / CORRIDAS)
    desv = np.std(promedios)
    promedio = np.average(promedios)
    print("Valores: ", list(valores.values()))
    print("Desvio %s " % desv)
    print("Promedio %s" % promedio)
    print("Intervalos de confianza %.2f <= u <= %.2f , con un 99%% de confianza"
          % (promedio - 2.57 * desv, promedio + 2.57 * desv))
    for k, v in criticidad.items():
        print("criticidad %s, %.2f %%" % (k, v * 100.00 / (CORRIDAS * EXPERIMENTOS)))
    hist(promedios, 6)
    show()
def compareTwoUsers(data1, data2, outdir):
    """Compares data for two users. Currently plots difference in peaks
    for the users on presslengths for different keycodes."""
    def computePeakDifference(d1, d2):
        edges = findCommonEdges(d1, d2)
        h1, e = np.histogram(d1, bins=edges, density=True)
        h2, e = np.histogram(d2, bins=edges, density=True)
        a1, a2 = np.argmax(h1), np.argmax(h2)
        diff = (edges[a1] + edges[a1 + 1] - edges[a2] - edges[a2 + 1]) / 2.0
        return diff

    commonKeys = set(data1.keystrokePLs_key.keys()) & set(data2.keystrokePLs_key.keys())
    peakDiffs = []
    for key in commonKeys:
        dat1 = data1.keystrokePLs_key[key]
        dat2 = data2.keystrokePLs_key[key]
        peakDiffs.append(computePeakDifference(dat1, dat2))
    peakDiffs.append(computePeakDifference(data1.keystrokePLs, data2.keystrokePLs))

    edges = findCommonEdges(peakDiffs)
    plt.figure()
    plt.hist(peakDiffs, bins=edges)
    plt.title("Peak Differences for Keystroke PL for %s and %s" % (data1.user, data2.user))
    plt.xlabel("Time (seconds)")
    plt.savefig("%s/%s_%s_kPLpeakDiff.pdf" % (outdir, data1.user, data2.user))
    plt.close()
def summary(self, file_idx=0, show_plot=False):
    print("Cluster output")
    s = self.cluster_membership.sum(0)
    nnz = (s > 0).sum()
    print("Number of non-empty clusters: " + str(nnz) + " (of " + str(s.size) + ")")
    si = (self.cluster_membership).sum(0)
    print()
    print("Size: count")
    for i in np.arange(0, si.max() + 1):
        print(str(i) + ": " + str((si == i).sum()))
    t = (self.peak_data.possible.multiply(self.cluster_membership)).data
    t -= 1
    print()
    print("Trans: count")
    for i in np.arange(len(self.peak_data.transformations)):
        print(self.peak_data.transformations[i].name + ": " + str((t == i).sum()))
    if show_plot:
        plt.figure()
        x = []
        cx = self.cluster_model.Z.tocoo()
        for i, j, v in zip(cx.row, cx.col, cx.data):
            x.append(v)
        x = np.array(x)
        # x = x[~np.isnan(x)]
        plt.hist(x, 20)
        plt.title('Precursor mass clustering -- Z for file ' + str(file_idx))
        plt.xlabel('Probabilities')
        plt.ylabel('Count')
        plt.show()
def icsd_progress():
    n = 60
    tasks = Task.objects.filter(project_set='icsd', entry__natoms__lte=n)
    data = tasks.values_list('entry__natoms', 'state')

    done = []
    failed = []
    idle = []
    running = []
    for task in data:
        if task[1] == 2:
            done.append(task[0])
        elif task[1] == 1:
            running.append(task[0])
        elif task[1] == 0:
            idle.append(task[0])
        elif task[1] == -1:
            failed.append(task[0])

    plt.hist([done, running, failed, idle], histtype='barstacked',
             label=['done', 'running', 'failed', 'waiting'],
             bins=n)  # , cumulative=True)
    plt.legend(loc='best')
    plt.xlabel('# of atoms in primitive cell')
    plt.ylabel('# of entries')

    img = StringIO.StringIO()
    plt.savefig(img, dpi=75, bbox_inches='tight')
    data_uri = 'data:image/jpg;base64,'
    data_uri += img.getvalue().encode('base64').replace('\n', '')
    plt.close()
    return data_uri
def tst_for_dataset(self, creator, filename):
    from dials.array_family import flex
    from dials.algorithms.shoebox import MaskCode
    print(filename)
    rlist = flex.reflection_table.from_pickle(filename)
    shoebox = rlist['shoebox']
    background = [sb.background.deep_copy() for sb in shoebox]
    success = creator(shoebox)
    assert(success.count(True) == len(success))
    diff = []
    for i in range(len(rlist)):
        mask = flex.bool([(m & MaskCode.Foreground) != 0 for m in shoebox[i].mask])
        px1 = background[i].select(mask)
        px2 = shoebox[i].background.select(mask)
        den = max([flex.mean(px1), 1.0])
        diff.append(flex.mean(px2 - px1) / den)
    diff = flex.double(diff)
    mv = flex.mean_and_variance(flex.double(diff))
    mean = mv.mean()
    sdev = mv.unweighted_sample_standard_deviation()
    try:
        assert(abs(mean) < 0.01)
    except Exception:
        print("Mean: %f, Sdev: %f" % (mean, sdev))
        from matplotlib import pylab
        pylab.hist(diff)
        pylab.show()
        raise
def plotHist(self, parsList=None):
    """
    Plots distributions for a number of traces.

    Parameters
    ----------
    parsList : string or list of strings, optional
        Refers to a parameter name or a list of parameter names.
        If None, all available parameters are plotted.
    """
    if not ic.check["matplotlib"]:
        PE.warn(PE.PyARequiredImport("To use 'plotHists', matplotlib has to be installed.",
                                     solution="Install matplotlib."))
        return
    if isinstance(parsList, str):
        parsList = [parsList]
    tracesDic = {}
    if parsList is not None:
        for parm in parsList:
            self._parmCheck(parm)
            tracesDic[parm] = self[parm]
    else:
        # Use all available traces
        for parm in self.availableParameters():
            tracesDic[parm] = self[parm]
    cols, rows = self.__plotsizeHelper(len(tracesDic))
    for i, [pars, trace] in enumerate(tracesDic.items()):
        # Check the number of collected traces here; parsList may be None.
        if len(tracesDic) > 1:
            plt.subplot(rows, cols, i + 1)
        plt.hist(trace, label=pars + " hist")
        plt.legend()
def scaleTestMinFinding():
    xs = range(10)
    distances = []
    noise = 3.5
    n = 1000000
    for i in range(n):
        a = random()
        b = random()
        c = random()
        ys = [x*x*a + x*b + c + random() * noise for x in xs]
        #print a, b, c, polynomialFit(xs, ys)[::-1]
        minExp, unc = polynomialFindMinimum(xs, ys, returnErrors = True)
        minCalc = -b/(2.0*a)
        dist = (minCalc - minExp) / unc
        #print minCalc, minExp, unc, dist
        distances.append(dist)
    print('mean: %f' % stats.mean(distances))
    print('stdDev: %f' % stats.stdDev(distances))
    for sigma in [1, 2, 3]:
        print('With %d sigma: %f%%' % (sigma, 100.0 * sum([int(abs(d) < sigma) for d in distances]) / n))
    pylab.hist(distances, bins = 50, range = (-5, 5))
    pylab.show()
def plot_call_rate(c): # Histogram P.clf() P.figure(1) P.hist(c[:,1], normed=True) P.xlabel('Call Rate') P.ylabel('Portion of Variants') P.savefig(os.environ['OBER'] + '/doc/imputation/cgi/call_rate.png') #################################################################################### #if __name__ == '__main__': # # Input parameters # file_name = sys.argv[1] # Name of data file with MAF, call rates # # # Load data # c = np.loadtxt(file_name, dtype=np.float16) # # # Breakdown by call rate (proportional to the #samples, 1415) # plot_call_rate(c) # h = np.histogram(c[:,1]) # a = np.flipud(np.cumsum(np.flipud(h[0])))/float(c.shape[0]) # print np.concatenate((h[1][:-1][newaxis].transpose(), a[newaxis].transpose()), axis=1) # Breakdown by minor allele frequency maf_n = 20 maf_bins = np.linspace(0, 0.5, maf_n + 1) maf_bin = np.digitize(c[:,0], maf_bins) d = c.astype(float64) mean_call_rate = np.array([(1.*np.mean(d[maf_bin == i,1])) for i in xrange(len(maf_bins))]) P.bar(maf_bins - h, mean_call_rate, width=h) P.figure(2) h = (maf_bins[-1] - maf_bins[0]) / maf_n P.bar(maf_bins - h, mean_call_rate, width=h) P.savefig(os.environ['OBER'] + '/doc/imputation/cgi/call_rate_maf.png')
def mood_hist(index): n_bins = 10 data1 = pd.read_csv('data/split_class/large_IGNORE_406_mood_+1.txt', sep=' ', header=None) data2 = pd.read_csv('data/split_class/large_IGNORE_406_mood_-1.txt', sep=' ', header=None) mood_sum1 = pd.Series([0] * data1.shape[0]) mood_sum2 = pd.Series([0] * data2.shape[0]) # for i in np.arange(1, 7): for i in np.arange(1, 6): print(i) mood_sum1 += data1[i] mood_sum2 += data2[i] col1 = data1[index] / mood_sum1 col2 = data2[index] / mood_sum2 print(col1, col2) print(col1.mean()) # print(col1.describe()) print(col2.mean()) # print(col2.describe()) plt.subplot(1, 2, 1) plt.hist(col1, n_bins, alpha=0.8, color='r', linewidth=1.5) # plt.xlim(0, 0.5) plt.ylabel("frequency") plt.subplot(1, 2, 2) plt.hist(col2, n_bins, alpha=0.8, color='b', linewidth=1.5) # plt.xlim(0, 0.5) # plt.ylabel("frequency") plt.show()
def shopping_hist():
    n_bins = 100
    data1 = pd.read_csv('data/split_class/large_IGNORE_404_shopping_+1.txt', sep=' ', header=None)
    data2 = pd.read_csv('data/split_class/large_IGNORE_404_shopping_-1.txt', sep=' ', header=None)
    shopping1 = data1[2]
    shopping2 = data2[2]
    for i in np.arange(3, 17):
        shopping1 += data1[i]
        shopping2 += data2[i]
    col1 = shopping1 / data1[1]
    print(col1.describe())
    col2 = shopping2 / data2[1]
    print(col2.describe())

    plt.subplot(1, 2, 1)
    plt.hist(col1, n_bins, density=True, stacked=True, alpha=0.8, color='r', linewidth=1.5)
    plt.xlim(0, 0.5)
    plt.ylabel("frequency")
    plt.subplot(1, 2, 2)
    plt.hist(col2, n_bins, density=True, stacked=True, alpha=0.8, color='b', linewidth=1.5)
    plt.xlim(0, 0.5)
    plt.ylabel("frequency")
    # plt.hist(data1[1], n_bins, normed=1, alpha=0.6, color='b', cumulative=True)
    # plt.hist(data2[1], alpha=0.6, color='r')
    plt.show()
def hist_extraversion():
    '''
    Distribution of the extraversion scores, together with the fitted normal curve.
    :return:
    '''
    n_bins = 10
    data = pd.read_csv('data/regress_train_data.txt', sep=' ', header=None)
    ext = data[1]
    mu = ext.mean()
    sigma = ext.std()
    print(mu, sigma)
    fig = plt.figure(figsize=(10, 8))
    # --- for *.eps ---
    # fig.set_rasterized(True)
    # plt.title("The distribution of score on extraversion")
    plt.xlabel("$Score\ on\ extraversion$", fontsize=20)
    plt.ylabel("$Probability$", fontsize=20)
    plt.grid(True)
    plt.hist(ext, n_bins, density=True, alpha=0.8, rwidth=0.85)
    x = np.linspace(0, 60, 100)
    # mlab.normpdf was removed from matplotlib; the scipy equivalent is used instead.
    from scipy.stats import norm
    y = norm.pdf(x, mu, sigma)
    plt.xlim(0, 60)
    plt.ylim(0, 0.055)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.plot(x, y, 'r--')
    # plt.tight_layout()
    plt.savefig('figure/ext_dist.eps', dpi=300)
    plt.show()
def hist_shortest_path(g, filename, show=0): g.delete_vertices( [i for i, degree in enumerate(g.degree()) if degree == 0]) # print g.degree() shortest_paths = g.shortest_paths_dijkstra(mode='all') # print shortest_paths # ig.plot(g) plt.hist( np.hstack(shortest_paths), range=[0, 5], bins=5, rwidth=1., align='left', normed=True, ) plt.xlabel('Number of steps') plt.ylabel('Proportion') plt.title( 'Number of steps to each member (mean: %.2f)' % np.mean(shortest_paths)) if show: plt.show() else: plt.savefig(filename)
def plot_age_distribution_over_time(g_states, filename=None): num_plots = len(g_states) fig = plt.gcf() max_cols = 5 if int(np.ceil(np.sqrt(num_plots))) >= max_cols: cols = max_cols else: cols = int(np.ceil(np.sqrt(num_plots))) rows = int(np.ceil(num_plots/cols)) fig.set_size_inches(14, 5*rows) for i, g in enumerate(g_states): plt.subplot(rows, int(np.ceil(num_plots/float(rows))), i+1) plt.hist( g.vs['age'], bins=27, range=[18, 45], normed=True, label='t: %i' % i, ) plt.legend() if filename: plt.savefig(filename) fig.clf()
def average_diode_sep():
    clust_eps = 0.2
    min_dist = 2.0
    min_samples = 3.0
    thold = 240
    FRAMES = np.arange(4000)*2
    dataset = "bukowski_02.C"
    cf = pickle.load(open(os.path.join(ddir(dataset), 'config.pickle')))
    region = pickle.load(open(os.path.join(ddir(dataset), 'region.pickle')))
    env = util.Environmentz(cf['field_dim_m'], cf['frame_dim_pix'])
    x_min, y_min = env.gc.real_to_image(region['x_pos_min'], region['y_pos_min'])
    x_max, y_max = env.gc.real_to_image(region['x_pos_max'], region['y_pos_max'])
    print(x_min, x_max)
    print(y_min, y_max)
    if y_min < 0:
        y_min = 0
    frame_images = organizedata.get_frames(ddir(dataset), FRAMES)
    num_clusters = np.zeros(len(FRAMES))
    dists = []
    for fi, im in enumerate(frame_images):
        im = im[y_min:y_max+1, x_min:x_max+1]
        centers = frame_clust_points(im, 240, min_dist, clust_eps, min_samples)
        num_clusters[fi] = len(centers)
        if len(centers) == 2:
            dists.append(distance.pdist(centers)[0])
    dists = np.array(dists)
    pylab.hist(dists[dists < 50], bins=20)
    pylab.savefig("average_diode_sep.%s.png" % dataset, dpi=300)
def study_redmapper_lrg_3d(hemi='north'): # create 3d grid object grid = grid3d(hemi=hemi) # load SDSS data sdss = load_sdss_data_both_catalogs(hemi) # load redmapper catalog rm = load_redmapper(hemi=hemi) # get XYZ positions (Mpc) of both datasets x_sdss, y_sdss, z_sdss = grid.xyz_from_radecz(sdss['ra'], sdss['dec'], sdss['z'], applyzcut=False) x_rm, y_rm, z_rm = grid.xyz_from_radecz(rm['ra'], rm['dec'], rm['z_spec'], applyzcut=False) pos_sdss = np.vstack([x_sdss, y_sdss, z_sdss]).T pos_rm = np.vstack([x_rm, y_rm, z_rm]).T # build a couple of KDTree's, one for SDSS, one for RM. from sklearn.neighbors import KDTree tree_sdss = KDTree(pos_sdss, leaf_size=30) tree_rm = KDTree(pos_rm, leaf_size=30) lrg_counts = tree_sdss.query_radius(pos_rm, 100., count_only=True) pl.clf() pl.hist(lrg_counts, bins=50) ipdb.set_trace()
def behavioral_analysis(self): """some analysis of the behavioral data, such as mean percept duration, dominance ratio etc""" self.assert_data_intern() # only do anything if this is not a no report trial if 'RP' in self.file_alias: all_percepts_and_durations = [[],[]] else: all_percepts_and_durations = [[],[],[]] if not 'NR' in self.file_alias: # and not 'RP' in self.file_alias for x in range(len(self.trial_indices)): if len(self.events) != 0: events_this_trial = self.events[(self.events['EL_timestamp'] > self.timestamps_pt[x][0]) & (self.events['EL_timestamp'] < self.timestamps_pt[x][-1])] for sc, scancode in enumerate(self.scancode_list): percept_start_indices = np.arange(len(events_this_trial))[np.array(events_this_trial['scancode'] == scancode)] percept_end_indices = percept_start_indices + 1 # convert to times start_times = np.array(events_this_trial['EL_timestamp'])[percept_start_indices] - self.timestamps_pt[x,0] if len(start_times) > 0: if percept_end_indices[-1] == len(events_this_trial): end_times = np.array(events_this_trial['EL_timestamp'])[percept_end_indices[:-1]] - self.timestamps_pt[x,0] end_times = np.r_[end_times, len(self.from_zero_timepoints)] else: end_times = np.array(events_this_trial['EL_timestamp'])[percept_end_indices] - self.timestamps_pt[x,0] these_raw_event_times = np.array([start_times + self.timestamps_pt[x,0], end_times + self.timestamps_pt[x,0]]).T these_event_times = np.array([start_times, end_times]).T + x * self.trial_duration * self.sample_rate durations = np.diff(these_event_times, axis = -1) all_percepts_and_durations[sc].append(np.hstack((these_raw_event_times, these_event_times, durations))) self.all_percepts_and_durations = [np.vstack(apd) for apd in all_percepts_and_durations] # last element is duration, sum inclusive and exclusive of transitions total_percept_duration = np.concatenate([apd[:,-1] for apd in self.all_percepts_and_durations]).sum() total_percept_duration_excl = np.concatenate([apd[:,-1] for apd in [self.all_percepts_and_durations[0], self.all_percepts_and_durations[-1]]]).sum() self.ratio_transition = 1.0 - (total_percept_duration_excl / total_percept_duration) self.ratio_percept_red = self.all_percepts_and_durations[0][:,-1].sum() / total_percept_duration_excl self.red_durations = np.array([np.mean(self.all_percepts_and_durations[0][:,-1]), np.median(self.all_percepts_and_durations[0][:,-1])]) self.green_durations = np.array([np.mean(self.all_percepts_and_durations[-1][:,-1]), np.median(self.all_percepts_and_durations[-1][:,-1])]) self.transition_durations = np.array([np.mean(self.all_percepts_and_durations[1][:,-1]), np.median(self.all_percepts_and_durations[1][:,-1])]) self.ratio_percept_red_durations = self.red_durations / (self.red_durations + self.green_durations) plot_mean_or_median = 0 # mean f = pl.figure(figsize = (8,4)) s = f.add_subplot(111) for i in range(len(self.colors)): pl.hist(self.all_percepts_and_durations[i][:,-1], bins = 20, color = self.colors[i], histtype='step', lw = 3.0, alpha = 0.4, label = ['Red', 'Trans', 'Green'][i]) pl.hist(np.concatenate([self.all_percepts_and_durations[0][:,-1], self.all_percepts_and_durations[-1][:,-1]]), bins = 20, color = 'k', histtype='step', lw = 3.0, alpha = 0.4, label = 'Percepts') pl.legend() s.set_xlabel('time [ms]') s.set_ylabel('count') sn.despine(offset=10) s.annotate("""ratio_transition: %1.2f, \nratio_percept_red: %1.2f, \nduration_red: %2.2f,\nduration_green: %2.2f, \nratio_percept_red_durations: %1.2f"""%(self.ratio_transition, self.ratio_percept_red, 
self.red_durations[plot_mean_or_median], self.green_durations[plot_mean_or_median], self.ratio_percept_red_durations[plot_mean_or_median]), (0.5,0.65), textcoords = 'figure fraction') pl.tight_layout() pl.savefig(os.path.join(self.analyzer.fig_dir, self.file_alias + '_dur_hist.pdf'))
def test_flux(self): tol = 150. inputcat = catalog.read(os.path.join(self.args.tmp_path, 'ccd_1.cat')) pixradius = 3*self.target["psf"]/self.instrument["PIXEL_SCALE"] positions = list(zip(inputcat["X_IMAGE"]-1, inputcat["Y_IMAGE"]-1)) fluxes = image.simple_aper_phot(self.im[1], positions, pixradius) sky_background = image.annulus_photometry(self.im[1], positions, pixradius+5, pixradius+8) total_bg_pixels = np.shape(image.build_annulus_mask(pixradius+5, pixradius+8, positions[0]))[1] total_source_pixels = np.shape(image.build_circle_mask(pixradius, positions[0]))[1] estimated_fluxes = fluxes - sky_background*1./total_bg_pixels*total_source_pixels estimated_magnitude = image.flux2mag(estimated_fluxes, self.im[1].header['SIMMAGZP'], self.target["exptime"]) expected_flux = image.mag2adu(17.5, self.target["zeropoint"][0], exptime=self.target["exptime"]) p.figure() p.hist(fluxes, bins=50) p.title('Expected flux: {:0.2f}, mean flux: {:1.2f}'.format(expected_flux, np.mean(estimated_fluxes))) p.savefig(os.path.join(self.figdir,'Fluxes.png')) assert np.all(np.abs(fluxes-expected_flux) < tol)
def PlotMtxError(Corr_w):
    max_val = 1
    min_val = -0.1
    AvCorr = np.sum(Corr_w, axis=0)
    dCorr = Corr_w - AvCorr
    errCorr = np.log10(np.sqrt(np.einsum("i...,i...", dCorr, dCorr)) / np.absolute(AvCorr) / np.sqrt(Corr_w.shape[0]))
    # print errCorr.shape
    # print errCorr

    plt.rcParams.update({"font.size": 6, "font.weight": "bold"})
    for i in range(errCorr.shape[0]):
        plt.subplot(2, 7, i + 1)
        plt.title("SITE " + str(i + 1) + ":: \nHistogram of errors in corr. mtx.")
        # Plot the i-th site; the original indexed errCorr[0, :, :] here,
        # which would repeat site 1 in every panel.
        plt.hist(errCorr[i, :, :].flatten(), 256, range=(min_val, max_val))
        plt.xlabel("log_10(sigma)")
        plt.ylabel("Count")
        plt.subplot(2, 7, i + 7 + 1)
        plt.imshow(errCorr[i, :, :], vmin=min_val, vmax=max_val)
        cbar = plt.colorbar(shrink=0.25, aspect=40)
        cbar.set_label("log_10(sigma)")
        plt.set_cmap("gist_yarg")
        plt.title("SITE " + str(i + 1) + ":: \nError in corr. matx. values")
        plt.xlabel("Site i")
        plt.ylabel("Site j")
    plt.show()
def EstimateDensity(self,name,df,histogram,f,s,ax): # if the desired output is in Histogram format if(histogram): finRes = [] lab = [] for i in xrange(5): res = np.array(df[ df[f] == i][s]) if(res.shape[0]>0): finRes.append(res) lab.append(name[0]+ ' = ' + str(i)) pl.hist(finRes, bins=2, normed=True, histtype='bar',label = lab) # if the desired output is simple plot else: for i in xrange(5): res = np.array(df[ df[f] == i][s]) if(res.shape[0]>0): res = res.reshape(res.shape[0],1) X_plot = np.array(np.linspace(-1, 5,20)).reshape(20,1) kde= KernelDensity(kernel='exponential', bandwidth=0.05) kde.fit(res) log_dens = kde.score_samples(X_plot) ax.plot(X_plot,np.exp(log_dens),label=name[0]+ ' = ' + str(i)) ax.legend() ax.set_title(name[1] + " distrubution for changing " + name[0])
def fit_plot(self, data, topn=0, bins=20): """ Create a plot. """ from matplotlib import pylab as pl distros = self.get_topn(topn) xx = numpy.linspace(data.min(), data.max(), 300) table = [] nparms = max(len(x.parms) for x in distros) tcolours = [] for dd in distros: patch = pl.plot(xx, [dd.pdf(p) for p in xx], label='%10.2f%% %s' % (100.0*dd.rss/dd.dss, dd.name)) row = ['', dd.name, '%10.2f%%' % (100.0*dd.rss/dd.dss,)] + ['%0.2f' % x for x in dd.parms] while len(row) < 3 + nparms: row.append('') table.append(row) tcolours.append([patch[0].get_markerfacecolor()] + ['w'] * (2+nparms)) # add a historgram with the data pl.hist(data, bins=bins, normed=True) tab = pl.table(cellText=table, cellColours=tcolours, colLabels=['', 'Distribution', 'Res. SS/Data SS'] + ['P%d' % (x + 1,) for x in range(nparms)], bbox=(0.0, 1.0, 1.0, 0.3)) #loc='top')) #pl.legend(loc=0) tab.auto_set_font_size(False) tab.set_fontsize(10.)
def handle(self, *args, **options):
    try:
        from matplotlib import pylab as pl
        import numpy as np
    except ImportError:
        raise Exception('Be sure to install requirements_scipy.txt before running this.')

    all_names_and_counts = RawCommitteeTransactions.objects.all().values('attest_by_name').annotate(total=Count('attest_by_name')).order_by('-total')
    all_names_and_counts_as_tuple_and_sorted = sorted(
        [(row['attest_by_name'], row['total']) for row in all_names_and_counts],
        key=lambda row: row[1])
    print("top ten attestors: (name, number of transactions they attest for)")
    for row in all_names_and_counts_as_tuple_and_sorted[-10:]:
        print(row)

    n_bins = 100
    filename = 'attestor_participation_distribution.png'
    x_max = all_names_and_counts_as_tuple_and_sorted[-31][1]  # eliminate top outliers from hist
    x_min = all_names_and_counts_as_tuple_and_sorted[0][1]
    counts = [row['total'] for row in all_names_and_counts]
    pl.figure(1, figsize=(18, 6))
    pl.hist(counts, bins=np.arange(x_min, x_max, (float(x_max) - x_min) / 100))
    pl.title('Histogram of Attestor Participation in RawCommitteeTransactions')
    pl.xlabel('Number of transactions a person attested for')
    pl.ylabel('Number of people')
    pl.savefig(filename)
def distance_to_purchase_histogram(purchases): distances = calculate_distance_to_purchase_histogram(purchases) log_distances = [np.log10(0.1+d) for d in distances if d is not None] plt.hist(log_distances, 60, alpha=0.5) plt.xlabel('$log_{10}$ ( distances in miles )') plt.title('Distances between purchase and billing address') return distances
def plotMassFunction(im, pm, outbase, mmin=9, mmax=13, mstep=0.05):
    """
    Make a comparison plot between the input mass function and the
    mass function of the predicted (added) halos
    """
    plt.clf()

    nmbins = int((mmax - mmin) / mstep)
    mbins = np.logspace(mmin, mmax, nmbins)
    mcen = (mbins[:-1] + mbins[1:]) / 2

    plt.xscale('log', nonposx='clip')
    plt.yscale('log', nonposy='clip')

    ic, e, p = plt.hist(im, mbins, label='Original Halos', alpha=0.5, density=True)
    pc, e, p = plt.hist(pm, mbins, label='Added Halos', alpha=0.5, density=True)

    plt.legend()
    plt.xlabel(r'$M_{vir}$')
    plt.ylabel(r'$\frac{dN}{dM}$')
    # plt.tight_layout()
    plt.savefig(outbase + '_mfcn.png')

    mdtype = np.dtype([('mcen', float), ('imcounts', float), ('pmcounts', float)])
    mf = np.ndarray(len(mcen), dtype=mdtype)
    mf['mcen'] = mcen
    mf['imcounts'] = ic
    mf['pmcounts'] = pc

    fitsio.write(outbase + '_mfcn.fit', mf)
def plot_fitted_model(self, sample, data, fig=None, xmin=-1, xmax=12, npoints=1000, nbins=100, epsilon=0.25): """Plot fitted model""" # fetch group group = [i for i, item in enumerate(data.groups.items()) if sample in item[1]][0] # fetch data counts = data.counts_norm[sample].values.astype('float') counts[counts < 1] = epsilon counts = np.log(counts) # compute fitted model x = np.reshape(np.linspace(xmin, xmax, npoints), (-1, 1)) xx = np.exp(x) loglik = _compute_loglik(xx, self.log_phi, self.log_mu, self.beta[self.z[group]]) y = xx * np.exp(loglik) / self.nfeatures # plot fig = pl.figure() if fig is None else fig pl.figure(fig.number) pl.hist(counts, nbins, histtype='stepfilled', linewidth=0, normed=True, color='gray') pl.plot(x, np.sum(y, 1), 'r') pl.grid() pl.xlabel('log counts') pl.ylabel('density') pl.legend(['model', 'data'], loc=0) pl.tight_layout()
def plotFeaturePDF(ift, pft, outbase, fmin=0.0, fmax=1.0, fstep=0.01):
    """
    Plot a comparison between the input feature distribution and the
    feature distribution of the predicted halos
    """
    plt.clf()

    nfbins = int((fmax - fmin) / fstep)
    fbins = np.logspace(fmin, fmax, nfbins)
    fcen = (fbins[:-1] + fbins[1:]) / 2

    plt.xscale('log', nonposx='clip')
    plt.yscale('log', nonposy='clip')

    ic, e, p = plt.hist(ift, fbins, label='Original Halos', alpha=0.5, density=True)
    pc, e, p = plt.hist(pft, fbins, label='Added Halos', alpha=0.5, density=True)

    plt.legend()
    plt.xlabel(r'$\delta$')
    plt.savefig(outbase + '_fpdf.png')

    fdtype = np.dtype([('fcen', float), ('ifcounts', float), ('pfcounts', float)])
    fd = np.ndarray(len(fcen), dtype=fdtype)
    # Field names must match the dtype defined above; the original wrote to
    # 'mcen'/'imcounts'/'pmcounts', which would raise a KeyError.
    fd['fcen'] = fcen
    fd['ifcounts'] = ic
    fd['pfcounts'] = pc

    fitsio.write(outbase + '_fpdf.fit', fd)
def compareHist(data1, data2,_title,tag1='data1', tag2='data2'): pl.figure() pl.show() pl.hist(data1, normed=True, alpha=0.5, color='b') pl.hist(data2, normed=True, alpha=0.5, color='r') # Fit a normal distribution to the data: mu1, std1 = stats.norm.fit(data1) xmin, xmax = pl.xlim() x = np.linspace(xmin, xmax, 100) p = stats.norm.pdf(x, mu1, std1) pl.plot(x, p, 'k', linewidth=2, color='b') # Fit a normal distribution to the data: mu2, std2 = stats.norm.fit(data2) xmin, xmax = pl.xlim() x = np.linspace(xmin, xmax, 100) p = stats.norm.pdf(x, mu2, std2) pl.plot(x, p, 'k', linewidth=2, color='r') pl.title(_title) pl.savefig(data_DIR + '/'+ _title + '.png',bbox_inches='tight') pl.close() return
def study_redmapper_2d(): # I just want to know the typical angular separation for RM clusters. # I'm going to do this in a lazy way. hemi = 'north' rm = load_redmapper(hemi=hemi) ra = rm['ra'] dec = rm['dec'] ncl = len(ra) dist = np.zeros((ncl, ncl)) for i in range(ncl): this_ra = ra[i] this_dec = dec[i] dra = this_ra-ra ddec = this_dec-dec dxdec = dra*np.cos(this_dec*np.pi/180.) dd = np.sqrt(dxdec**2. + ddec**2.) dist[i,:] = dd dist[i,i] = 99999999. d_near_arcmin = dist.min(0)*60. pl.clf(); pl.hist(d_near_arcmin, bins=100) pl.title('Distance to Nearest Neighbor for RM clusters') pl.xlabel('Distance (arcmin)') pl.ylabel('N') fwhm_planck_217 = 5.5 # arcmin sigma = fwhm_planck_217/2.355 frac_2sigma = 1.*len(np.where(d_near_arcmin>2.*sigma)[0])/len(d_near_arcmin) frac_3sigma = 1.*len(np.where(d_near_arcmin>3.*sigma)[0])/len(d_near_arcmin) print '%0.3f percent of RM clusters are separated by 2-sigma_planck_beam'%(100.*frac_2sigma) print '%0.3f percent of RM clusters are separated by 3-sigma_planck_beam'%(100.*frac_3sigma) ipdb.set_trace()
def run_catalogue(mag_cut,file_dir="",OUTDIR="./out"): #file_dir = "/data3/scratch/bcc_v1" #file_dir = "" #OUTDIR = "./out" title_in = "" import numpy as np import scipy as sp import matplotlib import matplotlib.pylab as plt import os import pylab as p import rdfits as r import mytools import sys if not os.path.exists(OUTDIR): os.makedirs(OUTDIR) title = str(title_in) newOUTDIR = OUTDIR+"/" import pyfits as pf table1 = pf.open(file_dir+"catalogue.fits") cols = table1[1].data z=cols["Z"] RA=cols["RA"] DEC=cols["DEC"] GAMMA1=cols["S1"] GAMMA2=cols["S2"] TMAGr = cols["TMAGr"] weights = cols["MVIR"] bg = [(z >.5) & (z < 1.5) & (TMAGr < mag_cut)] # background galaxies are where z > zcut = .5 RAbg = RA[bg] DECbg = DEC[bg] GAMMA1bg = GAMMA1[bg] GAMMA2bg = GAMMA2[bg] weightsbg = weights[bg] zbg = z[bg] fg = [(z < .5) & (TMAGr < mag_cut)] # foreground galaxies are where z < zcut = .5 RAfg = RA[fg] DECfg = DEC[fg] GAMMA1fg = GAMMA1[fg] GAMMA2fg = GAMMA2[fg] weightsfg = weights[fg] zfg = z[fg] #print fg fig = plt.figure() plt.hist(zbg,30, normed=0) plt.xlabel("redshift") plt.ylabel("Source Distribution Counts") plt.title("Z_cut = 0.5") fig.savefig("source_distribution.png") mytools.write_fits_table(OUTDIR+'foreground.fits', ['z','RA','DEC','W'], [zfg,RAfg,DECfg,weightsfg]) mytools.write_fits_table(OUTDIR+'background.fits', ['RA','DEC','S1','S2','W'], [RAbg,DECbg,GAMMA1bg,GAMMA2bg,weightsbg])
def gini_after_action(gini_coeff_before, n_population, n_affected, percentile_before, income_increase, seed=42, do_plot=False): """ See how the Gini coefficient changes if you take some segment of the population and make them richer/poorer :param gini_coeff_before: initial gini coefficient :param n_population: size of population :param n_affected: number of people affected :param percentile_before: percentile of income at start :param income_increase: multiplicative factor of increase of income :return: """ pop_max = 1e7 if n_population > pop_max: # scale both numbers down to make it faster scale = pop_max/float(n_population) n_population = scale * n_population n_affected = scale * n_affected n_population = int(round(n_population)) n_affected = int(round(n_affected)) alpha = gini_to_pareto_alpha(gini_coeff_before) x_mode = 1.0 income = sorted(sample_pareto(n_population, x_mode, alpha, seed=seed)) index_middle = percentile_before*n_population index_start = index_middle - n_affected/2 index_end = index_start + n_affected def adjust(i, inc): if i >= index_start and i < index_end: return income_increase*inc return inc income_adjusted = [adjust(i, inc) for i, inc in enumerate(income)] gini_before = gini(income) gini_after = gini(income_adjusted) tol = 1e-8 if n_population > 10000: assert abs(gini_coeff_before - gini_coeff_before) < tol print 'gini before: %s' % gini_before print 'gini after: %s' % gini_after if do_plot: from matplotlib import pylab as plt plt.clf() income_max = 1000 income_cut = [i for i in income if i < income_max] income_adjusted_cut = [i for i in income_adjusted if i < income_max] range = (0, 10) n_bins = 200 plt.hist(income_cut, n_bins, alpha=0.3, range=range, label="Before") plt.hist(income_adjusted_cut, n_bins, alpha=0.3, range=range, label="After") return gini_before, gini_after
def threquency(housing_prices):
    pl.hist(housing_prices, 50, facecolor='green', alpha=0.75)
    pl.xlabel('House price')
    pl.ylabel('Frequency')
    pl.title('Frequency of housing prices')
    # pl.hist
    pl.show()
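# A minimal usage sketch for the function above (assumption: `pl` is matplotlib.pylab
# and housing_prices may be any 1-D sequence of numbers; the log-normal sample below
# is purely illustrative, not data from the original project).
import numpy as np
threquency(np.random.lognormal(mean=12.0, sigma=0.4, size=1000))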
D = Dists[:, 0] index = 0 #with open(filename, 'rb') as f: # lines = f.readlines() # for l in lines: # myarray = np.fromstring(l, dtype=float, sep=',') # # D[index] = myarray[index + 1:].min() # # index += 1 #min_distances = D.min() bins = range(int(D.max()) + 2) outs = plt.hist(D, bins=bins, normed=True, cumulative=True) N = outs[0] plt.figure() plt.bar(bins[:-1], 1 - N) plt.xlabel('Minimum mismatches') plt.ylabel('CDF') # Check manually for exact matches filename = '/data/ForMimi/AllSgRNAsOct4' seqs = [rec for rec in SeqIO.parse(filename, 'fasta')] zeros = np.where(D == 0)[0] matches = Dists[D == 0, 1].astype(np.int) df = [] for (i, matching_pair) in izip(count(), izip(zeros, matches)):
path ='../res_test' # use your path allFiles = glob.glob(path + "/*.csv") concat_d = [] concat_t = [] i=1 n = np.ceil(len(allFiles)/2.) plt.figure(figsize=[8,2*n]) for file_ in allFiles: df = pd.read_csv(file_,index_col='RowID') if('ProbabilityOfResponse' in df): data = df.loc[label.index] test = df.drop(label.index) test.to_csv('../submissions/singlemodel_'+file_.split('/')[-1]+'.csv') plt.subplot(n,2,i) plt.hist(data['ProbabilityOfResponse'].values[label.values.ravel()==0],binsize,normed=True,alpha=0.5) plt.hist(data['ProbabilityOfResponse'].values[label.values.ravel()==1],binsize,normed=True,alpha=0.5) # plt.hist(test['ProbabilityOfResponse'].values,25) plt.title(file_.split('/')[-1]) plt.xlim([0,1]) i+=1 concat_d.append(data.rename(columns={'ProbabilityOfResponse': file_.split('/')[-1]})) concat_t.append(test.rename(columns={'ProbabilityOfResponse': file_.split('/')[-1]})) plt.show() #%% train = pd.concat(concat_d,axis=1) test = pd.concat(concat_t,axis=1) plt.figure() plt.hist(train.mean(1).values[label.values.ravel()==0],binsize,normed=True,alpha=0.5) plt.hist(train.mean(1).values[label.values.ravel()==1],binsize,normed=True,alpha=0.5) plt.xlim([0,1])
from scipy import stats
import numpy as np
import matplotlib.pylab as plt
import chisquare

#f = open('AllServiceTimes.txt', 'r+')
f = open('AllInterarrivalTimes.txt', 'r+')
data = [float(x) for x in f.read().split(', ')]
#extract.extractData('data.txt')[0]

# plot normalised histogram
plt.hist(data, density=True)

# find minimum and maximum of xticks, so we know
# where we should compute the theoretical distribution
xt = plt.xticks()[0]
xmin, xmax = min(xt), max(xt)
lnspc = np.linspace(xmin, xmax, len(data))

# Try the exponential distribution
aexpon, muExp = stats.expon.fit(data)
pdf_exp = stats.expon.pdf(lnspc, aexpon, muExp)
plt.plot(lnspc, pdf_exp, label="Exponential")

# Try the Erlang distribution
ae, be, muErl = stats.erlang.fit(data)
pdf_erl = stats.erlang.pdf(lnspc, ae, be, muErl)
plt.plot(lnspc, pdf_erl, label="Erlang")

# Try the gamma distribution
ag, bg, thetaGamma = stats.gamma.fit(data)
label='Difficulty (minimum value)') pylab.title('Hash Value vs Work') pylab.ylabel('Hash Value (zero bits) (log2(hash))') pylab.xlabel('Cumulative Work (est. hashes computed)') pylab.legend(loc=4) floor_diff = np.floor(dv[:, 2] - np.log2(dv[:, 1])) pylab.figure(3) pylab.clf() pylab.scatter(dv[:, 0], floor_diff, s=0.1, label='Hash Values (bits)') pylab.title('Hash Value - Difficulty') pylab.ylabel('Hash Value (zero bits) (log2(hash))') pylab.xlabel('Time (blocks)') pylab.legend(loc=4) pylab.figure(4) pylab.clf() pylab.hist(floor_diff, color="green", alpha=0.8, histtype='bar', ec='black', bins=range(0, int(floor_diff.max()) + 1)) pylab.title('Hash Value - Difficulty Histogram') pylab.yscale('log', basey=2) pylab.xlabel('Hash Value - Difficulty') pylab.ylabel('Counts') pylab.show()
import matplotlib.pylab as pyl  # line plot / scatter plot (plot), histogram (hist)
import numpy as npy

# generate random integers
data = npy.random.randint(1, 20, 500)  # low, high, count
print(data)

# generate normally distributed data
data2 = npy.random.normal(0, 0.1, 1000)  # mean, sigma, count
print(data2)

pyl.hist(data)
pyl.show()

# histtype options: 'bar', 'barstacked', 'step', 'stepfilled'
sty = pyl.arange(5, 25, 1)  # bin edges: start, stop, step
pyl.hist(
    data,
    sty,
)  # histtype="stepfilled" (note: matplotlib's default histtype is 'bar')
pyl.show()

# pyl.subplot(2,3,2)  # split the figure: rows, columns, current panel
# pyl.show()
#
# # draw several panels in one figure
# pyl.subplot(2,2,1)
# pyl.subplot(2,2,2)
# pyl.subplot(2,1,2)
# pyl.show()
import matplotlib.pylab as plt
import numpy as np

x1 = np.random.normal(0, 0.8, 1000)
x2 = np.random.normal(-2, 1, 1000)
x3 = np.random.normal(3, 2, 1000)

# (data, step-filled style without bar edges, transparency, number of bins,
#  whether the y-axis shows a density)
plt.hist(x1, histtype='stepfilled', alpha=0.3, bins=40, density=True)
plt.hist(x2, histtype='stepfilled', alpha=0.3, bins=40, density=True)
plt.hist(x3, histtype='stepfilled', alpha=0.3, bins=40, density=True)
plt.show()
plt.figure(figsize=(16, 10)) plot_rate_sorted(s_sd, t_sd) plt.savefig('activityrate__sorted' + name + '.png') plt.figure(figsize=(16, 10)) count_vector = np.bincount(s_sd) plot_rate_histogram(count_vector, simtime) plt.savefig('histogram' + name + '.png') def get_ccs(times, senders, n_sample=1000, bin_size=5.): unique_ids = np.unique(senders) bins = np.arange(wuptime, simtime + wuptime + bin_size, bin_size) cc = np.zeros(n_sample) for i in xrange(n_sample): sp1, sp2 = rand.sample(unique_ids, 2) psth1 = np.histogram(times[senders == sp1], bins)[0] psth2 = np.histogram(times[senders == sp2], bins)[0] cc[i] = np.corrcoef(psth1, psth2)[0][1] return cc plt.figure(figsize=(16, 10)) cc = get_ccs(t_sd, s_sd, n_sample=20000) plt.hist(cc, np.arange(-1, 1, 0.01)) plt.xlabel("correlation coefficient", fontsize=30) plt.ylabel("counts", fontsize=30) plt.title('CC mean {}'.format(np.mean(cc))) plt.savefig('cc_histogram' + name + '.png')
pyl.subplot(2,1,2)
pyl.plot(x,y,'or')
pyl.show()
print('***__***')

# distribution analysis
# range = max - min
# class width = range / number of bins
avgScore_max = da2[3].max()
avgScore_min = da2[3].min()
comment_max = da2[6].max()
comment_min = da2[6].min()
avgScore_rg = avgScore_max - avgScore_min
comment_rg = comment_max - comment_min
avgScore_dst = avgScore_rg/10
comment_dst = comment_rg/10
avgScore_sty = np.arange(avgScore_min,avgScore_max,avgScore_dst)
comment_sty = np.arange(comment_min,comment_max,comment_dst)
pyl.subplot(2,1,1)
pyl.hist(da2[3],avgScore_sty)
#pyl.show()
pyl.subplot(2,1,2)
pyl.hist(da2[6],comment_sty)
pyl.show()
print("finished")
['a', 'b', 'c'], ['1', '1', '1']).astype(int) data.generic_holiday.value_counts() # In[55]: data.duplicated(keep='first').sum() # In[56]: #Checking outliers sns.boxplot(data=data, x=data["Revenue"]) # In[57]: # Revenue Histogram plt.hist(data.Revenue, bins=50, color='purple', edgecolor='black') plt.title('Revenue') plt.show() # In[58]: # Checking Outliers using IQR Q1 = data["Revenue"].quantile(0.25) Q3 = data["Revenue"].quantile(0.75) IQR = Q3 - Q1 print("Q1=", Q1) print("Q3=", Q3) print("IQR=", IQR) Lower_Whisker = Q1 - 1.5 * IQR Upper_Whisker = Q3 + 1.5 * IQR print("Lower whisker=", Lower_Whisker)
import random
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pylab import hist, show

contenido = np.loadtxt("locationsY.txt")

plt.hist(contenido[:, 0], bins=15, color="gray")
plt.title("Latitud")
plt.show()

plt.hist(contenido[:, 1], bins=15)
plt.title("Longitud")
plt.show()

# hist() returns (counts, bin_edges, patches); take the index of the most
# populated bin from the counts array. The original sliced the returned tuple
# with [:, 0] / [:, 1], which raises a TypeError.
print(np.argmax(hist(contenido[:, 0], bins=15)[0]))
print(np.argmax(hist(contenido[:, 1], bins=15)[0]))
plot_hist_TRT_Ranks(df_nonnan,cfg_tds) #df_nonnan["date"] = df_nonnan["date"].astype(np.datetime64,copy=False) prep.exploit_TRT_cell_info(cfg_tds,samples_df=df_nonnan) df_nonnan["RANKr"] = df_nonnan["RANKr"]*10 ## Construct selection criteria for input dataset: print("Split in 10min and 30min forcast") y_10 = df_nonnan[["TRT_Rank_diff|10"]] y_30 = df_nonnan[["TRT_Rank_diff|30"]] ## Plot histogram of Rank changes: print("Plot histograms of TRT Rank changes") fig = plt.figure(figsize = [10,5]) plt.title("Histogram of TRT Rank difference") plt.hist([y_10.values,y_30.values], bins=50, color=[col10,col30], label=['10min Rank difference', '30min Rank difference']) plt.legend() plt.grid() plt.savefig(os.path.join(cfg_tds["fig_output_path"],"Hist_TRT_Rank_diff.pdf"), orientation="portrait") fig = plt.figure(figsize = [10,5]) axes = fig.add_subplot(1,1,1) sns.kdeplot(y_10.values[:,0], shade=True, kernel="gau", bw=0.03, color=col10, label='10min Rank difference') sns.kdeplot(y_30.values[:,0], shade=True, kernel="gau", bw=0.03, color=col30, label='30min Rank difference') plt.xlabel("TRT Rank difference") plt.title("Kernel density estimation of TRT Rank difference") plt.grid() axes.get_yaxis().set_visible(False) plt.savefig(os.path.join(cfg_tds["fig_output_path"],"KDE_TRT_Rank_diff.pdf"), orientation="portrait")
model = lm.LinearRegression() model.fit(X, y) # Predict appliance / energy compsumtion y_est = model.predict(X) residual = y - y_est # Display scatter plot figure() figure(0) subplot(2, 1, 1) plot(y, residual, '.') xlabel('Appliance (true)') ylabel('Appliance (estimated)') figure(1) subplot(2, 1, 2) hist(residual, 40) xlabel('Residual') #Mean squared error print(np.sqrt(np.square(y - y_est).sum() / len(y))) print(metrics.mean_squared_error(y, y_est)) #Which is the same as print(np.square(y - y_est).sum() / len(y)) print("RMSE") print(np.sqrt(np.square(y - y_est).sum() / len(y))) show()
import matplotlib.cm as cm import os #calc_PESC_fluid.py #datadir = 'C:\\Users\\dschaffner\\OneDrive - brynmawr.edu\\Galatic Dynamics Data\\GalpyData_July2018\\' datadir = 'C:\\Users\\dschaffner\\Dropbox\\From OneDrive\\Galatic Dynamics Data\\GalpyData_July2018\\resorted_data\\CR6_3t_Rg_Full\\' npy = '.npz' #fileheader = 'PE_SC_IDdatabase_Type_1_data_249_delays_3000_orbits_galpy0718' #fileheader = 'PE_SC_IDdatabase_Type_1_data_249_delays_galpy0718' fileheader = 'radiusAttimestep0_1t' datafile = loadnpzfile(datadir + fileheader + npy) radii1 = datafile['radius'] plt.figure(1) plt.hist(radii1, bins=50, range=(3.5, 8.5)) plt.title('Radius Dist at 0ts') plt.ylim(0, 1500) fileheader = 'radiusAttimestep500_1t' datafile = loadnpzfile(datadir + fileheader + npy) radii2 = datafile['radius'] plt.figure(2) plt.hist(radii2, bins=50, range=(3.5, 8.5)) plt.title('Radius Dist at 500ts') plt.ylim(0, 1500) fileheader = 'radiusAttimestep1000_1t' datafile = loadnpzfile(datadir + fileheader + npy) radii3 = datafile['radius'] plt.figure(3)
'r-') #load and plot raw data X = load_coal() plt.plot(X, X * 0, 'k|') plt.xlabel('time (years)') plt.ylabel('rate') plt.ylim(-.05, 1) plt.xlim(X.min(), X.max()) save_tikz('coal_rates.tikz', figurewidth='\\figurewidth', figureheight='\\figureheight') plt.figure() #trans = GPy.core.parameterization.transformations.Logexp() trans = GPy.core.parameterization.transformations.Exponent() plt.hist(trans.f(experiment.samples[:, 0]), 100, normed=True) plt.xlabel('signal varaince') #save_tikz('coal_variance.tikz',figurewidth='\\figurewidth', figureheight = '\\figureheight') np.savetxt('coal_var_samples', trans.f(experiment.samples[:, 0])) plt.figure() plt.hist(trans.f(experiment.samples[:, 1]), 100, normed=True) plt.xlabel('lengthscale') #save_tikz('coal_lengthscale.tikz',figurewidth='\\figurewidth', figureheight = '\\figureheight') np.savetxt('coal_ls_samples', trans.f(experiment.samples[:, 1])) #plota scatter of variance, ls variances = trans.f(experiment.samples[:, 0]) lengthscales = trans.f(experiment.samples[:, 1]) plt.figure() plt.plot(lengthscales, variances, 'k.') save_tikz('coal_theta.tikz',
### make sure missing data read in as missing twinData[['DLHRWAGE', 'EDUCH']] twinData[['DLHRWAGE']] twinData = twinData.dropna() twinData[['DLHRWAGE']] # remove rows with missing data (regression will fail to run with missing data) twinData = pd.read_csv("C:/Users/J40311/Documents/School/495R/twins.txt", na_values=["."]) twinData = twinData.dropna() twinData[['DLHRWAGE']] twinData.DLHRWAGE # check normality of response variable (need to drop missing data to generate) import matplotlib.pyplot as plt plt.hist(twinData.DLHRWAGE.dropna()) plt.hist(twinData.DLHRWAGE, 50) plt.show() ### basic linear regression (without variable selection) import statsmodels.api as sm # if I needed to convert one of my variables to factors, could do so twinData.MALEL twinData['MALEL'] = pd.Categorical(twinData.MALEL).codes X = twinData.drop('DLHRWAGE', axis=1) X.columns y = twinData[['DLHRWAGE']] # include intercept in model X1 = sm.add_constant(X)
# The first lines below are the body of the CUDA kernel; its signature was cut off
# in the original excerpt, so it is reconstructed here from the call site vadd[...](a, b, c).
@cuda.jit
def vadd(arr_a, arr_b, arr_out):
    tx = cuda.threadIdx.x
    bx = cuda.blockIdx.x
    bw = cuda.blockDim.x
    i = tx + bx * bw
    if i >= arr_out.size:
        return
    arr_out[i] = arr_a[i] + arr_b[i]

def adder(a, b):
    c = a + b
    return c

n = 1000000
a = np.arange(n, dtype=np.float32)
b = np.arange(n, dtype=np.float32)
c = np.empty_like(a)

thread_ct = my_gpu.WARP_SIZE
block_ct = int(math.ceil(float(n) / thread_ct))
vadd[block_ct, thread_ct](a, b, c)
cnc = adder(a, b)

toGraph = cnc - c
plt.figure()
plt.hist(toGraph, bins=100, range=(-.00000001, .00000001))
# The original compared c.all() == cnc.all(), which only checks whether every
# element is non-zero in both arrays rather than element-wise equality.
if np.allclose(c, cnc):
    print("equal")
plt.show()
temp[i].append(np.percentile(e_dist_data[i], 95, interpolation='linear')) # temp[i].append(np.percentile(m_dist_data[i], 95, interpolation='linear')) e_dist_threshold = np.array(temp) # m_dist_threshold = np.array(temp) print("<OOD threshold of distance of penultimate logits in each label>") print(e_dist_threshold) print() # Show histogram for each label's distance distribution ---------------------------------------------------------------- distance = e_dist_data # distance = m_dist_data for i in range(10): data = np.sort(distance[i]) bins = np.arange(0, 300, 2) plt.hist(data, bins, normed=True) plt.title("label: %d" % i) plt.xlabel('distance', fontsize=15) plt.ylabel('num of data', fontsize=15) plt.show(block=True) # ====================================================================================================================== ''' loss = cross_entrophy + (l2) + np.eye(num_neurons)- f(x) (?) experimetn = > runtime check ''' # ====================================================================================================================== ''' # 무작위 데이터 선택 index = 2 img = mnist.test.images[index]
marker='o', markersize='0.1', color='b', label=r'$\bar{\Lambda}^0$') plt.title('Datos simulados') plt.xlabel(r'$\alpha$') plt.ylabel(r'$P_T \; (\frac{MeV}{c})$') plt.legend(loc='best', markerscale=18) plt.grid() #Histogramas das masas: mK = np.asarray(df['mK']) mL = np.asarray(df2['mlambda']) mA = np.asarray(df2['mantilambda']) plt.figure(2) plt.hist(mK, bins=100, range=(450, 550)) plt.title(r'$masa \; K_S^0$') plt.ylabel('contas') plt.xlabel(r'$masa \; (\frac{MeV}{c^2})$') plt.grid() plt.figure(3) plt.hist(mL, bins=100, range=(1080, 1140)) plt.title(r'$masa \; \Lambda^0$') plt.ylabel('contas') plt.xlabel(r'$masa \; (\frac{MeV}{c^2})$') plt.grid() plt.figure(4) plt.hist(mA, bins=100, range=(1080, 1140)) plt.title(r'$masa \; \bar{\Lambda}^0$') plt.ylabel('contas') plt.xlabel(r'$masa \; (\frac{MeV}{c^2})$')
nobs = 500 bins = 20 #x = -3.0 * np.ones(500) #np.linspace(-5,5) y = stats.norm.rvs(loc=-3, size=nobs) hista = gethist(y, bins=bins) # find parameters and estimates of single gaussian p0 = [10.0, -2, 0.5] # initial guess p1, success = optimize.leastsq(errfunc, p0[:], args=(hista[1], hista[0])) errors_sq = errfunc(p1, hista[1], hista[0])**2 yest1 = fitfunc(p1, hista[1]) plt.figure() plt.hist(y, bins=bins) plt.figure() #plt.plot(hista[1],hista[0],'o',hista[1],yest1,'.-') x = np.linspace(hista[1, 0], hista[1, -1], 100) yest1a = fitfunc(p1, x) plt.plot(hista[1], hista[0], 'o', x, yest1a, '-') y1 = stats.norm.rvs(loc=-2, size=nobs * 0.6) y2 = stats.norm.rvs(loc=2, size=nobs * 0.4) y = np.hstack([y1, y2]) hista = gethist(y, bins=bins) # find parameters and estimates of gaussian mixture q0 = [10.0, -3, 0.5, 5, 3, 0.5] # initial guess q1, success = optimize.leastsq(doublegausserr, q0[:],
#! usr/bin/python # coding=utf-8 """ File Name: Data Operation Description: Date: 2016-11-29 Author: QIU HU """ import matplotlib.pylab as plt import jieba jieba.load_userdict('../MidData/user.dict') def tokenize_text(text): tokens = [] for txt in text: tokens.extend(jieba.lcut(txt)) return tokens with open('train_id_view_pol_trans_all.txt') as f: LENS = [] for line in f.readlines(): lis = line.strip().split('\t') tokens = tokenize_text(lis[2:]) LENS.append(len(tokens)) plt.hist(LENS, bins=100) plt.show()
WRONG: the probability of being normal given the value I obtained
'''
x_mean = np.mean(x)
x_std = np.std(x)
x_skew = skew(x)
x_kurtosis = kurtosis(x)  # excess kurtosis, k - 3
x_jb_stat = nb_sim / 6 * (x_skew**2 + 1 / 4 * x_kurtosis**2)  # how far you are from normality
# necessarily small
p_value = 1 - chi2.cdf(x_jb_stat, df=2)  # distributed as chi-squared with 2 degrees of freedom
# If p-value < significance level => reject H0.
# If p-value > significance level => do not reject H0.
x_is_normal = (p_value > 0.05)  # equivalent to jb < 6

print('skewness is ' + str(x_skew))
print('kurtosis is ' + str(x_kurtosis))
print('Jarque-Bera statistic is ' + str(x_jb_stat))
print('p-value is ' + str(p_value))
print('is normal ' + str(x_is_normal))
#jb_list = []
#jb_list.append(x_jb_stat)

# Plot histogram
plt.figure()
plt.hist(x, bins=100)
plt.title(x_description)
plt.show()
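# A hedged cross-check of the hand-rolled statistic above: scipy ships its own
# Jarque-Bera test, so (assuming x is the same sample and nb_sim its size) the two
# results should roughly agree for large samples. Sketch only, not part of the original script.
from scipy import stats as sps
jb_stat_scipy, jb_pvalue_scipy = sps.jarque_bera(x)
print('scipy Jarque-Bera statistic is ' + str(jb_stat_scipy))
print('scipy p-value is ' + str(jb_pvalue_scipy))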
s.shape
np.mean(s, axis = 0)
np.sum(s, axis = 0)
np.mean(s, axis = 1)
np.sum(s, axis = 1)

# Note: np.random.dirichlet() requires a concentration (alpha) vector, e.g.
# np.random.dirichlet(np.ones(20)); as written this call raises a TypeError.
w = np.random.dirichlet()

plt.barh(range(20), s[0])
plt.barh(range(20), s[1], left=s[0], color='g')
plt.barh(range(20), s[2], left=s[0]+s[1], color='r')

f1 = np.random.dirichlet((100,1), 1000)
plt.hist(f1, 30, density = True)
plt.show()

w = np.random.dirichlet(np.ones(M),N)

A = np.transpose(np.array([[1,2,3],[3,4,5],[5,6,7],[10,20,30]]))
B = np.transpose(np.array([1,2,3]))
A*B

# U_sum = st.temp_growth(k, T, Tref, T_pk, N, B_U, Ma, Ea_U, Ea_D)
# u_1 = np.empty((0,N))
# for i in range(M-1):
#     mean = U_sum[i]/N
#     random.seed(i)
#     a = np.array([np.random.uniform(0, mean, size = N)])
#     u_1 = np.append(u_1,a,axis = 0)
item = browser.ui.workingDataTree.selectedItems()[0] print item.text(0) for c in range(item.childCount()): if 'trace' in item.child(c).text(0): trace = item.child(c) if 'xOnsets' in item.child(c).text(0): xonsets = item.child(c).data # xOnsets is in datapoints, convert to ms dt = trace.attrs['dt'] xonsets = xonsets * dt # Convert to frequency freq = 1000. / np.diff(xonsets) # Make histogram nbins = 100 binsRange = (0, 20) n, bins, patches = plt.hist(freq, bins=nbins, range=binsRange, normed=False, histtype='stepfilled') n = n / float(np.sum(n)) # Store data ndaq.store_data(n, name='n') ndaq.store_data(bins, name='bins') ndaq.store_data(np.array(freq), name='median_freq') # AP nbins = 50, binsRange = 10 # EPSC nbins = 0, binsRange = 10
def test(): parser = argparse.ArgumentParser(description='DAGMM') parser.add_argument('--epoch', '-e', type=int, default=10000, help='Number of sweeps over the dataset to train') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--cn_h_unit', type=int, default=10, help='Number of Compression Network hidden units') parser.add_argument('--cn_z_unit', type=int, default=2, help='Number of Compression Network z units') parser.add_argument('--en_h_unit', type=int, default=10, help='Number of Estimation Network hidden units') parser.add_argument('--en_o_unit', type=int, default=2, help='Number of Estimation Network output units') args = parser.parse_args() print('# epoch: {}'.format(args.epoch)) print('# Output-directory when training: {}'.format(args.out)) print('# Compression Network: Dim - {0} - {1} - {0} - Dim'.format( args.cn_h_unit, args.cn_z_unit)) print('# Estimation Network: {} - {} - {}'.format(args.cn_z_unit + 2, args.en_h_unit, args.en_o_unit)) print('') # データセット読み込み x_data = np.loadtxt('./dataset_arrhythmia/ExplanatoryVariables.csv', delimiter=',') y_label = np.loadtxt('./dataset_arrhythmia/CriterionVariables.csv', delimiter=',') # 正常データのみを抽出 HealthData = x_data[y_label[:] == 1] # 正常データを学習用と検証用に分割 NumOfHealthData = len(HealthData) trainData = HealthData[:math.floor(NumOfHealthData * 0.9)] validData = HealthData[len(trainData):] # 正常ではないデータ(異常データ)を抽出 diseaseData = x_data[y_label[:] != 1] # 型変換 trainData = trainData.astype(np.float32) validData = validData.astype(np.float32) diseaseData = diseaseData.astype(np.float32) model = DAGMM(args.cn_h_unit, args.cn_z_unit, len(trainData[0]), args.en_h_unit, args.en_o_unit) optimizer = optimizers.Adam(alpha=0.0001) optimizer.setup(model) print("------------------") print("Health trainData Energy") with chainer.using_config('train', False), chainer.using_config( 'enable_backprop', False): _, energy_htr, _, _ = model.fwd(trainData) # print(energy_htr.data) print("------------------") print("Health testData Energy") with chainer.using_config('train', False), chainer.using_config( 'enable_backprop', False): _, energy_hte, _, _ = model.fwd(validData) # print(energy_hte.data) print("------------------") print("Disease testData Energy") with chainer.using_config('train', False), chainer.using_config( 'enable_backprop', False): _, energy_di, _, _ = model.fwd(diseaseData) # print(energy_di.data) plt.hist(energy_htr.data, bins=100, alpha=0.4, histtype='stepfilled', color='b') plt.hist(energy_hte.data, bins=100, alpha=0.4, histtype='stepfilled', color='g') plt.hist(energy_di.data, bins=100, alpha=0.4, histtype='stepfilled', color='r') plt.show()
#print(penergy) #exit() #print(events['nMass']) #masses += ak.to_numpy(ak.flatten(events['nMass'].array())).tolist() #e_energies += ak.to_numpy(ak.flatten(events['eenergy'].array())).tolist() #''' #print(masses) ee_ranges = [(0, 5), (0, 0.3), (0, 0.120), (0, 0.120), (0, 0.120)] pe_ranges = [(0, 5), (0, 5), (0, 5), (0, 5), (0, 5)] for i in range(0, 5): plt.figure(figsize=(12, 6)) plt.subplot(2, 2, 1) plt.hist(data['nMass'][i], range=(0.0, 1.3), bins=100) plt.xlabel(r'$M_{pe^-}$ [GeV/c$^2$]', fontsize=18) plt.subplot(2, 2, 3) plt.hist(data['eenergy'][i], range=ee_ranges[i], bins=100) plt.xlabel(r'$E_{e^-}$ [GeV]', fontsize=18) plt.subplot(2, 2, 4) plt.hist(data['penergy'][i], range=pe_ranges[i], bins=100) plt.xlabel(r'$E_{p}$ [GeV]', fontsize=18) plt.tight_layout() name = f"plots/tiny_hydrogen_{i}.png" plt.savefig(name) plt.show()
import cv2
import matplotlib.pylab as plt

img = cv2.imread(r"..\lena.jpg", cv2.IMREAD_GRAYSCALE)
equ = cv2.equalizeHist(img)

cv2.imshow("original", img)
cv2.imshow("result", equ)

plt.subplot(1, 2, 1)
plt.hist(img.ravel(), 256)
plt.subplot(1, 2, 2)
plt.hist(equ.ravel(), 256)
plt.show()

cv2.waitKey()
cv2.destroyAllWindows()
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=5) # In[5] Model evaluation and Plotting # set models to evaluation so that batchnorm is put in eval mode. model.eval() model_batchnorm.eval() # Plot model activations out = model.activation(validation_dataset[0][0].reshape(-1, 28 * 28)) plt.hist(out[2], label='model with no batch normalization') plt.xlabel("activation ") plt.legend() plt.show() out_batchnorm = model_batchnorm.activation(validation_dataset[0][0].reshape( -1, 28 * 28)) plt.hist(out_batchnorm[2], label='model with normalization') plt.xlabel("activation ") plt.legend() plt.show() # Plot the diagram to show the loss plt.plot(training_results['training_loss'], label='No Batch Normalization') plt.plot(training_results_Norm['training_loss'], label='Batch Normalization') plt.ylabel('Cost')
def plot_tpms(self, min_value: float = 0, max_value: float = None):
    # Note: min_value and max_value are currently unused; the histogram is drawn
    # over the full range of self.tpms.
    plt.hist(self.tpms)
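# A minimal sketch of how the unused bounds above might be applied, assuming the intent
# is to clip the plotted range (hypothetical variant, not the original method):
def plot_tpms_with_range(self, min_value: float = 0, max_value: float = None):
    upper = max_value if max_value is not None else max(self.tpms)
    plt.hist(self.tpms, range=(min_value, upper))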
#exit() nentries = tree.GetEntries() values = [] valuesjet = [] valuesmet = [[], []] valueselectron = [] for nentry in range(nentries): if nentry % 10000 == 0: print(nentry) tree.GetEntry(nentry) njets = tree.njet for i in range(0, njets): valuesjet.append(tree.jete[i]) #x = tree.muone #y = tree.electrone #print(valuesjet) print(len(valuesjet)) plt.figure() plt.hist(valuesjet, bins=100, range=(0, 500)) #plt.show()
from scipy import stats
import numpy as np
import matplotlib.pylab as plt
import math

b = np.genfromtxt('n.txt', unpack=True)
mean = np.mean(b)
#sem = sem(b)
std = np.std(b)
var = std**2
#print(mean,'+-','Varianz=',var)

plt.hist(b, bins=10, density=True, label="Messwerte")

xt = plt.xticks()[0]
xmin, xmax = min(xt), max(xt)
lnspc = np.linspace(xmin, xmax, len(b))
pdf_g = stats.norm.pdf(lnspc, mean, std)
plt.plot(lnspc, pdf_g, color="r", label="Gaußverteilung")
plt.xlabel(r'Zählrate N / 1/10s')
plt.ylabel(r'Relative Häufigkeit')

poi = np.random.poisson(lam=mean, size=10000)
plt.hist(poi, bins=10, density=True, histtype="step", color="k", label="Poissonverteilung")
plt.grid()