def histogram(A, B, nameA, nameB):
    plt.hist(A, bins=255, alpha=0.5, color='b', label=nameA)
    plt.hist(B, bins=255, alpha=0.5, color='r', label=nameB)
    plt.xlabel('Intensity')
    plt.ylabel('Number of occurrences')
    plt.legend()
    plt.show()
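# Usage sketch (illustrative, not from the original source): compares two
# simulated 8-bit intensity arrays; assumes numpy and matplotlib.pyplot are
# already imported as np and plt.
imgA = np.random.randint(0, 256, size=10000)
imgB = np.random.randint(0, 256, size=10000)
histogram(imgA, imgB, 'image A', 'image B')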
def groupHourly(dataGroup, names, title, timeShift, stacked=True, show=True):
    plt.gca()
    toPlot = []
    namesShown = []
    for pos in range(len(dataGroup)):
        if len(dataGroup[pos]['data']) > 0:
            data = truncData(dataGroup[pos]['data'], "hour")
            dates = data['created_at']
            dates = [parser.parse(date) for date in dates]
            hour_list = [(t + timedelta(hours=timeShift)).hour for t in dates]
            toPlot.append(hour_list)
            namesShown.append(names[pos])
    numbers = list(range(0, 25))  # was xrange (Python 2)
    labels = [str(x) for x in numbers]
    plt.xticks(numbers, labels)
    plt.xlabel("Hour (GMT %s)" % timeShift)
    plt.ylabel("Tweets")
    if len(namesShown) != 0:
        plt.title(title, size=12)
        plt.hist(toPlot, bins=numbers, stacked=stacked, alpha=0.5,
                 label=names, align='mid')
        plt.legend(namesShown, loc="best")  # loc must be passed by keyword
    if show:
        plt.show()
    return plt
def compare_chebhist(dname, mylambda, c, Nbin=25):
    if mylambda == 'Do not exist':
        print('--!!Warning: eig file does not exist, cannot display comparison histogram')
    else:
        mylambda = 1 - mylambda
        lmin = max(min(mylambda), -1)
        lmax = min(max(mylambda), 1)

        cheb_file_content = '\n'.join([str(st) for st in c])

        x = np.linspace(lmin, lmax, Nbin + 1)
        y = plot_chebint(c, x)
        u = (x[1:] + x[:-1]) / 2
        v = y[1:] - y[:-1]

        plt.clf()
        # plt.hold() was removed from matplotlib; successive calls draw on
        # the same axes by default.
        plt.hist(mylambda, Nbin)
        plt.plot(u, v, "r.", markersize=10)
        plt.show()

        filename = 'data/' + dname + '.png'
        plt.savefig(filename)

        cheb_filename = 'data/' + dname + '.cheb'
        with open(cheb_filename, 'w+') as f:
            f.write(cheb_file_content)
def main():
    correlations = []
    ntrials = 100
    for i in range(ntrials):
        print(i)
        # print("Generating Voters...")
        voters = generate_voters(N)
        # print("Generating Graphs...")
        G = generate_graph(voters)
        rating_graph = G.copy()
        H = generate_similarity_graph(G)
        # print("Normalizing Edges...")
        normalize_edges(H)
        # print("Calculating credibility scores")
        cred_graph = assign_credibility(H)
        correlations.append(calculate_correlation(rating_graph, cred_graph))
    plt.hist(correlations, bins=[-1 + 0.05 * i for i in range(40)])
    plt.show()
    print("DONE")
def make_histograms(truescores, fpscores, title):
    # `normed` was removed from matplotlib; `density=True` is the replacement.
    plt.hist(truescores, density=True, histtype='step', linewidth=3,
             label="True Events, %d" % len(truescores))
    plt.hist(fpscores, density=True, histtype='step', linewidth=3,
             label="FP Events, %d" % len(fpscores))
    plt.legend()
    plt.xlabel("Likelihood Score")
    plt.ylabel("Density of events")
    plt.title("Histogram of TP and FP Event Likelihoods")
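# Usage sketch (illustrative, not from the original source): two overlapping
# score distributions; assumes numpy and matplotlib.pyplot are imported as
# np and plt.
true_scores = np.random.beta(5, 2, size=1000)
fp_scores = np.random.beta(2, 5, size=1000)
make_histograms(true_scores, fp_scores, "TP vs FP")
plt.show()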
def hist(fname, data, bins, xlabel, ylabel, title,
         facecolor='green', alpha=0.5, transparent=True, **kwargs):
    plt.clf()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.hist(x=data, bins=bins, facecolor=facecolor, alpha=alpha, **kwargs)
    plt.savefig(fname, transparent=transparent)
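# Usage sketch (illustrative): writes a figure to disk without opening a
# window; assumes numpy is imported as np.
hist('lengths.png', np.random.exponential(2.0, 500), bins=30,
     xlabel='Length', ylabel='Count', title='Sample lengths')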
def histograma_confianza(promedios_1, promedios_2, lim_min_1, lim_max_1,
                         lim_min_2, lim_max_2):
    '''
    Plots the two histograms, for the slope and the intercept.
    This is an ad-hoc function; it could be generalized to plot generically.
    '''
    # aprox1 is a module-level fit result assumed to be in scope.
    fig2, ax2 = plt.subplots()
    plt.hist(promedios_1, bins=40)
    plt.axvline(aprox1[0], color='r', label="Fitted slope")
    plt.axvline(lim_min_1, color='g', label="Confidence interval")
    plt.axvline(lim_max_1, color='g')
    plt.legend()
    ax2.set_xlabel("Slope")
    ax2.set_ylabel("Frequency")
    plt.savefig("histograma_parte3_pendiente.png")

    fig3, ax3 = plt.subplots()
    plt.hist(promedios_2, bins=40)
    plt.axvline(aprox1[1], color='r', label="Fitted intercept")
    plt.axvline(lim_min_2, color='g', label="Confidence interval")
    plt.axvline(lim_max_2, color='g')
    plt.legend(loc=2)
    ax3.set_xlabel("Intercept")
    ax3.set_ylabel("Frequency")
    plt.savefig("histograma_parte3_coefdeposicion.png")
def plot_ekf_vs_mc():

    def fx(x):
        return x**3

    def dfx(x):
        return 3 * x**2

    mean = 1
    var = .1
    std = math.sqrt(var)

    data = normal(loc=mean, scale=std, size=50000)
    d_t = fx(data)

    mean_ekf = fx(mean)
    slope = dfx(mean)
    std_ekf = abs(slope * std)

    norm = scipy.stats.norm(mean_ekf, std_ekf)
    xs = np.linspace(-3, 5, 200)
    plt.plot(xs, norm.pdf(xs), lw=2, ls='--', color='b')
    # `normed` was removed from matplotlib; use `density=True` instead.
    plt.hist(d_t, bins=200, density=True, histtype='step', lw=2, color='g')

    actual_mean = d_t.mean()
    plt.axvline(actual_mean, lw=2, color='g', label='Monte Carlo')
    plt.axvline(mean_ekf, lw=2, ls='--', color='b', label='EKF')
    plt.legend()
    plt.show()

    print('actual mean={:.2f}, std={:.2f}'.format(d_t.mean(), d_t.std()))
    print('EKF    mean={:.2f}, std={:.2f}'.format(mean_ekf, std_ekf))
def plot_tothist(infile, tot, maxy, binsize=3):
    """
    Plot the total-score histogram, where the total score (tot) has been
    previously calculated or read in by the input functions
    """

    """ Calculate moments of the distribution """
    mn = tot.mean()
    med = np.median(tot)
    mp = tot.mean() + tot.std()
    mm = tot.mean() - tot.std()

    """ Report on the properties of the distribution """
    print("")
    print("Statistics for %s" % infile)
    print("---------------------------------")
    print("  Mean:         %5.1f" % mn)
    print("  Median:       %5.1f" % med)
    print("  Sigma:        %5.1f" % tot.std())
    print("  Mean - 1 sig: %5.1f" % mm)
    print("  Mean + 1 sig: %5.1f" % mp)
    print("")

    """ Plot the distribution """
    binhist = range(int(tot.min()) - 1, int(tot.max()) + 3, binsize)
    plt.hist(tot, binhist, histtype='step', ec='k')
    plt.ylim(0, maxy)
    plt.axvline(x=mn, ymin=0, ymax=maxy, c='r', lw=3)
    plt.axvline(x=mm, ymin=0, ymax=maxy, c='b', lw=3)
    plt.axvline(x=mp, ymin=0, ymax=maxy, c='b', lw=3)
    plt.title("Distribution of scores for %s" % infile)
    plt.xlabel("Scores")
    plt.ylabel("N")
    plt.show()
def plot_net_distribution(net_mat, n_bins):
    """Plot the network distribution.

    Parameters
    ----------
    net_mat: np.ndarray
        the net represented in a matrix way.
    n_bins: int
        the number of intervals we want to use to plot the distribution.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        the figure of the distribution required of the relations between
        elements defined by the `net_mat`.

    """
    net_mat = net_mat.reshape(-1)
    fig = plt.figure()
    plt.hist(net_mat, n_bins)
    l1 = plt.axvline(net_mat.mean(), linewidth=2, color='k', label='Mean',
                     linestyle='--')
    plt.legend([l1], ['Mean'])
    return fig
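# Usage sketch (illustrative, not from the original source): a random 20x20
# relation matrix; assumes numpy is imported as np.
fig = plot_net_distribution(np.random.rand(20, 20), n_bins=25)
fig.savefig('net_dist.png')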
def createResponsePlot(dataframe, plotdir):
    mag = dataframe['MAGPDE'].values  # .as_matrix() was removed from pandas
    response = (dataframe['TFIRSTPUB'].values) / 60.0
    response[response > 60] = 60  # anything over 60 minutes is capped at 60 minutes
    imag5 = (mag >= 5.0).nonzero()[0]
    imag55 = (mag >= 5.5).nonzero()[0]
    fig = plt.figure(figsize=(8, 6))
    n, bins, patches = plt.hist(response[imag5], color='g', bins=60,
                                range=(0, 60))
    # plt.hold() was removed from matplotlib; axes are reused by default.
    plt.hist(response[imag55], color='b', bins=60, range=(0, 60))
    plt.xlabel('Response Time (min)')
    plt.ylabel('Number of earthquakes')
    plt.xticks(np.arange(0, 65, 5))
    ymax = text.ceilToNearest(max(n), 10)
    yinc = ymax / 10
    plt.yticks(np.arange(0, ymax + yinc, yinc))
    plt.grid(True, which='both')
    x = [20, 20]
    y = [0, ymax]
    plt.plot(x, y, 'r', linewidth=2, zorder=10)
    s1 = 'Magnitude 5.0, Events = %i' % (len(imag5))
    s2 = 'Magnitude 5.5, Events = %i' % (len(imag55))
    plt.text(35, .85 * ymax, s1, color='g')
    plt.text(35, .75 * ymax, s2, color='b')
    plt.savefig(os.path.join(plotdir, 'response.pdf'))
    plt.savefig(os.path.join(plotdir, 'response.png'))
    plt.close()
    print('Saving response.pdf')
def main():
    # DataFrame.from_csv was removed from pandas; read_csv with index_col=0
    # is the equivalent.
    train = pd.read_csv('train.csv', index_col=0)
    places_index = train['place_id'].values
    places_loc_sqr_wei = []
    for i, place_id in enumerate(train['place_id'].unique()):
        if not i % 100:
            print(i)
        place_df = train.iloc[places_index == place_id]
        place_weights_acc_sqred = 1 / (place_df['accuracy'].values ** 2)
        places_loc_sqr_wei.append([place_id,
                                   np.average(place_df['x'].values, weights=place_weights_acc_sqred),
                                   np.std(place_df['x'].values),
                                   np.average(place_df['y'].values, weights=place_weights_acc_sqred),
                                   np.std(place_df['y'].values),
                                   np.average(np.log(place_df['accuracy'].values)),
                                   np.std(np.log(place_df['accuracy'].values)),
                                   place_df.shape[0]])
        # print(places_loc_sqr_wei[-1])
        # plt.hist2d(place_df['x'].values, place_df['y'].values, bins=100)
        plt.hist(np.log(place_df['accuracy'].values), bins=20)
        plt.show()
    places_loc_sqr_wei = np.array(places_loc_sqr_wei)
    column_names = ['x_mean', 'x_sd', 'y_mean', 'y_sd',
                    'accuracy_mean', 'accuracy_sd', 'n_persons']
    places_loc_sqr_wei = pd.DataFrame(data=places_loc_sqr_wei[:, 1:],
                                      index=places_loc_sqr_wei[:, 0],
                                      columns=column_names)
    now = str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
    places_loc_sqr_wei.to_csv('places_loc_sqr_weights_%s.csv' % now)
def make_intergenerational_figure(data, lowerbound, upperbound, rows, title):
    # `accepted`, `col`, `red_marker_location`, and `ylimit` are assumed to
    # be module-level globals.
    plt.figure(figsize=(10, 10))
    plt.suptitle(title, fontsize=20)
    for index in range(4):
        plt.subplot(2, 2, index + 1)
        # simulation distribution (`normed` was removed from matplotlib;
        # `density=True` is the replacement)
        plt.hist(accepted[:, rows[index]], density=True,
                 bins=range(0, 100, 5), color=col)
        # simulation values
        value = np.mean(accepted[:, rows[index]])
        std = 2 * np.std(accepted[:, rows[index]])
        plt.errorbar((value,), (red_marker_location - 0.02),
                     xerr=((std,), (std,)), color=col, fmt='o',
                     linewidth=2, capsize=5, mec=col)
        # survey values
        value = data[index]
        lb = lowerbound[index]
        ub = upperbound[index]
        plt.errorbar((value,), (red_marker_location,),
                     xerr=((value - lb,), (ub - value,)), color='r', fmt='o',
                     linewidth=2, capsize=5, mec='r')
        # labeling
        plt.ylim(0, ylimit)
        plt.xlim(0, 100)
    # make subplots pretty
    plt.subplot(2, 2, 1)
    plt.title("Males")
    plt.ylabel("'05\nFrequency")
    plt.subplot(2, 2, 2)
    plt.title("Females")
    plt.subplot(2, 2, 3)
    plt.ylabel("'08\nFrequency")
    plt.xlabel("Percent Responding Affirmatively")
    plt.subplot(2, 2, 4)
    plt.xlabel("Percent Responding Affirmatively")
def plot_scatter_with_histograms(xvals, yvals, colour='k', oneToOneLine=True,
                                 xlabel=None, ylabel=None, title=None):
    gs = gridspec.GridSpec(5, 5)
    xmin = np.floor(min(xvals))
    xmax = np.ceil(max(xvals))
    ymin = np.floor(min(yvals))
    ymax = np.ceil(max(yvals))

    plt.subplot(gs[1:, 0:4])
    plt.plot(xvals, yvals, 'o', color=colour)
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)
    if oneToOneLine:
        oneToOneMax = max([max(xvals), max(yvals)])
        plt.plot([0, oneToOneMax], [0, oneToOneMax], 'b--')
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)

    plt.subplot(gs[0, 0:4])
    plt.hist(xvals, np.linspace(xmin, xmax, 50))
    plt.axis('off')

    plt.subplot(gs[1:, 4])
    plt.hist(yvals, np.linspace(ymin, ymax, 50), orientation='horizontal')
    plt.axis('off')

    if title is not None:
        plt.suptitle(title)
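# Usage sketch (illustrative, not from the original source): correlated data
# with marginal histograms; assumes numpy and matplotlib.pyplot are imported
# as np and plt.
xs = np.random.rand(500) * 10
ys = xs + np.random.randn(500)
plot_scatter_with_histograms(xs, ys, xlabel='observed', ylabel='predicted',
                             title='Observed vs predicted')
plt.show()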
def t90_dist(self):
    """ Plots the T90 distribution, gives the mean and median T90 values of
    the sample, and calculates the number of short and long bursts in the
    sample """
    t90s = []
    for i in range(len(self.t90s)):
        try:
            t90s.append(float(self.t90s[i]))
        except ValueError:
            continue
    t90s = np.array(t90s)
    mean_t90 = np.mean(t90s)
    median_t90 = np.median(t90s)
    print('Mean T90 time =', mean_t90, 's')
    print('Median T90 time =', median_t90, 's')
    # split at the conventional 2 s short/long boundary
    short_t90s = t90s[t90s < 2]
    long_t90s = t90s[t90s >= 2]
    print('Number of Short/Long GRBs =', len(short_t90s), '/', len(long_t90s))

    plt.figure()
    plt.xlabel('T$_{90}$ (s)')
    plt.ylabel('Number of GRBs')
    plt.xscale('log')
    minimum, maximum = min(short_t90s), max(long_t90s)
    plt.axvline(mean_t90, color='red', linestyle='-')
    plt.axvline(median_t90, color='blue', linestyle='-')
    plt.hist(t90s,
             bins=10**np.linspace(np.log10(minimum), np.log10(maximum), 20),
             color='grey', alpha=0.5)
    plt.show()
def fluence_dist(self):
    """ Plots the fluence distribution and gives the mean and median fluence
    values of the sample """
    fluences = []
    for i in range(len(self.fluences)):
        try:
            fluences.append(float(self.fluences[i]))
        except ValueError:
            continue
    fluences = np.array(fluences)
    mean_fluence = np.mean(fluences)
    median_fluence = np.median(fluences)
    print('Mean Fluence =', mean_fluence, '(15-150 keV) [10^-7 erg cm^-2]')
    print('Median Fluence =', median_fluence, '(15-150 keV) [10^-7 erg cm^-2]')

    plt.figure()
    plt.xlabel('Fluence (15-150 keV) [$10^{-7}$ erg cm$^{-2}$]')
    plt.ylabel('Number of GRBs')
    plt.xscale('log')
    minimum, maximum = min(fluences), max(fluences)
    plt.axvline(mean_fluence, color='red', linestyle='-')
    plt.axvline(median_fluence, color='blue', linestyle='-')
    plt.hist(fluences,
             bins=10**np.linspace(np.log10(minimum), np.log10(maximum), 20),
             color='grey', alpha=0.5)
    plt.show()
def plot_event_histogram(events, plot_type):
    from matplotlib.dates import date2num, num2date
    from matplotlib import ticker

    plt.figure(figsize=(12, 4))

    values = []
    for event in events:
        if plot_type == "depth":
            values.append(event["depth_in_km"])
        elif plot_type == "time":
            values.append(date2num(event["origin_time"].datetime))

    plt.hist(values, bins=250)

    if plot_type == "time":
        # ISO-style year-month-day (the original '%Y-%d-%m' had the day and
        # month swapped)
        plt.gca().xaxis.set_major_formatter(ticker.FuncFormatter(
            lambda numdate, _: num2date(numdate).strftime('%Y-%m-%d')))
        plt.gcf().autofmt_xdate()
        plt.xlabel("Origin time (UTC)")
        plt.title("Origin time distribution (%i events)" % len(events))
    elif plot_type == "depth":
        plt.xlabel("Event depth in km")
        plt.title("Hypocenter depth distribution (%i events)" % len(events))

    plt.tight_layout()
def csv_dict_reader(file_obj):
    reader = csv.DictReader(file_obj, delimiter=',')
    mylist = []
    for line in reader:
        # note: this records the string length of the "Asthma" field,
        # not its numeric value
        mylist.append(len(line["Asthma"]))
    # print(mylist)
    plt.hist(mylist)
def main():
    # produce Gaussian noise and show it as a standard color-coded image
    ar = np.random.randn(100, 200)
    plt.imshow(ar)
    plt.colorbar(shrink=.5)
    plt.title('Gaussian noise color coded')
    plt.show()

    # show as a grayscale-coded image
    plt.imshow(ar, cmap=cm.gray)
    plt.colorbar(shrink=.5)
    plt.title('Gaussian noise grayscale coded')
    plt.show()

    # flatten and show as a histogram
    ar_flat = ar.reshape(100 * 200)
    plt.hist(ar_flat)
    plt.title('Gaussian noise histogram')
    plt.show()

    # show a sin(x)*sin(y) sample
    l = 100
    sin_x = np.sin(range(l))
    array = np.asarray([sin_x * x for x in sin_x])
    # 'none' disables interpolation; interpolation=None means "use the default"
    plt.imshow(array, interpolation='none')
    plt.colorbar()
    plt.title('sin(x)sin(y)')
    plt.show()

    # replace all negative values by 0
    array_pos = np.maximum(array, 0)
    plt.imshow(array_pos, interpolation='none')
    plt.colorbar()
    plt.title('sin(x)sin(y) only positive')
    plt.show()
def plotHistogram(self, pixel_array, bins=100):
    if self.mask is None:  # comparing with == can misfire on array masks
        self.mask = self.makeArrayMask()
    plot.hist(pixel_array[self.mask], histtype='step', bins=bins)
    plot.xlabel('RSP')
    plot.title(self.name)
    plot.show()
def test_power():
    a = 5.  # shape
    samples = 10000
    s1 = np.random.power(a, samples)
    s2 = common.rand_pow_array(a, samples)

    plt.figure('power test')
    count1, bins1, ignored1 = plt.hist(s1, bins=30, label='numpy',
                                       histtype='step')
    x = np.linspace(0, 1, 100)
    y = a * x**(a - 1.0)
    normed_y1 = samples * np.diff(bins1)[0] * y
    plt.plot(x, normed_y1, label='numpy.random.power fit')

    count2, bins2, ignored2 = plt.hist(s2, bins=30, label='joinmarket',
                                       histtype='step')
    normed_y2 = samples * np.diff(bins2)[0] * y
    plt.plot(x, normed_y2, label='common.rand_pow_array fit')

    plt.title('testing power distribution')
    plt.legend(loc='upper left')
    plt.show()
def testProbabilities(self):
    pmf = PMFList(10)
    count = {}
    times = 10000
    exp = re.compile(r'\d+')  # raw string so \d is not treated as an escape
    o = []
    for i in range(times):
        sel = pmf.choose()
        o.append(int(exp.search(sel[0]).group()))
        try:
            count[sel] = count[sel] + 1
        except KeyError:
            count[sel] = 1
    print("-------------------------------\n"
          "Results table of the PMF test:\n"
          "-------------------------------")
    for k, i in count.items():
        print("Item name: " + k[0] + " | Item probability: " + str(k[1]) +
              " | Occurrences: " + str(i) +
              " | Item empirical probability (" + str(times) + " runs)" +
              ": " + str(float(i) / times))
    plt.hist(o)
    plt.xlabel('Item number (1-' + str(len(count)) + ')')
    plt.ylabel('Number of occurrences')
    plt.show()
def createHistogram(df, pic, bins=45, rates=False):
    data = mergeMatrix(df, pic)
    matrix = sortMatrix(df, pic)

    density = gaussian_kde(data)
    xs = np.linspace(min(data), max(data), max(data))
    density.covariance_factor = lambda: .25
    density._compute_covariance()

    fig, ax1 = plt.subplots()
    plt.hist(data, bins=bins, range=[-500, 4000], histtype='stepfilled',
             color='grey', alpha=0.5)
    lims = plt.ylim()
    height = lims[1] - 2
    for i in range(len(matrix)):
        currentRow = matrix[i][np.nonzero(matrix[i])]
        plt.plot(currentRow, np.ones(len(currentRow)) * height, '|',
                 color='black')
        height -= 2

    plt.axvline(x=0, color='red', linestyle='dashed')

    if rates:
        rates = get_rate(df, pic)
        ax1.text(-250, 4, str(rates[0]), size=15, ha='center', va='center', color='green')
        ax1.text(500, 4, str(rates[1]), size=15, ha='center', va='center', color='green')
        ax1.text(1500, 4, str(rates[2]), size=15, ha='center', va='center', color='green')
        ax1.text(2500, 4, str(rates[3]), size=15, ha='center', va='center', color='green')
        ax1.text(3500, 4, str(rates[4]) + r' $\frac{\mathsf{Spikes}}{\mathsf{s}}$',
                 size=15, ha='center', va='center', color='green')

    plt.ylim([0, lims[1] + 5])
    plt.xlim([0, 4000])
    plt.title('Histogram for ' + str(pic))
    ax1.set_xticklabels([-500, 'Start\nStimulus', 500, 1000, 1500, 2000,
                         2500, 3000, 3500, 4000])
    plt.xlabel('Time (ms)')
    plt.ylabel('Counts (Spikes)')
    print(lims)

    arr_hand = getPic(pic)
    imagebox = OffsetImage(arr_hand, zoom=.3)
    xy = [3200, lims[1] + 5]  # coordinates to position this image
    ab = AnnotationBbox(imagebox, xy, xybox=(30., -30.),
                        xycoords='data', boxcoords="offset points")
    ax1.add_artist(ab)

    ax2 = ax1.twinx()  # necessary for multiple y-axes
    # draw the kernel density estimate on the second y-axis
    ax2.plot(xs, density(xs), 'g', drawstyle='steps')
    plt.ylim([0, 0.001])
    plt.yticks([0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007,
                0.0008, 0.0009])
    ax2.set_yticklabels([1, 2, 3, 4, 5, 6, 7, 8, 9])
    plt.ylabel(r'Density ($\cdot \mathsf{10^{-4}}$)', color='green')
    plt.gcf().subplots_adjust(right=0.89)
    plt.gcf().subplots_adjust(bottom=0.2)
    plt.savefig(pic, dpi=150)
def cnt_overall_rating_dis(inputfile, outputfile, bins):
    '''
    Output:
        1. overall mean rating score and variance of all rating scores;
        2. the number of reviews each rating value owns and its probability.
    '''
    rating_cnt_dis = defaultdict(list)
    data = [float(line[1]) for line in csv.reader(open(inputfile))]
    plt.hist(data, bins)
    plt.show()
    input()  # pause until the user presses Enter (was raw_input in Python 2)

    review_num = len(data)
    avg_rating = np.mean(data)
    var_rating = np.var(data)
    writer = csv.writer(open(outputfile, 'w'), lineterminator='\n')
    writer.writerow(("Average_Rating", "Variance_Rating"))
    writer.writerow((avg_rating, var_rating))

    for rating in data:
        if rating in rating_cnt_dis:
            rating_cnt_dis[rating][0] += 1
        else:
            rating_cnt_dis[rating] = [1, 0.0]
    for rating in rating_cnt_dis:
        rating_cnt_dis[rating][1] = float(rating_cnt_dis[rating][0]) / review_num

    rows = [[line[0], line[1][0], line[1][1]] for line in rating_cnt_dis.items()]
    rows = sorted(rows, key=lambda x: float(x[0]), reverse=False)
    writer.writerow(("Rating", "Num", "Prob"))
    writer.writerows(rows)
def predicted_probabilities(y_true, y_pred, n_groups=30):
    """Plots the distribution of predicted probabilities.

    Parameters
    ----------
    y_true : array_like
        Observed labels, either 0 or 1.
    y_pred : array_like
        Predicted probabilities, floats on [0, 1].
    n_groups : int, optional
        The number of groups to create. The default value is 30.

    Notes
    -----
    .. plot:: pyplots/predicted_probabilities.py
    """
    plt.hist(y_pred, n_groups)
    plt.xlim([0, 1])
    plt.xlabel('Predicted Probability')
    plt.ylabel('Count')
    title = 'Distribution of Predicted Probabilities (n = {})'
    plt.title(title.format(len(y_pred)))
    plt.tight_layout()
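# Usage sketch (illustrative, not from the original source): simulated
# classifier scores; assumes numpy and matplotlib.pyplot are imported as
# np and plt.
y_true = np.random.randint(0, 2, 1000)
y_pred = np.clip(y_true * 0.3 + np.random.rand(1000) * 0.7, 0, 1)
predicted_probabilities(y_true, y_pred)
plt.show()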
def CNS(directory):
    print(directory)
    MASegDict = defaultdict(list)
    seqCount = Counter()
    numFeatures = defaultdict(list)
    speciesDistributionMaster = defaultdict(list)
    for species in [file for file in os.listdir(directory) if file.endswith('.bed')]:
        try:
            print(directory + species)
            seqCount[species] = 0
            speciesDistribution = Counter()
            with open(directory + species, 'r') as f:
                lines = f.readlines()
                numFeatures[species] = [len(lines)]
                if species.endswith('ConservedElements.bed'):
                    for line in lines:
                        if line:
                            lineList = line.split('\t')
                            lineList2 = lineList[-1].split(';')
                            lineList3 = lineList2[1].split(',')
                            tempDict = {word.split(':')[0]: int(word.split(':')[1] != '0')
                                        for word in lineList3}
                            MASegDict[lineList2[2].replace('SegmentID=', '')] = sum(tempDict.values())
                            seqCount[species] += int(lineList[2]) - int(lineList[1])
                            for species2 in tempDict.keys():
                                if species2 not in speciesDistribution.keys():
                                    speciesDistribution[species2] = 0
                                else:
                                    speciesDistribution[species2] += tempDict[species2]
                else:
                    for line in lines:
                        if line:
                            lineList = line.split('\t')
                            lineList2 = lineList[-1].split(';')
                            lineList3 = lineList2[1].split(',')
                            tempDict = {word.split(':')[0]: int(word.split(':')[1] != '0')
                                        for word in lineList3}
                            seqCount[species] += int(lineList[2]) - int(lineList[1])
                            for species2 in tempDict.keys():
                                if species2 not in speciesDistribution.keys():
                                    speciesDistribution[species2] = 0
                                else:
                                    speciesDistribution[species2] += tempDict[species2]
            speciesDistributionMaster[species] = speciesDistribution
        except Exception:
            print('Error with ' + species)
    with open(directory + 'CNSStatistics.txt', 'w') as f:
        for species in sorted(numFeatures.keys()):
            if species:
                try:
                    f.write(species + '\nTotalSequenceAmount=%dbps\nNumberOfElements=%d\n%s\n\n'
                            % (seqCount[species], numFeatures[species][0],
                               'SpeciesDistribution=' + ','.join(
                                   '%s:%d' % (key, speciesDistributionMaster[species][key])
                                   for key in speciesDistributionMaster[species].keys())))
                    # FIXME add species number and graph
                except Exception:
                    print('Error writing ' + species)
    plt.figure()
    # list() so the dict values can be used with numpy under Python 3
    segCounts = list(MASegDict.values())
    plt.hist(segCounts, bins=np.arange(0, int(np.max(segCounts))) + 0.5)
    plt.title('Distribution of Number of Species for Conserved Segments')
    plt.ylabel('Count')
    plt.xlabel('Number of species in Conserved Segment')
    plt.savefig(directory + 'SpeciesNumberDistribution.png')
def show(self):
    figure = plt.figure(self.figure_num)
    num_histograms = len(self.histograms)
    num_subplots = len(self.subplots)
    y_dim = 4.0
    x_dim = math.ceil((num_subplots + num_histograms) / y_dim)
    for i in range(len(self.subplots)):
        title, img = self.subplots[i]
        print("plotting: " + str(title))
        print(img.shape)
        ax = plt.subplot(x_dim, y_dim, i + 1)
        format_subplot(ax, img)
        plt.title(title)
        plt.imshow(img)
    for i in range(len(self.histograms)):
        title, img = self.histograms[i]
        print("plotting: " + str(title))
        print(img.shape)
        plt.subplot(x_dim, y_dim, num_subplots + i + 1)
        plt.title(title)
        plt.hist(img, bins=10, alpha=0.5)
def create_random_sample_from_beta(success, total, sample_size=10000, plot=False):
    """ Create random sample from the Beta distribution """
    failures = total - success
    data = stats.beta.rvs(success, failures, size=sample_size)
    if plot:
        hist(data, 100)
        show()
    return data
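# Usage sketch (illustrative, not from the original source): a Beta sample
# for 42 successes out of 100 trials; assumes scipy.stats is imported as
# `stats`, as the function above expects.
sample = create_random_sample_from_beta(42, 100)
print(sample.mean(), sample.std())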
def plotHist(data, bins=None, figsize=(7, 7), title="", **kwargs):
    if bins is None:  # `== None` can misbehave with array-like arguments
        bins = len(data)
    plt.figure(figsize=figsize)
    plt.hist(data, bins=bins, **kwargs)
    plt.title(title)
    plt.show()
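# Usage sketch (illustrative): assumes numpy is imported as np.
plotHist(np.random.randn(1000), bins=40, title='Standard normal sample')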
def plotter(fromdat, filename):
    plt.figure()
    bins = fromdat.bins
    plt.hist(fromdat.all_val, bins=bins, color=(0, 0, 0, 1),
             histtype='step', label='All Hits')
    plt.ylabel('Counts')
    plt.xlabel('Energy (keV)')
    plt.title('All Detectors Spectrum\n' + filename)
    plt.legend(loc='upper right')
    plt.show()

    plt.figure()
    his_det1 = plt.hist(fromdat.det1_val, bins=bins, color=(0, 0, 0, 0.7),
                        histtype='step', label=fromdat.detector1)
    his_det2 = plt.hist(fromdat.det2_val, bins=bins, color=(0, 1, 0, 0.7),
                        histtype='step', label=fromdat.detector2)
    plt.ylabel('Counts')
    plt.xlabel('Energy (keV)')
    plt.title('Overlay Plot of Both Spectra\n' + filename)
    plt.legend(loc='upper right')
    plt.show()

    his_det3 = plt.hist(fromdat.det3_val, bins=bins, color=(0, 0, 0, 0.5),
                        histtype='step', label=fromdat.detector3)
    plt.ylabel('Counts')
    plt.xlabel('Energy (keV)')
    plt.title(fromdat.detector3)
    plt.legend(loc='upper right')
    plt.show()
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mse'], label='Val Error')
    plt.ylim([0, 20])
    plt.legend()
    plt.show()

plot_history(history)

loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae))

test_predictions = model.predict(normed_test_data).flatten()

plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])
plt.show()

error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")
plt.show()
def show(self):
    feature = [1, 2, 3, 4]
    print(feature)
    data = randn(100)
    print(data)
    plt.hist(data)
dataset = pd.read_csv('Ads_CTR_Optimisation.csv')

import random
N = 10000
d = 10
ads_selected = []
numbers_of_rewards_1 = [0] * d
numbers_of_rewards_0 = [0] * d
total_reward = 0
for n in range(0, N):
    ad = 0
    max_random = 0
    for i in range(0, d):
        random_beta = random.betavariate(numbers_of_rewards_1[i] + 1,
                                         numbers_of_rewards_0[i] + 1)
        if random_beta > max_random:
            max_random = random_beta
            ad = i
    ads_selected.append(ad)
    reward = dataset.values[n, ad]
    if reward == 1:
        numbers_of_rewards_1[ad] = numbers_of_rewards_1[ad] + 1
    else:
        numbers_of_rewards_0[ad] = numbers_of_rewards_0[ad] + 1
    total_reward = total_reward + reward

plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()
    histogram_of_classes[key] = dict_object['numInstances']
    class_synset_id[key] = dict_object['synsetId']

# get all classes with more than 1000 instances
good_classes = list()
good_classes_pop = list()
desirable_synset_id = list()
for i in histogram_of_classes.keys():
    if histogram_of_classes[i] > 1000:
        good_classes.append(i)
        good_classes_pop.append(histogram_of_classes[i])
        desirable_synset_id.append(class_synset_id[i])
        print(i + ', ' + str(histogram_of_classes[i]) + ', ' + class_synset_id[i])

# sort by population
sorted_args = np.argsort(good_classes_pop)
for i in range(len(good_classes)):
    print(str(good_classes[sorted_args[i]]) + ', ' +
          str(good_classes_pop[sorted_args[i]]) + ', ' +
          str(desirable_synset_id[sorted_args[i]]))

print(len(good_classes))
print(good_classes)
print(desirable_synset_id)
print(histogram_of_classes.values())

plt.figure()
# list() so the dict values can be histogrammed under Python 3
plt.hist(list(histogram_of_classes.values()))
plt.show()
print "%s: %d" % (position, max(plot_positions[position])) QB = plot_positions['QB'] RB = plot_positions['RB'] WR = plot_positions['WR'] TE = plot_positions['TE'] K = plot_positions['K'] plt.figure(1) n_bins = 20 points_range = [0, 26] plt.subplot(511) plt.hist(QB, n_bins, range=points_range, normed=1, histtype='bar', color='red', label='QB') # ax0.legend(prop={'size': 20}) # ax0.set_title('QB') plt.subplot(512) plt.hist(RB, range=points_range, normed=1, histtype='bar') # ax1.set_title('RB') plt.subplot(513) plt.hist(WR, range=points_range, histtype='step', stacked=True, fill=False) # ax2.set_title('WR') plt.subplot(514) plt.hist(TE, range=points_range, histtype='bar') # ax3.set_title('TE')
q = []
outname = savename + "/v{}t{}".format(iii, ii)
for j in range(entries):
    a, c = next(gen)
    b = model.predict(a, verbose=0)[:, 0]
    x = np.append(x, np.array(c[:, 0]))
    y = np.append(y, b)
    for i in range(batch_size):
        if c[i][0] == 0:
            g.append(b[i])
        else:
            q.append(b[i])

plt.figure(1)
plt.hist(q, bins=50, weights=np.ones_like(q), histtype='step',
         alpha=0.7, label='quark')
plt.hist(g, bins=50, weights=np.ones_like(g), histtype='step',
         alpha=0.7, label='gluon')
plt.legend(loc="upper center")
plt.savefig(outname + "out.png")

f = open(outname + "out.dat", 'w')
f.write(str(q) + "\n")
f.write(str(g))
f.close()

t_fpr, t_tpr, _ = roc_curve(x, y)
if not os.access(output_dir, os.F_OK):
    os.makedirs(output_dir)

print('-----PROBLEM 1-----')

'''1a
Plot a histogram of percentages of the income.txt data with 30 bins. Make
sure that the bins are weighted using the normed=True option. Make sure your
plot has correct x-axis and y-axis labels as well as a plot title.'''
print('\nPart a')
print('------\n')

incomes = np.loadtxt('incomes.txt')
fig_name_1a = 'Fig_1a'
# `normed` was removed from matplotlib; `density=True` is the replacement
count, bins, ignored = plt.hist(incomes, 30, density=True)
plt.title('MACSS Graduates\' Incomes', fontsize=20)
plt.xlabel('Income')
plt.ylabel('Proportion of Incomes')
plt.tight_layout()
output_path = os.path.join(output_dir, fig_name_1a)
plt.savefig(output_path)
print('Saved {}.\n'.format(fig_name_1a))

'''1b
Estimate the parameters of the lognormal distribution by the generalized
method of moments. Use the average income and standard deviation of income
as your two moments. Use the identity matrix as the weighting matrix W.
Plot the estimated lognormal PDF against the histogram from part (a).
Report the value of your GMM
import matplotlib.pyplot as plt
import os
import pandas as pd
from keras.models import model_from_yaml
import glob
import random
import cv2
import numpy as np
import time

os.chdir('images')
df = pd.read_csv('target.csv')
plt.hist(df.target, bins=range(100, 501))
plt.show()
os.chdir('..')

os.chdir('models')
df = pd.read_csv('nvidia_history.csv')
plt.plot(df.loss[1:])
plt.show()

yaml_file = open('nvidia.yaml', 'r')
loaded_model_yaml = yaml_file.read()
yaml_file.close()
loaded_model = model_from_yaml(loaded_model_yaml)
loaded_model.load_weights('nvidia.h5')
loaded_model.compile(loss='mse', optimizer='adam')

os.chdir('../images/color')
images = glob.glob('*.jpeg')
random.shuffle(images)
# box plot of standardized data (using the zscore function)
figure(figsize=(12, 6))
title('Wine: Boxplot (standardized)')
# attributeNames belongs in xticks, not as boxplot's second (notch) argument
boxplot(zscore(X, ddof=1))
xticks(range(1, M + 1), attributeNames, rotation=45)

# This plot reveals that there are clearly some outliers in the Volatile
# acidity, Density, and Alcohol attributes, i.e. attribute numbers 2, 8,
# and 11.

# Next, we plot histograms of all attributes.
figure(figsize=(14, 9))
u = np.floor(np.sqrt(M))
v = np.ceil(float(M) / u)
for i in range(M):
    subplot(u, v, i + 1)
    hist(X[:, i])
    xlabel(attributeNames[i])
    ylim(0, N)  # make the y-axes equal for improved readability
    if i % v != 0:
        yticks([])
    if i == 0:
        title('Wine: Histogram')

# This confirms our belief about outliers in attributes 2, 8, and 11.
# To take a closer look at this, we next plot histograms of the
# attributes we suspect contain outliers
figure(figsize=(14, 9))
m = [1, 7, 10]
for i in range(len(m)):
    subplot(1, len(m), i + 1)
    hist(X[:, m[i]], 50)
    xlabel(attributeNames[m[i]])
def plot_thresholded_qmap(img, coords, output_folder, brain=None, mask=None,
                          thresh=99, map="map", interactive=False):
    """
    Plot the final estimated t1 or t2 maps and threshold voxel intensity at
    some percentile or an arbitrary intensity
    """
    # flatten image data for calculating the threshold
    img_arr = load(img).get_data().astype(float)
    img_arr_flat = img_arr.reshape((-1, img_arr.shape[-1])).flatten()

    # the t1 map has negatives - keep only positive values
    if nanmin(img_arr_flat) < 0:
        # too many zeros result in a threshold of 0 (even at 99%), so
        # calculate the threshold after removing negatives and zeros
        img_arr_flat = img_arr_flat[img_arr_flat > 0]

    # calculating the threshold
    if type(thresh) is str:
        # threshold specified as a percentile
        threshold = nanpercentile(img_arr_flat, float(thresh))
        titl = 'Voxel distribution ranged between [0, {}] (at {} percentile)'.format(
            threshold, thresh)
    else:
        # threshold specified as a voxel value
        threshold = float(thresh)
        titl = 'Voxel distribution ranged between [0, {}]'.format(thresh)

    # removing voxels higher than the threshold
    img_arr_flat_threshold = img_arr_flat[img_arr_flat <= threshold]
    print("{} - voxels ranging [0, {}] plotted".format(map, threshold))

    # plot the voxel distribution up to the threshold
    hist(img_arr_flat_threshold)
    title(titl)
    fig_name = join(output_folder, '{}_vox_dist.pdf'.format(map))
    savefig(fig_name, bbox_inches='tight')
    close()

    # plot an interactive thresholded t2 map image (this is buggy)
    if interactive:
        html_view = nilplot.view_img(img, brain, cmap=cm.gray,
                                     symmetric_cmap=False, threshold=0)
        fig_name = join(output_folder, '{}_interactive.html'.format(map))
        html_view.save_as_html(fig_name)

    # plot a simple thresholded image
    normal_view = nilplot.plot_img(img=img, bg_img=brain, cut_coords=coords,
                                   cmap=cm.gray, vmax=threshold, vmin=0,
                                   colorbar=True)
    fig_name = join(output_folder, '{}_plot.pdf'.format(map))
    normal_view.savefig(fig_name)
    normal_view.close()
# name = filenamee
# sns_plot.figure.suptitle("Test", fontsize=14)
plt.xticks()
print(sns_plot.get_yticks())
plt.yticks(sns_plot.get_yticks(), sns_plot.get_yticks() * 5000)
plt.xticks(sns_plot.get_xticks(), sns_plot.get_xticks() / 1)
plt.xlabel('Packet size, bytes', fontsize=8)
plt.ylabel('Number of packets, %', fontsize=9)
# plt.savefig(fname=name + '.png', format='png')
plt.show()

count, bins, ignored = plt.hist(s, 50, density=True, align='mid')
x = np.linspace(min(bins), max(bins), 10000)
pdf = (A * np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2))
       / (x * sigma * np.sqrt(2 * np.pi)))
plt.plot(x, pdf, linewidth=3, color='r')
plt.minorticks_on()
# the minor grid can now be styled separately:
plt.grid(which='minor', color='gray', linestyle=':')
plt.xlabel('Packet size, bytes', fontsize=8)
plt.ylabel('Number of packets, %', fontsize=9)
plt.xticks(sns_plot.get_xticks(), sns_plot.get_xticks() / 1)
plt.yticks(sns_plot.get_yticks(), sns_plot.get_yticks() * 5000)
x = np.random.rand(10)
# ax3.axis('tight') effectively normalizes the axes; for a better example of
# 'tight' with histograms, see:
# https://stackoverflow.com/questions/37558329/matplotlib-set-axis-tight-only-to-x-or-y-axis
ax3.plot(x, 'm', x.cumsum(), 'y', linestyle='--', marker='>')
ax3.set_xticklabels('0ABCDef')
plt.savefig('test.jpg', dpi=1000, facecolor='red')

# Basic chart types: histogram, bar chart, pie chart, scatter plot, box plot

# Official histogram example
np.random.seed(19680801)
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)
# density=True replaces the removed normed=True
n, bins, patches = plt.hist(x, 50, density=True, facecolor='g', alpha=0.75)
plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
plt.text(60, .025, r'$\mu=100,\ \sigma=15$')  # the annotation uses LaTeX-style math text
plt.xlim(40, 160)
plt.ylim(0, 0.03)
plt.grid(True)
plt.show()

# Bar chart example:
# https://www.cnblogs.com/always-fight/p/9707727.html
y = range(1, 17)
plt.bar(np.arange(16), y, alpha=0.5, width=0.3, color='yellow',
        edgecolor='red', label='The First Bar', lw=3)
plt.bar(np.arange(16) + 0.4, y, alpha=0.2, width=0.3, color='green',
        edgecolor='blue', label='The Second Bar', lw=3)

# Pie chart
plt.pie(np.array([0.4, 0.2, 0.15, 0.2]), labels=['dog', 'cat', 'bird', 'cow'],
        shadow=True, explode=[0.1, 0, 0, 0], autopct='%0.1f%%')
    'date_time', 'price_usd', 'srch_booking_window', 'srch_saturday_night_bool'
]]
print(df.info())
print(df['price_usd'].describe())
df = df.loc[df['price_usd'] < 5584]

# time series visualizations
df.plot(x='date_time', y='price_usd', figsize=(12, 6))
plt.xlabel('Date time')
plt.ylabel('Price in USD')
plt.title('Time Series of room price by date time of search')

a = df.loc[df['srch_saturday_night_bool'] == 0, 'price_usd']
b = df.loc[df['srch_saturday_night_bool'] == 1, 'price_usd']
plt.figure(figsize=(10, 6))
plt.hist(a, bins=50, alpha=0.5, label='Search Non-Sat Night')
plt.hist(b, bins=50, alpha=0.5, label='Search Sat Night')
plt.legend(loc='upper right')
plt.xlabel('Price')
plt.ylabel('Count')

# k-means clustering
data = df[['price_usd', 'srch_booking_window', 'srch_saturday_night_bool']]
n_cluster = range(1, 20)
kmeans = [cluster.KMeans(n_clusters=i).fit(data) for i in n_cluster]
scores = [kmeans[i].score(data) for i in range(len(kmeans))]
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(n_cluster, scores)
plt.xlabel('Number of Clusters')
plt.ylabel('Score')
plt.ylabel('Exact')
plt.title("Correlation between reference and prediction")
plt.xlim(199.8, 202)
plt.ylim(199.8, 202)
plt.xticks(col_x_ticks)
plt.yticks(col_x_ticks)

############################# Plotting_2 #############################
# display summary statistics
TgNN_R2_set = R2_set
TgNN_error_l2_set = error_l2_set

num_bins = 15
l2_x_ticks = np.arange(0, 0.0016, 0.0003)
plt.figure(figsize=(6, 4))
plt.hist(TgNN_error_l2_set, num_bins)
plt.title(r'$Histogram\ \ of\ \ relative\ \ L_2\ \ error$')
plt.xticks(l2_x_ticks)

num_bins2 = 15
plt.figure(figsize=(6, 4))
plt.hist(TgNN_R2_set, num_bins2)
plt.title(r'$Histogram\ \ of\ \ R^2\ \ score$')
plt.xlim(0.9, 1)
plt.tight_layout()
plt.show()

# data summary
pd.set_option('display.float_format', lambda x: '%.3f' % x)
train_df.describe()

# In the summary above, the minimum trip duration is 1 second and the maximum
# is roughly 3.5 million seconds (about 970 hours). Nobody travels for that
# many hours, which clearly shows that there are outliers.

# removing outliers
mean = np.mean(train_df.trip_duration)
sd = np.std(train_df.trip_duration)
train_df = train_df[train_df['trip_duration'] <= mean + 2 * sd]
train_df = train_df[train_df['trip_duration'] >= mean - 2 * sd]
plt.hist(train_df['trip_duration'].values, bins=100)
plt.xlabel('trip_duration')
plt.ylabel('number of train records')
plt.show()

# transform the target trip duration to logarithmic form, i.e. x -> log(x+1)
train_y = np.log1p(train_df.trip_duration)

# Add some features, like the distance between pick-up and drop-off
# coordinates; split the pickup datetime into more specific features
# like month, day, weekday, etc.
train_df['distance'] = train_df.apply(lambda r: haversine.haversine(
    (r['pickup_latitude'], r['pickup_longitude']),
    (r['dropoff_latitude'], r['dropoff_longitude'])), axis=1)
train_df['month'] = train_df.pickup_datetime.dt.month
train_df['day'] = train_df.pickup_datetime.dt.day
def makePlots(sess, myDataManipulations):
    # fetch operations
    x = tf.get_default_graph().get_operation_by_name("input/x-input").outputs[0]
    y = tf.get_default_graph().get_operation_by_name("model/performance/Sigmoid").outputs[0]
    yTrue = tf.get_default_graph().get_operation_by_name("input/y-input").outputs[0]
    dropout_prob = tf.get_default_graph().get_operation_by_name("model/dropout_prob").outputs[0]
    trainingMode = tf.get_default_graph().get_operation_by_name("model/trainingMode").outputs[0]
    accuracy = tf.get_default_graph().get_operation_by_name("model/performance/accuracy/update_op").outputs[0]

    features = myDataManipulations.features
    featuresCopy = np.copy(features)
    labels = myDataManipulations.labels
    nData = myDataManipulations.nData

    pT = features[:, 0]

    result = sess.run([y, yTrue, accuracy],
                      feed_dict={x: featuresCopy, yTrue: labels,
                                 dropout_prob: 0.0, trainingMode: False})
    modelResult = result[0]
    modelResult = np.reshape(modelResult, (1, -1))[0]
    modelResults = {"training": modelResult}

    print("Test sample accuracy:", result[2])
    print(labels.shape, modelResult.shape)

    indexesS = labels == 1
    signalResponse = modelResult[indexesS]
    indexesB = labels == 0
    backgroundResponse = modelResult[indexesB]

    plt.figure(1)
    plt.hist(signalResponse, bins=20, label="fake tau")
    plt.xlabel('Model prediction')
    plt.ylabel('Events')
    plt.legend(loc=2)
    plt.show(block=False)

    plt.figure(2)
    plt.hist(backgroundResponse, bins=20, label="true tau")
    plt.xlabel('Model prediction')
    plt.ylabel('Events')
    plt.legend(loc=2)
    plt.show()

    print(labels.shape, pT.shape)

    nData = len(pT)
    nbins = 8
    pT_bins = getBinnedVar('pT', nbins)
    Eta_bins = getBinnedVar('Eta', nbins)
    trueFakesPt = getFR_PtHisto(pT, labels, nbins, nData)
    print('pT bins: ', pT_bins.size, 'Eta bins: ', Eta_bins.size)

    # plot model prediction vs. input data histograms
    plt.figure(1)
    plt.scatter(pT, modelResult, s=1, label='Model prediction')
    plt.scatter(pT_bins, 50 * trueFakesPt, label='Input N_{fake}/N_{all}')
    plt.xlabel('pT')
    plt.legend(loc=0)
    plt.show()
def plot_histogram(f0, bins_='auto'):
    # pass the parameter through as `bins` (the original passed an unknown
    # `bins_` keyword, which raises an error)
    n, bins, patches = plt.hist(x=f0, bins=bins_, color='#0504aa',
                                alpha=0.7, rwidth=0.85)
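# Usage sketch (illustrative): assumes numpy and matplotlib.pyplot are
# imported as np and plt.
plot_histogram(np.random.randn(500))
plt.show()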
                       unpack=True, usecols=(3,), skiprows=1)
# f = open(tunnel + way + ".txt", "w")
# f.write(str(np.mean(listError)) + '\t' + str(np.std(listError)) + '\n')
mu = np.mean(listError)
sigma = np.std(listError)
if way == "EntranceExit":
    line = '-'
else:
    line = '-.'
# `normed` was removed from matplotlib; `density=True` is the replacement
count, bins, ignored = plt.hist(listError, 60, density=True,
                                edgecolor=colors[count2], facecolor="None")
plt.plot(bins,
         1 / (sigma * np.sqrt(2 * np.pi))
         * np.exp(-(bins - mu)**2 / (2 * sigma**2)),
         linewidth=2, color=colors[count2], label=tunnel + way, ls=line)
plt.xlabel("Error (m)")
# plt.xticks(np.arange(-1, 10))
# plt.xscale('log')
# plt.yscale('log')
optimistic = list(map(bestPossibleTime, data))
exact = list(map(exactTime, data))
incremental = list(map(incrementalTime, data))

randomModel = SynthesisPolicy()
randomModel.zeroParameters()
# randomPolicy = [randomModel.rollout(r, L=mode) for r in data for _ in range(10)]

bins = np.logspace(0, 5, 30)
figure = plot.figure(figsize=(6, 1.6))
for j, (ys, l) in enumerate([(exact, 'sketch'), (optimistic, 'oracle'),
                             (policy, 'learned policy (ours)')]):
    ys += [TIMEOUT] * totalFailures
    plot.subplot(1, 3, 1 + j)
    plot.hist(ys, bins, alpha=0.3, label=l)
    if j == 1:
        plot.gca().set_xlabel('time (sec)', fontsize=9)
    if j == 0:
        plot.ylabel('frequency', fontsize=9)
    plot.gca().set_xscale("log")
    plot.gca().set_xticks([10**e for e in range(6)])
    plot.gca().set_xticklabels([r"$10^%d$" % e if e < 5 else r"$\infty$"
                                for e in range(6)], fontsize=9)
    plot.gca().set_yticklabels([])
    plot.gca().set_yticks([])
    # plot.legend(fontsize=9)
    plot.title(l, fontsize=9)

    print(l, "timeouts or gives the wrong answer",
          len([y for y in ys if y == TIMEOUT]), "times")
    median = np.median(ys)
    print(l, " median", median)
            temp_acc = 0
        else:
            print('TEAM ID:{}, ERROR:{}'.format(student_folder,
                                                'Prediction file is empty!!!'))
            temp_acc = 0
    else:
        print('TEAM ID:{}, ERROR:{}'.format(student_folder,
                                            'There is no prediction file!!!'))
        temp_acc = 0

    acc_list.append(temp_acc)
    with open(grade_file, 'a') as f_grade_file:
        f_grade_file.write('{}\t{}\t{}\t{:.3f}\n'.format(
            student_folder, student1_id, student2_id, temp_acc))

acc_arr = np.array(acc_list)
fig = plt.figure()
n, bins, patches = plt.hist(acc_arr, 50, facecolor='g', alpha=0.75)
plt.xlabel('Top-10 Accuracy')
plt.ylabel('Number of submissions')
plt.title('Histogram of Top-10 Accuracy')
plt.grid(True)
plt.show()
data = [1, 4, 5, 6, 9, 9, 9]
c = Counter(data)

# calculate the number of instances in the list
count_sum = sum(c.values())

# output frequencies (iteritems was Python 2; items() is the Python 3 form)
for k, v in c.items():
    print(k, round(float(v) / count_sum, 3))

# Boxplot
plt.boxplot(data)
plt.savefig("boxplot.png")
plt.show()

# Histogram
plt.hist(data, histtype='bar')
plt.savefig("histogram.png")
plt.show()

# QQ plot
plt.figure()
graph1 = stats.probplot(data, dist="norm", plot=plt)
plt.savefig("qq.png")
plt.show()
    median = np.median(pdf_tprops)
    outliers = np.arange(0, len(pdf_tprops))[np.logical_or(
        pdf_tprops < 0.5, np.abs(pdf_tprops - median) / std > 4)]
    pdf_tprops = np.delete(pdf_tprops, outliers)
    print("Removed {} outliers".format(len(outliers)))
    done = len(outliers) == 0

sigma = 1.3e-4
inv_sigma = 1.0 / sigma

min_tprop = min(pdf_tprops)
max_tprop = max(pdf_tprops)
min_plot = min_tprop - 4 * sigma
max_plot = max_tprop + 4 * sigma

plt.subplot(2, len(pdf_is), pdf_i_i + 1)
plt.xlim(min_plot, max_plot)
plt.hist(pdf_tprops, bins=100)
if pdf_i_i == 0:
    plt.ylabel("Counts")
if True or pdf_i_i == 2:
    ticks = 3 if pdf_i_i != 1 else 2
    tick_spacing = int((max_plot - min_plot) / (ticks - 0.5) / 0.001) * 0.001
    if tick_spacing == 0:
        tick_spacing = 0.001
    first_tick = int(((max_tprop + min_tprop) / 2 - tick_spacing)
                     / tick_spacing) * tick_spacing
    while first_tick < min_plot:
        first_tick += 0.001
    plt.xticks(first_tick + np.arange(ticks) * tick_spacing)

# convolve with a gaussian to get our estimated pdf
min_x = pdf_tprops.min() - 6 * sigma
max_x = pdf_tprops.max() + 6 * sigma
gamma_sample = np.random.gamma(2, size=10000)

df = pd.DataFrame({
    'normal': normal_sample,
    'random': random_sample,
    'gamma': gamma_sample
})

plt.figure()
# create a boxplot of the normal data; assign the output to a variable to
# suppress output
_ = plt.boxplot(df['normal'], whis='range')

# clear the current figure
plt.clf()
# plot boxplots for all three of df's columns
_ = plt.boxplot([df['normal'], df['random'], df['gamma']], whis='range')

plt.figure()
_ = plt.hist(df['gamma'], bins=100)

plt.figure()
plt.boxplot([df['normal'], df['random'], df['gamma']], whis='range')
# overlay an axis on top of another
ax2 = mpl_il.inset_axes(plt.gca(), width='60%', height='40%', loc=2)
ax2.hist(df['gamma'], bins=100)
ax2.margins(x=0.5)

# switch the y-axis ticks for ax2 to the right side
ax2.yaxis.tick_right()

# if the `whis` argument isn't passed, boxplot defaults to showing
# 1.5 * interquartile range (IQR) whiskers with outliers
plt.figure()
_ = plt.boxplot([df['normal'], df['random'], df['gamma']])

sns.factorplot('TotalComments', 'TotalVotes', data=yourkernels)
plt.show()

plt.figure()
nx.write_edgelist(G, 'wholeGraph.csv', data=False)
G.number_of_nodes()
G.number_of_edges()

in_degree = {}
out_degree = {}
clustering_coefficient = {}
for node in list(G.nodes):
    in_degree[node] = G.in_degree[node]
    out_degree[node] = G.out_degree[node]
    clustering_coefficient[node] = nx.clustering(G, node)

# plotting
# in-degree
plt.hist(np.array(list(in_degree.values())), bins=40)
plt.xlabel('in degree')
plt.ylabel('appearance number')
plt.title('distribution of in_degrees')
plt.show()

max(list(in_degree.values()))  # 8061
a = [x for x in list(in_degree.values()) if x > 5]
len(a)

# out-degree
plt.hist(np.array(list(out_degree.values())), bins=40)
plt.xlabel('out degree')
plt.ylabel('appearance number')
plt.title('distribution of out_degrees')
plt.show()
def main():
    # fix the GPU memory-growth error
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)

    epochs = 10
    num_classes = 10

    # the data, split between train and test sets
    (x_train, y_train), (x_test_original, y_test_original) = cifar10.load_data()
    x_test = x_test_original
    y_test = y_test_original
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    print('x_train shape:', x_train.shape)

    # quantize the range to q7
    x_test = x_test.astype('float32') / 255
    x_train = x_train.astype('float32') / 255
    print("data range", x_test.min(), x_test.max())

    # build model
    model = build_model(x_test.shape[1:])

    # train model
    history = train(model, x_train, y_train, x_test.copy(), y_test.copy(),
                    epochs=epochs)

    # -------- generate weights.h (NNoM model) ----------
    # get the best model
    model = load_model(save_dir)

    # plot layer outputs in keras
    L = model.layers
    test_img = x_test[0].reshape(1, x_test.shape[1], x_test.shape[2],
                                 x_test.shape[3])
    for inx, layer in enumerate(L):  # layer loop
        import matplotlib.pyplot as plt
        if model.input == layer.output or 'dropout' in layer.name:
            continue
        layer_model = Model(inputs=model.input, outputs=layer.output)
        features = layer_model.predict(test_img).flatten()
        layer_name = layer.name.split('/')[0]
        plt.hist(features, bins=128)
        plt.savefig("tmp/" + 'keras' + str(inx) + layer_name + ".png")
        plt.close()

    # generate a binary dataset for NNoM validation, 0~1 -> 0~127, q7
    (x_test[0] * 127).tofile("tmp/input.raw")

    # generate the NNoM model; x_test is the calibration dataset used in the
    # quantisation process
    generate_model(model, x_test[:50], format='hwc', name="weights.h")

    # --------- for test in CI ----------
    # build NNoM
    os.system("scons")

    # do inference using NNoM
    cmd = ".\\mnist.exe" if 'win' in sys.platform else "./mnist"
    if 0 == os.system(cmd):
        import matplotlib.pyplot as plt
        import glob
        for inx, filename in enumerate(glob.glob('tmp/*.raw')):
            result = np.fromfile(filename, dtype="int8")
            plt.hist(result, bins=128)
            plt.savefig(filename + ".png")
            plt.close()
medianCycTime = statistics.median(cycleList)
meanLeadTime = statistics.mean(leadList)
medianLeadTime = statistics.median(leadList)

print("Mean cycle time: " + str(meanCycTime))
print("Median cycle time: " + str(medianCycTime))
print("Minimum cycle time: " + str(min(cycleList)))
print("Maximum cycle time: " + str(max(cycleList)))
print("Mean lead time: " + str(meanLeadTime))
print("Median lead time: " + str(medianLeadTime))
print("Minimum lead time: " + str(min(leadList)))
print("Maximum lead time: " + str(max(leadList)))
print("Amount of tickets: " + str(len(u_tickets)))

plt.figure(1)
plt.subplot(211)
plt.hist(cycleList)
plt.ylabel("Amount of tickets")
plt.xlabel("Cycle time in days")

plt.subplot(212)
plt.hist(leadList)
plt.ylabel("Amount of tickets")
plt.xlabel("Lead time in days")
plt.show()
def main(params):
    print(params)
    json_str = json.dumps(params)
    params = json.loads(json_str)
    picurl = "/home/pic/" + str(uuid.uuid1()) + ".jpg"  # TODO: change the path
    # "电流概率分布" = "current probability distribution"
    csvurl = "/Users/alanp/Downloads/param/" + "电流概率分布" + str(uuid.uuid1()) + ".csv"
    # csvurl = "/home/csv/" + str(uuid.uuid1()) + ".csv"
    f = open(csvurl, 'a', encoding='utf-8-sig')
    db = pymysql.connect("10.103.244.129", "root", "yang1290", "baas")
    cursor = db.cursor()
    vid = params['vehicleId']
    f.write("车辆编号:" + str(vid) + "\n")  # "vehicle id"
    start_date = params['startTime']
    end_date = params['endTime']
    start_time = ' 00:00:00'
    end_time = ' 23:59:59'
    charge = []
    discharge = []
    sql = "SELECT Current FROM driving_log WHERE vehicle_id=%d AND time >= '%s' AND time <= '%s'" % (
        vid, start_date + start_time, end_date + end_time)
    try:
        cursor.execute(sql)
        results = cursor.fetchall()
        for row in results:
            if row[0] >= 5:
                discharge.append(int(row[0]))
            elif row[0] <= -5:
                charge.append(int(row[0]))
    except:
        print("Error: unable to fetch data")

    rmes_charge = -get_rmes(charge)
    rmes_discharge = get_rmes(discharge)
    frequency = []
    rates = []
    currents = []

    margin = 1
    fig = plt.figure()

    # charging-current distribution
    plt.subplot(2, 1, 1)
    plt.grid()
    bins = range(-350, 10, 10)
    plt.xlim(-350, 0)
    # title: "charging-current distribution for vehicle <vid>"
    plt.title("车辆编号为 " + str(vid) + " 的充电电流分布 " + str(rmes_charge))
    plt.xlabel('电流')  # current
    plt.ylabel('采集点')  # sample points
    # `normed` was removed from matplotlib; counts are the default, and
    # density=True gives the normalized histogram
    prob, left, rectangle = plt.hist(x=charge, bins=bins, histtype='bar',
                                     color=['r'])
    prob1, left1, rectangle1 = plt.hist(x=charge, bins=bins, density=True,
                                        histtype='bar', color=['r'])
    currents.extend(list(bins))
    frequency.extend(prob)
    rates.extend(prob1)
    for x, y in zip(left, prob):
        plt.text(x + 10 / 2, y, '%d' % y, ha='center', va='bottom')

    # discharging-current distribution
    plt.subplot(2, 1, 2)
    plt.grid()
    bins = range(0, 360, 10)
    plt.xlim(0, 350)
    # title: "discharging-current distribution for vehicle <vid>"
    plt.title("车辆编号为 " + str(vid) + " 的放电电流分布 " + str(rmes_discharge))
    plt.xlabel('电流')  # current
    plt.ylabel('采集点')  # sample points
    prob, left, rectangle = plt.hist(x=discharge, bins=bins, histtype='bar',
                                     color=['blue'])
    prob2, left2, rectangle2 = plt.hist(x=discharge, bins=bins, density=True,
                                        histtype='bar', color=['blue'])
    currents.extend(list(bins))
    frequency.extend(prob)
    rates.extend(prob2)
    for x, y in zip(left, prob):  # count labels from the non-density histogram
        plt.text(x + 10 / 2, y, '%d' % y, ha='center', va='bottom')

    fig.tight_layout()
    fig.set_dpi(150)
    # TODO: disable show and enable saving in production
    plt.show()
    # plt.savefig(picurl)

    f.write("均方根值" + "," + str(rmes_charge) + "," + str(rmes_discharge) + "\n")  # RMS values
    f.write("电流, 频次, 频率\n")  # current, count, frequency
    for i in range(len(currents)):
        try:
            f.write(str(currents[i]) + "," + str(frequency[i]) + "," + str(rates[i]) + "\n")
        except:
            continue
    f.close()
    # "成功" = "success"
    return ("{\"picurl\":\"" + str(picurl) + "\",\"csvurl\":\"" + str(csvurl)
            + "\",\"code\":\"0\",\"message\":\"成功\"}")
time_differences_even = []
time_differences_odd = []
for i in range(4):
    for j in range(4):
        if j >= i:
            continue
        else:
            expected_time_difference = numpy.array(expected_time_differences)[
                [i in x[0] and j in x[0] for x in expected_time_differences]][0][1]
            max_time_difference = numpy.array(max_time_differences)[
                [i in x[0] and j in x[0] for x in max_time_differences]][0][1]

            plt.figure()
            plt.suptitle('Cross Correlation Times Between Antenna %i and %i' % (i, j))
            ax = plt.subplot(2, 1, 1)
            n, bins, patches = plt.hist(delays_even[:, i, j],
                                        label=('Channel %i and %i' % (2 * i, 2 * j)),
                                        bins=bins)
            best_delay_even = (bins[numpy.argmax(n) + 1] + bins[numpy.argmax(n)]) / 2.0
            time_differences_even.append(((i, j), best_delay_even))
            plt.xlabel('Delay (ns)', fontsize=16)
            plt.ylabel('Counts', fontsize=16)
            plt.axvline(expected_time_difference, c='r', linestyle='--',
                        label='Expected Time Difference = %f' % expected_time_difference)
            plt.axvline(-expected_time_difference, c='r', linestyle='--')
            plt.axvline(max_time_difference, c='g', linestyle='--',
                        label='max Time Difference = %f' % max_time_difference)
            plt.axvline(-max_time_difference, c='g', linestyle='--')
            plt.axvline(best_delay_even, c='c', linestyle='--',
                        label='Best Time Difference = %f' % best_delay_even)
            plt.legend(fontsize=16)
            plt.subplot(2, 1, 2, sharex=ax)
def plotDataHistogram(x, variableName):
    n, bins, patches = plt.hist(x)
    plt.title('Histogram of ' + variableName)
    plt.show()
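# Usage sketch (illustrative): assumes numpy is imported as np.
plotDataHistogram(np.random.randn(200), 'residuals')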
print("data/" + sam + "_" + par + "_" + var + "_truth.txt") fig = plt.figure(num=None, figsize=(ratiox, ratioy), dpi=80, facecolor='w', edgecolor='k') binning = np.arange(lower_range, upper_range + 0.001, (upper_range - lower_range) / nbin) binning = np.array([-1000, *binning, 1000]) n, bins, a = plt.hist(ev, label=sam, bins=binning, lw=0.5, color="blue", fill=False, normed=False, range=(lower_range, upper_range), histtype='step') plt.xlim(lower_range, upper_range) plot_error_region2(n, np.sqrt(n), bins, "blue") ax = plt.gca() ax.xaxis.set_minor_locator(ticker.AutoMinorLocator()) plt.grid(alpha=0.5) plt.xlabel(r"$" + var + "(" + par + ")" + r"$") plt.ylabel("N") plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0)) plt.legend(loc="best") if ("dec" in sam):