def histogram(A, B, nameA, nameB):
    """Overlay the intensity histograms of two data sets and show the figure.

    ``A`` is drawn in blue and ``B`` in red, each with 255 bins and 50 %
    opacity; ``nameA`` and ``nameB`` are used as the legend labels.
    """
    series = ((A, 'b', nameA), (B, 'r', nameB))
    for values, colour, legend_name in series:
        plt.hist(values, bins=255, alpha=0.5, color=colour, label=legend_name)
    plt.xlabel('Intensity')
    plt.ylabel('Number of occurrencies')
    plt.legend()
    plt.show()
Example #2
0
def groupHourly(dataGroup, names, title, timeShift, stacked=True,show=True):
    """Plot a histogram of tweet counts per hour of day for several groups.

    Parameters
    ----------
    dataGroup : sequence of dict
        Each entry must provide a ``'data'`` item; entries whose ``'data'``
        is empty are skipped.
    names : sequence
        Legend label for each entry of ``dataGroup`` (same order).
    title : str
        Plot title.
    timeShift : number
        Hours added to every parsed timestamp before its hour is taken.
    stacked : bool, optional
        Forwarded to ``plt.hist``.
    show : bool, optional
        Call ``plt.show()`` after drawing.

    Returns
    -------
    module
        The ``plt`` module, so the caller can keep working on the figure.
        Nothing is drawn when every group is empty.
    """
    plt.gca()
    toPlot = []
    namesShown = []
    for pos, group in enumerate(dataGroup):
        if len(group['data']) == 0:
            continue
        data = truncData(group['data'], "hour")
        dates = [parser.parse(date) for date in data['created_at']]
        toPlot.append([(t + timedelta(hours=timeShift)).hour for t in dates])
        namesShown.append(names[pos])

    if namesShown:
        # Edges 0..24 give one bin per hour.  Real lists are built so the
        # code also works where range()/map() are lazy (Python 3).
        numbers = list(range(25))
        plt.xticks(numbers, [str(x) for x in numbers])
        plt.xlabel("Hour (GMT %s)" % timeShift)
        plt.ylabel("Tweets")
        plt.title(title, size=12)
        # Label with the names of the groups actually plotted: using the full
        # ``names`` list mislabels the series whenever an empty group was
        # skipped.
        plt.hist(toPlot, bins=numbers, stacked=stacked, alpha=0.5,
                 label=namesShown, align='mid')
        plt.legend(loc="best")
        if show:
            plt.show()
    return plt
Example #3
0
def compare_chebhist(dname, mylambda, c, Nbin = 25):
    """Plot a histogram of ``1 - mylambda`` against values derived from ``c``.

    The histogram uses ``Nbin`` bins.  ``plot_chebint(c, x)`` is evaluated on
    ``Nbin + 1`` evenly spaced points between the data limits (clipped to
    [-1, 1]); the differences between consecutive results are drawn as red
    dots at the interval midpoints.  The figure is saved to
    ``data/<dname>.png`` and the entries of ``c`` are written, one per line,
    to ``data/<dname>.cheb``.

    Parameters
    ----------
    dname : str
        Base name of the two output files.
    mylambda : array-like or str
        Values to histogram, or the sentinel string ``'Do not exist'``, in
        which case only a warning is printed.
    c : iterable
        Passed to ``plot_chebint`` (presumably Chebyshev coefficients --
        confirm against that helper).
    Nbin : int, optional
        Number of histogram bins.
    """
    # Compare with the sentinel only when a string was passed: ``==`` between
    # an array and a string is element-wise and has no single truth value.
    if isinstance(mylambda, str) and mylambda == 'Do not exist':
        print('--!!Warning: eig file does not exist, can not display compare histgram')
        return

    mylambda = 1 - mylambda
    lmin = max(min(mylambda), -1)
    lmax = min(max(mylambda),  1)

    cheb_file_content = '\n'.join([str(st) for st in c])
    x = np.linspace(lmin, lmax, Nbin + 1)
    y = plot_chebint(c, x)
    u = (x[1:] + x[:-1]) / 2
    v = y[1:] - y[:-1]

    # pyplot.hold() no longer exists in current matplotlib; successive calls
    # draw onto the same axes by default.
    plt.clf()
    plt.hist(mylambda, Nbin)
    plt.plot(u, v, "r.", markersize=10)

    # Save before show(): once the window opened by show() is closed the
    # figure may be gone and savefig() would write an empty image.
    filename = 'data/' + dname + '.png'
    plt.savefig(filename)
    plt.show()

    cheb_filename = 'data/' + dname + '.cheb'
    with open(cheb_filename, 'w+') as f:
        f.write(cheb_file_content)
def main():
    """Run 100 simulated trials and plot a histogram of their correlations.

    Each trial generates ``N`` voters (``N`` comes from the enclosing scope),
    builds a graph from them, derives a similarity graph, normalises its
    edges, assigns credibility scores and records the value returned by
    ``calculate_correlation`` for the rating graph and the credibility graph.
    """
    correlations = []
    ntrials = 100
    for i in range(ntrials):
        print(i)
        voters = generate_voters(N)

        G = generate_graph(voters)
        # Copy taken before the similarity graph is derived -- presumably so
        # later steps cannot alter the ratings; confirm against the helpers.
        rating_graph = G.copy()

        H = generate_similarity_graph(G)
        normalize_edges(H)
        cred_graph = assign_credibility(H)

        correlations.append(calculate_correlation(rating_graph, cred_graph))

    # 41 edges in steps of 0.05 cover [-1, 1] completely.  With 40 edges the
    # last edge was 0.95 and every value above it was left out of the plot.
    plt.hist(correlations, bins=[-1 + 0.05 * k for k in range(41)])

    plt.show()
    print("DONE")
def make_histograms(truescores, fpscores, title):
    """Draw normalised step histograms of two sets of likelihood scores.

    Parameters
    ----------
    truescores : sequence
        Scores labelled "True Events"; the count is shown in the legend.
    fpscores : sequence
        Scores labelled "FP Events"; the count is shown in the legend.
    title : str
        Currently unused -- the plot title is hard-coded below.
        NOTE(review): confirm whether callers expect it to be applied.
    """
    # ``density=True`` replaces the ``normed=True`` keyword, which current
    # matplotlib releases no longer accept.
    plt.hist(truescores, density=True, histtype='step', linewidth=3,
             label="True Events, %d" % len(truescores))
    plt.hist(fpscores, density=True, histtype='step', linewidth=3,
             label="FP Events, %d" % len(fpscores))
    plt.legend()
    plt.xlabel("Likelihood Score")
    plt.ylabel("Density of events")
    plt.title("Histogram of TP and FP Event Likelihoods")
Example #6
0
def hist(fname, data, bins, xlabel, ylabel, title, facecolor='green', alpha=0.5, transparent=True, **kwargs):
    """Draw a histogram of ``data`` on a cleared figure and save it.

    The current figure is cleared, labelled with ``xlabel``, ``ylabel`` and
    ``title``, and written to ``fname``.  ``bins``, ``facecolor``, ``alpha``
    and any extra keyword arguments are forwarded to ``plt.hist``;
    ``transparent`` is forwarded to ``plt.savefig``.
    """
    plt.clf()
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)

    hist_options = dict(bins=bins, facecolor=facecolor, alpha=alpha)
    hist_options.update(kwargs)
    plt.hist(x=data, **hist_options)

    plt.savefig(fname, transparent=transparent)
Example #7
0
def histograma_confianza(promedios_1, promedios_2, lim_min_1, lim_max_1,
                         lim_min_2, lim_max_2):
    '''
    Plot the two histograms, one for the slope and one for the intercept.

    This is an ad-hoc function; it could be improved to plot generically.
    Each figure holds a 40-bin histogram, a red line at the fitted value
    (read from ``aprox1``, which comes from the enclosing scope) and green
    lines at the two confidence limits, and is saved as a PNG file.
    '''
    panels = (
        (promedios_1, 0, lim_min_1, lim_max_1, "Pendiente del Ajuste", {},
         "Pendiente", "histograma_parte3_pendiente.png"),
        (promedios_2, 1, lim_min_2, lim_max_2, "Coef de Posicion del Ajuste",
         {'loc': 2}, "Coeficiente de Posicion",
         "histograma_parte3_coefdeposicion.png"),
    )
    for (samples, fit_index, lower, upper, fit_label, legend_options,
         x_label, out_name) in panels:
        _, axes = plt.subplots()
        plt.hist(samples, bins=40)
        plt.axvline(aprox1[fit_index], color='r', label=fit_label)
        plt.axvline(lower, color='g', label="Intervalo de Confianza")
        plt.axvline(upper, color='g')
        plt.legend(**legend_options)
        axes.set_xlabel(x_label)
        axes.set_ylabel("Frecuencia")
        plt.savefig(out_name)
def plot_ekf_vs_mc():
    """Compare a linearised Gaussian estimate with a Monte Carlo histogram.

    50000 samples are drawn with ``normal`` (mean 1, variance 0.1) and passed
    through ``f(x) = x**3``.  The histogram of the transformed samples is
    drawn together with the Gaussian obtained by pushing the mean through
    ``f`` and scaling the standard deviation by ``|f'(mean)|``.  Both means
    are marked with vertical lines and both sets of statistics are printed.
    """

    def fx(x):
        return x**3

    def dfx(x):
        return 3*x**2

    mean = 1
    var = .1
    std = math.sqrt(var)

    data = normal(loc=mean, scale=std, size=50000)
    d_t = fx(data)

    # First-order (linearised) propagation of mean and standard deviation.
    mean_ekf = fx(mean)
    slope = dfx(mean)
    std_ekf = abs(slope*std)

    norm = scipy.stats.norm(mean_ekf, std_ekf)
    xs = np.linspace(-3, 5, 200)
    plt.plot(xs, norm.pdf(xs), lw=2, ls='--', color='b')
    # ``density=True`` replaces the ``normed=True`` keyword, which current
    # matplotlib releases no longer accept.
    plt.hist(d_t, bins=200, density=True, histtype='step', lw=2, color='g')

    actual_mean = d_t.mean()
    plt.axvline(actual_mean, lw=2, color='g', label='Monte Carlo')
    plt.axvline(mean_ekf, lw=2, ls='--', color='b', label='EKF')
    plt.legend()
    plt.show()

    print('actual mean={:.2f}, std={:.2f}'.format(d_t.mean(), d_t.std()))
    print('EKF    mean={:.2f}, std={:.2f}'.format(mean_ekf, std_ekf))
Example #9
0
def plot_tothist(infile, tot, maxy, binsize=3):
    """
    Plot the total-score histogram, where the total score (tot) has been
    previously calculated or read in by the input functions.

    Summary statistics are printed first.  The histogram is then drawn with
    bins of width ``binsize``, the y axis is limited to ``0..maxy`` and
    vertical lines mark the mean (red) and mean +/- one standard deviation
    (blue).  ``infile`` is only used in the printed header and the title.
    """
    # Moments of the distribution
    mn = tot.mean()
    med = np.median(tot)
    sigma = tot.std()
    mp = mn + sigma
    mm = mn - sigma

    # Report on the properties of the distribution.  Single-argument print()
    # calls behave the same on Python 2 and Python 3.
    print("")
    print("Statistics for %s" % infile)
    print("---------------------------------")
    print("  Mean:         %5.1f" % mn)
    print("  Median:       %5.1f" % med)
    print("  Sigma:        %5.1f" % sigma)
    print("  Mean - 1 sig: %5.1f" % mm)
    print("  Mean + 1 sig: %5.1f" % mp)
    print("")

    # Plot the distribution
    binhist = list(range(int(tot.min())-1, int(tot.max())+3, binsize))
    plt.hist(tot, binhist, histtype='step', ec='k')
    plt.ylim(0, maxy)
    # axvline's ymin/ymax are fractions of the axes height (0..1), not data
    # values, so the defaults already span the full plot height.
    plt.axvline(x=mn, c='r', lw=3)
    plt.axvline(x=mm, c='b', lw=3)
    plt.axvline(x=mp, c='b', lw=3)
    plt.title("Distribution of scores for %s" % infile)
    plt.xlabel("Scores")
    plt.ylabel("N")
    plt.show()
Example #10
0
def plot_net_distribution(net_mat, n_bins):
    """Draw a histogram of all entries of a network matrix.

    Parameters
    ----------
    net_mat: np.ndarray
        the network in matrix form; it is flattened before plotting.
    n_bins: int
        the number of histogram intervals.

    Returns
    -------
    fig: matplotlib.pyplot.figure
        a new figure holding the histogram and a dashed black line, listed in
        the legend as 'Mean', at the mean of the entries.

    """
    flat_values = net_mat.reshape(-1)

    fig = plt.figure()
    plt.hist(flat_values, n_bins)

    mean_style = dict(linewidth=2, color='k', label='Mean', linestyle='--')
    mean_line = plt.axvline(flat_values.mean(), **mean_style)
    plt.legend([mean_line], ['Mean'])

    return fig
Example #11
0
def createResponsePlot(dataframe,plotdir):
    """Save histograms of response time for M>=5.0 and M>=5.5 events.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``'MAGPDE'`` (compared against 5.0 and 5.5)
        and ``'TFIRSTPUB'`` (divided by 60 and plotted as minutes --
        presumably stored in seconds; confirm against the data source).
    plotdir : str
        Directory that receives ``response.pdf`` and ``response.png``.
    """
    # ``.values`` replaces ``as_matrix()``, which was removed from pandas.
    mag = dataframe['MAGPDE'].values
    response = dataframe['TFIRSTPUB'].values / 60.0
    response[response > 60] = 60  # anything over 60 minutes is capped at 60 minutes
    imag5 = (mag >= 5.0).nonzero()[0]
    imag55 = (mag >= 5.5).nonzero()[0]

    plt.figure(figsize=(8,6))
    # pyplot.hold() no longer exists in current matplotlib; later calls draw
    # onto the same axes by default.
    n, _, _ = plt.hist(response[imag5], color='g', bins=60, range=(0,60))
    plt.hist(response[imag55], color='b', bins=60, range=(0,60))
    plt.xlabel('Response Time (min)')
    plt.ylabel('Number of earthquakes')
    plt.xticks(np.arange(0,65,5))
    ymax = text.ceilToNearest(max(n),10)
    yinc = ymax/10
    plt.yticks(np.arange(0,ymax+yinc,yinc))
    plt.grid(True,which='both')

    # Red vertical marker at 20 minutes, drawn above the bars.
    x = [20,20]
    y = [0,ymax]
    plt.plot(x,y,'r',linewidth=2,zorder=10)

    s1 = 'Magnitude 5.0, Events = %i' % (len(imag5))
    s2 = 'Magnitude 5.5, Events = %i' % (len(imag55))
    plt.text(35,.85*ymax,s1,color='g')
    plt.text(35,.75*ymax,s2,color='b')
    plt.savefig(os.path.join(plotdir,'response.pdf'))
    plt.savefig(os.path.join(plotdir,'response.png'))
    plt.close()
    print('Saving response.pdf')
def main():
    """Summarise every place in ``train.csv`` and write the table to CSV.

    For each distinct ``place_id`` the following are collected: the mean of
    ``x`` and of ``y`` weighted by ``1 / accuracy**2``, the unweighted
    standard deviations of ``x`` and ``y``, the mean and standard deviation
    of ``log(accuracy)``, and the number of rows.  A histogram of
    ``log(accuracy)`` is shown for every place.  The result is written to
    ``places_loc_sqr_weights_<timestamp>.csv``.
    """
    # DataFrame.from_csv() was removed from pandas; read_csv() with these two
    # options reproduces its defaults.
    train = pd.read_csv('train.csv', index_col=0, parse_dates=True)
    places_index = train['place_id'].values

    places_loc_sqr_wei = []
    for i, place_id in enumerate(train['place_id'].unique()):
        if not i % 100:
            print(i)
        place_df = train.iloc[places_index == place_id]
        x_values = place_df['x'].values
        y_values = place_df['y'].values
        log_accuracy = np.log(place_df['accuracy'].values)
        place_weights_acc_sqred = 1 / (place_df['accuracy'].values ** 2)

        places_loc_sqr_wei.append([place_id,
                                   np.average(x_values, weights=place_weights_acc_sqred),
                                   np.std(x_values),
                                   np.average(y_values, weights=place_weights_acc_sqred),
                                   np.std(y_values),
                                   np.average(log_accuracy),
                                   np.std(log_accuracy),
                                   place_df.shape[0]])

        # NOTE(review): this shows one figure per place, once per iteration
        # -- confirm that is intended for the full data set.
        plt.hist(log_accuracy, bins=20)
        plt.show()

    places_loc_sqr_wei = np.array(places_loc_sqr_wei)
    column_names = ['x_mean', 'x_sd', 'y_mean', 'y_sd', 'accuracy_mean', 'accuracy_sd', 'n_persons']
    # Column 0 (place_id) becomes the index; the rest are the data columns.
    places_loc_sqr_wei = pd.DataFrame(data=places_loc_sqr_wei[:, 1:], index=places_loc_sqr_wei[:, 0],
                                      columns=column_names)

    now = str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
    places_loc_sqr_wei.to_csv('places_loc_sqr_weights_%s.csv' % now)
def make_intergenerational_figure(data, lowerbound, upperbound, rows, title):
    """Draw a 2x2 grid comparing simulated distributions with survey values.

    For each of the four panels the normalised histogram of column
    ``rows[index]`` of ``accepted`` is drawn, with a marker at its mean and
    an error bar of +/- two standard deviations.  A red marker is added at
    ``data[index]`` with an error bar from ``lowerbound[index]`` to
    ``upperbound[index]``.

    ``accepted``, ``col``, ``red_marker_location`` and ``ylimit`` are not
    parameters; they are looked up in the enclosing scope.
    """
    plt.figure(figsize=(10,10))
    plt.suptitle(title,fontsize=20)
    for index in range(4):
        plt.subplot(2,2,index+1)
        simulated = accepted[:,rows[index]]
        # simulation distribution; ``density=True`` replaces the ``normed``
        # keyword, which current matplotlib releases no longer accept.
        # NOTE(review): the last bin edge is 95, so values above 95 are not
        # drawn -- confirm that is intended.
        plt.hist(simulated, density=True, bins = range(0,100,5), color = col)
        #simulation values (drawn slightly below the survey marker)
        value = np.mean(simulated)
        std = 2*np.std(simulated)
        plt.errorbar((value,), (red_marker_location-0.02,), xerr=((std,),(std,)),
                     color=col, fmt='o', linewidth=2, capsize=5, mec = col)
        #survey values
        value = data[index]
        lb = lowerbound[index]
        ub = upperbound[index]
        plt.errorbar((value,), (red_marker_location,), xerr=((value-lb,),(ub-value,)),
                     color='r', fmt='o', linewidth=2, capsize=5, mec = 'r')
        #labeling
        plt.ylim(0,ylimit)
        plt.xlim(0,100)
    #make subplots pretty
    plt.subplot(2,2,1)
    plt.title("Males")
    plt.ylabel("'05\nFrequency")
    plt.subplot(2,2,2)
    plt.title("Females")
    plt.subplot(2,2,3)
    plt.ylabel("'08\nFrequency")
    plt.xlabel("Percent Responding Affirmatively")
    plt.subplot(2,2,4)
    plt.xlabel("Percent Responding Affirmatively")
Example #14
0
def plot_scatter_with_histograms(xvals, yvals, colour='k', oneToOneLine=True, xlabel=None, ylabel=None, title=None):
    """Scatter ``yvals`` against ``xvals`` with a histogram along each axis.

    A 5x5 grid is used: the scatter plot fills the lower-left 4x4 cells, the
    histogram of ``xvals`` sits above it and the horizontal histogram of
    ``yvals`` to its right.  Axis limits are the data ranges rounded outwards
    to whole numbers.  With ``oneToOneLine`` a dashed blue line is drawn from
    the origin to the larger of the two maxima.  ``xlabel``, ``ylabel`` and
    ``title`` are applied only when not ``None``.
    """
    layout = gridspec.GridSpec(5, 5)
    x_low, x_high = np.floor(min(xvals)), np.ceil(max(xvals))
    y_low, y_high = np.floor(min(yvals)), np.ceil(max(yvals))

    # Main scatter panel.
    plt.subplot(layout[1:, 0:4])
    plt.plot(xvals, yvals, 'o', color=colour)
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)
    if oneToOneLine:
        diagonal_end = max(max(xvals), max(yvals))
        plt.plot([0, diagonal_end], [0, diagonal_end], 'b--')
    plt.xlim(x_low, x_high)
    plt.ylim(y_low, y_high)

    # Marginal histogram of the x values (top).
    plt.subplot(layout[0, 0:4])
    plt.hist(xvals, np.linspace(x_low, x_high, 50))
    plt.axis('off')

    # Marginal histogram of the y values (right).
    plt.subplot(layout[1:, 4])
    plt.hist(yvals, np.linspace(y_low, y_high, 50), orientation='horizontal')
    plt.axis('off')

    if title is not None:
        plt.suptitle(title)
Example #15
0
    def t90_dist(self):
        """ Plots T90 distribution, gives the mean and median T90 values of the
        sample and calculates the number of short, long bursts in the sample.

        Entries of ``self.t90s`` that ``float()`` rejects with ``ValueError``
        are ignored.  Bursts with T90 below 2 are counted as short, all
        others as long.  The histogram uses 20 logarithmically spaced bin
        edges between the smallest and largest value. """
        t90s = []
        for i in range(len(self.t90s)):
            try:
                t90s.append(float(self.t90s[i]))
            except ValueError:
                # entry is not a number; leave it out of the sample
                continue

        t90s = np.array(t90s)
        mean_t90 = np.mean(t90s)
        median_t90 = np.median(t90s)
        print('Mean T90 time =',mean_t90,'s')
        print('Median T90 time=',median_t90,'s')

        # Split with a plain boolean condition.  Comparing a masked array
        # with False tests its elements, not its mask, so it does not
        # separate the values below 2 from the rest.
        is_short = t90s < 2
        short_t90s = t90s[is_short]
        long_t90s = t90s[~is_short]
        print('Number of Short/Long GRBs =',len(short_t90s),'/',len(long_t90s))

        plt.figure()
        plt.xlabel('T$_{90}$ (s)')
        plt.ylabel('Number of GRBs')
        plt.xscale('log')
        # Overall extremes; this also works when one of the classes is empty.
        minimum, maximum = t90s.min(), t90s.max()
        plt.axvline(mean_t90,color='red',linestyle='-')
        plt.axvline(median_t90,color='blue',linestyle='-')
        plt.hist(t90s,bins= 10**np.linspace(np.log10(minimum),np.log10(maximum),20),color='grey',alpha=0.5)
        plt.show()
Example #16
0
    def fluence_dist(self):
        """ Show a log-scale histogram of the sample's fluences and print
        their mean and median.

        Entries of ``self.fluences`` that ``float()`` rejects with
        ``ValueError`` are left out. """
        parsed = []
        for position in range(len(self.fluences)):
            try:
                number = float(self.fluences[position])
            except ValueError:
                continue
            else:
                parsed.append(number)

        fluences = np.array(parsed)
        mean_fluence = np.mean(fluences)
        median_fluence = np.median(fluences)
        print('Mean Fluence =',mean_fluence,'(15-150 keV) [10^-7 erg cm^-2]')
        print('Median Fluence =',median_fluence,'(15-150 keV) [10^-7 erg cm^-2]')

        plt.figure()
        plt.xlabel('Fluence (15-150 keV) [$10^{-7}$ erg cm$^{-2}$]')
        plt.ylabel('Number of GRBs')
        plt.xscale('log')

        # 20 bin edges spaced evenly in log10 between the extremes.
        lowest = min(fluences)
        highest = max(fluences)
        exponents = np.linspace(np.log10(lowest), np.log10(highest), 20)
        bin_edges = 10**exponents

        plt.axvline(mean_fluence, color='red', linestyle='-')
        plt.axvline(median_fluence, color='blue', linestyle='-')
        plt.hist(fluences, bins=bin_edges, color='grey', alpha=0.5)
        plt.show()
Example #17
0
def plot_event_histogram(events, plot_type):
    """Draw a 250-bin histogram of event depths or origin times.

    Parameters
    ----------
    events : sequence of dict
        Each event provides ``"depth_in_km"`` and ``"origin_time"`` (an
        object with a ``.datetime`` attribute).
    plot_type : str
        ``"depth"`` or ``"time"``.  For any other value no data is collected
        and no axis label or title is set.
    """
    from matplotlib.dates import date2num, num2date
    from matplotlib import ticker

    plt.figure(figsize=(12, 4))

    values = []
    for event in events:
        if plot_type == "depth":
            values.append(event["depth_in_km"])
        elif plot_type == "time":
            values.append(date2num(event["origin_time"].datetime))

    plt.hist(values, bins=250)

    if plot_type == "time":
        # Year-month-day tick labels; the earlier '%Y-%d-%m' pattern printed
        # day and month in swapped positions.
        plt.gca().xaxis.set_major_formatter(ticker.FuncFormatter(
            lambda numdate, _: num2date(numdate).strftime('%Y-%m-%d')))
        plt.gcf().autofmt_xdate()
        plt.xlabel("Origin time (UTC)")
        plt.title("Origin time distribution (%i events)" % len(events))
    elif plot_type == "depth":
        plt.xlabel("Event depth in km")
        plt.title("Hypocenter depth distribution (%i events)" % len(events))

    plt.tight_layout()
Example #18
0
def csv_dict_reader(file_obj):
    """Histogram the length of the "Asthma" field of every CSV row.

    ``file_obj`` is read as comma-separated text with a header row.
    """
    rows = csv.DictReader(file_obj, delimiter=',')
    asthma_lengths = [len(row["Asthma"]) for row in rows]
    plt.hist(asthma_lengths)
Example #19
0
def main():
    """Show five demo figures built from random noise and a sine product.

    In order: 100x200 Gaussian noise as a colour image, the same noise in
    grayscale, its histogram, the product sin(x)*sin(y) on a 100x100 grid,
    and that product with negative values replaced by zero.
    """

    def render(image, caption, imshow_options, colorbar_options):
        # One image figure: picture, colour bar, title, then display.
        plt.imshow(image, **imshow_options)
        plt.colorbar(**colorbar_options)
        plt.title(caption)
        plt.show()

    # Gaussian noise, default colour map and then grayscale.
    noise = np.random.randn(100, 200)
    render(noise, 'Gaussian noise color coded', {}, {'shrink': .5})
    render(noise, 'Gaussian noise grayscale coded', {'cmap': cm.gray},
           {'shrink': .5})

    # Flattened noise as a histogram.
    plt.hist(noise.reshape(100 * 200))
    plt.title('Gaussian noise histogram')
    plt.show()

    # sin(x)*sin(y): row i is the sine vector scaled by its i-th entry.
    size = 100
    sines = np.sin(range(size))
    product = sines[:, np.newaxis] * sines
    render(product, 'sin(x)sin(y)', {'interpolation': None}, {})

    # Negative values clipped to zero.
    clipped = np.maximum(product, 0)
    render(clipped, 'sin(x)sin(y) only positive', {'interpolation': None}, {})
Example #20
0
	def plotHistogram(self, pixel_array, bins=100):
		"""Show a step histogram of the masked entries of ``pixel_array``.

		``self.mask`` is created with ``self.makeArrayMask()`` on first use
		and kept for later calls.  The figure is titled with ``self.name``.
		"""
		# Identity test: ``== None`` on an array compares element-wise and
		# has no single truth value.
		if self.mask is None:
			self.mask = self.makeArrayMask()
		plot.hist(pixel_array[self.mask], histtype='step', bins=bins)
		plot.xlabel('RSP')
		plot.title(self.name)
		plot.show()
Example #21
0
def test_power():
    """Plot two power-distribution samplers against the analytic density.

    10000 values with shape 5 are drawn from ``np.random.power`` and from
    ``common.rand_pow_array``.  Each sample is drawn as a 30-bin step
    histogram together with the curve ``a * x**(a - 1)`` scaled by the sample
    size times the width of the first bin.
    """
    a = 5.  # shape
    samples = 10000
    numpy_draws = np.random.power(a, samples)
    joinmarket_draws = common.rand_pow_array(a, samples)

    plt.figure('power test')
    grid = np.linspace(0, 1, 100)
    density = a * grid**(a - 1.0)

    cases = (
        (numpy_draws, 'numpy', 'numpy.random.power fit'),
        (joinmarket_draws, 'joinmarket', 'common.rand_pow_array fit'),
    )
    for draws, hist_label, fit_label in cases:
        _, edges, _ = plt.hist(draws, bins=30, label=hist_label,
                               histtype='step')
        # Scale the density to expected counts per bin.
        scale = samples * np.diff(edges)[0]
        plt.plot(grid, scale * density, label=fit_label)

    plt.title('testing power distribution')
    plt.legend(loc='upper left')
    plt.show()
    def testProbabilites(self):
        """Sample a ``PMFList(10)`` 10000 times and report the frequencies.

        Every selection is counted, a table with each item's name, stated
        probability, number of occurrences and empirical probability is
        printed, and a histogram of the number found in each selected item's
        name is shown.
        """
        pmf = PMFList(10)
        count = {}
        times = 10000
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        exp = re.compile(r'\d+')

        o = []
        for _ in range(times):
            sel = pmf.choose()
            # sel[0] is the item name; its first run of digits is plotted.
            o.append(int(exp.search(sel[0]).group()))
            count[sel] = count.get(sel, 0) + 1

        print("-------------------------------\n"+
              "Results table of the PMF test:\n"+
              "-------------------------------")

        for k, i in count.items():
            print("Item name: " + k[0] + " | Item probability: " +
                  str(k[1]) + " | Ocurrences: " + str(i) +
                  " | Item empirical probability (" +
                  str(times) + " runs)" +
                  ": " + str(float(i) / times))

        plt.hist(o)
        plt.xlabel('Item number (1-' + str(len(count)) + ')')
        plt.ylabel('Number of occurrences')
        plt.show()
def createHistogram(df, pic, bins=45, rates=False):
    """Save a histogram of the merged data for ``pic`` with a density overlay.

    The figure combines a histogram of ``mergeMatrix(df, pic)`` over
    [-500, 4000], one row of tick marks per row of ``sortMatrix(df, pic)``
    (non-zero entries only), a dashed red line at 0, optional rate labels,
    the picture returned by ``getPic(pic)`` and, on a second y axis, a
    Gaussian kernel density estimate of the data.  It is saved under the
    name ``pic`` at 150 dpi.

    Parameters
    ----------
    df, pic
        Passed to ``mergeMatrix``, ``sortMatrix`` and ``get_rate``; ``pic``
        is also used for the title and the output file name.
    bins : int, optional
        Number of histogram bins.
    rates : bool, optional
        When true, the first five values of ``get_rate(df, pic)`` are written
        onto the plot.
    """
    data=mergeMatrix(df, pic)
    matrix=sortMatrix(df, pic)

    density = gaussian_kde(data)
    # linspace() needs an integer sample count; max(data) may be a float.
    xs = np.linspace(min(data), max(data), int(max(data)))
    # Fixed bandwidth factor; the covariance has to be recomputed after
    # changing it.
    density.covariance_factor = lambda : .25
    density._compute_covariance()

    fig,ax1 = plt.subplots()
    plt.hist(data, bins=bins, range=[-500, 4000], histtype='stepfilled', color='grey', alpha=0.5)
    lims = plt.ylim()
    # Raster rows start just below the top of the histogram axis and step
    # down by 2 for every row of the matrix.
    height=lims[1]-2
    for i in range(0,len(matrix)):
        currentRow = matrix[i][np.nonzero(matrix[i])]
        plt.plot(currentRow, np.ones(len(currentRow))*height, '|', color='black')
        height -= 2

    plt.axvline(x=0, color='red', linestyle='dashed')

    if rates:
        rates = get_rate(df, pic)
        ax1.text(-250, 4, str(rates[0]), size=15, ha='center', va='center', color='green')
        ax1.text(500, 4, str(rates[1]), size=15, ha='center', va='center', color='green')
        ax1.text(1500, 4, str(rates[2]), size=15, ha='center', va='center', color='green')
        ax1.text(2500, 4, str(rates[3]), size=15, ha='center', va='center', color='green')
        ax1.text(3500, 4, str(rates[4])+ r' $\frac{\mathsf{Spikes}}{\mathsf{s}}$', size=15, ha='center', va='center', color='green')
    plt.ylim([0,lims[1]+5])
    plt.xlim([0, 4000])
    plt.title('Histogram for ' + str(pic))
    ax1.set_xticklabels([-500, 'Start\nStimulus', 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000])
    plt.xlabel('Time (ms)')
    plt.ylabel('Counts (Spikes)')

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(lims)
    arr_hand = getPic(pic)
    imagebox = OffsetImage(arr_hand, zoom=.3)
    xy = [3200, lims[1]+5]               # coordinates to position this image

    ab = AnnotationBbox(imagebox, xy, xybox=(30., -30.), xycoords='data',boxcoords="offset points")
    ax1.add_artist(ab)

    ax2 = ax1.twinx()  # second y axis sharing the x axis

    # Density estimate on the second axis; tick labels are in units of 1e-4.
    ax2.plot(xs, density(xs) , 'g', drawstyle='steps')
    plt.ylim([0,0.001])
    plt.yticks([0.0001,0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.0009])
    ax2.set_yticklabels([1,2,3,4, 5, 6, 7, 8, 9])
    plt.ylabel(r'Density ($\cdot \mathsf{10^{-4}}$)', color='green')
    plt.gcf().subplots_adjust(right=0.89)
    plt.gcf().subplots_adjust(bottom=0.2)
    plt.savefig(pic, dpi=150)
Example #24
0
def cnt_overall_rating_dis(inputfile, outputfile, bins):
    '''
        Show a histogram of the ratings and write summary statistics.

        The rating is read from the second column of every row of the CSV
        file ``inputfile``.  After the histogram (``bins`` is passed to
        ``plt.hist``) is shown, the function waits for Enter.

        Output written to ``outputfile`` (CSV):
        1. the mean and the variance of all rating scores;
        2. for every distinct rating value, the number of reviews that have
           it and its relative frequency, sorted by rating.
    '''
    # Context managers make sure both files are closed.
    with open(inputfile) as source:
        data = [float(line[1]) for line in csv.reader(source)]
    plt.hist(data, bins)
    plt.show()

    # Pause until Enter is pressed; raw_input only exists on Python 2.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter()

    review_num = len(data)
    avg_rating = np.mean(data)
    var_rating = np.var(data)

    rating_cnt = {}
    for rating in data:
        rating_cnt[rating] = rating_cnt.get(rating, 0) + 1
    rows = [[rating, num, float(num) / review_num]
            for rating, num in rating_cnt.items()]
    rows.sort(key=lambda row: float(row[0]))

    with open(outputfile, 'w') as sink:
        writer = csv.writer(sink, lineterminator='\n')
        writer.writerow(("Average_Rating", "Variance_Rating"))
        writer.writerow((avg_rating, var_rating))
        writer.writerow(("Rating", "Num", "Prob"))
        writer.writerows(rows)
Example #25
0
def predicted_probabilities(y_true, y_pred, n_groups=30):
    """Draw a histogram of the predicted probabilities.

    Parameters
    ----------
    y_true : array_like
        Observed labels, either 0 or 1.  Not used by the plot itself.
    y_pred : array_like
        Predicted probabilities, floats on [0, 1].
    n_groups : int, optional
        Number of histogram bins.  The default value is 30.

    Notes
    -----
    .. plot:: pyplots/predicted_probabilities.py
    """
    n_predictions = len(y_pred)
    heading = 'Distribution of Predicted Probabilities (n = {})'.format(
        n_predictions)

    plt.hist(y_pred, n_groups)
    plt.xlim([0, 1])
    plt.xlabel('Predicted Probability')
    plt.ylabel('Count')
    plt.title(heading)
    plt.tight_layout()
def _parse_cns_bed_line(line):
    """Split one BED line into (length, attribute list, {species: 0 or 1})."""
    fields = line.split('\t')
    attributes = fields[-1].split(';')
    presence = {word.split(':')[0]: int(word.split(':')[1] != '0')
                for word in attributes[1].split(',')}
    length = int(fields[2]) - int(fields[1])
    return length, attributes, presence


def CNS(directory):
    """Summarise the ``.bed`` files in ``directory``.

    For every file the total covered length (end minus start, summed over all
    lines), the number of lines and, per species named in the second
    ``;``-separated attribute, the number of lines with a non-zero value are
    collected and written to ``CNSStatistics.txt``.  For files ending in
    ``ConservedElements.bed`` the number of species present per segment is
    also recorded and plotted to ``SpeciesNumberDistribution.png``.

    ``directory`` is concatenated directly with file names, so it has to end
    with a path separator.  A file that cannot be parsed is reported and
    skipped.
    """
    print(directory)
    MASegDict = {}
    seqCount = Counter()
    numFeatures = defaultdict(list)
    speciesDistributionMaster = defaultdict(list)
    bed_files = [name for name in os.listdir(directory) if name.endswith('.bed')]
    for species in bed_files:
        try:
            print(directory+species)
            seqCount[species] = 0
            speciesDistribution = Counter()
            with open(directory+species,'r') as f:
                lines = f.readlines()
            numFeatures[species] = [len(lines)]
            is_conserved = species.endswith('ConservedElements.bed')
            for line in lines:
                # Drop the line terminator so a final ':0' field is still
                # recognised as zero, and skip blank lines.
                line = line.rstrip('\r\n')
                if not line.strip():
                    continue
                length, attributes, presence = _parse_cns_bed_line(line)
                if is_conserved:
                    segment_id = attributes[2].replace('SegmentID=','')
                    MASegDict[segment_id] = sum(presence.values())
                seqCount[species] += length
                # A Counter starts missing keys at 0, so the first
                # occurrence of a species is counted as well.
                for species2, present in presence.items():
                    speciesDistribution[species2] += present
            speciesDistributionMaster[species] = speciesDistribution
        except Exception:
            print('Error with ' + species)

    with open(directory+'CNSStatistics.txt','w') as f:
        for species in sorted(numFeatures):
            if not species:
                continue
            try:
                distribution = speciesDistributionMaster[species]
                summary = ','.join('%s:%d'%(key,distribution[key]) for key in distribution.keys())
                f.write(species+'\nTotalSequenceAmount=%dbps\nNumberOfElements=%d\n%s\n\n'%(seqCount[species],numFeatures[species][0],'SpeciesDistribution='+summary))
            except Exception:
                print('Error writing ' + species)

    # numpy and matplotlib need a real sequence, not a dict view.
    segment_counts = list(MASegDict.values())
    plt.figure()
    if segment_counts:
        # Edges at k + 0.5 up to max + 0.5, so the largest count gets a bin.
        top = int(np.max(segment_counts))
        plt.hist(segment_counts, bins=np.arange(0, top + 1) + 0.5)
    else:
        print('No conserved segments found; histogram is empty')
    plt.title('Distribution of Number of Species for Conserved Segments')
    plt.ylabel('Count')
    plt.xlabel('Number of species in Conserved Segment')
    plt.savefig(directory+'SpeciesNumberDistribution.png')
    def show(self):
        """Lay out all stored images and histograms on one figure.

        ``self.subplots`` and ``self.histograms`` hold ``(title, array)``
        pairs.  Images come first, histograms after them, in a grid that is
        four columns wide on the figure numbered ``self.figure_num``.
        """
        plt.figure(self.figure_num)
        num_histograms = len(self.histograms)
        num_subplots = len(self.subplots)
        # plt.subplot() needs integer grid dimensions.
        n_cols = 4
        n_rows = int(math.ceil((num_subplots + num_histograms) / float(n_cols)))

        for i, (title, img) in enumerate(self.subplots):
            print("plotting: " + str(title))
            print(img.shape)

            ax = plt.subplot(n_rows, n_cols, i + 1)
            format_subplot(ax, img)
            plt.title(title)
            plt.imshow(img)

        for i, (title, img) in enumerate(self.histograms):
            print("plotting: " + str(title))
            print(img.shape)

            # Histograms continue in the grid cells after the images.
            plt.subplot(n_rows, n_cols, num_subplots + i + 1)
            plt.title(title)
            plt.hist(img, bins=10, alpha=0.5)
def create_random_sample_from_beta(success, total, sample_size=10000, plot=False):
    """Draw ``sample_size`` values from Beta(success, total - success).

    With ``plot`` set, a 100-bin histogram of the sample is shown before the
    sample is returned.
    """
    sample = stats.beta.rvs(success, total - success, size=sample_size)
    if plot:
        hist(sample, 100)
        show()
    return sample
def plotHist(data, bins=None, figsize=(7,7), title="", **kwargs):
    """Show a histogram of ``data`` on a new figure.

    Parameters
    ----------
    data : sequence
        Values to histogram.
    bins : int or sequence, optional
        Passed to ``plt.hist``; defaults to one bin per data point.
    figsize : tuple, optional
        Figure size passed to ``plt.figure``.
    title : str, optional
        Figure title.
    **kwargs
        Forwarded to ``plt.hist``.
    """
    # Identity test: ``== None`` on an array of bin edges compares
    # element-wise and has no single truth value.
    if bins is None:
        bins = len(data)
    plt.figure(figsize=figsize)
    plt.hist(data, bins=bins, **kwargs)
    plt.title(title)
    plt.show()
def plotter(fromdat,filename):
    """Show three energy spectra taken from ``fromdat``.

    First all hits, then detector 1 and detector 2 overlaid, then detector 3
    alone.  Every histogram is a step histogram using ``fromdat.bins``;
    ``filename`` appears in the first two titles.
    """

    def finish(heading):
        # Shared axis labels, title and legend, then display.
        plt.ylabel('Counts' )
        plt.xlabel('Energy kev' )
        plt.title(heading)
        plt.legend(loc='upper right' )
        plt.show()

    edges = fromdat.bins

    plt.figure()
    plt.hist(fromdat.all_val, bins=edges, color=(0, 0, 0, 1),
             histtype='step', label='All Hits')
    finish('All Detectors Spectrum\n'+ filename)

    plt.figure()
    overlay = (
        (fromdat.det1_val, (0, 0, 0, 0.7), fromdat.detector1),
        (fromdat.det2_val, (0, 1, 0, 0.7), fromdat.detector2),
    )
    for values, rgba, detector_name in overlay:
        plt.hist(values, bins=edges, color=rgba, histtype='step',
                 label=detector_name)
    finish('Overlay Plot Both Spectrum \n ' + filename)

    # No explicit plt.figure() call precedes the third spectrum.
    plt.hist(fromdat.det3_val, bins=edges, color=(0, 0, 0, 0.5),
             histtype='step', label=fromdat.detector3)
    finish(fromdat.detector3)
           label='Train Error')
  plt.plot(hist['epoch'], hist['val_mse'],
           label = 'Val Error')
  plt.ylim([0,20])
  plt.legend()
  plt.show()


# Plot the training history.
plot_history(history)

# Evaluate on the test set; the second returned value is reported as the
# mean absolute error.
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae))

# Flatten the model output to one prediction per test row.
test_predictions = model.predict(normed_test_data).flatten()

# Predictions against true values, on square axes starting at 0, with a
# reference line y = x.
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])
plt.show()

# Histogram of the prediction errors (prediction minus true value).
error = test_predictions - test_labels
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")
plt.show()
Example #32
0
 def show(self):
     """Print a fixed list and 100 ``randn`` values, then histogram them."""
     feature = [1, 2, 3, 4]
     # Single-argument print() behaves the same on Python 2 and Python 3.
     print(feature)
     data = randn(100)
     print(data)
     plt.hist(data)
# Load the reward table; below, dataset.values[n, ad] is read as the reward
# of ad `ad` in round `n`.
dataset = pd.read_csv('Ads_CTR_Optimisation.csv')

import random
N = 10000  # number of rounds
d = 10     # number of ads
ads_selected = []
numbers_of_rewards_1 = [0] * d  # per ad: rounds in which the reward was 1
numbers_of_rewards_0 = [0] * d  # per ad: rounds in which the reward was not 1
total_reward = 0
for n in range(0, N):
    # Thompson sampling: draw one value per ad from
    # Beta(rewards_1 + 1, rewards_0 + 1) and select the ad with the largest
    # draw.
    ad = 0
    max_random = 0
    for i in range(0, d):
        random_beta = random.betavariate(numbers_of_rewards_1[i] + 1,
                                         numbers_of_rewards_0[i] + 1)
        if (random_beta > max_random):
            max_random = random_beta
            ad = i
    ads_selected.append(ad)
    # Look up the reward of the selected ad and update its counters.
    reward = dataset.values[n, ad]
    if reward == 1:
        numbers_of_rewards_1[ad] = numbers_of_rewards_1[ad] + 1
    else:
        numbers_of_rewards_0[ad] = numbers_of_rewards_0[ad] + 1
    total_reward = total_reward + reward
# Histogram of how often each ad was selected.
plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()
    histogram_of_classes[key] = dict_object['numInstances'];
    class_synset_id[key] = dict_object['synsetId'];

# Collect every class that has more than 1000 instances, together with its
# instance count and synset id, echoing each match as it is found.
good_classes = []
good_classes_pop = []
desirable_synset_id = []
for i in histogram_of_classes:
    if histogram_of_classes[i] > 1000:
        good_classes.append(i)
        good_classes_pop.append(histogram_of_classes[i])
        desirable_synset_id.append(class_synset_id[i])
        print(', '.join([i, str(histogram_of_classes[i]), class_synset_id[i]]))

# Print the selected classes again, ordered by ascending instance count.
sorted_args = np.argsort(good_classes_pop)
for i in range(len(good_classes)):
    print(', '.join(
        str(column[sorted_args[i]])
        for column in (good_classes, good_classes_pop, desirable_synset_id)))

print(len(good_classes))
print(good_classes)
print(desirable_synset_id)


# Histogram of the instance counts of all classes (not only the selected ones).
print(histogram_of_classes.values())
plt.figure()
plt.hist(histogram_of_classes.values())
plt.show()




Example #35
0
    print "%s: %d" % (position, max(plot_positions[position]))

# Per-position point lists pulled out of the plot_positions mapping.
QB = plot_positions['QB']
RB = plot_positions['RB']
WR = plot_positions['WR']
TE = plot_positions['TE']
K = plot_positions['K']

plt.figure(1)


n_bins = 20
points_range = [0, 26]

# Five stacked rows; each subplot histograms one position over the same range.
# `density=True` replaces the `normed=1` keyword, which matplotlib removed.
plt.subplot(511)
plt.hist(QB, n_bins, range=points_range, density=True, histtype='bar', color='red', label='QB')

plt.subplot(512)
plt.hist(RB, range=points_range, density=True, histtype='bar')

# WR and TE are plotted as raw counts (no normalisation).
plt.subplot(513)
plt.hist(WR, range=points_range, histtype='step', stacked=True, fill=False)

plt.subplot(514)
plt.hist(TE, range=points_range, histtype='bar')
# NOTE(review): K is read above but no fifth subplot is visible in this
# snippet -- presumably it follows; confirm.
Example #36
0
 q = []
 outname = savename + "/v{}t{}".format(iii, ii)
 for j in range(entries):
     a, c = next(gen)
     b = model.predict(a, verbose=0)[:, 0]
     x = np.append(x, np.array(c[:, 0]))
     y = np.append(y, b)
     for i in range(batch_size):
         if (c[i][0] == 0):
             g.append(b[i])
         else:
             q.append(b[i])
 plt.figure(1)
 plt.hist(q,
          bins=50,
          weights=np.ones_like(q),
          histtype='step',
          alpha=0.7,
          label='quark')
 plt.hist(g,
          bins=50,
          weights=np.ones_like(g),
          histtype='step',
          alpha=0.7,
          label='gluon')
 plt.legend(loc="upper center")
 plt.savefig(outname + "out.png")
 f = open(outname + "out.dat", 'w')
 f.write(str(q) + "\n")
 f.write(str(g))
 f.close()
 t_fpr, t_tpr, _ = roc_curve(x, y)
Example #37
0
# Create the output directory if it is missing. exist_ok avoids the
# check-then-create race of testing with os.access first.
os.makedirs(output_dir, exist_ok=True)

print('-----PROBLEM 1-----')
'''1a
Plot a histogram of percentages of the income.txt data with 30 bins. Make
sure that the bins are weighted using the normed=True option. Make sure your
plot has correct x-axis and y-axis labels as well as a plot title.'''

print('\nPart a')
print('------\n')

incomes = np.loadtxt('incomes.txt')
fig_name_1a = 'Fig_1a'

# `density=True` is the current spelling of the removed `normed=True`
# keyword: bar areas integrate to 1.
count, bins, ignored = plt.hist(incomes, 30, density=True)
plt.title('MACSS Graduates\' Incomes', fontsize=20)
plt.xlabel('Income')
plt.ylabel('Proportion of Incomes')
plt.tight_layout()

# Save the figure into the output directory instead of showing it.
output_path = os.path.join(output_dir, fig_name_1a)
plt.savefig(output_path)

print('Saved {}.\n'.format(fig_name_1a))
'''1b
Estimate the parameters of the log normal distribution by generalized method
of moments. Use the average income and standard deviation of income as your two
moments. Use the identity matrix as the weighting matrix Wˆ. Plot the estimated
lognormal PDF against the histogram from part (a). Report the value of your GMM
Example #38
0
import matplotlib.pyplot as plt
import os
import pandas as pd
from keras.models import model_from_yaml
import glob
import random
import cv2
import numpy as np
import time

# Histogram of the target column, one bin per integer value from 100 to 500.
os.chdir('images')
df = pd.read_csv('target.csv')
plt.hist(df.target, bins=range(100,501))
plt.show()

# Training loss history, skipping the first recorded entry.
os.chdir('..')
os.chdir('models')
df = pd.read_csv('nvidia_history.csv')
plt.plot(df.loss[1:])
plt.show()

# Rebuild the model from its YAML description and saved weights. The
# context manager closes the file even if reading fails.
with open('nvidia.yaml', 'r') as yaml_file:
    loaded_model_yaml = yaml_file.read()
loaded_model = model_from_yaml(loaded_model_yaml)
loaded_model.load_weights('nvidia.h5')
loaded_model.compile(loss='mse', optimizer='adam')

# Collect the colour images in random order. The path is relative to the
# 'models' directory entered above.
os.chdir('../images/color')
images = glob.glob('*.jpeg')
random.shuffle(images)
# Box plot of the standardized data (z-scores computed with ddof=1).
figure(figsize=(12,6))
title('Wine: Boxplot (standarized)')
# NOTE(review): attributeNames is passed as boxplot's second positional
# argument; in matplotlib that slot is `notch`, and the tick labels are
# actually set by the xticks call below. Left as-is to keep the figure
# unchanged -- confirm intent.
boxplot(zscore(X, ddof=1), attributeNames)
xticks(range(1,M+1), attributeNames, rotation=45)

# This plot reveals that there are clearly some outliers in the Volatile
# acidity, Density, and Alcohol attributes, i.e. attribute number 2, 8,
# and 11.

# Next, we plot histograms of all attributes.
figure(figsize=(14,9))
# The subplot grid must be given as integers; np.floor/np.ceil return floats,
# which current matplotlib rejects.
u = int(np.floor(np.sqrt(M)))
v = int(np.ceil(float(M) / u))
for i in range(M):
    subplot(u, v, i+1)
    hist(X[:,i])
    xlabel(attributeNames[i])
    ylim(0, N) # Make the y-axes equal for improved readability
    # Only the first column of the grid keeps its y tick labels.
    if i%v!=0: yticks([])
    if i==0: title('Wine: Histogram')


# This confirms our belief about outliers in attributes 2, 8, and 11.
# To take a closer look at this, we next plot histograms of the
# attributes we suspect contain outliers (zero-based column indices).
figure(figsize=(14,9))
m = [1, 7, 10]
for i in range(len(m)):
    subplot(1,len(m),i+1)
    hist(X[:,m[i]],50)
    xlabel(attributeNames[m[i]])
Example #40
0
def plot_thresholded_qmap(img,
                          coords,
                          output_folder,
                          brain=None,
                          mask=None,
                          thresh=99,
                          map="map",
                          interactive=False):
    """
    Plot the final estimated t1 or t2-maps
    and threshold voxel intensity at some percentile
    or an arbitrary intensity.

    Three files are written into ``output_folder``: ``<map>_vox_dist.pdf``
    (histogram of voxel values up to the threshold), ``<map>_plot.pdf``
    (thresholded image) and, when ``interactive`` is true,
    ``<map>_interactive.html``.

    Parameters
    ----------
    img : image to load and plot (passed to ``load`` and the plotting calls).
    coords : cut coordinates forwarded to ``nilplot.plot_img``.
    output_folder : directory the output files are written to.
    brain : optional background image for the plots.
    mask : accepted for interface compatibility; not used in this function.
    thresh : a ``str`` is read as a percentile (e.g. ``"99"``); any other
        value is converted with ``float`` and used as an absolute intensity.
        NOTE(review): the default ``99`` is an int, so it is treated as an
        intensity, not as the 99th percentile -- confirm this is intended.
    map : label used in file names and log output (shadows the builtin, but
        the name is part of the public signature).
    interactive : also write the interactive HTML view.
    """
    # flatten image data for calculating threshold
    img_arr = load(img).get_data().astype(float)
    img_arr_flat = img_arr.flatten()

    # t1 map has negatives - keeping only positive values
    if nanmin(img_arr_flat) < 0:
        # too many zeros result in threshold at 0 - even at 99%
        # calculating threshold after removing -ves and 0s
        img_arr_flat = img_arr_flat[img_arr_flat > 0]

    # calculating threshold
    # if given threshold is specified as percentile
    if isinstance(thresh, str):
        threshold = nanpercentile(img_arr_flat, float(thresh))
        titl = 'Voxel distribution ranged between [0, {}] (at {} percentile)'.format(
            threshold, thresh)
    # if given threshold is specified as voxel value
    else:
        threshold = float(thresh)
        titl = 'Voxel distribution ranged between [0, {}]'.format(thresh)
    # removing voxels higher than threshold
    img_arr_flat_threshold = img_arr_flat[img_arr_flat <= threshold]
    print("{} - voxels ranging [0, {}] plotted".format(map, threshold))

    # plot voxel distribution upto threshold
    hist(img_arr_flat_threshold)
    title(titl)
    fig_name = join(output_folder, '{}_vox_dist.pdf'.format(map))
    savefig(fig_name, bbox_inches='tight')
    close()

    # plot interactive thresholded t2 map image
    # this is buggy
    if interactive:
        html_view = nilplot.view_img(
            img,
            brain,
            cmap=cm.gray,
            symmetric_cmap=False,
            # vmin=0,
            vmax=threshold,
            threshold=0)
        fig_name = join(output_folder, '{}_interactive.html'.format(map))
        html_view.save_as_html(fig_name)

    # plot a simple thresholded image
    normal_view = nilplot.plot_img(img=img,
                                   bg_img=brain,
                                   cut_coords=coords,
                                   cmap=cm.gray,
                                   vmax=threshold,
                                   vmin=0,
                                   colorbar=True)
    fig_name = join(output_folder, '{}_plot.pdf'.format(map))
    normal_view.savefig(fig_name)
    normal_view.close()
# name = filenamee
# sns_plot.figure.suptitle("Test", fontsize=14)

# Relabel the ticks of the existing sns_plot axes: y tick labels are the tick
# positions multiplied by 5000, x tick labels are the positions unchanged.
plt.xticks()
# NOTE(review): this prints the bound method itself, not the tick values
# (no call parentheses) -- presumably leftover debugging; confirm.
print(sns_plot.get_yticks)
plt.yticks(sns_plot.get_yticks(), sns_plot.get_yticks() * 5000)
plt.xticks(sns_plot.get_xticks(), sns_plot.get_xticks() / 1)

# Axis labels are in Russian: "Packet size, bytes" / "Number of packets, %".
plt.xlabel('Размер пакета, байт', fontsize=8)
plt.ylabel('Количество пакетов, %', fontsize=9)
# Disabled alternatives: the same labels with font sizes 10 and 11, and
# saving the figure as name + '.png' instead of showing it.
plt.show()

# Density histogram of s with a scaled log-normal curve drawn on top:
# A times the log-normal pdf with parameters mu and sigma, evaluated over
# the range of the bin edges.
count, bins, ignored = plt.hist(s, 50, density=True, align='mid')
x = np.linspace(min(bins), max(bins), 10000)
pdf = (A*np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) / (x * sigma * np.sqrt(2 * np.pi)))
plt.plot(x, pdf, linewidth=3, color='r')

plt.minorticks_on()
# The appearance of the minor (auxiliary) grid can now be set separately:
plt.grid(which='minor',
        color = 'gray',
        linestyle = ':')
plt.xlabel('Размер пакета, байт', fontsize=8)
plt.ylabel('Количество пакетов, %', fontsize=9)
plt.xticks(sns_plot.get_xticks(), sns_plot.get_xticks() / 1)

plt.yticks(sns_plot.get_yticks(), sns_plot.get_yticks() * 5000)
Example #42
0
x = np.random.rand(10)
# ax3.axis('tight')  # (disabled) this seemed to act like a normalisation of the axis limits
# About 'tight': this is not a good example of it; see the link below for a
# better one that is used when drawing a histogram.
#https://stackoverflow.com/questions/37558329/matplotlib-set-axis-tight-only-to-x-or-y-axis
# Plot x in magenta and its cumulative sum in yellow on ax3, both dashed
# with '>' markers.
ax3.plot(x,'m',x.cumsum(),'y',linestyle='--',marker='>')
# The string is iterated, so each character becomes one tick label.
ax3.set_xticklabels('0ABCDef')

plt.savefig('test.jpg',dpi=1000,facecolor='red')

# Basics: chart types
# histogram, bar chart, pie chart, scatter plot, box plot
# Histogram taken from the official example
np.random.seed(19680801)
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)
n, bins, patches = plt.hist(x, 50, density=True, facecolor='g', alpha=0.75) # density replaces normed=True
plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
plt.text(60, .025, r'$\mu=100,\ \sigma=15$') # the text uses LaTeX-like math markup
plt.xlim(40, 160)
plt.ylim(0, 0.03)
plt.grid(True)
plt.show() 
# Bar chart taken from an example
# https://www.cnblogs.com/always-fight/p/9707727.html
y = range(1,17)
# Two bar series side by side: the second is shifted right by 0.4.
plt.bar(np.arange(16), y, alpha=0.5, width=0.3, color='yellow',edgecolor='red', label='The First Bar', lw=3)
plt.bar(np.arange(16)+0.4, y, alpha=0.2, width=0.3, color='green',edgecolor='blue', label='The Second Bar', lw=3)
# Pie chart
plt.pie(np.array([0.4,0.2,0.15,0.2]),labels=['dog','cat','bird','cow'],shadow=True,explode=[0.1,0,0,0],autopct='%0.1f%%')
Example #43
0
    'date_time', 'price_usd', 'srch_booking_window', 'srch_saturday_night_bool'
]]
# Summarise the frame, then keep only rows priced below 5584 USD.
print(df.info())
print(df['price_usd'].describe())
df = df.loc[df['price_usd'] < 5584]

# Time series visualisation: price against the search date/time.
df.plot(x='date_time', y='price_usd', figsize=(12, 6))
plt.xlabel('Date time')
plt.ylabel('Price in USD')
plt.title('Time Series of room price by date time of search')

# Overlaid price histograms, split by the srch_saturday_night_bool flag
# (a: flag == 0, b: flag == 1).
a = df.loc[df['srch_saturday_night_bool'] == 0, 'price_usd']
b = df.loc[df['srch_saturday_night_bool'] == 1, 'price_usd']
plt.figure(figsize=(10, 6))
plt.hist(a, bins=50, alpha=0.5, label='Search Non-Sat Night')
plt.hist(b, bins=50, alpha=0.5, label='Search Sat Night')
plt.legend(loc='upper right')
plt.xlabel('Price')
plt.ylabel('Count')

# k-means clustering: fit one model per cluster count from 1 to 19 on the
# three selected columns and record each model's score on the same data.
data = df[['price_usd', 'srch_booking_window', 'srch_saturday_night_bool']]
n_cluster = range(1, 20)
kmeans = [cluster.KMeans(n_clusters=i).fit(data) for i in n_cluster]
scores = [kmeans[i].score(data) for i in range(len(kmeans))]

# Score as a function of the number of clusters.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(n_cluster, scores)
plt.xlabel('Number of Clusters')
plt.ylabel('Score')
# Finish the reference-vs-prediction figure: identical limits and ticks on
# both axes.
plt.ylabel('Exact')
plt.title("Correlation between reference and prediction")
plt.xlim(199.8,202)
plt.ylim(199.8,202)
plt.xticks(col_x_ticks)
plt.yticks(col_x_ticks)


############################# Plotting_2 #############################
# Display of the summary statistics
TgNN_R2_set=R2_set
TgNN_error_l2_set=error_l2_set
num_bins = 15
l2_x_ticks = np.arange(0,0.0016, 0.0003)
# Histogram of the relative L2 errors.
plt.figure(figsize=(6,4))
plt.hist(TgNN_error_l2_set, num_bins)
plt.title(r'$Histogram\ \ of\ \  relative\ \ L_2\ \ error$')
plt.xticks(l2_x_ticks)


# Histogram of the R^2 scores, with the x axis limited to [0.9, 1].
num_bins2 = 15
plt.figure(figsize=(6,4))
plt.hist(TgNN_R2_set, num_bins2)
plt.title(r'$Histogram\ \ of\ \  R^2\ \ score$')
plt.xlim(0.9,1)





# tight_layout applies to the current (last created) figure only.
plt.tight_layout()
plt.show()

# Data summary, with floats displayed to three decimals.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
train_df.describe()

# In the summary above the minimum trip duration is 1 second and the maximum
# is about 3.5 million seconds (roughly 950 hours). Trips that long are not
# plausible, so the data clearly contains outliers.
# Remove outliers: keep rows within two standard deviations of the mean.
mean = np.mean(train_df.trip_duration)
sd = np.std(train_df.trip_duration)
train_df = train_df[train_df['trip_duration'] <= mean + 2 * sd]
train_df = train_df[train_df['trip_duration'] >= mean - 2 * sd]

plt.hist(train_df['trip_duration'].values, bins=100)
plt.xlabel('trip_duration')
plt.ylabel('number of train records')
plt.show()

# Transform the target trip duration to logarithmic form, i.e. x -> log(x + 1).
train_y = np.log1p(train_df.trip_duration)

# Add features: the haversine distance between the pickup and drop-off
# coordinates, and calendar parts (month, day, ...) of the pickup datetime.
train_df['distance'] = train_df.apply(lambda r: haversine.haversine(
    (r['pickup_latitude'], r['pickup_longitude']),
    (r['dropoff_latitude'], r['dropoff_longitude'])),
                                      axis=1)
# The .dt accessor is used, so pickup_datetime must already be datetime-typed.
train_df['month'] = train_df.pickup_datetime.dt.month
train_df['day'] = train_df.pickup_datetime.dt.day
Example #46
0
def makePlots(sess, myDataManipulations):
    """Run the restored model on the stored data and plot its response.

    Looks up the graph tensors by name in the default graph, evaluates the
    model output and accuracy on ``myDataManipulations.features`` /
    ``.labels`` with dropout set to 0.0 and training mode off, then shows
    histograms of the model output for label 1 and label 0 and a scatter of
    the output against the first feature column.

    Parameters
    ----------
    sess : session used to run the graph (``sess.run`` is called on it).
    myDataManipulations : object providing ``features``, ``labels`` and
        ``nData`` attributes.
    """
    #Fetch operations
    x = tf.get_default_graph().get_operation_by_name("input/x-input").outputs[0]
    y = tf.get_default_graph().get_operation_by_name("model/performance/Sigmoid").outputs[0]
    yTrue = tf.get_default_graph().get_operation_by_name("input/y-input").outputs[0]
    dropout_prob = tf.get_default_graph().get_operation_by_name("model/dropout_prob").outputs[0]
    trainingMode = tf.get_default_graph().get_operation_by_name("model/trainingMode").outputs[0]
    accuracy = tf.get_default_graph().get_operation_by_name("model/performance/accuracy/update_op").outputs[0]

    features = myDataManipulations.features
    # A copy is fed to the graph so the original array is left untouched.
    featuresCopy = np.copy(features)
    labels = myDataManipulations.labels
    nData = myDataManipulations.nData
    #truePDF = myDataManipulations.PDF

    # First feature column is used as pT below.
    pT = features[:,0]
    #Eta = features[:,0]

    result = sess.run([y, yTrue, accuracy], feed_dict={x: featuresCopy, yTrue: labels, dropout_prob: 0.0, trainingMode: False})
    # Flatten the model output to a 1-D array.
    modelResult = result[0]
    modelResult = np.reshape(modelResult,(1,-1))[0]

    modelResults = {"training": modelResult}

    print("Test sample accuracy:",result[2])

    print(labels.shape, modelResult.shape)

    # Split the model output by true label.
    indexesS = labels==1
    signalResponse = modelResult[indexesS]

    indexesB = labels==0
    backgroundResponse = modelResult[indexesB]

    # Figure 1: model output for label == 1 (legend text: "fake tau").
    plt.figure(1)
    plt.hist(signalResponse, bins = 20, label="fake tau")
    plt.xlabel('Model prediction')
    plt.ylabel('Events')
    plt.legend(loc=2)
    plt.show(block=False)

    # Figure 2: model output for label == 0 (legend text: "true tau").
    plt.figure(2)
    plt.hist(backgroundResponse, bins=20, label="true tau")
    plt.xlabel('Model prediction')
    plt.ylabel('Events')
    plt.legend(loc=2)
    plt.show()

    print(labels.shape, pT.shape)

    # nData is recomputed from pT, replacing the value read above.
    nData = len(pT)
    nbins = 8
    pT_bins = getBinnedVar('pT', nbins)
    Eta_bins = getBinnedVar('Eta', nbins)
    trueFakesPt = getFR_PtHisto(pT, labels, nbins, nData)
    #trueFakesEta = getFR_EtaHisto(Eta, labels, nbins, nData)

    print('pT bins: ', pT_bins.size, 'Eta bins: ', Eta_bins.size)

    #plot model prediction vs. input data histos
    # Figure number 1 is selected again for this scatter plot.
    plt.figure(1)
    plt.scatter(pT, modelResult, s=1, label = 'Model prediction')
    # NOTE(review): the binned values are multiplied by 50 before plotting --
    # presumably a display scale factor; confirm.
    plt.scatter(pT_bins, 50*trueFakesPt, label = 'Input N_{fake}/N_{all}')
    plt.xlabel('pT')
    plt.legend(loc=0)
    plt.show()
    '''
Example #47
0
def plot_histogram(f0, bins_='auto'):
    """Draw a histogram of ``f0`` on the current pyplot axes.

    Parameters
    ----------
    f0 : the values to histogram (passed to ``plt.hist`` as ``x``).
    bins_ : bin specification forwarded to ``plt.hist`` as ``bins``;
        defaults to ``'auto'``.
    """
    # Forward the caller's choice under the keyword plt.hist understands.
    # Previously the literal bins_='auto' was passed, which is not a hist
    # keyword and ignored the argument.
    n, bins, patches = plt.hist(x=f0,
                                bins=bins_,
                                color='#0504aa',
                                alpha=0.7,
                                rwidth=0.85)
Example #48
0
                               unpack=True,
                               usecols=(3, ),
                               skiprows=1)
        #f = open(tunnel+way+".txt","w")
        #f.write( str(np.mean(listError))+'\t'+str(np.std(listError))+'\n' )
        mu = np.mean(listError)
        sigma = np.std(listError)

        if way == "EntranceExit":
            line = '-'
        else:
            line = '-.'

        count, bins, ignored = plt.hist(listError,
                                        60,
                                        normed=True,
                                        edgecolor=colors[count2],
                                        facecolor="None")

        plt.plot(bins,
                 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins - mu)**2 /
                                                           (2 * sigma**2)),
                 linewidth=2,
                 color=colors[count2],
                 label=tunnel + way,
                 ls=line)

    plt.xlabel("Error (m)")
    #plt.xticks(np.arange(-1,10))
    #plt.xscale('log')
    #plt.yscale('log')
Example #49
0
    
    optimistic = map(bestPossibleTime, data)
    exact = map(exactTime,data)
    incremental = map(incrementalTime,data)

    randomModel = SynthesisPolicy()
    randomModel.zeroParameters()
    #randomPolicy = [ randomModel.rollout(r,L = mode) for r in data for _ in range(10)  ]

    
    bins = np.logspace(0,5,30)
    figure = plot.figure(figsize = (6,1.6))
    for j,(ys,l) in enumerate([(exact,'sketch'),(optimistic,'oracle'),(policy,'learned policy (ours)')]):
        ys += [TIMEOUT]*totalFailures
        plot.subplot(1,3,1 + j)
        plot.hist(ys, bins, alpha = 0.3, label = l)
        if j == 1: plot.gca().set_xlabel('time (sec)',fontsize = 9)
        if j == 0: plot.ylabel('frequency',fontsize = 9)

        plot.gca().set_xscale("log")
        plot.gca().set_xticks([10**e for e in range(6) ])
        plot.gca().set_xticklabels([ r"$10^%d$"%e if e < 5 else r"$\infty$" for e in range(6)  ],
                                   fontsize = 9)
        plot.gca().set_yticklabels([])
        plot.gca().set_yticks([])
        #plot.legend(fontsize = 9)
        plot.title(l,fontsize = 9)
        # Remove timeouts
        print l,"timeouts or gives the wrong answer",len([y for y in ys if y == TIMEOUT ]),"times"
        median = np.median(ys)
        print l," median",median
Example #50
0
				temp_acc = 0

		else:
			print('TEAM ID:{}, ERROR:{}'.format(student_folder, 'Prediction file is empty!!!'))
			temp_acc = 0
	else:
		print('TEAM ID:{}, ERROR:{}'.format(student_folder, 'There is no prediction file!!!'))
		temp_acc = 0
	
	acc_list.append(temp_acc)
	with open(grade_file,'a') as f_grade_file:
		f_grade_file.write('{}\t{}\t{}\t{:.3f}\n'.format(student_folder, student1_id, student2_id, temp_acc))


# Convert the accuracies collected in acc_list into an array for plotting.
acc_arr = np.array(acc_list)
# print(acc_arr.shape)

# 50-bin histogram of the collected accuracies (counts per bin).
fig = plt.figure()
n, bins, patches = plt.hist(acc_arr, 50, facecolor='g', alpha=0.75)
plt.xlabel('Top-10 Accuracy')
plt.ylabel('Number of submissions')
plt.title('Histogram of Top-10 Accuracy')
# plt.axis([40, 160, 0, 0.03])
plt.grid(True)

plt.show()




Example #51
0
data = [1, 4, 5, 6, 9, 9, 9]

c = Counter(data)

# Total number of observations in the list.
count_sum = sum(c.values())

# Output the relative frequency of each distinct value, rounded to 3 places.
# items() and a single pre-formatted string print identically on Python 2
# and 3 (iteritems() and the print statement are Python 2 only).
for k, v in c.items():
    print("%s %s" % (k, round(float(v) / count_sum, 3)))


# Boxplot
plt.boxplot(data)
plt.savefig("boxplot.png")
plt.show()


# Histogram
plt.hist(data, histtype='bar')
plt.savefig("histogram.png")
plt.show()


# QQ plot against the normal distribution
plt.figure()
graph1 = stats.probplot(data, dist="norm", plot=plt)
plt.savefig("qq.png")
plt.show()
Example #52
0
        median = np.median(pdf_tprops)
        outliers = np.arange(0, len(pdf_tprops))[np.logical_or(pdf_tprops < 0.5, np.abs(pdf_tprops - median) / std > 4)]
        pdf_tprops = np.delete(pdf_tprops, outliers)
        print("Removed {} outliers".format(len(outliers)))
        done = len(outliers) == 0

    sigma = 1.3e-4
    inv_sigma = 1.0 / sigma

    min_tprop = min(pdf_tprops)
    max_tprop = max(pdf_tprops)
    min_plot = min_tprop - 4 * sigma
    max_plot = max_tprop + 4 * sigma
    plt.subplot(2, len(pdf_is), pdf_i_i+1)
    plt.xlim(min_plot, max_plot)
    plt.hist(pdf_tprops, bins = 100)
    if pdf_i_i == 0:
        plt.ylabel("Counts")
    if True or pdf_i_i == 2:
        ticks = 3 if pdf_i_i != 1 else 2
        tick_spacing = int((max_plot - min_plot) / (ticks - 0.5) / 0.001) * 0.001
        if tick_spacing == 0:
            tick_spacing = 0.001
        first_tick = int(((max_tprop + min_tprop) / 2 - tick_spacing) / tick_spacing) * tick_spacing
        while first_tick < min_plot:
            first_tick += 0.001
        plt.xticks(first_tick + np.arange(ticks) * tick_spacing)

    # convolve with gaussian to get our estimated pdf
    min_x = pdf_tprops.min() - 6 * sigma
    max_x = pdf_tprops.max() + 6 * sigma
# 10000 draws from a gamma distribution with shape parameter 2.
gamma_sample = np.random.gamma(2, size=10000)

# One column per sample so they can be plotted side by side.
df = pd.DataFrame({
    'normal': normal_sample,
    'random': random_sample,
    'gamma': gamma_sample
})
plt.figure()
# create a boxplot of the normal data, assign the output to a variable to suppress output
# NOTE(review): newer matplotlib releases reportedly replace whis='range'
# with whis=(0, 100) -- confirm against the installed version.
_ = plt.boxplot(df['normal'], whis='range')
# clear the current figure
plt.clf()
# plot boxplots for all three of df's columns
_ = plt.boxplot([df['normal'], df['random'], df['gamma']], whis='range')
plt.figure()
_ = plt.hist(df['gamma'], bins=100)
plt.figure()
plt.boxplot([df['normal'], df['random'], df['gamma']], whis='range')
# overlay an inset axis on top of the boxplot axes (60% x 40%, loc=2)
ax2 = mpl_il.inset_axes(plt.gca(), width='60%', height='40%', loc=2)
ax2.hist(df['gamma'], bins=100)
ax2.margins(x=0.5)
# switch the y axis ticks for ax2 to the right side
ax2.yaxis.tick_right()
# if `whis` argument isn't passed, boxplot defaults to showing 1.5*interquartile (IQR) whiskers with outliers
plt.figure()
_ = plt.boxplot([df['normal'], df['random'], df['gamma']])
# Plot TotalVotes against TotalComments from the yourkernels frame.
# NOTE(review): factorplot was reportedly renamed catplot in later seaborn
# releases -- confirm against the installed version.
sns.factorplot('TotalComments', 'TotalVotes', data=yourkernels)
plt.show()
# Start a fresh figure for whatever is plotted next.
plt.figure()
# Dump the graph's edges (without edge data) to a file.
nx.write_edgelist(G, 'wholeGraph.csv', data=False)

# Bare expressions: the values are only displayed in an interactive session.
G.number_of_nodes()
G.number_of_edges()

# Per-node in-degree, out-degree and clustering coefficient.
in_degree = {}
out_degree = {}
clustering_coefficient = {}
for node in list(G.nodes):
    in_degree[node] = G.in_degree[node]
    out_degree[node] = G.out_degree[node]
    clustering_coefficient[node] = nx.clustering(G, node)

# Plotting
# in degree
plt.hist(np.array(list(in_degree.values())), bins=40)
plt.xlabel('in degree')
plt.ylabel('appearance number')
plt.title('distribution of in_degrees')
plt.show()

max(list(in_degree.values()))  # 8061
# Count of nodes whose in-degree exceeds 5 (again only displayed interactively).
a = [x for x in list(in_degree.values()) if x > 5]
len(a)

# out degree
plt.hist(np.array(list(out_degree.values())), bins=40)
plt.xlabel('out degree')
plt.ylabel('appearance number')
plt.title('distribution of out_degrees')
plt.show()
def main():
    """Train a CIFAR-10 model, export it for NNoM and plot activation histograms.

    Steps, in order: configure the TensorFlow session, load and scale
    CIFAR-10, build and train the model, reload the saved model from
    ``save_dir``, save one histogram per layer output for the first test
    image under ``tmp/``, write the first test image as ``tmp/input.raw``,
    generate ``weights.h``, build with ``scons`` and run the resulting
    binary. If that run exits with status 0, a histogram is saved next to
    every ``tmp/*.raw`` file.
    """
    import glob
    import matplotlib.pyplot as plt

    # fixed the gpu error
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # Kept for its side effect of creating a session with this config.
    session = tf.Session(config=config)

    epochs = 10
    num_classes = 10

    # The data, split between train and test sets:
    (x_train, y_train), (x_test_original, y_test_original) = cifar10.load_data()

    x_test = x_test_original
    y_test = y_test_original
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    print('x_train shape:', x_train.shape)

    # scale the pixel values to the 0..1 range
    x_test = x_test.astype('float32')/255
    x_train = x_train.astype('float32')/255
    print("data range", x_test.min(), x_test.max())

    # build model
    model = build_model(x_test.shape[1:])

    # train model
    history = train(model, x_train, y_train, x_test.copy(), y_test.copy(), epochs=epochs)

    # -------- generate weights.h (NNoM model) ----------
    # get the best model
    model = load_model(save_dir)

    # plot each layer's output in keras for the first test image
    test_img = x_test[0].reshape(1, x_test.shape[1], x_test.shape[2], x_test.shape[3])
    for inx, layer in enumerate(model.layers):  # layer loop
        # Skip the input layer and dropout layers.
        if model.input == layer.output or 'dropout' in layer.name:
            continue
        layer_model = Model(inputs=model.input, outputs=layer.output)
        features = layer_model.predict(test_img).flatten()
        layer_name = layer.name.split('/')[0]
        plt.hist(features, bins=128)
        plt.savefig("tmp/" + 'keras' + str(inx) + layer_name + ".png")
        plt.close()

    # generate binary dataset for NNoM validation, 0~1 -> 0~127, q7
    (x_test[0]*127).tofile("tmp/input.raw")

    # generate NNoM model, x_test is the calibration dataset used in quantisation process
    generate_model(model, x_test[:50], format='hwc', name="weights.h")

    # --------- for test in CI ----------
    # build NNoM
    os.system("scons")

    # do inference using NNoM
    # startswith('win') selects Windows only; a substring test for 'win'
    # would also match 'darwin' (macOS). The raw string avoids the invalid
    # '\m' escape while keeping the same path.
    cmd = r".\mnist.exe" if sys.platform.startswith('win') else "./mnist"
    if os.system(cmd) == 0:
        # One histogram per raw dump found under tmp/.
        for filename in glob.glob('tmp/*.raw'):
            result = np.fromfile(filename, dtype="int8")
            plt.hist(result, bins=128)
            plt.savefig(filename + ".png")
            plt.close()
medianCycTime = statistics.median(cycleList)

meanLeadTime = statistics.mean(leadList)
medianLeadTime = statistics.median(leadList)

# Summary statistics for cycle times, then lead times, then the ticket count.
print("Mean cycle time: {}".format(meanCycTime))
print("Median cycle time: {}".format(medianCycTime))
print("Minimum cycle time: {}".format(min(cycleList)))
print("Maximum cycle time: {}".format(max(cycleList)))

print("Mean lead time: {}".format(meanLeadTime))
print("Median lead time: {}".format(medianLeadTime))
print("Minimum lead time: {}".format(min(leadList)))
print("Maximum lead time: {}".format(max(leadList)))

print("Amount of tickets: {}".format(len(u_tickets)))

# Two stacked histograms in one figure: cycle times above, lead times below.
plt.figure(1)

plt.subplot(2, 1, 1)
plt.hist(cycleList)
plt.ylabel("Amount of tickets")
plt.xlabel("Cycletime in days")

plt.subplot(2, 1, 2)
plt.hist(leadList)
plt.ylabel("Amount of tickets")
plt.xlabel("Leadtime in days")

plt.show()
Example #57
0
def _fetch_currents(vid, start, end):
    """Return ``(charge, discharge)`` lists of integer currents for one vehicle.

    Readings of 5 or more are collected as discharge and readings of -5 or
    less as charge; anything in between is dropped.
    """
    charge = []
    discharge = []
    # NOTE(review): host and credentials are hard-coded; consider moving
    # them to configuration. Keyword arguments are used because newer
    # PyMySQL releases reject positional connection arguments.
    db = pymysql.connect(host="10.103.244.129", user="root",
                         password="yang1290", database="baas")
    try:
        cursor = db.cursor()
        # Placeholders let the driver escape the values instead of splicing
        # request data into the SQL text.
        sql = ("SELECT Current FROM driving_log "
               "WHERE vehicle_id=%s AND time >= %s AND time <= %s")
        try:
            cursor.execute(sql, (vid, start, end))
            for row in cursor.fetchall():
                if row[0] >= 5:
                    discharge.append(int(row[0]))
                elif row[0] <= -5:
                    charge.append(int(row[0]))
        except Exception as exc:
            # Best effort, as before: report the failure and continue with
            # whatever was collected so far.
            print("Error: unable to fetch data", exc)
        finally:
            cursor.close()
    finally:
        db.close()
    return charge, discharge


def _plot_current_histogram(position, values, bins, xlim, title, color):
    """Draw one current-distribution subplot and return ``(counts, rates)``."""
    plt.subplot(2, 1, position)
    plt.grid()
    plt.xlim(*xlim)
    plt.title(title)
    plt.xlabel('电流')
    plt.ylabel('采集点')
    # Raw counts first, then the normalised histogram on the same axes.
    # `density=True` replaces the removed `normed=True` keyword.
    counts, edges, _ = plt.hist(x=values, bins=bins, histtype='bar', color=[color])
    rates, _, _ = plt.hist(x=values, bins=bins, density=True, histtype='bar', color=[color])
    # Annotate every bar with its count, centred on the 10-wide bin.
    for x, y in zip(edges, counts):
        plt.text(x + 10 / 2, y, '%d' % y, ha='center', va='bottom')
    return counts, rates


def main(params):
    """Plot charge/discharge current distributions for a vehicle and export a CSV.

    ``params`` must provide ``vehicleId``, ``startTime`` and ``endTime``
    (dates; the whole start and end days are included). The figure is shown,
    the per-bin counts and rates are appended to a freshly named CSV file,
    and a JSON-formatted string carrying ``picurl``, ``csvurl``, ``code``
    and ``message`` is returned.

    Each CSV row pairs a bin's left edge with its count and rate.
    """
    print(params)
    # Round-trip through JSON, as before, so params holds plain JSON types.
    params = json.loads(json.dumps(params))
    picurl = "/home/pic/" + str(uuid.uuid1()) + ".jpg"
    # TODO: change this path
    csvurl = "/Users/alanp/Downloads/param/" + "电流概率分布" + str(uuid.uuid1()) + ".csv"
    # csvurl="/home/csv/"+str(uuid.uuid1())+".csv"

    vid = params['vehicleId']
    start_date = params['startTime']
    end_date = params['endTime']
    charge, discharge = _fetch_currents(
        vid, start_date + ' 00:00:00', end_date + ' 23:59:59')

    rmes_charge = -get_rmes(charge)
    rmes_discharge = get_rmes(discharge)

    print()
    fig = plt.figure()
    # Charging current distribution
    charge_bins = range(-350, 10, 10)
    charge_counts, charge_rates = _plot_current_histogram(
        1, charge, charge_bins, (-350, 0),
        "车辆编号为 " + str(vid) + " 的充电电流分布 " + str(rmes_charge), 'r')
    # Discharging current distribution
    discharge_bins = range(0, 360, 10)
    discharge_counts, discharge_rates = _plot_current_histogram(
        2, discharge, discharge_bins, (0, 350),
        "车辆编号为 " + str(vid) + " 的放电电流分布 " + str(rmes_discharge), 'blue')

    fig.tight_layout()
    fig.set_dpi(150)
    # TODO: drop show() and enable saving
    plt.show()
    # plt.savefig(picurl)

    # Use the left edge of each bin so the three columns have equal length.
    # Listing every edge gave one extra entry per histogram and shifted the
    # discharge rows by one bin.
    currents = list(charge_bins)[:-1] + list(discharge_bins)[:-1]
    frequency = list(charge_counts) + list(discharge_counts)
    rates = list(charge_rates) + list(discharge_rates)

    # The context manager closes the file even if a write fails.
    with open(csvurl, 'a', encoding='utf-8-sig') as f:
        f.write("车辆编号:" + str(vid) + "\n")
        f.write("均方根值" + "," + str(rmes_charge) + "," + str(rmes_discharge)+"\n")
        f.write("电流, 频次, 频率\n")
        for current, count, rate in zip(currents, frequency, rates):
            f.write(str(current) + "," + str(count) + "," + str(rate) + "\n")
    return "{\"picurl\":\"" + str(picurl) + "\",\"csvurl\":\"" + str(csvurl) + "\",\"code\":\"0\",\"message\":\"成功\"}"
            time_differences_even = []
            time_differences_odd = []
            for i in range(4):
                for j in range(4):
                    if j >= i:
                        continue
                    else:
                        expected_time_difference = numpy.array(expected_time_differences)[[i in x[0] and j in x[0] for x in expected_time_differences]][0][1]
                        max_time_difference = numpy.array(max_time_differences)[[i in x[0] and j in x[0] for x in max_time_differences]][0][1]

                        plt.figure()
                        plt.suptitle('Cross Correlation Times Between Antenna %i and %i'%(i,j))

                        ax = plt.subplot(2,1,1)
                        n, bins, patches = plt.hist(delays_even[:,i,j],label=('Channel %i and %i'%(2*i,2*j)),bins=bins)
                        best_delay_even = (bins[numpy.argmax(n)+1] + bins[numpy.argmax(n)])/2.0
                        time_differences_even.append(((i,j),best_delay_even))

                        plt.xlabel('Delay (ns)',fontsize=16)
                        plt.ylabel('Counts',fontsize=16)
                        plt.axvline(expected_time_difference,c='r',linestyle='--',label='Expected Time Difference = %f'%expected_time_difference)
                        plt.axvline(-expected_time_difference,c='r',linestyle='--')
                        plt.axvline(max_time_difference,c='g',linestyle='--',label='max Time Difference = %f'%max_time_difference)
                        plt.axvline(-max_time_difference,c='g',linestyle='--')

                        plt.axvline(best_delay_even,c='c',linestyle='--',label='Best Time Difference = %f'%best_delay_even)
                        plt.legend(fontsize=16)


                        plt.subplot(2,1,2,sharex=ax)
Example #59
0
def plotDataHistogram(x, variableName):
    """Show a default-binned histogram of ``x`` titled 'Histogram of <variableName>'."""
    counts, edges, bars = plt.hist(x)
    heading = 'Histogram of ' + variableName
    plt.title(heading)
    plt.show()
Example #60
0
            print("data/" + sam + "_" + par + "_" + var + "_truth.txt")
            fig = plt.figure(num=None,
                             figsize=(ratiox, ratioy),
                             dpi=80,
                             facecolor='w',
                             edgecolor='k')

            binning = np.arange(lower_range, upper_range + 0.001,
                                (upper_range - lower_range) / nbin)
            binning = np.array([-1000, *binning, 1000])

            n, bins, a = plt.hist(ev,
                                  label=sam,
                                  bins=binning,
                                  lw=0.5,
                                  color="blue",
                                  fill=False,
                                  normed=False,
                                  range=(lower_range, upper_range),
                                  histtype='step')
            plt.xlim(lower_range, upper_range)
            plot_error_region2(n, np.sqrt(n), bins, "blue")

            ax = plt.gca()
            ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
            plt.grid(alpha=0.5)
            plt.xlabel(r"$" + var + "(" + par + ")" + r"$")
            plt.ylabel("N")
            plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
            plt.legend(loc="best")
            if ("dec" in sam):