def main():
    criticidad = {'sup': 0, 'med': 0, 'inf': 0}
    promedios = []
    for i in range(EXPERIMENTOS):
        tiempo = 0
        for j in range(CORRIDAS):
            # crack the eggs + scramble the eggs
            sup = np.random.uniform(2, 4) + np.random.exponential(4)
            # add the time to finish cooking the eggs
            sup += np.random.uniform(2, 4)
            med = np.random.uniform(6, 12)  # make toast + butter the toast
            inf = np.random.uniform(6, 12)  # fry the bacon
            valores = {'sup': sup , 'med': med, 'inf': inf}
            maximo = max(valores.values())
            k_maximo = kmaximo(valores, maximo)
            valores[k_maximo] += 1
            criticidad[k_maximo] += 1
            tiempo += valores[k_maximo]
        promedios.append(tiempo/CORRIDAS)
    desv = np.std(promedios)
    promedio = np.average(promedios)
    print "Valores: ", valores.values()
    print "Desvio %s " % desv
    print "Promedio %s" % promedio
    print "Intervalos de confianza %.2f <= u <= %.2f , con un 99%% de confianza" \
        % (promedio - 2.57 * desv, promedio + 2.57 * desv)
    for k, v in criticidad.iteritems():
        print "criticidad %s, %.2f %%" % (k, v*100.00/(CORRIDAS*EXPERIMENTOS))
    hist(promedios,6)
    show()
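# The example above assumes a few names that are not shown: the numpy/pylab
# imports, the EXPERIMENTOS and CORRIDAS constants, and the kmaximo helper.
# A minimal, hypothetical sketch of what they could look like:
import numpy as np
from matplotlib.pylab import hist, show

EXPERIMENTOS = 30  # assumed value, not given in the original
CORRIDAS = 100     # assumed value, not given in the original

def kmaximo(valores, maximo):
    """Return the key of the entry whose value equals the maximum."""
    for k, v in valores.items():
        if v == maximo:
            return k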
Example #2
def compareTwoUsers(data1, data2, outdir):
    """Compares data for two users. Currently plots difference in peaks for
  the users on presslengths for different keycodes."""

    def computePeakDifference(d1, d2):
        edges = findCommonEdges(d1, d2)
        h1, e = np.histogram(d1, bins=edges, normed=True)
        h2, e = np.histogram(d2, bins=edges, normed=True)
        a1, a2 = np.argmax(h1), np.argmax(h2)
        diff = (edges[a1] + edges[a1 + 1] - edges[a2] - edges[a2 + 1]) / 2.0
        return diff

    commonKeys = set(data1.keystrokePLs_key.keys()) & set(data2.keystrokePLs_key.keys())
    peakDiffs = []
    for key in commonKeys:
        dat1 = data1.keystrokePLs_key[key]
        dat2 = data2.keystrokePLs_key[key]
        peakDiffs.append(computePeakDifference(dat1, dat2))
    peakDiffs.append(computePeakDifference(data1.keystrokePLs, data2.keystrokePLs))
    edges = findCommonEdges(peakDiffs)
    plt.figure()
    plt.hist(peakDiffs, bins=edges)
    plt.title("Peak Differences for Keystroke PL for %s and %s" % (data1.user, data2.user))
    plt.xlabel("Time (seconds)")
    plt.savefig("%s/%s_%s_kPLpeakDiff.pdf" % (outdir, data1.user, data2.user))
    plt.close()
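# compareTwoUsers relies on a findCommonEdges helper that is not shown above.
# A plausible sketch (an assumption, not the original implementation): build one
# set of histogram bin edges spanning all of the samples passed in.
import numpy as np

def findCommonEdges(*samples):
    nbins = 50  # assumed bin count
    lo = min(np.min(s) for s in samples)
    hi = max(np.max(s) for s in samples)
    return np.linspace(lo, hi, nbins + 1)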
    def summary(self, file_idx=0, show_plot=False):

        print "Cluster output"
        s = self.cluster_membership.sum(0)
        nnz = (s>0).sum()
        
        print "Number of non-empty clusters: " + str(nnz) + " (of " + str(s.size) + ")"
        si = (self.cluster_membership).sum(0)
        print
        
        print "Size: count"
        for i in np.arange(0,si.max()+1):
            print str(i) + ": " + str((si==i).sum())
        t = (self.peak_data.possible.multiply(self.cluster_membership)).data
        t -= 1
        print
        
        print "Trans: count"
        for i in np.arange(len(self.peak_data.transformations)):
            print self.peak_data.transformations[i].name + ": " + str((t==i).sum())
        
        if show_plot:
            plt.figure()
            x = []
            cx = self.cluster_model.Z.tocoo()    
            for i,j,v in itertools.izip(cx.row, cx.col, cx.data):
                x.append(v)       
            x = np.array(x) 
    #         x = x[~np.isnan(x)]    
            plt.hist(x, 20)
            plt.title('Precursor mass clustering -- Z for file ' + str(file_idx))
            plt.xlabel('Probabilities')
            plt.ylabel('Count')
            plt.show()
Example #4
def icsd_progress():
    n = 60
    tasks = Task.objects.filter(project_set='icsd', entry__natoms__lte=n)

    data = tasks.values_list('entry__natoms', 'state')
    done = []
    failed = []
    idle = []
    running = []
    for task in data:
        if task[1] == 2:
            done.append(task[0])
        elif task[1] == 1:
            running.append(task[0])
        elif task[1] == 0:
            idle.append(task[0])
        elif task[1] == -1:
            failed.append(task[0])

    plt.hist([ done, running, failed, idle], histtype='barstacked',
            label=['done', 'running' ,'failed', 'waiting'],
            bins=n)#, cumulative=True)
    plt.legend(loc='best')

    plt.xlabel('# of atoms in primitive cell')
    plt.ylabel('# of entries')

    img = StringIO.StringIO()
    plt.savefig(img, dpi=75, bbox_inches='tight')
    data_uri = 'data:image/jpg;base64,'
    data_uri += img.getvalue().encode('base64').replace('\n', '')
    plt.close()
    return data_uri
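# Note: str.encode('base64') above only exists on Python 2. A hedged Python 3
# equivalent of the same idea (a hypothetical helper, not part of the original
# module), using io.BytesIO and the base64 module:
import io
import base64

def figure_to_data_uri():
    """Serialize the current matplotlib figure into a base64 data URI."""
    buf = io.BytesIO()
    plt.savefig(buf, format='png', dpi=75, bbox_inches='tight')
    plt.close()
    return 'data:image/png;base64,' + base64.b64encode(buf.getvalue()).decode('ascii')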
Example #5
 def tst_for_dataset(self, creator, filename):
   from dials.array_family import flex
   from dials.algorithms.shoebox import MaskCode
   print filename
   rlist = flex.reflection_table.from_pickle(filename)
   shoebox = rlist['shoebox']
   background = [sb.background.deep_copy() for sb in shoebox]
   success = creator(shoebox)
   assert(success.count(True) == len(success))
   diff = []
   for i in range(len(rlist)):
     mask = flex.bool([(m & MaskCode.Foreground) != 0 for m in shoebox[i].mask])
     px1 = background[i].select(mask)
     px2 = shoebox[i].background.select(mask)
     den = max([flex.mean(px1), 1.0])
     diff.append(flex.mean(px2 - px1) / den)
   diff = flex.double(diff)
   mv = flex.mean_and_variance(flex.double(diff))
   mean = mv.mean()
   sdev = mv.unweighted_sample_standard_deviation()
   try:
     assert(abs(mean) < 0.01)
   except Exception:
     print "Mean: %f, Sdev: %f", mean, sdev
     from matplotlib import pylab
     pylab.hist(diff)
     pylab.show()
     raise
  def plotHist(self, parsList=None):
    """
      Plots distributions for a number of traces.

      Parameters
      ----------
      parsList : string or list of strings, optional,
          Refers to a parameter name or a list of parameter names.
          If None, all available parameters are plotted.
    """
    if not ic.check["matplotlib"]:
      PE.warn(PE.PyARequiredImport("To use 'plotHists', matplotlib has to be installed.", \
                                   solution="Install matplotlib."))
      return
    if isinstance(parsList, basestring):
      parsList = [parsList]
    tracesDic = {}
    if parsList is not None:
      for parm in parsList:
        self._parmCheck(parm)
        tracesDic[parm] = self[parm]
    else:
      # Use all available traces
      for parm in self.availableParameters():
        tracesDic[parm] = self[parm]

    cols, rows = self.__plotsizeHelper(len(tracesDic))

    for i,[pars,trace] in enumerate(tracesDic.items()):
      if len(tracesDic) > 1:
        plt.subplot(rows, cols, i+1)
      plt.hist(trace, label=pars + " hist")
      plt.legend()
def scaleTestMinFinding():
	xs = range(10)
	distances = []
	noise = 3.5
	n = 1000000
	for i in range(n):
		a = random()
		b = random()
		c = random()
		ys = [x*x*a + x*b + c + random() * noise for x in xs]
		
		#print a, b, c, polynomialFit(xs, ys)[::-1]
		minExp, unc = polynomialFindMinimum(xs, ys, returnErrors = True)
		minCalc = -b/(2.0*a)
		dist = (minCalc - minExp) / unc
		#print minCalc, minExp, unc, dist
		distances.append(dist)
		
	print 'mean: %f' % stats.mean(distances)
	print 'stdDev: %f' % stats.stdDev(distances)
	for sigma in [1, 2, 3]:
		print 'With %d sigma: %f%%' % (sigma, 100.0 * sum([int(abs(d) < sigma) for d in distances]) / n)
	
	pylab.hist(distances, bins = 50, range = (-5, 5))
	pylab.show()
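# scaleTestMinFinding uses polynomialFit / polynomialFindMinimum helpers that are
# not shown. A rough sketch of polynomialFindMinimum, under the assumption that a
# quadratic is fitted and its vertex -b/(2a) is returned with a propagated error:
import numpy as np

def polynomialFindMinimum(xs, ys, returnErrors=False):
    coeffs, cov = np.polyfit(xs, ys, 2, cov=True)
    a, b, _ = coeffs
    x_min = -b / (2.0 * a)
    if not returnErrors:
        return x_min
    # crude first-order error propagation, ignoring the a-b covariance
    da, db = np.sqrt(cov[0, 0]), np.sqrt(cov[1, 1])
    unc = abs(x_min) * np.sqrt((da / a) ** 2 + (db / b) ** 2)
    return x_min, unc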
Example #8
def plot_call_rate(c):
    # Histogram
    P.clf()
    P.figure(1)
    P.hist(c[:,1], normed=True)
    P.xlabel('Call Rate')
    P.ylabel('Portion of Variants')
    P.savefig(os.environ['OBER'] + '/doc/imputation/cgi/call_rate.png')

####################################################################################
#if __name__ == '__main__':
#    # Input parameters
#    file_name = sys.argv[1]  # Name of data file with MAF, call rates
#
#    # Load data
#    c = np.loadtxt(file_name, dtype=np.float16)
#
#    # Breakdown by call rate (proportional to the #samples, 1415)
#    plot_call_rate(c)
#    h = np.histogram(c[:,1])
#    a = np.flipud(np.cumsum(np.flipud(h[0])))/float(c.shape[0])
#    print np.concatenate((h[1][:-1][newaxis].transpose(), a[newaxis].transpose()), axis=1)

    # Breakdown by minor allele frequency
    maf_n = 20
    maf_bins = np.linspace(0, 0.5, maf_n + 1)
    maf_bin = np.digitize(c[:,0], maf_bins)
    d = c.astype(np.float64)
    mean_call_rate = np.array([(1.*np.mean(d[maf_bin == i,1])) for i in xrange(len(maf_bins))])
    h = (maf_bins[-1] - maf_bins[0]) / maf_n
    P.bar(maf_bins - h, mean_call_rate, width=h)

    P.figure(2)
    P.bar(maf_bins - h, mean_call_rate, width=h)
    P.savefig(os.environ['OBER'] + '/doc/imputation/cgi/call_rate_maf.png')
Example #9
def mood_hist(index):
    n_bins = 10
    data1 = pd.read_csv('data/split_class/large_IGNORE_406_mood_+1.txt', sep=' ', header=None)
    data2 = pd.read_csv('data/split_class/large_IGNORE_406_mood_-1.txt', sep=' ', header=None)

    mood_sum1 = pd.Series([0] * data1.shape[0])
    mood_sum2 = pd.Series([0] * data2.shape[0])
    # for i in np.arange(1, 7):
    for i in np.arange(1, 6):
        print(i)
        mood_sum1 += data1[i]
        mood_sum2 += data2[i]

    col1 = data1[index] / mood_sum1
    col2 = data2[index] / mood_sum2
    print(col1, col2)
    print(col1.mean())
    # print(col1.describe())
    print(col2.mean())
    # print(col2.describe())
    plt.subplot(1, 2, 1)
    plt.hist(col1, n_bins, alpha=0.8, color='r', linewidth=1.5)
    # plt.xlim(0, 0.5)

    plt.ylabel("frequency")
    plt.subplot(1, 2, 2)
    plt.hist(col2, n_bins, alpha=0.8, color='b', linewidth=1.5)
    # plt.xlim(0, 0.5)
    # plt.ylabel("frequency")
    plt.show()
Example #10
def shopping_hist():
    n_bins = 100
    data1 = pd.read_csv('data/split_class/large_IGNORE_404_shopping_+1.txt', sep=' ', header=None)
    data2 = pd.read_csv('data/split_class/large_IGNORE_404_shopping_-1.txt', sep=' ', header=None)

    shopping1 = data1[2]
    shopping2 = data2[2]
    for i in np.arange(3, 17):
        shopping1 += data1[i]
        shopping2 += data2[i]

    col1 = shopping1 / data1[1]
    print(col1.describe())
    col2 = shopping2 / data2[1]
    print(col2.describe())

    plt.subplot(1, 2, 1)
    plt.hist(col1, n_bins, normed=True, stacked=True, alpha=0.8, color='r', linewidth=1.5)
    plt.xlim(0, 0.5)
    plt.ylabel("frequency")
    plt.subplot(1, 2, 2)
    plt.hist(col2, n_bins, normed=True, stacked=True, alpha=0.8, color='b', linewidth=1.5)
    plt.xlim(0, 0.5)
    plt.ylabel("frequency")

    # plt.hist(data1[1], n_bins, normed=1, alpha=0.6, color='b', cumulative=True)
    # plt.hist(data2[1], alpha=0.6, color='r')
    plt.show()
Example #11
def hist_extraversion():
    '''
    外倾性分数的分布, 及其正态分布曲线
    :return:
    '''
    n_bins = 10
    data = pd.read_csv('data/regress_train_data.txt', sep=' ', header=None)
    ext = data[1]
    mu = ext.mean()
    sigma = ext.std()
    print(mu, sigma)
    fig = plt.figure(figsize=(10, 8))
    # --- for *.eps --- #
    fig.set_rasterized(True)
    # plt.title("The distribution of score on extraversion")
    plt.xlabel("$Score\ on\ extraversion$", fontsize=20)
    plt.ylabel("$Probability$", fontsize=20)
    plt.grid(True)
    plt.hist(ext, n_bins, normed=1, alpha=0.8, rwidth=0.85)
    x = np.linspace(0, 60, 100)
    y = mlab.normpdf(x, mu, sigma)
    plt.xlim(0, 60)
    plt.ylim(0, 0.055)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.plot(x, y, 'r--')
    # plt.tight_layout()
    plt.savefig('figure/ext_dist.eps', dpi=300)
    plt.show()
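# Note: mlab.normpdf has been removed from recent matplotlib releases. The same
# normal curve can be computed with scipy.stats instead (a sketch, assuming scipy
# is available):
#
#     from scipy.stats import norm
#     y = norm.pdf(x, mu, sigma)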
def hist_shortest_path(g, filename, show=0):

    g.delete_vertices(
        [i for i, degree in enumerate(g.degree()) if degree == 0])

    # print g.degree()
    shortest_paths = g.shortest_paths_dijkstra(mode='all')
    # print shortest_paths
    # ig.plot(g)
    plt.hist(
        np.hstack(shortest_paths),
        range=[0, 5],
        bins=5,
        rwidth=1.,
        align='left',
        normed=True,
    )
    plt.xlabel('Number of steps')
    plt.ylabel('Proportion')

    plt.title(
        'Number of steps to each member (mean: %.2f)'
        % np.mean(shortest_paths))
    if show:
        plt.show()
    else:
        plt.savefig(filename)
def plot_age_distribution_over_time(g_states, filename=None):
    num_plots = len(g_states)
    fig = plt.gcf()

    max_cols = 5
    if int(np.ceil(np.sqrt(num_plots))) >= max_cols:
        cols = max_cols
    else:
        cols = int(np.ceil(np.sqrt(num_plots)))

    rows = int(np.ceil(num_plots/cols))

    fig.set_size_inches(14, 5*rows)

    for i, g in enumerate(g_states):

        plt.subplot(rows, int(np.ceil(num_plots/float(rows))), i+1)

        plt.hist(
            g.vs['age'],
            bins=27,
            range=[18, 45],
            normed=True,
            label='t: %i' % i,
        )
        plt.legend()

    if filename:
        plt.savefig(filename)
    fig.clf()
def average_diode_sep():
    clust_eps = 0.2
    min_dist = 2.0
    min_samples = 3.0
    thold = 240
    
    FRAMES =  np.arange(4000)*2
    
    dataset = "bukowski_02.C"
    cf = pickle.load(open(os.path.join(ddir(dataset), 'config.pickle')))
    region = pickle.load(open(os.path.join(ddir(dataset), 'region.pickle')))
    
    env = util.Environmentz(cf['field_dim_m'], cf['frame_dim_pix'])
    x_min, y_min = env.gc.real_to_image(region['x_pos_min'], region['y_pos_min'])
    x_max, y_max = env.gc.real_to_image(region['x_pos_max'], region['y_pos_max'])
    print x_min, x_max
    print y_min, y_max
    if y_min < 0:
        y_min = 0
    frame_images = organizedata.get_frames(ddir(dataset), FRAMES)
    num_clusters = np.zeros(len(FRAMES))
    dists = []
    for fi, im in enumerate(frame_images):
        im = im[y_min:y_max+1, x_min:x_max+1]

        centers = frame_clust_points(im, 240, min_dist,
                                     clust_eps, min_samples)

        num_clusters[fi] = len(centers)
        if len(centers) == 2:
            dists.append(distance.pdist(centers)[0])
    dists = np.array(dists)
    pylab.hist(dists[dists < 50], bins=20)

    pylab.savefig("average_diode_sep.%s.png" % dataset, dpi=300)
Example #15
def study_redmapper_lrg_3d(hemi='north'):
    # create 3d grid object
    grid = grid3d(hemi=hemi)
    
    # load SDSS data
    sdss = load_sdss_data_both_catalogs(hemi)
    
    # load redmapper catalog
    rm = load_redmapper(hemi=hemi)
    
    # get XYZ positions (Mpc) of both datasets
    x_sdss, y_sdss, z_sdss = grid.xyz_from_radecz(sdss['ra'], sdss['dec'], sdss['z'], applyzcut=False)
    x_rm, y_rm, z_rm = grid.xyz_from_radecz(rm['ra'], rm['dec'], rm['z_spec'], applyzcut=False)
    pos_sdss = np.vstack([x_sdss, y_sdss, z_sdss]).T
    pos_rm = np.vstack([x_rm, y_rm, z_rm]).T

    # build a couple of KDTree's, one for SDSS, one for RM.
    from sklearn.neighbors import KDTree
    tree_sdss = KDTree(pos_sdss, leaf_size=30)
    tree_rm = KDTree(pos_rm, leaf_size=30)

    lrg_counts = tree_sdss.query_radius(pos_rm, 100., count_only=True)
    pl.clf()
    pl.hist(lrg_counts, bins=50)
    
    
    ipdb.set_trace()
Example #16
	def behavioral_analysis(self):
		"""some analysis of the behavioral data, such as mean percept duration, 
		dominance ratio etc"""
		self.assert_data_intern()
		# only do anything if this is not a no report trial
		if 'RP' in self.file_alias:
			all_percepts_and_durations = [[],[]]
		else:
			all_percepts_and_durations = [[],[],[]]
		if not 'NR' in self.file_alias: #  and not 'RP' in self.file_alias
			for x in range(len(self.trial_indices)):
				if len(self.events) != 0:
					events_this_trial = self.events[(self.events['EL_timestamp'] > self.timestamps_pt[x][0]) & (self.events['EL_timestamp'] < self.timestamps_pt[x][-1])]
					for sc, scancode in enumerate(self.scancode_list):
						percept_start_indices = np.arange(len(events_this_trial))[np.array(events_this_trial['scancode'] == scancode)]
						percept_end_indices = percept_start_indices + 1
						
						# convert to times
						start_times = np.array(events_this_trial['EL_timestamp'])[percept_start_indices] - self.timestamps_pt[x,0]
						if len(start_times) > 0:
							if percept_end_indices[-1] == len(events_this_trial):
								end_times = np.array(events_this_trial['EL_timestamp'])[percept_end_indices[:-1]] - self.timestamps_pt[x,0]
								end_times = np.r_[end_times, len(self.from_zero_timepoints)]
							else:
								end_times = np.array(events_this_trial['EL_timestamp'])[percept_end_indices] - self.timestamps_pt[x,0]

							these_raw_event_times = np.array([start_times + self.timestamps_pt[x,0], end_times + self.timestamps_pt[x,0]]).T
							these_event_times = np.array([start_times, end_times]).T + x * self.trial_duration * self.sample_rate
							durations = np.diff(these_event_times, axis = -1)

							all_percepts_and_durations[sc].append(np.hstack((these_raw_event_times, these_event_times, durations)))

			self.all_percepts_and_durations = [np.vstack(apd) for apd in all_percepts_and_durations]

			# last element is duration, sum inclusive and exclusive of transitions
			total_percept_duration = np.concatenate([apd[:,-1] for apd in self.all_percepts_and_durations]).sum()
			total_percept_duration_excl = np.concatenate([apd[:,-1] for apd in [self.all_percepts_and_durations[0], self.all_percepts_and_durations[-1]]]).sum()

			self.ratio_transition = 1.0 - (total_percept_duration_excl / total_percept_duration)
			self.ratio_percept_red = self.all_percepts_and_durations[0][:,-1].sum() / total_percept_duration_excl

			self.red_durations = np.array([np.mean(self.all_percepts_and_durations[0][:,-1]), np.median(self.all_percepts_and_durations[0][:,-1])])
			self.green_durations = np.array([np.mean(self.all_percepts_and_durations[-1][:,-1]), np.median(self.all_percepts_and_durations[-1][:,-1])])
			self.transition_durations = np.array([np.mean(self.all_percepts_and_durations[1][:,-1]), np.median(self.all_percepts_and_durations[1][:,-1])])

			self.ratio_percept_red_durations = self.red_durations / (self.red_durations + self.green_durations)
			plot_mean_or_median = 0 # mean

			f = pl.figure(figsize = (8,4))
			s = f.add_subplot(111)
			for i in range(len(self.colors)):
				pl.hist(self.all_percepts_and_durations[i][:,-1], bins = 20, color = self.colors[i], histtype='step', lw = 3.0, alpha = 0.4, label = ['Red', 'Trans', 'Green'][i])
			pl.hist(np.concatenate([self.all_percepts_and_durations[0][:,-1], self.all_percepts_and_durations[-1][:,-1]]), bins = 20, color = 'k', histtype='step', lw = 3.0, alpha = 0.4, label = 'Percepts')
			pl.legend()
			s.set_xlabel('time [ms]')
			s.set_ylabel('count')
			sn.despine(offset=10)
			s.annotate("""ratio_transition: %1.2f, \nratio_percept_red: %1.2f, \nduration_red: %2.2f,\nduration_green: %2.2f, \nratio_percept_red_durations: %1.2f"""%(self.ratio_transition, self.ratio_percept_red, self.red_durations[plot_mean_or_median], self.green_durations[plot_mean_or_median], self.ratio_percept_red_durations[plot_mean_or_median]), (0.5,0.65), textcoords = 'figure fraction')
			pl.tight_layout()
			pl.savefig(os.path.join(self.analyzer.fig_dir, self.file_alias + '_dur_hist.pdf'))
Example #17
    def test_flux(self):
        tol = 150.
        inputcat = catalog.read(os.path.join(self.args.tmp_path, 'ccd_1.cat'))
        pixradius = 3*self.target["psf"]/self.instrument["PIXEL_SCALE"]
        positions = list(zip(inputcat["X_IMAGE"]-1, inputcat["Y_IMAGE"]-1))
        fluxes = image.simple_aper_phot(self.im[1], positions, pixradius)
        sky_background = image.annulus_photometry(self.im[1], positions,
        	pixradius+5, pixradius+8)

        total_bg_pixels = np.shape(image.build_annulus_mask(pixradius+5, pixradius+8, positions[0]))[1]
        total_source_pixels = np.shape(image.build_circle_mask(pixradius,
        	positions[0]))[1]

        estimated_fluxes = fluxes - sky_background*1./total_bg_pixels*total_source_pixels

        estimated_magnitude = image.flux2mag(estimated_fluxes,
        	self.im[1].header['SIMMAGZP'], self.target["exptime"])

        expected_flux = image.mag2adu(17.5, self.target["zeropoint"][0],
        	exptime=self.target["exptime"])

        p.figure()
        p.hist(fluxes, bins=50)
        p.title('Expected flux: {:0.2f}, mean flux: {:1.2f}'.format(expected_flux, np.mean(estimated_fluxes)))
        p.savefig(os.path.join(self.figdir,'Fluxes.png'))

        assert np.all(np.abs(fluxes-expected_flux) < tol)
def PlotMtxError(Corr_w):
    max_val = 1
    min_val = -0.1

    AvCorr = np.sum(Corr_w, axis=0)
    dCorr = Corr_w - AvCorr
    errCorr = np.log10(np.sqrt(np.einsum("i...,i...", dCorr, dCorr)) / np.absolute(AvCorr) / np.sqrt(Corr_w.shape[0]))
    # print errCorr.shape
    # print errCorr

    plt.rcParams.update({"font.size": 6, "font.weight": "bold"})
    for i in xrange(errCorr.shape[0]):
        plt.subplot(2, 7, i + 1)
        plt.title("SITE " + str(i + 1) + ":: \nHistogram of errors in corr. mtx.")
        plt.hist(errCorr[i, :, :].flatten(), 256, range=(min_val, max_val))
        plt.xlabel("log_10(sigma)")
        plt.ylabel("Count")

        plt.subplot(2, 7, i + 7 + 1)
        plt.imshow(errCorr[i, :, :], vmin=min_val, vmax=max_val)
        cbar = plt.colorbar(shrink=0.25, aspect=40)
        cbar.set_label("log_10(sigma)")
        plt.set_cmap("gist_yarg")
        plt.title("SITE " + str(i + 1) + ":: \nError in corr. matx. values")
        plt.xlabel("Site i")
        plt.ylabel("Site j")
    plt.show()
 def EstimateDensity(self,name,df,histogram,f,s,ax):
     # if the desired output is in Histogram format
     if(histogram):
         finRes = []
         lab = []
         for i in xrange(5):
             res = np.array(df[ df[f] == i][s])
             if(res.shape[0]>0):
                 finRes.append(res)
                 lab.append(name[0]+ ' = ' + str(i))
         pl.hist(finRes, bins=2, normed=True, histtype='bar',label = lab)
         
     # if the desired output is simple plot
     else:
         for i in xrange(5):
             res = np.array(df[ df[f] == i][s])
             if(res.shape[0]>0):
                 res = res.reshape(res.shape[0],1)
                 X_plot = np.array(np.linspace(-1, 5,20)).reshape(20,1)
                 kde= KernelDensity(kernel='exponential', bandwidth=0.05)
                 kde.fit(res)
                 log_dens = kde.score_samples(X_plot)
                 ax.plot(X_plot,np.exp(log_dens),label=name[0]+ ' = ' + str(i))        
     ax.legend()
     ax.set_title(name[1] + " distribution for changing " + name[0])
Example #20
    def fit_plot(self, data, topn=0, bins=20):
        """ Create a plot. """
        from matplotlib import pylab as pl

        distros = self.get_topn(topn)

        xx = numpy.linspace(data.min(), data.max(), 300)

        table = []
        nparms = max(len(x.parms) for x in distros)
        tcolours = []
        for dd in distros:
            patch = pl.plot(xx, [dd.pdf(p) for p in xx], label='%10.2f%% %s' % (100.0*dd.rss/dd.dss, dd.name))
            row = ['', dd.name, '%10.2f%%' % (100.0*dd.rss/dd.dss,)] + ['%0.2f' % x for x in dd.parms]
            while len(row) < 3 + nparms:
                row.append('')
            table.append(row)
            tcolours.append([patch[0].get_markerfacecolor()] + ['w'] * (2+nparms))

        # add a histogram of the data
        pl.hist(data, bins=bins, normed=True)
        tab = pl.table(cellText=table, cellColours=tcolours,
                       colLabels=['', 'Distribution', 'Res. SS/Data SS'] + ['P%d' % (x + 1,) for x in range(nparms)],
                       bbox=(0.0, 1.0, 1.0, 0.3))
                 #loc='top'))
        #pl.legend(loc=0)
        tab.auto_set_font_size(False)
        tab.set_fontsize(10.)
    def handle(self, *args, **options):
        try:
            from matplotlib import pylab as pl
            import numpy as np
        except ImportError:
            raise Exception('Be sure to install requirements_scipy.txt before running this.')

        all_names_and_counts = RawCommitteeTransactions.objects.all().values('attest_by_name').annotate(total=Count('attest_by_name')).order_by('-total')
        all_names_and_counts_as_tuple_and_sorted = sorted([(row['attest_by_name'], row['total']) for row in all_names_and_counts], key=lambda row: row[1])
        print "top ten attestors:  (name, number of transactions they attest for)"
        for row in all_names_and_counts_as_tuple_and_sorted[-10:]:
            print row

        n_bins = 100
        filename = 'attestor_participation_distribution.png'

        x_max = all_names_and_counts_as_tuple_and_sorted[-31][1]  # eliminate top outliers from hist
        x_min = all_names_and_counts_as_tuple_and_sorted[0][1]

        counts = [row['total'] for row in all_names_and_counts]
        pl.figure(1, figsize=(18, 6))
        pl.hist(counts, bins=np.arange(x_min, x_max, (float(x_max)-x_min)/100) )
        pl.title('Histogram of Attestor Participation in RawCommitteeTransactions')
        pl.xlabel('Number of transactions a person attested for')
        pl.ylabel('Number of people')
        pl.savefig(filename)
Example #22
def distance_to_purchase_histogram(purchases):
    distances = calculate_distance_to_purchase_histogram(purchases)
    log_distances = [np.log10(0.1+d) for d in distances if d is not None]
    plt.hist(log_distances, 60, alpha=0.5)
    plt.xlabel('$log_{10}$ ( distances in miles )')
    plt.title('Distances between purchase and billing address')
    return distances
Example #23
def plotMassFunction(im, pm, outbase, mmin=9, mmax=13, mstep=0.05):
    """
    Make a comparison plot between the input mass function and the 
    predicted projected correlation function
    """
    plt.clf()

    nmbins = int(round(( mmax - mmin ) / mstep))
    mbins = np.logspace( mmin, mmax, nmbins )
    mcen = ( mbins[:-1] + mbins[1:] ) /2
    
    plt.xscale( 'log', nonposx = 'clip' )
    plt.yscale( 'log', nonposy = 'clip' )
    
    ic, e, p = plt.hist( im, mbins, label='Original Halos', alpha=0.5, normed = True)
    pc, e, p = plt.hist( pm, mbins, label='Added Halos', alpha=0.5, normed = True)
    
    plt.legend()
    plt.xlabel( r'$M_{vir}$' )
    plt.ylabel( r'$\frac{dN}{dM}$' )
    #plt.tight_layout()
    plt.savefig( outbase+'_mfcn.png' )
    
    mdtype = np.dtype( [ ('mcen', float), ('imcounts', float), ('pmcounts', float) ] )
    mf = np.ndarray( len(mcen), dtype = mdtype )
    mf[ 'mcen' ] = mcen
    mf[ 'imcounts' ] = ic
    mf[ 'pmcounts' ] = pc

    fitsio.write( outbase+'_mfcn.fit', mf )
Example #24
    def plot_fitted_model(self, sample, data, fig=None, xmin=-1, xmax=12, npoints=1000, nbins=100, epsilon=0.25):
        """Plot fitted model"""

        # fetch group
        group = [i for i, item in enumerate(data.groups.items()) if sample in item[1]][0]

        # fetch data
        counts = data.counts_norm[sample].values.astype('float')
        counts[counts < 1] = epsilon
        counts = np.log(counts)

        # compute fitted model
        x = np.reshape(np.linspace(xmin, xmax, npoints), (-1, 1))
        xx = np.exp(x)
        loglik = _compute_loglik(xx, self.log_phi, self.log_mu, self.beta[self.z[group]])
        y = xx * np.exp(loglik) / self.nfeatures

        # plot
        fig = pl.figure() if fig is None else fig
        pl.figure(fig.number)

        pl.hist(counts, nbins, histtype='stepfilled', linewidth=0, normed=True, color='gray')
        pl.plot(x, np.sum(y, 1), 'r')

        pl.grid()
        pl.xlabel('log counts')
        pl.ylabel('density')
        pl.legend(['model', 'data'], loc=0)
        pl.tight_layout()
Example #25
def plotFeaturePDF(ift, pft, outbase, fmin=0.0, fmax=1.0, fstep=0.01):
    """
    Plot a comparison between the input feature distribution and the 
    feature distribution of the predicted halos
    """
    plt.clf()
    nfbins = int(round(( fmax - fmin ) / fstep))
    fbins = np.logspace( fmin, fmax, nfbins )
    fcen = ( fbins[:-1] + fbins[1:] ) / 2

    plt.xscale( 'log', nonposx='clip' )
    plt.yscale( 'log', nonposy='clip' )
    
    ic, e, p = plt.hist( ift, fbins, label='Original Halos', alpha=0.5, normed=True )
    pc, e, p = plt.hist( pft, fbins, label='Added Halos', alpha=0.5, normed=True )

    plt.legend()
    plt.xlabel( r'$\delta$' )
    plt.savefig( outbase+'_fpdf.png' )

    fdtype = np.dtype( [ ('fcen', float), ('ifcounts', float), ('pfcounts', float) ] )
    fd = np.ndarray( len(fcen), dtype = fdtype )
    fd[ 'fcen' ] = fcen
    fd[ 'ifcounts' ] = ic
    fd[ 'pfcounts' ] = pc

    fitsio.write( outbase+'_fpdf.fit', fd )
def compareHist(data1, data2,_title,tag1='data1', tag2='data2'):
    pl.figure()
    pl.show()
    pl.hist(data1, normed=True, alpha=0.5, color='b')
    pl.hist(data2, normed=True, alpha=0.5, color='r')

    # Fit a normal distribution to the data:
    mu1, std1 = stats.norm.fit(data1)
    xmin, xmax = pl.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = stats.norm.pdf(x, mu1, std1)
    pl.plot(x, p, 'k', linewidth=2, color='b')

    # Fit a normal distribution to the data:
    mu2, std2 = stats.norm.fit(data2)
    xmin, xmax = pl.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = stats.norm.pdf(x, mu2, std2)
    pl.plot(x, p, 'k', linewidth=2, color='r')

    pl.title(_title)
    pl.savefig(data_DIR + '/'+ _title + '.png',bbox_inches='tight')

    pl.close()
    return
Example #27
def study_redmapper_2d():
    # I just want to know the typical angular separation for RM clusters.
    # I'm going to do this in a lazy way.
    hemi = 'north'
    rm = load_redmapper(hemi=hemi)
    ra = rm['ra']
    dec = rm['dec']
    ncl = len(ra)
    dist = np.zeros((ncl, ncl))
    for i in range(ncl):
        this_ra = ra[i]
        this_dec = dec[i]
        dra = this_ra-ra
        ddec = this_dec-dec
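        # flat-sky approximation: scale the RA offset by cos(dec) so that dd below
        # is an angular separation in degrees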
        dxdec = dra*np.cos(this_dec*np.pi/180.)
        dd = np.sqrt(dxdec**2. + ddec**2.)
        dist[i,:] = dd
        dist[i,i] = 99999999.
    d_near_arcmin = dist.min(0)*60.
    pl.clf(); pl.hist(d_near_arcmin, bins=100)
    pl.title('Distance to Nearest Neighbor for RM clusters')
    pl.xlabel('Distance (arcmin)')
    pl.ylabel('N')
    fwhm_planck_217 = 5.5 # arcmin
    sigma = fwhm_planck_217/2.355
    frac_2sigma = 1.*len(np.where(d_near_arcmin>2.*sigma)[0])/len(d_near_arcmin)
    frac_3sigma = 1.*len(np.where(d_near_arcmin>3.*sigma)[0])/len(d_near_arcmin)
    print '%0.3f percent of RM clusters are separated by 2-sigma_planck_beam'%(100.*frac_2sigma)
    print '%0.3f percent of RM clusters are separated by 3-sigma_planck_beam'%(100.*frac_3sigma)    
    ipdb.set_trace()
def run_catalogue(mag_cut,file_dir="",OUTDIR="./out"):
	#file_dir = "/data3/scratch/bcc_v1" 
	#file_dir = ""
	#OUTDIR = "./out"
	title_in = ""

	import numpy as np
	import scipy as sp
	import matplotlib
	import matplotlib.pylab as plt
	import os
	import pylab as p
	import rdfits as r
	import mytools
	import sys

	if not os.path.exists(OUTDIR):
		os.makedirs(OUTDIR)
		title = str(title_in)
	newOUTDIR = OUTDIR+"/"

	import pyfits as pf
	table1 = pf.open(file_dir+"catalogue.fits")
	cols = table1[1].data

	z=cols["Z"]
	RA=cols["RA"]
	DEC=cols["DEC"]
	GAMMA1=cols["S1"]
	GAMMA2=cols["S2"]
	TMAGr = cols["TMAGr"]
	weights = cols["MVIR"]
	
	bg = [(z >.5) & (z < 1.5) & (TMAGr < mag_cut)] # background galaxies are where z > zcut = .5
	RAbg = RA[bg]
	DECbg = DEC[bg]
	GAMMA1bg = GAMMA1[bg]
	GAMMA2bg = GAMMA2[bg]
	weightsbg = weights[bg]
	zbg = z[bg]

	fg = [(z < .5) & (TMAGr < mag_cut)] # foreground galaxies are where z < zcut = .5
	RAfg = RA[fg]
	DECfg = DEC[fg]
	GAMMA1fg = GAMMA1[fg]
	GAMMA2fg = GAMMA2[fg]
	weightsfg = weights[fg]
	zfg = z[fg]
        #print fg

	fig = plt.figure()
	plt.hist(zbg,30, normed=0)
	plt.xlabel("redshift")
	plt.ylabel("Source Distribution Counts")
	plt.title("Z_cut = 0.5")
	fig.savefig("source_distribution.png")
	
	mytools.write_fits_table(OUTDIR+'foreground.fits', ['z','RA','DEC','W'], [zfg,RAfg,DECfg,weightsfg])
	mytools.write_fits_table(OUTDIR+'background.fits', ['RA','DEC','S1','S2','W'], [RAbg,DECbg,GAMMA1bg,GAMMA2bg,weightsbg])
Example #29
def gini_after_action(gini_coeff_before, n_population, n_affected,
                      percentile_before, income_increase, seed=42,
                      do_plot=False):
    """
    See how the Gini coefficient changes if you take some
    segment of the population and make them richer/poorer
    :param gini_coeff_before: initial gini coefficient
    :param n_population: size of population
    :param n_affected: number of people affected
    :param percentile_before: percentile of income at start
    :param income_increase: multiplicative factor of increase of income
    :return:
    """

    pop_max = 1e7
    if n_population > pop_max:
        # scale both numbers down to make it faster
        scale = pop_max/float(n_population)
        n_population = scale * n_population
        n_affected = scale * n_affected

    n_population = int(round(n_population))
    n_affected = int(round(n_affected))

    alpha = gini_to_pareto_alpha(gini_coeff_before)
    x_mode = 1.0
    income = sorted(sample_pareto(n_population, x_mode, alpha, seed=seed))
    index_middle = percentile_before*n_population
    index_start = index_middle - n_affected/2
    index_end = index_start + n_affected

    def adjust(i, inc):
        if i >= index_start and i < index_end:
            return income_increase*inc
        return inc

    income_adjusted = [adjust(i, inc) for i, inc in enumerate(income)]

    gini_before = gini(income)
    gini_after = gini(income_adjusted)
    tol = 1e-8
    if n_population > 10000:
        assert abs(gini_coeff_before - gini_before) < tol

    print 'gini before: %s' % gini_before
    print 'gini after: %s' % gini_after

    if do_plot:
        from matplotlib import pylab as plt
        plt.clf()
        income_max = 1000
        income_cut = [i for i in income if i < income_max]
        income_adjusted_cut = [i for i in income_adjusted if i < income_max]

        range = (0, 10)
        n_bins = 200
        plt.hist(income_cut, n_bins, alpha=0.3, range=range, label="Before")
        plt.hist(income_adjusted_cut, n_bins, alpha=0.3, range=range, label="After")
    return gini_before, gini_after
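# The example above relies on three helpers (gini, gini_to_pareto_alpha and
# sample_pareto) that are not included. A hedged sketch of what they might look
# like, using the classical Pareto(alpha) identity Gini = 1 / (2*alpha - 1):
import numpy as np

def gini_to_pareto_alpha(g):
    """Invert Gini = 1 / (2*alpha - 1) for a Pareto distribution."""
    return 0.5 * (1.0 / g + 1.0)

def sample_pareto(n, x_mode, alpha, seed=None):
    """Draw n samples from a Pareto distribution with scale x_mode and shape alpha."""
    rng = np.random.RandomState(seed)
    return x_mode * (1.0 + rng.pareto(alpha, n))

def gini(incomes):
    """Sample Gini coefficient of a list of incomes."""
    x = np.sort(np.asarray(incomes, dtype=float))
    n = len(x)
    ranks = np.arange(1, n + 1)
    return 2.0 * np.sum(ranks * x) / (n * np.sum(x)) - (n + 1.0) / n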
Example #30
def threquency(housing_prices):

    pl.hist(housing_prices, 50, facecolor='green', alpha=0.75)
    pl.xlabel('House price')
    pl.ylabel('Frequency')
    pl.title('Frequency of housing prices')
#    pl.hist
    pl.show()
Example #31
D = Dists[:, 0]
index = 0

#with open(filename, 'rb') as f:
#    lines = f.readlines()
#    for l in lines:
#        myarray = np.fromstring(l, dtype=float, sep=',')
#
#        D[index] = myarray[index + 1:].min()
#
#        index += 1

#min_distances = D.min()

bins = range(int(D.max()) + 2)
outs = plt.hist(D, bins=bins, normed=True, cumulative=True)
N = outs[0]
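# N is the empirical CDF of the minimum distances (normed + cumulative), so the
# bar plot of 1 - N below is the survival function: the fraction of entries with
# at least that many mismatches.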
plt.figure()
plt.bar(bins[:-1], 1 - N)
plt.xlabel('Minimum mismatches')
plt.ylabel('CDF')

# Check manually for exact matches
filename = '/data/ForMimi/AllSgRNAsOct4'
seqs = [rec for rec in SeqIO.parse(filename, 'fasta')]

zeros = np.where(D == 0)[0]
matches = Dists[D == 0, 1].astype(np.int)

df = []
for (i, matching_pair) in izip(count(), izip(zeros, matches)):
path ='../res_test' # use your path
allFiles = glob.glob(path + "/*.csv")
concat_d = []
concat_t = []
i=1
n = np.ceil(len(allFiles)/2.)
plt.figure(figsize=[8,2*n])
for file_ in allFiles:
    df = pd.read_csv(file_,index_col='RowID')
    if('ProbabilityOfResponse' in df):
        data = df.loc[label.index]
        test = df.drop(label.index)
        test.to_csv('../submissions/singlemodel_'+file_.split('/')[-1]+'.csv')
        plt.subplot(n,2,i)   
        plt.hist(data['ProbabilityOfResponse'].values[label.values.ravel()==0],binsize,normed=True,alpha=0.5)
        plt.hist(data['ProbabilityOfResponse'].values[label.values.ravel()==1],binsize,normed=True,alpha=0.5)
#        plt.hist(test['ProbabilityOfResponse'].values,25)
        plt.title(file_.split('/')[-1])        
        plt.xlim([0,1])
        i+=1        
        concat_d.append(data.rename(columns={'ProbabilityOfResponse': file_.split('/')[-1]}))
        concat_t.append(test.rename(columns={'ProbabilityOfResponse': file_.split('/')[-1]}))
plt.show()
#%% 
train = pd.concat(concat_d,axis=1)
test = pd.concat(concat_t,axis=1)
plt.figure()
plt.hist(train.mean(1).values[label.values.ravel()==0],binsize,normed=True,alpha=0.5)
plt.hist(train.mean(1).values[label.values.ravel()==1],binsize,normed=True,alpha=0.5)
plt.xlim([0,1])
Example #33
from scipy import stats
import numpy as np
import matplotlib.pylab as plt
import chisquare

#f = file('AllServiceTimes.txt', 'r+')
f = file('AllInterarrivalTimes.txt', 'r+')
data = [float(x)
        for x in f.read().split(', ')]  #extract.extractData('data.txt')[0]

# plot normed histogram
plt.hist(data, normed=True)

# find minimum and maximum of xticks, so we know
# where we should compute theoretical distribution
xt = plt.xticks()[0]
xmin, xmax = min(xt), max(xt)
lnspc = np.linspace(xmin, xmax, len(data))

# Try the exponential distribution
aexpon, muExp = stats.expon.fit(data)
pdf_exp = stats.expon.pdf(lnspc, aexpon, muExp)
plt.plot(lnspc, pdf_exp, label="Exponential")

# Try the Erlang distribution
ae, be, muErl = stats.erlang.fit(data)
pdf_erl = stats.erlang.pdf(lnspc, ae, be, muErl)
plt.plot(lnspc, pdf_erl, label="Erlang")

# Try the gamma distribution
ag, bg, thetaGamma = stats.gamma.fit(data)
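# The gamma example appears to be truncated here. Following the pattern of the
# exponential and Erlang fits above, the continuation was presumably along these
# lines (an assumption, not recovered from the original):
#
#     pdf_gamma = stats.gamma.pdf(lnspc, ag, bg, thetaGamma)
#     plt.plot(lnspc, pdf_gamma, label="Gamma")
#     plt.legend()
#     plt.show()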
           label='Difficulty (minimum value)')
pylab.title('Hash Value vs Work')
pylab.ylabel('Hash Value (zero bits) (log2(hash))')
pylab.xlabel('Cumulative Work (est. hashes computed)')
pylab.legend(loc=4)

floor_diff = np.floor(dv[:, 2] - np.log2(dv[:, 1]))
pylab.figure(3)
pylab.clf()
pylab.scatter(dv[:, 0], floor_diff, s=0.1, label='Hash Values (bits)')
pylab.title('Hash Value - Difficulty')
pylab.ylabel('Hash Value (zero bits) (log2(hash))')
pylab.xlabel('Time (blocks)')
pylab.legend(loc=4)

pylab.figure(4)
pylab.clf()
pylab.hist(floor_diff,
           color="green",
           alpha=0.8,
           histtype='bar',
           ec='black',
           bins=range(0,
                      int(floor_diff.max()) + 1))
pylab.title('Hash Value - Difficulty Histogram')
pylab.yscale('log', basey=2)
pylab.xlabel('Hash Value - Difficulty')
pylab.ylabel('Counts')

pylab.show()
Example #35
import matplotlib.pylab as pyl
# line chart: plot, scatter plot: plot, histogram: hist
import numpy as npy
# generate random integers
data = npy.random.randint(1, 20, 500)  # low, high, count
print(data)

# generate normally distributed data
data2 = npy.random.normal(0, 0.1, 1000)  # mean, sigma, count
print(data2)

pyl.hist(data)
pyl.show()
# histtype options: 'bar', 'barstacked', 'step', 'stepfilled'
histtype = "stepfilled"
histtype
sty = pyl.arange(5, 25, 1)  # bin edges: start, stop, step
pyl.hist(
    data,
    sty,
)  # histtype defaults to 'bar' when not given
pyl.show()
#
# pyl.subplot(2,3,2)  # split the figure: rows, columns, active panel
# pyl.show()
#
# # draw into each subplot
# pyl.subplot(2,2,1)
# pyl.subplot(2,2,2)
# pyl.subplot(2,1,2)
# pyl.show()
Example #36
import matplotlib.pylab as plt
import numpy as np

x1 = np.random.normal(0, 0.8, 1000)
x2 = np.random.normal(-2, 1, 1000)
x3 = np.random.normal(3, 2, 1000)

plt.hist(x1, histtype='stepfilled', alpha=0.3, bins=40,
         density=True)  # (data, filled-step style with no bar edges, transparency, number of bins, normalize the y-axis)
plt.hist(x2, histtype='stepfilled', alpha=0.3, bins=40, density=True)
plt.hist(x3, histtype='stepfilled', alpha=0.3, bins=40, density=True)

plt.show()
Example #37
plt.figure(figsize=(16, 10))
plot_rate_sorted(s_sd, t_sd)
plt.savefig('activityrate__sorted' + name + '.png')

plt.figure(figsize=(16, 10))
count_vector = np.bincount(s_sd)
plot_rate_histogram(count_vector, simtime)
plt.savefig('histogram' + name + '.png')


def get_ccs(times, senders, n_sample=1000, bin_size=5.):
    unique_ids = np.unique(senders)
    bins = np.arange(wuptime, simtime + wuptime + bin_size, bin_size)
    cc = np.zeros(n_sample)
    for i in xrange(n_sample):
        sp1, sp2 = rand.sample(unique_ids, 2)
        psth1 = np.histogram(times[senders == sp1], bins)[0]
        psth2 = np.histogram(times[senders == sp2], bins)[0]
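        # off-diagonal element of the 2x2 correlation matrix = pairwise CC of the two PSTHs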
        cc[i] = np.corrcoef(psth1, psth2)[0][1]
    return cc


plt.figure(figsize=(16, 10))
cc = get_ccs(t_sd, s_sd, n_sample=20000)
plt.hist(cc, np.arange(-1, 1, 0.01))
plt.xlabel("correlation coefficient", fontsize=30)
plt.ylabel("counts", fontsize=30)
plt.title('CC mean {}'.format(np.mean(cc)))
plt.savefig('cc_histogram' + name + '.png')
Example #38
pyl.subplot(2,1,2)
pyl.plot(x,y,'or')
pyl.show()
print('***__***')
# distribution analysis
# range = max - min
# bin width = range / number of bins
avgScore_max = da2[3].max()
avgScore_min = da2[3].min()
comment_max = da2[6].max()
comment_min = da2[6].min()

avgScore_rg = avgScore_max - avgScore_min
comment_rg = comment_max - comment_min

avgScore_dst = avgScore_rg/10
comment_dst = comment_rg/10

avgScore_sty = np.arange(avgScore_min,avgScore_max,avgScore_dst)
comment_sty = np.arange(comment_min,comment_max,comment_dst)

pyl.subplot(2,1,1)
pyl.hist(da2[3],avgScore_sty)
#pyl.show()
pyl.subplot(2,1,2)
pyl.hist(da2[6],comment_sty)
pyl.show()
print("finished")


    ['a', 'b', 'c'], ['1', '1', '1']).astype(int)
data.generic_holiday.value_counts()

# In[55]:

data.duplicated(keep='first').sum()

# In[56]:

#Checking outliers
sns.boxplot(data=data, x=data["Revenue"])

# In[57]:

# Revenue Histogram
plt.hist(data.Revenue, bins=50, color='purple', edgecolor='black')
plt.title('Revenue')
plt.show()

# In[58]:

# Checking Outliers using IQR
Q1 = data["Revenue"].quantile(0.25)
Q3 = data["Revenue"].quantile(0.75)
IQR = Q3 - Q1
print("Q1=", Q1)
print("Q3=", Q3)
print("IQR=", IQR)
Lower_Whisker = Q1 - 1.5 * IQR
Upper_Whisker = Q3 + 1.5 * IQR
print("Lower whisker=", Lower_Whisker)
import random
import numpy as np
import matplotlib.pylab as plt
from matplotlib.pylab import hist, show

contenido = np.loadtxt("locationsY.txt")

plt.hist(contenido[:, 0], bins=15, color="gray")
plt.title("Latitud")
plt.show()

plt.hist(contenido[:, 1], bins=15)
plt.title("Longitud")
plt.show()

print np.argmax(hist(contenido[:, 0], bins=15)[0])
print np.argmax(hist(contenido[:, 1], bins=15)[0])
Example #41
plot_hist_TRT_Ranks(df_nonnan,cfg_tds)
#df_nonnan["date"] = df_nonnan["date"].astype(np.datetime64,copy=False)
prep.exploit_TRT_cell_info(cfg_tds,samples_df=df_nonnan)
df_nonnan["RANKr"] = df_nonnan["RANKr"]*10

## Construct selection criteria for input dataset:
print("Split in 10min and 30min forcast")
y_10 = df_nonnan[["TRT_Rank_diff|10"]]
y_30 = df_nonnan[["TRT_Rank_diff|30"]]

## Plot histogram of Rank changes:
print("Plot histograms of TRT Rank changes")
fig = plt.figure(figsize = [10,5])
plt.title("Histogram of TRT Rank difference")
plt.hist([y_10.values,y_30.values],
         bins=50,
         color=[col10,col30],
         label=['10min Rank difference', '30min Rank difference'])
plt.legend()
plt.grid()
plt.savefig(os.path.join(cfg_tds["fig_output_path"],"Hist_TRT_Rank_diff.pdf"), orientation="portrait")

fig = plt.figure(figsize = [10,5])
axes = fig.add_subplot(1,1,1)
sns.kdeplot(y_10.values[:,0], shade=True, kernel="gau", bw=0.03, color=col10, label='10min Rank difference')
sns.kdeplot(y_30.values[:,0], shade=True, kernel="gau", bw=0.03, color=col30, label='30min Rank difference')
plt.xlabel("TRT Rank difference")
plt.title("Kernel density estimation of TRT Rank difference")
plt.grid()
axes.get_yaxis().set_visible(False)
plt.savefig(os.path.join(cfg_tds["fig_output_path"],"KDE_TRT_Rank_diff.pdf"), orientation="portrait")
Example #42
model = lm.LinearRegression()
model.fit(X, y)

# Predict appliance / energy compsumtion
y_est = model.predict(X)
residual = y - y_est

# Display scatter plot
figure()
figure(0)
subplot(2, 1, 1)
plot(y, residual, '.')
xlabel('Appliance (true)')
ylabel('Appliance (estimated)')
figure(1)
subplot(2, 1, 2)
hist(residual, 40)
xlabel('Residual')

#Mean squared error
print(np.sqrt(np.square(y - y_est).sum() / len(y)))

print(metrics.mean_squared_error(y, y_est))
#Which is the same as
print(np.square(y - y_est).sum() / len(y))

print("RMSE")
print(np.sqrt(np.square(y - y_est).sum() / len(y)))

show()
Example #43
import matplotlib.cm as cm
import os

#calc_PESC_fluid.py

#datadir = 'C:\\Users\\dschaffner\\OneDrive - brynmawr.edu\\Galatic Dynamics Data\\GalpyData_July2018\\'
datadir = 'C:\\Users\\dschaffner\\Dropbox\\From OneDrive\\Galatic Dynamics Data\\GalpyData_July2018\\resorted_data\\CR6_3t_Rg_Full\\'
npy = '.npz'
#fileheader = 'PE_SC_IDdatabase_Type_1_data_249_delays_3000_orbits_galpy0718'
#fileheader = 'PE_SC_IDdatabase_Type_1_data_249_delays_galpy0718'

fileheader = 'radiusAttimestep0_1t'
datafile = loadnpzfile(datadir + fileheader + npy)
radii1 = datafile['radius']
plt.figure(1)
plt.hist(radii1, bins=50, range=(3.5, 8.5))
plt.title('Radius Dist at 0ts')
plt.ylim(0, 1500)

fileheader = 'radiusAttimestep500_1t'
datafile = loadnpzfile(datadir + fileheader + npy)
radii2 = datafile['radius']
plt.figure(2)
plt.hist(radii2, bins=50, range=(3.5, 8.5))
plt.title('Radius Dist at 500ts')
plt.ylim(0, 1500)

fileheader = 'radiusAttimestep1000_1t'
datafile = loadnpzfile(datadir + fileheader + npy)
radii3 = datafile['radius']
plt.figure(3)
Example #44
         'r-')
#load and plot raw data
X = load_coal()
plt.plot(X, X * 0, 'k|')
plt.xlabel('time (years)')
plt.ylabel('rate')
plt.ylim(-.05, 1)
plt.xlim(X.min(), X.max())
save_tikz('coal_rates.tikz',
          figurewidth='\\figurewidth',
          figureheight='\\figureheight')

plt.figure()
#trans = GPy.core.parameterization.transformations.Logexp()
trans = GPy.core.parameterization.transformations.Exponent()
plt.hist(trans.f(experiment.samples[:, 0]), 100, normed=True)
plt.xlabel('signal variance')
#save_tikz('coal_variance.tikz',figurewidth='\\figurewidth', figureheight = '\\figureheight')
np.savetxt('coal_var_samples', trans.f(experiment.samples[:, 0]))
plt.figure()
plt.hist(trans.f(experiment.samples[:, 1]), 100, normed=True)
plt.xlabel('lengthscale')
#save_tikz('coal_lengthscale.tikz',figurewidth='\\figurewidth', figureheight = '\\figureheight')
np.savetxt('coal_ls_samples', trans.f(experiment.samples[:, 1]))

#plota scatter of variance, ls
variances = trans.f(experiment.samples[:, 0])
lengthscales = trans.f(experiment.samples[:, 1])
plt.figure()
plt.plot(lengthscales, variances, 'k.')
save_tikz('coal_theta.tikz',
Example #45
### make sure missing data read in as missing
twinData[['DLHRWAGE', 'EDUCH']]
twinData[['DLHRWAGE']]
twinData = twinData.dropna()
twinData[['DLHRWAGE']]

# remove rows with missing data (regression will fail to run with missing data)
twinData = pd.read_csv("C:/Users/J40311/Documents/School/495R/twins.txt",
                       na_values=["."])
twinData = twinData.dropna()
twinData[['DLHRWAGE']]
twinData.DLHRWAGE

# check normality of response variable (need to drop missing data to generate)
import matplotlib.pyplot as plt
plt.hist(twinData.DLHRWAGE.dropna())
plt.hist(twinData.DLHRWAGE, 50)
plt.show()

### basic linear regression (without variable selection)
import statsmodels.api as sm

# if I needed to convert one of my variables to factors, could do so
twinData.MALEL
twinData['MALEL'] = pd.Categorical(twinData.MALEL).codes
X = twinData.drop('DLHRWAGE', axis=1)
X.columns
y = twinData[['DLHRWAGE']]

# include intercept in model
X1 = sm.add_constant(X)
Example #46
    tx = cuda.threadIdx.x
    bx = cuda.blockIdx.x
    bw = cuda.blockDim.x
    i = tx + bx * bw
    if i >= arr_out.size:
        return
    arr_out[i] = arr_a[i] + arr_b[i]


def adder(a, b):
    c = a + b
    return c


n = 1000000
a = np.arange(n, dtype=np.float32)
b = np.arange(n, dtype=np.float32)
c = np.empty_like(a)

thread_ct = my_gpu.WARP_SIZE
block_ct = int(math.ceil(float(n) / thread_ct))
vadd[block_ct, thread_ct](a, b, c)
cnc = adder(a, b)
toGraph = cnc - c

plt.figure()
plt.hist(toGraph, bins=100, range=(-.00000001, .00000001))
if c.all() == cnc.all():
    print "equal"
plt.show()
    temp[i].append(np.percentile(e_dist_data[i], 95, interpolation='linear'))
    # temp[i].append(np.percentile(m_dist_data[i], 95, interpolation='linear'))
e_dist_threshold = np.array(temp)
# m_dist_threshold = np.array(temp)

print("<OOD threshold of distance of penultimate logits in each label>")
print(e_dist_threshold)
print()

# Show histogram for each label's distance distribution ----------------------------------------------------------------
distance = e_dist_data
# distance = m_dist_data
for i in range(10):
    data = np.sort(distance[i])
    bins = np.arange(0, 300, 2)
    plt.hist(data, bins, normed=True)
    plt.title("label: %d" % i)
    plt.xlabel('distance', fontsize=15)
    plt.ylabel('num of data', fontsize=15)
    plt.show(block=True)

# ======================================================================================================================
'''
loss = cross_entropy + (l2) + np.eye(num_neurons) - f(x) (?)
experiment => runtime check
'''
# ======================================================================================================================
'''
# select a random data sample
index = 2
img = mnist.test.images[index]
Example #48
         marker='o',
         markersize='0.1',
         color='b',
         label=r'$\bar{\Lambda}^0$')
plt.title('Datos simulados')
plt.xlabel(r'$\alpha$')
plt.ylabel(r'$P_T \; (\frac{MeV}{c})$')
plt.legend(loc='best', markerscale=18)
plt.grid()

# Histograms of the masses:
mK = np.asarray(df['mK'])
mL = np.asarray(df2['mlambda'])
mA = np.asarray(df2['mantilambda'])
plt.figure(2)
plt.hist(mK, bins=100, range=(450, 550))
plt.title(r'$masa \; K_S^0$')
plt.ylabel('contas')
plt.xlabel(r'$masa \; (\frac{MeV}{c^2})$')
plt.grid()
plt.figure(3)
plt.hist(mL, bins=100, range=(1080, 1140))
plt.title(r'$masa \; \Lambda^0$')
plt.ylabel('contas')
plt.xlabel(r'$masa \; (\frac{MeV}{c^2})$')
plt.grid()
plt.figure(4)
plt.hist(mA, bins=100, range=(1080, 1140))
plt.title(r'$masa \; \bar{\Lambda}^0$')
plt.ylabel('contas')
plt.xlabel(r'$masa \; (\frac{MeV}{c^2})$')
Example #49
nobs = 500
bins = 20

#x = -3.0 * np.ones(500) #np.linspace(-5,5)
y = stats.norm.rvs(loc=-3, size=nobs)
hista = gethist(y, bins=bins)

# find parameters and estimates of single gaussian

p0 = [10.0, -2, 0.5]  # initial guess
p1, success = optimize.leastsq(errfunc, p0[:], args=(hista[1], hista[0]))
errors_sq = errfunc(p1, hista[1], hista[0])**2
yest1 = fitfunc(p1, hista[1])
plt.figure()
plt.hist(y, bins=bins)
plt.figure()
#plt.plot(hista[1],hista[0],'o',hista[1],yest1,'.-')
x = np.linspace(hista[1, 0], hista[1, -1], 100)
yest1a = fitfunc(p1, x)
plt.plot(hista[1], hista[0], 'o', x, yest1a, '-')

y1 = stats.norm.rvs(loc=-2, size=int(nobs * 0.6))
y2 = stats.norm.rvs(loc=2, size=int(nobs * 0.4))
y = np.hstack([y1, y2])
hista = gethist(y, bins=bins)

# find parameters and estimates of gaussian mixture
q0 = [10.0, -3, 0.5, 5, 3, 0.5]  # initial guess
q1, success = optimize.leastsq(doublegausserr,
                               q0[:],
#! usr/bin/python
# coding=utf-8
"""
File Name: Data Operation
Description:
Date: 2016-11-29
Author: QIU HU
"""
import matplotlib.pylab as plt
import jieba
jieba.load_userdict('../MidData/user.dict')


def tokenize_text(text):
    tokens = []
    for txt in text:
        tokens.extend(jieba.lcut(txt))
    return tokens

with open('train_id_view_pol_trans_all.txt') as f:
    LENS = []
    for line in f.readlines():
        lis = line.strip().split('\t')
        tokens = tokenize_text(lis[2:])
        LENS.append(len(tokens))
    plt.hist(LENS, bins=100)
    plt.show()

Example #51
WRONG: the probability of being normal given the value obtained

'''
x_mean = np.mean(x)
x_std = np.std(x)
x_skew = skew(x)
x_kurtosis = kurtosis(x)  # excess kurtosis: k - 3
x_jb_stat = nb_sim / 6 * (x_skew**2 + 1 / 4 * x_kurtosis**2)
# how far you are from normality
# necessarily small
p_value = 1 - chi2.cdf(x_jb_stat, df=2)
# distributed as chi2 with 2 degrees of freedom
# if p-value < significance level => reject H0.
# if p-value > significance level => do not reject H0.
x_is_normal = (p_value > 0.05)  # equivalent to JB < 6

print('skewness is ' + str(x_skew))
print('kurtosis is ' + str(x_kurtosis))
print('Jarque-Bera statistic is ' + str(x_jb_stat))
print('p-value is ' + str(p_value))
print('is normal ' + str(x_is_normal))

#jb_list = []
#jb_list.append(x_jb_stat)

#Plot histogram
plt.figure()
plt.hist(x, bins=100)
plt.title(x_description)
plt.show()
Example #52
s.shape
np.mean(s, axis = 0)
np.sum(s, axis = 0)
np.mean(s, axis = 1)
np.sum(s, axis = 1)


w = np.random.dirichlet()


plt.barh(range(20), s[0])
plt.barh(range(20), s[1], left=s[0], color='g')
plt.barh(range(20), s[2], left=s[0]+s[1], color='r')

f1 = np.random.dirichlet((100,1), 1000) 
plt.hist(f1, 30, density = True) 
plt.show()
w = np.random.dirichlet(np.ones(M),N)

A = np.transpose(np.array([[1,2,3],[3,4,5],[5,6,7],[10,20,30]]))
B = np.transpose(np.array([1,2,3]))
A*B


    # U_sum = st.temp_growth(k, T, Tref, T_pk, N, B_U, Ma, Ea_U, Ea_D)
    # u_1 = np.empty((0,N)) 
    # for i in range(M-1):
    #     mean = U_sum[i]/N
    #     random.seed(i)
    #     a = np.array([np.random.uniform(0, mean, size = N)])
    #     u_1 = np.append(u_1,a,axis = 0)
Example #53
item = browser.ui.workingDataTree.selectedItems()[0]
print item.text(0)
for c in range(item.childCount()):
    if 'trace' in item.child(c).text(0): trace = item.child(c)
    if 'xOnsets' in item.child(c).text(0): xonsets = item.child(c).data

# xOnsets is in datapoints, convert to ms
dt = trace.attrs['dt']
xonsets = xonsets * dt

# Convert to frequency
freq = 1000. / np.diff(xonsets)

# Make histogram
nbins = 100
binsRange = (0, 20)
n, bins, patches = plt.hist(freq,
                            bins=nbins,
                            range=binsRange,
                            normed=False,
                            histtype='stepfilled')
n = n / float(np.sum(n))

# Store data
ndaq.store_data(n, name='n')
ndaq.store_data(bins, name='bins')
ndaq.store_data(np.array(freq), name='median_freq')

# AP nbins = 50, binsRange = 10
# EPSC nbins = 0, binsRange = 10
Example #54
def test():
    parser = argparse.ArgumentParser(description='DAGMM')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=10000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--cn_h_unit',
                        type=int,
                        default=10,
                        help='Number of Compression Network hidden units')
    parser.add_argument('--cn_z_unit',
                        type=int,
                        default=2,
                        help='Number of Compression Network z units')
    parser.add_argument('--en_h_unit',
                        type=int,
                        default=10,
                        help='Number of Estimation Network hidden units')
    parser.add_argument('--en_o_unit',
                        type=int,
                        default=2,
                        help='Number of Estimation Network output units')
    args = parser.parse_args()

    print('# epoch: {}'.format(args.epoch))
    print('# Output-directory when training: {}'.format(args.out))
    print('# Compression Network: Dim - {0} - {1} - {0} - Dim'.format(
        args.cn_h_unit, args.cn_z_unit))
    print('# Estimation Network: {} - {} - {}'.format(args.cn_z_unit + 2,
                                                      args.en_h_unit,
                                                      args.en_o_unit))
    print('')

    # load the dataset
    x_data = np.loadtxt('./dataset_arrhythmia/ExplanatoryVariables.csv',
                        delimiter=',')
    y_label = np.loadtxt('./dataset_arrhythmia/CriterionVariables.csv',
                         delimiter=',')

    # extract only the normal (healthy) data
    HealthData = x_data[y_label[:] == 1]

    # split the normal data into training and validation sets
    NumOfHealthData = len(HealthData)
    trainData = HealthData[:math.floor(NumOfHealthData * 0.9)]
    validData = HealthData[len(trainData):]

    # extract the abnormal (disease) data
    diseaseData = x_data[y_label[:] != 1]

    # type conversion
    trainData = trainData.astype(np.float32)
    validData = validData.astype(np.float32)
    diseaseData = diseaseData.astype(np.float32)

    model = DAGMM(args.cn_h_unit, args.cn_z_unit, len(trainData[0]),
                  args.en_h_unit, args.en_o_unit)
    optimizer = optimizers.Adam(alpha=0.0001)
    optimizer.setup(model)

    print("------------------")
    print("Health trainData Energy")
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        _, energy_htr, _, _ = model.fwd(trainData)
    # print(energy_htr.data)

    print("------------------")
    print("Health testData Energy")
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        _, energy_hte, _, _ = model.fwd(validData)
    # print(energy_hte.data)

    print("------------------")
    print("Disease testData Energy")
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        _, energy_di, _, _ = model.fwd(diseaseData)
    # print(energy_di.data)

    plt.hist(energy_htr.data,
             bins=100,
             alpha=0.4,
             histtype='stepfilled',
             color='b')
    plt.hist(energy_hte.data,
             bins=100,
             alpha=0.4,
             histtype='stepfilled',
             color='g')
    plt.hist(energy_di.data,
             bins=100,
             alpha=0.4,
             histtype='stepfilled',
             color='r')
    plt.show()
Example #55
        #print(penergy)
        #exit()
    #print(events['nMass'])
    #masses += ak.to_numpy(ak.flatten(events['nMass'].array())).tolist()
    #e_energies += ak.to_numpy(ak.flatten(events['eenergy'].array())).tolist()
#'''

#print(masses)
ee_ranges = [(0, 5), (0, 0.3), (0, 0.120), (0, 0.120), (0, 0.120)]
pe_ranges = [(0, 5), (0, 5), (0, 5), (0, 5), (0, 5)]

for i in range(0, 5):
    plt.figure(figsize=(12, 6))

    plt.subplot(2, 2, 1)
    plt.hist(data['nMass'][i], range=(0.0, 1.3), bins=100)
    plt.xlabel(r'$M_{pe^-}$ [GeV/c$^2$]', fontsize=18)

    plt.subplot(2, 2, 3)
    plt.hist(data['eenergy'][i], range=ee_ranges[i], bins=100)
    plt.xlabel(r'$E_{e^-}$ [GeV]', fontsize=18)

    plt.subplot(2, 2, 4)
    plt.hist(data['penergy'][i], range=pe_ranges[i], bins=100)
    plt.xlabel(r'$E_{p}$ [GeV]', fontsize=18)
    plt.tight_layout()

    name = f"plots/tiny_hydrogen_{i}.png"
    plt.savefig(name)

plt.show()
Example #56
import cv2
import matplotlib.pylab as plt

img = cv2.imread(r"..\lena.jpg", cv2.IMREAD_GRAYSCALE)
equ = cv2.equalizeHist(img)
cv2.imshow("original", img)
cv2.imshow("result", equ)
plt.subplot(1, 2, 1)
plt.hist(img.ravel(), 256)
plt.subplot(1, 2, 2)
plt.hist(equ.ravel(), 256)
plt.show()
cv2.waitKey()
cv2.destroyAllWindows()
Example #57
training_results = train(model,
                         criterion,
                         train_loader,
                         validation_loader,
                         optimizer,
                         epochs=5)

# In[5] Model evaluation and Plotting

# set models to evaluation so that batchnorm is put in eval mode.
model.eval()
model_batchnorm.eval()

# Plot model activations
out = model.activation(validation_dataset[0][0].reshape(-1, 28 * 28))
plt.hist(out[2], label='model with no batch normalization')
plt.xlabel("activation ")
plt.legend()
plt.show()

out_batchnorm = model_batchnorm.activation(validation_dataset[0][0].reshape(
    -1, 28 * 28))
plt.hist(out_batchnorm[2], label='model with normalization')
plt.xlabel("activation ")
plt.legend()
plt.show()

# Plot the diagram to show the loss
plt.plot(training_results['training_loss'], label='No Batch Normalization')
plt.plot(training_results_Norm['training_loss'], label='Batch Normalization')
plt.ylabel('Cost')
Example #58
    def plot_tpms(self, min_value: float = 0, max_value: float = None):

        plt.hist(self.tpms)
Example #59
#exit()

nentries = tree.GetEntries()

values = []
valuesjet = []
valuesmet = [[], []]
valueselectron = []

for nentry in range(nentries):

    if nentry % 10000 == 0:
        print(nentry)

    tree.GetEntry(nentry)

    njets = tree.njet
    for i in range(0, njets):
        valuesjet.append(tree.jete[i])

    #x = tree.muone
    #y = tree.electrone

#print(valuesjet)
print(len(valuesjet))

plt.figure()
plt.hist(valuesjet, bins=100, range=(0, 500))

#plt.show()
Example #60
from scipy import stats
import numpy as np
import matplotlib.pylab as plt
import math

b = np.genfromtxt('n.txt', unpack=True)

mean = np.mean(b)
#sem = sem(b)
std = np.std(b)
var = std**2
#print(mean,'+-','Varianz=',var)

plt.hist(b, bins=10, density=True, label="Messwerte")
xt = plt.xticks()[0]
xmin, xmax = min(xt), max(xt)
lnspc = np.linspace(xmin, xmax, len(b))

pdf_g = stats.norm.pdf(lnspc, mean, std)
plt.plot(lnspc, pdf_g, color="r", label="Gaußverteilung")
plt.xlabel(r'Zählrate N / 1/10s')
plt.ylabel(r'Relative Häufigkeit')

poi = np.random.poisson(lam=mean, size=10000)
plt.hist(poi,
         bins=10,
         density=True,
         histtype="step",
         color="k",
         label="Poissonverteilung")
plt.grid()