Example #1
def compute_novelty_stats_without_contrast(data_timeseries, baseline_bool=None):

    # remove the filler novel items (they were never repeated)
    data = data_timeseries[~((data_timeseries.event.data['isFirst']) & (data_timeseries.event.data['lag'] == 0))]

    # determine the mean and std of the baseline period for normalization
    # if baseline bool is not given, use all timepoints before 0
    if baseline_bool is None:
        baseline_bool = data.time.values < 0
    baseline_data = data[:, baseline_bool].mean(dim='time')
    m = np.mean(baseline_data)
    s = np.std(baseline_data)

    # compute the zscored data
    zdata = (data - m) / s

    # pull out the data for each condition
    novel_items = data.event.data['isFirst']
    zdata_novel = zdata[novel_items]
    zdata_repeated = zdata[~novel_items]

    # run stats at each timepoint
    ts, ps = ttest_ind(zdata_novel, zdata_repeated, axis=0)

    # return the statistics and the mean of each condition
    zdata_novel_mean = np.mean(zdata_novel, axis=0)
    zdata_novel_sem = sem(zdata_novel, axis=0)
    zdata_repeated_mean = np.mean(zdata_repeated, axis=0)
    zdata_repeated_sem = sem(zdata_repeated, axis=0)

    return zdata_novel_mean, zdata_repeated_mean, zdata_novel_sem, zdata_repeated_sem, ts, ps
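The pattern above (z-score each trial against the pre-stimulus baseline, then run an independent-samples t-test at every timepoint) also works on plain NumPy arrays. A minimal sketch, assuming a hypothetical data array of shape (trials, timepoints), a boolean is_novel mask, and a times vector, none of which come from the snippet above:

import numpy as np
from scipy.stats import ttest_ind

def novelty_stats(data, is_novel, times):
    # z-score every trial against the grand mean/std of the pre-zero baseline
    baseline = data[:, times < 0].mean(axis=1)   # per-trial baseline mean
    zdata = (data - baseline.mean()) / baseline.std()

    # independent-samples t-test between novel and repeated trials at each timepoint
    ts, ps = ttest_ind(zdata[is_novel], zdata[~is_novel], axis=0)
    return zdata[is_novel].mean(axis=0), zdata[~is_novel].mean(axis=0), ts, ps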
Example #2
def plot_approximations_equal_times():
    results_dir = 'experiments/results/approx-heuristic1/'
    figures_dir = 'experiments/figures/'
    file_names = \
      ['wikivote-approx-heuristic-prob_method-2-k_min-0.0010-k_max-0.0100-tau_scale-0.100-samples-5-bfs_samples-1000-init_samples-5-iter_samples-5',\
       'wikivote-approx-heuristic-prob_method-3-k_min-0.0010-k_max-0.0100-tau_scale-0.100-samples-5-bfs_samples-1000-init_samples-5-iter_samples-5',\
      'BA1000_dataset-approx-heuristic-prob_method-2-k_min-0.0010-k_max-0.0100-tau_scale-0.100-samples-5-bfs_samples-1000-init_samples-5-iter_samples-5',\
      'BA1000_dataset-approx-heuristic-prob_method-3-k_min-0.0010-k_max-0.0100-tau_scale-0.100-samples-5-bfs_samples-1000-init_samples-5-iter_samples-5',
      'gnp08-1000-approx-heuristic-prob_method-2-k_min-0.0010-k_max-0.0100-tau_scale-0.100-samples-10-bfs_samples-1000-init_samples-30-iter_samples-30',
      'gnp08-1000-approx-heuristic-prob_method-2-k_min-0.0010-k_max-0.0050-tau_scale-0.100-samples-10-bfs_samples-1000-init_samples-10-iter_samples-10']
    
    titles = ['']*len(file_names)
    approx_errors_apx_d = defaultdict(list)
    approx_errors_seq_d = defaultdict(list)
    for i,fname in enumerate(file_names):
        print "fname: ", fname
        lines = [line.strip().split('\t') for line in open(results_dir + fname, 'r').readlines()]
        for line in lines:
            k_frac, value, apx_value, vanilla_value = float(line[0]), float(line[1]), float(line[2]), float(line[3])
            approx_errors_apx_d[k_frac].append(computeError(value, apx_value))
            approx_errors_seq_d[k_frac].append(computeError(value, vanilla_value))
        k_fracs = approx_errors_apx_d.keys()
        k_fracs.sort()
        errors_apx = [np.mean(approx_errors_apx_d[k_frac]) for k_frac in k_fracs]
        print "errors for apx: ", errors_apx
        sems_apx = [sem(approx_errors_apx_d[k_frac]) for k_frac in k_fracs]
        errors_seq = [np.mean(approx_errors_seq_d[k_frac]) for k_frac in k_fracs]
        print "errors for vanilla: ", errors_seq
        sems_seq = [sem(approx_errors_seq_d[k_frac]) for alpha in k_fracs]
        plot2d(k_fracs,[errors_apx, errors_seq], [sems_apx,sems_seq], [r'INFEST^*', 'Capped MC'], [r'$k/n$','Estimation error'], titles[i] , figures_dir + fname+'.pdf', location="upper left")
Example #3
    def _add_param_table_row(self, name, data):
        """
        Adds a parameter row to the output table.

        @param name Name of the parameter
        @param data Data values
        """
        data = data[np.isfinite(data)]
        mean = np.mean(data)
        weights = 1.0 / np.abs(np.repeat(mean, data.size) - data)
        weighted_data = data * weights / np.sum(weights)

        # In the event that a fit went wrong and all weights sum to zero
        try:
            weighted_mean = np.average(data, weights=weights)
            weighted_error = stats.sem(weighted_data)
        except ZeroDivisionError:
            weighted_mean = np.nan
            weighted_error = np.nan

        self._output_table.addRow([name,
                                   mean,
                                   np.std(data),
                                   stats.sem(data),
                                   weighted_mean,
                                   weighted_error])
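A standalone sketch of the weighting scheme used in this method (inverse absolute deviation from the plain mean), with hypothetical data; note that the SEM of the reweighted values is only a rough proxy for the uncertainty of the weighted mean:

import numpy as np
from scipy import stats

data = np.array([1.0, 1.2, 0.9, 5.0])              # one outlier at 5.0
weights = 1.0 / np.abs(np.mean(data) - data)       # points far from the mean get small weights
weighted_mean = np.average(data, weights=weights)  # down-weights the outlier
weighted_error = stats.sem(data * weights / np.sum(weights))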
Example #4
def cleandat(x, y):
    """
    This function takes two data rows (x, y) and does the following:
    1- sorts the data in ascending order based on the x row.
    2- finds the unique values of x and puts the average of the corresponding y values in the y output.
    3- returns a third row with the standard error of the mean for repeated x values (an indication of statistical error).
    """
    # check data form

    # assign variables
    # x = dat[1,:]
    # y = dat[2,:]

    x_sort_idx = np.argsort(x)
    x_srt = x[x_sort_idx]
    y_srt = y[x_sort_idx]

    # finding unique values
    x_uq, x_uq_idx = np.unique(x_srt, return_index=True)

    # the statistic loop
    l = len(x_uq_idx)
    y_out = np.ones(l)
    y_err = np.ones(l)

    for i in np.arange(l - 1):
        y_out[i] = np.mean(y_srt[x_uq_idx[i] : x_uq_idx[i + 1]])
        # y_err[i] = np.std(y_srt[x_uq_idx[i]:x_uq_idx[i+1]])
        y_err[i] = stat.sem(y_srt[x_uq_idx[i] : x_uq_idx[i + 1]])

    y_out[-1] = np.mean(y_srt[x_uq_idx[-1] :])
    # y_err[-1] = np.std(y_srt[x_uq_idx[-1]:])
    y_err[-1] = stat.sem(y_srt[x_uq_idx[-1] :])

    return x_uq, y_out, y_err
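A quick, hypothetical usage sketch (assuming np is NumPy and stat is scipy.stats, as the function body implies): duplicate x values collapse into one point carrying the mean y and its standard error.

import numpy as np
from scipy import stats as stat   # alias assumed by cleandat

x = np.array([2.0, 1.0, 2.0, 1.0, 3.0])
y = np.array([4.0, 1.0, 6.0, 3.0, 9.0])
x_uq, y_out, y_err = cleandat(x, y)
# x_uq -> [1., 2., 3.], y_out -> [2., 5., 9.];
# y_err holds the SEM of each group (nan for the single sample at x == 3)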
Example #5
def plot_controller_run(data, trials, epochs, threshold):
    performance_array = []
    for n in range(len(data[0][0])):
        fitness_list = [item[0][n] for item in data]
        pop_list = [item[1][n] for item in data]

        performance = (fitness_list, np.mean(fitness_list), stats.sem(fitness_list)/2, np.std(fitness_list)/2,
                       pop_list, np.mean(pop_list), stats.sem(pop_list)/2, np.std(pop_list)/2)

        performance_array.append(performance)

    y = [item[1] for item in performance_array]
    yerr = [item[2] for item in performance_array]
    x = np.arange(len(y))

    y1 = [item[5] for item in performance_array]
    y1err = [item[6] for item in performance_array]

    f, axarr = plt.subplots(2, sharex=True)

    axarr[0].set_title("Average Performance of Neuro-Evolutionary controller \n "
                       "over n={0} trials, {1:.2f}>theta<{2:.2f} "
                       .format(trials, degrees(threshold[0]), degrees(threshold[1])))
    axarr[0].errorbar(x, y, yerr=yerr, label='Best Fitness')
    axarr[0].legend(loc="upper left", shadow=True, fancybox=True)
    axarr[0].set_ylabel('Fitness')

    axarr[1].errorbar(x, y1, yerr=y1err, label='Avg Pop. Fitness')
    axarr[1].legend(loc="upper left", shadow=True, fancybox=True)
    axarr[1].set_ylabel('Fitness')

    plt.xlabel('Epochs (e={0})'.format(epochs))

    plt.show()
Example #6
def widerstand(R,V,A,a):
    print('Individual resistances:')
    for x in range(a):
        R[x] = V[x]/A[x]
        #print(x, ' = ', R[x])
    print('Mean = ', np.mean(R),' +- ', stats.sem(R))
    return ufloat(np.mean(R), stats.sem(R))
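The ufloat returned here presumably comes from the uncertainties package; a minimal usage sketch under that assumption, with made-up voltage and current readings:

import numpy as np
from scipy import stats
from uncertainties import ufloat   # assumed source of ufloat

V = np.array([1.0, 2.0, 3.0])      # voltages
A = np.array([0.5, 1.0, 1.5])      # currents
R = np.zeros_like(V)
r = widerstand(R, V, A, len(V))    # every V/A ratio is 2.0, so r == 2.0 +/- 0.0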
Example #7
def plot_modulation_depth(arr_early, arr_late, sigma):
	arr_early = ss.zscored_fr(arr_early, sigma).max(axis = 0)
	arr_early = np.nan_to_num(arr_early)
	arr_late = ss.zscored_fr(arr_late, sigma).max(axis = 0)
	arr_late = np.nan_to_num(arr_late)
	if arr_early.size > arr_late.size:
		arr_early = np.random.choice(arr_early, size = arr_late.size, replace = False)
	if arr_late.size > arr_early.size:
		arr_late = np.random.choice(arr_late, size = arr_early.size, replace = False)
	early_sem = stats.sem(arr_early)
	early_mean = arr_early.mean()
	late_sem = stats.sem(arr_late)
	late_mean = arr_late.mean()
	p_val = stats.ttest_rel(arr_early, arr_late)
	print "p val is = " + str(p_val)
	# Pull the formatting out here
	width = 0.8	
	bar_kwargs = {'width':width,'color':'g','linewidth':2,'zorder':5}
	err_kwargs = {'zorder':0,'fmt':None,'lw':2,'ecolor':'k'}	
	means = np.array([early_mean, late_mean])
	errs = np.array([early_sem, late_sem])
	idx = np.arange(2)
	X = idx+width/2	
	labels = ['E1 early', 'E1_late']
	plt.bar(idx, means, alpha = 0.5,**bar_kwargs)
	plt.errorbar(X, means, yerr = errs,**err_kwargs)
	plt.xticks(X, labels)
	plt.ylabel('z-scored modulation depth')
	plt.title('Change in modulation depth from early in session to late in session')
	plt.show()
Example #8
    def get_data(self):
        directory = self.get_dir()
        shots = self.dic['shots']
        shots = shots.replace(' ', '')  # remove all the spaces
#        keys = " ".join([self.dic['X'], self.dic['Y']])
#        self.data, errmsg, raw_data = qrange.qrange(directory, shots, keys)
        keys = [self.dic['X'], self.dic['Y']]
#	print 'Before qrange.'
        self.data, errmsg, raw_data = qrange.qrange_eval(directory, shots, keys)
#	print 'After qrange.'
        s = ''
        for i in range(self.data.shape[1]):
            col = self.data[:,i]
            s00 = numpy.mean(col)
            s01 = stats.sem(col)
            s02 = numpy.std(col)
            s03 = numpy.max(col) - numpy.min(col)
            s = s + "Mean = %10.6f\n" % s00
            s = s + "Std. deviation  = %10.6f\n" % s02
            s = s + "Std. Error of the mean = %10.6f\n" % s01
            s = s + "Pk-Pk = %10.6f\n" % s03
            s = s+ '\n'
        raw_data = s + raw_data
        self.dic['data_str'] = raw_data

        self.sdata = None
        if self.dic['X'] == "SEQ:shot":
            s = [ numpy.mean(self.data[:,1]), numpy.std(self.data[:,1]), stats.sem(self.data[:,1]),numpy.max(self.data[:,1]) - numpy.min(self.data[:,1]) ]
            a = []
            for val in s:
                a.append( [val for i in range(self.data[:,1].size)])
            self.sdata = numpy.c_[self.data[:,0], numpy.transpose(numpy.array(a))]
        else:
            self.sdata = statdat.statdat(self.data, 0, 1)
        return
Example #9
    def _compute_item_pair_diff(self, smoothed_spike_counts):
        data = smoothed_spike_counts[~((smoothed_spike_counts.event.data['isFirst']) & (smoothed_spike_counts.event.data['lag'] == 0))]
        item_names = data.event.data['item_name']

        novel_rep_diffs = []
        mean_item_frs = []
        novel_mean = []
        rep_mean = []

        for this_item in np.unique(item_names):
            data_item = data[item_names == this_item]
            if data_item.shape[0] == 2:
                novel_data_item = data_item[data_item.event.data['isFirst']].values
                rep_data_item = data_item[~data_item.event.data['isFirst']].values
                diff_due_to_cond = novel_data_item - rep_data_item
                novel_rep_diffs.append(diff_due_to_cond)
                novel_mean.append(novel_data_item)
                rep_mean.append(rep_data_item)
                mean_item_frs.append(np.mean(data_item.data))

        novel_mean = np.squeeze(np.stack(novel_mean))
        novel_sem = sem(novel_mean, axis=0)
        novel_trial_means = np.mean(novel_mean, axis=1)
        novel_mean = np.mean(novel_mean, axis=0)

        rep_mean = np.squeeze(np.stack(rep_mean))
        rep_sem = sem(rep_mean, axis=0)
        rep_trial_means = np.mean(rep_mean, axis=1)
        rep_mean = np.mean(rep_mean, axis=0)

        return np.squeeze(np.stack(novel_rep_diffs)), np.stack(mean_item_frs), novel_mean, rep_mean, novel_sem, \
               rep_sem, novel_trial_means, rep_trial_means
Example #10
def delP(f,w,m,e,s,force):

    NEMD = read_log_nemd(f,w,m,e,s,force)

    # for now only pick the last 2 nanoseconds
    # should rerun with longer equilibration
    Pleft = NEMD[:,8]
    Pright = NEMD[:,9]

    # plot pressure to check convergence
    fig1 = plt.figure(figsize=(9,7)) 
    ax1  = fig1.add_axes([0.1,0.15,0.8,0.75])
    ax1.plot(NEMD[:,0][::10], (NEMD[:,8][::10]-NEMD[:,9][::10])*1e-1)
    ax1.set_xlabel('Time')
    ax1.set_ylabel(r'$\Delta$P (MPa)')
    fig1.savefig('PLOTS/PDF/dP_{}_{}_{}_eps{}_s{}_f{}.pdf'.format(f,w,m,e,s,force))
    fig1.clear()


    Pleft_val = np.mean(Pleft)
    Pleft_err = stats.sem(Pleft)

    Pright_val = np.mean(Pright)
    Pright_err = stats.sem(Pright)

    deltaP = (Pright_val - Pleft_val)*1e5
    deltaP_err = 1e5*np.sqrt(Pleft_err**2+Pright_err**2)
    print('Pressure drop: ', deltaP*1e-6, '+/-', deltaP_err*1e-6, 'MPa.')
    return deltaP, deltaP_err
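The error on the pressure difference is propagated in quadrature (sigma_dP = sqrt(sigma_left^2 + sigma_right^2)); a minimal standalone check of that step with synthetic pressure samples:

import numpy as np
from scipy import stats

Pleft = np.random.normal(100.0, 5.0, size=1000)   # hypothetical pressure samples
Pright = np.random.normal(120.0, 5.0, size=1000)
deltaP = (np.mean(Pright) - np.mean(Pleft)) * 1e5
deltaP_err = 1e5 * np.sqrt(stats.sem(Pleft)**2 + stats.sem(Pright)**2)
print('Pressure drop:', deltaP * 1e-6, '+/-', deltaP_err * 1e-6, 'MPa.')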
Example #11
    def get_data(self):
        directory = self.get_dir()
        shots = self.setup_dict['shots']
        if isinstance(shots, list):
            shots = ",".join(shots)
        keys = " ".join([self.setup_dict['X'], self.setup_dict['Y']])
        self.data, self.errmsg, self.raw_data = qrange.qrange(directory, shots, keys)
        s = ''
        for i in range(self.data.shape[1]):
            col = self.data[:,i]
            s00 = numpy.mean(col)
            s01 = stats.sem(col)
            s02 = numpy.std(col)
            s03 = numpy.max(col) - numpy.min(col)
            s = s + "Mean = %10.6f\n" % s00
            s = s + "Std. deviation  = %10.6f\n" % s02
            s = s + "Std. Error of the mean = %10.6f\n" % s01
            s = s + "Pk-Pk = %10.6f\n" % s03
            s = s+ '\n'
        self.raw_data = s + self.raw_data

        self.sdata = None
        if self.setup_dict['X'] == "SEQ:shot":
            s = [ numpy.mean(self.data[:,1]), numpy.std(self.data[:,1]), stats.sem(self.data[:,1]),numpy.max(self.data[:,1]) - numpy.min(self.data[:,1]) ]
            a = []
            for val in s:
                a.append( [val for i in range(self.data[:,1].size)])
            self.sdata = numpy.c_[self.data[:,0], numpy.transpose(numpy.array(a))]
        else:
            self.sdata = statdat.statdat(self.data, 0, 1)
Example #12
def partition_main(args):
    print(args, file=sys.stderr)
    base_prior = make_base_prior(args.het, GTYPE3) # base genotype prior
    mm,mm0,mm1 = make_mut_matrix(args.mu, GTYPE3) # substitution rate matrix (mm), with non-diagonal set to 0 (mm0), and with diagonal set to 0 (mm1)

    vcffile, variants, DPRs, PLs = read_vcf(args.vcf, args.min_ev)
    n_site,n_smpl = PLs.shape[0:2]

    tree = Tree()
    if sem(PLs[...,1],axis=1).mean() > sem(PLs[...,2],axis=1).mean():
        partition(PLs[...,0:2], tree, np.arange(n_smpl), args.min_ev)
    else:
        partition(PLs, tree, np.arange(n_smpl), args.min_ev)

    init_tree(tree)
    PLs = PLs.astype(np.longdouble)
    populate_tree_PL(tree, PLs, mm, 'PL')
    populate_tree_PL(tree, PLs, mm0, 'PL0')
    calc_mut_likelihoods(tree, mm0, mm1)

    print(tree)
    tree.write(outfile=args.output+'.pt0.nwk', format=5)
    best_tree,best_PL = recursive_NNI(tree, mm0, mm1, base_prior)
    best_tree,best_PL = recursive_reroot(best_tree, mm0, mm1, base_prior)
    print(best_tree)
    print('PL_per_site = %.4f' % (best_PL/n_site))
    best_tree.write(outfile=args.output+'.pt.nwk', format=5)
Example #13
def count_totals_to_percents_weighted(count_totals):

    # Here's the return datatype that stores the percentage of occupancy
    # in a given channel/sf state which can be paired with the indices
    ion_count_percents = defaultdict(list)
    ion_count_indices = defaultdict(list)
    for traj_id, count_dict in count_totals.items():
        traj_total_lines = float(sum(count_dict.values()))
        for ion_state, ion_count in count_dict.items():
            ion_count_percents[traj_id].append(ion_count/traj_total_lines)
            ion_count_indices[traj_id].append(ion_state)

    # Append a little statistics, sorry if this is confusing...
    all_weighted_avgs=[]
    weighted_avgs_by_occid=defaultdict(list)
    for traj_id, percents in ion_count_percents.items():
        temp_weighted_avg = 0
        for occ_id, percent in enumerate(percents):
            x = ion_count_indices[traj_id][occ_id]*percent
            temp_weighted_avg += x
            weighted_avgs_by_occid[occ_id].append(x)
        all_weighted_avgs.append(temp_weighted_avg)

    for occ_id, weight_avg in weighted_avgs_by_occid.items():
        ion_count_percents['MEAN'].append(mean(weight_avg))
        ion_count_indices['MEAN'].append(occ_id)
        ion_count_percents['STDERR'].append(sem(weight_avg))
        ion_count_indices['STDERR'].append(occ_id)

    ion_count_percents['MEAN'].append(mean(all_weighted_avgs))
    ion_count_indices['MEAN'].append('ALL')
    ion_count_percents['STDERR'].append(sem(all_weighted_avgs))
    ion_count_indices['STDERR'].append('ALL')

    return (dict(ion_count_percents), dict(ion_count_indices))
Example #14
def get_phylo_depth_changes(fluc_levels,fluc_type,data_type):
    
    assert type(fluc_levels)==list
    assert type(fluc_type)==str
    assert fluc_type in ["sync","stag","lowhigh"]
    assert type(data_type)==str
    assert data_type in ["raw","avg"]
    
    for fluc_level in fluc_levels:
        
        fluc_length=int(fluc_level)
        
        if data_type=="avg":
            start_slope_means=[]
            start_slope_se=[]
            end_slope_means=[]
            end_slope_se=[]
        else:
            start_slopes=[[] for i in range(30)]
            end_slopes=[[] for i in range(30)]
            
        for replicate in range(1,31):
            avg_depth_for_updates=[]
            start_inflow_slopes=[]
            end_inflow_slopes=[]
            averages_for_replicate=get_file_lines("../data_"+str(fluc_type)+"_"+str(fluc_level)+"/replicate_"+str(replicate)+"/average.dat")
            for line in averages_for_replicate:
                if len(line)!=0 and line[0]!="#":
                    temp=line.split(" ")
                    update=int(temp[0])
                    if update%fluc_length==0:
                        depth=float(temp[11])
                        avg_depth_for_updates+=[float(depth)]
                        
            for i in range(len(avg_depth_for_updates)-1):
                if i%2==0:
                    start_inflow_slopes+=[math.fabs(avg_depth_for_updates[i]-avg_depth_for_updates[i+1])]
                else:
                    end_inflow_slopes+=[math.fabs(avg_depth_for_updates[i]-avg_depth_for_updates[i+1])]
                    
            if data_type=="avg":
                start_slope_means+=[stats.nanmean(start_inflow_slopes)]
                start_slope_se+=[stats.sem(start_inflow_slopes)]
                end_slope_means+=[stats.nanmean(end_inflow_slopes)]
                end_slope_se+=[stats.sem(end_inflow_slopes)]
            else:
                start_slopes[replicate-1]=list(start_inflow_slopes)
                end_slopes[replicate-1]=list(end_inflow_slopes)
                
        if data_type=="avg":
            pickle.dump(start_slope_means,open("../plot_data/start_slope_mean_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
            pickle.dump(end_slope_means,open("../plot_data/end_slope_mean_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
            pickle.dump(start_slope_se,open("../plot_data/start_slope_se_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
            pickle.dump(end_slope_se,open("../plot_data/end_slope_se_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
        else:
            pickle.dump(start_slopes,open("../plot_data/start_slope_raw_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
            pickle.dump(end_slopes,open("../plot_data/end_slope_raw_"+str(fluc_type)+"_"+str(fluc_level)+".data","wb"))
            
    return "success"
Example #15
def plot_avg_and_sem(npArray, axis=1):
    mean = npArray.mean(axis=axis)
    sem_plus = mean + stats.sem(npArray, axis=axis)
    sem_minus = mean - stats.sem(npArray, axis=axis)
    
    plt.figure()
    plt.fill_between(np.arange(mean.shape[0]), sem_plus, sem_minus, alpha=0.5)
    plt.plot(mean)
Example #16
def semFinite(data,N):
    'Standard error of the mean with finite population correction'
    # print(N)
    # print(len(data))
    if len(data) < 0.05*N:
        return sem(data)
    else:
        return sem(data)*((N-len(data))/(N-1))**0.5
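semFinite applies the finite population correction sqrt((N - n) / (N - 1)) once the sample exceeds 5% of the population; a small sketch, assuming sem is scipy.stats.sem:

import numpy as np
from scipy.stats import sem   # assumed to be the `sem` used inside semFinite

data = np.arange(20.0)          # sample of n = 20
print(semFinite(data, 1000))    # n < 5% of N: plain SEM
print(semFinite(data, 100))     # n >= 5% of N: SEM scaled by sqrt((N - n)/(N - 1))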
Example #17
def plot_cd_data(pre_arr, peri_arr, post_arr):
	
	# Custom function to draw the p-value bars
	def label_diff(i,j,text,X,Y):
		x = (X[i]+X[j])/2 ##center of the p-val bar
		y = max(Y[i], Y[j])
		
		props = {'connectionstyle':'bar','arrowstyle':'-',\
					 'shrinkA':20,'shrinkB':20,'lw':2}
		ax.annotate(text, xy=(x,y+0.1), zorder=10)
		ax.annotate('', xy=(X[i],y), xytext=(X[j],y), arrowprops=props)

	##create a numpy array containing the mean vals for the bar chart
	means = np.array([pre_arr.mean(), peri_arr.mean(), post_arr.mean()])
	##get the standard error values
	errs = np.array([stats.sem(pre_arr), stats.sem(peri_arr), stats.sem(post_arr)])
	##calculate the p-values between each of the sets
	p_pre_peri = np.round(stats.ttest_rel(pre_arr, peri_arr)[1], 3)
	p_pre_post = np.round(stats.ttest_rel(pre_arr, post_arr)[1], 3)
	p_peri_post = np.round(stats.ttest_rel(peri_arr, post_arr)[1], 3)
	##put all the arrays into one big array to plot the
	##individual lines
	all_arr = np.zeros((3,pre_arr.size))
	all_arr[0,:] = pre_arr
	all_arr[1,:] = peri_arr
	all_arr[2,:] = post_arr

	##formatting stuff
	idx  = np.arange(3)    # the x locations for the groups
	width= 0.8
	labels = ('Pre', 'CD', 'Reinstatement')

	# Pull the formatting out here
	bar_kwargs = {'width':width,'color':'g','linewidth':2,'zorder':5}
	err_kwargs = {'zorder':0,'fmt':None,'lw':2,'ecolor':'k'}

	X = idx+width/2 ##position of the center of the bars

	fig, ax = plt.subplots()
	ax.p1 = plt.bar(idx, means, alpha = 0.5, **bar_kwargs)
	ax.errs = plt.errorbar(X, means, yerr=errs, **err_kwargs)

	##plot the individual lines on their own axis
	ax2 = ax.twinx()
	ax2.lines = plt.plot(np.linspace(0,3,3), all_arr)
	ax2.set_ylabel("Percent correct")


	# Call the function
	label_diff(0,1,'p='+str(p_pre_peri),X,means)
	label_diff(0,2,'p='+str(p_pre_post),X,means)
	label_diff(1,2,'p='+str(p_peri_post),X,means)

	ax.set_ylim(ymax=means.max()+0.3)
	plt.xticks(X, labels, color='k')
	plt.title("Performance during contingency degredation")
	ax.set_ylabel("Percent correct")
	plt.show()
Example #18
def avg_scores_plot_multi(fit_results_no_CV, fit_results_CV, predictor_variable, CV_order):
    n_array_no_CV = []
    r2_array_no_CV = []
    rmse_array_no_CV = []
    n_array_CV = []
    r2_array_CV = []
    rmse_array_CV = []

    for key in fit_results_no_CV:
        value = fit_results_no_CV[key]
        i = 0
        sum_r2 = 0
        sum_rmse = 0
        while i < len(value):
            sum_r2 = float(sum_r2) + value[i][1]
            sum_rmse = float(sum_rmse) + value[i][2]
            i = i + 1
        avg_r2 = sum_r2/len(value)
        avg_rmse = sum_rmse/len(value)
        n_array_no_CV.append(key)
        r2_array_no_CV.append(avg_r2)
        rmse_array_no_CV.append(avg_rmse)
        print('For fit with All Data: For n = ' + str(key) + ': Average R^2 = ' + str(avg_r2) + ', Average RMSE = ' + str(avg_rmse))

    for key in fit_results_CV:
        value = fit_results_CV[key]
        i = 0
        sum_r2 = 0
        sum_rmse = 0
        while i < len(value):
            sum_r2 = float(sum_r2) + value[i][1]
            sum_rmse = float(sum_rmse) + value[i][2]
            i = i + 1
        avg_r2 = sum_r2/len(value)
        avg_rmse = sum_rmse/len(value)
        n_array_CV.append(key)
        r2_array_CV.append(avg_r2)
        rmse_array_CV.append(avg_rmse)
        print('For fit with CV: For n = ' + str(key) + ': Average R^2 = ' + str(avg_r2) + ', Average RMSE = ' + str(avg_rmse))

    #Plot Average Values of each against one another
    f, ax = plt.subplots()
    y_error_CV=stats.sem(rmse_array_CV, axis=None, ddof=4)
    y_error_no_CV = stats.sem(rmse_array_no_CV, axis=None, ddof=4)
    ax.errorbar(n_array_CV, rmse_array_CV, yerr=y_error_CV, fmt='o', color='r', label='Fit with '+str(CV_order)+'-fold Cross Validation')
    ax.errorbar(n_array_no_CV, rmse_array_no_CV, yerr=y_error_no_CV, fmt='o', color='blue', label='Fit on All Data')
    ax.set_ylabel('Average RMSE Value')
    ax.xaxis.grid()
    ax.yaxis.grid()
    miny = min(rmse_array_no_CV)-500
    maxy = max(rmse_array_no_CV)+500
    plt.ylim([miny, maxy])
    plt.xlim([0, 6])
    plt.xlabel('Polynomial Fit Order')
    plt.title('Avg. RMSE in ' + str(CV_order)+ '-fold Cross Validation and Polynomial Fit with All Data of ' + str(predictor_variable) + ' vs No. 311 Incidents')
    plt.legend()
    plt.show()
Example #19
def runQLearningTest():
    runs = 1500
    epochs = 500
    _epsilon=0.1
    _alpha=0.5
    _gamma=0.9
    
    
    bandit = Explorer()
   
    fig, axs = plt.subplots(nrows=1, ncols=2, sharex=True)
    x = np.arange(epochs)
    
    ax = axs[0]   
    accumulator = []
    errors = []
    y = []
    for i in range(runs):
        start = dt.datetime.now()
        accumulator.append(bandit.findPath(epochs, 0, _alpha, _gamma))
        print('Run 10-0 #' + str(i) + ' took ' + str((dt.datetime.now() - start).total_seconds()) + ' seconds') 

    accumulator2 = np.array(accumulator)
    for i in range(len(accumulator[0])):
        errors.append(stats.sem(accumulator2[: ,i], axis=None, ddof=0))
        y.append(np.mean(accumulator2[: ,i]))
    ax.plot(x, y, 'b', label='Greedy', linewidth=2)
    ax.errorbar(x, y, yerr=errors, ecolor='g') #, fmt='o')
    ax.legend(loc='upper left', shadow=True)
    ax.set_ylabel('Reward')
    ax.set_xlabel('Steps {0}, alpha {1}, gamma {2}'.format(epochs, _alpha, _gamma))
    #--------------------------------------------
    
    ax = axs[1]   
    accumulator = []
    errors = []
    y = []
    for i in range(runs):
        start = dt.datetime.now()
        accumulator.append(bandit.findPath(epochs, _epsilon, _alpha, _gamma))
        print('Run 10-0 #' + str(i) + ' took ' + str((dt.datetime.now() - start).total_seconds()) + ' seconds') 

    accumulator2 = np.array(accumulator)
    for i in range(len(accumulator[0])):
        errors.append(stats.sem(accumulator2[: ,i]/2, axis=None, ddof=0))
        y.append(np.mean(accumulator2[: ,i]))
    
    ax.plot(x, y, 'r--', label='e-Greedy', linewidth=2)
    ax.errorbar(x, y, yerr=errors, ecolor='r') #, fmt='o')
    ax.legend(loc='upper left', shadow=True)
    ax.set_xlabel('Steps {0}, alpha {1}, gamma {2}'.format(epochs, _alpha, _gamma))
    
    _title = 'Q-Learning - Greedy vs e-Greedy (e = {0} - {1} Runs)'.format(_epsilon, runs)
    fig.suptitle(_title)


    plt.show()
Example #20
def make_plot_fig2(sim_results):
    rms_lc = sim_results[0, 2]
    lam_lc = sim_results[0, 0]
    rms_cv = sim_results[1, 2]
    lam_cv = sim_results[1, 0]
    
    fig = plt.figure(figsize = (12,12), dpi = 300)
    widths = [10]
    heights = [1, 1]
    gs = gridspec.GridSpec(2, 1, height_ratios=heights, width_ratios=widths,
                           hspace=0.45, wspace=0.3)
    ax1 = plt.subplot(gs[0])
    if np.min(rms_cv) < np.min(rms_lc):
        trans = np.min(np.mean(rms_cv, axis=0))
    else:
        trans = np.min(np.mean(rms_lc, axis=0))
    mn_rms = np.mean(rms_lc, axis=0) - trans
    st_rms = st.sem(rms_lc, axis=0)
    plt.plot(noise_lvl, mn_rms, marker = 'o', color = 'blue', label = 'L-curve')
    plt.fill_between(noise_lvl, mn_rms - st_rms, 
                     mn_rms + st_rms, alpha = 0.3, color = 'blue')
    mn_rms = np.mean(rms_cv, axis=0) - trans
    st_rms = st.sem(rms_cv, axis=0)
    plt.plot(noise_lvl, mn_rms, marker = 'o', color = 'green', label = 'cross-validation')
    plt.fill_between(noise_lvl, mn_rms - st_rms, 
                    mn_rms + st_rms, alpha = 0.3, color = 'green')
    plt.ylabel('Estimation error')
    plt.xlabel('Relative noise level')
    ax1.spines['right'].set_visible(False)
    ax1.spines['top'].set_visible(False)
    set_axis(ax1, -0.05, 1.05, letter='A')
    ht, lh = ax1.get_legend_handles_labels()
    fig.legend(ht, lh, loc='center', ncol=2, frameon=False)
    
    '''second plot'''
    ax2 = plt.subplot(gs[1])
    mn_lam = np.mean(lam_lc, axis=0)
    st_lam = st.sem(lam_lc, axis=0)
    plt.plot(noise_lvl, mn_lam, marker = 'o', color = 'blue', label = 'L-curve')
    plt.fill_between(noise_lvl, mn_lam - st_lam,
                    mn_lam + st_lam, alpha = 0.3, color = 'blue')
    mn_lam = np.mean(lam_cv, axis=0)
    st_lam = st.sem(lam_cv, axis=0)
    plt.plot(noise_lvl, mn_lam, marker = 'o', color = 'green', label = 'cross-validation')
    plt.fill_between(noise_lvl, mn_lam - st_lam,
                    mn_lam + st_lam, alpha = 0.3, color = 'green')
#    ax2.set_yscale('log')
    ax2.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.ylabel('Lambda')
    plt.xlabel('Relative noise level')
    set_axis(ax2, -0.05, 1.05, letter='B')
    ht, lh = ax2.get_legend_handles_labels()
    fig.legend(ht, lh, loc='lower center', ncol=2, frameon=False)
    ax2.spines['right'].set_visible(False)
    ax2.spines['top'].set_visible(False)
    fig.savefig('stats.jpg')
Example #21
def get_stats_all(dates, data, ydate):
    ix = []; iy = []
    for i, date in enumerate(dates):
        if date == ydate: iy.append(i)
        else: ix.append(i)
    x0 = data[:,ix]; x = x0.mean(axis=1)
    y0 = data[:,iy]; y = y0.mean(axis=1)
    xe = st.sem(x0, axis=1)
    ye = st.sem(y0, axis=1)
    return x, y, xe, ye, x0, y0
Example #22
def plot_fr_means(arrs1, arrs2, chunk1 = (0,10), chunk2 = (35,45), n = None):

	##grab the specified chunks
	arrs1_early = arrs1[:,chunk1[0]*60*1000:chunk1[1]*60*1000]
	arrs1_late = arrs1[:,chunk2[0]*60*1000:chunk2[1]*60*1000]
	arrs2_early = arrs2[:,chunk1[0]*60*1000:chunk1[1]*60*1000]
	arrs2_late = arrs2[:,chunk2[0]*60*1000:chunk2[1]*60*1000]
	##calculate the means across all the arrays
	means =np.array([arrs1_early.mean(), 
		arrs2_early.mean(), arrs1_late.mean(), 
		arrs2_late.mean()])*1000
	##get the across session means
	m_arrs1_early = arrs1_early.mean(axis = 1)*1000
	m_arrs2_early = arrs2_early.mean(axis = 1)*1000
	m_arrs1_late = arrs1_late.mean(axis = 1)*1000
	m_arrs2_late = arrs2_late.mean(axis = 1)*1000
	##get an array of SEM measurements for the error bars
	errs = np.array([stats.sem(m_arrs1_early,axis = None), 
		stats.sem(m_arrs2_early,axis = None),
		stats.sem(m_arrs1_late,axis = None), 
		stats.sem(m_arrs2_late, axis = None)])
	##calculate the t-tests
	p_e1s = stats.ttest_rel(m_arrs1_early, m_arrs1_late)
	p_e2s = stats.ttest_rel(m_arrs2_early, m_arrs2_late)
	p_e12_early = stats.ttest_rel(m_arrs1_early, m_arrs2_early)
	p_e12_late = stats.ttest_rel(m_arrs1_late, m_arrs2_late)
	##print the ttest results
	print "p_e1s = " + str(p_e1s)
	print "p_e2s = " + str(p_e2s)
	print "p_e12_early = " + str(p_e12_early)
	print "p_e12_late = " + str(p_e12_late)
	##plot the bar graph
	##formatting stuff
	idx  = np.arange(4)    # the x locations for the groups
	width= 0.8
	labels = ('E1 early', 'E2_early', 'E1_late', 'E2_late')

	# Pull the formatting out here
	bar_kwargs = {'width':width,'color':'g','linewidth':2,'zorder':5}
	err_kwargs = {'zorder':0,'fmt':None,'lw':2,'ecolor':'k'}

	X = idx+width/2 ##position of the center of the bars

	fig, ax = plt.subplots()
	ax.p1 = plt.bar(idx, means, alpha = 0.5, **bar_kwargs)
	ax.errs = plt.errorbar(X, means, yerr=errs, **err_kwargs)

	ax.set_ylim(ymax=means.max()+means.max()/6.0)
	plt.xticks(X, labels, color='k')
	plt.title("Average firing rate within sessions")
	ax.set_ylabel("FR (Hz)")
	if n is not None:
		plt.text(0.2, means.max()+means.max()/10, "n= "+str(n)+" sessions")

	plt.show()
Example #23
def calculate_tuning_curve_inputs(spikeTimeStamps, eventOnsetTimes, firstSort, secondSort, timeRange, baseRange=[-1.1,-0.1], errorType = 'sem', info='full'):
    fullTimeRange = [min(min(timeRange),min(baseRange)), max(max(timeRange),max(baseRange))]
    
    numFirst = np.unique(firstSort)
    numSec = np.unique(secondSort)
    duration = timeRange[1]-timeRange[0]
    spikeArray = np.zeros((len(numFirst), len(numSec)))
    errorArray = np.zeros_like(spikeArray)
    trialsEachCond = behavioranalysis.find_trials_each_combination(firstSort, 
                                                                   numFirst, 
                                                                   secondSort, 
                                                                   numSec)
    spikeTimesFromEventOnset, trialIndexForEachSpike, indexLimitsEachTrial = spikesanalysis.eventlocked_spiketimes(
                                                                                                        spikeTimeStamps, 
                                                                                                        eventOnsetTimes,
                                                                                                        fullTimeRange)
    spikeCountMat = spikesanalysis.spiketimes_to_spikecounts(spikeTimesFromEventOnset, indexLimitsEachTrial, timeRange)
    baseSpikeCountMat = spikesanalysis.spiketimes_to_spikecounts(spikeTimesFromEventOnset, indexLimitsEachTrial, baseRange)
    baselineSpikeRate = np.mean(baseSpikeCountMat)/(baseRange[1]-baseRange[0])
    if errorType == 'sem':
        baselineError = stats.sem(baseSpikeCountMat)/(baseRange[1]-baseRange[0])
    elif errorType == 'std':
        baselineError = np.std(baseSpikeCountMat)/(baseRange[1]-baseRange[0])
    
    for sec in range(len(numSec)):
        trialsThisSec = trialsEachCond[:,:,sec]
        for first in range(len(numFirst)):
            trialsThisFirst = trialsThisSec[:,first]
            if spikeCountMat.shape[0] != len(trialsThisFirst):
                spikeCountMat = spikeCountMat[:-1,:]
            if any(trialsThisFirst):
                thisFirstCounts = spikeCountMat[trialsThisFirst].flatten()
                spikeArray[first,sec] = np.mean(thisFirstCounts)/duration
                if errorType == 'sem':
                    errorArray[first,sec] = stats.sem(thisFirstCounts)/duration
                elif errorType == 'std':
                    errorArray[first,sec] = np.std(thisFirstCounts)/duration
            else:
                spikeArray[first,sec] = np.nan
                errorArray[first,sec] = np.nan
    if info=='full':
        tuningDict = {'responseArray':spikeArray,
                      'errorArray':errorArray,
                      'baselineSpikeRate':baselineSpikeRate,
                      'baselineSpikeError':baselineError,
                      'spikeCountMat':spikeCountMat,
                      'trialsEachCond':trialsEachCond}
    elif info=='plotting':
        tuningDict = {'responseArray':spikeArray,
                      'errorArray':errorArray,
                      'baselineSpikeRate':baselineSpikeRate,
                      'baselineSpikeError':baselineError}
    else:
        raise NameError('Unrecognized info type: expected "full" or "plotting"')
    return tuningDict
Example #24
def AnalyzeResultInfo(modelPredicts, testObs, reflectObs):
    print("FULL SET")
    sumInfo = DoSummaryInfo(testObs, modelPredicts)
    print("RMSE: %8.4f   %8.4f" % (numpy.mean(sumInfo['rmse']), ss.sem(sumInfo['rmse'])))
    print("MAE : %8.4f   %8.4f" % (numpy.mean(sumInfo['mae']), ss.sem(sumInfo['mae'])))
    print("CORR: %8.4f   %8.4f" % (numpy.mean(sumInfo['corr']), ss.sem(sumInfo['corr'])))

    print("\nZ < 40")
    belowCondition = reflectObs < 40
    belowSumInfo = DoSummaryInfo(numpy.where(belowCondition, testObs, numpy.nan),
                                 numpy.where(belowCondition, modelPredicts, numpy.nan))
    print("RMSE: %8.4f   %8.4f" % (numpy.mean(belowSumInfo['rmse']), ss.sem(belowSumInfo['rmse'])))
    print("MAE : %8.4f   %8.4f" % (numpy.mean(belowSumInfo['mae']), ss.sem(belowSumInfo['mae'])))
    print("CORR: %8.4f   %8.4f" % (numpy.mean(belowSumInfo['corr']), ss.sem(belowSumInfo['corr'])))

    print("\nZ >= 40")
    aboveSumInfo = DoSummaryInfo(numpy.where(belowCondition, numpy.nan, testObs),
                                 numpy.where(belowCondition, numpy.nan, modelPredicts))
    print("RMSE: %8.4f   %8.4f" % (numpy.mean(aboveSumInfo['rmse']), ss.sem(aboveSumInfo['rmse'])))
    print("MAE : %8.4f   %8.4f" % (numpy.mean(aboveSumInfo['mae']), ss.sem(aboveSumInfo['mae'])))
    print("CORR: %8.4f   %8.4f" % (numpy.mean(aboveSumInfo['corr']), ss.sem(aboveSumInfo['corr'])))
Example #25
def plot_proportions(us, shortcuts, novels, savepath, savefig=True):
    """Plots proportion of each trajectory taken. Behavior only.

        Parameters
        ----------
        us : list of floats
            Proportion along the u trajectory for each session.
            len(us) == num_sessions evaluated
        shortcuts : list of floats
            Proportion along the shortcut trajectory for each session.
            len(shortcut) == num_sessions evaluated
        novels : list of floats
            Proportion along the novel trajectory for each session.
            len(novel) == num_sessions evaluated
        savepath : str
            Location and filename for the saved plot.
        savefig : boolean
            Default is True and will save the plot to the specified location. False
            shows the plot without saving it.

        """
    all_us = np.mean(us)
    us_sem = stats.sem(us)
    all_shortcuts = np.mean(shortcuts)
    shortcuts_sem = stats.sem(shortcuts)
    all_novels = np.mean(novels)
    novels_sem = stats.sem(novels)

    n_groups = list(range(3))

    colour = ['#5975a4', '#5f9e6e', '#b55d5f']

    data = [all_us, all_shortcuts, all_novels]
    sems = [us_sem, shortcuts_sem, novels_sem]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i in list(range(len(data))):
        ax.bar(n_groups[i], data[i], align='center',
               yerr=sems[i], color=colour[i], ecolor='#525252')

    plt.xlabel('(sessions=' + str(len(us)) + ')')
    plt.ylabel('Proportion of trials')
    sns.despine()
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    plt.xticks(n_groups, ['U', 'Shortcut', 'Novel'])

    # plt.tight_layout()
    if savefig:
        plt.savefig(savepath, dpi=300, bbox_inches='tight')
        plt.close()
    else:
        plt.show()
Example #26
def _sta_by_event_cond(spike_rel_times, phase_bin_start, phase_bin_stop, sta_buffer, eeg, filtered_eeg, events,
                       h_file=None):
    valid_samps = np.where((eeg.time > phase_bin_start) & (eeg.time < phase_bin_stop))[0]
    nsamples = int(np.ceil(float(eeg['samplerate']) * sta_buffer))

    # throw out novel items that were never repeated
    good_events = events[~((events['isFirst']) & (events['lag'] == 0))].index.values

    # loop over each event
    stas = []
    stas_filt = []
    is_novel = []
    for (index, e), spikes, eeg_data_event, eeg_filt_data_event in zip(events.iterrows(), spike_rel_times, eeg, filtered_eeg):
        if index in good_events:
            if len(spikes) > 0:
                valid_spikes = spikes[np.in1d(spikes, valid_samps)]
                if len(valid_spikes) > 0:
                    for this_spike in valid_spikes:
                        stas.append(eeg_data_event[this_spike - nsamples:this_spike + nsamples].data)
                        stas_filt.append(eeg_filt_data_event[this_spike - nsamples:this_spike + nsamples].data)
                        is_novel.append(e.isFirst)
    is_novel = np.array(is_novel)

    # sta by condition for raw eeg
    if len(stas) > 0:
        stas = np.stack(stas)
        novel_sta_mean = stas[is_novel].mean(axis=0)
        novel_sta_sem = sem(stas[is_novel], axis=0)
        rep_sta_mean = stas[~is_novel].mean(axis=0)
        rep_sta_sem = sem(stas[~is_novel], axis=0)
        sta_time = np.linspace(-sta_buffer, sta_buffer, novel_sta_mean.shape[0])

        # sta by condition for filtered eeg
        stas_filt = np.stack(stas_filt)
        novel_sta_filt_mean = stas_filt[is_novel].mean(axis=0)
        novel_sta_filt_sem = sem(stas_filt[is_novel], axis=0)
        rep_sta_filt_mean = stas_filt[~is_novel].mean(axis=0)
        rep_sta_filt_sem = sem(stas_filt[~is_novel], axis=0)

        if h_file is not None:
            add_to_hd5f_file(h_file, 'novel_sta_mean', novel_sta_mean)
            add_to_hd5f_file(h_file, 'novel_sta_sem', novel_sta_sem)
            add_to_hd5f_file(h_file, 'rep_sta_mean', rep_sta_mean)
            add_to_hd5f_file(h_file, 'rep_sta_sem', rep_sta_sem)

            add_to_hd5f_file(h_file, 'novel_sta_filt_mean', novel_sta_filt_mean)
            add_to_hd5f_file(h_file, 'novel_sta_filt_sem', novel_sta_filt_sem)
            add_to_hd5f_file(h_file, 'rep_sta_filt_mean', rep_sta_filt_mean)
            add_to_hd5f_file(h_file, 'rep_sta_filt_sem', rep_sta_filt_sem)

            add_to_hd5f_file(h_file, 'sta_time', sta_time)
        else:
            return novel_sta_mean, novel_sta_sem, rep_sta_mean, rep_sta_sem, novel_sta_filt_mean, novel_sta_filt_sem, \
                   rep_sta_filt_mean, rep_sta_filt_sem, sta_time
Example #27
def post_process():
    pattern = re.compile(r'lls_([0-9]+)_[0-9]+\.npz')
    data_dir = 'data'
    files = [join(data_dir,f)  
        for f in listdir(data_dir) 
        if isfile(join(data_dir,f)) and re.match(pattern, f)]
    all_lls = [np.load(f)['lls'] for f in files]
    all_lls = np.array(all_lls)
    avg_lls = np.average(all_lls, axis=0)
    lls_sem = 1.96*stats.sem(all_lls)
    x = np.arange(all_lls.shape[1])
    plt.plot(x, avg_lls, linestyle='-.', color='b', label='PB_ungibbs')
    plt.fill_between(x, avg_lls-lls_sem, avg_lls+lls_sem, color='b', alpha=0.3)    
    plt.xlabel('Iterations')
    plt.ylabel('Log Likelihood')
    plt.title('Average Log Likelihood and 95% confidence interval (normal)')      
    
    '''
    pattern2 = re.compile(r'lls_amcmc_([0-9]+)_[0-9]+\.npz')
    files = [join(data_dir,f)  
        for f in listdir(data_dir) 
        if isfile(join(data_dir,f)) and re.match(pattern2, f)]
    all_lls = [np.load(f)['lls'] for f in files]
    all_lls = np.array(all_lls)
    avg_lls = np.average(all_lls, axis=0)
    lls_sem = 1.96*stats.sem(all_lls)
    x = np.arange(all_lls.shape[1])
    plt.plot(x, avg_lls, linestyle='-.', color='g', label='PB_amcmc')
    plt.fill_between(x, avg_lls-lls_sem, avg_lls+lls_sem, color='g', alpha=0.3)
    '''
    
    # Gibbs ll
    gibbs_lls = np.array(pd.read_csv(join(data_dir, 'gibbs_ll'), 
        sep=' ', header=None)).T
    avg_gibbs_lls = np.average(gibbs_lls, axis=0)
    gibbs_lls_sem = 1.96*stats.sem(gibbs_lls) 
    x_gibbs = np.arange(gibbs_lls.shape[1])   
    plt.plot(x_gibbs, avg_gibbs_lls, linestyle='-', color='r', label='Collapsed Gibbs')
    plt.fill_between(x_gibbs, avg_gibbs_lls-gibbs_lls_sem, 
        avg_gibbs_lls+gibbs_lls_sem, color='r', alpha=0.3)
    
    # VEM ll
    #vem_lls = np.array(pd.read_csv(join(data_dir, 'vem_ll'), 
    #    sep=' ', header=None)).T
    #avg_vem_lls = np.average(vem_lls, axis=0)
    #vem_lls_sem = 1.96*stats.sem(vem_lls)
    #x_vem = np.arange(vem_lls.shape[1])    
    #plt.plot(x_vem, avg_vem_lls, label='VEM')
    #plt.fill_between(x_vem, avg_vem_lls-vem_lls_sem, 
    #    avg_vem_lls+vem_lls_sem, alpha=0.3)
    
    # save plot
    plt.legend(loc='lower right')
    plt.savefig('data/lls.pdf', format='pdf') 
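Here 1.96 * SEM is used as the half-width of an approximate 95% normal confidence band around the mean log-likelihood at each iteration; a minimal standalone version of that band with hypothetical data:

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

all_lls = np.random.randn(10, 50).cumsum(axis=1)   # hypothetical: 10 chains x 50 iterations
mean_ll = all_lls.mean(axis=0)
half_width = 1.96 * stats.sem(all_lls, axis=0)     # ~95% normal CI half-width per iteration
plt.plot(np.arange(50), mean_ll)
plt.fill_between(np.arange(50), mean_ll - half_width, mean_ll + half_width, alpha=0.3)
plt.show()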
Example #28
def plot_avg_and_sem(npArray, axis=1):
    """This routine takes a multidimenionsal numpy array and an axis and then
    plots the average over that axis on top of a band that represents the standard
    error of the mean.
    """
    mean = npArray.mean(axis=axis)
    sem_plus = mean + stats.sem(npArray, axis=axis)
    sem_minus = mean - stats.sem(npArray, axis=axis)
    
    plt.figure()
    plt.fill_between(np.arange(mean.shape[0]), sem_plus, sem_minus, alpha=0.5)
    plt.plot(mean)
Example #29
def display_grid_scores(grid_scores, top=None):
    """Helper function to format a report on a grid of scores"""

    grid_scores = sorted(grid_scores, key=lambda x: x[1], reverse=True)
    if top is not None:
        grid_scores = grid_scores[:top]
    _, best_mean, best_scores = grid_scores[0]
    threshold = best_mean - 2 * sem(best_scores)

    for params, mean_score, scores in grid_scores:
        append_star = mean_score + 2 * sem(scores) > threshold
        print(display_scores(params, scores, append_star=append_star))
Example #30
def dump_pre_post_stim_firing_rate(ffname, outprefix, window=10e-3):
    """Dump mean, median and standard deviation in population spike before and after stimulus.
    """
    dbcnt_flist_dict = get_dbcnt_dict(ffname)
    celltype_data_dict = defaultdict(list)
    for dbcnt, flist in dbcnt_flist_dict.items():
        for fname in flist:
            data = TraubData(makepath(fname))
            bgtimes, probetimes = get_stim_times(data, correct_tcr=True)
            times = np.concatenate((bgtimes, probetimes))
            times.sort()
            spiketrains = defaultdict(list)
            for cell, train in data.spikes.items():
                celltype = cell.partition('_')[0]
                spiketrains[celltype].append(train)
            for celltype, trains in spiketrains.items():
                popspikes = np.concatenate(trains)
                popspikes.sort()
                pre = []
                post = []
                for t in times:
                    npre = np.flatnonzero((popspikes <= t) & (popspikes > (t - window/2))).shape[0]
                    pre.append(npre / (data.cellcounts._asdict()[celltype] * window / 2.0))
                    npost = np.flatnonzero((popspikes > t) & (popspikes < (t + window/2))).shape[0]
                    post.append(npost / (data.cellcounts._asdict()[celltype] * window / 2.0))
                dstats = {
                    'filename': fname,
                    'dbcount': dbcnt,
                    'premean': np.mean(pre),
                    'premedian': np.median(pre),
                    'prestd': np.std(pre),
                    'presem': stats.sem(pre),
                    'postmean': np.mean(post),
                    'postmedian': np.median(post),
                    'poststd': np.std(post),
                    'postsem': stats.sem(post),
                    'nstim': len(times)}
                celltype_data_dict[celltype].append(dstats)
    for celltype, datalist in celltype_data_dict.items():
        df = pd.DataFrame(datalist, columns=['filename',
                    'dbcount',
                    'premean',
                    'premedian',
                    'prestd',
                    'presem',
                    'postmean',
                    'postmedian',
                    'poststd',
                    'postsem',
                    'nstim'])
        outfile = '{}_prepost_rates_{}_{}ms_window.csv'.format(outprefix, celltype, window*1e3)
        df.to_csv(outfile)
Example #31
def bin_and_mean(xdata,
                 ydata,
                 bins=10,
                 distribution='normal',
                 show_fig=True,
                 fig=None,
                 ax=None,
                 figsize=None,
                 dpi=100,
                 show_bins=True,
                 raw_data_label='raw data',
                 mean_data_label='average',
                 xlabel=None,
                 ylabel=None,
                 logx=False,
                 logy=False,
                 grid_on=True,
                 error_bounds=True,
                 err_bound_type='shade',
                 legend_on=True,
                 subsamp_thres=None,
                 show_stats=True,
                 show_SE=False,
                 err_bound_shade_opacity=0.5):
    '''
    Calculate the "bin-and-mean" results and optionally show the "bin-and-mean"
    plot.

    A "bin-and-mean" plot is a more salient way to show the dependency of
    ``ydata`` on ``xdata``. The data points (``xdata``, ``ydata``) are divided
    into different bins according to the values in ``xdata`` (via ``bins``),
    and within each bin, the mean values of x and y are calculated, and treated
    as the representative x and y values.

    "Bin-and-mean" is preferred when data points are highly skewed (e.g.,
    a lot of data points for when x is small, but very few for large x). The
    data points when x is large are usually not noises, and could be even more
    valuable (think of the case where x is earthquake magnitude and y is the
    related economic loss). If we want to study the relationship between
    economic loss and earthquake magnitude, we need to bin-and-mean raw data
    and draw conclusions from the mean data points.

    The theory that enables this method is the assumption that the data points
    with similar x values follow the same distribution. Naively, we assume the
    data points are normally distributed, then y_mean is the arithmetic mean of
    the data points within a bin. We also often assume the data points follow
    log-normal distribution (if we want to assert that y values are all
    positive), then y_mean is the expected value of the log-normal distribution,
    while x_mean for any bins are still just the arithmetic mean.

    Notes:
      (1) For log-normal distribution, the expected value of y is:
                    E(Y) = exp(mu + (1/2)*sigma^2)
          and the variance is:
                 Var(Y) = [exp(sigma^2) - 1] * exp(2*mu + sigma^2)
          where mu and sigma are the two parameters of the distribution.
      (2) Knowing E(Y) and Var(Y), mu and sigma can be back-calculated::

             mu = ln[ E(Y) / sqrt(1 + Var(Y)/E^2(Y)) ]

             sigma = sqrt( ln[ 1 + Var(Y)/E^2(Y) ] )

          (Reference: https://en.wikipedia.org/wiki/Log-normal_distribution)

    Parameters
    ----------
    xdata : list, numpy.ndarray, or pandas.Series
        X data.
    ydata : list, numpy.ndarray, or pandas.Series
        Y data.
    bins : int, list, numpy.ndarray, or pandas.Series
        Number of bins (an integer), or an array representing the actual bin
        edges. If ``bins`` means bin edges, the edges are inclusive on the
        lower bound, e.g., a value 2 shall fall into the bin [2, 3), but not
        the bin [1, 2). Note that the binning is done according to the X values.
    distribution : {'normal', 'lognormal'}
        Specifies which distribution the Y values within a bin follow. Use
        'lognormal' if you want to assert all positive Y values. Only supports
        normal and log-normal distributions at this time.
    show_fig : bool
        Whether or not to show a bin-and-mean plot.
    fig : matplotlib.figure.Figure or ``None``
        Figure object. If None, a new figure will be created.
    ax : matplotlib.axes._subplots.AxesSubplot or ``None``
        Axes object. If None, a new axes will be created.
    figsize: (float, float)
        Figure size in inches, as a tuple of two numbers. The figure
        size of ``fig`` (if not ``None``) will override this parameter.
    dpi : float
        Figure resolution. The dpi of ``fig`` (if not ``None``) will override
        this parameter.
    show_bins : bool
        Whether or not to show the bin edges as vertical lines on the plots.
    raw_data_label : str
        The label name of the raw data to be shown in the legend (such as
        "raw data"). It has no effects if ``show_legend`` is ``False``.
    mean_data_label : str
        The label name of the mean data to be shown in the legend (such as
        "averaged data"). It has no effects if ``show_legend`` is ``False``.
    xlabel : str or ``None``
        X axis label. If ``None`` and ``xdata`` is a pandas Series, use
        ``xdata``'s "name" attribute as ``xlabel``.
    ylabel : str or ``None``
        Y axis label. If ``None`` and ``ydata`` is a pandas Series, use
        ``ydata``'s "name" attribute as ``ylabel``.
    logx : bool
        Whether or not to show the X axis in log scale.
    logy : bool
        Whether or not to show the Y axis in log scale.
    grid_on : bool
        Whether or not to show grids on the plot.
    error_bounds : bool
        Whether or not to show error bounds of each bin.
    err_bound_type : {'shade', 'bar'}
        Type of error bound: shaded area or error bars. It has no effect if
        ``error_bounds`` is set to ``False``.
    legend_on : bool
        Whether or not to show a legend.
    subsamp_thres : int
        A positive integer that defines the number of data points in each bin
        to show in the scatter plot. The smaller this number, the faster the
        plotting process. If larger than the number of data points in a bin,
        then all data points from that bin are plotted. If ``None``, then all
        data points from all bins are plotted.
    show_stats : bool
        Whether or not to show R^2 scores, correlation coefficients of the raw
        data and the binned averages on the plot.
    show_SE : bool
        If ``True``, show the standard error of y_mean (orange dots) of each
        bin as the shaded area beneath the mean value lines. If ``False``, show
        the standard deviation of raw Y values (gray dots) within each bin.
    err_bound_shade_opacity : float
        The opacity of the shaded area representing the error bound. 0 means
        completely transparent, and 1 means completely opaque. It has no effect
        if ``err_bound_type`` is ``'bar'``.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure object being created or being passed into this function.
        ``None``, if ``show_fig`` is set to ``False``.
    ax : matplotlib.axes._subplots.AxesSubplot
        The axes object being created or being passed into this function.
        ``None``, if ``show_fig`` is set to ``False``.
    x_mean : numpy.ndarray
        Mean X values of each data bin (in terms of X values).
    y_mean : numpy.ndarray
        Mean Y values of each data bin (in terms of X values).
    y_std : numpy.ndarray
        Standard deviation of Y values within each data bin (in terms of X values).
    y_SE : numpy.ndarray
        Standard error of ``y_mean``. It describes how far ``y_mean`` is from
        the population mean (or the "true mean value") within each bin, which
        is a different concept from ``y_std``.
        See https://en.wikipedia.org/wiki/Standard_error#Standard_error_of_mean_versus_standard_deviation
        for further information.
    stats_ : tuple<float>
        A tuple in the order of (r2_score_raw, corr_coeff_raw, r2_score_binned,
        corr_coeff_binned), which are the R^2 score and correlation coefficient
        of the raw data (``xdata`` and ``ydata``) and the binned averages
        (``x_mean`` and ``y_mean``).
    '''
    if not isinstance(xdata, hlp._array_like) or not isinstance(
            ydata, hlp._array_like):
        raise TypeError('`xdata` and `ydata` must be lists, numpy arrays, '
                        'or pandas Series.')

    if len(xdata) != len(ydata):
        raise hlp.LengthError('`xdata` and `ydata` must have the same length.')

    if isinstance(xdata, list): xdata = np.array(xdata)  # otherwise boolean
    if isinstance(ydata, list): ydata = np.array(ydata)  # indexing won't work

    #------------Pre-process "bins"--------------------------------------------
    if isinstance(bins, (int, np.integer)):  # if user specifies number of bins
        if bins <= 0:
            raise ValueError('`bins` must be a positive integer.')
        else:
            nr = bins + 1  # create bins with percentiles in xdata
            x_uni = np.unique(xdata)
            bins = [
                np.nanpercentile(x_uni, (j + 0.) / bins * 100)
                for j in range(nr)
            ]
            if not all(x <= y for x, y in zip(bins, bins[1:])
                       ):  # https://stackoverflow.com/a/4983359/8892243
                print(
                    '\nWARNING: Resulting "bins" array is not monotonically '
                    'increasing. Please use a smaller "bins" to avoid potential '
                    'issues.\n')
    elif isinstance(bins, (list, np.ndarray)):  # if user specifies array
        nr = len(bins)
    else:
        raise TypeError('`bins` must be either an integer or an array.')

    #-----------Pre-process xlabel and ylabel----------------------------------
    if not xlabel and isinstance(xdata, pd.Series):  # xdata has 'name' attr
        xlabel = xdata.name
    if not ylabel and isinstance(ydata, pd.Series):  # ydata has 'name' attr
        ylabel = ydata.name

    #-----------Group data into bins-------------------------------------------
    inds = np.digitize(xdata, bins)
    x_mean = np.zeros(nr - 1)
    y_mean = np.zeros(nr - 1)
    y_std = np.zeros(nr - 1)
    y_SE = np.zeros(nr - 1)
    x_subs = []  # subsampled x data (for faster scatter plots)
    y_subs = []
    for j in range(nr - 1):  # loop over every bin
        x_in_bin = xdata[inds == j + 1]
        y_in_bin = ydata[inds == j + 1]

        #------------Calculate mean and std------------------------------------
        if len(x_in_bin) == 0:  # no point falls into current bin
            x_mean[j] = np.nan  # this is to prevent numpy from throwing...
            y_mean[j] = np.nan  #...confusing warning messages
            y_std[j] = np.nan
            y_SE[j] = np.nan
        else:
            x_mean[j] = np.nanmean(x_in_bin)
            if distribution == 'normal':
                y_mean[j] = np.nanmean(y_in_bin)
                y_std[j] = np.nanstd(y_in_bin)
                y_SE[j] = stats.sem(y_in_bin)
            elif distribution == 'lognormal':
                s, loc, scale = stats.lognorm.fit(y_in_bin, floc=0)
                estimated_mu = np.log(scale)
                estimated_sigma = s
                y_mean[j] = np.exp(estimated_mu + estimated_sigma**2.0 / 2.0)
                y_std[j]  = np.sqrt(np.exp(2.*estimated_mu + estimated_sigma**2.) \
                             * (np.exp(estimated_sigma**2.) - 1) )
                y_SE[j] = y_std[j] / np.sqrt(len(y_in_bin))
            else:
                raise ValueError("Valid values of `distribution` are "
                                 "{'normal', 'lognormal'}. Not '%s'." %
                                 distribution)

        #------------Pick subsets of data, for faster plotting-----------------
        #------------Note that this does not affect mean and std---------------
        if subsamp_thres is not None and show_fig:
            if not isinstance(subsamp_thres,
                              (int, np.integer)) or subsamp_thres <= 0:
                raise TypeError(
                    '`subsamp_thres` must be a positive integer or None.')
            if len(x_in_bin) > subsamp_thres:
                x_subs.extend(
                    np.random.choice(x_in_bin, subsamp_thres, replace=False))
                y_subs.extend(
                    np.random.choice(y_in_bin, subsamp_thres, replace=False))
            else:
                x_subs.extend(x_in_bin)
                y_subs.extend(y_in_bin)

    #-------------Calculate R^2 and corr. coeff.-------------------------------
    non_nan_indices = ~np.isnan(xdata) & ~np.isnan(ydata)
    xdata_without_nan = xdata[non_nan_indices]
    ydata_without_nan = ydata[non_nan_indices]

    r2_score_raw = hlp._calc_r2_score(
        ydata_without_nan, xdata_without_nan)  # treat "xdata" as "y_pred"
    corr_coeff_raw = np.corrcoef(xdata_without_nan, ydata_without_nan)[0, 1]
    r2_score_binned = hlp._calc_r2_score(y_mean, x_mean)
    corr_coeff_binned = np.corrcoef(x_mean, y_mean)[0, 1]
    stats_ = (r2_score_raw, corr_coeff_raw, r2_score_binned, corr_coeff_binned)

    #-------------Plot data on figure------------------------------------------
    if show_fig:
        fig, ax = hlp._process_fig_ax_objects(fig, ax, figsize, dpi)

        if subsamp_thres: xdata, ydata = x_subs, y_subs
        ax.scatter(xdata,
                   ydata,
                   c='gray',
                   alpha=0.3,
                   label=raw_data_label,
                   zorder=1)
        if error_bounds:
            if err_bound_type == 'shade':
                ax.plot(x_mean,
                        y_mean,
                        '-o',
                        c='orange',
                        lw=2,
                        label=mean_data_label,
                        zorder=3)
                if show_SE:
                    ax.fill_between(x_mean,
                                    y_mean + y_SE,
                                    y_mean - y_SE,
                                    label=r'$\pm$ S.E.',
                                    facecolor='orange',
                                    alpha=err_bound_shade_opacity,
                                    zorder=2.5)
                else:
                    ax.fill_between(x_mean,
                                    y_mean + y_std,
                                    y_mean - y_std,
                                    label=r'$\pm$ std',
                                    facecolor='orange',
                                    alpha=err_bound_shade_opacity,
                                    zorder=2.5)
                # END IF-ELSE
            elif err_bound_type == 'bar':
                if show_SE:
                    mean_data_label += r'$\pm$ S.E.'
                    ax.errorbar(x_mean,
                                y_mean,
                                yerr=y_SE,
                                ls='-',
                                marker='o',
                                c='orange',
                                lw=2,
                                elinewidth=1,
                                capsize=2,
                                label=mean_data_label,
                                zorder=3)
                else:
                    mean_data_label += r'$\pm$ std'
                    ax.errorbar(x_mean,
                                y_mean,
                                yerr=y_std,
                                ls='-',
                                marker='o',
                                c='orange',
                                lw=2,
                                elinewidth=1,
                                capsize=2,
                                label=mean_data_label,
                                zorder=3)
                # END IF-ELSE
            else:
                raise ValueError('Valid "err_bound_type" names are {"shade", '
                                 '"bar"}, not "%s".' % err_bound_type)
        else:
            ax.plot(x_mean,
                    y_mean,
                    '-o',
                    c='orange',
                    lw=2,
                    label=mean_data_label,
                    zorder=3)

        ax.set_axisbelow(True)
        if xlabel: ax.set_xlabel(xlabel)
        if ylabel: ax.set_ylabel(ylabel)
        if logx:
            ax.set_xscale('log')
        if logy:
            ax.set_yscale('log')
        if grid_on:
            ax.grid(ls=':')
            ax.set_axisbelow(True)
        if show_bins:
            ylims = ax.get_ylim()
            for k, edge in enumerate(bins):
                lab_ = 'bin edges' if k == 0 else None  # only label 1st edge
                ec = cl.get_colors(N=1)[0]
                ax.plot([edge] * 2,
                        ylims,
                        '--',
                        c=ec,
                        lw=1.0,
                        zorder=2,
                        label=lab_)
        if legend_on:
            ax.legend(loc='best')
        if show_stats:
            stats_text = r"$R^2_{\mathrm{raw}}$=%.2f, $r_{\mathrm{raw}}$=%.2f, " \
                         r"$R^2_{\mathrm{avg}}$=%.2f, " \
                         r"$r_{\mathrm{avg}}$=%.2f" % stats_
            ax.set_title(stats_text)

        return fig, ax, x_mean, y_mean, y_std, y_SE, stats_
    else:
        return None, None, x_mean, y_mean, y_std, y_SE, stats_
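
# A minimal, self-contained sketch (not part of the function above, made-up
# numbers) of the docstring's point that the standard error of the mean shrinks
# with sample size while the standard deviation does not.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
for n in (10, 100, 1000):
    y = rng.normal(loc=5.0, scale=2.0, size=n)
    # np.std stays near 2.0, while stats.sem ~ 2.0 / sqrt(n) keeps shrinking
    print(n, np.std(y, ddof=1), stats.sem(y))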
Example #32
0
def SNRwavelets(epochs_condition, low, high, step, timewindow, snr_format,
                numrois, frqwindow, snr_format_name):
    #########################################################################################################################
    # Based on SNR estimation in evoked responses described in Gonzale-Morino et al. (2014).
    # SNRwavelets performs single-trial and evoked-response wavelet transformation on the specified epoched data,
    # and uses this information to provide an estimate of the SNR in the frequency range of interest, as well as
    # more broadly across all bands for induced and evoked power.

    # Inputs:
    # epochs_condition = epoched data (object)
    # low = lowest frequency to estimate
    # high = highest frequency to estimate
    # step = interval between frequencies
    # timewindow = samples of interest for evoked response
    # snr_format = dictionary of roi channel information (see SNRcreate_test_data)
    # numrois = number of roi channels
    # frqwindow = frequencies defining the evoked response of interest (for ASSR this would be between 38 and 42 for example)
    # snr_format_name = string with names to find in the snr_format dict
    # Returns:
    # dictionary of SNR values
    # roi_snr_ASSR - snr for each channel of the evoked response
    # roi_snr_EVOKEDbands - snr for each channel of the evoked bands
    # roi_snr_INDUCEDbands - snr for each channel of the induced bands

    ####### DEFINE ENVIRONMENT
    print('Importing additional modules')
    import scipy
    from scipy import stats
    import numpy as np
    import copy
    import mne
    from mne.time_frequency import tfr_multitaper, tfr_stockwell, tfr_morlet
    # Organise input and perform wavelet transform for single trials and average response data
    print('Beginning wavelet transforms')
    # frequency information for wavelets
    freqs = np.arange(low, high, step)
    n_cycles = freqs / 4.
    # plot data - whole head - single trials
    power = mne.time_frequency.tfr_morlet(epochs_condition,
                                          freqs=freqs,
                                          n_cycles=n_cycles,
                                          use_fft=False,
                                          return_itc=False,
                                          decim=3,
                                          n_jobs=1,
                                          average=False)
    power.apply_baseline(
        mode='ratio', baseline=(-.5, 0)
    )  # apply baseline correction using ratio method (power is divided by the mean baseline power)
    # plot data - whole head - average
    powerAV = mne.time_frequency.tfr_morlet(epochs_condition,
                                            freqs=freqs,
                                            n_cycles=n_cycles,
                                            use_fft=False,
                                            return_itc=False,
                                            decim=3,
                                            n_jobs=1,
                                            average=True)
    powerAV.apply_baseline(
        mode='ratio', baseline=(-.5, 0)
    )  # apply baseline correction using ratio method (power is divided by the mean baseline power)
    # organise rois
    print('Extracting information from region of interest sites')
    rois = np.zeros(numrois, dtype=int)
    for x in range(
            0,
            np.shape(rois)[0]
    ):  # what this loop is doing is to go through and get the name of the items to select from the snr_format dict.
        # this information is then used to find which items to use for the rois
        text1 = snr_format_name
        text2 = str(x + 1)
        text3 = text1 + text2
        rois[x] = int(snr_format[text3])
    eppower = copy.deepcopy(
        power.data[:, rois, :, ])  # trials,channels,freqs,time
    eppowerAV = copy.deepcopy(powerAV.data[rois, :, ])  # channels,freqs,time
    del power
    del powerAV
    # we've now got the roi wavelet data. the next steps are to apply the appropriate baseline
    # and then to estimate the total power from the average of 39:41hz in the evoked and single trials
    # following this we can estimate the snr for each channel, and globally over our roi
    ##########################################################################################################################
    # EVOKED RESPONSE SNR
    # create evoked power average total (39:41hz)
    #    windAV = eppowerAV[...,[18:20],[starta:enda]]
    print('Estimating SNR for evoked response')
    chAVpower = np.zeros(np.shape(rois)[0])
    for x in range(0, np.shape(rois)[0]):
        temp = eppowerAV[x, frqwindow, :]
        f = np.zeros(len(frqwindow))
        for y in range(0, len(frqwindow)):
            f[y] = sum(temp[y, timewindow[0]:(timewindow[-1] + 1)])
        f = np.mean(f, 0)
        del temp
        chAVpower[x] = f
    # create single trial power average total (39:41hz)
    chSTpower = np.zeros((np.shape(rois)[0], np.shape(eppower)[0]))
    for x in range(0, np.shape(eppower)[0]):
        temp1 = eppower[x, :, :, :]
        for y in range(0, np.shape(rois)[0]):
            temp2 = temp1[y, frqwindow, :]
            f = np.zeros(len(frqwindow))
            for yy in range(0, len(frqwindow)):
                f[yy] = sum(temp2[yy, timewindow[0]:(timewindow[-1] + 1)])
            f = np.mean(f, 0)
            chSTpower[y, x] = f
            del temp2
        del temp1
    # this is what we've all been waiting for, get channel snr
    chSNR_ASSR = np.zeros(np.shape(rois)[0])
    for x in range(0, np.shape(rois)[0]):
        temp1 = chAVpower[x]
        temp2 = chSTpower[x, :]
        snr = temp1 / stats.sem(temp2)
        chSNR_ASSR[x] = snr
        del temp1
        del temp2
        del snr
# what this section is doing:
# the first loop goes through each of the roi channels and sums all of the power values in a given frq range over the specified time bin.
# this is then averaged across the rois.
# the second set of loops goes through the single trials and performs the same procedure.
# the third loop estimates the SNR for each roi channel.
#########################################################################################################
# Individual band SNR
# this section will take all of the individual frequency bands and estimate the SNR
# for evoked and induced power. Induced power is retained in the average response
# by squaring individual power values (see Gonzale-Morino et al., 2014).
# In this version the window can be set to include any given time window but should
# be focussed on the task response period to allow for analysis of time and phase
# locked properties of the stimulus.

# Frequency bands definition
    delta = 0
    theta = np.arange(1, 3, 1)
    alpha = np.arange(3, 7, 1)
    beta = np.arange(7, 20, 1)
    gamma = np.arange(20, 29, 1)
    # Evoked power
    # pt1 - average response
    print('Estimating SNR for evoked response per band')
    ch = 0
    tempout = np.zeros((len(freqs), numrois))
    while ch < numrois:
        data = eppowerAV[ch, :, :]
        for x in range(0, len(freqs)):
            temp = data[x, timewindow[0]:(timewindow[-1] + 1)]
            temp = np.sum(temp)
            tempout[x, ch] = temp
            del temp
        ch = ch + 1
        del data
    evoked_bands_pt1 = np.zeros((5, numrois))
    del ch
    for x in range(0, numrois):
        evoked_bands_pt1[0, x] = np.sum(tempout[delta, x])
        evoked_bands_pt1[1, x] = np.sum(tempout[theta, x])
        evoked_bands_pt1[2, x] = np.sum(tempout[alpha, x])
        evoked_bands_pt1[3, x] = np.sum(tempout[beta, x])
        evoked_bands_pt1[4, x] = np.sum(tempout[gamma, x])
# pt2 - single trials
    del tempout
    tempout = np.zeros((len(eppower), len(freqs), numrois))
    ch = 0
    tr = 0
    while tr < len(eppower):
        data = eppower[tr, :, :, :]
        for y in range(0, numrois):
            temp = data[y, :, :]
            for x in range(0, len(freqs)):
                tempout[tr, x, y] = np.sum(temp[x, 167:500])
            del temp
        del data
        tr = tr + 1
    evoked_bands_pt2 = np.zeros((len(eppower), 5, numrois))
    for x in range(0, len(eppower)):
        data = tempout[x, :, :]
        for y in range(0, numrois):
            evoked_bands_pt2[x, 0, y] = np.sum(data[delta, y])
            evoked_bands_pt2[x, 1, y] = np.sum(data[theta, y])
            evoked_bands_pt2[x, 2, y] = np.sum(data[alpha, y])
            evoked_bands_pt2[x, 3, y] = np.sum(data[beta, y])
            evoked_bands_pt2[x, 4, y] = np.sum(data[gamma, y])
    # this is what we've all been waiting for, get channel snr
    del tempout
    chSNR_bands_evoked = np.zeros((5, numrois))
    for x in range(0, numrois):
        temp1 = evoked_bands_pt1[:, x]
        temp2 = evoked_bands_pt2[:, :, x]
        chSNR_bands_evoked[0, x] = temp1[0] / stats.sem(temp2[:, 0])
        chSNR_bands_evoked[1, x] = temp1[1] / stats.sem(temp2[:, 1])
        chSNR_bands_evoked[2, x] = temp1[2] / stats.sem(temp2[:, 2])
        chSNR_bands_evoked[3, x] = temp1[3] / stats.sem(temp2[:, 3])
        chSNR_bands_evoked[4, x] = temp1[4] / stats.sem(temp2[:, 4])
    del temp1
    del temp2
    del ch
    del tr
    del x
    del y
    # Induced power
    # pt1 - average response
    # create average response by first squaring the individual values
    ch = 0
    print('Estimating SNR for induced response per band')
    tempout = np.zeros((len(freqs), numrois))
    while ch < numrois:
        data = np.mean(np.square(eppower[:, ch, :, :]), 0)
        for x in range(0, len(freqs)):
            temp = data[x, timewindow[0]:(timewindow[-1] + 1)]
            temp = np.sum(temp)
            tempout[x, ch] = temp
            del temp
        ch = ch + 1
        del data
    induced_bands_pt1 = np.zeros((5, numrois))
    del ch
    for x in range(0, numrois):
        induced_bands_pt1[0, x] = np.sum(tempout[delta, x])
        induced_bands_pt1[1, x] = np.sum(tempout[theta, x])
        induced_bands_pt1[2, x] = np.sum(tempout[alpha, x])
        induced_bands_pt1[3, x] = np.sum(tempout[beta, x])
        induced_bands_pt1[4, x] = np.sum(tempout[gamma, x])
# pt2 - single trials
# this is what we've all been waiting for, get channel snr
    chSNR_bands_induced = np.zeros((5, numrois))
    for x in range(0, numrois):
        temp1 = induced_bands_pt1[:, x]
        temp2 = evoked_bands_pt2[:, :, x]
        chSNR_bands_induced[0, x] = temp1[0] / stats.sem(temp2[:, 0])
        chSNR_bands_induced[1, x] = temp1[1] / stats.sem(temp2[:, 1])
        chSNR_bands_induced[2, x] = temp1[2] / stats.sem(temp2[:, 2])
        chSNR_bands_induced[3, x] = temp1[3] / stats.sem(temp2[:, 3])
        chSNR_bands_induced[4, x] = temp1[4] / stats.sem(temp2[:, 4])
    del temp1
    del temp2
    del x

    ########################################################################################
    #### Output
    print('Complete. Returning output')
    return {
        'roi_snr_ASSR': chSNR_ASSR,
        'roi_snr_EVOKEDbands': chSNR_bands_evoked,
        'roi_snr_INDUCEDbands': chSNR_bands_induced
    }  # create dictionary of outputs
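
# A minimal sketch of the per-channel SNR estimate described in the comments
# above: summed evoked power divided by the standard error of the mean of the
# corresponding single-trial power values. The numbers below are made up.
import numpy as np
from scipy import stats

evoked_power = 12.5                           # summed evoked power for one roi channel
single_trial_power = np.random.rand(40) * 25  # per-trial summed power, same channel
snr_estimate = evoked_power / stats.sem(single_trial_power)
print('per-channel SNR estimate:', snr_estimate)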
Example #33
0
# extra_features = ['room_per_hh', 'pop_per_hh', 'bedroom_per_room']
# cat_encoder = full_pipeline.named_transformers_['cat']
# cat_1hot = list(cat_encoder.categories_[0])
# features = num_features + extra_features + cat_1hot
# pprint(sorted(zip(feature_importance, features), reverse=True))

# pick the best model
final_model = rand_search.best_estimator_

# evaluate on test set
X_test = strat_test_set.drop('median_house_value', axis=1)
y_test = strat_test_set['median_house_value'].copy()

X_test_prepared = full_pipeline.transform(X_test)
final_prediction = final_model.predict(X_test_prepared)

# final evaluation score
final_mse = mean_squared_error(y_test, final_prediction)
final_rmse = np.sqrt(final_mse)
print(f'\nEvaluation score on test set: {round(final_rmse, 1)}')

# confidence interval
conf = 0.95
sqd_er = (final_prediction - y_test)**2
conf_int = np.sqrt(
    stats.t.interval(conf,
                     len(sqd_er) - 1,
                     loc=sqd_er.mean(),
                     scale=stats.sem(sqd_er)))
print(f'\n95% Confidence interval = {conf_int}')
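
# A quick illustration (hypothetical numbers) of why the interval above is
# wrapped in np.sqrt: stats.t.interval is computed on the squared errors, so its
# bounds are in MSE units and the square root converts them back to RMSE units.
import numpy as np
from scipy import stats

squared_errors = np.array([1.2e9, 2.3e9, 0.9e9, 1.7e9, 2.0e9])
low, high = stats.t.interval(0.95,
                             len(squared_errors) - 1,
                             loc=squared_errors.mean(),
                             scale=stats.sem(squared_errors))
print(np.sqrt([low, high]))  # RMSE confidence bounds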
Example #34
0
def run_analysis(top_k, search_file_name, add_all, embed_type, model_dir):
    top_k = top_k

    with open(search_file_name, 'r') as f:
        data = json.load(f)
        queries = []
        embeddings = []
        all_matches = []
        qids = {}
        query_2_matches = {}
        data_as_array = []
        for obj in data:
            query = obj['query']
            if query not in query_2_matches:
                query_2_matches[query] = []
            query_2_matches[query].append(obj)

        for query, obj in query_2_matches.items():

            queries.append(query)
            all_docs = []
            for match in obj:
                qid = match['q_id']
                # posts get repeated across queries sometimes - to avoid neural
                # embeddings reproducing the same result multiple times - ignore dups
                # across queries.
                if qid in qids:
                    continue
                qids[match['q_id']] = 1
                if add_all:
                    content = embed_sentences(
                        match['q_title'] + ' ' + match['q_text'] + ' ' +
                        match['a_text'], embed_type, model_dir)
                else:
                    content = embed_sentences(match['q_title'], embed_type,
                                              model_dir)
                # not performing this step seems to cause catastrophic
                # issues in the np.asarray(embeddings) step further down
                # suspect something is suboptimal about converting whatever
                # tensorflow returns into np arrays
                if embed_type == 'USE':
                    content = np.asarray(content)
                embeddings.append(content)
                all_matches.append((query, match))
            data_as_array.append((query, obj))
        print('ALL LOADED')
        queries_embedding = np.asarray(
            embed_sentences(queries, embed_type, model_dir))
        print('queries embedded')
        print(len(embeddings))
        embeddings = np.asarray(embeddings).squeeze(1)
        print('numpy array created for main embeddings')

        num_queries = len(queries_embedding)
        faiss.normalize_L2(embeddings)
        faiss.normalize_L2(queries_embedding)
        index = faiss.IndexFlatIP(len(embeddings[0]))
        print(embeddings.shape)
        index.add(embeddings)
        query_distances, query_neighbors = index.search(
            queries_embedding, top_k)

        num_matches_to_text = 0
        all_ndcg = []
        ranks_avgs = []
        overlap_avgs = []
        recipRanks = []
        for index, q in enumerate(query_neighbors):
            ranks = []
            print(data_as_array[index][0])
            search_matches = []
            print('Actual matches (top-100):')
            for idx, m in enumerate(data_as_array[index][1]):
                try:
                    if idx < 100:
                        print(f"{idx} -- {m['q_id']}:{m['q_title']}")
                except:
                    pass
                search_matches.append(m['q_id'])
            print(q)
            print('Returned matches:')
            y_true = []
            y_pred = []
            num_overlap = 0
            for idx, k in enumerate(q):
                # print(all_matches[k][1]['q_title'])
                # print(all_matches[k][1]['q_id'])
                if all_matches[k][1]['q_id'] in search_matches:
                    try:
                        print(
                            f"{idx} -- {all_matches[k][1]['q_id']}:{all_matches[k][1]['q_title']}"
                        )
                    except:
                        pass
                    num_overlap += 1
                    num_matches_to_text += 1
                    rank = search_matches.index(all_matches[k][1]['q_id'])
                    '''
                       # Corpus# 
                       True relevance score - scale from 0-10
                           true_relevance = {'d1': 10, 'd2': 9, 'd3':7, 'd4':6, 'd5':4}# Predicted relevence score
                           predicted_relevance = {'d1': 8, 'd2': 9, 'd3':6, 'd4':6, 'd5':5}# relevance list processed as array
                           true_rel = np.asarray([list(true_relevance.values())])
                           predicted_rel = np.asarray([list(predicted_relevance.values())])
                           >> ndcg_score(true_rel, predicted_rel)
                           >> 0.9826797611735933
                    '''
                    y_true.append(rank +
                                  1)  #true scores of entities to be ranked.
                    y_pred.append(idx + 1)
                    reciprocal = 1 / (rank + 1)
                    recipRanks.append(reciprocal)
                    ranks.append(rank + 1)
            q_mrr = np.mean(np.asarray(ranks))
            ranks_avgs.append(q_mrr)
            q_overlap = num_overlap / top_k
            overlap_avgs.append(q_overlap)
            if q_overlap < 0.1:
                print('Very low overlap: ', q_overlap)
            if len(y_true) > 0 and len(y_pred) > 0:
                print('y_true: ', y_true, ', y_pred: ', y_pred)
                q_ndcg = ndcg_score(np.asarray([y_true]), np.asarray([y_pred]))
                print(f'Question MRR: {q_mrr}, NDCG: {q_ndcg}')
                all_ndcg.append(q_ndcg)

        print('num of matches to text:' + str(num_matches_to_text))
        print('num queries:' + str(num_queries))
        print('average overlap with search:' + str(num_matches_to_text /
                                                   (num_queries * top_k)))
        print('mean search rank:', np.mean(np.asarray(ranks_avgs)))
        meanRecipRank = sum(recipRanks) / len(recipRanks)
        print('MRR: standard error of the mean ', stat.sem(recipRanks))
        print("Mean reciprocal rank is:", meanRecipRank)
        print("Average NDCG:", (sum(all_ndcg) / len(all_ndcg)))
Example #35
0
def pumpProbe(
    data,
    background_data=None,
    norm=None,
    data_select=SelectorPP(spectra=0),
    background_select=SelectorPP(spectra=0),
    norm_select=SelectorPP(spectra=0),
    intensityE_select=SelectorPP(spectra=0),
    wavenumber=None,
    pp_delay=None,
    pixel=None,
):
    """Make pump-probe spectrum object taking the median over the scan axis.

    A pump-probe spectrum combines multiple victor `.dat` files into one
    `pysfg.spectrum.PumpProbe` object.

    One must select a single `spectra` index, thus the `SelectorPP(spectra=0)`
    in the default configuration. The selections of the data (`data_select`),
    the background (`background_select`) and the normalization (`norm_select`)
    are independent, but assignment will fail if background and norm can't be
    cast into the same shape as data.


    Arguments:
    data: A victor data dict as returned by `pysfg.victor.read.data_file`.
    background_data: Can be a constant number, or a numpy array with the same
      length as the pixel axis of `data`, or a `pysfg.read.victor.data_file`
      dictionary.
    norm: Can be a constant number, or a 1D array with the same length as pixel
      axis of `data` above, or a 2D array with the exact same shape as `data` above
    data_select: `pysfg.SelectorPP` object. This is used to subselect data from
      the data['data'] entry of the passed data dict. The default is to take spectrum
      index 0 and leave the rest untouched.
    background_select: Same as `data_select` but for the background. Shapes of data
      and background must match or else a `ValueError` occurs.
    norm_select: Same as `data_select` but for the norm. Shape of data and norm must
      match, else a `ValueError` occurs.
    wavenumber: Not fully implemented; if None, the calibration is read from the
      data dict passed above.
    pp_delay: Not fully implemented; if None, pp_delay is read from the `data`
      dict.

    Example:
      see `pysfg/test/pump_probe.py` for example usage.

    Returns:
      A `pysfg.PumpProbe` object
    """

    if not isinstance(data, dict):
        raise NotImplementedError
        # Need to implement alternative default wavenumber
        # Need to implement alternative for pp_delay
    intensity = np.median(
        data['data'][data_select.select],
        axis=(1)  # Median scans
    )

    intensityE = sem(data['data'][intensityE_select.select], axis=(1))

    # Handle various background data inputs
    if isinstance(background_data, dict):
        baseline = np.median(background_data['data'][background_select.select],
                             axis=(1))
    else:
        baseline = background_data

    # Assume norm is correct shape else it will fail
    # during assignment. TODO: Add shape checking
    if isinstance(norm, PumpProbe):
        norm = norm.basesubed

    if isinstance(wavenumber, type(None)):
        wavenumber = from_victor_header(data).wavenumber[data_select.pixel]
    if len(wavenumber) != np.shape(intensity)[-1]:
        raise ValueError(
            "Shape of wavenumber doesn't match shape of intensity")

    if not isinstance(pp_delay, type(None)):
        raise NotImplementedError
    pp_delay = data['timedelay']

    return PumpProbe(
        intensity=intensity,
        baseline=baseline,
        norm=norm,
        wavenumber=wavenumber,
        pp_delay=pp_delay,
        intensityE=intensityE,
        pixel=pixel,
    )
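
# A minimal sketch (made-up shapes, not a real victor data file) of how the
# intensity and intensityE above are formed: the median and the standard error
# are both taken over the scan axis of a (pp_delay, scan, pixel) shaped array.
import numpy as np
from scipy.stats import sem

fake_selected = np.random.rand(5, 20, 1600)   # (pp_delays, scans, pixels)
intensity = np.median(fake_selected, axis=1)  # median over scans
intensityE = sem(fake_selected, axis=1)       # standard error over scans
print(intensity.shape, intensityE.shape)      # both (5, 1600)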
Example #36
0
    plt.ylabel('Frequency')
    ax = plt.gca()
    ax.yaxis.labelpad = -2 
    plt.xlim((-3.5,3.5))
    plt.xticks([-3,-2,-1,0,1,2,3])
    plt.savefig("Orient.jpg",dpi=400)
    plt.savefig("Orient.pdf",dpi=400, format ='pdf')
    plt.clf()

    print('Here is the mean relative orientation: ' + str(np.mean(ors)))
    print('Here is the std relative orientation: ' + str(np.std(ors)))

    allvals = ors


    conf_int = stats.t.interval(0.99, len(allvals)-1, loc=np.mean(allvals), scale=stats.sem(allvals) )

    print('Here is the confidence interval orientations: ' + str(conf_int))



    allvals = allvals - np.mean(allvals)

    x = stats.shapiro(allvals)
    print(x)

    fig = plt.figure(1)
    ax = fig.add_subplot(111)
    stats.probplot(allvals,plot=pylab)
    #stats.probplot(allvals,dist="t", sparams=(len(ors),), plot=pylab)
    pylab.xlabel('Quantiles')
Example #37
0
parmfile = '/auto/data/daq/Tabor/TBR023/TBR023a14_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR025/TBR025a13_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR026/TBR026a16_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR027/TBR027a14_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR028/TBR028a08_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR030/TBR030a13_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR031/TBR031a13_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR034/TBR034a14_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR035/TBR035a15_p_OLP.m'
parmfile = '/auto/data/daq/Tabor/TBR036/TBR036a14_p_OLP.m'
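# Note: each assignment above overwrites the previous one, so only the last
# parmfile (TBR036a14_p_OLP.m) is actually analysed below.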

regression_stuff(parmfile, plot=True, dataframe='none')
Pred = regression_stuff(parmfile, plot=False, dataframe='none')

neur_pred = np.mean(Pred, axis=1)
neur_sem = stats.sem(Pred, axis=1)
stim_pred = np.mean(Pred, axis=0)
stim_sem = stats.sem(Pred, axis=0)


fig, ax = plt.subplots(1,2, figsize=(7,3), sharey=True)
ax[0].errorbar(range(Pred.shape[0]), neur_pred*-1, yerr=neur_sem, marker='.', ls='none', color='black')
ax[1].errorbar(range(Pred.shape[1]), stim_pred*-1, yerr=stim_sem, marker='.', ls='none', color='black')
ax[0].set_xlabel('Neuron', fontweight='bold', size=15)
ax[0].set_ylabel('Mean Weight', fontweight='bold', size=15)
ax[0].axhline(0, linestyle=':', color='black')
ax[1].set_xlabel('Stimulus Pair', fontweight='bold', size=15)
ax[1].axhline(0, linestyle=':', color='black')

fig.tight_layout()
Example #38
0
def aggregate_by_pos(meth_fi, aggfi, depth_thresh, mod_thresh, pos_list,
                     control, verbose_results, gff, ref, plot, plotdir,
                     plotsummary):
    pos_dict = {}
    if verbose_results:
        pos_dict_verbose = {}

    if pos_list:
        pos_set = make_pos_set(pos_list)

    values_dict = {}
    for line in open(meth_fi, 'r'):
        #try:
        #print line
        try:
            csome, read, pos, context, values, strand, label, prob = tuple(
                line.split('\t'))
        except:  #for backwards compatibility; does not work with verbose results
            csome, read, pos, context, values, strand, label = tuple(
                line.split('\t'))
        nextpos = str(int(pos) + 1)
        if (pos_list and (csome, pos, nextpos, strand)
                not in pos_set) or (context[int(len(context) / 2)] != 'M'):
            continue
        if (csome, pos, nextpos, context, strand) not in pos_dict:
            pos_dict[(csome, pos, nextpos, context, strand)] = []
            values_dict[(csome, pos, nextpos, context, strand)] = []
            if verbose_results:
                pos_dict_verbose[(csome, pos, nextpos, context, strand)] = []
        if (pos_list and
            (csome, pos, nextpos, strand) in pos_set) or (not pos_list
                                                          and plot):
            values_dict[(csome, pos, nextpos, context,
                         strand)].append([float(v)
                                          for v in values.split(',')][:-1])
        if label[0] == 'm':
            pos_dict[(csome, pos, nextpos, context, strand)].append(1)
        else:
            pos_dict[(csome, pos, nextpos, context, strand)].append(0)
        if verbose_results:
            pos_dict_verbose[(csome, pos, nextpos, context,
                              strand)].append(prob.strip())
    #except:
    #    pass
    print(values_dict)
    if plotsummary:
        print('plotting all current deviations...')
        num2lab = {0: 'A', 1: 'm6A'}
        curlab = [(val, num2lab[lab], lab) for pos_tup in values_dict
                  for val, lab in zip(values_dict[pos_tup], pos_dict[pos_tup])]
        currents, labels, klabels = zip(*curlab)
        colours = {'m6A': '#B4656F', 'A': '#55B196'}
        plot_w_labels(klabels,
                      labels,
                      currents,
                      'classifierProb',
                      'allpos',
                      'allpos',
                      plotdir,
                      colours,
                      alpha=0.3)
        print('finished plotting.')

    if plot:
        for locus in values_dict:
            cluster(values_dict[locus], locus[3],
                    ['m6A' if x == 1 else 'A' for x in pos_dict[locus]],
                    locus[0], locus[1], plot, plotdir)

    if pos_list:
        for locus in values_dict:
            values_df = pd.DataFrame(values_dict[locus])
            tvals = []
            pvals = []
            for i in values_df.columns:  #[:-1]:
                #for j in values_df.columns[i+1:]:
                ttest = stats.ttest_1samp(values_df[i], 0)
                #ttest = stats.ttest_rel(values_df[i],values_df[i+1])
                #tvals.append(ttest[0])
                pvals.append((ttest[1], ttest[0]))
            pval = (sum([-np.log10(x[0]) for x in pvals]),
                    max([x[1] for x in pvals]))  #min(pvals)
            values_dict[locus] = [np.round(x, 3) for x in [pval[1], pval[0]]]

    if ref:
        context_dict = ref2context(ref, pos_dict)

    count = 0
    outfi = open(aggfi, 'w')
    for locus in pos_dict.keys():
        a = (not pos_list) and check_thresh(pos_dict[locus], mod_thresh,
                                            depth_thresh, control)
        b = pos_list and (locus[0], locus[1], locus[2],
                          locus[4]) in pos_set  #and #'A' not in set(locus[4])
        if ref:
            cx = context_dict[locus]
        else:
            cx = locus[3]
        if a or b:
            count += 1
            frac = np.mean(pos_dict[locus])
            if gff:
                deets = 'coverage=' + str(
                    len(pos_dict[locus]
                        )) + ';context=' + cx + ';IPDRatio=5;frac=' + str(frac)
                if verbose_results:
                    probs = [float(x) for x in pos_dict_verbose[locus]]
                    se_95 = 2 * stats.sem(probs)
                    deets = deets + ';fracLow=' + str(
                        frac - se_95) + ';fracUp=' + str(
                            frac + se_95) + ';identificationQv=' + str(
                                int(100 * np.mean([
                                    float(x) for x in pos_dict_verbose[locus]
                                ])))
                gff_info = (locus[0], locus[2], locus[4], deets)
                write_gff(outfi, gff_info)
            else:
                print(aggfi)
                out_line = '\t'.join(
                    list(locus)[:-1] + [str(np.mean(pos_dict[locus]))] +
                    [locus[-1]] + [str(len(pos_dict[locus]))
                                   ])  #+[str(x) for x in values_dict[locus]])
                if pos_list:
                    out_line = out_line + '\t' + '\t'.join(
                        [str(x) for x in values_dict[locus]])
                if verbose_results:
                    out_line = out_line + '\t' + ','.join(
                        pos_dict_verbose[locus])
                outfi.write(out_line + '\n')
    if not pos_list:
        if not control:
            print(count, 'methylated loci found with min depth', depth_thresh,
                  'reads')
        else:
            print(count, 'unmethylated loci found with min depth',
                  depth_thresh, 'reads')
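
# A minimal sketch (made-up values) of the fracLow/fracUp bounds written in the
# verbose GFF output above: the methylated fraction plus/minus two standard
# errors of the per-read classifier probabilities, i.e. a rough 95% interval.
import numpy as np
from scipy import stats

probs = np.array([0.91, 0.85, 0.97, 0.78, 0.88, 0.93])  # per-read probabilities
frac = 0.83                                              # fraction of reads called methylated
se_95 = 2 * stats.sem(probs)
print('fracLow:', frac - se_95, 'fracUp:', frac + se_95)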
Example #39
0
def CI_model(y, confidence=0.95):
    std_err_y = st.sem(y)
    n_y = len(y)
    h_y = std_err_y * st.t.ppf((1 + confidence) / 2, n_y - 1)
    return h_y
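
# A hedged usage sketch for CI_model (assumes scipy.stats is imported as `st`,
# as in the function): the returned h_y is the half-width of the interval, so
# the confidence interval is mean(y) +/- h_y. The data here are synthetic.
import numpy as np
import scipy.stats as st

y = np.random.normal(10.0, 2.0, size=50)
h = CI_model(y)
print('95% CI:', np.mean(y) - h, 'to', np.mean(y) + h)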
Example #40
0
def compute_stats(list_dict, pkey, skey):
    sample = [sam[pkey][skey] for sam in list_dict]
    mean = round(np.mean(sample), 3)
    sem = round(stats.sem(sample), 3)
    return mean, sem
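
# Hypothetical usage of compute_stats: each dict in list_dict is one sample
# (here, made-up run results), and the mean and SEM are taken across samples.
runs = [{'metrics': {'accuracy': 0.91}},
        {'metrics': {'accuracy': 0.88}},
        {'metrics': {'accuracy': 0.93}}]
print(compute_stats(runs, 'metrics', 'accuracy'))  # -> (mean, sem), rounded to 3 d.p.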
Example #41
0
# RMS noise level for all channels


def RMS_calculation(data):

    RMS = np.zeros(num_ivm_channels)

    for i in range(num_ivm_channels):
        RMS[i] = np.sqrt((1 / len(data[i])) * np.sum(data[i]**2))

    return RMS


noise_rms = RMS_calculation(temp_filtered_uV)
noise_rms_average = np.average(noise_rms)
noise_rms_stdv = stats.sem(noise_rms)  # note: stats.sem gives the standard error of the mean, not the std

print('#------------------------------------------------------')
print('RMS:' + str(noise_rms))
print('RMS_average:' + str(noise_rms_average))
print('RMS_average_stdv:' + str(noise_rms_stdv))
print('#------------------------------------------------------')

filename_RMS = os.path.join(analysis_folder + '\\' + str(high_pass_freq) +
                            'noise_RMS' + '.npy')
np.save(filename_RMS, noise_rms)

#Protocol1 to calculate the stdv from noise MEDIAN

noise_median = np.median(np.abs(temp_filtered_uV) / 0.6745, axis=1)
noise_median_average = np.average(noise_median)
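
# Aside: the 0.6745 factor above is the usual median-based robust estimate of
# Gaussian noise sigma (median(|x|) / 0.6745 approximates the standard
# deviation of zero-mean noise). A minimal check on synthetic data:
import numpy as np

x = np.random.normal(0.0, 5.0, size=(4, 100000))
print(np.median(np.abs(x) / 0.6745, axis=1))  # each entry should be close to 5.0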
Example #42
0
                                                  curUserSimilarity)
    simRMSE = rmse(curUserSimPreiction, curTestUserItemMatrix)

    rmseList_cosine.append((simRMSE))

# In[95]:

print("Sim_cosine")
for simScore in rmseList_cosine:
    print("%.3lf" % (simScore))
print("the average is ", np.mean(rmseList_cosine))
print("The 95% CI for cosine is",
      (st.t.interval(0.95,
                     len(rmseList_cosine) - 1,
                     loc=np.mean(rmseList_cosine),
                     scale=st.sem(rmseList_cosine))))

# In[91]:

rmseList_eucd = []

for trainFileName, testFileName in datasetsFileNames:
    curTrainDF = pd.read_csv(os.path.join(MOVIELENS_DIR, trainFileName),
                             sep='\t',
                             names=fields)
    curTestDF = pd.read_csv(os.path.join(MOVIELENS_DIR, testFileName),
                            sep='\t',
                            names=fields)
    curTrainUserItemMatrix = buildUserItemMatrix(curTrainDF, numUsers,
                                                 numItems)
    curTestUserItemMatrix = buildUserItemMatrix(curTestDF, numUsers, numItems)
Example #43
0
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

testdf = pd.read_csv('test_qrt_pcr_2.csv')
groups = testdf.Group.unique()
group1 = groups[0]
group2 = groups[1]

meancontrol = (testdf['dCT'].where(testdf['Group'] == group1))
meancontrol = [
    meancontrol_i for meancontrol_i in meancontrol
    if str(meancontrol_i) != 'nan'
]
meancontrol

controlsem = stats.sem(meancontrol)

meancontrol = sum(meancontrol) / len(meancontrol)
meancontrol

testdf['Power'] = 2**-(testdf['dCT'] - meancontrol)

i = len(groups)
experimental_rqs = []

for x in range(1, i):
    group = groups[x]
    experimental = (testdf['dCT'].where(testdf['Group'] == group))
    experimental = [
        experimental_i for experimental_i in experimental
        if str(experimental_i) != 'nan'
Example #44
0
low_ci_95 = {}
high_ci_95 = {}
low_ci_99 = {}
high_ci_99 = {}
df_stats = {}

avg = [] 
low_ci_95 = []
high_ci_95 = []
low_ci_99 = []
high_ci_99 = []

for step in df["Step"].unique():
    values = df[feature][df["Step"] == step]
    f_mean = values.mean()
    lci95, hci95 = sps.t.interval(0.95, len(values) - 1, loc=f_mean, scale=sps.sem(values))
    lci99, hci99 = sps.t.interval(0.99, len(values) - 1, loc=f_mean, scale=sps.sem(values))
    avg.append(f_mean)
    low_ci_95.append(lci95)
    high_ci_95.append(hci95)
    low_ci_99.append(lci99)
    high_ci_99.append(hci99)

df_stats = pd.DataFrame()
df_stats["Step"] = df["Step"].unique()
df_stats["mean"] = avg
df_stats["lci95"] = low_ci_95
df_stats["hci95"] = high_ci_95
df_stats["lci99"] = low_ci_99
df_stats["hci99"] = high_ci_99
Example #45
0
def calc_yield(kstd, lamb, ks, kt, temp, temp_dat, lifetime_exp_zero,
               lifetime_exp_res, lifetime_exp_high, J, j_exp):

    # Define variables, initial frame
    rad_fram_aniso_g1 = np.array([[0.0006, 0.0, 0.0], [0.0, 0.0001, 0.0],
                                  [0.0, 0.0, -0.0009]])
    rad_fram_aniso_g2 = np.array([[0.0010, 0.0, 0.0], [0.0, 0.0007, 0.0],
                                  [0.0, 0.0, -0.0020]])

    rad_fram_aniso_hyperfine_1 = np.zeros([19, 3, 3])
    rad_fram_aniso_hyperfine_1[0] = array_construct(0.018394, 0.00575,
                                                    -0.024144, 0.119167,
                                                    -0.090257, -0.105530)
    rad_fram_aniso_hyperfine_1[1] = array_construct(-0.030255, 0.134767,
                                                    -0.104512, 0.111178,
                                                    0.03952, 0.065691)
    rad_fram_aniso_hyperfine_1[2] = array_construct(0.041327, -0.039294,
                                                    0.002033, 0.017961,
                                                    0.78922, 0.025615)
    rad_fram_aniso_hyperfine_1[3] = array_construct(0.065617, -0.016154,
                                                    -0.049462, 0.036655,
                                                    0.014217, 0.004047)
    rad_fram_aniso_hyperfine_1[4] = array_construct(0.069089, -0.054902,
                                                    -0.014187, 0.013749,
                                                    -0.075976, -0.006477)
    rad_fram_aniso_hyperfine_1[5] = array_construct(0.098308, -0.041108,
                                                    -0.0572, -0.024641,
                                                    0.013959, 0.002803)
    rad_fram_aniso_hyperfine_1[6] = array_construct(0.017844, 0.006183,
                                                    -0.024028, -0.119099,
                                                    -0.090068, 0.105661)
    rad_fram_aniso_hyperfine_1[7] = array_construct(-0.030775, 0.135406,
                                                    -0.104631, -0.110876,
                                                    0.039322, -0.065607)
    rad_fram_aniso_hyperfine_1[8] = array_construct(0.041235, -0.039174,
                                                    -0.002061, -0.018150,
                                                    0.078901, -0.025838)
    rad_fram_aniso_hyperfine_1[9] = array_construct(0.065415, -0.015957,
                                                    -0.049358, -0.036874,
                                                    0.014222, -0.004080)
    rad_fram_aniso_hyperfine_1[10] = array_construct(0.069102, -0.054901,
                                                     -0.014201, -0.014035,
                                                     -0.075981, 0.006618)
    rad_fram_aniso_hyperfine_1[11] = array_construct(0.098464, -0.041245,
                                                     -0.0571219, 0.024346,
                                                     0.014054, -0.002814)
    rad_fram_aniso_hyperfine_1[12] = array_construct(0.036159, -0.00026,
                                                     -0.035899, 0.038259,
                                                     -0.007026, -0.004047)
    rad_fram_aniso_hyperfine_1[13] = array_construct(0.036159, -0.00026,
                                                     -0.035899, 0.038259,
                                                     -0.007026, -0.004047)
    rad_fram_aniso_hyperfine_1[14] = array_construct(0.036159, -0.00026,
                                                     -0.035899, 0.038259,
                                                     -0.007026, -0.004047)
    rad_fram_aniso_hyperfine_1[15] = array_construct(0.035983, -0.000104,
                                                     -0.035879, -0.038338,
                                                     -0.007021, 0.004066)
    rad_fram_aniso_hyperfine_1[16] = array_construct(0.035983, -0.000104,
                                                     -0.035879, -0.038338,
                                                     -0.007021, 0.004066)
    rad_fram_aniso_hyperfine_1[17] = array_construct(0.035983, -0.000104,
                                                     -0.035879, -0.038338,
                                                     -0.007021, 0.004066)
    rad_fram_aniso_hyperfine_1[18] = array_construct(-0.772676, -0.7811,
                                                     1.553776, 0.000000,
                                                     -0.061480, 0.000443)

    rad_fram_aniso_hyperfine_2 = np.zeros([6, 3, 3])
    rad_fram_aniso_hyperfine_2[0] = array_construct(0.011586, 0.032114,
                                                    -0.0437, -0.101834,
                                                    -0.000008, 0.000014)
    rad_fram_aniso_hyperfine_2[1] = array_construct(0.011586, 0.032114,
                                                    -0.0437, -0.101834,
                                                    0.000014, 0.000008)
    rad_fram_aniso_hyperfine_2[2] = array_construct(0.011586, 0.032114,
                                                    -0.0437, -0.101834,
                                                    0.000014, 0.000008)
    rad_fram_aniso_hyperfine_2[3] = array_construct(0.011586, 0.032114,
                                                    -0.0437, -0.101834,
                                                    -0.000008, 0.000014)
    rad_fram_aniso_hyperfine_2[4] = array_construct(0.0352, 0.034, -0.0692,
                                                    0.0, 0.0, 0.0)
    rad_fram_aniso_hyperfine_2[5] = array_construct(0.0352, 0.034, -0.0692,
                                                    0.0, 0.0, 0.0)

    # axis frames
    data_xyz = np.loadtxt('dmj-an-pe1p-ndi-opt.txt', delimiter=',')
    transform_mol = inertia_tensor(data_xyz)

    dmj_xyz = np.loadtxt('dmj_in_pe1p.txt', delimiter=',')
    transform_dmj = inertia_tensor(dmj_xyz)

    ndi_xyz = np.loadtxt('NDI_in_pe1p.txt', delimiter=',')
    transform_ndi = inertia_tensor(ndi_xyz)

    # Convert to molecular frame
    aniso_g1 = rad_tensor_mol_axis(transform_mol, transform_dmj,
                                   rad_fram_aniso_g1)
    aniso_g2 = rad_tensor_mol_axis(transform_mol, transform_ndi,
                                   rad_fram_aniso_g2)

    aniso_hyperfine_1 = rad_tensor_mol_axis(transform_mol, transform_dmj,
                                            rad_fram_aniso_hyperfine_1)
    aniso_hyperfine_2 = rad_tensor_mol_axis(transform_mol, transform_ndi,
                                            rad_fram_aniso_hyperfine_2)

    # for n=1
    radius = 24.044e-10

    cnst = (1.0e3 * 1.25663706e-6 * 1.054e-34 * 1.766086e11) / (4.0 * np.pi *
                                                                radius**3)
    aniso_dipolar = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0],
                              [0.0, 0.0, -2.0]]) * cnst

    # Isotropic components
    g1_iso = 2.0031
    g2_iso = 2.0040

    # ISO h1 for the anti conformation
    iso_h1 = np.array([[
        2.308839, 0.903770, -0.034042, -0.077575, 1.071863, 0.258828, 2.308288,
        0.0902293, -0.034202, 0.077648, 1.073569, 0.259878, -0.166563,
        -0.166563, -0.166563, -0.166487, -0.166487, -0.166487, 0.831260
    ]])

    iso_h2 = np.array([[-0.1927, -0.1927, -0.1927, -0.1927, -0.0963, -0.0963]])

    spin_numbers_1 = np.array([[
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
        0.5, 0.5, 0.5, 0.5, 1.0
    ]])
    spin_numbers_2 = np.array([[0.5, 0.5, 0.5, 0.5, 1.0, 1.0]])

    field = np.reshape(temp_dat[:, 0], (len(temp_dat[:, 0])))
    data_y = np.reshape(temp_dat[:, 1], (len(temp_dat[:, 1])))

    sampled_field = np.linspace(0.0, 100.0, 20)
    triplet_yield = np.zeros_like(sampled_field)
    standard_error = np.zeros_like(sampled_field)
    compound_error = np.zeros_like(sampled_field)

    num_samples = 300
    samples = np.arange(1.0, float(num_samples))
    trip = np.zeros_like(samples)

    #--------------------------------------------------------------------------------------------------------------------------------------
    #zero field lifetime

    lifetime_zero = 0.0
    zero = np.zeros_like(samples)
    # zero field lifetime
    for index, item in enumerate(samples):
        relaxation_0 = rotational_relaxation(aniso_dipolar, g1_iso, g2_iso,
                                             aniso_g1, aniso_g2, iso_h1,
                                             iso_h2, aniso_hyperfine_1,
                                             aniso_hyperfine_2, spin_numbers_1,
                                             spin_numbers_2, 0.0, J, ks, kt,
                                             lamb, temp, kstd)
        zero[index] = relaxation_0.lifetime()
        lifetime_zero += zero[index]
    lifetime_zero = float(lifetime_zero) / float(num_samples)
    lifetime_dif_zero = lifetime_zero - lifetime_exp_zero

    #--------------------------------------------------------------------------------------------------------------------------------------
    #resonance field lifetime (B=2J)

    lifetime_res = 0.0
    res = np.zeros_like(samples)
    # resonance field lifetime (B = 2J)
    for index, item in enumerate(samples):
        relaxation_0 = rotational_relaxation(aniso_dipolar, g1_iso, g2_iso,
                                             aniso_g1, aniso_g2, iso_h1,
                                             iso_h2, aniso_hyperfine_1,
                                             aniso_hyperfine_2, spin_numbers_1,
                                             spin_numbers_2, 2.0 * J, J, ks,
                                             kt, lamb, temp, kstd)
        res[index] = relaxation_0.lifetime()
        lifetime_res += res[index]
    lifetime_res = float(lifetime_res) / float(num_samples)
    lifetime_dif_res = lifetime_res - lifetime_exp_res

    #--------------------------------------------------------------------------------------------------------------------------------------
    # High field lifetime

    lifetime_high = 0.0
    high = np.zeros_like(samples)
    # high field lifetime
    for index, item in enumerate(samples):
        relaxation_0 = rotational_relaxation(aniso_dipolar, g1_iso, g2_iso,
                                             aniso_g1, aniso_g2, iso_h1,
                                             iso_h2, aniso_hyperfine_1,
                                             aniso_hyperfine_2, spin_numbers_1,
                                             spin_numbers_2, 100.0, J, ks, kt,
                                             lamb, temp, kstd)
        high[index] = relaxation_0.lifetime()
        lifetime_high += high[index]
    lifetime_high = float(lifetime_high) / float(num_samples)
    lifetime_dif_high = lifetime_high - lifetime_exp_high

    #--------------------------------------------------------------------------------------------------------------------------------------

    for index_field, item_field in enumerate(sampled_field):
        total_t = 0.0
        for index, item in enumerate(samples):
            np.random.seed(index)
            # Define class
            relaxation = rotational_relaxation(
                aniso_dipolar, g1_iso, g2_iso, aniso_g1, aniso_g2, iso_h1,
                iso_h2, aniso_hyperfine_1, aniso_hyperfine_2, spin_numbers_1,
                spin_numbers_2, item_field, J, ks, kt, lamb, temp, kstd)
            # Calculate triplet yield
            trip[index] = relaxation.triplet_yield()
            total_t += trip[index]

        triplet_yield[index_field] = total_t
        standard_error[index_field] = sts.sem(trip)
        compound_error[index_field] = np.sqrt(
            standard_error[0] * standard_error[0] *
            ((1.0 / triplet_yield[0])**2 +
             (standard_error[index_field] * standard_error[index_field] *
              (triplet_yield[index_field] / triplet_yield[0])**2)))

    compound_error[0] = 0.0
    triplet_yield = triplet_yield / (triplet_yield[0])

    tck = interpolate.splrep(sampled_field, triplet_yield, s=0)
    xnew = field
    ynew = interpolate.splev(xnew, tck, der=0)

    mary = ((ynew) - (data_y - data_y[0] + 1.0)) * ((ynew) -
                                                    (data_y - data_y[0] + 1.0))
    mean_mary = (np.sum(ynew)) / float(len(ynew))
    mary_var = (mean_mary - ynew) * (mean_mary - ynew)

    lt = np.array([lifetime_zero, lifetime_res, lifetime_high])
    sq_lt_diff = np.array([(lifetime_dif_zero) * (lifetime_dif_zero),
                           (lifetime_dif_res) * (lifetime_dif_res) + 4.0 *
                           (J - j_exp) * (J - j_exp),
                           (lifetime_dif_high) * (lifetime_dif_high)])
    mean_lt = (np.sum(lt)) / float(len(lt))
    lt_var = (mean_lt - lt) * (mean_lt - lt)

    chai_lifetime = np.sum(sq_lt_diff) / np.sum(lt_var)
    chai_yield = np.sum(mary) / np.sum(mary_var)

    return chai_lifetime, chai_yield
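
# Aside (illustrative values only): the per-field `standard_error` above is the
# standard error of the mean of the per-sample triplet yields, e.g.:
import numpy as np
from scipy import stats as sts

per_sample_yields = np.random.normal(0.7, 0.05, size=299)
print(sts.sem(per_sample_yields))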
Example #46
0
            com_ran = df.commRange[index]
            energy = df.energy[index]
            energy_max = df.energy[index]
            prob = df.freq[index]
            node = Node(location=location,
                        com_ran=com_ran,
                        energy=energy,
                        energy_max=energy_max,
                        id=i,
                        energy_thresh=0.4 * energy,
                        prob=prob)
            list_node.append(node)
        mc = MobileCharger(energy=df.E_mc[index],
                           capacity=df.E_max[index],
                           e_move=df.e_move[index],
                           e_self_charge=df.e_mc[index],
                           velocity=df.velocity[index])
        target = [int(item) for item in df.target[index].split(',')]
        net = Network(list_node=list_node, mc=mc, target=target)
        print(len(net.node), len(net.target), max(net.target))
        q_learning = Q_learning(network=net)
        # inma = Inma()
        file_name = "log/q_learning_" + str(index) + ".csv"
        temp = net.simulate(optimizer=q_learning, file_name=file_name)
        life_time.append(temp)
        result.writerow({"nb run": nb_run, "lifetime": temp})

    confidence = 0.95
    h = sem(life_time) * t.ppf((1 + confidence) / 2, len(life_time) - 1)
    result.writerow({"nb run": mean(life_time), "lifetime": h})
Example #47
0
    def consensus_demultiplex(self):
        """
        Takes a FASTQ file of consensus reads and identifies each by index.  Handles writing demultiplexed FASTQ if
        user desired.
        """
        self.log.info("Consensus Index Search")
        eof = False
        start_time = time.time()
        split_time = time.time()
        fastq_file_name_list = []
        fastq_data_dict = collections.defaultdict(lambda: collections.defaultdict(list))
        indexed_read_count = 0
        key_counts = []
        while not eof:
            # Debugging Code Block
            if self.args.Verbose == "DEBUG":
                read_limit = 1000000
                if self.read_count > read_limit:
                    if self.args.Demultiplex:
                        for index_name in fastq_data_dict:
                            r1_data = fastq_data_dict[index_name]["R1"]
                            r1, r2 = self.fastq_outfile_dict[index_name]
                            r1.write(r1_data)
                            r1.close()
                            if not self.args.PEAR:
                                r2_data = fastq_data_dict[index_name]["R2"]
                                r2.write(r2_data)
                                r2.close()

                    Tool_Box.debug_messenger("Limiting Reads Here to {}".format(read_limit))
                    eof = True
            fastq2_read = None
            try:
                fastq1_read = next(self.fastq1.seq_read())
                if not self.args.PEAR:
                    fastq2_read = next(self.fastq2.seq_read())

            except StopIteration:
                if self.args.Demultiplex:
                    for index_name in fastq_data_dict:
                        r1_data = fastq_data_dict[index_name]["R1"]
                        r1, r2 = self.fastq_outfile_dict[index_name]
                        r1.write(r1_data)
                        r1.close()
                        if not self.args.PEAR:
                            r2_data = fastq_data_dict[index_name]["R2"]
                            r2.write(r2_data)
                            r2.close()

                eof = True
                continue

            self.read_count += 1
            if self.read_count % 100000 == 0:
                elapsed_time = int(time.time() - start_time)
                block_time = int(time.time() - split_time)
                split_time = time.time()
                self.log.info("Processed {} reads in {} seconds.  Total elapsed time: {} seconds."
                              .format(self.read_count, block_time, elapsed_time))

            # Match read with library index.
            match_found, left_seq, right_seq, index_name, fastq1_read, fastq2_read = \
                self.index_matching(fastq1_read, fastq2_read)

            if match_found:
                indexed_read_count += 1
                locus = self.index_dict[index_name][7]
                phase_key = "{}+{}".format(index_name, locus)
                r2_found = False
                r1_found = False
                if self.args.Platform == "Illumina":
                    # Score the phasing and place the reads in a dictionary.
                    for r2_phase, r1_phase in zip(self.phase_dict[locus]["R2"], self.phase_dict[locus]["R1"]):

                        r2_phase_name = r2_phase[1]
                        r1_phase_name = r1_phase[1]

                        # Tag reads that should not have any phasing.
                        if not r1_phase[0]:
                            self.phase_count[phase_key]["Phase " + r1_phase_name] = -1
                            self.phase_count[phase_key]["Phase " + r2_phase_name] = -1
                            continue
                        else:
                            self.phase_count[phase_key]["Phase " + r1_phase_name] += 0
                            self.phase_count[phase_key]["Phase " + r2_phase_name] += 0

                        # The phasing is the last N nucleotides of the consensus.
                        if r2_phase[0] == Sequence_Magic.rcomp(fastq1_read.seq[-len(r2_phase[0]):]) and not r2_found:
                            self.phase_count[phase_key]["Phase "+r2_phase_name] += 1
                            r2_found = True

                        if r1_phase[0] == fastq1_read.seq[:len(r1_phase[0])] and not r1_found:
                            self.phase_count[phase_key]["Phase "+r1_phase_name] += 1
                            r1_found = True

                    # if no phasing is found then note that.
                    if not r2_found:
                        self.phase_count[phase_key]["No Read 2 Phasing"] += 1
                    if not r1_found:
                        self.phase_count[phase_key]["No Read 1 Phasing"] += 1

                    # The adapters on Gupta Lab AAVS1.1 are reversed causing the reads to be reversed.
                    if locus == "AAVS1.1":
                        self.sequence_dict[index_name].append(fastq1_read.seq)
                    else:
                        self.sequence_dict[index_name].append(fastq1_read.seq)

                elif self.args.Platform == "TruSeq":
                    self.sequence_dict[index_name].append(right_seq)

                elif self.args.Platform == "Ramsden":
                    self.sequence_dict[index_name].append(Sequence_Magic.rcomp(fastq1_read.seq))

                else:
                    self.log.error("--Platform {} not correctly defined.  Edit parameter file and try again"
                                   .format(self.args.Platform))
                    raise SystemExit(1)

                if self.args.Demultiplex:
                    fastq_data_dict[index_name]["R1"].append([fastq1_read.name, fastq1_read.seq, fastq1_read.qual])
                    if not self.args.PEAR:
                        fastq_data_dict[index_name]["R2"].append([fastq2_read.name, fastq2_read.seq, fastq2_read.qual])

                    fastq_file_name_list.append("{}{}_{}_Consensus.fastq"
                                                .format(self.args.WorkingFolder, self.args.Job_Name, index_name))

            elif self.args.Demultiplex and not match_found:
                fastq_data_dict['Unknown']["R1"].append([fastq1_read.name, fastq1_read.seq, fastq1_read.qual])
                if not self.args.PEAR:
                    # unmatched read 2 data comes from fastq2_read, mirroring the matched branch above
                    fastq_data_dict['Unknown']["R2"].append([fastq2_read.name, fastq2_read.seq, fastq2_read.qual])

                fastq_file_name_list.append("{}{}_Unknown_Consensus.fastq"
                                            .format(self.args.WorkingFolder, self.args.Job_Name))

        if self.args.Demultiplex:
            self.fastq_compress(list(set(fastq_file_name_list)))

        for key in self.sequence_dict:
            key_counts.append(len(self.sequence_dict[key]))

        # The lower limit is used when plotting the data.  Generally the lowest values are just noise.
        if len(key_counts) == 0:
            self.log.error("No Scar Patterns Found")
            raise SystemExit(1)
        lower, upper_limit = stats.norm.interval(0.9, loc=statistics.mean(key_counts), scale=stats.sem(key_counts))
        lower_limit = statistics.mean(key_counts)-lower

        return indexed_read_count, lower_limit
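
The lower plotting limit above is derived from a 90% normal interval around the mean read count per index. A minimal sketch of that step in isolation, with made-up counts:

import statistics
from scipy import stats

key_counts = [1520, 1804, 1688, 95, 1733, 1610]   # hypothetical reads per index
lower, upper = stats.norm.interval(0.9, loc=statistics.mean(key_counts),
                                   scale=stats.sem(key_counts))
# distance from the mean to the interval's lower bound, as in the method above
lower_limit = statistics.mean(key_counts) - lower
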
Example #48
0
def mean_ci(x):
    import numpy as np
    import scipy.stats as st
    mn = np.mean(x)
    ci = st.t.interval(0.95, len(x) - 1, loc=mn, scale=st.sem(x))
    return (mn, ci[0], ci[1])
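
A quick usage sketch for mean_ci with hypothetical data; it returns the sample mean together with the lower and upper bounds of the 95% t-interval:

import numpy as np

x = np.array([4.2, 3.9, 4.5, 4.1, 4.4, 3.8])   # hypothetical sample
mn, lo, hi = mean_ci(x)
print("mean = {:.2f}, 95% CI = ({:.2f}, {:.2f})".format(mn, lo, hi))
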
Example #49
0
def SNRpsdEPOCH_GRANDAV(epoch_condition, starta, enda, snr_format, fstart,
                        fend, snr_format_name, tmin, tmax, baseline,
                        cond_events, cond_events_id, reject):

    print('Importing additional modules')
    import scipy
    from scipy import stats
    from scipy import signal
    import numpy as np
    import copy
    import mne
    import matplotlib as mpl
    # matplotlib.mlab.find was removed in matplotlib 3.1; use an equivalent helper instead
    def find(condition):
        return np.nonzero(condition)[0]
    ################### PRE-ANALYSIS CHECKS
    # Does the data require epochs creating?
    check = np.shape(epoch_condition._data)
    check = np.size(check)
    if check < 3:
        print('creating temporary epochs object')
        temp = mne.Epochs(
            epoch_condition,
            cond_events,
            cond_events_id,
            tmin,
            tmax,
            proj=False,  #picks=picks,
            baseline=baseline,
            preload=True,
            reject=reject,
            add_eeg_ref=False)
        time = np.linspace(tmin, tmax, np.shape(temp._data)[2])
        epoch_condition = copy.deepcopy(temp)
        del temp
    else:
        time = np.linspace(tmin, tmax, np.shape(epoch_condition._data)[2])
# Create timewindow
    s = find(time == starta)
    e = find(time == enda)
    ee = e + 1
    timewindow = np.arange(s, ee, 1)
    # Get sampling frequency
    fs = epoch_condition.info['sfreq']
    ######### PART 1 --- FRQ WINDOW OF INTEREST SNR
    ga = epoch_condition._data
    ga = ga[:, snr_format[snr_format_name], :]
    ga = np.mean(ga, 0)
    ga = np.mean(ga, 0)
    ga = ga - np.mean(ga)
    f, Pxx = signal.welch(ga[timewindow],
                          fs=fs,
                          nperseg=1001,
                          noverlap=np.round(1001 / 2),
                          detrend='linear',
                          scaling='density')
    gapsd = Pxx
    del Pxx
    del ga
    ff = np.round(f)
    frqwindow = np.arange(find(ff == fstart), find(ff == fend + 1), 1)
    delta = np.arange(find(ff == 1), find(ff == 3 + 1), 1)
    theta = np.arange(find(ff == 4), find(ff == 7 + 1), 1)
    alpha = np.arange(find(ff == 8), find(ff == 14 + 1), 1)
    beta = np.arange(find(ff == 15), find(ff == 31 + 1), 1)
    gamma = np.arange(find(ff == 32), find(ff == 58 + 1), 1)
    ######### STEP 2 - PSD FROM EACH ROI CHANNEL (SINGLE TRIALS)
    print('Estimating PSD for each ROI channel, single trials')
    psdmatrix2 = np.zeros([len(epoch_condition._data), int((fs / 2) + 1)])
    for y in range(0, len(epoch_condition._data)):
        ga = epoch_condition._data
        ga = ga[y, snr_format[snr_format_name], :]
        ga = np.mean(ga, 0)
        ga = ga - np.mean(ga)
        f, Pxx = signal.welch(ga[timewindow],
                              fs=fs,
                              nperseg=1001,
                              noverlap=np.round(1001 / 2),
                              detrend='linear',
                              scaling='density')
        psdmatrix2[y, :] = Pxx
    ######### STEP 3 - SNR
    print('Estimating SNR')
    temp1 = gapsd[frqwindow]
    temp1 = np.sum(temp1)
    temp2 = np.sum(psdmatrix2[:, frqwindow], 1)
    snr = temp1 / stats.sem(temp2)
    deltasnr = np.sum(gapsd[delta]) / stats.sem(np.sum(psdmatrix2[:, delta],
                                                       1))
    thetasnr = np.sum(gapsd[theta]) / stats.sem(np.sum(psdmatrix2[:, theta],
                                                       1))
    alphasnr = np.sum(gapsd[alpha]) / stats.sem(np.sum(psdmatrix2[:, alpha],
                                                       1))
    betasnr = np.sum(gapsd[beta]) / stats.sem(np.sum(psdmatrix2[:, beta], 1))
    gammasnr = np.sum(gapsd[gamma]) / stats.sem(np.sum(psdmatrix2[:, gamma],
                                                       1))

    return {
        'roisnrGA': snr,
        'deltasnr': deltasnr,
        'thetasnr': thetasnr,
        'alphasnr': alphasnr,
        'betasnr': betasnr,
        'gammasnr': gammasnr
    }
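
The band SNR returned above is the band-limited power of the grand-average PSD divided by the standard error of the single-trial band powers. A toy sketch of that ratio, assuming randomly generated per-trial PSDs:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
psd_trials = rng.random((40, 501))   # hypothetical: 40 trials x 501 frequency bins
band = np.arange(38, 44)             # hypothetical frequency window of interest
ga_psd = psd_trials.mean(axis=0)     # grand-average PSD (approximated here by the mean PSD)
snr = np.sum(ga_psd[band]) / stats.sem(np.sum(psd_trials[:, band], axis=1))
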
Example #50
0
#Program file Pex4_14_2.py
import numpy as np
import scipy.stats as ss
a = np.array([
    506, 508, 499, 503, 504, 510, 497, 512, 514, 505, 493, 496, 506, 502, 509,
    496
])
alpha = 0.95          # confidence level
df = len(a) - 1
ci = ss.t.interval(alpha, df, loc=a.mean(), scale=ss.sem(a))
print("Confidence interval:", ci)
Example #51
0
def SNRpsdEPOCH(epoch_condition, starta, enda, snr_format, numrois, fstart,
                fend, snr_format_name, blchange, tmin, tmax, baseline,
                cond_events, cond_events_id, reject):

    # SNR check for epoched MEG data using PSD estimation of frequency information.
    # Inputs
    # epoch_condition = data (raw or epoched, if raw event information will be used to create an epoched data set)
    # starta = period of interest start (seconds)
    # enda = period of interest end (seconds)
    # snr_format = snr channel information (dict)
    # numrois = number of roi channels (int)
    # fstart = frequency window of interest start (int)
    # fend = frequency window of interest end (int)
    # snr_format_name = name of channel type (e.g. ASSRnum - str)
    # blchange = measure percent change from baseline (1 = yes, 2 = no)
    # tmin = event epoch minimum time (float)
    # tmax = event epoch maximum time (float)
    # baseline = mne baseline period (e.g (None,0),obj)
    # cond_events = events for condition of interest
    # cond_events_id = event ids for conditions to highlight cond of interest
    # Outputs
    # chSNR_ASSR (roiSNR) = snr for period of interest at each roi channel
    # perchangeSNR = snr for percentage change between baseline and active period

    print('Importing additional modules')
    import scipy
    from scipy import stats
    from scipy import signal
    import numpy as np
    import copy
    import mne
    import matplotlib as mpl
    # matplotlib.mlab.find was removed in matplotlib 3.1; use an equivalent helper instead
    def find(condition):
        return np.nonzero(condition)[0]
    ################### PRE-ANALYSIS CHECKS
    # Does the data require epochs creating?
    check = np.shape(epoch_condition._data)
    check = np.size(check)
    if check < 3:
        print('creating temporary epochs object')
        temp = mne.Epochs(
            epoch_condition,
            cond_events,
            cond_events_id,
            tmin,
            tmax,
            proj=False,  #picks=picks,
            baseline=baseline,
            preload=True,
            reject=reject,
            add_eeg_ref=False)
        time = np.linspace(tmin, tmax, np.shape(temp._data)[2])
        epoch_condition = copy.deepcopy(temp)
        del temp
    else:
        time = np.linspace(tmin, tmax, np.shape(epoch_condition._data)[2])
# Create timewindow
    s = find(time == starta)
    e = find(time == enda)
    ee = e + 1
    timewindow = np.arange(s, ee, 1)
    # Get sampling frequency
    fs = epoch_condition.info['sfreq']
    ######### PART 1 --- FRQ WINDOW OF INTEREST SNR
    ######### STEP 1 - PSD FROM EACH ROI CHANNEL (MEAN)
    # psd is performed on the signal for the time window of interest (mean centred)
    # in this version the appropriate window and overlap should be pre-determined
    # for the resolution. future versions will automate this procedure to determine
    # the windowing properties needed for a given resolution.
    # Example - signal detrended and psd estimated for ~1hz resolution
    #    f,Pxx = signal.welch((teste[timewindow]-mean(teste[timewindow])),
    #                         fs=1000,nperseg = 1001,noverlap=np.round(1001/2),
    #                         detrend = 'linear',scaling='density')
    print('Extracting information from region of interest site')
    rois = np.zeros(numrois, dtype=int)  # np.int was removed in NumPy >= 1.24
    for x in range(0, np.shape(rois)[0]):
        text1 = snr_format_name
        text2 = str(x + 1)
        text3 = text1 + text2
        rois[x] = int(snr_format[text3])
    numfs = int(fs / 2) + 1
    psdmatrix = np.zeros([numfs, numrois])
    print('Estimating grand average PSD for each ROI channel')
    for x in range(0, numrois):
        tempo = np.mean(epoch_condition._data[:, rois[x], timewindow], 0)
        f, Pxx = signal.welch((tempo - np.mean(tempo)),
                              fs=fs,
                              nperseg=1001,
                              noverlap=np.round(1001 / 2),
                              detrend='linear',
                              scaling='density')
        psdmatrix[:, x] = Pxx
    ff = np.round(f)
    frqwindow = np.arange(find(ff == fstart), find(ff == fend + 1), 1)
    ######### STEP 2 - PSD FROM EACH ROI CHANNEL (SINGLE TRIALS)
    print('Estimating PSD for each ROI channel, single trials')
    psdmatrix2 = np.zeros([len(epoch_condition._data), int(fs / 2) + 1, numrois])
    for x in range(0, numrois):
        data = epoch_condition._data[:, rois[x], timewindow]
        for y in range(0, len(epoch_condition._data)):
            tempo = data[y, :]
            f, Pxx = signal.welch((tempo - np.mean(tempo)),
                                  fs=fs,
                                  nperseg=1001,
                                  noverlap=np.round(1001 / 2),
                                  detrend='linear',
                                  scaling='density')
            psdmatrix2[y, :, x] = Pxx
######### STEP 3 - SNR
    print('Estimating SNR')
    chSNR_ASSR = np.zeros(np.shape(rois)[0])
    for x in range(0, np.shape(rois)[0]):
        temp1 = psdmatrix[frqwindow, x]
        temp1 = np.sum(temp1)
        temp2 = np.sum(psdmatrix2[:, frqwindow, x], 1)
        snr = temp1 / stats.sem(temp2)
        chSNR_ASSR[x] = snr
        del temp1
        del temp2
        del snr


######### STEP 4 - if requested, snr from %signal change (baseline to window --- mean)
    if blchange == 1:
        print(
            'Estimating SNR for percentage change between baseline and active period'
        )
        bl = np.arange(0, (find(time == 0) + 1), 1)
        bldpsd = np.zeros([int(fs / 2) + 1, numrois])
        perchangeMean = np.zeros(numrois)
        bldpsdST = np.zeros(
            [len(epoch_condition._data), int(fs / 2) + 1, numrois])
        perchangeST = np.zeros([len(epoch_condition._data), numrois])
        percSNR = np.zeros(numrois)
        for x in range(0, numrois):
            bldat = np.mean(epoch_condition._data[:, rois[x], bl], 0)
            f, Pxx = signal.welch((bldat - np.mean(bldat)),
                                  nfft=1001,
                                  nperseg=500,
                                  noverlap=250,
                                  fs=fs,
                                  detrend='linear',
                                  scaling='density')
            bldpsd[:, x] = Pxx
        bldpsd = np.sum(bldpsd[frqwindow, :], 0)
        for x in range(0, numrois):
            temp1 = np.sum(psdmatrix[frqwindow, x], 0)
            perchangeMean[x] = ((temp1 - bldpsd[x]) / temp1) * 100

        for y in range(0, numrois):
            bldata = epoch_condition._data[:, rois[y], bl]
            for x in range(0, len(epoch_condition._data)):
                temp1 = bldata[x, :]
                f, Pxx = signal.welch((temp1 - np.mean(temp1)),
                                      nfft=1001,
                                      nperseg=500,
                                      noverlap=250,
                                      fs=fs,
                                      detrend='linear',
                                      scaling='density')
                bldpsdST[x, :, y] = Pxx
                temp1 = np.sum(psdmatrix2[x, frqwindow, y], 0)
                # percent change from baseline, matching the grand-average computation above
                perchangeST[x, y] = ((temp1 - np.sum(bldpsdST[x, frqwindow, y], 0))
                                     / temp1) * 100

        for x in range(0, np.shape(rois)[0]):
            temp1 = perchangeMean[x]
            temp2 = perchangeST[:, x]   # single-trial percent changes for ROI x
            snr = temp1 / stats.sem(temp2)
            percSNR[x] = snr
        print('Finished')
        return {'roiSNR': chSNR_ASSR, 'perchangeSNR': percSNR}
    else:
        print('Finished')
        return chSNR_ASSR
Example #52
0
def plotStandardErrorOfMean(x,methods,drawBarPlot = False, drawPointPlot = False, title="", width=0.10,
    colors=['b', 'g', 'r', 'c', 'm', 'y', 'k'], log_scale_y=False, log_scale_x=False, legend=True,
    x_title="X Label", y_title="Y Label"):
    '''
    Plots Mean and Standard Error of the mean for Methods with multiple runs

    Example
    -------    
        x = np.array([[1, 3, 4, 5], [1, 3, 4, 5], [1, 3, 4, 6]])
        method_1 = np.array([[1,4,5,2], [3,4,3,6] , [2,5,5,8]])
        method_2 = np.array([[8,7,5,9], [7,3,9,1] , [3,2,9,4]])
        method_3 = np.array([[10,13,9,11], [9,12,10,10] , [11,14,18,6]])
        methods = [method_1, method_2, method_3]

        plot = plotStandardErrorOfMean(x,methods,drawBarPlot = True)
        plot.show()

    Parameters
    -----------
    x : numpy array
        For each curve, contains the x-coordinates. Each entry
        corresponds to one method.
    methods : list of numpy arrays
        A list of numpy arrays of methods. Each method contains a numpy array
        of several run of that corresponding method.
    drawBarPlot : Bool
        Should be True if a Bar Plot is expected.
    drawPointPlot : Bool
        Should be True if a Point Plot is expected.
    title : string
        Title of the graph
    width : float
        Width of the bars.
    colors : string array
        Color of the curve. Each entry corresponds to one curve
    log_scale_y : Boolean
        If set to true, changes the y-axis to log scale.
    log_scale_x: Boolean
        If set to true, change the x-axis to log scale.
    legend : Boolean
        If set to true, displays the legend.
    x_title : String
        X label string 
    y_title : String
        Y label string
    Returns
    ----------
    plt : object
        Plot Object
    '''
    curves = []
    for index,method in enumerate(methods):
        mean = []
        sem = []
        for j in range(0,len(x[index])):
            valueArray = np.array([el[j] for el in method])
            meanValue = np.mean(valueArray)
            semValue = stats.sem(valueArray) #Standard Error of Mean
            mean.append(meanValue)
            sem.append(semValue)
        curves.append(np.array([mean,sem]))
    if(drawBarPlot):
        barPlot = bar_plot(x,curves, title=title, width=width,
            colors=colors,
            log_scale_y=log_scale_y, log_scale_x=log_scale_x, legend=legend,
            x_title=x_title, y_title=y_title)
        return barPlot
    elif (drawPointPlot):
        pointPlot = point_plot(x,curves,title=title,
            colors=colors,
            log_scale_y=log_scale_y, log_scale_x=log_scale_x, legend=legend,
            x_title=x_title, y_title=y_title)
        return pointPlot
    else:
        raise NameError('Please select the type of the plot')
Example #53
0
    #coarse-grained metric:
    #statistics of the rmses (w.r.t. ensembles) for predicted position on the prediction interval
    mean_rmse[k] = np.mean(rmse_vec)
    std_rmse[k] = np.std(rmse_vec)
    print('Mean of rmse:', mean_rmse[k])
    print('Std deviation of rmse:', std_rmse[k])

    #true position, multiple predicted positions, the averaged prediction and the 90 percent confidence interval
    sonn = []
    ax4 = plt.figure(figsize=(6, 3))
    plt.plot(t_res, data_orig[trainlen:trainlen + future], 'r^')
    for i in range(n_ens):
        sonn.append(sol[i, trainlen - trainbeg:trainlen + future - trainbeg])
        plt.plot(t_res, sonn[i], alpha=0.2)
    # The standard error of the mean (sem) provides a simple measure of uncertainty in a value.
    # Remark: the confidence interval is calculated assuming the samples are drawn from a Gaussian
    # distribution. Justification: as the sample size tends to infinity, the central limit theorem
    # guarantees that the sampling distribution of the mean is asymptotically normal.
    stderr = sem(sonn, axis=0)
    plt.plot(t_res, np.mean(sonn, axis=0), 'b-o')
    y1 = np.mean(sonn, axis=0) - 1.645 * stderr
    y2 = np.mean(sonn, axis=0) + 1.645 * stderr
    plt.plot(t_res, y1, '--')
    plt.plot(t_res, y2, '--')
    plt.fill_between(t_res, y1, y2, facecolor='blue', alpha=0.2)
    ax4.text(0.1, 0.96, '(b)', fontsize=12, verticalalignment='top')
    #plt.grid(False)
    #plt.title('true position, multiple predicted positions, the averaged prediction and the 90 percent confidence interval')
    plt.show()
Example #54
0
File: KLLUCB.py Project: Niko55/RL
    plt.close()


def plot_scatter(x, y, title, x_axis, y_axis, file_name):
    plt.plot(x,y,color=colors.pop())
    plt.scatter(x,y,color=colors.pop())
    plt.title(title)
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)
    plt.savefig(file_name)
    plt.show()
    plt.close()


arms = K_array
arm_mean = []
arm_err = []
arms_mistakes = []
sampleSize = len(arms)
freedom_degree = sampleSize-1
for ele in arrayOfNumberOfSamplesForEveryK:
    arm_mean.append(np.mean(ele[1:len(ele)-1]))
    arm_err.append(ss.t.ppf(0.95, freedom_degree)*ss.sem(ele[1:len(ele)-1]))
    arms_mistakes.append(1.0*ele[len(ele)-1]/sampleSize)
plot_errorbar(arms, arm_mean, arm_err, "K vs Sample Complexity", "K", "Sample Complexity", "KL_UCB_Sample_complexity.png")
plot_scatter(arms, arms_mistakes, "Mistakes Probability vs K", "K", "Mistakes Probability", "KL_UCB_Mistake_probablity.png")


#############################
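
Note that ss.t.ppf(0.95, df) is the one-sided 95% quantile, so error bars built from it correspond to a two-sided 90% interval; a two-sided 95% interval would use the 0.975 quantile. A minimal sketch with hypothetical sample-complexity values for a single arm:

import numpy as np
import scipy.stats as ss

samples = np.array([310, 295, 330, 288, 305, 312])   # hypothetical measurements
df = len(samples) - 1
err90 = ss.t.ppf(0.95, df) * ss.sem(samples)    # error-bar half-width in the style of the plot above
err95 = ss.t.ppf(0.975, df) * ss.sem(samples)   # two-sided 95% half-width
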
Example #55
0
def confidence_interval(x):
    lower, upper = stats.t.interval(0.95,
                                    len(x) - 1,
                                    loc=np.mean(x),
                                    scale=stats.sem(x))
    return lower, upper
def main():

    fdir = "data/processed"
    df_flux = pd.read_csv(os.path.join(fdir, "g1_fluxnet_screened.csv"))
    df_leaf = pd.read_csv(os.path.join(fdir, "g1_leaf_gas_exchange.csv"))
    df_isotope = pd.read_csv(os.path.join(fdir, "g1_isotope_screened.csv"))

    sns.set_style("ticks")
    sns.set_style({"xtick.direction": "in","ytick.direction": "in"})

    fig = plt.figure(figsize=(12,12))
    fig.subplots_adjust(hspace=0.05)
    fig.subplots_adjust(wspace=0.05)
    plt.rcParams['text.usetex'] = False
    plt.rcParams['font.family'] = "sans-serif"
    plt.rcParams['font.sans-serif'] = "Helvetica"
    plt.rcParams['axes.labelsize'] = 14
    plt.rcParams['font.size'] = 12
    plt.rcParams['legend.fontsize'] = 12
    plt.rcParams['xtick.labelsize'] = 12
    plt.rcParams['ytick.labelsize'] = 12

    almost_black = '#262626'
    # change the tick colors also to the almost black
    plt.rcParams['ytick.color'] = almost_black
    plt.rcParams['xtick.color'] = almost_black

    # change the text colors also to the almost black
    plt.rcParams['text.color'] = almost_black

    # Change the default axis colors from black to a slightly lighter black,
    # and a little thinner (0.5 instead of 1)
    plt.rcParams['axes.edgecolor'] = almost_black
    plt.rcParams['axes.labelcolor'] = almost_black

    colour_list = brewer2mpl.get_map('Set2', 'qualitative', 8).mpl_colors

    #colour_list = sns.palplot(sns.color_palette("colorblind", 10))
    #colour_list = sns.color_palette("Set2", 10)
    # CB palette  with grey:
    # from http://jfly.iam.u-tokyo.ac.jp/color/image/pallete.jpg
    #colour_list = ["#56B4E9", "#009E73", "#0072B2", "#F0E442",\
    #               "#E69F00", "#D55E00", "#CC79A7", "#999999"]
    colour_list = sns.color_palette("Accent", 10)

    ax1 = fig.add_subplot(231)
    ax2 = fig.add_subplot(232)
    ax3 = fig.add_subplot(233)

    ax4 = fig.add_subplot(234)
    ax5 = fig.add_subplot(235)
    ax6 = fig.add_subplot(236)

    pft_order = ['ENF', 'EBF', 'DBF', 'TRF']
    for i, pft in enumerate(pft_order):
        leaf = df_leaf[df_leaf.PFT == pft]
        isotope = df_isotope[df_isotope.PFT == pft]
        flux = df_flux[df_flux.PFT == pft]


        if i >= 3:
            cidx = i+1
        else:
            cidx = i

        for lat in np.unique(leaf.latitude):
            data_lat = leaf[leaf.latitude == lat]

            ax1.errorbar(np.mean(data_lat.g1), np.mean(data_lat.latitude),
                         xerr=stats.sem(data_lat.g1), ls=" ", marker="o",
                         color=colour_list[cidx], markeredgecolor="lightgrey",
                         alpha=0.8, capsize=False)

        for lat in np.unique(isotope.latitude):
            data_lat = isotope[isotope.latitude == lat]

            ax2.errorbar(np.mean(data_lat.g1), np.mean(data_lat.latitude),
                         xerr=stats.sem(data_lat.g1), ls=" ", marker="o",
                         color=colour_list[cidx], markeredgecolor="lightgrey",
                         alpha=0.8, capsize=False)

        for lat in np.unique(flux.latitude):
            data_lat = flux[flux.latitude == lat]

            ax3.errorbar(np.mean(data_lat.g1), np.mean(data_lat.latitude),
                         xerr=stats.sem(data_lat.g1), ls=" ", marker="o",
                         color=colour_list[cidx], markeredgecolor="lightgrey",
                         alpha=0.8, capsize=False)

    for i, pft in enumerate(pft_order):
        if i >= 3:
            cidx = i+1
        else:
            cidx = i

        ax1.plot(np.nan, np.nan, ls=" ", marker="o", color=colour_list[cidx],
                 markeredgecolor="lightgrey", label=pft, alpha=0.9)
    ax1.legend(numpoints=1, ncol=1, loc="best", frameon=False)

    pft_order = ['SAV', 'SHB', 'C3G', 'C4G', 'C3C', 'C4C']
    for i, pft in enumerate(pft_order):
        leaf = df_leaf[df_leaf.PFT == pft]
        isotope = df_isotope[df_isotope.PFT == pft]
        flux = df_flux[df_flux.PFT == pft]

        if i >= 3:
            cidx = i+1
        else:
            cidx = i
        for lat in np.unique(leaf.latitude):
            data_lat = leaf[leaf.latitude == lat]

            ax4.errorbar(np.mean(data_lat.g1), np.mean(data_lat.latitude),
                         xerr=stats.sem(data_lat.g1), ls=" ", marker="D",
                         color=colour_list[cidx], markeredgecolor="lightgrey",
                         alpha=0.8, capsize=False)


        for lat in np.unique(isotope.latitude):
            data_lat = isotope[isotope.latitude == lat]

            ax5.errorbar(np.mean(data_lat.g1), np.mean(data_lat.latitude),
                         xerr=stats.sem(data_lat.g1), ls=" ", marker="D",
                         color=colour_list[cidx], markeredgecolor="lightgrey",
                         label=pft, alpha=0.8, capsize=False)
        for lat in np.unique(flux.latitude):
            data_lat = flux[flux.latitude == lat]

            ax6.errorbar(np.mean(data_lat.g1), np.mean(data_lat.latitude),
                         xerr=stats.sem(data_lat.g1), ls=" ", marker="D",
                         color=colour_list[cidx], markeredgecolor="lightgrey",
                         alpha=0.8, capsize=False)

    for i, pft in enumerate(pft_order):
        if i >= 3:
            cidx = i+1
        else:
            cidx = i

        ax4.plot(np.nan, np.nan, ls=" ", marker="D", color=colour_list[cidx],
                 markeredgecolor="lightgrey", label=pft, alpha=0.9)


    ax4.legend(numpoints=1, ncol=1, loc="best", frameon=False)

    labels = ["(a)", "(b)", "(c)", "(d)", "(e)", "(f)"]
    props = dict(boxstyle='round', facecolor='white', alpha=1.0, ec="white")


    for i, ax in enumerate([ax1, ax2, ax3, ax4, ax5, ax6]):
        ax.set_xlim(0, 14)
        ax.set_ylim(-60, 90)
        ax.locator_params(nbins=6, axis="x")
        ax.locator_params(nbins=6, axis="y")

        ax.axhline(y=0.0, c='grey', lw=1.0, ls='--')
        ax.axhline(y=-23.43723, c='grey', lw=1.0, ls='-.')
        ax.axhline(y=23.43723, c='grey', lw=1.0, ls='-.')

        ax.text(0.03, 0.98, labels[i], transform=ax.transAxes, fontsize=12,
                verticalalignment='top', bbox=props)

    for ax in [ax1, ax2, ax3]:
        plt.setp(ax.get_xticklabels(), visible=False)

    for ax in [ax2, ax3, ax5, ax6]:
        plt.setp(ax.get_yticklabels(), visible=False)

    ax1.set_title("Leaf gas exchange")
    ax2.set_title("Leaf isotope")
    ax3.set_title("FLUXNET")


    ax1.set_ylabel("Latitude (degrees)", position=(0.5, 0.0))
    ax5.set_xlabel("Estimated $g_1$ (kPa$^{0.5}$)")


    odir = "/Users/%s/Dropbox/g1_leaf_ecosystem_paper/figures/figs/" % \
            (os.getlogin())
    plt.savefig(os.path.join(odir, "g1_vs_latitude.pdf"),
                bbox_inches='tight', pad_inches=0.1)
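
A usage sketch for the confidence_interval helper defined at the start of this example, with hypothetical g1 values (numpy and scipy.stats are assumed to be imported as np and stats, as in the script above):

g1_values = np.array([3.2, 4.1, 2.8, 3.9, 4.4, 3.5])   # hypothetical g1 estimates
lower, upper = confidence_interval(g1_values)
print("95% CI for g1: ({:.2f}, {:.2f})".format(lower, upper))
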
Example #57
0
def tf_edge_delta_out(
    crc_folder,
    bam_list,
    analysis_name,
    edge_table_path_1,
    edge_table_path_2,
    group1_list,
    group2_list,
    output="",
):
    """Calculates changes in group out degree at each predicted motif occurrence (by subpeaks)."""
    crc_folder = utils.format_folder(crc_folder, True)
    edge_path = merge_edge_tables(
        edge_table_path_1,
        edge_table_path_2,
        os.path.join(crc_folder, "{}_EDGE_TABLE.txt".format(analysis_name)),
    )

    # make a gff of the edge table
    edge_table = utils.parse_table(edge_path, "\t")
    edge_gff = []
    for line in edge_table[1:]:
        gff_line = [
            line[2],
            "{}_{}".format(line[0], line[1]),
            "",
            line[3],
            line[4],
            "",
            ".",
            "",
            "{}_{}".format(line[0], line[1]),
        ]
        edge_gff.append(gff_line)

    edge_gff_path = os.path.join(crc_folder,
                                 "{}_EDGE_TABLE.gff".format(analysis_name))
    utils.unparse_table(edge_gff, edge_gff_path, "\t")

    # direct the output to the crc folder
    signal_path = os.path.join(
        crc_folder, "{}_EDGE_TABLE_signal.txt".format(analysis_name))

    all_group_list = group1_list + group2_list
    if not utils.check_output(signal_path, 0, 0):
        signal_table_list = pipeline_utils.map_regions(
            bam_list,
            [edge_gff_path],
            crc_folder,
            crc_folder,
            all_group_list,
            True,
            signal_path,
            extend_reads_to=100,
        )
        print(signal_table_list)
    else:
        print("Found previous signal table at {}".format(signal_path))

    # now bring in the signal table as a dictionary using the locus line as the id
    print("making log2 group1 vs group2 signal table at edges")
    signal_table = utils.parse_table(signal_path, "\t")

    # figure out columns for group1 and group2
    group1_columns = [signal_table[0].index(name) for name in group1_list]
    group2_columns = [signal_table[0].index(name) for name in group2_list]
    group1_signal_vector = []
    group2_signal_vector = []
    for line in signal_table[1:]:
        group1_signal = numpy.mean(
            [float(line[col]) for col in group1_columns])
        group2_signal = numpy.mean(
            [float(line[col]) for col in group2_columns])

        group1_signal_vector.append(group1_signal)
        group2_signal_vector.append(group2_signal)

    group1_median = numpy.median(group1_signal_vector)
    group2_median = numpy.median(group2_signal_vector)

    print("group1 median signal")
    print(group1_median)
    print("group2 median signal")
    print(group2_median)

    # now that we have the median, we can take edges where at least 1 edge is above the median
    # and both are above zero and generate a new table w/ the fold change
    signal_filtered_path = signal_path.replace(".txt", "_filtered.txt")
    if utils.check_output(signal_filtered_path, 0, 0):
        print("Found filtered signal table for edges at {}".format(
            signal_filtered_path))
        signal_table_filtered = utils.parse_table(signal_filtered_path, "\t")
    else:
        signal_table_filtered = [
            signal_table[0] +
            ["GROUP1_MEAN", "GROUP2_MEAN", "GROUP1_vs_GROUP2_LOG2"]
        ]
        for line in signal_table[1:]:
            group1_signal = numpy.mean(
                [float(line[col]) for col in group1_columns])
            group2_signal = numpy.mean(
                [float(line[col]) for col in group2_columns])

            if (group1_signal > group1_median or group2_signal > group2_median
                ) and min(group1_signal, group2_signal) > 0:
                delta = numpy.log2(group1_signal / group2_signal)
                new_line = line + [group1_signal, group2_signal, delta]
                signal_table_filtered.append(new_line)

        utils.unparse_table(signal_table_filtered, signal_filtered_path, "\t")

    # now get a list of all TFs in the system
    tf_list = utils.uniquify(
        [line[0].split("_")[0] for line in signal_table_filtered[1:]])
    tf_list.sort()
    print(tf_list)

    out_degree_table = [[
        "TF_NAME",
        "EDGE_COUNT",
        "DELTA_MEAN",
        "DELTA_MEDIAN",
        "DELTA_STD",
        "DELTA_SEM",
    ]]

    for tf_name in tf_list:
        print(tf_name)
        edge_vector = [
            float(line[-1]) for line in signal_table_filtered[1:]
            if line[0].split("_")[0] == tf_name
        ]

        edge_count = len(edge_vector)
        delta_mean = round(numpy.mean(edge_vector), 4)
        delta_median = round(numpy.median(edge_vector), 4)
        delta_std = round(numpy.std(edge_vector), 4)
        delta_sem = round(stats.sem(edge_vector), 4)
        tf_out_line = [
            tf_name,
            edge_count,
            delta_mean,
            delta_median,
            delta_std,
            delta_sem,
        ]
        out_degree_table.append(tf_out_line)

    # set final output
    if not output:
        output_path = os.path.join(
            crc_folder, "{}_EDGE_DELTA_OUT.txt".format(analysis_name))
    else:
        output_path = output

    utils.unparse_table(out_degree_table, output_path, "\t")
    print(output_path)
    return output_path
Example #58
0
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as sc

TEST_DATA = np.array([
    [1, 2, 3, 2, 1, 2, 3, 4, 2, 3, 2, 1, 2, 3, 4, 4, 3, 2, 3, 2, 3, 2, 1],
    [5, 6, 5, 4, 5, 6, 7, 7, 6, 7, 7, 2, 8, 7, 6, 5, 5, 6, 7, 7, 7, 6, 5],
    [9, 8, 7, 8, 8, 7, 4, 6, 6, 5, 4, 3, 2, 2, 2, 3, 3, 4, 5, 5, 5, 6, 1],
    [3, 2, 3, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 6, 7, 8, 9, 8, 5],
])

# find mean for each of our observations
y = np.mean(TEST_DATA, axis=1, dtype=np.float64)
# and the 95% confidence interval half-width (1.96 standard errors of the mean)
ci95 = 1.96 * sc.sem(TEST_DATA, axis=1)

# each set is one try
tries = np.arange(0, len(y), 1.0)

# tweak grid and setup labels, limits
plt.grid(True, alpha=0.5)
plt.gca().set_xlabel('Observation #')
plt.gca().set_ylabel('Mean (+- 95% CI)')
plt.title("Observations with corresponding 95% CI as error bar.")

plt.bar(tries, y, align='center', alpha=0.2)
plt.errorbar(tries, y, yerr=ci95, fmt='none')

plt.show()
Example #59
0
    def evaluate_embeddings(self, algo, global_step, datasets):
        """Labeled evaluation."""
        # TODO(debidatta): Move these hard coded params to config after expts.
        num_labeled_list = list(range(1, 11))  # list, so that more values can be appended below
        num_episodes = 50

        # Set random seed to ensure same samples are generated for each evaluation.
        np.random.seed(seed=42)

        train_embs = np.concatenate(datasets['train_dataset']['embs'])
        val_embs = np.concatenate(datasets['val_dataset']['embs'])

        if train_embs.shape[0] == 0 or val_embs.shape[0] == 0:
            logging.warning(
                'All embeddings are NAN. Something is wrong with model.')
            return 0.0

        val_labels = np.concatenate(datasets['val_dataset']['labels'])

        report_val_accs = []
        train_dataset = datasets['train_dataset']
        num_samples = len(train_dataset['embs'])

        # Also add half of the train dataset.
        num_labeled_list += [int(0.5 * num_samples)]

        # Create episode list.
        episodes_list = []
        for num_labeled in num_labeled_list:
            episodes = []
            for _ in range(num_episodes):
                episodes.append(
                    np.random.permutation(num_samples)[:num_labeled])
            episodes_list.append(episodes)

        def indi_worker(episode):
            """Executes single epsiode for a particular k-shot task."""
            train_embs = np.concatenate(np.take(train_dataset['embs'],
                                                episode))
            train_labels = np.concatenate(
                np.take(train_dataset['labels'], episode))
            train_acc, val_acc = fit_linear_models(train_embs, train_labels,
                                                   val_embs, val_labels)
            return train_acc, val_acc

        def worker(episodes):
            """Executes all epsiodes for a particular k-shot task."""
            with cf.ThreadPoolExecutor() as executor:
                results = executor.map(indi_worker, episodes)
            results = list(zip(*results))
            train_accs = results[0]
            val_accs = results[1]
            return train_accs, val_accs

        with cf.ThreadPoolExecutor() as executor:
            results = executor.map(worker, episodes_list)

            for (num_labeled, (train_accs,
                               val_accs)) in zip(num_labeled_list, results):
                prefix = '%s_%s' % (datasets['name'], str(num_labeled))

                # Get average accuracy over all episodes.
                train_acc = np.mean(np.mean(train_accs))
                val_acc = np.mean(np.mean(val_accs))

                # Get 95% Confidence Intervals.
                train_ci = st.t.interval(
                    0.95,
                    len(train_accs) - 1,
                    loc=train_acc,
                    scale=st.sem(train_accs))[1] - train_acc
                val_ci = st.t.interval(0.95,
                                       len(val_accs) - 1,
                                       loc=val_acc,
                                       scale=st.sem(val_accs))[1] - val_acc

                logging.info('[Global step: {}] Classification {} Shot '
                             'Train Accuracy: {:.4f},'.format(
                                 global_step.numpy(), prefix, train_acc))
                logging.info('[Global step: {}] Classification {} Shot '
                             'Val Accuracy: {:.4f},'.format(
                                 global_step.numpy(), prefix, val_acc))

                logging.info('[Global step: {}] Classification {} Shot '
                             'Train Confidence Interval: {:.4f},'.format(
                                 global_step.numpy(), prefix, train_ci))
                logging.info('[Global step: {}] Classification {} Shot '
                             'Val Confidence Interval: {:.4f},'.format(
                                 global_step.numpy(), prefix, val_ci))

                tf.summary.scalar('few_shot_cxn/train_%s_accuracy' % prefix,
                                  train_acc,
                                  step=global_step)
                tf.summary.scalar('few_shot_cxn/val_%s_accuracy' % prefix,
                                  val_acc,
                                  step=global_step)
                tf.summary.scalar('few_shot_cxn/train_%s_ci' % prefix,
                                  train_ci,
                                  step=global_step)
                tf.summary.scalar('few_shot_cxn/val_%s_ci' % prefix,
                                  val_ci,
                                  step=global_step)

                report_val_accs.append(val_acc)

        return report_val_accs[-1]
Example #60
0
def mean_and_se(data):
    mu = np.mean(data)
    se = stats.sem(data)
    return mu, se
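
A usage sketch with hypothetical values; mean_and_se relies on numpy and scipy.stats being imported at module level, as shown:

import numpy as np
from scipy import stats

data = [0.82, 0.79, 0.91, 0.85, 0.88]   # hypothetical measurements
mu, se = mean_and_se(data)
print("{:.3f} +/- {:.3f}".format(mu, se))
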