def mann_whitney(self):
    """Run pairwise Mann-Whitney tests and save the p-values to a text file.

    The genotypes to compare are the keys of ``self.mBar.samples`` whose
    value's ``get()`` returns 1, taken in the order in which they appear in
    ``Global.geno``.  For every pair of selected genotypes and every column
    named in ``Global.struct``, the rows of ``Global.col`` indexed by each
    genotype are compared with a two-sided Mann-Whitney U test.  The
    p-values (rounded to 4 decimals) are written as a table to a file chosen
    through a save dialog.

    Nothing is written when the dialog is cancelled.

    Returns:
        None
    """
    # Selected genotypes, restored to the original genotype order.
    selected = {key for key, value in self.mBar.samples.items()
                if value.get() == 1}
    geno_test = [geno for geno in Global.geno if geno in selected]

    # All unordered pairs of selected genotypes.
    comp = tuple(combinations(geno_test, 2))

    # One p-value per (pair, structure), pair-major so that the flat list
    # reshapes to (number of pairs, number of structures).
    p_val = []
    for ii, jj in comp:
        rows_ii = Global.col.index == ii
        rows_jj = Global.col.index == jj
        for s in Global.struct:
            # 'two-sided' is passed explicitly: older SciPy releases default
            # to a one-sided p-value, which would contradict the file header.
            p = mw(Global.col[s][rows_ii], Global.col[s][rows_jj],
                   alternative='two-sided')[1]
            p_val.append(round(p, 4))

    p_arr = np.array(p_val).reshape(len(comp), len(Global.struct))
    df = pd.DataFrame(p_arr, columns=Global.struct)
    df.insert(loc=0, column='Genotypes', value=[[ii, jj] for ii, jj in comp])
    df = df.set_index('Genotypes')

    # Ask where to save; an empty result means the dialog was cancelled.
    stat_fname = filedialog.asksaveasfilename()
    if not stat_fname:
        return
    if not stat_fname.endswith('.txt'):
        stat_fname += '.txt'

    with open(stat_fname, 'w') as outfile:
        outfile.write(
            'Mann-Whitney two-sided test for genotype '
            'combinations indicated in the \n"Genotypes" columns and '
            'fungal structures indicated as column headers.\n'
            'Values indicate p-value.\n\n')
        df.to_string(outfile)
def example3():
    """Compute per-region constraint metrics, dump them and plot them.

    All configuration is read from the module-level ``args`` namespace
    (``input``, ``regions``, ``regionsize``, ``maf``, ``comparison``,
    ``exclude``, ``conservation``, ``truetype``, ``pathogenic``).

    Steps:
      1. Window the input with the region function selected by
         ``args.regions``.
      2. Compute ``baseline`` for every window, then ``upton``,
         ``constraint`` and ``RVIS`` using the pooled baseline rate.
      3. Write ``constraint<suffix>.bed`` and ``baseline<suffix>.bed``.
      4. For every metric, call ``evaldoms`` with the truth-set function
         selected by ``args.truetype``, save a two-panel histogram
         (``<name>.dist.png``), and print the ``metrics`` result and a
         Mann-Whitney test of the two groups.

    Raises:
        ValueError: if ``args.regions`` is not one of the recognised
            modes, or if there are results to evaluate but
            ``args.truetype`` selects no truth set.
    """
    import toolshed as ts
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    # Unused below; kept from the original.
    # NOTE(review): possibly imported for its effect on matplotlib
    # styling -- confirm before removing.
    import seaborn as sns  # noqa: F401
    from scipy.stats import mannwhitneyu as mw
    import numpy as np

    iterator = JimFile(args.input, args.regions)
    results = defaultdict(lambda: defaultdict(list))
    ms = defaultdict(list)
    maf_cutoff = float(args.maf) if args.maf else 1e-05

    # A single all-inclusive CpG bin; finer binning was disabled upstream.
    cpg_cutoff = {'0-1': (0, 1)}
    base = []
    cons = []
    genes = None

    # Choose the windowing function for the requested region mode.
    if args.regions == "chunks":
        regioner = smallchunk
        chunksize = args.regionsize
    elif args.regions in ["domains", "nodoms", "all"]:
        regioner = byregiondist
        chunksize = ""
    elif args.regions == "genes":
        regioner = bytranscriptdist
        chunksize = ""
    else:
        raise ValueError("unsupported regions mode: %r" % (args.regions,))

    y = list(windower(iterator, regioner, chunksize))
    comparison = args.comparison
    if args.exclude:
        exclude = args.exclude
        ex = "ex" + args.exclude + "."
    else:
        exclude = None
        ex = ""

    # Conservation values are collected but not used further down.
    cv = []
    if args.conservation:
        for r in ts.reader(args.conservation):
            cv.append(get_conservation(r))

    cpg = 1  # constant placeholder; per-window CpG computation is disabled
    for iv in y:
        b = baseline(iv, maf_cutoff=maf_cutoff, exclude=exclude,
                     comparison=comparison, patt=patt)
        ms['baseline'].append((iv, b[3] / b[4], cpg))
        base.append(b)

    if base:
        # Pooled rate over all windows; float start keeps true division.
        count = sum((b[3] for b in base), 0.0)
        totlen = sum((b[4] for b in base), 0.0)
        baserate = count / totlen
        for iv, b in zip(y, base):
            u = upton(b, baserate)
            c = constraint(iv, maf_cutoff=maf_cutoff, genes=genes, upton=u)
            r = RVIS(iv, maf_cutoff=1e-3, patt=patt)
            if c != 0:
                ms['nzconstraint'].append((iv, c, cpg))
            ms['constraint'].append((iv, c, cpg))
            ms['upton'].append((iv, u[3], cpg))
            ms['rvis'].append((iv, r, cpg))
            cons.append((u[0], u[1], u[2], c))

    bedname = ("." + rtz(maf_cutoff) + "." + comparison + "."
               + args.regions + str(chunksize) + "." + ex)
    # Context managers close both files even if a write fails.
    with open("constraint" + bedname + ".bed", "w") as f1, \
            open("baseline" + bedname + ".bed", "w") as f2:
        for b, c in zip(base, cons):
            f1.write("\t".join(map(str, c)) + "\n")
            f2.write("\t".join(map(str, b)) + "\n")

    # Bucket every metric tuple by the CpG bin its third field falls in.
    cutoffs = set()
    for low, high in cpg_cutoff.values():
        co = str(low) + '-' + str(high)
        cutoffs.add(co)
        for metric in ms:
            for ct in ms[metric]:
                if ct[2] >= low and ct[2] <= high:
                    results[metric][co].append(ct)

    # Select the truth-set function.
    option = args.truetype
    trusrc = ""
    func = None
    if option == "clinvar" or option == "c":
        func = clinvar
        trusrc = "clinvar"
    elif option == "pli" or option == "p":
        func = pli
        trusrc = "pli"
    if results and func is None:
        raise ValueError("unsupported truetype: %r" % (option,))

    for metric in results:
        for cutoff in cutoffs:
            imgname = (metric + "." + trusrc + "." + comparison + "."
                       + args.regions + str(chunksize) + "." + ex + cutoff
                       + "." + rtz(maf_cutoff))
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s %s" % (metric, cutoff))
            fig, axes = plt.subplots(2)
            fig.tight_layout()
            counts = evaldoms(results[metric][cutoff], args.pathogenic, func)
            imin, imax = np.percentile(counts[True] + counts[False],
                                       [0.01, 99.99])
            axes[0].hist(counts[True], bins=80)
            axes[0].set_xlabel("pathogenic")
            axes[0].set_xlim(imin, imax)
            props = dict(boxstyle='round', facecolor='whitesmoke', alpha=0.5)
            axes[0].text(.875, .8,
                         "CpG frac:\n" + cutoff.replace("-", " - "),
                         transform=axes[0].transAxes, bbox=props)
            axes[1].hist(counts[False], bins=80)
            axes[1].set_xlabel("not-pathogenic")
            axes[1].set_xlim(imin, imax)
            # Save before show(): on interactive backends show() can leave
            # an empty figure behind, producing a blank image.
            plt.savefig(imgname + ".dist.png", bbox_inches='tight')
            plt.show()
            print(metrics(counts[True], counts[False], imgname + ".auc.png",
                          cutoff=cutoff))
            print(mw(counts[True], counts[False]))
            del fig
            plt.close()
def EvaluateAdaptiveBiostate(BVP_array=None, EDA_array=None):
    """Decide whether to lower, keep or raise the level from biosignals.

    The BVP signal is processed with ``hb.process`` (heart rate ``bpm`` and
    HRV ``rmssd``) and the EDA signal with ``nk.eda_process`` (tonic and
    phasic components).  Each measure is rated against the module-level
    baseline and, when available, the previous sample:

         1: higher arousal
         0: no noticeable change
        -1: lower arousal

    The mean of the phasic, tonic and HR ratings selects one of three
    decision branches (higher / lower / unchanged arousal); the HRV rating,
    the last action and the values stored for the neighbouring levels
    refine the decision.

    Side effects: updates the module-level ``m_previous*`` values,
    ``m_prevLevel``, ``m_curLevel`` (clamped to 1..5), ``m_lastAction`` and
    ``m_Iteration``, stores the current measures in ``levelStatesList`` and
    ``progressionStateList``, and prints a short summary.

    Args:
        BVP_array: BVP samples passed to ``hb.process`` at 64.0 Hz.
        EDA_array: EDA samples passed to ``nk.eda_process``.

    Returns:
        -1, 0 or 1: the step added to the current level before clamping.

    Raises:
        ValueError: if either input is None.
    """
    global m_previousHR, m_previousHRV, m_previousTonic, m_previousPhasic
    global m_prevLevel, m_curLevel, m_lastAction, m_Iteration

    # Both signals are required below; fail with a clear message instead of
    # an UnboundLocalError further down.
    if BVP_array is None or EDA_array is None:
        raise ValueError("BVP_array and EDA_array are both required")

    _, hb_measure = hb.process(BVP_array, 64.0, report_time=True)
    current_EDA = nk.eda_process(EDA_array, freq=1.9, sampling_rate=4)

    current_HR = hb_measure['bpm']
    current_HRV = hb_measure['rmssd']
    current_Tonic = current_EDA['df']['EDA_Tonic']
    current_Phasic = current_EDA['df']['EDA_Phasic']
    current_Tonic_mean = np.mean(current_Tonic)
    current_Phasic_mean = np.mean(current_Phasic)
    current = (current_HR, current_HRV,
               current_Tonic_mean, current_Phasic_mean)

    # --- HR: first against the baseline, then against the previous sample.
    hr_state = m_baseHR * percentage_change < current_HR
    if m_previousHR is None:
        hr_rate = 1 if hr_state else 0
    elif hr_state:
        if m_previousHR * percentage_change < current_HR:
            hr_rate = 1
        elif current_HR * percentage_change < m_previousHR:
            hr_rate = -1
        else:
            hr_rate = 0
    else:
        hr_rate = 0

    # --- HRV: a drop counts as higher arousal. Only used as a tie-breaker.
    hrv_state = m_baseHRV > current_HRV * percentage_change
    if m_previousHRV is None:
        hrv_rate = 1 if hrv_state else 0
    elif hrv_state:
        if m_previousHRV > current_HRV * percentage_change:
            hrv_rate = 1
        elif current_HRV > m_previousHRV * percentage_change:
            hrv_rate = -1
        else:
            hrv_rate = 0
    elif current_HRV > m_previousHRV * percentage_change:
        hrv_rate = -1
    elif current_HRV * percentage_change < m_previousHRV:
        hrv_rate = 1
    else:
        hrv_rate = -1 if current_HRV > m_baseHRV * percentage_change else 0

    # --- EDA tonic and phasic components share the same rating rule.
    tonic_rate = _eda_rate(current_Tonic, m_baseTonic, m_previousTonic)
    phasic_rate = _eda_rate(current_Phasic, m_basePhasic, m_previousPhasic)

    arousal = (phasic_rate + tonic_rate + hr_rate) / 3.0
    result = 0

    if arousal > 0.5:
        # Higher arousal: decide based on the last action.
        if m_lastAction == m_DECREASE:
            if m_curLevel == 1:
                result, m_lastAction = 0, m_STAY
            elif levelStatesList[m_curLevel - 2]['HR_mean'] is None:
                # Lower level never visited: keep decreasing.
                result, m_lastAction = -1, m_DECREASE
            elif _arousal_vote_fraction(levelStatesList[m_curLevel - 2],
                                        *current) > 0.5:
                result, m_lastAction = -1, m_DECREASE
            else:
                result, m_lastAction = 0, m_STAY
        elif m_lastAction == m_INCREASE:
            if m_curLevel == 5:
                result, m_lastAction = 0, m_STAY
            elif levelStatesList[m_curLevel]['HR_mean'] is None:
                # Upper level never visited: keep increasing.
                result, m_lastAction = 1, m_INCREASE
            elif _arousal_vote_fraction(levelStatesList[m_curLevel],
                                        *current) > 0.5:
                if hrv_rate == -1:
                    result, m_lastAction = -1, m_DECREASE
                else:
                    result, m_lastAction = 1, m_INCREASE
            else:
                result, m_lastAction = 0, m_STAY
        elif hrv_rate == -1:
            result, m_lastAction = -1, m_DECREASE
        elif hrv_rate == 0:
            result, m_lastAction = 0, m_STAY
        else:
            result, m_lastAction = 1, m_INCREASE
    elif arousal < 0.0:
        # Significantly lower arousal: never stay, revert the last move.
        if m_curLevel == 1:
            result, m_lastAction = 1, m_INCREASE
        elif m_curLevel == 5:
            result, m_lastAction = -1, m_DECREASE
        elif m_lastAction == m_DECREASE:
            result, m_lastAction = 1, m_INCREASE
        elif m_lastAction == m_INCREASE:
            result, m_lastAction = -1, m_DECREASE
        else:
            # NOTE(review): unlike the higher-arousal branch, the stored
            # neighbour values are not checked for None here; on Python 3
            # comparing None with a number raises TypeError -- confirm the
            # neighbours are always populated when this branch is reached.
            upper = _arousal_vote_fraction(levelStatesList[m_curLevel],
                                           *current)
            lower = _arousal_vote_fraction(levelStatesList[m_curLevel - 2],
                                           *current)
            if upper <= 0.5 and lower > 0.5:
                result, m_lastAction = 1, m_INCREASE
            else:
                result, m_lastAction = -1, m_DECREASE
    else:
        # No real change: repeat the last action where possible.
        if m_previousPhasic is None:
            result, m_lastAction = 1, m_INCREASE
        elif m_curLevel == 1:
            result, m_lastAction = 1, m_INCREASE
        elif m_curLevel == 5:
            result, m_lastAction = -1, m_DECREASE
        elif m_lastAction == m_DECREASE:
            result = -1
        elif m_lastAction == m_INCREASE:
            result = 1
        else:
            result, m_lastAction = 0, m_STAY

    # --- Apply the step, clamped to levels 1..5.
    m_prevLevel = m_curLevel
    m_curLevel = min(5, max(1, m_curLevel + result))

    # --- Remember this sample for the next call and for the level history.
    m_previousHR = current_HR
    m_previousHRV = current_HRV
    m_previousTonic = current_Tonic
    m_previousPhasic = current_Phasic

    level_state = levelStatesList[m_prevLevel - 1]
    level_state['HR_mean'] = current_HR
    level_state['HRV'] = current_HRV
    level_state['Tonic_mean'] = current_Tonic_mean
    level_state['Phasic_mean'] = current_Phasic_mean

    progress = progressionStateList[m_Iteration]
    progress['HR_mean'] = current_HR
    progress['HRV'] = current_HRV
    progress['Tonic_mean'] = current_Tonic_mean
    progress['Phasic_mean'] = current_Phasic_mean
    progress['Level'] = m_prevLevel
    m_Iteration += 1

    print("Baseline HR: {0} HRV: {1}".format(m_baseHR, m_baseHRV))
    print(
        "Phasic:{0}, tonic: {1}, hr: {2}(BPM){3:.3f}, hrv: {4}(RMSSD){5:.3f}".
        format(phasic_rate, tonic_rate, hr_rate, current_HR, hrv_rate,
               current_HRV))
    print("Done with result: {0}".format(result))
    return result


def _eda_rate(current, baseline, previous):
    """Rate an EDA component as 1, 0 or -1 using one-sided Mann-Whitney tests.

    Without a previous sample the rating is 1 when ``current`` is
    significantly greater than ``baseline`` and 0 otherwise.  With a previous
    sample the rating is 1 when ``current`` is not significantly greater
    than ``baseline``; otherwise ``current`` is tested against ``previous``:
    1 if significantly greater, -1 if significantly less, else 0.
    Significance means ``pvalue <= p_value`` (module-level threshold).
    """
    baseline_p = mw(current, baseline, alternative='greater').pvalue
    if previous is None:
        return 1 if baseline_p <= p_value else 0
    if baseline_p > p_value:
        return 1
    if mw(current, previous, alternative='greater').pvalue <= p_value:
        return 1
    if mw(current, previous, alternative='less').pvalue <= p_value:
        return -1
    return 0


def _arousal_vote_fraction(level_state, hr, hrv, tonic_mean, phasic_mean):
    """Return the fraction of four measures that exceed a stored level state.

    One vote each is cast when the stored ``HR_mean``, ``Tonic_mean`` and
    ``Phasic_mean`` are lower than the current values and when the stored
    ``HRV`` is higher than the current value.
    """
    votes = ((1 if level_state['HR_mean'] < hr else 0)
             + (1 if level_state['HRV'] > hrv else 0)
             + (1 if level_state['Tonic_mean'] < tonic_mean else 0)
             + (1 if level_state['Phasic_mean'] < phasic_mean else 0))
    # 4.0 keeps this a true division on Python 2 as well.
    return votes / 4.0
'NodeJS/CSV/Participant{0}BVPReg{1}.csv'.format( args.participantID, dataType), delimiter=';', skip_header=1, unpack=True) working_non, non_m = hb.process(BVP_Non, 64.0, calc_freq=True) EDA_df_Non = pd.read_csv('NodeJS/CSV/Participant{0}EDAReg{1}.csv'.format( args.participantID, dataType), delimiter=';') processed_eda_Non = nk.eda_process(EDA_df_Non["EDA"], freq=1.9, sampling_rate=4) MWTonic_stats_greater = mw(processed_eda_Affect['df']['EDA_Tonic'], processed_eda_Non['df']['EDA_Tonic'], alternative='greater') MWTonic_stats_less = mw(processed_eda_Affect['df']['EDA_Tonic'], processed_eda_Non['df']['EDA_Tonic'], alternative='less') MWPhasic_stats_greater = mw(processed_eda_Affect['df']['EDA_Phasic'], processed_eda_Non['df']['EDA_Phasic'], alternative='greater') MWPhasic_stats_less = mw(processed_eda_Affect['df']['EDA_Phasic'], processed_eda_Non['df']['EDA_Phasic'], alternative='less') print("Baseline HR analysis") times, baseline = genfromtxt( 'NodeJS/CSV/Participant{0}BVPBaseline5.csv'.format( args.participantID, dataType),
def EvaluateAdaptiveBiostate(BVP_array=None, EDA_array=None):
    """Rate the current arousal from BVP/EDA and return the level step.

    Heart rate (``bpm``) and HRV (``rmssd``) come from ``hb.process``; the
    tonic and phasic EDA components come from ``nk.eda_process``.  Each
    measure is rated against the module-level baseline and, when available,
    the previous sample:

         1: higher arousal
         0: no noticeable change
        -1: lower arousal

    The mean of the phasic, tonic and HR ratings selects the action; the HRV
    rating decides between increasing and decreasing when arousal is higher.

    Side effects: updates the module-level ``m_previous*`` values,
    ``m_prevLevel``, ``m_curLevel`` and ``m_lastAction``.

    Args:
        BVP_array: BVP samples passed to ``hb.process`` at 64.0 Hz.
        EDA_array: EDA samples passed to ``nk.eda_process``.

    Returns:
        The step added to ``m_curLevel`` (``m_prevLevel - m_curLevel`` when
        reverting, otherwise -1, 0 or 1).

    Raises:
        ValueError: if either input is None.
    """
    # These names are rebound below; without the declarations the first read
    # of any of them raises UnboundLocalError.
    global m_previousHR, m_previousHRV, m_previousTonic, m_previousPhasic
    global m_prevLevel, m_curLevel, m_lastAction

    if BVP_array is None or EDA_array is None:
        raise ValueError("BVP_array and EDA_array are both required")

    def rate_eda(current, baseline, previous):
        """Rate one EDA component with one-sided Mann-Whitney tests."""
        baseline_p = mw(current, baseline, alternative='greater').pvalue
        if previous is None:
            # No previous sample: only the baseline comparison is possible.
            return 1 if baseline_p <= p_value else 0
        if baseline_p > p_value:
            return 1  # must upgrade
        if mw(current, previous, alternative='greater').pvalue <= p_value:
            return 1  # stay or increase
        if mw(current, previous, alternative='less').pvalue <= p_value:
            return -1  # revert to the previous
        return 0

    _, hb_measure = hb.process(BVP_array, 64.0, report_time=True)
    current_EDA = nk.eda_process(EDA_array, freq=1.9, sampling_rate=4)
    current_HR = hb_measure['bpm']
    current_HRV = hb_measure['rmssd']

    # --- HR: first against the baseline, then against the previous sample.
    print(m_baseHR)
    print(percentage_change)
    hr_state = m_baseHR * percentage_change < current_HR
    if m_previousHR is None:
        hr_rate = 1 if hr_state else 0
    elif hr_state:
        if m_previousHR * percentage_change < current_HR:
            hr_rate = 1
        elif current_HR * percentage_change < m_previousHR:
            hr_rate = -1
        else:
            hr_rate = 0
    else:
        hr_rate = 0

    # --- HRV: a drop counts as higher arousal. Used as a tie-breaker below.
    hrv_state = m_baseHRV > current_HRV * percentage_change
    if m_previousHRV is None:
        hrv_rate = 1 if hrv_state else 0
    elif hrv_state:
        # Compare with the previous HRV (the original read the HR variable
        # and wrote the HR rating here).
        if m_previousHRV > current_HRV * percentage_change:
            hrv_rate = 1
        # Mirror of the HR rule: a rise in HRV means lower arousal.
        elif current_HRV > m_previousHRV * percentage_change:
            hrv_rate = -1
        else:
            hrv_rate = 0
    else:
        hrv_rate = -1 if current_HRV > m_baseHRV * percentage_change else 0

    # --- EDA tonic and phasic components.
    current_Tonic = current_EDA['df']['EDA_Tonic']
    tonic_rate = rate_eda(current_Tonic, m_baseTonic, m_previousTonic)
    current_Phasic = current_EDA['df']['EDA_Phasic']
    phasic_rate = rate_eda(current_Phasic, m_basePhasic, m_previousPhasic)

    # --- Decision, based on the ratings rather than the raw signals.
    arousal = (phasic_rate + tonic_rate + hr_rate) / 3.0
    result = 0
    if arousal > 0.5:
        # Higher arousal.
        if hrv_rate == -1:
            result = -1
            m_lastAction = m_DECREASE
        else:
            result = 1
            m_lastAction = m_INCREASE
    elif arousal < 0.0:
        # Significantly lower arousal: revert to the previous level.
        result = m_prevLevel - m_curLevel
        m_lastAction = m_REVERT
    elif m_lastAction == m_DECREASE:
        # No real change: repeat the last action.
        result = -1
    elif m_lastAction == m_INCREASE:
        result = 1

    # --- Finish off: apply the step and remember this sample.
    m_prevLevel = m_curLevel
    m_curLevel += result
    m_previousHR = current_HR
    m_previousHRV = current_HRV
    m_previousTonic = current_Tonic
    m_previousPhasic = current_Phasic
    return result
'_mean_score', axis='columns') original_sort_list.append(original_sort_cols) best_sort = pd.concat(best_sort_list) best_sort['Sequence Population'] = ['$Dev^+$'] * len(best_sort) original_sort = pd.concat(original_sort_list) original_sort['Sequence Population'] = ['All Predicted'] * len(original_sort) comb_sort = pd.concat([best_sort, original_sort]) #test if increasing or decreasing scores b = best_sort[best_sort['Assay Name'] == sorts[9]]['Assay Score'] o = original_sort[original_sort['Assay Name'] == sorts[9]]['Assay Score'] print(b) print(mw(b, o, alternative='greater')) print(mw(b, o, alternative='less')) a_palette = ['#1f77b4', '#FF0000'] #match colors sns.set_palette(a_palette) ax = sns.violinplot(data=comb_sort, x='Assay Name', y='Assay Score', hue='Sequence Population', hue_order=['All Predicted', '$Dev^+$'], cut=0, linewidth=.75, split=True, gridsize=20, scale='width', inner="quart",