def scanPop(pop, step=10000, winSize=50000):
    """Run a sliding-window scan over one population and save the results.

    Loads the allele-frequency table ``{dir}AlleleFrequencies/{pop}.df``,
    and for every chromosome (first index level of that table) evaluates
    three estimates per window -- ``tajimaD``, ``H`` and ``SFSelect`` -- via
    ``Estimate.getEstimate(..., selectionPredictor=True)``.  The per-chromosome
    frames are concatenated and written under ``{dir}results/`` as a pickle
    (``pandas/``), a CSV (``csv/``) and a Manhattan plot (``plot/``).

    Args:
        pop: Population identifier; used to locate the input table, to query
            ``getNumSamples`` and as the prefix of the output file names.
        step: Window step, forwarded unchanged to ``utl.slidingWindow``.
        winSize: Window size, forwarded unchanged to ``utl.slidingWindow``;
            ``winSize / 1000`` is also embedded in the output file names.

    Returns:
        None. The function is used for its side effects (files and prints).
    """
    print('{} {}'.format(winSize, pop))

    # NOTE(review): `dir` is not defined in this function; it is presumably a
    # module-level base-path string that shadows the builtin -- confirm.
    path = '{}results/'.format(dir)
    pathcsv = '{}csv/'.format(path)
    pathpng = '{}plot/'.format(path)
    pathdf = '{}pandas/'.format(path)

    # Create-then-tolerate instead of check-then-create: when several
    # populations are scanned in parallel, another worker may create the
    # directory between an exists() test and makedirs().
    for out_dir in (pathcsv, pathpng, pathdf):
        try:
            os.makedirs(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

    AF = pd.read_pickle('{}AlleleFrequencies/{}.df'.format(dir, pop))

    # NOTE: only the window size is part of the file name, so two runs that
    # differ only in `step` write to (and overwrite) the same files.
    fname = '{}.win{:.0f}K.'.format(pop, winSize / 1000)

    n = getNumSamples(pop)
    CHROMS = AF.index.get_level_values(0).unique()

    def window_estimates(x):
        """Return the (tajimaD, H, SFSelect) estimates for one window `x`."""
        return (
            Estimate.getEstimate(x=x, n=n, method='tajimaD',
                                 selectionPredictor=True),
            Estimate.getEstimate(x=x, n=n, method='H',
                                 selectionPredictor=True),
            Estimate.getEstimate(x=x, n=n, method='SFSelect',
                                 selectionPredictor=True,
                                 svm_model_sfselect=svm),
        )

    res = []
    for CHROM in CHROMS:
        chrom_af = AF.loc[CHROM]
        df = utl.slidingWindow(chrom_af, n, step=step, winSize=winSize,
                               f=window_estimates)
        # Tag the windows with their chromosome and make CHROM the outermost
        # index level so the concatenated frame is indexed (CHROM, <window>).
        df['CHROM'] = CHROM
        df.set_index('CHROM', append=True, inplace=True)
        df.index = df.index.swaplevel(0, 1)
        res.append(df)
        print('{} {} {}'.format(pop, CHROM, chrom_af.shape[0]))

    df = pd.concat(res)
    df.to_pickle(pathdf + fname + 'df')
    df.to_csv(pathcsv + fname + 'csv')
    pplt.Manhattan.David(df.reset_index(), fname=pathpng + fname + 'png')
    print('{} Done!'.format(pop))


# exportIndividualNames()
# collectAFs()
# scanPop('SAS')
# map(scanPop,Populations)
# Pool(2).map(scanPop,Populations)
def plotTests(pop):
    """Plot the per-chromosome test scores of one population.

    Reads ``/home/arya/20130502/{pop}/all.Tests.df``, stores a copy as
    ``test_scores.df`` in the ``{pop}/`` sub-directory next to it, and for
    every chromosome (first index level) saves ``{CHROM}.png`` there: a
    three-row figure with the ``tajimaD``, ``H`` and ``SFSelect`` columns as
    points, with whatever ``utl.findOutliers`` returns drawn on top in red.

    Args:
        pop: Population identifier; selects the input directory and names the
            output sub-directory.

    Returns:
        None. The function is used for its side effects (files and prints).
    """
    print(pop)
    dspath = '/home/arya/20130502/' + pop + '/'
    df = pd.read_pickle(dspath + 'all.Tests.df')

    outpath = dspath + pop + '/'
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    df.to_pickle(outpath + 'test_scores.df')

    CHROMS = sorted(df.index.get_level_values(0).unique())
    for CHROM in CHROMS:
        fig = plt.figure(figsize=(20, 10))
        try:
            chrom_scores = df.loc[CHROM]
            for i, test in enumerate(['tajimaD', 'H', 'SFSelect']):
                x = chrom_scores[test]
                plt.subplot(3, 1, i + 1)
                x.plot(style='o', grid=True)
                outliers = utl.findOutliers(x)
                # len() rather than truthiness: the truth value of a
                # multi-element pandas object is ambiguous and raises.
                if len(outliers):
                    outliers.plot(style='ro', grid=True)
                plt.ylabel(test)
            plt.suptitle('Chromosome {}'.format(CHROM))
            plt.savefig('{}{}.png'.format(outpath, CHROM))
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # without this, one large figure per chromosome accumulates.
            plt.close(fig)