Ejemplo n.º 1
0
def scanPop(pop,step=10000,winSize=50000):
    """Run the sliding-window scan for one population and save the results.

    Loads ``<dir>AlleleFrequencies/<pop>.df``, evaluates three statistics
    (``tajimaD``, ``H`` and ``SFSelect``) through ``utl.slidingWindow`` on
    every chromosome found in the first index level, concatenates the
    per-chromosome frames and writes them under ``<dir>results/`` as a
    pickle (``pandas/``), a CSV (``csv/``) and a Manhattan plot (``plot/``).

    Relies on module-level names that are not defined in this block:
    ``dir`` (used as the path prefix), ``svm`` (passed to the SFSelect
    estimator), ``getNumSamples``, ``Estimate``, ``utl`` and ``pplt``.

    Args:
        pop: Population label; selects the input file and prefixes the
            output file names.
        step: Forwarded to ``utl.slidingWindow`` as ``step``
            (presumably base pairs -- TODO confirm).
        winSize: Forwarded to ``utl.slidingWindow`` as ``winSize``; also
            encoded in the output file name as ``win<winSize/1000>K``.

    Returns:
        None. Results are written to disk.
    """
    import errno  # local import: only needed for the directory-creation guard

    print('{} {}'.format(winSize, pop))

    path = '{}results/'.format(dir)
    pathcsv = '{}csv/'.format(path)
    pathpng = '{}plot/'.format(path)
    pathdf = '{}pandas/'.format(path)

    # Every population writes into the same three directories, so parallel
    # workers can race between an "exists" check and makedirs. Create first
    # and tolerate "already exists" instead.
    for out_dir in (pathcsv, pathpng, pathdf):
        try:
            os.makedirs(out_dir)
        except OSError as err:
            if err.errno != errno.EEXIST or not os.path.isdir(out_dir):
                raise

    AF = pd.read_pickle('{}AlleleFrequencies/{}.df'.format(dir, pop))

    # NOTE: ``step`` is not part of the file name, so runs that differ only
    # in ``step`` write to the same output files.
    fname = '{}.win{:.0f}K.'.format(pop, winSize/1000)

    n = getNumSamples(pop)
    CHROMS = AF.index.get_level_values(0).unique()

    def f(x):
        # One tuple per window, in the order (tajimaD, H, SFSelect).
        return (
            Estimate.getEstimate(x=x, n=n, method='tajimaD', selectionPredictor=True),
            Estimate.getEstimate(x=x, n=n, method='H', selectionPredictor=True),
            Estimate.getEstimate(x=x, n=n, method='SFSelect', selectionPredictor=True, svm_model_sfselect=svm),
        )

    res = []
    for CHROM in CHROMS:
        chrom_af = AF.loc[CHROM]  # slice once; reused for the progress line
        df = utl.slidingWindow(chrom_af, n, step=step, winSize=winSize, f=f)
        # Append CHROM as an index level, then swap it in front of the
        # level(s) returned by slidingWindow.
        df['CHROM'] = CHROM
        df.set_index('CHROM', append=True, inplace=True)
        df.index = df.index.swaplevel(0, 1)
        res.append(df)
        print('{} {} {}'.format(pop, CHROM, chrom_af.shape[0]))
    df = pd.concat(res)

    df.to_pickle(pathdf + fname + 'df')
    df.to_csv(pathcsv + fname + 'csv')
    pplt.Manhattan.David(df.reset_index(), fname=pathpng + fname + 'png')
    print('{} {}'.format(pop, 'Done!'))


# exportIndividualNames()
# collectAFs()
# scanPop('SAS')
# map(scanPop,Populations)
# Pool(2).map(scanPop,Populations)
Ejemplo n.º 2
0
def plotTests(pop):
    """Plot the per-chromosome test scores of one population.

    Reads ``/home/arya/20130502/<pop>/all.Tests.df``, saves a copy as
    ``test_scores.df`` in the ``<pop>/<pop>/`` output directory, and for
    every chromosome in the first index level writes ``<CHROM>.png`` there
    with three stacked panels (``tajimaD``, ``H``, ``SFSelect``). Points
    returned by ``utl.findOutliers`` are drawn over each panel in red.

    Args:
        pop: Population label; selects the input and output directories.

    Returns:
        None. The pickle and the PNG files are written to disk.
    """
    print(pop)
    dspath = '/home/arya/20130502/' + pop + '/'
    df = pd.read_pickle(dspath + 'all.Tests.df')
    outpath = dspath + pop + '/'
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    df.to_pickle(outpath + 'test_scores.df')

    tests = ['tajimaD', 'H', 'SFSelect']
    CHROMS = sorted(df.index.get_level_values(0).unique())
    for CHROM in CHROMS:
        fig = plt.figure(figsize=(20, 10))
        try:
            chrom_scores = df.loc[CHROM]  # slice once per chromosome
            for i, test in enumerate(tests):
                x = chrom_scores[test]

                plt.subplot(len(tests), 1, i + 1)
                x.plot(style='o', grid=True)
                outliers = utl.findOutliers(x)
                # len() rather than truthiness: the outliers object is
                # plotted like a pandas object, whose bool() may be ambiguous.
                if len(outliers):
                    outliers.plot(style='ro', grid=True)

                plt.ylabel(test)
            plt.suptitle('Chromosome {}'.format(CHROM))
            plt.savefig('{}{}.png'.format(outpath, CHROM))
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this, memory grows with each chromosome plotted.
            plt.close(fig)