Esempio n. 1
0
def saveTopKSNPs():
    """Export the highest-scoring SNPs, with annotations, to a CSV file.

    Steps:
      1. Load the scores and attach the SNP IDs as an extra index level;
         the column labelled ``0`` is kept and renamed ``Hstatistic``.
      2. Keep only the entries strictly above the 0.9999 quantile.
      3. Inner-join with ``rutl.getNut(0)`` and join the selected
         annotation columns, drop duplicate rows, and sort by
         ``Hstatistic`` in descending order.
      4. Discard rows whose ``Annotation_Impact`` equals ``'LOW'`` and
         write the rest to ``real/top_1e-4_quantile_SNPs.csv`` under
         ``utl.outpath``.

    Returns nothing; the only output is the CSV file.
    """
    raw_scores = rutl.loadScores()
    annotation_columns = "Annotation Annotation_Impact               Gene_Name      Gene_ID".split()
    annotation = loadANN()[annotation_columns]

    # Put scores and SNP IDs side by side, then move 'ID' into the index so
    # that selecting column 0 yields a Series keyed by position and ID.
    scores_with_ids = pd.concat([raw_scores, rutl.loadSNPIDs()], axis=1)
    hstat = scores_with_ids.set_index('ID', append=True)[0].rename('Hstatistic')

    # Strictly-greater comparison against the 0.9999 quantile.
    cutoff = hstat.quantile(0.9999)
    above_cutoff = hstat[hstat > cutoff].reset_index('ID')
    selected = above_cutoff.join(rutl.getNut(0), how='inner')

    annotated = selected.join(annotation).drop_duplicates()
    ranked = annotated.sort_values('Hstatistic', ascending=False)

    # Rows whose impact is exactly 'LOW' are excluded from the export.
    reportable = ranked[ranked['Annotation_Impact'] != 'LOW']
    reportable.to_csv(utl.outpath + 'real/top_1e-4_quantile_SNPs.csv')
Esempio n. 2
0
# Exploratory plotting script: loads precomputed score tables from
# `utl.outpath` and draws two side-by-side site plots with
# `pplt.plotSiteReal`.
# NOTE(review): `os`, `pd`, `sns` and `mpl` are not imported in the visible
# lines — presumably imported earlier in the file; confirm.

# Home directory path with a trailing slash.
# NOTE(review): `home` is not used in the visible part of this script.
home = os.path.expanduser('~') + '/'
import popgen.Util as utl
import popgen.Estimate as est  # NOTE(review): `est` is not used in the visible lines.
import popgen.Run.TimeSeries.RealData.Utils as rutl

# Group the columns of the all-scores table by the 'h' column level and apply
# `rutl.HstatisticAll` to each group.
a = rutl.loadAllScores().groupby(level='h', axis=1).apply(rutl.HstatisticAll)
df = pd.read_pickle(utl.outpath + 'real/scores.df')
# Index label of the last entry after an ascending sort of `lrd`, i.e. the
# largest value.
# NOTE(review): pandas sorts NaN last by default, so if `lrd` contains NaN
# this picks a NaN row instead of the maximum — confirm there are none.
i = df.lrd.sort_values().index[-1]
# Bare expression: the selected row is only displayed in an interactive
# session; as a script statement its result is discarded.
df.loc[i]

cd = pd.read_pickle(utl.outpath + 'real/CD.F59.df')

import popgen.Plots as pplt
import pylab as plt

names = rutl.loadSNPIDs()
# NOTE(review): "grid.linewidth" is passed as the string "9.99" while
# 'axes.linewidth' is a float — confirm the value and type are intended.
sns.set_style("white", {"grid.color": "0.9", 'axes.linewidth': .5, "grid.linewidth": "9.99"})
# Serif font family with Computer Modern (the trailing semicolon has no effect).
mpl.rc('font', **{'family': 'serif', 'serif': ['Computer Modern']});
# Render all figure text through LaTeX.
mpl.rc('text', usetex=True)
# Re-import the plotting module so that edits to it take effect.
# NOTE(review): `reload` is a builtin only in Python 2; under Python 3 it has
# to come from `importlib` — confirm the target interpreter.
reload(pplt)
# One row, two panels sharing the y axis.
f, ax = plt.subplots(1, 2, sharey=True, dpi=300, figsize=(4, 2))
# Rebinds `i`: index label of the largest value in column 0.5 of `a`
# (presumably the h = 0.5 group — confirm). Same NaN caveat as above.
i = a[0.5].sort_values().index[-1]
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})

# Left panel: the `cd` row(s) for label `i`, with a legend.
pplt.plotSiteReal(cd.loc[i], ax=ax[0], legend=True)
# Title is built from the first two components of `i` plus its SNP ID lookup;
# `i` is therefore expected to be a tuple-like label
# (presumably chromosome and position — confirm).
ax[0].set_title('{}:{:.0f} ({})'.format(i[0], i[1], names.loc[i]), fontsize=8)

# Right panel: the site with the largest `lrdiff` value; no title or legend.
# NOTE(review): the earlier lookup used `df.lrd`, this one uses `df.lrdiff` —
# confirm both columns exist and that the difference is intentional.
i = df.lrdiff.sort_values().index[-1]
pplt.plotSiteReal(cd.loc[i], ax=ax[1])
# Same arguments as the earlier set_context call; repeated after plotting.
sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 1.2})