Ejemplo n.º 1
0
def map_variant_to_mean_full_relative_gamma(datadir,
                                            *,
                                            dose='sober',
                                            filtered=True):
    """Store the per-variant mean relative gamma as a 'variant' mapping.

    Relative gamma is computed as ``(child_gamma / parent_gamma) - 1``.
    When ``filtered`` is true, entries are kept only where the parent gamma
    is below ``_Z_THRESHOLD * -sigma``, where ``sigma`` is the standard
    deviation of the 'gamma' column over the control variants; all other
    entries are masked out (``DataFrame.where``).

    Args:
        datadir: Data directory handed through to the gamma loaders and to
            ``mapping_lib``.
        dose: Dose label passed to ``select_dose`` and to the mapping
            reads/writes.
        filtered: If true, apply the parent-gamma mask and write the result
            under 'relgamma'; otherwise write the unmasked values under
            'unfiltered_relgamma'.

    Returns:
        None. The result is written via ``mapping_lib.make_mapping``.
    """
    child = get_child_gammas(datadir)
    parent = get_parent_gammas(datadir)
    control_flags = mapping_lib.get_mapping('variant', 'control', datadir)
    gamma_by_variant = mapping_lib.get_mapping(
        'variant', 'gamma', datadir, dose=dose)

    # Spread of gamma among the variants flagged as controls.
    control_ids = control_flags.loc[control_flags.control].index
    shared_ids = gamma_by_variant.index.intersection(control_ids)
    control_sigma = gamma_by_variant.loc[shared_ids].std().gamma

    ratios = (child / parent) - 1
    if filtered:
        colname = 'relgamma'
        # Keep only entries whose parent gamma lies below the (negative)
        # sigma-scaled cutoff.
        cutoff = _Z_THRESHOLD * (-control_sigma)
        ratios = ratios.where(parent < cutoff)
    else:
        colname = 'unfiltered_relgamma'

    dosed = select_dose(ratios, dose, datadir)
    # Move the 'sid' column level into the index, keep only the '03' column,
    # then pivot 'sid' back out to columns before averaging across them.
    by_sid = dosed.stack(level='sid', dropna=False)
    wide = by_sid[['03']].unstack()
    averaged = pd.DataFrame(wide.mean(axis=1), columns=[colname])
    mapping_lib.make_mapping(
        averaged.reset_index(), 'variant', colname, datadir, dose=dose)
Ejemplo n.º 2
0
def map_variant_to_bin(datadir, dose='sober'):
    """Bin each variant's relgamma and store the bin as an 'rgbin' mapping.

    Reads the ('variant', 'relgamma') mapping for ``dose``, passes its
    ``relgamma`` values and the bins from ``relgamma_bins()`` to
    ``bin_relgammas``, and writes the transposed result back as the
    ('variant', 'rgbin') mapping.

    Args:
        datadir: Data directory handed through to ``mapping_lib``.
        dose: Dose label used for both the read and the write.

    Returns:
        None. The result is written via ``mapping_lib.make_mapping``.
    """
    relgamma_map = mapping_lib.get_mapping(
        'variant', 'relgamma', datadir, dose=dose)
    edges = relgamma_bins()
    assigned = bin_relgammas(relgamma_map.relgamma.values, edges)
    frame = pd.DataFrame(
        assigned.T, index=relgamma_map.index, columns=['rgbin'])
    frame = frame.reset_index()
    mapping_lib.make_mapping(frame, 'variant', 'rgbin', datadir, dose=dose)
Ejemplo n.º 3
0
def map_variant_to_mean_full_gamma(datadir, dose='sober'):
    """Store the per-variant mean gamma as a ('variant', 'gamma') mapping.

    Loads the normed gammas, narrows them with ``select_dose``, keeps only
    the '03' column after moving the 'sid' column level into the index,
    pivots 'sid' back to columns, and averages across those columns.

    Args:
        datadir: Data directory handed through to the loader and to
            ``mapping_lib``.
        dose: Dose label passed to ``select_dose`` and to the mapping write.

    Returns:
        None. The result is written via ``mapping_lib.make_mapping``.
    """
    normed = get_normed_gammas(datadir)
    dosed = select_dose(normed, dose, datadir)
    by_sid = dosed.stack(level='sid', dropna=False)
    wide = by_sid[['03']].unstack()
    averaged = pd.DataFrame(wide.mean(axis=1), columns=['gamma'])
    mapping_lib.make_mapping(
        averaged.reset_index(), 'variant', 'gamma', datadir, dose=dose)
Ejemplo n.º 4
0
# Finish the figure that was started before this point and write it to disk.
plt.legend(loc='lower left', fontsize='small')
plt.tight_layout()
# NOTE(review): the string literal has no `{}` placeholders, so
# `.format(**locals())` returns it unchanged; the file is always
# PLOTDIR / 'scatter.agg.png'.
plotfile = PLOTDIR / 'scatter.agg.png'.format(**locals())
plt.savefig(plotfile, dpi=_FIGDPI)
plt.close()

# Look up a gene name for every variant in `data` by chaining two mappings:
# variant -> locus_tag, then locus_tag -> gene_name.
locusmap = mapping_lib.get_mapping('variant', 'locus_tag', UNGD)
genemap = mapping_lib.get_mapping('locus_tag', 'gene_name', UNGD)
geneids = genemap.loc[locusmap.loc[data.index].locus_tag]
# The lookup result is indexed by locus tag; re-key it by variant so that it
# aligns with `data` on assignment below.
geneids.index = data.index
# Attach measured values, predictions, gene names and the family "original"
# column to `data`.
# NOTE(review): `data`, `cross_predictions` and `familymap` are defined
# outside the visible fragment -- their shapes/indexes are assumed to line up
# with `data.index`; confirm.
data['y_meas'] = data.relgamma
data['y_pred'] = cross_predictions
data['gene_name'] = geneids
data['original'] = familymap.original

# Persist the predictions as a ('variant', 'y_pred') mapping.
mapping_lib.make_mapping(data.reset_index(), 'variant', 'y_pred', UNGD)

# Accumulates one Series of correlation statistics per gene.
rhos = list()

for gene, group in data.groupby('gene_name'):
  predicted = group.y_pred
  measured = group.y_meas
  # Rank (Spearman) and linear (Pearson) correlation between predictions and
  # measurements within this gene; the p-values are discarded.
  sprrho, _ = st.spearmanr(predicted, measured)
  prsrho, _ = st.pearsonr(predicted, measured)
  rhos.append(pd.Series({'gene':gene,
                         'sprrho_convnet':sprrho,
                         'prsrho_convnet':prsrho}))
  # Start a new square figure for this gene, titled with the gene name
  # (`{gene}` is filled from the loop variable via locals()).
  plt.figure(figsize=(6,6))
  template = 'Predictions vs. Measurements\n{gene}'
  main_title_str = template.format(**locals())
  plt.title(main_title_str)