Example #1
0
    'DP_father': dp_father,
    'DP_mother': dp_mother
})
# Label for this run: the model file name without its final extension, tagged
# with the test level.  os.path.splitext keeps the whole name when the file
# has no extension (the earlier split('.')[:-1] join produced an empty string
# in that case) and gives the same result for names that do have one.
m_name = os.path.splitext(os.path.basename(model))[0] + '_tstlvl' + str(lvl)
res['method'] = m_name
# Drop rows whose allele string contains 'nan'.  The explicit copy makes the
# 'var_id' assignment below write to an independent frame instead of a slice
# of the unfiltered one.
res = res[~res.test_var_alleles.str.contains('nan')].copy()
# The per-model CSV is written before 'var_id' is added, so it does not
# contain that column.
res.to_csv(os.path.join(output_dir, m_name + '.csv'), index=False)
res['var_id'] = res['test_var_id']
# Keep the first row for every variant id.  reset_index() returns a new frame
# and, as before, keeps the previous index as an 'index' column.
res_u = res[~res.var_id.duplicated()].reset_index()
# 1 where the predicted probability exceeds the cutoff, 0 otherwise.  Plain
# column assignment replaces the `.ix` indexer, which pandas has removed.
res_u['pred_labels'] = (res_u['pred_prob'] > prob_cutoff).astype(int)
# All unique variants are passed on (not only the predicted positives) and
# the output file is named after the child id rather than the model.
outp_tsv = os.path.join(output_dir, child_id + '.tsv')
func.writePredAsVcf(res_u, outp_tsv, min_DP=min_DP)

# vcf2table.sh is looked up next to the script that was invoked.
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
# NOTE(review): the arguments are joined with bare spaces; if runInShell hands
# this string to a shell, paths containing whitespace would be split -- confirm.
cmd = ' '.join(
    [os.path.join(script_dir, 'vcf2table.sh'), outp_tsv, script_dir, child_id])
print(cmd)
func.runInShell(cmd)

# Presumably '<child_id>-ann-onePline.tsv' is produced by vcf2table.sh above;
# verify against that script.
summarizeVariants.summarizeMutations(
    os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
    os.path.join(output_dir, 'denovo'), config_file)

#cmd = ' '.join([os.path.join(script_dir, 'work', 'summarizeMutations.py'),
#               os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
#               os.path.join(output_dir, 'denovo'),
#                config_file])
Example #2
0
# Known mutations, loaded only when the file exists; otherwise this stays an
# empty frame and the predictions are filtered on their own below.
kv_vcf = pandas.DataFrame()
if os.path.isfile(known_mut_file):
    kv_vcf = pandas.read_csv(known_mut_file, sep='\t')
    kv_vcf = kv_vcf[['ind_id', 'CHROM', 'POS', 'REF_offspring',
                     'ALT_base_offspring', 'status', 'descr', 'DP_offspring',
                     'DP_father', 'DP_mother']]
    # Keep only rows described as 'after'.  The explicit copy makes the
    # 'var_id' assignment write to an independent frame, not to a slice.
    kv_vcf = kv_vcf[kv_vcf.descr.isin(['after'])].copy()
    # Variant id has the form '<ind_id>_<CHROM>_<POS>'.
    kv_vcf['var_id'] = (kv_vcf.ind_id.astype(str) + '_' +
                        kv_vcf.CHROM.astype(str) + '_' +
                        kv_vcf.POS.astype(str))


mypred = pandas.read_csv(pred_file)
mypred['var_id'] = mypred['test_var_id']
# First row per variant id, copied so the new column is not set on a slice.
mypred_u = mypred[~mypred.var_id.duplicated()].copy()
# 1 where the predicted probability exceeds the cutoff, 0 otherwise.  Plain
# column assignment replaces the `.ix` indexer, which pandas has removed.
mypred_u['pred_labels'] = (mypred_u['pred_prob'] > prob_cutoff).astype(int)

# The diagnostics below use single-argument print(...) calls: they emit the
# same text as the former print statements on Python 2 and also parse on
# Python 3.
if kv_vcf.empty:
    # No known mutations available: keep the predicted positives only.
    mypred_u_res = mypred_u[mypred_u.pred_labels == 1]
else:
    # Attach the known status to each prediction; predictions without a match
    # get a null status.
    mypred_u = mypred_u.merge(kv_vcf[['var_id', 'status']], on='var_id', how='left')
    print('{} {}'.format('status', mypred_u.status.value_counts()))
    print('{} {}'.format('pred_labels', mypred_u.pred_labels.value_counts()))
    print('{} {}'.format('test.labels', mypred_u.test_labels.value_counts()))
    c_status_known = ~mypred_u.status.isnull()
    c_pred_pos = mypred_u.pred_labels == 1
    # The two masks below are not used anywhere in this section.
    c_status_pos = mypred_u.test_labels == 1
    c_status_neg = mypred_u.test_labels == 0
    # Keep every variant that has a known status or is predicted positive.
    mypred_u_res = mypred_u[c_status_known | c_pred_pos]
    print('{} {}'.format('shape', mypred_u_res.shape))
    print('{} {}'.format('status', mypred_u_res.status.value_counts()))
    print('{} {}'.format('pred_labels', mypred_u_res.pred_labels.value_counts()))
    print('{} {}'.format('test_labels', mypred_u_res.test_labels.value_counts()))
func.writePredAsVcf(mypred_u_res, pred_file + tag + '.tsv', min_DP=min_DP)
Example #3
0
    ]]
    kv_vcf = kv_vcf[kv_vcf.descr.isin(['after'])]
    kv_vcf['var_id'] = kv_vcf.ind_id.astype(str) + '_' + kv_vcf.CHROM.astype(
        str) + '_' + kv_vcf.POS.astype(str)

mypred = pandas.read_csv(pred_file)
mypred['var_id'] = mypred['test_var_id']
# First row per variant id, copied so the new column is not set on a slice.
mypred_u = mypred[~mypred.var_id.duplicated()].copy()
# 1 where the predicted probability exceeds the cutoff, 0 otherwise.  Plain
# column assignment replaces the `.ix` indexer, which pandas has removed.
mypred_u['pred_labels'] = (mypred_u['pred_prob'] > prob_cutoff).astype(int)

# The diagnostics below use single-argument print(...) calls: they emit the
# same text as the former print statements on Python 2 and also parse on
# Python 3.
if kv_vcf.empty:
    # No known mutations available: keep the predicted positives only.
    mypred_u_res = mypred_u[mypred_u.pred_labels == 1]
else:
    # Attach the known status to each prediction; predictions without a match
    # get a null status.
    mypred_u = mypred_u.merge(kv_vcf[['var_id', 'status']],
                              on='var_id',
                              how='left')
    print('{} {}'.format('status', mypred_u.status.value_counts()))
    print('{} {}'.format('pred_labels', mypred_u.pred_labels.value_counts()))
    print('{} {}'.format('test.labels', mypred_u.test_labels.value_counts()))
    c_status_known = ~mypred_u.status.isnull()
    c_pred_pos = mypred_u.pred_labels == 1
    # The two masks below are not used anywhere in this section.
    c_status_pos = mypred_u.test_labels == 1
    c_status_neg = mypred_u.test_labels == 0
    # Keep every variant that has a known status or is predicted positive.
    mypred_u_res = mypred_u[c_status_known | c_pred_pos]
    print('{} {}'.format('shape', mypred_u_res.shape))
    print('{} {}'.format('status', mypred_u_res.status.value_counts()))
    print('{} {}'.format('pred_labels', mypred_u_res.pred_labels.value_counts()))
    print('{} {}'.format('test_labels', mypred_u_res.test_labels.value_counts()))
func.writePredAsVcf(mypred_u_res, pred_file + tag + '.tsv', min_DP=min_DP)
Example #4
0
                        'DP_offspring': dp_offspring,
                        'DP_father': dp_father,
                        'DP_mother': dp_mother})
# Label for this run: the model file name without its final extension, tagged
# with the test level.  os.path.splitext keeps the whole name when the file
# has no extension (the earlier split('.')[:-1] join produced an empty string
# in that case) and gives the same result for names that do have one.
m_name = os.path.splitext(os.path.basename(model))[0] + '_tstlvl' + str(lvl)
res['method'] = m_name
# Drop rows whose allele string contains 'nan'.  The explicit copy makes the
# 'var_id' assignment below write to an independent frame instead of a slice
# of the unfiltered one.
res = res[~res.test_var_alleles.str.contains('nan')].copy()
# The per-model CSV is written before 'var_id' is added, so it does not
# contain that column.
res.to_csv(os.path.join(output_dir, m_name + '.csv'), index=False)
res['var_id'] = res['test_var_id']
# Keep the first row for every variant id.  reset_index() returns a new frame
# and, as before, keeps the previous index as an 'index' column.
res_u = res[~res.var_id.duplicated()].reset_index()
# 1 where the predicted probability exceeds the cutoff, 0 otherwise.  Plain
# column assignment replaces the `.ix` indexer, which pandas has removed.
res_u['pred_labels'] = (res_u['pred_prob'] > prob_cutoff).astype(int)
# All unique variants are passed on (not only the predicted positives) and
# the output file is named after the child id rather than the model.
outp_tsv = os.path.join(output_dir, child_id + '.tsv')
func.writePredAsVcf(res_u, outp_tsv, min_DP=min_DP)

# vcf2table.sh is looked up next to the script that was invoked.
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
# NOTE(review): the arguments are joined with bare spaces; if runInShell hands
# this string to a shell, paths containing whitespace would be split -- confirm.
cmd = ' '.join([os.path.join(script_dir, 'vcf2table.sh'),
                outp_tsv,
                script_dir,
                child_id])
print(cmd)
func.runInShell(cmd)

# Presumably '<child_id>-ann-onePline.tsv' is produced by vcf2table.sh above;
# verify against that script.
summarizeVariants.summarizeMutations(
    os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
    os.path.join(output_dir, 'denovo'),
    config_file)


#cmd = ' '.join([os.path.join(script_dir, 'work', 'summarizeMutations.py'),