'DP_father': dp_father, 'DP_mother': dp_mother }) m_name = os.path.basename(model) m_name = '.'.join(m_name.split('.')[:-1]) + '_tstlvl' + str(lvl) res['method'] = m_name res = res[~res.test_var_alleles.str.contains('nan')] res.to_csv(os.path.join(output_dir, m_name + '.csv'), index=False) res['var_id'] = res['test_var_id'] res_u = res[~res.var_id.duplicated()] res_u.reset_index(inplace=True) res_u.ix[:, 'pred_labels'] = (res_u['pred_prob'] > prob_cutoff).astype(int) #res_u = res_u[res_u.pred_labels == 1] #outp_tsv = os.path.join(output_dir, m_name + '.tsv') outp_tsv = os.path.join(output_dir, child_id + '.tsv') func.writePredAsVcf(res_u, outp_tsv, min_DP=min_DP) script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) cmd = ' '.join( [os.path.join(script_dir, 'vcf2table.sh'), outp_tsv, script_dir, child_id]) print(cmd) func.runInShell(cmd) summarizeVariants.summarizeMutations( os.path.join(output_dir, child_id + '-ann-onePline.tsv'), os.path.join(output_dir, 'denovo'), config_file) #cmd = ' '.join([os.path.join(script_dir, 'work', 'summarizeMutations.py'), # os.path.join(output_dir, child_id + '-ann-onePline.tsv'), # os.path.join(output_dir, 'denovo'), # config_file])
# Load the table of known mutations when the file exists; otherwise
# kv_vcf stays an empty frame and only predicted positives are exported.
kv_vcf = pandas.DataFrame()
if os.path.isfile(known_mut_file):
    kv_vcf = pandas.read_csv(known_mut_file, sep='\t')
    kv_vcf = kv_vcf[['ind_id', 'CHROM', 'POS', 'REF_offspring',
                     'ALT_base_offspring', 'status', 'descr',
                     'DP_offspring', 'DP_father', 'DP_mother']]
    # Keep only rows whose descr is 'after'. The copy makes the filtered
    # frame independent so the new column below is not set on a slice.
    kv_vcf = kv_vcf[kv_vcf.descr.isin(['after'])].copy()
    # Variant key: '<ind_id>_<CHROM>_<POS>'. It is merged against the
    # predictions' test_var_id, which presumably uses the same layout.
    kv_vcf['var_id'] = (kv_vcf.ind_id.astype(str) + '_' +
                        kv_vcf.CHROM.astype(str) + '_' +
                        kv_vcf.POS.astype(str))

# Read the predictions and keep the first row per variant id.
mypred = pandas.read_csv(pred_file)
mypred['var_id'] = mypred['test_var_id']
mypred_u = mypred[~mypred.var_id.duplicated()].copy()

# A prediction is labelled 1 when its probability is strictly above the
# cutoff, otherwise 0. Plain column assignment replaces the removed `.ix`.
mypred_u['pred_labels'] = (mypred_u['pred_prob'] > prob_cutoff).astype(int)

if kv_vcf.empty:
    # No known mutations available: export predicted positives only.
    mypred_u_res = mypred_u[mypred_u.pred_labels == 1]
else:
    # The left merge keeps every prediction; 'status' is null for variants
    # that have no match in the known-mutation table.
    mypred_u = mypred_u.merge(kv_vcf[['var_id', 'status']],
                              on='var_id', how='left')
    # Single-argument print() emits the same text on Python 2 and 3.
    for label, column in (('status', 'status'),
                          ('pred_labels', 'pred_labels'),
                          ('test.labels', 'test_labels')):
        print('%s %s' % (label, mypred_u[column].value_counts()))

    # Export rows that have a non-null status or are predicted positive.
    c_status_known = ~mypred_u.status.isnull()
    c_pred_pos = mypred_u.pred_labels == 1
    mypred_u_res = mypred_u[c_status_known | c_pred_pos]

    print('%s %s' % ('shape', mypred_u_res.shape))
    for label in ('status', 'pred_labels', 'test_labels'):
        print('%s %s' % (label, mypred_u_res[label].value_counts()))

func.writePredAsVcf(mypred_u_res, pred_file + tag + '.tsv', min_DP=min_DP)
]] kv_vcf = kv_vcf[kv_vcf.descr.isin(['after'])] kv_vcf['var_id'] = kv_vcf.ind_id.astype(str) + '_' + kv_vcf.CHROM.astype( str) + '_' + kv_vcf.POS.astype(str) mypred = pandas.read_csv(pred_file) mypred['var_id'] = mypred['test_var_id'] mypred_u = mypred[~mypred.var_id.duplicated()] mypred_u.ix[:, 'pred_labels'] = (mypred_u['pred_prob'] > prob_cutoff).astype(int) if kv_vcf.empty: mypred_u_res = mypred_u[mypred_u.pred_labels == 1] else: mypred_u = mypred_u.merge(kv_vcf[['var_id', 'status']], on='var_id', how='left') print 'status', mypred_u.status.value_counts() print 'pred_labels', mypred_u.pred_labels.value_counts() print 'test.labels', mypred_u.test_labels.value_counts() c_status_known = ~mypred_u.status.isnull() c_pred_pos = mypred_u.pred_labels == 1 c_status_pos = mypred_u.test_labels == 1 c_status_neg = mypred_u.test_labels == 0 mypred_u_res = mypred_u[c_status_known | c_pred_pos] print 'shape', mypred_u_res.shape print 'status', mypred_u_res.status.value_counts() print 'pred_labels', mypred_u_res.pred_labels.value_counts() print 'test_labels', mypred_u_res.test_labels.value_counts() func.writePredAsVcf(mypred_u_res, pred_file + tag + '.tsv', min_DP=min_DP)
'DP_offspring': dp_offspring, 'DP_father': dp_father, 'DP_mother': dp_mother}) m_name = os.path.basename(model) m_name = '.'.join(m_name.split('.')[:-1]) + '_tstlvl' + str(lvl) res['method'] = m_name res = res[~res.test_var_alleles.str.contains('nan')] res.to_csv(os.path.join(output_dir, m_name + '.csv'), index=False) res['var_id'] = res['test_var_id'] res_u = res[~res.var_id.duplicated()] res_u.reset_index(inplace=True) res_u.ix[:, 'pred_labels'] = (res_u['pred_prob'] > prob_cutoff).astype(int) #res_u = res_u[res_u.pred_labels == 1] #outp_tsv = os.path.join(output_dir, m_name + '.tsv') outp_tsv = os.path.join(output_dir, child_id + '.tsv') func.writePredAsVcf(res_u, outp_tsv, min_DP=min_DP) script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) cmd = ' '.join([os.path.join(script_dir, 'vcf2table.sh'), outp_tsv, script_dir, child_id]) print(cmd) func.runInShell(cmd) summarizeVariants.summarizeMutations(os.path.join(output_dir, child_id + '-ann-onePline.tsv'), os.path.join(output_dir, 'denovo'), config_file) #cmd = ' '.join([os.path.join(script_dir, 'work', 'summarizeMutations.py'),