Esempio n. 1
0
# Build the output name: drop the last dot-separated component of m_name
# and append the test level.
m_name = '.'.join(m_name.split('.')[:-1]) + '_tstlvl' + str(lvl)
res['method'] = m_name
# Discard rows whose allele string contains 'nan'. The explicit copy makes
# `res` an independent frame, so the `var_id` assignment below does not
# write into a slice of the unfiltered data.
res = res[~res.test_var_alleles.str.contains('nan')].copy()
res.to_csv(os.path.join(output_dir, m_name + '.csv'), index=False)
res['var_id'] = res['test_var_id']
# Keep the first row for every var_id. reset_index() returns a new frame
# (the previous index is kept as a column), which is safe to modify.
res_u = res[~res.var_id.duplicated()].reset_index()
# DataFrame.ix was removed in pandas 1.0; plain column assignment sets the
# same 0/1 label for every row.
res_u['pred_labels'] = (res_u['pred_prob'] > prob_cutoff).astype(int)
# All de-duplicated variants are written, whatever their predicted label.
outp_tsv = os.path.join(output_dir, child_id + '.tsv')
func.writePredAsVcf(res_u, outp_tsv, min_DP=min_DP)

# vcf2table.sh is looked up next to the running script (symlinks resolved).
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
cmd = ' '.join(
    [os.path.join(script_dir, 'vcf2table.sh'), outp_tsv, script_dir, child_id])
print(cmd)
func.runInShell(cmd)

# NOTE(review): '<child_id>-ann-onePline.tsv' is presumably produced by
# vcf2table.sh in output_dir -- confirm against the shell script.
summarizeVariants.summarizeMutations(
    os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
    os.path.join(output_dir, 'denovo'), config_file)

#cmd = ' '.join([os.path.join(script_dir, 'work', 'summarizeMutations.py'),
#               os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
#               os.path.join(output_dir, 'denovo'),
#                config_file])
#func.runInShell(cmd)

# work/summarizeMutations.py /mnt/xfs1/home/asalomatov/projects/spark/feature_sets/hc/trio003.p1_642940-ann-onePline.tsv /mnt/xfs1/home/asalomatov/projects/spark/feature_sets/hc/denovo cfg_spark.yml
Esempio n. 2
0
dnvo = pandas.read_csv(input_file)
# DataFrame.ix was removed in pandas 1.0; plain column assignment sets the
# same 0/1 label for every row.
dnvo['pred_labels'] = (dnvo['pred_prob'] > prob_cutoff).astype(int)
# Keep only the rows labelled 1 and renumber them. reset_index() keeps the
# previous index as a column, exactly like the in-place call did.
dnvo = dnvo[dnvo.pred_labels == 1].reset_index()
if dnvo.empty:
    sys.exit('No mutations at score %s' % prob_cutoff)

# Intermediate files go to a fresh scratch directory.
tmp_dir = tempfile.mkdtemp()
print(tmp_dir)
input_file_bn = os.path.splitext(os.path.basename(input_file))[0]
outp_tsv = os.path.join(tmp_dir, input_file_bn + '.tsv')
print(outp_tsv)
func.writePredAsVcf(dnvo, outp_tsv, min_DP=min_DP)
# vcf2table.sh is looked up next to the running script (symlinks resolved).
script_name = os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])),
                           'vcf2table.sh')
cmd = ' '.join([
    script_name, outp_tsv,
    os.path.dirname(script_name), input_file_bn, targ_bed, incl_make
])
print(cmd)
func.runInShell(cmd)
# NOTE(review): the two input files named here are presumably written by
# vcf2table.sh into tmp_dir -- confirm against the shell script.
vn = summarizeVariants.summarizeMutations(
    os.path.join(tmp_dir, input_file_bn + '-ann.vcf.onePline.tsv'),
    os.path.join(tmp_dir, input_file_bn + '-vep.tsv'), input_file_bn,
    output_dir, config_file)
# The scratch directory is removed only when explicitly requested.
if rm_tmp == 'yes':
    cmd = 'rm -rf %s' % tmp_dir
    func.runInShell(cmd)
Esempio n. 3
0
# Renumber the rows in place; the previous index is kept as a column.
dnvo.reset_index(inplace=True)
if dnvo.empty:
    sys.exit('No mutations at score %s' % prob_cutoff)

# Intermediate files go to a fresh scratch directory.
tmp_dir = tempfile.mkdtemp()
print(tmp_dir)
# Input file name without directory and extension; used as the file stem.
input_file_bn = os.path.splitext(os.path.basename(input_file))[0]
outp_tsv = os.path.join(tmp_dir, input_file_bn + '.tsv')
print(outp_tsv)
func.writePredAsVcf(dnvo, outp_tsv, min_DP=min_DP)

# Absolute path of the vcf2table.sh resource of the 'variants' package.
script_name = os.path.abspath(
    pkg_resources.resource_filename('variants', 'vcf2table.sh'))
# Arguments: table path, the script's own directory, file stem, target BED.
cmd = ' '.join((
    script_name,
    outp_tsv,
    os.path.dirname(script_name),
    input_file_bn,
    targ_bed,
))
print(cmd)
func.runInShell(cmd)

# NOTE(review): '<stem>-ann-onePline.tsv' is presumably written by
# vcf2table.sh into tmp_dir -- confirm against the shell script.
vn = summarizeVariants.summarizeMutations(
    os.path.join(tmp_dir, input_file_bn + '-ann-onePline.tsv'),
    input_file_bn, output_dir, config_file)

# The scratch directory is removed only when explicitly requested.
if rm_tmp == 'yes':
    cmd = 'rm -rf %s' % tmp_dir
    func.runInShell(cmd)
Esempio n. 4
0
res.to_csv(os.path.join(output_dir, m_name + '.csv'), index=False)
res['var_id'] = res['test_var_id']
# Keep the first row for every var_id. reset_index() returns a new frame
# (the previous index is kept as a column), which is safe to modify.
res_u = res[~res.var_id.duplicated()].reset_index()
# DataFrame.ix was removed in pandas 1.0; plain column assignment sets the
# same 0/1 label for every row.
res_u['pred_labels'] = (res_u['pred_prob'] > prob_cutoff).astype(int)
# All de-duplicated variants are written, whatever their predicted label.
outp_tsv = os.path.join(output_dir, child_id + '.tsv')
func.writePredAsVcf(res_u, outp_tsv, min_DP=min_DP)

# vcf2table.sh is looked up next to the running script (symlinks resolved).
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
cmd = ' '.join([os.path.join(script_dir, 'vcf2table.sh'),
                outp_tsv,
                script_dir,
                child_id])
print(cmd)
func.runInShell(cmd)

# NOTE(review): '<child_id>-ann-onePline.tsv' is presumably produced by
# vcf2table.sh in output_dir -- confirm against the shell script.
summarizeVariants.summarizeMutations(
    os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
    os.path.join(output_dir, 'denovo'),
    config_file)


#cmd = ' '.join([os.path.join(script_dir, 'work', 'summarizeMutations.py'),
#               os.path.join(output_dir, child_id + '-ann-onePline.tsv'),
#               os.path.join(output_dir, 'denovo'),
#                config_file])
#func.runInShell(cmd)

# work/summarizeMutations.py /mnt/xfs1/home/asalomatov/projects/spark/feature_sets/hc/trio003.p1_642940-ann-onePline.tsv /mnt/xfs1/home/asalomatov/projects/spark/feature_sets/hc/denovo cfg_spark.yml