args.output_prefix + '.NO_HAPS_FOUND', args.output_prefix + '.log', args.output_prefix + '.human_readable.txt', args.output_prefix + '.vcf', args.output_prefix + '.cleaned.human_readable.txt', args.output_prefix + '.cleaned.vcf' ] for file in past_files: if os.path.exists(file): os.remove(file) # (1) read the mpileup and vall variants reader = sp.MPileUpReader(args.mpileup_filename) recs = [r for r in reader] vc = VC.MPileUPVariant(recs, min_cov=MIN_COVERAGE, err_sub=ERR_SUB, expected_strand=args.strand, pval_cutoff=args.pval_cutoff) vc.call_variant() print(vc.variant) if len(vc.variant) == 0: os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix)) print("No SNPs found. END.", file=sys.stderr) sys.exit(0) # (2) for each CCS read, assign a haplotype (or discard if outlier) pp = VariantPhaser.VariantPhaser(vc) pp.phase_variant(args.sam_filename, args.fastx_filename, args.output_prefix,
def main(args, parser):
    """Call SNPs and phase haplotypes for every pileup yielded by ``elitePileups``.

    For each ``(mpileupFile, contig, start, end)`` produced by
    ``elitePileups(args.bamfile, args.genes, args.assembly, args.output)``:

    1. variants are called from the pileup records;
    2. if any variant is found, the unphased SNPs are appended to
       ``<output>.snps`` and the reads are phased, with the result written to
       ``<output>.human_readable_by_pos.txt`` and
       ``<output>.human_readable_by_hap.txt``;
    3. the temporary pileup file is deleted.

    If no region yields a variant, an empty ``<output>.NO_SNPS_FOUND`` marker
    file is created and a message is printed to stderr.

    Args:
        args: Ignored. It is immediately replaced by ``parser.parse_args()``;
            the parameter is kept only for interface compatibility.
        parser: Object whose ``parse_args()`` returns the options read here:
            ``bhFDR``, ``output``, ``bamfile``, ``genes``, ``assembly`` and
            ``pval_cutoff``.

    Returns:
        None.
    """
    # NOTE: the incoming ``args`` is discarded; options are always re-parsed.
    args = parser.parse_args()

    if args.bhFDR is not None:
        print(
            "--bhFDR {0} is given! Will be using Benjamini–Hochberg correction insteaad. --pval_cutoff is ignored."
            .format(args.bhFDR))

    # Remove potential output left over from a past run with the same prefix.
    stale_suffixes = (
        '.NO_SNPS_FOUND',
        '.NO_HAPS_FOUND',
        '.snps',
        '.log',
        '.human_readable.txt',
        '.vcf',
        '.cleaned.human_readable.txt',
        '.cleaned.vcf',
    )
    for suffix in stale_suffixes:
        stale_path = args.output + suffix
        if os.path.exists(stale_path):
            os.remove(stale_path)

    snpsfound = False

    # (0) generate pileups
    # Context managers guarantee both report files are closed even if calling
    # or phasing raises part-way through the loop.
    with open(args.output + '.human_readable_by_pos.txt', 'w') as f_human1, \
            open(args.output + '.human_readable_by_hap.txt', 'w') as f_human2:
        f_human1.write("haplotype\thapIdx\tcontig\tpos\tvarIdx\tbase\tcount\n")
        f_human2.write("haplotype\thapIdx\tcontig\tcount\n")

        for mpileupFile, contig, start, end in elitePileups(
                args.bamfile, args.genes, args.assembly, args.output):
            try:
                # (1) read the mpileup and call variants
                reader = sam.MPileUpReader(mpileupFile)
                recs = list(reader)
                vc = VC.MagMPileUPVariant(recs,
                                          min_cov=MIN_COVERAGE,
                                          err_sub=ERR_SUB,
                                          expected_strand='+-',
                                          pval_cutoff=args.pval_cutoff,
                                          bhFDR=args.bhFDR)
                vc.call_variant()
                print(vc.variant)

                if len(vc.variant) == 0:
                    # Nothing to phase here; the ``finally`` clause below
                    # still deletes this region's temporary pileup.
                    continue
                snpsfound = True

                # SNPs are written with the bases separated by "/" rather
                # than "|" because they have not been phased yet.
                with open(args.output + '.snps', 'a+') as f_snp:
                    for pos, v in vc.variant.items():
                        f_snp.write(
                            "{contig}\t{pos}\t{bases}\t{counts}\n".format(
                                contig=contig,
                                pos=pos + 1,  # reported position is key + 1
                                bases="/".join(b for (b, c) in v),
                                counts="/".join(str(c) for (b, c) in v)))

                # (2) for each CCS read, assign a haplotype
                #     (or discard if outlier)
                pp = VariantPhaser.MagVariantPhaser(vc)
                pp.phase_variant(args.bamfile, [contig, start, end],
                                 args.output, partial_ok=True)
                print(pp.haplotypes)
                pp.haplotypes.get_haplotype_vcf_assignment()
                pp.haplotypes.write_haplotype_to_humanreadable(
                    contig, f_human1, f_human2, pp.seq_hap_info)
            finally:
                # Previously the ``continue`` above skipped this removal and
                # left the pileup of every variant-free region on disk.
                if os.path.exists(mpileupFile):
                    os.remove(mpileupFile)

    if not snpsfound:
        # Create the marker file directly instead of shelling out to
        # ``touch``: portable, and safe for prefixes containing spaces or
        # shell metacharacters.
        with open(args.output + '.NO_SNPS_FOUND', 'a'):
            pass
        # This function never creates '<output>.human_readable.txt' and the
        # cleanup above already deleted any old copy, so an unconditional
        # remove raised FileNotFoundError here. Only remove it if present.
        # NOTE(review): the header-only *_by_pos/*_by_hap reports are left in
        # place; confirm whether they were the intended cleanup target.
        leftover_report = args.output + '.human_readable.txt'
        if os.path.exists(leftover_report):
            os.remove(leftover_report)
        print("No SNPs found. END.", file=sys.stderr)