recs = [r for r in reader] vc = VC.MPileUPVariant(recs, min_cov=MIN_COVERAGE, err_sub=ERR_SUB, expected_strand=args.strand, pval_cutoff=args.pval_cutoff) vc.call_variant() print(vc.variant) if len(vc.variant) == 0: os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix)) print("No SNPs found. END.", file=sys.stderr) sys.exit(0) # (2) for each CCS read, assign a haplotype (or discard if outlier) pp = VariantPhaser.VariantPhaser(vc) pp.phase_variant(args.sam_filename, args.fastx_filename, args.output_prefix, partial_ok=args.partial_ok) pp.haplotypes pp.haplotypes.get_haplotype_vcf_assignment() # (3) phase isoforms seqids = set([ r.id for r in SeqIO.parse(open(args.fastx_filename), VariantPhaser.type_fa_or_fq(args.fastx_filename)) ]) isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp) if len(isoform_tally) == 0: os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix))
vc.call_variant() print vc.variant if len(vc.variant) == 0: os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix)) print >> sys.stderr, "No SNPs found. END." sys.exit(0) # (2) for each CCS read, assign a haplotype (or discard if outlier) pp = VariantPhaser.VariantPhaser(vc) pp.phase_variant(args.sam_filename, args.fastx_filename, args.output_prefix, partial_ok=args.partial_ok) pp.haplotypes pp.haplotypes.get_haplotype_vcf_assignment() # (3) phase isoforms seqids = set([r.id for r in SeqIO.parse(open(args.fastx_filename), VariantPhaser.type_fa_or_fq(args.fastx_filename))]) isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp) if len(isoform_tally) == 0: os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix)) print >> sys.stderr, "No good haps found. END." sys.exit(0) pp.haplotypes.write_haplotype_to_vcf(args.mapping_filename, isoform_tally, args.output_prefix) # (4) clean isoforms hap_count = VariantPhaseCleaner.make_haplotype_counts(isoform_tally) # (5) error correct haplotypes # if diploid, use exhaustive search # otherwise, use hap counts (ToDo: make this work with exhaustive search later) variants = [ [base.upper() for base,count in vc.variant[pos]] for pos in pp.accepted_pos]
# (1) read the mpileup and vall variants reader = sp.MPileUpReader(args.mpileup_filename) recs = [r for r in reader] vc = VC.MPileUPVariant(recs, min_cov=MIN_COVERAGE, err_sub=ERR_SUB, expected_strand=args.strand, pval_cutoff=args.pval_cutoff) vc.call_variant() print vc.variant if len(vc.variant) == 0: os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix)) print >> sys.stderr, "No SNPs found. END." sys.exit(0) # (2) for each CCS read, assign a haplotype (or discard if outlier) pp = VariantPhaser.VariantPhaser(vc) pp.phase_variant(args.sam_filename, args.fasta_filename, args.output_prefix, partial_ok=args.partial_ok) pp.haplotypes pp.haplotypes.get_haplotype_vcf_assignment() # (3) phase isoforms seqids = set([r.id for r in SeqIO.parse(open(args.fasta_filename), 'fasta')]) isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp) if len(isoform_tally) == 0: os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix)) print >> sys.stderr, "No good haps found. END." sys.exit(0) pp.haplotypes.write_haplotype_to_vcf(args.mapping_filename, isoform_tally, args.output_prefix) # (4) clean isoforms hap_count = VariantPhaseCleaner.make_haplotype_counts(isoform_tally)
os.remove(file) # (1) read the mpileup and vall variants reader = sp.MPileUpReader(args.mpileup_filename) recs = [r for r in reader] vc = VC.MPileUPVariant(recs, min_cov=MIN_COVERAGE, err_sub=ERR_SUB, expected_strand=args.strand, pval_cutoff=args.pval_cutoff) vc.call_variant() print(vc.variant) if len(vc.variant) == 0: os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix)) print("No SNPs found. END.", file=sys.stderr) sys.exit(0) # (2) for each CCS read, assign a haplotype (or discard if outlier) pp = VariantPhaser.VariantPhaser(vc) pp.phase_variant(args.sam_filename, args.fastx_filename, args.output_prefix, partial_ok=args.partial_ok) pp.haplotypes pp.haplotypes.get_haplotype_vcf_assignment() # (3) phase isoforms seqids = set([r.id for r in SeqIO.parse(open(args.fastx_filename), VariantPhaser.type_fa_or_fq(args.fastx_filename))]) isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp) if len(isoform_tally) == 0: os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix)) print("No good haps found. END.", file=sys.stderr) sys.exit(0) pp.haplotypes.write_haplotype_to_vcf(args.mapping_filename, isoform_tally, args.output_prefix) # (4) clean isoforms hap_count = VariantPhaseCleaner.make_haplotype_counts(isoform_tally)
def main(args, parser):
    """Call SNPs and assign haplotypes for every region yielded by ``elitePileups``.

    Output files are all prefixed with ``args.output``:

    * ``.snps`` -- one tab-separated line per called variant position
      (contig, 1-based position, "/"-joined bases, "/"-joined counts).
    * ``.human_readable_by_pos.txt`` / ``.human_readable_by_hap.txt`` --
      haplotype tables filled in by ``write_haplotype_to_humanreadable``.
    * ``.NO_SNPS_FOUND`` -- empty marker created when no region yields a variant.

    Args:
        args: Ignored; immediately replaced by ``parser.parse_args()``.
            Kept in the signature for backward compatibility.
        parser: Argument parser. The parsed namespace must provide
            ``output``, ``bamfile``, ``genes``, ``assembly``, ``pval_cutoff``
            and ``bhFDR``.
    """
    # The incoming ``args`` is discarded and re-parsed from the command line.
    args = parser.parse_args()

    if args.bhFDR is not None:
        print(
            "--bhFDR {0} is given! Will be using Benjamini–Hochberg correction insteaad. --pval_cutoff is ignored."
            .format(args.bhFDR))

    # remove potential past run output
    past_suffixes = (
        '.NO_SNPS_FOUND',
        '.NO_HAPS_FOUND',
        '.snps',
        '.log',
        '.human_readable.txt',
        '.vcf',
        '.cleaned.human_readable.txt',
        '.cleaned.vcf',
    )
    for suffix in past_suffixes:
        stale_path = args.output + suffix
        if os.path.exists(stale_path):
            os.remove(stale_path)

    by_pos_path = args.output + '.human_readable_by_pos.txt'
    by_hap_path = args.output + '.human_readable_by_hap.txt'
    snps_path = args.output + '.snps'
    snpsfound = False

    # Context managers guarantee both tables are flushed and closed even if a
    # region fails part-way through.
    with open(by_pos_path, 'w') as f_human1, open(by_hap_path, 'w') as f_human2:
        f_human1.write("haplotype\thapIdx\tcontig\tpos\tvarIdx\tbase\tcount\n")
        f_human2.write("haplotype\thapIdx\tcontig\tcount\n")

        # (0) generate pileups
        for mpileupFile, contig, start, end in elitePileups(
                args.bamfile, args.genes, args.assembly, args.output):
            # (1) read the mpileup and call variants
            reader = sam.MPileUpReader(mpileupFile)
            recs = [r for r in reader]
            vc = VC.MagMPileUPVariant(recs,
                                      min_cov=MIN_COVERAGE,
                                      err_sub=ERR_SUB,
                                      expected_strand='+-',
                                      pval_cutoff=args.pval_cutoff,
                                      bhFDR=args.bhFDR)
            vc.call_variant()
            print(vc.variant)

            if len(vc.variant) == 0:
                # NOTE(review): as in the original, the mpileup file of a
                # region without variants is left on disk -- confirm whether
                # it should be removed here as well.
                continue
            snpsfound = True

            # SNPs are written with bases separated by "/" rather than "|"
            # because they have not been phased yet.
            with open(snps_path, 'a+') as f_snp:
                for pos, v in vc.variant.items():
                    f_snp.write("{contig}\t{pos}\t{bases}\t{counts}\n".format(
                        contig=contig,
                        pos=pos + 1,
                        bases="/".join([b for (b, c) in v]),
                        counts="/".join([str(c) for (b, c) in v])))

            # (2) for each CCS read, assign a haplotype (or discard if outlier)
            pp = VariantPhaser.MagVariantPhaser(vc)
            pp.phase_variant(args.bamfile, [contig, start, end],
                             args.output,
                             partial_ok=True)
            print(pp.haplotypes)
            pp.haplotypes.get_haplotype_vcf_assignment()
            pp.haplotypes.write_haplotype_to_humanreadable(
                contig, f_human1, f_human2, pp.seq_hap_info)
            os.remove(mpileupFile)

    if not snpsfound:
        # Create the marker file directly instead of shelling out to `touch`,
        # so prefixes containing spaces or shell metacharacters are safe.
        open(args.output + '.NO_SNPS_FOUND', 'a').close()
        # Drop the header-only haplotype tables. The previous code tried to
        # delete '<output>.human_readable.txt', which is never created by this
        # function and therefore raised FileNotFoundError.
        for leftover in (by_pos_path, by_hap_path):
            if os.path.exists(leftover):
                os.remove(leftover)
        print("No SNPs found. END.", file=sys.stderr)
recs = [r for r in reader] vc = VC.MPileUPVariant(recs, min_cov=MIN_COVERAGE, err_sub=ERR_SUB, expected_strand=args.strand, pval_cutoff=args.pval_cutoff) vc.call_variant() print(vc.variant) if len(vc.variant) == 0: os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix)) print("No SNPs found. END.", file=sys.stderr) sys.exit(0) # (2) for each CCS read, assign a haplotype (or discard if outlier) pp = VariantPhaser.VariantPhaser(vc) pp.phase_variant(args.sam_filename, args.fastx_filename, args.output_prefix, partial_ok=args.partial_ok) pp.haplotypes pp.haplotypes.get_haplotype_vcf_assignment() # (3) phase isoforms -- not needed for this analysis! #seqids = set([r.id for r in SeqIO.parse(open(args.fastx_filename), VariantPhaser.type_fa_or_fq(args.fastx_filename))]) #isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp) #if len(isoform_tally) == 0: # os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix)) # print("No good haps found. END.", file=sys.stderr) # sys.exit(0) pp.haplotypes.write_haplotype_to_vcf(args.mapping_filename, isoform_tally,