def main():
    """Command-line entry point: mark primer reads in a BAM file.

    Steps, in order:

    1. Parse the command line and set the log level from ``--verbose`` /
       ``--debug`` style flags (``args.verbose``, ``args.debug``).
    2. Check that the input files exist and that the output BAM is not
       overwritten unless ``args.force_overwrite`` is set.
    3. Read the primer positions and keep the positions of forward ('F')
       and reverse ('R') entries.
    4. Let ``find_primer_reads()`` write a gzip-compressed file next to the
       output BAM; an already existing file of that name is reused.
    5. Hand input BAM, output BAM and that file to ``mark_primer()``.

    Exits with status 1 on invalid file arguments.
    """
    parser = cmdline_parser()
    args = parser.parse_args()

    if args.verbose:
        LOG.setLevel(logging.INFO)
    if args.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (args.fbam_in, 'BAM input file', 'in', True),
            (args.fbam_out, 'BAM output file', 'out', True),
            (args.primer_pos_file, 'Primer positions file', 'in', True)]:
        if not mandatory and not filename:
            continue
        if not filename:
            # parser.error() prints the usage and terminates the process
            # itself, so no explicit sys.exit() is needed afterwards.
            parser.error("%s argument missing." % descr)
        if filename == '-':
            continue
        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal("file '{}' does not exist.\n".format(filename))
            sys.exit(1)
        if (direction == 'out' and os.path.exists(filename)
                and not args.force_overwrite):
            LOG.fatal(
                "Refusing to overwrite existing file '{}'.\n".format(filename))
            sys.exit(1)

    if args.fbam_in == "-":
        LOG.fatal("No streaming allow for input BAM (read twice)")
        sys.exit(1)

    # parse peaks and prep
    #
    # Both lists are built while the file is still open so this works
    # regardless of whether the parser reads eagerly or lazily.
    with open(args.primer_pos_file) as primer_fh:
        peaks = pp.parse_primer_pos(primer_fh)
        # summarize fw-start and rv-end positions in list. actually don't
        # need anything else (i.e. the end positions)
        peaks_fw_start_pos = [p.pos for p in peaks if p.ori == 'F']
        peaks_rv_end_pos = [p.pos for p in peaks if p.ori == 'R']

    # Derive the dups file name from the output BAM. Only a trailing
    # ".bam" is swapped; otherwise the suffix is appended. A blanket
    # str.replace() would also rewrite ".bam" inside directory names and,
    # for names without ".bam", would yield the output BAM path itself.
    out_root, out_ext = os.path.splitext(args.fbam_out)
    if out_ext == ".bam":
        dupreads_out = out_root + ".dups.gz"
    else:
        dupreads_out = args.fbam_out + ".dups.gz"

    if os.path.exists(dupreads_out):
        LOG.warning("Reusing {}".format(dupreads_out))
    else:
        try:
            with gzip.open(dupreads_out, 'w') as dupreads_fh:
                find_primer_reads(args.fbam_in, peaks_fw_start_pos,
                                  peaks_rv_end_pos, dupreads_fh)
        except BaseException:
            # A half-written file would be picked up as "reusable" by the
            # next run, so remove it before propagating the failure.
            if os.path.exists(dupreads_out):
                os.remove(dupreads_out)
            raise

    with pysam.Samfile(args.fbam_in, "rb") as sam_in:
        with pysam.Samfile(args.fbam_out, "wb", template=sam_in) as sam_out:
            with gzip.open(dupreads_out) as dupreads_fh:
                mark_primer(sam_in, sam_out, dupreads_fh)
def main():
    """Command-line entry point: turn primer positions into BED output.

    Parses the command line, validates the file arguments, the primer
    length and the sequence name/length, then passes the parsed primer
    positions together with an output handle to
    ``primer_positions_to_incl_bed()``.

    The BED output goes to stdout when the BED file argument is ``-`` or
    was not given at all.

    Exits with status 1 on invalid arguments.
    """
    parser = cmdline_parser()
    (opts, args) = parser.parse_args()
    if args:
        # parser.error() prints the usage and terminates the process
        # itself, so no explicit sys.exit() is needed afterwards.
        parser.error("Unrecognized arguments found: %s." % (' '.join(args)))

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (opts.primer_pos_file, 'primer position input file', 'in', True),
            (opts.bed_file, 'bed output file', 'out', False)
    ]:
        if not mandatory and not filename:
            continue
        if not filename:
            parser.error("%s argument missing." % descr)
        if filename == '-':
            continue
        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal("file '%s' does not exist.\n" % filename)
            sys.exit(1)
        if (direction == 'out' and os.path.exists(filename)
                and not opts.force_overwrite):
            LOG.fatal("Refusing to overwrite existing file '%s'.\n" % filename)
            sys.exit(1)

    # primer length
    #
    if opts.primer_len < 0:
        LOG.fatal("Negative primer length does not make sense")
        sys.exit(1)

    if not opts.seq_len or not opts.seq_name:
        LOG.fatal(
            "Missing sequence name or sequence length argument (or seq len 0)")
        sys.exit(1)
    seq_len = opts.seq_len
    seq_name = opts.seq_name

    # The file check above treats the BED output as optional. Without the
    # stdout fallback a missing argument would reach open(None, 'w') and
    # die with a TypeError.
    use_stdout = not opts.bed_file or opts.bed_file == "-"
    if use_stdout:
        bed_fh = sys.stdout
    else:
        bed_fh = open(opts.bed_file, 'w')

    try:
        # Keep the primer file open while its content is consumed; the
        # parser may read lazily.
        with open(opts.primer_pos_file) as primer_fh:
            primer_positions = pp.parse_primer_pos(primer_fh)
            primer_positions_to_incl_bed(primer_positions, bed_fh,
                                         opts.primer_len, seq_len, seq_name)
    finally:
        # never close stdout; always close a file we opened ourselves
        if not use_stdout:
            bed_fh.close()
def main():
    """Command-line entry point: mark primer reads in a BAM file.

    Workflow:

    * parse the command line and adjust the log level according to
      ``args.verbose`` / ``args.debug``;
    * verify that input files exist and that the output BAM is only
      overwritten when ``args.force_overwrite`` is set;
    * read the primer positions, keeping the positions of forward ('F')
      and reverse ('R') entries;
    * have ``find_primer_reads()`` fill a gzip-compressed file named after
      the output BAM (an existing file of that name is reused);
    * call ``mark_primer()`` with the input BAM, the output BAM and that
      file.

    Exits with status 1 on invalid file arguments.
    """
    parser = cmdline_parser()
    args = parser.parse_args()

    if args.verbose:
        LOG.setLevel(logging.INFO)
    if args.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    files_to_check = [
        (args.fbam_in, 'BAM input file', 'in', True),
        (args.fbam_out, 'BAM output file', 'out', True),
        (args.primer_pos_file, 'Primer positions file', 'in', True),
    ]
    for (filename, descr, direction, mandatory) in files_to_check:
        if not mandatory and not filename:
            continue
        if not filename:
            # parser.error() exits on its own; a following sys.exit()
            # would never be reached.
            parser.error("%s argument missing." % descr)
        if filename == '-':
            continue
        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal("file '{}' does not exist.\n".format(filename))
            sys.exit(1)
        if (direction == 'out' and os.path.exists(filename)
                and not args.force_overwrite):
            LOG.fatal(
                "Refusing to overwrite existing file '{}'.\n".format(filename))
            sys.exit(1)

    if args.fbam_in == "-":
        LOG.fatal("No streaming allow for input BAM (read twice)")
        sys.exit(1)

    # parse peaks and prep
    #
    # The position lists are extracted inside the ``with`` block so the
    # handle is closed deterministically, yet still open if the parser
    # yields its results lazily.
    with open(args.primer_pos_file) as primer_fh:
        peaks = pp.parse_primer_pos(primer_fh)
        # summarize fw-start and rv-end positions in list. actually don't
        # need anything else (i.e. the end positions)
        peaks_fw_start_pos = [p.pos for p in peaks if p.ori == 'F']
        peaks_rv_end_pos = [p.pos for p in peaks if p.ori == 'R']

    # Name of the dups file: swap a trailing ".bam" for ".dups.gz", or
    # append the suffix when the output has another extension. Replacing
    # every ".bam" substring would touch directory names too, and would
    # return the output BAM path unchanged when it contains no ".bam".
    out_root, out_ext = os.path.splitext(args.fbam_out)
    if out_ext == ".bam":
        dupreads_out = out_root + ".dups.gz"
    else:
        dupreads_out = args.fbam_out + ".dups.gz"

    if os.path.exists(dupreads_out):
        LOG.warning("Reusing {}".format(dupreads_out))
    else:
        try:
            with gzip.open(dupreads_out, 'w') as dupreads_fh:
                find_primer_reads(args.fbam_in, peaks_fw_start_pos,
                                  peaks_rv_end_pos, dupreads_fh)
        except BaseException:
            # Do not leave a truncated file behind: the existence test
            # above would treat it as a valid result on the next run.
            if os.path.exists(dupreads_out):
                os.remove(dupreads_out)
            raise

    with pysam.Samfile(args.fbam_in, "rb") as sam_in:
        with pysam.Samfile(args.fbam_out, "wb", template=sam_in) as sam_out:
            with gzip.open(dupreads_out) as dupreads_fh:
                mark_primer(sam_in, sam_out, dupreads_fh)
def main():
    """Command-line entry point: mark primer reads in a BAM file.

    Parses the command line, validates the file arguments, the optional
    pileup region (reference, start, end) and the primer length, reads the
    primer positions and finally calls ``mark_primer()`` with the opened
    input and output BAM files.

    The pileup start and end given on the command line are each reduced
    by one before being passed on. If no region is given, the tuple passed
    to ``mark_primer()`` is ``(None, None, None)``.

    Exits with status 1 on invalid arguments.
    """
    parser = cmdline_parser()
    (opts, args) = parser.parse_args()
    if args:
        # parser.error() prints the usage and terminates the process
        # itself, so no explicit sys.exit() is needed afterwards.
        parser.error("Unrecognized arguments found: %s." % (
            ' '.join(args)))

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (opts.fbam_in, 'BAM input file', 'in', True),
            (opts.fbam_out, 'BAM output file', 'out', True),
            (opts.primer_pos_file, 'Primer positions file', 'in', True),
    ]:
        if not mandatory and not filename:
            continue
        if not filename:
            parser.error("%s argument missing." % descr)
        if filename == '-':
            continue
        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal(
                "file '%s' does not exist.\n" % filename)
            sys.exit(1)
        if (direction == 'out' and os.path.exists(filename)
                and not opts.force_overwrite):
            LOG.fatal(
                "Refusing to overwrite existing file '%s'.\n" % filename)
            sys.exit(1)

    # region arguments
    #
    plpref = None
    plpstart = None
    plpend = None
    if opts.plpstart or opts.plpend or opts.plpref:
        if None in [opts.plpstart, opts.plpend, opts.plpref]:
            LOG.fatal("If one of pileup-ref, -start or -end is given,"
                      " then all three must be given")
            sys.exit(1)
        if opts.plpstart < 1 or opts.plpstart >= opts.plpend:
            LOG.fatal("Pileup start and end coordinates don't make sense")
            sys.exit(1)
        plpref = opts.plpref
        plpstart = opts.plpstart - 1
        plpend = opts.plpend - 1

    # primer length
    #
    if opts.primer_len < 0:
        LOG.fatal("Negative primer length does not make sense")
        sys.exit(1)
    primer_len = opts.primer_len

    # parse peaks and prep
    #
    # Both lists are built while the file is still open so this works
    # regardless of whether the parser reads eagerly or lazily.
    with open(opts.primer_pos_file) as primer_fh:
        peaks = pp.parse_primer_pos(primer_fh)
        # summarize fw-start and rv-end positions in list. actually don't
        # need anything else (i.e. the end positions)
        peaks_fw_start_pos = [p.pos for p in peaks if p.ori == 'F']
        peaks_rv_end_pos = [p.pos for p in peaks if p.ori == 'R']

    # The BAM handles are closed unconditionally, also when mark_primer()
    # raises. The former comparisons against sys.stdin / sys.stdout were
    # always true because a Samfile object is never one of those streams.
    sam_in = pysam.Samfile(opts.fbam_in, "rb")
    try:
        sam_out = pysam.Samfile(opts.fbam_out, "wb", template=sam_in)
        try:
            mark_primer(sam_in, sam_out,
                        peaks_fw_start_pos, peaks_rv_end_pos,
                        primer_len, (plpref, plpstart, plpend))
        finally:
            sam_out.close()
    finally:
        sam_in.close()
def main():
    """Command-line entry point: turn primer positions into BED output.

    Parses the command line and validates the file arguments and the
    primer length. Sequence name and length are taken either from the
    command line or, when a BAM file is given, from that file's header via
    the ``sam`` helper module; giving both is an error, and the BAM header
    must list exactly one sequence.

    The parsed primer positions are then passed, together with an output
    handle, to ``primer_positions_to_incl_bed()``. Output goes to stdout
    when the BED file argument is ``-`` or was not given at all.

    Exits with status 1 on invalid arguments.
    """
    parser = cmdline_parser()
    (opts, args) = parser.parse_args()
    if args:
        # parser.error() prints the usage and terminates the process
        # itself, so no explicit sys.exit() is needed afterwards.
        parser.error("Unrecognized arguments found: %s." % (
            ' '.join(args)))

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (opts.bam_file, 'BAM input file', 'in', False),
            (opts.primer_pos_file, 'primer position input file', 'in', True),
            (opts.bed_file, 'bed output file', 'out', False),
    ]:
        if not mandatory and not filename:
            continue
        if not filename:
            parser.error("%s argument missing." % descr)
        if filename == '-':
            continue
        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal(
                "file '%s' does not exist.\n" % filename)
            sys.exit(1)
        if (direction == 'out' and os.path.exists(filename)
                and not opts.force_overwrite):
            LOG.fatal(
                "Refusing to overwrite existing file '%s'.\n" % filename)
            sys.exit(1)

    # primer length
    #
    if opts.primer_len < 0:
        LOG.fatal("Negative primer length does not make sense")
        sys.exit(1)

    if not opts.bam_file:
        if not opts.seq_len or not opts.seq_name:
            LOG.fatal("Missing sequence name or sequence length argument")
            sys.exit(1)
        seq_len = opts.seq_len
        seq_name = opts.seq_name
    else:
        if opts.seq_len or opts.seq_name:
            LOG.fatal("BAM file given, so will derive seqlen and"
                      " seqname automatically, which were however"
                      " also given as arguments")
            sys.exit(1)
        sam_header = sam.sam_header(opts.bam_file)
        sq_list = sam.sq_list_from_header(sam_header)
        # Explicit check instead of ``assert``: assertions disappear when
        # Python runs with -O, and this is validation of user input.
        if len(sq_list) != 1:
            LOG.fatal("Can only work with one sequence but found %d in %s" % (
                len(sq_list), opts.bam_file))
            sys.exit(1)
        seq_name = sq_list[0]
        seq_len = sam.len_for_sq(sam_header, seq_name)

    # The file check above treats the BED output as optional. Without the
    # stdout fallback a missing argument would reach open(None, 'w') and
    # die with a TypeError.
    use_stdout = not opts.bed_file or opts.bed_file == "-"
    if use_stdout:
        bed_fh = sys.stdout
    else:
        bed_fh = open(opts.bed_file, 'w')

    try:
        # Keep the primer file open while its content is consumed; the
        # parser may read lazily.
        with open(opts.primer_pos_file) as primer_fh:
            primer_positions = pp.parse_primer_pos(primer_fh)
            primer_positions_to_incl_bed(
                primer_positions, bed_fh, opts.primer_len, seq_len, seq_name)
    finally:
        # never close stdout; always close a file we opened ourselves
        if not use_stdout:
            bed_fh.close()
def main():
    """Command-line entry point: mask primers in a FASTA file.

    Parses the command line, validates the file arguments and the primer
    length, reads the primer positions and calls ``mask_primer()`` with
    the FASTA input and output handles, the positions of forward ('F') and
    reverse ('R') primers and the primer length.

    ``-`` as FASTA input or output selects stdin or stdout respectively.

    Exits with status 1 on invalid arguments.
    """
    parser = cmdline_parser()
    (opts, args) = parser.parse_args()
    if args:
        # parser.error() prints the usage and terminates the process
        # itself, so no explicit sys.exit() is needed afterwards.
        parser.error("Unrecognized arguments found: %s." % (
            ' '.join(args)))

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (opts.f_fa_in, 'Fasta input file', 'in', True),
            (opts.f_fa_out, 'Fasta output file', 'out', True),
            (opts.primers, 'Primer position file', 'in', True),
    ]:
        if not mandatory and not filename:
            continue
        if not filename:
            parser.error("%s argument missing." % descr)
        if filename == '-':
            continue
        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal(
                "file '%s' does not exist.\n" % filename)
            sys.exit(1)
        if (direction == 'out' and os.path.exists(filename)
                and not opts.force_overwrite):
            LOG.fatal(
                "Refusing to overwrite existing file '%s'.\n" % filename)
            sys.exit(1)

    # primer length
    #
    if opts.primer_len < 0:
        LOG.fatal("Negative primer length does not make sense")
        sys.exit(1)
    primer_len = opts.primer_len

    # parse primers and prep
    #
    # Both lists are built while the file is still open so this works
    # regardless of whether the parser reads eagerly or lazily.
    with open(opts.primers) as primer_fh:
        primer_pos = pp.parse_primer_pos(primer_fh)
        # summarize fw-start and rv-end positions in list. actually don't
        # need anything else (i.e. the end positions)
        primers_fw_start_pos = [p.pos for p in primer_pos if p.ori == 'F']
        primers_rv_end_pos = [p.pos for p in primer_pos if p.ori == 'R']

    read_stdin = opts.f_fa_in == '-'
    write_stdout = opts.f_fa_out == '-'

    # try/finally makes sure handles we opened ourselves are closed even
    # when mask_primer() raises; the standard streams are left untouched.
    fh_fa_in = sys.stdin if read_stdin else open(opts.f_fa_in, 'r')
    try:
        fh_fa_out = sys.stdout if write_stdout else open(opts.f_fa_out, 'w')
        try:
            mask_primer(fh_fa_in, fh_fa_out,
                        primers_fw_start_pos, primers_rv_end_pos, primer_len)
        finally:
            if not write_stdout:
                fh_fa_out.close()
    finally:
        if not read_stdin:
            fh_fa_in.close()