Esempio n. 1
0
def main():
    """Command-line entry point.

    Steps, in order:

    1. parse the command line and set the log level,
    2. check the BAM input, BAM output and primer positions file
       arguments,
    3. parse the primer positions file and keep the positions of its
       'F' and 'R' entries,
    4. call find_primer_reads() to fill a gzipped side file (skipped,
       with a warning, when that file already exists),
    5. call mark_primer() with the input BAM, the output BAM and the
       side file.

    The process is terminated through parser.error() or sys.exit(1)
    when one of the argument checks fails.
    """

    parser = cmdline_parser()
    args = parser.parse_args()

    if args.verbose:
        LOG.setLevel(logging.INFO)
    if args.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (args.fbam_in, 'BAM input file', 'in', True),
            (args.fbam_out, 'BAM output file', 'out', True),
            (args.primer_pos_file, 'Primer positions file', 'in', True)]:

        if not mandatory and not filename:
            continue

        if not filename:
            parser.error("%s argument missing." % descr)
            # parser.error() normally terminates the process itself; the
            # explicit exit is kept as a safety net.
            sys.exit(1)

        # '-' denotes a stream, so there is nothing to check on disk
        if filename == '-':
            continue

        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal("file '{}' does not exist.\n".format(filename))
            sys.exit(1)

        if direction == 'out' and os.path.exists(filename) and not args.force_overwrite:
            LOG.fatal("Refusing to overwrite existing file '{}'.\n".format(filename))
            sys.exit(1)

    # the input BAM is opened twice below, which a stream cannot support
    if args.fbam_in == "-":
        LOG.fatal("No streaming allow for input BAM (read twice)")
        sys.exit(1)

    # parse peaks and prep
    #
    # The lists are built while the file is still open, so this also works
    # if the parser returns a lazy iterator over the file.
    with open(args.primer_pos_file) as primer_fh:
        peaks = pp.parse_primer_pos(primer_fh)
        # summarize fw-start and rv-end positions in list. actually don't
        # need anything else (i.e. the end positions)
        peaks_fw_start_pos = [p.pos for p in peaks if p.ori == 'F']
        peaks_rv_end_pos = [p.pos for p in peaks if p.ori == 'R']

    # Derive the side-file name from the output BAM name by swapping a
    # trailing ".bam" for ".dups.gz". Only the suffix is touched, and a
    # name without ".bam" simply gets ".dups.gz" appended, so the side
    # file can never be the output BAM itself.
    out_base = args.fbam_out
    if out_base.endswith(".bam"):
        out_base = out_base[:-len(".bam")]
    dupreads_out = out_base + ".dups.gz"

    if os.path.exists(dupreads_out):
        LOG.warning("Reusing {}".format(dupreads_out))
    else:
        try:
            with gzip.open(dupreads_out, 'w') as dupreads_fh:
                find_primer_reads(args.fbam_in, peaks_fw_start_pos,
                                  peaks_rv_end_pos, dupreads_fh)
        except BaseException:
            # A truncated side file would be silently reused by the next
            # run, so remove it before propagating the error.
            if os.path.exists(dupreads_out):
                os.remove(dupreads_out)
            raise

    with pysam.Samfile(args.fbam_in, "rb") as sam_in:
        with pysam.Samfile(args.fbam_out, "wb", template=sam_in) as sam_out:
            with gzip.open(dupreads_out) as dupreads_fh:
                mark_primer(sam_in, sam_out, dupreads_fh)
Esempio n. 2
0
def main():
    """Command-line entry point.

    Parses the command line, checks the primer position input file and
    the optional bed output file, validates the primer length, sequence
    name and sequence length options, and then passes the parsed primer
    positions to primer_positions_to_incl_bed(), which writes to the
    bed output (stdout for '-').

    The process is terminated through parser.error() or sys.exit(1)
    when one of the argument checks fails.
    """

    parser = cmdline_parser()
    (opts, args) = parser.parse_args()
    if args:
        parser.error("Unrecognized arguments found: %s." % (' '.join(args)))
        # parser.error() normally terminates the process itself; the
        # explicit exit is kept as a safety net.
        sys.exit(1)

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
        (opts.primer_pos_file, 'primer position input file', 'in', True),
        (opts.bed_file, 'bed output file', 'out', False)
    ]:

        if not mandatory and not filename:
            continue

        if not filename:
            parser.error("%s argument missing." % descr)
            sys.exit(1)

        # '-' denotes a stream, so there is nothing to check on disk
        if filename == '-':
            continue

        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal("file '%s' does not exist.\n" % filename)
            sys.exit(1)

        if direction == 'out' and os.path.exists(
                filename) and not opts.force_overwrite:
            LOG.fatal("Refusing to overwrite existing file '%s'.\n" % filename)
            sys.exit(1)

    # primer length
    #
    if opts.primer_len < 0:
        LOG.fatal("Negative primer length does not make sense")
        sys.exit(1)

    if not opts.seq_len or not opts.seq_name:
        LOG.fatal(
            "Missing sequence name or sequence length argument (or seq len 0)")
        sys.exit(1)
    seq_len = opts.seq_len
    seq_name = opts.seq_name

    # The bed file is optional in the file check above, so an unset value
    # falls back to stdout instead of failing inside open().
    if not opts.bed_file or opts.bed_file == "-":
        bed_fh = sys.stdout
    else:
        bed_fh = open(opts.bed_file, 'w')

    try:
        # Keep the primer file open while its positions are consumed, in
        # case the parser returns a lazy iterator over the file.
        with open(opts.primer_pos_file) as primer_fh:
            primer_positions = pp.parse_primer_pos(primer_fh)
            primer_positions_to_incl_bed(primer_positions, bed_fh,
                                         opts.primer_len, seq_len, seq_name)
    finally:
        # never close stdout; close a real file even if conversion failed
        if bed_fh is not sys.stdout:
            bed_fh.close()
Esempio n. 3
0
def main():
    """
    Command-line entry point.

    Steps, in order:

    1. parse the command line and set the log level,
    2. check the BAM input, BAM output and primer positions file
       arguments,
    3. parse the primer positions file and keep the positions of its
       'F' and 'R' entries,
    4. call find_primer_reads() to fill a gzipped side file (skipped,
       with a warning, when that file already exists),
    5. call mark_primer() with the input BAM, the output BAM and the
       side file.

    The process is terminated through parser.error() or sys.exit(1)
    when one of the argument checks fails.
    """

    parser = cmdline_parser()
    args = parser.parse_args()

    if args.verbose:
        LOG.setLevel(logging.INFO)
    if args.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
        (args.fbam_in, 'BAM input file', 'in', True),
        (args.fbam_out, 'BAM output file', 'out', True),
        (args.primer_pos_file, 'Primer positions file', 'in', True)
    ]:

        if not mandatory and not filename:
            continue

        if not filename:
            parser.error("%s argument missing." % descr)
            # parser.error() normally terminates the process itself; the
            # explicit exit is kept as a safety net.
            sys.exit(1)

        # '-' denotes a stream, so there is nothing to check on disk
        if filename == '-':
            continue

        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal("file '{}' does not exist.\n".format(filename))
            sys.exit(1)

        if direction == 'out' and os.path.exists(
                filename) and not args.force_overwrite:
            LOG.fatal(
                "Refusing to overwrite existing file '{}'.\n".format(filename))
            sys.exit(1)

    # the input BAM is opened twice below, which a stream cannot support
    if args.fbam_in == "-":
        LOG.fatal("No streaming allow for input BAM (read twice)")
        sys.exit(1)

    # parse peaks and prep
    #
    # The lists are built while the file is still open, so this also works
    # if the parser returns a lazy iterator over the file.
    with open(args.primer_pos_file) as primer_fh:
        peaks = pp.parse_primer_pos(primer_fh)
        # summarize fw-start and rv-end positions in list. actually don't
        # need anything else (i.e. the end positions)
        peaks_fw_start_pos = [p.pos for p in peaks if p.ori == 'F']
        peaks_rv_end_pos = [p.pos for p in peaks if p.ori == 'R']

    # Derive the side-file name from the output BAM name by swapping a
    # trailing ".bam" for ".dups.gz". Only the suffix is touched, and a
    # name without ".bam" simply gets ".dups.gz" appended, so the side
    # file can never be the output BAM itself.
    out_base = args.fbam_out
    if out_base.endswith(".bam"):
        out_base = out_base[:-len(".bam")]
    dupreads_out = out_base + ".dups.gz"

    if os.path.exists(dupreads_out):
        LOG.warning("Reusing {}".format(dupreads_out))
    else:
        try:
            with gzip.open(dupreads_out, 'w') as dupreads_fh:
                find_primer_reads(args.fbam_in, peaks_fw_start_pos,
                                  peaks_rv_end_pos, dupreads_fh)
        except BaseException:
            # A truncated side file would be silently reused by the next
            # run, so remove it before propagating the error.
            if os.path.exists(dupreads_out):
                os.remove(dupreads_out)
            raise

    with pysam.Samfile(args.fbam_in, "rb") as sam_in:
        with pysam.Samfile(args.fbam_out, "wb", template=sam_in) as sam_out:
            with gzip.open(dupreads_out) as dupreads_fh:
                mark_primer(sam_in, sam_out, dupreads_fh)
Esempio n. 4
0
def main():
    """Command-line entry point.

    Parses the command line, checks the BAM input, BAM output and
    primer positions file arguments, validates the optional pileup
    region and the primer length, parses the primer positions file and
    finally calls mark_primer() with the opened input and output BAM
    files, the 'F' and 'R' primer positions, the primer length and the
    region tuple.

    The process is terminated through parser.error() or sys.exit(1)
    when one of the argument checks fails.
    """

    parser = cmdline_parser()
    (opts, args) = parser.parse_args()
    if args:
        parser.error("Unrecognized arguments found: %s." % (
            ' '.join(args)))
        # parser.error() normally terminates the process itself; the
        # explicit exit is kept as a safety net.
        sys.exit(1)

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (opts.fbam_in, 'BAM input file', 'in', True),
            (opts.fbam_out, 'BAM output file', 'out', True),
            (opts.primer_pos_file, 'Primer positions file', 'in', True),
            ]:

        if not mandatory and not filename:
            continue

        if not filename:
            parser.error("%s argument missing." % descr)
            sys.exit(1)

        # '-' denotes a stream, so there is nothing to check on disk
        if filename == '-':
            continue

        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal(
                "file '%s' does not exist.\n" % filename)
            sys.exit(1)

        if direction == 'out' and os.path.exists(filename) and not opts.force_overwrite:
            LOG.fatal(
                "Refusing to overwrite existing file '%s'.\n" % filename)
            sys.exit(1)

    # region arguments
    #
    plpref = None
    plpstart = None
    plpend = None
    if opts.plpstart or opts.plpend or opts.plpref:
        if None in [opts.plpstart, opts.plpend, opts.plpref]:
            LOG.fatal("If one of pileup-ref, -start or -end is given,"
                      " then all three must be given")
            sys.exit(1)
        if opts.plpstart < 1 or opts.plpstart >= opts.plpend:
            LOG.fatal("Pileup start and end coordinates don't make sense")
            sys.exit(1)

        plpref = opts.plpref
        # NOTE(review): both coordinates are shifted down by one, presumably
        # from 1-based user input to 0-based; confirm against mark_primer().
        plpstart = opts.plpstart-1
        plpend = opts.plpend-1

    # primer length
    #
    if opts.primer_len < 0:
        LOG.fatal("Negative primer length does not make sense")
        sys.exit(1)
    primer_len = opts.primer_len

    # parse peaks and prep
    #
    # The lists are built while the file is still open, so this also works
    # if the parser returns a lazy iterator over the file.
    with open(opts.primer_pos_file) as primer_fh:
        peaks = pp.parse_primer_pos(primer_fh)
        # summarize fw-start and rv-end positions in list. actually don't
        # need anything else (i.e. the end positions)
        peaks_fw_start_pos = [p.pos for p in peaks if p.ori == 'F']
        peaks_rv_end_pos = [p.pos for p in peaks if p.ori == 'R']

    # Both BAM handles are closed even if opening the output or the
    # marking itself fails.
    sam_in = pysam.Samfile(opts.fbam_in, "rb")
    try:
        sam_out = pysam.Samfile(opts.fbam_out, "wb", template=sam_in)
        try:
            mark_primer(sam_in, sam_out,
                        peaks_fw_start_pos, peaks_rv_end_pos, primer_len,
                        (plpref, plpstart, plpend))
        finally:
            sam_out.close()
    finally:
        sam_in.close()
Esempio n. 5
0
def main():
    """Command-line entry point.

    Parses the command line, checks the optional BAM input file, the
    primer position input file and the optional bed output file, and
    validates the primer length. The sequence name and length come
    either from the command line (no BAM file given) or from the BAM
    header (BAM file given; it must contain exactly one sequence).
    The parsed primer positions are then passed to
    primer_positions_to_incl_bed(), which writes to the bed output
    (stdout for '-').

    The process is terminated through parser.error() or sys.exit(1)
    when one of the argument checks fails.
    """

    parser = cmdline_parser()
    (opts, args) = parser.parse_args()
    if args:
        parser.error("Unrecognized arguments found: %s." % (
            ' '.join(args)))
        # parser.error() normally terminates the process itself; the
        # explicit exit is kept as a safety net.
        sys.exit(1)

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (opts.bam_file, 'BAM input file', 'in', False),
            (opts.primer_pos_file, 'primer position input file', 'in', True),
            (opts.bed_file, 'bed output file', 'out', False),
            ]:

        if not mandatory and not filename:
            continue

        if not filename:
            parser.error("%s argument missing." % descr)
            sys.exit(1)

        # '-' denotes a stream, so there is nothing to check on disk
        if filename == '-':
            continue

        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal(
                "file '%s' does not exist.\n" % filename)
            sys.exit(1)

        if direction == 'out' and os.path.exists(filename) and not opts.force_overwrite:
            LOG.fatal(
                "Refusing to overwrite existing file '%s'.\n" % filename)
            sys.exit(1)

    # primer length
    #
    if opts.primer_len < 0:
        LOG.fatal("Negative primer length does not make sense")
        sys.exit(1)

    if not opts.bam_file:
        if not opts.seq_len or not opts.seq_name:
            LOG.fatal("Missing sequence name or sequence length argument")
            sys.exit(1)
        seq_len = opts.seq_len
        seq_name = opts.seq_name
    else:
        if opts.seq_len or opts.seq_name:
            LOG.fatal("BAM file given, so will derive seqlen and"
                      " seqname automatically, which were however"
                      " also given as arguments")
            sys.exit(1)
        sam_header = sam.sam_header(opts.bam_file)
        sq_list = sam.sq_list_from_header(sam_header)
        # Explicit check instead of assert: asserts vanish under "python -O",
        # and this matches how the other input errors are reported.
        if len(sq_list) != 1:
            LOG.fatal(
                "Can only work with one sequence but found %d in %s" % (
                    len(sq_list), opts.bam_file))
            sys.exit(1)
        seq_name = sq_list[0]
        seq_len = sam.len_for_sq(sam_header, seq_name)

    # The bed file is optional in the file check above, so an unset value
    # falls back to stdout instead of failing inside open().
    if not opts.bed_file or opts.bed_file == "-":
        bed_fh = sys.stdout
    else:
        bed_fh = open(opts.bed_file, 'w')

    try:
        # Keep the primer file open while its positions are consumed, in
        # case the parser returns a lazy iterator over the file.
        with open(opts.primer_pos_file) as primer_fh:
            primer_positions = pp.parse_primer_pos(primer_fh)
            primer_positions_to_incl_bed(
                primer_positions, bed_fh, opts.primer_len,
                seq_len, seq_name)
    finally:
        # never close stdout; close a real file even if conversion failed
        if bed_fh is not sys.stdout:
            bed_fh.close()
Esempio n. 6
0
def main():
    """Command-line entry point.

    Parses the command line, checks the Fasta input, Fasta output and
    primer position file arguments, validates the primer length, parses
    the primer position file and finally calls mask_primer() with the
    opened Fasta input and output ('-' selects stdin/stdout), the 'F'
    and 'R' primer positions and the primer length.

    The process is terminated through parser.error() or sys.exit(1)
    when one of the argument checks fails.
    """

    parser = cmdline_parser()
    (opts, args) = parser.parse_args()
    if args:
        parser.error("Unrecognized arguments found: %s." % (
            ' '.join(args)))
        # parser.error() normally terminates the process itself; the
        # explicit exit is kept as a safety net.
        sys.exit(1)

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    # file check
    #
    for (filename, descr, direction, mandatory) in [
            (opts.f_fa_in, 'Fasta input file', 'in', True),
            (opts.f_fa_out, 'Fasta output file', 'out', True),
            (opts.primers, 'Primer position file', 'in', True),
            ]:

        if not mandatory and not filename:
            continue

        if not filename:
            parser.error("%s argument missing." % descr)
            sys.exit(1)

        # '-' denotes a stream, so there is nothing to check on disk
        if filename == '-':
            continue

        if direction == 'in' and not os.path.exists(filename):
            LOG.fatal(
                "file '%s' does not exist.\n" % filename)
            sys.exit(1)

        if direction == 'out' and os.path.exists(filename) \
          and not opts.force_overwrite:
            LOG.fatal(
                "Refusing to overwrite existing file '%s'.\n" % filename)
            sys.exit(1)

    # primer length
    #
    if opts.primer_len < 0:
        LOG.fatal("Negative primer length does not make sense")
        sys.exit(1)
    primer_len = opts.primer_len

    # parse primers and prep
    #
    # The lists are built while the file is still open, so this also works
    # if the parser returns a lazy iterator over the file.
    with open(opts.primers) as primer_fh:
        primer_pos = pp.parse_primer_pos(primer_fh)
        # summarize fw-start and rv-end positions in list. actually don't
        # need anything else (i.e. the end positions)
        primers_fw_start_pos = [p.pos for p in primer_pos if p.ori == 'F']
        primers_rv_end_pos = [p.pos for p in primer_pos if p.ori == 'R']

    # Real files are closed even if masking fails; the standard streams
    # are never closed.
    if opts.f_fa_in == '-':
        fh_fa_in = sys.stdin
    else:
        fh_fa_in = open(opts.f_fa_in, 'r')
    try:
        if opts.f_fa_out == '-':
            fh_fa_out = sys.stdout
        else:
            fh_fa_out = open(opts.f_fa_out, 'w')
        try:
            mask_primer(fh_fa_in, fh_fa_out,
                        primers_fw_start_pos, primers_rv_end_pos, primer_len)
        finally:
            if fh_fa_out is not sys.stdout:
                fh_fa_out.close()
    finally:
        if fh_fa_in is not sys.stdin:
            fh_fa_in.close()