def test_reverse(self):
    """Check that ``reverse_complement("ATGC")`` returns ``"GCAT"``.

    Raises:
        ValueError: if the computed reverse complement is not ``"GCAT"``.
            Raising (rather than only logging) makes a wrong result fail
            the test.
    """
    from mirtop.mirna.realign import reverse_complement
    # print() call form is valid on both Python 2 and Python 3.
    print("Testing ATGC complement")
    result = reverse_complement("ATGC")
    if result != "GCAT":
        raise ValueError("ATGC complement is not: %s" % result)
Example #2
0
 def test_reverse(self):
     """Verify the reverse complement of the sequence ATGC.

     Raises ValueError when the value computed by ``reverse_complement``
     is not GCAT; returns None otherwise.
     """
     from mirtop.mirna.realign import reverse_complement
     print("Testing ATGC complement")
     expected = "GCAT"
     observed = reverse_complement("ATGC")
     if observed == expected:
         return
     raise ValueError("ATGC complement is not: %s" % observed)
Example #3
0
def _bed(handle, bed_fn):
    """Convert SAM-formatted text lines into a tab-separated BED-like file.

    Each mapped alignment is written as one line with the columns
    ``chrom, start, end, name, sequence, strand, counts``.

    Args:
        handle: iterable of SAM text lines (header lines start with "@").
        bed_fn: path of the output file; it is overwritten.

    Notes:
        * Header lines, blank lines, unmapped records (RNAME == "*") and
          reads containing "N" are skipped.
        * ``counts`` is taken from the 15th whitespace-separated column
          (index 14) -- presumably a custom tag added upstream; verify
          against the producer of the SAM file.
        * ``start`` is the SAM POS column copied as is, and
          ``end = start + len(sequence) - 1``.
    """
    with open(bed_fn, 'w') as outh:
        for line in handle:
            if line.startswith("@"):
                continue
            cols = line.strip().split()
            if not cols:
                # Blank line (e.g. trailing newline): nothing to parse.
                continue
            if cols[2] == "*":
                logger.debug("READ::Sequence not mapped: %s" % cols[0])
                continue
            query_name = cols[0]
            query_sequence = cols[9]
            # Filter ambiguous reads before doing any further work; a
            # sequence contains N exactly when its reverse complement does.
            if "N" in query_sequence:
                continue
            counts = cols[14]
            start = int(cols[3])
            chrom = cols[2]
            # FLAG is a bit field: bit 0x10 marks a reverse-strand alignment
            # and may be combined with other bits (e.g. 272 = 256 + 16).
            strand = "-" if int(cols[1]) & 0x10 else "+"
            if strand == "-":
                query_sequence = reverse_complement(query_sequence)
            end = start + len(query_sequence) - 1
            fields = [chrom, start, end, query_name,
                      query_sequence, strand, counts]
            outh.write("\t".join(str(field) for field in fields) + '\n')
Example #4
0
def _analyze_line(line, reads, precursors, handle, args):
    """Attach one alignment record to its read entry in *reads*.

    Args:
        line: alignment record; the attributes read here are
            ``reference_id``, ``cigarstring``, ``query_name``,
            ``query_sequence``, ``is_reverse``, ``reference_start`` and
            ``cigartuples`` (looks like a pysam aligned segment -- confirm).
        reads: mapping from read name to a read object; entries are created
            by plain indexing, so it presumably is a defaultdict.
        precursors: mapping from reference name to precursor sequence.
        handle: alignment file object providing ``getrname``.
        args: options object; only ``args.genomic`` is consulted.

    Returns:
        The same *reads* mapping, updated when the record passes every
        filter (mapped, has a CIGAR, no "N", allowed strand, fits within
        the precursor).
    """
    if line.reference_id < 0:
        logger.debug("READ::Sequence not mapped: %s" % line.reference_id)
        return reads
    if not line.cigarstring:
        logger.debug("READ::Sequence malformed: %s" % line)
        return reads

    read_id = line.query_name
    # A read seen for the first time must carry its own sequence.
    if read_id not in reads and not line.query_sequence:
        return reads

    if line.is_reverse:
        read_seq = reverse_complement(line.query_sequence)
    else:
        read_seq = line.query_sequence
    logger.debug("READ::Read name:{0} and Read sequence:{1}".format(
        line.query_name, read_seq))

    if line.query_sequence and "N" in line.query_sequence:
        return reads

    if read_id not in reads:
        reads[read_id].set_sequence(read_seq)
        reads[read_id].counts = _get_freq(read_id)
        # TODO if args.quant set to 0
    # TODO if args.quant increase by 1

    # Reverse-strand hits are only kept when args.genomic is set.
    if line.is_reverse and not args.genomic:
        logger.debug("READ::Sequence is reverse: %s" % line.query_name)
        return reads

    ref_name = handle.getrname(line.reference_id)
    ref_start = line.reference_start
    cigar_ops = line.cigartuples

    hit = isomir()
    hit.align = line
    hit.set_pos(ref_start, len(reads[read_id].sequence))
    logger.debug("READ::From BAM start %s end %s at chrom %s" %
                 (hit.start, hit.end, ref_name))

    # Discard alignments that run more than 3 positions past the
    # precursor once its N characters are removed.
    precursor = precursors[ref_name]
    usable_size = len(precursor.replace("N", "")) + 3
    if usable_size < ref_start + len(reads[read_id].sequence):
        logger.debug("READ::%s start + %s sequence size are bigger than"
                     " size precursor %s" %
                     (line.reference_id,
                      len(reads[read_id].sequence),
                      len(precursor)))
        return reads

    tuned = filter.tune(reads[read_id].sequence, precursor,
                        ref_start, cigar_ops)
    hit.subs, hit.add, hit.cigar = tuned
    logger.debug("READ::After tune start %s end %s" % (hit.start, hit.end))
    logger.debug("READ::iso add %s iso subs %s" % (hit.add, hit.subs))
    reads[read_id].set_precursor(ref_name, hit)
    return reads
Example #5
0
def _analyze_quick_line(line, reads):
    """Record the sequence and count of an aligned read in *reads*.

    Args:
        line: alignment record; the attributes read here are
            ``reference_id``, ``cigarstring``, ``query_name``,
            ``query_sequence`` and ``is_reverse`` (looks like a pysam
            aligned segment -- confirm).
        reads: mapping from read name to a read object; new entries are
            created by plain indexing, so it presumably is a defaultdict.

    Returns:
        The same *reads* mapping. It is left untouched when the record is
        unmapped, has no CIGAR, has no sequence for an unseen read, or
        contains "N".
    """
    if line.reference_id < 0:
        logger.debug("READ::Sequence not mapped: %s" % line.reference_id)
        return reads
    if not line.cigarstring:
        logger.debug("READ::Sequence malformed: %s" % line)
        return reads

    read_id = line.query_name
    already_seen = read_id in reads
    # A read seen for the first time must carry its own sequence.
    if not already_seen and not line.query_sequence:
        return reads

    if line.is_reverse:
        read_seq = reverse_complement(line.query_sequence)
    else:
        read_seq = line.query_sequence
    logger.debug("READ::Read name:{0} and Read sequence:{1}".format(
        line.query_name, read_seq))

    if line.query_sequence and "N" in line.query_sequence:
        return reads

    if not already_seen:
        reads[read_id].set_sequence(read_seq)
        reads[read_id].counts = _get_freq(read_id)
    return reads
Example #6
0
def _bed(bam_fn, bed_fn):
    """Convert a SAM/BAM file into a tab-separated BED-like file.

    Each kept alignment is written as one line with the columns
    ``chrom, start, end, name, sequence, strand``.

    Args:
        bam_fn: path to the alignment file; opened in text mode when the
            name ends with "sam", in binary mode otherwise.
        bed_fn: path of the output file. When it already exists nothing is
            read or written.

    Returns:
        ``bed_fn``, whether the file was just created or already existed.

    Notes:
        Unmapped records, records without a CIGAR string and reads
        containing "N" are skipped. For consecutive records with the same
        read name the sequence of the first one is reused, which covers
        records that do not carry their own sequence.
    """
    # Check the cached output first so that no alignment file is opened
    # (and left open) when there is nothing to do.
    if os.path.exists(bed_fn):
        return bed_fn
    mode = "r" if bam_fn.endswith("sam") else "rb"
    handle = pysam.Samfile(bam_fn, mode)
    current = None
    try:
        with open(bed_fn, 'w') as outh:
            for line in handle:
                if line.reference_id < 0:
                    logger.debug("READ::Sequence not mapped: %s" %
                                 line.reference_id)
                    continue
                if not line.cigarstring:
                    logger.debug("READ::Sequence malformed: %s" % line)
                    continue
                query_name = line.query_name
                is_new_read = not current or query_name != current
                # A read seen for the first time must carry its sequence.
                if is_new_read and not line.query_sequence:
                    continue
                if is_new_read:
                    if line.is_reverse:
                        sequence = reverse_complement(line.query_sequence)
                    else:
                        sequence = line.query_sequence
                logger.debug(
                    ("READ::Read name:{0} and Read sequence:{1}").format(
                        line.query_name, sequence))
                if line.query_sequence and "N" in line.query_sequence:
                    continue
                chrom = handle.getrname(line.reference_id)
                start = line.reference_start
                end = start + len(sequence) - 1
                current = query_name
                strand = "-" if line.is_reverse else "+"
                fields = [chrom, start, end, query_name, sequence, strand]
                outh.write("\t".join(str(field) for field in fields) + '\n')
    finally:
        # Always release the alignment file, even if writing fails.
        handle.close()
    return bed_fn