コード例 #1
0
def make_exons(args, thread_index, thread_count):
    """Write this worker's share of the alignments as exon records.

    Mapped reads are streamed from ``samtools view -F 4`` run on
    ``args.sam_file``.  Lines are numbered from 1 as they arrive and only
    those with ``number % thread_count == thread_index`` are processed, so
    workers with distinct indices handle disjoint subsets of the stream.
    For each selected read the primary alignment, plus every ``XA:Z`` hit
    when ``args.use_secondary_alignments`` is set, is handed to
    ``get_exons_from_seqs`` and the result is written to
    ``<args.tempdir>/bedpart.<thread_index>.bed``.

    Args:
        args: Parsed options; ``sam_file``, ``reference_genome``,
            ``tempdir`` and ``use_secondary_alignments`` are read.
        thread_index: Remainder (0 .. thread_count - 1) selecting which
            lines this call handles.
        thread_count: Total number of workers splitting the stream.

    Raises:
        OSError: If ``samtools`` cannot be launched.
        SystemExit: With status 1 when an ``XA:Z`` entry cannot be parsed.
    """
    # Anything not named *.bam is treated as text SAM, which this samtools
    # invocation is told about via -S.  Passing an argument list (no shell)
    # keeps file names with spaces or metacharacters intact.
    cmd = ['samtools', 'view', '-F', '4']
    if not re.search(r'\.bam$', args.sam_file):
        cmd.append('-S')
    cmd.append(args.sam_file)

    spcf = SamBasics.SAMtoPSLconversionFactory()
    if args.reference_genome:
        spcf.set_genome(args.reference_genome)

    # Compiled once: both patterns are applied to every selected line.
    xa_tag = re.compile(r'XA:Z:(\S+)')
    xa_hit = re.compile(r'([^,]+),([+-])(\d+),([^,]+)')

    # universal_newlines=True makes the pipe yield text, so the str
    # patterns above also work on Python 3.
    sampipe = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    fname = args.tempdir + '/bedpart.' + str(thread_index) + '.bed'
    try:
        with open(fname, 'w') as of:
            for z, line in enumerate(sampipe.stdout, 1):
                if z % thread_count != thread_index:
                    continue
                line = line.rstrip()
                if SamBasics.is_header(line):
                    continue
                d = SamBasics.sam_line_to_dictionary(line)
                strand = '-' if SamBasics.check_flag(d['flag'], 16) else '+'
                # First entry is always the primary alignment.
                seqs = [[d['qname'], d['rname'], strand, d['pos'],
                         d['cigar']]]
                if args.use_secondary_alignments:
                    m = xa_tag.search(line)
                    if m:
                        # XA:Z value is a ';'-separated list of
                        # "rname,<strand><pos>,cigar,..." entries.
                        for secondary in m.group(1).rstrip(";").split(";"):
                            m1 = xa_hit.match(secondary)
                            if not m1:
                                sys.stderr.write("strange secondary format " +
                                                 secondary + "\n")
                                # Non-zero status so callers see the failure.
                                sys.exit(1)
                            seqs.append([
                                d['qname'],
                                m1.group(1),
                                m1.group(2),
                                int(m1.group(3)),
                                m1.group(4)
                            ])
                of.write(get_exons_from_seqs(seqs, d, spcf))
    finally:
        # Always release the pipe and reap the child, even on early exit.
        sampipe.stdout.close()
        sampipe.wait()
def _pair_stats_line(data):
    """Return the carriage-return-terminated "count mean stddev" line.

    ``mean``/``stddev`` are only called with at least two values, the same
    guarantee the reading loop gave them; with fewer values the two
    statistics are reported as ``NA``.
    """
    if len(data) < 2:
        summary = str(len(data)) + "    NA    NA"
    else:
        summary = (str(len(data)) + "    " + str(int(mean(data))) + "    " +
                   str(int(stddev(data))))
    # Trailing padding + '\r' lets the next update overwrite this one.
    return summary + "              \r"


def main():
    """Report the mapping distance of properly paired reads.

    Reads a query-ordered SAM alignment (file name or ``-`` for stdin),
    keeps groups of exactly two entries that are both mapped (flag 4
    unset), both flagged as properly paired (flag 2) and form a
    first/second mate pair (flags 64/128), converts both to PSL and
    records the larger of the two ``tEnd - tStart`` spans across the
    mates.  A running "Pairs / Mean / Stddev" line is written to stderr
    every 1000 pairs and once more at the end.
    """
    parser = argparse.ArgumentParser(
        description=
        "Find mapping distance of paired end reads.  Takes an ordered (by query) alignment to a transcriptome.\nSomething that works for an input thus far is like:\nhisat --reorder -x mytranscriptome -1 my_1.fastq -2 my_2.fastq | this_script.py -"
    )
    parser.add_argument(
        'input_sam',
        help="SAMFILE ordered alignment a transcriptome or - for stdin")
    args = parser.parse_args()

    read_stdin = args.input_sam == '-'
    inf = sys.stdin if read_stdin else open(args.input_sam)
    data = []
    try:
        msr = SamBasics.MultiEntrySamReader(inf)
        spcf = SamBasics.SAMtoPSLconversionFactory()
        sys.stderr.write("Pairs    Mean    Stddev\n")
        while True:
            entries = msr.read_entries()
            if not entries:
                break
            if len(entries) != 2:
                continue
            e1, e2 = entries
            if e1.check_flag(4) or e2.check_flag(4):
                continue
            # Both mates must carry the proper-pair flag.  The previous
            # "not a and b" form only skipped when e1 lacked the flag while
            # e2 had it, letting pairs with neither flag set through.
            if not (e1.check_flag(2) and e2.check_flag(2)):
                continue
            if not ((e1.check_flag(64) and e2.check_flag(128)) or
                    (e1.check_flag(128) and e2.check_flag(64))):
                continue
            p1 = spcf.convert_line(e1.get_line())
            p2 = spcf.convert_line(e2.get_line())
            if not p1 or not p2:
                continue
            p1 = PSLBasics.PSL(p1)
            p2 = PSLBasics.PSL(p2)
            # Whichever mate order gives the larger end-minus-start span.
            dist = max(
                p2.value('tEnd') - p1.value('tStart'),
                p1.value('tEnd') - p2.value('tStart'))
            data.append(dist)
            if len(data) % 1000 == 0:
                sys.stderr.write(_pair_stats_line(data))
    finally:
        # Only close what this function opened; leave stdin alone.
        if not read_stdin:
            inf.close()
    sys.stderr.write(_pair_stats_line(data))
    sys.stderr.write("\n")
コード例 #3
0
def _write_psl_entry(psl_line, out_handle, unique_index=None):
    """Write one PSL line to *out_handle* and return its PSL object.

    When *unique_index* is given, the query name is replaced by
    ``'Q' + str(unique_index)`` before writing.
    """
    pobj = PSL(psl_line)
    if unique_index is not None:
        pobj.entry['qName'] = 'Q' + str(unique_index)
    out_handle.write(pobj.get_line() + "\n")
    return pobj


def _write_read_record(sam_line, qname, handle, as_fasta):
    """Write the read of *sam_line* to *handle* as FASTA or FASTQ.

    Raises:
        SystemExit: With status 1 when writing FASTQ and the sequence and
            quality strings differ in length.
    """
    sam = SamBasics.SAM(sam_line)
    sequence = sam.value('seq').upper()
    quality = sam.value('qual')
    if sam.check_flag(16):
        # Flag 16 set: the sequence goes through rc() and the quality
        # string is reversed to match.
        sequence = rc(sequence)
        quality = quality[::-1]
    if as_fasta:
        handle.write(">" + qname + "\n" + sequence + "\n")
        return
    if len(sequence) != len(quality):
        sys.stderr.write("ERROR: sequence " + sequence +
                         " length (" + str(len(sequence)) +
                         ") doesnt match quality " + quality +
                         " length (" + str(len(quality)) +
                         ")\n")
        sys.exit(1)
    handle.write("@" + qname + "\n" + sequence + "\n" + "+\n" + quality +
                 "\n")


def main():
    """Convert a SAM file (or stdin) into PSL lines.

    Header lines are fed to the conversion factory; every other line is
    converted and written to ``--output`` (stdout by default).  Optionally
    the SA:Z secondary and/or XA:Z alternative alignments of each line are
    converted too, query names are replaced by unique ``Q<n>`` names, and
    the reads of primary alignments are saved as FASTA or FASTQ.

    Exits with status 1 when FASTA/FASTQ output is combined with any of the
    secondary/alternative alignment options, or when a FASTQ record has
    mismatching sequence and quality lengths.
    """
    parser = argparse.ArgumentParser(
        description="Convert a sam file into a psl file")
    parser.add_argument('--genome',
                        help="FASTA input file of reference genome")
    parser.add_argument('--get_secondary_alignments',
                        action='store_true',
                        help="Report SA:Z secondary alignments as well")
    parser.add_argument('--get_alternative_alignments',
                        action='store_true',
                        help="Report XA:Z alternative alignments as well")
    parser.add_argument(
        '--get_all_alignments',
        action='store_true',
        help="Report SA:Z and XA:Z alternative alignments as well")
    parser.add_argument('--give_unique_names',
                        action='store_true',
                        help="Output query names will be unique.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--output_fasta',
        help=
        "FILENAME to save an outgoing fasta.  Only works for primary alignments."
    )
    group.add_argument(
        '--output_fastq',
        help=
        "FILENAME to save an outgoing fastq.  Only works for primary alignments."
    )
    parser.add_argument('infile', help="FILENAME input file or '-' for STDIN")
    parser.add_argument('-o',
                        '--output',
                        help="FILENAME for the output, STDOUT if not set.")
    args = parser.parse_args()

    want_secondary = (args.get_secondary_alignments
                      or args.get_all_alignments)
    want_alternative = (args.get_alternative_alignments
                        or args.get_all_alignments)
    read_path = args.output_fasta or args.output_fastq
    if read_path and (want_secondary or want_alternative):
        sys.stderr.write(
            "ERROR, can only output the fastq/fasta if we are doing primary alignments only.\n"
        )
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    # Track only the handles opened here so stdin/stdout stay usable for
    # the caller and every real file is closed, even on error.
    opened = []
    try:
        inf = sys.stdin
        if args.infile != '-':
            inf = open(args.infile)
            opened.append(inf)
        of = sys.stdout
        if args.output:
            of = open(args.output, 'w')
            opened.append(of)
        spcf = SamBasics.SAMtoPSLconversionFactory()
        if args.genome:
            spcf.set_genome(args.genome)
        off = None
        if read_path:
            off = open(read_path, 'w')
            opened.append(off)

        z = 0  # running count of PSL lines written; source of Q<n> names
        for line in inf:
            line = line.rstrip()
            if SamBasics.is_header(line):
                spcf.read_header_line(line)
                continue
            # We have a line to convert
            psl = spcf.convert_line(line)
            if psl:
                z += 1
                pobj = _write_psl_entry(
                    psl, of, z if args.give_unique_names else None)
                if off is not None:
                    _write_read_record(line, pobj.value('qName'), off,
                                       bool(args.output_fasta))
            # Collect the extra alignments requested for this line, SA:Z
            # first and XA:Z second, matching the original output order.
            extra_lines = []
            if want_secondary:
                extra_lines.extend(SamBasics.get_secondary_alignments(line))
            if want_alternative:
                extra_lines.extend(
                    SamBasics.get_alternative_alignments(line))
            for samline in extra_lines:
                psl = spcf.convert_line(samline)
                if psl:
                    z += 1
                    _write_psl_entry(
                        psl, of, z if args.give_unique_names else None)
    finally:
        for handle in opened:
            handle.close()