# The scripts excerpted below are assumed to provide get_args(), motd(), Tags,
# levenshtein/hamming, filtered(), get_index(), get_quality(),
# change_read_num(), and a ``fastq`` module exposing FasterFastqReader and
# FasterFastqWriter; only the standard-library and numpy imports used in this
# excerpt are added here.
import glob
import os
import sys
from itertools import izip

import numpy


# Interleave paired R1/R2 FASTQ files into a single output file.
def main():
    args = get_args()
    outfile = fastq.FasterFastqWriter(args.output)
    read1 = fastq.FasterFastqReader(args.read1)
    read2 = fastq.FasterFastqReader(args.read2)
    rc = 0
    sys.stdout.write("Interleaving reads (1 dot = 10,000 pairs): ")
    for r1, r2 in izip(read1, read2):
        assert r1[0].split(" ")[0] == r2[0].split(" ")[0], \
                "FASTQ headers do not match between read 1 and read 2."
        outfile.write(r1)
        outfile.write(r2)
        if rc != 0 and rc % 10000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
        rc += 1
    print ""
    outfile.close()
    read1.close()
    read2.close()
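

# Split an interleaved FASTQ file back into separate read 1 and read 2 files.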
def main():
    args = get_args()
    r1 = fastq.FasterFastqWriter(args.read1)
    r2 = fastq.FasterFastqWriter(args.read2)
    rc = 0
    # track the most recent read 1 header so pairing can be verified below
    first = None
    # read the interleaved input into a fastq iterator
    reads = fastq.FasterFastqReader(args.input)
    sys.stdout.write("Splitting reads (1 dot = 10,000 pairs): ")
    for read in reads:
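        # Casava 1.8+ headers store the read number as the first part of the
        # second whitespace-delimited field (e.g. '1:N:0:ATCACG')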
        if read[0].split(' ')[1].split(':')[0] == '1':
            r1.write(read)
            first = read[0].split(' ')[0]
        else:
            assert first == read[0].split(' ')[0], "File does not appear interleaved."
            r2.write(read)
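            # count a pair only after its second read is written, so the dot
            # counter tracks pairs rather than individual reads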
            if rc != 0 and rc % 10000 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            rc += 1
    print ""
    reads.close()
    r1.close()
    r2.close()
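

# Demultiplex Illumina reads by index (tag) sequence, optionally rescuing
# index reads that are a single edit away from a known tag.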
def main():
    """main loop"""
    motd()
    args = get_args()
    # setup tags object
    tags = Tags(args.tagmap, args.section, args.no_correct)
    # create output files
    tags.create_zip_files(args.output)
    if not args.hamming:
        # vectorize the levenshtein function so we only call once per read
        distance = numpy.vectorize(levenshtein)
    else:
        # vectorize the hamming function so we only call once per read
        distance = numpy.vectorize(hamming)
    read = 0
    for f in glob.glob(os.path.join(args.reads, '*_R1_*')):
        print "Working on ", f
        read1 = fastq.FasterFastqReader(f)
        # get basename of R1 file
        read1_basename = os.path.basename(f)
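        # with this file-naming convention the index read is in the R2 file
        # and the second biological read is in the R3 file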
        index = fastq.FasterFastqReader(
            os.path.join(args.reads, read1_basename.replace('R1', 'R2')))
        read2 = fastq.FasterFastqReader(
            os.path.join(args.reads, read1_basename.replace('R1', 'R3')))
        # read all of our files into fastq iterators
        for r1, i, r2 in izip(read1, index, read2):
            if read % 100000 == 0:
                print "{:,}".format(read)
            if not filtered(r1):
                # see if we need to trim the index
                idx, idx_qual = get_index(i, args.tag_length)
                # get index sequence differences from tags
                dist = distance(tags.seqs, idx)
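                # dist holds one edit distance per known tag sequence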
                # get quality values
                good_quality = get_quality(idx_qual, args.min_qual,
                                           args.min_mean_qual)
                # find tags with 0 distance from other tags
                positions = numpy.where(dist == 0)[0]
                # if not a perfect match, check distance 1 matches
                if positions.size == 0 and good_quality and idx not in tags.no_correct:
                    positions = numpy.where(dist == 1)[0]
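                # exactly one hit (at distance 0, or 1 after correction)
                # assigns the read to that tag; multiple hits are ambiguous
                # and fall through to 'unknown'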
                if positions.size == 1 and good_quality:
                    # assert headers match
                    assert r1[0].split(' ')[0] == r2[0].split(' ')[0], \
                        "Header mismatch"
                    # get tag for match
                    match = tags.seqs[positions[0]]
                    # change header to add tag
                    r1 = change_read_num(r1, 1, match)
                    r2 = change_read_num(r2, 2, match)
                    # write to output
                    tags.files[match][1].write(r1)
                    tags.files[match][2].write(r2)
                # put low quality tags into their own file
                elif positions.size == 1 and not good_quality:
                    tags.files['lowqual'][1].write(r1)
                    tags.files['lowqual'][2].write(i)
                    tags.files['lowqual'][3].write(r2)
                # put everything else into unknown
                else:
                    tags.files['unknown'][1].write(r1)
                    tags.files['unknown'][2].write(i)
                    tags.files['unknown'][3].write(r2)
            # if for some reason there are reads not passing filter,
            # put those into unknown, too.
            else:
                tags.files['unknown'][1].write(r1)
                tags.files['unknown'][2].write(i)
                tags.files['unknown'][3].write(r2)
            read += 1
        read1.close()
        index.close()
        read2.close()
    tags.close_zip_files()