Example #1
0
def subtract_mouse_reads(
    summary_file, in_fastq, out_fastq, sub_fastq, num_mismatches):
    """Split the reads of a FASTQ file into mouse-like and other reads.

    Accept a read as a mouse read if it contains less than or equal to
    num_mismatches mismatches from the mouse genome.

    summary_file    Table read with filelib.read_row(header=1); each row
                    must provide the attributes `query_name` and `NM`.
    in_fastq        FASTQ file to read.
    out_fastq       Receives the reads that are NOT classified as mouse.
    sub_fastq       Receives the reads that are classified as mouse
                    (the subtracted reads).
    num_mismatches  Maximum NM value for a read to count as mouse.
    """
    from genomicode import filelib
    from genomicode import genomelib

    # List the reads that look like mouse.
    mouse_reads = set()
    for d in filelib.read_row(summary_file, header=1):
        if not d.NM:  # ignore missing alignments
            continue
        if int(d.NM) <= num_mismatches:
            mouse_reads.add(d.query_name)

    # Use context managers so both output files are flushed and closed
    # even if reading or writing fails part way through.
    with open(out_fastq, 'w') as outhandle:
        with open(sub_fastq, 'w') as subhandle:
            for title, sequence, quality in genomelib.read_fastq(in_fastq):
                # Normalize the title so it is comparable to query_name.
                name = title
                if name.startswith("@"):
                    name = name[1:]
                # BAM file only contains the first part.
                name = name.split()[0]
                if name in mouse_reads:
                    handle = subhandle
                else:
                    handle = outhandle
                genomelib.write_fastq(title, sequence, quality, handle)
def copy_fastq_file(in_filename, out_filename, num_samples):
    """Copy at most the first num_samples reads of a FASTQ file.

    in_filename   FASTQ file to read (via genomelib.read_fastq).
    out_filename  File to write; created or truncated.
    num_samples   Maximum number of reads to copy.  If the input holds
                  fewer reads, all of them are copied.
    """
    from genomicode import genomelib

    # The context manager guarantees the output is flushed and closed.
    with open(out_filename, 'w') as outhandle:
        for i, x in enumerate(genomelib.read_fastq(in_filename)):
            if i >= num_samples:
                break
            genomelib.write_fastq(*x, handle=outhandle)
def copy_fastq(in_filename, out_filename, MAX_READS=None):
    """Copy reads from one FASTQ file to another.

    in_filename   FASTQ file to read (via genomelib.read_fastq).
    out_filename  File to write; created or truncated.
    MAX_READS     Maximum number of reads to copy.  None (the default)
                  copies every read.

    Raises AssertionError if the input runs out before a single read
    has been copied.
    """
    from genomicode import genomelib

    in_iter = iter(genomelib.read_fastq(in_filename))
    num_copied = 0
    # The context manager guarantees the output is flushed and closed.
    with open(out_filename, 'w') as out_handle:
        while MAX_READS is None or num_copied < MAX_READS:
            # next() with a default works on Python 2.6+ and 3, and does
            # not raise StopIteration when the input is exhausted.
            x = next(in_iter, None)
            if not x:  # no more reads
                # make sure at least 1 read
                assert num_copied, "No reads found in %s" % in_filename
                break
            genomelib.write_fastq(*x, handle=out_handle)
            num_copied += 1