def inspect_reads(fastq_files, output_prefix, quals):
    """
    Rename reads and convert their quality scores to 'sanger' format.

    Every input file is opened with open_compressed() and consumed in
    4-line records, in lockstep with the other inputs; processing stops
    as soon as any input runs out of records.  Record number n (counted
    from 0) of input number j (counted from 1) is written to
    '<output_prefix>_<j>.fq' with its header replaced by '@<n>/<j>', its
    third line reduced to '+', and its quality line passed through the
    function returned by get_qual_conversion_func(quals).

    Arguments:
    fastq_files   -- sequence of input FASTQ paths (one per mate)
    output_prefix -- prefix of the output file names
    quals         -- quality-score encoding of the input, handed
                     unchanged to get_qual_conversion_func()

    Returns config.JOB_SUCCESS when all records were written, or
    config.JOB_ERROR when no input was given or processing failed.  On
    failure the partially written output files are removed.
    """
    if not fastq_files:
        # with no inputs the record loop below could never reach
        # end-of-file and would spin forever
        logging.error("No FASTQ files to inspect")
        return config.JOB_ERROR
    # setup file iterators
    filehandles = [open_compressed(f) for f in fastq_files]
    # NOTE(review): parse_lines is assumed to yield a mutable list holding
    # the four newline-stripped lines of one record -- confirm
    fqiters = [parse_lines(f, numlines=4) for f in filehandles]
    output_files = [(output_prefix + "_%d.fq" % (x + 1))
                    for x in range(len(fastq_files))]
    outfhs = [open(f, "w") for f in output_files]
    qual_func = get_qual_conversion_func(quals)
    linenum = 0
    success = False
    try:
        while True:
            # only running out of records ends the loop normally
            try:
                pelines = [next(it) for it in fqiters]
            except StopIteration:
                break
            for i, lines in enumerate(pelines):
                # rename read using line number
                lines[0] = "@%d/%d" % (linenum, i + 1)
                # ignore redundant header
                lines[2] = "+"
                # convert quality score to sanger
                lines[3] = qual_func(lines[3])
                outfhs[i].write('\n'.join(lines) + '\n')
            linenum += 1
        success = True
    except Exception:
        # KeyboardInterrupt/SystemExit are deliberately not swallowed; they
        # propagate after the cleanup in the finally clause
        logging.exception("Unexpected error during FASTQ file processing")
    finally:
        # close every handle on every path so output is flushed and
        # nothing is removed while still open
        for fh in filehandles + outfhs:
            fh.close()
        if not success:
            for f in output_files:
                if os.path.exists(f):
                    os.remove(f)
    if not success:
        return config.JOB_ERROR
    logging.debug("Inspected %d fragments" % (linenum))
    return config.JOB_SUCCESS
# Example #2
def process_input_reads(fastq_files, output_prefix, quals, trim5, trim3):
    """
    Rename and trim reads and convert their quality scores to 'sanger'
    format, recording the original read names.

    Every input file is opened with open_compressed() and consumed in
    4-line records, in lockstep with the other inputs; processing stops
    as soon as any input runs out of records.  Fragment number n (counted
    from 1) of input number j (counted from 1) is written to
    '<output_prefix>_<j>.fq' with its header replaced by '@<n>/<j>' and
    its third line reduced to '+'.  The original name of each fragment
    (header of the first input without its leading character, cut at the
    first whitespace and at the first '/') is written, one per line, to
    '<output_prefix>.txt'.

    Arguments:
    fastq_files   -- sequence of input FASTQ paths (one per mate)
    output_prefix -- prefix of the output file names
    quals         -- quality-score encoding of the input, handed
                     unchanged to get_qual_conversion_func()
    trim5         -- number of characters dropped from the start of the
                     sequence and quality lines
    trim3         -- number of characters dropped from their end; the
                     slice end is never below trim5 + 1, so a read longer
                     than trim5 keeps at least one base

    Returns config.JOB_SUCCESS when all records were written, or
    config.JOB_ERROR when processing failed.  On failure the partially
    written output files and the read name file are removed.
    """
    # setup file iterators for input fastq files
    infhs = [open_compressed(f) for f in fastq_files]
    # NOTE(review): parse_lines is assumed to yield a mutable list holding
    # the four newline-stripped lines of one record -- confirm
    fqiters = [parse_lines(f, numlines=4) for f in infhs]
    # setup output files
    output_files = [(output_prefix + "_%d.fq" % (x + 1))
                    for x in range(len(fastq_files))]
    outfhs = [open(f, "w") for f in output_files]
    read_name_file = output_prefix + ".txt"
    read_name_fh = open(read_name_file, 'w')
    # get quality score conversion function
    qual_func = get_qual_conversion_func(quals)
    # read names are 1-based, so linenum is always one more than the
    # number of fragments written so far
    linenum = 1
    success = False
    try:
        while True:
            # only running out of records ends the loop normally
            try:
                pelines = [next(it) for it in fqiters]
            except StopIteration:
                break
            # get read1 first line of fq record, and remove "@" symbol
            read1_name = pelines[0][0][1:]
            # remove whitespace and/or read number tags /1 or /2
            read1_name = read1_name.split()[0].split("/")[0]
            # write to read name database
            read_name_fh.write(read1_name + '\n')
            # convert reads
            for i, lines in enumerate(pelines):
                # rename read using line number
                lines[0] = "@%d/%d" % (linenum, i + 1)
                # ignore redundant header
                lines[2] = "+"
                # trim read
                total_length = len(lines[1])
                pos3p = max(trim5 + 1, total_length - trim3)
                lines[1] = lines[1][trim5:pos3p]
                lines[3] = lines[3][trim5:pos3p]
                # convert quality score to sanger
                lines[3] = qual_func(lines[3])
                outfhs[i].write('\n'.join(lines) + '\n')
            linenum += 1
        success = True
    except Exception:
        # KeyboardInterrupt/SystemExit are deliberately not swallowed; they
        # propagate after the cleanup in the finally clause
        logging.exception("Unexpected error during FASTQ file processing")
    finally:
        # close every handle on every path (inputs included) before any
        # partial output is removed
        for fh in infhs + outfhs:
            fh.close()
        read_name_fh.close()
        if not success:
            for f in output_files + [read_name_file]:
                if os.path.exists(f):
                    os.remove(f)
    if not success:
        return config.JOB_ERROR
    # linenum started at 1, so the fragment count is one less
    logging.debug("Inspected %d fragments" % (linenum - 1))
    return config.JOB_SUCCESS
def process_input_reads(fastq_files, output_prefix, quals, trim5, trim3):
    """
    Rename and trim reads and convert their quality scores to 'sanger'
    format, recording the original read names.

    Every input file is opened with open_compressed() and consumed in
    4-line records, in lockstep with the other inputs; processing stops
    as soon as any input runs out of records.  Fragment number n (counted
    from 1) of input number j (counted from 1) is written to
    '<output_prefix>_<j>.fq' with its header replaced by '@<n>/<j>' and
    its third line reduced to '+'.  The original name of each fragment
    (header of the first input without its leading character, cut at the
    first whitespace and at the first '/') is written, one per line, to
    '<output_prefix>.txt'.

    Arguments:
    fastq_files   -- sequence of input FASTQ paths (one per mate)
    output_prefix -- prefix of the output file names
    quals         -- quality-score encoding of the input, handed
                     unchanged to get_qual_conversion_func()
    trim5         -- number of characters dropped from the start of the
                     sequence and quality lines
    trim3         -- number of characters dropped from their end; the
                     slice end is never below trim5 + 1, so a read longer
                     than trim5 keeps at least one base

    Returns config.JOB_SUCCESS when all records were written, or
    config.JOB_ERROR when processing failed.  On failure the partially
    written output files and the read name file are removed.
    """
    # setup file iterators for input fastq files
    infhs = [open_compressed(f) for f in fastq_files]
    # NOTE(review): parse_lines is assumed to yield a mutable list holding
    # the four newline-stripped lines of one record -- confirm
    fqiters = [parse_lines(f, numlines=4) for f in infhs]
    # setup output files
    output_files = [(output_prefix + "_%d.fq" % (x + 1))
                    for x in range(len(fastq_files))]
    outfhs = [open(f, "w") for f in output_files]
    read_name_file = output_prefix + ".txt"
    read_name_fh = open(read_name_file, 'w')
    # get quality score conversion function
    qual_func = get_qual_conversion_func(quals)
    # read names are 1-based, so linenum is always one more than the
    # number of fragments written so far
    linenum = 1
    success = False
    try:
        while True:
            # only running out of records ends the loop normally
            try:
                pelines = [next(it) for it in fqiters]
            except StopIteration:
                break
            # get read1 first line of fq record, and remove "@" symbol
            read1_name = pelines[0][0][1:]
            # remove whitespace and/or read number tags /1 or /2
            read1_name = read1_name.split()[0].split("/")[0]
            # write to read name database
            read_name_fh.write(read1_name + '\n')
            # convert reads
            for i, lines in enumerate(pelines):
                # rename read using line number
                lines[0] = "@%d/%d" % (linenum, i + 1)
                # ignore redundant header
                lines[2] = "+"
                # trim read
                total_length = len(lines[1])
                pos3p = max(trim5 + 1, total_length - trim3)
                lines[1] = lines[1][trim5:pos3p]
                lines[3] = lines[3][trim5:pos3p]
                # convert quality score to sanger
                lines[3] = qual_func(lines[3])
                outfhs[i].write('\n'.join(lines) + '\n')
            linenum += 1
        success = True
    except Exception:
        # KeyboardInterrupt/SystemExit are deliberately not swallowed; they
        # propagate after the cleanup in the finally clause
        logging.exception("Unexpected error during FASTQ file processing")
    finally:
        # close every handle on every path (inputs included) before any
        # partial output is removed
        for fh in infhs + outfhs:
            fh.close()
        read_name_fh.close()
        if not success:
            for f in output_files + [read_name_file]:
                if os.path.exists(f):
                    os.remove(f)
    if not success:
        return config.JOB_ERROR
    # linenum started at 1, so the fragment count is one less
    logging.debug("Inspected %d fragments" % (linenum - 1))
    return config.JOB_SUCCESS