Example #1
0
 def test_modern(self):
     """infer_fastq_type yields the IlluminaFastqRecord name/class pair."""
     # The fixture lives in the ``etc`` directory next to this test module.
     here = os.path.dirname(__file__)
     fastq_path = os.path.join(
         here, 'etc/fake_IlluminaSequenceIdentifier.fastq')
     inferred = infer_fastq_type(fastq_path)
     self.assertEqual(('IlluminaFastqRecord', IlluminaFastqRecord), inferred)
Example #2
0
 def test_nobarcode(self):
     """infer_fastq_type yields the IlluminaNoBarcodeFastqRecord pair."""
     # The fixture lives in the ``etc`` directory next to this test module.
     here = os.path.dirname(__file__)
     fastq_path = os.path.join(
         here, "etc/fake_IlluminaSequenceIdentifierNoBarcode.fastq")
     want_name = "IlluminaNoBarcodeFastqRecord"
     want_cls = IlluminaNoBarcodeFastqRecord
     inferred = infer_fastq_type(fastq_path)
     self.assertEqual((want_name, want_cls), inferred)
Example #3
0
def process_fastq(
    input_file, output_prefix, logger_name="fastq_processing", log_itvl=1000000
):
    """
    Processes the provided fastq file and splits it into 1 or more separate
    readgroup level fastq files.

    The record class is inferred from the input with ``infer_fastq_type``.
    Each record is appended to the writer registered for its ``read_key``;
    a writer is created (from the record and ``output_prefix``) the first
    time a key is seen.

    :param input_file: input fastq file path
    :param output_prefix: output prefix
    :param logger_name: name of the logger created
    :param log_itvl: print log every N records; a falsy value (``0`` or
        ``None``) disables the periodic progress message
    :return: a tuple ``(reports, count)`` where ``reports`` maps each read
        key to its writer's ``reporter.to_dict()`` and ``count`` is the
        total number of records read
    """
    logger = get_logger(logger_name)
    ibase = os.path.basename(input_file)
    logger.info("Processing fastq: {0}".format(ibase))

    fq_cls = infer_fastq_type(input_file)
    logger.info("Inferred fastq class {0} in fastq {1}".format(fq_cls[0], ibase))

    reader = FastqReader(input_file, record_cls=fq_cls[1])
    count = 0
    writers = {}
    try:
        for record in reader:
            count += 1
            # Guard against a zero/None interval, which would otherwise
            # raise on the modulo instead of simply skipping the message.
            if log_itvl and count % log_itvl == 0:
                logger.info("Processed {0} records from {1}".format(count, ibase))

            key = record.read_key
            if key not in writers:
                logger.info("Found read key {0} in fastq {1}".format(key, ibase))
                writer = FastqWriterWithReport.from_record_and_prefix(
                    record, output_prefix
                )
                logger.info(
                    "Output file for read key {0} in fastq {1} is {2}".format(
                        key, ibase, writer.fname
                    )
                )
                writers[key] = writer
            writers[key] += record

    finally:
        # Attempt every close() even if an earlier one fails, so a single
        # bad handle cannot leave the remaining outputs open. The first
        # failure is re-raised once all handles have been tried.
        close_error = None
        for handle in [reader] + list(writers.values()):
            try:
                handle.close()
            except Exception as err:
                if close_error is None:
                    close_error = err
        if close_error is not None:
            raise close_error

    logger.info(
        "Processed a total of {0} records from {1} and found {2} read keys".format(
            count, ibase, len(writers)
        )
    )

    reports = {key: writer.reporter.to_dict() for key, writer in writers.items()}
    return (reports, count)
Example #4
0
    def test_unknown(self):
        """infer_fastq_type raises for the fake_Unknown.fastq fixture."""
        here = os.path.dirname(__file__)
        fastq_path = os.path.join(here, 'etc/fake_Unknown.fastq')

        # Callable form of assertRaises: the call itself is the assertion.
        self.assertRaises(Exception, infer_fastq_type, fastq_path)