Example #1
0
def organism_filter(
        fastq_r1, fastq_r2, filtered_fastq_r1, filtered_fastq_r2,
        detailed_metrics, summary_metrics, tempdir, cell_id, params,
        reference, docker_image=None, filter_contaminated_reads=False,
):
    """Screen a fastq pair, write count metrics and produce output fastqs.

    Steps, in order:

    1. ``tempdir`` is removed if it exists and then recreated.
    2. ``run_fastq_screen_paired_end`` is run on the input pair and returns
       the two tagged fastq files.
    3. Counts gathered from the tagged pair are written to
       ``detailed_metrics`` and ``summary_metrics`` for ``cell_id``.
    4. The tagged pair is passed through ``filter_reads`` (with
       ``reference``) when ``filter_contaminated_reads`` is true; otherwise
       each tagged file goes through ``re_tag_reads``. Either way the
       results land in ``filtered_fastq_r1`` / ``filtered_fastq_r2``.

    :param fastq_r1: input fastq, read 1
    :param fastq_r2: input fastq, read 2
    :param filtered_fastq_r1: output fastq, read 1
    :param filtered_fastq_r2: output fastq, read 2
    :param detailed_metrics: output passed to ``write_detailed_counts``
    :param summary_metrics: output passed to ``write_summary_counts``
    :param tempdir: scratch directory; wiped on entry
    :param cell_id: identifier forwarded to both metrics writers
    :param params: forwarded to ``run_fastq_screen_paired_end``
    :param reference: forwarded to ``filter_reads``
    :param docker_image: forwarded to ``run_fastq_screen_paired_end``
    :param filter_contaminated_reads: selects filtering over re-tagging
    """
    # Start from an empty directory: fastq screen tries to skip work when
    # it finds files left over from an earlier run.
    if os.path.exists(tempdir):
        shutil.rmtree(tempdir)
    helpers.makedirs(tempdir)

    tagged_r1, tagged_r2 = run_fastq_screen_paired_end(
        fastq_r1, fastq_r2, tempdir, params, docker_image=docker_image
    )

    counts = fastqutils.PairedTaggedFastqReader(
        tagged_r1, tagged_r2
    ).gather_counts()

    write_detailed_counts(counts, detailed_metrics, cell_id)
    write_summary_counts(counts, summary_metrics, cell_id)

    if not filter_contaminated_reads:
        # Keep the full tagged fastq for downstream use, with the organism
        # type information carried in the read name.
        tagged_to_output = (
            (tagged_r1, filtered_fastq_r1),
            (tagged_r2, filtered_fastq_r2),
        )
        for tagged_path, output_path in tagged_to_output:
            re_tag_reads(tagged_path, output_path)
        return

    filter_reads(
        tagged_r1, tagged_r2, filtered_fastq_r1, filtered_fastq_r2, reference
    )
Example #2
0
def organism_filter(fastq_r1, fastq_r2, filtered_fastq_r1, filtered_fastq_r2,
                    detailed_metrics, summary_metrics, tempdir, cell_id,
                    params):
    """Screen a fastq pair, write count metrics and produce output fastqs.

    ``tempdir`` is removed if it exists and recreated, then
    ``run_fastq_screen_paired_end`` is run on the input pair. Counts gathered
    from the resulting tagged pair are written to ``detailed_metrics`` and
    ``summary_metrics`` for ``cell_id``. Finally the tagged pair is handed to
    ``utils.filter_tag_reads``, which writes ``filtered_fastq_r1`` and
    ``filtered_fastq_r2``.

    :param fastq_r1: input fastq, read 1
    :param fastq_r2: input fastq, read 2
    :param filtered_fastq_r1: output fastq, read 1
    :param filtered_fastq_r2: output fastq, read 2
    :param detailed_metrics: output passed to ``write_detailed_counts``
    :param summary_metrics: output passed to ``write_summary_counts``
    :param tempdir: scratch directory; wiped on entry
    :param cell_id: identifier forwarded to both metrics writers
    :param params: forwarded to the screen, metrics and filter steps
    """
    # Start from an empty directory: fastq screen tries to skip work when
    # it finds files left over from an earlier run.
    if os.path.exists(tempdir):
        shutil.rmtree(tempdir)
    helpers.makedirs(tempdir)

    tagged_r1, tagged_r2 = run_fastq_screen_paired_end(
        fastq_r1, fastq_r2, tempdir, params,
    )

    counts = fastqutils.PairedTaggedFastqReader(
        tagged_r1, tagged_r2
    ).gather_counts()

    write_detailed_counts(counts, detailed_metrics, cell_id, params)
    write_summary_counts(counts, summary_metrics, cell_id, params)

    utils.filter_tag_reads(
        tagged_r1, tagged_r2, filtered_fastq_r1, filtered_fastq_r2, params
    )
def filter_reads(input_r1, input_r2, output_r1, output_r2, reference):
    """Write the read pairs selected by ``reference`` to the output fastqs.

    Read pairs come from ``PairedTaggedFastqReader.filter_read_iterator``
    called with ``reference``. Each mate is passed through
    ``add_tag_to_read_comment`` and then written line by line: read 1 to
    ``output_r1`` and read 2 to ``output_r2``.

    :param input_r1: tagged input fastq, read 1
    :param input_r2: tagged input fastq, read 2
    :param output_r1: output fastq, read 1 (opened in ``'wt'`` mode)
    :param output_r2: output fastq, read 2 (opened in ``'wt'`` mode)
    :param reference: forwarded to ``filter_read_iterator``
    """
    paired_reader = fastqutils.PairedTaggedFastqReader(input_r1, input_r2)

    with helpers.getFileHandle(output_r1, 'wt') as out_r1, \
            helpers.getFileHandle(output_r2, 'wt') as out_r2:
        for mate_1, mate_2 in paired_reader.filter_read_iterator(reference):
            record_1 = paired_reader.add_tag_to_read_comment(mate_1)
            record_2 = paired_reader.add_tag_to_read_comment(mate_2)

            # Read 1 is written in full before read 2.
            for record, handle in ((record_1, out_r1), (record_2, out_r2)):
                for line in record:
                    handle.write(line)
Example #4
0
def filter_tag_reads(input_r1, input_r2, output_r1, output_r2, params):
    """Write the read pairs selected by ``params`` to the output fastqs.

    The genome names (``params['genomes'][i]['name']``) and the set of
    ``params['filter_tags']`` (empty when that value is falsy) are passed to
    ``PairedTaggedFastqReader.filter_read_iterator``. Each mate it yields is
    passed through ``add_tag_to_read_comment`` and then written line by
    line: read 1 to ``output_r1`` and read 2 to ``output_r2``.

    :param input_r1: tagged input fastq, read 1
    :param input_r2: tagged input fastq, read 2
    :param output_r1: output fastq, read 1 (opened in ``'wt'`` mode)
    :param output_r2: output fastq, read 2 (opened in ``'wt'`` mode)
    :param params: mapping with ``'genomes'`` and ``'filter_tags'`` entries
    """
    genome_names = [genome['name'] for genome in params['genomes']]

    # A falsy 'filter_tags' value (None, empty list, ...) means no tags.
    configured_tags = params['filter_tags']
    tags_to_filter = set(configured_tags) if configured_tags else set()

    paired_reader = fastqutils.PairedTaggedFastqReader(input_r1, input_r2)

    with helpers.getFileHandle(output_r1, 'wt') as out_r1, \
            helpers.getFileHandle(output_r2, 'wt') as out_r2:
        selected_pairs = paired_reader.filter_read_iterator(
            genome_names, tags_to_filter
        )
        for mate_1, mate_2 in selected_pairs:
            record_1 = paired_reader.add_tag_to_read_comment(mate_1)
            record_2 = paired_reader.add_tag_to_read_comment(mate_2)

            # Read 1 is written in full before read 2.
            for record, handle in ((record_1, out_r1), (record_2, out_r2)):
                for line in record:
                    handle.write(line)