예제 #1
0
def filter_snvs(in_fhand,
                out_fhand,
                filters,
                filtered_fhand=None,
                log_fhand=None,
                reader_kwargs=None):
    '''It filters an input vcf.

    The input fhand has to be uncompressed. The original file could be a
    gzipped file, but in that case it has to be opened with gzip.open before
    sending it to this function.

    in_fhand -- file handle with the VCF to read.
    out_fhand -- file handle that receives the SNVs left in packet[PASSED]
        once every filter has been applied.
    filters -- iterable of callables; each one is called with a packet and
        must return a packet (indexable by PASSED and FILTERED_OUT).
    filtered_fhand -- optional file handle that receives the SNVs left in
        packet[FILTERED_OUT]. Nothing is written for them when it is falsy.
    log_fhand -- optional file handle handed to _write_log together with
        the SNV total and the per-filter passed counts.
    reader_kwargs -- optional dict of extra keyword arguments for VCFReader.
        It is copied, so the caller's dict is never modified; 'compressed'
        and 'filename' are always overridden.

    Writing stops at the first packet for which _safe_write_snv reports a
    failed write. The function returns None.
    '''
    # Work on a private copy: updating the dict we were given would leak the
    # forced 'compressed'/'filename' entries back into the caller's dict.
    reader_kwargs = dict(reader_kwargs) if reader_kwargs else {}
    # The input fhand to this function cannot be compressed
    reader_kwargs.update({
        'compressed': False,
        'filename': 'pyvcf_bug_workaround'
    })

    reader = VCFReader(in_fhand, **reader_kwargs)

    # Both writers take their header from a reader built on the input header.
    template_reader = VCFReader(StringIO(reader.header))
    writer = VCFWriter(out_fhand, template_reader=template_reader)
    if filtered_fhand:
        filtered_writer = VCFWriter(filtered_fhand,
                                    template_reader=template_reader)
    else:
        filtered_writer = None

    packets = group_in_filter_packets(reader.parse_snvs(),
                                      SNPS_PER_FILTER_PACKET)
    # NOTE(review): the total starts at 0.01 rather than 0, presumably so
    # that _write_log never divides by zero on an empty input -- confirm
    # against _write_log before changing it.
    tot_snps = 0.01
    # Keyed by filter class name, in the order the filters are first applied.
    passed_snps = OrderedDict()
    broken_pipe = False
    for packet in packets:
        tot_snps += len(packet[PASSED]) + len(packet[FILTERED_OUT])
        for filter_ in filters:
            packet = filter_(packet)
            filter_name = filter_.__class__.__name__
            passed_snps[filter_name] = (passed_snps.get(filter_name, 0) +
                                        len(packet[PASSED]))

        for snv in packet[PASSED]:
            if not _safe_write_snv(writer, snv):
                broken_pipe = True
                break
        if filtered_writer:
            for snv in packet[FILTERED_OUT]:
                if not _safe_write_snv(filtered_writer, snv):
                    broken_pipe = True
                    break
        if broken_pipe:
            break

    if log_fhand:
        _write_log(log_fhand, tot_snps, passed_snps)

    writer.flush()
    # The filtered-out records may be buffered too; flush them so they reach
    # filtered_fhand before the caller reads or closes it.
    if filtered_writer:
        filtered_writer.flush()
예제 #2
0
 def test_vcf_writer(self):
     '''It writes every SNV read from vari_filter.vcf with a VCFWriter.

     The written file must contain the string 'CUUC00027_TC01'.
     '''
     # Context managers make sure neither the input nor the re-opened output
     # handle is leaked, even when the assertion fails.
     with open(join(TEST_DATA_DIR, 'vari_filter.vcf')) as varscan:
         reader = VCFReader(fhand=varscan)
         out_fhand = NamedTemporaryFile()
         writer = VCFWriter(out_fhand, reader)
         try:
             for snv in reader.parse_snvs():
                 writer.write_snv(snv)
             # Flush before reading the file back through a second handle.
             writer.flush()
             with open(out_fhand.name) as written:
                 assert 'CUUC00027_TC01' in written.read()
         finally:
             writer.close()
예제 #3
0
def filter_snvs(in_fhand, out_fhand, filters, filtered_fhand=None,
                log_fhand=None, reader_kwargs=None):
    '''It filters an input vcf.

    The input fhand has to be uncompressed. The original file could be a
    gzipped file, but in that case it has to be opened with gzip.open before
    sending it to this function.

    in_fhand -- file handle with the VCF to read.
    out_fhand -- file handle that receives the SNVs left in packet[PASSED]
        once every filter has been applied.
    filters -- iterable of callables; each one is called with a packet and
        must return a packet (indexable by PASSED and FILTERED_OUT).
    filtered_fhand -- optional file handle that receives the SNVs left in
        packet[FILTERED_OUT]. Nothing is written for them when it is falsy.
    log_fhand -- optional file handle handed to _write_log together with
        the SNV total and the per-filter passed counts.
    reader_kwargs -- optional dict of extra keyword arguments for VCFReader.
        It is copied, so the caller's dict is never modified; 'compressed'
        and 'filename' are always overridden.

    Writing stops at the first packet for which _safe_write_snv reports a
    failed write. The function returns None.
    '''
    # Copy instead of updating in place, so the forced entries below do not
    # show up in the dict owned by the caller.
    reader_kwargs = dict(reader_kwargs) if reader_kwargs else {}
    # The input fhand to this function cannot be compressed
    reader_kwargs.update({'compressed': False,
                          'filename': 'pyvcf_bug_workaround'})

    reader = VCFReader(in_fhand, **reader_kwargs)

    # Both writers take their header from a reader built on the input header.
    template_reader = VCFReader(StringIO(reader.header))
    writer = VCFWriter(out_fhand, template_reader=template_reader)
    if filtered_fhand:
        filtered_writer = VCFWriter(filtered_fhand,
                                    template_reader=template_reader)
    else:
        filtered_writer = None

    packets = group_in_filter_packets(reader.parse_snvs(),
                                      SNPS_PER_FILTER_PACKET)
    # NOTE(review): the total starts at 0.01 rather than 0, presumably so
    # that _write_log never divides by zero on an empty input -- confirm
    # against _write_log before changing it.
    tot_snps = 0.01
    # Keyed by filter class name, in the order the filters are first applied.
    passed_snps = OrderedDict()
    broken_pipe = False
    for packet in packets:
        tot_snps += len(packet[PASSED]) + len(packet[FILTERED_OUT])
        for filter_ in filters:
            packet = filter_(packet)
            filter_name = filter_.__class__.__name__
            passed_snps[filter_name] = (passed_snps.get(filter_name, 0) +
                                        len(packet[PASSED]))

        for snv in packet[PASSED]:
            if not _safe_write_snv(writer, snv):
                broken_pipe = True
                break
        if filtered_writer:
            for snv in packet[FILTERED_OUT]:
                if not _safe_write_snv(filtered_writer, snv):
                    broken_pipe = True
                    break
        if broken_pipe:
            break

    if log_fhand:
        _write_log(log_fhand, tot_snps, passed_snps)

    writer.flush()
    # The filtered-out records may be buffered too; flush them so they reach
    # filtered_fhand before the caller reads or closes it.
    if filtered_writer:
        filtered_writer.flush()