예제 #1
0
    def test_build38_exact_search_variants(self, v600e):
        """Exact-mode GRCh38 coordinate searches each yield exactly one variant.

        The first query must return the ``v600e`` fixture itself; the
        remaining queries are checked against the expected variant id.
        """
        def exact_matches(*fields):
            # Every case uses the single-query API in 'exact' mode.
            return civic.search_variants_by_coordinates(
                CoordinateQuery(*fields), search_mode='exact')

        hits = exact_matches('7', 140753336, 140753336, 'T', 'A', 'GRCh38')
        assert len(hits) == 1
        assert hits[0] == v600e

        # (query fields, id of the only variant expected back)
        id_cases = (
            (('7', 140753336, 140753337, 'TT', 'AC', 'GRCh38'), 563),
            # Fifth field is None and the range spans two positions.
            (('3', 10146548, 10146549, 'C', None, 'GRCh38'), 1918),
            # Fourth field is None at a single position.
            (('3', 10146618, 10146618, None, 'G', 'GRCh38'), 2042),
        )
        for fields, variant_id in id_cases:
            hits = exact_matches(*fields)
            assert len(hits) == 1
            assert hits[0].id == variant_id
예제 #2
0
def _civic_query_coordinates(position, ref, alt):
    """Turn one VCF REF/ALT pair at ``position`` into query coordinates.

    Returns a ``(start, end, ref, alt)`` tuple.

    * Alleles of equal length are returned unchanged and span ``len(ref)``
      positions beginning at ``position``.
    * Alleles of different length have their first base dropped
      (presumably the shared VCF anchor base -- confirm for input that is
      not left-anchored). If the trimmed ref is then longer than the
      trimmed alt, the span starts one position later and an empty alt
      becomes ``None``. Otherwise the span starts at ``position`` and an
      empty ref becomes ``None`` with a two-position span.
    """
    if len(ref) == len(alt):
        # Covers both the single-base case and equal-length substitutions.
        return position, position + len(ref) - 1, ref, alt

    ref = ref[1:]
    alt = alt[1:]
    if len(ref) > len(alt):
        start = position + 1
        return start, start + len(ref) - 1, ref, (None if alt == '' else alt)

    if ref == '':
        return position, position + 1, None, alt
    # NOTE(review): the first base was trimmed but start is not advanced
    # here, unlike the branch above -- kept as-is, confirm this is intended.
    return position, position + len(ref) - 1, ref, alt


def annotate_vcf(input_vcf, output_vcf, reference, include_status):
    """Annotate a VCF with information from CIViC.

    Each ALT allele of each record in ``input_vcf`` is looked up in CIViC
    with an exact coordinate search. When exactly one variant matches and
    its ``csq(include_status)`` result is non-empty, that result is stored
    in the record's ``CIVIC`` INFO field. When several variants match, a
    message listing their ids is printed and the record is left unchanged.
    Every input record is written to ``output_vcf`` exactly once.

    Args:
        input_vcf: Path of the VCF to read.
        output_vcf: Path of the annotated VCF to write.
        reference: Reference build passed as the last ``CoordinateQuery``
            field.
        include_status: Forwarded unchanged to ``variant.csq``.
    """
    reader = vcfpy.Reader.from_path(input_vcf)
    try:
        new_header = reader.header.copy()
        new_header.add_info_line(
            OrderedDict([('ID', 'CIVIC'), ('Number', '.'),
                         ('Type', 'String'),
                         ('Description', VCFWriter.CSQ_DESCRIPTION)]))
        writer = vcfpy.Writer.from_path(output_vcf, new_header)
        try:
            for entry in reader:
                # NOTE(review): symbolic or '*' ALT values are passed to
                # the search untouched -- confirm how CIViC treats them.
                for allele in entry.ALT:
                    start, end, ref, alt = _civic_query_coordinates(
                        entry.POS, entry.REF, allele.value)
                    query = CoordinateQuery(entry.CHROM, start, end, alt,
                                            ref, reference)
                    variants = civic.search_variants_by_coordinates(
                        query, search_mode='exact')
                    if variants is None:
                        continue
                    if len(variants) == 1:
                        csq = variants[0].csq(include_status)
                        if len(csq) > 0:
                            # For multi-allelic records a later matching
                            # ALT replaces an earlier ALT's annotation.
                            entry.INFO['CIVIC'] = csq
                    elif len(variants) > 1:
                        print(
                            "More than one variant found for start {} stop {} ref {} alt {}. CIViC Variants IDs: {}"
                            .format(
                                start, end, ref, alt,
                                ",".join(str(v.id) for v in variants)))
                # Write once per record, after all of its ALT alleles have
                # been examined, so multi-allelic records are not duplicated
                # and records without ALT alleles are not dropped.
                writer.write_record(entry)
        finally:
            writer.close()
    finally:
        reader.close()
예제 #3
0
 def test_errors(self):
     """Invalid coordinate searches raise ValueError with the expected text.

     Each case names the query fields, whether the bulk API is used, the
     search mode, and a fragment that must appear in the error message.
     """
     alt_dash = "Unexpected alt `-` in coordinate query. Did you mean `None`?"
     ref_dash = "Unexpected ref `-` in coordinate query. Did you mean `None`?"
     # (query fields, use bulk API, search mode, expected message fragment)
     cases = (
         (('7', 140453136, 140453136, 'T', 'A'), False, 'wrong_mode',
          "unexpected search mode"),
         (('7', 140753336, 140753336, '*', 'A', 'GRCh38'), False, 'exact',
          "Can't use wildcard when searching for non-GRCh37 coordinates"),
         (('7', 140753336, 140753336, None, None, 'GRCh38'), False, 'exact',
          "alt or ref required for non-GRCh37 coordinate queries"),
         (('7', 140753336, 140753336, 'T', 'A', 'GRCh38'), False, 'any',
          "Only exact search mode is supported for non-GRCh37 coordinate queries"),
         (('7', 140453136, 140453136, '-', 'A'), False, 'exact', alt_dash),
         (('7', 140453136, 140453136, 'T', '-'), False, 'exact', ref_dash),
         (('7', 140453136, 140453136, '-', 'A', 'GRCh38'), False, 'exact',
          alt_dash),
         (('7', 140453136, 140453136, 'T', '-', 'GRCh38'), False, 'exact',
          ref_dash),
         (('7', 140453136, 140453136, '-', 'A'), True, 'exact', alt_dash),
         (('7', 140453136, 140453136, 'T', '-'), True, 'exact', ref_dash),
     )
     for fields, use_bulk, mode, fragment in cases:
         with pytest.raises(ValueError) as raised:
             # The query is built inside the context so that an error from
             # construction is captured the same way as one from the search.
             query = CoordinateQuery(*fields)
             if use_bulk:
                 civic.bulk_search_variants_by_coordinates(
                     [query], search_mode=mode)
             else:
                 civic.search_variants_by_coordinates(
                     query, search_mode=mode)
         assert fragment in str(raised.value)
예제 #4
0
 def test_build36_exact_search_variants(self, v600e):
     """An exact NCBI36 coordinate search returns only the ``v600e`` fixture."""
     matches = civic.search_variants_by_coordinates(
         CoordinateQuery('7', 140099605, 140099605, 'T', 'A', 'NCBI36'),
         search_mode='exact')
     assert len(matches) == 1
     assert matches[0] == v600e
예제 #5
0
    def test_single_and_bulk_exact_return_same_variants(self):
        """Single and bulk exact searches agree for the same query.

        For every query both APIs must return the expected number of
        variants, and ``hash()`` of each single-search variant must match
        the ``v_hash`` of the corresponding bulk-search match.
        """
        # (query fields, number of variants both searches must return)
        cases = (
            (('7', 140453136, 140453136, 'T', '*'), 1),
            (('7', 140453136, 140453136, 'T', 'A'), 1),
            (('7', 140453136, 140453136, 'T', None), 0),
            (('7', 140453136, 140453137, 'TT', '*'), 1),
            (('7', 140453136, 140453137, 'TT', 'AC'), 1),
            (('7', 140453136, 140453137, 'TT', None), 0),
            (('3', 10183706, 10183706, None, 'C'), 1),
            (('3', 10183706, 10183706, 'T', 'C'), 1),
            (('3', 10183706, 10183706, '*', 'C'), 2),
        )
        for fields, expected in cases:
            query = CoordinateQuery(*fields)
            single = civic.search_variants_by_coordinates(
                query, search_mode='exact')
            bulk = civic.bulk_search_variants_by_coordinates(
                [query], search_mode='exact')

            assert len(single) == expected
            if expected == 0:
                # With no hits the bulk result is checked as a whole; it is
                # deliberately not indexed by the query.
                assert len(bulk) == 0
                continue

            bulk_matches = bulk[query]
            assert len(bulk_matches) == expected
            # Compare order-independently; for a single hit this reduces to
            # comparing the one hash against the one v_hash.
            single_hashes = sorted(hash(variant) for variant in single)
            bulk_hashes = sorted(match.v_hash for match in bulk_matches)
            assert single_hashes == bulk_hashes