def test_build38_exact_search_variants(self, v600e):
    """Exact-mode GRCh38 searches each resolve to exactly one known variant.

    Covers four query shapes: a single-base substitution (compared against
    the ``v600e`` fixture), a two-base substitution, a query with ``None``
    as the fifth field, and a query with ``None`` as the fourth field (the
    latter three are compared by variant id).
    """

    def sole_exact_hit(*query_fields):
        # Run one exact search and insist on a single result before
        # handing that result back for the caller's identity check.
        hits = civic.search_variants_by_coordinates(
            CoordinateQuery(*query_fields), search_mode='exact')
        assert len(hits) == 1
        return hits[0]

    assert sole_exact_hit(
        '7', 140753336, 140753336, 'T', 'A', 'GRCh38') == v600e
    assert sole_exact_hit(
        '7', 140753336, 140753337, 'TT', 'AC', 'GRCh38').id == 563
    assert sole_exact_hit(
        '3', 10146548, 10146549, 'C', None, 'GRCh38').id == 1918
    assert sole_exact_hit(
        '3', 10146618, 10146618, None, 'G', 'GRCh38').id == 2042
def annotate_vcf(input_vcf, output_vcf, reference, include_status):
    """Annotate a VCF with information from CIViC"""
    # Parameters (kept out of the docstring so the docstring text itself is
    # unchanged, in case it is surfaced elsewhere, e.g. as CLI help):
    #   input_vcf      -- path handed to vcfpy.Reader.from_path
    #   output_vcf     -- path handed to vcfpy.Writer.from_path
    #   reference      -- passed as the sixth CoordinateQuery field
    #   include_status -- forwarded unchanged to each variant's csq() call
    #
    # Every input record is written to the output exactly once, with an
    # INFO/CIVIC value attached when an ALT allele matches a single CIViC
    # variant whose csq() result is non-empty.
    reader = vcfpy.Reader.from_path(input_vcf)
    try:
        new_header = reader.header.copy()
        new_header.add_info_line(
            OrderedDict([('ID', 'CIVIC'),
                         ('Number', '.'),
                         ('Type', 'String'),
                         ('Description', VCFWriter.CSQ_DESCRIPTION)]))
        writer = vcfpy.Writer.from_path(output_vcf, new_header)
        # try/finally so both handles are released even when a lookup or a
        # write raises part-way through the file.
        try:
            for entry in reader:
                for alt_record in entry.ALT:
                    start, end, ref, alt = _civic_query_coordinates(
                        entry.POS, entry.REF, alt_record.value)
                    query = CoordinateQuery(
                        entry.CHROM, start, end, alt, ref, reference)
                    variants = civic.search_variants_by_coordinates(
                        query, search_mode='exact')
                    if variants is not None:
                        if len(variants) == 1:
                            # Compute the annotation once and reuse it.
                            csq = variants[0].csq(include_status)
                            if len(csq) > 0:
                                # NOTE: each assignment replaces the
                                # previous one, so for multi-ALT records
                                # the last annotated ALT wins.
                                entry.INFO['CIVIC'] = csq
                        elif len(variants) > 1:
                            print(
                                "More than one variant found for start {} stop {} ref {} alt {}. CIViC Variants IDs: {}"
                                .format(
                                    start, end, ref, alt,
                                    ",".join(str(v.id) for v in variants)))
                writer.write_record(entry)
        finally:
            writer.close()
    finally:
        reader.close()


def _civic_query_coordinates(position, ref, alt):
    """Translate one VCF POS/REF/ALT triple into CoordinateQuery fields.

    Returns a ``(start, end, ref, alt)`` tuple. ``ref`` or ``alt`` may come
    back as ``None`` when stripping the leading base leaves it empty.
    """
    if len(ref) == len(alt):
        # Equal-length alleles (including the single-base case) span
        # ``len(ref)`` bases starting at POS and are passed through as-is.
        return position, position + len(ref) - 1, ref, alt

    # Length-changing alleles: drop the first base of both alleles
    # (presumably the shared VCF anchor base -- confirm against callers).
    ref = ref[1:]
    alt = alt[1:]

    if len(ref) > len(alt):
        # Net loss of bases: the affected span begins one past POS.
        start = position + 1
        if alt == '':
            alt = None
        return start, start + len(ref) - 1, ref, alt

    # Net gain of bases: the span stays anchored at POS.
    if ref == '':
        return position, position + 1, None, alt
    return position, position + len(ref) - 1, ref, alt
def test_errors(self):
    """Invalid coordinate searches raise ``ValueError`` with a clear message.

    Each case builds a ``CoordinateQuery`` from the given fields, runs
    either the single-query or the bulk search, and checks that the raised
    ``ValueError`` text contains the expected fragment.
    """
    alt_dash = "Unexpected alt `-` in coordinate query. Did you mean `None`?"
    ref_dash = "Unexpected ref `-` in coordinate query. Did you mean `None`?"

    def expect_value_error(fragment, query_fields, search_mode, bulk=False):
        # The query is built inside the raises-block, so an error from
        # construction would be captured exactly like one from the search.
        with pytest.raises(ValueError) as context:
            query = CoordinateQuery(*query_fields)
            if bulk:
                civic.bulk_search_variants_by_coordinates(
                    [query], search_mode=search_mode)
            else:
                civic.search_variants_by_coordinates(
                    query, search_mode=search_mode)
        assert fragment in str(context.value)

    # Unknown search mode.
    expect_value_error(
        "unexpected search mode",
        ('7', 140453136, 140453136, 'T', 'A'), 'wrong_mode')

    # Restrictions on non-GRCh37 builds.
    expect_value_error(
        "Can't use wildcard when searching for non-GRCh37 coordinates",
        ('7', 140753336, 140753336, '*', 'A', 'GRCh38'), 'exact')
    expect_value_error(
        "alt or ref required for non-GRCh37 coordinate queries",
        ('7', 140753336, 140753336, None, None, 'GRCh38'), 'exact')
    expect_value_error(
        "Only exact search mode is supported for non-GRCh37 coordinate queries",
        ('7', 140753336, 140753336, 'T', 'A', 'GRCh38'), 'any')

    # A literal `-` allele is rejected: default build, single search.
    expect_value_error(
        alt_dash, ('7', 140453136, 140453136, '-', 'A'), 'exact')
    expect_value_error(
        ref_dash, ('7', 140453136, 140453136, 'T', '-'), 'exact')

    # ... GRCh38, single search.
    expect_value_error(
        alt_dash, ('7', 140453136, 140453136, '-', 'A', 'GRCh38'), 'exact')
    expect_value_error(
        ref_dash, ('7', 140453136, 140453136, 'T', '-', 'GRCh38'), 'exact')

    # ... default build, bulk search.
    expect_value_error(
        alt_dash, ('7', 140453136, 140453136, '-', 'A'), 'exact', bulk=True)
    expect_value_error(
        ref_dash, ('7', 140453136, 140453136, 'T', '-'), 'exact', bulk=True)
def test_build36_exact_search_variants(self, v600e):
    """An exact NCBI36 search returns only the ``v600e`` fixture variant."""
    ncbi36_query = CoordinateQuery(
        '7', 140099605, 140099605, 'T', 'A', 'NCBI36')
    matches = civic.search_variants_by_coordinates(
        ncbi36_query, search_mode='exact')

    # Exactly one hit, and it must be the fixture variant.
    assert len(matches) == 1
    assert matches[0] == v600e
def test_single_and_bulk_exact_return_same_variants(self):
    """Single and bulk exact searches agree for the same query.

    Each query is run through ``search_variants_by_coordinates`` and
    ``bulk_search_variants_by_coordinates``. Hits are compared by hash:
    ``hash(variant)`` for the single search against ``.v_hash`` of the
    bulk result entries.
    """

    def search_both(*query_fields):
        # One query object is shared by both searches because the bulk
        # result is indexed by the query itself.
        query = CoordinateQuery(*query_fields)
        single = civic.search_variants_by_coordinates(
            query, search_mode='exact')
        bulk = civic.bulk_search_variants_by_coordinates(
            [query], search_mode='exact')
        return query, single, bulk

    def assert_same_single_hit(*query_fields):
        query, single, bulk = search_both(*query_fields)
        assert len(single) == 1
        assert len(bulk[query]) == 1
        assert hash(single[0]) == bulk[query][0].v_hash

    def assert_no_hits(*query_fields):
        _, single, bulk = search_both(*query_fields)
        assert len(single) == 0
        # With no hits the bulk result as a whole is expected to be empty.
        assert len(bulk) == 0

    assert_same_single_hit('7', 140453136, 140453136, 'T', '*')
    assert_same_single_hit('7', 140453136, 140453136, 'T', 'A')
    assert_no_hits('7', 140453136, 140453136, 'T', None)

    assert_same_single_hit('7', 140453136, 140453137, 'TT', '*')
    assert_same_single_hit('7', 140453136, 140453137, 'TT', 'AC')
    assert_no_hits('7', 140453136, 140453137, 'TT', None)

    assert_same_single_hit('3', 10183706, 10183706, None, 'C')
    assert_same_single_hit('3', 10183706, 10183706, 'T', 'C')

    # The wildcard query here matches two variants, so the hashes are
    # compared as sorted lists rather than element by element.
    query, single, bulk = search_both('3', 10183706, 10183706, '*', 'C')
    single_hashes = [hash(variant) for variant in single]
    bulk_hashes = [match.v_hash for match in bulk[query]]
    assert len(single_hashes) == 2
    assert len(bulk_hashes) == 2
    assert sorted(single_hashes) == sorted(bulk_hashes)