def test_movel_specific_matches_general(self, conn):
    """
    Match a CDKN2A protein variant with an implausibly large position and check that
    results come back, that the queried name itself is not among them, and that the
    'CDKN2A mutation' record is.
    """
    query_variant = 'CDKN2A:p.T18888888888888888888M'
    hits = match.match_positional_variant(conn, query_variant)

    display_names = set()
    for hit in hits:
        display_names.add(hit['displayName'])

    assert hits
    assert query_variant not in display_names
    assert 'CDKN2A mutation' in display_names
def test_match_explicit_references(self, conn):
    """
    Match a fusion given without gene names by passing both reference features
    (looked up by name: BCR, then ABL1) explicitly as record IDs.
    """

    def feature_rid(gene_name):
        # first Feature record returned for this name
        records = conn.query({'target': 'Feature', 'filters': {'name': gene_name}})
        return records[0]['@rid']

    bcr_rid = feature_rid('BCR')
    abl1_rid = feature_rid('ABL1')

    hits = match.match_positional_variant(
        conn,
        'fusion(e.13,e.3)',
        reference1=bcr_rid,
        reference2=abl1_rid,
    )
    assert hits
def test_known_fusions(self, conn, known_variant, related_variants):
    """
    Match a known fusion and check that no result has the GENERAL_MUTATION type,
    that the fusion itself is returned, and that every related variant is returned.
    """
    hits = match.match_positional_variant(conn, known_variant)

    type_names = []
    for hit in hits:
        type_names.append(hit['type']['name'])
    assert GENERAL_MUTATION not in type_names

    display_names = set()
    for hit in hits:
        display_names.add(hit['displayName'])

    assert hits
    assert known_variant in display_names

    for expected in related_variants:
        assert expected in display_names
def test_known_variants(self, conn, known_variant, related_variants, unrelated_variants):
    """
    Match a known variant and check that it is returned together with every related
    variant, and that none of the unrelated variants are returned.
    """
    hits = match.match_positional_variant(conn, known_variant)

    display_names = set()
    for hit in hits:
        display_names.add(hit['displayName'])

    assert hits
    assert known_variant in display_names

    for expected in related_variants:
        assert expected in display_names

    for unexpected in unrelated_variants:
        assert unexpected not in display_names
from graphkb import GraphKBConnection from graphkb.constants import BASE_RETURN_PROPERTIES, GENERIC_RETURN_PROPERTIES from graphkb.match import match_positional_variant from graphkb.util import convert_to_rid_list from graphkb.vocab import get_term_tree GKB_API_URL = 'https://pori-demo.bcgsc.ca/graphkb-api/api' GKB_USER = '******' GKB_PASSWORD = '******' graphkb_conn = GraphKBConnection(GKB_API_URL, use_global_cache=False) graphkb_conn.login(GKB_USER, GKB_PASSWORD) variant_name = 'KRAS:p.G12D' variant_matches = match_positional_variant(graphkb_conn, variant_name) for match in variant_matches: print(variant_name, 'will match', match['displayName']) # return properties should be customized to the users needs return_props = (BASE_RETURN_PROPERTIES + ['sourceId', 'source.name', 'source.displayName'] + [f'conditions.{p}' for p in GENERIC_RETURN_PROPERTIES] + [f'subject.{p}' for p in GENERIC_RETURN_PROPERTIES] + [f'evidence.{p}' for p in GENERIC_RETURN_PROPERTIES] + [f'relevance.{p}' for p in GENERIC_RETURN_PROPERTIES] + [f'evidenceLevel.{p}' for p in GENERIC_RETURN_PROPERTIES]) statements = graphkb_conn.query({ 'target': 'Statement', 'filters': {
def test_genomic_coordinates(self, conn):
    """Match a genomic (g.) notation on chromosome X; passes as long as nothing is raised."""
    match.match_positional_variant(conn, 'X:g.100611165A>T')
def test_known_fusion_single_gene_no_match(self, conn):
    """A fusion notation with an unknown ('?') second gene and position yields no matches."""
    single_gene_fusion = '(BCR,?):fusion(e.13,e.?)'
    hits = match.match_positional_variant(conn, single_gene_fusion)
    assert not hits
def test_bad_gene2_name(self, conn):
    """An unrecognized second gene in a fusion notation raises FeatureNotFoundError."""
    fusion_with_bad_gene2 = '(BCR,ME-AS-A-GENE):fusion(e.13,e.3)'
    with pytest.raises(FeatureNotFoundError):
        match.match_positional_variant(conn, fusion_with_bad_gene2)
def test_match_explicit_reference1(self, conn):
    """
    Match a protein notation given without a gene name by passing the KRAS feature
    record ID explicitly as reference1.
    """
    kras_records = conn.query({'target': 'Feature', 'filters': {'name': 'KRAS'}})
    kras_rid = kras_records[0]['@rid']

    hits = match.match_positional_variant(conn, 'p.G12D', reference1=kras_rid)
    assert hits
def test_uncertain_position_not_supported(self, conn):
    """A fusion whose first breakpoint is a position range raises NotImplementedError."""
    ranged_fusion = '(BCR,ABL1):fusion(e.13_24,e.3)'
    with pytest.raises(NotImplementedError):
        match.match_positional_variant(conn, ranged_fusion)
def test_bad_gene_name(self, conn):
    """An unrecognized gene name in a protein notation raises FeatureNotFoundError."""
    variant_with_bad_gene = 'ME-AS-A-GENE:p.G12D'
    with pytest.raises(FeatureNotFoundError):
        match.match_positional_variant(conn, variant_with_bad_gene)
def test_error_on_duplicate_reference2(self, conn):
    """
    Passing reference2 explicitly when the notation already names both genes
    raises ValueError.
    """
    fusion_notation = '(BCR,ABL1):fusion(e.13,e.3)'
    extra_rid = '#123:34'
    with pytest.raises(ValueError):
        match.match_positional_variant(conn, fusion_notation, reference2=extra_rid)
def test_error_on_bad_reference2(self, conn):
    """Passing reference2 alongside a single-gene protein notation raises ValueError."""
    single_gene_notation = 'KRAS:p.G12D'
    extra_rid = '#123:34'
    with pytest.raises(ValueError):
        match.match_positional_variant(conn, single_gene_notation, reference2=extra_rid)
def test_error_on_duplicate_reference1(self, conn):
    """
    Passing a third positional argument (a record ID) when the notation already
    names the gene raises ValueError.
    """
    notation_with_gene = 'KRAS:p.G12D'
    extra_rid = '#123:34'
    with pytest.raises(ValueError):
        match.match_positional_variant(conn, notation_with_gene, extra_rid)
def annotate_variant(
    graphkb_conn: GraphKBConnection, raw_variant_name: str, include_unmatched: bool = False
) -> List[Dict[str, str]]:
    """
    Match one variant against GraphKB and flatten the statements found into rows.

    Args:
        graphkb_conn: the graphkb api connection object
        raw_variant_name: the variant notation as supplied by the caller. It is passed
            through convert_aa_3to1 before matching and echoed back unchanged in the
            'variant' field of every row.
        include_unmatched: when True, also emit a row for a variant whose feature was
            not found, or whose matches are not linked to any statement.

    Returns:
        One row per statement whose conditions contain any of the matched variants.
        A single row carrying an 'error' field is returned instead when the notation
        contains 'c.*' or when matching raises something other than
        FeatureNotFoundError. The list may be empty.
    """
    results = []
    variant_name = convert_aa_3to1(raw_variant_name)

    if 'c.*' in variant_name:
        results.append(
            {
                'variant': raw_variant_name,
                'error': f'skipping unsupported notation: {variant_name}',
            }
        )
        return results

    print(f'processing: {variant_name}')

    try:
        variant_matches = match_positional_variant(graphkb_conn, variant_name)
    except FeatureNotFoundError:
        if include_unmatched:
            results.append({'variant': raw_variant_name})
        return results
    except Exception as err:
        # deliberate best-effort: any other failure is reported in the output row
        # rather than aborting the caller
        results.append({'variant': raw_variant_name, 'error': str(err)})
        return results

    if variant_matches:
        print(f'{variant_name} matches {len(variant_matches)} variant records')

    # return properties should be customized to the users needs
    return_props = (
        BASE_RETURN_PROPERTIES
        + ['sourceId', 'source.name', 'source.displayName']
        + [f'conditions.{p}' for p in GENERIC_RETURN_PROPERTIES]
        + [f'subject.{p}' for p in GENERIC_RETURN_PROPERTIES]
        + [f'evidence.{p}' for p in GENERIC_RETURN_PROPERTIES]
        + [f'relevance.{p}' for p in GENERIC_RETURN_PROPERTIES]
        + [f'evidenceLevel.{p}' for p in GENERIC_RETURN_PROPERTIES]
        + ['reviewStatus']
    )

    # the query result is iterated below, so it is cast to a list of statements
    statements = typing.cast(
        List[Statement],
        graphkb_conn.query(
            {
                'target': 'Statement',
                'filters': {
                    'conditions': convert_to_rid_list(variant_matches),
                    'operator': 'CONTAINSANY',
                },
                'returnProperties': return_props,
            }
        ),
    )

    if not statements and not include_unmatched:
        return results

    # identical for every row of this variant, so build it once
    matched_names = ';'.join(sorted(v['displayName'] for v in variant_matches))

    if not statements:
        results.append({'variant_matches': matched_names, 'variant': raw_variant_name})
        return results

    print(f'{variant_name} matches {len(statements)} statements')

    for statement in statements:
        row = {
            'variant_matches': matched_names,
            'variant': raw_variant_name,
            'statement.relevance': statement['relevance']['displayName'],
            'statement.@rid': statement['@rid'],
            'statement.subject': statement['subject']['displayName'],
            'statement.source': statement['source']['displayName'] if statement['source'] else '',
            'statement.evidence': ';'.join(
                sorted(e['displayName'] for e in statement['evidence'])
            ),
            'statement.conditions': ';'.join(
                sorted(e['displayName'] for e in statement['conditions'])
            ),
            # evidenceLevel may be empty/None, hence the fallback to an empty list
            'statement.evidence_level': ';'.join(
                sorted(e['displayName'] for e in (statement['evidenceLevel'] or []))
            ),
            'statement.review_status': statement['reviewStatus'],
            'is_therapeutic': bool(statement['relevance']['@rid'] in therapeutic_terms),
        }
        results.append(row)

    return results
def annotate_positional_variants(
    graphkb_conn: GraphKBConnection,
    variants: List[IprVariant],
    disease_name: str,
    show_progress: bool = False,
) -> List[KbMatch]:
    """
    Annotate variant calls with information from GraphKB and return these annotations in the IPR
    alterations format

    Rows with neither a gene nor both gene1 and gene2 are skipped. Feature-lookup and HTTP
    failures are logged and counted rather than raised.

    Args:
        graphkb_conn (GraphKBConnection): the graphkb api connection object
        variants (list.<dict>): list of variants
        disease_name (str): passed on to get_ipr_statements_from_variants
        show_progress (bool): iterate the variants through progressbar. Defaults to False.

    Returns:
        list of kbMatches records for IPR
    """
    errors = 0
    alterations = []
    problem_genes = set()

    iterfunc = progressbar if show_progress else iter

    for row in iterfunc(variants):
        variant = row['variant']

        if not row.get('gene', '') and (not row.get('gene1', '') or not row.get('gene2', '')):
            # https://www.bcgsc.ca/jira/browse/GERO-56?focusedCommentId=1234791&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-1234791
            # should not match single gene SVs
            continue

        try:
            matches = match_positional_variant(graphkb_conn, variant)

            for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
                new_row = KbMatch({'variant': row['key'], 'variantType': row['variantType']})
                new_row.update(ipr_row)
                alterations.append(new_row)

        except FeatureNotFoundError as err:
            logger.debug(f'failed to match positional variants ({variant}): {err}')
            errors += 1
            # Test the value, not just the key: the guard above allows 'gene' to be
            # present but empty on fusion rows, and an empty/None entry here would
            # hide the real gene1/gene2 (None would also break sorted() below).
            if row.get('gene'):
                problem_genes.add(row['gene'])
            elif 'gene1' in row and f"({row['gene1']})" in str(err):
                problem_genes.add(row['gene1'])
            elif 'gene2' in row and f"({row['gene2']})" in str(err):
                problem_genes.add(row['gene2'])
            elif 'gene1' in row and 'gene2' in row:
                # the message names neither gene, so report both
                problem_genes.add(row['gene1'])
                problem_genes.add(row['gene2'])
            else:
                raise
        except HTTPError as err:
            errors += 1
            logger.error(f'failed to match positional variants ({variant}): {err}')

    if problem_genes:
        logger.error(f'gene finding failures for {sorted(problem_genes)}')
        logger.error(f'{len(problem_genes)} gene finding failures for positional variants')
    if errors:
        logger.error(f'skipped {errors} positional variants due to errors')

    logger.info(
        f'matched {len(variants)} positional variants to {len(alterations)} graphkb annotations'
    )

    return alterations