def test_indel_vocabulary(conn):
    """
    The term tree for 'indel' must contain 'indel' itself and must not
    contain either of the copy variant terms
    """
    term_names = {term['name'] for term in vocab.get_term_tree(conn, 'indel')}

    assert 'indel' in term_names
    for unrelated in ('copy variant', 'copy number variant'):
        assert unrelated not in term_names
def test_expression_down(conn):
    """
    The term tree for BASE_REDUCED_EXPRESSION must include the reduced and
    base expression terms and exclude the increased expression terms
    """
    terms = vocab.get_term_tree(conn, BASE_REDUCED_EXPRESSION)
    found = [term['name'] for term in terms]

    # (term name, whether it is expected in the tree), checked in this order
    expectations = (
        (BASE_EXPRESSION, True),
        (BASE_REDUCED_EXPRESSION, True),
        (BASE_INCREASED_EXPRESSION, False),
        ('increased rna expression', False),
        ('reduced rna expression', True),
    )
    for term_name, should_be_present in expectations:
        assert (term_name in found) is should_be_present
Beispiel #3
0
def get_terms_set(graphkb_conn: GraphKBConnection,
                  base_terms: List[str]) -> Set[str]:
    """
    Build one combined set from the term trees of all the given base terms

    Args:
        graphkb_conn: the graphkb connection object
        base_terms: names of the terms to look up; each tree is fetched with
            include_superclasses=False

    Returns:
        the union of the convert_to_rid_set() results for every base term
    """
    collected: Set[str] = set()
    for name in base_terms:
        tree = get_term_tree(graphkb_conn, name, include_superclasses=False)
        collected.update(convert_to_rid_set(tree))
    return collected
Beispiel #4
0
        'operator': 'CONTAINSANY'
    },
    'returnProperties': return_props,
})

# Preview at most the first five statements: relevance, subject and source
# display names (empty string when the statement has no source).
# NOTE(review): `statements` is presumably the result of the query that ends
# just above - confirm against the full script.
for statement in statements[:5]:
    print(
        statement['relevance']['displayName'],
        statement['subject']['displayName'],
        statement['source']['displayName'] if statement['source'] else '',
    )

# Name of the vocabulary term whose term tree is fetched below
BASE_THERAPEUTIC_TERMS = 'therapeutic efficacy'

# Look up the term tree for the base term, leaving out its superclasses
therapeutic_terms = get_term_tree(graphkb_conn,
                                  BASE_THERAPEUTIC_TERMS,
                                  include_superclasses=False)

print(f'\nFound {len(therapeutic_terms)} equivalent terms')

# One bullet line per term name, followed by a blank separator line
for term in therapeutic_terms:
    print('-', term['name'])
print()

statements = graphkb_conn.query({
    'target': 'Statement',
    'filters': {
        'AND': [
            {
                'conditions': convert_to_rid_list(variant_matches),
                'operator': 'CONTAINSANY'
Beispiel #5
0
def summarize(
    graphkb_conn: GraphKBConnection,
    matches: Sequence[KbMatch],
    disease_name: str,
    variants: List[IprVariant],
) -> str:
    """
    Build the auto-generated HTML comments for a report from GraphKB matches

    Args:
        graphkb_conn: the graphkb connection object
        matches: the GraphKB matches; each is read for 'kbStatementId' and 'variant'
        disease_name: disease name used to look up the 'Disease' term tree
        variants: the observed variants, looked up by their 'key'

    Returns:
        newline-joined HTML: a fixed header followed by one section per entry
        returned by section_statements_by_genes, largest section first
    """
    variant_lookup = {variant['key']: variant for variant in variants}

    # statement ID -> keys of the observed variants that matched it
    keys_per_statement: Dict[str, Set[str]] = {}
    for kb_match in matches:
        statement_id = kb_match['kbStatementId']
        variant_key = kb_match['variant']
        keys_per_statement.setdefault(statement_id, set()).add(variant_key)

    # statement ID -> the observed variant records themselves
    variants_per_statement: Dict[str, List[IprVariant]] = {
        statement_id: [variant_lookup[key] for key in keys]
        for statement_id, keys in keys_per_statement.items()
    }

    disease_tree = get_term_tree(
        graphkb_conn, disease_name, ontology_class='Disease')
    disease_rids = convert_to_rid_set(disease_tree)

    # fetch the statement record (with neighbors) for every match
    records_by_template: Dict[str, List[Statement]] = {}
    records_by_rid: Dict[str, Statement] = {}
    for kb_match in matches:
        bare_rid = kb_match['kbStatementId'].replace('#', '')
        response = graphkb_conn.request(f'/statements/{bare_rid}?neighbors=1')
        record = response['result']

        records_by_template.setdefault(
            record['displayNameTemplate'], []).append(record)
        records_by_rid[record['@rid']] = record

    # merge statements that share a template into common sentences
    sentences = {}
    for template, group in records_by_template.items():
        aggregated = aggregate_statements(
            graphkb_conn, template, group, disease_rids)
        sentences.update(aggregated)

    # split the statements into sections
    statements_by_genes = section_statements_by_genes(
        graphkb_conn, list(records_by_rid.values()))

    # sections holding the most statements come first
    ordered_sections = sorted(
        statements_by_genes.items(),
        key=lambda item: len(item[1]),
        reverse=True,
    )

    html_parts: List[str] = [
        '<h3>The comments below were automatically generated from matches to GraphKB and have not been manually reviewed</h3>'
    ]

    for section, statement_rids in ordered_sections:
        # de-duplicate the observed variants of this section by their key
        section_variants = {}
        for statement_rid in statement_rids:
            for variant in variants_per_statement[statement_rid]:
                section_variants[variant['key']] = variant

        section_sentences = {r: sentences[r] for r in statement_rids}
        section_statements = {r: records_by_rid[r] for r in statement_rids}

        section_html = create_section_html(
            graphkb_conn,
            section,
            section_sentences,
            section_statements,
            list(section_variants.values()),
        )
        html_parts.append(section_html)

    return '\n'.join(html_parts)
Beispiel #6
0
def convert_statements_to_alterations(
    graphkb_conn: GraphKBConnection,
    statements: List[Statement],
    disease_name: str,
    variant_matches: Iterable[str],
) -> List[KbMatch]:
    """
    Given a set of statements matched from graphkb, convert these into their IPR equivalent representations

    One row is produced for every variant condition of a statement whose RID
    is present in variant_matches.

    Args:
        graphkb_conn: the graphkb connection object
        statements: list of statement records from graphkb
        disease_name: name of the cancer type for the patient being reported on
        variant_matches: the RIDs the variant matched for these statements; any
            iterable is accepted, it is copied into a set once before use

    Raises:
        ValueError: could not find the disease type in GraphKB

    Returns:
        IPR graphkb row representations

    Notes:
        - only report disease matched prognostic markers https://www.bcgsc.ca/jira/browse/GERO-72 and GERO-196
    """
    disease_matches = {
        r['@rid']
        for r in get_term_tree(
            graphkb_conn, disease_name, ontology_class='Disease')
    }

    if not disease_matches:
        raise ValueError(
            f'failed to match disease ({disease_name}) to graphkb')

    # The parameter is only promised to be an Iterable: a one-shot iterator
    # would be exhausted by the first membership test and a list would cost a
    # linear scan per variant, so materialise it exactly once.
    matched_variant_rids = set(variant_matches)

    approved = convert_to_rid_set(get_approved_evidence_levels(graphkb_conn))

    rows = []

    for statement in statements:
        conditions = statement['conditions']
        variants = [c for c in conditions if c['@class'] in VARIANT_CLASSES]
        diseases = [c for c in conditions if c['@class'] == 'Disease']
        pmid = ';'.join(e['displayName'] for e in statement['evidence'])

        relevance = statement['relevance']

        # only a statement with exactly one disease condition can be a match
        disease_match = len(
            diseases) == 1 and diseases[0]['@rid'] in disease_matches

        ipr_section = categorize_relevance(graphkb_conn, relevance['@rid'])

        if ipr_section == 'prognostic' and not disease_match:
            continue  # GERO-72 / GERO-196

        # evidenceLevel may be empty/None, hence the `or []`
        approved_therapy = ipr_section == 'therapeutic' and any(
            level['@rid'] in approved
            for level in (statement['evidenceLevel'] or []))

        matched_variants = [
            v for v in variants if v['@rid'] in matched_variant_rids
        ]
        if not matched_variants:
            continue

        # values shared by every row generated from this statement
        subject = statement['subject']
        source = statement['source']
        disease_names = ';'.join(sorted(d['displayName'] for d in diseases))

        for variant in matched_variants:
            row = KbMatch({
                'approvedTherapy': approved_therapy,
                'category': ipr_section or 'unknown',
                'context': subject['displayName'] if subject else None,
                'kbContextId': subject['@rid'] if subject else None,
                'disease': disease_names,
                'evidenceLevel': display_evidence_levels(statement),
                'kbStatementId': statement['@rid'],
                'kbVariant': variant['displayName'],
                'kbVariantId': variant['@rid'],
                'matchedCancer': disease_match,
                'reference': pmid,
                'relevance': relevance['displayName'],
                'kbRelevanceId': relevance['@rid'],
                'externalSource': source['displayName'] if source else None,
                'externalStatementId': statement.get('sourceId'),
            })
            rows.append(row)
    return rows
def test_expression_vocabulary(conn):
    """
    The term tree for BASE_EXPRESSION must contain the base term itself and
    'increased rna expression'
    """
    # NOTE(review): `conn` is presumably a pytest fixture supplying the
    # GraphKB connection - confirm where it is defined
    result = vocab.get_term_tree(conn, BASE_EXPRESSION)

    names = [row['name'] for row in result]
    assert BASE_EXPRESSION in names
    assert 'increased rna expression' in names