Esempio n. 1
0
def convert(gene_data):
    """Convert CIViC gene data into GA4GH-style feature associations.

    Args:
        gene_data: dict with a ``gene`` key and a ``civic`` key whose
            ``variants`` list holds the CIViC variant records (each with
            ``coordinates`` and ``evidence_items``).

    Yields:
        One dict per (variant, evidence item) pair with the keys ``gene``,
        ``feature``, ``association``, ``source`` and ``civic``.  All records
        produced for one variant share the same ``feature`` dict.

    Any exception raised while converting is printed together with
    ``gene_data['gene']`` and ends the generator; it is not propagated.
    """
    try:
        for variant in gene_data['civic']['variants']:
            feature = {}
            feature['geneSymbol'] = variant['entrez_name']
            feature['entrez_id'] = variant['entrez_id']
            coordinates = variant['coordinates']
            feature['start'] = coordinates['start']
            feature['end'] = coordinates['stop']
            feature['referenceName'] = str(coordinates['reference_build'])
            feature['chromosome'] = str(coordinates['chromosome'])
            feature['name'] = variant['name']

            # Everything except the evidence list; copied per evidence item
            # below so each snapshot is independent without deep-copying
            # evidence items that would be thrown away immediately.
            variant_fields = {
                key: value for key, value in variant.items()
                if key != 'evidence_items'
            }

            for evidence_item in variant['evidence_items']:
                drugs = evidence_item['drugs']
                source_url = evidence_item['source']['source_url']

                association = {
                    'description': evidence_item['description'],
                    'environmentalContexts': [
                        {
                            'description': drug['name'],
                            'pubchem_id': drug['pubchem_id'],
                        }
                        for drug in drugs
                    ],
                    'phenotype': {
                        'description': evidence_item['disease']['name'],
                        'id': evidence_item['disease']['url'],
                    },
                    'evidence': {
                        'evidenceType': {
                            'sourceName': 'CIVIC',
                            'id': '{}'.format(evidence_item['id']),
                        },
                        'description': evidence_item['clinical_significance'],
                        'info': {
                            'publications': [source_url],
                        },
                    },
                }

                # add summary fields for Display
                association = el.evidence_label(evidence_item['description'],
                                                association,
                                                na=True)
                association = ed.evidence_direction(
                    evidence_item['clinical_significance'], association)
                # Plain string: the previous trailing comma turned this
                # value into a one-element tuple.
                association['publication_url'] = source_url
                if drugs:
                    association['drug_labels'] = ','.join(
                        drug['name'] for drug in drugs)

                # Snapshot of the original data, restricted to the single
                # evidence item this record was built from.
                snapshot = copy.deepcopy(variant_fields)
                snapshot['evidence_items'] = [evidence_item]

                yield {
                    'gene': gene_data['gene'],
                    'feature': feature,
                    'association': association,
                    'source': 'civic',
                    'civic': snapshot,
                }
    except Exception as e:
        # Best-effort conversion: report the failure and stop yielding.
        # Single-argument print() behaves the same on Python 2 and 3.
        print('CIVIC {} {}'.format(gene_data['gene'], e))
Esempio n. 2
0
def convert(interpretation):
    """Create feature associations from a PMKB interpretation.

    Args:
        interpretation: dict with ``variants``, ``tier``, ``interpretation``,
            ``tumors``, ``citations`` and ``tissues`` keys.

    Yields:
        One dict per (variant coordinate, tumor) combination with the keys
        ``gene``, ``feature``, ``association``, ``source`` and ``pmkb``.
        Variants without a ``coordinates`` entry, or with an empty one, are
        skipped.  Each yielded ``association`` is its own dict, so records
        collected by the caller keep their individual phenotype.
    """
    pubmed_url = 'http://www.ncbi.nlm.nih.gov/pubmed/{}'
    for variant in interpretation['variants']:
        if 'coordinates' not in variant:
            continue
        # Single or comma-separated coordinates, e.g.
        # '7:140453135-140453136'
        # '3:41266097-41266099, 3:41266100-41266102'
        raw_coordinates = variant['coordinates']
        if not raw_coordinates:
            continue
        for coordinate in raw_coordinates.replace(' ', '').split(','):
            gene = variant['gene']['name']

            feature = {}
            feature['geneSymbol'] = gene
            feature['name'] = variant['name']
            parts = coordinate.split(':')
            chromosome = parts[0]
            start, stop = parts[1].split('-')
            feature['start'] = start
            feature['end'] = stop
            feature['chromosome'] = str(chromosome)
            feature['referenceName'] = 'GRCh37/hg19'
            # Every remaining variant field is carried along as an attribute.
            feature['attributes'] = {
                key: {'string_value': variant[key]}
                for key in variant
                if key not in ('coordinates', 'name', 'gene')
            }

            # Fields shared by every tumor of this coordinate.
            base_association = el.evidence_label(str(interpretation['tier']),
                                                 {},
                                                 na=True)
            base_association = ed.evidence_direction(
                str(interpretation['tier']), base_association, na=True)
            base_association['description'] = interpretation['interpretation']
            # TODO pmkb does not break out drug !?!?
            # association['environmentalContexts'] = []

            for tumor in interpretation['tumors']:
                # Fresh copy per tumor: mutating and re-yielding one shared
                # dict made every previously yielded record show the last
                # tumor's phenotype.
                association = dict(base_association)
                association['phenotype'] = {'description': tumor['name']}

                citations = interpretation['citations']
                association['evidence'] = [{
                    'evidenceType': {
                        'sourceName': 'pmkb'
                    },
                    'description': interpretation['tier'],
                    'info': {
                        # NOTE(review): list nested inside a list, kept as
                        # in the original output; confirm consumers expect it.
                        'publications': [[
                            pubmed_url.format(c['pmid']) for c in citations
                        ]]
                    }
                }]
                # add summary fields for Display
                if len(citations) > 0:
                    association['publication_url'] = pubmed_url.format(
                        citations[0]['pmid'])

                yield {
                    'gene': gene,
                    'feature': feature,
                    'association': association,
                    'source': 'pmkb',
                    'pmkb': {
                        'variant': variant,
                        'tumor': tumor,
                        'tissues': interpretation['tissues']
                    }
                }
Esempio n. 3
0
def convert(gene_data):
    """Turn the OncoKB clinical entries of a gene into feature associations.

    Args:
        gene_data: dict with a ``gene`` key and, optionally, an ``oncokb``
            key whose ``clinical`` list holds the clinical records.

    Yields:
        One dict per clinical record with the keys ``gene``, ``feature``,
        ``association``, ``source`` and ``oncokb``.  Nothing is yielded when
        ``gene_data`` has no ``oncokb`` entry.
    """
    gene = gene_data['gene']
    if 'oncokb' in gene_data:
        oncokb = gene_data['oncokb']
    else:
        oncokb = {'clinical': []}

    for clinical in oncokb['clinical']:
        variant = clinical['variant']
        alteration = variant['alteration']
        variant_gene = variant['gene']
        feature = {
            'geneSymbol': gene,
            'name': variant['name'],
            'entrez_id': variant_gene['entrezGeneId'],
        }

        # Look up variant and add position information.
        matches = LOOKUP_TABLE.get_entries(gene, alteration)
        if len(matches) > 0:
            # FIXME: just using the first match for now;
            # it's not clear what to do if there are multiple matches.
            first_match = matches[0]
            feature['chromosome'] = str(first_match['chrom'])
            for key in ('start', 'end', 'ref', 'alt'):
                feature[key] = first_match[key]
            feature['referenceName'] = str(first_match['build'])

        level_label = clinical['level_label']
        association = {
            'description': level_label,
            'environmentalContexts': [
                {'description': drug} for drug in clinical['drug']
            ],
        }
        main_type = clinical['cancerType']['mainType']
        association['phenotype'] = {
            'description': main_type['name'],
            'id': '{}'.format(main_type['id'])
        }
        abstract_links = [
            abstract['link'] for abstract in clinical['drugAbstracts']
        ]
        association['evidence'] = [{
            'evidenceType': {
                'sourceName': 'oncokb',
                'id': '{}-{}'.format(gene, main_type['id'])
            },
            'description': clinical['level'],
            'info': {
                # The links are wrapped in an outer list, as before.
                'publications': [abstract_links]
            }
        }]

        # add summary fields for Display
        association['evidence_label'] = level_label
        association = el.evidence_label(level_label, association, na=True)
        association = ed.evidence_direction(level_label, association, na=True)

        if len(clinical['drugAbstracts']) > 0:
            association['publication_url'] = \
                clinical['drugAbstracts'][0]['link']
        else:
            # Fall back to the first PMID, if there is one.
            for pmid in clinical['drugPmids']:
                association['publication_url'] = (
                    'http://www.ncbi.nlm.nih.gov/pubmed/{}'.format(pmid))
                break

        association['drug_labels'] = ','.join(clinical['drug'])

        yield {
            'gene': gene,
            'feature': feature,
            'association': association,
            'source': 'oncokb',
            'oncokb': {
                'clinical': clinical
            }
        }
def convert(evidence):
    """Create a feature association from one CGI evidence record.

    Args:
        evidence: dict for a single record.  The keys read here are
            ``Gene``, ``gDNA``, ``Alteration type``, ``Biomarker``,
            ``Alteration``, ``Drug full name``, ``Association``,
            ``Primary Tumor type``, ``Metastatic Tumor Type``, ``Source``
            and ``Evidence level``.

    Yields:
        A single dict with the keys ``gene``, ``feature``, ``association``,
        ``source`` and ``cgi``.

    Example of the keys and values a record carries:

    ['Primary Tumor type', 'Drug family', 'Alteration type', 'Targeting',
    'Assay type', 'Evidence level', 'Biomarker', 'Drug', 'Alteration',
    'Source', 'Curator', 'Comments', 'Drug status', 'Drug full name',
    'TCGI included', 'Curation date', 'Gene', 'Metastatic Tumor Type',
    'Association']
    {'Primary Tumor type': 'GIST', 'Drug family': '[HSP90 inhibitor]',
     'Alteration type': 'MUT', 'Targeting': nan, 'Assay type': nan,
     'Evidence level': 'Pre-clinical',
     'Biomarker': 'KIT mutation in exon 9 or 17',
     'Drug': '[]',
     'Alteration': 'KIT:788-828,449-514',
     'Source': 'PMID:21737509', 'Curator': 'RDientsmann',
     'Comments': nan, 'Drug status': nan,
     'Drug full name': 'HSP90 inhibitors',
     'TCGI included': True, 'Curation date': '01/16',
     'Gene': 'KIT', 'Metastatic Tumor Type': nan,
     'Association': 'Responsive'}
    """

    def split_gDNA(gDNA):
        """Split a gDNA field such as 'chr9:g.133747588G>C' into a dict.

        Returns a dict with ``chromosome``, ``start``, ``ref`` and ``alt``,
        or an empty dict when the value cannot be parsed.
        """
        # TODO: handle non-SNPs like chr1:g.43815009_43815010delGGinsTT
        try:
            chrom, remainder = gDNA.split(':g.')
            if chrom.startswith('chr'):
                chrom = chrom[3:]
            start = re.search(r'(\d+)', remainder).group()
            ref, alt = remainder[len(start):].split(">")
            return {
                'chromosome': str(chrom),
                'start': start,
                'ref': ref,
                'alt': alt
            }
        except Exception:
            # Best effort: anything unparseable gives no position info.
            return {}

    # Create document for insertion.
    gene = evidence['Gene']
    feature = split_gDNA(evidence['gDNA'])

    feature['biomarker_type'] = _get_biomarker_type(
        evidence['Alteration type'], evidence['Biomarker'])
    feature['geneSymbol'] = gene
    feature['name'] = evidence['Biomarker']
    feature['description'] = evidence['Alteration']

    association = {}
    association['description'] = '{} {} {}'.format(gene,
                                                   evidence['Drug full name'],
                                                   evidence['Association'])
    association['environmentalContexts'] = [
        {'description': evidence['Drug full name']}
    ]
    phenotype_description = evidence['Primary Tumor type']
    # NOTE(review): the sample record shows nan for missing values, so this
    # presumably relies on the caller replacing them with '' -- confirm.
    if evidence['Metastatic Tumor Type'] != '':
        phenotype_description = '{} {}'.format(
            phenotype_description, evidence['Metastatic Tumor Type'])
    association['phenotype'] = {
        'description': phenotype_description
    }

    # 'Source' is a ';'-separated list; 'PMID:<id>' entries link to PubMed,
    # everything else becomes a web search for the raw entry.
    pubs = []
    for source in evidence['Source'].split(';'):
        # partition() tolerates entries with more than one ':' (a plain
        # two-way split() unpack raised ValueError on those).
        prefix, separator, pub_id = source.partition(':')
        if separator and prefix == 'PMID':
            pubs.append(
                'http://www.ncbi.nlm.nih.gov/pubmed/{}'.format(pub_id))
        else:
            pubs.append('https://www.google.com/#q={}'.format(source))

    association['evidence'] = [{
        "evidenceType": {
            "sourceName": "cgi"
        },
        'description': evidence['Association'],
        'info': {
            'publications': pubs
        }
    }]
    # add summary fields for Display

    association = el.evidence_label(evidence['Evidence level'], association)
    association = ed.evidence_direction(evidence['Association'], association)

    # str.split always returns at least one element, so pubs is never empty.
    association['publication_url'] = pubs[0]
    association['drug_labels'] = evidence['Drug full name']
    feature_association = {'gene': gene,
                           'feature': feature,
                           'association': association,
                           'source': 'cgi',
                           'cgi': evidence}

    yield feature_association
Esempio n. 5
0
def convert(evidence):
    """Create a feature association from one MolecularMatch evidence record.

    Args:
        evidence: dict with ``sources``, ``narrative``,
            ``therapeuticContext`` and ``tags`` keys, and optionally a
            ``mutations`` list carrying ``GRCh37_location`` entries.

    Yields:
        A single dict with the keys ``gene``, ``feature``, ``association``,
        ``source`` and ``molecularmatch``.
    """
    sources = evidence['sources']
    narrative = evidence['narrative']
    therapeuticContext = evidence['therapeuticContext']
    tags = evidence['tags']

    # The last tag of each facet wins.
    gene = None
    condition = None
    mutation = None
    for tag in tags:
        facet = tag['facet']
        if facet == 'GENE':
            gene = tag['term']
        elif facet == 'CONDITION':
            condition = tag['term']
        elif facet == 'MUTATION':
            mutation = tag['term']
    if not gene and mutation:
        # No GENE tag: fall back to the first word of the mutation term.
        gene = mutation.split(' ')[0]

    feature = {}
    feature['geneSymbol'] = gene
    feature['name'] = mutation

    # Add variant-level information.
    # TODO: only looks at first mutation, not all mutations.
    try:
        grch37_mutation = evidence['mutations'][0]['GRCh37_location'][0]
        feature['chromosome'] = str(grch37_mutation['chr'])
        feature['start'] = grch37_mutation['start']
        feature['ref'] = grch37_mutation['ref']
        feature['alt'] = grch37_mutation['alt']
        #  TODO: add build/reference information
    except (KeyError, IndexError, TypeError):
        # Position information is optional; keep whatever was filled in.
        pass

    # NOTE(review): raises IndexError when therapeuticContext is empty;
    # left as is because a record without a drug has no obvious fallback.
    drug_label = therapeuticContext[0]['name']

    association = {}
    association['description'] = narrative
    association['environmentalContexts'] = [{'description': drug_label}]
    association['phenotype'] = {'description': condition}

    pubs = [source['link'] for source in sources]

    association['evidence'] = [{
        "evidenceType": {
            "sourceName": "molecularmatch"
        },
        'description': narrative,
        'info': {
            'publications': pubs
        }
    }]
    # add summary fields for Display

    # association['evidence_label'] = direction
    association = el.evidence_label(narrative, association, na=True)
    association = ed.evidence_direction(narrative, association, na=True)

    # Only set when there is a publication to point at; an empty sources
    # list used to raise IndexError here.
    if pubs:
        association['publication_url'] = pubs[0]
    association['drug_labels'] = drug_label
    feature_association = {
        'gene': gene,
        'feature': feature,
        'association': association,
        'source': 'molecularmatch',
        'molecularmatch': evidence
    }
    yield feature_association
Esempio n. 6
0
def convert(jax_evidence):
    """Create feature associations from a JAX evidence document.

    Args:
        jax_evidence: dict with a ``gene`` key and an ``evidence`` list.
            Each evidence entry carries ``molecular_profile``,
            ``efficacy_evidence``, ``therapy_name``,
            ``indication_tumor_type``, ``response_type``, ``references``
            and ``approval_status``.

    Yields:
        One dict per (gene, alteration) pair found in an entry's molecular
        profile, with the keys ``gene``, ``feature``, ``association``,
        ``source`` and ``jax``.  ``gene`` and ``feature['geneSymbol']`` are
        the gene parsed from that pair, or ``jax_evidence['gene']`` when the
        pair is incomplete.
    """
    default_gene = jax_evidence['gene']
    pubmed_url = 'http://www.ncbi.nlm.nih.gov/pubmed/{}'
    for evidence in jax_evidence['evidence']:

        # TODO: alterations are treated individually right now, but they are
        # actually combinations and should be treated accordingly.

        # Parse molecular profile and use for variant-level information.
        molecular_profile_fields = evidence['molecular_profile'].split()
        for index in range(0, len(molecular_profile_fields), 2):
            pair = molecular_profile_fields[index:index + 2]
            # Resolved per pair: reusing one loop-wide variable let a gene
            # parsed from an earlier pair (or an earlier evidence entry)
            # leak into this record's geneSymbol.
            gene = default_gene
            feature = {}
            feature['name'] = evidence['molecular_profile']

            if len(pair) == 2:
                gene, alteration = pair
                try:
                    # Look up variant and add position information.
                    matches = LOOKUP_TABLE.get_entries(gene, alteration)
                    if len(matches) > 0:
                        # FIXME: just using the first match for now; it's
                        # not clear what to do if there are multiple matches.
                        match = matches[0]
                        feature['chromosome'] = str(match['chrom'])
                        feature['start'] = match['start']
                        feature['end'] = match['end']
                        feature['ref'] = match['ref']
                        feature['alt'] = match['alt']
                        feature['referenceName'] = str(match['build'])
                except Exception:
                    # Position lookup is best effort; the record is still
                    # emitted without (or with partial) coordinates.
                    pass
            feature['geneSymbol'] = gene

            references = evidence['references']
            association = {}
            association['description'] = evidence['efficacy_evidence']
            association['environmentalContexts'] = [
                {'description': evidence['therapy_name']}
            ]
            association['phenotype'] = {
                'description': evidence['indication_tumor_type']
            }
            association['evidence'] = [{
                "evidenceType": {
                    "sourceName": "jax"
                },
                'description': evidence['response_type'],
                'info': {
                    # NOTE(review): list nested inside a list, kept as in
                    # the original output; confirm consumers expect it.
                    'publications': [[
                        pubmed_url.format(r) for r in references
                    ]]
                }
            }]
            # add summary fields for Display
            association = el.evidence_label(evidence['approval_status'],
                                            association)
            association = ed.evidence_direction(evidence['response_type'],
                                                association)

            if len(references) > 0:
                association['publication_url'] = pubmed_url.format(
                    references[0])
            association['drug_labels'] = evidence['therapy_name']
            feature_association = {
                'gene': gene,
                'feature': feature,
                'association': association,
                'source': 'jax',
                'jax': evidence
            }
            yield feature_association