Ejemplo n.º 1
0
def toBel(dbId):
    """Convert the entity identified by *dbId* to its BEL representation.

    The entity record is fetched with ``getEntityData`` and dispatched on its
    ``schemaClass`` value to the matching ``toBel*`` converter.

    Args:
        dbId: identifier handed unchanged to ``getEntityData``.

    Returns:
        Whatever the selected ``toBel*`` converter returns for the entity.

    Raises:
        ValueError: if ``schemaClass`` matches none of the handled classes.
            Previously this case fell through to ``return bel`` with ``bel``
            unassigned and surfaced as an opaque ``UnboundLocalError``.
    """

    entity = getEntityData(dbId)

    print('toBel dbId: ', dbId)

    # Not called ``type`` so the builtin of that name is not shadowed.
    schemaClass = entity['schemaClass']

    # Exact class names are tested first; the trailing branches are substring
    # tests, so any schemaClass merely containing 'Set' (etc.) lands there.
    if schemaClass == 'Compartment':
        bel = toBelCompartment(entity)
    elif schemaClass == 'EntityCompartment':
        bel = toBelEntityCompartment(entity)
    elif schemaClass == 'OtherEntity':
        bel = toBelOtherEntity(entity)
    elif schemaClass == 'Polymer':
        bel = toBelPolymer(entity)
    elif schemaClass == 'Complex':
        bel = toBelComplexComponents(entity)  # alternative version - toBelComplexNamed()
    elif schemaClass == 'SimpleEntity':
        bel = toBelSimpleEntity(entity)
    elif schemaClass == 'EntityWithAccessionedSequence':
        bel = toBelEntityWithAccessionedSequence(entity)
    elif 'Set' in schemaClass:
        bel = toBelSets(entity)
    elif 'GenomeEncodedEntity' in schemaClass:
        bel = toBelGenomeEncodedEntity(entity)
    elif 'CatalystActivity' in schemaClass:
        bel = toBelCatalystActivity(entity)
    else:
        # Fail loudly with context instead of an unbound-local crash below.
        raise ValueError(
            'toBel: unsupported schemaClass {!r} for dbId {}'.format(
                schemaClass, dbId))

    return bel
Ejemplo n.º 2
0
def toBel(dbId):
    """Return the BEL representation of the entity identified by *dbId*.

    Fetches the entity record via ``getEntityData`` and hands it to the
    ``toBel*`` converter selected by the record's ``schemaClass``.

    Args:
        dbId: identifier handed unchanged to ``getEntityData``.

    Returns:
        The value produced by the converter chosen for the entity.

    Raises:
        ValueError: if no converter handles the entity's ``schemaClass``.
            The original code reached ``return bel`` with ``bel`` never
            assigned in that case, raising ``UnboundLocalError``.
    """

    entity = getEntityData(dbId)

    print('toBel dbId: ', dbId)

    # ``kind`` instead of ``type``: avoids shadowing the builtin.
    kind = entity['schemaClass']

    # Exact schemaClass matches.
    if kind == 'Compartment':
        return toBelCompartment(entity)
    if kind == 'EntityCompartment':
        return toBelEntityCompartment(entity)
    if kind == 'OtherEntity':
        return toBelOtherEntity(entity)
    if kind == 'Polymer':
        return toBelPolymer(entity)
    if kind == 'Complex':
        # alternative version - toBelComplexNamed()
        return toBelComplexComponents(entity)
    if kind == 'SimpleEntity':
        return toBelSimpleEntity(entity)
    if kind == 'EntityWithAccessionedSequence':
        return toBelEntityWithAccessionedSequence(entity)

    # Substring matches, checked only after every exact name has failed and
    # in this fixed order.
    if 'Set' in kind:
        return toBelSets(entity)
    if 'GenomeEncodedEntity' in kind:
        return toBelGenomeEncodedEntity(entity)
    if 'CatalystActivity' in kind:
        return toBelCatalystActivity(entity)

    # Nothing matched: report it explicitly rather than crashing on an
    # unassigned local.
    raise ValueError(
        'toBel: unsupported schemaClass {!r} for dbId {}'.format(kind, dbId))
Ejemplo n.º 3
0
def buildBelEvidences(reactionList, belversion, pathways=None):
    """Load reactions, build BEL evidences and write the results to disk.

    For every reaction the record is fetched with ``getEntityData``; records
    without a ``stableIdentifier`` key are skipped. An evidence dict
    (annotations, citation and de-duplicated BEL statements) is built for
    each remaining reaction. Evidences with any statement containing
    ``EMBL`` (which also covers ``ENSEMBL``) are set aside as "bad namespace"
    evidences; the others are rendered through ``render_template``.

    Args:
        reactionList: iterable of ``(rxnId, rxnName)`` pairs. Only the id is
            used; the name is re-read from the fetched record.
        belversion: when equal to the string ``'2'`` the script is written to
            ``reactome.bels2``, otherwise to ``reactome.bels``.
        pathways: forwarded to ``render_template`` as the ``pathways`` keyword.

    Returns:
        None. Side effects: progress is printed to stdout, the rendered
        script is written to the file named above, and the set-aside
        evidences are dumped to ``bad_evidences.json``.
    """
    import json

    rxnUrlTpl = 'http://www.reactome.org/PathwayBrowser/#'
    evidences = []
    bad_namespaces_evidences = []  # collect bad evidences

    for rxnId, _listedName in reactionList:

        print('rxnId: ', rxnId)

        # Process Annotation information
        rxnData = getEntityData(rxnId)
        if 'stableIdentifier' not in rxnData:
            continue

        stableId = rxnData['stableIdentifier']['displayName']
        rxnUrl = '{}{}'.format(rxnUrlTpl, stableId)
        rxnName = escapeBelString(rxnData['displayName'])
        rxnType = rxnData['schemaClass']

        # TODO: collect all compartments and annotate.
        # The truthiness check keeps an empty compartment list from raising
        # IndexError; such reactions fall back to 'Unknown'.
        compartment = 'Unknown'
        if 'compartment' in rxnData and rxnData['compartment']:
            compartment = rxnData['compartment'][0]['displayName']

        rxnAuthor = rxnDate = None
        if 'created' in rxnData:
            try:
                createdName = rxnData['created']['displayName']
                matches = re.search(r'(.*?),\s+(\d{4,4}-\d{2,2}-\d{2,2})',
                                    createdName)
            except (KeyError, TypeError):
                # Only a missing or malformed 'created' entry is tolerated; a
                # bare ``except`` would also swallow KeyboardInterrupt.
                log.info(
                    'Rxn - cannot find created date and author in object: {}'.
                    format(rxnId))
            else:
                if matches:
                    rxnAuthor = matches.group(1)
                    rxnDate = matches.group(2)

        # Author/date are appended to the citation only when both were found.
        if rxnDate and rxnAuthor:
            citation = '{{"Online Resource", "{}", "{}", "{}", "{}"}}'.format(
                rxnName, rxnUrl, rxnDate, rxnAuthor)
        else:
            citation = '{{"Online Resource", "{}", "{}"}}'.format(
                rxnName, rxnUrl)

        evidence = {
            'name': rxnName,
            'rxnId': rxnId,  # TODO remove after debugging
            'rxnType': rxnType,
            'compartment': compartment,
            'species': rxnData['speciesName'],
            'species_tax_id':
            convertSpeciesNameToTaxId(rxnData['speciesName']),
            'summary_text': rxnName,
            'citation': citation,
        }

        # Process BEL Statement
        catalysts, inputs, outputs = [], [], []

        if 'catalystActivity' in rxnData:
            for catalyst in rxnData['catalystActivity']:
                catalysts.append(toBel(catalyst['dbId']))

        # Loop variables are not called ``input`` to avoid shadowing the
        # builtin.
        if 'input' in rxnData:
            for inputEntity in dedup(rxnData['input']):
                inputs.append(toBel(inputEntity['dbId']))
        if 'output' in rxnData:
            for outputEntity in dedup(rxnData['output']):
                outputs.append(toBel(outputEntity['dbId']))

        print('Catalysts ', catalysts)
        print('Inputs ', inputs)
        print('Outputs ', outputs)
        print('\n')

        statements = buildStatements(catalysts, inputs, outputs)
        evidence['statements'] = dedupList(statements)

        # The flag is computed over the raw (not de-duplicated) statements.
        bad_namespace_flag = any(
            'ENSEMBL' in statement or 'EMBL' in statement
            for statement in statements)

        if bad_namespace_flag:
            bad_namespaces_evidences.append(copy.deepcopy(evidence))
        else:
            evidences.append(copy.deepcopy(evidence))

    belscript = render_template(template_filename,
                                evidences,
                                pathways=pathways)

    fn = 'reactome.bels'
    if belversion == '2':
        fn += '2'

    with open(fn, 'w') as f:
        f.write(belscript)

    with open('bad_evidences.json', 'w') as f:
        json.dump(bad_namespaces_evidences, f, indent=4)
def _parseCreated(rxnData, rxnId):
    """Return ``(author, date)`` parsed from ``rxnData['created']``.

    Returns ``(None, None)`` when there is no ``created`` entry, when it has
    no usable ``displayName``, or when the text does not match
    ``<author>, <YYYY-MM-DD>``.
    """
    if 'created' not in rxnData:
        return None, None
    try:
        matches = re.search(r'(.*?),\s+(\d{4,4}-\d{2,2}-\d{2,2})', rxnData['created']['displayName'])
    except (KeyError, TypeError):
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit and
        # unrelated bugs are no longer silently swallowed.
        log.info('Rxn - cannot find created date and author in object: {}'.format(rxnId))
        return None, None
    if not matches:
        return None, None
    return matches.group(1), matches.group(2)


def _collectBelTerms(rxnData):
    """Return ``(catalysts, inputs, outputs)`` lists of ``toBel`` results.

    Catalysts are converted first, then de-duplicated inputs, then
    de-duplicated outputs; a missing key yields an empty list.
    """
    catalysts, inputs, outputs = [], [], []
    if 'catalystActivity' in rxnData:
        catalysts = [toBel(catalyst['dbId']) for catalyst in rxnData['catalystActivity']]
    if 'input' in rxnData:
        inputs = [toBel(entity['dbId']) for entity in dedup(rxnData['input'])]
    if 'output' in rxnData:
        outputs = [toBel(entity['dbId']) for entity in dedup(rxnData['output'])]
    return catalysts, inputs, outputs


def buildBelEvidences(reactionList, belversion, pathways=None):
    """Load reactions, build BEL evidences and write the results to disk.

    Each reaction record is fetched with ``getEntityData``; records lacking a
    ``stableIdentifier`` key are skipped. For the rest an evidence dict is
    assembled (annotations, citation, de-duplicated BEL statements).
    Evidences with a statement containing ``EMBL`` (hence also ``ENSEMBL``)
    are kept apart from the ones passed to ``render_template``.

    Args:
        reactionList: iterable of ``(rxnId, rxnName)`` pairs. Only the id is
            used; the name is re-read from the fetched record.
        belversion: the string ``'2'`` selects the output file
            ``reactome.bels2``; any other value selects ``reactome.bels``.
        pathways: forwarded to ``render_template`` as the ``pathways`` keyword.

    Returns:
        None. Side effects: prints progress to stdout, writes the rendered
        script to the file named above, and dumps the kept-apart evidences to
        ``bad_evidences.json``.
    """
    import json

    rxnUrlTpl = 'http://www.reactome.org/PathwayBrowser/#'
    evidences = []
    bad_namespaces_evidences = []  # collect bad evidences

    for rxnId, _listedName in reactionList:

        print('rxnId: ', rxnId)

        # Process Annotation information
        rxnData = getEntityData(rxnId)
        if 'stableIdentifier' not in rxnData:
            continue

        stableId = rxnData['stableIdentifier']['displayName']
        rxnUrl = '{}{}'.format(rxnUrlTpl, stableId)
        rxnName = escapeBelString(rxnData['displayName'])
        rxnType = rxnData['schemaClass']

        # TODO: collect all compartments and annotate.
        # An empty compartment list is treated like a missing one instead of
        # raising IndexError on ``[0]``.
        compartment = 'Unknown'
        if 'compartment' in rxnData and rxnData['compartment']:
            compartment = rxnData['compartment'][0]['displayName']

        rxnAuthor, rxnDate = _parseCreated(rxnData, rxnId)

        # Author/date go into the citation only when both are non-empty.
        if rxnDate and rxnAuthor:
            citation = '{{"Online Resource", "{}", "{}", "{}", "{}"}}'.format(rxnName, rxnUrl, rxnDate, rxnAuthor)
        else:
            citation = '{{"Online Resource", "{}", "{}"}}'.format(rxnName, rxnUrl)

        evidence = {
            'name': rxnName,
            'rxnId': rxnId,  # TODO remove after debugging
            'rxnType': rxnType,
            'compartment': compartment,
            'species': rxnData['speciesName'],
            'species_tax_id': convertSpeciesNameToTaxId(rxnData['speciesName']),
            'summary_text': rxnName,
            'citation': citation,
        }

        # Process BEL Statement
        catalysts, inputs, outputs = _collectBelTerms(rxnData)

        print('Catalysts ', catalysts)
        print('Inputs ', inputs)
        print('Outputs ', outputs)
        print('\n')

        statements = buildStatements(catalysts, inputs, outputs)
        evidence['statements'] = dedupList(statements)

        # Checked against the raw statements, not the de-duplicated list.
        bad_namespace_flag = any(
            'ENSEMBL' in statement or 'EMBL' in statement
            for statement in statements)

        target = bad_namespaces_evidences if bad_namespace_flag else evidences
        target.append(copy.deepcopy(evidence))

    belscript = render_template(template_filename, evidences, pathways=pathways)

    fn = 'reactome.bels'
    if belversion == '2':
        fn += '2'

    with open(fn, 'w') as f:
        f.write(belscript)

    with open('bad_evidences.json', 'w') as f:
        json.dump(bad_namespaces_evidences, f, indent=4)