예제 #1
0
def GOQLtoFindDrugs(TargetIds: list,
                    TargetType='Protein',
                    drugEffect=('negative',)):
    """Build an OQL query finding drugs that act on the given targets.

    TargetIds  -- database ids of the target entities.
    TargetType -- 'Protein' (default), 'Small Molecule', or any other type
                  (treated as a generic target linked by Regulation only).
    drugEffect -- effect values to filter relations by. The default is an
                  immutable tuple to avoid the shared mutable-default pitfall;
                  callers may still pass a list.

    Returns the OQL query string (or whatever OQL.get_drugs returns for the
    'Protein' case).
    """
    if TargetType == 'Protein':
        # Proteins have a dedicated drug-search helper in OQL.
        return OQL.get_drugs(for_targets_with_ids=TargetIds)

    # The two remaining branches of the original differed only in the
    # relation types used for expansion, so they are merged here:
    # small molecules can also be produced (MolSynthesis); everything else
    # is linked to drugs by Regulation only.
    if TargetType == 'Small Molecule':
        rel_types = ['Regulation', 'MolSynthesis']
    else:
        rel_types = ['Regulation']

    OQLquery = OQL.expand_entity(PropertyValues=TargetIds,
                                 SearchByProperties=['id'],
                                 expand_by_rel_types=rel_types,
                                 expand2neighbors=['Small Molecule'],
                                 direction='upstream')
    OQLquery += ' AND Effect = (' + ','.join(drugEffect) + ')'
    return OQLquery
예제 #2
0
def retreive_clinical_trials(drugs_name_file=None):
    """Fetch all completed clinical trials for small molecules.

    drugs_name_file -- optional path to a text file with one drug name per
                       line; when given, the query is restricted to trials
                       involving those drugs (matched by Name or Alias).

    Returns the result of ps_api.process_oql for the constructed query.
    """
    request_name = 'Find all completed clinical trials'
    oql_query = 'SELECT Relation WHERE objectType = ClinicalTrial AND TrialStatus = Completed AND NeighborOf (SELECT Entity WHERE objectType = SmallMol)'
    if isinstance(drugs_name_file, str):
        with open(drugs_name_file) as f:
            drug_names = [line.rstrip('\n') for line in f]
        print('Read %s with %d drug names' %
              (drugs_name_file, len(drug_names)))

        # Only the quoted/joined name string is needed here; the search
        # property string returned first by get_search_strings is unused.
        _, drug_names_str = OQL.get_search_strings(['Name', 'Alias'],
                                                   drug_names)
        oql_query += ' AND NeighborOf (SELECT Entity WHERE (Name,Alias)= ({names}))'.format(
            names=drug_names_str)
        request_name = 'Find clinical trials for drugs in {fname}'.format(
            fname=drugs_name_file)

    return ps_api.process_oql(oql_query, request_name, debug=False)
                           args.infile_has_header,
                           use_cache=False,
                           map2type=entity_types)

    # Link the entities mapped from the input file to each pathway named in
    # args.pathways (a comma-separated string).
    # NOTE(review): this is the tail of a larger function; `args`, `search`,
    # `OQL`, `time` and `EntityListFile` are defined outside this view.
    if len(args.pathways) > 0:
        print("Begin linking entities mapped from infile to pathways")
        LinkToPathways = str(args.pathways).split(",")
        start_time = time.time()
        for PathwayName in LinkToPathways:
            # Collect ids of pathway members restricted to the listed
            # entity types.
            PathwayMembersId2Entity = search.get_pathway_member_ids(
                [PathwayName],
                search_pathways_by=['Name'],
                only_entities=['Protein', 'FunctionalClass', 'Complex'],
                with_properties=['objectType'])
            pathway_components = set(PathwayMembersId2Entity.keys())
            # Also include ontology children of the pathway members.
            QueryOntology = OQL.get_childs(list(pathway_components), ['id'])
            pathway_components.update(search._obj_id_by_oql(QueryOntology))

            if len(pathway_components) == 0:
                print('No entity for %s found in the database' % PathwayName)
            else:
                search.link2concept(PathwayName, list(pathway_components))

        exec_time = search.execution_time(start_time)
        # NOTE(review): this summary prints only the last PathwayName from
        # the loop above, even when several pathways were linked.
        print("Entities in file %s were linked to %s pathway in %s" %
              (EntityListFile, PathwayName, exec_time))
    else:
        print(
            'No pathways were specified for semantic linking with entities from \"%s\"'
            % (EntityListFile))
예제 #4
0
# Relation properties to retrieve for every relation in the dumps.
REL_PROPs = ['Name', 'Effect', 'Mechanism', 'ChangeType']
# If properties from NetworkxObjects.REF_ID_TYPES or NetworkxObjects.REF_PROPS
# are added to REL_PROPs, output size may increase dramatically because it
# will contain one reference per row.
ENT_PROPs = ['Name', 'Description', 'Cell Localization']

# Single assignment (the original accidentally wrote `ps_api = ps_api = ...`).
ps_api = open_api_session()

ps_api.PageSize = 10000
ps_api.add_rel_props(list(set(REL_PROPs) | PS_ID_TYPES))
ps_api.add_ent_props(ENT_PROPs)

# This dump file will list all proteins in the database with connectivity > 0:
ps_api.add_dump_file('Proteins from database.tsv', replace_main_dump=True)
print('Fetching all proteins from the database')
ProteinsOnyGraph = ps_api.process_oql("Select Entity WHERE objectType = Protein AND Connectivity > 0 AND Name LIKE 'A%'", flush_dump=True)


# The dump file accumulates all data in one big file.
ps_api.add_dump_file("Protein neighbors dump.tsv", replace_main_dump=True)
out_dir = 'csv'
# enumerate(..., start=1) replaces the original's manual 1-based counter.
for counter, (node_id, psObj) in enumerate(ProteinsOnyGraph.nodes(data=True),
                                           start=1):
    protein_name = psObj['Name'][0]
    print('Finding neighbors for \"%s\", node #%d from %d total' %
          (protein_name, counter, ProteinsOnyGraph.number_of_nodes()))

    oql_query = GOQL.expand_entity([node_id], SearchByProperties=['id'])
    ProteinNeighborsGraph = ps_api.process_oql(oql_query)
    protein_neighbors_file = out_dir + '/' + protein_name + '_neighbors.csv'
    ps_api.to_csv(protein_neighbors_file)
    ps_api.Graph.clear()  # release memory when performing large dumps
예제 #5
0
import ElsevierAPI.ResnetAPI.PathwayStudioGOQL as OQL
from ElsevierAPI.ResnetAPI.NetworkxObjects import Reference
from ElsevierAPI import load_api_config
from ElsevierAPI.ResnetAPI.ResnetAPISession import APISession
import pandas as pd

# Open an API session from the configuration file.
APIconfig = load_api_config()
ps_api = APISession(APIconfig['ResnetURL'], APIconfig['PSuserName'],
                    APIconfig['PSpassword'])
fileIn = 'Drugs for Regulators in 4 patients.txt'
InDir = 'D:\\Python\\PBTA\\PNOC003\\4 patients analysis\\'
# Input file holds one drug name per line.
with open(InDir + fileIn) as f:
    drugs = [line.rstrip('\n') for line in f]

print('Finding drugs in %s in Resnet' % (fileIn))
OQLquery = OQL.get_entities_by_props(drugs, ['Name', 'Alias'],
                                     only_object_types=['Small Molecule'])
ps_api.add_ent_props(['Name', 'PharmaPendium ID'])
resnet_drugs = ps_api.process_oql(OQLquery, 'Find all drugs')
print('Found %d drugs in Resnet' % len(resnet_drugs))

# Map lowercased drug name -> PharmaPendium ID; drugs without a
# PharmaPendium ID are skipped, which removes duplicates with no ID.
resnet2pharmapendium_map = dict()
for _, drug in resnet_drugs.nodes(data=True):
    try:
        resnet2pharmapendium_map[str(
            drug['Name'][0]).lower()] = drug['PharmaPendium ID'][0]
    except KeyError:
        # Drug node has no 'PharmaPendium ID' (or no 'Name') property.
        continue

all_drugs = list(resnet_drugs.nodes(data=True))
for i, drug in all_drugs:
예제 #6
0
    api_config
)  # specify here path to your APIconfig file. Defaults to ./ElsevierAPI/APIconfig.json
# Need to retrieve aliases from the database in case input metabolites are
# found by Alias rather than by Name.
ps_api.add_ent_props(
    ['Alias']
)
ps_api.PageSize = 10000

# A dump file would contain references for all relations retrieved from the
# database; do not use a dump file unless references are needed in the report:
ps_api.DumpFiles.clear()

# Retrieve all ChemicalReaction relations linked to metabolites listed in
# input_metabolite_names, in batches of `step` names per OQL request.
# NOTE(review): `ps_api`, `OQL` and `input_metabolite_names` are defined
# outside this view.
step = 1000
for i in range(0, len(input_metabolite_names), step):
    name_list = input_metabolite_names[i:i + step]
    my_goql_query = OQL.expand_entity(name_list, ['Name', 'Alias'],
                                      expand_by_rel_types=['ChemicalReaction'])
    request_name = 'Retrieve metabolic reactions graph for {count} metabolites'.format(
        count=len(name_list))
    ps_api.process_oql(my_goql_query, request_name)

# All batched results accumulate in the session graph.
reactions_graph = ps_api.Graph
# Map input names to graph objects by Name first, then by Alias; merging the
# two id maps allows duplicates when an input matched both ways.
input_name2objs, objid2input_names = reactions_graph.get_prop2obj_dic(
    'Name', input_metabolite_names)
aliasinput_2objs, objid2input_alias = reactions_graph.get_prop2obj_dic(
    'Alias', input_metabolite_names)
objid2input_names.update(objid2input_alias)
metabolite_ids = list(objid2input_names.keys())
# objid2input_names = {obj_id:[input_names]} - allows for duplicates when mapping by name+alias

# Find enzymes linked to ChemicalReactions; their ontology children
# (proteins) are retrieved below this chunk.
enzymes = reactions_graph.get_objects(PROTEIN_TYPES)
예제 #7
0
# Disease names, used both in queries and in output file names.
InputDiseaseNames = ','.join(SearchEntitiesBy)

# Files used in ps_api.DiseaseNetwork.AddGraph to dump graph data in
# tab-delimited format:
myDir = ''  # e.g. 'D:\\Python\\PS_API\\'
foutDiseaseSNPs = myDir + "Gene variants linked to " + InputDiseaseNames + '.tsv'
foutDiseaseProteins = myDir + "Genes with SNPs linked to " + InputDiseaseNames + '.tsv'
foutDrugsForDiseaseProteins = myDir + "Druggable targets for " + InputDiseaseNames + '.tsv'

ps_api.add_rel_props(REL_PROPs)
ps_api.add_ent_props(ENT_PROP_Neo4j)

print("Finding GeneticVariants linked to %s" % InputDiseaseNames)
ps_api.add_dump_file(foutDiseaseSNPs, replace_main_dump=True)
ps_api.process_oql(
    GOQL.expand_entity(PropertyValues=SearchEntitiesBy,
                       SearchByProperties=['Name', 'Alias'],
                       expand_by_rel_types=[],
                       expand2neighbors=['GeneticVariant']))

SNPIds = list(set(ps_api.Graph.get_entity_ids(['GeneticVariant'])))
print("Finding Proteins containing GeneticVariants linked to %s" %
      InputDiseaseNames)
ps_api.add_dump_file(foutDiseaseProteins, replace_main_dump=True)
ps_api.process_oql(GOQL.expand_entity(PropertyValues=SNPIds,
                                      SearchByProperties=['id'],
                                      expand_by_rel_types=['GeneticChange'],
                                      expand2neighbors=['Protein']),
                   flush_dump=True)

# BUG FIX: the original prefixed this name with "\\", which with the default
# empty myDir produced a root-relative path ("\PPIs ...") unlike the three
# sibling output files above. Build it the same way as the others.
foutDiseasePPI = myDir + "PPIs between genes linked to " + InputDiseaseNames + '.tsv'
PPIgraph = ps_api.get_ppi_graph(foutDiseasePPI)
# calculating centrality