Beispiel #1
0
def GOQLtoFindDrugs(TargetIds: list,
                    TargetType='Protein',
                    drugEffect=None):
    """Build a GOQL query that finds drugs acting on the given targets.

    Parameters
    ----------
    TargetIds : list
        Database ids of the target entities.
    TargetType : str
        'Protein' returns the canned OQL.get_drugs query;
        'Small Molecule' expands through Regulation and MolSynthesis
        relations; any other type expands through Regulation only.
    drugEffect : list of str, optional
        Effect values required on the drug relations.
        Defaults to ['negative'].

    Returns
    -------
    str
        The GOQL query text.
    """
    # Avoid the shared-mutable-default pitfall; behavior is unchanged
    # for callers relying on the old ['negative'] default.
    if drugEffect is None:
        drugEffect = ['negative']

    if TargetType == 'Protein':
        return OQL.get_drugs(for_targets_with_ids=TargetIds)

    # The original 'Small Molecule' and else branches were identical
    # except for the relation types, so build the query once.
    rel_types = (['Regulation', 'MolSynthesis']
                 if TargetType == 'Small Molecule' else ['Regulation'])
    OQLquery = OQL.expand_entity(PropertyValues=TargetIds,
                                 SearchByProperties=['id'],
                                 expand_by_rel_types=rel_types,
                                 expand2neighbors=['Small Molecule'],
                                 direction='upstream')
    return OQLquery + ' AND Effect = (' + ','.join(drugEffect) + ')'
Beispiel #2
0
    api_config
)  # specify here path to your APIconfig file. Defaults to ./ElsevierAPI/APIconfig.json
ps_api.add_ent_props(
    ['Alias']
)  # retrieve aliases from the database in case input metabolites are matched by Alias
ps_api.PageSize = 10000  # maximum number of records fetched per server request

# A dump file would contain references for all relations retrieved from the database.
# Do not use a dump file unless you need to include references in the report:
ps_api.DumpFiles.clear()

# Retrieve all ChemicalReaction relations linked to the metabolites in
# excel_file_name as a ResnetGraph, querying the database in batches.
step = 1000
for start in range(0, len(input_metabolite_names), step):
    batch = input_metabolite_names[start:start + step]
    batch_query = OQL.expand_entity(batch, ['Name', 'Alias'],
                                    expand_by_rel_types=['ChemicalReaction'])
    job_name = 'Retrieve metabolic reactions graph for {count} metabolites'.format(
        count=len(batch))
    ps_api.process_oql(batch_query, job_name)

# The session graph now holds all reactions accumulated by the loop above.
reactions_graph = ps_api.Graph
# Map input metabolite names to graph objects both by 'Name' and by 'Alias'.
# Each call returns (property_value -> objects, object_id -> input_names).
input_name2objs, objid2input_names = reactions_graph.get_prop2obj_dic(
    'Name', input_metabolite_names)
aliasinput_2objs, objid2input_alias = reactions_graph.get_prop2obj_dic(
    'Alias', input_metabolite_names)
objid2input_names.update(objid2input_alias)
metabolite_ids = list(objid2input_names.keys())
# objid2input_names = {obj_id:[input_names]} - allows for duplicates when mapping by name+alias

# Find enzymes linked to the ChemicalReactions; their ontology children
# (proteins) are retrieved downstream.
enzymes = reactions_graph.get_objects(PROTEIN_TYPES)
# Relation properties to retrieve for every fetched relation.
# NOTE: adding properties from NetworkxObjects.REF_ID_TYPES or
# NetworkxObjects.REF_PROPS may increase output size dramatically,
# because the output will then contain one reference per row.
REL_PROPs = ['Name', 'Effect', 'Mechanism', 'ChangeType']
# Entity properties to retrieve for every fetched node.
ENT_PROPs = ['Name', 'Description', 'Cell Localization']

# Fixed redundant double assignment ("ps_api = ps_api = ...") in the original.
ps_api = open_api_session()
ps_api.PageSize = 10000
# De-duplicate relation properties and merge in the reference id types.
ps_api.add_rel_props(list(set(REL_PROPs) | PS_ID_TYPES))
ps_api.add_ent_props(ENT_PROPs)

# This dump file will list all proteins in the database with connectivity > 0:
ps_api.add_dump_file('Proteins from database.tsv', replace_main_dump=True)
print('Fetching all proteins from the database')
# NOTE(review): the query is restricted to names starting with 'A' (LIKE 'A%') —
# presumably a sampling/demo limit; confirm before using for a full export.
ProteinsOnyGraph = ps_api.process_oql("Select Entity WHERE objectType = Protein AND Connectivity > 0 AND Name LIKE 'A%'", flush_dump=True)


# The dump file accumulates all retrieved neighbor data in one big file.
ps_api.add_dump_file("Protein neighbors dump.tsv", replace_main_dump=True)
out_dir = 'csv'
counter = 0
total_nodes = ProteinsOnyGraph.number_of_nodes()  # invariant, hoisted out of the loop
for node_id, psObj in ProteinsOnyGraph.nodes(data=True):
    counter += 1
    protein_name = psObj['Name'][0]
    print('Finding neighbors for \"%s\", node #%d from %d total' %
          (protein_name, counter, total_nodes))

    # Expand one node at a time and write its neighbors to a per-protein CSV.
    ProteinNeighborsGraph = ps_api.process_oql(
        GOQL.expand_entity([node_id], SearchByProperties=['id']))
    ps_api.to_csv(out_dir + '/' + protein_name + '_neighbors.csv')
    ps_api.Graph.clear()  # release memory between large dumps
Beispiel #4
0
InputDiseaseNames = ','.join(SearchEntitiesBy)

# Output files used in ps_api.DiseaseNetwork.AddGraph to dump graph data
# in tab-delimited format:
myDir = ''  # e.g. 'D:\\Python\\PS_API\\'
foutDiseaseSNPs = f'{myDir}Gene variants linked to {InputDiseaseNames}.tsv'
foutDiseaseProteins = f'{myDir}Genes with SNPs linked to {InputDiseaseNames}.tsv'
foutDrugsForDiseaseProteins = f'{myDir}Druggable targets for {InputDiseaseNames}.tsv'

ps_api.add_rel_props(REL_PROPs)
ps_api.add_ent_props(ENT_PROP_Neo4j)

print("Finding GeneticVariants linked to %s" % InputDiseaseNames)
# Write this query's results into the SNP dump file.
ps_api.add_dump_file(foutDiseaseSNPs, replace_main_dump=True)
# Expand from the input disease names/aliases to linked GeneticVariant nodes.
# NOTE(review): expand_by_rel_types=[] presumably means "any relation type" —
# confirm against GOQL.expand_entity.
ps_api.process_oql(
    GOQL.expand_entity(PropertyValues=SearchEntitiesBy,
                       SearchByProperties=['Name', 'Alias'],
                       expand_by_rel_types=[],
                       expand2neighbors=['GeneticVariant']))

# De-duplicated ids of all GeneticVariant nodes retrieved so far.
SNPIds = list(set(ps_api.Graph.get_entity_ids(['GeneticVariant'])))
print("Finding Proteins containing GeneticVariants linked to %s" %
      InputDiseaseNames)
ps_api.add_dump_file(foutDiseaseProteins, replace_main_dump=True)
# Expand from the variants through GeneticChange relations to the proteins
# that carry them.
# NOTE(review): flush_dump presumably forces the accumulated dump to be
# written out — confirm against process_oql.
ps_api.process_oql(GOQL.expand_entity(PropertyValues=SNPIds,
                                      SearchByProperties=['id'],
                                      expand_by_rel_types=['GeneticChange'],
                                      expand2neighbors=['Protein']),
                   flush_dump=True)

# Build the PPI output path the same way as the other fout* paths above.
# The original inserted a literal "\\" after myDir, which yields a file name
# starting with a backslash when myDir is '' and a doubled separator otherwise.
foutDiseasePPI = myDir + "PPIs between genes linked to " + InputDiseaseNames + '.tsv'
PPIgraph = ps_api.get_ppi_graph(foutDiseasePPI)
# calculating centrality