def add_connection(self, element, score, query_id):
    """Append a CMAP similarity connection to ``element``.

    The connection points back to ``query_id`` and uses the first predicate of
    this transformer's knowledge map. It carries four attributes: the
    similarity score (stored as a string), a literature reference, a link to
    the CMAP site, and the touchstone data version from ``self.get_version()``.
    """
    predicate = self.info.knowledge_map.predicates[0].predicate

    def cmap_attribute(label, value, **extra):
        # Every CMAP attribute reuses its name as its type.
        return Attribute(name=label, value=value, type=label, source='CMAP',
                         provided_by=self.info.name, **extra)

    cmap_attributes = [
        cmap_attribute('CMAP similarity score', str(score)),
        cmap_attribute('reference', 'PMID:29195078'),
        cmap_attribute('about CMAP', 'https://clue.io/cmap',
                       url='https://clue.io/cmap'),
        cmap_attribute('CMAP touchstone data version', self.get_version(),
                       url='https://api.clue.io/api/touchstone-version'),
    ]
    element.connections.append(
        Connection(source_element_id=query_id, type=predicate,
                   attributes=cmap_attributes))
def _create_trapi_attributes(property_names: List[str], neo4j_object: Dict[str, any]) -> List[Attribute]:
    """Build one Attribute per named property that has a truthy value.

    Args:
        property_names: Names of the properties to read from ``neo4j_object``.
        neo4j_object: Mapping of property name to raw value.

    Returns:
        The attributes created, in the order of ``property_names``. Properties
        that are missing or falsy are skipped.

    String values that encode a list, dict or boolean are decoded first. A
    string value starting with ``http:`` or ``https:`` is also stored in the
    attribute's ``url`` field.
    """
    new_attributes = []
    for property_name in property_names:
        property_value = neo4j_object.get(property_name)
        if not property_value:
            continue

        # Extract any lists, dicts, and booleans that are stored within strings
        if isinstance(property_value, str):
            lowered = property_value.lower()
            if lowered in ("true", "false"):
                # ast.literal_eval only accepts the exact spellings "True"/"False";
                # "true", "FALSE", etc. raise ValueError, so decode booleans directly.
                property_value = lowered == "true"
            elif (property_value.startswith('[') and property_value.endswith(']')) or \
                    (property_value.startswith('{') and property_value.endswith('}')):
                try:
                    property_value = ast.literal_eval(property_value)
                except (ValueError, SyntaxError):
                    # Bracketed free text (e.g. "[sic]") is not a Python literal;
                    # keep the raw string instead of aborting the whole conversion.
                    pass

        # Create the actual Attribute object
        trapi_attribute = Attribute(name=property_name,
                                    type=eu.get_attribute_type(property_name),
                                    value=property_value)
        # Also store this value in Attribute.url if it's a URL
        if isinstance(property_value, str) and property_value.startswith(("http:", "https:")):
            trapi_attribute.url = property_value
        new_attributes.append(trapi_attribute)
    return new_attributes
def produce(self, controls):
    """Resolve the semicolon-separated names in ``controls['compounds']`` to compounds.

    Each name is stripped and looked up with ``self.find_compound``. A name
    with no hits yields a placeholder CompoundInfo carrying only the query
    name; every hit yields a CompoundInfo with its CID, structure and the
    query name.
    """
    results = []
    for raw_name in controls['compounds'].split(';'):
        query_name = raw_name.strip()
        cids = self.find_compound(query_name)
        if not cids:
            # Keep a record of the name even though nothing matched.
            placeholder = CompoundInfo(attributes=[
                Attribute(name='query name', value=query_name, source=self.info.name)
            ])
            results.append(placeholder)
        for cid in cids:
            structure = self.get_structure(cid)
            curie = 'CID:' + str(cid)
            results.append(
                CompoundInfo(
                    compound_id=curie,
                    identifiers=CompoundInfoIdentifiers(pubchem=curie),
                    structure=structure,
                    attributes=[
                        Attribute(name='query name', value=query_name, source=self.info.name)
                    ],
                    source=self.info.name))
            # PubChem only allows 5 requests per second
            sleep(X_Throttling_Control)
    return results
def find_compound_by_name(self, name):
    """Search ChEMBL for molecules matching ``name`` and wrap each hit in an Element.

    The name is upper-cased before it is substituted into ``CHEMBL_NAME_URL``.
    Each element records its ChEMBL id and structure identifiers, plus the
    query name and the structure source as attributes.
    """
    response = requests.get(CHEMBL_NAME_URL.format(name.upper()))
    matches = []
    for molecule in response.json()['molecules']:
        chembl_id = molecule['molecule_chembl_id']
        curie = CHEMBL + chembl_id
        structures = molecule['molecule_structures']
        identifiers = {
            'chembl': curie,
            'smiles': structures['canonical_smiles'],
            'inchi': structures['standard_inchi'],
            'inchikey': structures['standard_inchi_key'],
        }
        names_synonyms = self.get_names_synonyms(
            chembl_id, molecule['pref_name'], molecule['molecule_synonyms'])
        query_attributes = [
            Attribute(name='query name', value=name, source=self.info.name),
            Attribute(name='structure source', value=SOURCE, source=self.info.name),
        ]
        matches.append(
            Element(id=curie,
                    biolink_class=CHEMICAL_SUBSTANCE,
                    identifiers=identifiers,
                    names_synonyms=names_synonyms,
                    attributes=query_attributes,
                    connections=[],
                    source=self.info.name))
    return matches
def create_connection(self, compound, mechanism):
    """Build a Connection from ``compound`` described by a mechanism record.

    The action type and mechanism of action become attributes when they are
    not None, followed by one attribute per entry in ``mechanism_refs``.
    """
    connection = Connection(
        source_element_id=compound.id,
        type=self.info.knowledge_map.predicates[0].predicate,
        attributes=[])

    # Optional scalar fields, paired with the attribute type each one is reported under.
    for field, attribute_type in (('action_type', ACTION), ('mechanism_of_action', MOA)):
        field_value = mechanism[field]
        if field_value is None:
            continue
        connection.attributes.append(
            Attribute(name=field, value=field_value, type=attribute_type,
                      source=SOURCE, url=None, provided_by=self.info.name))

    for ref in mechanism['mechanism_refs']:
        ref_attribute = Attribute(name=ref['ref_type'], value=ref['ref_id'],
                                  type=REFERENCE, source=SOURCE,
                                  url=ref['ref_url'], provided_by=self.info.name)
        connection.attributes.append(ref_attribute)
    return connection
def _create_ngd_edge(self, ngd_value: float, subject: str, object: str, pmid_list: list) -> Tuple[str, Edge]:
    """Create the edge that carries an NGD value between two nodes.

    Returns:
        A ``(key, edge)`` pair. The key has the form
        ``NGD:<subject>--<predicate>--<object>``. The edge holds the NGD value
        attribute followed by provided_by, is_defined_by and publications.
    """
    edge = Edge()
    edge.predicate = self.ngd_edge_type
    edge.subject = subject
    edge.object = object

    value_attribute = Attribute(name=self.ngd_edge_attribute_name,
                                type=self.ngd_edge_attribute_type,
                                value=ngd_value,
                                url=self.ngd_edge_attribute_url)
    provenance_attributes = [
        Attribute(name=attribute_name, value=attribute_value,
                  type=eu.get_attribute_type(attribute_name))
        for attribute_name, attribute_value in (("provided_by", "ARAX"),
                                                ("is_defined_by", "ARAX"),
                                                ("publications", pmid_list))
    ]
    edge.attributes = [value_attribute] + provenance_attributes

    edge_key = f"NGD:{subject}--{edge.predicate}--{object}"
    return edge_key, edge
def _create_swagger_attributes(property_names: List[str], neo4j_object: Dict[str, any]) -> List[Attribute]:
    """Build one Attribute per named property that has a non-empty value.

    Args:
        property_names: Names of the properties to read from ``neo4j_object``.
        neo4j_object: Mapping of property name to raw value.

    Returns:
        The attributes created, in the order of ``property_names``. Values that
        are None, ``{}`` or ``[]`` (after decoding) are skipped.

    String values that encode a list, dict or boolean are decoded first, and
    decoded lists are sorted in place. A string value starting with ``http:``
    or ``https:`` is also stored in the attribute's ``url`` field.
    """
    new_attributes = []
    for property_name in property_names:
        property_value = neo4j_object.get(property_name)

        # Extract any lists, dicts, and booleans that are stored within strings
        if isinstance(property_value, str):
            lowered = property_value.lower()
            if lowered in ("true", "false"):
                # ast.literal_eval only accepts the exact spellings "True"/"False";
                # "true", "FALSE", etc. raise ValueError, so decode booleans directly.
                property_value = lowered == "true"
            elif (property_value.startswith('[') and property_value.endswith(']')) or \
                    (property_value.startswith('{') and property_value.endswith('}')):
                try:
                    parsed_value = ast.literal_eval(property_value)
                except (ValueError, SyntaxError):
                    # Bracketed free text is not a Python literal; keep the raw
                    # string instead of aborting the whole conversion.
                    pass
                else:
                    if isinstance(parsed_value, list):
                        parsed_value.sort()  # Alphabetize lists
                    property_value = parsed_value

        # Create an Attribute for all non-empty values
        if property_value is not None and property_value != {} and property_value != []:
            swagger_attribute = Attribute(name=property_name,
                                          type=eu.get_attribute_type(property_name),
                                          value=property_value)
            # Also store this in the 'url' field if it's a URL
            if isinstance(property_value, str) and property_value.startswith(("http:", "https:")):
                swagger_attribute.url = property_value
            new_attributes.append(swagger_attribute)
    return new_attributes
def _add_answers_to_kg(self, answer_kg: QGOrganizedKnowledgeGraph, reasoner_std_response: Dict[str, any],
                       input_qnode_key: str, output_qnode_key: str, qedge_key: str,
                       log: ARAXResponse) -> QGOrganizedKnowledgeGraph:
    """Copy the nodes and edges of a BTE response into ``answer_kg``.

    Nothing is copied when the response's knowledge graph has no edges.

    Args:
        answer_kg: Knowledge graph to add to; it is mutated and also returned.
        reasoner_std_response: Response dict; this method reads ``results``
            and ``knowledge_graph`` (``nodes`` and ``edges``).
        input_qnode_key: Query-graph key used for nodes whose BTE qg_id is "n0".
        output_qnode_key: Query-graph key used for nodes whose BTE qg_id is "n1".
        qedge_key: Query-graph key under which every edge is added.
        log: Response object used for debug and error logging.

    Returns:
        ``answer_kg``. If a node or edge cannot be mapped to a query-graph id,
        an "UnknownQGID" error is logged and ``answer_kg`` is returned
        immediately, keeping whatever was added before the failure.
    """
    # Lookup of BTE node/edge ids to BTE qg_ids, read below via its 'nodes' and 'edges' entries
    kg_to_qg_ids_dict = self._build_kg_to_qg_id_dict(reasoner_std_response['results'])
    if reasoner_std_response['knowledge_graph']['edges']:
        # BTE node key -> preferred key chosen for that output node
        remapped_node_keys = dict()
        log.debug(f"Got results back from BTE for this query "
                  f"({len(reasoner_std_response['knowledge_graph']['edges'])} edges)")

        for node in reasoner_std_response['knowledge_graph']['nodes']:
            swagger_node = Node()
            bte_node_key = node.get('id')
            swagger_node.name = node.get('name')
            swagger_node.category = eu.convert_to_list(eu.convert_string_to_snake_case(node.get('type')))

            # Map the returned BTE qg_ids back to the original qnode_keys in our query graph
            bte_qg_id = kg_to_qg_ids_dict['nodes'].get(bte_node_key)
            if bte_qg_id == "n0":
                qnode_key = input_qnode_key
            elif bte_qg_id == "n1":
                qnode_key = output_qnode_key
            else:
                log.error("Could not map BTE qg_id to ARAX qnode_key", error_code="UnknownQGID")
                return answer_kg

            # Find and use the preferred equivalent identifier for this node (if it's an output node)
            if qnode_key == output_qnode_key:
                if bte_node_key in remapped_node_keys:
                    swagger_node_key = remapped_node_keys.get(bte_node_key)
                else:
                    # Flatten {prefix: [local ids]} into "prefix:local_id" curies
                    equivalent_curies = [f"{prefix}:{eu.get_curie_local_id(local_id)}" for prefix, local_ids in
                                         node.get('equivalent_identifiers').items() for local_id in local_ids]
                    swagger_node_key = self._get_best_equivalent_bte_curie(equivalent_curies, swagger_node.category[0])
                    remapped_node_keys[bte_node_key] = swagger_node_key
            else:
                swagger_node_key = bte_node_key

            answer_kg.add_node(swagger_node_key, swagger_node, qnode_key)

        for edge in reasoner_std_response['knowledge_graph']['edges']:
            swagger_edge = Edge()
            swagger_edge_key = edge.get("id")
            swagger_edge.predicate = edge.get('type')
            # Endpoints follow any node-key remapping done above; unmapped ids are used as-is
            swagger_edge.subject = remapped_node_keys.get(edge.get('source_id'), edge.get('source_id'))
            swagger_edge.object = remapped_node_keys.get(edge.get('target_id'), edge.get('target_id'))
            swagger_edge.attributes = [Attribute(name="provided_by", value=edge.get('edge_source'), type=eu.get_attribute_type("provided_by")),
                                       Attribute(name="is_defined_by", value="BTE", type=eu.get_attribute_type("is_defined_by"))]

            # Map the returned BTE qg_id back to the original qedge_key in our query graph
            bte_qg_id = kg_to_qg_ids_dict['edges'].get(swagger_edge_key)
            if bte_qg_id != "e1":
                log.error("Could not map BTE qg_id to ARAX qedge_key", error_code="UnknownQGID")
                return answer_kg

            answer_kg.add_edge(swagger_edge_key, swagger_edge, qedge_key)

    return answer_kg
def get_connections_attributes(self, row, connection1):
    """Attach score and action attributes to ``connection1``.

    Non-empty score columns of ``row`` are copied first. The ``actions`` table
    is then queried for the row's chemical/protein pair in either order, and
    each action row adds a ``protein_is_acting`` attribute followed by its
    non-empty ``mode``, ``action`` and ``score`` values. The query runs on the
    ``connection`` object from the enclosing scope.
    """
    def append_filled(record, columns):
        # Copy each non-empty column of `record` onto the connection as a string attribute.
        for column in columns:
            cell = record[column]
            if cell is not None and cell != '':
                connection1.attributes.append(
                    Attribute(name=column, value=str(cell), provided_by=self.info.name,
                              type=column, source=SOURCE))

    append_filled(row, [
        'experimental_direct', 'experimental_transferred',
        'prediction_direct', 'prediction_transferred',
        'database_direct', 'database_transferred',
        'textmining_direct', 'textmining_transferred',
        'combined_score',
    ])

    query = """
        SELECT DISTINCT item_id_a, item_id_b, a_is_acting, mode, action, score
        FROM actions
        WHERE (item_id_a = ? AND item_id_b = ?) OR (item_id_a = ? AND item_id_b = ?);
    """
    chemical = row['chemical']
    protein = row['protein']
    cursor = connection.execute(query, (chemical, protein, protein, chemical))
    for actions_row in cursor.fetchall():
        # Item A is taken to be the protein when its id starts with '9606'
        # (presumably a taxon prefix; confirm against the data source).
        protein_is_first = actions_row['item_id_a'].startswith('9606')
        first_is_acting = actions_row['a_is_acting'].startswith('t')
        # The protein acts when it is item A and A acts, or it is item B and A does not.
        protein_is_acting = protein_is_first == first_is_acting
        connection1.attributes.append(
            Attribute(name='protein_is_acting', value=str(protein_is_acting),
                      provided_by=self.info.name, type='protein_is_acting', source=SOURCE))
        append_filled(actions_row, ['mode', 'action', 'score'])
def _create_swagger_edge_from_kp_edge(self, kp_edge_key: str, kp_edge: Dict[str, any]) -> Tuple[str, Edge]:
    """Convert an edge dict returned by a KP into an Edge.

    Args:
        kp_edge_key: Key of the edge in the KP's response; returned unchanged.
        kp_edge: Edge dict with ``subject``, ``object`` and ``predicate`` entries.

    Returns:
        A ``(kp_edge_key, edge)`` pair. The edge carries a ``provided_by``
        attribute set to ``self.kp_name`` and an ``is_defined_by`` attribute
        set to "ARAX".
    """
    # The return annotation previously claimed a bare Edge, but a
    # (key, edge) pair has always been returned, as in _create_ngd_edge.
    swagger_edge = Edge(subject=kp_edge['subject'],
                        object=kp_edge['object'],
                        predicate=kp_edge['predicate'])
    swagger_edge.attributes = [
        Attribute(name="provided_by", value=self.kp_name, type=eu.get_attribute_type("provided_by")),
        Attribute(name="is_defined_by", value="ARAX", type=eu.get_attribute_type("is_defined_by")),
    ]
    return kp_edge_key, swagger_edge
def get_or_create_metabolite(self, row, metabolites, metabolite_list):
    """Return the metabolite Element for ``row``, creating and caching it on first use.

    ``metabolites`` is the cache keyed by the bare ChEMBL id. A newly created
    element is stored there and also appended to ``metabolite_list``.
    """
    chembl_id = row['metabolite_chembl_id']
    if chembl_id in metabolites:
        return metabolites[chembl_id]

    primary_name = row['metabolite_name']
    preferred_name = row['metabolite_pref_name']
    names = Names(name=primary_name, synonyms=[], source=SOURCE)
    # The preferred name is only listed as a synonym when it differs from the primary name.
    if preferred_name is not None and preferred_name != primary_name:
        names.synonyms.append(preferred_name)

    curie = CHEMBL + chembl_id
    metabolite = Element(id=curie,
                         biolink_class=CHEMICAL_SUBSTANCE,
                         identifiers={'chembl': curie},
                         names_synonyms=[names],
                         connections=[],
                         attributes=[])

    has_structure = False
    for structure_key in ('inchi', 'inchikey', 'smiles'):
        structure_value = row[structure_key]
        if structure_value is not None:
            metabolite.identifiers[structure_key] = structure_value
            has_structure = True
    # One 'structure source' attribute is enough however many identifiers were found.
    if has_structure:
        metabolite.attributes.append(
            Attribute(name='structure source', value=SOURCE, source=self.info.name))

    metabolites[chembl_id] = metabolite
    metabolite_list.append(metabolite)
    return metabolite
def map(self, compound_list, controls):
    """Map compounds to the genes targeted by their metabolites.

    A gene that is reached more than once is created only once, on first
    sight; every target then adds its connection to that gene. Genes are
    returned in the order they were first encountered.
    """
    genes_by_id = {}
    ordered_genes = []
    for compound in compound_list:
        metabolite = find_metabolite(compound)
        if metabolite is None:
            continue
        for target in self.find_targets(metabolite, compound.id):
            entrez_id = target['entrez']
            gene = genes_by_id.get(entrez_id)
            if gene is None:
                uniprot_attribute = Attribute(name='UniProtKB',
                                              value=target['uniprot'],
                                              source=self.info.label,
                                              provided_by=self.info.name)
                gene = Element(id=entrez_id,
                               biolink_class=GENE,
                               identifiers={'entrez': entrez_id},
                               names_synonyms=[Names(name=target['name'])],
                               attributes=[uniprot_attribute],
                               connections=[],
                               source=self.info.name)
                ordered_genes.append(gene)
                genes_by_id[entrez_id] = gene
            gene.connections.append(target['connection'])
    return ordered_genes
def _convert_kg2c_plover_edge_to_trapi_edge(self, edge_tuple: list) -> Edge:
    """Turn a Plover edge tuple into an Edge.

    ``edge_tuple`` is read positionally as (subject, object, predicate,
    knowledge sources). The edge gets one aggregator attribute naming KG2,
    followed by one knowledge-source attribute per listed source.
    """
    kg2_curie = self.kg2_infores_curie
    edge = Edge(subject=edge_tuple[0], object=edge_tuple[1], predicate=edge_tuple[2], attributes=[])

    # Indicate that this edge came from the KG2 KP
    aggregator_attribute = Attribute(attribute_type_id="biolink:aggregator_knowledge_source",
                                     value=kg2_curie,
                                     value_type_id="biolink:InformationResource",
                                     attribute_source=kg2_curie)
    edge.attributes.append(aggregator_attribute)

    # Create knowledge source attributes for each of this edge's knowledge sources
    for infores_curie in edge_tuple[3]:
        edge.attributes.append(Attribute(attribute_type_id="biolink:knowledge_source",
                                         value=infores_curie,
                                         value_type_id="biolink:InformationResource",
                                         attribute_source=kg2_curie))
    return edge
def find_compound_by_name(self, name):
    """Search ChEMBL for molecules matching ``name`` and wrap each hit in a CompoundInfo.

    The name is upper-cased before it is substituted into ``CHEMBL_NAME_URL``.
    Each result carries its ChEMBL id, names, structure and the query name.
    """
    response = requests.get(CHEMBL_NAME_URL.format(name.upper()))
    matches = []
    for molecule in response.json()['molecules']:
        chembl_id = molecule['molecule_chembl_id']
        curie = CHEMBL + chembl_id
        structures = molecule['molecule_structures']
        smiles = structures['canonical_smiles']
        matches.append(
            CompoundInfo(
                compound_id=curie,
                identifiers=CompoundInfoIdentifiers(chembl=curie),
                names_synonyms=self.get_names_synonyms(
                    chembl_id, molecule['pref_name'], molecule['molecule_synonyms']),
                structure=CompoundInfoStructure(
                    smiles=smiles,
                    inchi=structures['standard_inchi'],
                    inchikey=structures['standard_inchi_key'],
                    source='ChEMBL'),
                attributes=[
                    Attribute(name='query name', value=name, source=self.info.name)
                ],
                source=self.info.name))
    return matches
def get_nucleicAcid_info(self, uuid, substance):
    """Append nucleic-acid details of substance ``uuid`` to ``substance.attributes``.

    Every non-blank value among the type, sequence type, sequence origin,
    sequence and length columns becomes a string attribute named after its
    column.
    """
    query9 = """
        SELECT DISTINCT
            substances.uuid,
            nucleicAcidType,
            sequenceType,
            sequenceOrigin,
            sequence,
            length
        FROM substances
        JOIN nucleic_acids ON substances.uuid = nucleic_acids.substance_id
        JOIN nucleic_acid_sequences ON nucleic_acids.uuid = nucleic_acid_sequences.nucleic_acid_id
        WHERE substances.uuid = ?;
    """
    reported_columns = ('nucleicAcidType', 'sequenceType', 'sequenceOrigin', 'sequence', 'length')
    connection = Inxight_Drugs_DataSupply.get_db()
    cursor = connection.execute(query9, (uuid,))
    for row in cursor.fetchall():
        for column in reported_columns:
            cell = row[column]
            # Skip NULLs and values that are blank once stringified.
            if cell is None or not str(cell).strip():
                continue
            substance.attributes.append(
                Attribute(provided_by=self.info.name, name=column, value=str(cell),
                          source=SOURCE, type=column))
def get_interaction_attributes(info_name, drug_gene_interaction, interaction_id):
    """Append the stored attributes of an interaction to ``drug_gene_interaction``.

    Each attribute's source is the stringified source database name joined to
    SOURCE with '@'. The database handle is stored in the module-level
    ``connection`` as a side effect.
    """
    global connection
    # Connection attributes SQL query:
    query7 = """
        SELECT DISTINCT
            interaction_attributes.name,
            interaction_attributes.value,
            sources.source_db_name
        FROM interaction_attributes
        LEFT JOIN interaction_attributes_sources
            ON interaction_attributes_sources.interaction_attribute_id = interaction_attributes.id
        LEFT JOIN sources ON sources.id = interaction_attributes_sources.source_id
        WHERE interaction_attributes.interaction_id = ?;
    """
    connection = DGIdbDataSupply.get_db()
    rows = connection.execute(query7, (interaction_id,)).fetchall()
    for row in rows:
        attribute_name = row['name']
        # str() guards the LEFT JOIN case where no source row matched.
        attribute_source = str(row['source_db_name']) + '@' + SOURCE
        drug_gene_interaction.attributes.append(
            Attribute(name=attribute_name,
                      provided_by=info_name,
                      value=row['value'],
                      source=attribute_source,
                      # Interim solution for providing "type", pending Consortium's final decision
                      type=attribute_name))
def get_drug_attributes(info_name, id, compound):
    """Append the stored attributes of drug ``id`` to ``compound.attributes``.

    Each attribute's source is the source database name with '@DGIdb'
    appended. The database handle is stored in the module-level
    ``connection`` as a side effect.
    """
    global connection
    # query to fill the attributes array.
    query6 = """
        SELECT
            drug_attributes.name,
            drug_attributes.value,
            sources.source_db_name AS attribute_source
        FROM drug_attributes
        JOIN drug_attributes_sources ON drug_attributes.id = drug_attributes_sources.drug_attribute_id
        JOIN sources ON drug_attributes_sources.source_id = sources.id
        WHERE drug_attributes.drug_id = ?;
    """
    connection = DGIdbDataSupply.get_db()
    rows = connection.execute(query6, (id,)).fetchall()
    for row in rows:
        attribute_name = row['name']
        compound.attributes.append(
            Attribute(name=attribute_name,
                      provided_by=info_name,
                      value=row['value'],
                      source=row['attribute_source'] + '@DGIdb',
                      # Interim solution for providing "type", pending Consortium's final decision
                      type=attribute_name))
def find_drug_attributes(self, rxcui):
    """Return one Attribute per RXNSAT entry of ``rxcui``, excluding SPL_SET_ID entries.

    The attribute name (ATN) doubles as the attribute type. The query runs on
    the ``connection`` object from the enclosing scope.
    """
    # Columns are listed explicitly rather than selected with '*'.
    query = """
        select distinct RXNSAT.RXCUI, RXNSAT.ATN, RXNSAT.ATV
        from RXNSAT
        where (RXNSAT.RXCUI = ?) and RXNSAT.ATN != 'SPL_SET_ID'
    """
    # The trailing comma makes the parameter a one-element tuple.
    cursor = connection.execute(query, (rxcui,))
    return [
        Attribute(name=row['ATN'], value=row['ATV'], source=SOURCE,
                  provided_by=self.info.name, type=row['ATN'])
        for row in cursor.fetchall()
    ]
def get_codes(self, uuid, substance):
    """Append the codes of substance ``uuid`` to ``substance.attributes`` as references.

    Only codes with a URL are reported. The attribute value is the code's
    comments, or the URL when there are no comments.
    """
    query3 = """
        SELECT
            _name,
            type,
            codeSystem,
            comments,
            code,
            url,
            codeText
        FROM substances
        JOIN substance_codes ON substances.uuid = substance_codes.substance_id
        JOIN codes ON substance_codes.code_id = codes.uuid
        WHERE substances.uuid = ?;
    """
    connection = Inxight_Drugs_DataSupply.get_db()
    for row in connection.execute(query3, (uuid,)).fetchall():
        comments = row['comments']
        url = row['url']
        source = row['codeSystem'] + '@' + SOURCE
        reference = url if comments is None else comments
        if not url:
            continue
        substance.attributes.append(
            Attribute(name='code', value=reference, type='reference',
                      source=source, url=url, provided_by=self.info.name))
def __init__(self):
    """Declare the exposed node/edge properties and the Attribute template for each."""
    def shell(type_id, value_type, description=None):
        # Leave `description` off entirely when there is none, rather than passing None.
        extra = {} if description is None else {"description": description}
        return Attribute(attribute_type_id=type_id, value_type_id=value_type, **extra)

    # Property name -> Python type of the property's value
    self.node_attributes = dict(iri=str,
                                description=str,
                                all_categories=list,
                                all_names=list,
                                equivalent_curies=list,
                                publications=list)
    self.edge_attributes = dict(publications=list,
                                publications_info=dict,
                                kg2_ids=list,
                                knowledge_source=list)

    # Property name -> pre-filled Attribute
    self.attribute_shells = {
        "iri": shell("biolink:IriType", "metatype:Uri"),
        "description": shell("biolink:description", "metatype:String"),
        "all_categories": shell("biolink:category", "metatype:Uriorcurie",
                                "Categories of all nodes in this synonym set in RTX-KG2."),
        "all_names": shell("biolink:synonym", "metatype:String",
                           "Names of all nodes in this synonym set in RTX-KG2."),
        "equivalent_curies": shell("biolink:xref", "metatype:Nodeidentifier",
                                   "Identifiers of all nodes in this synonym set in RTX-KG2."),
        "publications": shell("biolink:publications", "biolink:Uriorcurie"),
        "publications_info": shell("bts:sentence", None),
        "kg2_ids": shell("biolink:original_edge_information", "metatype:String",
                         "The original RTX-KG2pre edge(s) corresponding to this edge prior to any "
                         "synonymization or remapping. Listed in "
                         "(subject)--(relation)--(object)--(source) format."),
    }
    self.array_delimiter_char = "ǂ"
    # Can't use expand_utilities.py here due to circular imports
    self.kg2_infores_curie = "infores:rtx-kg2"
def add_attribute(self, connection, name, value):
    """Append an attribute to ``connection``; the value is stringified and the name doubles as the type."""
    connection.attributes.append(
        Attribute(name=name,
                  value=str(value),
                  type=name,
                  source=self.info.label,
                  provided_by=self.info.name))
def produce(self, controls):
    """Look up each semicolon-separated name in ``controls['compounds']``.

    Every compound found is tagged with a 'query name' attribute holding the
    stripped name it was found under.
    """
    found = []
    for raw_name in controls['compounds'].split(';'):
        query_name = raw_name.strip()
        for compound in self.find_compound(query_name):
            query_attribute = Attribute(name='query name', value=query_name,
                                        source=SOURCE, provided_by=self.info.name)
            compound.attributes.append(query_attribute)
            found.append(compound)
    return found
def add_reference(self, connection, pmid):
    """Append a PubMed reference attribute to ``connection``.

    Args:
        connection: Object whose ``attributes`` list receives the reference.
        pmid: PubMed id, as a string or an integer.

    The value is ``PMID:<pmid>`` and the URL points at the PubMed record.
    """
    # Stringify once: the value used to concatenate the raw pmid and raised
    # TypeError for integer ids, although the URL already went through str().
    pmid_text = str(pmid)
    connection.attributes.append(
        Attribute(name='reference',
                  value='PMID:' + pmid_text,
                  type='reference',
                  source=self.info.label,
                  url='https://pubmed.ncbi.nlm.nih.gov/' + pmid_text,
                  provided_by=self.info.name))
def get_computed_value_attribute() -> Attribute:
    """Return the boolean ``biolink:computed_value`` attribute, attributed to ARAX."""
    explanation = ("This edge is a container for a computed value between two nodes that is not "
                   "directly attachable to other edges.")
    return Attribute(attribute_type_id="biolink:computed_value",
                     value=True,
                     value_type_id="metatype:Boolean",
                     attribute_source="infores:arax",
                     description=explanation)
def get_kp_source_attribute(kp_name: str, arax_kp: bool = False, description: Optional[str] = None) -> Attribute:
    """Return a ``biolink:knowledge_source`` attribute naming ``kp_name``.

    For a KP that is not an ARAX KP and has no (non-empty) description, a
    default description is filled in saying that ARAX inserted the attribute.
    """
    needs_default_description = not (arax_kp or description)
    if needs_default_description:
        description = (f"ARAX inserted this attribute because the KP ({kp_name}) did not seem to provide such "
                       f"an attribute (indicating that this edge came from them).")
    return Attribute(attribute_type_id="biolink:knowledge_source",
                     value=kp_name,
                     value_type_id="biolink:InformationResource",
                     description=description,
                     attribute_source="infores:arax")
def produce(self, controls):
    """Look up each semicolon-separated name in ``controls['compounds']``.

    Each drug found is converted with ``get_drug``. Its source is set to this
    transformer's name and its attributes are replaced by a single
    'query name' attribute.
    """
    found = []
    for raw_name in controls['compounds'].split(';'):
        query_name = raw_name.strip()
        for drug in find_drug(query_name):
            info = get_drug(drug)
            info.source = self.info.name
            info.attributes = [
                Attribute(name='query name', value=query_name, source=self.info.name)
            ]
            found.append(info)
    return found
def get_element_attributes(self, row, compound):
    """Copy the non-empty ligand columns of ``row`` onto ``compound.attributes``.

    Values are stored as they come from the row; the column name is used as
    both the attribute name and its type.
    """
    ligand_columns = ('LIGAND_ID', 'TYPE', 'APPROVED', 'WITHDRAWN', 'LABELLED', 'RADIOACTIVE')
    for column in ligand_columns:
        cell = row[column]
        if cell is not None and cell != '':
            compound.attributes.append(
                Attribute(name=column, value=cell, provided_by=self.info.name,
                          type=column, source=SOURCE))
def add_references(transformer, connection, ref_table, id_column, ref_id):
    """Append one 'publication' attribute to ``connection`` per reference from ``get_refs``.

    The attribute is named after the reference type, and its value is the
    reference id passed through ``add_ref_prefix``.
    """
    for ref in get_refs(ref_table, id_column, ref_id):
        ref_type = ref['ref_type']
        publication = Attribute(name=ref_type,
                                value=add_ref_prefix(ref_type, ref['ref_id']),
                                type='publication',
                                source=SOURCE,
                                url=ref['ref_url'],
                                provided_by=transformer.info.name)
        connection.attributes.append(publication)
def get_attributes(self, row, compound):
    """Copy the non-empty molecular weight of ``row`` onto ``compound.attributes`` as a string."""
    for column in ('molecular_weight',):
        cell = row[column]
        if cell is not None and cell != '':
            compound.attributes.append(
                Attribute(name=column, value=str(cell), provided_by=self.info.name,
                          type=column, source=self.info.label))
def pull_attributes(self, node):
    """Build a list of Attribute objects from ``node['attributes']``.

    Entries whose 'type' is missing, None or an empty string are skipped. An
    empty list is returned when ``node`` is None or has no attributes.
    """
    if node is None or 'attributes' not in node:
        return []
    raw_attributes = node.get('attributes')
    if raw_attributes is None:
        return []

    collected = []
    for raw in raw_attributes:
        if 'type' not in raw:
            continue
        attribute_type = raw.get('type')
        if attribute_type is None or attribute_type == '':
            continue
        collected.append(
            Attribute(name=raw.get('name'),
                      value=raw.get('value'),
                      type=attribute_type,
                      url=raw.get('url'),
                      source=raw.get('source')))
    return collected