def save_network(network: Network, config: Dict):
    """Export ``network`` as Neo4j bulk-import CSV files plus helper scripts.

    The following files are written into ``config['output-path']``:

    * ``nodes_<label>.csv`` for every node label that has at least one node
      (``;`` in a label is replaced by ``_`` in the file name),
    * ``rel_<label>.csv`` for every edge label listed in ``edge_metadata``,
    * ``create_indices.cypher`` with one uniqueness constraint on ``_id`` per
      individual node label,
    * ``import_admin.bat`` and ``import_admin.sh`` which invoke
      ``neo4j-admin import`` on the generated CSV files.

    Args:
        network: The graph to export. It must provide ``node_labels()``,
            ``get_nodes_by_label()``, ``edge_labels()``,
            ``get_edges_by_label()`` and ``get_node_by_id()``.
        config: Configuration mapping. Reads ``'output-path'`` and, below
            ``'Neo4j'``, the keys ``'database-path'``, ``'database-name'``,
            ``'bin-path'``, ``'user'`` and ``'password'``.
    """
    output_path = config['output-path']

    # ------------------------------------------------------------------
    # Save nodes: one CSV per node label, columns are the union of all
    # attribute keys found on the nodes of that label.
    # ------------------------------------------------------------------
    node_import_files = []
    for label in network.node_labels():
        file_name = 'nodes_%s.csv' % label.replace(';', '_')
        nodes = set(network.get_nodes_by_label(label))
        if not nodes:
            # Labels without nodes get no file and are not imported.
            continue
        node_import_files.append(file_name)
        all_attribute_keys = set()
        for n in nodes:
            all_attribute_keys.update(n.attributes.keys())
        # Sorted so that the column order is stable.
        all_attribute_keys = sorted(all_attribute_keys)
        with io.open(os.path.join(output_path, file_name), 'w', encoding='utf-8', newline='') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"')
            writer.writerow(
                ['label_id:ID(Node-ID)', '_id:string', 'ids:string[]', 'names:string[]'] +
                ['%s:string' % x for x in all_attribute_keys] +
                [':LABEL']
            )
            for n in nodes:
                row = [n.label_id, n.id, ';'.join(n.ids), ';'.join(n.names)]
                for key in all_attribute_keys:
                    row.append(n.attributes[key] if key in n.attributes else None)
                row.append(n.label)
                writer.writerow(row)

    def known_action_flag(attr):
        """Return 'true'/'false' for ``known_action`` or None if it is absent."""
        if 'known_action' not in attr:
            return None
        return 'true' if attr['known_action'] else 'false'

    def joined_actions(attr):
        """Return the ``actions`` joined by ';' or None if they are absent."""
        # Previously a missing 'actions' attribute raised a KeyError although
        # a missing 'known_action' on the same edge type was tolerated.
        return ';'.join(attr['actions']) if 'actions' in attr else None

    # Edge label -> [CSV header columns, value extractors]. An extractor is
    # either an attribute key (missing keys become an empty cell) or a
    # callable receiving the edge's attributes.
    # NOTE(review): the original comment on HAS_MOLECULAR_FUNCTION said
    # "pmid int now not string", yet only CODES declares 'pmid:int' - confirm
    # the intended column type before changing it.
    edge_metadata = {
        'HAS_MOLECULAR_FUNCTION': [['source:string', 'pmid:string'], ['source', 'pmid']],
        'BELONGS_TO_BIOLOGICAL_PROCESS': [['source:string', 'pmid:string'], ['source', 'pmid']],
        'IN_CELLULAR_COMPONENT': [['source:string', 'pmid:string'], ['source', 'pmid']],
        'INDICATES': [['source:string'], ['source']],
        'REGULATES': [['source:string', 'pmid:string'], ['source', 'pmid']],
        'TRANSCRIBES': [['source:string'], ['source']],
        'CONTRAINDICATES': [['source:string'], ['source']],
        'INDUCES': [['source:string'], ['source']],
        'CODES': [['source:string', 'pmid:int'], ['source', 'pmid']],
        'EQTL': [
            ['source:string', 'pvalue:string', 'snp_chr:string', 'cis_trans:string'],
            ['source', 'pvalue', 'snp_chr', 'cis_trans']
        ],
        'INTERACTS': [['source:string', 'description:string'], ['source', 'description']],
        'TARGETS': [
            ['source:string', 'known_action:boolean', 'actions:string[]', 'simplified_action:string'],
            ['source', known_action_flag, joined_actions, 'simplified_action']
        ],
        'ASSOCIATES_WITH': [
            ['source:string', 'num_pmids:int', 'num_snps:int', 'score:string'],
            ['source', 'num_pmids', 'num_snps', 'score']
        ],
        'HAS_ADR': [['source:string'], ['source']],
        'ASSOCIATED_WITH_ADR': [['source:string'], ['source']]
    }

    # ------------------------------------------------------------------
    # Save relationships: one CSV per known edge label.
    # ------------------------------------------------------------------
    for edge_label, (header_columns, extractors) in edge_metadata.items():
        file_path = os.path.join(output_path, 'rel_%s.csv' % edge_label)
        with io.open(file_path, 'w', encoding='utf-8', newline='') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"')
            writer.writerow([':START_ID(Node-ID)'] + header_columns + [':END_ID(Node-ID)', ':TYPE'])
            for e in network.get_edges_by_label(edge_label):
                values = []
                for extractor in extractors:
                    if callable(extractor):
                        values.append(extractor(e.attributes))
                    else:
                        values.append(e.attributes[extractor] if extractor in e.attributes else None)
                source_id = network.get_node_by_id(e.source_node_id, e.source_node_label).label_id
                target_id = network.get_node_by_id(e.target_node_id, e.target_node_label).label_id
                writer.writerow([source_id] + values + [target_id, e.label])

    # ------------------------------------------------------------------
    # Uniqueness constraints, one per individual label (combined labels
    # are separated by ';').
    # ------------------------------------------------------------------
    unique_labels = set()
    for node_label in network.node_labels():
        unique_labels.update(node_label.split(';'))
    with io.open(os.path.join(output_path, 'create_indices.cypher'), 'w', encoding='utf-8', newline='') as f:
        # Sorted so that the generated file is identical between runs.
        for node_label in sorted(unique_labels):
            f.write('create constraint on (p:%s) assert p._id is unique;\n' % node_label)

    # ------------------------------------------------------------------
    # Import scripts. Only relationship files that were actually written
    # above may be referenced, otherwise neo4j-admin is pointed at a
    # file that does not exist.
    # ------------------------------------------------------------------
    relationship_files = []
    for edge_label in network.edge_labels():
        if edge_label in edge_metadata:
            relationship_files.append('rel_%s.csv' % edge_label)
        else:
            print('[WARN] No export metadata for edge label %s, its edges are not exported' % edge_label)

    neo4j_config = config['Neo4j']
    neo4j_admin_path = os.path.join(neo4j_config['bin-path'], 'neo4j-admin')
    # Shared tail of the neo4j-admin call, identical for both scripts.
    import_arguments = (
        ' import ' +
        '--database %s ' % neo4j_config['database-name'] +
        ' '.join(['--nodes %s' % x for x in node_import_files]) +
        ' ' +
        ' '.join(['--relationships %s' % x for x in relationship_files]) +
        ' > import.log\n'
    )

    with io.open(os.path.join(output_path, 'import_admin.bat'), 'w', encoding='utf-8', newline='') as f:
        f.write('@echo off\n')
        f.write('net stop neo4j\n')
        f.write('rmdir /s "%s"\n' % os.path.join(neo4j_config['database-path'], neo4j_config['database-name']))
        f.write('CALL ' + neo4j_admin_path)
        f.write(import_arguments)
        f.write('net start neo4j\n')
        f.write(os.path.join(neo4j_config['bin-path'], 'cypher-shell'))
        f.write(
            ' -u %s -p %s --non-interactive < create_indices.cypher 1>> import.log 2>&1\n' %
            (neo4j_config['user'], neo4j_config['password'])
        )

    with io.open(os.path.join(output_path, 'import_admin.sh'), 'w', encoding='utf-8', newline='') as f:
        f.write(neo4j_admin_path)
        f.write(import_arguments)
from_id = association.find('from_code').text from_name = association.find('from_name').text to_namespace = association.find('to_namespace').text to_id = association.find('to_code').text to_name = association.find('to_name').text if from_namespace != 'RxNorm' or to_namespace != 'MeSH': continue if association_type not in ['induces', 'CI_with', 'may_treat']: continue drug_id = 'RxNorm:%s' % from_id if from_id not in added_rxnorm_drugs: drug = Drug([drug_id], [from_name]) network.add_node(drug) added_rxnorm_drugs.add(from_id) else: drug = network.get_node_by_id(drug_id, 'Drug') disease_id = 'MeSH:%s' % to_id if to_id not in added_mesh_diseases: disease = Disease([disease_id], [to_name]) network.add_node(disease) added_mesh_diseases.add(to_id) else: disease = network.get_node_by_id(disease_id, 'Disease') rel = {'source': 'MEDRT'} if association_type == 'induces': network.add_edge(Edge(drug, disease, 'INDUCES', rel)) elif association_type == 'CI_with': network.add_edge(Edge(drug, disease, 'CONTRAINDICATES', rel)) elif association_type == 'may_treat': network.add_edge(Edge(drug, disease, 'INDICATES', rel))
drug_count += 1 indications = { x.target_node_id for x in network.get_node_edges_by_label(drug, 'INDICATES') } contraindications = { x.target_node_id for x in network.get_node_edges_by_label( drug, 'CONTRAINDICATES') } if not indications.isdisjoint(contraindications): drug_check_failed_count += 1 for intersection in indications.intersection( contraindications): disease = network.get_node_by_id(intersection, 'Disease') drug_text = '%s<br/>%s' % (node_ids_to_links( drug.ids), '<br/>'.join(drug.names)) indications_text = '<br/>'.join([ '%s: %s -> %s' % (x.attributes['source'], node_ids_to_links([x.source_node_id]), node_ids_to_links([x.target_node_id])) for x in network.get_edges_from_to( drug, disease, 'INDICATES') ]) contraindications_text = '<br/>'.join([ '%s: %s -> %s' % (x.attributes['source'], node_ids_to_links([x.source_node_id]), node_ids_to_links([x.target_node_id]))
id_node = owl_class.find(obo_in_owl_ns + 'id') obo_ns_node = owl_class.find(obo_in_owl_ns + 'hasOBONamespace') label_node = owl_class.find(rdfs_ns + 'label') if id_node is not None and obo_ns_node is not None: go_class = GOClass([id_node.text], [label_node.text]) network.add_node(go_class) go_class_ns_lookup[id_node.text] = obo_ns_node.text for alternative_id_node in owl_class.findall(obo_in_owl_ns + 'hasAlternativeId'): go_class_redirects[alternative_id_node.text] = id_node.text with io.open(annotations_file, 'r', encoding='utf-8', newline='') as f: reader = csv.reader(f, delimiter='\t', quotechar='"') for row in reader: if not row[0][0].startswith('!') and row[12] == 'taxon:9606': gene = Gene(['UniProtKB:%s' % row[1], 'HGNC:%s' % row[2]], []) network.add_node(gene) if row[4] not in go_class_ns_lookup: # print('[WARN] GO id %s is obsolete, redirecting to %s' % (row[4], go_class_redirects[row[4]])) row[4] = go_class_redirects[row[4]] label = go_class_ns_lookup[row[4]].upper() if label == 'MOLECULAR_FUNCTION': label = 'HAS_' + label elif label == 'BIOLOGICAL_PROCESS': label = 'BELONGS_TO_' + label elif label == 'CELLULAR_COMPONENT': label = 'IN_' + label e = Edge(gene, network.get_node_by_id(row[4], 'GOClass'), label, {'source': 'GO,%s' % row[5]}) network.add_edge(e) network.save('../data/GO/graph.json')
ids = ['DrugCentral:%s' % row[0], 'DrugBank:%s' % row[1]] if row[2]: ids.append('RxNorm:%s' % row[2]) network.add_node(Drug(ids, [row[3]])) with io.open('../data/DrugCentral/drugcentral_indications.csv', 'r', encoding='utf-8', newline='') as f: reader = csv.reader(f, delimiter=',', quotechar='"') next(reader, None) for row in reader: disease = Disease(['SnoMedCT:%s' % row[2], 'UMLS:%s' % row[3]], [row[1]]) network.add_node(disease) drug = network.get_node_by_id('DrugBank:%s' % row[0], 'Drug') e = Edge(drug, disease, 'INDICATES', {'source': 'DrugCentral'}) network.add_edge(e) with io.open('../data/DrugCentral/drugcentral_contraindications.csv', 'r', encoding='utf-8', newline='') as f: reader = csv.reader(f, delimiter=',', quotechar='"') next(reader, None) for row in reader: disease = Disease(['SnoMedCT:%s' % row[2], 'UMLS:%s' % row[3]], [row[1]]) network.add_node(disease) drug = network.get_node_by_id('DrugBank:%s' % row[0], 'Drug') e = Edge(drug, disease, 'CONTRAINDICATES', {'source': 'DrugCentral'})