Esempio n. 1
0
def save_network(network: Network, config: Dict):
    """Export ``network`` as Neo4j bulk-import CSV files plus helper scripts.

    The following files are written into ``config['output-path']``:

    * ``nodes_<label>.csv`` for every node label that has at least one node
      (``;`` in a label is replaced by ``_`` in the file name),
    * ``rel_<label>.csv`` for every edge label of the built-in property table
      and for every additional edge label present in the network,
    * ``create_indices.cypher`` with one uniqueness constraint on ``_id`` per
      individual node label,
    * ``import_admin.bat`` and ``import_admin.sh`` which invoke
      ``neo4j-admin import`` on the generated CSV files.

    :param network: graph to export; it must provide ``node_labels()``,
        ``get_nodes_by_label()``, ``edge_labels()``, ``get_edges_by_label()``
        and ``get_node_by_id()``.
    :param config: settings with the key ``'output-path'`` and a ``'Neo4j'``
        mapping holding ``'database-path'``, ``'database-name'``,
        ``'bin-path'``, ``'user'`` and ``'password'``.
    :raises KeyError: if a required configuration key is missing.
    """
    output_path = config['output-path']
    node_import_files = _write_node_files(network, output_path)
    _write_relationship_files(network, output_path)

    # Combined labels are stored ';'-separated; constraints are created per
    # individual label. Sorting keeps the generated script reproducible.
    unique_labels = set()
    for node_label in network.node_labels():
        unique_labels.update(node_label.split(';'))
    with io.open(os.path.join(output_path, 'create_indices.cypher'),
                 'w',
                 encoding='utf-8',
                 newline='') as f:
        for node_label in sorted(unique_labels):
            f.write('create constraint on (p:%s) assert p._id is unique;\n' %
                    node_label)

    neo4j_config = config['Neo4j']
    neo4j_admin = os.path.join(neo4j_config['bin-path'], 'neo4j-admin')
    # The same import invocation is used by the Windows and the shell script.
    import_arguments = ' import --database %s %s %s > import.log\n' % (
        neo4j_config['database-name'],
        ' '.join('--nodes %s' % x for x in node_import_files),
        ' '.join('--relationships rel_%s.csv' % x
                 for x in network.edge_labels()))

    with io.open(os.path.join(output_path, 'import_admin.bat'),
                 'w',
                 encoding='utf-8',
                 newline='') as f:
        f.write('@echo off\n')
        f.write('net stop neo4j\n')
        f.write('rmdir /s "%s"\n' %
                os.path.join(neo4j_config['database-path'],
                             neo4j_config['database-name']))
        f.write('CALL ' + neo4j_admin + import_arguments)
        f.write('net start neo4j\n')
        f.write(os.path.join(neo4j_config['bin-path'], 'cypher-shell'))
        f.write(
            ' -u %s -p %s --non-interactive < create_indices.cypher 1>> import.log 2>&1\n'
            % (neo4j_config['user'], neo4j_config['password']))

    with io.open(os.path.join(output_path, 'import_admin.sh'),
                 'w',
                 encoding='utf-8',
                 newline='') as f:
        f.write(neo4j_admin + import_arguments)


def _write_node_files(network, output_path):
    """Write one ``nodes_<label>.csv`` per non-empty label; return the names."""
    file_names = []
    for label in network.node_labels():
        nodes = set(network.get_nodes_by_label(label))
        if not nodes:
            continue
        file_name = 'nodes_%s.csv' % label.replace(';', '_')
        file_names.append(file_name)
        # Nodes of one label may carry different attributes; the header is the
        # sorted union and missing values are written as empty cells.
        attribute_keys = sorted({key for n in nodes for key in n.attributes})
        with io.open(os.path.join(output_path, file_name),
                     'w',
                     encoding='utf-8',
                     newline='') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"')
            writer.writerow([
                'label_id:ID(Node-ID)', '_id:string', 'ids:string[]',
                'names:string[]'
            ] + ['%s:string' % key for key in attribute_keys] + [':LABEL'])
            for n in nodes:
                writer.writerow(
                    [n.label_id, n.id, ';'.join(n.ids), ';'.join(n.names)] +
                    [n.attributes.get(key) for key in attribute_keys] +
                    [n.label])
    return file_names


def _write_relationship_files(network, output_path):
    """Write one ``rel_<label>.csv`` per edge label."""
    edge_metadata = _edge_metadata()
    # The import scripts reference rel_<label>.csv for every label of the
    # network, so labels missing from the table still get a file (without
    # property columns) instead of leaving a dangling reference.
    labels = list(edge_metadata)
    for label in network.edge_labels():
        if label not in labels:
            labels.append(label)

    for label in labels:
        headers, extractors = edge_metadata.get(label, ([], []))
        with io.open(os.path.join(output_path, 'rel_%s.csv' % label),
                     'w',
                     encoding='utf-8',
                     newline='') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"')
            writer.writerow([':START_ID(Node-ID)'] + headers +
                            [':END_ID(Node-ID)', ':TYPE'])
            for e in network.get_edges_by_label(label):
                # An extractor is either an attribute key or a callable that
                # derives the cell value from the whole attribute mapping.
                values = [
                    extractor(e.attributes)
                    if callable(extractor) else e.attributes.get(extractor)
                    for extractor in extractors
                ]
                source_id = network.get_node_by_id(
                    e.source_node_id, e.source_node_label).label_id
                target_id = network.get_node_by_id(
                    e.target_node_id, e.target_node_label).label_id
                writer.writerow([source_id] + values + [target_id, e.label])


def _known_action_cell(attr):
    """Render ``known_action`` as ``'true'``/``'false'``; ``None`` if absent."""
    if 'known_action' not in attr:
        return None
    return 'true' if attr['known_action'] else 'false'


def _actions_cell(attr):
    """Join ``actions`` with ``;``; ``None`` if the attribute is absent."""
    if 'actions' not in attr:
        return None
    return ';'.join(attr['actions'])


def _edge_metadata():
    """Return ``{edge label: [header columns, value extractors]}``."""
    # NOTE(review): an earlier comment on HAS_MOLECULAR_FUNCTION read "pmid
    # int now not string", yet only CODES declares 'pmid:int' - confirm which
    # column type is intended.
    return {
        'HAS_MOLECULAR_FUNCTION': [['source:string', 'pmid:string'],
                                   ['source', 'pmid']],
        'BELONGS_TO_BIOLOGICAL_PROCESS': [['source:string', 'pmid:string'],
                                          ['source', 'pmid']],
        'IN_CELLULAR_COMPONENT': [['source:string', 'pmid:string'],
                                  ['source', 'pmid']],
        'INDICATES': [['source:string'], ['source']],
        'REGULATES': [['source:string', 'pmid:string'], ['source', 'pmid']],
        'TRANSCRIBES': [['source:string'], ['source']],
        'CONTRAINDICATES': [['source:string'], ['source']],
        'INDUCES': [['source:string'], ['source']],
        'CODES': [['source:string', 'pmid:int'], ['source', 'pmid']],
        'EQTL': [[
            'source:string', 'pvalue:string', 'snp_chr:string',
            'cis_trans:string'
        ], ['source', 'pvalue', 'snp_chr', 'cis_trans']],
        'INTERACTS': [['source:string', 'description:string'],
                      ['source', 'description']],
        'TARGETS': [[
            'source:string', 'known_action:boolean', 'actions:string[]',
            'simplified_action:string'
        ], ['source', _known_action_cell, _actions_cell,
            'simplified_action']],
        'ASSOCIATES_WITH': [[
            'source:string', 'num_pmids:int', 'num_snps:int', 'score:string'
        ], ['source', 'num_pmids', 'num_snps', 'score']],
        'HAS_ADR': [['source:string'], ['source']],
        'ASSOCIATED_WITH_ADR': [['source:string'], ['source']],
    }
Esempio n. 2
0
    # Read both endpoints of the association from its child elements.
    from_id = association.find('from_code').text
    from_name = association.find('from_name').text
    to_namespace = association.find('to_namespace').text
    to_id = association.find('to_code').text
    to_name = association.find('to_name').text
    # Only associations going from an RxNorm code to a MeSH code are used.
    if from_namespace != 'RxNorm' or to_namespace != 'MeSH':
        continue
    # Only the association types that are mapped to an edge label below.
    if association_type not in ['induces', 'CI_with', 'may_treat']:
        continue
    # Create the drug node on first sight of its RxNorm code, otherwise reuse
    # the node that was added to the network earlier.
    drug_id = 'RxNorm:%s' % from_id
    if from_id not in added_rxnorm_drugs:
        drug = Drug([drug_id], [from_name])
        network.add_node(drug)
        added_rxnorm_drugs.add(from_id)
    else:
        drug = network.get_node_by_id(drug_id, 'Drug')
    # Same create-or-reuse handling for the disease, keyed by its MeSH code.
    disease_id = 'MeSH:%s' % to_id
    if to_id not in added_mesh_diseases:
        disease = Disease([disease_id], [to_name])
        network.add_node(disease)
        added_mesh_diseases.add(to_id)
    else:
        disease = network.get_node_by_id(disease_id, 'Disease')
    # Every edge created here carries the same attributes (source 'MEDRT').
    rel = {'source': 'MEDRT'}
    # Translate the association type into the edge label of the graph.
    if association_type == 'induces':
        network.add_edge(Edge(drug, disease, 'INDUCES', rel))
    elif association_type == 'CI_with':
        network.add_edge(Edge(drug, disease, 'CONTRAINDICATES', rel))
    elif association_type == 'may_treat':
        network.add_edge(Edge(drug, disease, 'INDICATES', rel))
Esempio n. 3
0
            drug_count += 1
            indications = {
                x.target_node_id
                for x in network.get_node_edges_by_label(drug, 'INDICATES')
            }
            contraindications = {
                x.target_node_id
                for x in network.get_node_edges_by_label(
                    drug, 'CONTRAINDICATES')
            }
            if not indications.isdisjoint(contraindications):
                drug_check_failed_count += 1

                for intersection in indications.intersection(
                        contraindications):
                    disease = network.get_node_by_id(intersection, 'Disease')
                    drug_text = '%s<br/>%s' % (node_ids_to_links(
                        drug.ids), '<br/>'.join(drug.names))
                    indications_text = '<br/>'.join([
                        '%s: %s -> %s' %
                        (x.attributes['source'],
                         node_ids_to_links([x.source_node_id]),
                         node_ids_to_links([x.target_node_id]))
                        for x in network.get_edges_from_to(
                            drug, disease, 'INDICATES')
                    ])
                    contraindications_text = '<br/>'.join([
                        '%s: %s -> %s' %
                        (x.attributes['source'],
                         node_ids_to_links([x.source_node_id]),
                         node_ids_to_links([x.target_node_id]))
Esempio n. 4
0
    # Look up the id, OBO namespace and label child elements of this class.
    id_node = owl_class.find(obo_in_owl_ns + 'id')
    obo_ns_node = owl_class.find(obo_in_owl_ns + 'hasOBONamespace')
    label_node = owl_class.find(rdfs_ns + 'label')
    # Classes lacking an id or a namespace element are not added.
    if id_node is not None and obo_ns_node is not None:
        # NOTE(review): label_node is dereferenced without a None check,
        # unlike id_node and obo_ns_node - confirm a label is always present.
        go_class = GOClass([id_node.text], [label_node.text])
        network.add_node(go_class)
        # Remember the namespace of the class under its id.
        go_class_ns_lookup[id_node.text] = obo_ns_node.text
        # Every alternative id is recorded as a redirect to the primary id.
        for alternative_id_node in owl_class.findall(obo_in_owl_ns + 'hasAlternativeId'):
            go_class_redirects[alternative_id_node.text] = id_node.text

# Import the gene -> GO class annotations from the tab-separated annotation
# file and store the resulting graph.

# Upper-cased GO namespace -> prefix that turns it into the edge label. A
# namespace not listed here is used as the edge label unchanged.
edge_label_prefixes = {
    'MOLECULAR_FUNCTION': 'HAS_',
    'BIOLOGICAL_PROCESS': 'BELONGS_TO_',
    'CELLULAR_COMPONENT': 'IN_',
}

with io.open(annotations_file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    for row in reader:
        # Skip blank lines (the csv module yields [] for them) and lines
        # starting with '!'. Testing row[0] rather than row[0][0] also keeps
        # an empty first column from raising IndexError.
        if not row or row[0].startswith('!'):
            continue
        # Only annotations for taxon 9606 are imported.
        if row[12] != 'taxon:9606':
            continue
        gene = Gene(['UniProtKB:%s' % row[1], 'HGNC:%s' % row[2]], [])
        network.add_node(gene)
        go_id = row[4]
        if go_id not in go_class_ns_lookup:
            # Unknown ids are expected to be alternative ids of a known class.
            # An id without a redirect raises KeyError rather than being
            # dropped silently.
            go_id = go_class_redirects[go_id]
        namespace = go_class_ns_lookup[go_id].upper()
        label = edge_label_prefixes.get(namespace, '') + namespace
        e = Edge(gene, network.get_node_by_id(go_id, 'GOClass'), label,
                 {'source': 'GO,%s' % row[5]})
        network.add_edge(e)

network.save('../data/GO/graph.json')
Esempio n. 5
0
        # Each drug gets a DrugCentral id (column 0) and a DrugBank id
        # (column 1); an RxNorm id is added only when column 2 is non-empty.
        ids = ['DrugCentral:%s' % row[0], 'DrugBank:%s' % row[1]]
        if row[2]:
            ids.append('RxNorm:%s' % row[2])
        # Column 3 is passed as the only entry of the second argument
        # (presumably the drug name - verify against the Drug constructor).
        network.add_node(Drug(ids, [row[3]]))

# Register the DrugCentral indications: every row adds one Disease node and
# links it to its drug with an INDICATES edge.
indications_path = '../data/DrugCentral/drugcentral_indications.csv'
with io.open(indications_path, 'r', encoding='utf-8',
             newline='') as indications_file:
    indication_rows = csv.reader(indications_file,
                                 delimiter=',',
                                 quotechar='"')
    # Discard the first row (presumably the column header).
    next(indication_rows, None)
    for record in indication_rows:
        disease_ids = ['SnoMedCT:%s' % record[2], 'UMLS:%s' % record[3]]
        disease_names = [record[1]]
        indicated_disease = Disease(disease_ids, disease_names)
        network.add_node(indicated_disease)
        # The drug is looked up by the DrugBank id built from column 0.
        treating_drug = network.get_node_by_id('DrugBank:%s' % record[0],
                                               'Drug')
        network.add_edge(
            Edge(treating_drug, indicated_disease, 'INDICATES',
                 {'source': 'DrugCentral'}))

with io.open('../data/DrugCentral/drugcentral_contraindications.csv',
             'r',
             encoding='utf-8',
             newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    next(reader, None)
    for row in reader:
        disease = Disease(['SnoMedCT:%s' % row[2],
                           'UMLS:%s' % row[3]], [row[1]])
        network.add_node(disease)
        drug = network.get_node_by_id('DrugBank:%s' % row[0], 'Drug')
        e = Edge(drug, disease, 'CONTRAINDICATES', {'source': 'DrugCentral'})