def insertDrugs(uri, keyspace, num_dr, num_threads, ctn, session):
    """Insert DGIdb drugs from the local TSV as `drug` entities.

    Rows are read with ``openFile`` from the biograkn-covid dataset folder,
    turned into one Graql ``insert`` query each, grouped into batches of
    ``ctn`` queries and handed to ``batch_job`` on a thread pool.

    Args:
        uri: Unused in this function.
        keyspace: Unused in this function.
        num_dr: Passed to ``openFile`` and also used to cap the rows consumed.
        num_threads: Number of worker threads in the pool.
        ctn: Number of queries per batch (a value of 0 raises
            ``ZeroDivisionError`` as soon as there is a drug to batch).
        session: Forwarded as the first argument of every ``batch_job`` call.
    """
    file = '../biograkn-covid/Dataset/DGIdb/dgidb_drugs.tsv'
    print(' ')
    print('Opening DGIdb...')
    print(' ')
    # NOTE(review): openFile presumably returns a sliceable sequence of
    # already-split rows - confirm against its definition.
    raw_file = openFile(file, num_dr)

    drugs = []
    for i in raw_file[:num_dr]:
        data = {}
        # Only surrounding double quotes are stripped; embedded ones are kept.
        data['drug-claim-name'] = i[0].strip('"')
        data['drug-name'] = i[1].strip('"')
        data['chembl-id'] = i[2]
        data['drug-claim-source'] = i[3]
        drugs.append(data)

    counter = 0
    batches = []
    batches2 = []
    # Writes are delegated to batch_job together with the session; this
    # function opens no transaction of its own, so nothing is left dangling.
    pool = ThreadPool(num_threads)
    for d in drugs:
        counter = counter + 1
        graql = f'''insert $d isa drug, has drug-claim-name "{d['drug-claim-name']}", has drug-name "{d['drug-name']}", has chembl-id "{d['chembl-id']}", has drug-claim-source "{d['drug-claim-source']}";'''
        batches.append(graql)
        if counter % ctn == 0:
            batches2.append(batches)
            batches = []
    # The trailing (possibly empty) batch is always submitted as well.
    batches2.append(batches)
    pool.map(partial(batch_job, session), batches2)
    pool.close()
    pool.join()
    print('Drugs committed!')
def filterHomoSapiens(num_path):
    """Return the Homo sapiens pathway associations from the local Reactome TSV.

    Reads ``Dataset/Reactome/UniProt2Reactome_All_Levels.tsv`` through
    ``openFile`` and keeps, among the first ``num_path`` rows, those whose
    sixth column equals ``"Homo sapiens"``.

    Args:
        num_path: Passed to ``openFile`` and also used to cap the rows scanned.

    Returns:
        A list of dicts with the keys ``'uniprot-id'``, ``'pathway-id'``,
        ``'pathway-name'`` and ``'organism'``.
    """
    file = 'Dataset/Reactome/UniProt2Reactome_All_Levels.tsv'
    print(' ')
    print('Opening Reactome...')
    print(' ')
    # NOTE(review): openFile presumably returns a sliceable sequence of
    # already-split rows - confirm against its definition.
    raw_file = openFile(file, num_path)

    pathway_associations = []
    for i in raw_file[:num_path]:
        # The species column must be compared with the real species name;
        # any other spelling filters out every row.
        if i[5] == "Homo sapiens":
            data = {}
            data['uniprot-id'] = i[0].strip('"')
            data['pathway-id'] = i[1].strip('"')
            data['pathway-name'] = i[3]
            data['organism'] = i[5]
            pathway_associations.append(data)
    return pathway_associations
def insertInteractions(uri, database, num_int, num_threads, ctn, session):
    """Download the DGIdb interactions TSV and insert drug-gene interactions.

    The file is fetched with ``wget``, read through ``openFile``, deleted
    again, and every row that has an Entrez id becomes one ``match ... insert``
    query linking a ``gene`` and a ``drug`` in a ``drug-gene-interaction``
    relation. Queries are grouped into batches of ``ctn`` and handed to
    ``batch_job`` on a thread pool.

    Args:
        uri: Unused in this function.
        database: Unused in this function.
        num_int: Passed to ``openFile`` and also used to cap the rows consumed.
        num_threads: Number of worker threads in the pool.
        ctn: Number of queries per batch (a value of 0 raises
            ``ZeroDivisionError`` on the first row with an Entrez id).
        session: Forwarded as the first argument of every ``batch_job`` call.
    """
    batches_pr = []
    # NOTE(review): this disables HTTPS certificate verification for the
    # default context of the whole process, not only for this download.
    ssl._create_default_https_context = ssl._create_unverified_context
    url = "https://www.dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv"
    wget.download(url, 'Dataset/DGIdb/')
    file = 'Dataset/DGIdb/interactions.tsv'
    print(' ')
    print('Opening DGIdb-Interactions...')
    print(' ')
    # NOTE(review): openFile presumably returns a sliceable sequence of
    # already-split rows - confirm against its definition.
    raw_file = openFile(file, num_int)

    interactions = []
    for i in raw_file[:num_int]:
        data = {}
        data['gene-name'] = i[0]
        data['entrez-id'] = i[2]
        data['interaction-type'] = i[4]
        data['drug-claim-name'] = i[5]
        data['drug-name'] = i[7]
        data['chembl-id'] = i[8]
        interactions.append(data)
    # The download is only needed until the rows have been extracted.
    os.remove('Dataset/DGIdb/interactions.tsv')

    counter = 0
    pool = ThreadPool(num_threads)
    batches = []
    for q in interactions:
        # Value comparison: an identity test against a string literal depends
        # on interning and is not a reliable emptiness check.
        if q['entrez-id'] != "":
            counter = counter + 1
            graql = f"""match $g isa gene, has entrez-id "{q['entrez-id']}"; $d isa drug, has drug-claim-name "{q['drug-claim-name']}";"""
            # TODO Insert interaction type as a role
            if q['interaction-type'] == "":
                graql = graql + "insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction;"
            else:
                graql = graql + f"""insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction, has interaction-type "{q['interaction-type']}";"""
            batches.append(graql)
            if counter % ctn == 0:
                batches_pr.append(batches)
                batches = []
    # The trailing (possibly empty) batch is always submitted as well.
    batches_pr.append(batches)
    pool.map(partial(batch_job, session), batches_pr)
    pool.close()
    pool.join()
    print('.....')
    print('Finished migrating Drug Interactions.')
    print('.....')
def filterHomoSapiens(num_path):
    """Download the Reactome mapping and return its Homo sapiens associations.

    Fetches ``UniProt2Reactome_All_Levels.txt`` with ``wget`` into
    ``Dataset/Reactome/``, reads it through ``openFile`` and keeps, among the
    first ``num_path`` rows, those whose sixth column equals
    ``"Homo sapiens"``. The downloaded file is left on disk.

    Args:
        num_path: Passed to ``openFile`` and also used to cap the rows scanned.

    Returns:
        A list of dicts with the keys ``'uniprot-id'``, ``'pathway-id'``,
        ``'pathway-name'`` and ``'organism'``.
    """
    url = "https://reactome.org/download/current/UniProt2Reactome_All_Levels.txt"
    wget.download(url, 'Dataset/Reactome/')
    file = 'Dataset/Reactome/UniProt2Reactome_All_Levels.txt'
    print(' ')
    print('Opening Reactome...')
    print(' ')
    # NOTE(review): openFile presumably returns a sliceable sequence of
    # already-split rows - confirm against its definition.
    raw_file = openFile(file, num_path)

    pathway_associations = []
    for i in raw_file[:num_path]:
        # The species column must be compared with the real species name;
        # any other spelling filters out every row.
        if i[5] == "Homo sapiens":
            data = {}
            data['uniprot-id'] = i[0].strip('"')
            data['pathway-id'] = i[1].strip('"')
            data['pathway-name'] = i[3]
            data['organism'] = i[5]
            pathway_associations.append(data)
    return pathway_associations
def insertInteractions(uri, keyspace, num_int, num_threads, ctn, session):
    """Insert DGIdb drug-gene interactions from the local TSV.

    Rows are read with ``openFile`` from the biograkn-covid dataset folder;
    every row that has an Entrez id becomes one ``match ... insert`` query
    linking a ``gene`` and a ``drug`` in a ``drug-gene-interaction`` relation.
    Queries are grouped into batches of ``ctn`` and handed to ``batch_job`` on
    a thread pool.

    Args:
        uri: Unused in this function.
        keyspace: Unused in this function.
        num_int: Passed to ``openFile`` and also used to cap the rows consumed.
        num_threads: Number of worker threads in the pool.
        ctn: Number of queries per batch (a value of 0 raises
            ``ZeroDivisionError`` on the first row with an Entrez id).
        session: Forwarded as the first argument of every ``batch_job`` call.
    """
    batches_pr = []
    file = '../biograkn-covid/Dataset/DGIdb/dgidb_interactions.tsv'
    print(' ')
    print('Opening DGIdb-Interactions...')
    print(' ')
    # NOTE(review): openFile presumably returns a sliceable sequence of
    # already-split rows - confirm against its definition.
    raw_file = openFile(file, num_int)

    interactions = []
    for i in raw_file[:num_int]:
        data = {}
        data['gene-name'] = i[0]
        data['entrez-id'] = i[2]
        data['interaction-type'] = i[4]
        data['drug-claim-name'] = i[5]
        data['drug-name'] = i[7]
        data['chembl-id'] = i[8]
        interactions.append(data)

    counter = 0
    pool = ThreadPool(num_threads)
    batches = []
    for q in interactions:
        # Value comparison: an identity test against a string literal depends
        # on interning and is not a reliable emptiness check.
        if q['entrez-id'] != "":
            counter = counter + 1
            graql = f"""match $g isa gene, has entrez-id "{q['entrez-id']}"; $d isa drug, has drug-claim-name "{q['drug-claim-name']}";"""
            # TODO Insert interaction type as a role
            if q['interaction-type'] == "":
                graql = graql + "insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction;"
            else:
                graql = graql + f"""insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction, has interaction-type "{q['interaction-type']}";"""
            batches.append(graql)
            if counter % ctn == 0:
                batches_pr.append(batches)
                batches = []
    # The trailing (possibly empty) batch is always submitted as well.
    batches_pr.append(batches)
    pool.map(partial(batch_job, session), batches_pr)
    pool.close()
    pool.join()
    print('.....')
    print('Finished migrating Drug Interactions.')
    print('.....')
def insert_interactions(session, num_int, num_threads, batch_size):
    """Download the DGIdb interactions TSV and write drug-gene interactions.

    The file is fetched with ``wget``, read through ``openFile`` and removed
    again. Each row with a non-empty Entrez id yields one TypeQL
    ``match ... insert`` query; queries are collected into lists of
    ``batch_size`` and passed to ``write_batch`` on a thread pool.

    Args:
        session: Forwarded as the first argument of every ``write_batch`` call.
        num_int: Passed to ``openFile`` and also used to cap the rows consumed.
        num_threads: Number of worker threads in the pool.
        batch_size: Number of queries that closes a batch.
    """
    print(' Downloading drug-gene interactions dataset')
    # NOTE(review): switches the process-wide default HTTPS context to an
    # unverified one before downloading.
    ssl._create_default_https_context = ssl._create_unverified_context
    source_url = "https://www.dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv"
    wget.download(source_url, 'Dataset/DGIdb/')
    print(' Finished downloading')

    tsv_path = 'Dataset/DGIdb/interactions.tsv'
    rows = openFile(tsv_path, num_int)
    records = [
        {
            'gene-name': row[0],
            'entrez-id': row[2],
            'interaction-type': row[4],
            'drug-claim-name': row[5],
            'drug-name': row[7],
            'chembl-id': row[8],
        }
        for row in rows[:num_int]
    ]
    os.remove('Dataset/DGIdb/interactions.tsv')
    print(' Starting with drug-gene interactions.')

    grouped = []
    current = []
    total = 0
    for record in records:
        entrez_id = record['entrez-id']
        if entrez_id == "":
            # Rows without an Entrez id cannot be matched to a gene.
            continue
        claim_name = record['drug-claim-name']
        kind = record['interaction-type']
        query = f'match $g isa gene, has entrez-id "{entrez_id}"; $d isa drug, has drug-claim-name "{claim_name}";'
        # TODO Insert interaction type as a role
        if kind == "":
            query += "insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction;"
        else:
            query += f'insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction, has interaction-type "{kind}";'
        current.append(query)
        total += 1
        if len(current) == batch_size:
            grouped.append(current)
            current = []
    # Whatever is left (possibly nothing) goes out as the last batch.
    grouped.append(current)

    workers = ThreadPool(num_threads)
    workers.map(partial(write_batch, session), grouped)
    workers.close()
    workers.join()
    print(f' Finished drug-gene interactions. ({total} entries) ')
def insertDrugs(uri, database, num_dr, num_threads, ctn, session):
    """Download the DGIdb drugs TSV and insert each row as a `drug` entity.

    The file is fetched with ``get_file``, read through ``openFile`` and
    removed again. One Graql ``insert`` query is built per row; queries are
    grouped into batches of ``ctn`` and passed to ``batch_job`` on a thread
    pool.

    Args:
        uri: Unused in this function.
        database: Unused in this function.
        num_dr: Passed to ``openFile`` and also used to cap the rows consumed.
        num_threads: Number of worker threads in the pool.
        ctn: Number of queries per batch.
        session: Forwarded as the first argument of every ``batch_job`` call.
    """
    # from Migrators.Helpers.get_file import get_file
    get_file("https://www.dgidb.org/data/monthly_tsvs/2021-Jan/drugs.tsv", "Dataset/DGIdb/")
    tsv_path = 'Dataset/DGIdb/drugs.tsv'
    print(' ')
    print('Opening DGIdb...')
    print(' ')
    rows = openFile(tsv_path, num_dr)
    records = [
        {
            # Only surrounding double quotes are stripped from the names.
            'drug-claim-name': row[0].strip('"'),
            'drug-name': row[1].strip('"'),
            'chembl-id': row[2],
            'drug-claim-source': row[3],
        }
        for row in rows[:num_dr]
    ]
    os.remove('Dataset/DGIdb/drugs.tsv')

    current = []
    grouped = []
    workers = ThreadPool(num_threads)
    for position, record in enumerate(records, start=1):
        claim_name = record['drug-claim-name']
        drug_name = record['drug-name']
        chembl_id = record['chembl-id']
        claim_source = record['drug-claim-source']
        current.append(
            f'insert $d isa drug, has drug-claim-name "{claim_name}", has drug-name "{drug_name}", has chembl-id "{chembl_id}", has drug-claim-source "{claim_source}";'
        )
        # Close the batch after every ctn-th drug.
        if position % ctn == 0:
            grouped.append(current)
            current = []
    # Whatever is left (possibly nothing) goes out as the last batch.
    grouped.append(current)
    workers.map(partial(batch_job, session), grouped)
    workers.close()
    workers.join()
    print('Drugs committed!')
def insert_drugs(session, num_dr, num_threads, batch_size):
    """Download the DGIdb drugs TSV and write each row as a `drug` entity.

    The file is fetched with ``get_file``, read through ``openFile`` and
    removed again. One TypeQL ``insert`` query is built per row; queries are
    collected into lists of ``batch_size`` and passed to ``write_batch`` on a
    thread pool.

    Args:
        session: Forwarded as the first argument of every ``write_batch`` call.
        num_dr: Passed to ``openFile`` and also used to cap the rows consumed.
        num_threads: Number of worker threads in the pool.
        batch_size: Number of queries that closes a batch.
    """
    # from Migrators.Helpers.get_file import get_file
    print(' Downloading dataset')
    get_file("https://www.dgidb.org/data/monthly_tsvs/2021-Jan/drugs.tsv", "Dataset/DGIdb/")
    print(' Finished downloading')

    tsv_path = 'Dataset/DGIdb/drugs.tsv'
    rows = openFile(tsv_path, num_dr)
    records = [
        {
            # Only surrounding double quotes are stripped from the names.
            'drug-claim-name': row[0].strip('"'),
            'drug-name': row[1].strip('"'),
            'chembl-id': row[2],
            'drug-claim-source': row[3],
        }
        for row in rows[:num_dr]
    ]
    os.remove('Dataset/DGIdb/drugs.tsv')
    print(' Starting with drugs.')

    current = []
    grouped = []
    total = 0
    for record in records:
        claim_name = record['drug-claim-name']
        drug_name = record['drug-name']
        chembl_id = record['chembl-id']
        claim_source = record['drug-claim-source']
        current.append(
            f'insert $d isa drug, has drug-claim-name "{claim_name}", has drug-name "{drug_name}", has chembl-id "{chembl_id}", has drug-claim-source "{claim_source}";'
        )
        total += 1
        if len(current) == batch_size:
            grouped.append(current)
            current = []
    # Whatever is left (possibly nothing) goes out as the last batch.
    grouped.append(current)

    workers = ThreadPool(num_threads)
    workers.map(partial(write_batch, session), grouped)
    workers.close()
    workers.join()
    print(f' Drugs inserted! ({total} entries)')