Beispiel #1
0
def insertDrugs(uri, keyspace, num_dr, num_threads, ctn, session):
    """Insert DGIdb drugs from the local TSV file as ``drug`` entities.

    Reads rows through ``openFile``, builds one Graql ``insert`` query per
    row, groups the queries into batches and hands every batch to
    ``batch_job`` on a thread pool.

    Args:
        uri: Unused here; kept so existing callers keep working.
        keyspace: Unused here; kept so existing callers keep working.
        num_dr: Maximum number of rows to take from the file.
        num_threads: Number of worker threads in the pool.
        ctn: Number of queries per batch.
        session: Session object forwarded to ``batch_job`` for each batch.
    """
    file = '../biograkn-covid/Dataset/DGIdb/dgidb_drugs.tsv'
    print('  ')
    print('Opening DGIdb...')
    print('  ')
    raw_file = openFile(file, num_dr)
    drugs = [
        {
            # Only surrounding quotes are stripped; embedded quotes are kept.
            'drug-claim-name': row[0].strip('"'),
            'drug-name': row[1].strip('"'),
            'chembl-id': row[2],
            'drug-claim-source': row[3],
        }
        for row in raw_file[:num_dr]
    ]

    # NOTE: no transaction is opened in this function. An earlier revision
    # opened a write transaction here that was never used, committed or
    # closed; all writes go through batch_job.
    batches = []
    batch = []
    for count, d in enumerate(drugs, start=1):
        # NOTE(review): values are interpolated verbatim; a value containing
        # a double quote would produce a malformed query - confirm the data.
        batch.append(
            f'''insert $d isa drug, has drug-claim-name "{d['drug-claim-name']}", has drug-name "{d['drug-name']}", has chembl-id "{d['chembl-id']}", has drug-claim-source "{d['drug-claim-source']}";'''
        )
        if count % ctn == 0:
            batches.append(batch)
            batch = []
    # Only submit the remainder when it actually holds queries.
    if batch:
        batches.append(batch)

    pool = ThreadPool(num_threads)
    try:
        pool.map(partial(batch_job, session), batches)
    finally:
        # Always release the worker threads, even if a batch failed.
        pool.close()
        pool.join()
    print('Drugs committed!')
Beispiel #2
0
def filterHomoSapiens(num_path):
    """Return the human pathway associations from the local Reactome TSV.

    Reads rows through ``openFile`` and keeps those whose sixth column
    (index 5) equals ``"Homo sapiens"``.

    Args:
        num_path: Maximum number of rows to inspect.

    Returns:
        A list of dicts with the keys ``uniprot-id``, ``pathway-id``,
        ``pathway-name`` and ``organism``.
    """
    file = 'Dataset/Reactome/UniProt2Reactome_All_Levels.tsv'
    print('  ')
    print('Opening Reactome...')
    print('  ')
    raw_file = openFile(file, num_path)
    pathway_associations = []
    for row in raw_file[:num_path]:
        # The species literal was previously corrupted to "H**o sapiens",
        # which can never match a row, so the result was always empty.
        if row[5] == "Homo sapiens":
            pathway_associations.append({
                'uniprot-id': row[0].strip('"'),
                'pathway-id': row[1].strip('"'),
                'pathway-name': row[3],
                'organism': row[5],
            })
    return pathway_associations
Beispiel #3
0
def insertInteractions(uri, database, num_int, num_threads, ctn, session):
    """Download DGIdb interactions and insert drug-gene interaction relations.

    Downloads the interactions TSV, reads rows through ``openFile``, deletes
    the downloaded file, then builds one ``match ... insert`` query per row
    that has an Entrez id. The queries are grouped into batches and each
    batch is handed to ``batch_job`` on a thread pool.

    Args:
        uri: Unused here; kept so existing callers keep working.
        database: Unused here; kept so existing callers keep working.
        num_int: Maximum number of rows to take from the file.
        num_threads: Number of worker threads in the pool.
        ctn: Number of queries per batch.
        session: Session object forwarded to ``batch_job`` for each batch.
    """
    # SECURITY NOTE: this replaces the default HTTPS context factory with the
    # unverified one, so certificate checks are skipped for the download below
    # and for later default-context HTTPS requests in this process.
    ssl._create_default_https_context = ssl._create_unverified_context
    url = "https://www.dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv"
    wget.download(url, 'Dataset/DGIdb/')

    file = 'Dataset/DGIdb/interactions.tsv'
    print('  ')
    print('Opening DGIdb-Interactions...')
    print('  ')
    raw_file = openFile(file, num_int)

    interactions = [
        {
            'gene-name': row[0],
            'entrez-id': row[2],
            'interaction-type': row[4],
            'drug-claim-name': row[5],
            'drug-name': row[7],
            'chembl-id': row[8],
        }
        for row in raw_file[:num_int]
    ]
    os.remove('Dataset/DGIdb/interactions.tsv')

    batches_pr = []
    batches = []
    counter = 0
    for q in interactions:
        # Compare by value: `is not ""` tested object identity, which is
        # implementation-dependent and a SyntaxWarning on Python 3.8+.
        if q['entrez-id'] == "":
            continue
        counter += 1
        graql = f"""match $g isa gene, has entrez-id "{q['entrez-id']}"; $d isa drug, has drug-claim-name "{q['drug-claim-name']}";"""
        # TODO Insert interaction type as a role
        if q['interaction-type'] == "":
            graql += "insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction;"
        else:
            graql += f"""insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction, has interaction-type "{q['interaction-type']}";"""
        batches.append(graql)
        if counter % ctn == 0:
            batches_pr.append(batches)
            batches = []
    # Only submit the remainder when it actually holds queries.
    if batches:
        batches_pr.append(batches)

    pool = ThreadPool(num_threads)
    try:
        pool.map(partial(batch_job, session), batches_pr)
    finally:
        # Always release the worker threads, even if a batch failed.
        pool.close()
        pool.join()
    print('.....')
    print('Finished migrating Drug Interactions.')
    print('.....')
Beispiel #4
0
def filterHomoSapiens(num_path):
    """Download the Reactome mapping and return the human pathway associations.

    Downloads ``UniProt2Reactome_All_Levels.txt``, reads rows through
    ``openFile`` and keeps those whose sixth column (index 5) equals
    ``"Homo sapiens"``.

    Args:
        num_path: Maximum number of rows to inspect.

    Returns:
        A list of dicts with the keys ``uniprot-id``, ``pathway-id``,
        ``pathway-name`` and ``organism``.
    """
    url = "https://reactome.org/download/current/UniProt2Reactome_All_Levels.txt"
    wget.download(url, 'Dataset/Reactome/')
    file = 'Dataset/Reactome/UniProt2Reactome_All_Levels.txt'
    print('  ')
    print('Opening Reactome...')
    print('  ')
    raw_file = openFile(file, num_path)
    pathway_associations = []
    for row in raw_file[:num_path]:
        # The species literal was previously corrupted to "H**o sapiens",
        # which can never match a row, so the result was always empty.
        if row[5] == "Homo sapiens":
            pathway_associations.append({
                'uniprot-id': row[0].strip('"'),
                'pathway-id': row[1].strip('"'),
                'pathway-name': row[3],
                'organism': row[5],
            })
    return pathway_associations
Beispiel #5
0
def insertInteractions(uri, keyspace, num_int, num_threads, ctn, session):
    """Insert drug-gene interaction relations from the local DGIdb TSV file.

    Reads rows through ``openFile`` and builds one ``match ... insert`` query
    per row that has an Entrez id. The queries are grouped into batches and
    each batch is handed to ``batch_job`` on a thread pool.

    Args:
        uri: Unused here; kept so existing callers keep working.
        keyspace: Unused here; kept so existing callers keep working.
        num_int: Maximum number of rows to take from the file.
        num_threads: Number of worker threads in the pool.
        ctn: Number of queries per batch.
        session: Session object forwarded to ``batch_job`` for each batch.
    """
    file = '../biograkn-covid/Dataset/DGIdb/dgidb_interactions.tsv'
    print('  ')
    print('Opening DGIdb-Interactions...')
    print('  ')
    raw_file = openFile(file, num_int)

    interactions = [
        {
            'gene-name': row[0],
            'entrez-id': row[2],
            'interaction-type': row[4],
            'drug-claim-name': row[5],
            'drug-name': row[7],
            'chembl-id': row[8],
        }
        for row in raw_file[:num_int]
    ]

    batches_pr = []
    batches = []
    counter = 0
    for q in interactions:
        # Compare by value: `is not ""` tested object identity, which is
        # implementation-dependent and a SyntaxWarning on Python 3.8+.
        if q['entrez-id'] == "":
            continue
        counter += 1
        graql = f"""match $g isa gene, has entrez-id "{q['entrez-id']}"; $d isa drug, has drug-claim-name "{q['drug-claim-name']}";"""
        # TODO Insert interaction type as a role
        if q['interaction-type'] == "":
            graql += "insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction;"
        else:
            graql += f"""insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction, has interaction-type "{q['interaction-type']}";"""
        batches.append(graql)
        if counter % ctn == 0:
            batches_pr.append(batches)
            batches = []
    # Only submit the remainder when it actually holds queries.
    if batches:
        batches_pr.append(batches)

    pool = ThreadPool(num_threads)
    try:
        pool.map(partial(batch_job, session), batches_pr)
    finally:
        # Always release the worker threads, even if a batch failed.
        pool.close()
        pool.join()
    print('.....')
    print('Finished migrating Drug Interactions.')
    print('.....')
def insert_interactions(session, num_int, num_threads, batch_size):
    """Download DGIdb interactions and write them as drug-gene interactions.

    The interactions TSV is downloaded, read through ``openFile`` and deleted
    again. Every row with a non-empty Entrez id becomes one TypeQL
    ``match ... insert`` query; the queries are split into batches of
    ``batch_size`` and each batch is passed to ``write_batch`` on a thread
    pool of ``num_threads`` workers.

    Args:
        session: Session object forwarded to ``write_batch`` for each batch.
        num_int: Maximum number of rows to take from the file.
        num_threads: Number of worker threads in the pool.
        batch_size: Number of queries per batch.
    """
    print('  Downloading drug-gene interactions dataset')
    # Swap in the unverified HTTPS context factory before downloading.
    ssl._create_default_https_context = ssl._create_unverified_context
    wget.download(
        "https://www.dgidb.org/data/monthly_tsvs/2021-Jan/interactions.tsv",
        'Dataset/DGIdb/',
    )
    print('  Finished downloading')
    rows = openFile('Dataset/DGIdb/interactions.tsv', num_int)

    records = [
        {
            'gene-name': row[0],
            'entrez-id': row[2],
            'interaction-type': row[4],
            'drug-claim-name': row[5],
            'drug-name': row[7],
            'chembl-id': row[8],
        }
        for row in rows[:num_int]
    ]
    os.remove('Dataset/DGIdb/interactions.tsv')
    print('  Starting with drug-gene interactions.')

    # One query per record that carries an Entrez id.
    queries = []
    for record in records:
        entrez_id = record['entrez-id']
        if entrez_id == "":
            continue
        match_clause = f"""match $g isa gene, has entrez-id "{entrez_id}"; $d isa drug, has drug-claim-name "{record['drug-claim-name']}";"""
        # TODO Insert interaction type as a role
        insert_clause = "insert $r (target-gene: $g, interacting-drug: $d) isa drug-gene-interaction"
        interaction_type = record['interaction-type']
        if interaction_type != "":
            insert_clause += f''', has interaction-type "{interaction_type}"'''
        queries.append(match_clause + insert_clause + ";")
    total = len(queries)

    # Cut the queries into full batches; the remainder (possibly empty) is
    # always appended as the last batch.
    batches = []
    pending = []
    for query in queries:
        pending.append(query)
        if len(pending) == batch_size:
            batches.append(pending)
            pending = []
    batches.append(pending)

    workers = ThreadPool(num_threads)
    workers.map(partial(write_batch, session), batches)
    workers.close()
    workers.join()
    print(f'  Finished drug-gene interactions. ({total} entries) ')
Beispiel #7
0
def insertDrugs(uri, database, num_dr, num_threads, ctn, session):
    """Download the DGIdb drug list and insert it as ``drug`` entities.

    The drugs TSV is fetched with ``get_file``, read through ``openFile`` and
    deleted again. One Graql ``insert`` query is built per row; the queries
    are grouped into batches of ``ctn`` and each batch is passed to
    ``batch_job`` on a thread pool of ``num_threads`` workers.

    Args:
        uri: Not used by this function.
        database: Not used by this function.
        num_dr: Maximum number of rows to take from the file.
        num_threads: Number of worker threads in the pool.
        ctn: Number of queries per batch.
        session: Session object forwarded to ``batch_job`` for each batch.
    """
    get_file("https://www.dgidb.org/data/monthly_tsvs/2021-Jan/drugs.tsv",
             "Dataset/DGIdb/")

    print('  ')
    print('Opening DGIdb...')
    print('  ')
    rows = openFile('Dataset/DGIdb/drugs.tsv', num_dr)
    records = [
        {
            'drug-claim-name': row[0].strip('"'),
            'drug-name': row[1].strip('"'),
            'chembl-id': row[2],
            'drug-claim-source': row[3],
        }
        for row in rows[:num_dr]
    ]
    os.remove('Dataset/DGIdb/drugs.tsv')

    workers = ThreadPool(num_threads)
    chunks = []
    current = []
    for position, drug in enumerate(records, start=1):
        current.append(
            f'''insert $d isa drug, has drug-claim-name "{drug['drug-claim-name']}", has drug-name "{drug['drug-name']}", has chembl-id "{drug['chembl-id']}", has drug-claim-source "{drug['drug-claim-source']}";'''
        )
        # Close the chunk after every ctn-th query.
        if position % ctn == 0:
            chunks.append(current)
            current = []
    # The remainder (possibly empty) is always submitted as the last chunk.
    chunks.append(current)

    workers.map(partial(batch_job, session), chunks)
    workers.close()
    workers.join()
    print('Drugs committed!')
def insert_drugs(session, num_dr, num_threads, batch_size):
    """Download the DGIdb drug list and write it as ``drug`` entities.

    The drugs TSV is fetched with ``get_file``, read through ``openFile`` and
    deleted again. One TypeQL ``insert`` query is built per row; the queries
    are split into batches of ``batch_size`` and each batch is passed to
    ``write_batch`` on a thread pool of ``num_threads`` workers.

    Args:
        session: Session object forwarded to ``write_batch`` for each batch.
        num_dr: Maximum number of rows to take from the file.
        num_threads: Number of worker threads in the pool.
        batch_size: Number of queries per batch.
    """
    print('  Downloading dataset')
    get_file("https://www.dgidb.org/data/monthly_tsvs/2021-Jan/drugs.tsv",
             "Dataset/DGIdb/")
    print('  Finished downloading')

    rows = openFile('Dataset/DGIdb/drugs.tsv', num_dr)
    records = [
        {
            'drug-claim-name': row[0].strip('"'),
            'drug-name': row[1].strip('"'),
            'chembl-id': row[2],
            'drug-claim-source': row[3],
        }
        for row in rows[:num_dr]
    ]
    os.remove('Dataset/DGIdb/drugs.tsv')
    print('  Starting with drugs.')

    queries = [
        f'''insert $d isa drug, has drug-claim-name "{drug['drug-claim-name']}", has drug-name "{drug['drug-name']}", has chembl-id "{drug['chembl-id']}", has drug-claim-source "{drug['drug-claim-source']}";'''
        for drug in records
    ]
    total = len(queries)

    # Cut the queries into full batches; the remainder (possibly empty) is
    # always appended as the last batch.
    batches = []
    pending = []
    for query in queries:
        pending.append(query)
        if len(pending) == batch_size:
            batches.append(pending)
            pending = []
    batches.append(pending)

    workers = ThreadPool(num_threads)
    workers.map(partial(write_batch, session), batches)
    workers.close()
    workers.join()
    print(f'  Drugs inserted! ({total} entries)')