def go_test(val_graphs, val_ge_split, reload_fle, **kwargs):
    """Run the trained model against validation graphs and write its predictions back to Grakn.

    Args:
        val_graphs: graphs to validate against, passed straight to `pipeline`.
        val_ge_split: train/generalisation split index for the validation graphs.
        reload_fle: saved-model file to reload (training is skipped: do_test=True, save_fle="").
        **kwargs: forwarded verbatim to `pipeline`.

    Returns:
        Tuple of (ge_graphs, [solveds_tr, solveds_ge]) — the predicted graphs and
        the training/generalisation evaluation results reported by `pipeline`.
    """
    # opens session once again, if closed after training
    client = GraknClient(uri=URI)
    session = client.session(keyspace=KEYSPACE)
    ge_graphs, solveds_tr, solveds_ge = pipeline(graphs=val_graphs,  # Run the pipeline with prepared graph
                                                 tr_ge_split=val_ge_split,
                                                 do_test=True,
                                                 save_fle="",
                                                 reload_fle=reload_fle,
                                                 **kwargs)
    with session.transaction().write() as tx:
        # Write predictions to grakn with learned probabilities
        write_predictions_to_grakn(ge_graphs, tx)
    session.close()
    client.close()  # Grakn session will be closed here due to write\insert query
    validation_evals = [solveds_tr, solveds_ge]
    return ge_graphs, validation_evals
def test_match_query(self):
    """A read transaction on an existing keyspace accepts a simple match query."""
    grakn_client = GraknClient("localhost:48555")
    grakn_session = grakn_client.session("define_schema")
    with grakn_session.transaction().read() as read_tx:
        read_tx.query("match $s sub thing; get;")
    grakn_session.close()
    grakn_client.close()
def test_client_session_close(self):
    """Opening a transaction on an already-closed session must raise GraknError."""
    grakn_client = GraknClient('localhost:48555')
    closed_session = grakn_client.session('test')
    closed_session.close()
    with self.assertRaises(GraknError):
        closed_session.transaction().read()
    grakn_client.close()
def dgidbMigrator(uri, keyspace, num_dr, num_int, num_threads, ctn):
    """Migrate the DGIdb dataset (drugs, then drug-gene interactions) into Grakn.

    Args:
        uri: URI of the running Grakn server.
        keyspace: target keyspace.
        num_dr: number of drug rows to insert.
        num_int: number of interaction rows to insert.
        num_threads: thread-pool size used by the insert helpers.
        ctn: queries per batch (commit threshold) used by the insert helpers.
    """
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    # Drugs must exist before interactions can reference them.
    insertDrugs(uri, keyspace, num_dr, num_threads, ctn, session)
    insertInteractions(uri, keyspace, num_int, num_threads, ctn, session)
    session.close()
    client.close()
def diagnosis_example(num_graphs=200,
                      num_processing_steps_tr=5,
                      num_processing_steps_ge=5,
                      num_training_iterations=1000,
                      keyspace=KEYSPACE, uri=URI):
    """
    Run the diagnosis example from start to finish, including traceably ingesting
    predictions back into Grakn

    Args:
        num_graphs: Number of graphs to use for training and testing combined
        num_processing_steps_tr: The number of message-passing steps for training
        num_processing_steps_ge: The number of message-passing steps for testing
        num_training_iterations: The number of training epochs
        keyspace: The name of the keyspace to retrieve example subgraphs from
        uri: The uri of the running Grakn instance

    Returns:
        Final accuracies for training and for testing
    """
    tr_ge_split = int(num_graphs * 0.5)
    generate_example_graphs(num_graphs, keyspace=keyspace, uri=uri)
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    graphs = create_concept_graphs(list(range(num_graphs)), session)
    with session.transaction().read() as tx:
        # Change the terminology here onwards from thing -> node and role -> edge
        node_types = get_thing_types(tx)
        # IDIOM FIX: the original used list comprehensions purely for their
        # side effect (list.remove); a plain loop states the intent and does
        # not build a throwaway list of Nones.
        for el in TYPES_TO_IGNORE:
            node_types.remove(el)
        edge_types = get_role_types(tx)
        for el in ROLES_TO_IGNORE:
            edge_types.remove(el)
        print(f'Found node types: {node_types}')
        print(f'Found edge types: {edge_types}')
    ge_graphs, solveds_tr, solveds_ge = pipeline(graphs,
                                                 tr_ge_split,
                                                 node_types,
                                                 edge_types,
                                                 num_processing_steps_tr=num_processing_steps_tr,
                                                 num_processing_steps_ge=num_processing_steps_ge,
                                                 num_training_iterations=num_training_iterations,
                                                 continuous_attributes=CONTINUOUS_ATTRIBUTES,
                                                 categorical_attributes=CATEGORICAL_ATTRIBUTES,
                                                 output_dir=f"./events/{time.time()}/")
    with session.transaction().write() as tx:
        write_predictions_to_grakn(ge_graphs, tx)
    session.close()
    client.close()
    return solveds_tr, solveds_ge
def test_define_schema(self):
    """Committing a minimal schema definition succeeds."""
    schema_client = GraknClient("localhost:48555")
    schema_session = schema_client.session("define_schema")
    with schema_session.transaction().write() as write_tx:
        write_tx.query("define person sub entity, has name; name sub attribute, datatype string;")
        write_tx.commit()
    schema_session.close()
    schema_client.close()
def reactomeMigrator(uri, keyspace, num_path, num_threads, ctn):
    """Migrate Reactome pathway data into Grakn.

    Filters the dataset to H**o sapiens entries, then inserts pathways followed
    by pathway-protein interactions (pathways must exist first).

    Args:
        uri: URI of the running Grakn server.
        keyspace: target keyspace.
        num_path: number of pathway rows to migrate.
        num_threads: thread-pool size used by the insert helpers.
        ctn: queries per batch (commit threshold) used by the insert helpers.
    """
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    pathway_associations = filterHomoSapiens(num_path)
    insertPathways(uri, keyspace, num_threads, ctn, session, pathway_associations)
    insertPathwayInteractions(uri, keyspace, num_threads, ctn, session, pathway_associations)
    session.close()
    client.close()
def disgenetMigrator(uri, keyspace, num, num_threads, ctn):
    """Migrate gene-disease associations from the Disgenet TSV dump into Grakn.

    Args:
        uri: URI of the running Grakn server.
        keyspace: target keyspace.
        num: number of rows to migrate (0 skips the migration entirely).
        num_threads: size of the thread pool used for batched inserts.
        ctn: number of queries per batch (commit threshold).
    """
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    batches_pr = []
    # BUG FIX: `num is not 0` compared object identity, not value (and is a
    # SyntaxWarning on modern Python); integers must be compared with !=.
    if num != 0:
        print(' ')
        print('Opening Disgenet dataset...')
        print(' ')
        with open('../biograkn-covid/Dataset/Disgenet/all_gene_disease_associations.tsv',
                  'rt', encoding='utf-8') as csvfile:
            # BUG FIX: the source file is tab-separated (.tsv); with a space
            # delimiter the column indices (4, 5, 9) below would not line up.
            csvreader = csv.reader(csvfile, delimiter='\t')
            raw_file = []
            n = 0
            for row in csvreader:
                n = n + 1
                if n != 1:  # skip the header row (was `n is not 1`)
                    raw_file.append(row)
        disgenet = []
        for i in raw_file[:num]:
            data = {}
            data['entrez-id'] = i[0].strip()
            data['gene-symbol'] = i[1]
            data['disease-id'] = i[4]
            data['disease-name'] = i[5]
            data['disgenet-score'] = float(i[9])
            disgenet.append(data)
        # Diseases must exist before the association relations reference them.
        insertDiseases(disgenet, session, num_threads, ctn)
        counter = 0
        pool = ThreadPool(num_threads)
        batches = []
        for q in disgenet:
            counter = counter + 1
            graql = f"""match $g isa gene, has gene-symbol "{q['gene-symbol']}", has entrez-id "{q['entrez-id']}"; $d isa disease, has disease-id "{q['disease-id']}", has disease-name "{q['disease-name']}"; insert $r (associated-gene: $g, associated-disease: $d) isa gene-disease-association, has disgenet-score {q['disgenet-score']};"""
            batches.append(graql)
            del graql
            if counter % ctn == 0:
                batches_pr.append(batches)
                batches = []
        batches_pr.append(batches)  # flush the final partial batch
        pool.map(partial(batch_job, session), batches_pr)
        pool.close()
        pool.join()
        print('.....')
        print('Finished migrating Disgenet.')
        print('.....')
    session.close()
    client.close()
def test_client_session_invalid_keyspace(self):
    """Invalid keyspaces: a non-string raises TypeError, an empty string GraknError."""
    first_client = GraknClient('localhost:48555')
    with self.assertRaises(TypeError):
        bad_session = first_client.session(123)
        bad_session.transaction().read()  # won't fail until opening a transaction
    second_client = GraknClient('localhost:48555')
    with self.assertRaises(GraknError):
        empty_session = second_client.session('')
        empty_session.transaction().read()  # won't fail until opening a transaction
    first_client.close()
def test_client_init_invalid_uri(self):
    """ Test invalid URI """
    # Construction alone does not connect; the error surfaces on first use.
    with self.assertRaises(GraknError):
        dead_client = GraknClient('localhost:1000')
        dead_client.session('testkeyspace').transaction().read()
    # The context-manager form must fail the same way.
    with self.assertRaises(GraknError):
        dead_client = GraknClient('localhost:1000')
        with dead_client.session("test") as s:
            with s.transaction().read() as tx:
                pass
    dead_client.close()
def test_insert_query(self):
    """Define a minimal schema, then insert and commit one person instance."""
    grakn_client = GraknClient("localhost:48555")
    grakn_session = grakn_client.session("define_schema")
    with grakn_session.transaction().write() as schema_tx:
        schema_tx.query("define person sub entity, has name; name sub attribute, value string;")
        schema_tx.commit()
    with grakn_session.transaction().write() as data_tx:
        data_tx.query("insert $x isa person, has name \"john\";")
        data_tx.commit()
    grakn_session.close()
    grakn_client.close()
def test_client_session_valid_keyspace(self):
    """ Test OK uri and keyspace """
    inst = GraknClient('localhost:48555')
    # explicit open/close
    opened_session = inst.session('test')
    self.assertIsInstance(opened_session, grakn.client.Session)
    opened_tx = opened_session.transaction().read()
    opened_tx.close()
    opened_session.close()
    # the `with` statement form must behave identically
    with inst.session('test') as ctx_session:
        self.assertIsInstance(ctx_session, grakn.client.Session)
        ctx_tx = ctx_session.transaction().read()
        ctx_tx.close()
    inst.close()
def diagnosis_example(num_graphs=200,
                      num_processing_steps_tr=10,
                      num_processing_steps_ge=10,
                      num_training_iterations=1000,
                      keyspace="diagnosis", uri="localhost:48555"):
    """Run the diagnosis example end-to-end and write predictions back to Grakn.

    Generates synthetic example graphs, trains/evaluates the graph network via
    `pipeline`, then ingests the predicted graphs into the keyspace.

    Args:
        num_graphs: number of graphs for training and testing combined (split 50/50).
        num_processing_steps_tr: message-passing steps during training.
        num_processing_steps_ge: message-passing steps during generalisation/testing.
        num_training_iterations: number of training epochs.
        keyspace: keyspace to generate and read example subgraphs from.
        uri: URI of the running Grakn instance.

    Returns:
        Tuple (solveds_tr, solveds_ge) — final training and testing results.
    """
    tr_ge_split = int(num_graphs * 0.5)
    generate_example_graphs(num_graphs, keyspace=keyspace, uri=uri)
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    graphs = create_concept_graphs(list(range(num_graphs)), session)
    with session.transaction().read() as tx:
        # Change the terminology here onwards from thing -> node and role -> edge
        node_types = get_thing_types(tx)
        edge_types = get_role_types(tx)
        print(f'Found node types: {node_types}')
        print(f'Found edge types: {edge_types}')
    ge_graphs, solveds_tr, solveds_ge = pipeline(graphs,
                                                 tr_ge_split,
                                                 node_types,
                                                 edge_types,
                                                 num_processing_steps_tr=num_processing_steps_tr,
                                                 num_processing_steps_ge=num_processing_steps_ge,
                                                 num_training_iterations=num_training_iterations,
                                                 continuous_attributes=CONTINUOUS_ATTRIBUTES,
                                                 categorical_attributes=CATEGORICAL_ATTRIBUTES,
                                                 output_dir=f"./events/{time.time()}/")
    with session.transaction().write() as tx:
        write_predictions_to_grakn(ge_graphs, tx)
    session.close()
    client.close()
    return solveds_tr, solveds_ge
def generate_example_graphs(num_examples, keyspace="diagnosis", uri="localhost:48555"):
    """Generate synthetic diagnosis examples from a PMF and insert them into Grakn.

    Args:
        num_examples: number of example subgraphs to generate and insert.
        keyspace: target keyspace.
        uri: URI of the running Grakn instance.
    """
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    # BUG FIX: `np.float` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `float` is the documented replacement (same float64 dtype).
    pmf_array = np.zeros([2, 2, 2, 2], dtype=float)
    # Joint probabilities over (Flu, Meningitis, Light Sensitivity, Fever).
    pmf_array[0, 1, 0, 1] = 0.1
    pmf_array[1, 0, 1, 0] = 0.15
    pmf_array[0, 1, 1, 0] = 0.05
    pmf_array[1, 0, 0, 1] = 0.34
    pmf_array[1, 1, 1, 1] = 0.01
    pmf_array[0, 1, 1, 1] = 0.3
    pmf_array[1, 0, 1, 1] = 0.05

    def normal_dist(mean, var):
        # Returns a sampler: one normally-distributed value rounded to 2 dp.
        return lambda: round(np.random.normal(mean, var, 1)[0], 2)

    pmf = PMF(
        {
            'Flu': [False, True],
            'Meningitis': [False, True],
            'Light Sensitivity': [False, normal_dist(0.3, 0.1)],
            'Fever': [False, normal_dist(0.5, 0.2)]
        },
        pmf_array,
        seed=0)
    print(pmf.to_dataframe())
    # One write transaction (and commit) per generated example.
    for example_id in range(0, num_examples):
        tx = session.transaction().write()
        for query in get_example_queries(pmf, example_id):
            print(query)
            tx.query(query)
        tx.commit()
    session.close()
    client.close()
def test_client_tx_valid_enum(self):
    """`.transaction().read()` yields a grakn.client.Transaction instance."""
    inst = GraknClient('localhost:48555')
    test_session = inst.session('test')
    read_tx = test_session.transaction().read()
    self.assertIsInstance(read_tx, grakn.client.Transaction)
    inst.close()
lprint('Configure the gcloud CLI')
# Write the service-account credential to disk so gcloud can authenticate with it.
credential_file = '/tmp/gcp-credential.json'
with open(credential_file, 'w') as f:
    f.write(credential)
sp.check_call(['gcloud', 'auth', 'activate-service-account', '--key-file', credential_file])
sp.check_call(['gcloud', 'config', 'set', 'project', project])
# Generate a passphrase-less SSH key pair for gcloud compute access.
sp.check_call(['ssh-keygen', '-t', 'rsa', '-b', '4096', '-N', '', '-f', os.path.expanduser('~/.ssh/google_compute_engine')])
lprint('Creating a BioGrakn instance "' + instance + '"')
gcloud_instances_create(instance)
# Look up the instance's external IP; [:-1] strips the trailing newline from gcloud's output.
# NOTE(review): check_output returns bytes on Python 3, so `external_ip + ':48555'`
# would raise there — presumably this script targets Python 2; confirm.
external_ip = sp.check_output(['gcloud', 'compute', 'instances', 'describe', instance, '--format=get(networkInterfaces[0].accessConfigs[0].natIP)', '--zone', 'europe-west1-b'])[:-1]
uri = external_ip + ':48555'
client = None
# Poll until Grakn on the new instance accepts a connection and a trivial query.
while client is None:
    try:
        client = GraknClient(uri=uri)
        with client.session(keyspace="grakn") as session:
            with session.transaction().read() as read_transaction:
                answer_iterator = read_transaction.query("match $x isa thing; get;")
        client.close()
    except Exception:
        # Server not reachable yet; retry after a minute.
        time.sleep(60)
    finally:
        # NOTE(review): as written, this finally clause belongs to the retry
        # try/except inside the loop, so teardown runs on every attempt —
        # presumably it was meant to run once after the polling completes;
        # confirm against the original script's indentation.
        lprint('Deleting the BioGrakn instance')
        gcloud_instances_delete(instance)
class ITBuildGraphFromQueriesWithRealGrakn(GraphTestCase):
    """Integration test: build a combined NetworkX graph from Graql queries run
    against a real Grakn server, and compare it to a hand-built expectation."""

    KEYSPACE = "it_build_graph_from_queries"
    # Minimal schema: a person with a name, and a parentship relation.
    SCHEMA = ("define "
              "person sub entity, has name, plays parent, plays child;"
              "name sub attribute, value string;"
              "parentship sub relation, relates parent, relates child;")
    # One person, Bob, who is both parent and child of himself.
    DATA = ('insert '
            '$p isa person, has name "Bob";'
            '$r(parent: $p, child: $p) isa parentship;')

    def setUp(self):
        self._keyspace = type(self).__name__.lower()  # Use the name of this test class as the keyspace name
        print(self._keyspace)
        self._client = GraknClient(uri="localhost:48555")

    def tearDown(self):
        # Drop the keyspace so each run starts clean.
        self._client.keyspaces().delete(self._keyspace)
        self._client.close()

    def test_graph_is_built_from_grakn_as_expected(self):
        # Per-query variable graphs: the shape each query's answers should take.
        g1 = nx.MultiDiGraph()
        g1.add_node('x')
        g2 = nx.MultiDiGraph()
        g2.add_node('x')
        g2.add_node('n')
        g2.add_edge('x', 'n', type='has')
        g3 = nx.MultiDiGraph()
        g3.add_node('x')
        g3.add_node('r')
        g3.add_node('y')
        g3.add_edge('r', 'x', type='child')
        g3.add_edge('r', 'y', type='parent')
        query_sampler_variable_graph_tuples = [
            ('match $x isa person; get;', mock_sampler, g1),
            ('match $x isa person, has name $n; get;', mock_sampler, g2),
            ('match $x isa person; $r(child: $x, parent: $y); get;', mock_sampler, g3),
            # TODO Add functionality for loading schema at a later date
            # ('match $x sub person; $x sub $type; get;', g4),
            # ('match $x sub $y; get;', g5),
        ]
        with self._client.session(keyspace=self._keyspace) as session:
            # Load schema and data in one write transaction.
            with session.transaction().write() as tx:
                tx.query(ITBuildGraphFromQueriesWithRealGrakn.SCHEMA)
                tx.query(ITBuildGraphFromQueriesWithRealGrakn.DATA)
                tx.commit()
            with session.transaction().read() as tx:
                combined_graph = build_graph_from_queries(query_sampler_variable_graph_tuples, tx)
                # Fetch the actual concepts so expected nodes compare equal.
                person_exp = build_thing(next(tx.query('match $x isa person; get;')).get('x'), tx)
                name_exp = build_thing(next(tx.query('match $x isa name; get;')).get('x'), tx)
                parentship_exp = build_thing(next(tx.query('match $x isa parentship; get;')).get('x'), tx)
                expected_combined_graph = nx.MultiDiGraph()
                expected_combined_graph.add_node(person_exp, type='person')
                expected_combined_graph.add_node(name_exp, type='name', value_type='string', value='Bob')
                expected_combined_graph.add_node(parentship_exp, type='parentship')
                expected_combined_graph.add_edge(parentship_exp, person_exp, type='child')
                expected_combined_graph.add_edge(parentship_exp, person_exp, type='parent')
                expected_combined_graph.add_edge(person_exp, name_exp, type='has')
                self.assertGraphsEqual(expected_combined_graph, combined_graph)
def test_client_init_valid(self):
    """ Test valid URI """
    valid_client = GraknClient('localhost:48555')
    self.assertIsInstance(valid_client, GraknClient)
    valid_client.close()
def test_client_tx_invalid_enum(self):
    """Requesting an unknown transaction type must raise."""
    inst = GraknClient('localhost:48555')
    test_session = inst.session('test')
    with self.assertRaises(Exception):
        test_session.transaction('foo')
    inst.close()
class Test(unittest.TestCase):
    """End-to-end tests for the phone-calls example: migrate the dataset from
    CSV/JSON/XML and verify the migration counts and the example queries.

    BUG FIX: ``assertItemsEqual`` is Python 2 only — it was renamed to
    ``assertCountEqual`` in Python 3's unittest, so the original calls would
    raise AttributeError at runtime (this codebase is Python 3: the Grakn
    python client requires it).
    """

    def setUp(self):
        # Fresh client/session per test; load the schema before every test.
        self._client = GraknClient(uri="localhost:48555")
        self._session = self._client.session(keyspace=keyspace_name)
        with open('schemas/phone-calls-schema.gql', 'r') as schema:
            define_query = schema.read()
            with self._session.transaction().write() as transaction:
                transaction.query(define_query)
                transaction.commit()
        print("Loaded the " + keyspace_name + " schema")

    def test_csv_migration(self):
        migrate_csv.build_phone_call_graph(migrate_csv.Inputs, data_path, keyspace_name)
        self.assert_migration_results()

    def test_json_migration(self):
        migrate_json.build_phone_call_graph(migrate_json.Inputs, data_path, keyspace_name)
        self.assert_migration_results()

    def test_xml_migration(self):
        migrate_xml.build_phone_call_graph(migrate_xml.Inputs, data_path, keyspace_name)
        self.assert_migration_results()

    def test_queries(self):
        queries.process_selection(0, keyspace_name)
        migrate_csv.build_phone_call_graph(migrate_csv.Inputs, data_path, keyspace_name)
        with self._session.transaction().read() as transaction:
            # Each example query is exercised with an empty selection string
            # and compared order-insensitively against known answers.
            first_actual_answer = queries.query_examples[0].get("query_function")("", transaction)
            first_expected_answer = [
                u"+370 351 224 5176", u"+54 398 559 0423", u"+62 107 530 7500",
                u"+63 815 962 6097", u"+7 690 597 4443", u"+263 498 495 0617",
                u"+81 308 988 7153", u"+81 746 154 2598"
            ]
            self.assertCountEqual(first_actual_answer, first_expected_answer)
            second_actual_answer = queries.query_examples[1].get("query_function")("", transaction)
            second_expected_answer = [
                u"+351 272 414 6570", u"+30 419 575 7546", u"+1 254 875 4647",
                u"+86 892 682 0628", u"+33 614 339 0298", u"+351 515 605 7915",
                u"+86 922 760 0418", u"+63 808 497 1769", u"+86 825 153 5518",
                u"+48 894 777 5173", u"+27 117 258 4149", u"+86 202 257 8619"
            ]
            self.assertCountEqual(second_actual_answer, second_expected_answer)
            third_actual_answer = queries.query_examples[2].get("query_function")("", transaction)
            third_expected_answer = [u"+86 892 682 0628", u"+54 398 559 0423"]
            self.assertCountEqual(third_actual_answer, third_expected_answer)
            forth_actual_answer = queries.query_examples[3].get("query_function")("", transaction)
            forth_expected_answer = [
                u"+261 860 539 4754", u"+81 308 988 7153", u"+62 107 530 7500",
                u"+261 860 539 4754", u"+81 308 988 7153", u"+62 107 530 7500"
            ]
            self.assertCountEqual(forth_actual_answer, forth_expected_answer)
            fifth_actual_answer = queries.query_examples[4].get("query_function")("", transaction)
            fifth_expected_answer = [1242.7714285714285, 1699.4308943089432]
            self.assertCountEqual(fifth_actual_answer, fifth_expected_answer)

    def assert_migration_results(self):
        # Verify entity/relation counts after a migration run.
        with self._session.transaction().read() as transaction:
            number_of_people = transaction.query("match $x isa person; get $x; count;").next().number()
            self.assertEqual(number_of_people, 30)
            number_of_companies = transaction.query("match $x isa company; get $x; count;").next().number()
            self.assertEqual(number_of_companies, 1)
            number_of_contracts = transaction.query("match $x isa contract; get $x; count;").next().number()
            self.assertEqual(number_of_contracts, 10)
            number_of_calls = transaction.query("match $x isa call; get $x; count;").next().number()
            self.assertEqual(number_of_calls, 200)

    def tearDown(self):
        # Drop the keyspace so each test starts from an empty database.
        self._client.keyspaces().delete(keyspace_name)
        self._session.close()
        self._client.close()
        print("Deleted the " + keyspace_name + " keyspace")
def uniprotMigrate(uri, keyspace, num, num_threads, ctn):
    """Migrate the Uniprot proteome TSV into Grakn: organisms, genes,
    transcripts and proteins, plus their connecting relations.

    Args:
        uri: URI of the running Grakn server.
        keyspace: target keyspace.
        num: number of rows to migrate (0 skips the migration entirely).
        num_threads: size of the thread pool used for batched inserts.
        ctn: number of queries per batch (commit threshold).
    """
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    batches_pr = []
    # BUG FIX: `num is not 0` compared object identity, not value; use !=.
    if num != 0:
        print(' ')
        print('Opening Uniprot dataset...')
        print(' ')
        tx = session.transaction().write()
        # BUG FIX: the species name had been corrupted to "H**o sapiens";
        # the dataset species is Homo sapiens (cf. filterHomoSapiens used by
        # the Reactome migrator in this codebase).
        org = "insert $h isa organism, has organism-name 'Homo sapiens (Human)', has organism-name 'Human'; $o2 isa organism, has organism-name 'Avian';"
        tx.query(org)
        tx.commit()
        with open('../biograkn-covid/Dataset/Uniprot/uniprot-reviewed_yes+AND+proteome.tsv',
                  'rt', encoding='utf-8') as csvfile:
            # BUG FIX: the source file is tab-separated (.tsv); with a space
            # delimiter the column indices used below would not line up.
            csvreader = csv.reader(csvfile, delimiter='\t')
            raw_file = []
            n = 0
            for row in csvreader:
                n = n + 1
                if n != 1:  # skip the header row (was `n is not 1`)
                    raw_file.append(row)
        uniprotdb = []
        for i in raw_file[:num]:
            data = {}
            data['uniprot-id'] = i[0]
            data['uniprot-entry-name'] = i[1]
            data['protein-name'] = i[3]
            data['gene-symbol'] = i[4]
            data['organism'] = i[5]
            data['function-description'] = i[7]
            data['ensembl-transcript'] = i[11]
            data['entrez-id'] = i[12]
            uniprotdb.append(data)
        insertGenes(uniprotdb, session, num_threads, ctn)
        insertTranscripts(uniprotdb, session, num_threads, ctn)
        # Insert proteins #
        counter = 0
        pool = ThreadPool(num_threads)
        batches = []
        for q in uniprotdb:
            counter = counter + 1
            transcripts = transcriptHelper(q)
            gene = geneHelper(q)[0]
            # Build the match clause incrementally; the try/except blocks start
            # a fresh "match " when `graql` is unbound (no transcripts case).
            if transcripts is not None:
                variables = []
                tvariable = 1
                graql = "match "
                for t in transcripts:
                    variables.append(tvariable)
                    graql = graql + "$" + str(tvariable) + " isa transcript, has ensembl-transcript-stable-id '" + t + "'; "
                    tvariable = tvariable + 1
            if gene is not None:
                try:
                    graql = graql + "$g isa gene, has gene-symbol '" + gene + "';"
                except Exception:
                    graql = "match $g isa gene, has gene-symbol '" + gene + "';"
            try:
                graql = graql + "$h isa organism, has organism-name '" + q['organism'] + "';"
            except Exception:
                graql = "match $h isa organism, has organism-name '" + q['organism'] + "';"
            graql = f"""{ graql } insert $a isa protein, has uniprot-id "{q['uniprot-id']}", has uniprot-name "{q['protein-name']}", has function-description "{q['function-description']}", has uniprot-entry-name "{q['uniprot-entry-name']}"; $r (associated-organism: $h, associating: $a) isa organism-association;"""
            if gene is not None:
                graql = graql + "$gpe (encoding-gene: $g, encoded-protein: $a) isa gene-protein-encoding;"
            if transcripts is not None:
                for v in variables:
                    graql = f"""{ graql } $r{str(v)}(translating-transcript: ${str(v)}, translated-protein: $a) isa translation; """
            # BUG FIX: `if gene and transcripts is not None` parsed as
            # `gene and (transcripts is not None)` — truthiness of gene rather
            # than the explicit None check used everywhere else in this loop.
            if gene is not None and transcripts is not None:
                for v in variables:
                    graql = graql + "$trans" + str(v) + "(transcribing-gene: $g, encoded-transcript: $" + str(v) + ") isa transcription;"
            batches.append(graql)
            del graql
            if counter % ctn == 0:
                batches_pr.append(batches)
                batches = []
        batches_pr.append(batches)  # flush the final partial batch
        pool.map(partial(batch_job, session), batches_pr)
        pool.close()
        pool.join()
        print('.....')
        print('Finished migrating Uniprot file.')
        print('.....')
    session.close()
    client.close()