def test_client_session_invalid_keyspace(self):
    """Check that invalid keyspace names are rejected.

    A non-string keyspace is expected to raise ``TypeError`` and an empty
    keyspace name ``GraknError``.
    """
    client = GraknClient('localhost:48555')
    # Registered cleanups run even when an assertion below fails, so neither
    # client is left open.
    self.addCleanup(client.close)
    with self.assertRaises(TypeError):
        a_session = client.session(123)
        a_session.transaction().read()  # won't fail until opening a transaction

    inst2 = GraknClient('localhost:48555')
    self.addCleanup(inst2.close)
    with self.assertRaises(GraknError):
        a_session = inst2.session('')
        a_session.transaction().read()  # won't fail until opening a transaction
    def test_client_init_invalid_uri(self):
        """Using a client created with an invalid URI raises GraknError."""
        # Explicit session / transaction calls.
        with self.assertRaises(GraknError):
            bad_client = GraknClient('localhost:1000')
            bad_session = bad_client.session('testkeyspace')
            bad_session.transaction().read()

        # Same check through the context-manager API.
        with self.assertRaises(GraknError):
            bad_client = GraknClient('localhost:1000')
            with bad_client.session("test") as session, \
                    session.transaction().read() as tx:
                pass
            # Only reached if nothing above raised.
            bad_client.close()
 def test_client_session_close(self):
     """Opening a transaction on an already closed session raises GraknError."""
     grakn_client = GraknClient('localhost:48555')
     closed_session = grakn_client.session('test')
     closed_session.close()
     with self.assertRaises(GraknError):
         closed_session.transaction().read()
     grakn_client.close()
Ejemplo n.º 4
0
 def test_match_query(self):
     """Run a simple match query inside a read transaction."""
     grakn_client = GraknClient("localhost:48555")
     schema_session = grakn_client.session("define_schema")
     with schema_session.transaction().read() as read_tx:
         read_tx.query("match $s sub thing; get;")
     schema_session.close()
     grakn_client.close()
def proteinAtlasMigrator(uri, keyspace, num, num_threads, ctn):
    """Load the Human Protein Atlas normal-tissue dataset into Grakn.

    Args:
        uri: Address of the Grakn server.
        keyspace: Keyspace the data is written to.
        num: Number of leading data rows used to build the tissue list;
            0 skips the migration entirely.
        num_threads: Forwarded to the insert helpers.
        ctn: Forwarded to the insert helpers.
    """
    client = GraknClient(uri=uri)
    try:
        # The session is closed by the context manager and the client in the
        # `finally`, whether or not the migration runs or succeeds.
        with client.session(keyspace=keyspace) as session:
            # Value comparison: `is not 0` tests object identity, which is
            # implementation-dependent for ints.
            if num == 0:
                return

            print('  ')
            print('Opening HPA dataset...')
            print('  ')
            with open(
                    '../biograkn-covid/Dataset/HumanProteinAtlas/normal_tissue.tsv',
                    'rt',
                    encoding='utf-8') as csvfile:
                csvreader = csv.reader(csvfile, delimiter='\t')
                next(csvreader, None)  # skip the header row
                raw_file = [
                    {
                        'ensembl-gene-id': row[0],
                        'gene-symbol': row[1],
                        'tissue': row[2],
                        'expression-value': row[4],
                        'expression-value-reliability': row[5],
                    }
                    for row in csvreader
                ]

            # Distinct tissues among the first `num` rows.
            tissue = list({record['tissue'] for record in raw_file[:num]})

            insertTissue(tissue, session, num_threads)
            # NOTE(review): only the tissue list honours `num`; the two calls
            # below receive every row - confirm this is intended.
            insertEnsemblId(raw_file, session, num_threads, ctn)
            insertGeneTissue(raw_file, session, num_threads, ctn)
    finally:
        client.close()
Ejemplo n.º 6
0
    def setUpClass(cls):
        """Create a fresh keyspace with a small parentship schema, once per class.

        The client and session are stored in module-level globals.
        """
        super(test_concept_Base, cls).setUpClass()

        global client, session

        # TODO this is not neat - this is basically emulating a constructor/destructor operation using globals

        client = GraknClient("localhost:48555")
        random_suffix = str(uuid.uuid4()).replace("-", "_")[:8]
        session = client.session("test_" + random_suffix)

        # parentship roles, types and attributes to test against
        schema = (
            "define "
            "parent sub role; "
            "child sub role; "
            "mother sub role; "
            "son sub role; "
            "person sub entity, has age, has gender, plays parent, plays child, plays mother, plays son; "
            "age sub attribute, datatype long; "
            "gender sub attribute, datatype string; "
            "parentship sub relation, relates parent, relates child, relates mother, relates son;"
        )

        # temporary transaction used only to set up the DB; it is not kept
        setup_tx = session.transaction().write()
        try:
            setup_tx.query(schema)
        except GraknError as err:
            print(err)

        # make sure at least one 20-year-old person exists
        if not list(setup_tx.query("match $x isa person, has age 20; get;")):
            setup_tx.query("insert $x isa person, has age 20;")
        setup_tx.commit()
Ejemplo n.º 7
0
def dgidbMigrator(uri, keyspace, num_dr, num_int, num_threads, ctn):
    """Insert DGIdb drugs and then drug interactions into Grakn.

    Args:
        uri: Address of the Grakn server.
        keyspace: Keyspace the data is written to.
        num_dr: Forwarded to ``insertDrugs``.
        num_int: Forwarded to ``insertInteractions``.
        num_threads: Forwarded to both insert helpers.
        ctn: Forwarded to both insert helpers.
    """
    client = GraknClient(uri=uri)
    try:
        # Session and client are released even if an insert helper raises.
        with client.session(keyspace=keyspace) as session:
            insertDrugs(uri, keyspace, num_dr, num_threads, ctn, session)
            insertInteractions(uri, keyspace, num_int, num_threads, ctn, session)
    finally:
        client.close()
Ejemplo n.º 8
0
def go_test(val_graphs, val_ge_split, reload_fle, **kwargs):
    """Run the pipeline in test mode and write the predictions back to Grakn.

    Args:
        val_graphs: Prepared graphs passed to ``pipeline`` as ``graphs``.
        val_ge_split: Passed to ``pipeline`` as ``tr_ge_split``.
        reload_fle: Passed to ``pipeline`` as ``reload_fle``.
        **kwargs: Forwarded unchanged to ``pipeline``.

    Returns:
        A tuple ``(ge_graphs, [solveds_tr, solveds_ge])`` built from the
        pipeline's results.
    """
    # opens session once again, if closed after training
    client = GraknClient(uri=URI)
    try:
        # Session and client are released even if the pipeline or the write
        # transaction raises.
        with client.session(keyspace=KEYSPACE) as session:
            ge_graphs, solveds_tr, solveds_ge = pipeline(
                graphs=val_graphs,  # Run the pipeline with prepared graph
                tr_ge_split=val_ge_split,
                do_test=True,
                save_fle="",
                reload_fle=reload_fle,
                **kwargs)

            # Write predictions to grakn with learned probabilities
            with session.transaction().write() as tx:
                write_predictions_to_grakn(ge_graphs, tx)
    finally:
        client.close()

    validation_evals = [solveds_tr, solveds_ge]
    return ge_graphs, validation_evals
    def test_client_session_valid_keyspace(self):
        """A valid URI and keyspace give usable sessions, with and without ``with``."""
        grakn_client = GraknClient('localhost:48555')

        # explicit open / close
        plain_session = grakn_client.session('test')
        self.assertIsInstance(plain_session, grakn.client.Session)
        plain_session.transaction().read().close()
        plain_session.close()

        # test the `with` statement
        with grakn_client.session('test') as managed_session:
            self.assertIsInstance(managed_session, grakn.client.Session)
            managed_session.transaction().read().close()

        grakn_client.close()
Ejemplo n.º 10
0
def diagnosis_example(num_graphs=200,
                      num_processing_steps_tr=5,
                      num_processing_steps_ge=5,
                      num_training_iterations=1000,
                      keyspace=KEYSPACE,
                      uri=URI):
    """
    Run the diagnosis example from start to finish, including traceably ingesting predictions back into Grakn

    Args:
        num_graphs: Number of graphs to use for training and testing combined
        num_processing_steps_tr: The number of message-passing steps for training
        num_processing_steps_ge: The number of message-passing steps for testing
        num_training_iterations: The number of training epochs
        keyspace: The name of the keyspace to retrieve example subgraphs from
        uri: The uri of the running Grakn instance

    Returns:
        Final accuracies for training and for testing
    """

    tr_ge_split = int(num_graphs * 0.5)

    generate_example_graphs(num_graphs, keyspace=keyspace, uri=uri)

    client = GraknClient(uri=uri)
    try:
        # Session and client are released even if training or the write-back
        # raises.
        with client.session(keyspace=keyspace) as session:
            graphs = create_concept_graphs(list(range(num_graphs)), session)

            with session.transaction().read() as tx:
                # Change the terminology here onwards from thing -> node and role -> edge
                node_types = get_thing_types(tx)
                # Plain loops: the removals are side effects, not a list to keep.
                for ignored_type in TYPES_TO_IGNORE:
                    node_types.remove(ignored_type)

                edge_types = get_role_types(tx)
                for ignored_role in ROLES_TO_IGNORE:
                    edge_types.remove(ignored_role)
                print(f'Found node types: {node_types}')
                print(f'Found edge types: {edge_types}')

            ge_graphs, solveds_tr, solveds_ge = pipeline(
                graphs,
                tr_ge_split,
                node_types,
                edge_types,
                num_processing_steps_tr=num_processing_steps_tr,
                num_processing_steps_ge=num_processing_steps_ge,
                num_training_iterations=num_training_iterations,
                continuous_attributes=CONTINUOUS_ATTRIBUTES,
                categorical_attributes=CATEGORICAL_ATTRIBUTES,
                output_dir=f"./events/{time.time()}/")

            with session.transaction().write() as tx:
                write_predictions_to_grakn(ge_graphs, tx)
    finally:
        client.close()

    return solveds_tr, solveds_ge
Ejemplo n.º 11
0
 def test_define_schema(self):
     """Define a person entity with a string name attribute and commit it."""
     grakn_client = GraknClient("localhost:48555")
     schema_session = grakn_client.session("define_schema")
     with schema_session.transaction().write() as write_tx:
         write_tx.query("define person sub entity, has name; name sub attribute, datatype string;")
         write_tx.commit()
     schema_session.close()
     grakn_client.close()
Ejemplo n.º 12
0
def reactomeMigrator(uri, keyspace, num_path, num_threads, ctn):
    """Insert Reactome pathways and their interactions into Grakn.

    Args:
        uri: Address of the Grakn server.
        keyspace: Keyspace the data is written to.
        num_path: Forwarded to ``filterHomoSapiens``.
        num_threads: Forwarded to both insert helpers.
        ctn: Forwarded to both insert helpers.
    """
    client = GraknClient(uri=uri)
    try:
        # Session and client are released even if a helper raises.
        with client.session(keyspace=keyspace) as session:
            pathway_associations = filterHomoSapiens(num_path)
            insertPathways(uri, keyspace, num_threads, ctn, session, pathway_associations)
            insertPathwayInteractions(uri, keyspace, num_threads, ctn, session, pathway_associations)
    finally:
        client.close()
Ejemplo n.º 13
0
def disgenetMigrator(uri, keyspace, num, num_threads, ctn):
    """Load DisGeNET gene-disease associations into Grakn.

    Args:
        uri: Address of the Grakn server.
        keyspace: Keyspace the data is written to.
        num: Number of leading data rows to migrate; 0 skips the migration.
        num_threads: Size of the thread pool running the insert batches
            (also forwarded to ``insertDiseases``).
        ctn: Number of queries per batch (also forwarded to ``insertDiseases``).
    """
    client = GraknClient(uri=uri)
    try:
        # Session and client are released on every path, including `num == 0`
        # and errors.
        with client.session(keyspace=keyspace) as session:
            # Value comparison: `is not 0` tests object identity, which is
            # implementation-dependent for ints.
            if num == 0:
                return

            print('  ')
            print('Opening Disgenet dataset...')
            print('  ')

            with open(
                    '../biograkn-covid/Dataset/Disgenet/all_gene_disease_associations.tsv',
                    'rt',
                    encoding='utf-8') as csvfile:
                csvreader = csv.reader(csvfile, delimiter='\t')
                next(csvreader, None)  # skip the header row
                raw_file = list(csvreader)

            disgenet = [
                {
                    'entrez-id': row[0].strip(),
                    'gene-symbol': row[1],
                    'disease-id': row[4],
                    'disease-name': row[5],
                    'disgenet-score': float(row[9]),
                }
                for row in raw_file[:num]
            ]

            insertDiseases(disgenet, session, num_threads, ctn)

            # Group the insert queries into batches of `ctn`; the remainder
            # (possibly empty) forms the last batch.
            batches_pr = []
            batches = []
            for counter, q in enumerate(disgenet, start=1):
                graql = f"""
match $g isa gene, has gene-symbol "{q['gene-symbol']}", has entrez-id "{q['entrez-id']}";
$d isa disease, has disease-id "{q['disease-id']}", has disease-name "{q['disease-name']}";
insert $r (associated-gene: $g, associated-disease: $d) isa gene-disease-association, has disgenet-score {q['disgenet-score']};"""
                batches.append(graql)
                if counter % ctn == 0:
                    batches_pr.append(batches)
                    batches = []
            batches_pr.append(batches)

            pool = ThreadPool(num_threads)
            try:
                pool.map(partial(batch_job, session), batches_pr)
            finally:
                # Release the worker threads even if a batch failed.
                pool.close()
                pool.join()
            print('.....')
            print('Finished migrating Disgenet.')
            print('.....')
    finally:
        client.close()
Ejemplo n.º 14
0
def create_grakn_connection():
    """Open the shared Grakn client, session and read transaction on first use.

    Does nothing when ``connection_to_grakn_exists`` is already true.
    """
    global client, session, transaction, connection_to_grakn_exists

    if connection_to_grakn_exists:
        return

    client = GraknClient(uri="localhost:48555")
    session = client.session(keyspace=keyspace_name)
    # create a transaction to talk to the Grakn server
    transaction = session.transaction().read()
    connection_to_grakn_exists = True
Ejemplo n.º 15
0
 def test_insert_query(self):
     """Define a minimal person schema, then insert one person named john."""
     grakn_client = GraknClient("localhost:48555")
     schema_session = grakn_client.session("define_schema")

     # first transaction: schema
     with schema_session.transaction().write() as define_tx:
         define_tx.query("define person sub entity, has name; name sub attribute, value string;")
         define_tx.commit()

     # second transaction: data
     with schema_session.transaction().write() as insert_tx:
         insert_tx.query('insert $x isa person, has name "john";')
         insert_tx.commit()

     schema_session.close()
     grakn_client.close()
Ejemplo n.º 16
0
def insertSchema(uri, keyspace):
    """Load the Graql schema file ``Schema/biograkn-covid.gql`` into a keyspace.

    Args:
        uri: Address of the Grakn server.
        keyspace: Keyspace the schema is written to.
    """
    client = GraknClient(uri=uri)
    try:
        # Session and client are released once the schema is committed, or if
        # loading fails.
        with client.session(keyspace=keyspace) as session:
            print('.....')
            print('Inserting schema...')
            print('.....')
            with open("Schema/biograkn-covid.gql", "r") as graql_file:
                schema = graql_file.read()
            with session.transaction().write() as write_transaction:
                write_transaction.query(schema)
                write_transaction.commit()
            print('.....')
            print('Success inserting schema!')
            print('.....')
    finally:
        client.close()
Ejemplo n.º 17
0
def cord_ner_migrator(uri, keyspace, num_ner, num_threads, ctn):
    """Load the CORD-NER dataset (authors, journals, publications, entities) into Grakn.

    Args:
        uri: Address of the Grakn server.
        keyspace: Keyspace the data is written to.
        num_ner: Number of leading records of the dataset to migrate.
        num_threads: Forwarded to the insert helpers.
        ctn: Forwarded to the insert helpers, except
            ``insert_publications_with_authors`` which is always given 1.
    """
    client = GraknClient(uri=uri)
    try:
        # No transaction is opened here: the insert helpers only receive the
        # session. Session and client are released on every path.
        with client.session(keyspace=keyspace) as session:
            print('.....')
            print('Opening CORD NER file.')
            print('.....')
            with open('../biograkn-covid/Dataset/CORD_NER/CORD-NER-full.json',
                      "r") as f:
                # The file holds one JSON object per line; join them with
                # commas and wrap them in brackets to parse a single array.
                data = json.loads("[" + f.read().replace("}\n{", "},\n{") + "]")
            data = data[:num_ner]
            insert_authors(data, num_threads, ctn, session)
            insert_journals(data, num_threads, ctn, session)
            insert_publications_journals(data, num_threads, ctn, session)
            insert_publications_with_authors(data, num_threads, 1, session)
            insert_entities_pub(data, num_threads, ctn, session)
    finally:
        client.close()
def create_concepts(client: GraknClient, keyspace):
    """Define the campaign / ad-group relations, entities and rules in *keyspace*.

    Relations are defined first, then entities, then rules, all within one
    session.

    Returns:
        A dict with the keys ``'entities'``, ``'relations'`` and ``'rules'``,
        each holding the list of values returned by the define helpers.
    """
    log.info(f'Creating `account` concepts on "{keyspace}"\n')
    with client.session(keyspace=keyspace) as session:
        relations = [
            define(session)
            for define in (
                define_campaign_adgroup_relation,
                define_adgroup_criterion_relation,
            )
        ]
        entities = [
            define(session)
            for define in (
                define_campaign_entity,
                define_adgroup_entity,
                define_abstract_criterion_entity,
            )
        ]
        rules = [
            define(session)
            for define in (
                define_adgroup_in_campaign_rule,
                define_criterion_in_adgroup_rule,
            )
        ]

    return dict(entities=entities, relations=relations, rules=rules)
Ejemplo n.º 19
0
def tissueNetMigrator(uri, keyspace, num, num_threads, ctn):
    """Read the TissueNet HPA protein dataset for migration into Grakn.

    Args:
        uri: Address of the Grakn server.
        keyspace: Keyspace to open the session on.
        num: 0 skips reading the dataset.
        num_threads: Not used by the code shown here.
        ctn: Not used by the code shown here.
    """
    # NOTE(review): nothing is inserted and the session/client stay open -
    # confirm whether the rest of this migrator is missing.
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    batches_pr = []

    # Value comparison: `is not 0` tests object identity, which is
    # implementation-dependent for ints.
    if num != 0:
        print('  ')
        print('Opening TissueNet dataset...')
        print('  ')

        with open('Dataset/TissueNet/HPA-Protein.tsv', 'rt',
                  encoding='utf-8') as csvfile:
            csvreader = csv.reader(csvfile, delimiter='\t')
            next(csvreader, None)  # skip the header row
            raw_file = list(csvreader)
Ejemplo n.º 20
0
def diagnosis_example(num_graphs=200,
                      num_processing_steps_tr=10,
                      num_processing_steps_ge=10,
                      num_training_iterations=1000,
                      keyspace="diagnosis",
                      uri="localhost:48555"):
    """Generate example graphs, run the pipeline and write predictions to Grakn.

    Args:
        num_graphs: Number of example graphs to generate and load; half of
            this value is passed to ``pipeline`` as the train/test split.
        num_processing_steps_tr: Forwarded to ``pipeline``.
        num_processing_steps_ge: Forwarded to ``pipeline``.
        num_training_iterations: Forwarded to ``pipeline``.
        keyspace: Keyspace holding the example data.
        uri: Address of the Grakn server.

    Returns:
        The ``(solveds_tr, solveds_ge)`` pair returned by ``pipeline``.
    """

    tr_ge_split = int(num_graphs * 0.5)

    generate_example_graphs(num_graphs, keyspace=keyspace, uri=uri)

    client = GraknClient(uri=uri)
    try:
        # Session and client are released even if training or the write-back
        # raises.
        with client.session(keyspace=keyspace) as session:
            graphs = create_concept_graphs(list(range(num_graphs)), session)

            with session.transaction().read() as tx:
                # Change the terminology here onwards from thing -> node and role -> edge
                node_types = get_thing_types(tx)
                edge_types = get_role_types(tx)
                print(f'Found node types: {node_types}')
                print(f'Found edge types: {edge_types}')

            ge_graphs, solveds_tr, solveds_ge = pipeline(
                graphs,
                tr_ge_split,
                node_types,
                edge_types,
                num_processing_steps_tr=num_processing_steps_tr,
                num_processing_steps_ge=num_processing_steps_ge,
                num_training_iterations=num_training_iterations,
                continuous_attributes=CONTINUOUS_ATTRIBUTES,
                categorical_attributes=CATEGORICAL_ATTRIBUTES,
                output_dir=f"./events/{time.time()}/")

            with session.transaction().write() as tx:
                write_predictions_to_grakn(ge_graphs, tx)
    finally:
        client.close()

    return solveds_tr, solveds_ge
Ejemplo n.º 21
0
def generate_example_graphs(num_examples,
                            keyspace="diagnosis",
                            uri="localhost:48555"):
    """Insert ``num_examples`` examples sampled from a fixed PMF into Grakn.

    Each example is written and committed in its own write transaction.

    Args:
        num_examples: Number of examples to generate; ids run from 0 to
            ``num_examples - 1``.
        keyspace: Keyspace the examples are written to.
        uri: Address of the Grakn server.
    """

    client = GraknClient(uri=uri)
    try:
        # Session and client are released even if a query fails.
        with client.session(keyspace=keyspace) as session:
            # The builtin `float` replaces the `np.float` alias, which newer
            # NumPy releases no longer provide.
            pmf_array = np.zeros([2, 2, 2, 2], dtype=float)
            pmf_array[0, 1, 0, 1] = 0.1
            pmf_array[1, 0, 1, 0] = 0.15
            pmf_array[0, 1, 1, 0] = 0.05
            pmf_array[1, 0, 0, 1] = 0.34
            pmf_array[1, 1, 1, 1] = 0.01
            pmf_array[0, 1, 1, 1] = 0.3
            pmf_array[1, 0, 1, 1] = 0.05

            def normal_dist(mean, var):
                # Zero-argument sampler: one normal draw rounded to 2 decimals.
                return lambda: round(np.random.normal(mean, var, 1)[0], 2)

            pmf = PMF(
                {
                    'Flu': [False, True],
                    'Meningitis': [False, True],
                    'Light Sensitivity': [False, normal_dist(0.3, 0.1)],
                    'Fever': [False, normal_dist(0.5, 0.2)]
                },
                pmf_array,
                seed=0)

            print(pmf.to_dataframe())

            for example_id in range(0, num_examples):
                # The context manager closes the transaction even when a
                # query raises before the commit.
                with session.transaction().write() as tx:
                    for query in get_example_queries(pmf, example_id):
                        print(query)
                        tx.query(query)
                    tx.commit()
    finally:
        client.close()
Ejemplo n.º 22
0
def run_test(keyspace):
    """Define a schema, then alternate 20 rounds of bulk inserts and full reads.

    Each round inserts via ``multiple_insert`` and then, in one transaction,
    fetches every ``thing`` and counts the attributes of each answer.
    """
    print("Creating client and keyspace: " + keyspace)
    client = GraknClient("localhost:48555")
    session = client.session(keyspace=keyspace)

    print("Creating schema...")
    define_schema(session)

    for iteration in range(20):
        print(f"Write + Read iteration {iteration}")
        multiple_insert(session, n=50)

        with session.transaction().write() as tx:
            # query for everything
            answers = list(tx.query("match $x isa thing; get;"))
            print(f"retreived {len(answers)} answers")

            # query for concepts, then ask for attrs
            attribute_count = sum(
                1
                for answer in answers
                for _ in answer.get("x").attributes()
            )
            print(f"Retreived {attribute_count} attributes")
def create_concepts(client: GraknClient, keyspace):
    """Define the product-partition relations, entities and rules in *keyspace*.

    Relations are defined first, then entities, then rules, all within one
    session.

    Returns:
        A dict with the keys ``'entities'``, ``'relations'`` and ``'rules'``,
        each holding the list of values returned by the define helpers.
    """
    log.info(f'Creating `account` concepts on "{keyspace}"\n')
    with client.session(keyspace=keyspace) as session:
        relations = [
            define(session)
            for define in (
                define_ancestorship_relation,
                define_node_heirarchy_relation,
                define_sibling_relation,
                define_offer_relationship,
                define_case_value_relation,
                # define_subdivision_relation, WIP still
            )
        ]
        entities = [
            define(session)
            for define in (
                define_product_partition_entity,
                define_entity_product_dimension,
                define_entity_product,
            )
        ]
        rules = [
            define(session)
            for define in (
                define_infer_node_hierarchy_rule,
                define_transitive_ancestorship_rule,
                define_node_adjacency_rule,
            )
        ]

    return dict(entities=entities, relations=relations, rules=rules)
Ejemplo n.º 24
0
    lprint('Configure the gcloud CLI')
    credential_file = '/tmp/gcp-credential.json'
    with open(credential_file, 'w') as f:
        f.write(credential)
    sp.check_call(['gcloud', 'auth', 'activate-service-account', '--key-file', credential_file])
    sp.check_call(['gcloud', 'config', 'set', 'project', project])
    sp.check_call(['ssh-keygen', '-t', 'rsa', '-b', '4096', '-N', '', '-f', os.path.expanduser('~/.ssh/google_compute_engine')])

    lprint('Creating a BioGrakn instance "' + instance + '"')
    gcloud_instances_create(instance)

    external_ip = sp.check_output(['gcloud', 'compute', 'instances', 'describe', instance, '--format=get(networkInterfaces[0].accessConfigs[0].natIP)', '--zone', 'europe-west1-b'])[:-1]

    uri = external_ip + ':48555'

    client = None
    while client is None:
        try:

            client = GraknClient(uri=uri)
            with client.session(keyspace="grakn") as session:
                with session.transaction().read() as read_transaction:
                    answer_iterator = read_transaction.query("match $x isa thing; get;")
            client.close()

        except Exception:
            time.sleep(60)

finally:
    lprint('Deleting the BioGrakn instance')
    gcloud_instances_delete(instance)
Ejemplo n.º 25
0
class ITBuildGraphFromQueriesWithRealGrakn(GraphTestCase):
    """Integration test: build a combined graph from queries run against a Grakn server."""

    KEYSPACE = "it_build_graph_from_queries"
    SCHEMA = ("define "
              "person sub entity, has name, plays parent, plays child;"
              "name sub attribute, value string;"
              "parentship sub relation, relates parent, relates child;")
    DATA = ('insert '
            '$p isa person, has name "Bob";'
            '$r(parent: $p, child: $p) isa parentship;')

    def setUp(self):
        """Connect to the server; the keyspace is named after the test class."""
        # Use the name of this test class as the keyspace name
        self._keyspace = type(self).__name__.lower()
        print(self._keyspace)
        self._client = GraknClient(uri="localhost:48555")

    def tearDown(self):
        """Delete the test keyspace and close the client."""
        self._client.keyspaces().delete(self._keyspace)
        self._client.close()

    def test_graph_is_built_from_grakn_as_expected(self):
        """The graph built from three queries equals the hand-built expected graph."""

        # Variable graph for: a person
        person_graph = nx.MultiDiGraph()
        person_graph.add_node('x')

        # Variable graph for: a person owning a name
        person_name_graph = nx.MultiDiGraph()
        person_name_graph.add_nodes_from(['x', 'n'])
        person_name_graph.add_edge('x', 'n', type='has')

        # Variable graph for: a relation with a child and a parent role player
        relation_graph = nx.MultiDiGraph()
        relation_graph.add_nodes_from(['x', 'r', 'y'])
        relation_graph.add_edge('r', 'x', type='child')
        relation_graph.add_edge('r', 'y', type='parent')

        query_sampler_variable_graph_tuples = [
            ('match $x isa person; get;', mock_sampler, person_graph),
            ('match $x isa person, has name $n; get;', mock_sampler,
             person_name_graph),
            ('match $x isa person; $r(child: $x, parent: $y); get;',
             mock_sampler, relation_graph),
            # TODO Add functionality for loading schema at a later date
            # ('match $x sub person; $x sub $type; get;', g4),
            # ('match $x sub $y; get;', g5),
        ]

        test_class = ITBuildGraphFromQueriesWithRealGrakn

        with self._client.session(keyspace=self._keyspace) as session:

            # Load schema and data in a single committed transaction.
            with session.transaction().write() as tx:
                tx.query(test_class.SCHEMA)
                tx.query(test_class.DATA)
                tx.commit()

            with session.transaction().read() as tx:
                combined_graph = build_graph_from_queries(
                    query_sampler_variable_graph_tuples, tx)

                def first_thing(query):
                    # Build a thing from the first answer's `x` concept.
                    return build_thing(next(tx.query(query)).get('x'), tx)

                person_exp = first_thing('match $x isa person; get;')
                name_exp = first_thing('match $x isa name; get;')
                parentship_exp = first_thing('match $x isa parentship; get;')

        expected = nx.MultiDiGraph()
        expected.add_node(person_exp, type='person')
        expected.add_node(name_exp,
                          type='name',
                          value_type='string',
                          value='Bob')
        expected.add_node(parentship_exp, type='parentship')
        # The same person plays both roles in the inserted data.
        expected.add_edge(parentship_exp, person_exp, type='child')
        expected.add_edge(parentship_exp, person_exp, type='parent')
        expected.add_edge(person_exp, name_exp, type='has')

        self.assertGraphsEqual(expected, combined_graph)
Ejemplo n.º 26
0
def _read_csv_data_rows(path):
    """Return every row of the comma-separated file at *path* except the header row."""
    with open(path, 'rt', encoding='utf-8') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',')
        next(csvreader, None)  # skip the header row
        return list(csvreader)


def coronavirusMigrator(uri, keyspace):
    """Load the coronavirus genome-identity and host-protein datasets into Grakn.

    Three write transactions are committed in turn: a fixed set of countries
    plus one organism, the viruses from ``Genome identity.csv``, and the
    gene/protein associations from
    ``Host proteins (potential drug targets).csv``.

    Args:
        uri: Address of the Grakn server.
        keyspace: Keyspace the data is written to.
    """
    client = GraknClient(uri=uri)
    try:
        # Session, transactions and client are all released on every path.
        with client.session(keyspace=keyspace) as session:
            print('.....')
            print('Starting with Coronavirus file.')
            print('.....')

            # Temporary manual ingestion of locations
            graql = (
                "insert $c isa country, has country-name 'China'; "
                "$c2 isa country, has country-name 'Kingdom of Saudi Arabia'; \n"
                "\t$c3 isa country, has country-name 'USA'; "
                "$c4 isa country, has country-name 'South Korea'; "
                "$o isa organism, has organism-name 'Mouse';"
            )
            with session.transaction().write() as tx:
                tx.query(graql)
                tx.commit()

            genome_rows = _read_csv_data_rows(
                '../biograkn-covid/Dataset/Coronaviruses/Genome identity.csv')
            # Parse every row before querying so that a malformed row fails
            # before anything is sent to the server.
            viruses = []
            for row in genome_rows:
                viruses.append({
                    'genbank-id': row[0],
                    'identity%': row[2],
                    # the last character of the host column is dropped
                    'host': row[3][0:-1].strip(),
                    'location-discovered': row[4].strip(),
                    # column 1 always holds a name; columns 5 and 6 hold
                    # further names only when the row is long enough
                    'virus-names': [row[1].strip()]
                                   + [name.strip() for name in row[5:7]],
                })

            with session.transaction().write() as tx:
                for q in viruses:
                    virus_name = " " + "".join(
                        f'has virus-name "{name}", '
                        for name in q['virus-names'])
                    print(virus_name)
                    location = q['location-discovered']
                    host = q['host']
                    genbank_id = q['genbank-id']
                    identity = q['identity%']
                    graql = (
                        f'match $c isa country, has country-name "{location}"; \n'
                        f'\t\t\t$o isa organism, has organism-name "{host}";\n'
                        f'\t\t\tinsert $v isa virus, has genbank-id "{genbank_id}", {virus_name}\n'
                        f'\t\t\thas identity-percentage "{identity}";\n'
                        '\t\t\t$r (discovering-location: $c, discovered-virus: $v) isa discovery;\n'
                        '\t\t\t$r1 (hosting-organism: $o, hosted-virus: $v) isa organism-virus-hosting;'
                    )
                    print(graql)
                    tx.query(graql)
                tx.commit()

            protein_rows = _read_csv_data_rows(
                '../biograkn-covid/Dataset/Coronaviruses/Host proteins (potential drug targets).csv')
            associations = [
                (row[0].strip(), row[3].strip(), row[4].strip())
                for row in protein_rows
            ]

            with session.transaction().write() as tx:
                for coronavirus, uniprot_id, entrez_id in associations:
                    graql = (
                        f'match $v isa virus, has virus-name "{coronavirus}"; \n'
                        f'\t\t\t$p isa protein, has uniprot-id "{uniprot_id}";\n'
                        f'\t\t\t$g isa gene, has entrez-id "{entrez_id}";\n'
                        '\t\t\tinsert $r2 (associated-virus-gene: $g, associated-virus: $v) isa gene-virus-association;\n'
                        '\t\t\t$r3 (hosting-virus-protein: $p, associated-virus: $v) isa protein-virus-association;'
                    )
                    tx.query(graql)
                    print(graql)
                tx.commit()
            print('.....')
            print('Finished with Coronavirus file.')
            print('.....')
    finally:
        client.close()
Ejemplo n.º 27
0
# === Flat bottom data only ==== #
#keyspace = "ssp_2class_full"
# Keep only the samples with 500 or 1000 rays, then undersample.
data_sparse2 = ALLDATA[(ALLDATA.loc[:,'num_rays'] == 500) | (ALLDATA.loc[:,'num_rays'] == 1000)]
data = UndersampleData(data_sparse2, max_sample = 794)

# === 3 classes of 80 samples: 500/6000/15000 =====
#keyspace = "ssp_2class"
#data_sparse3 = ALLDATA[(ALLDATA.loc[:,'num_rays'] == 500) | (ALLDATA.loc[:, 'num_rays'] == 6000) | (ALLDATA.loc[:, 'num_rays'] == 15000)] #3classes
#data = UndersampleData(data_sparse3, max_sample = 80)

class_population = ClassImbalance(data, plot = False)
print(class_population)


client = GraknClient(uri=URI)
session = client.session(keyspace=KEYSPACE)

with session.transaction().read() as tx:
    # Change the terminology here onwards from thing -> node and role -> edge
    node_types = get_thing_types(tx)
    # Plain loops: the removals are side effects, not a list to keep.
    for ignored_type in TYPES_TO_IGNORE:
        node_types.remove(ignored_type)
    edge_types = get_role_types(tx)
    for ignored_role in ROLES_TO_IGNORE:
        edge_types.remove(ignored_role)
    print(f'Found node types: {node_types}')
    print(f'Found edge types: {edge_types}')

train_graphs, tr_ge_split, training_data, testing_data = prepare_data(session, data, train_split=0.7, validation_split = 0.2)
#, val_graphs,  val_ge_split
kgcn_vars = {
          'num_processing_steps_tr': 10,
          'num_processing_steps_ge': 10,
Ejemplo n.º 28
0
from grakn.client import GraknClient
import csv 
import os

client = GraknClient(uri="localhost:48555")
session = client.session(keyspace="disease_network")

# Parameters to set how much data to load from each dataset
nUni = 1
nInt = 1
nRea = 1
nDis = 1
nHPA = 0
nKan = 0
nGEO = 0
nDGI = 0
nTis = 0
sim = 0

# -------
# 1. START UniProt

# Uniprot
# Value comparison: `is not 0` tests object identity, which is
# implementation-dependent for ints.
if nUni != 0:
    print('1. Uniprot')
    with open('../dataset/uniprot/uniprot.csv', 'rt', encoding='utf-8') as csvfile:
        csvreader = csv.reader(csvfile, delimiter='\t')
        raw_file = []
        n = 0
        for row in csvreader:
            n = n + 1
Ejemplo n.º 29
0
class GraknDumper:
    """Dump the entities and relations of the ``grakn`` keyspace to ``./dump.gql``.

    Every concept is written as a Graql ``insert`` statement. Relations are
    re-attached to their role players by matching on the players'
    ``internal_id`` attribute, so every dumped concept must own one
    (a ``KeyError`` is raised otherwise).
    """

    def __init__(self):
        """Read ``config.yml`` next to this module, connect, and open the dump file.

        The configuration must provide ``grakn.hostname`` and ``grakn.port``.
        """
        # Load configuration. os.path.join keeps the path relative when
        # __file__ has no directory part; plain concatenation produced
        # '/config.yml' (filesystem root) in that case.
        config_path = os.path.join(os.path.dirname(__file__), 'config.yml')
        with open(config_path) as config_file:
            # yaml.load() without an explicit Loader is rejected by
            # PyYAML >= 6 and can build arbitrary Python objects on older
            # versions; safe_load is enough for a plain settings file.
            self.config = yaml.safe_load(config_file)

        # Initialize Grakn client
        grakn_config = self.config['grakn']
        self.grakn = GraknClient(uri=grakn_config['hostname'] + ':' + str(grakn_config['port']))
        self.session = self.grakn.session(keyspace='grakn')

        # Open the dump file (closed by dump())
        self.dump_file = open('./dump.gql', 'w')

        # Entities: concept id -> Graql literal of the entity's internal_id
        self.entities = {}

        # Relations: concept id -> Graql literal of the relation's internal_id
        self.relations = {}

    def get_attributes(self, entity):
        """Return the attributes owned by ``entity`` as Graql-ready values.

        Strings are wrapped in double quotes, dates are formatted as
        ``%Y-%m-%dT%H:%M:%S`` and booleans become ``true``/``false``; values
        of any other data type are returned unchanged. Labels listed in the
        module-level ``multiple_attributes`` map to a list of values, every
        other label maps to the last value seen.

        :param entity: any concept exposing ``attributes()``
        :return: dict mapping attribute label to value (or list of values)
        """
        attributes = {}
        for attribute in entity.attributes():
            attribute_type = attribute.type()
            # Fetch label and data type once instead of once per comparison.
            label = attribute_type.label()
            data_type = str(attribute_type.data_type())
            value = attribute.value()
            if data_type == 'DataType.STRING':
                # NOTE(review): double quotes embedded in the value are not
                # escaped, so such a value yields an invalid statement.
                value = '"' + value + '"'
            elif data_type == 'DataType.DATE':
                value = value.strftime('%Y-%m-%dT%H:%M:%S')
            elif data_type == 'DataType.BOOLEAN':
                value = 'true' if value else 'false'
            if label in multiple_attributes:
                attributes.setdefault(label, []).append(value)
            else:
                attributes[label] = value
        return attributes

    @staticmethod
    def _has_clauses(attributes):
        """Render an attribute dict as one ``,has <label> <value>`` line per value."""
        lines = []
        for key, value in attributes.items():
            values = value if isinstance(value, list) else [value]
            for val in values:
                lines.append('    ,has ' + key + ' ' + str(val) + '\n')
        return ''.join(lines)

    def _internal_id_literal(self, concept, known_ids):
        """Return the Graql literal of ``concept``'s internal_id.

        ``known_ids`` is the cache filled by an earlier dump step; on a miss
        the attribute is read from the concept itself. get_attributes()
        already quotes string values, so the literal is returned as-is
        (adding another pair of quotes here produced ``""id""``).
        """
        if concept.id in known_ids:
            return known_ids[concept.id]
        return self.get_attributes(concept)['internal_id']

    def _dump_relations_matching(self, query, to_ids, remember):
        """Write one match-insert statement per distinct relation returned by ``query``.

        :param query: Graql query binding $x, $from, $to, $roleFrom, $roleTo
        :param to_ids: id cache used to resolve the ``$to`` role player
        :param remember: when true, record the relation's internal_id in
            ``self.relations`` so later steps can refer to it
        """
        # The query yields one answer per role-player combination, so the
        # same relation can appear several times; a set keeps the check O(1).
        seen_ids = set()
        # The with-block closes the transaction even if a concept is malformed.
        with self.session.transaction().read() as rtx:
            for answer in rtx.query(query, infer=False):
                concepts = answer.map()
                relation = concepts.get('x')
                relation_id = relation.id
                if relation_id in seen_ids:
                    continue
                seen_ids.add(relation_id)

                relation_type = relation.type().label()
                from_role = concepts.get('roleFrom').label()
                to_role = concepts.get('roleTo').label()
                relation_attributes = self.get_attributes(relation)
                from_id = self._internal_id_literal(concepts.get('from'), self.entities)
                to_id = self._internal_id_literal(concepts.get('to'), to_ids)

                relation_dump = (
                    'match $from has internal_id ' + str(from_id)
                    + '; $to has internal_id ' + str(to_id)
                    + '; insert $' + relation_id
                    + '(' + from_role + ': $from, ' + to_role + ': $to) isa '
                    + relation_type + '\n'
                    + self._has_clauses(relation_attributes)
                    + ';\n\n'
                )
                if remember:
                    self.relations[relation_id] = relation_attributes['internal_id']
                self.dump_file.write(relation_dump)

    def dump_entities(self):
        """Write an insert statement for every entity and cache its internal_id."""
        with self.session.transaction().read() as rtx:
            for answer in rtx.query('match $x isa entity; get;'):
                entity = answer.map().get('x')
                entity_id = entity.id
                entity_type = entity.type().label()
                entity_attributes = self.get_attributes(entity)
                entity_dump = (
                    'insert $' + entity_id + ' isa ' + entity_type + '\n'
                    + self._has_clauses(entity_attributes)
                    + ';\n\n'
                )
                self.entities[entity_id] = entity_attributes['internal_id']
                self.dump_file.write(entity_dump)

    def dump_relations(self):
        """Dump relations whose two role players are both entities."""
        self._dump_relations_matching(
            'match $x($roleFrom: $from, $roleTo: $to); $from isa entity; $to isa entity; get;',
            to_ids=self.entities,
            remember=True,
        )

    def dump_relations_with_relations(self):
        """Dump relations linking an entity (``$from``) to another relation (``$to``)."""
        self._dump_relations_matching(
            'match $x($roleFrom: $from, $roleTo: $to); $from isa entity; $to isa relation; get;',
            to_ids=self.relations,
            remember=False,
        )

    def dump(self):
        """Run all dump steps in dependency order, then close the dump file."""
        print('Dumping...')
        try:
            self.dump_entities()
            self.dump_relations()
            self.dump_relations_with_relations()
        finally:
            # Flush and release the file even when a step fails half-way.
            self.dump_file.close()
        print('Dump done.')
Ejemplo n.º 30
0
from grakn.client import GraknClient

import unittest

import migrate
import blast
import queries

# Module-level Grakn client and session for the "blast" keyspace; the test
# class below reaches the database through this shared ``session``.
client = GraknClient(uri="localhost:48555")
session = client.session(keyspace="blast")


class Test(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Load ``blast/schema.gql`` into the keyspace once for the whole class.

        The schema text is sent as a single query through a write
        transaction on the module-level ``session`` and then committed.
        """
        with open('blast/schema.gql', 'r') as schema_file:
            schema_definition = schema_file.read()
            with session.transaction().write() as tx:
                tx.query(schema_definition)
                tx.commit()
                print("Loaded the blast schema")

    def test_a_migration(self):
        migrate.init("blast/uniprot-asthma-proteins.fasta")

        with session.transaction().read() as transaction:
            number_of_proteins = transaction.query(
                "match $x isa protein; get $x; count;").next().number()
            self.assertEqual(number_of_proteins, 12)

            number_of_databases = transaction.query(