def meta_node_df(meta_node) -> pd.DataFrame:
    """Run the meta-node query for ``meta_node`` and return the rows as a frame.

    Args:
        meta_node: Value substituted into ``META_NODE_QUERY_TEMPLATE`` through
            ``str.format`` (presumably a meta node label -- confirm against
            the template definition).

    Returns:
        The query records flattened with ``pd.json_normalize``.
    """
    query = META_NODE_QUERY_TEMPLATE.format(meta_node=meta_node)
    driver = neo4j_connect()
    # Scope the session so it is released even when the query raises.
    with driver.session() as session:
        records = session.run(query).data()
    return pd.json_normalize(records)
def query_graph(query) -> pd.DataFrame:
    """Execute ``query`` against the graph and return the result as a frame.

    Args:
        query: Query text handed unchanged to ``session.run``.

    Returns:
        The query records flattened with ``pd.json_normalize``.
    """
    driver = neo4j_connect()
    # Scope the session so it is released even when the query raises.
    with driver.session() as session:
        query_data = session.run(query).data()
    return pd.json_normalize(query_data)
def test_connect():
    """Check that a trivial two-node query returns exactly two records."""
    query = """ MATCH (n) RETURN n LIMIT 2; """
    driver = neo4j_connect()
    # Scope the session so it is released even when the query raises.
    with driver.session() as session:
        data = session.run(query).data()
    logger.info(data)
    assert len(data) == 2
def get_disease_data() -> pd.DataFrame:
    """Fetch one row per (disease id, unwound ``efo`` entry) pair.

    The query unwinds ``d.efo`` for every ``Disease`` node and returns the
    columns ``disease_id`` and ``mondo_efo_id``.

    Returns:
        The query records flattened with ``pd.json_normalize``; the frame is
        also written to the log.
    """
    query = """ match (d:Disease) unwind(d.efo) as mondo_efo_id return d.id as disease_id, mondo_efo_id; """
    driver = neo4j_connect()
    # Scope the session so it is released even when the query raises.
    with driver.session() as session:
        query_data = session.run(query).data()
    df = pd.json_normalize(query_data)
    logger.info(df)
    return df
def test_meta_node_index():
    """Assert that every index declared in ``meta_node_dict`` exists in the db.

    Each row returned by ``CALL db.indexes()`` is reduced to the pair
    (first entry of ``labelsOrTypes``, first entry of ``properties``) and
    compared with the (key, ``value["index"]``) pairs from ``meta_node_dict``.
    """
    driver = neo4j_connect()
    with driver.session() as session:
        index_rows = session.run("CALL db.indexes()").data()
    actual_node_indexes = {
        (row["labelsOrTypes"][0], row["properties"][0]) for row in index_rows
    }
    expected_node_indexes = {
        (name, spec["index"]) for name, spec in meta_node_dict.items()
    }
    # The expected indexes must form a subset of those present in the db.
    missing_indexes = expected_node_indexes - actual_node_indexes
    assert not missing_indexes
def source_target_dict(meta_rel):
    """Collect the flattened source and target values for ``meta_rel``.

    Args:
        meta_rel: Value substituted into ``SOURCE_TARGET_QUERY_TEMPLATE``
            through ``str.format`` (presumably a relationship type name --
            confirm against the template definition).

    Returns:
        A dict with keys ``"source"`` and ``"target"``. Each value is the
        concatenation, in row order, of the iterable found under that key in
        every returned record.
    """
    query = SOURCE_TARGET_QUERY_TEMPLATE.format(meta_rel=meta_rel)
    driver = neo4j_connect()
    # Scope the session so it is released even when the query raises.
    with driver.session() as session:
        rows = session.run(query).data()
    return {
        "source": [item for row in rows for item in row["source"]],
        "target": [item for row in rows for item in row["target"]],
    }
def test_meta_rel_exist():
    """Assert the db relationship types equal the names in ``meta_rel_names``."""
    query = """ CALL db.relationshipTypes() YIELD relationshipType RETURN relationshipType AS label """
    driver = neo4j_connect()
    with driver.session() as session:
        rows = session.run(query).data()
    db_meta_rel_names = {row["label"] for row in rows}
    # Log both sides so a mismatch can be diagnosed from the test output.
    logger.info(f"meta_rel_names: {meta_rel_names}")
    logger.info(f"db_meta_rel_names: {db_meta_rel_names}")
    assert set(meta_rel_names) == db_meta_rel_names
def test_meta_node_exist():
    """Assert the db node labels equal ``meta_node_names``, ignoring "Meta".

    The label "Meta" is dropped from the database side before the comparison.
    """
    query = """ CALL db.labels() YIELD label RETURN label """
    driver = neo4j_connect()
    with driver.session() as session:
        data = session.run(query).data()
    db_meta_node_names = {row["label"] for row in data}
    # The element to drop is the string "Meta". Passing the set {"Meta"} makes
    # Python look for frozenset({"Meta"}) and raise KeyError, so use the
    # string, and discard() so an absent label is simply a no-op.
    db_meta_node_names.discard("Meta")
    logger.info(f"meta_node_names: {meta_node_names}")
    logger.info(f"db_meta_node_names: {db_meta_node_names}")
    assert set(meta_node_names) == db_meta_node_names
def get_variants_from_graph() -> pd.DataFrame:
    """Fetch up to 100 distinct variant ids, save them as CSV and return them.

    The frame is written to ``variant_data`` without the index and then
    passed on through ``copy_source_data``.

    Returns:
        The query records flattened with ``pd.json_normalize`` (column
        ``id``).
    """
    query = """ match (v:Variant) return distinct(v._id) as id limit 100 """
    driver = neo4j_connect()
    logger.info(query)
    # Scope the session so it is released even when the query raises.
    with driver.session() as session:
        query_data = session.run(query).data()
    df = pd.json_normalize(query_data)
    df.to_csv(variant_data, index=False)
    copy_source_data(data_name=data_name, filename=variant_data)
    return df
def check():
    """Print which ``ensembl_gene_id`` values from the file have no Gene node.

    Reads the tab-separated file ``FILE`` under ``dataDir``, queries the graph
    for ``Gene`` nodes whose ``ensembl_id`` is among the file's ids, and
    prints the file head, the number of unique ids, the matched ids and
    finally the set of ids that were not matched.
    """
    driver = neo4j_connect()

    # Read the input table and collect its unique gene ids.
    data = os.path.join(dataDir, FILE)
    df = pd.read_csv(data, sep="\t")
    print(df.head())
    ens_list = list(set(df["ensembl_gene_id"]))
    print(len(ens_list))

    # Pass the ids as a query parameter rather than formatting a Python list
    # repr into the Cypher text, which breaks on quotes in the values.
    com = """
        match (g:Gene)
        where g.ensembl_id in $ens_list
        return g.ensembl_id;
        """
    # Scope the session so it is released even when the query raises; the
    # result has to be consumed before the session closes.
    with driver.session() as session:
        result = session.run(com, ens_list=ens_list)
        rows = [dict(record) for record in result]
    res_df = pd.DataFrame(rows)
    print(res_df)

    # Take the matched ids from the records themselves: an empty result gives
    # a frame without a "g.ensembl_id" column, so indexing it would raise.
    found = {row["g.ensembl_id"] for row in rows}
    print(set(ens_list) - found)