def mark_prior_author(uid):
    # Flag every author that has at least one prior-period collaboration.
    query = """
        MATCH (a:Author)-[:prior_{uid}]-(:Author)
        WITH DISTINCT a
        SET a.prior_{uid} = TRUE
    """.format(uid=uid)
    db.run(query)
def mark_test_author(uid, num_author=100):
    # Flag every prior author that also has a test-period collaboration.
    # NOTE: num_author is kept for interface parity but the query does not
    # reference it (nor the old $num_author/$TopK parameters).
    query = """
        MATCH (a:Author)-[:test_{uid}]-(:Author)
        WHERE a.prior_{uid} = TRUE
        WITH DISTINCT a
        SET a.test_{uid} = TRUE
    """.format(uid=uid)
    db.run(query)
def deleteAll():
    # Batch-delete every node and its relationships via APOC so large
    # graphs do not exhaust transaction memory.
    try:
        query = """
            CALL apoc.periodic.iterate(
                "MATCH (n) RETURN n",
                "DETACH DELETE n",
                {batchSize: 1000})
            YIELD batches, total
            RETURN batches, total
        """
        db.run(query)
    except Exception as exc:
        raise ValueError("Error: Not able to delete all.") from exc
def update_work_prior(uid):
    # Cache each prior author's count of distinct prior-period works.
    query = """
        MATCH (a:Author)-[:UPLOAD]->(w:Work)
        WHERE a.prior_{uid} = TRUE AND w.prior_{uid} = TRUE
        WITH a, COUNT(DISTINCT w) AS num_work
        SET a.work_prior_{uid} = num_work
    """.format(uid=uid)
    db.run(query)
def init_papers():
    # Bulk-load paper metadata (year, title) onto existing Work nodes.
    query = """
        USING PERIODIC COMMIT 1000
        LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR '|'
        WITH line.`article:ID` AS work_id,
             line.`year:int` AS year,
             line.`title:string` AS title
        MATCH (w:Work { work_id: toInteger(work_id) })
        SET w.year = toInteger(year), w.title = title
    """
    db.run(query, parameters={"path": PAPER_DATABASE})
def delete_all_authors(self):
    try:
        query = """
            MATCH (a:Author)
            DETACH DELETE a
        """
        print("query", query)
        db.run(query)
    except Exception as exc:
        raise ValueError(
            "There is no connection found. Check connection and try again!"
        ) from exc
def init_authors():
    query = """
        // Load and commit every 1000 records
        USING PERIODIC COMMIT 1000
        LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR '|'
        WITH line.`ID` AS author_id, line.`author` AS name
        MATCH (a:Author)
        WHERE a.author_id = toInteger(author_id)
        SET a.name = name
    """
    print("create set name author:{0}".format(query))
    db.run(query, parameters={"path": AUTHOR_DATABASE})
def init_author_paper_upload():
    query = """
        // Load and commit every 1000 records
        USING PERIODIC COMMIT 1000
        LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR '|'
        WITH line.`START_ID` AS work_id, line.`END_ID` AS author_id
        MERGE (w:Work { work_id: toInteger(work_id) })
        MERGE (a:Author { author_id: toInteger(author_id) })
        // Create relationships between Author and Paper
        CREATE (a)-[:UPLOAD]->(w)
    """
    print("create new author->paper:{0}".format(query))
    db.run(query, parameters={"path": AUTHOR_PAPER_DATABASE})
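# Usage sketch (an assumption, not part of the module's API): the upload
# loader MERGEs the Work and Author nodes, so it has to run before
# init_papers() and init_authors(), which only MATCH existing nodes.
def _init_all_sketch():
    if not isExistPaper():
        init_author_paper_upload()
        init_papers()
        init_authors()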
def create_test_connection(uid, start, end):
    # Link prior authors who co-authored a titled work inside the test
    # window [start, end], skipping pairs that are already linked.
    query = """
        MATCH (a:Author)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b:Author)
        WHERE a <> b
          AND a.prior_{uid} = TRUE AND b.prior_{uid} = TRUE
          AND EXISTS(w.title)
          AND w.year >= {start} AND w.year <= {end}
          AND NOT EXISTS((a)-[:test_{uid}]-(b))
        CREATE (a)-[:test_{uid}]->(b)
    """.format(uid=uid, start=start, end=end)
    db.run(query)
def mark_test_paper(uid, start, end):
    # Flag every titled work in [start, end] shared by two prior authors.
    query = """
        MATCH (a:Author)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b:Author)
        WHERE a <> b
          AND a.prior_{uid} = TRUE AND b.prior_{uid} = TRUE
          AND EXISTS(w.title)
          AND w.year >= {start} AND w.year <= {end}
        WITH DISTINCT w
        SET w.test_{uid} = TRUE
    """.format(uid=uid, start=start, end=end)
    db.run(query)
def _predict(self, project_uid: int):
    from utils.file import get_current_directory, connect_path_file

    path = connect_path_file(get_current_directory(neo4j=True), _temp_file)
    # The relationship's property map is appended outside str.format so
    # its braces are not mistaken for format fields.
    query = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM $path AS line FIELDTERMINATOR '|'
        WITH toInteger(line[0]) AS start_nodeId,
             toInteger(line[1]) AS end_nodeId,
             toFloat(line[2]) AS score
        MATCH (a:Author), (b:Author)
        WHERE a.author_id = start_nodeId
          AND b.author_id = end_nodeId
          AND NOT EXISTS((a)-[:_{uid}_{_id}]->(b))
        CREATE (a)-[:_{uid}_{_id} """.format(uid=project_uid, _id=self._id)
    query += """{score: score}]->(b)"""
    db.run(query, parameters={"path": path})
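# Hypothetical example of the file _predict() consumes: headerless,
# '|'-separated "start_author_id|end_author_id|score" rows (the ids and
# scores below are illustrative only).
SAMPLE_PREDICTIONS_CSV = "123|456|0.91\n123|789|0.87\n456|789|0.42\n"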
def get_num_author(uid: int, type_db: str) -> int:
    query = """
        MATCH (a:Author)
        WHERE a.{type_db}_{uid} = TRUE
        RETURN COUNT(DISTINCT a) AS num_au
    """.format(uid=uid, type_db=type_db)
    num_au = 0
    for row in db.run(query):
        num_au = int(row[0])
    return num_au
def get_author(self, author_id: int, project_uid: int, model_id: int,
               similarity=None, source_id=None) -> Author:
    # model_id is accepted for interface parity but not used in the query.
    query = """
        MATCH (a:Author)
        WHERE a.prior_{uid} = TRUE AND a.author_id = $aId
        RETURN a.author_id, a.name""".format(uid=project_uid)
    parameters = {"aId": int(author_id)}
    if source_id is not None:
        # Also report whether the author already collaborated with
        # `source_id` in the prior graph and in the test graph.
        query += ", EXISTS((a)-[:prior_{uid}]-(:Author ".format(
            uid=project_uid)
        query += "{ author_id: $source_id })) AS acquaintance"
        query += ", EXISTS((a)-[:test_{uid}]-(:Author ".format(
            uid=project_uid)
        query += "{ author_id: $source_id })) AS real_connected"
        parameters["source_id"] = int(source_id)
    result = {
        "author_id": None,
        "name": None,
        "similarity": None,
        "acquaintance": None,
        "real_connected": None,
    }
    for row in db.run(query, parameters=parameters):
        result["author_id"] = int(row[0])
        result["name"] = row[1]
        result["similarity"] = similarity
        if source_id is not None:
            result["acquaintance"] = row[2]
            result["real_connected"] = row[3]
    if result["name"] is None:
        return None
    return Author(
        result["author_id"],
        result["name"],
        result["similarity"],
        result["acquaintance"],
        result["real_connected"],
    )
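# Hypothetical call (repo, ids, and similarity are illustrative): fetch
# author 42 from project 7 and report whether they already know author 99.
def _get_author_example(repo):
    author = repo.get_author(author_id=42, project_uid=7, model_id=1,
                             similarity=0.83, source_id=99)
    return author  # None when author 42 is not a prior author of project 7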
def save(self, recommendation_list: list, project_uid: int):
    # Replace any previous predictions, then store one edge per
    # (author, recommended author) pair with its rank in `top`.
    self.delete_predictions(project_uid)
    if not recommendation_list:
        raise ValueError(
            "Error: No authors were found to make any prediction!")
    query = """
        MATCH (a:Author), (b:Author)
        WHERE a.author_id = $a_id AND b.author_id = $b_id
        CREATE (a)-[:_{0}_{1} """.format(project_uid, self._id)
    query += "{ top: $top }]->(b)"
    for auth_rec in recommendation_list:
        a_id = auth_rec["author_id"]
        for top, b_id in enumerate(auth_rec["topK"]):
            if b_id > 0:  # non-positive ids are padding, skip them
                db.run(query, parameters={
                    "a_id": a_id, "b_id": b_id, "top": top})
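# Hypothetical shape of the list save() consumes (values illustrative):
# one dict per source author; topK holds ranked recommended author ids,
# and non-positive ids are treated as padding and skipped.
SAMPLE_RECOMMENDATIONS = [
    {"author_id": 123, "topK": [456, 789, -1]},
    {"author_id": 456, "topK": [123]},
]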
def get_all_author_id(uid: int, type_db: str) -> list:
    query = """
        MATCH (a:Author)
        WHERE a.{type_db}_{uid} = TRUE
        RETURN DISTINCT a.author_id
    """.format(uid=uid, type_db=type_db)
    num_au = get_num_author(uid, type_db=type_db)
    authors = [0] * num_au
    for idx, row in enumerate(db.run(query)):
        authors[idx] = int(row[0])
    return authors
def _init_new_test_collaborations(self, uid):
    try:
        query = """
            USING PERIODIC COMMIT 1000
            LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR ','
            WITH line.`a` AS a_author_id, line.`b` AS b_author_id
            MATCH (a:Author)
            MATCH (b:Author)
            WHERE a.author_id = toInteger(a_author_id)
              AND b.author_id = toInteger(b_author_id)
        """
        query += """ CREATE (a)-[:test_{uid}]->(b)""".format(uid=uid)
        print("query", query)
        db.run(query, parameters={"path": PATH_TEST_GRAPH_DATABASE})
    except Exception as exc:
        raise ValueError(
            "There is no connection found. Check connection and try again!"
        ) from exc
def _init_new_prior_authors(self, uid):
    try:
        # Build the node's property map separately so its braces do not
        # collide with str.format placeholders.
        parameters = "{ "
        parameters += (" author_id:toInteger(author_id), name:name,"
                       " neigh_{uid}:num_col, work_prior_{uid}:num_work,"
                       " prior_{uid}:TRUE").format(uid=uid)
        parameters += " })"
        query = """
            USING PERIODIC COMMIT 1000
            LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR ','
            WITH line.`author_id` AS author_id, line.`name` AS name,
                 line.`num_work` AS num_work, line.`num_col` AS num_col
            CREATE (:Author {parameters}
        """.format(parameters=parameters)
        print("query", query)
        db.run(query, parameters={"path": PATH_AUTHORS_DATABASE})
    except Exception as exc:
        raise ValueError(
            "There is no connection found. Check connection and try again!"
        ) from exc
def create_prior_connection(uid, start, end, is_directed=False,
                            is_weighted=False):
    if not is_directed and not is_weighted:
        # Unweighted edges between co-authors of a titled work published
        # inside the prior window [start, end].
        query = """
            MATCH (a:Author)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b:Author)
            WHERE a <> b AND EXISTS(w.title)
              AND w.year >= {start} AND w.year <= {end}
              AND NOT EXISTS((a)-[:prior_{uid}]-(b))
            CREATE (a)-[:prior_{uid}]->(b)
        """.format(uid=uid, start=start, end=end)
        db.run(query)
    elif not is_directed and is_weighted:
        # Undirected weight = Jaccard coefficient of the two authors'
        # prior work sets: same_work / (|A| + |B| - same_work).
        query = """
            MATCH (a:Author)-[:prior_{uid}]->(b:Author)
            WHERE a.prior_{uid} = TRUE AND b.prior_{uid} = TRUE
            WITH a, b
            MATCH (a)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b)
            WHERE w.prior_{uid} = TRUE
            WITH DISTINCT a, b, COUNT(w) AS same_work
            WITH a, b,
                 same_work * 1.0 / (a.work_prior_{uid} + b.work_prior_{uid}
                                    - same_work) AS score
            WHERE NOT EXISTS((a)-[:prior_weight_{uid}]->(b))
            CREATE (a)-[:prior_weight_{uid}""".format(uid=uid)
        query += """{ score: score }]->(b)"""
        db.run(query)
    elif is_directed and is_weighted:
        # Directed weight normalizes the shared-work count by the source
        # author's own prior work count: same_work / |A|.
        query = """
            MATCH (a:Author)-[:prior_{uid}]-(b:Author)
            WHERE a.prior_{uid} = TRUE AND b.prior_{uid} = TRUE
            WITH a, b
            MATCH (a)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b)
            WHERE w.prior_{uid} = TRUE
            WITH DISTINCT a, b, COUNT(w) AS same_work
            WITH DISTINCT a, b,
                 same_work * 1.0 / a.work_prior_{uid} AS score
            CREATE (a)-[:prior_dir_weight_{uid}""".format(uid=uid)
        query += """{ score: score }]->(b)"""
        db.run(query)
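# Sanity check of the two edge weights with illustrative counts: author a
# has 8 prior works, b has 5, and they share 3.
_work_a, _work_b, _same_work = 8, 5, 3
_jaccard = _same_work / (_work_a + _work_b - _same_work)  # 3 / 10 = 0.3
_directed_a_to_b = _same_work / _work_a                   # 3 / 8  = 0.375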
def isExistPaper():
    rs = None
    query = """
        MATCH (w:Work)
        RETURN 1 AS existed
        LIMIT 1
    """
    try:
        for row in db.run(query):
            rs = int(row[0])
        if rs != 1:  # no Work node matched
            return False
    except Exception as exc:
        raise ValueError(
            "Error: There is an error when connecting to database."
        ) from exc
    return True
def isConnect(self):
    rs = None
    query = """
        MATCH (a:Author)
        RETURN 1 AS existed
        LIMIT 1
    """
    try:
        for row in db.run(query):
            rs = int(row[0])
        if rs != 1:  # no Author node matched
            return False
    except Exception as exc:
        raise ValueError(
            "There is no connection. Check connection and try again!"
        ) from exc
    return True
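# Minimal sketch of the `db` handle every helper here assumes, built on the
# official `neo4j` Python driver; the URI and credentials are placeholders.
from neo4j import GraphDatabase


class _Db:
    def __init__(self, uri="bolt://localhost:7687",
                 auth=("neo4j", "password")):
        self._driver = GraphDatabase.driver(uri, auth=auth)

    def run(self, query, parameters=None):
        # Open a session per call and materialize the rows so callers can
        # iterate over the result after the session closes.
        with self._driver.session() as session:
            return list(session.run(query, parameters or {}))


# db = _Db()  # placeholder wiring; the real module provides `db`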
def get_authors(self, project_uid: int, model_id: int,
                num_authors: int) -> list:
    # model_id is accepted for interface parity but not used in the query.
    query = """
        MATCH (a:Author)
        WHERE a.prior_{uid} = TRUE
        RETURN DISTINCT a.author_id, a.name
        LIMIT $num_authors
    """.format(uid=project_uid)
    result = [None] * num_authors
    for idx, row in enumerate(
            db.run(query, parameters={"num_authors": int(num_authors)})):
        result[idx] = Author(int(row[0]), row[1])
    result = [author for author in result if author is not None]
    if not result:
        return None
    return result
def _create_csv_file(self, project_uid: int,
                     file_path=PATH_GRAPH_NODE2VEC_NEO4J):
    """Export the prior collaboration graph from Neo4j to a CSV file."""
    # Default: unweighted, undirected edge list.
    query = """
        WITH "MATCH (a:Author)-[r:prior_{0}]->(b:Author) WHERE a.prior_{0}=TRUE AND b.prior_{0}=TRUE RETURN DISTINCT a.author_id AS au1, b.author_id AS au2" AS query
        CALL apoc.export.csv.query(query, $file_path, """.format(project_uid)
    if self.is_weighted and not self.is_directed:
        query = """
        WITH "MATCH (a:Author)-[r:prior_weight_{0}]->(b:Author) WHERE a.prior_{0}=TRUE AND b.prior_{0}=TRUE RETURN DISTINCT a.author_id AS au1, b.author_id AS au2, r.score AS weight" AS query
        CALL apoc.export.csv.query(query, $file_path, """.format(project_uid)
    elif self.is_weighted and self.is_directed:
        query = """
        WITH "MATCH (a:Author)-[r:prior_dir_weight_{0}]->(b:Author) WHERE a.prior_{0}=TRUE AND b.prior_{0}=TRUE RETURN DISTINCT a.author_id AS au1, b.author_id AS au2, r.score AS weight" AS query
        CALL apoc.export.csv.query(query, $file_path, """.format(project_uid)
    query += """{ })
        YIELD file, rows
        RETURN file, rows
    """
    # print("query", query)
    file_name = None
    num_rows_effective = 0
    for row in db.run(query, parameters={"file_path": file_path}):
        file_name = row[0]
        num_rows_effective = row[1]
    return (file_name, num_rows_effective)
def valuate(self, project_uid: int) -> dict:
    true_positive = 0
    false_positive = 0
    false_negative = 0
    precision = 0
    recall = 0
    f_measure = 0
    """
    true_positive: predicted links that really exist in the test graph.
    """
    query = """
        MATCH (a:Author)-[r:_{0}_{1}]->(lily:Author)
        WHERE a.test_{0} = TRUE AND EXISTS((a)-[:test_{0}]-(lily))
        RETURN COUNT(r) AS tp
    """.format(project_uid, self._id)
    for row in db.run(query):
        true_positive = row[0]
    """
    false_positive: predicted links that are not correct.
    + Take the number of links to predict at each node.
    + Within top-k', where k' < 5 and k' = the number of links to predict
      at each node, count how many predictions are wrong:
        MATCH (a:Author)-[r:test_{0}]-(b:Author)
        WITH DISTINCT a, COUNT(DISTINCT b.author_id) AS num_col
        MATCH (a)-[r:_{0}_{1}]->(lily:Author)
        WHERE a.test_{0} = TRUE AND r.top < num_col
          AND NOT EXISTS((a)-[:test_{0}]-(lily))
        RETURN COUNT(r) AS fp
    """
    query = """
        MATCH (a:Author)-[r:_{0}_{1}]->(lily:Author)
        WHERE a.test_{0} = TRUE AND NOT EXISTS((a)-[:test_{0}]-(lily))
        RETURN COUNT(r) AS fp
    """.format(project_uid, self._id)
    for row in db.run(query):
        false_positive = row[0]
    """
    false_negative: truly-correct links that were not predicted.
    1. Find the number of links to predict at each node, num_col
       (num_col <= TopK).
    2. Truly-correct links not yet predicted within the limit
       = min(num_col, TopK) - truly-correct links predicted within the limit.
    """
    query = """
        MATCH (a:Author)-[r:test_{0}]-(lily:Author)
        WHERE a.test_{0} = TRUE AND NOT EXISTS((a)-[:_{0}_{1}]->(lily))
        RETURN COUNT(r) AS fn
    """.format(project_uid, self._id)
    for row in db.run(query):
        false_negative = row[0]
    print("true_positive", true_positive,
          "false_positive", false_positive,
          "false_negative", false_negative)
    # Guard against division by zero when nothing was predicted or matched.
    if true_positive + false_positive > 0:
        precision = true_positive * 1.0 / (true_positive + false_positive)
    if true_positive + false_negative > 0:
        recall = true_positive * 1.0 / (true_positive + false_negative)
    if precision + recall > 0:
        f_measure = 2 * precision * recall / (precision + recall)
    print("precision", precision, "recall", recall, "f_measure", f_measure)
    self.valuations = {
        "precision": round(precision, 2),
        "recall": round(recall, 2),
        "fmeasure": round(f_measure, 2),
    }
    return copy.deepcopy(self.valuations)
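# Worked example of the metrics above with illustrative counts only:
_tp, _fp, _fn = 6, 4, 2
_precision = _tp / (_tp + _fp)                           # 6/10 = 0.6
_recall = _tp / (_tp + _fn)                              # 6/8  = 0.75
_f = 2 * _precision * _recall / (_precision + _recall)   # 0.9/1.35 ≈ 0.667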
def delete_connection(uid, type_db):
    # Remove every {type_db}_{uid} relationship between authors.
    query = """
        MATCH (:Author)-[r:{type_db}_{uid}]-(:Author)
        DELETE r
    """.format(uid=uid, type_db=type_db)
    db.run(query)
def delete_predictions(self, project_uid):
    query = """
        MATCH (:Author)-[r:_{project_uid}_{algorithm_id}]-(:Author)
        DELETE r
    """.format(project_uid=project_uid, algorithm_id=self._id)
    db.run(query)