class GraphConnector(object):
    """Small wrapper that owns one ``Graph`` driver object.

    The accessor methods used to be declared ``@staticmethod`` while still
    taking ``self``, which made ``connector.close()`` and friends fail with a
    missing-argument ``TypeError``.  They are ordinary instance methods now;
    the class-level spelling ``GraphConnector.close(connector)`` keeps working.
    """

    def __init__(self, uri, user, password):
        """Create the driver.

        Args:
            uri: connection URI handed to ``Graph``.
            user: user name used for authentication.
            password: password used for authentication.
        """
        self._driver = Graph(uri, auth=(user, password))

    def start_instance(self, uri, username, password):
        """Re-initialise this connector and return the new driver.

        Args:
            uri: connection URI handed to ``Graph``.
            username: user name used for authentication.
            password: password used for authentication.

        Returns:
            The driver object created by ``__init__``.
        """
        self.__init__(uri, username, password)
        return self._driver

    def close(self):
        """Close the underlying driver by calling its ``close()`` method."""
        self._driver.close()

    def get_instance(self):
        """Return the underlying driver object."""
        return self._driver
class BaseSaver(object):
    '''
    BaseSaver loads spider-fetched data from a JSON file and stores it in
    one of the supported databases.

    :Usage:
        saver = BaseSaver(save_mode='mysql')
        saver.data_save('some_name')   # reads <save_path>/some_name.json
    '''

    # Storage back ends accepted by ``__init__``.
    SAVE_MODES = ('mongodb', 'neo4j', 'mysql')

    def __init__(self, save_mode="neo4j"):
        '''
        Initialize an instance of BaseSaver and open the database connection.

        :Args:
         - save_mode : a str naming the database to save data in; must be
           one of ``SAVE_MODES``.

        :Raises:
         - RuntimeError : if ``save_mode`` is not one of ``SAVE_MODES``.
        '''
        if save_mode not in self.SAVE_MODES:
            raise RuntimeError('存储模式指定有误,请输入mongodb、neo4j或者mysql')
        self.save_mode = save_mode

        if self.save_mode == 'mongodb':
            # MongoDB: select the database named by the 'authSource' setting.
            print('>>>> we are in mongodb.')
            self.connector = MongoClient(
                **MONGO_CONF)[MONGO_CONF.get('authSource')]
        elif self.save_mode == 'neo4j':
            # Neo4j
            print('>>>> we are in neo4j.')
            self.connector = Graph(**NEO_CONF)
        else:
            # MySQL: connect, then run the RESORT_SQL statement for the table.
            print('>>>> we are in mysql.')
            self.connector = pymysql.connect(**SQL_CONF)
            self.cursor = self.connector.cursor()
            sql = RESORT_SQL.format(table_name)
            print(sql)
            self.cursor.execute(sql)
            self.connector.commit()

    def data_save(self, file_name):
        '''
        Saves spider fetched data into the configured database.
        Wipes out the old data and saves the newly fetched data.

        :Args:
         - file_name : a str, the data file name without the ``.json``
           suffix; the file is looked up under ``save_path``.

        :Raises:
         - RuntimeError : if the data file does not exist.
        '''
        # Other file types could be supported by converting them to JSON first.
        file_path = os.path.join(save_path, file_name + '.json')
        if not os.access(file_path, os.F_OK):
            raise RuntimeError(f'数据文件{file_path}不存在,请检查数据!')

        # The file object already decodes UTF-8.  json.load() must not be
        # given ``encoding=``: that keyword was removed in Python 3.9 and
        # raises TypeError there.
        with open(file_path, 'r', encoding='utf-8') as file:
            self.json_data = json.load(file)

        if self.save_mode == 'mongodb':
            print('>>> we are saving to mongodb.')
            # Remove the old data.
            self.connector.drop_collection(collection)
            # Store the new data.
            self.connector[collection].insert_many(self.json_data)
        elif self.save_mode == 'neo4j':
            print('>>> we are saving to neo4j.')
            # Remove the old data -- use with care.
            self.graph_cleaner()
            # Store the new data.
            self.graph_builder()
        else:
            print('>>> we are saving to mysql.')
            # Remove the old data -- use with care.
            self.cursor.execute(f"DELETE FROM {table_name}")
            # Build the INSERT statement; column names are taken from the
            # keys of the first record, values are bound by name.
            data_key = self.json_data[0].keys()
            sql_key = ','.join(data_key)
            sql_value = ', '.join([f'%({key})s' for key in data_key])
            # Store the new data.
            sql = '''
            INSERT INTO {0}({1}) VALUES ({2});
            '''.format(table_name, sql_key, sql_value)
            print(sql)
            self.cursor.executemany(sql, self.json_data)
            self.connector.commit()

    def graph_cleaner(self):
        '''Hook for deleting the knowledge graph; does nothing here.'''
        pass

    def graph_builder(self):
        '''Hook for building the knowledge graph; does nothing here.'''
        pass

    def __del__(self):
        '''
        The destructor of BaseSaver class.
        Closes the MongoDB or MySQL connection if one was opened.
        '''
        # __del__ also runs when __init__ raised part-way through, so the
        # attributes may be missing; never fail during finalization.
        save_mode = getattr(self, 'save_mode', None)
        if save_mode is None:
            return
        print(f'>>>> closing {save_mode}.')
        connector = getattr(self, 'connector', None)
        if connector is None:
            return
        if save_mode == 'mongodb':
            connector.client.close()
        elif save_mode == 'mysql':
            connector.close()
class PyNeoGraph:
    """Query helper for a recipe graph (RECIPE / INGREDIENT / USER nodes).

    Every ``get_*`` recipe/ingredient method returns ``{'data': <str>}``
    where the string is the JSON encoding of the query result.
    """

    # Key under which the un-aliased ``RETURN result[0..10]`` column is read
    # from the row dictionaries.
    _TOP_TEN_KEY = "result[0..10]"

    def __init__(self, debug=False):
        """Connect to the graph unless ``debug`` is true.

        Args:
            debug(bool): when true no connection is made and ``driver``
                is left as ``None``.
        """
        # Always define the attribute so debug instances fail predictably.
        self.driver = None
        if not debug:
            self.driver = Graph(bolt=True, host='localhost')

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _raw_ids(ingredients):
        """Return the integer part before '&' of each '<id>&<name>' entry."""
        return [int(item.split('&')[0]) for item in ingredients]

    @classmethod
    def _main_and_side_ids(cls, main_ingredients, side_ingredients):
        """Return (main_ids, side_ids), side falling back to main when blank."""
        # An empty list is treated like [''] instead of raising IndexError.
        if not side_ingredients or side_ingredients[0] == '':
            side_ingredients = main_ingredients
        return cls._raw_ids(main_ingredients), cls._raw_ids(side_ingredients)

    @staticmethod
    def _wrap(payload):
        """JSON-encode ``payload`` and wrap it as ``{'data': <str>}``."""
        return {'data': json.dumps(payload)}

    def _run_top_ten(self, query, params):
        """Run ``query`` and wrap the ``result[0..10]`` column of row 0."""
        rows = self.driver.run(query, params).data()
        return self._wrap(rows[0][self._TOP_TEN_KEY])

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def test_conn(self):
        """Run a small probe query.

        Returns:
            bool: True when the probe (``MATCH (n) RETURN n LIMIT 5``)
            yields a data frame whose ``size`` is exactly 5, else False.
        """
        query = """
            MATCH (n)
            RETURN n
            LIMIT 5
        """
        results = self.driver.run(query).to_data_frame()
        return bool(results.size == 5)

    def close(self):
        """Close the driver, if one was created."""
        if self.driver is not None:
            self.driver.close()

    def get_neo4j_id(self, node="i:INGREDIENT", in_list=None):
        """
        Args:
            node(str): string in Cypher node format (i:INGREDIENT) etc
            in_list(list): list of raw ids to match nodes; ``None`` is
                treated as an empty list
        Returns:
            ids(list): list of neo4j id properties for nodes
        """
        node_var, label = f"{node}".split(':')
        # Labels/variables cannot be Cypher parameters, but the id list can:
        # pass it as $in_list rather than formatting it into the query text.
        query = f"""
            MATCH ({node})
            WHERE {node_var}.{label.lower()} IN $in_list
            RETURN id({node_var})
        """
        params = {"in_list": [] if in_list is None else in_list}
        return self.driver.run(query, params).to_series().to_list()

    def get_matching_recipes(self, main_ingredients, side_ingredients):
        """
        Args:
            main_ingredients(list[str]): list of main_ingredient raw_ids and
                names ['7213&tomato'] etc
            side_ingredients(list[str]): list of side_ingredient raw_ids and
                names; empty or starting with '' means "use the main list"
        Returns:
            results(dict): {'data': JSON string of up to 10 matching recipes}
        """
        main, side = self._main_and_side_ids(main_ingredients,
                                             side_ingredients)
        query = """
            //Q1_Matching Recipes
            MATCH (i:INGREDIENT)<-[:CONTAINS]-(r:RECIPE)
            WITH r, collect(DISTINCT i.ingredient) AS ingredients, $main_ingredients AS main, $side_ingredients AS side
            WHERE 1=1
            and all(x IN main WHERE (x IN ingredients))
            and any(x IN side WHERE (x IN ingredients))
            WITH r.name as RecipeName, r.recipe as ID
            ORDER BY size([x IN side WHERE x IN ingredients]) DESC, r.n_ingredients
            WITH collect({ recipeName:RecipeName, recipeID:ID }) AS result
            RETURN result[0..10]
        """
        params = {"main_ingredients": main, "side_ingredients": side}
        return self._run_top_ten(query, params)

    def get_content_based_recipes(self, user_id, main_ingredients,
                                  side_ingredients):
        """
        Args:
            user_id(int): id of the user
            main_ingredients(list[str]): list of main_ingredient raw_ids and
                names ['7213&tomato'] etc
            side_ingredients(list[str]): list of side_ingredient raw_ids and
                names; empty or starting with '' means "use the main list"
        Returns:
            results(dict): {'data': JSON string of up to 10 matching recipes
                with content-based filtering}
        """
        query = """
            //Q2_Content based filtering
            MATCH //Find recipes similar to recpies rated by user (ID) #2203 and get their ingredients.
            (u:USER{user:$user})-[:RATED]->(r:RECIPE)-[s:SIMILAR]->(r2:RECIPE)-[:CONTAINS]->(i:INGREDIENT)
            WITH//save user_id, user rated recipes ( r ) and recipes similar to ( r ) along with a list of their aggregate ingredients
            u,r,r2,collect(DISTINCT i.ingredient) AS ingredients, count(r2.recipe) AS recipeCount, s.sim_score AS score, $main_ingredients AS main, $side_ingredients AS side
            WHERE 1=1 //filter only for recipes containing ALL main & ANY of the side ingredients
            and all(x IN main WHERE (x IN ingredients)) //all main
            and any(x IN side WHERE (x IN ingredients)) //any side
            WITH //return user_id, user_name, recipe rated by user, recommended recipe, similarity score and ingredient list in recommended recipe and calc number of matching ingredients in each recpie (no_sideIngr)
            u.user as user_id, r2.name as RecipeName, r.recipe as ID1, r2.recipe AS ID, r.name AS Name,ingredients, size([x IN side WHERE x IN ingredients]) as No_SideIngr, score
            ORDER BY No_SideIngr DESC, score DESC
            WITH collect({ recipeName:RecipeName, recipeID:ID }) AS result
            RETURN result[0..10]
        """
        main, side = self._main_and_side_ids(main_ingredients,
                                             side_ingredients)
        params = {"main_ingredients": main, "side_ingredients": side,
                  "user": user_id}
        return self._run_top_ten(query, params)

    def get_collaborative_recipes(self, user_id, main_ingredients,
                                  side_ingredients):
        """
        Args:
            user_id(int): id of the user
            main_ingredients(list[str]): list of main_ingredient raw_ids and
                names ['7213&tomato'] etc
            side_ingredients(list[str]): list of side_ingredient raw_ids and
                names; empty or starting with '' means "use the main list"
        Returns:
            results(dict): {'data': JSON string of up to 10 matching recipes
                with collaborative filtering}
        """
        query = """
            //Q3_Collaborative filter
            MATCH (r:RECIPE)<-[:RATED]-(u2:USER)<-[s:SIMILAR]-(u:USER {user:$user})
            WITH r, count(r.recipe) AS recipeCount, s.sim_score AS score
            ORDER BY recipeCount DESC, score DESC
            WITH (r)
            MATCH (r)-[:CONTAINS]->(i:INGREDIENT)
            WITH r, collect(DISTINCT i.ingredient) AS ingredients,$main_ingredients AS main, $side_ingredients AS side
            MATCH (r)
            WHERE 1=1
            and all(x IN main WHERE (x IN ingredients))
            and any(x IN side WHERE (x IN ingredients))
            WITH r.recipe AS ID, r.name AS RecipeName, size([x IN side WHERE x IN ingredients]) as No_SideIngr
            ORDER BY No_SideIngr DESC
            LIMIT 10
            WITH collect({ recipeName:RecipeName, recipeID:ID }) AS result
            RETURN result[0..10]
        """
        main, side = self._main_and_side_ids(main_ingredients,
                                             side_ingredients)
        params = {"main_ingredients": main, "side_ingredients": side,
                  "user": user_id}
        return self._run_top_ten(query, params)

    def get_additional_ingredients(self, main_ingredients, side_ingredients):
        """
        Args:
            main_ingredients(list[str]): list of main_ingredient raw_ids and
                names ['7213&tomato'] etc
            side_ingredients(list[str]): list of side_ingredient raw_ids and
                names; ``[]`` or ``['']`` means "no side ingredients"
        Returns:
            results(dict): {'data': JSON string of up to 10 ingredients that
                co-occur with the given ones, most frequent first}
        """
        # Check emptiness before indexing so [] no longer raises IndexError.
        no_side = not side_ingredients or (
            side_ingredients[0] == '' and len(side_ingredients) <= 1)
        if no_side:
            ingredients = main_ingredients
        else:
            ingredients = main_ingredients + side_ingredients
        ingredients = self._raw_ids(ingredients)
        query = """
            //Q4_Probable_ingredient
            WITH $ingredients AS ingredients // Ingredient input list
            MATCH p=(r:RECIPE)-[:CONTAINS]->(other:INGREDIENT)
            USING SCAN r:RECIPE
            WHERE all(i in ingredients WHERE exists((r)-[:CONTAINS]-(:INGREDIENT{ingredient:i})))
            AND NOT other.ingredient IN ingredients
            WITH count(p) AS ingrCount, other
            ORDER BY ingrCount DESC
            WITH collect({ingredientName:other.name, ingredientID:other.ingredient}) AS result
            RETURN result[0..10]
        """
        params = {"ingredients": ingredients}
        return self._run_top_ten(query, params)

    def get_relevant_ingredients(self, recipe_id):
        """
        Args:
            recipe_id(int): id of the recipe
        Returns:
            results(dict): {'data': JSON string of the recipe's ingredients,
                each with its list of alternate ingredient names}
        """
        query = """
            //Q5_Ingredients in a recipe
            MATCH (i:INGREDIENT)<-[:CONTAINS]-(r:RECIPE), (a:INGREDIENT)<-[s:SIMILAR]-(i:INGREDIENT)
            WHERE r.recipe = $recipe_id
            RETURN i.name as ingredientName, i.ingredient as ingredientID, collect(Distinct a.name) as alternateIngredient
        """
        params = {"recipe_id": recipe_id}
        return self._wrap(self.driver.run(query, params).data())

    def get_relevant_ratings(self, recipe_id):
        """
        Args:
            recipe_id(int): id of the recipe
        Returns:
            results(dict): {'data': JSON string of up to 10
                {userID, rating} entries for the recipe}
        """
        query = """//07_Recipe ratings
            MATCH (r:RECIPE)<-[o:RATED]-(u:USER)
            WITH r, u.user as user, o.rating AS rating
            WHERE r.recipe=$recipe_id
            WITH collect({ userID:user, rating:rating }) AS result
            RETURN result[0..10]
        """
        params = {"recipe_id": recipe_id}
        return self._run_top_ten(query, params)

    def get_recipe_details(self, recipe_id):
        """
        Args:
            recipe_id(int): id of the recipe
        Returns:
            results(dict): {'data': JSON string of the recipe's steps,
                ingredient count, nutrition details and tags}
        """
        query = """
            //Q6_Recipe_details
            MATCH (r:RECIPE)
            WHERE r.recipe = $recipe_id
            RETURN DISTINCT r.steps as steps, r.n_ingredients as numberOfIngredients, r.nutrition_dict as nutritionDetials, r.tags as tags
        """
        params = {'recipe_id': recipe_id}
        return self._wrap(self.driver.run(query, params).data())

    def get_recipe_details_ratings(self, recipe_id):
        """
        Args:
            recipe_id(int): id of the recipe
        Returns:
            results(dict): {'data': JSON string with the recipe's average
                rating and number of ratings}
        """
        query = """
            //Q6.1_Recipe_details
            MATCH (u:USER)-[o:RATED]->(r:RECIPE)
            WHERE r.recipe = $recipe_id
            RETURN round(avg(tointeger(o.rating)),2) as avgRating, count(o.rating) as numberOfRatings
        """
        params = {'recipe_id': recipe_id}
        return self._wrap(self.driver.run(query, params).data())