コード例 #1
0
class GraphConnector(object):
    """Small wrapper that owns one ``Graph`` connection object.

    The connection is created in ``__init__`` and stored in ``self._driver``.
    ``Graph`` is defined elsewhere in the project's imports
    (presumably py2neo's ``Graph`` -- TODO confirm).
    """

    def __init__(self, uri, user, password):
        """Create the connection.

        Args:
            uri: value passed as the first positional argument to ``Graph``.
            user: first element of the ``auth`` tuple.
            password: second element of the ``auth`` tuple.
        """
        self._driver = Graph(uri, auth=(user, password))

    # NOTE: the three methods below used to be decorated with
    # ``@staticmethod`` while still declaring ``self``.  A static method
    # never receives the instance implicitly, so ``conn.close()`` failed with
    # "missing 1 required positional argument: 'self'".  They are ordinary
    # instance methods now; ``GraphConnector.close(conn)`` keeps working.

    def start_instance(self, uri, username, password):
        """Re-run ``__init__`` with new settings and return the new driver.

        Args:
            uri: connection address.
            username: user name for authentication.
            password: password for authentication.

        Returns:
            The freshly created object stored in ``self._driver``.
        """
        self.__init__(uri, username, password)
        return self._driver

    def close(self):
        """Call ``close()`` on the wrapped driver."""
        self._driver.close()

    def get_instance(self):
        """Return the wrapped driver object."""
        return self._driver
コード例 #2
0
class BaseSaver(object):
    '''
    BaseSaver class allows users to save all infos data fetched from website.

    The target database is chosen with ``save_mode``; the matching client is
    created in ``__init__`` and stored in ``self.connector``.

    :Usage:

     - saver = BaseSaver(save_mode='neo4j')
     - saver.data_save('file_name_without_extension')

    '''
    # Storage back ends accepted by ``save_mode``.
    SAVE_MODES = ('mongodb', 'neo4j', 'mysql')

    def __init__(self, save_mode="neo4j"):
        '''
        Initialize an instance of BaseSaver.

        :Args:
         - save_mode : a str of database to save data in.

        :Raises:
         - RuntimeError : if save_mode is not one of SAVE_MODES.

        '''
        if save_mode not in self.SAVE_MODES:
            raise RuntimeError('存储模式指定有误,请输入mongodb、neo4j或者mysql')
        self.save_mode = save_mode
        if self.save_mode == 'mongodb':
            # mongodb initialize: select the database named by 'authSource'
            print('>>>> we are in mongodb.')
            self.connector = MongoClient(
                **MONGO_CONF)[MONGO_CONF.get('authSource')]
        elif self.save_mode == 'neo4j':
            # neo4j initialize
            print('>>>> we are in neo4j.')
            self.connector = Graph(**NEO_CONF)
        else:
            # mysql initialize: connect, then run the RESORT_SQL statement
            # formatted with the module-level table name
            print('>>>> we are in mysql.')
            self.connector = pymysql.connect(**SQL_CONF)
            self.cursor = self.connector.cursor()
            sql = RESORT_SQL.format(table_name)
            print(sql)
            self.cursor.execute(sql)
            self.connector.commit()

    def data_save(self, file_name):
        '''
        Saves spider fetched data into different databases.
        Wipes out the old data and saves the new fetched ones.

        The parsed file content is also kept in ``self.json_data``.

        :Args:
         - file_name : a str of file name to fetch data from
           (without the ``.json`` extension; looked up under ``save_path``).

        :Raises:
         - RuntimeError : if the data file does not exist.

        '''
        # This could be extended to any file type: convert other formats to
        # JSON first and then store them the same way.
        file_path = os.path.join(save_path, file_name + '.json')
        if not os.access(file_path, os.F_OK):
            raise RuntimeError(f'数据文件{file_path}不存在,请检查数据!')
        with open(file_path, 'r', encoding='utf-8') as file:
            # json.load() lost its ``encoding`` keyword in Python 3.9 and
            # raises TypeError when given it; the file object above is
            # already opened as UTF-8 text, so the keyword was redundant.
            self.json_data = json.load(file)

        if self.save_mode == 'mongodb':
            print('>>> we are saving to mongodb.')
            # Remove the existing data
            self.connector.drop_collection(collection)
            # Store the new data
            self.connector[collection].insert_many(self.json_data)
        elif self.save_mode == 'neo4j':
            print('>>> we are saving to neo4j.')
            # Remove the existing data -- use with great care
            self.graph_cleaner()
            # Store the new data
            self.graph_builder()
        else:
            print('>>> we are saving to mysql.')
            # Remove the existing data -- use with great care
            self.cursor.execute(f"DELETE FROM {table_name}")
            # Build the INSERT statement; column names come from the keys of
            # the first record and values are bound by name (%(key)s)
            data_key = self.json_data[0].keys()
            sql_key = ','.join(data_key)
            sql_value = ', '.join([f'%({key})s' for key in data_key])
            # Store the new data
            sql = '''
            INSERT INTO {0}({1})
            VALUES ({2});
            '''.format(table_name, sql_key, sql_value)
            print(sql)
            self.cursor.executemany(sql, self.json_data)
            self.connector.commit()

    def graph_cleaner(self):
        '''
        Hook that removes the existing knowledge graph.

        Does nothing here; called by data_save in neo4j mode.
        '''
        pass

    def graph_builder(self):
        '''
        Hook that builds the knowledge graph from ``self.json_data``.

        Does nothing here; called by data_save in neo4j mode.
        '''
        pass

    def __del__(self):
        '''
        The deconstructor of BaseSaver class.

        Deconstructs an instance of BaseSaver, closes Databases.
        '''
        # __del__ also runs for instances whose __init__ raised (bad mode or
        # a failed connection), in which case the attributes below may be
        # missing; bail out quietly instead of raising AttributeError.
        save_mode = getattr(self, 'save_mode', None)
        if save_mode is None:
            return
        print(f'>>>> closing {save_mode}.')
        connector = getattr(self, 'connector', None)
        if connector is None:
            return
        if save_mode == 'mongodb':
            connector.client.close()
        elif save_mode == 'mysql':
            connector.close()
コード例 #3
0
class PyNeoGraph:

    def __init__(self, debug=False):
        """
            Args:
                debug(bool): when True no connection is opened and
                    ``self.driver`` is left as None
        """
        # Always define the attribute so a debug instance can be inspected
        # (``graph.driver is None``) instead of lacking the attribute.
        self.driver = None
        if not debug:
            self.driver = Graph(bolt=True, host='localhost')

    def test_conn(self):
        query = """
                MATCH (n) 
                RETURN n LIMIT 5
                """
        results = self.driver.run(query).to_data_frame()

        if results.size == 5:
            return True
        else:
            return False

    def close(self):
        """Call ``close()`` on the object stored in ``self.driver``."""
        driver = self.driver
        driver.close()

    def get_neo4j_id(self, node="i:INGREDIENT", in_list=None):
        """
            Looks up the internal neo4j ids of nodes by their raw ids.

            The property compared against ``in_list`` is the lower-cased
            label, e.g. ``i.ingredient`` for ``i:INGREDIENT``.

            Args:
                node(str): string in Cypher node format
                    (i:INGREDIENT) etc
                in_list(list): list of raw ids to match nodes;
                    None is treated as an empty list

            Returns:
                ids(list): list of neo4j id properties for nodes
                    (empty when ``in_list`` is None or empty)

            Raises:
                ValueError: if ``node`` is not exactly ``variable:LABEL``
        """

        node_var, label = f"{node}".split(':')

        # The default used to be rendered into the query text as "IN None",
        # which is not valid Cypher.  Nothing to match means nothing found.
        raw_ids = [] if in_list is None else list(in_list)
        if not raw_ids:
            return []

        # Identifiers (variable / label / property name) cannot be Cypher
        # parameters, so they stay interpolated; the id values themselves are
        # passed as a parameter instead of being spliced into the query text.
        query = f"""
            MATCH ({node})
            WHERE {node_var}.{label.lower()} IN $in_list
            RETURN id({node_var})
        """

        params = {"in_list": raw_ids}
        return self.driver.run(query, params).to_series().to_list()

    def get_matching_recipes(self, main_ingredients, side_ingredients):
        """
        Finds recipes containing all main and any of the side ingredients.

        Args:
            main_ingredients(list[str]): list of main_ingredient raw_ids and names
                ['7213&tomato'] etc
            side_ingredients(list[str]): list of side_ingredient raw_ids and names
                in the same 'id&name' format; an empty list or a leading ''
                means "none given" and the main ingredients are used instead

        Returns:
            results(dict): {'data': <JSON string>} where the JSON string
                encodes the ``result[0..10]`` list of
                {recipeName, recipeID} entries returned by the query
        """

        # An empty list used to raise IndexError on side_ingredients[0].
        if not side_ingredients or side_ingredients[0] == '':
            side_ingredients = main_ingredients
        # Only the numeric id in front of '&' is sent to the database.
        main = [int(i.split('&')[0]) for i in main_ingredients]
        side = [int(i.split('&')[0]) for i in side_ingredients]

        query = """
        //Q1_Matching Recipes
        MATCH 
            (i:INGREDIENT)<-[:CONTAINS]-(r:RECIPE)
        WITH
            r, collect(DISTINCT i.ingredient) AS ingredients,
            $main_ingredients AS main, $side_ingredients AS side
        WHERE 1=1
            and all(x IN main WHERE (x IN ingredients))
            and any(x IN side WHERE (x IN ingredients))
        WITH r.name as RecipeName, r.recipe as ID
        ORDER BY size([x IN side WHERE x IN ingredients]) DESC, r.n_ingredients
        WITH collect({ recipeName:RecipeName, recipeID:ID }) AS result
        RETURN result[0..10]
        """

        params = {"main_ingredients": main,
                  "side_ingredients": side}

        rows = self.driver.run(query, params).data()

        # The single returned row is keyed by the text of the RETURN expression.
        results = rows[0]["result[0..10]"]
        return {'data': json.dumps(results)}

    def get_content_based_recipes(self, user_id, main_ingredients, side_ingredients):
        """
        Finds recipes similar to ones the user rated that also match the
        given ingredients.

        Args:
            user_id(int): id of the user
            main_ingredients(list[str]): list of main_ingredient raw_ids and names
                ['7213&tomato'] etc
            side_ingredients(list[str]): list of side_ingredient raw_ids and names
                in the same 'id&name' format; an empty list or a leading ''
                means "none given" and the main ingredients are used instead

        Returns:
            results(dict): {'data': <JSON string>} where the JSON string
                encodes the ``result[0..10]`` list of
                {recipeName, recipeID} entries returned by the query
        """

        query = """       
        //Q2_Content based filtering
        MATCH //Find recipes similar to recpies rated by user (ID) #2203 and get their ingredients.
        (u:USER{user:$user})-[:RATED]->(r:RECIPE)-[s:SIMILAR]->(r2:RECIPE)-[:CONTAINS]->(i:INGREDIENT)
        WITH//save user_id, user rated recipes ( r ) and recipes similar to ( r ) along with a list of their aggregate ingredients
        u,r,r2,collect(DISTINCT i.ingredient) AS ingredients, count(r2.recipe) AS recipeCount, s.sim_score AS score, $main_ingredients AS main, $side_ingredients AS side
        WHERE 1=1 //filter only for recipes containing ALL main & ANY of the side ingredients
        and all(x IN main WHERE (x IN ingredients)) //all main
        and any(x IN side WHERE (x IN ingredients)) //any side
        WITH //return user_id, user_name, recipe rated by user, recommended recipe, similarity score and ingredient list in recommended recipe and calc number of matching ingredients in each recpie (no_sideIngr)
        u.user as user_id, r2.name as RecipeName, r.recipe as ID1, r2.recipe AS ID, r.name AS Name,ingredients, size([x IN side WHERE x IN ingredients]) as No_SideIngr, score
        ORDER BY No_SideIngr DESC, score DESC
        WITH collect({ recipeName:RecipeName, recipeID:ID }) AS result
        RETURN result[0..10]
        """

        # An empty list used to raise IndexError on side_ingredients[0].
        if not side_ingredients or side_ingredients[0] == '':
            side_ingredients = main_ingredients
        # Only the numeric id in front of '&' is sent to the database.
        main = [int(i.split('&')[0]) for i in main_ingredients]
        side = [int(i.split('&')[0]) for i in side_ingredients]

        params = {"main_ingredients": main,
                  "side_ingredients": side,
                  "user": user_id}
        rows = self.driver.run(query, params).data()

        # The single returned row is keyed by the text of the RETURN expression.
        results = rows[0]["result[0..10]"]
        return {'data': json.dumps(results)}

    def get_collaborative_recipes(self, user_id, main_ingredients, side_ingredients):
        """
        Finds recipes rated by users similar to the given user that also
        match the given ingredients.

        Args:
            user_id(int): id of the user
            main_ingredients(list[str]): list of main_ingredient raw_ids and names
                ['7213&tomato'] etc
            side_ingredients(list[str]): list of side_ingredient raw_ids and names
                in the same 'id&name' format; an empty list or a leading ''
                means "none given" and the main ingredients are used instead

        Returns:
            results(dict): {'data': <JSON string>} where the JSON string
                encodes the ``result[0..10]`` list of
                {recipeName, recipeID} entries returned by the query
        """

        query = """           
            //Q3_Collaborative filter
            MATCH (r:RECIPE)<-[:RATED]-(u2:USER)<-[s:SIMILAR]-(u:USER {user:$user}) 
            WITH r, count(r.recipe) AS recipeCount, s.sim_score AS score 
            ORDER BY recipeCount DESC, score DESC 
            WITH (r) MATCH (r)-[:CONTAINS]->(i:INGREDIENT) 
            WITH r, collect(DISTINCT i.ingredient) AS ingredients,$main_ingredients AS main, $side_ingredients AS side MATCH (r) 
            WHERE 1=1 
                and all(x IN main WHERE (x IN ingredients)) 
                and any(x IN side WHERE (x IN ingredients)) 
            WITH r.recipe AS ID, r.name AS RecipeName, size([x IN side WHERE x IN ingredients]) as No_SideIngr 
            ORDER BY No_SideIngr DESC LIMIT 10
            WITH collect({ recipeName:RecipeName, recipeID:ID }) AS result
            RETURN result[0..10]
                """

        # An empty list used to raise IndexError on side_ingredients[0].
        if not side_ingredients or side_ingredients[0] == '':
            side_ingredients = main_ingredients
        # Only the numeric id in front of '&' is sent to the database.
        main = [int(i.split('&')[0]) for i in main_ingredients]
        side = [int(i.split('&')[0]) for i in side_ingredients]

        params = {"main_ingredients": main,
                  "side_ingredients": side,
                  "user": user_id}
        rows = self.driver.run(query, params).data()

        # The single returned row is keyed by the text of the RETURN expression.
        results = rows[0]["result[0..10]"]
        return {'data': json.dumps(results)}

    def get_additional_ingredients(self, main_ingredients, side_ingredients):
        """
        Suggests further ingredients that co-occur with the given ones.

        Args:
            main_ingredients(list[str]): list of main_ingredient raw_ids and names
                ['7213&tomato'] etc
            side_ingredients(list[str]): list of side_ingredient raw_ids and names
                in the same 'id&name' format; an empty list or [''] means
                "none given"

        Returns:
            results(dict): {'data': <JSON string>} where the JSON string
                encodes the ``result[0..10]`` list of
                {ingredientName, ingredientID} entries returned by the query
        """
        # An empty list used to raise IndexError on side_ingredients[0].
        no_side = not side_ingredients or (
            len(side_ingredients) == 1 and side_ingredients[0] == '')
        if no_side:
            ingredients = main_ingredients
        else:
            ingredients = main_ingredients + side_ingredients
        # Only the numeric id in front of '&' is sent to the database.
        ingredients = [int(i.split('&')[0]) for i in ingredients]

        query = """
                //Q4_Probable_ingredient
                WITH $ingredients AS ingredients	// Ingredient input list
                MATCH p=(r:RECIPE)-[:CONTAINS]->(other:INGREDIENT)
                USING SCAN r:RECIPE
                WHERE all(i in ingredients 
                    WHERE exists((r)-[:CONTAINS]-(:INGREDIENT{ingredient:i})))
                AND NOT other.ingredient IN ingredients
                WITH count(p) AS ingrCount, other
                ORDER BY ingrCount DESC
                WITH collect({ingredientName:other.name, ingredientID:other.ingredient}) AS result
                RETURN result[0..10]
                """

        params = {"ingredients": ingredients}
        rows = self.driver.run(query, params).data()

        # The single returned row is keyed by the text of the RETURN expression.
        results = rows[0]["result[0..10]"]
        return {'data': json.dumps(results)}

    def get_relevant_ingredients(self, recipe_id):
        """
        Fetches the ingredients of a recipe together with the names of the
        ingredients linked to each of them by a SIMILAR relationship.

        Args:
            recipe_id(int): id of the recipe

        Returns:
            results(dict): {'data': <JSON string>} where the JSON string
                encodes all rows returned by the query
        """

        cypher = """
                //Q5_Ingredients in a recipe
                MATCH (i:INGREDIENT)<-[:CONTAINS]-(r:RECIPE), (a:INGREDIENT)<-[s:SIMILAR]-(i:INGREDIENT)
                WHERE r.recipe = $recipe_id
                RETURN i.name as ingredientName, i.ingredient as ingredientID, collect(Distinct a.name) as alternateIngredient
                """

        rows = self.driver.run(cypher, {"recipe_id": recipe_id}).data()
        return {'data': json.dumps(rows)}

    def get_relevant_ratings(self, recipe_id):
        """
        Fetches up to ten user ratings of a recipe.

        Args:
            recipe_id(int): id of the recipe

        Returns:
            results(dict): {'data': <JSON string>} where the JSON string
                encodes the ``result[0..10]`` list of {userID, rating}
                entries returned by the query
        """

        cypher = """//07_Recipe ratings
                MATCH (r:RECIPE)<-[o:RATED]-(u:USER)
                WITH r, u.user as user, o.rating AS rating
                WHERE r.recipe=$recipe_id
                WITH collect({ userID:user, rating:rating }) AS result
                RETURN result[0..10]
                """

        cursor = self.driver.run(cypher, {"recipe_id": recipe_id})
        # The first row is keyed by the text of the RETURN expression.
        first_row = cursor.data()[0]
        return {'data': json.dumps(first_row['result[0..10]'])}

    def get_recipe_details(self, recipe_id):
        """
        Fetches steps, ingredient count, nutrition and tags of a recipe.

        Args:
            recipe_id(int): id of the recipe

        Returns:
            results(dict): {'data': <JSON string>} where the JSON string
                encodes all rows returned by the query
        """

        cypher = """                  
                //Q6_Recipe_details
                MATCH (r:RECIPE)
                WHERE r.recipe = $recipe_id
                RETURN DISTINCT r.steps as steps, 
                r.n_ingredients as numberOfIngredients, r.nutrition_dict as nutritionDetials, r.tags as tags
                """

        rows = self.driver.run(cypher, {'recipe_id': recipe_id}).data()
        return {'data': json.dumps(rows)}

    def get_recipe_details_ratings(self, recipe_id):
        """
        Fetches the average rating and the number of ratings of a recipe.

        Args:
            recipe_id(int): id of the recipe

        Returns:
            results(dict): {'data': <JSON string>} where the JSON string
                encodes all rows returned by the query
                (columns avgRating and numberOfRatings)
        """

        cypher = """                
                //Q6.1_Recipe_details
                MATCH (u:USER)-[o:RATED]->(r:RECIPE)
                WHERE r.recipe = $recipe_id
                RETURN round(avg(tointeger(o.rating)),2) as avgRating, count(o.rating) as numberOfRatings
                """

        rows = self.driver.run(cypher, {'recipe_id': recipe_id}).data()
        return {'data': json.dumps(rows)}