def populate(self):
        """Rebuild the graph from the recipes collection.

        Calls ``reset_graph()`` and then, for every record returned by
        ``self.recipes.find()``, adds a ``Recipe`` built from the record's
        ``id`` and passes one ``Ingredient`` per entry of the record's
        ``ingredients`` to ``require_ingredients``. Progress is reported
        every 100 records and once more after the loop.

        Errors raised while processing a record propagate to the caller; the
        cursor is closed in every case.
        """
        # TODO set uniqueness constraints if not exists
        reset_graph()

        records_count = self.recipes.count()
        output.push('Populating graph...')
        progress = ProgressBar(records_count)
        processed = 0
        progress.start()

        cursor = self.recipes.find()
        try:
            for record in cursor:
                recipe = Recipe(id=record['id'])
                ingredients = [
                    Ingredient(name=ingredient_name)
                    for ingredient_name in record['ingredients']
                ]

                recipe.add()
                recipe.require_ingredients(ingredients)

                processed += 1
                if processed % 100 == 0:
                    progress.update(processed)

            progress.update(processed)
        finally:
            # Release the server-side cursor even when a record fails.
            cursor.close()
        progress.end()
Ejemplo n.º 2
0
def precalc(**kwargs):
    """Run every precalculation stage in order and report the total time.

    Each stage is a service constructed with ``kwargs``. The stages run as
    index, count_and, count_or, link and sort_pairings. The elapsed
    wall-clock time is formatted with ``progress_bar._format_time`` and
    pushed to ``output``.
    """
    started_at = time.time()

    # (service class, name of the method that runs the stage), in run order.
    stages = (
        (IndexService, "index"),
        (AndCountService, "count_and"),
        (OrCountService, "count_or"),
        (LinkService, "link"),
        (SortService, "sort_pairings"),
    )
    for service_cls, method_name in stages:
        run_stage = getattr(service_cls(**kwargs), method_name)
        run_stage()

    total_seconds = time.time() - started_at
    output.push("Total elapsed: {elapsed}".format(
        elapsed=progress_bar._format_time(total_seconds)))
Ejemplo n.º 3
0
    def sort_pairings(self):
        """Sort the ``pairings`` array of each matching combination by score.

        Selects combinations whose ``r`` lies between ``self.r_min`` and
        ``self.r_max`` (inclusive) and whose ``pairings`` matches
        ``{"$gt": []}``. For each one it queues a ``$push`` with an empty
        ``$each`` and ``$sort: {"score": -1}``, which re-orders the existing
        array by descending score without adding elements. Updates are sent
        as unordered bulk operations, flushed every ``BULK_LIMIT`` records.

        Errors raised by ``bulk.execute()`` are not handled here and
        propagate to the caller; the cursor is closed in every case.
        """

        combo_filter = {
            "pairings": {
                "$gt": []
            },
            "r": {
                "$gte": self.r_min,
                "$lte": self.r_max
            }
        }

        records_count = self.combinations.count(combo_filter)

        progress = ProgressBar(records_count)
        processed = 0

        BULK_LIMIT = 1000
        bulk = self.combinations.initialize_unordered_bulk_op()
        # Number of operations queued on `bulk` since the last execute.
        pending = 0

        output.push("Sorting pairings...")
        progress.start()

        cursor = self.combinations.find(combo_filter)
        try:
            for combo in cursor:
                bulk.find({
                    "_id": combo['_id']
                }).update(
                    {"$push": {
                        "pairings": {
                            "$each": [],
                            "$sort": {
                                "score": -1
                            }
                        }
                    }})
                pending += 1

                processed += 1
                if processed % BULK_LIMIT == 0:
                    progress.update(processed)
                    # TODO handle bulk execute errors
                    bulk.execute()
                    bulk = self.combinations.initialize_unordered_bulk_op()
                    pending = 0

            progress.update(processed)
            # PyMongo raises InvalidOperation when a bulk with no queued
            # operations is executed, which would happen here whenever the
            # record count is zero or an exact multiple of BULK_LIMIT.
            if pending:
                bulk.execute()
        finally:
            cursor.close()
        progress.end()
Ejemplo n.º 4
0
    def link(self):
        """Record each combination as a pairing on its sub-combinations.

        For every combination whose ``r`` lies between ``self.r_min`` and
        ``self.r_max`` (inclusive), each ingredient in turn is treated as the
        candidate. The remaining ingredients are sorted and joined with
        ``"::"`` to form the ``_id`` of the "givens" combination. A
        ``{"name", "score", "ref_id"}`` entry is added to that document's
        ``pairings`` with ``$addToSet``. Updates are sent as unordered bulk
        operations, flushed every ``BULK_LIMIT`` combinations.

        Side effect: when ``self.r_min`` is 1 it is raised to 2 on the
        instance (presumably because a one-ingredient combination leaves no
        givens to link to; confirm). If ``self.r_max`` is then below
        ``self.r_min`` a message is pushed and the method returns without
        touching the collection.

        Errors raised by ``bulk.execute()`` are not handled here and
        propagate to the caller; the cursor is closed in every case.
        """

        # TODO take r from options if present

        # TODO get max/min r in combinations
        # r_max = int(self.combinations.find({}).sort([("r", -1)]).limit(1).next()['r'])
        # r_min = int(self.combinations.find({}).sort([("r", 1)]).limit(1).next()['r'])

        if self.r_min == 1:
            self.r_min += 1
            if self.r_max < self.r_min:
                output.push("No combinations linkable...")
                return
        record_filter = {"r": {"$gte": self.r_min, "$lte": self.r_max}}

        records_count = self.combinations.count(record_filter)

        progress = ProgressBar(records_count)
        processed = 0

        BULK_LIMIT = 100
        bulk = self.combinations.initialize_unordered_bulk_op()
        # Number of operations queued on `bulk` since the last execute.
        pending = 0

        output.push("Linking combinations...")
        progress.start()

        cursor = self.combinations.find(record_filter, no_cursor_timeout=True)
        try:
            for combo in cursor:
                ingredients = list(combo['ingredients'])
                combo_id = combo['_id']
                score = combo['score']
                for i, candidate in enumerate(ingredients):
                    # Every ingredient except the candidate, in sorted order
                    # so the joined id is built the same way for any input
                    # order.
                    givens = ingredients[:i] + ingredients[i + 1:]
                    givens.sort()
                    givens_combo_id = "::".join(givens)
                    bulk.find({
                        "_id": givens_combo_id
                    }).update({
                        "$addToSet": {
                            "pairings": {
                                "name": candidate,
                                "score": score,
                                "ref_id": combo_id
                            }
                        }
                    })
                    pending += 1

                processed += 1
                if processed % BULK_LIMIT == 0:
                    progress.update(processed)
                    # A batch may be empty if every combination in it had no
                    # ingredients; PyMongo raises InvalidOperation when an
                    # empty bulk is executed.
                    if pending:
                        # TODO handle bulk execute errors
                        bulk.execute()
                        bulk = self.combinations.initialize_unordered_bulk_op()
                        pending = 0

            progress.update(processed)
            # Skip the final flush when nothing is queued (zero matching
            # records, or the count was an exact multiple of BULK_LIMIT).
            if pending:
                bulk.execute()
        finally:
            # The cursor was opened with no_cursor_timeout, so it must be
            # closed explicitly even when the loop fails.
            cursor.close()
        progress.end()
Ejemplo n.º 5
0
 def end(self):
     """Push an empty string to ``output``."""
     # NOTE(review): presumably this terminates the progress line; confirm
     # against how `output.push` renders.
     blank = ""
     output.push(blank)
Ejemplo n.º 6
0
    def count_and(self):
        """Count, per ingredient combination, the recipes that contain it.

        For every recipe, each combination of its sorted ingredients with a
        size from ``self.r_min`` up to ``self.r_max`` is upserted into
        ``self.combinations``. The upper size is capped at the recipe's
        ingredient count; a falsy ``self.r_max`` means no cap other than
        that count. The document ``_id`` is the ingredients joined with
        ``"::"``; the upsert sets ``_id``, ``r`` and ``ingredients`` and
        increments ``and_count`` by one. One unordered bulk is executed per
        recipe.

        When ``self.skip`` is truthy, that many recipes are skipped on the
        cursor and the progress counter starts from that value.

        Failure conditions listed by the original author (none is checked
        explicitly in this method):
            not a recipe data store
            source does not exist
            destination already exists
            r_max/r_min is not an integer

        Errors raised by ``bulk.execute()`` are not handled here and
        propagate to the caller; the cursor is closed in every case.
        """

        # TODO register exit handler to print recipes processed on unexpected exit
        # TODO https://docs.python.org/3/library/atexit.html

        # The configured bounds do not change per recipe, so convert once.
        r_min = int(self.r_min)
        r_max_limit = int(self.r_max)

        recipe_count = self.recipes.count()
        cursor = self.recipes.find(no_cursor_timeout=True)
        try:
            # TODO timeout=False is bad practice
            if self.skip:
                cursor.skip(self.skip)
                processed = self.skip
            else:
                processed = 0

            output.push("Counting ands...")
            progress = ProgressBar(recipe_count)
            progress.start()

            for recipe in cursor:
                # TODO try collecting counts into a dictionary and then updating less frequently
                # TODO Also play with batch size
                ingredients = sorted(recipe['ingredients'])

                # Largest combination size for this recipe: the configured
                # limit, but never more than the number of ingredients.
                if r_max_limit:
                    r_max = min(r_max_limit, len(ingredients))
                else:
                    r_max = len(ingredients)

                if r_min <= r_max:
                    bulk = self.combinations.initialize_unordered_bulk_op()
                    # for each possible length of combinations between r_min and r_max
                    for r in range(r_min, r_max + 1):
                        # for each combination of that length
                        for combo in itertools.combinations(ingredients, r):
                            # `ingredients` is sorted and combinations()
                            # keeps input order, so the ingredients in the
                            # id are already alphabetically ordered.
                            combo = list(combo)
                            combo_id = '::'.join(combo)
                            bulk.find({"_id": combo_id}).upsert().update({
                                "$set": {
                                    "_id": combo_id,
                                    "r": r,
                                    "ingredients": combo
                                },
                                "$inc": {
                                    "and_count": 1
                                }
                            })
                    # TODO handle writeErrors
                    bulk.execute()

                processed += 1
                progress.update(processed)
        finally:
            # The cursor was opened with no_cursor_timeout, so it must be
            # closed explicitly even when the loop fails.
            cursor.close()
        progress.end()
Ejemplo n.º 7
0
    def count_or(self):
        """Compute ``or_count`` and ``score`` for combinations lacking them.

        Selects combinations whose ``r`` lies between ``self.r_min`` and
        ``self.r_max`` (inclusive) and that have no ``or_count`` field. For
        each one, ``or_count`` is obtained by inclusion-exclusion over every
        non-empty subset of its ingredients: the ``and_count`` of each subset
        (looked up through ``self.get_and_count_by_id``) is added for
        odd-sized subsets and subtracted for even-sized ones. ``score`` is
        the combination's own ``and_count`` divided by ``or_count``. Updates
        are sent as unordered bulk operations, flushed whenever the progress
        counter reaches a multiple of ``BULK_LIMIT``.

        When ``self.skip`` is truthy, that many documents are skipped on the
        cursor and the progress counter starts from that value.

        Failure conditions listed by the original author (none is checked
        explicitly in this method):
            not a collections data store
            collection does not exist
            r_max/r_min is not an integer
            r_min/r_max are currently required (should be optional)

        Errors raised by ``bulk.execute()`` are not handled here and
        propagate to the caller; the cursor is closed in every case.
        """

        # TODO register exit handler to print recipes processed on unexpected exit
        # TODO https://docs.python.org/3/library/atexit.html

        combo_filter = {
            "r": {
                "$gte": self.r_min,
                "$lte": self.r_max
            },
            "or_count": {
                "$exists": False
            }
        }

        combination_count = self.combinations.count(combo_filter)
        cursor = self.combinations.find(combo_filter, no_cursor_timeout=True)
        try:
            # TODO timeout=False is bad practice
            if self.skip:
                cursor.skip(self.skip)
                processed = self.skip
            else:
                processed = 0

            progress = ProgressBar(combination_count)
            output.push("Counting ors...")
            progress.start()

            BULK_LIMIT = 1000
            bulk = self.combinations.initialize_unordered_bulk_op()
            # Number of operations queued on `bulk` since the last execute.
            pending = 0
            for combination in cursor:
                combo_id = combination['_id']
                ingredients = combination['ingredients']

                # see https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
                or_count = 0
                sign = 1  # +1 for odd subset sizes, -1 for even ones
                for r in range(1, len(ingredients) + 1):
                    for subset in itertools.combinations(ingredients, r):
                        # Ids are built from sorted ingredient names.
                        subset_id = '::'.join(sorted(subset))
                        or_count += self.get_and_count_by_id(subset_id) * sign
                    sign *= -1

                # NOTE(review): raises ZeroDivisionError if or_count is 0,
                # e.g. when subset counts are missing; confirm whether
                # get_and_count_by_id can return 0 for stored combinations.
                bulk.find({
                    "_id": combo_id
                }).update({
                    "$set": {
                        "or_count": or_count,
                        "score": float(combination['and_count']) / or_count
                    }
                })
                pending += 1

                processed += 1

                if processed % BULK_LIMIT == 0:
                    # TODO handle bulk execute errors
                    progress.update(processed)
                    bulk.execute()
                    bulk = self.combinations.initialize_unordered_bulk_op()
                    pending = 0

            progress.update(processed)
            # PyMongo raises InvalidOperation when a bulk with no queued
            # operations is executed, which would happen here when no
            # documents match or the last periodic flush emptied the queue.
            if pending:
                bulk.execute()
        finally:
            # The cursor was opened with no_cursor_timeout, so it must be
            # closed explicitly even when the loop fails.
            cursor.close()
        progress.end()