Example #1
    def __init__(self,
                 hosts=os.getenv('AMARI_ZOOKEEPER_HOSTS', '127.0.0.1:2181'),
                 read_only=False):
        self.hosts = hosts
        self.read_only = read_only

        Log.info("Using Zookeeper hosts: \"%s\"" % hosts)
Example #2
    def __init__(self, path):
        connection_string = "sqlite:///%s" % path
        Log.info("connection string is '%s'" % connection_string)

        self.engine = sqlalchemy.create_engine(connection_string)
        # session.configure(bind=self.engine)
        self.Session = sessionmaker(bind=self.engine)
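A minimal usage sketch of the pattern above (the class and model names are assumptions, not from the source): sessionmaker() returns a session factory, so each caller opens a short-lived session per unit of work and closes it when done.

# Hypothetical caller; SqliteConnection and RecipeModel are illustrative names.
conn = SqliteConnection("/tmp/recipes.db")
session = conn.Session()          # calling the factory opens a Session bound to the engine
try:
    recipes = session.query(RecipeModel).all()
finally:
    session.close()               # return the underlying connection to the pool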
Example #3
    @classmethod
    def invalidate(cls):
        """
        Invalidate (delete) the cache value and key.
        :return: None
        """
        Log.info("Invalidating cache key %s" % cls.cache_key)
        return Cache.delete(cls.cache_key)
Example #4
    def scrape_recipe(recipe):
        url = "%s/%s" % (url_base, endpoints.get('recipe') % recipe)
        Log.info("scraping %s" % url)
        parser = UpneatRecipeParser(slug=recipe, url=url)
        raw_recipe = parser.parse()

        return raw_recipe
Example #5
    def execute(self, sort='_score'):
        """
        Actually talk to ElasticSearch and run the query.
        :param sort: ElasticSearch attribute on which to sort the results.
        :return: SearchResults child class.
        # @TODO address the search range hacks here.
        """
        results = self.index_class.search()[0:1000].query(
            self.q).sort(sort).execute()
        Log.info("Got %s results." % results.hits.total.value)
        return SearchResults(hits=results)
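The [0:1000] slice is the "search range hack" the docstring's @TODO refers to: slicing an elasticsearch_dsl Search object sets the from/size window. A standalone sketch of the same chain (index name and query are illustrative, and a default connection is assumed):

from elasticsearch_dsl import Search, Q, connections

connections.create_connection(hosts=["localhost"])    # assumption: local cluster

s = Search(index="cocktails")[0:1000]                  # from=0, size=1000 window
s = s.query(Q("match", name="daiquiri")).sort("_score")
response = s.execute()
print("Got %s results." % response.hits.total.value)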
Example #6
    def __init__(self,
                 host=os.getenv('AMARI_REDIS_HOST', default='127.0.0.1'),
                 port=int(os.getenv('AMARI_REDIS_PORT', default=6379)),
                 username=None,
                 password=None,
                 ssl=False):
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.ssl = ssl

        Log.info("Using Redis host: \"%s:%i\"" % (host, port))
Example #7
    def rebuild(self, index_class):
        """
        Re-create an index. This deletes the entire index (not just the contents,
        but the Whole Damn Thing(tm)) and re-creates it.
        :param index_class: elasticsearch_dsl.Document child representing this index.
        :return: None
        """
        try:
            index_class._index.delete()
            index_class.init()
            Log.info("Successfully rebuilt index %s" % index_class.Index.name)
        except NotFoundError:
            Log.warning("Index %s did not exist." % index_class.Index.name)
Example #8
    def __init__(self,
                 username,
                 password,
                 database,
                 host='127.0.0.1',
                 port=5432,
                 debug_sql=False):
        connection_string = "postgres://%s:%s@%s:%i/%s" % (
            username, password, host, port, database)
        Log.info("Using Postgres host: \"%s\"" % host)

        self.engine = sqlalchemy.create_engine(connection_string,
                                               echo=debug_sql)
        self.Session = sessionmaker(bind=self.engine)
Example #9
    def _build_tree(self, passes, root=root_node):
        tree = Tree()

        pgconn = Registry.get_database_connection()
        with pgconn.get_session() as session:

            tree.create_node(root, root)
            for item in IngredientModel.get_by_kind(session, CategoryKind):
                tree.create_node(item.slug,
                                 item.slug,
                                 parent=root,
                                 data=self._create_tree_data(item))

            for item in IngredientModel.get_by_kind(session, FamilyKind):
                tree.create_node(item.slug,
                                 item.slug,
                                 parent=item.parent,
                                 data=self._create_tree_data(item))

            ingredients_to_place = list(
                IngredientModel.get_usable_ingredients(session))
            for i in range(1, passes + 1):
                Log.debug("Pass %i/%i" % (i, passes))

                for item in ingredients_to_place[:]:
                    if item.kind == FamilyKind.value:
                        ingredients_to_place.remove(item)
                        Log.debug("Skipping %s because it is a family." %
                                  item.slug)
                        continue
                    try:
                        tree.create_node(item.slug,
                                         item.slug,
                                         parent=item.parent,
                                         data=self._create_tree_data(item))
                        ingredients_to_place.remove(item)
                    except NodeIDAbsentError:
                        Log.debug("skipping %s (Attempt %i/%s)" %
                                  (item.slug, i, passes))

                if len(ingredients_to_place) == 0:
                    Log.info("All done after pass %i" % i)
                    break

        return tree
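The retry loop exists because treelib raises NodeIDAbsentError when a child is inserted before its parent node; repeated passes let out-of-order rows settle once their parents have been placed. A minimal sketch of that idea with hard-coded rows (the data is illustrative):

from treelib import Tree
from treelib.exceptions import NodeIDAbsentError

rows = [("daiquiri", "rum-drinks"), ("rum-drinks", "root"), ("rum", "root")]

tree = Tree()
tree.create_node("root", "root")
pending = list(rows)
for attempt in range(1, 4):
    for slug, parent in pending[:]:
        try:
            tree.create_node(slug, slug, parent=parent)  # fails until the parent node exists
            pending.remove((slug, parent))
        except NodeIDAbsentError:
            pass   # parent not placed yet; retry on the next pass
    if not pending:
        break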
Example #10
    @classmethod
    def resolve(cls, inventory, cocktail, spec_slug=None):
        results = []

        tree = IngredientTreeCache.retrieve()
        inventory.expand(tree=tree)

        Log.info("Cocktail specs: %s" % [spec.slug for spec in cocktail.specs])
        for spec in cocktail.specs:
            # Skip any specs that the user didn't ask for with the spec_slug
            # parameter.
            if spec_slug and spec.slug != spec_slug:
                Log.info("Skipping slug %s because you didn't want it." %
                         spec.slug)
                continue

            # Parse the spec
            results.append(
                cls._resolve_spec(inventory=inventory,
                                  cocktail=cocktail,
                                  spec=spec,
                                  tree=tree))

        # Return the list of results.
        return results
Example #11
    def get_ingredients(self):
        all_ingredients = IngredientModel.query.all()
        # Log.info("Total ingredient count is %i" % len(all_ingredients))

        standardized_ingredients = []
        orphan_count = 0
        for ingredient in all_ingredients:
            # Log.info("Parsing %s" % ingredient.canonical_name)

            parent = self._get_parent_name(ingredient)
            if parent:
                kind = ProductKind.value
            else:
                kind = IngredientKind.value
                orphan_count += 1
            # Log.info("Parent is %s" % parent)

            standardized_ingredient = {
                'display_name': ingredient.canonical_name,
                'slug': Slug(ingredient.canonical_name),
                'aliases': self._get_ingredient_aliases(ingredient),
                'parent': parent,
                'kind': kind,
            }

            standardized_ingredients.append(standardized_ingredient)
            if not standardized_ingredient['parent']:
                Log.info(standardized_ingredient)

        # print(len(IngredientModel.query.all()))
        # for ingredient in IngredientModel.query.all():
        # print(ingredient.canonical_name)
        # for altname in IngredientAlternateSpellingModel.query.all():
        # print(altname.ingredient_id)
        Log.info("Orphans at %i" % orphan_count)
        return standardized_ingredients
Example #12
    def _build_search_query(self):
        """
        "filter" = "must" without scoring. Better for caching.
        
        This function is built for Bool() queries only.
        """
        # These lists contain the AND'd queries for each url_parameter.
        # They are AND because we query like "irish-whiskey AND stirred"
        musts = []
        must_nots = []

        for url_parameter in self.supported_parameters:
            # Each parameter is something like "components" or "construction" and
            # is a key defined in the barbados.search.whatever.WhateverSearch classes.

            # Should vs Must
            # https://stackoverflow.com/questions/28768277/elasticsearch-difference-between-must-and-should-bool-query
            # tldr: Should == OR, Must == AND
            # For the purposes of multiple values per url_parameter, we have to use
            # AND (ex: components=irish-whiskey,vermouth should yield irish-whiskey AND vermouth
            # not irish-whiskey OR vermouth).
            url_parameter_conditions = []

            # Get the value for the url_parameter as passed in from the URL.
            # Example: "components=irish-whiskey,vermouth" would mean a raw_value
            # of ['irish-whiskey', 'vermouth']. Native data types apply as defined
            # in the barbados.search.whatever.WhateverSearch class.
            raw_value = getattr(self, url_parameter, None)
            if raw_value is None:
                continue

            # A value parser is a function that is used to munge the raw_value before
            # further processing. Since we abstracted the shit out of the search stuff
            # this is how we can transform things from the URL into ElasticSearch-speak
            # in a bespoke way.
            value_parser = self.query_parameters.get(url_parameter).get(
                'value_parser')
            if value_parser:
                raw_value = value_parser(raw_value)

            # Ensure that the value we got matches the expected data type.
            expected_value_type = self.query_parameters.get(url_parameter).get(
                'url_parameter_type')
            self._validate_query_parameter(parameter=url_parameter,
                                           value=raw_value,
                                           type_=expected_value_type)

            # These are the Elasticsearch document fields to search for
            # the particular value(s) we were given. These are defined in the
            # barbados.search.whatever.WhateverSearch class and are generally
            # a list of fields in Elasticsearch syntax.
            fields = self.query_parameters.get(url_parameter).get('fields')

            # When there are multiple values given in a url_parameter, we interpret
            # this to mean each value should be present in expected fields.
            # For example if we say "components=irish-whiskey,vermouth" it is
            # expected that both "irish-whiskey" and "vermouth" are in the fields.
            if expected_value_type is list:
                for value in raw_value:
                    # There's a lot going on here...
                    # Since we want the OR condition between fields (spec.components.slug || spec.components.parents)
                    # we are using Should. If we specified multiple values, we want the AND condition
                    # (rum && sherry). This builds a sub-query of Bool() for the former || situation
                    # and adds it to the list of all conditions for this query for aggregation with
                    # other url_parameters.
                    field_conditions = Bool(should=[
                        self.get_query_condition(url_parameter=url_parameter,
                                                 field=field,
                                                 value=value)
                        for field in fields
                    ])
                    url_parameter_conditions.append(field_conditions)

            # Single-valued url_parameters are much easier to look for.
            elif expected_value_type is str:
                # This loops through every ElasticSearch document field that we were told to
                # search in and adds that as a condition to this url_parameter's conditions.
                for field in fields:
                    url_parameter_conditions.append(
                        self.get_query_condition(url_parameter=url_parameter,
                                                 field=field,
                                                 value=raw_value))
            # Complex queries like implicit ranges take a direct dictionary of values to pass
            # to the underlying ElasticSearch query.
            elif expected_value_type is dict or expected_value_type is bool:
                # This loops through every ElasticSearch document field that we were told to
                # search in and adds that as a condition to this url_parameter's conditions.
                for field in fields:
                    url_parameter_conditions.append(
                        self.get_query_condition(url_parameter=url_parameter,
                                                 field=field,
                                                 value=raw_value))
            else:
                raise ValidationException(
                    "Unsupported url_parameter data type: %s" %
                    expected_value_type)

            # The occurrence is used to determine which method to use for
            # searching the index for this particular condition. There are
            # times when we want Should (OR), like matching slugs and display_names,
            # and others when we want Must (AND), like matching `rum && sherry`.
            occurrence = self.query_parameters.get(url_parameter).get(
                'occurrence')

            # Boolean-based queries (not to be confused with ElasticSearch Bool queries!)
            # need to set their occurrence based on the value of the boolean.
            if expected_value_type is bool:
                occurrence = MustOccurrence if raw_value else MustNotOccurrence

            # Now construct the Bool() query for this url_parameter.
            url_parameter_query = Bool(
                **{occurrence.occur: url_parameter_conditions})

            # Some parameters are inverted, aka MUST NOT appear in the
            # search results. This can be useful for say allergies or if you
            # have a pathological hatred of anything pineapple.
            if self.query_parameters.get(url_parameter).get('invert'):
                must_nots.append(url_parameter_query)
            else:
                musts.append(url_parameter_query)

        # Build the overall query.
        query = Bool(must=musts, must_not=must_nots)
        Log.info("Search Conditions are %s" % query)
        return query
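A condensed sketch of the must/should composition described in the comments above (field names and values are illustrative, and Q("match", ...) stands in for get_query_condition): each value must match, but may match in either field, while inverted parameters land in must_not.

from elasticsearch_dsl import Q
from elasticsearch_dsl.query import Bool

# OR across fields for each value...
irish = Bool(should=[Q("match", **{"spec.components.slug": "irish-whiskey"}),
                     Q("match", **{"spec.components.parents": "irish-whiskey"})])
vermouth = Bool(should=[Q("match", **{"spec.components.slug": "vermouth"}),
                        Q("match", **{"spec.components.parents": "vermouth"})])

# ...AND across values, with inverted parameters excluded outright.
query = Bool(must=[irish, vermouth],
             must_not=[Q("match", **{"spec.components.slug": "pineapple"})])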