Beispiel #1
0
    def __init__(self,
                 hosts=os.getenv('AMARI_ZOOKEEPER_HOSTS', '127.0.0.1:2181'),
                 read_only=False):
        """
        Store ZooKeeper connection settings for later use by _connect().

        :param hosts: Comma-separated host:port list. NOTE(review): the
                      default is resolved from the environment at import
                      time, not per call — confirm this is intentional.
        :param read_only: Whether to open the connection read-only.
        """
        self.read_only = read_only
        self.hosts = hosts

        Log.info("Using Zookeeper hosts: \"%s\"" % hosts)
Beispiel #2
0
 def invalidate(cls):
     """
     Invalidate (delete) the cache value and key.

     :return: The result of Cache.delete() for this cache key.
     """
     key = cls.cache_key
     Log.info("Invalidating cache key %s" % key)
     return Cache.delete(key)
Beispiel #3
0
    def __init__(self, path):
        """
        Create a SQLite-backed SQLAlchemy engine and session factory.

        :param path: Filesystem path to the SQLite database file.
        """
        conn_str = "sqlite:///%s" % path
        Log.info("connection string is '%s'" % conn_str)

        self.engine = sqlalchemy.create_engine(conn_str)
        self.Session = sessionmaker(bind=self.engine)
Beispiel #4
0
    def scrape_recipe(recipe):
        """
        Fetch and parse a single recipe from the remote site.

        :param recipe: Recipe slug used to build the endpoint URL.
        :return: Whatever UpneatRecipeParser.parse() produces for that URL.
        """
        url = "%s/%s" % (url_base, endpoints.get('recipe') % recipe)
        Log.info("scraping %s" % url)
        return UpneatRecipeParser(slug=recipe, url=url).parse()
Beispiel #5
0
 def retrieve(cls):
     """
     Retrieve and unpickle the cached value, repopulating on a miss.

     :return: The unpickled cached value.
     """
     try:
         raw = Cache.get(cls.cache_key)
     except KeyError:
         Log.warning(
             "Attempted to retrieve '%s' but it was empty. Repopulating..."
             % cls.cache_key)
         cls.populate()
         raw = Cache.get(cls.cache_key)
     return pickle.loads(raw)
Beispiel #6
0
 def get(self, path):
     """
     Fetch the contents of a ZooKeeper node.

     :param path: ZooKeeper node path.
     :return: Node data decoded as UTF-8.
     :raises KeyError: If the node does not exist.
     """
     self._connect()
     try:
         data, stat = self.zk.get(path)
         return data.decode("utf-8")
     except NoNodeError:
         raise KeyError("%s does not exist." % path)
     except Exception as e:
         # BUG FIX: previously this logged and fell through, implicitly
         # returning None and masking real failures from the caller.
         # Log for diagnostics, then re-raise the original exception.
         Log.error(e.__class__)
         Log.error(e)
         raise
Beispiel #7
0
 def execute(self, sort='_score', size=1000):
     """
     Actually talk to ElasticSearch and run the query.

     :param sort: ElasticSearch attribute on which to sort the results.
     :param size: Maximum number of hits to request. Generalizes the
                  previously hard-coded search-range hack of 1000
                  (default preserves old behavior).
     :return: SearchResults child class.
     """
     results = self.index_class.search()[0:size].query(
         self.q).sort(sort).execute()
     Log.info("Got %s results." % results.hits.total.value)
     return SearchResults(hits=results)
Beispiel #8
0
 def init(self):
     """
     Re-initialize all indexes. This calls rebuild on every registered
     index class. There be dragons here.

     :return: None
     """
     for name in self._indexes.keys():
         Log.debug("Init on %s" % name)
         try:
             self.rebuild(self._indexes.get(name))
         # BUG FIX: `except NotFoundError or KeyError or AttributeError`
         # evaluates the `or` chain to its first operand, so only
         # NotFoundError was ever caught. A tuple catches all three.
         except (NotFoundError, KeyError, AttributeError) as e:
             Log.warning("Error re-initing index %s: %s" % (name, e))
Beispiel #9
0
 def rebuild(self, index_class):
     """
     Re-create an index. This deletes the entire index (not just the
     contents, but the Whole Damn Thing(tm)) and re-creates it.

     :param index_class: elasticsearch_dsl.Document child representing
                         this index.
     :return: None
     """
     try:
         index_class._index.delete()
     except NotFoundError:
         # A missing index is fine on a rebuild; we still create it below.
         # BUG FIX: init() used to sit inside this try, so an index that
         # did not exist was never (re-)created at all.
         Log.warning("Index %s did not exist." % index_class.Index.name)
     index_class.init()
     Log.info("Successfully rebuilt index %s" % index_class.Index.name)
Beispiel #10
0
    def __init__(self,
                 host=os.getenv('AMARI_REDIS_HOST', default='127.0.0.1'),
                 port=int(os.getenv('AMARI_REDIS_PORT', default=6379)),
                 username=None,
                 password=None,
                 ssl=False):
        """
        Store Redis connection settings.

        :param host: Redis hostname (defaults from AMARI_REDIS_HOST).
        :param port: Redis port as int (defaults from AMARI_REDIS_PORT).
        :param username: Optional auth username.
        :param password: Optional auth password.
        :param ssl: Whether to use TLS for the connection.
        """
        self.ssl = ssl
        self.password = password
        self.username = username
        self.port = port
        self.host = host

        Log.info("Using Redis host: \"%s:%i\"" % (host, port))
Beispiel #11
0
    def _get_ingredient_primary_category(ingredient):
        """
        Find the first sufficiently specific category for an ingredient.

        :param ingredient: Ingredient model instance to look up.
        :return: The category display_name, or None (with an error logged)
                 when no mapped category has position >= 5.
        """
        mappings = IngredientCategoryMappingModel.query.filter(
            IngredientCategoryMappingModel.ingredient_id == ingredient.id)

        for mapping in mappings:
            category = IngredientCategoryModel.query.get(mapping.category_id)
            # Only categories deep enough in the hierarchy count as primary.
            if category.position and category.position >= 5:
                return category.display_name

        Log.error("Could not find category for %s" % ingredient.canonical_name)
Beispiel #12
0
 def retrieve(cls):
     """
     Retrieve the cache's value, repopulating the cache on a miss.

     :return: Various (whatever was stored under this cache key).
     """
     try:
         value = Cache.get(cls.cache_key)
     except KeyError:
         Log.warning(
             "Attempted to retrieve '%s' but it was empty. Repopulating..."
             % cls.cache_key)
         cls.populate()
         value = Cache.get(cls.cache_key)
     return value
Beispiel #13
0
 def delete(cocktail_object):
     """
     Remove a cocktail's documents from the index, tolerating absences.

     :param cocktail_object: Cocktail to de-index.
     :return: None
     """
     try:
         for indexable in CocktailFactory.obj_to_index(cocktail_object,
                                                       RecipeIndex):
             try:
                 RecipeIndex.delete(indexable)
             except NotFoundError:
                 Log.warning("No cache entry found for %s" % indexable)
     except KeyError as e:
         # On DELETE there is nothing to correct — bad data only matters
         # on create/update paths, so just record it and move on.
         Log.error("Recipe has bad data: %s" % e)
Beispiel #14
0
    def __init__(self,
                 username,
                 password,
                 database,
                 host='127.0.0.1',
                 port=5432,
                 debug_sql=False):
        """
        Create a Postgres SQLAlchemy engine and session factory.

        :param username: Database user.
        :param password: Database password.
        :param database: Database name.
        :param host: Postgres host (default 127.0.0.1).
        :param port: Postgres port as int (default 5432).
        :param debug_sql: When True, echo SQL statements via the engine.
        """
        conn_str = "postgres://%s:%s@%s:%i/%s" % (
            username, password, host, port, database)
        Log.info("Using Postgres host: \"%s\"" % host)

        self.engine = sqlalchemy.create_engine(conn_str, echo=debug_sql)
        self.Session = sessionmaker(bind=self.engine)
Beispiel #15
0
    def get_recipes():
        """
        Scrape recipes for a subset of index characters.

        :return: List of raw recipes from scrape_recipe(); slugs that fail
                 to scrape are logged and skipped.
        """
        # NOTE(review): this slice covers only the letter 'C'; the full
        # 0-9/A-Z range is commented out elsewhere, so this looks like a
        # debugging leftover — confirm before relying on full coverage.
        character_list = string.ascii_uppercase[2:3]

        raw_recipes = []

        for char in character_list:
            slugs = UpneatConnector._get_recipes_alpha(char)
            for slug in slugs:
                try:
                    raw_recipes.append(UpneatConnector.scrape_recipe(slug))
                # BUG FIX: a bare `except:` also swallows SystemExit and
                # KeyboardInterrupt; catch Exception instead.
                except Exception:
                    Log.error("ERROR WITH %s " % slug)

        return raw_recipes
Beispiel #16
0
    def _connect(self):
        """
        Ensure a ZooKeeper client exists and is connected.

        Lazily creates the KazooClient on first call; afterwards either
        returns immediately (already CONNECTED) or attempts start() again.

        :return: The result of KazooClient.start(), or None when already
                 connected.
        :raises FatalException: If the connection attempt times out.
        """
        if not hasattr(self, 'zk'):
            # First call: build the client from the settings stored on self.
            self.zk = KazooClient(hosts=self.hosts,
                                  read_only=self.read_only,
                                  timeout=5,
                                  connection_retry=self._get_retry())
        elif self.zk.state != KazooState.CONNECTED:
            # Any non-connected state: warn, then fall through to the
            # start() call below to attempt a (re)connection.
            Log.warning("ZooKeeper state is %s" % self.zk.state)
            pass
        elif self.zk.state == KazooState.CONNECTED:
            # Already connected; nothing to do.
            return
        else:
            # NOTE(review): unreachable — the three branches above are
            # exhaustive (no client / state != CONNECTED / state == CONNECTED).
            raise Exception("We in a weird state. %s" % self.zk.state)

        try:
            return self.zk.start()
        except KazooTimeoutError as e:
            raise FatalException("Timeout connecting to ZooKeeper (%s)" % e)
Beispiel #17
0
    def get_ingredients(self):
        """
        Transform every ingredient record into a standardized dict.

        An ingredient with a resolvable parent becomes a Product; one
        without a parent is an orphan and stays an Ingredient.

        :return: List of dicts with display_name, slug, aliases, parent
                 and kind keys.
        """
        all_ingredients = IngredientModel.query.all()

        standardized_ingredients = []
        orphan_count = 0
        for ingredient in all_ingredients:
            parent = self._get_parent_name(ingredient)
            if parent:
                kind = ProductKind.value
            else:
                kind = IngredientKind.value
                orphan_count += 1

            standardized_ingredient = {
                'display_name': ingredient.canonical_name,
                'slug': Slug(ingredient.canonical_name),
                'aliases': self._get_ingredient_aliases(ingredient),
                'parent': parent,
                'kind': kind,
            }

            standardized_ingredients.append(standardized_ingredient)
            # IDIOM FIX: previously a conditional expression was used as a
            # statement purely for its Log.info side effect; a plain `if`
            # says the same thing readably.
            if not standardized_ingredient['parent']:
                Log.info(standardized_ingredient)

        Log.info("Orphans at %i" % orphan_count)
        return standardized_ingredients
Beispiel #18
0
    def resolve(cls, inventory, cocktail, spec_slug=None):
        """
        Resolve a cocktail's specs against an inventory.

        :param inventory: Inventory to expand (via the ingredient tree)
                          and resolve against.
        :param cocktail: Cocktail whose specs should be resolved.
        :param spec_slug: Optional slug; when given, only that spec is
                          resolved and the rest are skipped.
        :return: List of per-spec resolution results.
        """
        tree = IngredientTreeCache.retrieve()
        inventory.expand(tree=tree)

        Log.info("Cocktail specs: %s" % [spec.slug for spec in cocktail.specs])

        results = []
        for spec in cocktail.specs:
            # Honor the optional spec_slug filter.
            if spec_slug and spec.slug != spec_slug:
                Log.info("Skipping slug %s because you didn't want it." %
                         spec.slug)
                continue

            resolution = cls._resolve_spec(inventory=inventory,
                                           cocktail=cocktail,
                                           spec=spec,
                                           tree=tree)
            results.append(resolution)

        return results
Beispiel #19
0
    def _build_tree(self, passes, root=root_node):
        """
        Build the ingredient tree from the database in multiple passes.

        Categories are attached to the root, families to their parents,
        and then usable ingredients are placed over up to `passes`
        iterations so that an item whose parent is not yet in the tree can
        succeed on a later pass.

        :param passes: Maximum number of placement passes to attempt.
        :param root: Node ID used as the tree root (defaults to root_node).
        :return: The populated Tree.
        """
        tree = Tree()

        pgconn = Registry.get_database_connection()
        with pgconn.get_session() as session:

            # Root node first, then every category directly under it.
            tree.create_node(root, root)
            for item in IngredientModel.get_by_kind(session, CategoryKind):
                tree.create_node(item.slug,
                                 item.slug,
                                 parent=root,
                                 data=self._create_tree_data(item))

            # Families attach to their own parents (the categories above).
            for item in IngredientModel.get_by_kind(session, FamilyKind):
                tree.create_node(item.slug,
                                 item.slug,
                                 parent=item.parent,
                                 data=self._create_tree_data(item))

            ingredients_to_place = list(
                IngredientModel.get_usable_ingredients(session))
            for i in range(1, passes + 1):
                Log.debug("Pass %i/%i" % (i, passes))

                # Iterate over a copy because placed items are removed
                # from the list mid-loop.
                for item in ingredients_to_place[:]:
                    if item.kind == FamilyKind.value:
                        # Families were already placed above; drop them.
                        ingredients_to_place.remove(item)
                        Log.debug("Skipping %s because it is a family." %
                                  item.slug)
                        continue
                    try:
                        tree.create_node(item.slug,
                                         item.slug,
                                         parent=item.parent,
                                         data=self._create_tree_data(item))
                        ingredients_to_place.remove(item)
                    except NodeIDAbsentError:
                        # Parent not in the tree yet; retry next pass.
                        Log.debug("skipping %s (Attempt %i/%s)" %
                                  (item.slug, i, passes))

                if len(ingredients_to_place) == 0:
                    Log.info("All done after pass %i" % i)
                    break

        return tree
Beispiel #20
0
    def _build_search_query(self):
        """
        Build the overall Bool() query from the supported URL parameters.

        "filter" = "must" without scoring. Better for caching.

        This function is built for Bool() queries only.

        :return: A Bool() query combining all parameter conditions.
        :raises ValidationException: On an unsupported parameter data type.
        """
        # These lists contain the AND'd queries for each url_parameter.
        # They are AND because we query like "irish-whiskey AND stirred"
        musts = []
        must_nots = []

        for url_parameter in self.supported_parameters:
            # Each parameter is something like "components" or "construction" and
            # are keys defined in the barbados.search.whatever.WhateverSearch classes.
            # Hoisted: the parameter's settings dict was previously re-fetched
            # from self.query_parameters for every individual attribute.
            parameter_settings = self.query_parameters.get(url_parameter)

            # Should vs Must
            # https://stackoverflow.com/questions/28768277/elasticsearch-difference-between-must-and-should-bool-query
            # tldr: Should == OR, Must == AND
            # For the purposes of multiple values per url_parameter, we have to use
            # AND (ex: components=irish-whiskey,vermouth should yield irish-whiskey AND vermouth
            # not irish-whiskey OR vermouth).
            url_parameter_conditions = []

            # Get the value for the url_parameter as passed in from the URL.
            # Example: "components=irish-whiskey,vermouth" would mean a raw_value
            # of ['irish-whiskey', 'vermouth'].
            raw_value = getattr(self, url_parameter, None)
            if raw_value is None:
                continue

            # A value parser is a function used to munge the raw_value before
            # further processing, transforming URL input into
            # ElasticSearch-speak in a bespoke way.
            value_parser = parameter_settings.get('value_parser')
            if value_parser:
                raw_value = value_parser(raw_value)

            # Ensure that the value we got matches the expected data type.
            expected_value_type = parameter_settings.get('url_parameter_type')
            self._validate_query_parameter(parameter=url_parameter,
                                           value=raw_value,
                                           type_=expected_value_type)

            # The Elasticsearch document fields to search for the particular
            # value(s) we were given (Elasticsearch field syntax).
            fields = parameter_settings.get('fields')

            if expected_value_type is list:
                # Multiple values in a url_parameter mean each value must be
                # present in the expected fields (rum && sherry), while the
                # fields themselves are OR'd (slug || parents), hence the
                # Should sub-query per value.
                for value in raw_value:
                    field_conditions = Bool(should=[
                        self.get_query_condition(url_parameter=url_parameter,
                                                 field=field,
                                                 value=value)
                        for field in fields
                    ])
                    url_parameter_conditions.append(field_conditions)
            # str, dict and bool all add one condition per document field.
            # (The previously separate str and dict/bool branches were
            # byte-identical duplicates; they are merged here.)
            elif expected_value_type in (str, dict, bool):
                for field in fields:
                    url_parameter_conditions.append(
                        self.get_query_condition(url_parameter=url_parameter,
                                                 field=field,
                                                 value=raw_value))
            else:
                raise ValidationException(
                    "Unsupported url_parameter data type: %s" %
                    expected_value_type)

            # The occurrence picks the matching method for this condition:
            # Should (OR) for things like slugs vs display_names, Must (AND)
            # for things like `rum && sherry`.
            occurrence = parameter_settings.get('occurrence')

            # Boolean-based queries (not to be confused with ElasticSearch
            # Bool queries!) derive their occurrence from the boolean value.
            if expected_value_type is bool:
                occurrence = MustOccurrence if raw_value else MustNotOccurrence

            # Now construct the Bool() query for this url_parameter.
            url_parameter_query = Bool(
                **{occurrence.occur: url_parameter_conditions})

            # Some parameters are inverted, aka MUST NOT appear in the
            # search results — useful for allergies or a pathological
            # hatred of anything pineapple.
            if parameter_settings.get('invert'):
                must_nots.append(url_parameter_query)
            else:
                musts.append(url_parameter_query)

        # Build the overall query.
        query = Bool(must=musts, must_not=must_nots)
        Log.info("Search Conditions are %s" % query)
        return query
Beispiel #21
0
 def fail(self, message):
     """
     Record a failure, raising when this validator is fatal.

     :param message: Human-readable failure description.
     :raises ValidationException: When self.fatal is truthy.
     """
     Log.error(message)
     if not self.fatal:
         return
     raise ValidationException(message)
Beispiel #22
0
 def delete(cls, ingredient_object):
     """
     Remove an ingredient's document from the index.

     :param ingredient_object: Ingredient to de-index.
     :return: None
     """
     index = IngredientFactory.obj_to_index(ingredient_object, cls.for_index)
     try:
         IngredientIndex.delete(index)
     except NotFoundError:
         # CONSISTENCY FIX: every other call site uses Log.warning();
         # `warn` is the deprecated logging alias and may not exist on Log.
         Log.warning("Object %s was not found in index on DELETE. "
                     "This probably isn't a problem?" % ingredient_object.slug)