def __init__(self, hosts=os.getenv('AMARI_ZOOKEEPER_HOSTS', '127.0.0.1:2181'), read_only=False):
    # Note: the os.getenv() default is evaluated once, at import time, so
    # changes to the environment variable after import have no effect.
    self.hosts = hosts
    self.read_only = read_only
    LogService.info("Using Zookeeper hosts: \"%s\"" % hosts)

def import_(self, filepath):
    dicts_to_import = RecipeImporter._fetch_data_from_path(filepath)

    if len(dicts_to_import) > 1:
        self.delete(delete_all=True)

    for cocktail_dict in dicts_to_import:
        try:
            slug = Slug(cocktail_dict['display_name'])
            LogService.info("Working %s" % slug)
            c = CocktailFactory.raw_to_obj(cocktail_dict, slug)
        except KeyError as e:
            LogService.error("Something has bad data!")
            LogService.error(cocktail_dict)
            LogService.error(e)
            continue

        self.delete(cocktail=c)
        db_obj = CocktailModel(**ObjectSerializer.serialize(c, 'dict'))
        with self.pgconn.get_session() as session:
            session.add(db_obj)
            LogService.info("Successfully [re]created %s" % c.slug)
            ObjectValidator.validate(db_obj, session=session, fatal=False)

        Indexers.get_indexer(c).index(c)

    CocktailScanCache.invalidate()

def scrape_recipe(recipe):
    url = "%s/%s" % (url_base, endpoints.get('recipe') % recipe)
    LogService.info("scraping %s" % url)
    parser = UpneatRecipeParser(slug=recipe, url=url)
    raw_recipe = parser.parse()
    return raw_recipe

def get_value(self, skip_registry=False):
    """
    Retrieve the value of a setting from the various sources.
    Order goes: Registry, Environment Variable, Default.
    This potentially enforces a type as well.
    :param skip_registry: Boolean to skip looking at the registry.
    :return: Value of the setting.
    """
    registry_value = RegistryService.get(self.path, default_none=True)
    env_value = os.getenv(key=self.env, default=None)
    default_value = self.default

    potential_values = (registry_value, env_value, default_value)
    if skip_registry:
        potential_values = (env_value, default_value)

    # https://stackoverflow.com/questions/18533620/getting-the-first-non-none-value-from-list
    try:
        setting_value = next(value for value in potential_values if value is not None)
    except StopIteration:
        raise SettingsException("No valid setting found for %s" % self.path)

    LogService.info("Setting %s => %s" % (self.path, setting_value))
    return setting_value

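# Hedged usage sketch for get_value(). `Setting` is a hypothetical name for the
# owning class (not shown in this file); the path/env/default values below are
# illustrative only.
#
#   setting = Setting(path='/database/port', env='AMARI_DATABASE_PORT', default=5432)
#   setting.get_value()                    # registry value, else env var, else 5432
#   setting.get_value(skip_registry=True)  # env var, else 5432
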
def _resolve_spec(inventory, cocktail, spec, tree):
    """
    Generate a RecipeResolutionSummary object for this particular recipe.
    Reminder that Recipe = Cocktail + Spec.
    :param inventory: The Inventory object to resolve against.
    :param cocktail: The Cocktail object to resolve.
    :param spec: The Spec object that you wanted to resolve.
    :param tree: IngredientTree.
    :return: A SpecResolutionSummary.
    """
    # Components use the ingredient slug as their slug so we can safely
    # assume a 1:1 mapping between them.
    LogService.info("Resolving spec %s" % spec.slug)

    rs = RecipeResolutionFactory.from_objects(inventory, cocktail, spec)
    try:
        rs = RecipeResolutionFactory.produce_obj(id=rs.id)
        LogService.info("Found resolution %s in the database" % rs.id)
    except KeyError:
        LogService.warn("Document %s not found in database. Regenerating..." % rs.id)
        rs = RecipeResolver._populate_components(summary=rs, cocktail=cocktail,
                                                 spec=spec, inventory=inventory,
                                                 tree=tree)
    return rs

def update_obj(cls, obj, id_value, id_attr='slug', commit=True):
    """
    Update an existing model based on its current object state.
    :param obj: The object to update.
    :param id_value: The ID of the object we should be updating.
    :param id_attr: Identity attribute.
    :param commit: Whether to commit this transaction now or deal with it
                   yourself. Useful for batches.
    :return: New model.
    """
    with DatabaseService.get_session() as session:
        model = cls._get_model_safe(session, obj, id_attr, id_value)
        # This feels unsafe, but should be OK.
        # https://stackoverflow.com/questions/9667138/how-to-update-sqlalchemy-row-entry
        for key, value in ObjectSerializer.serialize(obj, 'dict').items():
            old_value = getattr(model, key)
            setattr(model, key, value)
            if old_value != value:
                LogService.info("Updating %s: '%s'->'%s'" % (key, old_value, value))
        if commit:
            session.commit()
        return model

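# Hedged usage sketch for update_obj(); `CocktailFactory` stands in for
# whichever factory class owns this classmethod (an assumption, not confirmed
# by this file), and the slug is illustrative:
#
#   c = CocktailFactory.produce_obj(id='daiquiri')
#   c.display_name = 'Daiquiri (Classic)'
#   CocktailFactory.update_obj(obj=c, id_value='daiquiri')                # commits now
#   CocktailFactory.update_obj(obj=c, id_value='daiquiri', commit=False)  # batch mode
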
def invalidate(cls):
    """
    Invalidate (delete) the cache value and key.
    :return: Result of the cache delete call.
    """
    LogService.info("Invalidating cache key %s" % cls.cache_key)
    return CacheService.delete(cls.cache_key)

def __init__(self, path):
    connection_string = "sqlite:///%s" % path
    LogService.info("connection string is '%s'" % connection_string)
    self.engine = sqlalchemy.create_engine(connection_string)
    # session.configure(bind=self.engine)
    self.Session = sessionmaker(bind=self.engine)

def delete(self):
    LogService.debug("Deleting old data from database")
    with self.pgconn.get_session() as session:
        deleted = session.query(self.model).delete()
        LogService.info("Deleted %s" % deleted)
    Indexes.rebuild(ListIndex)

def delete(endpoint, args):
    LogService.info("Deleting %s" % args.slug)
    if args.slug == 'all':
        result = requests.delete(endpoint)
    else:
        result = requests.delete("%s/%s" % (endpoint, args.slug))
    Resource._handle_error(result)

def connect():
    hosts = elasticsearch_settings.get('hosts')
    scheme = elasticsearch_settings.get('scheme')
    port = elasticsearch_settings.get('port')
    LogService.info("Using ElasticSearch hosts: \"%s\" via %s/%i" % (hosts, scheme, port))
    connections.create_connection(scheme=scheme, hosts=hosts, port=port)

def execute(self, sort='_score'):
    """
    Actually talk to ElasticSearch and run the query.
    :param sort: ElasticSearch attribute on which to sort the results.
    :return: SearchResults child class.
    # @TODO address the search range hacks here.
    """
    results = self.index_class.search()[0:1000].query(self.q).sort(sort).execute()
    LogService.info("Got %s results." % results.hits.total.value)
    return SearchResults(hits=results)

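# One possible way to address the @TODO above: elasticsearch_dsl's scan()
# iterates over all hits via the scroll API instead of the hard-coded [0:1000]
# slice. Note that scan() does not score or sort, so this is only a sketch of
# an option, not a drop-in replacement:
#
#   for hit in self.index_class.search().query(self.q).scan():
#       ...  # process each hit lazily
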
def __init__(self, host, port, username, password, ssl, request_timeout, flask_database_id):
    self.host = host
    self.port = port
    self.username = username
    self.password = password
    self.ssl = ssl
    self.request_timeout = request_timeout
    self.flask_database_id = flask_database_id
    # Mask the password so credentials don't end up in the logs, matching
    # what the PostgreSQL connection does.
    LogService.info("Redis connection: redis://%s:%s@%s:%s?ssl=%s" %
                    (self.username, '*' * len(self.password), self.host,
                     self.port, self.ssl))

def _build_tree(self, passes, root=root_node):
    """
    Construct the treelib.Tree object.
    :param passes: Number of iterations to construct the tree in.
    :param root: String ID of the root node of the tree.
    :return: Completed treelib.Tree object
    """
    tree = Tree()
    with DatabaseService.get_session() as session:
        tree.create_node(root, root)
        for i in IngredientModel.get_by_kind(session, CategoryKind):
            i = IngredientFactory.model_to_obj(i)
            tree.create_node(i.slug, i.slug, parent=root, data=i)

        ingredients_to_place = [
            IngredientFactory.model_to_obj(item)
            for item in IngredientModel.get_usable_ingredients(session)
        ]

        for idx in range(1, passes + 1):
            LogService.debug("Pass %i/%i" % (idx, passes))
            # If you remove items from a list you're iterating over you
            # dynamically change the indexing, making things get out of whack.
            # You can get around this by making a copy of the list and iterating
            # over that while you remove items from the original list.
            # https://thispointer.com/python-remove-elements-from-a-list-while-iterating/
            for i in ingredients_to_place.copy():
                try:
                    tree.create_node(i.slug, i.slug, parent=i.parent, data=i)
                    # This is to maintain a list of all index elements since
                    # finding those is somewhat hard after the fact.
                    if i.kind == IndexKind:
                        self._index_node_ids.append(i.slug)
                    ingredients_to_place.remove(i)
                except NodeIDAbsentError:
                    LogService.debug("skipping %s (Attempt %i/%s)" % (i.slug, idx, passes))

            if len(ingredients_to_place) == 0:
                LogService.info("All done after pass %i" % idx)
                break

    LogService.info("Tree has len %i" % len(tree))
    return tree

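# Hedged sketch of why multiple passes are needed: treelib raises
# NodeIDAbsentError when a child is inserted before its parent exists, so a
# later pass picks up nodes whose parents arrived in an earlier one. The slugs
# below are illustrative only, not from this codebase:
#
#   from treelib import Tree
#   from treelib.exceptions import NodeIDAbsentError
#
#   tree = Tree()
#   tree.create_node('root', 'root')
#   pending = ['gin', 'spirits']          # child listed before its parent
#   parents = {'gin': 'spirits', 'spirits': 'root'}
#   while pending:
#       for slug in pending.copy():
#           try:
#               tree.create_node(slug, slug, parent=parents[slug])
#               pending.remove(slug)
#           except NodeIDAbsentError:
#               pass                      # parent not placed yet; retry next pass
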
def rebuild(self, index_class):
    """
    Re-create an index. This deletes the entire index (not just the contents,
    but the Whole Damn Thing(tm)) and re-creates it.
    :param index_class: elasticsearch_dsl.Document child representing this index.
    :return: None
    """
    try:
        index_class._index.delete()
    except NotFoundError:
        LogService.warning("Index %s did not exist." % index_class.Index.name)

    # Proceed with rebuild.
    index_class.init()
    LogService.info("Successfully rebuilt index %s" % index_class.Index.name)

def import_(self, filepath):
    data = IngredientImporter._fetch_data_from_path(filepath)

    # Delete old data
    self.delete()

    LogService.info("Starting import")
    for ingredient in data:
        i = Ingredient(**ingredient)
        db_obj = IngredientModel(**ObjectSerializer.serialize(i, 'dict'))

        # Test for existing
        with self.pgconn.get_session() as session:
            # existing = IngredientModel.query.get(i.slug)
            existing = session.query(IngredientModel).get(i.slug)
            if existing:
                if existing.kind == IngredientKinds('category').value or existing.kind == IngredientKinds('family').value:
                    if i.kind is IngredientKinds('ingredient'):
                        LogService.error("Skipping %s (t:%s) since a broader entry exists (%s)" %
                                         (i.slug, i.kind.value, existing.kind))
                    else:
                        LogService.error("%s (p:%s) already exists as a %s (p:%s)" %
                                         (i.slug, i.parent, existing.kind, existing.parent))
                else:
                    LogService.error("%s (p:%s) already exists as a %s (p:%s)" %
                                     (i.slug, i.parent, existing.kind, existing.parent))
            else:
                session.add(db_obj)
                Indexers.get_indexer(i).index(i)

    LogService.info("Validating")
    with self.pgconn.get_session() as session:
        objects = session.query(IngredientModel).all()
        for db_obj in objects:
            # Validate
            ObjectValidator.validate(db_obj, session=session, fatal=False)

    # Invalidate the cache
    IngredientTreeCache.invalidate()

def create(self, args):
    """
    This needs to be more reliable and do other things.
    :param args:
    :return:
    """
    if args.slug != 'all':
        Resource.create(self.endpoint, args, self.path)
        return

    LogService.info('Creating all ingredients')
    data = london.util.load_yaml_data_from_path(self.path)
    LogService.info("Found %i items." % len(data))

    success = 0
    retries = []
    for item in data:
        try:
            result = requests.post(self.endpoint, json=london.util.to_json(item))
            result.raise_for_status()
            success += 1
        except requests.exceptions.RequestException as e:
            # LogService.warning("Encountered error (%s). Will retry later." % e)
            retries.append(item)

    LogService.info("Succeeded with %i items." % success)
    LogService.info("Retrying with %i items." % len(retries))

    for item in list(retries):
        try:
            result = requests.post(self.endpoint, json=london.util.to_json(item))
            result.raise_for_status()
            success += Resource._handle_error(result)
            retries.remove(item)
        except requests.exceptions.RequestException as e:
            LogService.error("Encountered error (%s). No more retries." % e)
            LogService.error(item)

    LogService.info("Succeeded with %i items." % success)

    # Refresh all indexes.
    # There seems to be a problem where I hit ElasticSearch too quickly
    # and don't get all of the indexes in time. Manifested as 9 indexes
    # instead of 11.
    sleep(2)
    self._refresh_indexes()

def _kibana_settings():
    """
    I am pedantic and want dark mode enabled on the Kibana instance.
    This code serves no useful purpose within the app.
    :return:
    """
    headers = {'kbn-version': '7.5.0', 'Content-Type': 'application/json'}
    data = '{"changes":{"theme:darkMode":true}}'
    kibana_host = os.getenv('AMARI_KIBANA_HOST', default='localhost')
    resp = requests.post("http://%s:5601/api/kibana/settings" % kibana_host,
                         headers=headers, data=data)
    if resp.status_code == 200:
        LogService.info("Kibana set to dark mode.")
    else:
        LogService.error("Error setting dark mode: %s" % resp.text)

def delete(self, cocktail=None, delete_all=False):
    if cocktail:
        with self.pgconn.get_session() as session:
            existing = session.query(CocktailModel).get(cocktail.slug)
            if existing:
                LogService.debug("Deleting %s" % existing.slug)
                session.delete(existing)
        return

    if delete_all is True:
        with self.pgconn.get_session() as session:
            LogService.debug("Deleting all CocktailModel")
            deleted = session.query(CocktailModel).delete()
            LogService.info("Deleted %s from %s" % (deleted, CocktailModel.__tablename__))
        Indexes.rebuild(RecipeIndex)

def execute(self):
    """
    Generate and perform this query.
    :return: List of results.
    """
    self._generate_criteria(model=self.model)
    LogService.info("QueryBuilder conditions are: %s" % self.criteria)

    # I don't get why this is so complicated to build. Feels excessive and
    # maybe not scalable but whatever.
    # @TODO pagination?
    module_name = self.model.__module__
    with DatabaseService.get_session() as session:
        search_obj = Search(session, module_name, (self.model,),
                            filter_by=self.criteria, all=True)
        return search_obj.results.get('data')

def import_(self, filepath):
    data = MenuImporter._fetch_data_from_path(filepath)

    # Delete old data
    self.delete()

    LogService.info("Starting import")
    for menu in data:
        m = DrinkListFactory.raw_to_obj(menu)
        db_obj = MenuModel(**ObjectSerializer.serialize(m, 'dict'))
        # Test for existing
        with self.pgconn.get_session() as session:
            session.add(db_obj)
        Indexers.get_indexer(m).index(m)

    # Validate
    self.validate()

    # Clear Cache and Index
    MenuScanCache.invalidate()

def __init__(self, username, password, host, port, database, debug_sql):
    self.username = username
    self.password = password
    self.host = host
    self.port = port
    self.database = database
    self.debug_sql = debug_sql

    connection_string = "postgres://%s:%s@%s:%i/%s" % (
        self.username, self.password, self.host, self.port, self.database)
    # https://stackoverflow.com/questions/48995979/how-to-replace-all-characters-in-a-string-with-one-character/48996018
    masked_connection_string = connection_string.replace(self.password, '*' * len(self.password))
    LogService.info("Postgres string: %s" % masked_connection_string)
    LogService.warning('Starting PostgreSQL connection!')

    self.engine = sqlalchemy.create_engine(connection_string, echo=self.debug_sql)
    self.Session = sessionmaker(bind=self.engine)
    self.ScopedSession = scoped_session(self.Session)
    self._setup_events()

def get_ingredients(self):
    all_ingredients = IngredientModel.query.all()
    # Log.info("Total ingredient count is %i" % len(all_ingredients))
    standardized_ingredients = []
    orphan_count = 0

    for ingredient in all_ingredients:
        # Log.info("Parsing %s" % ingredient.canonical_name)
        parent = self._get_parent_name(ingredient)
        if parent:
            kind = ProductKind.value
        else:
            kind = IngredientKind.value
            orphan_count += 1
        # Log.info("Parent is %s" % parent)

        standardized_ingredient = {
            'display_name': ingredient.canonical_name,
            'slug': Slug(ingredient.canonical_name),
            'aliases': self._get_ingredient_aliases(ingredient),
            'parent': parent,
            'kind': kind,
        }
        standardized_ingredients.append(standardized_ingredient)
        if not standardized_ingredient['parent']:
            LogService.info(standardized_ingredient)

    # print(len(IngredientModel.query.all()))
    # for ingredient in IngredientModel.query.all():
    #     print(ingredient.canonical_name)
    # for altname in IngredientAlternateSpellingModel.query.all():
    #     print(altname.ingredient_id)

    LogService.info("Orphans at %i" % orphan_count)
    return standardized_ingredients

def resolve(cls, inventory, cocktail, spec_slug=None):
    """
    Process a Recipe resolution request. This request could be for all
    specs of a cocktail or just one.
    :param inventory: Inventory to resolve against.
    :param cocktail: Cocktail object to resolve.
    :param spec_slug: Optional slug of the spec to resolve (None means do all of them).
    :return: List of SpecResolutionSummary objects.
    """
    results = []

    # We retrieve the tree here and expand the inventory so that there isn't
    # potential inconsistency between retrieving the tree now vs later. It does
    # mean we have to pass it around to various functions.
    tree = IngredientTreeCache.retrieve()
    inventory.expand(tree=tree)

    LogService.info("Cocktail %s has specs: %s" %
                    (cocktail.slug, [spec.slug for spec in cocktail.specs]))
    for spec in cocktail.specs:
        # Skip any specs that the user didn't ask for with the spec_slug
        # parameter.
        if spec_slug and spec.slug != spec_slug:
            LogService.info("Skipping spec %s because you didn't want it." % spec.slug)
            continue

        # Parse the spec.
        results.append(cls._resolve_spec(inventory=inventory, cocktail=cocktail,
                                         spec=spec, tree=tree))

    # Return the list of results.
    return results

def _populate_components(summary, cocktail, spec, inventory, tree):
    """
    Fill in the components of a RecipeResolutionSummary.
    :param summary: RecipeResolutionSummary object.
    :param cocktail: Cocktail object.
    :param spec: Spec object.
    :param inventory: Inventory object.
    :param tree: IngredientTree object. This is loaded elsewhere to prevent over-loading.
    :return: RecipeResolutionSummary.
    """
    # Just in case we were given a populated object, blow away the components.
    summary.components = []

    # Go through all of them.
    for component in list(spec.components):
        if inventory.contains(component.slug):
            substitutes, resolution_status = RecipeResolver._get_direct_resolution(inventory, component)
        else:
            substitutes, resolution_status = RecipeResolver._get_nondirect_resolution(inventory, component, tree)

        # Construct the SpecResolution object.
        LogService.info("Resolution for %s::%s::%s is %s" %
                        (cocktail.slug, spec.slug, component.slug, resolution_status.status))
        r = ComponentResolution(slug=component.slug,
                                status=resolution_status,
                                substitutes=substitutes,
                                parents=tree.parents(component.slug))

        # Add the resolution to the summary.
        summary.add_component(r)

    # Done.
    return summary

def create(endpoint, args, path):
    LogService.info("Creating %s" % args.slug)
    data = london.util.load_yaml_data_from_path(path)
    LogService.info("Found %i items." % len(data))
    success = 0
    for item in data:
        if item.get('slug') == args.slug or args.slug == 'all':
            # print(item)
            result = requests.post(endpoint, json=london.util.to_json(item))
            success += Resource._handle_error(result)
    LogService.info("Succeeded with %i items." % success)

def _refresh_indexes(self):
    search_url = "%s/search" % self.endpoint
    parameters = {'kind': 'index'}
    search_results = requests.get(url=search_url, headers={}, params=parameters).json()
    LogService.info("Found %i indexes" % len(search_results))
    for result in search_results:
        slug = result.get('slug')
        refresh_endpoint = "%s/%s/refresh" % (self.endpoint, slug)
        LogService.info("Refreshing index %s" % slug)
        requests.post(refresh_endpoint)
    LogService.info("Refreshed all indexes!")

def _build_search_query(self):
    """
    "filter" = "must" without scoring. Better for caching.
    This function is built for Bool() queries only.
    """
    # These lists contain the AND'd queries for each url_parameter.
    # They are AND because we query like "irish-whiskey AND stirred".
    musts = []
    must_nots = []

    for url_parameter in self.supported_parameters:
        # Each parameter is something like "components" or "construction" and
        # are keys defined in the barbados.search.whatever.WhateverSearch classes.

        # Should vs Must
        # https://stackoverflow.com/questions/28768277/elasticsearch-difference-between-must-and-should-bool-query
        # tldr: Should == OR, Must == AND
        # For the purposes of multiple values per url_parameter, we have to use
        # AND (ex: components=irish-whiskey,vermouth should yield irish-whiskey
        # AND vermouth, not irish-whiskey OR vermouth).
        url_parameter_conditions = []

        # Get the value for the url_parameter as passed in from the URL.
        # Example: "components=irish-whiskey,vermouth" would mean a raw_value
        # of ['irish-whiskey', 'vermouth']. Native data types apply as defined
        # in the barbados.search.whatever.WhateverSearch class.
        raw_value = getattr(self, url_parameter, None)
        if raw_value is None:
            continue

        # A value parser is a function that is used to munge the raw_value before
        # further processing. Since we abstracted the shit out of the search stuff
        # this is how we can transform things from the URL into ElasticSearch-speak
        # in a bespoke way.
        value_parser = self.query_parameters.get(url_parameter).get('value_parser')
        if value_parser:
            raw_value = value_parser(raw_value)

        # Ensure that the value we got matches the expected data type.
        expected_value_type = self.query_parameters.get(url_parameter).get('url_parameter_type')
        self._validate_query_parameter(parameter=url_parameter, value=raw_value,
                                       type_=expected_value_type)

        # These are the ElasticSearch document fields to search for the
        # particular value(s) we were given. These are defined in the
        # barbados.search.whatever.WhateverSearch class and are generally
        # a list of fields in ElasticSearch syntax.
        fields = self.query_parameters.get(url_parameter).get('fields')

        # When there are multiple values given in a url_parameter, we interpret
        # this to mean each value should be present in the expected fields.
        # For example if we say "components=irish-whiskey,vermouth" it is
        # expected that both "irish-whiskey" and "vermouth" are in the fields.
        if expected_value_type is list:
            for value in raw_value:
                # There's a lot going on here...
                # Since we want the OR condition between fields
                # (spec.components.slug || spec.components.parents) we are using
                # Should. If we specified multiple values, we want the AND
                # condition (rum && sherry). This builds a sub-query of Bool()
                # for the former || situation and adds it to the list of all
                # conditions for this query for aggregation with other
                # url_parameters.
                field_conditions = Bool(should=self.get_query_conditions(
                    url_parameter=url_parameter, fields=fields, value=value))
                url_parameter_conditions.append(field_conditions)
        # Single-valued url_parameters are much easier to look for.
        elif expected_value_type is str:
            # This loops through every ElasticSearch document field that we were
            # told to search in and adds that as a condition to this
            # url_parameter's conditions.
            url_parameter_conditions += self.get_query_conditions(
                url_parameter=url_parameter, fields=fields, value=raw_value)
        # Complex queries like implicit ranges take a direct dictionary of
        # values to pass to the underlying ElasticSearch query.
        elif expected_value_type is dict or expected_value_type is bool:
            url_parameter_conditions += self.get_query_conditions(
                url_parameter=url_parameter, fields=fields, value=raw_value)
        else:
            raise SearchException("Unsupported url_parameter data type: %s" % expected_value_type)

        # The occurrence is used to determine which method to use for searching
        # the index for this particular condition. There are times when we want
        # Should (OR) like matching slugs and display_names, others when we want
        # Must (AND) like matching `rum && sherry`.
        occurrence = self.query_parameters.get(url_parameter).get('occurrence')

        # Boolean-based queries (not to be confused with ElasticSearch Bool queries!)
        # need to set their occurrence based on the value of the boolean.
        if expected_value_type is bool:
            occurrence = MustOccurrence if raw_value else MustNotOccurrence

        # Now construct the Bool() query for this url_parameter.
        url_parameter_query = Bool(**{occurrence.occur: url_parameter_conditions})

        # Some parameters are inverted, aka MUST NOT appear in the search
        # results. This can be useful for, say, allergies or if you have a
        # pathological hatred of anything pineapple.
        if self.query_parameters.get(url_parameter).get('invert'):
            must_nots.append(url_parameter_query)
        else:
            musts.append(url_parameter_query)

    # Build the overall query.
    query = Bool(must=musts, must_not=must_nots)
    LogService.info("Search Conditions are %s" % query)
    return query

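# Hedged illustration of the Bool() composition above using elasticsearch_dsl.
# The Match query type and the field names are examples, not taken from this
# codebase's get_query_conditions():
#
#   from elasticsearch_dsl.query import Bool, Match
#
#   # "irish-whiskey AND vermouth", where each value may match either field:
#   irish = Bool(should=[Match(**{'spec.components.slug': 'irish-whiskey'}),
#                        Match(**{'spec.components.parents': 'irish-whiskey'})])
#   vermouth = Bool(should=[Match(**{'spec.components.slug': 'vermouth'}),
#                           Match(**{'spec.components.parents': 'vermouth'})])
#   query = Bool(must=[Bool(must=[irish, vermouth])], must_not=[])
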
def validate(self):
    LogService.info("Validating")
    with self.pgconn.get_session() as session:
        objects = session.query(self.model).all()
        for db_obj in objects:
            ObjectValidator.validate(db_obj, session=session, fatal=False)