def fetch_categs(puuids):
    """ Fetch the names of all ByPrice categories linked
        to the given products

        Params:
        -----
        puuids : list
            List of Product UUIDs

        Returns:
        -----
        bp_categs : list
            Category names, or an empty list when the query fails
            >>> ['Salud', 'Bebés']
    """
    # Categories are resolved through the product_category link table
    # and restricted to the 'byprice' source
    sql = (
        "SELECT name FROM category WHERE id_category IN ( "
        "SELECT id_category FROM product_category "
        "WHERE product_uuid IN {} ) "
        "AND source = 'byprice' "
    ).format(tuplify(puuids))
    try:
        logger.debug(sql)
        categ_frame = pd.read_sql(sql, g._db.conn)
        records = categ_frame.to_dict(orient='records')
        logger.debug(records)
    except Exception as err:
        # Best effort: any failure is logged and reported as "no categories"
        logger.error(err)
        return []
    else:
        return [record['name'] for record in records]
def get(_val, by='gtin', _cols=('item_uuid',), limit=None):
    """ Static method to get Item info

        Params:
        -----
        _val : str
            Value(s) to query from, formatted through `tuplify`
        by : str
            Column to query in
        _cols : list or tuple
            Columns to retrieve; when empty, only `item_uuid`
            is retrieved
        limit : int
            Elements to limit query; no LIMIT clause is added
            when falsy

        Returns:
        -----
        _items : list
            List of elements

        Raises:
        -----
        errors.ApiError
            (70003) when the query could not be executed or fetched
    """
    # The default is an immutable tuple so that no mutable object
    # is shared between calls
    columns = ','.join(_cols) if _cols else 'item_uuid'
    _query = "SELECT {} FROM item WHERE {} IN {}" \
        .format(columns, by, tuplify(_val))
    if limit:
        _query += ' LIMIT {}'.format(limit)
    logger.debug(_query)
    try:
        _items = g._db.query(_query).fetch()
        logger.debug("Got {} items".format(len(_items)))
    except Exception as e:
        # Any DB failure is surfaced to the caller as a single API error
        logger.error(e)
        raise errors.ApiError(70003, "Issues fetching elements in DB")
    return _items
def intel_query(**kwargs):
    """ Static method to query items by item UUID, preferring the
        name of the matching 'ims' product over the item's own name

        Params:
        -----
        kwargs : dict
            Arguments, must contain `iuuids` (item UUIDs to fetch)

        Returns:
        -----
        _resp : list
            List of dicts with keys `item_uuid`, `gtin` and `name`;
            empty when no item matches
    """
    logger.debug('fetching: {}'.format(kwargs))
    iuuids = tuplify(kwargs['iuuids'])
    # get item info
    query = "SELECT item_uuid, gtin, name AS item_name FROM item WHERE item_uuid IN {}".format(
        iuuids)
    logger.debug(query)
    item_res = g._db.query(query).fetch()
    # get product ims info
    query = "SELECT item_uuid, name AS ims_name FROM product WHERE item_uuid IN {} AND source = 'ims'".format(
        iuuids)
    logger.debug(query)
    prod_res = g._db.query(query).fetch()
    if not item_res:
        # Without item rows the frame has no 'item_uuid' column, so the
        # merge below would raise KeyError; there is nothing to return
        return []
    # merge results
    item_df = pd.DataFrame(item_res)
    if prod_res:
        product_df = pd.DataFrame(prod_res)
        res_df = pd.merge(item_df, product_df, on="item_uuid", how="left")
        res_df.fillna('', inplace=True)
        # Keep the item name only where no ims name is available
        res_df['name'] = res_df['item_name'].where(
            res_df['ims_name'] == '', res_df['ims_name'])
        res_df.drop(['ims_name', 'item_name'], axis=1, inplace=True)
    else:
        res_df = item_df.rename(columns={"item_name": "name"})
    _resp = res_df.to_dict(orient='records')
    return _resp
def fetch_attrs(puuids):
    """ Fetch every attribute attached to the given products,
        ordered by attribute class

        Params:
        -----
        puuids : list
            List of Product UUIDs

        Returns:
        -----
        _respattrs : list
            One dict per product attribute, or an empty list
            when the query fails
            >>> [{
                'value': str,
                'attr_key': str,
                'attr': str,
                'class': str,
                'class_key': str,
                'source': str
            }]
    """
    # Outer joins keep product attributes even when their attr/clss
    # rows are missing
    sql = (
        "SELECT pa.value, a.key as attr_key, a.name as attr, "
        "c.name_es as class, c.key as class_key, a.source "
        "FROM product_attr pa "
        "LEFT OUTER JOIN attr a ON (pa.id_attr = a.id_attr) "
        "LEFT OUTER JOIN clss c ON (a.id_clss = c.id_clss) "
        "WHERE pa.product_uuid IN {} "
        "ORDER BY class "
    ).format(tuplify(puuids))
    try:
        logger.debug(sql)
        attrs_frame = pd.read_sql(sql, g._db.conn)
        _respattrs = attrs_frame.to_dict(orient='records')
        logger.debug(_respattrs)
    except Exception as err:
        # Best effort: any failure is logged and reported as "no attributes"
        logger.error(err)
        _respattrs = []
    return _respattrs
def query(_by, **kwargs):
    """ Static method to query by defined column values

        Params:
        -----
        _by : str
            Key (column) from which query is performed
        kwargs : dict
            Extra arguments:
            - cols : comma separated column names (or falsy for the
              base columns only)
            - keys : values to match `_by` against (or falsy)
            - p : page number, values below 1 are treated as 1
            - ipp : items per page
            - orderby : optional column to order by

        Returns:
        -----
        _resp : list
            List of product objects, or False when the query fails
            and APP_MODE is "CONSUMER"

        Raises:
        -----
        errors.ApiError
            (70003) when the query fails and APP_MODE is not "CONSUMER"
    """
    logger.debug('Querying by: {}'.format(_by))
    logger.debug('fetching: {}'.format(kwargs))
    # Format columns: requested + base columns, keeping only known attributes
    if kwargs['cols']:
        requested = kwargs['cols'].split(',') + Item.__base_q
        _cols = ','.join([x for x in requested if x in Item.__attrs__])
    else:
        _cols = ','.join(Item.__base_q)
    # Format querying keys
    if kwargs['keys']:
        _keys = 'WHERE ' + _by + ' IN ' + str(tuplify(kwargs['keys']))
    elif _by != 'item_uuid':
        # No keys given: fetch the rows where the column is unset
        _keys = 'WHERE {} IS NULL'.format(_by)
    else:
        _keys = ''
    # Format paginators
    # NOTE: no upper bound is enforced on ipp
    _p = max(int(kwargs['p']), 1)
    _ipp = int(kwargs['ipp'])
    # Order by statement, only base columns are accepted
    _orderby = kwargs.get('orderby') or 'item_uuid'
    if _orderby not in Item.__base_q:
        _orderby = 'item_uuid'
    # Build query
    _qry = """SELECT {} FROM item {} ORDER BY {} OFFSET {} LIMIT {} """\
        .format(_cols, _keys, _orderby, (_p - 1)*_ipp, _ipp)
    logger.debug(_qry)
    # Query DB
    try:
        _resp = g._db.query(_qry).fetch()
        logger.info("Found {} items".format(len(_resp)))
    except Exception as e:
        logger.error(e)
        logger.warning("Issues fetching elements in DB!")
        if APP_MODE == "CONSUMER":
            return False
        # Raise for every other mode: falling through would hit
        # `return _resp` with `_resp` never assigned
        raise errors.ApiError(70003, "Issues fetching elements in DB")
    return _resp
def get_elastic_items(params):
    """ Build one denormalized record per item (or per product) from
        the Catalogue Postgres DB, gathering its names, retailers,
        product UUIDs, attributes, brands, categories, ingredients,
        providers and tags

        Params:
        -----
        params : dict
            Expected keys:
            - "items" : UUIDs to fetch, formatted through `tuplify`
            - "type" : "item_uuid" or "product_uuid"

        Returns:
        -----
        items : list or bool
            List of dicts, one per fetched row. False when no items
            were given, when a query fails or when the results can
            not be parsed. For any other "type" value the given
            "items" are returned untouched.
    """
    items = params.get("items")
    type_ = params.get("type")
    if not items:
        logger.error("No items defined in params")
        return False

    # List-valued columns added to every record. 'tags' has to be
    # declared here as well: a row label that is not a column is
    # dropped when the row is written back with `df.loc[index] = row`
    list_cols = ['names', 'retailers', 'product_uuids', 'attributes',
                 'brands', 'categories', 'ingredients', 'providers',
                 'categories_raw', 'tags']
    attr_cols = ['class_name', 'class_key', 'attr_key', 'attr_name',
                 'value']
    # (output column, fragment looked up in the attribute class key)
    class_cols = (('brands', 'brand'), ('ingredients', 'ingredient'),
                  ('providers', 'provider'), ('tags', 'tag'))

    def _fill_shared(row, attrs_df, categs_df, key_col):
        # Fill the class-based and category columns of `row` in place;
        # returns the mask of the row's named attributes for reuse
        key = row[key_col]
        own_attrs = attrs_df[key_col].isin([key]) \
            & (~attrs_df.attr_key.isnull()) \
            & (~attrs_df.attr_name.isnull())
        for col, fragment in class_cols:
            matched = attrs_df[
                own_attrs & attrs_df.class_key.str.contains(fragment)]
            row[col] = list(matched.drop_duplicates('attr_key').attr_name)
        own_categs = categs_df[key_col].isin([key])
        not_byprice = ~categs_df.source.isin(["byprice", "byprice_farma"])
        only_byprice = categs_df.source.isin(["byprice"])
        # Raw categories: everything not coming from ByPrice sources
        row['categories_raw'] = list(
            categs_df[own_categs & not_byprice].name_category)
        # ByPrice categories, de-duplicated
        row['categories'] = list(
            set(categs_df[own_categs & only_byprice].name_category))
        return own_attrs

    if type_ == "item_uuid":
        try:
            uuids = tuplify(items)
            qry_item_uuids = (
                " SELECT item_uuid, gtin, name as best_name, "
                "description, page_views "
                "FROM item "
                "WHERE item_uuid IN {} "
            ).format(uuids)
            df = pd.read_sql(qry_item_uuids, g._db.conn)
            qry_product_uuids = (
                " SELECT p.product_uuid, p.item_uuid, p.name name2, "
                "p.source, c.name class_name, "
                "c.key class_key,a.key attr_key, "
                "a.name attr_name, pa.value "
                "FROM product p "
                "LEFT JOIN item_attr pa ON p.item_uuid = pa.item_uuid "
                "LEFT JOIN attr a ON a.id_attr = pa.id_attr "
                "LEFT JOIN clss c ON a.id_clss = c.id_clss "
                "where p.item_uuid IN {} "
            ).format(uuids)
            df2 = pd.read_sql(qry_product_uuids, g._db.conn)
            qry_categories = (
                " SELECT p.item_uuid, c.name as name_category, c.source "
                "FROM product p "
                "LEFT JOIN product_category pc "
                "on pc.product_uuid = p.product_uuid "
                "INNER JOIN category c "
                "on c.id_category = pc.id_category "
                "and p.item_uuid IN {} "
            ).format(uuids)
            df_categories = pd.read_sql(qry_categories, g._db.conn)
        except Exception as e:
            logger.error("Postgres Catalogue Connection error")
            logger.error(e)
            return False
        try:
            for col in list_cols:
                df[col] = None
            for index, row in df.iterrows():
                own_products = df2[df2.item_uuid == row.item_uuid]
                row['names'] = list(
                    own_products["name2"].drop_duplicates())
                row['retailers'] = list(
                    own_products["source"].drop_duplicates())
                row['product_uuids'] = list(
                    own_products["product_uuid"].drop_duplicates())
                own_attrs = _fill_shared(
                    row, df2, df_categories, 'item_uuid')
                row['attributes'] = list(
                    df2[own_attrs][attr_cols].T.to_dict().values())
                df.loc[index] = row
            items = list(df.T.to_dict().values())
        except Exception as e:
            logger.error("Error parsing the item")
            logger.error(e)
            # Same contract as the product branch: never hand the raw
            # input UUIDs back as if they were parsed records
            return False
    elif type_ == "product_uuid":
        try:
            uuids = tuplify(items)
            qry_product_uuids = (
                " SELECT p.product_uuid, p.gtin, p.description, "
                "p.item_uuid, p.name best_name, p.source, "
                "c.name class_name, "
                "c.key class_key,a.key attr_key, "
                "a.name attr_name, pa.value "
                "FROM product p "
                "LEFT JOIN product_attr pa "
                "ON p.product_uuid = pa.product_uuid "
                "LEFT JOIN attr a ON a.id_attr = pa.id_attr "
                "LEFT JOIN clss c ON a.id_clss = c.id_clss "
                "WHERE p.product_uuid IN {} "
            ).format(uuids)
            df2 = pd.read_sql(qry_product_uuids, g._db.conn)
            qry_categories = (
                " SELECT p.product_uuid, c.name as name_category, "
                "c.source "
                "FROM product p "
                "LEFT JOIN product_category pc "
                "on pc.product_uuid = p.product_uuid "
                "INNER JOIN category c "
                "on c.id_category = pc.id_category "
                "and p.product_uuid IN {} "
            ).format(uuids)
            df_categories = pd.read_sql(qry_categories, g._db.conn)
        except Exception as e:
            logger.error("Postgres Catalogue Connection error")
            logger.error(e)
            return False
        try:
            # One row per product; copy so that the new columns are
            # added to an independent frame
            df = df2.drop_duplicates('product_uuid')[[
                'product_uuid', 'best_name', 'source', 'description',
                'gtin'
            ]].copy()
            for col in list_cols:
                df[col] = None
            for index, row in df.iterrows():
                row['names'] = [row.best_name]
                row['retailers'] = [row.source]
                row['product_uuids'] = [row.product_uuid]
                # NOTE: attributes are left empty for products (the
                # per-product attribute listing was disabled)
                row['attributes'] = []
                _fill_shared(row, df2, df_categories, 'product_uuid')
                df.loc[index] = row
            items = list(df.T.to_dict().values())
        except Exception as e:
            logger.error("Error parsing the item")
            logger.error(e)
            return False
    return items