def get_ticket_bystore():
    """ Compute a ticket's prices for N items/products
        by day and closest stores.
    """
    logger.info("Fetching Ticket values")
    params = request.get_json()
    i_uuids, p_uuids = [], []
    if 'uuids' in params:
        i_uuids = params['uuids']
        logger.debug("Item UUIDs: {}".format(i_uuids))
    elif 'puuids' in params:
        p_uuids = params['puuids']
        logger.debug("Item UUIDs: {}".format(p_uuids))
    else:
        raise errors.AppError(80002, "Request UUIDs parameters missing")
    # Optional geolocation params with fixed fallbacks
    lat = float(params['lat']) if 'lat' in params else 19.431380
    lng = float(params['lng']) if 'lng' in params else -99.133486
    radius = float(params['r']) if 'r' in params else 10.0
    # Build the ticket over all requested items
    ticket = Product.generate_ticket(i_uuids, p_uuids, lat, lng, radius)
    if not ticket:
        raise errors.AppError(80005, "Could not generate results for those Items")
    return jsonify(ticket)
def get_today_prices_by_file():
    """ Serve a CSV of prices for a specific store over the past 48hrs. """
    logger.debug('Getting prices in CSV ...')
    params = request.args.to_dict()
    # Retailer key, store uuid and store name are all mandatory
    required = {'ret', 'sid', 'stn'}
    if not required <= set(params):
        raise errors.AppError(
            80002, "Name, Retailer or Store UUID parameters missing")
    prod = Product.get_st_catalog_file(
        params['sid'], params['stn'], params['ret'])
    if not prod:
        logger.warning("No prices in Selected Store")
        raise errors.AppError(80009, "No prices in selected Store")
    logger.info("Serving CSV file ...")
    fname = "{}_{}.csv".format(params['ret'].upper(), params['stn'])
    return Response(
        prod,
        mimetype="text/csv",
        headers={"Content-disposition": "attachment; filename=" + fname})
def validate_params(params):
    """ Params validation method.

        Params:
        -----
        params : dict
            params to validate

        Returns:
        -----
        dict
            Validated params; `today` defaults to the current
            UTC datetime when absent

        Raises:
        -----
        errors.AppError
            When params are missing or `uuids`/`retailers` are not lists
    """
    if not params:
        raise errors.AppError(40002, "Params Missing!", 400)
    if 'uuids' not in params:
        raise errors.AppError(40003, "Uuids param Missing!", 400)
    if not isinstance(params['uuids'], list):
        raise errors.AppError(40003, "Uuids param must be a list!", 400)
    if 'retailers' not in params:
        raise errors.AppError(40003, "Retailers param Missing!", 400)
    if not isinstance(params['retailers'], list):
        raise errors.AppError(40003, "Retailers param must be a list!", 400)
    # Default `today` to now (UTC); a provided value is kept as-is
    # (the original no-op `else: params['today'] = params['today']` removed)
    if 'today' not in params:
        params['today'] = datetime.datetime.utcnow()
    return params
def generate_promos_by_day_stream():
    """ Return the promos applying that day, limited by the parameters.

        Request args:
        -----
        day : str
            Date in `YYYY-MM-DD` format (required)
        num_promos : int, optional
            Max number of promos; values < 1 are treated as "no limit" (0)

        Returns:
        -----
        flask.Response
            Streamed JSON response

        Raises:
        -----
        errors.AppError
            On missing params or bad date format
    """
    logger.info("Promos daily stream endpoint!")
    if not request.args:
        raise errors.AppError(80002, "No params in request")
    if 'day' not in request.args:
        raise errors.AppError(80004, "Day params missing")
    try:
        # Validation only: the parsed value is discarded
        datetime.datetime.strptime(request.args['day'], '%Y-%m-%d')
    except ValueError:
        # Narrowed from a bare `except:` which also swallowed
        # unrelated programming errors
        raise errors.AppError(
            80012, "Incorrect data format, should be YYYY-MM-DD")
    if 'num_promos' not in request.args or int(request.args['num_promos']) < 1:
        num_promos = 0
    else:
        num_promos = int(request.args['num_promos'])

    def generate():
        # Delegates streaming to the Promos model
        return Promos.get_cassandra_promos_by_day(request.args['day'],
                                                  num_promos)

    return Response(stream_with_context(generate()),
                    content_type='application/json')
def check_prices_geolocated():
    """ Get price data for alerts, from the given stores, items and
        retailers. Returns only the items that are off the variation
        in the items and stores specified.

        @Payload:
            - stores <list (uuids, retailer)>
            - items <list <tuple (uuid, price)>>
            - retailers <list (str)>
            - date <str (%Y-%m-%d)> : date to get the prices from
            - variation
            - variation_type

        Raises:
        -----
        errors.AppError
            On missing params or when the geolocated query fails
    """
    logger.info('Alert geolocated endpoint...')
    params = request.get_json()
    if 'retailers' not in params:
        raise errors.AppError("invalid_request", "Retailers parameters missing")
    if 'items' not in params:
        raise errors.AppError("invalid_request", "items parameters missing")
    if 'date' not in params:
        # Default to today's UTC date
        params['date'] = datetime.datetime.utcnow().strftime('%Y-%m-%d')
    logger.debug('Params correct...')
    try:
        prices = Alert.get_geolocated(params)
    except Exception as e:
        # Log the root cause instead of silently discarding it
        # (was a bare `except:`); fixed 'server_serror' typo
        logger.error(e)
        raise errors.AppError('server_error', "Alerts geolocation failed")
    return jsonify(prices)
def dump_items():
    """ Endpoint to query items and stores by daterange and filters:
        Price by Item IDs and Stores retrieval.

        @Params:
        - (dict) : Form data with following structure
        {
            "filters": [
                {"item": "452iub4-54o3iu6b3o4b-46i54362"},
                {"store": "452iub4-54o3iu6b3o4b-46i54362"}
            ],
            "retailers": {"chedraui": "Chedraui", "walmart": "Walmart"},
            "date_start": "2017-10-01",
            "date_end": "2018-01-12",
            "interval": "day"   // "month", "week" or "day"
        }

        Returns:
        - <dict>
    """
    params = request.get_json()
    # Params validation
    if 'filters' not in params:
        raise errors.AppError(40003, "Filters param Missing!", 400)
    if 'retailers' not in params:
        raise errors.AppError(40003, "Retailers param Missing!", 400)
    if 'date_start' not in params:
        raise errors.AppError(40003, "Start Date param Missing!", 400)
    if 'date_end' not in params:
        raise errors.AppError(40003, "End Date param Missing!", 400)
    if 'interval' not in params:
        # In case interval is not explicit, set to day
        params['interval'] = 'day'
    # Fetch Prices
    prices = Dump.get_compare_by_store(params['filters'],
                                       params['retailers'],
                                       params['date_start'],
                                       params['date_end'],
                                       params['interval'])

    def build_stream():
        # Emit a JSON array incrementally, comma-separating all
        # but the final element
        yield '['
        last = len(prices) - 1
        for idx, row in enumerate(prices):
            yield json.dumps(row) + ("," if idx < last else "")
        yield ']'

    logger.info("Serving dump items!")
    return Response(stream_with_context(build_stream()),
                    content_type='application/json')
def file_response(data, m_type, extension):
    """ Method to construct CSV/Excel Exports.

        Params:
        -----
        - data : (dict) JSON data
        - m_type : (str) Task Method type (only 'prices_map' supported)
        - extension : (str) Extension file to build ('excel' or CSV default)

        Returns:
        -----
        (flask.Response) Formatted Response as MIME type

        Raises:
        -----
        errors.AppError
            When the task method is unavailable or fetching fails
    """
    logger.info('Generating {} response..'.format(extension))
    df = None
    # Map/Table Case
    if m_type == 'prices_map':
        try:
            df = load_maps_df(data)
        except Exception as e:
            logger.warning('Could not fetch {} result!'
                           .format(extension.upper()))
            logger.error(e)
            raise errors.AppError(40005, 'Issues fetching {} results'
                                  .format(extension.upper()))
    # Any other case not available
    else:
        logger.warning('Task method not available!')
        raise errors.AppError(40006, 'Task Method not available ')
    if extension.lower() == 'excel':
        # Excel Wrapper: write into an in-memory bytes buffer
        _buffer = BytesIO()
        df.to_excel(_buffer)
        _mime = "application/vnd.ms-excel"
        _ext = 'xlsx'
    else:
        # CSV Wrapper: to_csv writes into the buffer and returns None,
        # so the previously-bound unused `io_wrapper` local was dropped
        _buffer = StringIO()
        df.to_csv(_buffer)
        _mime = 'text/csv'
        _ext = 'csv'
    # Creating Filename
    fname = '{}.{}'.format(m_type, _ext)
    _buffer.seek(0)
    # Returning Response; header string cleaned up (the original had
    # stray whitespace embedded by an in-literal line continuation)
    logger.info('Serving {} response...'.format(extension.upper()))
    return Response(
        _buffer,
        mimetype=_mime,
        headers={"Content-disposition":
                 "attachment; filename={}".format(fname)})
def get_history_prices_bystore():
    """ Get prices from a specific item for the past period of time,
        and closest stores.

        Request args:
        -----
        uuid : str
            Item UUID (mutually exclusive with `puuid`)
        puuid : str
            Product UUID
        days : int, optional
            Look-back window in days (default 7)

        Raises:
        -----
        errors.AppError
            When neither `uuid` nor `puuid` is provided
    """
    item_uuid, product_uuid = None, None
    # Validate UUIDs
    if 'uuid' in request.args:
        item_uuid = request.args.get('uuid')
        logger.debug("Item UUID: " + str(item_uuid))
    elif 'puuid' in request.args:
        product_uuid = request.args.get('puuid')
        logger.debug("Product UUID: " + str(product_uuid))
    else:
        raise errors.AppError(80002, "Request UUID parameters missing")
    # Get default prior amount of days
    period = int(request.args.get('days')) if 'days' in request.args else 7
    # Call to fetch prices
    prod = Product.get_history_by_store(item_uuid, product_uuid, period)
    if not prod:
        # No prices found: answer with an empty payload
        # (dead commented-out warning string removed)
        return jsonify({})
    logger.info('Found {} metrics'.format(len(prod['history'])))
    return jsonify(prod)
def get_today_prices_bystore():
    """ Get prices from a specific item by day, and closest stores.

        Request args:
        -----
        uuid : str
            Item UUID (mutually exclusive with `puuid`)
        puuid : str
            Product UUID
        lat, lng : float, optional
            Geolocation (defaults to Mexico City coordinates)
        r : float, optional
            Search radius (default 10.0)

        Raises:
        -----
        errors.AppError
            When neither `uuid` nor `puuid` is provided
    """
    logger.debug('Getting prices from uuid...')
    item_uuid, product_uuid = None, None
    # Validate UUIDs
    if 'uuid' in request.args:
        item_uuid = request.args.get('uuid')
        logger.debug("Item UUID: " + str(item_uuid))
    elif 'puuid' in request.args:
        product_uuid = request.args.get('puuid')
        logger.debug("Product UUID: " + str(product_uuid))
    else:
        raise errors.AppError(80002, "Request UUID parameters missing")
    # Get default Location in case of not sending the correct one
    lat = float(request.args.get('lat')) if 'lat' in request.args else 19.431380
    lng = float(request.args.get('lng')) if 'lng' in request.args else -99.133486
    radius = float(request.args.get('r')) if 'r' in request.args else 10.0
    # Retrieve prices
    prod = Product.get_by_store(item_uuid, product_uuid, lat, lng, radius)
    if not prod:
        # No prices found: answer with an empty list
        # (dead commented-out warning string removed)
        return jsonify([])
    logger.info('Found {} prices'.format(len(prod)))
    logger.debug("Response prices:")
    logger.debug(prod[:1] if len(prod) > 1 else [])
    return jsonify(prod)
def jsonfier(prod):
    """ Build a JSON response from product data.

        TODO: Reuse code if useful, otherwise clean later.
    """
    logger.debug('Constructing JSON response...')
    if not prod:
        # Empty list means "no products for those filters" (still 200);
        # any other falsy value means no prices at all
        if isinstance(prod, list):
            raise errors.AppError(
                80011, 'No products available with those filters!', 200)
        raise errors.AppError(80009, "No prices available!")
    try:
        payload = jsonify(prod)
    except Exception as err:
        logger.error(err)
        raise errors.AppError(89999, 'Internal Error', 400)
    logger.debug('Sending JSON response...')
    return payload
def count_by_store_hours():
    """ Get the prices of all items of certain store
        for the past X hours.
    """
    logger.info("Fetching Prices per Store in last X hours")
    params = request.args.to_dict()
    # Retailer key, store uuid and hours window are all required
    if not {'r', 'sid', 'last_hours'}.issubset(params):
        raise errors.AppError(
            80002, "Hours, Retailer or Store UUID parameters missing")
    logger.debug(params)
    count = Product.get_count_by_store_24hours(
        params['r'], params['sid'], params['last_hours'])
    if not count:
        raise errors.AppError(80005, "Issues fetching store results")
    return jsonify(count)
def market_file(prod):
    """ Serve market prices as a downloadable CSV.

        TODO: Reuse code if useful, otherwise clean later.
    """
    if not prod:
        raise errors.AppError(80009, "No prices available!")
    csv_payload = Stats.convert_csv_market(prod)
    attachment = {"Content-disposition": "attachment; filename=mercados.csv"}
    return Response(csv_payload, mimetype="text/csv", headers=attachment)
def get_one():
    """ Connection-test endpoint: fetch a single product. """
    logger.debug("Testing connection with one product")
    sample = Product.get_one()
    if not sample:
        raise errors.AppError("invalid_request",
                              "Could not fetch data from Cassandra")
    return jsonify(sample)
def get_all_by_store():
    """ Get the prices of all items of certain store.

        Request args:
        -----
        r : str
            Retailer Key
        sid : str
            Store UUID

        TODO: Make it Work
    """
    logger.info("Fetching Prices per Store")
    # Params validation: both retailer and store id are required
    params = request.args.to_dict()
    if ('sid' not in params) or ('r' not in params):
        raise errors.AppError(80002,
                              "Retailer or Store UUID parameters missing")
    logger.debug(params)
    catalogue = Product.get_store_catalogue(params['r'], params['sid'])
    if not catalogue:
        raise errors.AppError(80005, "Issues fetching store results")
    return jsonify(catalogue)
def check_prices_today():
    """ Verify a specific request of N items from today and the
        prior day, with retailer exclusions.

        Payload:
        {
            'uuids' : ['2h354iu23h5423i5uh23i5', '30748123412057g1085h5oh3'],
            'retailers' : ['walmart','chedraui'],
            'today' : '2017-09-20'
        }

        Raises:
        -----
        errors.AppError
            On missing `uuids` or `retailers` params
    """
    logger.info('Geo Alert prices endpoint.')
    params = request.get_json()
    if 'uuids' not in params:
        raise errors.AppError("invalid_request", "UUIDs parameters missing")
    if 'retailers' not in params:
        raise errors.AppError("invalid_request", "Retailers parameters missing")
    if 'today' not in params:
        # str(...) instead of calling __str__() directly
        params['today'] = str(datetime.date.today())
    logger.debug('Params correct...')
    prices = Alert.prices_vs_prior(params)
    return jsonify(prices)
def get_stats_by_item():
    """ Today's max, min & avg price from a specific
        item_uuid or product_uuid.
    """
    logger.info("Fetching product stats by item")
    # Validate UUIDs: item takes precedence over product
    item_uuid = product_uuid = None
    if 'item_uuid' in request.args:
        item_uuid = request.args['item_uuid']
        logger.debug("Item UUID: " + str(item_uuid))
    elif 'prod_uuid' in request.args:
        product_uuid = request.args['prod_uuid']
        logger.debug("Product UUID: " + str(product_uuid))
    else:
        raise errors.AppError(80002, "Request UUID parameters missing")
    # Fetch and serve the stats
    return jsonify(Product.get_stats(item_uuid, product_uuid))
def valid_stores(retailer):
    """ Verify which are valid stores from a given retailer.

        A store is valid when it has at least one price record
        dated within the last 3 days.

        Params:
        -----
        retailer: str
            Retailer key

        Returns:
        -----
        list
            List of valid stores

        Raises:
        -----
        errors.AppError
            When no stores can be fetched from geolocation
    """
    # Get the list of active stores from geolocation
    stores = g._geolocation.get_stores([retailer])
    logger.debug("Got {} total stores".format(len(stores)))
    # Time (unused local `then` removed)
    _now = datetime.datetime.utcnow()
    if not stores:
        logger.warning("No stores for given retailer : %s" % retailer)
        raise errors.AppError(
            "price_geo_error",
            "Could not get stores from geolocation service")
    # For every store, get at least one record for the day
    valid_stores = []
    _dates = tupleize_date(_now.date(), 3)
    for store in stores:
        # Get one store
        try:
            # NOTE(review): query built by string interpolation; values come
            # from internal services here, but a parameterized query would be
            # safer -- confirm driver support before changing
            rows = g._db.query("""
                SELECT date FROM price_by_store
                WHERE store_uuid = {}
                AND date IN {}
                LIMIT 1
                """.format(store['uuid'], _dates))
            if rows:
                valid_stores.append(store)
        except Exception as e:
            # Best effort: one failing store must not abort the
            # validation of the remaining stores
            logger.error(e)
    return valid_stores
def get_recent_from_s3(fname):
    """ Fetch the most recent existence of the given file name from
        S3 for the current environment.

        Params:
        -----
        fname: str
            Filename to check
    """
    client = boto3.client(
        's3',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    s3_key = ENV.lower() + "/" + fname
    # Fetch file and parse it, dropping the stray index column
    try:
        response = client.get_object(Bucket=BUCKET, Key=s3_key)
        frame = pd.read_csv(response['Body'])
        frame = frame.drop('Unnamed: 0', axis=1, errors='ignore')
    except Exception:
        logger.warning("Not found file! " + BUCKET + ', ' + s3_key)
        raise errors.AppError("no_file_found", "Not found dump file in S3")
    return frame
def dump_catalogue():
    """ Get the entire catalogue of a retailer and download it.

        Request args:
        -----
        retailer : str
            Retailer key
        retailer_name : str, optional (defaults to `retailer`)
        store_uuid : str
            Store UUID
        store_name : str, optional (default "Default")
        fmt : str, optional (default "csv")
        hours : int, optional
            Look-back window in hours (default 32)
        extras : str, optional
            Comma-separated extra catalogue columns
        data_source : str
            Comma-separated catalogue sources (required)

        Raises:
        -----
        errors.AppError
            On missing required parameters
    """
    logger.info("Start retrieving catalogue")
    retailer = request.args.get("retailer", None)
    retailer_name = request.args.get("retailer_name", retailer)
    store_uuid = request.args.get("store_uuid", None)
    store_name = request.args.get("store_name", "Default")
    fmt = request.args.get("fmt", "csv")
    try:
        hours = int(request.args.get("hours", 32))
    except (TypeError, ValueError):
        # Non-numeric `hours` falls back to the default window
        # (narrowed from a bare `except:`)
        hours = 32
    if 'extras' in request.args:
        extras = request.args.get("extras").split(',')
    else:
        extras = []
    # If not retailer or store, raise app error
    if not retailer and not store_uuid:
        raise errors.AppError("dump_error", "Missing parameters in request")
    # `data_source` is required: calling .split(',') on a missing arg
    # used to crash with AttributeError on None
    _data_source = request.args.get("data_source", None)
    if _data_source is None:
        raise errors.AppError("dump_error", "Missing parameters in request")
    data_sources = _data_source.split(',')
    items = []
    items_ret = []
    for data_source in data_sources:
        # Get all the items
        logger.info("Getting total items from {}".format(data_source))
        items = items + g._catalogue.get_by_source(
            data_source=data_source,
            cols=extras + ['item_uuid'],
            qsize=2000)
        logger.info("Got {} total items".format(len(items)))
        # Fetch the retailer's own catalogue only once
        if len(items_ret) == 0:
            items_ret = items_ret + g._catalogue.get_by_source(
                data_source=retailer,
                cols=extras + ['item_uuid', 'gtin'],
                qsize=2000)
    # Fetch UUIDS only with existing Item UUID
    _uuids = set(i['item_uuid'] for i in items if i['item_uuid'])
    _uuids_ret = {i['product_uuid']: i for i in items_ret}
    # Get all the prices of the retailer
    logger.info("Getting prices from C* form the last {} hours".format(hours))
    catalogue = Price.get_by_store(store_uuid, hours)
    # Only the items that are permitted
    valid = []
    logger.info("Got {} prices".format(len(catalogue)))
    logger.info("Looping through catalogue")
    for c in catalogue:
        try:
            tmp = _uuids_ret[c['product_uuid']]
            # Filter to not return products from outside the data source
            if tmp['item_uuid'] not in _uuids:
                continue
            # Format
            ord_d = OrderedDict([
                ("gtin", tmp['gtin']),
                ("item_uuid", tmp.get('item_uuid', None)),
                ("name", tmp['name']),
                ("price", c['price']),
                ("price_original", c['price_original']),
                ("discount", (c['price'] - c['price_original'])),
                ("promo", c['promo']),
                ("retailer", retailer_name),
                ("store", store_name)])
            for ex in extras:
                ord_d.update([(ex, tmp[ex])])
            valid.append(ord_d)
        except Exception as e:
            # Unmatched product uuids raise KeyError; log and skip
            logger.error(e)
    # Build dataframe
    df = pd.DataFrame(valid)
    logger.info("Serving catalogue - {} prods".format(len(df)))
    if fmt == 'json':
        # Transform to dict
        table_head = list(df.columns)
        table_body = [list(v) for v in list(df.values)]
        logger.info("Serving JSON")
        return jsonify({"columns": table_head, "records": table_body})
    # If direct download, drop item_uuid
    df.drop('item_uuid', axis=1, inplace=True)
    return download_dataframe(
        df,
        fmt=fmt,
        name="catalogue_{}_{}".format(
            retailer,
            datetime.datetime.utcnow().strftime("%Y-%m-%d")))
def dump_download():
    """ Download dump.

        Request Params:
        -----
        data_source : Type of Download given the source catalogue
        format : Downloadable format (csv | excel)
        retailers : Comma Separated Retailers

        Raises:
        -----
        errors.AppError
            When no dump file is available
    """
    logger.info("Starting to download dumps.. ")
    # Data source
    data_sources = request.args.get("data_source", "ims").split(',')
    # Define Dump format
    fmt = request.args.get("format", "csv")
    rets = request.args.get("retailers", None)
    # Get all retailers from geo
    logger.info("Requesting all retailers")
    total_rets = g._geolocation.get_retailers()
    retailer_names = {r['key']: r['name'] for r in total_rets}
    # Get the requested retailers (default: all known)
    if rets:
        retailers = rets.split(",")
    else:
        retailers = retailer_names.keys()
    df = pd.DataFrame()
    for data_source in data_sources:
        # Adjust dataframe
        fname = data_source + "_stats_aggregate.csv"
        logger.info("Reading csv file from S3")
        _df = Dump.get_recent_from_s3(fname)
        if _df.empty:
            raise errors.AppError("no_file", "No available dump file found!")
        cols = ['gtin', 'name', 'item_uuid']
        for ret in retailers:
            cols.append(ret + "_max")
            cols.append(ret + "_min")
            cols.append(ret + "_avg")
        # DataFrame.append was removed in pandas 2.0 -- use concat
        df = pd.concat([df, _df[cols].copy()])
    # Rename the columns
    logger.info("Renaming columns")
    for key in retailers:
        r_name = retailer_names[key]
        logger.info("Renaming {} -> {}".format(key + "_max",
                                               r_name + " (max)"))
        df.rename(columns={
            key + "_max": r_name + " (max)",
            key + "_min": r_name + " (min)",
            key + "_avg": r_name + " (avg)",
        }, inplace=True)
    # Drop rows without prices (keep rows with >= 3 non-null values)
    df.set_index('gtin', inplace=True)
    result_df = df.dropna(thresh=3).replace(np.nan, '-')
    logger.info("Building output")
    if fmt == 'json':
        # Transform to dict
        result_df.reset_index(inplace=True)
        table_head = list(result_df.columns)
        table_body = [list(v) for v in list(result_df.values)]
        logger.info("Serving JSON")
        return jsonify({"columns": table_head, "records": table_body})
    # If direct download remove Item uuid
    result_df.drop('item_uuid', axis=1, inplace=True)
    result_df.drop_duplicates(inplace=True)
    return download_dataframe(
        result_df,
        fmt=fmt,
        name="prices_retailers_" +
        datetime.datetime.utcnow().strftime("%Y-%m-%d"))
def prices_vs_prior(params):
    """ Compute prices for alarms.

        Compares today's prices against the prior day's for the
        requested items/retailers, keeping the cheapest price per
        (item, retailer) pair.

        Params:
        -----
        params : dict
            Request params: uuids, retailers and date.

        Returns:
        -----
        prices : dict
            Dict containing list of current and prior prices

        Raises:
        -----
        errors.AppError
            When no retailers match, or the DB query fails
    """
    logger.debug('Fetching Alarm prices...')
    # Format params: build a flat filter list of item and retailer entries
    params['filters'] = [{'item_uuid': i} for i in params['uuids']]
    params['filters'] += [{'retailer': i} for i in params['retailers']]
    rets = Stats.fetch_rets(params['filters'])
    if not rets:
        raise errors.AppError(80011, "No retailers found.")
    # Items from service
    filt_items = Stats\
        .fetch_from_catalogue(params['filters'], rets)
    if not filt_items:
        logger.warning("No Products found!")
        return {'today': [], 'prevday': []}
    # Date: accept either a datetime or a 'YYYY-MM-DD' string
    if isinstance(params['today'], datetime.datetime):
        _now = params['today']
    else:
        _now = datetime.datetime\
            .strptime(str(params['today']), '%Y-%m-%d')
    try:
        # Today prices
        today_df = Alarm.get_cassandra_prices(filt_items, _now, 2)
        # Yesterday prices
        yday_df = Alarm.get_cassandra_prices(
            filt_items, _now - datetime.timedelta(days=1), 2)
        logger.debug('Prices fetched from Cassandra...')
    except Exception as e:
        logger.error(e)
        raise errors.AppError(80005, "Could not retrieve data from DB.")
    if today_df.empty:
        logger.warning("No Products found today!")
        return {'today': [], 'prevday': []}
    # Products DF: catalogue metadata for joining onto the price rows
    info_df = pd.DataFrame(
        filt_items,
        columns=['item_uuid', 'product_uuid', 'name', 'gtin', 'source'])
    # Add item_uuid to retrieve elements
    today_df = pd.merge(today_df, info_df, on='product_uuid', how='left')
    yday_df = pd.merge(yday_df, info_df, on='product_uuid', how='left')
    ### TODO
    # Add rows with unmatched products!
    # NOTE(review): `non_matched` is computed but never used below --
    # kept for the pending TODO above
    non_matched = today_df[today_df['item_uuid'].isnull() |
                           (today_df['item_uuid'] == '')].copy()
    ### END TODO
    # Format only products with matched results
    today_df = today_df[~(today_df['item_uuid'].isnull()) &
                        (today_df['item_uuid'] != '')]
    yday_df = yday_df[~(yday_df['item_uuid'].isnull()) &
                      (yday_df['item_uuid'] != '')]
    # Filter elements with not valid retailers
    today_df = today_df[today_df['source'].isin(rets)]
    if not yday_df.empty:
        yday_df = yday_df[yday_df['source'].isin(rets)]
    # Convert datetime to date (string form, 'YYYY-MM-DD')
    today_df['date'] = today_df['time'].apply(lambda x: x.date().__str__())
    if not yday_df.empty:
        yday_df['date'] = yday_df['time'].apply(
            lambda x: x.date().__str__())
    # Order and Drop duplicates: sorting by price first and keeping
    # 'first' retains the cheapest row per (item, retailer)
    today_df.sort_values(by=['item_uuid', 'source', 'price'],
                         inplace=True)
    if not yday_df.empty:
        yday_df.sort_values(by=['item_uuid', 'source', 'price'],
                            inplace=True)
    today_df.drop_duplicates(subset=['item_uuid', 'source'],
                             keep='first', inplace=True)
    if not yday_df.empty:
        yday_df.drop_duplicates(subset=['item_uuid', 'source'],
                                keep='first', inplace=True)
    # Drop Store UUID and time column (not part of the response payload)
    today_df.drop(['store_uuid', 'product_uuid', 'name', 'time', 'gtin'],
                  inplace=True, axis=1)
    if not yday_df.empty:
        yday_df.drop(
            ['store_uuid', 'product_uuid', 'name', 'time', 'gtin'],
            inplace=True, axis=1)
    logger.debug('Dataframes filtered!')
    resp = {
        'today': today_df\
            .rename(columns={'source':'retailer'})\
            .to_dict(orient='records'),
        'prevday': yday_df\
            .rename(columns={'source':'retailer'})\
            .to_dict(orient='records')
    }
    logger.info("Serving Alarm response..")
    # Convert in dict
    return resp