def helper(ands, minus, day):
    """Estimate the inventory of ``ands`` minus ``minus`` on ``day``.

    ``cfg`` and ``bookings_map`` are not parameters; they are read from the
    enclosing scope.

    Formula (from the original notes):
        B4 and B3 - B2 - B1
            = (p4 and p3).(t4 and t3) - (p4 and p3)and(p1 or p2) * avgTBR
        avgTBR = [(p4 and p3 and p2)*(t4 and t3 and t2)
                  + (p4 and p3 and p1)*(t4 and t3 and t1)]
                 / [(p4 and p3 and p2) + (p4 and p3 and p1)]

    Returns:
        int: 0 when ``ands`` and ``minus`` overlap, when the prediction count
        for ``ands`` is 0, or when its TBR ratio is 0; otherwise the truncated
        value of ``p_and * t_and`` minus the ``minus`` contribution.
    """
    predictions_client = ESClient(
        cfg['es_host'], cfg['es_port'],
        cfg['es_predictions_index'], cfg['es_predictions_type'])
    tbr_client = ESClient(
        cfg['es_host'], cfg['es_port'],
        cfg['es_tbr_index'], cfg['es_tbr_type'])

    # if ands and minus has intersection then the result is 0
    shared = set(ands) & set(minus)
    if len(shared) > 0:
        return 0

    _, and_count = get_prediction_count(
        ands, [], bookings_map, day, predictions_client)
    if and_count == 0:
        return 0

    and_ratio = get_tbr_ratio(ands, bookings_map, tbr_client)
    if and_ratio == 0:
        return 0

    # Inventory to subtract for the excluded bookings; stays 0 when there is
    # nothing to exclude or the overlapping prediction count is 0.
    excluded_inventory = 0
    if len(minus) > 0:
        _, overlap_count = get_prediction_count(
            ands, minus, bookings_map, day, predictions_client)
        if overlap_count != 0:
            average_ratio = calculate_avg_tbr(
                ands, minus, bookings_map, day,
                predictions_client, tbr_client)
            excluded_inventory = overlap_count * average_ratio

    return int(and_count * and_ratio - excluded_inventory)
def get_model_stats_from_es(cfg, model_name, model_version):
    '''
    Return the single model-stats document for ``model_name``/``model_version``.

    Example of the hit list the query is expected to produce:

    [{'date': '2020-01-17',
      'model': {'name': 's32', 'version': 1},
      'stats': {'g_g_m': [0.32095959595959594, 0.4668649491714752],
                'g_g_f': [0.3654040404040404, 0.4815635452904544],
                'g_g_x': [0.31363636363636366, 0.46398999646418304],
                'a_1': [0.198989898989899, 0.3992572317838901],
                'a_2': [0.2474747474747475, 0.4315630593164027],
                'a_3': [0.295959595959596, 0.45649211860504146],
                'a_4': [0.25757575757575757, 0.43731748751040456],
                't_3G': [0.0, 1.0],
                't_4G': [0.0, 1.0],
                'si_1': [0.37424242424242427, 0.4839470491115894],
                'si_2': [0.4042929292929293, 0.49077533664980666],
                'si_3': [0.22146464646464648, 0.4152500106648333],
                'price_cat_0': [0.0, 1.0],
                'price_cat_1': [0.3333333333333333, 0.4714243623012701],
                'price_cat_2': [0.3333333333333333, 0.47142436230126994],
                'price_cat_3': [0.3333333333333333, 0.47142436230126994],
                'holiday_stats': [0.044444444444444446, 0.20723493215097805]}}]

    Raises:
        Exception: if the query result is None or does not contain exactly
        one document.
    '''
    es = ESClient(cfg['es_host'], cfg['es_port'],
                  cfg['es_model_index'], cfg['es_model_type'])
    body = {
        "query": {
            "bool": {
                "must": [
                    # NOTE(review): this field name was blank in the reviewed
                    # source; "model.name" is reconstructed from the document
                    # layout in the docstring and the sibling "model.version"
                    # key -- confirm.
                    {"match": {"model.name": model_name}},
                    {"match": {"model.version": model_version}},
                ]
            }
        }
    }
    # NOTE(review): the call on this line was garbled in the reviewed source
    # (`doc = if ...`); reconstructed as ESClient.search(body) -- confirm the
    # method name against ESClient.
    doc = es.search(body)
    if doc is None or len(doc) != 1:
        raise Exception('model/version {}/{} not valid'.format(
            model_name, model_version))
    return doc[0]
def test_es_predictions_search(self):
    """Check that searching the predictions index returns at least 40 hits.

    Reads the connection settings from ``self.cfg``.
    """
    es_client_predictions = ESClient(self.cfg['es_host'], self.cfg['es_port'],
                                     self.cfg['es_predictions_index'],
                                     self.cfg['es_predictions_type'])
    # NOTE(review): the call target was garbled in the reviewed source
    # (`predictions ={"size": 100})`); reconstructed as ESClient.search,
    # matching this test's name -- confirm.
    predictions = es_client_predictions.search({"size": 100})
    self.assertTrue(len(predictions) > 0)
    self.assertTrue(len(predictions) >= 40)
def test_4(cfg):
    """Print the booking-bucket count for bookings b1 and b2 on ``cfg['today']``.

    Loads bookings from the booking index, adjusts their dates, builds the
    bookings map and calls the function returned by
    ``optimizer.main.get_bb_count`` with an empty ``minus`` list.
    """
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    # NOTE(review): the call target was garbled in the reviewed source
    # (`bookings ={})`); reconstructed as ESClient.search -- confirm.
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings = optimizer.util.adjust_booking_dates(bookings)
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b1', 'b2']
    day = cfg['today']
    r = optimizer.main.get_bb_count(cfg, bookings_map)(ands, [], day)
    print(r)
def test_2(cfg):
    """Print the result of ``get_tbr_ratio`` for bookings b1 and b2.

    Loads bookings from the booking index, builds the bookings map and runs
    ``optimizer.dao.query_builder.get_tbr_ratio`` against the TBR index.
    """
    es_client_tbr = ESClient(cfg['es_host'], cfg['es_port'],
                             cfg['es_tbr_index'], cfg['es_tbr_type'])
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    # NOTE(review): the call target was garbled in the reviewed source
    # (`bookings ={})`); reconstructed as ESClient.search -- confirm.
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b1', 'b2']
    query = optimizer.dao.query_builder.get_tbr_ratio(
        ands, bookings_map, es_client_tbr)
    print(query)
def test_3(cfg):
    """Print the query and result of ``get_prediction_count`` for b1 and b2.

    Uses ``cfg['today']`` as the day and an empty second booking list.
    """
    es_client_predictions = ESClient(cfg['es_host'], cfg['es_port'],
                                     cfg['es_predictions_index'],
                                     cfg['es_predictions_type'])
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    # NOTE(review): the call target was garbled in the reviewed source
    # (`bookings ={})`); reconstructed as ESClient.search -- confirm.
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b1', 'b2']
    day = cfg['today']
    (query, result) = optimizer.dao.query_builder.get_prediction_count(
        ands, [], bookings_map, day, es_client_predictions)
    print(query)
    print(result)
def test_1(cfg):
    """Print what ``get_prediction_count`` returns for ands=['b6'], ors=['b7'].

    The day is ``cfg['today']`` passed through ``optimizer.util.convert_date``.
    """
    es_client_predictions = ESClient(cfg['es_host'], cfg['es_port'],
                                     cfg['es_predictions_index'],
                                     cfg['es_predictions_type'])
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    # NOTE(review): the call target was garbled in the reviewed source
    # (`bookings ={})`); reconstructed as ESClient.search -- confirm.
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(bookings)
    ands = ['b6']
    ors = ['b7']
    day = cfg['today']
    day = optimizer.util.convert_date(day)
    query = optimizer.dao.query_builder.get_prediction_count(
        ands, ors, bookings_map, day, es_client_predictions)
    print(query)
def helper(ands, minus, amount, day, allocated):
    """Index one booking-bucket through ``index_bb`` and return 1.

    ``cfg`` is read from the enclosing scope; ``amount`` is accepted but not
    used by this function.
    """
    connection_keys = ('es_host', 'es_port', 'es_bb_index', 'es_bb_type')
    bb_client = ESClient(*[cfg[key] for key in connection_keys])
    optimizer.dao.query_builder.index_bb(
        day, ands, minus, allocated, bb_client)
    return 1
def get_model_stats(cfg, model_name, model_version):
    """Return the single model document matching ``model_name``/``model_version``.

    Queries the index named by ``cfg['es_model_index']``.

    Raises:
        Exception: if the query result is None or does not contain exactly
        one document.
    """
    es = ESClient(cfg['es_host'], cfg['es_port'],
                  cfg['es_model_index'], cfg['es_model_type'])
    body = {
        "query": {"bool": {"must": [
            # NOTE(review): this field name was blank in the reviewed source;
            # "model.name" is reconstructed by analogy with the sibling
            # "model.version" key -- confirm.
            {"match": {"model.name": model_name}},
            {"match": {"model.version": model_version}},
        ]}}
    }
    # NOTE(review): the call on this line was garbled in the reviewed source
    # (`doc = if ...`); reconstructed as ESClient.search(body) -- confirm the
    # method name against ESClient.
    doc = es.search(body)
    if doc is None or len(doc) != 1:
        raise Exception(
            'model/version {}/{} not valid'.format(model_name, model_version))
    return doc[0]
def test_get_tbr_ratio(self):
    """Check that ``get_tbr_ratio`` for bookings b6 and b7 equals 1.0.

    Reads connection settings from ``self.cfg`` and bookings from
    ``self.bookings_map``.
    """
    settings = self.cfg
    tbr_client = ESClient(settings['es_host'], settings['es_port'],
                          settings['es_tbr_index'], settings['es_tbr_type'])
    ratio = optimizer.dao.query_builder.get_tbr_ratio(
        ['b6', 'b7'], self.bookings_map, tbr_client)
    print('get_tbr_ratio=' + str(ratio))
    self.assertTrue(ratio == 1.0)
def test_9(cfg):
    """Index a sample booking-bucket for ``cfg['today']`` and print the result.

    The sample has ands b1/b2, minus b3 and allocations b1=100, b2=50.
    """
    bb_client = ESClient(
        cfg['es_host'],
        cfg['es_port'],
        cfg['es_bb_index'],
        cfg['es_bb_type'],
    )
    sample_ands = ['b1', 'b2']
    sample_minus = ['b3']
    today = cfg['today']
    sample_allocated = {'b1': 100, 'b2': 50}
    print(optimizer.dao.query_builder.index_bb(
        today, sample_ands, sample_minus, sample_allocated, bb_client))
def test_5(cfg):
    """Print the result of the HWM allocation function for the first booking.

    Loads bookings, adjusts their dates, asks
    ``optimizer.algo.hwm.update_allocation_for_booking`` for a per-row
    function for the first booking, then applies it to one hand-built
    booking-bucket row.
    """
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    # NOTE(review): the call target was garbled in the reviewed source
    # (`bookings ={})`); reconstructed as ESClient.search -- confirm.
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings = optimizer.util.adjust_booking_dates(bookings)
    # Evaluated for parity with the original flow; the map itself is not used
    # further in this test.
    bookings_map = optimizer.util.get_bookings_map(bookings)
    day = cfg['today']

    # The hand-built df row the allocation function is applied to.
    df_day = day
    df_ands = ['b1']
    df_allocated = {}
    # inventory of bb (df row)
    df_amount = 4000

    booking = bookings[0]
    # total inventory of connected resources
    total_inventory = 10000
    h, _ = optimizer.algo.hwm.update_allocation_for_booking(
        None, day, booking, total_inventory)
    r = h(df_day, df_ands, df_allocated, df_amount)
    print(r)
def helper(ands, minus, amount, day, allocated):
    """Spread a booking-bucket's allocations over its ucdocs.

    Fetches per-ucdoc prediction counts for the bucket, rescales them so they
    are proportional to ``amount``, and passes the scaled counts (resources)
    and ``allocated`` (demands) to ``hwm_generic_allocation``.

    ``cfg`` and ``bookings_map`` are read from the enclosing scope.

    Returns:
        Whatever ``hwm_generic_allocation`` returns for the scaled resources
        and the demands in ``allocated``.
    """
    es_client_predictions = ESClient(cfg['es_host'], cfg['es_port'],
                                     cfg['es_predictions_index'],
                                     cfg['es_predictions_type'])
    _, result = get_ucdoc_prediction_count(
        ands, minus, bookings_map, day, es_client_predictions)

    # apply tbr on prediction values
    prediction_inventory = sum(result.values())
    # With no predictions (sum is 0) the ratio is undefined; use 0 so every
    # resource scales to 0 instead of raising ZeroDivisionError.
    if prediction_inventory:
        tbr_ratio = amount * 1.0 / prediction_inventory
    else:
        tbr_ratio = 0.0

    # e.g. {'magazinelock,3,5G,g_x,2,pt,1004,icc': 788,
    #       'minusonepage,1,5G,g_f,4,pt,1003,icc': 5017}
    resources = {uckey: int(count * tbr_ratio)
                 for uckey, count in result.items()}

    # e.g. {'b2': 800, 'b3': 1000, 'b1': 500}
    demands = allocated

    # the sort of booking here is random
    allocation_map = hwm_generic_allocation(
        resources, resources.keys(), demands, demands.keys())
    return allocation_map
def run(cfg):
    """Run the allocation pipeline and write the ucdoc allocation map as JSON.

    Steps, in order:
      1. Load, filter and date-adjust bookings from the booking index.
      2. Build the booking-bucket (bb) resources DataFrame and run the HWM
         allocation over it.
      3. With bookings locked, delete the old bbs and save the new ones.
      4. Keep only the rows for the day after ``days[-1]``, attach the
         per-ucdoc allocation map, explode it and aggregate per ucdoc.
      5. Save the result as JSON under ``allmap-<day>-<epoch seconds>``.

    Sets the module-level ``hive_context`` as a side effect.
    """
    global hive_context
    sc = SparkContext()
    hive_context = HiveContext(sc)
    sc.setLogLevel('WARN')

    # ESClient requires host ip
    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    # NOTE(review): the call target was garbled in the reviewed source
    # (`bookings ={})`); reconstructed as ESClient.search -- confirm.
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings = optimizer.util.filter_valid_bookings(bookings)
    # adjust dates in bookings
    bookings = optimizer.util.adjust_booking_dates(bookings)
    bookings_map = optimizer.util.get_bookings_map(bookings)

    df = hive_context.createDataFrame(
        sc.emptyRDD(), optimizer.util.get_common_pyspark_schema())
    today = cfg['today']  # YYYY-MM-DD
    days = optimizer.util.get_days_from_bookings(today, bookings)

    df = generate_resources(cfg, df, bookings_map, days, bookings, hive_context)
    # Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], allocated={}, amount=43562)
    print('defining resources')
    df.cache()
    print(df.take(1))

    # run the allocation
    df = hwm_allocation(df, bookings, days)
    # Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], amount=43562,
    #     allocated={'b2': 800, 'b3': 1000, 'b1': 500})
    print('bb-bookings allocation')
    df.cache()
    print(df.take(1))

    # lock bookings
    # NOTE(review): if removing or saving the bbs raises, the bookings stay
    # locked -- confirm whether that is intended before adding a finally.
    lock_booking(es_client_booking, True)

    # remove bbs
    remove_booking_buckets(cfg, days)

    # save new booking-buckets into es
    df = save_booking_buckets_in_es(cfg, df)
    print('bbs saved')
    df.cache()
    print(df.take(1))

    # unlock bookings
    lock_booking(es_client_booking, False)

    day = days[-1]
    tomorrow = optimizer.util.get_next_date(day)

    # use only tomorrow to create the allocation plan
    # NOTE(review): the filter column was garbled in the reviewed source
    # (`df.filter( == tomorrow)`); reconstructed as df.day from the Row
    # examples -- confirm.
    df = df.filter(df.day == tomorrow)

    # this method add the bbs ucdocs allocation_map with their values
    df = add_ucdoc_bb_allocation_map(cfg, df, bookings_map)
    # [Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], amount=43562,
    #      allocated={'b2': 800, 'b3': 1000, 'b1': 500},
    #      allocation_map={
    #          'minusonepage,3,5G,g_x,2,pt,1002,icc': {'b2': 1, 'b3': 2, 'b1': 1},
    #          'magazinelock,2,3G,g_x,3,pt,1005,icc': {'b2': 56, 'b3': 70, 'b1': 35},
    #          'cloudFolder,3,5G,g_f,2,pt,1004,icc': {'b2': 27, 'b3': 34, 'b1': 17},
    #          ...})]
    print('ucdocs-bookings allocation')
    df.cache()
    print(df.take(1))

    # at this point we have a df which is a allocation of bookings to bbs
    # NOTE(review): the select call was garbled in the reviewed source
    # (`df =, explode(df.allocation_map))`); reconstructed as
    # df.select(df.day, explode(...)) from the Row example below -- confirm.
    df = df.select(df.day, explode(df.allocation_map))
    # Row(day='2018-04-02', key='magazinelock,3,5G,g_x,2,pt,1004,icc',
    #     value={'b2': 14, 'b3': 18, 'b1': 9})
    print('exploded')
    df.cache()
    print(df.take(1))

    # agg all the allocation maps for a ucdoc
    _map_type = MapType(StringType(), IntegerType())
    _audf = udf(agg_allocation_maps, _map_type)
    df = df.groupBy('key').agg(_audf(collect_list('value')).alias('allmap'))
    # [Row(key='cloudFolder,3,5G,g_f,2,pt,1004,icc',
    #      allmap={'b2': 27, 'b3': 34, 'b1': 17})]
    print('final aggregation')
    df.cache()
    print(df.take(1))

    # writing into hdfs
    filename = 'allmap-{}-{}'.format(
        optimizer.util.convert_date_remove_dash(day), str(int(time.time())))
    # NOTE(review): the writer call was garbled in the reviewed source (only
    # `, format='json')` survived); reconstructed as df.write.save -- confirm.
    df.write.save(filename, format='json')
def remove_booking_buckets(cfg, days):
    """Call ``delete_bbs`` for ``days`` with the current time in milliseconds.

    The client passed to ``delete_bbs`` targets ``cfg['es_bb_index']``.
    """
    bb_client = ESClient(
        cfg['es_host'],
        cfg['es_port'],
        cfg['es_bb_index'],
        cfg['es_bb_type'],
    )
    # Current wall-clock time in milliseconds, rounded to an integer.
    now_ms = round(1000 * time.time())
    delete_bbs(days, now_ms, bb_client)
# NOTE(review): the module path of this import was lost in the reviewed source
# (`from import ESClient`); `imscommon.es.ims_esclient` is a reconstruction --
# confirm against the project layout.
from imscommon.es.ims_esclient import ESClient
from pyspark import SparkContext, SparkConf, Row
from pyspark.sql.functions import concat_ws, count, lit, col, udf, expr, collect_list
from pyspark.sql import HiveContext
from pyspark.sql.types import IntegerType, StringType
import math

# read es
# NOTE(review): es_host is an empty string in the reviewed source; set it to
# the Elasticsearch host before running.
es_host = ''
es_port = '9200'
es_index = 'predictions_02052020'
es_type = 'doc'
es = ESClient(es_host, es_port, es_index, es_type)
# NOTE(review): the call target was garbled in the reviewed source
# (`hits ={"size": 1000})`); reconstructed as ESClient.search -- confirm.
hits = es.search({"size": 1000})

# Flatten the hits into {(uckey, day, hour index, level): value}, where level
# is '0'..'3' and the value is read from the hour document's 'h0'..'h3' field.
es_records = {}
for ucdoc in hits:
    uckey = ucdoc['uckey']
    predictions = ucdoc['ucdoc']['predictions']
    for day, hours in predictions.items():
        # The hour index is the position of the hour document in the list.
        for hour, hour_doc in enumerate(hours):
            for level in ('0', '1', '2', '3'):
                es_records[(uckey, day, hour, level)] = hour_doc['h' + level]

# print(next(iter(es_records.items())))
# print('************')