Ejemplo n.º 1
0
    def helper(ands, minus, day):
        """Return the inventory count for the bucket ``ands`` minus ``minus`` on ``day``.

        The count follows the formula kept from the original implementation::

            B4 and B3 - B2 - B1
                = (p4 and p3).(t4 and t3) - (p4 and p3)and(p1 or p2) * avgTBR

            avgTBR = [(p4 and p3 and p2)*(t4 and t3 and t2)
                      + (p4 and p3 and p1)*(t4 and t3 and t1)]
                     / [(p4 and p3 and p2) + (p4 and p3 and p1)]

        Args:
            ands: bookings whose intersection defines the bucket.
            minus: bookings subtracted from that intersection.
            day: day forwarded to the prediction queries.

        Returns:
            int: ``p_and * t_and`` minus the TBR-weighted overlap with ``minus``;
            0 when ``ands`` and ``minus`` share an element, or when either the
            prediction count or the TBR ratio of ``ands`` is 0.
        """
        # If ands and minus intersect the result is 0. Decide that before any
        # Elasticsearch client is created so the trivial case costs nothing.
        if not set(ands).isdisjoint(minus):
            return 0

        es_client_predictions = ESClient(cfg['es_host'], cfg['es_port'],
                                         cfg['es_predictions_index'],
                                         cfg['es_predictions_type'])
        _, p_and = get_prediction_count(
            ands, [], bookings_map, day, es_client_predictions)
        if p_and == 0:
            return 0

        # The TBR client is only needed once there is a non-zero prediction.
        es_client_tbr = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_tbr_index'], cfg['es_tbr_type'])
        t_and = get_tbr_ratio(ands, bookings_map, es_client_tbr)
        if t_and == 0:
            return 0

        # Portion of the intersection that also belongs to the subtracted
        # bookings, weighted by their average TBR.
        p_ands_p_ors_tbr = 0
        if minus:
            _, p_ands_p_ors = get_prediction_count(
                ands, minus, bookings_map, day, es_client_predictions)
            if p_ands_p_ors != 0:
                avg_tbr = calculate_avg_tbr(
                    ands, minus, bookings_map, day,
                    es_client_predictions, es_client_tbr)
                p_ands_p_ors_tbr = p_ands_p_ors * avg_tbr

        return int(p_and * t_and - p_ands_p_ors_tbr)
Ejemplo n.º 2
0
def get_model_stats_from_es(cfg, model_name, model_version):
    """Fetch the single stats document stored for a model name/version pair.

    Example of a search result (stats truncated)::

        [{'date': '2020-01-17', 'model': {'name': 's32', 'version': 1},
          'stats': {'g_g_m': [0.32095959595959594, 0.4668649491714752],
                    'a_1': [0.198989898989899, 0.3992572317838901],
                    't_3G': [0.0, 1.0],
                    'price_cat_0': [0.0, 1.0],
                    ...,
                    'holiday_stats': [0.044444444444444446, 0.20723493215097805]}}]

    Args:
        cfg: mapping providing 'es_host', 'es_port', 'es_model_index' and
            'es_model_type'.
        model_name: value matched against ``model.name``.
        model_version: value matched against ``model.version``.

    Returns:
        The only element of the search result.

    Raises:
        Exception: if the search returns ``None`` or does not return exactly
            one document.
    """
    es = ESClient(cfg['es_host'], cfg['es_port'], cfg['es_model_index'],
                  cfg['es_model_type'])

    # Both conditions must hold: same model name AND same model version.
    must_clauses = [
        {"match": {"model.name": model_name}},
        {"match": {"model.version": model_version}},
    ]
    body = {"query": {"bool": {"must": must_clauses}}}

    docs = es.search(body)
    # Identity comparison is the correct test for None (PEP 8).
    if docs is None or len(docs) != 1:
        raise Exception('model/version {}/{} not valid'.format(
            model_name, model_version))
    return docs[0]
Ejemplo n.º 3
0
    def test_es_predictions_search(self):
        """A size-100 search on the predictions index must return at least 40 hits."""
        settings = self.cfg
        client = ESClient(
            settings['es_host'],
            settings['es_port'],
            settings['es_predictions_index'],
            settings['es_predictions_type'],
        )
        hit_count = len(client.search({"size": 100}))

        # Non-empty first, then the expected minimum.
        self.assertTrue(hit_count > 0)
        self.assertTrue(hit_count >= 40)
Ejemplo n.º 4
0
def test_4(cfg):
    """Print what ``optimizer.main.get_bb_count`` yields for ['b1', 'b2'] on cfg['today']."""
    booking_client = ESClient(
        cfg['es_host'], cfg['es_port'],
        cfg['es_booking_index'], cfg['es_booking_type'])

    # get at most 1000 results for now
    fetched = booking_client.search({})
    date_adjusted = optimizer.util.adjust_booking_dates(fetched)
    bookings_map = optimizer.util.get_bookings_map(date_adjusted)

    intersect = ['b1', 'b2']
    today = cfg['today']
    count_for = optimizer.main.get_bb_count(cfg, bookings_map)
    print(count_for(intersect, [], today))
Ejemplo n.º 5
0
def test_2(cfg):
    """Print the result of ``get_tbr_ratio`` for bookings ['b1', 'b2']."""
    host, port = cfg['es_host'], cfg['es_port']
    tbr_client = ESClient(host, port, cfg['es_tbr_index'], cfg['es_tbr_type'])
    booking_client = ESClient(host, port, cfg['es_booking_index'],
                              cfg['es_booking_type'])

    # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(booking_client.search({}))

    ratio = optimizer.dao.query_builder.get_tbr_ratio(
        ['b1', 'b2'], bookings_map, tbr_client)
    print(ratio)
Ejemplo n.º 6
0
def test_3(cfg):
    """Print both values returned by ``get_prediction_count`` for ['b1', 'b2'] on cfg['today']."""
    host, port = cfg['es_host'], cfg['es_port']
    predictions_client = ESClient(host, port, cfg['es_predictions_index'],
                                  cfg['es_predictions_type'])
    booking_client = ESClient(host, port, cfg['es_booking_index'],
                              cfg['es_booking_type'])

    # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(booking_client.search({}))

    today = cfg['today']
    outcome = optimizer.dao.query_builder.get_prediction_count(
        ['b1', 'b2'], [], bookings_map, today, predictions_client)
    # The call returns a pair; print each element on its own line.
    query, result = outcome
    print(query)
    print(result)
Ejemplo n.º 7
0
def test_1(cfg):
    """Print what ``get_prediction_count`` returns for ands=['b6'], ors=['b7'] on the converted cfg['today']."""
    host, port = cfg['es_host'], cfg['es_port']
    predictions_client = ESClient(host, port, cfg['es_predictions_index'],
                                  cfg['es_predictions_type'])
    booking_client = ESClient(host, port, cfg['es_booking_index'],
                              cfg['es_booking_type'])

    # get at most 1000 results for now
    bookings_map = optimizer.util.get_bookings_map(booking_client.search({}))

    # The day is passed through optimizer.util.convert_date before querying.
    converted_day = optimizer.util.convert_date(cfg['today'])
    outcome = optimizer.dao.query_builder.get_prediction_count(
        ['b6'], ['b7'], bookings_map, converted_day, predictions_client)
    print(outcome)
Ejemplo n.º 8
0
    def helper(ands, minus, amount, day, allocated):
        """Index one booking-bucket through ``index_bb`` and return 1.

        ``amount`` is accepted but not used by this helper.
        """
        bb_client = ESClient(
            cfg['es_host'],
            cfg['es_port'],
            cfg['es_bb_index'],
            cfg['es_bb_type'],
        )
        index_bb = optimizer.dao.query_builder.index_bb
        index_bb(day, ands, minus, allocated, bb_client)
        return 1
Ejemplo n.º 9
0
def get_model_stats(cfg, model_name, model_version):
    """Return the single document stored for ``model_name``/``model_version``.

    Args:
        cfg: mapping providing 'es_host', 'es_port', 'es_model_index' and
            'es_model_type'.
        model_name: value matched against ``model.name``.
        model_version: value matched against ``model.version``.

    Returns:
        The only element of the search result.

    Raises:
        Exception: if the search returns ``None`` or does not return exactly
            one document.
    """
    es = ESClient(cfg['es_host'], cfg['es_port'],
                  cfg['es_model_index'], cfg['es_model_type'])

    # Match on name AND version.
    name_clause = {"match": {"model.name": model_name}}
    version_clause = {"match": {"model.version": model_version}}
    body = {"query": {"bool": {"must": [name_clause, version_clause]}}}

    docs = es.search(body)
    # Identity comparison is the correct test for None (PEP 8).
    if docs is None or len(docs) != 1:
        raise Exception(
            'model/version {}/{} not valid'.format(model_name, model_version))
    return docs[0]
Ejemplo n.º 10
0
    def test_get_tbr_ratio(self):
        """``get_tbr_ratio`` for bookings ['b6', 'b7'] is expected to equal 1.0."""
        settings = self.cfg
        tbr_client = ESClient(settings['es_host'], settings['es_port'],
                              settings['es_tbr_index'],
                              settings['es_tbr_type'])

        ratio = optimizer.dao.query_builder.get_tbr_ratio(
            ['b6', 'b7'], self.bookings_map, tbr_client)

        print('get_tbr_ratio=' + str(ratio))
        self.assertTrue(ratio == 1.0)
Ejemplo n.º 11
0
def test_9(cfg):
    """Index a sample booking-bucket (b1 & b2 minus b3) for cfg['today'] and print the result."""
    bb_client = ESClient(
        cfg['es_host'], cfg['es_port'],
        cfg['es_bb_index'], cfg['es_bb_type'])

    today = cfg['today']
    sample_allocation = {'b1': 100, 'b2': 50}
    outcome = optimizer.dao.query_builder.index_bb(
        today, ['b1', 'b2'], ['b3'], sample_allocation, bb_client)
    print(outcome)
Ejemplo n.º 12
0
def test_5(cfg):
    """Build the allocation updater for the first booking and print its result for one sample bb row."""
    booking_client = ESClient(
        cfg['es_host'], cfg['es_port'],
        cfg['es_booking_index'], cfg['es_booking_type'])

    # get at most 1000 results for now
    bookings = optimizer.util.adjust_booking_dates(booking_client.search({}))
    # The map itself is not used below; the call is kept as in the original.
    bookings_map = optimizer.util.get_bookings_map(bookings)
    today = cfg['today']

    first_booking = bookings[0]
    # total inventory of connected resources
    connected_inventory = 10000
    updater, _ = optimizer.algo.hwm.update_allocation_for_booking(
        None, today, first_booking, connected_inventory)

    # Sample bb (df row): day, ands, allocated so far, inventory of the bb.
    row_day, row_ands, row_allocated, row_amount = today, ['b1'], {}, 4000
    print(updater(row_day, row_ands, row_allocated, row_amount))
Ejemplo n.º 13
0
    def helper(ands, minus, amount, day, allocated):
        """Distribute a booking-bucket's allocation over its ucdocs.

        The per-ucdoc prediction counts of the bucket are rescaled so that they
        follow ``amount``, and the scaled counts are then allocated to the
        bookings in ``allocated`` with ``hwm_generic_allocation``.

        Args:
            ands: bookings whose intersection defines the bucket.
            minus: bookings subtracted from that intersection.
            amount: inventory of the bucket; numerator of the scaling ratio.
            day: day forwarded to the prediction query.
            allocated: demand per booking,
                e.g. ``{'b2': 800, 'b3': 1000, 'b1': 500}``.

        Returns:
            Whatever ``hwm_generic_allocation`` returns for the scaled
            resources and the demands.
        """
        es_client_predictions = ESClient(cfg['es_host'], cfg['es_port'],
                                         cfg['es_predictions_index'],
                                         cfg['es_predictions_type'])
        _, predictions = get_ucdoc_prediction_count(
            ands, minus, bookings_map, day, es_client_predictions)

        # apply tbr on prediction values
        prediction_inventory = sum(predictions.values())
        # With no predicted inventory there is nothing to scale; a ratio of 0
        # keeps every resource at 0 instead of raising ZeroDivisionError.
        if prediction_inventory:
            tbr_ratio = amount * 1.0 / prediction_inventory
        else:
            tbr_ratio = 0.0

        # Build a fresh mapping rather than rewriting the query result while
        # iterating over it.
        # {'magazinelock,3,5G,g_x,2,pt,1004,icc': 788, 'minusonepage,1,5G,g_f,4,pt,1003,icc': 5017}
        resources = {ucdoc: int(count * tbr_ratio)
                     for ucdoc, count in predictions.items()}

        # {'b2': 800, 'b3': 1000, 'b1': 500}
        demands = allocated

        # the sort of booking here is random
        allocation_map = hwm_generic_allocation(
            resources, resources.keys(), demands, demands.keys())

        return allocation_map
Ejemplo n.º 14
0
def run(cfg):
    """Run the allocation pipeline and write the aggregated ucdoc allocation map.

    Steps, in the order they appear below:

    1. Create the Spark and Hive contexts (``hive_context`` is a module global).
    2. Load bookings from Elasticsearch, keep the valid ones and adjust
       their dates.
    3. Build the booking-bucket resources and run ``hwm_allocation`` on them.
    4. With bookings locked: remove the existing booking-buckets and save the
       new ones, then unlock.
    5. Keep only the rows for the day after ``days[-1]``, attach the per-ucdoc
       allocation map, explode it and aggregate the maps per ucdoc key.
    6. Save the result as JSON under ``allmap-<day>-<epoch seconds>``.

    Args:
        cfg: mapping providing at least 'es_host', 'es_port',
            'es_booking_index', 'es_booking_type' and 'today' (YYYY-MM-DD);
            it is also forwarded to the helpers called below.
    """

    global hive_context

    sc = SparkContext()
    hive_context = HiveContext(sc)
    sc.setLogLevel('WARN')

    # ESClient requires host ip

    es_client_booking = ESClient(cfg['es_host'], cfg['es_port'],
                                 cfg['es_booking_index'], cfg['es_booking_type'])
    bookings = es_client_booking.search({})  # get at most 1000 results for now
    bookings = optimizer.util.filter_valid_bookings(bookings)
    # adjust dates in bookings
    bookings = optimizer.util.adjust_booking_dates(bookings)
    bookings_map = optimizer.util.get_bookings_map(bookings)

    # Start from an empty DataFrame that carries the common schema.
    df = hive_context.createDataFrame(sc.emptyRDD(), optimizer.util.get_common_pyspark_schema())
    today = cfg['today']  # YYYY-MM-DD
    days = optimizer.util.get_days_from_bookings(today, bookings)

    df = generate_resources(cfg, df, bookings_map, days, bookings, hive_context)
    # Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], allocated={}, amount=43562)
    print('defining resources')
    # cache() + take(1) is repeated after every stage: it prints a sample row
    # and keeps the intermediate DataFrame cached.
    df.cache()
    print(df.take(1))

    # run the allocation
    df = hwm_allocation(df, bookings, days)

    # Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], amount=43562, allocated={'b2': 800, 'b3': 1000, 'b1': 500})
    print('bb-bookings allocation')
    df.cache()
    print(df.take(1))

    # lock bookings
    # NOTE(review): if anything between here and the unlock below raises, the
    # unlock call is never reached - confirm whether a try/finally is wanted.
    lock_booking(es_client_booking, True)

    # remove bbs
    remove_booking_buckets(cfg, days)

    # save new booking-buckets into es
    df = save_booking_buckets_in_es(cfg, df)
    print('bbs saved')
    df.cache()
    print(df.take(1))

    # unlock bookings
    lock_booking(es_client_booking, False)
    # NOTE(review): "tomorrow" is the day after the LAST entry of `days`, not
    # the day after cfg['today'] - confirm this is intended.
    day = days[-1]
    tomorrow = optimizer.util.get_next_date(day)

    # use only tomorrow to create the allocation plan
    df = df.filter(df.day == tomorrow)

    # this method adds the bbs ucdocs allocation_map with their values
    df = add_ucdoc_bb_allocation_map(cfg, df, bookings_map)

    # [Row(day='2018-04-02', ands=['b1', 'b3', 'b2'], minus=[], amount=43562,
    #      allocated={'b2': 800, 'b3': 1000, 'b1': 500},
    #      allocation_map={'minusonepage,3,5G,g_x,2,pt,1002,icc': {'b2': 1, 'b3': 2, 'b1': 1},
    #                      'magazinelock,2,3G,g_x,3,pt,1005,icc': {'b2': 56, 'b3': 70, 'b1': 35},
    #                      ...,
    #                      'cloudFolder,3,5G,g_f,2,pt,1004,icc': {'b2': 27, 'b3': 34, 'b1': 17}})]
    print('ucdocs-bookings allocation')
    df.cache()
    print(df.take(1))

    # at this point we have a df which is an allocation of bookings to bbs;
    # explode turns each allocation_map entry into its own (key, value) row
    df = df.select(df.day, explode(df.allocation_map))

    # Row(day='2018-04-02', key='magazinelock,3,5G,g_x,2,pt,1004,icc', value={'b2': 14, 'b3': 18, 'b1': 9})
    print('exploded')
    df.cache()
    print(df.take(1))

    # agg all the allocation maps for a ucdoc
    _map_type = MapType(StringType(), IntegerType())
    _audf = udf(agg_allocation_maps, _map_type)
    df = df.groupBy('key').agg(_audf(collect_list('value')).alias('allmap'))

    # [Row(key='cloudFolder,3,5G,g_f,2,pt,1004,icc', allmap={'b2': 27, 'b3': 34, 'b1': 17})]
    print('final aggregation')
    df.cache()
    print(df.take(1))

    # writing into hdfs
    # The name uses `day` (days[-1]) plus the current epoch seconds.
    filename = 'allmap-{}-{}'.format(
        optimizer.util.convert_date_remove_dash(day), str(int(time.time())))
    df.write.save(filename, format='json')
0
def remove_booking_buckets(cfg, days):
    """Call ``delete_bbs`` for ``days`` with the current time in milliseconds."""
    bb_client = ESClient(
        cfg['es_host'],
        cfg['es_port'],
        cfg['es_bb_index'],
        cfg['es_bb_type'],
    )
    # Current epoch time, rounded to whole milliseconds.
    now_seconds = time.time()
    now_ms = round(now_seconds * 1000)
    delete_bbs(days, now_ms, bb_client)
Ejemplo n.º 16
0
from imscommon.es.ims_esclient import ESClient
from pyspark import SparkContext, SparkConf, Row
from pyspark.sql.functions import concat_ws, count, lit, col, udf, expr, collect_list
from pyspark.sql import HiveContext
from pyspark.sql.types import IntegerType, StringType
import math

# read es
es_host = '10.193.217.111'
es_port = '9200'
es_index = 'predictions_02052020'
es_type = 'doc'
es = ESClient(es_host, es_port, es_index, es_type)
hits = es.search({"size": 1000})

# Flatten the predictions into {(uckey, day, hour, h): value}. The hour is the
# position of the entry inside the day's list and h is one of '0'..'3'.
es_records = {}
for ucdoc in hits:
    uckey = ucdoc['uckey']
    predictions = ucdoc['ucdoc']['predictions']
    for day, hours in predictions.items():
        for hour, hour_doc in enumerate(hours):
            for h_label, h_field in (('0', 'h0'), ('1', 'h1'),
                                     ('2', 'h2'), ('3', 'h3')):
                es_records[(uckey, day, hour, h_label)] = hour_doc[h_field]

# print(next(iter(es_records.items())))
# print('************')