Example #1
0
def process(entry, result):
    """
    Given a JSON object formatted by Extractor.py, parse variables "t", "cc", and "rg", and add the results to the
    list of possible results.

    The features are written to ``result`` in this order: minute of the hour, hour of the day and day of the week
    in UTC, a weekend flag, a Friday-or-Saturday flag, hour of the day and day of the week in the impression's
    local time (31 zeros when the local time zone cannot be determined), country, and region.

    :param entry: the JSON object that represents one impression
    :param result: the list of possible results (modified in place)
    :return: None
    """

    # Event - time
    t = entry["t"] / 1000   # Divide t by 1000 because the unit of measurement is 1 millisecond for t

    # Build a timezone-AWARE UTC datetime. A naive datetime must not be used here: astimezone() on a naive value
    # either assumes the machine's local time zone or raises, so the local-time features below would be wrong
    # (or always zero) on any host that is not running in UTC.
    utc_t = datetime.fromtimestamp(t, pytz.utc)

    # Parse minute of the hour, hour of the day, and day of the week (Monday is 0, Sunday is 6)
    minute = utc_t.minute
    sd.binarize(result, minute, 60)
    hour = utc_t.hour
    sd.binarize(result, hour, 24)
    day = utc_t.weekday()
    sd.binarize(result, day, 7)

    # Determine if it is weekend (Saturday or Sunday)
    result.append(1 if day in (5, 6) else 0)

    # Determine if it is Friday or Saturday
    result.append(1 if day in (4, 5) else 0)

    try:
        # Try to get local time using UTC time and country and region

        # Determine time zone using country and region
        country = entry["cc"]
        if country in ["US", "CA", "AU"]:
            # These countries are resolved per region through region_timezone_
            tz = pytz.timezone(region_timezone_[entry["rg"]])
        else:
            # For every other country use the first time zone pytz lists for it
            tz = pytz.timezone(pytz.country_timezones(country)[0])

        local_t = tz.normalize(utc_t.astimezone(tz))
    except Exception:
        # Best effort: a missing/unknown country or region must not abort the whole impression
        local_t = None

    if local_t is None:
        # If local time cannot be extracted, set all variables in this section to be 0
        # (24 for the hour of the day + 7 for the day of the week)
        result.extend([0]*31)
    else:
        # Get hour of the day and day of the week in local time.
        # Done outside the try block so a failure can never leave a partially written section behind.
        sd.binarize(result, local_t.hour, 24)
        sd.binarize(result, local_t.weekday(), 7)

    # Event - country
    sd.add_to_result(result, entry["cc"], countries_)

    # Event - region
    sd.add_to_result(result, entry["rg"], regions_)
Example #2
0
def process(entry, result):
    """
    Parse variables "t", "cc", and "rg" of one impression and add the results to the list of possible results.

    The time features are written first (minute of the hour, hour of the day, day of the week, hour of the week),
    followed by country and region.

    :param entry: the JSON object that represents one impression
    :param result: the list of possible results (modified in place)
    :return: None
    """
    # Event - time
    # "t" is divided by 1000 before conversion (presumably it is given in milliseconds).
    seconds = entry["t"] / 1000

    # No time zone is passed, so the timestamp is interpreted in the time zone of the machine running this code.
    moment = datetime.fromtimestamp(seconds)
    minute_of_hour = moment.minute
    hour_of_day = moment.hour
    day_of_week = moment.weekday()

    sd.binarize(result, minute_of_hour, 60)
    sd.binarize(result, hour_of_day, 24)
    sd.binarize(result, day_of_week, 7)

    # Position of the hour inside the whole week (0 .. 167)
    sd.binarize(result, day_of_week * 24 + hour_of_day, 7 * 24)

    # Event - country
    country = entry["cc"]
    sd.add_to_result(result, country, countries_)

    # Event - region
    region = entry["rg"]
    sd.add_to_result(result, region, regions_)
Example #3
0
def process(margin, entry, result, mode):
    """
    Given a JSON object formatted by Extractor.py, parse variables "bidderid", "verticalid", "bidfloor", "format",
    "product", "w", and "h", and add the results to the list of possible results.
    :param margin: value compared with the rounded bid floor; a 0 is recorded when they are equal, a 1 otherwise
    :param entry: the JSON object that represents one impression
    :param result: the list of possible results (modified in place)
    :param mode: "bin" to record the bid floor as 560 boolean variables; any other value records the bid floor itself
    :return: None
    """

    # Auction - Bidrequests - bidder id
    bidder_id = entry["bidderid"]
    if bidder_id == 36:  # Adjusting the index for DSP 36 since we ignore DSP 35 and 37
        bidder_id = 35
    sd.binarize(result, bidder_id-1, 35)

    # Auction - Bidrequests - vertical id
    sd.binarize(result, entry["verticalid"]-1, 16)

    # Auction - Bidrequests - Impressions - bid Floor
    bid_floor = round(float(entry["bidfloor"]), 2)

    # Record whether the bid floor differs from the margin (0 if equal, 1 otherwise)
    result.append(0 if bid_floor-margin == 0 else 1)

    # If bid floor is to be parsed into binary format, create a boolean variable for every interval of size 0.05
    # from 0 to 28 (560 variables), and according to the value of the bid floor, set the associated boolean
    # variable to 1. Otherwise, just record the value of bid floor.
    if mode == "bin":
        # Bid floors outside [0, 28) fall back to the first variable. The lower bound matters: without it a
        # negative bid floor would produce a negative index and silently set a variable at the END of the list
        # (or raise IndexError below -28).
        bin_index = int(bid_floor*20) if 0 <= bid_floor < 28 else 0
        bid_floor_list = [0]*560
        bid_floor_list[bin_index] = 1
        result.extend(bid_floor_list)
    else:
        result.append(bid_floor)

    # Determine if bid floor is a multiple of 0.05 or of 0.1
    for factor in (20, 10):
        scaled = factor*bid_floor
        result.append(1 if scaled == int(scaled) else 0)

    # Determine if bid floor is greater than each of the thresholds (one boolean variable per threshold).
    # The thresholds are ascending, so once one comparison fails all the following ones are 0 as well.
    thres_list = [1.5, 2, 2.5, 3, 28]
    result.extend(1 if bid_floor > thres else 0 for thres in thres_list)

    # Auction - Bidrequests - Impressions - format
    sd.binarize(result, formats_.index(entry["format"]), len(formats_))

    # Auction - Bidrequests - Impressions - product
    sd.binarize(result, entry["product"]-1, 6)

    # Auction - Bidrequests - Impressions - banner
    width = entry["w"]
    height = entry["h"]

    # Determine if banner belongs to any of the following types:
    #   1) h in (0, 200] and w in (0, 500]
    #   2) h in (0, 200] and w in (500, infinity)
    #   3) h in (200, infinity) and w in (0, 500]
    # NOTE(review): the check for type 3 has no lower bound on w, so a banner with h > 200 and w <= 0 is also
    # counted as type 3 -- confirm whether that is intended.
    banner_cat = [0, 0, 0]
    if 0 < height <= 200:
        if 0 < width <= 500:
            banner_cat[0] = 1
        elif width > 500:
            banner_cat[1] = 1
    elif (height > 200) and (width <= 500):
        banner_cat[2] = 1

    sd.add_to_result(result, (width, height), banners_)
    result.extend(banner_cat)