Ejemplo n.º 1
0
def process(entry, result):
    """
    Parse the auction-level variables "margin", "tmax", and "bkc" of one entry and append their encoded features to result.
    :param entry: mapping that provides the keys "margin", "tmax", and "bkc"
    :param result: the list the encoded features are appended to (modified in place)
    :return: the margin rounded to two decimals
    """

    # Auction - margin
    margin = round(float(entry["margin"]), 2)
    sd.add_to_result(result, margin, margins_)

    # Auction - tmax (always contributes 12 columns)
    tmax = entry["tmax"]
    if tmax is None or tmax == "None":
        # Missing tmax (the string "None", or a real None): zero the 11 value columns
        # and use the last column to indicate missing tmax
        result.extend([0]*11)
        result.append(1)
    else:
        # Determine if tmax is a multiple of 5 or 10
        for divisor in (5, 10):
            result.append(1 if tmax % divisor == 0 else 0)

        # Determine if tmax is at most 500, and if it is at most 700
        for thres in (500, 700):
            result.append(1 if tmax <= thres else 0)

        # One column per value in tmax_list, followed by one column that is set
        # only when tmax equals none of them
        tmax_list = [30, 45, 50, 70, 85, 1000]
        matches = [1 if tmax == item else 0 for item in tmax_list]
        result.extend(matches)
        result.append(0 if 1 in matches else 1)

        # Last column: tmax is not missing
        result.append(0)

    # Auction - bkc
    # Layout: one column per id in bkcids_, then a column for ids that are not in
    # bkcids_, then a column that marks an empty bkc value
    bkc_result = [0]*(len(bkcids_)+2)
    bkc_str = entry["bkc"]
    if not bkc_str:
        bkc_result[-1] = 1
    else:
        for item in bkc_str.split(","):
            try:
                index = bkcids_.index(item)
            except ValueError:
                # Id not listed in bkcids_: fall back to the second-to-last column
                index = len(bkc_result)-2
            bkc_result[index] = 1
    result.extend(bkc_result)

    return margin
Ejemplo n.º 2
0
def process(entry, result):
    """
    Given a JSON object formatted by Extractor.py, parse variables "t", "cc", and "rg", and add the results to the list of possible results.
    :param entry: the JSON object that represents one impression
    :param result: the list of possible results (modified in place)
    :return: None
    """

    # Event - time
    t = entry["t"] / 1000   # Divide t by 1000 because the unit of measurement is 1 millisecond for t

    # Build a timezone-aware UTC datetime. A naive datetime cannot be converted reliably with
    # astimezone(): Python 2 raises, and Python 3.6+ interprets it as machine-local time.
    utc_t = datetime.fromtimestamp(t, pytz.utc)

    # Minute of the hour, hour of the day, and day of the week in UTC
    minute = utc_t.minute
    sd.binarize(result, minute, 60)
    hour = utc_t.hour
    sd.binarize(result, hour, 24)
    day = utc_t.weekday()
    sd.binarize(result, day, 7)

    # Determine if it is weekend (weekday() numbers Monday as 0, so 5 and 6 are Saturday and Sunday)
    result.append(1 if day in (5, 6) else 0)

    # Determine if it is Friday or Saturday
    result.append(1 if day in (4, 5) else 0)

    try:
        # Determine time zone using country and region
        country = entry["cc"]
        if country in ["US", "CA", "AU"]:
            tz = pytz.timezone(region_timezone_[entry["rg"]])
        else:
            tz = pytz.timezone(pytz.country_timezones(country)[0])

        # Convert the UTC time to local time
        local_t = tz.normalize(utc_t.astimezone(tz))
    except Exception:
        # Best effort: if local time cannot be determined (unknown country or region, missing key, ...),
        # set all 24 + 7 variables in this section to 0
        result.extend([0]*31)
    else:
        # Hour of the day and day of the week in local time. These calls sit outside the try block so that
        # a failure can never leave a partially written section followed by 31 zeros.
        sd.binarize(result, local_t.hour, 24)
        sd.binarize(result, local_t.weekday(), 7)

    # Event - country
    sd.add_to_result(result, entry["cc"], countries_)

    # Event - region
    sd.add_to_result(result, entry["rg"], regions_)
Ejemplo n.º 3
0
def process(entry, result):
    """
    Append the time, country, and region features of one entry to result.
    :param entry: mapping that provides the keys "t", "cc", and "rg"
    :param result: the list the features are appended to (modified in place)
    :return: None
    """
    # Event - time
    # "t" is divided by 1000 before conversion (presumably a millisecond timestamp - verify against the producer).
    # NOTE(review): fromtimestamp() without a tz argument yields the machine's local time - confirm this is intended.
    moment = datetime.fromtimestamp(entry["t"] / 1000)
    minute_of_hour = moment.minute
    hour_of_day = moment.hour
    weekday = moment.weekday()

    # Minute of the hour, hour of the day, day of the week, and hour of the week, in that order
    time_features = (
        (minute_of_hour, 60),
        (hour_of_day, 24),
        (weekday, 7),
        (weekday*24 + hour_of_day, 7*24),
    )
    for value, size in time_features:
        sd.binarize(result, value, size)

    # Event - country
    country = entry["cc"]
    sd.add_to_result(result, country, countries_)

    # Event - region
    region = entry["rg"]
    sd.add_to_result(result, region, regions_)
Ejemplo n.º 4
0
def process(margin, entry, result, mode):
    """
    Given a JSON object formatted by Extractor.py, parse variables "bidderid", "verticalid", "bidfloor", "format", "product", "w", and "h",
    and add the results to the list of possible results.
    :param margin: the margin value the bid floor is compared against
    :param entry: the JSON object that represents one impression
    :param result: the list of possible results (modified in place)
    :param mode: "bin" to encode the bid floor as one boolean variable per interval; any other value records the bid floor itself
    :return: None
    """

    # Auction - Bidrequests - bidder id
    bidder_id = entry["bidderid"]
    if bidder_id == 36:  # Adjusting the index for DSP 36 since we ignore DSP 35 and 37
        bidder_id = 35
    sd.binarize(result, bidder_id-1, 35)

    # Auction - Bidrequests - vertical id
    sd.binarize(result, entry["verticalid"]-1, 16)

    # Auction - Bidrequests - Impressions - bid Floor
    bid_floor = round(float(entry["bidfloor"]), 2)

    # Flag whether the bid floor differs from the margin
    result.append(0 if bid_floor-margin == 0 else 1)

    # If bid floor is to be parsed into binary format, create a boolean variable for every interval of size 0.05
    # from 0 to 28 (560 variables), and set the variable associated with the value of the bid floor to 1.
    # Otherwise, just record the value of bid floor.
    if mode == "bin":
        # NOTE(review): bid floors of 28 or more fall back to index 0, the same variable as [0, 0.05) - confirm intended
        index = int(bid_floor*20) if bid_floor < 28 else 0
        bid_floor_list = [0]*560
        bid_floor_list[index] = 1
        result.extend(bid_floor_list)
    else:
        result.append(bid_floor)

    # Determine if bid floor is a multiple of 0.05 or of 0.1
    for n in (20, 10):
        bid_floor_tmp = n*bid_floor
        result.append(1 if bid_floor_tmp == int(bid_floor_tmp) else 0)

    # Determine if bid floor is greater than each of the values in thres_list.
    # thres_list is ascending, so the flags are a run of 1s followed by 0s.
    thres_list = [1.5, 2, 2.5, 3, 28]
    result.extend([1 if bid_floor > thres else 0 for thres in thres_list])

    # Auction - Bidrequests - Impressions - format
    sd.binarize(result, formats_.index(entry["format"]), len(formats_))

    # Auction - Bidrequests - Impressions - product
    sd.binarize(result, entry["product"]-1, 6)

    # Auction - Bidrequests - Impressions - banner
    width = entry["w"]
    height = entry["h"]

    # Determine if banner belongs to any of the following types:
    #   1) h in (0, 200] and w in (0, 500]
    #   2) h in (0, 200] and w in (500, infinity)
    #   3) h in (200, infinity) and w in (0, 500]
    banner_cat = [0, 0, 0]
    if 0 < height <= 200:
        if 0 < width <= 500:
            banner_cat[0] = 1
        elif width > 500:
            banner_cat[1] = 1
    elif height > 200 and 0 < width <= 500:
        # The lower bound on width keeps non-positive widths out of type 3, as described above
        banner_cat[2] = 1

    sd.add_to_result(result, (width, height), banners_)
    result.extend(banner_cat)
Ejemplo n.º 5
0
def process(entry, result):
    """
    Parse the auction-level variables "margin", "tmax", and "bkc" of one entry and append their encoded features to result.
    :param entry: mapping that provides the keys "margin", "tmax", and "bkc"
    :param result: the list the encoded features are appended to (modified in place)
    :return: the margin rounded to two decimals
    """

    # Auction - margin
    margin = round(float(entry["margin"]), 2)
    sd.add_to_result(result, margin, margins_)

    # Auction - tmax
    tmax = entry["tmax"]
    if tmax is None or tmax == "None":
        # Missing tmax (the string "None", or a real None): zero the value columns
        # and use the last column to indicate missing tmax.
        # NOTE(review): 85 zeros matches the other branch only if each if_multiple_tmax
        # call appends exactly one column - confirm against its definition.
        result.extend([0]*85)
        result.append(1)
    else:
        # Determine if tmax is multiple of 5 or 10 (helper defined outside this block)
        if_multiple_tmax(result, tmax, 5)
        if_multiple_tmax(result, tmax, 10)

        # Determine if tmax is at most 500, and if it is at most 700
        for thres in (500, 700):
            result.append(1 if tmax <= thres else 0)

        # Bucket tmax into exactly one of 81 columns:
        #   column 0        tmax <= 20
        #   columns 1-65    one column per value from 21 to 85
        #   columns 66-75   ranges of width 5 from 86 to 135
        #   columns 76-80   <= 200, <= 500, <= 999, == 1000, everything else
        buckets = [0]*81
        if tmax <= 20:
            buckets[0] = 1
        elif tmax <= 85:
            buckets[tmax-20] = 1
        elif tmax <= 135:
            # Floor division keeps the index an integer on Python 3 as well as Python 2
            buckets[66 + (tmax-86)//5] = 1
        elif tmax <= 200:
            buckets[76] = 1
        elif tmax <= 500:
            buckets[77] = 1
        elif tmax <= 999:
            buckets[78] = 1
        elif tmax == 1000:
            buckets[79] = 1
        else:
            buckets[80] = 1
        result.extend(buckets)

        # Last column: tmax is not missing
        result.append(0)

    # Auction - bkc
    # Layout: one column per id in bkcids_, then a column for ids that are not in
    # bkcids_, then a column that marks an empty bkc value
    bkc_result = [0]*(len(bkcids_)+2)
    bkc_str = entry["bkc"]
    if not bkc_str:
        bkc_result[-1] = 1
    else:
        for item in bkc_str.split(","):
            try:
                index = bkcids_.index(item)
            except ValueError:
                # Id not listed in bkcids_: fall back to the second-to-last column
                index = len(bkc_result)-2
            bkc_result[index] = 1
    result.extend(bkc_result)

    return margin
Ejemplo n.º 6
0
def process(entry, result):
    """
    Given a JSON object formatted by Extractor.py, parse variables "margin", "tmax", and "bkc", and add the results to the list of possible results.
    :param entry: the JSON object that represents one impression
    :param result: the list of possible results (modified in place)
    :return: the value of margin, which will be used when processing bid floor
    """

    # Auction - margin
    margin = round(float(entry["margin"]), 2)
    sd.add_to_result(result, margin, margins_)

    # Auction - tmax (always contributes 12 variables)
    tmax = entry["tmax"]
    tmax_missing = tmax is None or tmax == "None"
    if not tmax_missing:
        # Determine if tmax is multiple of 5 or 10
        result.append(1 if tmax % 5 == 0 else 0)
        result.append(1 if tmax % 10 == 0 else 0)

        # Determine if tmax is less than or equal to 500, and if tmax is less than or equal to 700
        result.extend([1 if tmax <= thres else 0 for thres in (500, 700)])

        # Determine if tmax is equal to any of the values in tmax_list: one variable per listed value,
        # followed by one variable that is set only when tmax equals none of them
        tmax_list = [30, 45, 50, 70, 85, 1000]
        equal_flags = [1 if tmax == item else 0 for item in tmax_list]
        result.extend(equal_flags)
        result.append(0 if 1 in equal_flags else 1)

        # Add one variable to indicate tmax is not missing
        result.append(0)
    else:
        # If tmax is missing (the string "None", or a real None), use the last variable to indicate so
        result.extend([0]*11)
        result.append(1)

    # Auction - bkc
    # One variable per id in bkcids_, then one for ids that are not in bkcids_, then one for an empty bkc value
    bkc_result = [0]*(len(bkcids_)+2)
    unknown_index = len(bkc_result)-2
    bkc_str = entry["bkc"]
    if bkc_str:
        for item in bkc_str.split(","):
            try:
                index = bkcids_.index(item)
            except ValueError:
                # The id is not listed in bkcids_
                index = unknown_index
            bkc_result[index] = 1
    else:
        bkc_result[-1] = 1
    result.extend(bkc_result)

    return margin