def process(entry, result):
    """
    Parse the auction-level variables "margin", "tmax", and "bkc" of one impression
    and append their encoded features to result.

    :param entry: the object that represents one impression; keys "margin", "tmax"
                  and "bkc" are read
    :param result: the list of features, extended in place
    :return: the margin rounded to two decimals
    """
    # Auction - margin
    margin = round(float(entry["margin"]), 2)
    sd.add_to_result(result, margin, margins_)

    # Auction - tmax (always contributes 12 columns)
    tmax = entry["tmax"]
    if tmax is None or tmax == "None":
        # Missing tmax: zero every tmax feature and raise the last column as the
        # "missing" flag. A real None is handled like the string "None" so it
        # does not crash on the modulo below.
        result.extend([0]*11)
        result.append(1)
    else:
        # Is tmax a multiple of 5 / of 10?
        result.append(1 if tmax % 5 == 0 else 0)
        result.append(1 if tmax % 10 == 0 else 0)

        # Is tmax <= 500 / <= 700?
        for thres in [500, 700]:
            result.append(1 if tmax <= thres else 0)

        # One column per value in tmax_list, then an "other value" column
        tmax_list = [30, 45, 50, 70, 85, 1000]
        matches = [1 if tmax == item else 0 for item in tmax_list]
        result.extend(matches)
        result.append(0 if 1 in matches else 1)

        # Last column: tmax is not missing
        result.append(0)

    # Auction - bkc: one column per id in bkcids_, then "unknown id", then "empty"
    bkc_result = [0]*(len(bkcids_)+2)
    bkc_str = entry["bkc"]
    if not bkc_str:
        bkc_result[-1] = 1
    else:
        unknown_index = len(bkc_result)-2
        for item in bkc_str.split(","):
            try:
                index = bkcids_.index(item)
            except ValueError:
                # Id not present in bkcids_: count it in the "unknown id" column
                index = unknown_index
            bkc_result[index] = 1
    result.extend(bkc_result)

    return margin
def process(entry, result):
    """
    Given a JSON object formatted by Extractor.py, parse variables "t", "cc", and "rg",
    and append the results to the list of possible results.

    :param entry: the JSON object that represents one impression
    :param result: the list of possible results, extended in place
    :return: None
    """
    # Event - time
    t = entry["t"] / 1000    # Divide t by 1000 because the unit of measurement is 1 millisecond for t

    # Build a timezone-AWARE UTC datetime. With a naive datetime, astimezone() below
    # either raises (Python 2) or silently assumes the machine's local time zone
    # (Python 3.6+), so the local-time features would be always zero or wrong.
    utc_t = pytz.utc.localize(datetime.utcfromtimestamp(t))

    # Minute of the hour, hour of the day, and day of the week in UTC
    minute = utc_t.minute
    sd.binarize(result, minute, 60)
    hour = utc_t.hour
    sd.binarize(result, hour, 24)
    day = utc_t.weekday()
    sd.binarize(result, day, 7)

    # Determine if it is weekend (weekday() is 5 for Saturday, 6 for Sunday)
    result.append(1 if day in (5, 6) else 0)

    # Determine if it is Friday or Saturday
    result.append(1 if day in (4, 5) else 0)

    try:
        # Determine time zone using country and region
        country = entry["cc"]
        if country in ["US", "CA", "AU"]:
            tz = pytz.timezone(region_timezone_[entry["rg"]])
        else:
            tz = pytz.timezone(pytz.country_timezones(country)[0])
        local_t = tz.normalize(utc_t.astimezone(tz))
    except Exception:
        # Best effort: if the time zone cannot be resolved, set all variables in this
        # section to 0 (24 hour columns + 7 day columns).
        result.extend([0]*31)
    else:
        # Hour of the day and day of the week in local time. Appended outside the
        # try so a failure can never leave a partially written section followed
        # by the 31 fallback zeros.
        sd.binarize(result, local_t.hour, 24)
        sd.binarize(result, local_t.weekday(), 7)

    # Event - country
    sd.add_to_result(result, entry["cc"], countries_)

    # Event - region
    sd.add_to_result(result, entry["rg"], regions_)
def process(entry, result):
    """
    Parse variables "t", "cc", and "rg" of one impression and append the encoded
    time, country and region features to result.

    :param entry: the object that represents one impression
    :param result: the list of features, extended in place
    :return: None
    """
    # Event - time ("t" is divided by 1000 before conversion).
    # NOTE: fromtimestamp() without a tz argument yields the platform's local time.
    moment = datetime.fromtimestamp(entry["t"] / 1000)
    minute_of_hour = moment.minute
    hour_of_day = moment.hour
    day_of_week = moment.weekday()

    sd.binarize(result, minute_of_hour, 60)
    sd.binarize(result, hour_of_day, 24)
    sd.binarize(result, day_of_week, 7)

    # Hour of the week: 0 .. 7*24-1
    sd.binarize(result, day_of_week*24+hour_of_day, 7*24)

    # Event - country
    sd.add_to_result(result, entry["cc"], countries_)

    # Event - region
    sd.add_to_result(result, entry["rg"], regions_)
def process(margin, entry, result, mode):
    """
    Given a JSON object formatted by Extractor.py, parse variables "bidderid", "verticalid",
    "bidfloor", "format", "product", "w", and "h", and append the results to the list of
    possible results.

    :param margin: the margin value the bid floor is compared against
    :param entry: the JSON object that represents one impression
    :param result: the list of possible results, extended in place
    :param mode: "bin" to encode the bid floor as binary interval variables;
                 any other value records the bid floor itself
    :return: None
    """
    # Auction - Bidrequests - bidder id
    bidder_id = entry["bidderid"]
    if bidder_id == 36:    # Adjusting the index for DSP 36 since we ignore DSP 35 and 37
        bidder_id = 35
    sd.binarize(result, bidder_id-1, 35)

    # Auction - Bidrequests - vertical id
    sd.binarize(result, entry["verticalid"]-1, 16)

    # Auction - Bidrequests - Impressions - bid floor
    bid_floor = round(float(entry["bidfloor"]), 2)
    # Flag whether the bid floor differs from the margin
    result.append(0 if bid_floor-margin == 0 else 1)

    # If bid floor is to be parsed into binary format, create a boolean variable for every
    # interval of size 0.05 from 0 to 28 (560 variables), and according to the value of the
    # bid floor, set the associated boolean variable to 1.
    # Otherwise, just record the value of bid floor.
    if mode == "bin":
        # Bid floors of 28 or more fall back to index 0; the "> 28" threshold flag
        # below is what tells them apart.
        # NOTE(review): a negative bid floor would produce a negative index here - confirm
        # that upstream data never contains one.
        index = int(bid_floor*20) if bid_floor < 28 else 0
        bid_floor_list = [0]*560
        bid_floor_list[index] = 1
        result.extend(bid_floor_list)
    else:
        result.append(bid_floor)

    # Determine if bid floor is a multiple of 0.05 or of 0.1
    for n in [20, 10]:
        bid_floor_tmp = n*bid_floor
        result.append(1 if bid_floor_tmp == int(bid_floor_tmp) else 0)

    # Determine if bid floor is greater than the values in thres_list
    thres_list = [1.5, 2, 2.5, 3, 28]
    result.extend([1 if bid_floor > thres else 0 for thres in thres_list])

    # Auction - Bidrequests - Impressions - format
    sd.binarize(result, formats_.index(entry["format"]), len(formats_))

    # Auction - Bidrequests - Impressions - product
    sd.binarize(result, entry["product"]-1, 6)

    # Auction - Bidrequests - Impressions - banner
    width = entry["w"]
    height = entry["h"]

    # Determine if banner belongs to any of the following types:
    # 1) h in (0, 200] and w in (0, 500]
    # 2) h in (0, 200] and w in (500, infinity)
    # 3) h in (200, infinity) and w in (0, 500]
    banner_cat = [0, 0, 0]
    if 0 < height <= 200:
        if 0 < width <= 500:
            banner_cat[0] = 1
        elif width > 500:
            banner_cat[1] = 1
    elif height > 200 and 0 < width <= 500:
        # The lower bound on width keeps type 3 consistent with its definition above
        banner_cat[2] = 1

    sd.add_to_result(result, (width, height), banners_)
    result.extend(banner_cat)
def process(entry, result):
    """
    Parse the auction-level variables "margin", "tmax", and "bkc" of one impression
    and append their encoded features to result. tmax is encoded with fine-grained
    value buckets.

    :param entry: the object that represents one impression; keys "margin", "tmax"
                  and "bkc" are read
    :param result: the list of features, extended in place
    :return: the margin rounded to two decimals
    """
    # Auction - margin
    margin = round(float(entry["margin"]), 2)
    sd.add_to_result(result, margin, margins_)

    # Auction - tmax
    tmax = entry["tmax"]
    if tmax is None or tmax == "None":
        # Missing tmax: zero every tmax feature and raise the last column as the
        # "missing" flag. A real None is handled like the string "None".
        result.extend([0]*85)
        result.append(1)
    else:
        # Determine if tmax is multiple of 5 or 10
        # (helper defined elsewhere; presumably appends one flag per call - verify)
        if_multiple_tmax(result, tmax, 5)
        if_multiple_tmax(result, tmax, 10)

        # Is tmax <= 500 / <= 700?
        for thres in [500, 700]:
            result.append(1 if tmax <= thres else 0)

        # 81 bucket columns, exactly one of which is set:
        #   column 0       : tmax <= 20
        #   columns 1..65  : one column per value 21..85
        #   columns 66..75 : 86..135 in steps of 5
        #   columns 76..80 : <= 200, <= 500, <= 999, == 1000, anything larger
        buckets = [0]*81
        if tmax <= 20:
            position = 0
        elif tmax <= 85:
            position = 1 + (tmax-21)
        elif tmax <= 135:
            # Floor division keeps the index an int; "/" yields a float on
            # Python 3, which cannot index a list.
            position = 66 + (tmax-86) // 5
        elif tmax <= 200:
            position = 76
        elif tmax <= 500:
            position = 77
        elif tmax <= 999:
            position = 78
        elif tmax == 1000:
            position = 79
        else:
            position = 80
        buckets[position] = 1
        result.extend(buckets)

        # Last column: tmax is not missing
        result.append(0)

    # Auction - bkc: one column per id in bkcids_, then "unknown id", then "empty"
    bkc_result = [0]*(len(bkcids_)+2)
    bkc_str = entry["bkc"]
    if not bkc_str:
        bkc_result[-1] = 1
    else:
        unknown_index = len(bkc_result)-2
        for item in bkc_str.split(","):
            try:
                index = bkcids_.index(item)
            except ValueError:
                # Id not present in bkcids_: count it in the "unknown id" column
                index = unknown_index
            bkc_result[index] = 1
    result.extend(bkc_result)

    return margin
def process(entry, result):
    """
    Given a JSON object formatted by Extractor.py, parse variables "margin", "tmax", and "bkc",
    and append the results to the list of possible results.

    :param entry: the JSON object that represents one impression
    :param result: the list of possible results, extended in place
    :return: the value of margin, which will be used when processing bid floor
    """
    # Auction - margin
    margin = round(float(entry["margin"]), 2)
    sd.add_to_result(result, margin, margins_)

    # Auction - tmax (always contributes 12 variables)
    tmax = entry["tmax"]
    if tmax is None or tmax == "None":
        # If tmax is missing, use the last variable to indicate so. A real None is
        # treated like the string "None" so it does not crash on the modulo below.
        result.extend([0]*11)
        result.append(1)
    else:
        # Determine if tmax is multiple of 5 or 10
        result.append(1 if tmax % 5 == 0 else 0)
        result.append(1 if tmax % 10 == 0 else 0)

        # Determine if tmax is less than or equal to 500, and if tmax is less than or equal to 700
        for thres in [500, 700]:
            result.append(1 if tmax <= thres else 0)

        # Determine if tmax is equal to any of the values in tmax_list; the variable
        # after them is set when tmax matches none of the values
        tmax_list = [30, 45, 50, 70, 85, 1000]
        matches = [1 if tmax == item else 0 for item in tmax_list]
        result.extend(matches)
        result.append(0 if 1 in matches else 1)

        # Add one variable to indicate tmax is not missing
        result.append(0)

    # Auction - bkc: one variable per id in bkcids_, then "unknown id", then "empty"
    bkc_result = [0]*(len(bkcids_)+2)
    bkc_str = entry["bkc"]
    if not bkc_str:
        bkc_result[-1] = 1
    else:
        unknown_index = len(bkc_result)-2
        for item in bkc_str.split(","):
            try:
                index = bkcids_.index(item)
            except ValueError:
                # Id not present in bkcids_: count it in the "unknown id" variable
                index = unknown_index
            bkc_result[index] = 1
    result.extend(bkc_result)

    return margin