def process(entry, result):
    """
    Given a JSON object formatted by Extractor.py, parse variables "t", "cc", and "rg",
    and add the results to the list of possible results.

    Values are added to result in this order: minute of the hour, hour of the day and
    day of the week in UTC (each through sd.binarize), a weekend flag, a
    Friday-or-Saturday flag, hour of the day and day of the week in local time
    (each through sd.binarize, or 31 zeros when local time cannot be determined),
    the country, and the region.

    :param entry: the JSON object that represents one impression
    :param result: the list of possible results (modified in place)
    :return: None
    """
    # Event - time
    t = entry["t"] / 1000  # Divide t by 1000 because the unit of measurement is 1 millisecond for t

    # Get the UTC time from the timestamp, and parse minute of the hour, hour of the day, and day of the week
    utc_t = datetime.utcfromtimestamp(t)
    minute = utc_t.minute
    sd.binarize(result, minute, 60)
    hour = utc_t.hour
    sd.binarize(result, hour, 24)
    day = utc_t.weekday()
    sd.binarize(result, day, 7)

    # Determine if it is weekend (weekday() returns 5 for Saturday and 6 for Sunday)
    result.append(1 if day in (5, 6) else 0)

    # Determine if it is Friday or Saturday
    result.append(1 if day in (4, 5) else 0)

    try:
        # Try to get local time using UTC time and country and region
        # Determine time zone using country and region
        country = entry["cc"]
        if country in ("US", "CA", "AU"):
            # These countries are resolved by region through region_timezone_
            tz = pytz.timezone(region_timezone_[entry["rg"]])
        else:
            # Otherwise use the first time zone pytz lists for the country
            tz = pytz.timezone(pytz.country_timezones(country)[0])

        # utc_t is naive, so it has to be marked as UTC before conversion; calling
        # astimezone() on the naive value either raises or treats it as the
        # machine's local time, depending on the Python version.
        aware_utc_t = pytz.utc.localize(utc_t)

        # Get hour of the day and day of the week in local time
        local_t = tz.normalize(aware_utc_t.astimezone(tz))
        local_hour = local_t.hour
        local_day = local_t.weekday()
    except Exception:
        # Deliberate best effort: if local time cannot be extracted (missing key,
        # unknown country/region/time zone, ...), set all variables in this section
        # to be 0. 31 = 24 (hour of the day) + 7 (day of the week).
        result.extend([0] * 31)
    else:
        # Only write to result once both values are known, so a failure can never
        # leave a partially filled section followed by the 31 fallback zeros.
        sd.binarize(result, local_hour, 24)
        sd.binarize(result, local_day, 7)

    # Event - country
    sd.add_to_result(result, entry["cc"], countries_)

    # Event - region
    sd.add_to_result(result, entry["rg"], regions_)
def process(entry, result):
    """
    Parse variables "t", "cc", and "rg" of one entry and add the derived values to result.

    entry["t"] is divided by 1000 (presumably a millisecond timestamp - confirm against
    the producer) and converted with datetime.fromtimestamp, which yields the time in the
    local time zone of the machine running this code. Minute of the hour, hour of the
    day, day of the week, and hour of the week are then passed to sd.binarize, followed
    by the country and the region through sd.add_to_result.

    :param entry: mapping with the keys "t", "cc", and "rg"
    :param result: the list that receives the derived values (modified in place)
    :return: None
    """
    # Event - time
    seconds = entry["t"] / 1000
    moment = datetime.fromtimestamp(seconds)
    minute_of_hour = moment.minute
    hour_of_day = moment.hour
    day_of_week = moment.weekday()

    # (value, number of possible values) pairs, in the order they are written to result
    time_features = (
        (minute_of_hour, 60),
        (hour_of_day, 24),
        (day_of_week, 7),
        (day_of_week * 24 + hour_of_day, 7 * 24),  # hour of the week
    )
    for value, size in time_features:
        sd.binarize(result, value, size)

    # Event - country
    sd.add_to_result(result, entry["cc"], countries_)

    # Event - region
    sd.add_to_result(result, entry["rg"], regions_)
def process(margin, entry, result, mode):
    """
    Given a JSON object formatted by Extractor.py, parse variables "bidderid", "verticalid",
    "bidfloor", "format", "product", "w", and "h", and add the results to the list of
    possible results.

    :param margin: value compared with the bid floor (rounded to 2 decimals); a flag of 0 is
                   added when the two are equal and 1 otherwise
    :param entry: the JSON object that represents one impression
    :param result: the list of possible results (modified in place)
    :param mode: "bin" to encode the bid floor as 560 boolean variables (one per interval of
                 size 0.05 from 0 to 28); any other value records the bid floor itself
    :return: None
    :raises ValueError: if entry["format"] is not in formats_ (raised by list.index)
    """
    # Auction - Bidrequests - bidder id
    bidder_id = entry["bidderid"]
    if bidder_id == 36:  # Adjusting the index for DSP 36 since we ignore DSP 35 and 37
        bidder_id = 35
    sd.binarize(result, bidder_id - 1, 35)

    # Auction - Bidrequests - vertical id
    sd.binarize(result, entry["verticalid"] - 1, 16)

    # Auction - Bidrequests - Impressions - bid Floor
    bid_floor = round(float(entry["bidfloor"]), 2)
    result.append(0 if bid_floor - margin == 0 else 1)

    # If bid floor is to be parsed into binary format, create a boolean variable for every
    # interval of size 0.05 from 0 to 28 (560 intervals), and according to the value of the
    # bid floor, set the associated boolean variable to 1.
    # Otherwise, just record the value of bid floor.
    if mode == "bin":
        # Values outside [0, 28) all share interval 0. The lower bound matters: a negative
        # bid floor would otherwise produce a negative index and silently set an interval
        # counted from the end of the list (or raise IndexError below -28).
        index = int(bid_floor * 20) if 0 <= bid_floor < 28 else 0
        bid_floor_list = [0] * 560
        bid_floor_list[index] = 1
        result.extend(bid_floor_list)
    else:
        result.append(bid_floor)

    # Determine if bid floor is a multiple of 0.05 or of 0.1
    for n in (20, 10):
        scaled = n * bid_floor
        result.append(1 if scaled == int(scaled) else 0)

    # Determine if bid floor is greater than each of the thresholds (one flag per threshold)
    thres_list = (1.5, 2, 2.5, 3, 28)
    result.extend(1 if bid_floor > thres else 0 for thres in thres_list)

    # Auction - Bidrequests - Impressions - format
    sd.binarize(result, formats_.index(entry["format"]), len(formats_))

    # Auction - Bidrequests - Impressions - product
    sd.binarize(result, entry["product"] - 1, 6)

    # Auction - Bidrequests - Impressions - banner
    width = entry["w"]
    height = entry["h"]

    # Determine if banner belongs to any of the following types:
    # 1) h in (0, 200] and w in (0, 500]
    # 2) h in (0, 200] and w in (500, infinity)
    # 3) h in (200, infinity) and w in (0, 500]
    banner_cat = [0, 0, 0]
    if 0 < height <= 200:
        if 0 < width <= 500:
            banner_cat[0] = 1
        elif width > 500:
            banner_cat[1] = 1
    elif height > 200 and 0 < width <= 500:
        # The lower bound on width keeps type 3 consistent with its definition above and
        # with types 1 and 2; a non-positive width belongs to none of the types.
        banner_cat[2] = 1

    sd.add_to_result(result, (width, height), banners_)
    result.extend(banner_cat)