out_row_count = 0 for user_id in user_dict.keys(): user_features, user_features_header = getUserFeatures( user_dict[user_id], prefecture_location_dict, model_start_date) try: userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[ user_id] except: userid_coupon_purchase_detail_list = [] user_hist_features, user_hist_features_header, master_list = getUserHistFeatures( userid_coupon_purchase_detail_list, coupon_list_feature_dict, model_start_date, model_start_date) user_visit_features, user_visit_features_header = getUserVisitFeatures( user_id, coupon_visit_dict, model_start_date) for coupon_id in coupon_list_train_dict.keys(): coupon_area_features, coupon_area_features_header = getCouponAreaFeatures( user_dict[user_id], coupon_area_dict, coupon_id) coupon_features, coupon_features_header = getCouponFeatures( coupon_list_train_dict[coupon_id], master_list) dv_value = getDV(user_id, coupon_id, dv_dict) if out_row_count == 0: out_header = [ "USER_ID_hash", "COUPON_ID_hash" ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + [ "DV" ] out_header_len = len(out_header) out_file.writerow(out_header) out_row = [ user_id, coupon_id ] + user_features + user_hist_features + user_visit_features + coupon_area_features + coupon_features + [ dv_value
new_coupon_purchase_detail_userid_list = [] for coupon_purchase_detail in coupon_purchase_detail_userid_list[: ind]: if datetime.datetime.strptime(coupon_purchase_detail['I_DATE'], "%Y-%m-%d %H:%M:%S").date( ) < date_before_purchase_date: new_coupon_purchase_detail_userid_list.append( coupon_purchase_detail) #print new_coupon_purchase_detail_userid_list user_hist_features, user_hist_features_header, master_list = getUserHistFeatures( new_coupon_purchase_detail_userid_list, coupon_list_feature_dict, model_start_date, date_before_purchase_date) user_visit_features, user_visit_features_header = getUserVisitFeatures( user_id, coupon_visit_dict, date_before_purchase_date) coupon_area_features, coupon_area_features_header = getCouponAreaFeatures( user_dict[user_id], coupon_area_dict, purchase_coupon) # getting coupon for dv = 1 # coupon_features, coupon_features_header = getCouponFeatures( coupon_list_feature_dict[purchase_coupon], master_list) dv_value = 1 if out_row_count == 0: out_header = [ "USER_ID_hash", "COUPON_ID_hash" ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + [ "DV" ] out_header_len = len(out_header) out_file.writerow(out_header) out_row = [ user_id, purchase_coupon
except: date_before_purchase_date = puchase_date daywise_coupon_list = daywise_coupon_list_dict[date_before_purchase_date] # getting user features # user_features, user_features_header = getUserFeatures(user_dict[user_id], prefecture_location_dict, date_before_purchase_date) # getting the new history list and getting user_hist features based on it # new_coupon_purchase_detail_userid_list = [] for coupon_purchase_detail in coupon_purchase_detail_userid_list[:ind]: if datetime.datetime.strptime(coupon_purchase_detail['I_DATE'], "%Y-%m-%d %H:%M:%S").date() < date_before_purchase_date: new_coupon_purchase_detail_userid_list.append(coupon_purchase_detail) #print new_coupon_purchase_detail_userid_list user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(new_coupon_purchase_detail_userid_list, coupon_list_feature_dict, model_start_date, date_before_purchase_date) user_visit_features, user_visit_features_header = getUserVisitFeatures(user_id, coupon_visit_dict, date_before_purchase_date) coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(user_dict[user_id], coupon_area_dict, purchase_coupon) # getting coupon for dv = 1 # coupon_features, coupon_features_header = getCouponFeatures(coupon_list_feature_dict[purchase_coupon], master_list) dv_value = 1 if out_row_count == 0: out_header = ["USER_ID_hash", "COUPON_ID_hash"]+ user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + ["DV"] out_header_len = len(out_header) out_file.writerow( out_header ) out_row = [user_id, purchase_coupon] + user_features + user_hist_features + user_visit_features + coupon_area_features + coupon_features + [dv_value] assert len(out_row) == out_header_len out_file.writerow( out_row ) out_row_count += 1 # getting some coupons for dv=0 # no_of_non_purchased_coupons = 4
# Build the model-scoring matrix: one output row for every (user, coupon)
# pair, where the coupons are the entries of coupon_list_train_dict.
#
# Each row is laid out as
#   [USER_ID_hash, COUPON_ID_hash] + user features + user purchase-history
#   features + user visit features + coupon-area features + coupon features
#   + [DV]
# and the matching header row is written once, immediately before the first
# data row (so no header is emitted when there are no rows at all).

# Lookup consumed by getUserVisitFeatures below.
coupon_visit_dict = getCouponVisitDict(coupon_visit_feature_file)

print("Preparing the data..")
user_count = 0
out_row_count = 0
for user_id, user_row in user_dict.items():
    # Features that depend only on the user are computed once per user and
    # reused for every coupon in the inner loop.
    user_features, user_features_header = getUserFeatures(
        user_row, prefecture_location_dict, model_start_date)

    # A user without any recorded purchases simply has an empty history.
    # Only the missing-key case is handled; any other error is a real
    # problem and must not be silently swallowed.
    try:
        userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[user_id]
    except KeyError:
        userid_coupon_purchase_detail_list = []

    # model_start_date is passed twice on purpose, exactly as in the original
    # call; the meaning of each positional slot is defined by
    # getUserHistFeatures (not visible here).
    user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
        userid_coupon_purchase_detail_list, coupon_list_feature_dict,
        model_start_date, model_start_date)
    user_visit_features, user_visit_features_header = getUserVisitFeatures(
        user_id, coupon_visit_dict, model_start_date)

    for coupon_id, coupon_row in coupon_list_train_dict.items():
        coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(
            user_row, coupon_area_dict, coupon_id)
        coupon_features, coupon_features_header = getCouponFeatures(
            coupon_row, master_list)
        dv_value = getDV(user_id, coupon_id, dv_dict)

        # The header names are only known once the feature helpers have been
        # called, so the header is emitted lazily before the first data row.
        if out_row_count == 0:
            out_header = (
                ["USER_ID_hash", "COUPON_ID_hash"]
                + user_features_header
                + user_hist_features_header
                + user_visit_features_header
                + coupon_area_features_header
                + coupon_features_header
                + ["DV"]
            )
            out_header_len = len(out_header)
            out_file.writerow(out_header)

        out_row = (
            [user_id, coupon_id]
            + user_features
            + user_hist_features
            + user_visit_features
            + coupon_area_features
            + coupon_features
            + [dv_value]
        )
        # Internal invariant: every row must line up with the header columns.
        assert len(out_row) == out_header_len
        out_file.writerow(out_row)
        out_row_count += 1

    user_count += 1
    if user_count % 200 == 0:
        # Single-argument form prints the same text as the Python 2 statement
        # `print "Processed Users : ", user_count` and also parses on Python 3.
        print("%s %s" % ("Processed Users : ", user_count))