예제 #1
0
 out_row_count = 0
 for user_id in user_dict.keys():
     user_features, user_features_header = getUserFeatures(
         user_dict[user_id], prefecture_location_dict, model_start_date)
     try:
         userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[
             user_id]
     except:
         userid_coupon_purchase_detail_list = []
     user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
         userid_coupon_purchase_detail_list, coupon_list_feature_dict,
         model_start_date, model_start_date)
     user_visit_features, user_visit_features_header = getUserVisitFeatures(
         user_id, coupon_visit_dict, model_start_date)
     for coupon_id in coupon_list_train_dict.keys():
         coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(
             user_dict[user_id], coupon_area_dict, coupon_id)
         coupon_features, coupon_features_header = getCouponFeatures(
             coupon_list_train_dict[coupon_id], master_list)
         dv_value = getDV(user_id, coupon_id, dv_dict)
         if out_row_count == 0:
             out_header = [
                 "USER_ID_hash", "COUPON_ID_hash"
             ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + [
                 "DV"
             ]
             out_header_len = len(out_header)
             out_file.writerow(out_header)
         out_row = [
             user_id, coupon_id
         ] + user_features + user_hist_features + user_visit_features + coupon_area_features + coupon_features + [
             dv_value
            new_coupon_purchase_detail_userid_list = []
            for coupon_purchase_detail in coupon_purchase_detail_userid_list[:
                                                                             ind]:
                if datetime.datetime.strptime(coupon_purchase_detail['I_DATE'],
                                              "%Y-%m-%d %H:%M:%S").date(
                                              ) < date_before_purchase_date:
                    new_coupon_purchase_detail_userid_list.append(
                        coupon_purchase_detail)
            #print new_coupon_purchase_detail_userid_list
            user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(
                new_coupon_purchase_detail_userid_list,
                coupon_list_feature_dict, model_start_date,
                date_before_purchase_date)
            user_visit_features, user_visit_features_header = getUserVisitFeatures(
                user_id, coupon_visit_dict, date_before_purchase_date)
            coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(
                user_dict[user_id], coupon_area_dict, purchase_coupon)

            # getting coupon for dv = 1 #
            coupon_features, coupon_features_header = getCouponFeatures(
                coupon_list_feature_dict[purchase_coupon], master_list)
            dv_value = 1
            if out_row_count == 0:
                out_header = [
                    "USER_ID_hash", "COUPON_ID_hash"
                ] + user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + [
                    "DV"
                ]
                out_header_len = len(out_header)
                out_file.writerow(out_header)
            out_row = [
                user_id, purchase_coupon
			except:
				date_before_purchase_date = puchase_date
				daywise_coupon_list = daywise_coupon_list_dict[date_before_purchase_date]

			# getting user features #
			user_features, user_features_header = getUserFeatures(user_dict[user_id], prefecture_location_dict, date_before_purchase_date)

			# getting the new history list and getting user_hist features based on it #
			new_coupon_purchase_detail_userid_list = []
			for coupon_purchase_detail in coupon_purchase_detail_userid_list[:ind]:
				if  datetime.datetime.strptime(coupon_purchase_detail['I_DATE'], "%Y-%m-%d %H:%M:%S").date() < date_before_purchase_date:
					new_coupon_purchase_detail_userid_list.append(coupon_purchase_detail)
			#print new_coupon_purchase_detail_userid_list
			user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(new_coupon_purchase_detail_userid_list, coupon_list_feature_dict, model_start_date, date_before_purchase_date)
			user_visit_features, user_visit_features_header = getUserVisitFeatures(user_id, coupon_visit_dict, date_before_purchase_date)
			coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(user_dict[user_id], coupon_area_dict, purchase_coupon)
		
			# getting coupon for dv = 1 #
			coupon_features, coupon_features_header = getCouponFeatures(coupon_list_feature_dict[purchase_coupon], master_list)
			dv_value = 1
			if out_row_count == 0:
                                out_header = ["USER_ID_hash", "COUPON_ID_hash"]+ user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + ["DV"]
                                out_header_len = len(out_header)
                                out_file.writerow( out_header )
			out_row = [user_id, purchase_coupon] + user_features + user_hist_features + user_visit_features + coupon_area_features + coupon_features + [dv_value]
                        assert len(out_row) == out_header_len
                        out_file.writerow( out_row )
                        out_row_count += 1
	
			# getting some coupons for dv=0 #
			no_of_non_purchased_coupons = 4
        coupon_visit_dict = getCouponVisitDict(coupon_visit_feature_file)
	

	print "Preparing the data.."
	user_count = 0
	out_row_count = 0
	for user_id in user_dict.keys():
		user_features, user_features_header = getUserFeatures(user_dict[user_id], prefecture_location_dict, model_start_date)
		try:
			userid_coupon_purchase_detail_list = coupon_purchase_detail_dict[user_id]
		except:
			userid_coupon_purchase_detail_list = []
		user_hist_features, user_hist_features_header, master_list = getUserHistFeatures(userid_coupon_purchase_detail_list, coupon_list_feature_dict, model_start_date, model_start_date)
		user_visit_features, user_visit_features_header = getUserVisitFeatures(user_id, coupon_visit_dict, model_start_date)
		for coupon_id in coupon_list_train_dict.keys():
			coupon_area_features, coupon_area_features_header = getCouponAreaFeatures(user_dict[user_id], coupon_area_dict, coupon_id)
			coupon_features, coupon_features_header = getCouponFeatures(coupon_list_train_dict[coupon_id], master_list)
			dv_value = getDV(user_id, coupon_id, dv_dict)
			if out_row_count == 0:
				out_header = ["USER_ID_hash", "COUPON_ID_hash"]+ user_features_header + user_hist_features_header + user_visit_features_header + coupon_area_features_header + coupon_features_header + ["DV"]
				out_header_len = len(out_header)
				out_file.writerow( out_header )
			out_row = [user_id, coupon_id] + user_features + user_hist_features + user_visit_features + coupon_area_features + coupon_features + [dv_value]
			assert len(out_row) == out_header_len
			out_file.writerow( out_row )
			out_row_count += 1
			#break
		user_count +=1
		#break
		if user_count % 200 == 0:
				print "Processed Users : ", user_count