from reading.dataset import Dataset from reading.position import Position from reading.user import User from reading.user_app_actions import User_App_Actions import matplotlib.pyplot as plt import numpy as np if __name__ == '__main__': debug = False print('reading data...') ad = Advertisement(Configure.ad_path, debug=debug) app = App(Configure.app_categories_path, debug=debug) data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug) position = Position(Configure.position_path, debug=debug) user = User(Configure.user_path, debug=debug) user_app_actions = User_App_Actions(Configure.user_app_actions_path, debug=debug) days = [0] * 32 hours = [0] * 24 minutes = [0] * 60 conver_days = [0] * 32 conver_hours = [0] * 24 conver_minutes = [0] * 60 click_in_con_days= [0] * 32 click_in_con_hours = [0] * 24 click_in_con_minutes = [0] * 60 for v in data_set.get_data_list(): click = v['clickTime']
def add_to_user(self, user: User):
    """Feed every record held in ``self.data_list`` to ``user``.

    Each record is passed to ``user.add_dataset``; afterwards
    ``user.fresh()`` is invoked.

    :param user: object receiving the records; it must provide
        ``add_dataset(record)`` and ``fresh()``.
    """
    register = user.add_dataset
    for row in self.data_list:
        register(row)
    # NOTE(review): the source line was collapsed, so it is ambiguous whether
    # fresh() belongs inside the loop; it is assumed here to run once, after
    # all records have been added -- confirm against the original file.
    user.fresh()
import pandas as pd

from conf.configure import Configure
from reading.advertisement import Advertisement
from reading.dataset import Dataset
from reading.user import User
from reading.app import App

if __name__ == '__main__':
    # Tabulate, per user age, how many labelled samples there are and how
    # they split between label == 0 and any other non-negative label.
    # NOTE(review): `ad` and `app` are loaded but never read below; they are
    # kept because their constructors may have side effects -- confirm.
    ad = Advertisement(Configure.ad_path)
    app = App(Configure.app_categories_path)
    data_set = Dataset(Configure.train_path, Configure.test_path)
    user = User(Configure.user_path)

    # One counter slot per age value 0..99, indexed directly by age.
    label0_by_age = [0] * 100
    other_by_age = [0] * 100

    for sample in data_set.get_data_list():
        uid = sample['userID']
        age = user.get_value(uid)['age']
        label = sample['label']
        # Samples with a negative label are left out of both tallies.
        if label < 0:
            continue
        bucket = label0_by_age if label == 0 else other_by_age
        bucket[age] += 1

    # Output columns: age, total, label == 0 count, other-label count.
    for age, (n_zero, n_other) in enumerate(zip(label0_by_age, other_by_age)):
        print(age, n_zero + n_other, n_zero, n_other)
from reading.app import App from reading.dataset import Dataset from reading.position import Position from reading.user import User from reading.user_app_actions import User_App_Actions from reading.user_app_installed import User_App_Installed from feature.dmatrix import DMatrix from model.xgb_func import * from handle.handle import * if __name__ == '__main__': debug = False print('reading data...') ad = Advertisement(Configure.ad_path, debug=debug) data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug) user = User(Configure.user_path, debug=debug) from_t = 290000 end_t = 310000 result = dict() for d in data_set.get_data_list(): if d['clickTime'] < from_t or d['clickTime'] >= end_t: continue userID = d['userID'] baby = user.get_value(userID)['haveBaby'] creativeID = d['creativeID'] appID = ad.get_value(creativeID)['appID'] label = d['label'] if baby not in result: result[baby] = dict() if appID not in result[baby]:
# data_set = pd.read_csv(Configure.train_path) # user = pd.read_csv(Configure.user_path) # app = pd.read_csv(Configure.app_categories_path) # # data = data_set.merge(user, how='left', on='userID') # data = data.merge(ad, how='left', on='creativeID') # data = data.merge(app, how='left', on='appID') # data = data[data['label'] >= 0] # g = data.groupby(['appCategory', 'haveBaby', 'label']).count() # for group_name, df in g: # print(group_name, df) ad = Advertisement(Configure.ad_path) app = App(Configure.app_categories_path) data_set = Dataset(Configure.train_path, Configure.test_path) user = User(Configure.user_path) from_t = 270000 end_t = 290000 result = dict() for d in data_set.get_data_list(): creativeID = d['creativeID'] userID = d['userID'] haveBaby = user.get_value(userID)['haveBaby'] appID = ad.get_value(creativeID)['appID'] appCategory = app.get_value(appID)['appCategory'] label = d['label'] clickTime = d['clickTime'] if clickTime < from_t or clickTime >= end_t: continue if appCategory not in result: