from conf.configure import Configure
from reading.advertisement import Advertisement
from reading.app import App
from reading.dataset import Dataset
from reading.position import Position
from reading.user import User
from reading.user_app_actions import User_App_Actions
import matplotlib.pyplot as plt
import numpy as np

if __name__ == '__main__':
    # Single switch forwarded unchanged to every loader below.
    debug = False

    print('reading data...')
    # Loaders are constructed in a fixed order, each from its configured path.
    ad = Advertisement(Configure.ad_path, debug=debug)
    app = App(Configure.app_categories_path, debug=debug)
    data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug)
    position = Position(Configure.position_path, debug=debug)
    user = User(Configure.user_path, debug=debug)
    user_app_actions = User_App_Actions(Configure.user_app_actions_path,
                                        debug=debug)

    # Zero-initialised counters, one independent list per name.
    # NOTE(review): presumably indexed by day / hour / minute of a click or
    # conversion timestamp; the code that fills them is not visible here.
    # 32 slots for days - presumably so a 1-based day number can be used as
    # the index directly; confirm against the code that fills them.
    days, conver_days, click_in_con_days = ([0] * 32 for _ in range(3))
    hours, conver_hours, click_in_con_hours = ([0] * 24 for _ in range(3))
    minutes, conver_minutes = ([0] * 60 for _ in range(2))
import pandas as pd

from conf.configure import Configure
from reading.advertisement import Advertisement
from reading.dataset import Dataset
from reading.user import User
from reading.app import App

if __name__ == '__main__':
    # Print, for every age bucket, how many labelled records fall into it:
    #   <age> <total> <label == 0 count> <label != 0 count>
    ad = Advertisement(Configure.ad_path)
    app = App(Configure.app_categories_path)
    data_set = Dataset(Configure.train_path, Configure.test_path)
    user = User(Configure.user_path)

    # One slot per age value; result0 counts label == 0, result1 the rest.
    # NOTE(review): assumes every age is an int in 0..99 - a larger value
    # raises IndexError and a negative one wraps around; confirm against
    # the user data.
    result0 = [0] * 100
    result1 = [0] * 100

    for d in data_set.get_data_list():
        label = d['label']
        # Records with a negative label are ignored (presumably the unlabelled
        # rows from the test file); skip them before paying for the user
        # lookup.
        if label < 0:
            continue

        userID = d['userID']
        age = user.get_value(userID)['age']
        if label == 0:
            result0[age] += 1
        else:
            result1[age] += 1

    # Walk both histograms in lockstep instead of indexing by position.
    for age, (count0, count1) in enumerate(zip(result0, result1)):
        print(age, count0 + count1, count0, count1)
def add_to_app_cat(self, ad: Advertisement, app: App):
    """Hand every record in ``self.data_list`` to ``app`` under its app category.

    For each record the category is resolved in two hops: the record's
    ``creativeID`` is looked up in ``ad`` to obtain an ``appID``, and that
    ``appID`` is looked up in ``app`` to obtain an ``appCategory``. The record
    is then passed to ``app.add_dataset`` together with that category.

    :param ad: lookup whose ``get_value(creativeID)`` result exposes ``appID``.
    :param app: lookup whose ``get_value(appID)`` result exposes
        ``appCategory``; also the receiver of ``add_dataset``.
    :return: None.
    """
    for entry in self.data_list:
        advert = ad.get_value(entry.creativeID)
        category = app.get_value(advert.appID).appCategory
        app.add_dataset(entry, category)
if __name__ == '__main__': # ad = pd.read_csv(Configure.ad_path) # data_set = pd.read_csv(Configure.train_path) # user = pd.read_csv(Configure.user_path) # app = pd.read_csv(Configure.app_categories_path) # # data = data_set.merge(user, how='left', on='userID') # data = data.merge(ad, how='left', on='creativeID') # data = data.merge(app, how='left', on='appID') # data = data[data['label'] >= 0] # g = data.groupby(['appCategory', 'haveBaby', 'label']).count() # for group_name, df in g: # print(group_name, df) ad = Advertisement(Configure.ad_path) app = App(Configure.app_categories_path) data_set = Dataset(Configure.train_path, Configure.test_path) user = User(Configure.user_path) from_t = 270000 end_t = 290000 result = dict() for d in data_set.get_data_list(): creativeID = d['creativeID'] userID = d['userID'] haveBaby = user.get_value(userID)['haveBaby'] appID = ad.get_value(creativeID)['appID'] appCategory = app.get_value(appID)['appCategory'] label = d['label'] clickTime = d['clickTime'] if clickTime < from_t or clickTime >= end_t: