def app_dlist(dlist, appID, clickTime, correlate, ad: Advertisement):
    """Return the best ``get_prod`` score over the entries that precede *clickTime*.

    Each entry of *dlist* is read through its ``'clickTime'``, ``'creativeID'``
    and ``'label'`` keys.  The entry's creative is mapped to an app id via
    ``ad.get_value(...)['appID']`` and scored against *appID* with
    ``get_prod(correlate, appID, <entry app id>, <entry label>)``.

    Returns -1 when no entry is scored or when no score exceeds -1.
    """
    best = -1
    for entry in dlist:
        # The scan stops at the first entry at or after clickTime, so this
        # presumably relies on dlist being ordered by clickTime -- confirm.
        if entry['clickTime'] >= clickTime:
            break
        entry_app = ad.get_value(entry['creativeID'])['appID']
        best = max(best, get_prod(correlate, appID, entry_app, entry['label']))
    return best
def add_to_app_cat(self, ad: Advertisement, app: App):
    """Register every record of ``self.data_list`` with *app* under its app category.

    For each record, the creative id is resolved to an app id through
    ``ad.get_value(...).appID``, the app id to a category through
    ``app.get_value(...).appCategory``, and the record is then handed to
    ``app.add_dataset(record, category)``.
    """
    for rec in self.data_list:
        app_id = ad.get_value(rec.creativeID).appID
        category = app.get_value(app_id).appCategory
        app.add_dataset(rec, category)
def app_dlist(d, ad: Advertisement):
    """Return ``common_part(d)`` with its ``'condition'`` entry set to the app id.

    The app id is looked up from the record's ``'creativeID'`` via
    ``ad.get_value(...)['appID']``.
    """
    row = common_part(d)
    app_id = ad.get_value(d['creativeID'])['appID']
    row['condition'] = app_id
    return row
if __name__ == '__main__':
    # Count, per (haveBaby value, appID) pair, how many records in the time
    # window carry each label, then dump the counts and the label-1 ratio
    # as CSV lines.
    debug = False
    print('reading data...')
    ad = Advertisement(Configure.ad_path, debug=debug)
    data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug)
    user = User(Configure.user_path, debug=debug)

    # Half-open clickTime window [from_t, end_t) of the records to count.
    from_t = 290000
    end_t = 310000

    # result[baby][appID] == [count of label 0, count of label 1]
    result = dict()
    for d in data_set.get_data_list():
        if d['clickTime'] < from_t or d['clickTime'] >= end_t:
            continue
        userID = d['userID']
        baby = user.get_value(userID)['haveBaby']
        creativeID = d['creativeID']
        appID = ad.get_value(creativeID)['appID']
        label = d['label']
        # NOTE(review): label is used directly as a list index, so a label
        # of -1 would be counted in slot 1 -- confirm that records in this
        # window only carry labels 0 and 1.
        counts = result.setdefault(baby, dict()).setdefault(appID, [0, 0])
        counts[label] += 1

    # The context manager closes the file even if a write raises.
    with open(Configure.baby_app_correlate.format(from_t, end_t), 'w') as fout:
        for baby in result:
            for appID in result[baby]:
                s = result[baby][appID]
                # Ratio of label-1 records; 0 when there are none.
                r = s[1] / (s[0] + s[1]) if s[1] > 0 else 0
                fout.write('{},{},{},{},{}\n'.format(baby, appID, s[0], s[1], r))
# For each value of `length` and each (cond, mode) pair, score every record
# inside the mode's time window with func_dict[cond] and print the log loss
# over the records whose score is non-negative.
for length in range(1000, 10000, 1000):
    # NOTE(review): len_limit is only printed here; presumably the scoring
    # functions read it as a module-level global -- confirm.
    len_limit = length
    for cond in ['adID', 'camgaignID', 'advertiserID', 'appID', 'positionID', 'creativeID']:
        for mode in ['train', 'test']:
            func = func_dict[cond]
            correlate = read_correlate(path_dict[cond][mode])

            # Time window for this mode; anything other than train/test
            # falls back to the submit window.
            if mode == 'train':
                end_t = Configure.train_end_t
                from_t = Configure.train_begin_t
            elif mode == 'test':
                end_t = Configure.test_end_t
                from_t = Configure.test_begin_t
            else:
                end_t = Configure.submit_end_t
                from_t = Configure.submit_begin_t

            # These two conditions are read straight off the click record;
            # the others are looked up through the ad table.
            _on_record = cond in ['creativeID', 'positionID']

            labels = list()
            predtions = list()
            for user_id in data_set.get_keys_by_user_id():
                dlist = data_set.get_value_by_user_id(user_id)
                for record in dlist:
                    _click_t = record['clickTime']
                    if _click_t < from_t or _click_t >= end_t:
                        continue
                    labels.append(record['label'])
                    if _on_record:
                        _score = func(dlist, record[cond], _click_t, correlate)
                    else:
                        _cond_value = ad.get_value(record['creativeID'])[cond]
                        _score = func(dlist, _cond_value, _click_t, correlate, ad)
                    predtions.append(_score)

            # Keep only the records that received a non-negative score.
            _kept = [(a, b) for a, b in zip(labels, predtions) if b >= 0]
            _labels = [a for a, _ in _kept]
            _predictions = [b for _, b in _kept]
            print(len_limit, cond, mode, logloss(_labels, _predictions), len(_labels))
from feature_role.lda_role import Lda_Role from reading.dataset import Dataset from conf.configure import Configure from reading.advertisement import Advertisement import numpy as np if __name__ == '__main__': data_set = Dataset(Configure.train_path, Configure.test_path) ad = Advertisement(Configure.ad_path) param = {'userID': 311729, 'appID': 420} role = Lda_Role() result = dict() for d in data_set.get_data_list(): param = { 'userID': d['userID'], 'appID': ad.get_value(d['creativeID'])['appID'] } vec = role.run(param) clickDay = d['clickTime'] // 10000 label = d['label'] if label < 0: label = 0 if clickDay not in result: result[clickDay] = dict() result[clickDay][0] = dict() result[clickDay][1] = dict() result[clickDay][0]['vec'] = np.zeros([len(vec)]) result[clickDay][0]['include_c'] = 0 result[clickDay][0]['exclude_c'] = 0 result[clickDay][1]['vec'] = np.zeros([len(vec)]) result[clickDay][1]['include_c'] = 0