Beispiel #1
0
def app_dlist(dlist, appID, clickTime, correlate, ad : Advertisement):
    """Return the best ``get_prod`` score among records that precede ``clickTime``.

    Records of ``dlist`` are visited in order; scanning stops at the first
    record whose ``'clickTime'`` is greater than or equal to ``clickTime``.
    (Presumably ``dlist`` is sorted by click time -- confirm with the caller.)

    For every visited record the appID of its creative is looked up through
    ``ad.get_value`` and scored against ``appID`` with ``get_prod``.

    Returns -1 when no record is visited or no score exceeds -1.
    """
    best = -1
    for earlier in dlist:
        if earlier['clickTime'] >= clickTime:
            # Reached the current (or a later) click: nothing more to score.
            return best
        earlier_app = ad.get_value(earlier['creativeID'])['appID']
        score = get_prod(correlate, appID, earlier_app, earlier['label'])
        best = max(best, score)
    return best
Beispiel #2
0
 def add_to_app_cat(self, ad: Advertisement, app: App):
     """Hand every record in ``self.data_list`` to ``app``, keyed by app category.

     For each record the creative is resolved to an appID via ``ad``, the
     appID to an appCategory via ``app``, and the record is then passed to
     ``app.add_dataset`` together with that category.
     """
     for entry in self.data_list:
         app_id = ad.get_value(entry.creativeID).appID
         category = app.get_value(app_id).appCategory
         app.add_dataset(entry, category)
Beispiel #3
0
def app_dlist(d, ad: Advertisement):
    """Build the ``common_part`` result for ``d`` and tag it with the record's appID.

    The appID is looked up through ``ad.get_value`` using ``d['creativeID']``
    and stored under the ``'condition'`` key of the returned object.
    """
    features = common_part(d)
    ad_entry = ad.get_value(d['creativeID'])
    features['condition'] = ad_entry['appID']
    return features
Beispiel #4
0
if __name__ == '__main__':
    # Count, for every (haveBaby, appID) pair, how many clicks inside the
    # window [from_t, end_t) carry label 0 and label 1, then write one CSV
    # line per pair: haveBaby, appID, count0, count1, count1 / (count0 + count1).
    debug = False
    print('reading data...')
    ad = Advertisement(Configure.ad_path, debug=debug)
    data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug)
    user = User(Configure.user_path, debug=debug)

    from_t = 290000
    end_t = 310000
    # result[haveBaby][appID] -> [count for label 0, count for label 1]
    result = dict()
    for d in data_set.get_data_list():
        if d['clickTime'] < from_t or d['clickTime'] >= end_t:
            continue
        baby = user.get_value(d['userID'])['haveBaby']
        appID = ad.get_value(d['creativeID'])['appID']
        label = d['label']
        counts = result.setdefault(baby, dict()).setdefault(appID, [0, 0])
        # The label is used directly as a list index, so it is expected to be
        # 0 or 1.  NOTE(review): a label of -1 would be counted in slot 1 --
        # confirm that no such records fall inside this time window.
        counts[label] += 1

    # The context manager closes the file even if a write fails part-way.
    with open(Configure.baby_app_correlate.format(from_t, end_t), 'w') as fout:
        for baby in result:
            for appID in result[baby]:
                s = result[baby][appID]
                # Guard on s[1] so an all-zero pair cannot divide by zero.
                r = s[1] / (s[0] + s[1]) if s[1] > 0 else 0
                fout.write('{},{},{},{},{}\n'.format(baby, appID, s[0], s[1], r))
Beispiel #5
0

    # Sweep the length limit and, for every feature / mode combination, build
    # predictions from the correlate table and report their log loss.
    for length in range(1000, 10000, 1000):
        len_limit = length
        for cond in ['adID', 'camgaignID', 'advertiserID', 'appID', 'positionID', 'creativeID']:
            for mode in ['train', 'test']:
                func = func_dict[cond]
                correlate = read_correlate(path_dict[cond][mode])

                # Pick the time window that belongs to the current mode.
                if mode == 'train':
                    end_t = Configure.train_end_t
                elif mode == 'test':
                    end_t = Configure.test_end_t
                else:
                    end_t = Configure.submit_end_t
                if mode == 'train':
                    from_t = Configure.train_begin_t
                elif mode == 'test':
                    from_t = Configure.test_begin_t
                else:
                    from_t = Configure.submit_begin_t

                labels = list()
                predtions = list()
                for user_id in data_set.get_keys_by_user_id():
                    dlist = data_set.get_value_by_user_id(user_id)
                    for record in dlist:
                        if record['clickTime'] < from_t or record['clickTime'] >= end_t:
                            continue
                        labels.append(record['label'])
                        if cond in ['creativeID', 'positionID']:
                            # These fields are read straight from the click record.
                            score = func(dlist, record[cond], record['clickTime'], correlate)
                        else:
                            # Other fields are resolved through the advertisement table.
                            cond_value = ad.get_value(record['creativeID'])[cond]
                            score = func(dlist, cond_value, record['clickTime'], correlate, ad)
                        predtions.append(score)

                # Negative predictions are left out of the evaluation.
                _labels = list()
                _predictions = list()
                for idx, pred in enumerate(predtions):
                    if pred >= 0:
                        _labels.append(labels[idx])
                        _predictions.append(pred)
                print(len_limit, cond, mode, logloss(_labels, _predictions), len(_labels))
Beispiel #6
0
from feature_role.lda_role import Lda_Role
from reading.dataset import Dataset
from conf.configure import Configure
from reading.advertisement import Advertisement
import numpy as np

if __name__ == '__main__':
    data_set = Dataset(Configure.train_path, Configure.test_path)
    ad = Advertisement(Configure.ad_path)
    param = {'userID': 311729, 'appID': 420}
    role = Lda_Role()
    result = dict()
    for d in data_set.get_data_list():
        param = {
            'userID': d['userID'],
            'appID': ad.get_value(d['creativeID'])['appID']
        }
        vec = role.run(param)
        clickDay = d['clickTime'] // 10000
        label = d['label']
        if label < 0:
            label = 0
        if clickDay not in result:
            result[clickDay] = dict()
            result[clickDay][0] = dict()
            result[clickDay][1] = dict()
            result[clickDay][0]['vec'] = np.zeros([len(vec)])
            result[clickDay][0]['include_c'] = 0
            result[clickDay][0]['exclude_c'] = 0
            result[clickDay][1]['vec'] = np.zeros([len(vec)])
            result[clickDay][1]['include_c'] = 0