Ejemplo n.º 1
0
from reading.dataset import Dataset
from reading.position import Position
from reading.user import User
from reading.user_app_actions import User_App_Actions
import matplotlib.pyplot as plt
import numpy as np


if __name__ == '__main__':
    # Script entry point: build one reader object per input path, allocate
    # the counter lists, then iterate over the dataset records.
    # `debug` is forwarded unchanged to every reader constructor below.
    debug = False
    print('reading data...')
    ad = Advertisement(Configure.ad_path, debug=debug)
    app = App(Configure.app_categories_path, debug=debug)
    data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug)
    position = Position(Configure.position_path, debug=debug)
    user = User(Configure.user_path, debug=debug)
    user_app_actions = User_App_Actions(Configure.user_app_actions_path, debug=debug)

    # Zero-filled counter lists with 32, 24 and 60 slots respectively.
    # Presumably indexed by the day / hour / minute component of a
    # timestamp -- TODO confirm against the code that fills them.
    days = [0] * 32
    hours = [0] * 24
    minutes = [0] * 60

    # Second set of counters with the same three sizes.
    # NOTE(review): the `conver_` prefix presumably stands for conversions
    # -- confirm.
    conver_days = [0] * 32
    conver_hours = [0] * 24
    conver_minutes = [0] * 60

    # Third set of counters with the same three sizes.
    # NOTE(review): the meaning of `click_in_con_` is not evident from the
    # names alone -- confirm with the code that updates these lists.
    click_in_con_days=  [0] * 32
    click_in_con_hours = [0] * 24
    click_in_con_minutes = [0] * 60
    # Each item yielded by get_data_list() is indexed by string key; the
    # 'clickTime' entry is read first.
    for v in data_set.get_data_list():
        click = v['clickTime']
Ejemplo n.º 2
0
 def add_to_user(self, user: User):
     """Hand every record of this object over to *user*, then refresh it.

     Each element of ``self.data_list`` is passed, in list order, to
     ``user.add_dataset``. After the last element has been passed,
     ``user.fresh()`` is called exactly once.
     """
     for entry in self.data_list:
         user.add_dataset(entry)
     # Refresh a single time, after the whole list has been pushed.
     user.fresh()
Ejemplo n.º 3
0
import pandas as pd
from conf.configure import Configure
from reading.advertisement import Advertisement
from reading.dataset import Dataset
from reading.user import User
from reading.app import App

if __name__ == '__main__':
    # Script entry point: tally, per age value, how many dataset records
    # carry label 0 versus any other non-negative label, then print a table.
    ad = Advertisement(Configure.ad_path)
    app = App(Configure.app_categories_path)
    data_set = Dataset(Configure.train_path, Configure.test_path)
    user = User(Configure.user_path)

    # Two parallel 100-slot tallies indexed by age:
    # result0 counts label == 0, result1 counts every other label >= 0.
    result0 = [0] * 100
    result1 = [0] * 100
    for row in data_set.get_data_list():
        # The user lookup happens before the label is read, so the order of
        # key accesses stays: 'userID', user 'age', then 'label'.
        age = user.get_value(row['userID'])['age']
        label = row['label']
        if label < 0:
            # Records with a negative label are not counted.
            continue
        bucket = result0 if label == 0 else result1
        bucket[age] += 1

    # One output line per slot: index, combined count, label-0 count,
    # other-label count.
    for age, (zero_count, other_count) in enumerate(zip(result0, result1)):
        print(age, zero_count + other_count, zero_count, other_count)
Ejemplo n.º 4
0
from reading.app import App
from reading.dataset import Dataset
from reading.position import Position
from reading.user import User
from reading.user_app_actions import User_App_Actions
from reading.user_app_installed import User_App_Installed
from feature.dmatrix import DMatrix
from model.xgb_func import *
from handle.handle import *

if __name__ == '__main__':
    debug = False
    print('reading data...')
    ad = Advertisement(Configure.ad_path, debug=debug)
    data_set = Dataset(Configure.train_path, Configure.test_path, debug=debug)
    user = User(Configure.user_path, debug=debug)

    from_t = 290000
    end_t = 310000
    result = dict()
    for d in data_set.get_data_list():
        if d['clickTime'] < from_t or d['clickTime'] >= end_t:
            continue
        userID = d['userID']
        baby = user.get_value(userID)['haveBaby']
        creativeID = d['creativeID']
        appID = ad.get_value(creativeID)['appID']
        label = d['label']
        if baby not in result:
            result[baby] = dict()
        if appID not in result[baby]:
Ejemplo n.º 5
0
    # data_set = pd.read_csv(Configure.train_path)
    # user = pd.read_csv(Configure.user_path)
    # app = pd.read_csv(Configure.app_categories_path)
    #
    # data = data_set.merge(user, how='left', on='userID')
    # data = data.merge(ad, how='left', on='creativeID')
    # data = data.merge(app, how='left', on='appID')
    # data = data[data['label'] >= 0]
    # g = data.groupby(['appCategory', 'haveBaby', 'label']).count()
    # for group_name, df in g:
    #     print(group_name, df)

    ad = Advertisement(Configure.ad_path)
    app = App(Configure.app_categories_path)
    data_set = Dataset(Configure.train_path, Configure.test_path)
    user = User(Configure.user_path)

    from_t = 270000
    end_t = 290000
    result = dict()
    for d in data_set.get_data_list():
        creativeID = d['creativeID']
        userID = d['userID']
        haveBaby = user.get_value(userID)['haveBaby']
        appID = ad.get_value(creativeID)['appID']
        appCategory = app.get_value(appID)['appCategory']
        label = d['label']
        clickTime = d['clickTime']
        if clickTime < from_t or clickTime >= end_t:
            continue
        if appCategory not in result: