def remote_process(local_address, local_port, message_pipe):
    """
    Listen on a TCP socket and forward piped messages to the connected peer.

    One remote machine is served at a time. On every 0.1 s tick the pipe is
    drained and only the most recently queued message is sent; older queued
    messages are discarded.

    local_address - str, ip address
    local_port - int, port number
    message_pipe - multiprocessing.Pipe, read-only
    """

    # SIGINT is ignored, so the KeyboardInterrupt handled below is presumably
    # raised by utils.existence_handler on SIGTERM -- TODO confirm.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    signal.signal(signal.SIGTERM, utils.existence_handler)

    # Pre-bind both names so the cleanup paths never hit an unbound local.
    server_socket = None
    session = None
    try:
        #server_socket = socket.create_server((local_address, local_port), backlog=1)
        server_socket = socket.socket()
        server_socket.bind((local_address, local_port))
        server_socket.listen(1)
        while True:
            session, _ = server_socket.accept()
            print("Remote machine connected.")
            session.setblocking(True)

            while utils.is_active(session):
                # Drain the pipe, keeping only the newest message.
                message = None
                while message_pipe.poll():
                    message = message_pipe.recv()
                if message is not None:
                    print(str(message))
                    session.sendall(message)

                time.sleep(0.1)

            # The peer is gone: release the descriptor before accepting the
            # next connection instead of leaving it to garbage collection.
            session.close()
            session = None
    except KeyboardInterrupt:
        if session is not None and utils.is_active(session):
            session.shutdown(socket.SHUT_RDWR)
        if server_socket is not None:
            server_socket.shutdown(socket.SHUT_RDWR)
    finally:
        # Always release the sockets, whichever way the loop was left.
        if session is not None:
            session.close()
        if server_socket is not None:
            server_socket.close()
Beispiel #2
0
    def serve(self):
        """Run one heartbeat pass over all domains returned by the helper.

        For each domain, in order:
          * stop the whole pass as soon as ``self.RUN_TH`` is falsy;
          * skip the domain if its uuid cannot be obtained or
            ``utils.is_active`` reports it inactive;
          * send a QGA ``guest-ping`` with ``CONF.heartbeat_cmd_timeout`` and
            call ``self.sender.report_heartbeat(uuid)`` when the response is
            truthy, otherwise log a warning.
        """
        LOG.info("Heartbeat thread start")
        # The ping payload is identical for every domain, so build it once.
        heartbeat_cmd = json.dumps({"execute": "guest-ping"})
        for dom in self.helper.list_all_domains():
            if not self.RUN_TH:
                LOG.info("Break from hearbeat thread")
                break

            uuid = utils.get_domain_uuid(dom)
            if not uuid:
                # Logger.warn is a deprecated alias; use warning().
                LOG.warning("Get domain uuid failed")
                continue

            if not utils.is_active(dom):
                LOG.info("domain is not active, uuid %s", uuid)
                continue

            response = self.helper.exec_qga_command(
                dom, heartbeat_cmd, timeout=CONF.heartbeat_cmd_timeout)
            # Pass arguments lazily so the message is only formatted when
            # the corresponding log level is enabled.
            LOG.debug("Ping command response from qga: %s", response)
            if response:
                self.sender.report_heartbeat(uuid)
            else:
                LOG.warning("Ping command failed, uuid: %s", uuid)

        LOG.info("Heartbeat thread end")
def main():
    """Collect system-usage data for every active domain on this host.

    Every domain returned by ``LibvirtQemuHelper.list_all_domains()`` is
    visited; domains whose uuid cannot be obtained or that are not active
    are skipped, and ``GetSystemUsage.get_system_usage_datas()`` is run for
    the rest.
    """
    # The query result is not consumed at the moment. The call is retained
    # only in case it has side effects.
    # NOTE(review): confirm it is side-effect free and drop it, or restore
    # the per-project filtering (match domain uuid to inst['id'], take
    # inst['tenant_id']) that used to rely on it.
    instance.get_all_instances_on_host()

    helper = helper_.LibvirtQemuHelper()

    for dom in helper.list_all_domains():
        uuid = utils.get_domain_uuid(dom)
        if not uuid:
            # Logger.warn is a deprecated alias; use warning().
            LOG.warning("Get domain uuid failed")
            continue

        if not utils.is_active(dom):
            LOG.info("Domain is not active, uuid: %s", uuid)
            continue

        get_system_usage = data_stat.GetSystemUsage(dom, helper)
        get_system_usage.get_system_usage_datas()
Beispiel #4
0
    def serve(self):
        """Run one heartbeat pass over all domains returned by the helper.

        For each domain, in order:
          * stop the whole pass as soon as ``self.RUN_TH`` is falsy;
          * skip the domain if its uuid cannot be obtained or
            ``utils.is_active`` reports it inactive;
          * send a QGA ``guest-ping`` with ``CONF.heartbeat_cmd_timeout`` and
            call ``self.sender.report_heartbeat(uuid)`` when the response is
            truthy, otherwise log a warning.
        """
        LOG.info("Heartbeat thread start")
        # The ping payload is identical for every domain, so build it once.
        heartbeat_cmd = json.dumps({"execute": "guest-ping"})
        for dom in self.helper.list_all_domains():
            if not self.RUN_TH:
                LOG.info("Break from hearbeat thread")
                break

            uuid = utils.get_domain_uuid(dom)
            if not uuid:
                # Logger.warn is a deprecated alias; use warning().
                LOG.warning("Get domain uuid failed")
                continue

            if not utils.is_active(dom):
                LOG.info("domain is not active, uuid %s", uuid)
                continue

            response = self.helper.exec_qga_command(
                dom, heartbeat_cmd, timeout=CONF.heartbeat_cmd_timeout)
            # Pass arguments lazily so the message is only formatted when
            # the corresponding log level is enabled.
            LOG.debug("Ping command response from qga: %s", response)
            if response:
                self.sender.report_heartbeat(uuid)
            else:
                LOG.warning("Ping command failed, uuid: %s", uuid)

        LOG.info("Heartbeat thread end")
def local_process(socket_file, message_pipe):
    """
    Receive messages on a UNIX socket and push them into the pipe.

    Each accepted connection is read once (up to 512 bytes), the payload is
    passed through protocols.repack_message and the result is sent on
    message_pipe. The connection is then closed.

    socket_file - str, path to the socket_file
    message_pipe - multiprocessing.Connection, write-only
    """

    # SIGINT is ignored, so the KeyboardInterrupt handled below is presumably
    # raised by utils.existence_handler on SIGTERM -- TODO confirm.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    signal.signal(signal.SIGTERM, utils.existence_handler)

    # Pre-bind both names so the cleanup paths never hit an unbound local.
    daemon_socket = None
    session = None
    try:
        daemon_socket = socket.socket(socket.AF_UNIX)
        daemon_socket.bind(socket_file)
        daemon_socket.listen()
        while True:
            session, _ = daemon_socket.accept()
            session.setblocking(True)

            message = session.recv(512)
            message = protocols.repack_message(message)
            message_pipe.send(message)

            # Nothing else is read from or written to this connection, so
            # release it now instead of leaking it until the next accept().
            session.close()
            session = None

            time.sleep(0.1)
    except KeyboardInterrupt:
        if session is not None and utils.is_active(session):
            session.shutdown(socket.SHUT_RDWR)
        if daemon_socket is not None:
            daemon_socket.shutdown(socket.SHUT_RDWR)
    finally:
        # Always release the sockets, whichever way the loop was left.
        if session is not None:
            session.close()
        if daemon_socket is not None:
            daemon_socket.close()
Beispiel #6
0
def current_user():
    """Look up the user behind the request's ``session_id`` cookie.

    Returns None when the cookie is missing or when the cache has no user id
    stored for that session; otherwise returns the result of
    ``is_active(user, 401)``.
    """
    cookies = request.cookies
    if 'session_id' not in cookies:
        return None

    key = 'session_id_{}'.format(cookies['session_id'])
    user_id = cache.get(key)
    if user_id is None:
        return None

    log('current_user key <{}> user_id <{}>'.format(key, user_id))
    user = User.one(id=user_id)
    # Check whether the account has been banned.
    return is_active(user, 401)
Beispiel #7
0
def predict_30(val, register, app, video, act):
    """Train and predict using same-day activity features per register day.

    Training rows are built (or loaded from cached CSVs under
    ``../data1/30/``) for register days 1..23, labelled with
    ``is_active(df, i + 1, i + 7, ...)``. The test set is either the tail of
    the training frame (validation mode) or the users registered on day 30.

    val - truthy selects validation mode: rows from index 35134 onwards of
          the training frame are held out as the test set
    register, app, video, act - presumably pandas DataFrames; they are
          filtered with boolean masks on ``register_day`` / ``day``
          (TODO confirm). ``app`` is only forwarded to ``is_active`` here.

    Returns a tuple ``(ids, test_y, getbest(...))`` where ``ids`` is the
    ``user_id`` column of the test frame and ``test_y`` is the second value
    returned by ``predict_data``.
    """
    def get_features0(df, d):
        """Add count/rate features computed from day ``d`` video/act rows.

        Removes the ``register_day`` and temporary ``author_id`` columns
        before returning ``df``.
        """
        #tapp = app[app.day==d]
        tvideo = video[video.day == d]
        tact = act[act.day == d]
        #df = docount(df,tapp,'app',['user_id']);gc.collect()
        df = docount(df, tvideo, 'video', ['user_id'])
        gc.collect()
        # The small constant guards against division by zero on empty days.
        df['videorate'] = df['video$user_id#'] / (tvideo.shape[0] + 0.000001)
        df = docount(df, tact, 'act', ['user_id'])
        gc.collect()
        df['actrate'] = df['act$user_id#'] / (tact.shape[0] + 0.000001)

        # NOTE(review): page_list is never used below.
        page_list = list(tact['page'].unique())
        for c in [0, 1, 2, 3, 4]:
            df = docount(df, tact[tact['page'] == c], 'act_page=' + str(c),
                         ['user_id'])
            gc.collect()
            df['act_page=' + str(c) +
               '$user_id#rate'] = df['act_page=' + str(c) + '$user_id#'] / (
                   df['act$user_id#'] + 0.00001)

        df['act_page=23$user_id#'] = df['act_page=2$user_id#'] + df[
            'act_page=3$user_id#']
        df['act_page=023$user_id#'] = df['act_page=2$user_id#'] + df[
            'act_page=3$user_id#'] + df['act_page=0$user_id#']

        # NOTE(review): action_list is never used below.
        action_list = list(tact['action_type'].unique())
        for c in [0, 1, 2, 3, 4, 5]:
            df = docount(df, tact[tact['action_type'] == c],
                         'action_type=' + str(c), ['user_id'])
            gc.collect()
            df['action_type=' + str(c) +
               '$user_id#rate'] = df['action_type=' + str(c) + '$user_id#'] / (
                   df['act$user_id#'] + 0.00001)

        df['action_type=01$user_id#'] = df['action_type=0$user_id#'] + df[
            'action_type=1$user_id#']

        def iszero(s):
            """Return 0 when ``s == 0`` and 1 otherwise."""
            if s == 0:
                return 0
            return 1

        # NOTE(review): this section looks unfinished. The loop variable c is
        # never used (page 0 is multiplied in every time), and the loop's
        # result is discarded by the reassignment of 'pageall' right after.
        df['pageall'] = df['act_page=0$user_id#'].apply(iszero)
        for c in [1, 2, 3, 4]:
            df['pageall'] = df['pageall'] * df['act_page=0$user_id#']
        df['pageall'] = df['act_page=0$user_id#'].apply(iszero)

        # NOTE(review): the loop below writes to 'pageall', not 'actionall',
        # and ignores c. As written, 'pageall' ends up as
        # iszero(act_page=0 count) * (action_type=0 count) ** 5 and
        # 'actionall' as iszero(action_type=0 count). 'pageall' is one of the
        # model inputs selected in get_features_all, so changing this alters
        # the trained features -- confirm intent before fixing.
        df['actionall'] = df['action_type=0$user_id#'].apply(iszero)
        for c in [1, 2, 3, 4, 5]:
            df['pageall'] = df['pageall'] * df['action_type=0$user_id#']
        df['actionall'] = df['action_type=0$user_id#'].apply(iszero)

        df['act0'] = df['act$user_id#'].apply(iszero)
        df['video0'] = df['video$user_id#'].apply(iszero)

        def bigact(s):
            """Bucket ``s`` into tens, capped at 5 for values >= 50."""
            if s >= 50:
                return 5
            else:
                return int(s / 10)

        df['act$user_id#10'] = df['act$user_id#'].apply(bigact)

        # Temporary copy of user_id under the name 'author_id' so the act
        # rows can be counted by author_id against these users.
        df['author_id'] = df['user_id']
        df = docount(df, tact, 'act', ['author_id'])
        gc.collect()
        df = doiq(df, tact, 'act', ['user_id'], 'video_id')
        gc.collect()
        df = doiq(df, tact, 'act', ['user_id'], 'author_id')
        gc.collect()

        df['act$author_video_m'] = df['act$user_id_by_video_id_iq'] / df[
            'act$user_id_by_author_id_iq']

        del df['register_day'], df['author_id']
        return df

    def get_features_all(df, df1):
        """Add features over train+test combined and keep the ``used`` columns.

        ``df`` and ``df1`` are stacked, group statistics are added, the frame
        is reduced to the ``used`` column list and split back at the original
        length of ``df``. Returns ``(df, df1)``.
        """
        lendf = len(df)
        # NOTE(review): DataFrame.append was removed in pandas 2.0;
        # pd.concat is the replacement if the pandas version is upgraded.
        df = df.append(df1)
        del df1
        gc.collect()

        for c in ['act$user_id#']:
            #df = domean(df,df,'All',['device_type'],c);gc.collect()
            df = domean(df, df, 'All', ['register_type'], c)
            gc.collect()
            #df = dovar(df,df,'All',['register_type'],c);gc.collect()
        df = docount(df, df, 'ALL', ['register_type'])
        df = docount(df, df, 'ALL', ['device_type'])

        # The trailing comma makes this a one-element target list; it still
        # deletes just the 'user_id' column.
        del df['user_id'],

        # NOTE(review): ccc, ccc1 and ddd are never read in this function;
        # only `used` determines the selected columns.
        ccc = [
            'device_type', 'actrate', 'All$register_type_by_act$user_id#_mean',
            'act_page=1$user_id#', 'action_type=0$user_id#rate',
            'action_type=1$user_id#rate', 'register_type',
            'act$user_id_by_author_id_iq', 'act$user_id_by_video_id_iq',
            'videorate', 'act_page=1$user_id#rate', 'act$author_video_m',
            'action_type=2$user_id#rate', 'act_page=3$user_id#rate',
            'act_page=0$user_id#', 'action_type=0$user_id#',
            'act_page=2$user_id#', 'act_page=2$user_id#rate',
            'action_type=1$user_id#', 'act$user_id#',
            'act_page=4$user_id#rate', 'act_page=0$user_id#rate', 'pageall',
            'act_page=4$user_id#', 'action_type=3$user_id#rate',
            'act_page=23$user_id#', 'act_page=3$user_id#', 'video$user_id#',
            'action_type=2$user_id#', 'action_type=3$user_id#',
            'act_page=023$user_id#', 'act$author_id#',
            'action_type=01$user_id#', 'action_type=5$user_id#rate',
            'ALL$register_type#', 'action_type=5$user_id#', 'act$user_id#10',
            'action_type=4$user_id#', 'actionall',
            'action_type=4$user_id#rate', 'act0', 'video0'
        ]
        ccc1 = []

        ddd = [
            'All$register_type_by_act$user_id#_mean',
            'act_page=1$user_id#',
            'action_type=1$user_id#rate',
            'act$user_id_by_author_id_iq',
            'act$user_id_by_video_id_iq',
            'act$author_video_m',
            'act_page=2$user_id#',
            'act_page=2$user_id#rate',
            'action_type=1$user_id#',
            'act$user_id#',
            'act_page=4$user_id#rate',
            'act_page=4$user_id#',
            'action_type=3$user_id#rate',
            'act_page=23$user_id#',
            'act_page=3$user_id#',
            'video$user_id#',
            'action_type=2$user_id#',
            'action_type=3$user_id#',
            'act$author_id#',
            'action_type=01$user_id#',
            'ALL$register_type#',
            'ALL$device_type#',
            'action_type=5$user_id#rate',
            'action_type=5$user_id#',
            'act$user_id#10',
            'action_type=4$user_id#',
            'actionall',
            'action_type=4$user_id#rate',
            'act0',
        ]

        # Columns actually kept for the model.
        used = [
            'device_type',
            'register_type',
            'actrate',
            'action_type=0$user_id#rate',
            'videorate',
            'act_page=1$user_id#rate',
            'action_type=2$user_id#rate',
            'act_page=3$user_id#rate',
            'act_page=0$user_id#',
            'action_type=0$user_id#',
            'act_page=0$user_id#rate',
            'pageall',
            'act_page=023$user_id#',
            'video0',
            'All$register_type_by_act$user_id#_mean',
            'ALL$register_type#',
        ]

        df = df[used]

        # Split the stacked frame back into its train and test parts.
        df1 = df[lendf:]
        df = df[:lendf]
        return df, df1

    path = '../data1/30/'
    # Reuse cached training features/labels when present; otherwise build
    # them for register days 1..23 and write the cache.
    if os.path.exists(path + 'train_df.csv'):
        train_df = pd.read_csv(path + 'train_df.csv')
        train_y = pd.read_csv(path + 'train_y.csv')

    else:
        train_df = pd.DataFrame()
        train_y = pd.DataFrame()
        for i in range(1, 24):
            df = register[register.register_day == i]
            y = is_active(df, i + 1, i + 7, app, video, act)
            df = get_features0(df, i)
            train_df = train_df.append(df)
            train_y = train_y.append(y)
            if i == 22:
                # Only printed for reference: the value is overwritten by the
                # hard-coded split index in the validation branch below.
                valst = len(train_df)
                print(valst)

        train_df.to_csv(path + 'train_df.csv', index=False)
        train_y.to_csv(path + 'train_y.csv', index=False)

    train_y = train_y['Y']
    if val:
        #35134
        # Hard-coded split index; presumably the row count after register
        # day 22 (see the print above) -- TODO confirm it matches the data.
        valst = 35134
        test_df = train_df[valst:]
        val_y = train_y[valst:]
        train_df = train_df[:valst]
        train_y = train_y[:valst]
    else:
        if os.path.exists(path + 'test_df.csv'):
            test_df = pd.read_csv(path + 'test_df.csv')
        else:
            test_df = register[register.register_day == 30]
            test_df = get_features0(test_df, 30)
            test_df.to_csv(path + 'test_df.csv', index=False)

    #train_df['Y'] = train_y
    #act0train = train_df[train_df['act$user_id#']==0]
    #print(len(act0train),len(act0train[act0train['Y']==1]))
    #del train_df['Y']
    #act0ids = test_df[test_df['act$user_id#']==0]['user_id']

    # Keep the ids before get_features_all drops the 'user_id' column.
    ids = test_df['user_id']
    train_df, test_df = get_features_all(train_df, test_df)

    pre_train, test_y = predict_data(train_df,
                                     train_y,
                                     10,
                                     test_df,
                                     importance=1)

    # NOTE(review): the split above tests `if val:` while this tests
    # `val == 1`; a truthy val other than 1/True takes the validation split
    # but the non-validation reporting branch.
    if val == 1:
        print(len(train_y), sum(train_y))
        showresults(train_y, pre_train)
        showresults(val_y, test_y)
        showfalse(ids, test_df, val_y, test_y)
        showtop(val_y, test_y, nums=1457)
        showtop(train_y, pre_train, nums=23260)
        #showtop(train_y,pre_train,nums=15485)
        #showprecision(val_y,test_y)
        #showprecision(train_y,pre_train)
        return ids, test_y, getbest(ids, test_y, th=0.4)
    else:
        showresults(train_y, pre_train)
        showtop(train_y, pre_train, nums=24717)
        #showtop(train_y,pre_train,nums=16943)
        #showprecision(train_y,pre_train)
        return ids, test_y, getbest(ids, test_y, rank=1490)
Beispiel #8
0
def predict_29(val,register,app,video,act):
    """Train and predict using features from a two-day window per register day.

    In validation mode the test set is the users registered on day 22
    (features from days 22..23, labels from ``is_active(..., 24, 30, ...)``)
    and training uses register days 1..21. Otherwise the test set is the
    users registered on day 29 (features from days 29..30) and training uses
    register days 1..22. Intermediate frames are cached as CSVs under
    ``../data1/29/``.

    val - truthy selects validation mode
    register, app, video, act - presumably pandas DataFrames; they are
          filtered with boolean masks on ``register_day`` / ``day``
          (TODO confirm)

    Returns a tuple ``(ids, test_y, getbest(...))`` where ``ids`` is the
    ``user_id`` column of the test frame and ``test_y`` is the second value
    returned by ``predict_data``.
    """
    def get_features(df,d1,d2):
        """Add count/rate/iq features computed from days ``d1..d2`` inclusive.

        Day values are shifted so that ``d1`` becomes 0 and ``d2`` becomes
        ``lastday``. Removes the ``register_day`` and temporary
        ``author_id`` columns before returning ``df``.
        """
        tapp = app[(app.day>=d1) & (app.day<=d2)]
        tact = act[(act.day>=d1) & (act.day<=d2)]
        tvideo = video[(video.day>=d1) & (video.day<=d2)]
        # NOTE(review): these assign to filtered selections of the input
        # frames; pandas may emit SettingWithCopyWarning here -- an explicit
        # .copy() on the selections would make the intent unambiguous.
        tapp.day = tapp.day - d1
        tact.day = tact.day - d1
        tvideo.day = tvideo.day - d1
        lastday = d2-d1      
        #df['register_time'] = d2-df.register_day+1
        
        df = docount(df,tapp,'app',['user_id']);gc.collect() 
        df = docount(df,tapp[tapp.day==lastday],'last_day_app',['user_id']);gc.collect()
        #df['app_mean#'] = df['app$user_id#']/2
        df = docount(df,tvideo,'video',['user_id']);gc.collect()
        # The small constant guards against division by zero on empty windows.
        df['videorate'] = df['video$user_id#']/(tvideo.shape[0]+0.000001)
        #df['video_mean#'] = df['video$user_id#']/2
        df = docount(df,tact,'act',['user_id']);gc.collect()
        df = docount(df,tact[tact.day==lastday],'last_day_act',['user_id']);gc.collect()
        # "first day" is taken as lastday-1, which is day 0 only because every
        # caller in this function passes a two-day window (d2 == d1 + 1).
        df = docount(df,tact[tact.day==lastday-1],'first_day_act',['user_id']);gc.collect()
        # All three rates share the whole-window row count as denominator.
        df['actrate'] = df['act$user_id#']/(tact.shape[0]+0.000001)
        df['last_day_actrate'] = df['last_day_act$user_id#']/(tact.shape[0]+0.000001)
        df['first_day_actrate'] = df['first_day_act$user_id#']/(tact.shape[0]+0.000001)
        df['actrate_gap'] = df['last_day_actrate'] - df['first_day_actrate']
        df['act_gap'] = df['last_day_act$user_id#'] - df['first_day_act$user_id#']
        #df['act_mean#'] = df['act$user_id#']/2
        #page_list = list(tact['page'].unique())
        def iszero(s):
            """Return 0 when ``s == 0`` and 1 otherwise."""
            if s==0:
                return 0
            return 1
        df['act0'] = df['act$user_id#'].apply(iszero)
        df['video0'] = df['video$user_id#'].apply(iszero)    
        
        
        
        # NOTE(review): chained boolean indexing -- the second mask is built
        # from the unfiltered tact and applied to the already-filtered frame,
        # which relies on pandas re-aligning the mask by index (and may warn).
        # A single combined mask would be clearer.
        for c in [1]: 
            df = docount(df,tact[tact.day==lastday][tact['page']==c],'last_day_act_page='+str(c),['user_id']);gc.collect()
        
        for c in [0,1,2,3,4]: 
            df = docount(df,tact[tact['page']==c],'act_page='+str(c),['user_id']);gc.collect()
            df['act_page='+str(c)+'$user_id#rate'] = df['act_page='+str(c)+'$user_id#']/(df['act$user_id#']+0.00001)
        
        df['act_page=23$user_id#'] = df['act_page=2$user_id#'] + df['act_page=3$user_id#']
        df['act_page=023$user_id#'] = df['act_page=2$user_id#'] + df['act_page=3$user_id#']+df['act_page=0$user_id#']

        
        # NOTE(review): action_list is never used below.
        action_list = list(tact['action_type'].unique())
        for c in [0,1,2,3,4,5]: 
            df = docount(df,tact[tact['action_type']==c],'action_type='+str(c),['user_id']);gc.collect()
            # Same chained-mask pattern as for 'last_day_act_page' above.
            df = docount(df,tact[tact.day==lastday][tact['action_type']==c],'last_day_action_type='+str(c),['user_id']);gc.collect()
            df['action_type='+str(c)+'$user_id#rate'] = df['action_type='+str(c)+'$user_id#']/(df['act$user_id#']+0.00001)


        # Temporary copy of user_id under the name 'author_id'; removed again
        # at the end of this function.
        df['author_id'] = df['user_id']
        
        df = doiq(df,tact[tact.day==lastday],'last_day_act',['user_id'],'video_id');gc.collect()
        df = doiq(df,tact[tact.day==lastday],'last_day_act',['user_id'],'author_id');gc.collect()
        df['last_day_act$author_video_m'] = df['last_day_act$user_id_by_video_id_iq']/df['last_day_act$user_id_by_author_id_iq']
        
        df = doiq(df,tact[tact.day==lastday-1],'first_day_act',['user_id'],'video_id');gc.collect()
        df = doiq(df,tact[tact.day==lastday-1],'first_day_act',['user_id'],'author_id');gc.collect()
        df['first_day_act$author_video_m'] = df['first_day_act$user_id_by_video_id_iq']/df['first_day_act$user_id_by_author_id_iq']

        
        df = doiq(df,tact[tact.day>=lastday-1],'last2_day_act',['user_id'],'video_id');gc.collect()
        df = doiq(df,tact[tact.day>=lastday-1],'last2_day_act',['user_id'],'author_id');gc.collect()
        df['last2_day_act$author_video_m'] = df['last2_day_act$user_id_by_video_id_iq']/df['last2_day_act$user_id_by_author_id_iq']

        
        
        del df['register_day'],df['author_id']
        return df

    def get_features_all(df,df1):
        """Add features over train+test combined and keep the ``used`` columns.

        ``df`` and ``df1`` are stacked, a per-``register_type`` count is
        added, the frame is reduced to the ``used`` column list and split
        back at the original length of ``df``. Returns ``(df, df1)``.
        """
        lendf = len(df)
        # NOTE(review): DataFrame.append was removed in pandas 2.0;
        # pd.concat is the replacement if the pandas version is upgraded.
        df= df.append(df1)
        del df1
        gc.collect()
        df = docount(df,df,'ALL',['register_type']) 

        del df['user_id']
        
        # NOTE(review): ccc, ccc1 and ddd are never read in this function;
        # only `used` determines the selected columns.
        ccc = ['device_type', 'register_type', 'action_type=0$user_id#rate', 'act_page=1$user_id#', 'first_day_act$user_id_by_author_id_iq', 'action_type=2$user_id#rate', 'act_page=0$user_id#rate', 'last_day_act$author_video_m', 'action_type=1$user_id#rate', 'act_page=2$user_id#', 'actrate', 'last_day_act$user_id_by_author_id_iq', 'app$user_id#', 'last_day_act_page=1$user_id#', 'act_page=3$user_id#rate', 'last_day_action_type=0$user_id#', 'first_day_act$user_id_by_video_id_iq', 'videorate', 'act_page=1$user_id#rate', 'last2_day_act$user_id_by_author_id_iq', 'last2_day_act$user_id_by_video_id_iq', 'first_day_actrate', 'act_page=2$user_id#rate', 'last_day_actrate', 'first_day_act$author_video_m', 'last2_day_act$author_video_m', 'ALL$register_type#', 'act_page=0$user_id#', 'actrate_gap', 'action_type=3$user_id#rate', 'last_day_act$user_id#', 'act$user_id#', 'last_day_act$user_id_by_video_id_iq', 'action_type=0$user_id#', 'action_type=1$user_id#', 'act_gap', 'action_type=2$user_id#', 'action_type=3$user_id#', 'first_day_act$user_id#', 'act_page=3$user_id#', 'act_page=4$user_id#rate', 'video$user_id#', 'last_day_action_type=1$user_id#', 'act_page=23$user_id#', 'act_page=023$user_id#', 'act_page=4$user_id#', 'last_day_action_type=2$user_id#', 'last_day_action_type=3$user_id#', 'action_type=5$user_id#rate', 'action_type=5$user_id#', 'last_day_app$user_id#', 'last_day_action_type=4$user_id#', 'action_type=4$user_id#', 'last_day_action_type=5$user_id#', 'act0', 'action_type=4$user_id#rate', 'video0']
        ccc1 = [ ]
        
        ddd = ['action_type=2$user_id#rate','action_type=1$user_id#rate','last_day_act$user_id_by_author_id_iq',
               'last_day_act_page=1$user_id#','act_page=3$user_id#rate','first_day_act$user_id_by_video_id_iq',
               'videorate','act_page=1$user_id#rate','last2_day_act$user_id_by_author_id_iq','last2_day_act$user_id_by_video_id_iq',
               'act_page=2$user_id#rate','last_day_actrate', 'first_day_act$author_video_m','last2_day_act$author_video_m',
               'ALL$register_type#','act_page=0$user_id#','actrate_gap','action_type=3$user_id#rate',
               'last_day_act$user_id#','act$user_id#','last_day_act$user_id_by_video_id_iq', 'action_type=0$user_id#', 
               'action_type=1$user_id#','act_gap', 'action_type=2$user_id#','action_type=3$user_id#',
               'first_day_act$user_id#', 'act_page=3$user_id#','act_page=4$user_id#rate', 'video$user_id#', 
               'last_day_action_type=1$user_id#','act_page=23$user_id#', 'act_page=023$user_id#','act_page=4$user_id#', 
               'last_day_action_type=2$user_id#','last_day_action_type=3$user_id#', 'action_type=5$user_id#rate',
               'action_type=5$user_id#', 'last_day_app$user_id#','last_day_action_type=4$user_id#',
               'action_type=4$user_id#','last_day_action_type=5$user_id#', 'act0', 'action_type=4$user_id#rate', 'video0']
        
        # Columns actually kept for the model.
        used = ['device_type', 'register_type', 'action_type=0$user_id#rate', 'act_page=1$user_id#',
                'first_day_act$user_id_by_author_id_iq', 'act_page=0$user_id#rate','last_day_act$author_video_m',
                'act_page=2$user_id#','actrate','app$user_id#', 'last_day_action_type=0$user_id#',
                'first_day_actrate', 'action_type=5$user_id#rate', ]
        
        df = df[used]
        
        
         
        # Split the stacked frame back into its train and test parts.
        df1 = df[lendf:]
        df = df[:lendf]
        return df,df1
    
    path = '../data1/29/'
    
    if val:
        # Validation: test on register day 22, train on register days 1..21.
        if os.path.exists(path+'val_df.csv'):
            test_df = pd.read_csv(path+'val_df.csv')
            val_y = pd.read_csv(path+'val_y.csv')
        else:
            test_df = register[(register.register_day==22)]
            test_df = get_features(test_df,22,23)
            val_y = is_active(test_df,24,30,app,video,act)
            test_df.to_csv(path+'val_df.csv',index=False)
            val_y.to_csv(path+'val_y.csv',index=False)
        val_y = val_y['Y']
        if os.path.exists(path+'val_train_df.csv'):
            train_df = pd.read_csv(path+'val_train_df.csv')
            train_y = pd.read_csv(path+'val_train_y.csv')
        else:    
            train_df = pd.DataFrame()   
            train_y = pd.DataFrame()                  
            for i in range(1,22):
                df = register[(register.register_day==i)]
                y = is_active(df,i+2,i+8,app,video,act)
                df = get_features(df,i,i+1)
                train_df = train_df.append(df)
                train_y = train_y.append(y)
            train_df.to_csv(path+'val_train_df.csv',index=False)
            train_y.to_csv(path+'val_train_y.csv',index=False)
    else:
        # Final prediction: test on register day 29, train on days 1..22.
        if os.path.exists(path+'test_df.csv'):
            test_df = pd.read_csv(path+'test_df.csv')
        else:
            test_df = register[(register.register_day==29)]
            test_df = get_features(test_df,29,30)
            test_df.to_csv(path+'test_df.csv',index=False)
                               
        if os.path.exists(path+'train_df.csv'):
            train_df = pd.read_csv(path+'train_df.csv')
            train_y = pd.read_csv(path+'train_y.csv')
        else:            
            if os.path.exists(path+'val_train_df.csv'):
                # Reuse the validation caches: days 1..21 plus the day-22
                # validation set. Only val_train_df.csv is checked for
                # existence; the other three files are assumed to be present.
                train_df = pd.read_csv(path+'val_train_df.csv')
                train_y = pd.read_csv(path+'val_train_y.csv')
                val_df = pd.read_csv(path+'val_df.csv')
                val_y = pd.read_csv(path+'val_y.csv')
                train_df = train_df.append(val_df)
                train_y = train_y.append(val_y)
            else:
                train_df = pd.DataFrame()   
                train_y = pd.DataFrame()                  
                for i in range(1,23):
                    df = register[(register.register_day==i)]
                    y = is_active(df,i+2,i+8,app,video,act)
                    df = get_features(df,i,i+1)
                    train_df = train_df.append(df)
                    train_y = train_y.append(y)  
            train_df.to_csv(path+'train_df.csv',index=False)
            train_y.to_csv(path+'train_y.csv',index=False)                 
    train_y = train_y['Y']

    # Keep the ids before get_features_all drops the 'user_id' column.
    ids = test_df['user_id']
    train_df,test_df = get_features_all(train_df,test_df)
    
    pre_train,test_y = predict_data(train_df,train_y,10,test_df,importance=1)
    
    # NOTE(review): the data selection above tests `if val:` while this tests
    # `val==1`; a truthy val other than 1/True builds the validation sets but
    # takes the non-validation reporting branch.
    if val==1:   
        print (len(train_y),sum(train_y))
        showresults(train_y,pre_train)
        showresults(val_y,test_y) 
        showtop(val_y,test_y,nums=1337)
        showtop(train_y,pre_train,nums=19589)
        return ids,test_y,getbest(ids,test_y,th=0.4)
    else:
        showresults(train_y,pre_train)     
        showtop(train_y,pre_train,nums=20926)
        return ids,test_y,getbest(ids,test_y,rank=1294)
Beispiel #9
0
def predict_1_23(val, register, app, video, act):
    """Fit and apply the activity model for users registered on days 1-23.

    Two labelled samples are built from the logs:

    * sample 1: users with ``register_day < 10``, features from days
      1-16, label from ``is_active`` over days 17-23;
    * sample 2: users with ``register_day < 17``, features from days
      1-23, label from ``is_active`` over days 24-30.

    When ``val`` is truthy the model is fitted on sample 1 and scored on
    sample 2.  Otherwise it is fitted on both samples and applied to the
    users with ``register_day < 24`` using features from days 1-30.

    Labels and feature tables are cached as CSV files under ``path``; an
    existing cache file is loaded instead of being recomputed.

    Args:
        val: validation switch (see above).
        register: registration table; ``user_id`` and ``register_day``
            are read here.
        app: app log with ``user_id`` and ``day`` columns.
        video: video log with ``user_id`` and ``day`` columns.
        act: activity log with ``user_id``, ``day``, ``page``,
            ``author_id``, ``video_id`` and ``action_type`` columns.

    Returns:
        ``(ids, test_y, best)``: the ``user_id`` column of the scored
        frame, the second value returned by ``predict_data`` and the
        result of ``getbest``.
    """
    path = '../data1/1_23/'

    def shifted_window(events, d1, d2):
        """Return the rows with d1 <= day <= d2, days re-based so d1 is 0."""
        # .copy() so the day shift is written to an independent frame
        # rather than to a slice of the caller's log.
        win = events[(events.day >= d1) & (events.day <= d2)].copy()
        win['day'] = win['day'] - d1
        return win

    def merge_daily_count_stats(df, events, prefix):
        """Left-join max/mean/var of each user's per-day row counts."""
        per_day = events.groupby(['user_id', 'day']).size().unstack()
        stats = (
            ('max', per_day.max(axis=1)),
            ('mean', per_day.mean(axis=1)),
            ('var', per_day.var(axis=1)),
        )
        for stat_name, values in stats:
            column = values.rename(prefix + 'actcount_' + stat_name)
            df = pd.merge(df, column.reset_index(), on=['user_id'],
                          how='left')
        return df

    def get_features(df, d1, d2):
        """Join per-user app/video/act statistics for days d1..d2 onto df.

        ``df`` must already carry ``user_id`` and ``register_time``.  The
        statement order below fixes the column order of the result, which
        is cached to CSV, so it is kept exactly as originally written.
        """
        tapp = shifted_window(app, d1, d2)
        tact = shifted_window(act, d1, d2)
        tvideo = shifted_window(video, d1, d2)
        lastday = d2 - d1

        # ---- app ----
        df = docount(df, tapp, 'app', ['user_id'])
        df = domax(df, tapp, 'app', ['user_id'], 'day')

        df['last_app_day'] = lastday - df['app$user_id_by_day_max'] + 1
        df['app_day_missing'] = df['register_time'] - df['app$user_id#']
        df['app_mean#'] = df['app$user_id#'] / df['register_time']
        del df['app$user_id#'], df['app$user_id_by_day_max']

        df = dovar(df, tapp, 'app', ['user_id'], 'day')

        # Trailing windows: i = 0 keeps only the last day, i = 7 the last
        # eight days.  Mean/variance are only added from i = 3 upwards.
        for i in range(8):
            recent = tapp[tapp.day >= lastday - i]
            name = 'app_last_' + str(i)
            df = docount(df, recent, name, ['user_id'])
            if i >= 3:
                df = domean(df, recent, name, ['user_id'], 'day')
                df = dovar(df, recent, name, ['user_id'], 'day')

        gc.collect()

        # ---- video ----
        df = docount(df, tvideo, 'video', ['user_id'])
        df = domin(df, tvideo, 'video', ['user_id'], 'day')
        df = domax(df, tvideo, 'video', ['user_id'], 'day')
        df = doiq(df, tvideo, 'video', ['user_id'], 'day')
        df['last_video_day'] = lastday - df['video$user_id_by_day_max'] + 1
        df['first_video_day'] = lastday - df['video$user_id_by_day_min'] + 1
        df['video_day_gap'] = (df['video$user_id_by_day_max'] -
                               df['video$user_id_by_day_min'] + 1)
        df['video_mean#'] = df['video$user_id#'] / df['register_time']
        del (df['video$user_id#'], df['video$user_id_by_day_max'],
             df['video$user_id_by_day_min'])

        df = dovar(df, tvideo, 'video', ['user_id'], 'day')
        video_last_8 = tvideo[tvideo.day > lastday - 8]
        df = domean(df, video_last_8, 'video_last_8', ['user_id'], 'day')
        df = dovar(df, video_last_8, 'video_last_8', ['user_id'], 'day')
        df = docount(df, video_last_8, 'video_last_8', ['user_id'])
        gc.collect()

        # ---- act ----
        df = merge_daily_count_stats(df, tact, '')

        df = docount(df, tact, 'act', ['user_id'])
        df = domin(df, tact, 'act', ['user_id'], 'day')
        df = domax(df, tact, 'act', ['user_id'], 'day')
        df = doiq(df, tact, 'act', ['user_id'], 'day')
        df['act_day_gap'] = (df['act$user_id_by_day_max'] -
                             df['act$user_id_by_day_min'] + 1)
        df['act_day_missing'] = (df['register_time'] -
                                 df['act$user_id_by_day_iq'])
        df['act_mean#'] = df['act$user_id#'] / df['register_time']
        del df['act$user_id#']

        df = dovar(df, tact, 'act', ['user_id'], 'day')

        for i in range(8):
            recent = tact[tact.day >= lastday - i]
            name = 'act_last_' + str(i)
            df = docount(df, recent, name, ['user_id'])
            if i >= 3:
                df = domean(df, recent, name, ['user_id'], 'day')
                df = dovar(df, recent, name, ['user_id'], 'day')
                df = merge_daily_count_stats(df, recent, name + '_')
        gc.collect()

        # Per-page counts: whole window (scaled by register_time), then
        # the last 8 and last 3 days (raw counts).
        page_list = list(tact['page'].unique())
        for c in page_list:
            name = 'act_page=' + str(c)
            df = docount(df, tact[tact['page'] == c], name, ['user_id'])
            df[name + '$user_id#'] = (df[name + '$user_id#'] /
                                      df['register_time'])
        for c in page_list:
            df = docount(df,
                         tact[(tact['page'] == c) & (tact.day > lastday - 8)],
                         'act_last_8_page=' + str(c), ['user_id'])
        for c in page_list:
            df = docount(df,
                         tact[(tact['page'] == c) & (tact.day > lastday - 3)],
                         'act_last_3_page=' + str(c), ['user_id'])

        # Temporary author_id column (a copy of user_id) so the act log
        # can be counted with the user in the author role; removed below.
        df['author_id'] = df['user_id']
        df = docount(df, tact, 'act', ['author_id'])
        df['act$author_id#'] = df['act$author_id#'] / df['register_time']

        df = doiq(df, tact, 'act', ['user_id'], 'author_id')
        df['act$user_id_by_author_id_iq'] = (
            df['act$user_id_by_author_id_iq'] / df['register_time'])

        df = doiq(df, tact, 'act', ['user_id'], 'video_id')
        df['act$user_id_by_video_id_iq'] = (
            df['act$user_id_by_video_id_iq'] / df['register_time'])

        for i in range(8):
            recent = tact[tact.day >= lastday - i]
            name = 'act_last_' + str(i)
            df = doiq(df, recent, name, ['user_id'], 'author_id')
            df = doiq(df, recent, name, ['user_id'], 'video_id')

        # Per-action-type counts; the value lists are hard-coded (type 5
        # only gets the whole-window feature).
        for c in [0, 1, 2, 3, 5]:
            name = 'action_type=' + str(c)
            df = docount(df, tact[tact['action_type'] == c], name,
                         ['user_id'])
            gc.collect()
            df[name + '$user_id#'] = (df[name + '$user_id#'] /
                                      df['register_time'])
        for c in [0, 1, 2, 3]:
            df = docount(
                df,
                tact[(tact['action_type'] == c) & (tact.day > lastday - 8)],
                'act_last_8_action_type=' + str(c), ['user_id'])
        for c in [0, 1, 2, 3]:
            df = docount(
                df,
                tact[(tact['action_type'] == c) & (tact.day > lastday - 3)],
                'act_last_3_action_type=' + str(c), ['user_id'])

        del df['author_id']
        gc.collect()

        return df

    def get_features_all(df, df1):
        """Stack train and test, drop ``user_id`` and split them again.

        Stacking gives both parts the same set of columns.
        """
        lendf = len(df)
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df = pd.concat([df, df1])
        del df1
        gc.collect()

        del df['user_id']

        return df[:lendf], df[lendf:]

    def registered_before(day, horizon):
        """Users with register_day < day; register_day becomes register_time."""
        users = register[register.register_day < day].copy()
        users['register_time'] = horizon - users['register_day']
        del users['register_day']
        return users

    def cached(name, build):
        """Load ``path + name`` if it exists, else build, save and return."""
        if os.path.exists(path + name):
            return pd.read_csv(path + name)
        frame = build()
        frame.to_csv(path + name, index=False)
        return frame

    users1 = registered_before(10, 17)
    users2 = registered_before(17, 24)

    train_y1 = cached('train_y1.csv',
                      lambda: is_active(users1, 17, 23, app, video, act))['Y']
    train_y2 = cached('train_y2.csv',
                      lambda: is_active(users2, 24, 30, app, video, act))['Y']

    df1 = cached('df1.csv', lambda: get_features(users1, 1, 16))
    df2 = cached('df2.csv', lambda: get_features(users2, 1, 23))

    if val:
        train_df = df1
        test_df = df2
        train_y = train_y1
        val_y = train_y2
    else:
        # The day 1-30 frame is only needed here, so it is built lazily.
        test_df = cached(
            'test_df.csv',
            lambda: get_features(registered_before(24, 31), 1, 30))

        train_df = pd.concat([df1, df2])
        train_y = pd.concat([train_y1, train_y2])

    del df1, df2
    gc.collect()
    ids = test_df['user_id']
    train_df, test_df = get_features_all(train_df, test_df)

    pre_train, test_y = predict_data(train_df,
                                     train_y,
                                     10,
                                     test_df,
                                     importance=1)

    # NOTE(review): nums / rank / th are hard-coded; presumably tuned for
    # this data set -- confirm before reusing on other data.
    if val == 1:
        showresults(val_y, test_y)
        showtop(val_y, test_y, nums=10705)
        return ids, test_y, getbest(ids, test_y, rank=10705)
    else:
        showresults(train_y, pre_train)
        showtop(train_y, pre_train, nums=16449)
        return ids, test_y, getbest(ids, test_y, th=0.4)
Beispiel #10
0
    #cols = cols+cols1
    #df.drop(columns=cols,inplace=True)
    df1 = df[lendf:]
    df = df[:lendf]
    return df, df1


# Validation data: load the cached frames when present, otherwise build
# them from the raw logs.
if val:
    if os.path.exists(path + 'val_df.csv'):
        test_df = pd.read_csv(path + 'val_df.csv')
        val_y = pd.read_csv(path + 'val_y.csv')
    else:
        # Users with register_day <= 21, features up to day 23, labelled
        # by is_active over days 24-30.
        test_df = register[register.register_day <= 21]
        test_df = get_features(test_df, 23)
        val_y = is_active(test_df, 24, 30, app, video, act)
        test_df.to_csv(path + 'val_df.csv', index=False)
        val_y.to_csv(path + 'val_y.csv', index=False)
    val_y = val_y['Y']
    if os.path.exists(path + 'val_train_df.csv'):
        train_df = pd.read_csv(path + 'val_train_df.csv')
        train_y = pd.read_csv(path + 'val_train_y.csv')
    else:
        train_df = pd.DataFrame()
        train_y = pd.DataFrame()
        # Single cut-off day for now; the loop shape allows adding more.
        for i in [16]:
            df = register[register.register_day <= i - 2]
            y = is_active(df, i + 1, i + 7, app, video, act)
            df = get_features(df, i)
            # pd.concat replaces DataFrame.append, removed in pandas 2.0.
            train_df = pd.concat([train_df, df])
            train_y = pd.concat([train_y, y])
Beispiel #11
0
# Run the per-registration-period models and combine their answers.
# ans29 / ids29 / test_y29 are expected to be defined before this point.
ids30, test_y30, ans30 = predict_30(val, register, app, video, act)

if v4:
    # v4: days 1-23 and 24-28 are handled by separate models.
    ids1_23, test_y1_23, ans1_23 = predict_1_23(val, register, app, video, act)

    ids24_28, test_y24_28, ans24_28 = predict_24_28(val, register, app, video,
                                                    act)
    ans = ans1_23 + ans24_28 + ans29 + ans30
else:
    ids1_28, test_y1_28, ans1_28 = predict_1_28(val, register, app, video, act)
    ans = ans1_28 + ans29 + ans30

if val:
    # Ground truth for validation: users with register_day < 24, labelled
    # by is_active over days 24-30.
    val_df = register[register.register_day < 24].reset_index(drop=True)
    val_y = is_active(val_df, 24, 30, app, video, act)

    val_df['Y'] = val_y['Y']

    trueans = list(val_df[val_df['Y'] == 1]['user_id'])
    val_y = val_df['Y']
    if v4:
        res1 = pd.DataFrame({'user_id': ids1_23, 'Y1': test_y1_23})
        res2 = pd.DataFrame({'user_id': ids24_28, 'Y1': test_y24_28})
        res3 = pd.DataFrame({'user_id': ids29, 'Y1': test_y29})
        res4 = pd.DataFrame({'user_id': ids30, 'Y1': test_y30})
        # One pd.concat instead of chained DataFrame.append calls, which
        # pandas 2.0 removed; row order and index labels are unchanged.
        res = pd.concat([res1, res2, res3, res4])
    else:
        res1 = pd.DataFrame({'user_id': ids1_28, 'Y1': test_y1_28})
        res3 = pd.DataFrame({'user_id': ids29, 'Y1': test_y29})
        res4 = pd.DataFrame({'user_id': ids30, 'Y1': test_y30})
Beispiel #12
0
def predict_1_28(val,register,app,video,act):
    """Fit and apply the activity model for users registered on days 1-28.

    Two labelled samples are built, each with a 16-day feature window:

    * sample 1: users with ``register_day < 15``, features from days
      1-16, label from ``is_active`` over days 17-23;
    * sample 2: users with ``register_day < 22``, features from days
      8-23, label from ``is_active`` over days 24-30.

    When ``val`` is truthy the model is fitted on sample 1 and scored on
    sample 2.  Otherwise it is fitted on both samples and applied to the
    users with ``register_day < 29`` using features from days 15-30.

    Labels and feature tables are cached as CSV files under ``path``; an
    existing cache file is loaded instead of being recomputed.

    Args:
        val: validation switch (see above).
        register: registration table; ``user_id`` and ``register_day``
            are read here, ``register_type`` and ``device_type`` are
            counted in ``get_features_all``.
        app, video, act: raw logs, passed through to ``is_active`` and
            ``get_features_ks``.

    Returns:
        ``(ids, test_y, best)``: the ``user_id`` column of the scored
        frame, the second value returned by ``predict_data`` and the
        result of ``getbest``.
    """
    path = '../data1/1_28/'

    def get_features_all(df,df1):
        """Stack train and test, add shared counts, drop user_id, split."""
        lendf = len(df)
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df = pd.concat([df, df1])
        del df1
        gc.collect()

        # Counts are taken over train and test together.
        df = docount(df,df,'ALL',['register_type'])
        df = docount(df,df,'ALL',['device_type'])

        del df['user_id']

        return df[:lendf], df[lendf:]

    def registered_before(day, horizon):
        """Users with register_day < day; adds register_time capped at 16.

        The original code tried to apply the cap with
        ``df[df['register_time']>16]['register_time'] = 16``, a chained
        assignment that writes to a temporary copy and therefore never
        took effect.  It was also written twice for sample 2 and never
        for the test frame.  ``clip`` applies it for real.  For sample 1
        the cap changes nothing as long as ``register_day >= 1``.
        """
        users = register[register.register_day < day].copy()
        users['register_time'] = (
            horizon - users['register_day']).clip(upper=16)
        del users['register_day']
        return users

    def cached(name, build):
        """Load ``path + name`` if it exists, else build, save and return."""
        if os.path.exists(path + name):
            return pd.read_csv(path + name)
        frame = build()
        frame.to_csv(path + name, index=False)
        return frame

    users1 = registered_before(15, 17)
    users2 = registered_before(22, 24)

    train_y1 = cached('train_y1.csv',
                      lambda: is_active(users1,17,23,app,video,act))['Y']
    train_y2 = cached('train_y2.csv',
                      lambda: is_active(users2,24,30,app,video,act))['Y']

    # NOTE: df2.csv / test_df.csv written before the register_time cap was
    # fixed may hold uncapped values; delete them to regenerate.
    df1 = cached('df1.csv',
                 lambda: get_features_ks(users1,1,16,app,video,act))
    df2 = cached('df2.csv',
                 lambda: get_features_ks(users2,8,23,app,video,act))

    if val:
        train_df = df1
        test_df = df2
        train_y = train_y1
        val_y = train_y2
    else:
        # The day 15-30 frame is only needed here, so it is built lazily.
        test_df = cached(
            'test_df.csv',
            lambda: get_features_ks(registered_before(29, 31),
                                    15,30,app,video,act))

        train_df = pd.concat([df1, df2])
        train_y = pd.concat([train_y1, train_y2])

    del df1,df2
    gc.collect()
    ids = test_df['user_id']
    train_df,test_df = get_features_all(train_df,test_df)

    pre_train,test_y = predict_data(train_df,train_y,10,test_df,importance=1)

    # NOTE(review): nums / rank / th are hard-coded; presumably tuned for
    # this data set -- confirm before reusing on other data.
    if val==1:
        showresults(val_y,test_y)
        showtop(val_y,test_y,nums=15428)
        showtop(val_y,test_y,nums=15905)
        showfalse(ids,test_df,val_y,test_y)
        return ids,test_y,getbest(ids,test_y,th=0.4)
    else:
        showresults(train_y,pre_train)
        showtop(train_y,pre_train,nums=25713)

        return ids,test_y,getbest(ids,test_y,rank=22088)
Beispiel #13
0
    def serve(self):
        """Run one monitoring pass over the domains from _update_instances.

        A domain is skipped when its uuid, info file or monitor setting
        cannot be obtained, or when it is not active.  For the others the
        usage data is collected, a partition change is reported if the
        'diskPartition' metric is configured, the temp state is saved and,
        when the loaded temp state was usable, the formatted metrics are
        sent to the server.  The pass stops early once ``self.RUN_TH`` is
        false.
        """
        LOG.info("Monitor thread start")
        for dom, project_id in self._update_instances():
            if not self.RUN_TH:
                LOG.info("Break from monitor thread")
                break

            # --- preconditions: anything missing skips this domain ---
            uuid = utils.get_domain_uuid(dom)
            if not uuid:
                LOG.warn("Get domain uuid failed")
                continue

            if not utils.is_active(dom):
                LOG.info("Domain is not active, uuid: %s" % uuid)
                continue

            info_file_dict = utils.get_info_file_dict(dom, project_id)
            if not info_file_dict:
                LOG.warn("Info file load failed, uuid: %s" % uuid)
                continue

            monitor_setting_root = utils.get_monitor_setting_root(dom)
            if not monitor_setting_root:
                LOG.warn("Monitor setting file load failed, uuid: %s" % uuid)
                continue

            # --- collect usage; partition info is read before and after ---
            usage = GetSystemUsage(dom, self.helper)
            temp_ok = usage.load_temp()
            old_parts = usage.temp['disk_partition_info']

            all_usage_dict = usage.get_system_usage_datas()

            new_parts = usage.temp['disk_partition_info']
            metrics = utils.get_monitor_metrics(info_file_dict,
                                                monitor_setting_root)
            metric_names = [metric.attrib.get('name') for metric in metrics]
            LOG.debug("Metric names of %s: %s" % (uuid, metric_names))
            identify_id = utils.get_identify_id(info_file_dict, uuid)

            # FIXME(wangpan): hardcode here the 'diskPartition' metric
            if 'diskPartition' in metric_names:
                if old_parts != new_parts:
                    LOG.info(
                        "Notifing partitions change of %s, old: %s, new: %s" %
                        (uuid, old_parts, new_parts))
                    notified = sender.notify_platform_partition_change(
                        new_parts, info_file_dict,
                        monitor_setting_root, identify_id)
                    if not notified:
                        LOG.warn("Notifing partitions change failed")
                        # Put the old value back before the temp is saved.
                        usage.temp['disk_partition_info'] = old_parts

            usage.save_temp()

            if not temp_ok:
                LOG.info("First start or temp file is expired, %s" % uuid)
                continue

            # --- format and send the metrics ---
            payload = DataFormater().format_data(metrics,
                                                 all_usage_dict,
                                                 monitor_setting_root,
                                                 info_file_dict,
                                                 identify_id)
            request = sender.SendRequest(info_file_dict, json.dumps(payload))

            response = request.send_request_to_server()
            if not response or response.status_code != 200:
                LOG.error("Send monitor data of %s faild" % uuid)
            else:
                LOG.debug("Send monitor data of %s successfully" % uuid)

        LOG.info("Monitor thread end")
Beispiel #14
0
    def serve(self):
        """Run one monitoring pass over the domains from _update_instances.

        A domain is skipped when its uuid, info file or monitor setting
        cannot be obtained, or when it is not active.  For the others the
        usage data is collected, a partition change is reported if the
        'diskPartition' metric is configured, the temp state is saved and,
        when the loaded temp state was usable, the formatted metrics are
        sent to the server.  The pass stops early once ``self.RUN_TH`` is
        false.
        """
        LOG.info("Monitor thread start")
        for dom, project_id in self._update_instances():
            if not self.RUN_TH:
                LOG.info("Break from monitor thread")
                break

            # --- preconditions: anything missing skips this domain ---
            uuid = utils.get_domain_uuid(dom)
            if not uuid:
                LOG.warn("Get domain uuid failed")
                continue

            if not utils.is_active(dom):
                LOG.info("Domain is not active, uuid: %s" % uuid)
                continue

            info_file_dict = utils.get_info_file_dict(dom, project_id)
            if not info_file_dict:
                LOG.warn("Info file load failed, uuid: %s" % uuid)
                continue

            monitor_setting_root = utils.get_monitor_setting_root(dom)
            if not monitor_setting_root:
                LOG.warn("Monitor setting file load failed, uuid: %s" % uuid)
                continue

            # --- collect usage; partition info is read before and after ---
            usage = GetSystemUsage(dom, self.helper)
            temp_ok = usage.load_temp()
            old_parts = usage.temp['disk_partition_info']

            all_usage_dict = usage.get_system_usage_datas()

            new_parts = usage.temp['disk_partition_info']
            metrics = utils.get_monitor_metrics(info_file_dict,
                                                monitor_setting_root)
            metric_names = [metric.attrib.get('name') for metric in metrics]
            LOG.debug("Metric names of %s: %s" % (uuid, metric_names))
            identify_id = utils.get_identify_id(info_file_dict, uuid)

            # FIXME(wangpan): hardcode here the 'diskPartition' metric
            if 'diskPartition' in metric_names:
                if old_parts != new_parts:
                    LOG.info(
                        "Notifing partitions change of %s, old: %s, new: %s" %
                        (uuid, old_parts, new_parts))
                    notified = sender.notify_platform_partition_change(
                        new_parts, info_file_dict,
                        monitor_setting_root, identify_id)
                    if not notified:
                        LOG.warn("Notifing partitions change failed")
                        # Put the old value back before the temp is saved.
                        usage.temp['disk_partition_info'] = old_parts

            usage.save_temp()

            if not temp_ok:
                LOG.info("First start or temp file is expired, %s" % uuid)
                continue

            # --- format and send the metrics ---
            payload = DataFormater().format_data(metrics,
                                                 all_usage_dict,
                                                 monitor_setting_root,
                                                 info_file_dict,
                                                 identify_id)
            request = sender.SendRequest(info_file_dict, json.dumps(payload))

            response = request.send_request_to_server()
            if not response or response.status_code != 200:
                LOG.error("Send monitor data of %s faild" % uuid)
            else:
                LOG.debug("Send monitor data of %s successfully" % uuid)

        LOG.info("Monitor thread end")
Beispiel #15
0
def predict_24_28(val,register,app,video,act):
    """Train on sliding registration windows and score users registered on days 24-28.

    Feature frames and labels are cached as CSV files under ``../data1/24_28/``;
    a cached file is reused whenever it exists, otherwise it is rebuilt from the
    raw logs and written back.

    Parameters
    ----------
    val : bool or int
        Truthy: build the validation split (users registered on days 17-21,
        features from days 17-23, labels from days 24-30) and train on the
        windows starting at days 1-10.  Falsy: build the test split (users
        registered on days 24-28, features from days 24-30) and train on the
        windows starting at days 1-17.
    register : pandas.DataFrame
        Registration table; the code reads ``register_day`` and ``user_id``.
    app, video, act : pandas.DataFrame
        Event logs; each must have a ``day`` column.  ``act`` is additionally
        filtered on ``page`` and ``action_type`` and handed to ``doiq`` with
        ``author_id`` / ``video_id``.

    Returns
    -------
    tuple
        ``(ids, test_y, selected)`` where ``ids`` is the ``user_id`` column of
        the scored frame, ``test_y`` is the second value returned by
        ``predict_data`` and ``selected`` is whatever ``getbest`` returns.

    Notes
    -----
    ``docount``, ``domax``, ``doiq``, ``is_active``, ``predict_data``,
    ``showresults``, ``showtop`` and ``getbest`` are defined elsewhere in the
    file.  The column names used below (``'<name>$user_id#'``,
    ``'<name>$user_id_by_day_max'``) are the ones those helpers are expected to
    produce -- TODO confirm against their definitions.
    """

    def get_features(df,d1,d2):
        """Return the feature frame for the users in ``df`` over days ``d1..d2``.

        ``df`` is not modified; the work is done on a private copy.
        """

        def window(log):
            # Take an explicit copy of the selected rows so that rebasing
            # ``day`` below never writes into (or warns about) the caller's log.
            part = log[(log.day>=d1) & (log.day<=d2)].copy()
            part['day'] = part['day'] - d1
            return part

        tapp = window(app)
        tact = window(act)
        tvideo = window(video)
        lastday = d2-d1

        # Work on our own copy: callers pass a boolean-indexed slice of
        # ``register`` and assigning into such a slice is ambiguous in pandas.
        df = df.copy()
        df['register_time'] = d2-df.register_day+1
        del df['register_day']

        # NOTE: earlier experiments (max/variance/distinct-day statistics for
        # app and video, per-day activity count aggregates, "last 1 day"
        # counters) were disabled and are intentionally not computed here.

        # app: launches per day since registration
        df = docount(df,tapp,'app',['user_id'])
        df['app_mean#'] = df['app$user_id#']/df['register_time']
        del df['app$user_id#']
        gc.collect()

        # video: counts over the trailing 2 and 3 days of the window
        df = docount(df,tvideo[tvideo.day>lastday-2],'video_last_2',['user_id'])
        df = docount(df,tvideo[tvideo.day>lastday-3],'video_last_3',['user_id'])
        gc.collect()

        # act: distance (in days) from the end of the window to the last activity
        df = domax(df,tact,'act',['user_id'],'day')
        df['last_act_day'] = lastday - df['act$user_id_by_day_max']+1
        del df['act$user_id_by_day_max']

        df = docount(df,tact[tact.day>lastday-2],'act_last_2',['user_id'])
        df = docount(df,tact[tact.day>lastday-3],'act_last_3',['user_id'])
        gc.collect()

        # The loops below add columns in exactly the original order
        # (3, 2, 1 trailing days) so cached CSVs and the model see the same
        # column layout as before.
        spans = (3,2,1)

        for c in [0,1,2,3]:
            for span in spans:
                recent = tact[(tact['page']==c) & (tact.day>lastday-span)]
                df = docount(df,recent,'act_last_'+str(span)+'_page='+str(c),['user_id'])

        for span in spans:
            recent = tact[tact.day>lastday-span]
            df = doiq(df,recent,'act_last_'+str(span),['user_id'],'author_id')
            df = doiq(df,recent,'act_last_'+str(span),['user_id'],'video_id')

        for c in [0,1,2,3]:
            for span in spans:
                recent = tact[(tact['action_type']==c) & (tact.day>lastday-span)]
                df = docount(df,recent,'act_last_'+str(span)+'_action_type='+str(c),['user_id'])

        gc.collect()

        return df

    def build_train_windows(first,stop):
        """Build features/labels for the windows starting at days ``first..stop-1``.

        Each window takes users registered on days ``i..i+4``, features from
        days ``i..i+6`` and labels from days ``i+7..i+13``.  Returns two lists
        (feature frames, label frames) in window order.
        """
        frames = []
        labels = []
        for i in range(first,stop):
            users = register[(register.register_day>=i) & (register.register_day<=i+4)]
            # Labels are computed before the features, as in the original flow.
            labels.append(is_active(users,i+7,i+13,app,video,act))
            frames.append(get_features(users,i,i+6))
        return frames,labels

    path = '../data1/24_28/'
    if val:
        if os.path.exists(path+'val_df.csv'):
            test_df = pd.read_csv(path+'val_df.csv')
            val_y = pd.read_csv(path+'val_y.csv')
        else:
            test_df = register[(register.register_day>=17) & (register.register_day<=21)]
            test_df = get_features(test_df,17,23)
            val_y = is_active(test_df,24,30,app,video,act)
            test_df.to_csv(path+'val_df.csv',index=False)
            val_y.to_csv(path+'val_y.csv',index=False)
        val_y = val_y['Y']
        if os.path.exists(path+'val_train_df.csv'):
            train_df = pd.read_csv(path+'val_train_df.csv')
            train_y = pd.read_csv(path+'val_train_y.csv')
        else:
            frames,labels = build_train_windows(1,11)
            # DataFrame.append was removed in pandas 2.0; a single concat keeps
            # the same row order and original index labels.
            train_df = pd.concat(frames)
            train_y = pd.concat(labels)
            train_df.to_csv(path+'val_train_df.csv',index=False)
            train_y.to_csv(path+'val_train_y.csv',index=False)
    else:
        if os.path.exists(path+'test_df.csv'):
            test_df = pd.read_csv(path+'test_df.csv')
        else:
            test_df = register[(register.register_day>=24) & (register.register_day<=28)]
            test_df = get_features(test_df,24,30)
            test_df.to_csv(path+'test_df.csv',index=False)

        if os.path.exists(path+'train_df.csv'):
            train_df = pd.read_csv(path+'train_df.csv')
            train_y = pd.read_csv(path+'train_y.csv')
        else:
            if os.path.exists(path+'val_train_df.csv'):
                # Reuse the validation-run cache (windows 1-10) and only build
                # the remaining windows 11-17.
                base_frames = [pd.read_csv(path+'val_train_df.csv')]
                base_labels = [pd.read_csv(path+'val_train_y.csv')]
                first = 11
            else:
                base_frames = []
                base_labels = []
                first = 1
            frames,labels = build_train_windows(first,18)
            train_df = pd.concat(base_frames+frames)
            train_y = pd.concat(base_labels+labels)
            train_df.to_csv(path+'train_df.csv',index=False)
            train_y.to_csv(path+'train_y.csv',index=False)
    train_y = train_y['Y']

    def get_features_all(df,df1):
        """Stack train and test, drop ``user_id``, and split them apart again.

        Stacking gives both parts the same column set before the identifier
        column is removed.  Returns ``(train_part, test_part)``.
        """
        lendf = len(df)

        stacked = pd.concat([df,df1])
        del df1
        gc.collect()

        del stacked['user_id']

        return stacked[:lendf],stacked[lendf:]

    # Keep the ids before ``user_id`` is dropped from the model inputs.
    ids = test_df['user_id']
    train_df,test_df = get_features_all(train_df,test_df)

    pre_train,test_y = predict_data(train_df,train_y,10,test_df,importance=1)

    # NOTE(review): the data branch above tests ``if val:`` while this one tests
    # ``val==1``; they agree for True/False/1/0 but not for other truthy
    # values -- confirm which is intended.
    # The nums/rank/th values below are hard-coded cut-offs, presumably tuned
    # for this dataset -- TODO confirm.
    if val==1:
        print (len(train_y),sum(train_y))
        showresults(val_y,test_y)
        showtop(val_y,test_y,nums=4723)
        showtop(train_y,pre_train,nums=38507)
        return ids,test_y,getbest(ids,test_y,th=0.4)
    else:
        showresults(train_y,pre_train)
        showtop(train_y,pre_train,nums=70275)
        return ids,test_y,getbest(ids,test_y,rank=5498)