Exemple #1
0
def test(train_file=train_file, test_file=test_file, uid_voc=uid_voc,
         mid_voc=mid_voc, cat_voc=cat_voc, batch_size=128, maxlen=100):
    """Build the DIEN test graph and print the evaluation metrics.

    A ``SampleIO`` reader and a ``Model_DIEN`` (configured with both the
    regular and the "light" dimensions, plus the ``use_rocket_training()``
    flag) are created.  The output of ``next_test()`` is passed through
    ``xdl_embedding`` and ``tf_test_model`` to obtain the test ops, which
    ``eval_model`` then runs in a fresh ``xdl.TrainSession``.

    The value returned by ``eval_model`` is interpolated directly into a
    four-field format string, so it must be a 4-tuple of numbers.
    """
    reader = SampleIO(train_file, test_file, uid_voc, mid_voc, cat_voc,
                      batch_size, maxlen,
                      embedding_dim=EMBEDDING_DIM,
                      light_embedding_dim=LIGHT_EMBEDDING_DIM)
    dien = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
                      LIGHT_EMBEDDING_DIM, LIGHT_HIDDEN_SIZE,
                      LIGHT_ATTENTION_SIZE,
                      use_rocket_training=use_rocket_training())

    # Wire the test batch through the embedding layer into the TF graph.
    batch = reader.next_test()
    embedded = dien.xdl_embedding(batch, EMBEDDING_DIM, LIGHT_EMBEDDING_DIM,
                                  *reader.get_n())
    test_ops = tf_test_model(*embedded)

    session = xdl.TrainSession()
    metrics = eval_model(session, test_ops)
    report = 'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
    print(report % metrics)
def test(train_file=train_file, test_file=test_file, uid_voc=uid_voc,
         mid_voc=mid_voc, cat_voc=cat_voc, batch_size=128, maxlen=100):
    """Evaluate the configured DIN or DIEN model and print its metrics.

    The model class is chosen from the ``'model'`` entry of the XDL config
    (``'din'`` or ``'dien'``).  The test ops are obtained by feeding
    ``sample_io.next_test()`` through ``xdl_embedding`` and
    ``tf_test_model``; ``eval_model`` runs them in a new
    ``xdl.TrainSession`` and its 4-tuple result is printed.

    Raises:
        Exception: if the configured model is neither ``'din'`` nor
            ``'dien'``.
    """
    reader = SampleIO(train_file, test_file, uid_voc, mid_voc, cat_voc,
                      batch_size, maxlen, EMBEDDING_DIM)

    model_name = xdl.get_config('model')
    if model_name == 'din':
        net = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif model_name == 'dien':
        net = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien model')

    # Wire the test batch through the embedding layer into the TF graph.
    batch = reader.next_test()
    embedded = net.xdl_embedding(batch, EMBEDDING_DIM, *reader.get_n())
    test_ops = tf_test_model(*embedded)

    session = xdl.TrainSession()
    metrics = eval_model(session, test_ops)
    report = 'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
    print(report % metrics)
Exemple #3
0
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100,
         checkpoint_version="ckpt-...............12000"):
    """Restore a saved checkpoint and print evaluation metrics for DIN/DIEN.

    The model class is chosen from the ``'model'`` entry of the XDL config.
    The test graph is built with ``build_final_net(..., is_train=False)``,
    the checkpoint is restored through ``xdl.Saver().restore``, and
    ``eval_model`` is run in a new ``xdl.TrainSession``.

    Args:
        train_file, test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen:
            forwarded unchanged to ``SampleIO``.
        checkpoint_version: value passed as ``version=`` to
            ``xdl.Saver().restore``.  Defaults to the version string that
            was previously hard-coded in the body, so existing callers see
            no change.

    Raises:
        Exception: if the configured model is neither ``'din'`` nor
            ``'dien'``.
    """
    sample_io = SampleIO(train_file, test_file, uid_voc, mid_voc, cat_voc,
                         batch_size, maxlen, EMBEDDING_DIM)

    model_name = xdl.get_config('model')
    if model_name == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif model_name == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien model')

    # Build the inference graph first, then load the trained weights.
    test_ops = model.build_final_net(EMBEDDING_DIM, sample_io, is_train=False)
    print('=' * 10 + 'start test' + '=' * 10)
    saver = xdl.Saver()
    saver.restore(version=checkpoint_version)

    eval_sess = xdl.TrainSession()
    # eval_model must return a 4-tuple to match the four format fields.
    print(
        'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
        % eval_model(eval_sess, test_ops))
Exemple #4
0
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100):
    """Run one evaluation pass of the DIEN model and print the metrics.

    Creates the ``SampleIO`` reader and a ``Model_DIEN`` that receives both
    the regular and the "light" sizes along with the result of
    ``use_rocket_training()``.  The test ops come from
    ``tf_test_model(*model.xdl_embedding(...))`` and are executed by
    ``eval_model`` in a new ``xdl.TrainSession``; the returned 4-tuple is
    formatted and printed.
    """
    io_kwargs = dict(embedding_dim=EMBEDDING_DIM,
                     light_embedding_dim=LIGHT_EMBEDDING_DIM)
    data_source = SampleIO(train_file, test_file, uid_voc, mid_voc, cat_voc,
                           batch_size, maxlen, **io_kwargs)

    network = Model_DIEN(
        EMBEDDING_DIM,
        HIDDEN_SIZE,
        ATTENTION_SIZE,
        LIGHT_EMBEDDING_DIM,
        LIGHT_HIDDEN_SIZE,
        LIGHT_ATTENTION_SIZE,
        use_rocket_training=use_rocket_training(),
    )

    # Test graph: batch -> embeddings -> TF test model.
    test_batch = data_source.next_test()
    embedding_outputs = network.xdl_embedding(
        test_batch, EMBEDDING_DIM, LIGHT_EMBEDDING_DIM, *data_source.get_n())
    test_ops = tf_test_model(*embedding_outputs)

    eval_sess = xdl.TrainSession()
    results = eval_model(eval_sess, test_ops)
    print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
          % results)
Exemple #5
0
def train(train_file=train_file, test_file=test_file, uid_voc=uid_voc,
          mid_voc=mid_voc, cat_voc=cat_voc, item_info=item_info,
          reviews_info=reviews_info, batch_size=128, maxlen=100,
          test_iter=700):
    """Build the train and test graphs for DIEN and hand them to ``model.run``.

    Two XDL model scopes are set up, ``'train'`` and ``'test'``, each with
    its own ``xdl.TrainSession``.  The train ops get an Adam optimizer step
    (learning rate 0.001) appended.  The train session always carries the
    QPS and metrics-printer hooks; a ``CheckpointHook`` is added only when
    ``xdl.get_task_index()`` is 0.

    Args:
        test_iter: forwarded to ``model.run`` as ``test_iter``.
        (all other arguments are forwarded unchanged to ``SampleIO``.)
    """
    dien = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
                      LIGHT_EMBEDDING_DIM, LIGHT_HIDDEN_SIZE,
                      LIGHT_ATTENTION_SIZE,
                      use_rocket_training=use_rocket_training())
    reader = SampleIO(train_file, test_file, uid_voc, mid_voc, cat_voc,
                      item_info, reviews_info, batch_size, maxlen,
                      embedding_dim=EMBEDDING_DIM,
                      light_embedding_dim=LIGHT_EMBEDDING_DIM)

    with xdl.model_scope('train'):
        train_ops = dien.build_final_net(EMBEDDING_DIM, LIGHT_EMBEDDING_DIM,
                                         reader)
        learning_rate = 0.001
        train_ops.append(xdl.Adam(learning_rate).optimize())

        metrics_format = "[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]"
        session_hooks = [QpsMetricsHook(), MetricsPrinterHook(metrics_format)]
        # Only task 0 is given the checkpoint hook.
        if xdl.get_task_index() == 0:
            save_interval = xdl.get_config('checkpoint', 'save_interval')
            session_hooks.append(xdl.CheckpointHook(save_interval))
        train_sess = xdl.TrainSession(hooks=session_hooks)

    with xdl.model_scope('test'):
        test_ops = dien.build_final_net(EMBEDDING_DIM, LIGHT_EMBEDDING_DIM,
                                        reader, is_train=False)
        test_sess = xdl.TrainSession()

    dien.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
Exemple #6
0
def train(train_file=train_file, test_file=test_file, uid_voc=uid_voc,
          mid_voc=mid_voc, cat_voc=cat_voc, item_info=item_info,
          reviews_info=reviews_info, batch_size=128, maxlen=100,
          test_iter=700):
    """Build the train and test graphs for DIN or DIEN and call ``model.run``.

    The model class is chosen from the ``'model'`` entry of the XDL config.
    Two XDL model scopes are set up, ``'train'`` and ``'test'``, each with
    its own ``xdl.TrainSession``.  The train ops get an Adam optimizer step
    (learning rate 0.001) appended.  The train session always carries the
    QPS and metrics-printer hooks; a ``CheckpointHook`` is added only when
    ``xdl.get_task_index()`` is 0.

    Args:
        test_iter: forwarded to ``model.run`` as ``test_iter``.
        (all other arguments are forwarded unchanged to ``SampleIO``.)

    Raises:
        Exception: if the configured model is neither ``'din'`` nor
            ``'dien'``.
    """
    model_name = xdl.get_config('model')
    if model_name == 'din':
        net = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif model_name == 'dien':
        net = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien')

    reader = SampleIO(train_file, test_file, uid_voc, mid_voc, cat_voc,
                      item_info, reviews_info, batch_size, maxlen,
                      EMBEDDING_DIM)

    with xdl.model_scope('train'):
        train_ops = net.build_final_net(EMBEDDING_DIM, reader)
        learning_rate = 0.001
        train_ops.append(xdl.Adam(learning_rate).optimize())

        metrics_format = "[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]"
        session_hooks = [QpsMetricsHook(), MetricsPrinterHook(metrics_format)]
        # Only task 0 is given the checkpoint hook.
        if xdl.get_task_index() == 0:
            save_interval = xdl.get_config('checkpoint', 'save_interval')
            session_hooks.append(xdl.CheckpointHook(save_interval))
        train_sess = xdl.TrainSession(hooks=session_hooks)

    with xdl.model_scope('test'):
        test_ops = net.build_final_net(EMBEDDING_DIM, reader, is_train=False)
        test_sess = xdl.TrainSession()

    print('=' * 10 + 'start train' + '=' * 10)
    net.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
Exemple #7
0
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         item_info=item_info,
         reviews_info=reviews_info,
         batch_size=99,
         maxlen=100,
         checkpoint_version="ckpt-...............20000"):
    """Score each test row's KNN candidates with DIN/DIEN and save the ranks.

    Steps, in order:

    1. Pick ``Model_DIN`` or ``Model_DIEN`` from the ``'model'`` XDL config.
    2. Build an item -> category map from the module-level ``item_c``
       iterable of tab-separated lines.
    3. Restore ``checkpoint_version`` through ``xdl.Saver``.
    4. Load the pickled test set and KNN table.  For every test row, write
       one candidate sample per neighbour of the row's last history item to
       ``../data/test_knn``.
    5. Evaluate those samples and sort each row's neighbours by predicted
       score, descending.  Record the 1-based position of the true target,
       or 100 when the target is not among the neighbours.
    6. Pickle ``(user, hist, last_hist, target, rank)`` tuples to
       ``'ali_dien_rank_4days' + test_file[-11:]``.

    Args:
        test_file: path to a pickled iterable of tab-separated lines.  Its
            character at index ``-5`` selects the KNN table file and its
            last 11 characters form the output file suffix.
        checkpoint_version: value passed as ``version=`` to
            ``xdl.Saver().restore``.  Defaults to the version string that
            was previously hard-coded in the body.
        (remaining arguments are forwarded unchanged to ``SampleIO``.)

    Raises:
        Exception: if the configured model is neither ``'din'`` nor
            ``'dien'``.
    """
    model_name = xdl.get_config('model')
    if model_name == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif model_name == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien model')

    # create item cate dict
    i_c = {}
    for line in item_c:
        fields = line.strip().split('\t')
        i_c[fields[0]] = fields[1]

    saver = xdl.Saver()
    saver.restore(version=checkpoint_version)

    last_hist = []
    target_list = []
    seq = []
    # NOTE(review): pickle.load can execute arbitrary code; these files must
    # come from a trusted source.
    with open(test_file, 'rb') as test_fh:
        test_set = pkl.load(test_fh)
    knn_path = ('../data/ali_knn_table/knn' + str(test_file[-5]) +
                '_no_pro2.pkl')
    with open(knn_path, 'rb') as knn_fh:
        knn_table = pkl.load(knn_fh)
    print('length before deal with : ', len(test_set))

    count22 = 0
    # The context manager guarantees the candidate file is flushed and closed
    # before SampleIO reads it, even if a row fails to parse.
    with open('../data/test_knn', 'w') as test_knn:
        for row in test_set:
            ss = row.strip().split('\t')
            last = ss[4].split('/')[-1]

            # append last, target, and seq
            last_hist.append(last)
            target_list.append(ss[2])
            seq.append((ss[1], ss[4]))  # uid and hist

            for k in knn_table[last]:
                count22 += 1
                # Unknown items get the 'UNK' category.
                tmp = '\t'.join(
                    ['1', ss[1], k, i_c.get(k, 'UNK'), ss[4], ss[5]])
                # Explicit write instead of the Python-2-only
                # `print >> file` form; works on both Python 2 and 3.
                test_knn.write(tmp + '\n')

    print('after last_hist :', len(last_hist))
    print('all test_knn length :', count22)

    # sample_io
    # NOTE(review): the candidates were written to '../data/test_knn' above;
    # this presumably resolves to the same file via get_data_prefix() — confirm.
    test_knn_f = os.path.join(get_data_prefix(), 'test_knn')
    sample_io = SampleIO(train_file, test_knn_f, uid_voc, mid_voc, cat_voc,
                         item_info, reviews_info, batch_size, maxlen,
                         EMBEDDING_DIM)

    print('all length:', len(last_hist))

    test_ops = model.build_final_net(EMBEDDING_DIM, sample_io, is_train=False)
    print('=' * 10 + 'start test' + '=' * 10)
    eval_sess = xdl.TrainSession()
    pro_all, test_auc, loss_sum, accuracy_sum, aux_loss_sum = eval_model(
        eval_sess, test_ops)
    print(
        'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
        % (test_auc, loss_sum, accuracy_sum, aux_loss_sum))

    print('after pro length :', len(pro_all))
    print('=' * 50)

    # sort the knn with prob
    # NOTE(review): assumes pro_all[i] holds the scores of row i's neighbours
    # in knn_table order (presumably one batch per row) — confirm against
    # eval_model.
    rank = []
    for idx, last in enumerate(last_hist):
        scored = zip(knn_table[last], pro_all[idx])
        ordered = sorted(scored, key=lambda t: t[1], reverse=True)
        rank_all = [cand for cand, _ in ordered]
        try:
            rank.append(rank_all.index(target_list[idx]) + 1)
        except ValueError:
            # Target is not among the neighbours: use the sentinel rank.
            rank.append(100)

    # save the result of re-rank
    user = [pair[0] for pair in seq]
    hist = [pair[1] for pair in seq]
    assert len(last_hist) == len(user)
    results = list(zip(user, hist, last_hist, target_list, rank))
    with open('ali_dien_rank_4days' + test_file[-11:], 'wb') as d:
        pkl.dump(results, d)