Example #1
def train():
    batch = reader.read()
    emb1 = xdl.embedding('emb1', batch['sparse0'], xdl.TruncatedNormal(stddev=0.001), 8, 1024, vtype='hash')
    emb2 = xdl.embedding('emb2', batch['sparse1'], xdl.TruncatedNormal(stddev=0.001), 8, 1024, vtype='hash')
    loss = model(batch['deep0'], [emb1, emb2], batch['label'])
    train_op = xdl.SGD(0.5).optimize()
    log_hook = xdl.LoggerHook(loss, "loss:{0}", 10)
    sess = xdl.TrainSession(hooks=[log_hook])
    while not sess.should_stop():
        sess.run(train_op)
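Example #1 relies on a model() helper that the snippet does not define. The sketch below is a hypothetical stand-in, assuming XDL's TensorFlow backend and its tf_wrapper decorator; the layer sizes and names are illustrative, not the original network.

import tensorflow as tf
import xdl

@xdl.tf_wrapper()
def model(deep, embeddings, labels):
    # Concatenate the dense input with the pooled embeddings and score
    # with a small MLP; XDL differentiates the returned loss.
    inputs = tf.concat([deep] + embeddings, axis=1)
    hidden = tf.layers.dense(inputs, 64, activation=tf.nn.relu)
    logits = tf.layers.dense(hidden, 1)
    return tf.losses.sigmoid_cross_entropy(labels, logits)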
Example #2
def train(train_file=train_file,
          test_file=test_file,
          uid_voc=uid_voc,
          mid_voc=mid_voc,
          cat_voc=cat_voc,
          item_info=item_info,
          reviews_info=reviews_info,
          batch_size=128,
          maxlen=100,
          test_iter=700):
    model = Model_DIEN(EMBEDDING_DIM,
                       HIDDEN_SIZE,
                       ATTENTION_SIZE,
                       LIGHT_EMBEDDING_DIM,
                       LIGHT_HIDDEN_SIZE,
                       LIGHT_ATTENTION_SIZE,
                       use_rocket_training=use_rocket_training())
    sample_io = SampleIO(train_file,
                         test_file,
                         uid_voc,
                         mid_voc,
                         cat_voc,
                         item_info,
                         reviews_info,
                         batch_size,
                         maxlen,
                         embedding_dim=EMBEDDING_DIM,
                         light_embedding_dim=LIGHT_EMBEDDING_DIM)
    with xdl.model_scope('train'):
        train_ops = model.build_final_net(EMBEDDING_DIM, LIGHT_EMBEDDING_DIM,
                                          sample_io)
        lr = 0.001
        # Adam Adagrad
        train_ops.append(xdl.Adam(lr).optimize())
        log_format = "[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]"
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        if xdl.get_task_index() == 0:
            hooks.append(
                xdl.CheckpointHook(
                    xdl.get_config('checkpoint', 'save_interval')))
        train_sess = xdl.TrainSession(hooks=hooks)

    with xdl.model_scope('test'):
        test_ops = model.build_final_net(EMBEDDING_DIM,
                                         LIGHT_EMBEDDING_DIM,
                                         sample_io,
                                         is_train=False)
        test_sess = xdl.TrainSession()

    model.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
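Several examples pull settings through xdl.get_config (here 'checkpoint'/'save_interval'; elsewhere 'model'), which resolves keys against the JSON config the job is launched with. A hypothetical excerpt covering only the keys these examples touch, written as the equivalent Python dict; every value is illustrative:

# config.json (excerpt), shown as a Python dict
config = {
    "model": "dien",  # switches Model_DIN / Model_DIEN in Examples #3, #6, #11
    "checkpoint": {
        "save_interval": 10000,  # consumed by xdl.CheckpointHook above
    },
}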
Example #3
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100):
    # sample_io
    sample_io = SampleIO(train_file, test_file, uid_voc, mid_voc, cat_voc,
                         batch_size, maxlen, EMBEDDING_DIM)

    if xdl.get_config('model') == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif xdl.get_config('model') == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien model')

    # test
    # datas = sample_io.next_test()
    # test_ops = tf_test_model(*model.xdl_embedding(datas, EMBEDDING_DIM, *sample_io.get_n()))
    # print('='*10,'start test','='*10)
    test_ops = model.build_final_net(EMBEDDING_DIM, sample_io, is_train=False)
    print('=' * 10 + 'start test' + '=' * 10)
    saver = xdl.Saver()
    checkpoint_version = "ckpt-...............12000"
    saver.restore(version=checkpoint_version)
    eval_sess = xdl.TrainSession()
    print(
        'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
        % eval_model(eval_sess, test_ops))
Example #4
def train():
    if model_type == 'din_mogujie':
        model = Model_DIN_MOGUJIE(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
                                  False, train_file, batch_size)
    else:
        raise Exception('only support din_mogujie')

    #data set

    with xdl.model_scope('train'):

        train_ops = model.build_network()
        lr = 0.001
        # Adam Adagrad
        train_ops.append(xdl.Adam(lr).optimize())
        log_format = "[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]"
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        if xdl.get_task_index() == 0:
            hooks.append(xdl.CheckpointHook(save_interval))
        train_sess = xdl.TrainSession(hooks=hooks)
    """
    with xdl.model_scope('test'):
        test_ops = model.build_network(
            EMBEDDING_DIM, is_train=False)
        test_sess = xdl.TrainSession()
    """
    model.run(train_ops, train_sess)
Example #5
def run(name1, name2, scope):
    with xdl.model_scope(scope):
        labels = xdl.mock_dense_op(shape=[1, 1], value=1.0)
        mock_embs = mock_embedding(name1, name2)
        loss = model(mock_embs, labels)
        train_op = xdl.SGD(lr).optimize()
        hooks = []
        sess = xdl.TrainSession(hooks)
        run_ops = [train_op, loss]
        op_names = ['none', 'loss']

        embed_vars = [
            var for var in trainable_variables() if is_embedding_var(var)
        ]
        sparse_embed_grads = []
        for var in embed_vars:
            sparse_embed_grads.append(xdl.get_sparse_grads(var.name))
            op_names.append(var.name + '.indices')
            op_names.append(var.name + '.grads')
        for i in range(len(sparse_embed_grads)):
            run_ops.append(sparse_embed_grads[i].indices)
            run_ops.append(sparse_embed_grads[i].grad)
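        # run_ops is now [train_op, loss, emb.indices, emb.grad, ...] with one
        # (indices, grad) pair per embedding, so var_list[3] below is the first
        # embedding's sparse gradient and var_list[5] the second's (assuming
        # mock_embedding registers one embedding variable per distinct name).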
        var_list = sess.run(run_ops)
        if name1 != name2:
            return var_list[3], var_list[5]
        return var_list[3]
Example #6
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100):
    # sample_io
    sample_io = SampleIO(train_file, test_file, uid_voc, mid_voc,
                         cat_voc, batch_size, maxlen, EMBEDDING_DIM)

    if xdl.get_config('model') == 'din':    
        model = Model_DIN(
            EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif xdl.get_config('model') == 'dien':    
        model = Model_DIEN(
            EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien model')

    # test
    datas = sample_io.next_test()
    test_ops = tf_test_model(
        *model.xdl_embedding(datas, EMBEDDING_DIM, *sample_io.get_n()))
    eval_sess = xdl.TrainSession()
    print('test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' %
          eval_model(eval_sess, test_ops))
Example #7
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         batch_size=128,
         maxlen=100):
    # sample_io
    sample_io = SampleIO(train_file,
                         test_file,
                         uid_voc,
                         mid_voc,
                         cat_voc,
                         batch_size,
                         maxlen,
                         embedding_dim=EMBEDDING_DIM,
                         light_embedding_dim=LIGHT_EMBEDDING_DIM)
    model = Model_DIEN(EMBEDDING_DIM,
                       HIDDEN_SIZE,
                       ATTENTION_SIZE,
                       LIGHT_EMBEDDING_DIM,
                       LIGHT_HIDDEN_SIZE,
                       LIGHT_ATTENTION_SIZE,
                       use_rocket_training=use_rocket_training())
    # test
    datas = sample_io.next_test()
    test_ops = tf_test_model(*model.xdl_embedding(
        datas, EMBEDDING_DIM, LIGHT_EMBEDDING_DIM, *sample_io.get_n()))
    eval_sess = xdl.TrainSession()
    print(
        'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
        % eval_model(eval_sess, test_ops))
Example #8
def gear():
    forward = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="forward")
    backward = xdl.mock_dense_op(shape=[1, 16], value=0.02, name_="backward")
    labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label1")
    init_grad = xdl.mock_dense_op(shape=[1, 1], value=0.3, name_="init_grad")
    forward.set_shape([1, 16])
    backward.set_shape([1, 16])
    labels.set_shape([1, 1])
    init_grad.set_shape([1, 1])
    predict = ams_gear([forward], [backward], init_grad)(gear_model)(None)
    with xdl.model_scope("ams_gear_forward"):
        sess = xdl.TrainSession()
        prediction = sess.run(predict)
    with xdl.model_scope("ams_gear_backward"):
        grads = xdl.get_gradient("fc_weight")
        sess = xdl.TrainSession()
        fc_weight_grad = sess.run(grads)
        return prediction, fc_weight_grad
Example #9
def train():
    images, labels = xdl.py_func(read_train, [],
                                 output_type=[np.float32, np.float32])
    images_test, labels_test = xdl.py_func(
        read_test, [], output_type=[np.float32, np.float32])
    with xdl.model_scope('train'):
        loss = model(images, labels)
        train_op = xdl.Adagrad(0.5).optimize()
        train_sess = xdl.TrainSession()

    with xdl.model_scope('test'):
        accuracy = eval_model(images_test, labels_test)
        eval_sess = xdl.TrainSession()
    for _ in range(100):
        for _ in range(1000):
            train_sess.run(train_op)

        print("accuracy %s" % eval_sess.run(accuracy))
Example #10
def run():
    user_ms = xdl.ModelServer(
        "user_graph", user_graph_train, xdl.DataType.float,
        xdl.ModelServer.Forward.UniqueCache(xdl.get_task_num()),
        xdl.ModelServer.Backward.UniqueCache(xdl.get_task_num()))
    xdl.current_env().start_model_server(user_ms)
    ad_ms = xdl.ModelServer(
        "ad_graph", ad_graph_train, xdl.DataType.float,
        xdl.ModelServer.Forward.UniqueCache(xdl.get_task_num()),
        xdl.ModelServer.Backward.UniqueCache(xdl.get_task_num()))
    xdl.current_env().start_model_server(ad_ms)
    batch = reader().read()

    user0 = xdl.embedding("user0",
                          batch["user0"],
                          xdl.TruncatedNormal(stddev=0.001),
                          16,
                          2 * 1024 * 1024,
                          "sum",
                          vtype="hash")

    user1 = xdl.embedding("user1",
                          batch["user1"],
                          xdl.TruncatedNormal(stddev=0.001),
                          16,
                          2 * 1024 * 1024,
                          "sum",
                          vtype="hash")

    ad0 = batch["ad0"]
    ad1 = batch["ad1"]
    img0 = user_ms(batch["user_img"].ids)
    ids0 = xdl.py_func(to_tf_segment_id, [batch["user_img"].segments],
                       [np.int32])[0]
    img1 = ad_ms(batch["ad_img"].ids)
    ids1 = xdl.py_func(to_tf_segment_id, [batch["ad_img"].segments],
                       [np.int32])[0]
    label = batch['label']
    loss = ams_main(main_model)(user0,
                                user1,
                                ad0,
                                ad1,
                                label,
                                ids0,
                                ids1,
                                gear_inputs=[img0, img1])

    optimizer = xdl.Adam(0.0005).optimize()

    run_ops = [loss, optimizer]

    sess = xdl.TrainSession([])
    while not sess.should_stop():
        values = sess.run(run_ops)
        if values is not None:
            print('loss:', values[0])
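The sparse batches in Example #10 carry cumulative segment offsets (one end offset per sample), while TensorFlow's segment ops expect a per-element row id. A plausible sketch of the to_tf_segment_id helper passed to xdl.py_func above; the original's exact semantics are an assumption:

import numpy as np

def to_tf_segment_id(segments):
    # Expand cumulative end offsets into per-element row ids,
    # e.g. segments [2, 5] -> [0, 0, 1, 1, 1].
    seg_ids = np.zeros(int(segments[-1]), dtype=np.int32)
    start = 0
    for row, end in enumerate(segments):
        seg_ids[start:end] = row
        start = end
    return seg_ids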
Example #11
def train(train_file=train_file,
          test_file=test_file,
          uid_voc=uid_voc,
          mid_voc=mid_voc,
          cat_voc=cat_voc,
          item_info=item_info,
          reviews_info=reviews_info,
          batch_size=128,
          maxlen=100,
          test_iter=700):
    if xdl.get_config('model') == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif xdl.get_config('model') == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien')

    sample_io = SampleIO(train_file, test_file, uid_voc, mid_voc, cat_voc,
                         item_info, reviews_info, batch_size, maxlen,
                         EMBEDDING_DIM)
    with xdl.model_scope('train'):
        train_ops = model.build_final_net(EMBEDDING_DIM, sample_io)
        lr = 0.001
        # Adam Adagrad
        train_ops.append(xdl.Adam(lr).optimize())
        log_format = "[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]"
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        if xdl.get_task_index() == 0:
            hooks.append(
                xdl.CheckpointHook(
                    xdl.get_config('checkpoint', 'save_interval')))
        train_sess = xdl.TrainSession(hooks=hooks)

    with xdl.model_scope('test'):
        test_ops = model.build_final_net(EMBEDDING_DIM,
                                         sample_io,
                                         is_train=False)
        test_sess = xdl.TrainSession()

    print('=' * 10 + 'start train' + '=' * 10)
    model.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
Example #12
def main():
    dense = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
    gear = xdl.mock_dense_op(shape=[1, 1], value=0.01, name_="gear")
    labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")
    gear.set_shape([1, 1])
    dense.set_shape([1, 16])
    labels.set_shape([1, 1])
    with xdl.model_scope("ams_main"):
        loss = ams_main(main_model)(dense, labels, gear_inputs=[gear])
        sess = xdl.TrainSession()
        return sess.run([xdl.get_collection("gear_grad")])
Example #13
def train():
    images, labels = xdl.py_func(read_train, [],
                                 output_type=[np.float32, np.float32])
    images_test, labels_test = xdl.py_func(
        read_test, [], output_type=[np.float32, np.float32])
    with xdl.model_scope('train'):
        loss = model(images, labels)
        train_op = xdl.Adagrad(0.5).optimize()
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(1000)
            train_sess = xdl.TrainSession(hooks=[ckpt_hook])
        else:
            train_sess = xdl.TrainSession()

    with xdl.model_scope('test'):
        accuracy = eval_model(images_test, labels_test)
        eval_sess = xdl.TrainSession()
    for _ in range(100):
        for _ in range(1000):
            train_sess.run(train_op)

        print("accuracy %s" % eval_sess.run(accuracy))
Example #14
def main():
  dense = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
  gear = xdl.mock_dense_op(shape=[1, 1], value=0.01, name_="gear")
  labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")
  ids, values, segments = xdl.mock_sparse_op(dense_shape=[1, 16], name_="wide")
  sparse = xdl.SparseTensor(ids, values, segments)
  emb = xdl.embedding("sparse", sparse, xdl.Ones(), 1, 16, 'sum')
  gear.set_shape([None, 1])
  dense.set_shape([None, 16])
  labels.set_shape([None, 1])
  with xdl.model_scope("ams_main"):
    loss = ams_main(main_model)(dense, emb, labels, gear_inputs=[gear])
    sess = xdl.TrainSession()
    return sess.run(xdl.get_collection("gear_grad"))
Example #15
def test_all(self):
    dense = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
    labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")
    ids = xdl.convert_to_tensor(
        np.array([[0, 0], [0, 1], [0, 2]], dtype=np.int64))
    values = xdl.convert_to_tensor(
        np.array([1.0, 2.0, 3.0], dtype=np.float32))
    segments = xdl.convert_to_tensor(np.array([3], dtype=np.int32))
    sparse = xdl.SparseTensor(ids, values, segments)
    emb = xdl.embedding("sparse",
                        sparse,
                        xdl.Ones(),
                        1,
                        16,
                        'sum',
                        vtype='hash')
    loss = model(dense, emb, labels)
    train_op = xdl.SGD(0.5).optimize()
    sess = xdl.TrainSession()
    _, l, g = sess.run(
        [train_op, loss,
         xdl.get_sparse_grads('sparse').grad])
    self.assertTrue((l == np.array(0.0024364376, dtype=np.float32)).all())
    self.assertTrue(
        (g == np.array([[-0.002433472], [-0.004866944], [-0.007300416]],
                       dtype=np.float32)).all())
    sparse_var = xdl.get_variable_by_name('sparse')
    weights = sess.run(
        sparse_var.gather(
            np.array([[0, 0], [0, 1], [0, 2]], dtype=np.int64)))
    self.assertTrue(
        (weights == np.array([[1.0012168], [1.0024334], [1.0036502]],
                             dtype=np.float32)).all())
    _, l, g = sess.run(
        [train_op, loss,
         xdl.get_sparse_grads('sparse').grad])
    self.assertTrue((l == np.array(0.002395329, dtype=np.float32)).all())
    self.assertTrue(
        (g == np.array([[-0.0023924622], [-0.0047849244], [-0.0071773864]],
                       dtype=np.float32)).all())
    weights = sess.run(
        sparse_var.gather(
            np.array([[0, 0], [0, 1], [0, 2]], dtype=np.int64)))
    self.assertTrue(
        (weights == np.array([[1.002413], [1.0048258], [1.0072389]],
                             dtype=np.float32)).all())
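The expected weights in this test follow from one SGD step per lookup, w' = w - lr * g, with w initialized to 1.0 by xdl.Ones() and lr = 0.5. A quick check of the first update:

import numpy as np

g = np.array([[-0.002433472], [-0.004866944], [-0.007300416]], dtype=np.float32)
print(1.0 - 0.5 * g)  # ~[[1.0012168], [1.0024334], [1.0036502]], matching the assert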
Example #16
def main():
    dense = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
    labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")
    ids = xdl.convert_to_tensor(
        np.array([[0, 0], [0, 1], [0, 2]], dtype=np.int64))
    values = xdl.convert_to_tensor(np.array([1.0, 2.0, 3.0], dtype=np.float32))
    segments = xdl.convert_to_tensor(np.array([3], dtype=np.int32))
    sparse = xdl.SparseTensor(ids, values, segments)
    emb = xdl.embedding("sparse",
                        sparse,
                        xdl.Ones(),
                        1,
                        16,
                        'sum',
                        vtype='hash')
    loss = model(dense, emb, labels)
    train_op = xdl.SGD(0.5).optimize()
    sess = xdl.TrainSession()
    loss, gradients = sess.run([loss, xdl.get_sparse_grads('sparse').grad])
    return loss, gradients
Example #17
def run(name1, name2, scope, optimizer):
    with xdl.model_scope(scope):
        labels = xdl.mock_dense_op(shape=[1, 1], value=1.0)
        mock_embs = mock_embedding(name1, name2)
        loss = model(mock_embs, labels)
        if optimizer == 'sgd':
            train_op = xdl.SGD(0.5).optimize()
        elif optimizer == 'momentum':
            train_op = xdl.Momentum(0.005, 0.99).optimize()
        elif optimizer == 'ftrl':
            train_op = xdl.Ftrl(0.01).optimize()
        elif optimizer == 'adam':
            train_op = xdl.Adam(0.001).optimize()
        elif optimizer == 'adagrad':
            train_op = xdl.Adagrad(0.04, 0.1).optimize()
        elif optimizer == 'rmsprop':
            train_op = xdl.RMSProp(0.001).optimize()
        else:
            train_op = xdl.SGD(0.5).optimize()
        hooks = []
        sess = xdl.TrainSession(hooks)
        run_ops = [train_op, loss]
        op_names = ['none', 'loss']

        embed_vars = [
            var for var in trainable_variables_with_scope(scope)
            if is_embedding_var(var)
        ]
        sparse_embed_grads = []
        for var in embed_vars:
            sparse_embed_grads.append(xdl.get_sparse_grads(var.name))
            op_names.append(var.name + '.indices')
            op_names.append(var.name + '.grads')
        for i in range(len(sparse_embed_grads)):
            run_ops.append(sparse_embed_grads[i].indices)
            run_ops.append(sparse_embed_grads[i].grad)
        var_list = sess.run(run_ops)
        if name1 != name2:
            return var_list[3], var_list[5]
        return var_list[3]
Example #18
def train(is_training=True):
    #np.set_printoptions(threshold='nan')
    if is_training or xdl.get_task_index() == 0:
        init()
    else:
        return

    file_type = xdl.parsers.txt
    if is_training:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)

        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)

        data_io.batch_size(intconf('train_batch_size'))
        data_io.epochs(intconf('train_epochs'))
        data_io.threads(intconf('train_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('train_sample') + '_' + r'[\d]+'
        sharding = xdl.DataSharding(data_io.fs())
        sharding.add_path(data)
        paths = sharding.partition(rank=xdl.get_task_index(), size=xdl.get_task_num())
        print 'train: sharding.partition() =', paths
        data_io.add_path(paths)
        iop = xdl.GetIOP("TDMOP")
    else:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)

        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)

        data_io.batch_size(intconf('predict_batch_size'))
        data_io.epochs(intconf('predict_epochs'))
        data_io.threads(intconf('predict_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('test_sample')
        data_io.add_path(data)
        print 'predict: add_path =', data
        iop = xdl.GetIOP("TDMPREDICTOP")
        #data_io.finish_delay(True)
    assert iop is not None
    key_value = {}
    key_value["key"] = "value"
    key_value["debug"] = conf('tdmop_debug')
    key_value["layer_counts"] = conf('tdmop_layer_counts')
    key_value["pr_test_each_layer_retrieve_num"] = "400"
    key_value["pr_test_final_layer_retrieve_num"] = "200"
    iop.init(key_value)
    data_io.add_op(iop)
    data_io.split_group(False)
    if not is_training:
        data_io.keep_sample(True)
        data_io.pause(intconf('predict_io_pause_num'), True)
    data_io.startup()

    if not is_training:
        if xdl.get_task_index() == 0:
            saver = xdl.Saver()
            saver.restore(conf('saver_ckpt'))

    batch = data_io.read()

    emb_combiner = 'mean'    # mean | sum
    ind = batch["indicators"][0]
    ids = batch["_ids"][0]
    emb = []
    emb_dim = 24
    if is_training:
        feature_add_probability = 1.
    else:
        feature_add_probability = 0.
    import xdl.python.sparse_engine.embedding as embedding
    emb_name = "item_emb"
    for i in xrange(1, feature_count + 1):
        #emb_name = "item_%s_emb" % i
        eb = xdl.embedding(emb_name, batch["item_%s" % i], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)
        with xdl.device('GPU'):
            eb_take = xdl.take_op(eb, batch["indicators"][0])
        eb_take.set_shape(eb.shape)
        emb.append(eb_take)
    #emb_name = "unit_id_expand_emb"
    unit_id_expand_emb = xdl.embedding(emb_name, batch["unit_id_expand"], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)

    @xdl.mxnet_wrapper(is_training=is_training, device_type='gpu')
    def dnn_model_define(user_input, indicator, unit_id_emb, label, bs, eb_dim, fea_groups, active_op='prelu', use_batch_norm=True):
        # Split the user input into windows according to fea_groups and average-pool within each window
        fea_groups = [int(s) for s in fea_groups.split(',')]
        total_group_length = np.sum(np.array(fea_groups))
        print "fea_groups", fea_groups, "total_group_length", total_group_length, "eb_dim", eb_dim
        user_input_before_reshape = mx.sym.concat(*user_input)
        user_input = mx.sym.reshape(user_input_before_reshape, shape=(-1, total_group_length, eb_dim))
    
        layer_data = []
        # start att
        att_user_input = mx.sym.reshape(user_input, (bs, total_group_length, eb_dim))
        att_node_input = mx.sym.reshape(unit_id_emb, (bs, 1, eb_dim))
        att_node_input = mx.sym.broadcast_to(data=att_node_input, shape=(0, total_group_length, 0))
        att_din = mx.sym.concat(att_user_input, att_user_input * att_node_input, att_node_input, dim=2)

        att_active_op = 'prelu'
        att_layer_arr = []
        att_layer1 = FullyConnected3D(3*eb_dim, 36, active_op=att_active_op, version=1, batch_size=bs)
        att_layer_arr.append(att_layer1)
        att_layer2 = FullyConnected3D(36, 1, active_op=att_active_op, version=2, batch_size=bs)
        att_layer_arr.append(att_layer2)

        layer_data.append(att_din)
        for layer in att_layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        att_dout = layer_data[-1]
        att_dout = mx.sym.broadcast_to(data=att_dout, shape=(0, 0, eb_dim))

        user_input = mx.sym.reshape(user_input, shape=(bs, -1, eb_dim))
        user_input = user_input * att_dout
        # end att

        idx = 0
        for group_length in fea_groups:
            block_before_sum = mx.sym.slice_axis(user_input, axis=1, begin=idx, end=idx+group_length)
            block = mx.sym.sum_axis(block_before_sum, axis=1) / group_length
            if idx == 0:
                grouped_user_input = block
            else:
                grouped_user_input = mx.sym.concat(grouped_user_input, block, dim=1)
            idx += group_length
    
        indicator = mx.symbol.BlockGrad(indicator)
        label = mx.symbol.BlockGrad(label)
        # Expand the user features according to indicator, then feed them through the network
        #grouped_user_input_after_take = mx.symbol.take(grouped_user_input, indicator)
        grouped_user_input_after_take = grouped_user_input
        din = mx.symbol.concat(*[grouped_user_input_after_take, unit_id_emb], dim=1)
    
        net_version = "d"
        layer_arr = []
        layer1 = mx_dnn_layer(11 * eb_dim, 128, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (1, net_version))
        layer_arr.append(layer1)
        layer2 = mx_dnn_layer(128, 64, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (2, net_version))
        layer_arr.append(layer2)
        layer3 = mx_dnn_layer(64, 32, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (3, net_version))
        layer_arr.append(layer3)
        layer4 = mx_dnn_layer(32, 2, active_op='', use_batch_norm=False, version="%d_%s" % (4, net_version))
        layer_arr.append(layer4)
        #layer_data = [din]
        layer_data.append(din)
        for layer in layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        dout = layer_data[-1]
    
        # For a normal label the two columns sum to 1, while padded labels sum to 0, so subtracting 1 yields -1, which serves as the ignore label
        ph_label_sum = mx.sym.sum(label, axis=1)
        ph_label_ignore = ph_label_sum - 1
        ph_label_ignore = mx.sym.reshape(ph_label_ignore, shape=(-1, 1))
        ph_label_click = mx.sym.slice_axis(label, axis=1, begin=1, end=2)
        ph_label_click = ph_label_click + ph_label_ignore
        ph_label_click = mx.sym.reshape(ph_label_click, shape=(bs, ))
    
        prop = mx.symbol.SoftmaxOutput(data=dout, label=ph_label_click, grad_scale=1.0, use_ignore=True, normalization='valid')
        origin_loss = mx.sym.log(prop) * label
        ph_label_sum = mx.sym.reshape(ph_label_sum, shape=(bs, 1))
        origin_loss = mx.sym.broadcast_mul(origin_loss, ph_label_sum)
        loss = - mx.symbol.sum(origin_loss) / mx.sym.sum(ph_label_sum)
        return prop, loss

    re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb, batch["label"], data_io._batch_size, emb_dim, '20,20,10,10,2,2,2,1,1,1')
    prop = re[0]
    loss = re[1]

    if is_training:
        train_op = xdl.Adam(learning_rate=intconf('learning_rate'), lr_decay=False).optimize()
        #train_op = xdl.SGD(0.1).optimize()
        #fc_1_weight_grad = xdl.get_gradient("fc_w_1_d")
        #fc_1_bias_grad = xdl.get_gradient("fc_b_1_d")
    else:
        fin = data_io.set_prop(prop=prop)

    hooks = []
    if is_training:
        if conf("train_mode") == "sync":
            hooks.append(xdl.SyncRunHook(xdl.get_task_index(), xdl.get_task_num()))
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(intconf('save_checkpoint_interval'))
            hooks.append(ckpt_hook)
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    else:
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    hooks.append(log_hook)

    from xdl.python.training.training_utils import get_global_step
    global_step = get_global_step()

    sess = xdl.TrainSession(hooks)

    elapsed_time = 0.
    statis_begin_loop = 200
    loop_num = 0
    while not sess.should_stop():
        print ">>>>>>>>>>>> %d >>>>>>>>>>>" % loop_num
        begin_time = time.time()
        for itr in xrange(200):
            if is_training:
                result = sess.run([train_op, xdl.get_collection(xdl.UPDATE_OPS)])
                #result = sess.run([train_op, xdl.get_collection(xdl.UPDATE_OPS), unit_id_expand_emb])
            else:
                result = sess.run([loss, fin, global_step.value])
                #result = sess.run([loss, fin, ids, global_step.value])
            if result is None:
                print "result is None, finished success."
                break
            if not is_training:
                print "global_step =", result[-1]
                #print "batch['_ids'] =", result[-2]
            #else:
            #   print "unit_id_expand_emb = { mean =", result[-1].mean(), ", std =", result[-1].std(), "}"
            loop_num += 1
        if loop_num > statis_begin_loop:
            elapsed_time += time.time() - begin_time
            #print 'batch_size = %d, qps = %f batch/s' % (data_io._batch_size, (loop_num - statis_begin_loop) / elapsed_time)

    if is_training:
        xdl.execute(xdl.ps_synchronize_leave_op(np.array(xdl.get_task_index(), dtype=np.int32)))
        if xdl.get_task_index() == 0:
            print 'start put item_emb'
            def _string_to_int8(src):
                return np.array([ord(ch) for ch in src], dtype=np.int8)
            from xdl.python.utils.config import get_ckpt_dir
            output_dir = conf('model_url')
            op = xdl.ps_convert_ckpt_variable_op(checkpoint_dir=_string_to_int8(get_ckpt_dir()), 
                                                 output_dir=_string_to_int8(output_dir), 
                                                 variables=_string_to_int8("item_emb"))
            xdl.execute(op)
            shell_cmd("rm -f data/item_emb")
            shell_cmd("hadoop fs -get %s/item_emb data/item_emb" % output_dir)
            shell_cmd("sed -i 's/..//' data/item_emb")
            shell_cmd("hadoop fs -put -f data/item_emb %s" % output_dir)
            print 'finish put item_emb'
Example #19
def train():
    batch = data_io.read()
    print batch

    embs = list()

    for i in range(1, embs_len + 1):
        name = "item_%d" % i
        emb = xdl.embedding(name,
                            batch[name],
                            xdl.Ones(),
                            1,
                            1000,
                            'sum',
                            vtype='hash')
        embs.append(emb)
        print "emb =", name, ", shape =", emb.shape
    print "origin batch[label].shape =", batch["label"].shape

    loss, prop, label, indicator, din, dout, fc1_weight, fc1_bias, fc2_weight, fc2_bias = model(
        embs, batch["label"], 4, 7)
    train_op = xdl.SGD(0.5).optimize()

    item1_grad = xdl.get_gradient('item_1')
    item2_grad = xdl.get_gradient('item_2')
    item3_grad = xdl.get_gradient('item_3')
    item4_grad = xdl.get_gradient('item_4')
    fc1_weight_grad = xdl.get_gradient('fc1_weight')
    fc1_bias_grad = xdl.get_gradient('fc1_bias')
    fc2_weight_grad = xdl.get_gradient('fc2_weight')
    fc2_bias_grad = xdl.get_gradient('fc2_bias')

    sess = xdl.TrainSession()

    loop_num = 0
    while not sess.should_stop():
        if loop_num == 5:
            break
        print "\n>>>>>>>>>>>> loop_num = %d" % loop_num
        result = sess.run([train_op, loss, prop, batch['label'], label, indicator, din, dout, \
                           batch['item_1'].ids, batch['item_1'].segments, batch['item_1'].values, \
                           batch['item_2'].ids, batch['item_2'].segments, batch['item_2'].values, \
                           batch['item_3'].ids, batch['item_3'].segments, batch['item_3'].values, \
                           batch['item_4'].ids, batch['item_4'].segments, batch['item_4'].values, \
                           item1_grad, item2_grad, item3_grad, item4_grad, \
                           fc1_weight, fc1_bias, fc1_weight_grad, fc1_bias_grad, \
                           fc2_weight, fc2_bias, fc2_weight_grad, fc2_bias_grad])
        if result is None:
            break
        print "loss:", result[-31]
        print "prop:", result[-30]
        print "origin label:", result[-29]
        print "label:", result[-28]
        print "indicator:", result[-27]
        print "din:", result[-26]
        print "dout:", result[-25]
        print "item_1: ids=", result[-24], "\n        segments=", result[
            -23], "\n        values=", result[-22]
        print "item_2: ids=", result[-21], "\n        segments=", result[
            -20], "\n        values=", result[-19]
        print "item_3: ids=", result[-18], "\n        segments=", result[
            -17], "\n        values=", result[-16]
        print "item_4: ids=", result[-15], "\n        segments=", result[
            -14], "\n        values=", result[-13]
        print "item1_grad", result[-12]
        print "item2_grad", result[-11]
        print "item1_grad", result[-10]
        print "item2_grad", result[-9]
        print "fc1_weight", result[-8]
        print "fc1_bias", result[-7]
        print "fc1_weight_grad", result[-6]
        print "fc1_bias_grad", result[-5]
        print "fc2_weight", result[-4]
        print "fc2_bias", result[-3]
        print "fc2_weight_grad", result[-2]
        print "fc2_bias_grad", result[-1]
        loop_num += 1
Example #20
def test(train_file=train_file,
         test_file=test_file,
         uid_voc=uid_voc,
         mid_voc=mid_voc,
         cat_voc=cat_voc,
         item_info=item_info,
         reviews_info=reviews_info,
         batch_size=99,
         maxlen=100):

    if xdl.get_config('model') == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif xdl.get_config('model') == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien model')

    # create item cate dict
    i_c = {}
    for i in item_c:
        ii = i.strip().split('\t')
        i_c[ii[0]] = ii[1]

    saver = xdl.Saver()
    checkpoint_version = "ckpt-...............20000"
    saver.restore(version=checkpoint_version)

    last_hist = []
    target_list = []
    seq = []
    test_set = pkl.load(open(test_file, 'rb'))
    knn_table = pkl.load(
        open('../data/ali_knn_table/knn' + str(test_file[-5]) + '_no_pro2.pkl',
             'rb'))
    print('length before deal with : ', len(test_set))
    test_knn = open('../data/test_knn', 'w')
    count22 = 0
    for i in test_set:
        # knn
        ss = i.strip().split('\t')
        last = ss[4].split('/')[-1]

        # append last, target, and seq
        last_hist.append(last)
        target_list.append(ss[2])
        seq.append((ss[1], ss[4]))  # uid and hist
        knn = knn_table[last]

        for k in knn:
            count22 += 1
            if k in i_c:
                tmp = '1\t' + ss[1] + '\t' + k + '\t' + i_c[k] + '\t' + ss[
                    4] + '\t' + ss[5]
            else:
                tmp = '1\t' + ss[1] + '\t' + k + '\t' + 'UNK' + '\t' + ss[
                    4] + '\t' + ss[5]
            test_knn.write(tmp + '\n')

    test_knn.close()

    print('after last_hist :', len(last_hist))
    print('all test_knn length :', count22)

    # sample_io
    test_knn_f = os.path.join(get_data_prefix(), 'test_knn')
    sample_io = SampleIO(train_file, test_knn_f, uid_voc, mid_voc, cat_voc,
                         item_info, reviews_info, batch_size, maxlen,
                         EMBEDDING_DIM)

    print('all length:', len(last_hist))

    test_ops = model.build_final_net(EMBEDDING_DIM, sample_io, is_train=False)
    print('=' * 10 + 'start test' + '=' * 10)
    eval_sess = xdl.TrainSession()
    pro_all, test_auc, loss_sum, accuracy_sum, aux_loss_sum = eval_model(
        eval_sess, test_ops)
    print(
        'test_auc: %.4f ----test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f'
        % (test_auc, loss_sum, accuracy_sum, aux_loss_sum))

    print('after pro length :', len(pro_all))
    print('=' * 50)

    # sort the knn with prob
    rank_all_knn = {}
    rank = []
    for i in range(len(last_hist)):
        knn = knn_table[last_hist[i]]
        pro = pro_all[i]

        c = list(zip(knn, pro))
        c = sorted(c, key=lambda t: t[1], reverse=True)
        rank_all = [sss[0] for sss in c]
        rank_all_knn[seq[i][0]] = rank_all

        if target_list[i] in rank_all:
            rank.append(rank_all.index(target_list[i]) + 1)
        else:
            rank.append(100)

    # print(rank_all_knn)
    # save the result of re-rank
    user = [i[0] for i in seq]
    hist = [i[1] for i in seq]
    assert len(last_hist) == len(user)
    results = list(zip(user, hist, last_hist, target_list, rank))
    # results = pd.DataFrame(results, columns = ['last','target','rank'])
    # results.to_csv('ali_dien_rank.csv', index=False)
    with open('ali_dien_rank_4days' + test_file[-11:], 'wb') as d:
        pkl.dump(results, d)
Example #21
def train(is_training=True):
    if is_training or xdl.get_task_index() == 0:
        init()
    else:
        return

    file_type = xdl.parsers.txt
    if is_training:
        data_io = xdl.DataIO("tdm",
                             file_type=file_type,
                             fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000",
                             enable_state=False)

        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i),
                            type=xdl.features.sparse,
                            table=1)
        data_io.feature(name="unit_id_expand",
                        type=xdl.features.sparse,
                        table=0)

        data_io.batch_size(intconf('train_batch_size'))
        data_io.epochs(intconf('train_epochs'))
        data_io.threads(intconf('train_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('train_sample') + '_' + r'[\d]+'
        sharding = xdl.DataSharding(data_io.fs())
        sharding.add_path(data)
        paths = sharding.partition(rank=xdl.get_task_index(),
                                   size=xdl.get_task_num())
        print 'train: sharding.partition() =', paths
        data_io.add_path(paths)
        iop = xdl.GetIOP("TDMOP")
    else:
        data_io = xdl.DataIO("tdm",
                             file_type=file_type,
                             fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000",
                             enable_state=False)

        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i),
                            type=xdl.features.sparse,
                            table=1)
        data_io.feature(name="unit_id_expand",
                        type=xdl.features.sparse,
                        table=0)
        data_io.feature(name="test_unit_id", type=xdl.features.sparse, table=1)

        data_io.batch_size(intconf('predict_batch_size'))
        data_io.epochs(intconf('predict_epochs'))
        data_io.threads(intconf('predict_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('test_sample')
        data_io.add_path(data)
        print 'predict: add_path =', data
        iop = xdl.GetIOP("TDMPREDICTOP")
        #data_io.finish_delay(True)
    assert iop is not None
    key_value = {}
    key_value["key"] = "value"
    key_value["debug"] = conf('tdmop_debug')
    key_value["layer_counts"] = conf('tdmop_layer_counts')
    key_value["start_sample_layer"] = "22"
    key_value["pr_test_each_layer_retrieve_num"] = "400"
    key_value["pr_test_final_layer_retrieve_num"] = "200"
    if not is_training:
        key_value["expand_mode"] = "vector"
    iop.init(key_value)
    data_io.add_op(iop)
    data_io.split_group(False)
    data_io.startup()

    if not is_training:
        if xdl.get_task_index() == 0:
            saver = xdl.Saver()
            saver.restore(conf('saver_ckpt'))

    batch = data_io.read()

    emb_combiner = 'mean'  # mean | sum
    if not is_training:
        gt_ids = batch["_ids"][-1]
        gt_segments = batch["_segments"][-1]
    emb = []
    emb_dim = 24
    if is_training:
        feature_add_probability = 1.
    else:
        feature_add_probability = 0.
    import xdl.python.sparse_engine.embedding as embedding
    emb_name = "item_emb"
    for i in xrange(1, feature_count + 1):
        eb = xdl.embedding(emb_name,
                           batch["item_%s" % i],
                           xdl.Normal(stddev=0.001),
                           emb_dim,
                           50000,
                           emb_combiner,
                           vtype="hash",
                           feature_add_probability=feature_add_probability)
        with xdl.device('GPU'):
            eb_take = xdl.take_op(eb, batch["indicators"][0])
        eb_take.set_shape(eb.shape)
        emb.append(eb_take)
    unit_id_expand_emb = xdl.embedding(
        emb_name,
        batch["unit_id_expand"],
        xdl.Normal(stddev=0.001),
        emb_dim,
        50000,
        emb_combiner,
        vtype="hash",
        feature_add_probability=feature_add_probability)

    @xdl.mxnet_wrapper(is_training=is_training, device_type='gpu')
    def dnn_model_define(user_input,
                         indicator,
                         unit_id_emb,
                         label,
                         bs,
                         eb_dim,
                         sample_num,
                         fea_groups,
                         active_op='prelu',
                         use_batch_norm=True):
        # Split the user input into windows according to fea_groups and average-pool within each window
        fea_groups = [int(s) for s in fea_groups.split(',')]
        total_group_length = np.sum(np.array(fea_groups))
        print "fea_groups", fea_groups, "total_group_length", total_group_length, "eb_dim", eb_dim
        user_input_before_reshape = mx.sym.concat(*user_input)
        user_input = mx.sym.reshape(user_input_before_reshape,
                                    shape=(-1, total_group_length, eb_dim))

        idx = 0
        for group_length in fea_groups:
            block_before_sum = mx.sym.slice_axis(user_input,
                                                 axis=1,
                                                 begin=idx,
                                                 end=idx + group_length)
            block = mx.sym.sum_axis(block_before_sum, axis=1) / group_length
            if idx == 0:
                grouped_user_input = block
            else:
                grouped_user_input = mx.sym.concat(grouped_user_input,
                                                   block,
                                                   dim=1)
            idx += group_length

        indicator = mx.symbol.BlockGrad(indicator)
        label = mx.symbol.BlockGrad(label)
        grouped_user_input_after_take = grouped_user_input

        net_version = "e"
        layer_arr = []
        layer1 = mx_dnn_layer(10 * eb_dim,
                              128,
                              active_op=active_op,
                              use_batch_norm=use_batch_norm,
                              version="%d_%s" % (1, net_version))
        layer_arr.append(layer1)
        layer2 = mx_dnn_layer(128,
                              64,
                              active_op=active_op,
                              use_batch_norm=use_batch_norm,
                              version="%d_%s" % (2, net_version))
        layer_arr.append(layer2)
        layer3 = mx_dnn_layer(64,
                              24,
                              active_op='',
                              use_batch_norm=False,
                              version="%d_%s" % (3, net_version))
        layer_arr.append(layer3)

        layer_data = [grouped_user_input_after_take]
        for layer in layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        dout = layer_data[-1]

        inner_product = mx.sym.sum(dout * unit_id_emb, axis=1)

        softmax_input = mx.sym.Reshape(inner_product,
                                       shape=(bs / sample_num, sample_num))

        # Subtract 1 from the positive-example label to obtain the softmax label
        ph_label_click = mx.sym.slice_axis(label, axis=1, begin=1, end=2)
        ph_label_click = mx.sym.reshape(
            ph_label_click, shape=(bs / sample_num, sample_num)) - 1
        ph_label_click = mx.sym.slice_axis(ph_label_click,
                                           axis=1,
                                           begin=0,
                                           end=1)
        ph_label_click = mx.sym.reshape(ph_label_click,
                                        shape=(bs / sample_num, ))

        prop = mx.symbol.SoftmaxOutput(data=softmax_input,
                                       label=ph_label_click,
                                       normalization='valid',
                                       use_ignore=True)

        positive_prop = mx.sym.slice_axis(prop, axis=1, begin=0, end=1)
        positive_prop = mx.sym.reshape(positive_prop,
                                       shape=(bs / sample_num, ))

        # The effective sample count is (bs / sample_num) minus the number of labels to ignore
        loss = -mx.sym.sum(mx.symbol.log(positive_prop)) / (
            bs / sample_num + mx.sym.sum(ph_label_click))

        user_vector = mx.sym.reshape(dout,
                                     shape=(bs / sample_num, sample_num,
                                            eb_dim))
        user_vector = mx.sym.slice_axis(user_vector, axis=1, begin=0, end=1)
        user_vector = mx.sym.reshape(user_vector,
                                     shape=(bs / sample_num, eb_dim))

        return prop, loss, mx.sym.BlockGrad(user_vector)

    if is_training:
        re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb,
                              batch["label"], data_io._batch_size, emb_dim,
                              600, '20,20,10,10,2,2,2,1,1,1')
    else:
        re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb,
                              batch["label"], data_io._batch_size, emb_dim, 1,
                              '20,20,10,10,2,2,2,1,1,1')
    prop = re[0]
    loss = re[1]

    if is_training:
        train_op = xdl.Adam(learning_rate=intconf('learning_rate')).optimize()
    else:
        user_vector = re[2]

    hooks = []
    if is_training:
        if conf("train_mode") == "sync":
            hooks.append(
                xdl.SyncRunHook(xdl.get_task_index(), xdl.get_task_num()))
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(intconf('save_checkpoint_interval'))
            hooks.append(ckpt_hook)
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    else:
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    hooks.append(log_hook)

    from xdl.python.training.training_utils import get_global_step
    global_step = get_global_step()

    sess = xdl.TrainSession(hooks)

    elapsed_time = 0.
    statis_begin_loop = 200
    loop_num = 0

    if not is_training:
        urun_re = iop.urun({"get_level_ids": key_value["start_sample_layer"]})
        item_num = len(urun_re)
        item_ids = np.array([int(iid) for iid in urun_re.keys()],
                            dtype=np.int64).reshape((item_num, 1))
        print 'item_ids shape: '
        print item_ids.shape
        zeros = np.zeros((item_num, 1), dtype=np.int64)
        hash_ids = np.concatenate((zeros, item_ids), axis=1)
        item_embeddings = xdl.execute(
            xdl.ps_sparse_pull_op(hash_ids,
                                  var_name="item_emb",
                                  var_type="hash",
                                  save_ratio=1.0,
                                  otype=xdl.DataType.float))
        item_embeddings = item_embeddings.transpose()
        print 'item_embeddings shape: '
        print item_embeddings.shape

        hit_num_list = []
        precision_list = []
        recall_list = []
        gt_num_list = []
        user_idx = 1

    while not sess.should_stop():
        print ">>>>>>>>>>>> %d >>>>>>>>>>>" % loop_num
        begin_time = time.time()
        for itr in xrange(200):
            if is_training:
                result = sess.run(
                    [train_op, xdl.get_collection(xdl.UPDATE_OPS)])
            else:
                result = sess.run(
                    [user_vector, global_step.value, gt_ids, gt_segments])
            if result is None:
                print "result is None, finished success."
                break
            if not is_training:
                print "global_step =", result[1]
                batch_uv = result[0]
                batch_gt = result[2]
                batch_seg = result[3]

                batch_uv = batch_uv[0:len(batch_seg)]
                batch_scores = np.matmul(batch_uv, item_embeddings)

                sorted_idx = np.argsort(-batch_scores, axis=1)

                sorted_idx = sorted_idx[:, :int(
                    key_value["pr_test_final_layer_retrieve_num"])]
                gt_id_start_idx = 0
                for i in xrange(len(batch_seg)):
                    pred_set = set(item_ids[sorted_idx[i, :], 0])
                    gt_dict = {}
                    for gt in batch_gt[gt_id_start_idx:batch_seg[i], 1]:
                        if gt in gt_dict:
                            gt_dict[gt] += 1
                        else:
                            gt_dict[gt] = 1

                    test_gt_list = batch_gt[gt_id_start_idx:batch_seg[i],
                                            1].tolist()
                    test_gt_str = ','.join(
                        [str(gtid) for gtid in test_gt_list])
                    test_pred_list = item_ids[sorted_idx[i, :], 0].tolist()
                    test_pred_str = ','.join(
                        [str(gtid) for gtid in test_pred_list])

                    user_idx += 1

                    gt_set = set(batch_gt[gt_id_start_idx:batch_seg[i], 1])
                    comm_set = gt_set.intersection(pred_set)

                    hit_num = sum([
                        float(gt_dict[item]) if item in gt_dict else 0.0
                        for item in comm_set
                    ])
                    hit_num_list.append(hit_num)

                    if len(pred_set) > 0:
                        precision = hit_num / len(pred_set)
                    else:
                        precision = 0.0

                    if len(gt_dict) > 0:
                        recall = hit_num / (batch_seg[i] - gt_id_start_idx)
                    else:
                        recall = 0.0

                    precision_list.append(precision)
                    recall_list.append(recall)
                    gt_num_list.append(float(batch_seg[i] - gt_id_start_idx))

                    gt_id_start_idx = batch_seg[i]

                print "=================================================="
                print 'predicted user num is: %d' % len(hit_num_list)
                print 'gt num is: %f' % sum(gt_num_list)
                print 'precision: %f' % (sum(precision_list) /
                                         len(hit_num_list))
                print 'recall: %f' % (sum(recall_list) / len(hit_num_list))
                print 'global recall: %f' % (sum(hit_num_list) /
                                             sum(gt_num_list))
                print "=================================================="

            loop_num += 1
        if loop_num > statis_begin_loop:
            elapsed_time += time.time() - begin_time
            #print 'batch_size = %d, qps = %f batch/s' % (data_io._batch_size, (loop_num - statis_begin_loop) / elapsed_time)

    if not is_training:
        print "=================================================="
        print 'predicted user num is: %d' % len(hit_num_list)
        print 'gt num is: %f' % sum(gt_num_list)
        print 'precision: %f' % (sum(precision_list) / len(hit_num_list))
        print 'recall: %f' % (sum(recall_list) / len(hit_num_list))
        print 'global recall: %f' % (sum(hit_num_list) / sum(gt_num_list))
        print "=================================================="

    if is_training:
        xdl.execute(
            xdl.ps_synchronize_leave_op(
                np.array(xdl.get_task_index(), dtype=np.int32)))
        if xdl.get_task_index() == 0:
            print 'start put item_emb'

            def _string_to_int8(src):
                return np.array([ord(ch) for ch in src], dtype=np.int8)

            from xdl.python.utils.config import get_ckpt_dir
            output_dir = conf('model_url')
            op = xdl.ps_convert_ckpt_variable_op(
                checkpoint_dir=_string_to_int8(get_ckpt_dir()),
                output_dir=_string_to_int8(output_dir),
                variables=_string_to_int8("item_emb"))
            xdl.execute(op)
            shell_cmd("rm -f data/item_emb")
            shell_cmd("hadoop fs -get %s/item_emb data/item_emb" % output_dir)
            shell_cmd("sed -i 's/..//' data/item_emb")
            shell_cmd("hadoop fs -put -f data/item_emb %s" % output_dir)
            print 'finish put item_emb'
Example #22
def run(is_training, files):

    data_io = reader("esmm", files, 2, batch_size, 2, user_fn, ad_fn)
    batch = data_io.read()

    user_embs = list()
    for fn in user_fn:
        emb = xdl.embedding('u_' + fn,
                            batch[fn],
                            xdl.TruncatedNormal(stddev=0.001),
                            embed_size,
                            1000,
                            'sum',
                            vtype='hash')
        user_embs.append(emb)

    ad_embs = list()
    for fn in ad_fn:
        emb = xdl.embedding('a_' + fn,
                            batch[fn],
                            xdl.TruncatedNormal(stddev=0.001),
                            embed_size,
                            1000,
                            'sum',
                            vtype='hash')
        ad_embs.append(emb)

    var_list = model(is_training)(ad_embs, user_embs, batch["indicators"][0],
                                  batch["label"])
    keys = [
        'loss', 'ctr_prop', 'ctcvr_prop', 'cvr_prop', 'ctr_label',
        'ctcvr_label', 'cvr_label'
    ]
    run_vars = dict(zip(keys, list(var_list)))

    hooks = []
    if is_training:
        train_op = xdl.Adam(lr).optimize()
        hooks = get_collection(READER_HOOKS)
        if hooks is None:
            hooks = []
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(1000)
            hooks.append(ckpt_hook)

        run_vars.update({None: train_op})

    if is_debug > 1:
        print("=========gradients")
        grads = xdl.get_gradients()
        grads_keys = sorted(grads[''].keys())
        for key in grads_keys:
            run_vars.update({"grads {}".format(key): grads[''][key]})

    hooks.append(QpsMetricsHook())
    log_format = "lstep[%(lstep)s] gstep[%(gstep)s] " \
                 "lqps[%(lqps)s] gqps[%(gqps)s]"
    hooks.append(MetricsPrinterHook(log_format, 100))

    ckpt = xdl.get_config("checkpoint", "ckpt")
    if ckpt is not None and len(ckpt) > 0:
        if int(xdl.get_task_index()) == 0:
            from xdl.python.training.saver import Saver
            saver = Saver()
            print("restore from %s" % ckpt)
            saver.restore(ckpt)
        else:
            time.sleep(120)

    sess = xdl.TrainSession(hooks)

    if is_training:
        itr = 1
        ctr_auc = Auc('ctr')
        ctcvr_auc = Auc('ctcvr')
        cvr_auc = Auc('cvr')
        while not sess.should_stop():
            print('iter=', itr)
            values = sess.run(list(run_vars.values()))
            if not values:
                continue
            value_map = dict(zip(run_vars.keys(), values))
            print('loss=', value_map['loss'])
            ctr_auc.add(value_map['ctr_prop'], value_map['ctr_label'])
            ctcvr_auc.add(value_map['ctcvr_prop'], value_map['ctcvr_label'])
            cvr_auc.add_with_filter(value_map['cvr_prop'],
                                    value_map['cvr_label'],
                                    np.where(value_map['ctr_label'] == 1))
            itr += 1
        ctr_auc.show()
        ctcvr_auc.show()
        cvr_auc.show()
    else:
        ctr_test_auc = Auc('ctr')
        ctcvr_test_auc = Auc('ctcvr')
        cvr_test_auc = Auc('cvr')
        for i in range(test_batch_num):
            print('iter=', i + 1)
            values = sess.run(list(run_vars.values()))
            value_map = dict(zip(run_vars.keys(), values))
            print('test_loss=', value_map['loss'])
            ctr_test_auc.add(value_map['ctr_prop'], value_map['ctr_label'])
            ctcvr_test_auc.add(value_map['ctcvr_prop'],
                               value_map['ctcvr_label'])
            cvr_test_auc.add_with_filter(value_map['cvr_prop'],
                                         value_map['cvr_label'],
                                         np.where(value_map['ctr_label'] == 1))
        ctr_test_auc.show()
        ctcvr_test_auc.show()
        cvr_test_auc.show()
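Example #22 depends on an Auc helper that accumulates per-batch scores and labels and reports the metric on show(). Below is a minimal sketch of such an accumulator using the rank-sum formulation of AUC; the class layout and add_with_filter semantics are assumptions, not the original implementation:

import numpy as np

class Auc(object):
    def __init__(self, name):
        self.name = name
        self.props, self.labels = [], []

    def add(self, prop, label):
        self.props.append(np.asarray(prop).ravel())
        self.labels.append(np.asarray(label).ravel())

    def add_with_filter(self, prop, label, idx):
        # Keep only the rows selected by idx (e.g. samples with ctr_label == 1).
        self.add(np.asarray(prop)[idx], np.asarray(label)[idx])

    def show(self):
        p, y = np.concatenate(self.props), np.concatenate(self.labels)
        order = p.argsort()
        ranks = np.empty(len(p))
        ranks[order] = np.arange(1, len(p) + 1)
        n_pos = int((y == 1).sum())
        n_neg = len(y) - n_pos
        # Rank-sum AUC: (sum of positive ranks - n_pos*(n_pos+1)/2) / (n_pos*n_neg)
        auc = (ranks[y == 1].sum() - n_pos * (n_pos + 1) / 2.0) / max(n_pos * n_neg, 1)
        print('%s auc: %f' % (self.name, auc))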