def _wrapper(*inputs, **kwargs):
    forwards = forward_inputs if isinstance(forward_inputs, list) else [forward_inputs]
    backwards = backward_inputs if isinstance(backward_inputs, list) else [backward_inputs]
    with xdl.model_scope("ams_gear_forward"):
        with tf.variable_scope(name_or_scope='', reuse=tf.AUTO_REUSE):
            forward_results = tf_wrapper(is_training=False, **tf_args)(gear_fn)(
                *(forwards + list(inputs[1:])), **kwargs)
    with xdl.model_scope("ams_gear_backward"):
        with tf.variable_scope(name_or_scope='', reuse=tf.AUTO_REUSE):
            _ = tf_wrapper(init_grad=init_grad, **tf_args)(gear_fn)(
                *(backwards + list(inputs[1:])), **kwargs)
    return forward_results
def train(train_file=train_file,
          test_file=test_file,
          uid_voc=uid_voc,
          mid_voc=mid_voc,
          cat_voc=cat_voc,
          item_info=item_info,
          reviews_info=reviews_info,
          batch_size=128,
          maxlen=100,
          test_iter=700):
    model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
                       LIGHT_EMBEDDING_DIM, LIGHT_HIDDEN_SIZE, LIGHT_ATTENTION_SIZE,
                       use_rocket_training=use_rocket_training())
    sample_io = SampleIO(train_file, test_file, uid_voc, mid_voc,
                         cat_voc, item_info, reviews_info, batch_size, maxlen,
                         embedding_dim=EMBEDDING_DIM,
                         light_embedding_dim=LIGHT_EMBEDDING_DIM)

    with xdl.model_scope('train'):
        train_ops = model.build_final_net(EMBEDDING_DIM, LIGHT_EMBEDDING_DIM, sample_io)
        lr = 0.001  # Adam / Adagrad learning rate
        train_ops.append(xdl.Adam(lr).optimize())
        log_format = ("[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] "
                      "lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]")
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        if xdl.get_task_index() == 0:
            hooks.append(
                xdl.CheckpointHook(xdl.get_config('checkpoint', 'save_interval')))
        train_sess = xdl.TrainSession(hooks=hooks)

    with xdl.model_scope('test'):
        test_ops = model.build_final_net(EMBEDDING_DIM, LIGHT_EMBEDDING_DIM,
                                         sample_io, is_train=False)
        test_sess = xdl.TrainSession()

    model.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
def _wrapper(*inputs, **kwargs):
    forwards = forward_inputs if isinstance(forward_inputs, list) else [forward_inputs]
    backwards = backward_inputs if isinstance(backward_inputs, list) else [backward_inputs]
    with xdl.model_scope("ams_gear_forward"):
        forward_results = mxnet_wrapper(is_training=False, device_type=device_type)(gear_fn)(
            *(forwards + list(inputs[1:])), **kwargs)
    with xdl.model_scope("ams_gear_backward"):
        _ = mxnet_wrapper(init_grad=init_grad, device_type=device_type)(gear_fn)(
            *(backwards + list(inputs[1:])), **kwargs)
    return forward_results
def train():
    if model_type == 'din_mogujie':
        model = Model_DIN_MOGUJIE(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
                                  False, train_file, batch_size)
    else:
        raise Exception('only support din_mogujie')

    with xdl.model_scope('train'):
        train_ops = model.build_network()
        lr = 0.001  # Adam / Adagrad learning rate
        train_ops.append(xdl.Adam(lr).optimize())
        log_format = ("[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] "
                      "lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]")
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        if xdl.get_task_index() == 0:
            hooks.append(xdl.CheckpointHook(save_interval))
        train_sess = xdl.TrainSession(hooks=hooks)

    # Evaluation path is currently disabled.
    """
    with xdl.model_scope('test'):
        test_ops = model.build_network(EMBEDDING_DIM, is_train=False)
        test_sess = xdl.TrainSession()
    """

    model.run(train_ops, train_sess)
def run(name1, name2, scope):
    with xdl.model_scope(scope):
        labels = xdl.mock_dense_op(shape=[1, 1], value=1.0)
        mock_embs = mock_embedding(name1, name2)
        loss = model(mock_embs, labels)
        train_op = xdl.SGD(lr).optimize()
        hooks = []
        sess = xdl.TrainSession(hooks)
        run_ops = [train_op, loss]
        op_names = ['none', 'loss']
        embed_vars = [var for var in trainable_variables() if is_embedding_var(var)]
        sparse_embed_grads = []
        for var in embed_vars:
            sparse_embed_grads.append(xdl.get_sparse_grads(var.name))
            op_names.append(var.name + '.indices')
            op_names.append(var.name + '.grads')
        for i in range(len(sparse_embed_grads)):
            run_ops.append(sparse_embed_grads[i].indices)
            run_ops.append(sparse_embed_grads[i].grad)
        var_list = sess.run(run_ops)
        # var_list layout: [train_op, loss, indices_0, grads_0, indices_1, grads_1, ...]
        if name1 != name2:
            return var_list[3], var_list[5]
        return var_list[3]
def train():
    images, labels = xdl.py_func(read_train, [], output_type=[np.float32, np.float32])
    images_test, labels_test = xdl.py_func(read_test, [], output_type=[np.float32, np.float32])
    with xdl.model_scope('train'):
        loss = model(images, labels)
        train_op = xdl.Adagrad(0.5).optimize()
        train_sess = xdl.TrainSession()
    with xdl.model_scope('test'):
        accuracy = eval_model(images_test, labels_test)
        eval_sess = xdl.TrainSession()
    for _ in range(100):
        for _ in range(1000):
            train_sess.run(train_op)
        print("accuracy %s" % eval_sess.run(accuracy))
def gear():
    forward = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="forward")
    backward = xdl.mock_dense_op(shape=[1, 16], value=0.02, name_="backward")
    labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label1")
    init_grad = xdl.mock_dense_op(shape=[1, 1], value=0.3, name_="init_grad")
    forward.set_shape([1, 16])
    backward.set_shape([1, 16])
    labels.set_shape([1, 1])
    init_grad.set_shape([1, 1])
    predict = ams_gear([forward], [backward], init_grad)(gear_model)(None)
    with xdl.model_scope("ams_gear_forward"):
        sess = xdl.TrainSession()
        prediction = sess.run(predict)
    with xdl.model_scope("ams_gear_backward"):
        grads = xdl.get_gradient("fc_weight")
        sess = xdl.TrainSession()
        fc_weight_grad = sess.run(grads)
    return prediction, fc_weight_grad
def train(train_file=train_file,
          test_file=test_file,
          uid_voc=uid_voc,
          mid_voc=mid_voc,
          cat_voc=cat_voc,
          item_info=item_info,
          reviews_info=reviews_info,
          batch_size=128,
          maxlen=100,
          test_iter=700):
    if xdl.get_config('model') == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif xdl.get_config('model') == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien')
    sample_io = SampleIO(train_file, test_file, uid_voc, mid_voc,
                         cat_voc, item_info, reviews_info, batch_size,
                         maxlen, EMBEDDING_DIM)

    with xdl.model_scope('train'):
        train_ops = model.build_final_net(EMBEDDING_DIM, sample_io)
        lr = 0.001  # Adam / Adagrad learning rate
        train_ops.append(xdl.Adam(lr).optimize())
        log_format = ("[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] "
                      "lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]")
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        if xdl.get_task_index() == 0:
            hooks.append(
                xdl.CheckpointHook(xdl.get_config('checkpoint', 'save_interval')))
        train_sess = xdl.TrainSession(hooks=hooks)

    with xdl.model_scope('test'):
        test_ops = model.build_final_net(EMBEDDING_DIM, sample_io, is_train=False)
        test_sess = xdl.TrainSession()

    print('=' * 10 + 'start train' + '=' * 10)
    model.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
def main():
    dense = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
    gear = xdl.mock_dense_op(shape=[1, 1], value=0.01, name_="gear")
    labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")
    gear.set_shape([1, 1])
    dense.set_shape([1, 16])
    labels.set_shape([1, 1])
    with xdl.model_scope("ams_main"):
        loss = ams_main(main_model)(dense, labels, gear_inputs=[gear])
        sess = xdl.TrainSession()
        return sess.run([xdl.get_collection("gear_grad")])
def ad_graph_train(forward, backward, grads):
    server_size = xdl.current_env().model_server_size(xdl.current_env().task_name())
    server_id = xdl.current_env().task_id()
    forward_img = xdl.hdfs_data_source_op(forward, img_data_source, server_size,
                                          server_id, 4096, xdl.DataType.float)
    backward_img = xdl.hdfs_data_source_op(backward, img_data_source, server_size,
                                           server_id, 4096, xdl.DataType.float)
    forward_img.set_shape([None, 4096])
    backward_img.set_shape([None, 4096])
    grads.set_shape([None, 12])
    predict = ams_gear([forward_img], [backward_img], grads)(ad_graph_model)(None)[0]
    with xdl.model_scope("ams_gear_backward"):
        optimizer = xdl.Adam(0.0005).optimize(update_global_step=False)
    return predict, optimizer
def train(train_file=train_file,
          test_file=test_file,
          uid_voc=uid_voc,
          mid_voc=mid_voc,
          cat_voc=cat_voc,
          item_info=item_info,
          reviews_info=reviews_info,
          batch_size=128,
          maxlen=100,
          test_iter=700):
    if xdl.get_config('model') == 'din':
        model = Model_DIN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    elif xdl.get_config('model') == 'dien':
        model = Model_DIEN(EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
    else:
        raise Exception('only support din and dien')
    sample_io = SampleIO(train_file, test_file, uid_voc, mid_voc,
                         cat_voc, item_info, reviews_info, batch_size,
                         maxlen, EMBEDDING_DIM)

    with xdl.model_scope('train'):
        train_ops = model.build_final_net(EMBEDDING_DIM, sample_io)
        lr = 0.001  # Adam / Adagrad learning rate
        train_ops.append(xdl.Adam(lr).optimize())
        log_format = ("[%(time)s] lstep[%(lstep)s] gstep[%(gstep)s] "
                      "lqps[%(lqps)s] gqps[%(gqps)s] loss[%(loss)s]")
        hooks = [QpsMetricsHook(), MetricsPrinterHook(log_format)]
        if xdl.get_task_index() == 0:
            hooks.append(
                xdl.CheckpointHook(xdl.get_config('checkpoint', 'save_interval')))
        train_sess = xdl.TrainSession(hooks=hooks)

    with xdl.model_scope('test'):
        test_ops = model.build_final_net(EMBEDDING_DIM, sample_io, is_train=False)
        test_sess = xdl.TrainSession()

    model.run(train_ops, train_sess, test_ops, test_sess, test_iter=test_iter)
def train():
    images, labels = xdl.py_func(read_train, [], output_type=[np.float32, np.float32])
    images_test, labels_test = xdl.py_func(read_test, [], output_type=[np.float32, np.float32])
    with xdl.model_scope('train'):
        loss = model(images, labels)
        train_op = xdl.Adagrad(0.5).optimize()
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(1000)
            train_sess = xdl.TrainSession(hooks=[ckpt_hook])
        else:
            train_sess = xdl.TrainSession()
    with xdl.model_scope('test'):
        accuracy = eval_model(images_test, labels_test)
        eval_sess = xdl.TrainSession()
    for _ in range(100):
        for _ in range(1000):
            train_sess.run(train_op)
        print("accuracy %s" % eval_sess.run(accuracy))
def main():
    dense = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
    gear = xdl.mock_dense_op(shape=[1, 1], value=0.01, name_="gear")
    labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")
    ids, values, segments = xdl.mock_sparse_op(dense_shape=[1, 16], name_="wide")
    sparse = xdl.SparseTensor(ids, values, segments)
    emb = xdl.embedding("sparse", sparse, xdl.Ones(), 1, 16, 'sum')
    gear.set_shape([None, 1])
    dense.set_shape([None, 16])
    labels.set_shape([None, 1])
    with xdl.model_scope("ams_main"):
        loss = ams_main(main_model)(dense, emb, labels, gear_inputs=[gear])
        sess = xdl.TrainSession()
        return sess.run(xdl.get_collection("gear_grad"))
def user_graph_train(forward, backward, grads):
    server_size = xdl.current_env().model_server_size(xdl.current_env().task_name())
    server_id = xdl.current_env().task_id()
    forward_img = xdl.hdfs_data_source_op(forward, img_data_source, server_size,
                                          server_id, 4096, xdl.DataType.float)
    backward_img = xdl.hdfs_data_source_op(backward, img_data_source, server_size,
                                           server_id, 4096, xdl.DataType.float)
    forward_img.set_shape([None, 4096])
    backward_img.set_shape([None, 4096])
    grads.set_shape([None, 12])
    predict = ams_gear([forward_img], [backward_img], grads)(user_graph_model)(None)[0]
    with xdl.model_scope("ams_gear_backward"):
        optimizer = xdl.Adam(0.0005).optimize(update_global_step=False)
    return predict, optimizer
def run(name1, name2, scope, optimizer):
    with xdl.model_scope(scope):
        labels = xdl.mock_dense_op(shape=[1, 1], value=1.0)
        mock_embs = mock_embedding(name1, name2)
        loss = model(mock_embs, labels)
        if optimizer == 'sgd':
            train_op = xdl.SGD(0.5).optimize()
        elif optimizer == 'momentum':
            train_op = xdl.Momentum(0.005, 0.99).optimize()
        elif optimizer == 'ftrl':
            train_op = xdl.Ftrl(0.01).optimize()
        elif optimizer == 'adam':
            train_op = xdl.Adam(0.001).optimize()
        elif optimizer == 'adagrad':
            train_op = xdl.Adagrad(0.04, 0.1).optimize()
        elif optimizer == 'rmsprop':
            train_op = xdl.RMSProp(0.001).optimize()
        else:
            train_op = xdl.SGD(0.5).optimize()
        hooks = []
        sess = xdl.TrainSession(hooks)
        run_ops = [train_op, loss]
        op_names = ['none', 'loss']
        embed_vars = [var for var in trainable_variables_with_scope(scope)
                      if is_embedding_var(var)]
        sparse_embed_grads = []
        for var in embed_vars:
            sparse_embed_grads.append(xdl.get_sparse_grads(var.name))
            op_names.append(var.name + '.indices')
            op_names.append(var.name + '.grads')
        for i in range(len(sparse_embed_grads)):
            run_ops.append(sparse_embed_grads[i].indices)
            run_ops.append(sparse_embed_grads[i].grad)
        var_list = sess.run(run_ops)
        # var_list layout: [train_op, loss, indices_0, grads_0, indices_1, grads_1, ...]
        if name1 != name2:
            return var_list[3], var_list[5]
        return var_list[3]
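# A minimal usage sketch of run() above. The embedding names 'emb1'/'emb2' and the
# scope string 'adam_test' are illustrative assumptions, not taken from the original
# code; any names understood by mock_embedding() would work. With two distinct
# embedding names, run() returns the sparse gradients of both embedding variables
# after a single optimizer step.
if __name__ == '__main__':
    grad1, grad2 = run('emb1', 'emb2', 'adam_test', 'adam')
    print(grad1, grad2)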