def main(unused_args):
    # Start from a clean model directory so training does not resume from old checkpoints.
    model_dir = 'model/'
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    else:
        shutil.rmtree(model_dir, ignore_errors=True)

    data_dir = 'data/chart.csv'
    run_config = tf.estimator.RunConfig(model_dir=model_dir)
    conf = config.Config()

    # Sliding-window split: 3000 rows for training, the following 250 rows for testing.
    train_idx = 0
    train_window = 3000
    test_window = 250
    raw_data = read_data.read_csv(data_dir)
    train_data = read_data.slide_window(raw_data, train_idx, train_window)
    test_data = read_data.slide_window(raw_data, train_idx + train_window, test_window)
    train_x, train_y = read_data.create_dataset(train_data, conf.input_num, conf.num_step)
    test_x, test_y = read_data.create_dataset(test_data, conf.input_num, 1)

    # Build, train, and evaluate the recurrent reinforcement-learning model.
    model = RRL.create_model(conf, run_config)
    RRL.train_model(model, train_x, train_y, conf)
    actions, rewards = RRL.test_model(model, test_x, test_y, conf)
    print(rewards[:, -1])
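# A minimal sketch of the imports and entry point this script assumes. The helper
# modules (config, read_data, RRL) are project-local, and invoking main() through
# tf.app.run() is an assumption based on the main(unused_args) signature.
import os
import shutil

import tensorflow as tf

import config
import read_data
import RRL

if __name__ == '__main__':
    # tf.app.run() parses command-line flags and then calls main(unused_args).
    tf.app.run(main=main)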
def _plugin_GPU_op_VS_tf_ops():
    """
    Compare the result of converting to CSR between plugin GPU ops and tf ops.
    """
    print("[INFO]: converting to CSR, plugin GPU vs tf ops..")
    dataset_names = ['./performance_profile/train.tfrecord']

    # Pipeline whose keys are converted to CSR on the GPU by the HugeCTR plugin op.
    dataset_gpu = create_dataset(dataset_names=dataset_names,
                                 feature_desc=feature_desc,
                                 batch_size=batch_size,
                                 n_epochs=1,
                                 distribute_keys=True,
                                 gpu_count=gpu_count,
                                 embedding_type=embedding_type,
                                 use_which_device='gpu')
    # Reference pipeline that builds the CSR representation with plain TF ops.
    dataset_tf = CreateDataset(dataset_names=dataset_names,
                               feature_desc=feature_desc,
                               batch_size=batch_size,
                               n_epochs=1,
                               slot_num=26,
                               max_nnz=1,
                               convert_to_csr=True,
                               gpu_count=gpu_count,
                               embedding_type=embedding_type)()

    dataset_gpu = iter(dataset_gpu)
    dataset_tf = iter(dataset_tf)

    for iter_i in range(iterations):
        row_indices, values, nnz_array_gpu = next(dataset_gpu)[2:5]
        row_offsets_gpu, value_tensor_gpu, nnz_array_gpu = hugectr_tf_ops.distribute_keys_gpu(
            row_indices=row_indices,
            values=values,
            embedding_name='hugectr_embedding',
            embedding_type=embedding_type,
            batch_size=batch_size,
            slot_num=26,
            gpu_count=gpu_count,
            max_nnz=1)

        row_offsets_tf, value_tensor_tf, nnz_array_tf = next(dataset_tf)[2:5]

        # The GPU op may pad its outputs, so compare only the leading columns
        # that the TF reference actually produces.
        try:
            tf.debugging.assert_equal(row_offsets_gpu[:, 0:row_offsets_tf.shape[1]],
                                      row_offsets_tf)
            tf.debugging.assert_equal(value_tensor_gpu[:, 0:value_tensor_tf.shape[1]],
                                      value_tensor_tf)
            tf.debugging.assert_equal(nnz_array_gpu, nnz_array_tf)
        except tf.errors.InvalidArgumentError as error:
            raise RuntimeError("Error in %s, gpu_count %d, batch_size %d." %
                               (embedding_type, gpu_count, batch_size),
                               error.message)

        print("[INFO]: For %s and gpu_count: %d, batch_size: %d, iteration: %d, results are the same." %
              (embedding_type, gpu_count, batch_size, iter_i))

    hugectr_tf_ops.reset()
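# Hypothetical driver for the comparison above. The function reads the module-level
# globals feature_desc, batch_size, gpu_count, embedding_type and iterations; the
# sweep values below are assumptions, and feature_desc is assumed to be built the
# same way as in the training scripts that follow.
if __name__ == '__main__':
    iterations = 10
    for embedding_type in ['distributed', 'localized']:
        for gpu_count in [1, 4, 8]:
            for batch_size in [512, 16384]:
                _plugin_GPU_op_VS_tf_ops()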
def main(args):
    # ---------------- feature description for Criteo dataset in tfrecord ----------- #
    cols = [utils.idx2key(idx, False) for idx in range(0, utils.NUM_TOTAL_COLUMNS)]
    feature_desc = dict()
    for col in cols:
        if col == 'label' or col.startswith("I"):
            feature_desc[col] = tf.io.FixedLenFeature([], tf.int64)  # scalar
        else:
            feature_desc[col] = tf.io.FixedLenFeature([1], tf.int64)  # [slot_num, nnz]

    # -------------- create dataset pipeline ---------------------------------------- #
    dataset_names = [args.data_path + "/train_0.tfrecord"]
    dataset = create_dataset(dataset_names=dataset_names,
                             feature_desc=feature_desc,
                             batch_size=args.batch_size,
                             n_epochs=args.n_epochs,
                             distribute_keys=tf.constant(True, dtype=tf.bool),
                             gpu_count=len(args.gpus),
                             embedding_type=tf.constant('distributed', dtype=tf.string))

    # ----------- build model and optimizer ------------------------------------------ #
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    model = DeepFM_PluginEmbedding(vocabulary_size=args.vocabulary_size,
                                   embedding_vec_size=args.embedding_vec_size,
                                   which_embedding="Plugin",
                                   embedding_type="distributed",
                                   dropout_rate=[0.5] * 10,
                                   deep_layers=[1024] * 10,
                                   initializer='uniform',
                                   gpus=args.gpus,
                                   batch_size=args.batch_size,
                                   batch_size_eval=args.batch_size_eval,
                                   slot_num=args.slot_num)

    # ----------- define train step --------------------------------------------------- #
    @tf.function
    def _train_step(dense_batch, sparse_batch, label_batch, model, loss_fn, optimizer):
        with tf.GradientTape() as tape:
            label_batch = tf.cast(label_batch, dtype=tf.float32)
            logits = model(dense_batch, sparse_batch, training=True)
            loss = loss_fn(label_batch, logits)
            # Normalize by the batch size so the loss scale is per-sample.
            loss /= dense_batch.shape[0]
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        return loss

    # ------------------------ training loop ------------------------------------------ #
    logging.info("Begin to train..")
    begin_time = time.time()
    display_begin = begin_time

    for step, datas in enumerate(dataset):
        label, dense, sparse = datas[0], datas[1], datas[2:-1]
        train_loss = _train_step(dense, sparse, label, model, loss_fn, optimizer)
        loss_v = train_loss.numpy()

        if (step % args.display == 0 and step != 0):
            display_end = time.time()
            logging.info("step: %d, loss: %.7f, elapsed time: %.5f seconds." %
                         (step, loss_v, (display_end - display_begin)))
            display_begin = display_end

    end_time = time.time()
    logging.info("Train end. Elapsed time: %.3f seconds." % (end_time - begin_time))
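# Hypothetical command-line entry point for the training script above. The flag
# names mirror the attributes main() reads from `args`; the defaults and help text
# are assumptions, not values taken from the original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Train DeepFM with the HugeCTR plugin embedding")
    parser.add_argument("--data_path", type=str, required=True,
                        help="directory containing train_0.tfrecord")
    parser.add_argument("--batch_size", type=int, default=16384)
    parser.add_argument("--batch_size_eval", type=int, default=4)
    parser.add_argument("--n_epochs", type=int, default=1)
    parser.add_argument("--gpus", type=int, nargs="+", default=[0])
    parser.add_argument("--vocabulary_size", type=int, default=2 ** 24)
    parser.add_argument("--embedding_vec_size", type=int, default=10)
    parser.add_argument("--slot_num", type=int, default=26)
    parser.add_argument("--display", type=int, default=100)
    main(parser.parse_args())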
def main(args):
    # Feature description for the Criteo dataset in tfrecord format.
    cols = [utils.idx2key(idx, False) for idx in range(0, utils.NUM_TOTAL_COLUMNS)]
    feature_desc = dict()
    for col in cols:
        if col == 'label' or col.startswith("I"):
            feature_desc[col] = tf.io.FixedLenFeature([], tf.int64)  # scalar
        else:
            feature_desc[col] = tf.io.FixedLenFeature([1], tf.int64)  # [slot_num, nnz]

    # dataset_names = ["train_" + str(i) + ".tfrecord" for i in range(10)]
    dataset_names = ["train.tfrecord"]
    dataset = create_dataset(dataset_names=dataset_names,
                             feature_desc=feature_desc,
                             batch_size=args.batch_size,
                             n_epochs=args.n_epochs,
                             distribute_keys=tf.constant(args.distribute_keys != 0, dtype=tf.bool),
                             gpu_count=len(args.gpus),
                             embedding_type=tf.constant(args.embedding_type, dtype=tf.string))

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

    # Choose between the original TF embedding layer and the HugeCTR plugin embedding.
    if args.which_embedding == "OriginalEmbedding":
        model = DeepFM_OriginalEmbedding(vocabulary_size=args.vocabulary_size,
                                         embedding_vec_size=args.embedding_vec_size,
                                         which_embedding=args.which_embedding,
                                         embedding_type=args.embedding_type,
                                         dropout_rate=[0.5] * 10,
                                         deep_layers=[1024] * 10,
                                         initializer='uniform',
                                         gpus=args.gpus,
                                         batch_size=args.batch_size,
                                         batch_size_eval=args.batch_size_eval,
                                         slot_num=args.slot_num)
    elif args.which_embedding == "PluginEmbedding":
        model = DeepFM_PluginEmbedding(vocabulary_size=args.vocabulary_size,
                                       embedding_vec_size=args.embedding_vec_size,
                                       which_embedding=args.which_embedding,
                                       embedding_type=args.embedding_type,
                                       dropout_rate=[0.5] * 10,
                                       deep_layers=[1024] * 10,
                                       initializer='uniform',
                                       gpus=args.gpus,
                                       batch_size=args.batch_size,
                                       batch_size_eval=args.batch_size_eval,
                                       slot_num=args.slot_num)

    @tf.function
    def _train_step(dense_batch, sparse_batch, y_batch, model, loss_fn, optimizer):
        with tf.GradientTape(persistent=False) as tape:
            y_batch = tf.cast(y_batch, dtype=tf.float32)
            logits = model(dense_batch, sparse_batch, training=True)
            loss = loss_fn(y_batch, logits)
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        return loss

    logging.info("begin to train.")
    begin_time = time.time()
    train_loss_list = []
    display_begin = begin_time

    # with tf.profiler.experimental.Profile("./origin_1030"):
    for step, items in enumerate(dataset):
        label, dense, others = items[0], items[1], items[2:]
        # With distribute_keys the pipeline yields the CSR tensors (row offsets,
        # values, nnz); otherwise the sparse input is the last item.
        if (tf.convert_to_tensor(args.distribute_keys != 0, dtype=tf.bool)):
            sparse = others[0:3]
        else:
            sparse = others[-1]
        train_loss = _train_step(dense, sparse, label, model, loss_fn, optimizer)
        loss_value = train_loss.numpy()
        train_loss_list.append(loss_value)

        if (step % args.display == 0 and step != 0):
            display_end = time.time()
            logging.info("step: %d, loss: %.5f, elapsed time: %.5f seconds." %
                         (step, loss_value, (display_end - display_begin)))
            display_begin = display_end

        # Profile only the first 50 steps.
        if step >= 50:
            break

    end_time = time.time()
    logging.info("Train End. Elapsed Time: %.3f seconds." % (end_time - begin_time))
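# train_loss_list is accumulated but not consumed in the loop above. A purely
# illustrative follow-up (not part of the original script) that summarizes the
# collected losses after the profiled steps could look like this:
import numpy as np

def summarize_losses(train_loss_list):
    # Report the mean loss over the steps that were actually run.
    mean_loss = float(np.mean(train_loss_list))
    logging.info("mean loss over %d profiled steps: %.5f" % (len(train_loss_list), mean_loss))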