def main():
  # Define training data
  x = np.ones(FLAGS.batch_size)
  y = np.ones(FLAGS.batch_size)

  # Define the model
  X = tf.placeholder(tf.float32, shape=[None])
  Y = tf.placeholder(tf.float32, shape=[None])
  w = tf.Variable(1.0, name="weight")
  b = tf.Variable(1.0, name="bias")
  loss = tf.square(Y - tf.mul(X, w) - b)
  train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
  predict_op = tf.mul(X, w) + b

  saver = tf.train.Saver()
  checkpoint_dir = FLAGS.checkpoint_dir
  checkpoint_file = checkpoint_dir + "/checkpoint.ckpt"
  if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

  # Start the session
  with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      print("Continue training from the model {}".format(
          ckpt.model_checkpoint_path))
      saver.restore(sess, ckpt.model_checkpoint_path)

    # Start training
    start_time = time.time()
    for epoch in range(FLAGS.epoch_number):
      sess.run(train_op, feed_dict={X: x, Y: y})

      # Start validating
      if epoch % FLAGS.steps_to_validate == 0:
        end_time = time.time()
        print("[{}] Epoch: {}".format(end_time - start_time, epoch))
        saver.save(sess, checkpoint_file)
        start_time = end_time

    # Print model variables
    w_value, b_value = sess.run([w, b])
    print("The model of w: {}, b: {}".format(w_value, b_value))

    # Export the model
    print("Exporting trained model to {}".format(FLAGS.model_path))
    model_exporter = exporter.Exporter(saver)
    model_exporter.init(
        sess.graph.as_graph_def(),
        named_graph_signatures={
            'inputs': exporter.generic_signature({"features": X}),
            'outputs': exporter.generic_signature({"prediction": predict_op})
        })
    model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.export_version),
                          sess)
    print('Done exporting!')

def export_model(sess, inputs_signature, outputs_signature):
  # Export the model for the generic inference service
  print("Exporting trained model to {}".format(FLAGS.model_path))
  saver = tf.train.Saver(sharded=True)
  model_exporter = exporter.Exporter(saver)
  model_exporter.init(
      sess.graph.as_graph_def(),
      named_graph_signatures={
          "inputs": exporter.generic_signature(inputs_signature),
          "outputs": exporter.generic_signature(outputs_signature)
      })
  model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.model_version),
                        sess)
  print("Done exporting!")

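# Hedged usage sketch for export_model() above: the session is assumed to
# already hold a trained graph whose placeholders/ops are passed in; the
# signature names "keys", "X" and "predict" are illustrative, not mandated
# by the API.
def export_model_usage_sketch(sess, keys_placeholder, keys, X, predict_op):
  inputs_signature = {"keys": keys_placeholder, "X": X}
  outputs_signature = {"keys": keys, "predict": predict_op}
  export_model(sess, inputs_signature, outputs_signature)
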
def main(_):
  if len(sys.argv) < 2 or sys.argv[-1].startswith('-'):
    print('Usage: mnist_export.py [--training_iteration=x] '
          '[--export_version=y] export_dir')
    sys.exit(-1)
  if FLAGS.training_iteration <= 0:
    print('Please specify a positive value for training iteration.')
    sys.exit(-1)
  if FLAGS.export_version <= 0:
    print('Please specify a positive value for version number.')
    sys.exit(-1)

  # Train model
  print('Training model...')
  mnist = mnist_input_data.read_data_sets(FLAGS.work_dir, one_hot=True)
  sess = tf.InteractiveSession()
  x = tf.placeholder('float', shape=[None, 784])
  y_ = tf.placeholder('float', shape=[None, 10])
  w = tf.Variable(tf.zeros([784, 10]))
  b = tf.Variable(tf.zeros([10]))
  sess.run(tf.initialize_all_variables())
  y = tf.nn.softmax(tf.matmul(x, w) + b)
  cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
  train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
  for _ in range(FLAGS.training_iteration):
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
  correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
  print('training accuracy %g' % sess.run(
      accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
  print('Done training!')

  # Export model
  # WARNING(break-tutorial-inline-code): The following code snippet is
  # in-lined in tutorials, please update tutorial documents accordingly
  # whenever code changes.
  export_path = sys.argv[-1]
  print('Exporting trained model to', export_path)
  saver = tf.train.Saver(sharded=True)
  model_exporter = exporter.Exporter(saver)
  model_exporter.init(
      sess.graph.as_graph_def(),
      named_graph_signatures={
          'inputs': exporter.generic_signature({'images': x}),
          'outputs': exporter.generic_signature({'scores': y})
      })
  model_exporter.export(export_path, tf.constant(FLAGS.export_version), sess)
  print('Done exporting!')

def generic_signature_fn(examples, unused_features, predictions):
  """Creates generic signature from given examples and predictions.

  This is needed for backward compatibility with default behaviour of
  export_estimator.

  Args:
    examples: `Tensor`.
    unused_features: `dict` of `Tensor`s.
    predictions: `Tensor` or `dict` of `Tensor`s.

  Returns:
    Tuple of default signature and empty named signatures.

  Raises:
    ValueError: If examples is `None`.
  """
  if examples is None:
    raise ValueError('examples cannot be None when using this signature fn.')

  tensors = {'inputs': examples}
  if not isinstance(predictions, dict):
    predictions = {'outputs': predictions}
  tensors.update(predictions)
  default_signature = exporter.generic_signature(tensors)
  return default_signature, {}

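# Hedged sketch of how a signature_fn like generic_signature_fn was typically
# consumed: the old tf.contrib.learn Estimator.export() accepted a
# signature_fn argument. The estimator instance and export directory below
# are illustrative assumptions, not part of the snippet above.
def export_with_signature_fn_sketch(estimator):
  # `estimator` is assumed to be a trained tf.contrib.learn Estimator.
  estimator.export(export_dir="/tmp/exported_estimator",  # assumed path
                   signature_fn=generic_signature_fn)
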
def Export():
  export_path = "/tmp/half_plus_two"
  with tf.Session() as sess:
    # Make model parameters a&b variables instead of constants to
    # exercise the variable reloading mechanisms.
    a = tf.Variable(0.5)
    b = tf.Variable(2.0)

    # Calculate, y = a*x + b
    # here we use a placeholder 'x' which is fed at inference time.
    x = tf.placeholder(tf.float32)
    y = tf.add(tf.mul(a, x), b)

    # Run an export.
    tf.initialize_all_variables().run()
    export = exporter.Exporter(tf.train.Saver())
    export.init(named_graph_signatures={
        "inputs": exporter.generic_signature({"x": x}),
        "outputs": exporter.generic_signature({"y": y})
    })
    export.export(export_path, tf.constant(123), sess)

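# Hedged sanity check for the graph built in Export() above: before calling
# export.export(), one can feed the placeholder and confirm y = 0.5 * x + 2.0.
# This helper is an illustrative addition, not part of the original snippet.
def check_half_plus_two(sess, x, y):
  result = sess.run(y, feed_dict={x: [1.0, 2.0]})
  print("Expect [2.5, 3.0], got {}".format(result))
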
def generic_signature_fn(examples, unused_features, predictions):
  """Creates generic signature from given examples and predictions.

  This is needed for backward compatibility with default behaviour of
  export_estimator.

  Args:
    examples: `Tensor`.
    unused_features: `dict` of `Tensor`s.
    predictions: `dict` of `Tensor`s.

  Returns:
    Tuple of default signature and named signature.
  """
  tensors = {'inputs': examples}
  if not isinstance(predictions, dict):
    predictions = {'outputs': predictions}
  tensors.update(predictions)
  default_signature = exporter.generic_signature(tensors)
  return default_signature, {}

def main():
  # Get hyperparameters
  if FLAGS.enable_colored_log:
    import coloredlogs
    coloredlogs.install()
  logging.basicConfig(level=logging.INFO)
  INPUT_FILE_FORMAT = FLAGS.input_file_format
  if INPUT_FILE_FORMAT not in ["tfrecord", "csv"]:
    logging.error("Unknown input file format: {}".format(INPUT_FILE_FORMAT))
    exit(1)
  FEATURE_SIZE = FLAGS.feature_size
  LABEL_SIZE = FLAGS.label_size
  EPOCH_NUMBER = FLAGS.epoch_number
  if EPOCH_NUMBER <= 0:
    EPOCH_NUMBER = None
  BATCH_THREAD_NUMBER = FLAGS.batch_thread_number
  MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue
  BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE
  MODE = FLAGS.mode
  MODEL = FLAGS.model
  CHECKPOINT_PATH = FLAGS.checkpoint_path
  if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists(
      CHECKPOINT_PATH):
    os.makedirs(CHECKPOINT_PATH)
  CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt"
  LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH)
  OUTPUT_PATH = FLAGS.output_path
  if not OUTPUT_PATH.startswith("fds://") and not os.path.exists(OUTPUT_PATH):
    os.makedirs(OUTPUT_PATH)
  pprint.PrettyPrinter().pprint(FLAGS.__flags)

  # Process TFRecords files
  def read_and_decode_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            "label": tf.FixedLenFeature([], tf.float32),
            "features": tf.FixedLenFeature([FEATURE_SIZE], tf.float32),
        })
    label = features["label"]
    features = features["features"]
    return label, features

  def read_and_decode_csv(filename_queue):
    # TODO: Not generic for all datasets
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    # Default values, in case of empty columns. Also specifies the type of
    # the decoded result.
    #record_defaults = [[1], [1], [1], [1], [1]]
    record_defaults = [[1], [1.0], [1.0], [1.0], [1.0]]
    col1, col2, col3, col4, col5 = tf.decode_csv(
        value, record_defaults=record_defaults)
    label = col1
    # NOTE: the original snippet stacked col4 twice, which dropped the last
    # feature column; use col5 so all four feature columns are kept.
    features = tf.stack([col2, col3, col4, col5])
    return label, features

  # Read TFRecords files for training
  filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(FLAGS.train_file),
      num_epochs=EPOCH_NUMBER)
  if INPUT_FILE_FORMAT == "tfrecord":
    label, features = read_and_decode_tfrecord(filename_queue)
  elif INPUT_FILE_FORMAT == "csv":
    label, features = read_and_decode_csv(filename_queue)
  batch_labels, batch_features = tf.train.shuffle_batch(
      [label, features],
      batch_size=FLAGS.batch_size,
      num_threads=BATCH_THREAD_NUMBER,
      capacity=BATCH_CAPACITY,
      min_after_dequeue=MIN_AFTER_DEQUEUE)

  # Read TFRecords file for validation
  validate_filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(FLAGS.validate_file),
      num_epochs=EPOCH_NUMBER)
  if INPUT_FILE_FORMAT == "tfrecord":
    validate_label, validate_features = read_and_decode_tfrecord(
        validate_filename_queue)
  elif INPUT_FILE_FORMAT == "csv":
    validate_label, validate_features = read_and_decode_csv(
        validate_filename_queue)
  validate_batch_labels, validate_batch_features = tf.train.shuffle_batch(
      [validate_label, validate_features],
      batch_size=FLAGS.validate_batch_size,
      num_threads=BATCH_THREAD_NUMBER,
      capacity=BATCH_CAPACITY,
      min_after_dequeue=MIN_AFTER_DEQUEUE)

  # Define the model
  input_units = FEATURE_SIZE
  output_units = LABEL_SIZE
  model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()]

  def full_connect(inputs, weights_shape, biases_shape, is_train=True):
    weights = tf.get_variable("weights",
                              weights_shape,
                              initializer=tf.random_normal_initializer())
    biases = tf.get_variable("biases",
                             biases_shape,
                             initializer=tf.random_normal_initializer())
    layer = tf.matmul(inputs, weights) + biases

    if FLAGS.enable_bn and is_train:
      mean, var = tf.nn.moments(layer, axes=[0])
      scale = tf.get_variable("scale",
                              biases_shape,
                              initializer=tf.random_normal_initializer())
      shift = tf.get_variable("shift",
                              biases_shape,
                              initializer=tf.random_normal_initializer())
      layer = tf.nn.batch_normalization(layer, mean, var, shift, scale,
                                        FLAGS.bn_epsilon)
    return layer

  def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True):
    layer = full_connect(inputs, weights_shape, biases_shape, is_train)
    layer = tf.nn.relu(layer)
    return layer

  def customized_inference(inputs, is_train=True):
    hidden1_units = 128
    hidden2_units = 32
    hidden3_units = 8

    with tf.variable_scope("input"):
      layer = full_connect_relu(inputs, [input_units, hidden1_units],
                                [hidden1_units], is_train)
    with tf.variable_scope("layer0"):
      layer = full_connect_relu(layer, [hidden1_units, hidden2_units],
                                [hidden2_units], is_train)
    with tf.variable_scope("layer1"):
      layer = full_connect_relu(layer, [hidden2_units, hidden3_units],
                                [hidden3_units], is_train)
    if FLAGS.enable_dropout and is_train:
      layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob)
    with tf.variable_scope("output"):
      layer = full_connect(layer, [hidden3_units, output_units],
                           [output_units], is_train)
    return layer

  def dnn_inference(inputs, is_train=True):
    with tf.variable_scope("input"):
      layer = full_connect_relu(inputs,
                                [input_units, model_network_hidden_units[0]],
                                [model_network_hidden_units[0]], is_train)

    for i in range(len(model_network_hidden_units) - 1):
      with tf.variable_scope("layer{}".format(i)):
        layer = full_connect_relu(
            layer,
            [model_network_hidden_units[i], model_network_hidden_units[i + 1]],
            [model_network_hidden_units[i + 1]], is_train)

    with tf.variable_scope("output"):
      layer = full_connect(layer,
                           [model_network_hidden_units[-1], output_units],
                           [output_units], is_train)
    return layer

  def lr_inference(inputs, is_train=True):
    with tf.variable_scope("lr"):
      layer = full_connect(inputs, [input_units, output_units],
                           [output_units])
    return layer

  def wide_and_deep_inference(inputs, is_train=True):
    return lr_inference(inputs, is_train) + dnn_inference(inputs, is_train)

  def cnn_inference(inputs, is_train=True):
    # TODO: Change if validate_batch_size is different
    # [BATCH_SIZE, 512 * 512 * 1] -> [BATCH_SIZE, 512, 512, 1]
    inputs = tf.reshape(inputs, [FLAGS.batch_size, 512, 512, 1])

    # [BATCH_SIZE, 512, 512, 1] -> [BATCH_SIZE, 128, 128, 8]
    with tf.variable_scope("conv0"):
      weights = tf.get_variable("weights", [3, 3, 1, 8],
                                initializer=tf.random_normal_initializer())
      bias = tf.get_variable("bias", [8],
                             initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1],
                           padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1],
                             padding="SAME")

    # [BATCH_SIZE, 128, 128, 8] -> [BATCH_SIZE, 32, 32, 8]
    with tf.variable_scope("conv1"):
      weights = tf.get_variable("weights", [3, 3, 8, 8],
                                initializer=tf.random_normal_initializer())
      bias = tf.get_variable("bias", [8],
                             initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(layer, weights, strides=[1, 1, 1, 1],
                           padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1],
                             padding="SAME")

    # [BATCH_SIZE, 32, 32, 8] -> [BATCH_SIZE, 8, 8, 8]
    with tf.variable_scope("conv2"):
      weights = tf.get_variable("weights", [3, 3, 8, 8],
                                initializer=tf.random_normal_initializer())
      bias = tf.get_variable("bias", [8],
                             initializer=tf.random_normal_initializer())

      layer = tf.nn.conv2d(layer, weights, strides=[1, 1, 1, 1],
                           padding="SAME")
      layer = tf.nn.bias_add(layer, bias)
      layer = tf.nn.relu(layer)
      layer = tf.nn.max_pool(layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1],
                             padding="SAME")

    # [BATCH_SIZE, 8, 8, 8] -> [BATCH_SIZE, 8 * 8 * 8]
    layer = tf.reshape(layer, [-1, 8 * 8 * 8])

    # [BATCH_SIZE, 8 * 8 * 8] -> [BATCH_SIZE, LABEL_SIZE]
    with tf.variable_scope("output"):
      weights = tf.get_variable("weights", [8 * 8 * 8, LABEL_SIZE],
                                initializer=tf.random_normal_initializer())
      bias = tf.get_variable("bias", [LABEL_SIZE],
                             initializer=tf.random_normal_initializer())
      layer = tf.add(tf.matmul(layer, weights), bias)

    return layer

  def inference(inputs, is_train=True):
    if MODEL == "dnn":
      return dnn_inference(inputs, is_train)
    elif MODEL == "lr":
      return lr_inference(inputs, is_train)
    elif MODEL == "wide_and_deep":
      return wide_and_deep_inference(inputs, is_train)
    elif MODEL == "customized":
      return customized_inference(inputs, is_train)
    elif MODEL == "cnn":
      return cnn_inference(inputs, is_train)
    else:
      logging.error("Unknown model, exit now")
      exit(1)

  logging.info("Use the model: {}, model network: {}".format(
      MODEL, FLAGS.model_network))
  logits = inference(batch_features, True)
  batch_labels = tf.to_int64(batch_labels)
  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=batch_labels)
  loss = tf.reduce_mean(cross_entropy, name="loss")
  global_step = tf.Variable(0, name="global_step", trainable=False)
  if FLAGS.enable_lr_decay:
    logging.info("Enable learning rate decay rate: {}".format(
        FLAGS.lr_decay_rate))
    starter_learning_rate = FLAGS.learning_rate
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               100000,
                                               FLAGS.lr_decay_rate,
                                               staircase=True)
  else:
    learning_rate = FLAGS.learning_rate
  optimizer = get_optimizer(FLAGS.optimizer, learning_rate)
  train_op = optimizer.minimize(loss, global_step=global_step)
  tf.get_variable_scope().reuse_variables()

  # Define accuracy op for train data
  train_accuracy_logits = inference(batch_features, False)
  train_softmax = tf.nn.softmax(train_accuracy_logits)
  train_correct_prediction = tf.equal(
      tf.argmax(train_softmax, 1), batch_labels)
  train_accuracy = tf.reduce_mean(
      tf.cast(train_correct_prediction, tf.float32))

  # Define auc op for train data
  batch_labels = tf.cast(batch_labels, tf.int32)
  sparse_labels = tf.reshape(batch_labels, [-1, 1])
  derived_size = tf.shape(batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(axis=1, values=[indices, sparse_labels])
  outshape = tf.stack([derived_size, LABEL_SIZE])
  new_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax,
                                                  new_batch_labels)

  # Define accuracy op for validate data
  validate_accuracy_logits = inference(validate_batch_features, False)
  validate_softmax = tf.nn.softmax(validate_accuracy_logits)
  validate_batch_labels = tf.to_int64(validate_batch_labels)
  validate_correct_prediction = tf.equal(
      tf.argmax(validate_softmax, 1), validate_batch_labels)
  validate_accuracy = tf.reduce_mean(
      tf.cast(validate_correct_prediction, tf.float32))

  # Define auc op for validate data
  validate_batch_labels = tf.cast(validate_batch_labels, tf.int32)
  sparse_labels = tf.reshape(validate_batch_labels, [-1, 1])
  derived_size = tf.shape(validate_batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(axis=1, values=[indices, sparse_labels])
  outshape = tf.stack([derived_size, LABEL_SIZE])
  new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, validate_auc = tf.contrib.metrics.streaming_auc(
      validate_softmax, new_validate_batch_labels)

  # Define inference op
  inference_features = tf.placeholder("float", [None, FEATURE_SIZE])
  inference_logits = inference(inference_features, False)
  inference_softmax = tf.nn.softmax(inference_logits)
  inference_op = tf.argmax(inference_softmax, 1)
  keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
  keys = tf.identity(keys_placeholder)
  model_signature = {
      "inputs": exporter.generic_signature({
          "keys": keys_placeholder,
          "features": inference_features
      }),
      "outputs": exporter.generic_signature({
          "keys": keys,
          "softmax": inference_softmax,
          "prediction": inference_op
      })
  }

  # Initialize saver and summary
  saver = tf.train.Saver()
  tf.summary.scalar("loss", loss)
  tf.summary.scalar("train_accuracy", train_accuracy)
  tf.summary.scalar("train_auc", train_auc)
  tf.summary.scalar("validate_accuracy", validate_accuracy)
  tf.summary.scalar("validate_auc", validate_auc)
  summary_op = tf.summary.merge_all()
  init_op = [tf.global_variables_initializer(),
             tf.local_variables_initializer()]

  # Create session to run
  with tf.Session() as sess:
    logging.info("Start to run with mode: {}".format(MODE))
    writer = tf.summary.FileWriter(OUTPUT_PATH, sess.graph)
    sess.run(init_op)

    if MODE == "train":
      # Restore session and start queue runner
      restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT)
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(coord=coord, sess=sess)
      start_time = datetime.datetime.now()

      try:
        while not coord.should_stop():
          _, loss_value, step = sess.run([train_op, loss, global_step])

          # Print state while training
          if step % FLAGS.steps_to_validate == 0:
            train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run(
                [train_accuracy, train_auc, validate_accuracy, validate_auc,
                 summary_op])
            end_time = datetime.datetime.now()
            logging.info(
                "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".format(
                    end_time - start_time, step, loss_value,
                    train_accuracy_value, train_auc_value,
                    validate_accuracy_value, validate_auc_value))
            writer.add_summary(summary_value, step)
            saver.save(sess, CHECKPOINT_FILE, global_step=step)
            start_time = end_time
      except tf.errors.OutOfRangeError:
        # Export the model after training
        export_model(sess, saver, model_signature, FLAGS.model_path,
                     FLAGS.model_version)
      finally:
        coord.request_stop()
      coord.join(threads)

    elif MODE == "export":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      # Export the model
      export_model(sess, saver, model_signature, FLAGS.model_path,
                   FLAGS.model_version)

    elif MODE == "savedmodel":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      logging.info("Export the saved model to {}".format(
          FLAGS.saved_model_path))
      export_path_base = FLAGS.saved_model_path
      export_path = os.path.join(
          compat.as_bytes(export_path_base),
          compat.as_bytes(str(FLAGS.model_version)))

      model_signature = signature_def_utils.build_signature_def(
          inputs={
              "keys": utils.build_tensor_info(keys_placeholder),
              "features": utils.build_tensor_info(inference_features)
          },
          outputs={
              "keys": utils.build_tensor_info(keys),
              "softmax": utils.build_tensor_info(inference_softmax),
              "prediction": utils.build_tensor_info(inference_op)
          },
          method_name=signature_constants.PREDICT_METHOD_NAME)

      try:
        builder = saved_model_builder.SavedModelBuilder(export_path)
        builder.add_meta_graph_and_variables(
            sess,
            [tag_constants.SERVING],
            clear_devices=True,
            signature_def_map={
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                model_signature,
            },
            #legacy_init_op=legacy_init_op)
            legacy_init_op=tf.group(tf.initialize_all_tables(),
                                    name="legacy_init_op"))
        builder.save()
      except Exception as e:
        logging.error("Failed to export saved model, exception: {}".format(e))

    elif MODE == "inference":
      if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT):
        logging.error("No checkpoint found, exit now")
        exit(1)

      # Load inference test data
      inference_result_file_name = FLAGS.inference_result_file
      inference_test_file_name = FLAGS.inference_test_file
      inference_data = np.genfromtxt(inference_test_file_name, delimiter=",")
      inference_data_features = inference_data[:, 0:9]
      inference_data_labels = inference_data[:, 9]

      # Run inference
      start_time = datetime.datetime.now()
      prediction, prediction_softmax = sess.run(
          [inference_op, inference_softmax],
          feed_dict={inference_features: inference_data_features})
      end_time = datetime.datetime.now()

      # Compute accuracy
      label_number = len(inference_data_labels)
      correct_label_number = 0
      for i in range(label_number):
        if inference_data_labels[i] == prediction[i]:
          correct_label_number += 1
      accuracy = float(correct_label_number) / label_number

      # Compute auc
      y_true = np.array(inference_data_labels)
      y_score = prediction_softmax[:, 1]
      fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=1)
      auc = metrics.auc(fpr, tpr)
      logging.info("[{}] Inference accuracy: {}, auc: {}".format(
          end_time - start_time, accuracy, auc))

      # Save result into the file
      np.savetxt(inference_result_file_name, prediction_softmax,
                 delimiter=",")
      logging.info("Save result to file: {}".format(
          inference_result_file_name))

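# Hedged sketch: loading the SavedModel written by the "savedmodel" mode
# above with the standard TF 1.x loader, then resolving the feed/fetch tensor
# names from the serving signature. The export_path argument is assumed to be
# the same versioned directory the builder wrote to.
def load_saved_model_sketch(export_path):
  with tf.Session(graph=tf.Graph()) as sess:
    meta_graph_def = tf.saved_model.loader.load(sess, [tag_constants.SERVING],
                                                export_path)
    signature = meta_graph_def.signature_def[
        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    features_name = signature.inputs["features"].name
    prediction_name = signature.outputs["prediction"].name
    print("Feed {} and fetch {}".format(features_name, prediction_name))
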
def build(self):
  conf = self.conf
  dtype = self.dtype

  # All possible inputs
  graphlg.info("Creating inputs and tables...")
  batch_size = None
  self.enc_querys = tf.placeholder(tf.string,
                                   shape=[batch_size, conf.input_max_len],
                                   name="enc_querys")
  self.query_lens = tf.placeholder(tf.int32, shape=[batch_size],
                                   name="query_lens")
  self.enc_posts = tf.placeholder(tf.string,
                                  shape=[batch_size, conf.input_max_len],
                                  name="enc_posts")
  self.post_lens = tf.placeholder(tf.int32, shape=[batch_size],
                                  name="post_lens")
  self.enc_resps = tf.placeholder(tf.string,
                                  shape=[batch_size, conf.input_max_len],
                                  name="enc_resps")
  self.resp_lens = tf.placeholder(tf.int32, shape=[batch_size],
                                  name="resp_lens")
  self.enc_neg_resps = tf.placeholder(tf.string,
                                      shape=[batch_size, conf.input_max_len],
                                      name="enc_neg_resp")
  self.neg_resp_lens = tf.placeholder(tf.int32, shape=[batch_size],
                                      name="neg_resp_lens")

  # TODO: the table object, lookup ops, the embedding and its lookup op
  # should be placed on the same device
  with tf.device("/cpu:0"):
    self.embedding = variable_scope.get_variable(
        "embedding", [conf.input_vocab_size, conf.embedding_size],
        initializer=tf.random_uniform_initializer(-0.08, 0.08))
    self.in_table = lookup.MutableHashTable(key_dtype=tf.string,
                                            value_dtype=tf.int64,
                                            default_value=UNK_ID,
                                            shared_name="in_table",
                                            name="in_table",
                                            checkpoint=True)
    self.query_embs = embedding_lookup_unique(
        self.embedding, self.in_table.lookup(self.enc_querys))
    self.post_embs = embedding_lookup_unique(
        self.embedding, self.in_table.lookup(self.enc_posts))
    self.resp_embs = embedding_lookup_unique(
        self.embedding, self.in_table.lookup(self.enc_resps))
    self.neg_resp_embs = embedding_lookup_unique(
        self.embedding, self.in_table.lookup(self.enc_neg_resps))

  # MultiRNNCell
  graphlg.info("Creating multi-layer cells...")

  # Bi-RNN encoder
  graphlg.info("Creating bi-rnn...")
  with variable_scope.variable_scope("q_rnn", dtype=dtype,
                                     reuse=None) as scope:
    cell1 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                               conf.num_layers, conf.output_keep_prob)
    cell2 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                               conf.num_layers, conf.output_keep_prob)
    q_out, q_out_state = bidirectional_dynamic_rnn(
        cell_fw=cell1, cell_bw=cell2,
        inputs=self.query_embs, sequence_length=self.query_lens,
        initial_state_fw=None, initial_state_bw=None,
        dtype=dtype, parallel_iterations=16,
        swap_memory=False, time_major=False)
  with variable_scope.variable_scope("p_rnn", dtype=dtype,
                                     reuse=None) as scope:
    cell1 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                               conf.num_layers, conf.output_keep_prob)
    cell2 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                               conf.num_layers, conf.output_keep_prob)
    p_out, p_out_state = bidirectional_dynamic_rnn(
        cell_fw=cell1, cell_bw=cell2,
        inputs=self.post_embs, sequence_length=self.post_lens,
        initial_state_fw=None, initial_state_bw=None,
        dtype=dtype, parallel_iterations=16,
        swap_memory=False, time_major=False)
  with variable_scope.variable_scope("r_rnn", dtype=dtype,
                                     reuse=None) as scope:
    cell1 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                               conf.num_layers, conf.output_keep_prob)
    cell2 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                               conf.num_layers, conf.output_keep_prob)
    r_out, r_out_state = bidirectional_dynamic_rnn(
        cell_fw=cell1, cell_bw=cell2,
        inputs=self.resp_embs, sequence_length=self.resp_lens,
        initial_state_fw=None, initial_state_bw=None,
        dtype=dtype, parallel_iterations=16,
        swap_memory=False, time_major=False)
  with variable_scope.variable_scope("r_rnn", dtype=dtype,
                                     reuse=True) as scope:
    cell1 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                               conf.num_layers, conf.output_keep_prob,
                               reuse=True)
    cell2 = CreateMultiRNNCell(conf.cell_model, conf.num_units,
                               conf.num_layers, conf.output_keep_prob,
                               reuse=True)
    neg_r_out, neg_r_out_state = bidirectional_dynamic_rnn(
        cell_fw=cell1, cell_bw=cell2,
        inputs=self.neg_resp_embs, sequence_length=self.neg_resp_lens,
        initial_state_fw=None, initial_state_bw=None,
        dtype=dtype, parallel_iterations=16,
        swap_memory=False, time_major=False)

  fw, bw = q_out_state
  q_out_state = tf.concat([fw[-1].h, bw[-1].h], axis=1)
  fw, bw = p_out_state
  p_out_state = tf.concat([fw[-1].h, bw[-1].h], axis=1)
  fw, bw = r_out_state
  r_out_state = tf.concat([fw[-1].h, bw[-1].h], axis=1)
  fw, bw = neg_r_out_state
  neg_r_out_state = tf.concat([fw[-1].h, bw[-1].h], axis=1)

  q_out = tf.concat(q_out, axis=2)
  p_out = tf.concat(p_out, axis=2)
  r_out = tf.concat(r_out, axis=2)
  neg_r_out = tf.concat(neg_r_out, axis=2)

  # Out state cosine dist
  norm_q = tf.sqrt(tf.reduce_sum(tf.square(q_out_state), 1, keep_dims=True))
  norm_p = tf.sqrt(tf.reduce_sum(tf.square(p_out_state), 1, keep_dims=True))
  norm_r = tf.sqrt(tf.reduce_sum(tf.square(r_out_state), 1, keep_dims=True))
  norm_neg_r = tf.sqrt(
      tf.reduce_sum(tf.square(neg_r_out_state), 1, keep_dims=True))
  cos_qp = tf.reduce_sum(q_out_state * p_out_state, 1,
                         keep_dims=True) / (norm_q * norm_p)
  cos_qr = tf.reduce_sum(q_out_state * r_out_state, 1,
                         keep_dims=True) / (norm_q * norm_r)
  cos_qnegr = tf.reduce_sum(q_out_state * neg_r_out_state, 1,
                            keep_dims=True) / (norm_q * norm_neg_r)

  # Outputs 2-dim intersection
  graphlg.info("Creating cos dist...")
  qp_sim = tf.expand_dims(tf.matmul(q_out, p_out, transpose_b=True), -1)
  qr_sim = tf.expand_dims(tf.matmul(q_out, r_out, transpose_b=True), -1)
  qnegr_sim = tf.expand_dims(
      tf.matmul(q_out, neg_r_out, transpose_b=True), -1)

  # n-CNN max-pooling
  graphlg.info("Creating interactions...")
  with variable_scope.variable_scope("qp_cnn", dtype=dtype,
                                     reuse=None) as scope:
    qp_map = FeatureMatrix(conf.conv_conf, qp_sim, scope=scope, dtype=dtype)
  with variable_scope.variable_scope("qr_cnn", dtype=dtype,
                                     reuse=None) as scope:
    qr_map = FeatureMatrix(conf.conv_conf, qr_sim, scope=scope, dtype=dtype)
  with variable_scope.variable_scope("qr_cnn", dtype=dtype,
                                     reuse=True) as scope:
    qnegr_map = FeatureMatrix(conf.conv_conf, qnegr_sim, scope=scope,
                              dtype=dtype)

  # h becomes 1 after max-pooling
  qp_vec = tf.concat([tf.contrib.layers.flatten(qp_map), cos_qp], 1)
  qr_vec = tf.concat([tf.contrib.layers.flatten(qr_map), cos_qr], 1)
  qnegr_vec = tf.concat([tf.contrib.layers.flatten(qnegr_map), cos_qnegr], 1)

  graphlg.info("Creating fully connected...")
  with variable_scope.variable_scope("qp_fc", dtype=dtype,
                                     reuse=None) as scope:
    qp_fc = FC(inputs=qp_vec, h_size=conf.fc_h_size, o_size=1,
               act=tf.nn.sigmoid)
  with variable_scope.variable_scope("qr_fc", dtype=dtype,
                                     reuse=None) as scope:
    qr_fc = FC(inputs=qr_vec, h_size=conf.fc_h_size, o_size=1, act=relu)
  with variable_scope.variable_scope("qr_fc", dtype=dtype,
                                     reuse=True) as scope:
    qnegr_fc = FC(inputs=qnegr_vec, h_size=conf.fc_h_size, o_size=1, act=relu)

  self.scores = tf.squeeze(qp_fc * qr_fc)
  self.neg_scores = tf.squeeze(qp_fc * qnegr_fc)

  graphlg.info("Creating optimizer and backpropagation...")
  self.global_params = []
  self.trainable_params = tf.trainable_variables()
  self.optimizer_params = []
  if not self.for_deploy:
    with variable_scope.variable_scope(self.model_kind, dtype=dtype) as scope:
      #self.loss = tf.losses.hinge_loss(self.neg_scores, self.scores)
      self.loss = tf.reduce_mean(
          tf.nn.relu(1 + self.neg_scores - self.scores))
      self.summary = tf.summary.scalar("%s/loss" % name, self.loss)

    graphlg.info("Creating backpropagation graph and optimizers...")
    self.learning_rate = tf.Variable(float(conf.learning_rate),
                                     trainable=False, name="learning_rate")
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * conf.learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False, name="global_step")
    self.data_idx = tf.Variable(0, trainable=False, name="data_idx")
    self.data_idx_inc_op = self.data_idx.assign(
        self.data_idx + conf.batch_size)

    self.optimizers = {
        "SGD": tf.train.GradientDescentOptimizer(self.learning_rate),
        "Adadelta": tf.train.AdadeltaOptimizer(self.learning_rate),
        "Adagrad": tf.train.AdagradOptimizer(self.learning_rate),
        "AdagradDA": tf.train.AdagradDAOptimizer(self.learning_rate,
                                                 self.global_step),
        "Moment": tf.train.MomentumOptimizer(self.learning_rate, 0.9),
        "Ftrl": tf.train.FtrlOptimizer(self.learning_rate),
        "RMSProp": tf.train.RMSPropOptimizer(self.learning_rate)
    }
    self.opt = self.optimizers[conf.opt_name]
    tmp = set(tf.global_variables())

    if job_type == "worker":
      self.opt = SyncReplicasOptimizer(self.opt, conf.replicas_to_aggregate,
                                       conf.total_num_replicas)
      grads_and_vars = self.opt.compute_gradients(loss=self.loss)
      gradients, variables = zip(*grads_and_vars)
    else:
      gradients = tf.gradients(self.loss, tf.trainable_variables())
      variables = tf.trainable_variables()

    clipped_gradients, self.grad_norm = tf.clip_by_global_norm(
        gradients, conf.max_gradient_norm)
    self.update = self.opt.apply_gradients(
        zip(clipped_gradients, variables), self.global_step)

    self.optimizer_params.append(self.learning_rate)
    self.optimizer_params.extend(list(set(tf.global_variables()) - tmp))
    self.global_params.extend([self.global_step, self.data_idx])

  self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep)
  self.model_exporter = exporter.Exporter(self.saver)
  inputs = {
      "enc_querys:0": self.enc_querys,
      "query_lens:0": self.query_lens,
      "enc_posts:0": self.enc_posts,
      "post_lens:0": self.post_lens,
      "enc_resps:0": self.enc_resps,
      "resp_lens:0": self.resp_lens
  }
  outputs = {"out": self.scores}
  self.model_exporter.init(tf.get_default_graph().as_graph_def(),
                           named_graph_signatures={
                               "inputs": exporter.generic_signature(inputs),
                               "outputs": exporter.generic_signature(outputs)
                           })

def compute():
  # define placeholder for inputs to network
  xs = tf.placeholder(tf.float32, shape=(None, 784))  # 28x28
  ys = tf.placeholder(tf.float32, shape=(None, 10))
  keep_prob = tf.placeholder(tf.float32)
  x_image = tf.reshape(xs, [-1, 28, 28, 1])
  # print(x_image.shape)  # [n_samples, 28,28,1]

  ## conv1 layer ##
  W_conv1 = weight_variable([5, 5, 1, 32])  # patch 5x5, in size 1, out size 32
  b_conv1 = bias_variable([32])
  h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)  # output size 28x28x32
  h_pool1 = max_pool_2x2(h_conv1)  # output size 14x14x32

  ## conv2 layer ##
  W_conv2 = weight_variable([5, 5, 32, 64])  # patch 5x5, in size 32, out size 64
  b_conv2 = bias_variable([64])
  h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)  # output size 14x14x64
  h_pool2 = max_pool_2x2(h_conv2)  # output size 7x7x64

  ## func1 layer ##
  W_fc1 = weight_variable([7 * 7 * 64, 1024])
  b_fc1 = bias_variable([1024])
  # [n_samples, 7, 7, 64] ->> [n_samples, 7*7*64]
  h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
  h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
  h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

  ## func2 layer ##
  W_fc2 = weight_variable([1024, 10])
  b_fc2 = bias_variable([10])
  prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

  # the error between prediction and real data
  cross_entropy = tf.reduce_mean(
      -tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))  # loss
  train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

  sess = tf.Session()
  # important step
  sess.run(tf.initialize_all_variables())

  for i in range(10):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step,
             feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
    print(compute_accuracy(prediction, xs, ys, keep_prob, mnist.test.images,
                           mnist.test.labels, sess))
  print("Done training!")

  FLAGS = tf.app.flags.FLAGS
  FLAGS.export_version = 9
  export_path = './export'
  print('Exporting trained model to %s' % export_path)
  init_op = tf.group(tf.initialize_all_tables(), name='init_op')
  saver = tf.train.Saver(sharded=True)
  model_exporter = exporter.Exporter(saver)
  model_exporter.init(sess.graph.as_graph_def(),
                      init_op=init_op,
                      named_graph_signatures={
                          'inputs': exporter.generic_signature({
                              'x': xs,
                              'keep_prob': keep_prob
                          }),
                          'outputs': exporter.generic_signature(
                              {'y_predict': prediction})
                      })
  model_exporter.export(export_path, tf.constant(FLAGS.export_version), sess)
  print('Done exporting!')

def main():
  # Create train data
  train_X = np.linspace(-1, 1, 100)
  train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.33 + 10

  learning_rate = FLAGS.learning_rate
  start_training_time = datetime.datetime.now()

  print("Use the optimizer: {}".format(FLAGS.optimizer))
  if FLAGS.optimizer == "sgd":
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  elif FLAGS.optimizer == "adadelta":
    optimizer = tf.train.AdadeltaOptimizer(learning_rate)
  elif FLAGS.optimizer == "adagrad":
    optimizer = tf.train.AdagradOptimizer(learning_rate)
  elif FLAGS.optimizer == "adam":
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif FLAGS.optimizer == "ftrl":
    optimizer = tf.train.FtrlOptimizer(learning_rate)
  elif FLAGS.optimizer == "rmsprop":
    optimizer = tf.train.RMSPropOptimizer(learning_rate)
  else:
    print("Unknown optimizer: {}, exit now".format(FLAGS.optimizer))
    exit(1)

  # Run standalone training
  if os.environ.get("TF_CONFIG", "") == "":
    # Define the model
    keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
    keys = tf.identity(keys_placeholder)
    X = tf.placeholder("float", shape=[None, 1])
    Y = tf.placeholder("float", shape=[None, 1])
    w = tf.Variable(0.0, name="weight")
    b = tf.Variable(0.0, name="bias")
    global_step = tf.Variable(0, name="global_step", trainable=False)
    loss = tf.reduce_sum(tf.square(Y - tf.multiply(X, w) - b))
    train_op = optimizer.minimize(loss, global_step=global_step)
    predict_op = tf.multiply(X, w) + b
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("training/hptuning/metric", loss)
    summary_op = tf.summary.merge_all()
    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
      sess.run(init_op)
      print("Save tensorboard files into: {}".format(FLAGS.output_path))
      writer = tf.summary.FileWriter(FLAGS.output_path, sess.graph)

      print("Run training with epoch number: {}".format(FLAGS.max_epochs))
      for i in range(FLAGS.max_epochs):
        for (x, y) in zip(train_X, train_Y):
          x = np.array([[x]])
          y = np.array([[y]])
          sess.run(train_op, feed_dict={X: x, Y: y})

        if i % FLAGS.checkpoint_period == 0:
          x = np.array([[train_X[0]]])
          y = np.array([[train_Y[0]]])
          summary_value, loss_value, step = sess.run(
              [summary_op, loss, global_step], feed_dict={X: x, Y: y})
          writer.add_summary(summary_value, step)
          print("Epoch: {}, loss: {}".format(i, loss_value))

      writer.close()
      end_training_time = datetime.datetime.now()
      print("[{}] End of standalone training.".format(end_training_time -
                                                       start_training_time))
      print("Get the model, w: {}, b: {}".format(sess.run(w), sess.run(b)))

      export_inputs_signature = {"keys": keys_placeholder, "X": X}
      export_outputs_signature = {"keys": keys, "predict": predict_op}
      export_model(sess, export_inputs_signature, export_outputs_signature)

  # Run distributed training
  else:
    # Example: {"cluster": {"ps": ["127.0.0.1:3001"], "worker": ["127.0.0.1:3002", "127.0.0.1:3003"], "master": ["127.0.0.1:3004"]}, "task": {"index": 0, "type": "master"}}
    env = json.loads(os.environ.get("TF_CONFIG", "{}"))
    task_data = env.get("task", None)
    cluster_spec = env["cluster"]
    task_type = task_data["type"]
    task_index = task_data["index"]

    cluster = tf.train.ClusterSpec(cluster_spec)
    server = tf.train.Server(cluster,
                             job_name=task_type,
                             task_index=task_index)

    if task_type == "ps":
      server.join()
    elif task_type == "worker" or task_type == "master":
      with tf.device(
          tf.train.replica_device_setter(
              worker_device="/job:{}/task:{}".format(task_type, task_index),
              cluster=cluster)):
        # Define the model
        keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
        keys = tf.identity(keys_placeholder)
        X = tf.placeholder("float", shape=[None, 1])
        Y = tf.placeholder("float", shape=[None, 1])
        w = tf.Variable(0.0, name="weight")
        b = tf.Variable(0.0, name="bias")
        global_step = tf.Variable(0, name="global_step", trainable=False)
        loss = tf.reduce_sum(tf.square(Y - tf.multiply(X, w) - b))
        train_op = optimizer.minimize(loss, global_step=global_step)
        predict_op = tf.multiply(X, w) + b
        tf.summary.scalar("loss", loss)
        summary_op = tf.summary.merge_all()
        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()
        #saver = tf.train.Saver(sharded=True)
        constant_model_version = tf.constant(FLAGS.model_version)
        model_exporter = exporter.Exporter(saver)
        model_exporter.init(tf.get_default_graph().as_graph_def(),
                            named_graph_signatures={
                                "inputs": exporter.generic_signature({
                                    "keys": keys_placeholder,
                                    "X": X
                                }),
                                "outputs": exporter.generic_signature({
                                    "keys": keys,
                                    "predict": predict_op
                                })
                            })

      sv = tf.train.Supervisor(is_chief=(task_type == "master"),
                               logdir=FLAGS.checkpoint_path,
                               init_op=init_op,
                               #summary_op=summary_op,
                               summary_op=None,
                               saver=saver,
                               global_step=global_step,
                               save_model_secs=60)

      try:
        with sv.managed_session(server.target) as sess:
          print("Save tensorboard files into: {}".format(FLAGS.output_path))
          writer = tf.summary.FileWriter(FLAGS.output_path, sess.graph)

          print("Run training with epoch number: {}".format(FLAGS.max_epochs))
          for i in range(FLAGS.max_epochs):
            for (x, y) in zip(train_X, train_Y):
              x = np.array([[x]])
              y = np.array([[y]])
              sess.run(train_op, feed_dict={X: x, Y: y})

            if i % FLAGS.checkpoint_period == 0:
              x = np.array([[train_X[0]]])
              y = np.array([[train_Y[0]]])
              summary_value, loss_value, step = sess.run(
                  [summary_op, loss, global_step], feed_dict={X: x, Y: y})
              print("Epoch: {}, loss: {}".format(i, loss_value))
              if task_type == "master":
                writer.add_summary(summary_value, step)

          writer.close()
          end_training_time = datetime.datetime.now()
          print("[{}] End of distributed training.".format(
              end_training_time - start_training_time))

          if task_type == "master":
            print("Exporting trained model to {}".format(FLAGS.model_path))
            model_exporter.export(FLAGS.model_path, constant_model_version,
                                  sess)
      except Exception as e:
        print(e)

def main(): print("Start Pokemon classifier") # Initialize train and test data TRAIN_IMAGE_NUMBER = 646 TEST_IMAGE_NUMBER = 68 IMAGE_SIZE = 32 RGB_CHANNEL_SIZE = 3 LABEL_SIZE = 17 train_dataset = np.ndarray( shape=(TRAIN_IMAGE_NUMBER, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE), dtype=np.float32) test_dataset = np.ndarray( shape=(TEST_IMAGE_NUMBER, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE), dtype=np.float32) train_labels = np.ndarray(shape=(TRAIN_IMAGE_NUMBER, ), dtype=np.int32) test_labels = np.ndarray(shape=(TEST_IMAGE_NUMBER, ), dtype=np.int32) TRAIN_DATA_DIR = "./data/train/" TEST_DATA_DIR = "./data/test/" VALIDATE_DATA_DIR = "./data/validate/" IMAGE_FORMAT = ".png" index = 0 pokemon_type_id_map = {"Bug": 0, "Dark": 1, "Dragon": 2, "Electric": 3, "Fairy": 4, "Fighting": 5, "Fire": 6, "Ghost": 7, "Grass": 8, "Ground": 9, "Ice": 10, "Normal": 11, "Poison": 12, "Psychic": 13, "Rock": 14, "Steel": 15, "Water": 16} pokemon_types = ["Bug", "Dark", "Dragon", "Electric", "Fairy", "Fighting", "Fire", "Ghost", "Grass", "Ground", "Ice", "Normal", "Poison", "Psychic", "Rock", "Steel", "Water"] # Load train images for pokemon_type in os.listdir(TRAIN_DATA_DIR): for image_filename in os.listdir(os.path.join(TRAIN_DATA_DIR, pokemon_type)): if image_filename.endswith(IMAGE_FORMAT): image_filepath = os.path.join(TRAIN_DATA_DIR, pokemon_type, image_filename) image_ndarray = ndimage.imread(image_filepath, mode="RGB") train_dataset[index] = image_ndarray train_labels[index] = pokemon_type_id_map.get(pokemon_type) index += 1 index = 0 # Load test image for pokemon_type in os.listdir(TEST_DATA_DIR): for image_filename in os.listdir(os.path.join(TEST_DATA_DIR, pokemon_type)): if image_filename.endswith(IMAGE_FORMAT): image_filepath = os.path.join(TEST_DATA_DIR, pokemon_type, image_filename) image_ndarray = ndimage.imread(image_filepath, mode="RGB") test_dataset[index] = image_ndarray test_labels[index] = pokemon_type_id_map.get(pokemon_type) index += 1 # Define the model keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) x = tf.placeholder(tf.float32, shape=(None, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE)) y = tf.placeholder(tf.int32, shape=(None, )) batch_size = FLAGS.batch_size epoch_number = FLAGS.epoch_number checkpoint_dir = FLAGS.checkpoint_dir if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) tensorboard_dir = FLAGS.tensorboard_dir mode = FLAGS.mode checkpoint_file = checkpoint_dir + "/checkpoint.ckpt" steps_to_validate = FLAGS.steps_to_validate def cnn_inference(x): # Convolution layer result: [BATCH_SIZE, 16, 16, 64] with tf.variable_scope("conv1"): weights = tf.get_variable("weights", [3, 3, 3, 32], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [32], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(x, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") # Convolution layer result: [BATCH_SIZE, 8, 8, 64] with tf.variable_scope("conv2"): weights = tf.get_variable("weights", [3, 3, 32, 64], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [64], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(layer, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") # Reshape for 
full-connect network layer = tf.reshape(layer, [-1, 8 * 8 * 64]) #import ipdb;ipdb.set_trace() # Full connected layer result: [BATCH_SIZE, 17] with tf.variable_scope("fc1"): # weights.get_shape().as_list()[0]] = 8 * 8 * 64 weights = tf.get_variable("weights", [8 * 8 * 64, LABEL_SIZE], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [LABEL_SIZE], initializer=tf.random_normal_initializer()) layer = tf.add(tf.matmul(layer, weights), bias) return layer def lstm_inference(x): RNN_HIDDEN_UNITS = 128 # x was [BATCH_SIZE, 32, 32, 3] # x changes to [32, BATCH_SIZE, 32, 3] x = tf.transpose(x, [1, 0, 2, 3]) # x changes to [32 * BATCH_SIZE, 32 * 3] x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE]) # x changes to array of 32 * [BATCH_SIZE, 32 * 3] x = tf.split(0, IMAGE_SIZE, x) weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE])) biases = tf.Variable(tf.random_normal([LABEL_SIZE])) # output size is 128, state size is (c=128, h=128) lstm_cell = rnn_cell.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0) # outputs is array of 32 * [BATCH_SIZE, 128] outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32) # outputs[-1] is [BATCH_SIZE, 128] return tf.matmul(outputs[-1], weights) + biases def bidirectional_lstm_inference(x): RNN_HIDDEN_UNITS = 128 # x was [BATCH_SIZE, 32, 32, 3] # x changes to [32, BATCH_SIZE, 32, 3] x = tf.transpose(x, [1, 0, 2, 3]) # x changes to [32 * BATCH_SIZE, 32 * 3] x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE]) # x changes to array of 32 * [BATCH_SIZE, 32 * 3] x = tf.split(0, IMAGE_SIZE, x) weights = tf.Variable(tf.random_normal([2 * RNN_HIDDEN_UNITS, LABEL_SIZE])) biases = tf.Variable(tf.random_normal([LABEL_SIZE])) # output size is 128, state size is (c=128, h=128) fw_lstm_cell = rnn_cell.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0) bw_lstm_cell = rnn_cell.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0) # outputs is array of 32 * [BATCH_SIZE, 128] outputs, _, _ = rnn.bidirectional_rnn(fw_lstm_cell, bw_lstm_cell, x, dtype=tf.float32) # outputs[-1] is [BATCH_SIZE, 128] return tf.matmul(outputs[-1], weights) + biases def stacked_lstm_inference(x): RNN_HIDDEN_UNITS = 128 # x was [BATCH_SIZE, 32, 32, 3] # x changes to [32, BATCH_SIZE, 32, 3] x = tf.transpose(x, [1, 0, 2, 3]) # x changes to [32 * BATCH_SIZE, 32 * 3] x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE]) # x changes to array of 32 * [BATCH_SIZE, 32 * 3] x = tf.split(0, IMAGE_SIZE, x) weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE])) biases = tf.Variable(tf.random_normal([LABEL_SIZE])) # output size is 128, state size is (c=128, h=128) lstm_cell = rnn_cell.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0) lstm_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * 2) # outputs is array of 32 * [BATCH_SIZE, 128] outputs, states = rnn.rnn(lstm_cells, x, dtype=tf.float32) # outputs[-1] is [BATCH_SIZE, 128] return tf.matmul(outputs[-1], weights) + biases def inference(inputs): print("Use the model: {}".format(FLAGS.model)) if FLAGS.model == "cnn": return cnn_inference(inputs) elif FLAGS.model == "lstm": return lstm_inference(inputs) elif FLAGS.model == "bidirectional_lstm": return bidirectional_lstm_inference(inputs) elif FLAGS.model == "stacked_lstm": return stacked_lstm_inference(inputs) else: print("Unknow model, exit now") exit(1) # Define train op logit = inference(x) loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit, y)) learning_rate = FLAGS.learning_rate print("Use the optimizer: {}".format(FLAGS.optimizer)) if 
FLAGS.optimizer == "sgd": optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif FLAGS.optimizer == "adadelta": optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif FLAGS.optimizer == "adagrad": optimizer = tf.train.AdagradOptimizer(learning_rate) elif FLAGS.optimizer == "adam": optimizer = tf.train.AdamOptimizer(learning_rate) elif FLAGS.optimizer == "ftrl": optimizer = tf.train.FtrlOptimizer(learning_rate) elif FLAGS.optimizer == "rmsprop": optimizer = tf.train.RMSPropOptimizer(learning_rate) else: print("Unknow optimizer: {}, exit now".format(FLAGS.optimizer)) exit(1) global_step = tf.Variable(0, name='global_step', trainable=False) train_op = optimizer.minimize(loss, global_step=global_step) # Define accuracy and inference op tf.get_variable_scope().reuse_variables() logits = inference(x) validate_softmax = tf.nn.softmax(logits) predict_op = tf.argmax(validate_softmax, 1) correct_prediction = tf.equal(predict_op, tf.to_int64(y)) accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) saver = tf.train.Saver() tf.scalar_summary('loss', loss) init_op = tf.initialize_all_variables() # Create session to run graph with tf.Session() as sess: summary_op = tf.merge_all_summaries() writer = tf.train.SummaryWriter(tensorboard_dir, sess.graph) sess.run(init_op) if mode == "train": ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Continue training from the model {}".format( ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) start_time = datetime.datetime.now() for epoch in range(epoch_number): _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: train_dataset, y: train_labels}) if epoch % steps_to_validate == 0: end_time = datetime.datetime.now() train_accuracy_value, summary_value = sess.run( [accuracy_op, summary_op], feed_dict={x: train_dataset, y: train_labels}) test_accuracy_value = sess.run(accuracy_op, feed_dict={x: test_dataset, y: test_labels}) print( "[{}] Epoch: {}, loss: {}, train_accuracy: {}, test_accuracy: {}".format( end_time - start_time, epoch, loss_value, train_accuracy_value, test_accuracy_value)) saver.save(sess, checkpoint_file, global_step=step) writer.add_summary(summary_value, step) start_time = end_time # Export the model print("Exporting trained model to {}".format(FLAGS.model_path)) model_exporter = exporter.Exporter(saver) model_exporter.init( sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature({"keys": keys_placeholder, "features": x}), 'outputs': exporter.generic_signature({"keys": keys, "prediction": predict_op}) }) model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.export_version), sess) print 'Done exporting!' elif mode == "inference": ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Load the model {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) start_time = datetime.datetime.now() image_ndarray = ndimage.imread(FLAGS.image, mode="RGB") # TODO: Update for server without gui #print_image(image_ndarray) image_ndarray = image_ndarray.reshape(1, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE) prediction = sess.run(predict_op, feed_dict={x: image_ndarray}) end_time = datetime.datetime.now() pokemon_type = pokemon_types[prediction[0]] print("[{}] Predict type: {}".format(end_time - start_time, pokemon_type)) else: print("Unknow mode, please choose 'train' or 'inference'") print("End of Pokemon classifier")
def main():
  # Define training data
  x = np.ones(FLAGS.batch_size)
  y = np.ones(FLAGS.batch_size)

  # Define the model
  X = tf.placeholder(tf.float32, shape=[None], name="X")
  Y = tf.placeholder(tf.float32, shape=[None], name="yhat")
  w = tf.Variable(1.0, name="weight")
  b = tf.Variable(1.0, name="bias")
  loss = tf.square(Y - tf.mul(X, w) - b)
  train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
  predict_op = tf.mul(X, w) + b

  saver = tf.train.Saver()
  checkpoint_dir = FLAGS.checkpoint_dir
  checkpoint_file = checkpoint_dir + "/checkpoint.ckpt"
  if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

  # Start the session
  with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      print("Continue training from the model {}".format(
          ckpt.model_checkpoint_path))
      saver.restore(sess, ckpt.model_checkpoint_path)

    saver_def = saver.as_saver_def()
    print(saver_def.filename_tensor_name)
    print(saver_def.restore_op_name)

    # Start training
    start_time = time.time()
    for epoch in range(FLAGS.epoch_number):
      sess.run(train_op, feed_dict={X: x, Y: y})

      # Start validating
      if epoch % FLAGS.steps_to_validate == 0:
        end_time = time.time()
        print("[{}] Epoch: {}".format(end_time - start_time, epoch))

        saver.save(sess, checkpoint_file)
        tf.train.write_graph(sess.graph_def, checkpoint_dir,
                             'trained_model.pb', as_text=False)
        tf.train.write_graph(sess.graph_def, checkpoint_dir,
                             'trained_model.txt', as_text=True)

        start_time = end_time

    # Print model variables
    w_value, b_value = sess.run([w, b])
    print("The model of w: {}, b: {}".format(w_value, b_value))

    # Export the model
    print("Exporting trained model to {}".format(FLAGS.model_path))
    model_exporter = exporter.Exporter(saver)
    model_exporter.init(
        sess.graph.as_graph_def(),
        named_graph_signatures={
            'inputs': exporter.generic_signature({"features": X}),
            'outputs': exporter.generic_signature({"prediction": predict_op})
        })
    model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.export_version),
                          sess)
    print('Done exporting!')

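# Hedged sketch: reading back the GraphDef that the training loop above wrote
# with tf.train.write_graph(). The file name matches the write_graph() call;
# importing with an empty name prefix keeps the original op names.
def load_graph_def_sketch(checkpoint_dir):
  graph_def = tf.GraphDef()
  with open(os.path.join(checkpoint_dir, "trained_model.pb"), "rb") as f:
    graph_def.ParseFromString(f.read())
  tf.import_graph_def(graph_def, name="")
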
def main(): # Change these for different models FEATURE_SIZE = 124 LABEL_SIZE = 2 TRAIN_TFRECORDS_FILE = "data/a8a_train.libsvm.tfrecords" VALIDATE_TFRECORDS_FILE = "data/a8a_test.libsvm.tfrecords" learning_rate = FLAGS.learning_rate epoch_number = FLAGS.epoch_number thread_number = FLAGS.thread_number batch_size = FLAGS.batch_size validate_batch_size = FLAGS.validate_batch_size min_after_dequeue = FLAGS.min_after_dequeue capacity = thread_number * batch_size + min_after_dequeue mode = FLAGS.mode checkpoint_dir = FLAGS.checkpoint_dir if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) tensorboard_dir = FLAGS.tensorboard_dir if not os.path.exists(tensorboard_dir): os.makedirs(tensorboard_dir) def read_and_decode(filename_queue): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) return serialized_example # Read TFRecords files for training filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(TRAIN_TFRECORDS_FILE), num_epochs=epoch_number) serialized_example = read_and_decode(filename_queue) batch_serialized_example = tf.train.shuffle_batch( [serialized_example], batch_size=batch_size, num_threads=thread_number, capacity=capacity, min_after_dequeue=min_after_dequeue) features = tf.parse_example(batch_serialized_example, features={ "label": tf.FixedLenFeature( [], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) batch_labels = features["label"] batch_ids = features["ids"] batch_values = features["values"] # Read TFRecords file for validation validate_filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(VALIDATE_TFRECORDS_FILE), num_epochs=epoch_number) validate_serialized_example = read_and_decode(validate_filename_queue) validate_batch_serialized_example = tf.train.shuffle_batch( [validate_serialized_example], batch_size=validate_batch_size, num_threads=thread_number, capacity=capacity, min_after_dequeue=min_after_dequeue) validate_features = tf.parse_example( validate_batch_serialized_example, features={ "label": tf.FixedLenFeature( [], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) validate_batch_labels = validate_features["label"] validate_batch_ids = validate_features["ids"] validate_batch_values = validate_features["values"] # Define the model input_units = FEATURE_SIZE hidden1_units = 128 hidden2_units = 32 hidden3_units = 8 output_units = LABEL_SIZE def full_connect(inputs, weights_shape, biases_shape, is_train=True): with tf.device('/cpu:0'): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.matmul(inputs, weights) + biases if FLAGS.enable_bn and is_train: mean, var = tf.nn.moments(layer, axes=[0]) scale = tf.get_variable("scale", biases_shape, initializer=tf.random_normal_initializer()) shift = tf.get_variable("shift", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.nn.batch_normalization(layer, mean, var, shift, scale, FLAGS.bn_epsilon) return layer def sparse_full_connect(sparse_ids, sparse_values, weights_shape, biases_shape, is_train=True): with tf.device('/cpu:0'): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) return tf.nn.embedding_lookup_sparse(weights, sparse_ids, sparse_values, 
combiner="sum") + biases def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True): return tf.nn.relu(full_connect(inputs, weights_shape, biases_shape, is_train)) def dnn_inference(sparse_ids, sparse_values, is_train=True): with tf.variable_scope("layer1"): sparse_layer = sparse_full_connect(sparse_ids, sparse_values, [input_units, hidden1_units], [hidden1_units], is_train) layer = tf.nn.relu(sparse_layer) with tf.variable_scope("layer2"): layer = full_connect_relu(layer, [hidden1_units, hidden2_units], [hidden2_units], is_train) with tf.variable_scope("layer3"): layer = full_connect_relu(layer, [hidden2_units, hidden3_units], [hidden3_units], is_train) if FLAGS.enable_dropout and is_train: layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob) with tf.variable_scope("output"): layer = full_connect(layer, [hidden3_units, output_units], [output_units], is_train) return layer def lr_inference(sparse_ids, sparse_values, is_train=True): with tf.variable_scope("logistic_regression"): layer = sparse_full_connect(sparse_ids, sparse_values, [input_units, output_units], [output_units]) return layer def wide_and_deep_inference(sparse_ids, sparse_values, is_train=True): return lr_inference(sparse_ids, sparse_values, is_train) + dnn_inference( sparse_ids, sparse_values, is_train) def inference(sparse_ids, sparse_values, is_train=True): print("Use the model: {}".format(FLAGS.model)) if FLAGS.model == "lr": return lr_inference(sparse_ids, sparse_values, is_train) elif FLAGS.model == "dnn": return dnn_inference(sparse_ids, sparse_values, is_train) elif FLAGS.model == "wide_and_deep": return wide_and_deep_inference(sparse_ids, sparse_values, is_train) else: print("Unknown model, exit now") exit(1) logits = inference(batch_ids, batch_values, True) batch_labels = tf.to_int64(batch_labels) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, batch_labels) loss = tf.reduce_mean(cross_entropy, name='loss') print("Use the optimizer: {}".format(FLAGS.optimizer)) if FLAGS.optimizer == "sgd": optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif FLAGS.optimizer == "momentum": # optimizer = tf.train.MomentumOptimizer(learning_rate) print("Not support optimizer: {} yet, exit now".format(FLAGS.optimizer)) exit(1) elif FLAGS.optimizer == "adadelta": optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif FLAGS.optimizer == "adagrad": optimizer = tf.train.AdagradOptimizer(learning_rate) elif FLAGS.optimizer == "adam": optimizer = tf.train.AdamOptimizer(learning_rate) elif FLAGS.optimizer == "ftrl": optimizer = tf.train.FtrlOptimizer(learning_rate) elif FLAGS.optimizer == "rmsprop": optimizer = tf.train.RMSPropOptimizer(learning_rate) else: print("Unknow optimizer: {}, exit now".format(FLAGS.optimizer)) exit(1) with tf.device('/cpu:0'): global_step = tf.Variable(0, name='global_step', trainable=False) train_op = optimizer.minimize(loss, global_step=global_step) tf.get_variable_scope().reuse_variables() # Define accuracy op for train data train_accuracy_logits = inference(batch_ids, batch_values, False) train_softmax = tf.nn.softmax(train_accuracy_logits) train_correct_prediction = tf.equal( tf.argmax(train_softmax, 1), batch_labels) train_accuracy = tf.reduce_mean(tf.cast(train_correct_prediction, tf.float32)) # Define auc op for train data batch_labels = tf.cast(batch_labels, tf.int32) sparse_labels = tf.reshape(batch_labels, [-1, 1]) derived_size = tf.shape(batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(1, [indices, 
sparse_labels]) outshape = tf.pack([derived_size, LABEL_SIZE]) new_train_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax, new_train_batch_labels) # Define accuracy op for validate data validate_accuracy_logits = inference(validate_batch_ids, validate_batch_values, False) validate_softmax = tf.nn.softmax(validate_accuracy_logits) validate_batch_labels = tf.to_int64(validate_batch_labels) validate_correct_prediction = tf.equal( tf.argmax(validate_softmax, 1), validate_batch_labels) validate_accuracy = tf.reduce_mean(tf.cast(validate_correct_prediction, tf.float32)) # Define auc op for validate data validate_batch_labels = tf.cast(validate_batch_labels, tf.int32) sparse_labels = tf.reshape(validate_batch_labels, [-1, 1]) derived_size = tf.shape(validate_batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(1, [indices, sparse_labels]) outshape = tf.pack([derived_size, LABEL_SIZE]) new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax, new_validate_batch_labels) # Define inference op sparse_index = tf.placeholder(tf.int64, [None, 2]) sparse_ids = tf.placeholder(tf.int64, [None]) sparse_values = tf.placeholder(tf.float32, [None]) sparse_shape = tf.placeholder(tf.int64, [2]) inference_ids = tf.SparseTensor(sparse_index, sparse_ids, sparse_shape) inference_values = tf.SparseTensor(sparse_index, sparse_values, sparse_shape) inference_logits = inference(inference_ids, inference_values, False) inference_softmax = tf.nn.softmax(inference_logits) inference_op = tf.argmax(inference_softmax, 1) # Initialize saver and summary checkpoint_file = checkpoint_dir + "/checkpoint.ckpt" steps_to_validate = FLAGS.steps_to_validate tf.scalar_summary("loss", loss) tf.scalar_summary("train_accuracy", train_accuracy) tf.scalar_summary("train_auc", train_auc) tf.scalar_summary("validate_accuracy", validate_accuracy) tf.scalar_summary("validate_auc", validate_auc) saver = tf.train.Saver() keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) # Create session to run with tf.Session() as sess: summary_op = tf.merge_all_summaries() writer = tf.train.SummaryWriter(tensorboard_dir, sess.graph) sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) if mode == "train": ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Continue training from the model {}".format( ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) # Get coordinator and run queues to read data coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = datetime.datetime.now() try: while not coord.should_stop(): _, loss_value, step = sess.run([train_op, loss, global_step]) if step % steps_to_validate == 0: train_accuracy_value, train_auc_value, validate_accuracy_value, auc_value, summary_value = sess.run( [train_accuracy, train_auc, validate_accuracy, validate_auc, summary_op]) end_time = datetime.datetime.now() print( "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".format( end_time - start_time, step, loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, auc_value)) writer.add_summary(summary_value, step) saver.save(sess, checkpoint_file, global_step=step) start_time = end_time except 
tf.errors.OutOfRangeError: print("Done training after reading all data") print("Exporting trained model to {}".format(FLAGS.model_path)) model_exporter = exporter.Exporter(saver) model_exporter.init( sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature({"keys": keys_placeholder, "indexs": sparse_index, "ids": sparse_ids, "values": sparse_values, "shape": sparse_shape}), 'outputs': exporter.generic_signature( {"keys": keys, "softmax": inference_softmax, "prediction": inference_op}) }) model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.export_version), sess) finally: coord.request_stop() # Wait for threads to exit coord.join(threads) elif mode == "export": print("Start to export model directly") # Load the checkpoint files ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: print("No checkpoint found, exit now") exit(1) # Export the model files print("Exporting trained model to {}".format(FLAGS.model_path)) model_exporter = exporter.Exporter(saver) model_exporter.init( sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature({"keys": keys_placeholder, "indexs": sparse_index, "ids": sparse_ids, "values": sparse_values, "shape": sparse_shape}), 'outputs': exporter.generic_signature( {"keys": keys, "softmax": inference_softmax, "prediction": inference_op}) }) model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.export_version), sess) elif mode == "inference": print("Start to run inference") start_time = datetime.datetime.now() inference_result_file_name = "./inference_result.txt" inference_test_file_name = "./data/a8a_test.libsvm" labels = [] feature_ids = [] feature_values = [] feature_index = [] ins_num = 0 for line in open(inference_test_file_name, "r"): tokens = line.split(" ") labels.append(int(tokens[0])) feature_num = 0 for feature in tokens[1:]: feature_id, feature_value = feature.split(":") feature_ids.append(int(feature_id)) feature_values.append(float(feature_value)) feature_index.append([ins_num, feature_num]) feature_num += 1 ins_num += 1 ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Use the model {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: print("No model found, exit now") exit(1) prediction, prediction_softmax = sess.run( [inference_op, inference_softmax], feed_dict={sparse_index: feature_index, sparse_ids: feature_ids, sparse_values: feature_values, sparse_shape: [ins_num, FEATURE_SIZE]}) end_time = datetime.datetime.now() print("[{}] Inference result: {}".format(end_time - start_time, prediction)) # Compute accuracy label_number = len(labels) correct_label_number = 0 for i in range(label_number): if labels[i] == prediction[i]: correct_label_number += 1 accuracy = float(correct_label_number) / label_number # Compute auc expected_labels = np.array(labels) predict_labels = prediction_softmax[:, 0] fpr, tpr, thresholds = metrics.roc_curve(expected_labels, predict_labels, pos_label=0) auc = metrics.auc(fpr, tpr) print("For inference data, accuracy: {}, auc: {}".format(accuracy, auc)) # Save inference result into file np.savetxt(inference_result_file_name, prediction, delimiter=",") print("Save result to file: {}".format(inference_result_file_name))
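# The "inference" branch above converts a LibSVM text file into the four
# sparse feeds (index, ids, values, shape) inline. A minimal standalone
# sketch of that same conversion; the helper name and return layout are
# illustrative, not part of the original code:
def libsvm_to_sparse_feed(lines, feature_size):
    labels, index, ids, values = [], [], [], []
    for row, line in enumerate(lines):
        tokens = line.strip().split(" ")
        labels.append(int(tokens[0]))
        for col, feature in enumerate(tokens[1:]):
            feature_id, feature_value = feature.split(":")
            index.append([row, col])
            ids.append(int(feature_id))
            values.append(float(feature_value))
    return labels, index, ids, values, [len(lines), feature_size]

# Example for two a8a instances (FEATURE_SIZE = 124):
#   labels, index, ids, values, shape = libsvm_to_sparse_feed(
#       ["0 5:1 64:1", "1 3:1 19:1"], 124)
# which can be fed to sparse_index, sparse_ids, sparse_values, sparse_shape.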
init_op = tf.group(tf.tables_initializer(), name='init_op')
serving_input_x = cnn.input_x
values, indices = tf.nn.top_k(cnn.input_y, 2)
table = tf.contrib.lookup.index_to_string_table_from_tensor(
    tf.constant([str(i) for i in range(2)]))
prediction_classes = table.lookup(tf.to_int64(indices))
model_exporter.init(
    sess.graph.as_graph_def(),
    init_op=init_op,
    default_graph_signature=exporter.classification_signature(
        input_tensor=serving_input_x,
        classes_tensor=prediction_classes,
        scores_tensor=values),
    named_graph_signatures={
        'inputs': exporter.generic_signature({'images': cnn.input_x}),
        'outputs': exporter.generic_signature({'scores': cnn.input_y})})
export_path = "<keep the path here>"
model_exporter.export(export_path, tf.constant(FLAGS.export_version), sess)
## END ##

# Write vocabulary
vocab_processor.save(os.path.join(out_dir, "vocab"))

# Initialize all variables
sess.run(tf.global_variables_initializer())

def train_step(x_batch, y_batch):
    """
    A single training step
# make sure all the summaries are flushed to disk
valid_writer.flush()
train_writer.flush()

base_model_dir = os.path.join(FLAGS.tmp_dir, 'model')
if not os.path.exists(base_model_dir):
    os.makedirs(base_model_dir)
saver = tf.train.Saver(sharded=False)
model_exporter = exporter.Exporter(saver)
model_exporter.init(sess.graph.as_graph_def(),
                    named_graph_signatures={
                        'inputs': exporter.generic_signature({'images': x_}),
                        'outputs': exporter.generic_signature(
                            {'scores': softmax_pred})
                    })
model_exporter.export(base_model_dir, tf.constant(FLAGS.export_version), sess)

if FLAGS.copy_to_gcs:
    gcs_copy(base_log_dir, FLAGS.gcs_export_uri)
    gcs_copy(base_model_dir, FLAGS.gcs_export_uri)

coord.request_stop()
coord.join(threads)
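# Hedged counterpart to the export above: loading an Exporter-produced bundle
# back into a Session with the loader from the same tf.contrib.session_bundle
# package. The export directory (base dir plus zero-padded version subdir) is
# an assumption for illustration; signature lookup helpers varied across TF
# versions, so only the loader call is shown.
from tensorflow.contrib.session_bundle import session_bundle

restored_sess, restored_meta_graph_def = (
    session_bundle.load_session_bundle_from_path("/tmp/model/00000001"))
# restored_sess now holds the exported graph and variables; the generic
# 'inputs'/'outputs' signatures live in the meta graph collections.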
def main(): print("Start playing game") # Initial Gym environement env = gym.make(FLAGS.gym_env) experience_replay_queue = deque() action_number = env.action_space.n # The shape of CarPole is [4, 0], Pacman is [210, 160, 3] state_number = env.observation_space.shape[0] if len(env.observation_space.shape) >= 3: state_number2 = env.observation_space.shape[1] state_number3 = env.observation_space.shape[2] else: state_number2 = env.observation_space.shape[0] state_number3 = env.observation_space.shape[0] # Define dnn model def dnn_inference(inputs, is_train=True): # The inputs is [BATCH_SIZE, state_number], outputs is [BATCH_SIZE, action_number] hidden1_unit_number = 20 with tf.variable_scope("fc1"): weights = tf.get_variable("weight", [state_number, hidden1_unit_number], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [hidden1_unit_number], initializer=tf.random_normal_initializer()) layer = tf.add(tf.matmul(inputs, weights), bias) if FLAGS.enable_bn and is_train: mean, var = tf.nn.moments(layer, axes=[0]) scale = tf.get_variable("scale", hidden1_unit_number, initializer=tf.random_normal_initializer()) shift = tf.get_variable("shift", hidden1_unit_number, initializer=tf.random_normal_initializer()) layer = tf.nn.batch_normalization(layer, mean, var, shift, scale, FLAGS.bn_epsilon) layer = tf.nn.relu(layer) with tf.variable_scope("fc2"): weights = tf.get_variable("weight", [hidden1_unit_number, action_number], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [action_number], initializer=tf.random_normal_initializer()) layer = tf.add(tf.matmul(layer, weights), bias) return layer # Define cnn model def cnn_inference(inputs, is_train=True): LABEL_SIZE = action_number # The inputs is [BATCH_SIZE, 210, 160, 3], outputs is [BATCH_SIZE, action_number] with tf.variable_scope("conv1"): weights = tf.get_variable("weights", [3, 3, 3, 32], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [32], initializer=tf.random_normal_initializer()) # Should not use polling layer = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) # The inputs is [BATCH_SIZE, 210, 160, 32], outputs is [BATCH_SIZE, 210, 160, 64] with tf.variable_scope("conv2"): weights = tf.get_variable("weights", [3, 3, 32, 64], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [64], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(layer, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) # Reshape for full-connect network layer = tf.reshape(layer, [-1, 210 * 160 * 64]) # Full connected layer result: [BATCH_SIZE, LABEL_SIZE] with tf.variable_scope("fc1"): weights = tf.get_variable("weights", [210 * 160 * 64, LABEL_SIZE], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [LABEL_SIZE], initializer=tf.random_normal_initializer()) layer = tf.add(tf.matmul(layer, weights), bias) return layer # Define train op model = FLAGS.model print("Use the model: {}".format(model)) if model == "dnn": states_placeholder = tf.placeholder(tf.float32, [None, state_number]) inference = dnn_inference elif model == "cnn": states_placeholder = tf.placeholder(tf.float32, [None, state_number, state_number2, state_number3]) inference = cnn_inference else: print("Unknow model, exit now") exit(1) logit = inference(states_placeholder, True) actions_placeholder = tf.placeholder(tf.float32, [None, 
action_number]) predict_rewords = tf.reduce_sum( tf.mul(logit, actions_placeholder), reduction_indices=1) rewards_placeholder = tf.placeholder(tf.float32, [None]) loss = tf.reduce_mean(tf.square(rewards_placeholder - predict_rewords)) learning_rate = FLAGS.learning_rate print("Use the optimizer: {}".format(FLAGS.optimizer)) if FLAGS.optimizer == "sgd": optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif FLAGS.optimizer == "adadelta": optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif FLAGS.optimizer == "adagrad": optimizer = tf.train.AdagradOptimizer(learning_rate) elif FLAGS.optimizer == "adam": optimizer = tf.train.AdamOptimizer(learning_rate) elif FLAGS.optimizer == "ftrl": optimizer = tf.train.FtrlOptimizer(learning_rate) elif FLAGS.optimizer == "rmsprop": optimizer = tf.train.RMSPropOptimizer(learning_rate) else: print("Unknow optimizer: {}, exit now".format(FLAGS.optimizer)) exit(1) global_step = tf.Variable(0, name="global_step", trainable=False) train_op = optimizer.minimize(loss, global_step=global_step) # Get the action with most rewoard when giving the state batch_best_actions = tf.argmax(logit, 1) best_action = batch_best_actions[0] batch_best_q = tf.reduce_max(logit, 1) best_q = batch_best_q[0] if not os.path.exists(FLAGS.checkpoint_dir): os.makedirs(FLAGS.checkpoint_dir) checkpoint_file = FLAGS.checkpoint_dir + "/checkpoint.ckpt" init_op = tf.initialize_all_variables() saver = tf.train.Saver() tf.scalar_summary("loss", loss) # Create session with tf.Session() as sess: summary_op = tf.merge_all_summaries() writer = tf.train.SummaryWriter(FLAGS.tensorboard_dir, sess.graph) sess.run(init_op) if FLAGS.mode == "train": # Restore from checkpoint if it exists ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Restore model from the file {}".format( ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) for episode in range(FLAGS.episode_number): # Start new epoisode to train state = env.reset() loss_value = -1 for step in xrange(FLAGS.episode_step_number): # Get action from exploration and exploitation if random.random() <= FLAGS.exploration_exploitation_epsilon: action = random.randint(0, action_number - 1) else: action = sess.run(best_action, feed_dict={states_placeholder: [state]}) # Run this action on this state next_state, reward, done, _ = env.step(action) # Get new state add to replay experience queue one_hot_action = np.zeros(action_number) one_hot_action[action] = 1 experience_replay_queue.append((state, one_hot_action, reward, next_state, done)) if len(experience_replay_queue) > FLAGS.experience_replay_size: experience_replay_queue.popleft() # Get enough data to train with batch if len(experience_replay_queue) > FLAGS.batch_size: # Get batch experience replay to train batch_data = random.sample(experience_replay_queue, FLAGS.batch_size) batch_states = [] batch_actions = [] batch_rewards = [] batch_next_states = [] expected_rewards = [] for experience_replay in batch_data: batch_states.append(experience_replay[0]) batch_actions.append(experience_replay[1]) batch_rewards.append(experience_replay[2]) batch_next_states.append(experience_replay[3]) # Get expected reword done = experience_replay[4] if done: expected_rewards.append(experience_replay[2]) else: # TODO: need to optimizer and compute within TensorFlow next_best_q = sess.run( best_q, feed_dict={states_placeholder: [experience_replay[3]]}) expected_rewards.append(experience_replay[2] + FLAGS.discount_factor * next_best_q) 
_, loss_value, step = sess.run( [train_op, loss, global_step], feed_dict={ rewards_placeholder: expected_rewards, actions_placeholder: batch_actions, states_placeholder: batch_states }) else: print("Add more data to train with batch") state = next_state if done: break # Validate for some episode if episode % FLAGS.episode_to_validate == 0: print("Episode: {}, global step: {}, the loss: {}".format( episode, step, loss_value)) state = env.reset() total_reward = 0 for i in xrange(FLAGS.episode_step_number): if FLAGS.render_game: time.sleep(FLAGS.render_sleep_time) env.render() action = sess.run(best_action, feed_dict={states_placeholder: [state]}) state, reward, done, _ = env.step(action) total_reward += reward if done: break print("Eposide: {}, total reward: {}".format(episode, total_reward)) saver.save(sess, checkpoint_file, global_step=step) # End of training process model_exporter = exporter.Exporter(saver) model_exporter.init(sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature({ "states": states_placeholder }), 'outputs': exporter.generic_signature({ "actions": batch_best_actions }) }) model_exporter.export(FLAGS.model_path, tf.constant(FLAGS.export_version), sess) print "Done exporting!" elif FLAGS.mode == "untrained": total_reward = 0 state = env.reset() for i in xrange(FLAGS.episode_step_number): if FLAGS.render_game: time.sleep(FLAGS.render_sleep_time) env.render() action = env.action_space.sample() next_state, reward, done, _ = env.step(action) total_reward += reward if done: print("End of untrained because of done, reword: {}".format( total_reward)) break elif FLAGS.mode == "inference": # Restore from checkpoint if it exists ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Restore model from the file {}".format( ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: print("Model not found, exit now") exit(0) total_reward = 0 state = env.reset() index = 1 while True: time.sleep(FLAGS.render_sleep_time) if FLAGS.render_game: env.render() action = sess.run(best_action, feed_dict={states_placeholder: [state]}) next_state, reward, done, _ = env.step(action) state = next_state total_reward += reward if done: print("End of inference because of done, reword: {}".format( total_reward)) break else: if total_reward > index * 100: print("Not done yet, current reword: {}".format(total_reward)) index += 1 else: print("Unknown mode: {}".format(FLAGS.mode)) print("End of playing game")
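# The replay loop above computes the TD target r + discount_factor * max_a'
# Q(s', a') one transition at a time (see the TODO in the listing). A hedged
# numpy sketch of the same computation done for a whole batch; names are mine:
import numpy as np

def td_targets(batch_rewards, batch_next_q, batch_done, discount_factor):
    # batch_next_q: [batch_size, action_number] Q-values for the next states,
    # e.g. sess.run(logit, feed_dict={states_placeholder: batch_next_states})
    rewards = np.asarray(batch_rewards, dtype=np.float32)
    done_mask = np.asarray(batch_done, dtype=np.float32)  # 1.0 where episode ended
    best_next_q = np.max(batch_next_q, axis=1)            # max over actions
    return rewards + (1.0 - done_mask) * discount_factor * best_next_q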
def init_test():
    Params.batch_size = 1
    dictionaries = pre_train_word.load_vocab_pkls()
    vocab, response_vocab = dictionaries
    Stats.word_vocab_size = len(vocab)
    Stats.num_classes = len(response_vocab)
    initializer = tf.random_uniform_initializer(-init_scale, init_scale)
    sess = tf.Session()
    try:
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            model_train = DoubleRNN()
        model_saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(pre_train_word.Paths.model_name,
                                             latest_filename='checkpoints')
        if ckpt and ckpt.model_checkpoint_path:
            print("Loading model from: {}".format(ckpt.model_checkpoint_path))
            model_saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('NO SAVED MODEL FOUND. RETURNING NONE')
            return None
        #### TensorFlow Serving block starts ####
        init_op = tf.group(tf.initialize_all_tables(), name='init_op')
        saver = tf.train.Saver(sharded=True)
        model_exporter = exporter.Exporter(saver)
        model_exporter.init(sess.graph.as_graph_def(),
                            init_op=init_op,
                            named_graph_signatures={
                                'inputs': exporter.generic_signature({
                                    'a1_utter': model_train.a1,
                                    'a2_utter': model_train.a2,
                                    'u1_utter': model_train.u1,
                                    'u2_utter': model_train.u2,
                                    'a1_len': model_train.a1_seq_length,
                                    'a2_len': model_train.a2_seq_length,
                                    'u1_len': model_train.u1_seq_length,
                                    'u2_len': model_train.u2_seq_length,
                                    'response': model_train.response,
                                    'num_contexts': model_train.num_contexts
                                }),
                                'outputs': exporter.generic_signature(
                                    {'loss': model_train.cost_per_5_arr})
                            })
        model_exporter.export(
            '/home/phegde/Desktop/amelia-v3-temp/v3-launch-demo-models/hlstm_model',
            tf.constant(9), sess)
        print("exporting done")
        #### TensorFlow Serving block ends ####
        return sess, model_train, vocab, response_vocab
    except IOError as e:
        print(e)
        return None
def main(flags): mnist = input_data.read_data_sets(flags.mnist_data_dir, reshape=False, one_hot=True) # neural network with 1 layer of 10 softmax neurons # # · · · · · · · · · · (input data, flattened pixels) X [batch, 784] # 784 = 28 * 28 # \x/x\x/x\x/x\x/x\x/ -- fully connected layer (softmax) W [784, 10] b[10] # · · · · · · · · Y [batch, 10] # The model is: # # Y = softmax( X * W + b) # X: matrix for 100 grayscale images of 28x28 pixels, flattened (there are 100 images in a mini-batch) # W: weight matrix with 784 lines and 10 columns # b: bias vector with 10 dimensions # +: add with broadcasting: adds the vector to each line of the matrix (numpy) # softmax(matrix) applies softmax on each line # softmax(line) applies an exp to each value then divides by the norm of the resulting line # Y: output matrix with 100 lines and 10 columns # input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch X = tf.placeholder(tf.float32, [None, 28, 28, 1]) # correct answers will go here Y_ = tf.placeholder(tf.float32, [None, 10]) # weights W[784, 10] 784=28*28 W = tf.Variable(tf.zeros([784, 10])) # biases b[10] b = tf.Variable(tf.zeros([10])) # flatten the images into a single line of pixels # -1 in the shape definition means "the only possible dimension that will preserve the number of elements" XX = tf.reshape(X, [-1, 784]) # The model Y = tf.nn.softmax(tf.matmul(XX, W) + b) # loss function: cross-entropy = - sum( Y_i * log(Yi) ) # Y: the computed output vector # Y_: the desired output vector # cross-entropy # log takes the log of each element, * multiplies the tensors element by element # reduce_mean will add all the components in the tensor # so here we end up with the total cross-entropy for all images in the batch cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0 # normalized for batches of 100 images, # *10 because "mean" included an unwanted division by 10 # accuracy of the trained model, between 0 (worst) and 1 (best) correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # training, learning rate = 0.005 train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cross_entropy) # init init = tf.global_variables_initializer() sess = tf.Session() sess.run(init) for i in range(flags.iterations): batch_X, batch_Y = mnist.train.next_batch(flags.batch_size) # the backpropagation training step sess.run(train_step, feed_dict={X: batch_X, Y_: batch_Y}) print("Iteration:" + str(i) + "/" + str(flags.iterations)) print("******* Training is Done! *******") print("Accuracy:", sess.run(accuracy, feed_dict={X: mnist.test.images, Y_: mnist.test.labels})) # Export model to Tensorflow Serving saver = tf.train.Saver() model_exporter = exporter.Exporter(saver) model_exporter.init( sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature({'x': X}), 'outputs': exporter.generic_signature({'y': Y}) } ) model_exporter.export(flags.model_dir, tf.constant(flags.model_version), sess) print("Model Saved at", str(flags.model_dir), "version:", flags.model_version)
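# A note on the * 1000.0 in the loss above: tf.reduce_mean averages over all
# batch_size * 10 elements of Y_ * tf.log(Y), so with batches of 100 images it
# divides the summed cross-entropy by 1000; multiplying by 1000.0 restores the
# plain sum over the batch. An equivalent explicit form, reusing the listing's
# Y and Y_ tensors:
per_image_xent = -tf.reduce_sum(Y_ * tf.log(Y), axis=1)  # shape [batch_size]
total_xent = tf.reduce_sum(per_image_xent)  # equals the scaled loss when batch_size == 100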
    train_accuracy_value, validate_accuracy_value, auc_value))
            writer.add_summary(summary_value, step)
            saver.save(sess, checkpoint_file, global_step=step)
            start_time = end_time
    except tf.errors.OutOfRangeError:
        print("Done training after reading all data")
        print("Exporting trained model to {}".format(FLAGS.model_path))
        model_exporter = exporter.Exporter(saver)
        model_exporter.init(
            sess.graph.as_graph_def(),
            named_graph_signatures={
                'inputs': exporter.generic_signature({
                    "keys": keys_placeholder,
                    "features": inference_features
                }),
                'outputs': exporter.generic_signature({
                    "keys": keys,
                    "softmax": inference_softmax,
                    "prediction": inference_op
                })
            })
        model_exporter.export(FLAGS.model_path,
                              tf.constant(FLAGS.export_version), sess)
        print("Done exporting!")
def main(): parseArgs(args) args.log_dir_path = args.output + os.path.sep + 'test_' + os.path.split( args.checkpoint)[1] args.log_prefix = args.a0_model_name + '_' if not os.path.exists(args.log_dir_path): os.makedirs(args.log_dir_path) if args.save_graph is not None: args.message("Resize batchsize to 1 for serving...") if args.extra_wdict is not None: args.wdict_path = args.extra_wdict args.testfnms = [args.extra_testfnms] args.message("Loading extra word dictionary from " + args.wdict_path) configproto = tf.ConfigProto() configproto.gpu_options.allow_growth = True configproto.allow_soft_placement = True args.batchsize = 400 test_batchsize = args.batchsize #test_batchsize = 1 with tf.Graph().as_default(), tf.Session(config=configproto) as sess: model = graph_moudle.Model() model.init_global_step() vt, vs, vo = model.model_setup() tf.initialize_all_variables().run() args.message('Loading trained model ' + str(args.checkpoint)) model.saver.restore(sess, args.checkpoint) args.write_variables(vt) if args.save_graph: if args.save_only_trainable: exporter_saver = tf.train.Saver( var_list=tf.trainable_variables(), sharded=True) else: exporter_saver = tf.train.Saver(sharded=True) online_feed_dict = { 'q': model.query_sent_in, 'qm': model.query_sent_mask, 't': model.title_sent_in, 'tm': model.title_sent_mask, 'is_training': model.is_training, 'keep_prob': model.keep_prob } online_fetch_dict = {'score': model.score} model_exporter = exporter.Exporter(exporter_saver) model_exporter.init( sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature(online_feed_dict), 'outputs': exporter.generic_signature(online_fetch_dict) }) model_exporter.export(args.log_dir_path, tf.constant(0), sess) args.message("Successfully export graph to path:" + args.log_dir_path) #tf.train.write_graph(sess.graph_def, args.log_dir_path, 'graph.pbtxt', False) return args.write_args(args) fw = open(args.predit_output, "w") fw2 = open("./data/detail.txt", "w") for test_file in args.testfnms: print('Test ' + str(test_file)) f = open(test_file) data = read_input(f.readlines()) for key, kviter in groupby(data, itemgetter(2)): pairs = [] itemNum = 0 for k in kviter: #if(len(k[0])>15 and len(k[1])>15): pairs.append("\t".join(k)) itemNum = itemNum + 1 if itemNum > 20: break if (itemNum < 5): continue n_test = len(pairs) n_batches = int(n_test / test_batchsize) if n_test % test_batchsize != 0: n_batches += 1 tpair_loss, tacc, tacc01 = 0.0, 0.0, 0.0 score_sum = 0 result = "" detail = "" for i in range(n_batches): q, qm, t, tm, g = model.data_proc( pairs[i * test_batchsize:(i + 1) * test_batchsize]) pair_loss, acc, acc01, score = \ model.run_epoch(sess, [q, qm, t, tm, g, False]) for j in range(0, itemNum): if i * test_batchsize + j >= n_test: break pair_line = pairs[i * test_batchsize + j].rstrip() pair_tokens = pair_line.split('\t') if len(pair_tokens) > 2: score_sum = score_sum + score[j][0] score_str1 = key + '\t' + pair_tokens[ 0] + '\t' + pair_tokens[1] + '\t' + str( score[j][0]) + '\n' detail = detail + score_str1 # fw2.write(score_str1) score_avg = score_sum / itemNum result = result + key + '\t' + str(score_avg) + '\n' fw2.write(detail) fw.write(result) f.close() fw.close() fw2.close()
def main(): parseArgs(args) args.log_dir_path = args.output + os.path.sep + 'test_' + os.path.split( args.checkpoint)[1] args.log_prefix = args.a0_model_name + '_' if not os.path.exists(args.log_dir_path): os.makedirs(args.log_dir_path) if args.save_graph: args.message("Resize batchsize to 1 for serving...") if args.extra_wdict is not None: args.wdict_path = args.extra_wdict args.testfnms = [args.extra_testfnms] args.message("Loading extra word dictionary from " + args.wdict_path) configproto = tf.ConfigProto() configproto.gpu_options.allow_growth = True configproto.allow_soft_placement = True args.batchsize = 400 test_batchsize = args.batchsize #test_batchsize = 1 with tf.Graph().as_default(), tf.Session(config=configproto) as sess: model = graph_moudle.Model() model.init_global_step() vt, vs, vo = model.model_setup() tf.initialize_all_variables().run() args.message('Loading trained model ' + str(args.checkpoint)) model.saver.restore(sess, args.checkpoint) args.write_variables(vt) if args.save_graph: if args.save_only_trainable: exporter_saver = tf.train.Saver( var_list=tf.trainable_variables(), sharded=True) else: exporter_saver = tf.train.Saver(sharded=True) online_feed_dict = { 'q': model.query_sent_in, 'qm': model.query_sent_mask, 't': model.title_sent_in, 'tm': model.title_sent_mask, 'is_training': model.is_training, 'keep_prob': model.keep_prob } online_fetch_dict = {'score': model.score} model_exporter = exporter.Exporter(exporter_saver) model_exporter.init( sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature(online_feed_dict), 'outputs': exporter.generic_signature(online_fetch_dict) }) model_exporter.export(args.log_dir_path, tf.constant(0), sess) args.message("Successfully export graph to path:" + args.log_dir_path) #tf.train.write_graph(sess.graph_def, args.log_dir_path, 'graph.pbtxt', False) return args.write_args(args) fw = open(args.predit_output, "w") for test_file in args.testfnms: print('Test ' + str(test_file)) f = open(test_file) pairs = f.readlines() n_test = len(pairs) n_batches = int(n_test / test_batchsize) if n_test % test_batchsize != 0: n_batches += 1 tpair_loss, tacc, tacc01 = 0.0, 0.0, 0.0 for i in range(n_batches): q, qm, t, tm, g = model.data_proc( pairs[i * test_batchsize:(i + 1) * test_batchsize]) pair_loss, acc, acc01, score = \ model.run_epoch(sess, [q, qm, t, tm, g, False]) tpair_loss, tacc, tacc01 = tpair_loss + pair_loss, tacc + acc, tacc01 + acc01 out_str = "%f %f %f" % (pair_loss, acc, acc01) for j in range(0, len(score)): if i * test_batchsize + j >= n_test: break pair_line = pairs[i * test_batchsize + j].rstrip() pair_tokens = pair_line.split('\t') if len(pair_tokens) > 2: score_str1 = pair_tokens[0] + '\t' + pair_tokens[ 1] + '\t' + str(score[j][0]) + '\n' #score_str1 = pair_tokens[0] + '\t' + pair_tokens[1] + '\t' + pair_tokens[2] + '\t' + str(score[j][0]) + '\n' #score_str2 = pair_tokens[0] + '\t' + pair_tokens[3] + '\t' + str(score[j][1]) + '\n' #score_str1 = pair_tokens[0] + '\t' + pair_tokens[1] + '\t' + pair_tokens[2] + '\t' + str(score[j][0]) + '\t' + pair_tokens[3] + '\t' + pair_tokens[4] + '\t' + str(score[j][1]) + '\n' #score_str2 = pair_tokens[0] + '\t' + pair_tokens[2] + '\t' + str(score[j][1]) + '\n' #score_str = pair_line + '\t' + str(score[j][0])+'-'+str(score[j][1]) + '\n' #sys.stderr.write(score_str1) fw.write(score_str1) #sys.stderr.write(score_str1+score_str2) #print(out_str) out_str = "Test " + str( test_file) + " with checkpoint " + args.checkpoint args.message(out_str) n_batches = 
float(n_batches)
        out_str = "pair loss:%f acc:%f acc01:%f" \
            % (tpair_loss / n_batches, tacc / n_batches, tacc01 / n_batches)
        args.message(out_str)
        f.close()
    fw.close()
if i % 1000 == 0:
    mse = sess.run(loss, feed_dict={
        x: train_feature_batch,
        y: train_label_batch
    })  # , keep_prob: 1.0})
    test_mse = sess.run(loss, feed_dict={
        x: test_feature,
        y: test_label
    })  # , keep_prob: 1.0})
    print("epoch " + str(i / 100) + ", Minibatch Loss= " +
          "{:.6f}".format(mse) + ", test set mse= " +
          "{:.6f}".format(test_mse))

# ---------------------------- save model ----------------------------
from tensorflow.contrib.session_bundle import exporter

saver = tf.train.Saver()
model_dir = os.path.abspath('./model')
model_exporter = exporter.Exporter(saver)
model_version = 1
model_exporter.init(sess.graph.as_graph_def(),
                    named_graph_signatures={
                        'inputs': exporter.generic_signature({'feature': x}),
                        'outputs': exporter.generic_signature({'score': y_})
                    })
model_exporter.export(model_dir, tf.constant(model_version), sess)
def main(): # Pre-process hyperparameters FEATURE_SIZE = FLAGS.feature_size LABEL_SIZE = FLAGS.label_size EPOCH_NUMBER = FLAGS.epoch_number if EPOCH_NUMBER <= 0: EPOCH_NUMBER = None BATCH_THREAD_NUMBER = FLAGS.batch_thread_number MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE MODE = FLAGS.mode MODEL = FLAGS.model CHECKPOINT_PATH = FLAGS.checkpoint_path if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists( CHECKPOINT_PATH): os.makedirs(CHECKPOINT_PATH) CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt" LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH) OUTPUT_PATH = FLAGS.output_path if not OUTPUT_PATH.startswith("fds://") and not os.path.exists( OUTPUT_PATH): os.makedirs(OUTPUT_PATH) pprint.PrettyPrinter().pprint(FLAGS.__flags) # Process TFRecoreds files def read_and_decode(filename_queue): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) if MODEL == "bidirectional_rnn": features = tf.parse_single_example( serialized_example, features={ "label": tf.FixedLenFeature([3], tf.float32), "features": tf.FixedLenFeature([FEATURE_SIZE], tf.float32), }) else: features = tf.parse_single_example( serialized_example, features={ "label": tf.FixedLenFeature([], tf.float32), "features": tf.FixedLenFeature([FEATURE_SIZE], tf.float32), }) label = features["label"] features = features["features"] print(label) return label, features # Read TFRecords files for training filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.train_tfrecords_file), num_epochs=EPOCH_NUMBER) label, features = read_and_decode(filename_queue) batch_labels, batch_features = tf.train.shuffle_batch( [label, features], batch_size=FLAGS.batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) # Read TFRecords file for validatioin validate_filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.validate_tfrecords_file), num_epochs=EPOCH_NUMBER) validate_label, validate_features = read_and_decode( validate_filename_queue) validate_batch_labels, validate_batch_features = tf.train.shuffle_batch( [validate_label, validate_features], batch_size=FLAGS.validate_batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) # Define the model input_units = FEATURE_SIZE output_units = LABEL_SIZE model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()] def full_connect(inputs, weights_shape, biases_shape, is_train=True): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.matmul(inputs, weights) + biases if FLAGS.enable_bn and is_train: mean, var = tf.nn.moments(layer, axes=[0]) scale = tf.get_variable("scale", biases_shape, initializer=tf.random_normal_initializer()) shift = tf.get_variable("shift", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.nn.batch_normalization(layer, mean, var, shift, scale, FLAGS.bn_epsilon) return layer def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True): layer = full_connect(inputs, weights_shape, biases_shape, is_train) layer = tf.nn.relu(layer) return layer def customized_inference(inputs, is_train=True): hidden1_units = 128 hidden2_units = 32 hidden3_units = 8 with tf.variable_scope("input"): layer = full_connect_relu(inputs, 
[input_units, hidden1_units], [hidden1_units], is_train) with tf.variable_scope("layer0"): layer = full_connect_relu(layer, [hidden1_units, hidden2_units], [hidden2_units], is_train) with tf.variable_scope("layer1"): layer = full_connect_relu(layer, [hidden2_units, hidden3_units], [hidden3_units], is_train) if FLAGS.enable_dropout and is_train: layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob) with tf.variable_scope("output"): layer = full_connect(layer, [hidden3_units, output_units], [output_units], is_train) return layer def dnn_inference(inputs, is_train=True): with tf.variable_scope("input"): layer = full_connect_relu( inputs, [input_units, model_network_hidden_units[0]], [model_network_hidden_units[0]], is_train) for i in range(len(model_network_hidden_units) - 1): with tf.variable_scope("layer{}".format(i)): layer = full_connect_relu(layer, [ model_network_hidden_units[i], model_network_hidden_units[i + 1] ], [model_network_hidden_units[i + 1]], is_train) with tf.variable_scope("output"): layer = full_connect( layer, [model_network_hidden_units[-1], output_units], [output_units], is_train) return layer def lr_inference(inputs, is_train=True): with tf.variable_scope("lr"): layer = full_connect(inputs, [input_units, output_units], [output_units]) return layer def wide_and_deep_inference(inputs, is_train=True): return lr_inference(inputs, is_train) + dnn_inference(inputs, is_train) def cnn_inference(inputs, is_train=True): # TODO: Change if validate_batch_size is different # [BATCH_SIZE, 512 * 512 * 1] -> [BATCH_SIZE, 512, 512, 1] # inputs = tf.reshape(inputs, [FLAGS.batch_size, 512, 512, 1]) # inputs = tf.reshape(inputs, [FLAGS.batch_size,120,7,1]) inputs = tf.reshape(inputs, [FLAGS.batch_size, 128, 32, 1]) # [BATCH_SIZE, 512, 512, 1] -> [BATCH_SIZE, 128, 128, 8] with tf.variable_scope("conv0"): weights = tf.get_variable( "weights", [3, 3, 1, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") # [BATCH_SIZE, 128, 128, 8] -> [BATCH_SIZE, 32, 32, 8] with tf.variable_scope("conv1"): weights = tf.get_variable( "weights", [3, 3, 8, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(layer, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") # [BATCH_SIZE, 32, 32, 8] -> [BATCH_SIZE, 8, 8, 8] with tf.variable_scope("conv2"): weights = tf.get_variable( "weights", [3, 3, 8, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(layer, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool(layer, ksize=[1, 4, 1, 1], strides=[1, 4, 1, 1], padding="SAME") # [BATCH_SIZE, 8, 8, 8] -> [BATCH_SIZE, 8 * 8 * 8] layer = tf.reshape(layer, [-1, 8 * 8 * 8]) # [BATCH_SIZE, 8 * 8 * 8] -> [BATCH_SIZE, LABEL_SIZE] with tf.variable_scope("output"): weights = tf.get_variable( "weights", [8 * 8 * 8, LABEL_SIZE], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [LABEL_SIZE], 
initializer=tf.random_normal_initializer()) layer = tf.add(tf.matmul(layer, weights), bias) return layer def resnet_inference(inputs, is_train=True): x = tf.reshape(inputs, [FLAGS.batch_size, 64, 64, 1]) with slim.arg_scope(resnet_arg_scope(is_training=(MODE == "train"))): net, enpoints = resnet_v2.resnet_v2_50( x, num_classes=FLAGS.label_size) net = tf.reshape(net, [FLAGS.batch_size, FLAGS.label_size]) return net #双向lstm (rnn) def bidirectional_rnn_inference(inputs, is_train=True): n_steps = 128 n_input = 32 n_hidden = 128 # hidden layer num of features n_classes = FLAGS.label_size x = tf.reshape(inputs, [-1, n_steps, n_input]) # Define weights weights = { # Hidden layer weights => 2*n_hidden because of forward + backward cells 'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes])) } biases = {'out': tf.Variable(tf.random_normal([n_classes]))} # Prepare data shape to match `bidirectional_rnn` function requirements # Current data input shape: (batch_size, n_steps, n_input) # Required shape: 'n_steps' tensors list of shape (batch_size, n_input) # Permuting batch_size and n_steps x = tf.transpose(x, [1, 0, 2]) # Reshape to (n_steps*batch_size, n_input) x = tf.reshape(x, [-1, n_input]) # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input) x = tf.split(x, n_steps, 0) # Define lstm cells with tensorflow # Forward direction cell lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0) # Backward direction cell lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0) # Get lstm cell output try: outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32) except Exception: # Old TensorFlow version only returns outputs not states outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32) # Linear activation, using rnn inner loop last output return tf.matmul(outputs[-1], weights['out']) + biases['out'] def inference(inputs, is_train=True): if MODEL == "dnn": return dnn_inference(inputs, is_train) elif MODEL == "lr": return lr_inference(inputs, is_train) elif MODEL == "wide_and_deep": return wide_and_deep_inference(inputs, is_train) elif MODEL == "customized": return customized_inference(inputs, is_train) elif MODEL == "cnn": return cnn_inference(inputs, is_train) elif MODEL == "resnet": return resnet_inference(inputs, is_train) elif MODEL == "bidirectional_rnn": return bidirectional_rnn_inference(inputs, is_train) else: print("Unknown model, exit now") exit(1) print("Use the model: {}, model network: {}".format( MODEL, FLAGS.model_network)) logits = inference(batch_features, True) batch_labels = tf.to_int64(batch_labels) if MODEL == "bidirectional_rnn": loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=batch_labels)) else: cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=batch_labels) loss = tf.reduce_mean(cross_entropy, name="loss") global_step = tf.Variable(0, name="global_step", trainable=False) if FLAGS.enable_lr_decay: print("Enable learning rate decay rate: {}".format( FLAGS.lr_decay_rate)) starter_learning_rate = FLAGS.learning_rate learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100000, FLAGS.lr_decay_rate, staircase=True) else: learning_rate = FLAGS.learning_rate optimizer = get_optimizer(FLAGS.optimizer, learning_rate) train_op = optimizer.minimize(loss, global_step=global_step) tf.get_variable_scope().reuse_variables() # Define accuracy op for train data train_accuracy_logits = 
inference(batch_features, False) train_softmax = tf.nn.softmax(train_accuracy_logits) if MODEL == "bidirectional_rnn": train_correct_prediction = tf.equal( tf.argmax(train_accuracy_logits, 1), tf.argmax(batch_labels, 1)) else: train_correct_prediction = tf.equal(tf.argmax(train_softmax, 1), batch_labels) train_accuracy = tf.reduce_mean( tf.cast(train_correct_prediction, tf.float32)) # Define auc op for train data if MODEL == "bidirectional_rnn": new_batch_labels = batch_labels else: batch_labels = tf.cast(batch_labels, tf.int32) sparse_labels = tf.reshape(batch_labels, [-1, 1]) derived_size = tf.shape(batch_labels)[0] print(derived_size.shape) outshape = tf.stack([derived_size, LABEL_SIZE]) indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(axis=1, values=[indices, sparse_labels]) new_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax, new_batch_labels) # Define accuracy op for validate data validate_accuracy_logits = inference(validate_batch_features, False) validate_batch_labels = tf.to_int64(validate_batch_labels) validate_softmax = tf.nn.softmax(validate_accuracy_logits) if MODEL == "bidirectional_rnn": validate_correct_prediction = tf.equal( tf.argmax(validate_accuracy_logits, 1), tf.argmax(validate_batch_labels, 1)) else: validate_correct_prediction = tf.equal(tf.argmax(validate_softmax, 1), validate_batch_labels) validate_accuracy = tf.reduce_mean( tf.cast(validate_correct_prediction, tf.float32)) # Define auc op for validate data if MODEL == "bidirectional_rnn": new_validate_batch_labels = validate_batch_labels else: validate_batch_labels = tf.cast(validate_batch_labels, tf.int32) sparse_labels = tf.reshape(validate_batch_labels, [-1, 1]) derived_size = tf.shape(validate_batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(axis=1, values=[indices, sparse_labels]) outshape = tf.stack([derived_size, LABEL_SIZE]) new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, validate_auc = tf.contrib.metrics.streaming_auc( validate_softmax, new_validate_batch_labels) # Define inference op inference_features = tf.placeholder("float", [None, FEATURE_SIZE]) inference_logits = inference(inference_features, False) if MODEL == "bidirectional_rnn": inference_softmax = inference_logits else: inference_softmax = tf.nn.softmax(inference_logits) inference_op = tf.argmax(inference_softmax, 1) keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) model_signature = { "inputs": exporter.generic_signature({ "keys": keys_placeholder, "features": inference_features }), "outputs": exporter.generic_signature({ "keys": keys, "softmax": inference_softmax, "prediction": inference_op }) } # Initialize saver and summary saver = tf.train.Saver() tf.summary.scalar("loss", loss) tf.summary.scalar("train_accuracy", train_accuracy) tf.summary.scalar("train_auc", train_auc) tf.summary.scalar("validate_accuracy", validate_accuracy) tf.summary.scalar("validate_auc", validate_auc) summary_op = tf.summary.merge_all() init_op = [ tf.global_variables_initializer(), tf.local_variables_initializer() ] # Create session to run with tf.Session() as sess: print("Start to run with mode: {}".format(MODE)) writer = tf.summary.FileWriter(OUTPUT_PATH, sess.graph) sess.run(init_op) if MODE == "train": # Restore session and start queue runner restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT) coord = 
tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = datetime.datetime.now() try: icount = 0 while not coord.should_stop(): _, loss_value, step = sess.run( [train_op, loss, global_step]) # Print state while training if step % FLAGS.steps_to_validate == 0: train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run( [ train_accuracy, train_auc, validate_accuracy, validate_auc, summary_op ]) end_time = datetime.datetime.now() print( "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}" .format(end_time - start_time, step, loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value)) # icount = icount + 1 # if icount>= 10 : writer.add_summary(summary_value, step) saver.save(sess, CHECKPOINT_FILE, global_step=step) # icount = 0 ; start_time = end_time except tf.errors.OutOfRangeError: # Export the model after training export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) finally: coord.request_stop() coord.join(threads) elif MODE == "export": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): print("No checkpoint found, exit now") exit(1) # Export the model export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) elif MODE == "inference": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): print("No checkpoint found, exit now") exit(1) # Load inference test data inference_result_file_name = FLAGS.inference_result_file inference_test_file_name = FLAGS.inference_test_file inference_data = np.genfromtxt(inference_test_file_name, delimiter=",") inference_data_features = inference_data[:, 0:4096] inference_data_labels = inference_data[:, 4096] print(inference_data_features) print(inference_data_labels) # Run inference start_time = datetime.datetime.now() # print(tf.shape(inference_features)) # print(inference_data_features.shape) # temp = {inference_features: inference_data_features} # print(temp) prediction, prediction_softmax = sess.run( [inference_op, inference_softmax], feed_dict={inference_features: inference_data_features}) end_time = datetime.datetime.now() # Compute accuracy label_number = len(inference_data_labels) correct_label_number = 0 for i in range(label_number): if inference_data_labels[i] == prediction[i]: correct_label_number += 1 accuracy = float(correct_label_number) / label_number # Compute auc expected_labels = np.array(inference_data_labels) predict_labels = prediction_softmax[:, 0] fpr, tpr, thresholds = metrics.roc_curve(expected_labels, predict_labels, pos_label=0) auc = metrics.auc(fpr, tpr) print("[{}] Inference accuracy: {}, auc: {}".format( end_time - start_time, accuracy, auc)) # Save result into the file np.savetxt(inference_result_file_name, prediction, delimiter=",") print("Save result to file: {}".format(inference_result_file_name))
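# The listing above relies on two helpers defined elsewhere in its project.
# Hedged reconstructions inferred from the call sites (sketches, not the
# original definitions):
def restore_session_from_checkpoint(sess, saver, checkpoint):
    # Returns True when a checkpoint exists and was restored.
    if checkpoint:
        print("Restore session from checkpoint: {}".format(checkpoint))
        saver.restore(sess, checkpoint)
        return True
    return False

def export_model(sess, saver, model_signature, model_path, model_version):
    # model_signature is the {"inputs": ..., "outputs": ...} dict built above.
    print("Exporting trained model to {}".format(model_path))
    model_exporter = exporter.Exporter(saver)
    model_exporter.init(sess.graph.as_graph_def(),
                        named_graph_signatures=model_signature)
    model_exporter.export(model_path, tf.constant(model_version), sess)
    print("Done exporting!")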
def doBasicsOneExportPath(self, export_path, clear_devices=False, global_step=GLOBAL_STEP, sharded=True): # Build a graph with 2 parameter nodes on different devices. tf.reset_default_graph() with tf.Session(target="", config=config_pb2.ConfigProto( device_count={"CPU": 2})) as sess: # v2 is an unsaved variable derived from v0 and v1. It is used to # exercise the ability to run an init op when restoring a graph. with sess.graph.device("/cpu:0"): v0 = tf.Variable(10, name="v0") with sess.graph.device("/cpu:1"): v1 = tf.Variable(20, name="v1") v2 = tf.Variable(1, name="v2", trainable=False, collections=[]) assign_v2 = tf.assign(v2, tf.add(v0, v1)) init_op = tf.group(assign_v2, name="init_op") tf.add_to_collection("v", v0) tf.add_to_collection("v", v1) tf.add_to_collection("v", v2) global_step_tensor = tf.Variable(global_step, name="global_step") named_tensor_bindings = { "logical_input_A": v0, "logical_input_B": v1 } signatures = { "foo": exporter.regression_signature(input_tensor=v0, output_tensor=v1), "generic": exporter.generic_signature(named_tensor_bindings) } asset_filepath_orig = os.path.join(tf.test.get_temp_dir(), "hello42.txt") asset_file = tf.constant(asset_filepath_orig, name="filename42") tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, asset_file) with gfile.FastGFile(asset_filepath_orig, "w") as f: f.write("your data here") assets_collection = tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS) ignored_asset = os.path.join(tf.test.get_temp_dir(), "ignored.txt") with gfile.FastGFile(ignored_asset, "w") as f: f.write("additional data here") tf.initialize_all_variables().run() # Run an export. save = tf.train.Saver({ "v0": v0, "v1": v1 }, restore_sequentially=True, sharded=sharded) export = exporter.Exporter(save) export.init( sess.graph.as_graph_def(), init_op=init_op, clear_devices=clear_devices, default_graph_signature=exporter.classification_signature( input_tensor=v0), named_graph_signatures=signatures, assets_collection=assets_collection) export.export(export_path, global_step_tensor, sess, exports_to_keep=gc.largest_export_versions(2)) # Restore graph. compare_def = tf.get_default_graph().as_graph_def() tf.reset_default_graph() with tf.Session(target="", config=config_pb2.ConfigProto( device_count={"CPU": 2})) as sess: save = tf.train.import_meta_graph( os.path.join(export_path, exporter.VERSION_FORMAT_SPECIFIER % global_step, exporter.META_GRAPH_DEF_FILENAME)) self.assertIsNotNone(save) meta_graph_def = save.export_meta_graph() collection_def = meta_graph_def.collection_def # Validate custom graph_def. graph_def_any = collection_def[exporter.GRAPH_KEY].any_list.value self.assertEquals(len(graph_def_any), 1) graph_def = tf.GraphDef() graph_def_any[0].Unpack(graph_def) if clear_devices: for node in compare_def.node: node.device = "" self.assertProtoEquals(compare_def, graph_def) # Validate init_op. init_ops = collection_def[exporter.INIT_OP_KEY].node_list.value self.assertEquals(len(init_ops), 1) self.assertEquals(init_ops[0], "init_op") # Validate signatures. 
signatures_any = collection_def[ exporter.SIGNATURES_KEY].any_list.value self.assertEquals(len(signatures_any), 1) signatures = manifest_pb2.Signatures() signatures_any[0].Unpack(signatures) default_signature = signatures.default_signature self.assertEqual( default_signature.classification_signature.input.tensor_name, "v0:0") bindings = signatures.named_signatures[ "generic"].generic_signature.map self.assertEquals(bindings["logical_input_A"].tensor_name, "v0:0") self.assertEquals(bindings["logical_input_B"].tensor_name, "v1:0") read_foo_signature = ( signatures.named_signatures["foo"].regression_signature) self.assertEquals(read_foo_signature.input.tensor_name, "v0:0") self.assertEquals(read_foo_signature.output.tensor_name, "v1:0") # Validate the assets. assets_any = collection_def[exporter.ASSETS_KEY].any_list.value self.assertEquals(len(assets_any), 1) asset = manifest_pb2.AssetFile() assets_any[0].Unpack(asset) assets_path = os.path.join( export_path, exporter.VERSION_FORMAT_SPECIFIER % global_step, exporter.ASSETS_DIRECTORY, "hello42.txt") asset_contents = gfile.GFile(assets_path).read() self.assertEqual(asset_contents, "your data here") self.assertEquals("hello42.txt", asset.filename) self.assertEquals("filename42:0", asset.tensor_binding.tensor_name) ignored_asset_path = os.path.join( export_path, exporter.VERSION_FORMAT_SPECIFIER % global_step, exporter.ASSETS_DIRECTORY, "ignored.txt") self.assertFalse(gfile.Exists(ignored_asset_path)) # Validate graph restoration. if sharded: save.restore( sess, os.path.join( export_path, exporter.VERSION_FORMAT_SPECIFIER % global_step, exporter.VARIABLES_FILENAME_PATTERN)) else: save.restore( sess, os.path.join( export_path, exporter.VERSION_FORMAT_SPECIFIER % global_step, exporter.VARIABLES_FILENAME)) self.assertEqual(10, tf.get_collection("v")[0].eval()) self.assertEqual(20, tf.get_collection("v")[1].eval()) tf.get_collection(exporter.INIT_OP_KEY)[0].run() self.assertEqual(30, tf.get_collection("v")[2].eval())
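# For orientation, the on-disk layout the assertions above walk over, using
# the session_bundle constants (the concrete filenames here are my reading of
# those constants, e.g. VERSION_FORMAT_SPECIFIER == "%08d", and may differ
# across TF versions):
#
#   <export_path>/<%08d % global_step>/export.meta              # meta graph
#   <export_path>/<%08d % global_step>/export-?????-of-?????    # sharded variables
#   <export_path>/<%08d % global_step>/assets/hello42.txt       # tracked asset
#
# The non-sharded save writes a single "export" variables file instead, which
# is why the test branches on `sharded` when calling save.restore().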
def main(_):
    if len(sys.argv) < 2 or sys.argv[-1].startswith('-'):
        print('Usage: mnist_export.py [--training_iteration=x] '
              '[--export_version=y] export_dir')
        sys.exit(-1)
    if FLAGS.training_iteration <= 0:
        print('Please specify a positive value for training iteration.')
        sys.exit(-1)
    if FLAGS.export_version <= 0:
        print('Please specify a positive value for version number.')
        sys.exit(-1)

    # Train model
    print('Training model...')
    mnist = mnist_input_data.read_data_sets(FLAGS.work_dir, one_hot=True)
    sess = tf.InteractiveSession()
    serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
    feature_configs = {
        'x': tf.FixedLenFeature(shape=[784], dtype=tf.float32),
    }
    tf_example = tf.parse_example(serialized_tf_example, feature_configs)
    x = tf.identity(tf_example['x'], name='x')  # use tf.identity() to assign name
    y_ = tf.placeholder('float', shape=[None, 10])
    w = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    sess.run(tf.initialize_all_variables())
    y = tf.nn.softmax(tf.matmul(x, w) + b, name='y')
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
        cross_entropy)
    values, indices = tf.nn.top_k(y, 10)
    prediction_classes = tf.contrib.lookup.index_to_string(
        tf.to_int64(indices),
        mapping=tf.constant([str(i) for i in range(10)]))
    for _ in range(FLAGS.training_iteration):
        batch = mnist.train.next_batch(50)
        train_step.run(feed_dict={x: batch[0], y_: batch[1]})
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    print('training accuracy %g' % sess.run(accuracy,
                                            feed_dict={
                                                x: mnist.test.images,
                                                y_: mnist.test.labels
                                            }))
    print('Done training!')

    # Export model
    # WARNING(break-tutorial-inline-code): The following code snippet is
    # in-lined in tutorials, please update tutorial documents accordingly
    # whenever code changes.
    export_path = sys.argv[-1]
    print('Exporting trained model to %s' % export_path)
    init_op = tf.group(tf.initialize_all_tables(), name='init_op')
    saver = tf.train.Saver(sharded=True)
    model_exporter = exporter.Exporter(saver)
    model_exporter.init(
        sess.graph.as_graph_def(),
        init_op=init_op,
        default_graph_signature=exporter.classification_signature(
            input_tensor=serialized_tf_example,
            classes_tensor=prediction_classes,
            scores_tensor=values),
        named_graph_signatures={
            'inputs': exporter.generic_signature({'images': x}),
            'outputs': exporter.generic_signature({'scores': y})
        })
    model_exporter.export(export_path, tf.constant(FLAGS.export_version), sess)
    print('Done exporting!')
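# Client-side sketch for the export above: the default classification
# signature takes a serialized tf.Example with a float feature 'x' of shape
# [784] (see feature_configs). Building such a payload with the core protos;
# the transport (e.g. a TensorFlow Serving stub) is deployment-specific and
# not shown:
def make_serialized_example(pixels):
    # pixels: iterable of 784 floats for one flattened 28x28 image
    example = tf.train.Example(features=tf.train.Features(feature={
        'x': tf.train.Feature(float_list=tf.train.FloatList(value=list(pixels))),
    }))
    return example.SerializeToString()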
def export(): # Create index->synset mapping synsets = [] with open(SYNSET_FILE) as f: synsets = f.read().splitlines() # Create synset->metadata mapping texts = {} with open(METADATA_FILE) as f: for line in f.read().splitlines(): parts = line.split('\t') assert len(parts) == 2 texts[parts[0]] = parts[1] with tf.Graph().as_default(): # Build inference model. # Please refer to Tensorflow inception model for details. # Input transformation. serialized_tf_example = tf.placeholder(tf.string, name='tf_example') feature_configs = { 'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string), } tf_example = tf.parse_example(serialized_tf_example, feature_configs) jpegs = tf_example['image/encoded'] images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32) # Run inference. logits, _ = inception_model.inference(images, NUM_CLASSES + 1) # Transform output to topK result. values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES) # Create a constant string Tensor where the i'th element is # the human readable class description for the i'th index. # Note that the 0th index is an unused background class # (see inception model definition code). class_descriptions = ['unused background'] for s in synsets: class_descriptions.append(texts[s]) class_tensor = tf.constant(class_descriptions) classes = tf.contrib.lookup.index_to_string(tf.to_int64(indices), mapping=class_tensor) # Restore variables from training checkpoint. variable_averages = tf.train.ExponentialMovingAverage( inception_model.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) with tf.Session() as sess: # Restore variables from training checkpoints. ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) # Assuming model_checkpoint_path looks something like: # /my-favorite-path/imagenet_train/model.ckpt-0, # extract global_step from it. global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] print('Successfully loaded model from %s at step=%s.' % (ckpt.model_checkpoint_path, global_step)) else: print('No checkpoint file found at %s' % FLAGS.checkpoint_dir) return # Export inference model. init_op = tf.group(tf.initialize_all_tables(), name='init_op') classification_signature = exporter.classification_signature( input_tensor=serialized_tf_example, classes_tensor=classes, scores_tensor=values) named_graph_signature = { 'inputs': exporter.generic_signature({'images': jpegs}), 'outputs': exporter.generic_signature({ 'classes': classes, 'scores': values })} model_exporter = exporter.Exporter(saver) model_exporter.init( init_op=init_op, default_graph_signature=classification_signature, named_graph_signatures=named_graph_signature) model_exporter.export(FLAGS.export_dir, tf.constant(global_step), sess) print('Successfully exported model to %s' % FLAGS.export_dir)
def main(): # Create train data train_X = np.linspace(-1, 1, 100) train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.33 + 10 learning_rate = FLAGS.learning_rate start_training_time = datetime.datetime.now() print("Use the optimizer: {}".format(FLAGS.optimizer)) if FLAGS.optimizer == "sgd": optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif FLAGS.optimizer == "adadelta": optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif FLAGS.optimizer == "adagrad": optimizer = tf.train.AdagradOptimizer(learning_rate) elif FLAGS.optimizer == "adam": optimizer = tf.train.AdamOptimizer(learning_rate) elif FLAGS.optimizer == "ftrl": optimizer = tf.train.FtrlOptimizer(learning_rate) elif FLAGS.optimizer == "rmsprop": optimizer = tf.train.RMSPropOptimizer(learning_rate) else: print("Unknow optimizer: {}, exit now".format(FLAGS.optimizer)) exit(1) # Run standalone training if os.environ.get("TF_CONFIG", "") == "": # Define the model keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) X = tf.placeholder("float", shape=[None, 1]) Y = tf.placeholder("float", shape=[None, 1]) w = tf.Variable(0.0, name="weight") b = tf.Variable(0.0, name="bias") global_step = tf.Variable(0, name="global_step", trainable=False) loss = tf.reduce_sum(tf.square(Y - tf.multiply(X, w) - b)) train_op = optimizer.minimize(loss, global_step=global_step) predict_op = tf.multiply(X, w) + b tf.summary.scalar("loss", loss) tf.summary.scalar("training/hptuning/metric", loss) summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() with tf.Session() as sess: sess.run(init_op) print("Save tensorboard files into: {}".format(FLAGS.output_path)) writer = tf.summary.FileWriter(FLAGS.output_path, sess.graph) print("Run training with epoch number: {}".format(FLAGS.max_epochs)) for i in range(FLAGS.max_epochs): for (x, y) in zip(train_X, train_Y): x = np.array([[x]]) y = np.array([[y]]) sess.run(train_op, feed_dict={X: x, Y: y}) if i % FLAGS.checkpoint_period == 0: x = np.array([[train_X[0]]]) y = np.array([[train_Y[0]]]) summary_value, loss_value, step = sess.run( [summary_op, loss, global_step], feed_dict={X: x, Y: y}) writer.add_summary(summary_value, step) print("Epoch: {}, loss: {}".format(i, loss_value)) writer.close() end_training_time = datetime.datetime.now() print("[{}] End of standalone training.".format(end_training_time - start_training_time)) print("Get the model, w: {}, b: {}".format(sess.run(w), sess.run(b))) export_inputs_signature = {"keys": keys_placeholder, "X": X} export_outputs_signature = {"keys": keys, "predict": predict_op} export_model(sess, export_inputs_signature, export_outputs_signature) # Run distributed training else: # Exampmle: {"cluster": {"ps": ["127.0.0.1:3001"], "worker": ["127.0.0.1:3002", "127.0.0.1:3003"], "master": ["127.0.0.1:3004"]}, "task": {"index": 0, "type": "master"}} env = json.loads(os.environ.get("TF_CONFIG", "{}")) task_data = env.get("task", None) cluster_spec = env["cluster"] task_type = task_data["type"] task_index = task_data["index"] cluster = tf.train.ClusterSpec(cluster_spec) server = tf.train.Server(cluster, job_name=task_type, task_index=task_index) if task_type == "ps": server.join() elif task_type == "worker" or task_type == "master": with tf.device(tf.train.replica_device_setter( worker_device="/job:{}/task:{}".format(task_type, task_index), cluster=cluster)): # Define the model keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) X = 
tf.placeholder("float", shape=[None, 1]) Y = tf.placeholder("float", shape=[None, 1]) w = tf.Variable(0.0, name="weight") b = tf.Variable(0.0, name="bias") global_step = tf.Variable(0, name="global_step", trainable=False) loss = tf.reduce_sum(tf.square(Y - tf.multiply(X, w) - b)) train_op = optimizer.minimize(loss, global_step=global_step) predict_op = tf.multiply(X, w) + b tf.summary.scalar("loss", loss) summary_op = tf.summary.merge_all() init_op = tf.global_variables_initializer() saver = tf.train.Saver() #saver = tf.train.Saver(sharded=True) constant_model_version = tf.constant(FLAGS.model_version) model_exporter = exporter.Exporter(saver) model_exporter.init( tf.get_default_graph().as_graph_def(), named_graph_signatures={ "inputs": exporter.generic_signature({"keys": keys_placeholder, "X": X}), "outputs": exporter.generic_signature({"keys": keys, "predict": predict_op}) }) sv = tf.train.Supervisor(is_chief=(task_type == "master"), logdir=FLAGS.checkpoint_path, init_op=init_op, #summary_op=summary_op, summary_op=None, saver=saver, global_step=global_step, save_model_secs=60) try: with sv.managed_session(server.target) as sess: print("Save tensorboard files into: {}".format(FLAGS.output_path)) writer = tf.summary.FileWriter(FLAGS.output_path, sess.graph) print("Run training with epoch number: {}".format( FLAGS.max_epochs)) for i in range(FLAGS.max_epochs): for (x, y) in zip(train_X, train_Y): x = np.array([[x]]) y = np.array([[y]]) sess.run(train_op, feed_dict={X: x, Y: y}) if i % FLAGS.checkpoint_period == 0: x = np.array([[train_X[0]]]) y = np.array([[train_Y[0]]]) summary_value, loss_value, step = sess.run( [summary_op, loss, global_step], feed_dict={X: x, Y: y}) print("Epoch: {}, loss: {}".format(i, loss_value)) if task_type == "master": writer.add_summary(summary_value, step) writer.close() end_training_time = datetime.datetime.now() print("[{}] End of distributed training.".format( end_training_time - start_training_time)) if task_type == "master": print("Exporting trained model to {}".format(FLAGS.model_path)) model_exporter.export(FLAGS.model_path, constant_model_version, sess) except Exception as e: print(e)
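# A sketch of launching the distributed branch above: every process receives
# the same cluster spec in TF_CONFIG plus its own task entry (addresses are
# illustrative, taken from the example comment in the code).
import json
import os

cluster = {"ps": ["127.0.0.1:3001"],
           "worker": ["127.0.0.1:3002", "127.0.0.1:3003"],
           "master": ["127.0.0.1:3004"]}
os.environ["TF_CONFIG"] = json.dumps(
    {"cluster": cluster, "task": {"type": "master", "index": 0}})
# main() then enters the distributed branch; start the other processes with
# task type "ps" or "worker" and the matching index.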
def main(): # Get hyperparameters if FLAGS.enable_colored_log: import coloredlogs coloredlogs.install() logging.basicConfig(level=logging.INFO) FEATURE_SIZE = FLAGS.feature_size LABEL_SIZE = FLAGS.label_size EPOCH_NUMBER = FLAGS.epoch_number if EPOCH_NUMBER <= 0: EPOCH_NUMBER = None BATCH_THREAD_NUMBER = FLAGS.batch_thread_number MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE MODE = FLAGS.mode MODEL = FLAGS.model OPTIMIZER = FLAGS.optimizer CHECKPOINT_PATH = FLAGS.checkpoint_path if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists( CHECKPOINT_PATH): os.makedirs(CHECKPOINT_PATH) CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt" LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH) OUTPUT_PATH = FLAGS.output_path if not OUTPUT_PATH.startswith("fds://") and not os.path.exists( OUTPUT_PATH): os.makedirs(OUTPUT_PATH) pprint.PrettyPrinter().pprint(FLAGS.__flags) # Read TFRecords files for training def read_and_decode(filename_queue): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) return serialized_example # Read TFRecords files for training filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.train_tfrecords_file), num_epochs=EPOCH_NUMBER) serialized_example = read_and_decode(filename_queue) batch_serialized_example = tf.train.shuffle_batch( [serialized_example], batch_size=FLAGS.batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) features = tf.parse_example(batch_serialized_example, features={ "label": tf.FixedLenFeature([], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) batch_labels = features["label"] batch_ids = features["ids"] batch_values = features["values"] # Read TFRecords file for validation validate_filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.validate_tfrecords_file), num_epochs=EPOCH_NUMBER) validate_serialized_example = read_and_decode(validate_filename_queue) validate_batch_serialized_example = tf.train.shuffle_batch( [validate_serialized_example], batch_size=FLAGS.validate_batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) validate_features = tf.parse_example(validate_batch_serialized_example, features={ "label": tf.FixedLenFeature([], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) validate_batch_labels = validate_features["label"] validate_batch_ids = validate_features["ids"] validate_batch_values = validate_features["values"] # Define the model input_units = FEATURE_SIZE output_units = LABEL_SIZE model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()] def full_connect(inputs, weights_shape, biases_shape, is_train=True): with tf.device("/cpu:0"): weights = tf.get_variable( "weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable( "biases", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.matmul(inputs, weights) + biases if FLAGS.enable_bn and is_train: mean, var = tf.nn.moments(layer, axes=[0]) scale = tf.get_variable( "scale", biases_shape, initializer=tf.random_normal_initializer()) shift = tf.get_variable( "shift", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.nn.batch_normalization(layer, mean, var, shift, scale, FLAGS.bn_epsilon) return layer def 
sparse_full_connect(sparse_ids, sparse_values, weights_shape, biases_shape, is_train=True): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) return tf.nn.embedding_lookup_sparse( weights, sparse_ids, sparse_values, combiner="sum") + biases def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True): return tf.nn.relu( full_connect(inputs, weights_shape, biases_shape, is_train)) def customized_inference(sparse_ids, sparse_values, is_train=True): hidden1_units = 128 hidden2_units = 32 hidden3_units = 8 with tf.variable_scope("input"): sparse_layer = sparse_full_connect(sparse_ids, sparse_values, [input_units, hidden1_units], [hidden1_units], is_train) layer = tf.nn.relu(sparse_layer) with tf.variable_scope("layer0"): layer = full_connect_relu(layer, [hidden1_units, hidden2_units], [hidden2_units], is_train) with tf.variable_scope("layer1"): layer = full_connect_relu(layer, [hidden2_units, hidden3_units], [hidden3_units], is_train) if FLAGS.enable_dropout and is_train: layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob) with tf.variable_scope("output"): layer = full_connect(layer, [hidden3_units, output_units], [output_units], is_train) return layer def dnn_inference(sparse_ids, sparse_values, is_train=True): with tf.variable_scope("input"): sparse_layer = sparse_full_connect( sparse_ids, sparse_values, [input_units, model_network_hidden_units[0]], [model_network_hidden_units[0]], is_train) layer = tf.nn.relu(sparse_layer) for i in range(len(model_network_hidden_units) - 1): with tf.variable_scope("layer{}".format(i)): layer = full_connect_relu(layer, [ model_network_hidden_units[i], model_network_hidden_units[i + 1] ], [model_network_hidden_units[i + 1]], is_train) with tf.variable_scope("output"): layer = full_connect( layer, [model_network_hidden_units[-1], output_units], [output_units], is_train) return layer def lr_inference(sparse_ids, sparse_values, is_train=True): with tf.variable_scope("logistic_regression"): layer = sparse_full_connect(sparse_ids, sparse_values, [input_units, output_units], [output_units]) return layer def wide_and_deep_inference(sparse_ids, sparse_values, is_train=True): return lr_inference(sparse_ids, sparse_values, is_train) + dnn_inference( sparse_ids, sparse_values, is_train) def inference(sparse_ids, sparse_values, is_train=True): if MODEL == "dnn": return dnn_inference(sparse_ids, sparse_values, is_train) elif MODEL == "lr": return lr_inference(sparse_ids, sparse_values, is_train) elif MODEL == "wide_and_deep": return wide_and_deep_inference(sparse_ids, sparse_values, is_train) elif MODEL == "customized": return customized_inference(sparse_ids, sparse_values, is_train) else: logging.error("Unknown model, exit now") exit(1) logging.info("Use the model: {}, model network: {}".format( MODEL, FLAGS.model_network)) logits = inference(batch_ids, batch_values, True) batch_labels = tf.to_int64(batch_labels) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=batch_labels) loss = tf.reduce_mean(cross_entropy, name="loss") global_step = tf.Variable(0, name="global_step", trainable=False) if FLAGS.enable_lr_decay: logging.info("Enable learning rate decay rate: {}".format( FLAGS.lr_decay_rate)) starter_learning_rate = FLAGS.learning_rate learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100000, FLAGS.lr_decay_rate, staircase=True) else: learning_rate = 
FLAGS.learning_rate optimizer = get_optimizer(FLAGS.optimizer, learning_rate) train_op = optimizer.minimize(loss, global_step=global_step) tf.get_variable_scope().reuse_variables() # Define accuracy op for train data train_accuracy_logits = inference(batch_ids, batch_values, False) train_softmax = tf.nn.softmax(train_accuracy_logits) train_correct_prediction = tf.equal(tf.argmax(train_softmax, 1), batch_labels) train_accuracy = tf.reduce_mean( tf.cast(train_correct_prediction, tf.float32)) # Define auc op for train data batch_labels = tf.cast(batch_labels, tf.int32) sparse_labels = tf.reshape(batch_labels, [-1, 1]) derived_size = tf.shape(batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(axis=1, values=[indices, sparse_labels]) outshape = tf.stack([derived_size, LABEL_SIZE]) new_train_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax, new_train_batch_labels) # Define accuracy op for validate data validate_accuracy_logits = inference(validate_batch_ids, validate_batch_values, False) validate_softmax = tf.nn.softmax(validate_accuracy_logits) validate_batch_labels = tf.to_int64(validate_batch_labels) validate_correct_prediction = tf.equal(tf.argmax(validate_softmax, 1), validate_batch_labels) validate_accuracy = tf.reduce_mean( tf.cast(validate_correct_prediction, tf.float32)) # Define auc op for validate data validate_batch_labels = tf.cast(validate_batch_labels, tf.int32) sparse_labels = tf.reshape(validate_batch_labels, [-1, 1]) derived_size = tf.shape(validate_batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(axis=1, values=[indices, sparse_labels]) outshape = tf.stack([derived_size, LABEL_SIZE]) new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, validate_auc = tf.contrib.metrics.streaming_auc( validate_softmax, new_validate_batch_labels) # Define inference op sparse_index = tf.placeholder(tf.int64, [None, 2]) sparse_ids = tf.placeholder(tf.int64, [None]) sparse_values = tf.placeholder(tf.float32, [None]) sparse_shape = tf.placeholder(tf.int64, [2]) inference_ids = tf.SparseTensor(sparse_index, sparse_ids, sparse_shape) inference_values = tf.SparseTensor(sparse_index, sparse_values, sparse_shape) inference_logits = inference(inference_ids, inference_values, False) inference_softmax = tf.nn.softmax(inference_logits) inference_op = tf.argmax(inference_softmax, 1) keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) model_signature = { "inputs": exporter.generic_signature({ "keys": keys_placeholder, "indexs": sparse_index, "ids": sparse_ids, "values": sparse_values, "shape": sparse_shape }), "outputs": exporter.generic_signature({ "keys": keys, "softmax": inference_softmax, "prediction": inference_op }) } # Initialize saver and summary saver = tf.train.Saver() tf.summary.scalar("loss", loss) tf.summary.scalar("train_accuracy", train_accuracy) tf.summary.scalar("train_auc", train_auc) tf.summary.scalar("validate_accuracy", validate_accuracy) tf.summary.scalar("validate_auc", validate_auc) summary_op = tf.summary.merge_all() init_op = [ tf.global_variables_initializer(), tf.local_variables_initializer() ] # Create session to run with tf.Session() as sess: logging.info("Start to run with mode: {}".format(MODE)) writer = tf.summary.FileWriter(OUTPUT_PATH, sess.graph) sess.run(init_op) if MODE == "train": # Restore session and start queue runner 
restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = datetime.datetime.now() try: while not coord.should_stop(): _, loss_value, step = sess.run( [train_op, loss, global_step]) # Print state while training if step % FLAGS.steps_to_validate == 0: train_accuracy_value, train_auc_value, validate_accuracy_value, auc_value, summary_value = sess.run( [ train_accuracy, train_auc, validate_accuracy, validate_auc, summary_op ]) end_time = datetime.datetime.now() logging.info( "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}" .format(end_time - start_time, step, loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, auc_value)) writer.add_summary(summary_value, step) saver.save(sess, CHECKPOINT_FILE, global_step=step) start_time = end_time except tf.errors.OutOfRangeError: # Export the model after training export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) finally: coord.request_stop() coord.join(threads) elif MODE == "export": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) # Export the model export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) elif MODE == "savedmodel": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) logging.info("Export the saved model to {}".format( FLAGS.saved_model_path)) export_path_base = FLAGS.saved_model_path export_path = os.path.join( compat.as_bytes(export_path_base), compat.as_bytes(str(FLAGS.model_version))) model_signature = signature_def_utils.build_signature_def( inputs={ "keys": utils.build_tensor_info(keys_placeholder), "indexs": utils.build_tensor_info(sparse_index), "ids": utils.build_tensor_info(sparse_ids), "values": utils.build_tensor_info(sparse_values), "shape": utils.build_tensor_info(sparse_shape) }, outputs={ "keys": utils.build_tensor_info(keys), "softmax": utils.build_tensor_info(inference_softmax), "prediction": utils.build_tensor_info(inference_op) }, method_name=signature_constants.PREDICT_METHOD_NAME) try: builder = saved_model_builder.SavedModelBuilder(export_path) builder.add_meta_graph_and_variables( sess, [tag_constants.SERVING], clear_devices=True, signature_def_map={ signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: model_signature, }, #legacy_init_op=legacy_init_op) legacy_init_op=tf.group(tf.initialize_all_tables(), name="legacy_init_op")) builder.save() except Exception as e: logging.error( "Fail to export saved model, exception: {}".format(e)) elif MODE == "inference": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) # Load inference test data inference_result_file_name = "./inference_result.txt" inference_test_file_name = "./data/a8a_test.libsvm" labels = [] feature_ids = [] feature_values = [] feature_index = [] ins_num = 0 for line in open(inference_test_file_name, "r"): tokens = line.split(" ") labels.append(int(tokens[0])) feature_num = 0 for feature in tokens[1:]: feature_id, feature_value = feature.split(":") feature_ids.append(int(feature_id)) feature_values.append(float(feature_value)) feature_index.append([ins_num, feature_num]) feature_num += 1 ins_num += 1 # Run inference start_time = datetime.datetime.now() prediction, prediction_softmax = sess.run( 
[inference_op, inference_softmax], feed_dict={ sparse_index: feature_index, sparse_ids: feature_ids, sparse_values: feature_values, sparse_shape: [ins_num, FEATURE_SIZE] }) end_time = datetime.datetime.now() # Compute accuracy label_number = len(labels) correct_label_number = 0 for i in range(label_number): if labels[i] == prediction[i]: correct_label_number += 1 accuracy = float(correct_label_number) / label_number # Compute auc expected_labels = np.array(labels) predict_labels = prediction_softmax[:, 0] fpr, tpr, thresholds = metrics.roc_curve(expected_labels, predict_labels, pos_label=0) auc = metrics.auc(fpr, tpr) logging.info("[{}] Inference accuracy: {}, auc: {}".format( end_time - start_time, accuracy, auc)) # Save result into the file np.savetxt(inference_result_file_name, prediction_softmax, delimiter=",") logging.info( "Save result to file: {}".format(inference_result_file_name))
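# Stand-alone sketch of the libsvm parsing done inline by the inference branch
# above: each line yields (row, column) indices, feature ids and values in the
# sparse feed format of sparse_index/sparse_ids/sparse_values.
def parse_libsvm_line(line, row):
    tokens = line.strip().split(" ")
    label = int(tokens[0])
    index, ids, values = [], [], []
    for column, feature in enumerate(tokens[1:]):
        feature_id, feature_value = feature.split(":")
        index.append([row, column])
        ids.append(int(feature_id))
        values.append(float(feature_value))
    return label, index, ids, values

label, index, ids, values = parse_libsvm_line("1 3:1.0 7:0.5", row=0)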
def main(): if tf.__version__.split('.')[0] != "1": raise Exception("Tensorflow version 1 required") if a.seed is None: a.seed = random.randint(0, 2**31 - 1) tf.set_random_seed(a.seed) np.random.seed(a.seed) random.seed(a.seed) if not os.path.exists(a.output_dir): os.makedirs(a.output_dir) if a.mode == "test" or a.mode == "export": if a.checkpoint is None: raise Exception("checkpoint required for test mode") # load some options from the checkpoint options = {"which_direction", "ngf", "ndf", "lab_colorization"} with open(os.path.join(a.checkpoint, "options.json")) as f: for key, val in json.loads(f.read()).items(): if key in options: print("loaded", key, "=", val) setattr(a, key, val) # disable these features in test mode a.scale_size = CROP_SIZE a.flip = False for k, v in a._get_kwargs(): print(k, "=", v) with open(os.path.join(a.output_dir, "options.json"), "w") as f: f.write(json.dumps(vars(a), sort_keys=True, indent=4)) if a.mode == "export": # export the generator to a meta graph that can be imported later for standalone generation if a.lab_colorization: raise Exception("export not supported for lab_colorization") input = tf.placeholder(tf.string, shape=[1], name="input_base64") input_data = tf.decode_base64(input[0]) input_image = tf.image.decode_png(input_data) # remove alpha channel if present input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 4), lambda: input_image[:, :, :3], lambda: input_image) # convert grayscale to RGB input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 1), lambda: tf.image.grayscale_to_rgb(input_image), lambda: input_image) input_image = tf.image.convert_image_dtype(input_image, dtype=tf.float32) input_image.set_shape([CROP_SIZE, CROP_SIZE, 3]) batch_input = tf.expand_dims(input_image, axis=0) with tf.variable_scope("generator"): batch_output = deprocess( create_generator(preprocess(batch_input), 3)) output_image = tf.image.convert_image_dtype(batch_output, dtype=tf.uint8)[0] if a.output_filetype == "png": output_data = tf.image.encode_png(output_image) elif a.output_filetype == "jpeg": output_data = tf.image.encode_jpeg(output_image, quality=80) else: raise Exception("invalid filetype") output = tf.convert_to_tensor([tf.encode_base64(output_data)], name="output_base64") init_op = tf.global_variables_initializer() restore_saver = tf.train.Saver() export_saver = tf.train.Saver() with tf.Session() as sess: sess.run(init_op) print("loading model from checkpoint") checkpoint = tf.train.latest_checkpoint(a.checkpoint) restore_saver.restore(sess, checkpoint) print("exporting model") exports_dir = os.path.abspath(os.path.join(a.output_dir, 'export')) if not os.path.exists(exports_dir): os.makedirs(exports_dir) model_exporter = exporter.Exporter(export_saver) model_exporter.init(sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature({'x': input}), 'outputs': exporter.generic_signature( {'predictions': output}) }) model_exporter.export(export_dir_base=exports_dir, global_step_tensor=tf.constant(2), sess=sess) # export_saver.export_meta_graph(filename=os.path.join(a.output_dir, "export.meta")) # export_saver.save(sess, os.path.join(a.output_dir, "export"), write_meta_graph=False) return examples = load_examples() print("examples count = %d" % examples.count) final_input = tf.placeholder_with_default(examples.inputs, shape=(None, CROP_SIZE, CROP_SIZE, 3), name="final_input") final_dropout = tf.placeholder(tf.float32, name='final_dropout') # inputs and targets are [batch_size, height, width, channels] model = 
create_model(final_input, examples.targets, final_dropout) # undo colorization splitting on images that we use for display/output if a.lab_colorization: if a.which_direction == "AtoB": # inputs is brightness, this will be handled fine as a grayscale image # need to augment targets and outputs with brightness targets = augment(examples.targets, examples.inputs) outputs = augment(model.outputs, examples.inputs) # inputs can be deprocessed normally and handled as if they are single channel # grayscale images inputs = deprocess(examples.inputs) elif a.which_direction == "BtoA": # inputs will be color channels only, get brightness from targets inputs = augment(examples.inputs, examples.targets) targets = deprocess(examples.targets) outputs = deprocess(model.outputs) else: raise Exception("invalid direction") else: inputs = examples.inputs targets = tf.argmax(examples.targets, axis=-1) # targets = examples.targets targets = tf.expand_dims(targets, axis=-1) outputs = tf.argmax(model.outputs, axis=-1) outputs = tf.expand_dims(outputs, axis=-1) def convert(image): if a.aspect_ratio != 1.0: # upscale to correct aspect ratio size = [CROP_SIZE, int(round(CROP_SIZE * a.aspect_ratio))] image = tf.image.resize_images( image, size=size, method=tf.image.ResizeMethod.BICUBIC) return tf.image.convert_image_dtype(image, dtype=tf.uint8, saturate=True) # reverse any processing on images so they can be written to disk or displayed to user with tf.name_scope("convert_inputs"): converted_inputs = convert(deprocess(inputs)) with tf.name_scope("convert_targets"): converted_targets = (targets * (np.round(256 / (NUM_OF_CLASSESS - 1)) - 1)) # converted_targets = 100 * targets # converted_targets = convert(converted_targets) converted_targets = tf.cast(converted_targets, tf.uint8) with tf.name_scope("convert_outputs"): # converted_outputs = 100 * outputs converted_outputs = (outputs * (np.round(256 / (NUM_OF_CLASSESS - 1)) - 1)) # converted_outputs = convert(converted_outputs) converted_outputs = tf.cast(converted_outputs, tf.uint8) # converted_outputs = color_image(converted_outputs, NUM_OF_CLASSESS) with tf.name_scope("encode_images"): display_fetches = { "paths": examples.paths, "inputs": tf.map_fn(tf.image.encode_png, converted_inputs, dtype=tf.string, name="input_pngs"), "targets": tf.map_fn(tf.image.encode_png, converted_targets, dtype=tf.string, name="target_pngs"), "outputs": tf.map_fn(tf.image.encode_png, converted_outputs, dtype=tf.string, name="output_pngs"), } # summaries with tf.name_scope("inputs_summary"): tf.summary.image("inputs", converted_inputs) with tf.name_scope("targets_summary"): tf.summary.image("targets", converted_targets) with tf.name_scope("outputs_summary"): tf.summary.image("outputs", converted_outputs) tf.summary.scalar("gen_loss", model.gen_loss) tf.summary.scalar("learning_rate", model.lr_rate) for var in tf.trainable_variables(): tf.summary.histogram(var.op.name + "/values", var) for grad, var in model.gen_grads_and_vars: tf.summary.histogram(var.op.name + "/gradients", grad) with tf.name_scope("parameter_count"): parameter_count = tf.reduce_sum( [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()]) saver = tf.train.Saver(max_to_keep=1) logdir = a.output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None) with sv.managed_session() as sess: print("parameter_count =", sess.run(parameter_count)) if a.checkpoint is not None: print("loading model from checkpoint") checkpoint = 
tf.train.latest_checkpoint(a.checkpoint) saver.restore(sess, checkpoint) max_steps = 2**32 if a.max_epochs is not None: max_steps = examples.steps_per_epoch * a.max_epochs if a.max_steps is not None: max_steps = a.max_steps if a.mode == "test": # testing # at most, process the test data once max_steps = min(examples.steps_per_epoch, max_steps) for step in range(max_steps): results = sess.run(display_fetches) filesets = save_images(results) for i, f in enumerate(filesets): print("evaluated image", f["name"]) index_path = append_index(filesets) print("wrote index at", index_path) else: # training start = time.time() for step in range(max_steps): def should(freq): return freq > 0 and ((step + 1) % freq == 0 or step == max_steps - 1) options = None run_metadata = None if should(a.trace_freq): options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() fetches = { "train": model.train, "global_step": sv.global_step, } if should(a.progress_freq): fetches["gen_loss"] = model.gen_loss if should(a.summary_freq): fetches["summary"] = sv.summary_op if should(a.display_freq): fetches["display"] = display_fetches results = sess.run(fetches, feed_dict={final_dropout: 0.5}, options=options, run_metadata=run_metadata) if should(a.summary_freq): print("recording summary") sv.summary_writer.add_summary(results["summary"], results["global_step"]) if should(a.display_freq): print("saving display images") filesets = save_images(results["display"], step=results["global_step"]) append_index(filesets, step=True) if should(a.trace_freq): print("recording trace") sv.summary_writer.add_run_metadata( run_metadata, "step_%d" % results["global_step"]) if should(a.progress_freq): # global_step will have the correct step count if we resume from a checkpoint train_epoch = math.ceil(results["global_step"] / examples.steps_per_epoch) train_step = (results["global_step"] - 1) % examples.steps_per_epoch + 1 rate = (step + 1) * a.batch_size / (time.time() - start) remaining = (max_steps - step) * a.batch_size / rate print( "progress epoch %d step %d image/sec %0.1f remaining %dm" % (train_epoch, train_step, rate, remaining / 60)) print("gen_loss", results["gen_loss"]) if should(a.save_freq): print("saving model") saver.save(sess, os.path.join(a.output_dir, "model"), global_step=sv.global_step) if sv.should_stop(): break
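# Hypothetical client sketch for the "export" mode above: tf.decode_base64
# expects the web-safe (URL-safe) base64 alphabet, so the PNG bytes must be
# encoded with the urlsafe variant, and tf.encode_base64 emits unpadded output.
import base64

def encode_input(png_bytes):
    return base64.urlsafe_b64encode(png_bytes)

def decode_output(b64_bytes):
    b64_bytes += b"=" * (-len(b64_bytes) % 4)  # restore stripped padding
    return base64.urlsafe_b64decode(b64_bytes)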
def export():
    r'''
    Restores the trained variables into a simpler graph that will be exported
    for serving.
    '''
    log_info('Exporting the model...')
    with tf.device('/cpu:0'):
        tf.reset_default_graph()
        session = tf.Session(config=session_config)

        # Run inference

        # Input tensor will be of shape [batch_size, n_steps, n_input + 2*n_input*n_context]
        input_tensor = tf.placeholder(tf.float32, [None, None, n_input + 2*n_input*n_context], name='input_node')
        seq_length = tf.placeholder(tf.int32, [None], name='input_lengths')

        # Calculate the logits of the batch using BiRNN
        logits = BiRNN(input_tensor, tf.to_int64(seq_length), no_dropout)

        # Beam search decode the batch
        decoded, _ = tf.nn.ctc_beam_search_decoder(logits, seq_length, merge_repeated=False)
        decoded = tf.convert_to_tensor(
            [tf.sparse_tensor_to_dense(sparse_tensor) for sparse_tensor in decoded],
            name='output_node')

        # TODO: Transform the decoded output to a string

        # Create a saver and exporter using variables from the above newly created graph
        saver = tf.train.Saver(tf.global_variables())
        model_exporter = exporter.Exporter(saver)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)
        log_info('Restored checkpoint at training epoch %d' % (int(checkpoint_path.split('-')[-1]) + 1))

        # Initialise the model exporter and export the model
        model_exporter.init(session.graph.as_graph_def(),
                            named_graph_signatures={
                                'inputs': exporter.generic_signature(
                                    {'input': input_tensor,
                                     'input_lengths': seq_length}),
                                'outputs': exporter.generic_signature(
                                    {'outputs': decoded})})
        if FLAGS.remove_export:
            actual_export_dir = os.path.join(FLAGS.export_dir, '%08d' % FLAGS.export_version)
            if os.path.isdir(actual_export_dir):
                log_info('Removing old export')
                shutil.rmtree(actual_export_dir)
        try:
            # Export serving model
            model_exporter.export(FLAGS.export_dir, tf.constant(FLAGS.export_version), session)

            # Export graph
            input_graph_name = 'input_graph.pb'
            tf.train.write_graph(session.graph, FLAGS.export_dir, input_graph_name, as_text=False)

            # Freeze graph
            input_graph_path = os.path.join(FLAGS.export_dir, input_graph_name)
            input_saver_def_path = ''
            input_binary = True
            output_node_names = 'output_node'
            restore_op_name = 'save/restore_all'
            filename_tensor_name = 'save/Const:0'
            output_graph_path = os.path.join(FLAGS.export_dir, 'output_graph.pb')
            clear_devices = False
            freeze_graph.freeze_graph(input_graph_path, input_saver_def_path, input_binary,
                                      checkpoint_path, output_node_names, restore_op_name,
                                      filename_tensor_name, output_graph_path, clear_devices, '')

            log_info('Models exported at %s' % (FLAGS.export_dir))
        except RuntimeError:
            log_error(sys.exc_info()[1])
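# A minimal sketch of consuming the frozen output_graph.pb written above,
# assuming the node names used there ('input_node', 'input_lengths',
# 'output_node').
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.FastGFile('output_graph.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')
    input_node = graph.get_tensor_by_name('input_node:0')
    input_lengths = graph.get_tensor_by_name('input_lengths:0')
    output_node = graph.get_tensor_by_name('output_node:0')
    # with tf.Session(graph=graph) as sess:
    #     sess.run(output_node, feed_dict={input_node: ..., input_lengths: ...})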
def build(self): conf = self.conf name = self.name job_type = self.job_type dtype = self.dtype # Input maps self.in_table = lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64, default_value=UNK_ID, shared_name="in_table", name="in_table", checkpoint=True) self.topic_in_table = lookup.MutableHashTable( key_dtype=tf.string, value_dtype=tf.int64, default_value=2, shared_name="topic_in_table", name="topic_in_table", checkpoint=True) self.out_table = lookup.MutableHashTable(key_dtype=tf.int64, value_dtype=tf.string, default_value="_UNK", shared_name="out_table", name="out_table", checkpoint=True) graphlg.info("Creating placeholders...") self.enc_str_inps = tf.placeholder(tf.string, shape=(None, conf.input_max_len), name="enc_inps") self.enc_lens = tf.placeholder(tf.int32, shape=[None], name="enc_lens") self.enc_str_topics = tf.placeholder(tf.string, shape=(None, None), name="enc_topics") self.dec_str_inps = tf.placeholder( tf.string, shape=[None, conf.output_max_len + 2], name="dec_inps") self.dec_lens = tf.placeholder(tf.int32, shape=[None], name="dec_lens") # table lookup self.enc_inps = self.in_table.lookup(self.enc_str_inps) self.enc_topics = self.topic_in_table.lookup(self.enc_str_topics) self.dec_inps = self.in_table.lookup(self.dec_str_inps) batch_size = tf.shape(self.enc_inps)[0] with variable_scope.variable_scope(self.model_kind, dtype=dtype) as scope: # Create encode graph and get attn states graphlg.info("Creating embeddings and do lookup...") t_major_enc_inps = tf.transpose(self.enc_inps) with ops.device("/cpu:0"): self.embedding = variable_scope.get_variable( "embedding", [conf.input_vocab_size, conf.embedding_size]) self.emb_enc_inps = embedding_lookup_unique( self.embedding, t_major_enc_inps) self.topic_embedding = variable_scope.get_variable( "topic_embedding", [conf.topic_vocab_size, conf.topic_embedding_size], trainable=False) self.emb_enc_topics = embedding_lookup_unique( self.topic_embedding, self.enc_topics) graphlg.info("Creating out projection weights...") if conf.out_layer_size != None: w = tf.get_variable( "proj_w", [conf.out_layer_size, conf.output_vocab_size], dtype=dtype) else: w = tf.get_variable("proj_w", [conf.num_units, conf.output_vocab_size], dtype=dtype) b = tf.get_variable("proj_b", [conf.output_vocab_size], dtype=dtype) self.out_proj = (w, b) graphlg.info("Creating encoding dynamic rnn...") with variable_scope.variable_scope("encoder", dtype=dtype) as scope: if conf.bidirectional: cell_fw = CreateMultiRNNCell(conf.cell_model, conf.num_units, conf.num_layers, conf.output_keep_prob) cell_bw = CreateMultiRNNCell(conf.cell_model, conf.num_units, conf.num_layers, conf.output_keep_prob) self.enc_outs, self.enc_states = bidirectional_dynamic_rnn( cell_fw=cell_fw, cell_bw=cell_bw, inputs=self.emb_enc_inps, sequence_length=self.enc_lens, dtype=dtype, parallel_iterations=16, time_major=True, scope=scope) fw_s, bw_s = self.enc_states self.enc_states = tuple([ tf.concat([f, b], axis=1) for f, b in zip(fw_s, bw_s) ]) self.enc_outs = tf.concat( [self.enc_outs[0], self.enc_outs[1]], axis=2) else: cell = CreateMultiRNNCell(conf.cell_model, conf.num_units, conf.num_layers, conf.output_keep_prob) self.enc_outs, self.enc_states = dynamic_rnn( cell=cell, inputs=self.emb_enc_inps, sequence_length=self.enc_lens, parallel_iterations=16, scope=scope, dtype=dtype, time_major=True) attn_len = tf.shape(self.enc_outs)[0] graphlg.info("Preparing init attention and states for decoder...") initial_state = self.enc_states attn_states = tf.transpose(self.enc_outs, perm=[1, 0, 
2]) attn_size = self.conf.num_units topic_attn_size = self.conf.num_units k = tf.get_variable( "topic_proj", [1, 1, self.conf.topic_embedding_size, topic_attn_size]) topic_attn_states = nn_ops.conv2d( tf.expand_dims(self.emb_enc_topics, 2), k, [1, 1, 1, 1], "SAME") topic_attn_states = tf.squeeze(topic_attn_states, axis=2) graphlg.info("Creating decoder cell...") with variable_scope.variable_scope("decoder", dtype=dtype) as scope: cell = CreateMultiRNNCell(conf.cell_model, attn_size, conf.num_layers, conf.output_keep_prob) # topic if not self.for_deploy: graphlg.info( "Embedding decoder inps, tars and tar weights...") t_major_dec_inps = tf.transpose(self.dec_inps) t_major_tars = tf.slice(t_major_dec_inps, [1, 0], [conf.output_max_len + 1, -1]) t_major_dec_inps = tf.slice(t_major_dec_inps, [0, 0], [conf.output_max_len + 1, -1]) t_major_tar_wgts = tf.cumsum(tf.one_hot( self.dec_lens - 1, conf.output_max_len + 1, axis=0), axis=0, reverse=True) with ops.device("/cpu:0"): emb_dec_inps = embedding_lookup_unique( self.embedding, t_major_dec_inps) hp_train = helper.ScheduledEmbeddingTrainingHelper( inputs=emb_dec_inps, sequence_length=self.enc_lens, embedding=self.embedding, sampling_probability=0.0, out_proj=self.out_proj, except_ids=None, time_major=True) output_layer = None my_decoder = AttnTopicDecoder( cell=cell, helper=hp_train, initial_state=initial_state, attn_states=attn_states, attn_size=attn_size, topic_attn_states=topic_attn_states, topic_attn_size=topic_attn_size, output_layer=output_layer) t_major_cell_outs, final_state = decoder.dynamic_decode( decoder=my_decoder, output_time_major=True, maximum_iterations=conf.output_max_len + 1, scope=scope) t_major_outs = t_major_cell_outs.rnn_output # Branch 1 for debugging, doesn't have to be called self.outputs = tf.transpose(t_major_outs, perm=[1, 0, 2]) L = tf.shape(self.outputs)[1] w, b = self.out_proj self.outputs = tf.reshape(self.outputs, [-1, int(w.shape[0])]) self.outputs = tf.matmul(self.outputs, w) + b # For masking the except_ids when debuging #m = tf.shape(self.outputs)[0] #self.mask = tf.zeros([m, int(w.shape[1])]) #for i in [3]: # self.mask = self.mask + tf.one_hot(indices=tf.ones([m], dtype=tf.int32) * i, on_value=100.0, depth=int(w.shape[1])) #self.outputs = self.outputs - self.mask self.outputs = tf.argmax(self.outputs, axis=1) self.outputs = tf.reshape(self.outputs, [-1, L]) self.outputs = self.out_table.lookup( tf.cast(self.outputs, tf.int64)) # Branch 2 for loss self.loss = dyn_sequence_loss(self.conf, t_major_outs, self.out_proj, t_major_tars, t_major_tar_wgts) self.summary = tf.summary.scalar("%s/loss" % self.name, self.loss) # backpropagation self.build_backprop(self.loss, conf, dtype) #saver self.trainable_params.extend(tf.trainable_variables() + [self.topic_embedding]) need_to_save = self.global_params + self.trainable_params + self.optimizer_params + tf.get_default_graph( ).get_collection("saveable_objects") + [ self.topic_embedding ] self.saver = tf.train.Saver(need_to_save, max_to_keep=conf.max_to_keep) else: hp_infer = helper.GreedyEmbeddingHelper( embedding=self.embedding, start_tokens=tf.ones(shape=[batch_size], dtype=tf.int32), end_token=EOS_ID, out_proj=self.out_proj) output_layer = None #layers_core.Dense(self.conf.outproj_from_size, use_bias=True) my_decoder = AttnTopicDecoder( cell=cell, helper=hp_infer, initial_state=initial_state, attn_states=attn_states, attn_size=attn_size, topic_attn_states=topic_attn_states, topic_attn_size=topic_attn_size, output_layer=output_layer) cell_outs, final_state = 
decoder.dynamic_decode( decoder=my_decoder, scope=scope, maximum_iterations=40) self.outputs = cell_outs.sample_id #lookup self.outputs = self.out_table.lookup( tf.cast(self.outputs, tf.int64)) #saver self.trainable_params.extend(tf.trainable_variables()) self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep) # Exporter for serving self.model_exporter = exporter.Exporter(self.saver) inputs = { "enc_inps": self.enc_str_inps, "enc_lens": self.enc_lens } outputs = {"out": self.outputs} self.model_exporter.init( tf.get_default_graph().as_graph_def(), named_graph_signatures={ "inputs": exporter.generic_signature(inputs), "outputs": exporter.generic_signature(outputs) }) graphlg.info("Graph done") graphlg.info("") self.dec_states = final_state
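# Illustrative sketch of preparing feeds for the exported signature above:
# token strings padded to conf.input_max_len. The "_PAD" symbol is an
# assumption; the real padding token lives in the project's vocabulary.
def pad_tokens(tokens, max_len, pad="_PAD"):
    tokens = tokens[:max_len]
    return tokens + [pad] * (max_len - len(tokens))

enc_inps_feed = [pad_tokens("how are you".split(), max_len=10)]
enc_lens_feed = [3]
# sess.run(outputs, {enc_str_inps: enc_inps_feed, enc_lens: enc_lens_feed})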
def Export(): with tf.Session() as sess: # Make model parameters a&b variables instead of constants to # exercise the variable reloading mechanisms. a = tf.Variable(0.5, name="a") b = tf.Variable(2.0, name="b") # Create a placeholder for serialized tensorflow.Example messages to be fed. serialized_tf_example = tf.placeholder(tf.string, name="tf_example") # Parse the tensorflow.Example looking for a feature named "x" with a single # floating point value. feature_configs = {"x": tf.FixedLenFeature([1], dtype=tf.float32),} tf_example = tf.parse_example(serialized_tf_example, feature_configs) # Use tf.identity() to assign name x = tf.identity(tf_example["x"], name="x") # Calculate, y = a*x + b y = tf.add(tf.mul(a, x), b, name="y") # Setup a standard Saver for our variables. save = tf.train.Saver( { "a": a, "b": b }, sharded=True, write_version=tf.train.SaverDef.V2 if FLAGS.use_checkpoint_v2 else tf.train.SaverDef.V1) # asset_path contains the base directory of assets used in training (e.g. # vocabulary files). original_asset_path = tf.constant("/tmp/original/export/assets") # Ops reading asset files should reference the asset_path tensor # which stores the original asset path at training time and the # overridden assets directory at restore time. asset_path = tf.Variable(original_asset_path, name="asset_path", trainable=False, collections=[]) assign_asset_path = asset_path.assign(original_asset_path) # Use a fixed global step number. global_step_tensor = tf.Variable(123, name="global_step") # Create a RegressionSignature for our input and output. regression_signature = exporter.regression_signature( input_tensor=serialized_tf_example, # Use tf.identity here because we export two signatures here. # Otherwise only graph for one of the signatures will be loaded # (whichever is created first) during serving. output_tensor=tf.identity(y)) named_graph_signature = { "inputs": exporter.generic_signature({"x": x}), "outputs": exporter.generic_signature({"y": y}) } # Create two filename assets and corresponding tensors. # TODO(b/26254158) Consider adding validation of file existance as well as # hashes (e.g. sha1) for consistency. original_filename1 = tf.constant("hello1.txt") tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, original_filename1) filename1 = tf.Variable(original_filename1, name="filename1", trainable=False, collections=[]) assign_filename1 = filename1.assign(original_filename1) original_filename2 = tf.constant("hello2.txt") tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, original_filename2) filename2 = tf.Variable(original_filename2, name="filename2", trainable=False, collections=[]) assign_filename2 = filename2.assign(original_filename2) # Init op contains a group of all variables that we assign. init_op = tf.group(assign_asset_path, assign_filename1, assign_filename2) # CopyAssets is used as a callback during export to copy files to the # given export directory. def CopyAssets(filepaths, export_path): print("copying asset files to: %s" % export_path) for filepath in filepaths: print("copying asset file: %s" % filepath) # Run an export. tf.initialize_all_variables().run() export = exporter.Exporter(save) export.init( sess.graph.as_graph_def(), init_op=init_op, default_graph_signature=regression_signature, named_graph_signatures=named_graph_signature, assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS), assets_callback=CopyAssets) export.export(FLAGS.export_dir, global_step_tensor, sess)
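# The CopyAssets callback above only logs the paths; a sketch of a callback
# that actually copies, assuming (as the print-only version does) that it
# receives plain path strings, and using tf.gfile so any filesystem TensorFlow
# can write to works.
import os
import tensorflow as tf

def copy_assets(filepaths, export_path):
    for filepath in filepaths:
        target = os.path.join(export_path, os.path.basename(filepath))
        tf.gfile.Copy(filepath, target, overwrite=True)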
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    summary, _ = sess.run([merged, train_step],
                          feed_dict={x: batch_xs, y_: batch_ys})
    trainwriter.add_summary(summary, i)

# Model export path
tf.add_to_collection('variable', W)
tf.add_to_collection('variable', b)
export_path = 'data/mnist_mode'
print('Exporting trained model to', export_path)

saver = tf.train.Saver(sharded=True)
model_exporter = exporter.Exporter(saver)
model_exporter.init(sess.graph.as_graph_def(),
                    named_graph_signatures={
                        'inputs': exporter.generic_signature({'images': x}),
                        'outputs': exporter.generic_signature({'scores': y})
                    })
model_exporter.export(export_path, tf.constant(1), sess)

# The model can also be saved with the saver directly:
# saver.save(sess, '/home/manikanta/tensorflow/mnist_model')
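# Sketch of restoring the saver-based save mentioned in the closing comment
# above; the checkpoint path is the illustrative one from that comment.
import tensorflow as tf

with tf.Session() as sess:
    saver = tf.train.import_meta_graph('/home/manikanta/tensorflow/mnist_model.meta')
    saver.restore(sess, '/home/manikanta/tensorflow/mnist_model')
    W, b = tf.get_collection('variable')  # the two variables registered above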
def build(self, for_deploy, variants=""): conf = self.conf name = self.name job_type = self.job_type dtype = self.dtype self.beam_size = 1 if (not for_deploy or variants == "score") else sum( self.conf.beam_splits) # Input maps self.in_table = lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64, default_value=UNK_ID, shared_name="in_table", name="in_table", checkpoint=True) self.enc_str_inps = tf.placeholder(tf.string, shape=(None, conf.input_max_len), name="enc_inps") self.enc_lens = tf.placeholder(tf.int32, shape=[None], name="enc_lens") self.tags = tf.placeholder(tf.int32, shape=[None, conf.tag_num], name="tags") self.down_wgts = tf.placeholder(tf.float32, shape=[None], name="down_wgts") # lookup self.enc_inps = self.in_table.lookup(self.enc_str_inps) #self.enc_inps = tf.Print(self.enc_inps, [self.enc_inps], message="enc_inps", summarize=100000) with variable_scope.variable_scope(self.model_kind, dtype=dtype) as scope: # Create encode graph and get attn states graphlg.info("Creating embeddings and embedding enc_inps.") with ops.device("/cpu:0"): self.embedding = variable_scope.get_variable( "embedding", [conf.output_vocab_size, conf.embedding_size], initializer=tf.random_uniform_initializer(-0.08, 0.08)) self.emb_enc_inps = embedding_lookup_unique( self.embedding, self.enc_inps) graphlg.info("Creating dynamic rnn...") if conf.bidirectional: with variable_scope.variable_scope("encoder", dtype=dtype) as scope: cell_fw = CreateMultiRNNCell(conf.cell_model, conf.num_units, conf.num_layers, conf.output_keep_prob) cell_bw = CreateMultiRNNCell(conf.cell_model, conf.num_units, conf.num_layers, conf.output_keep_prob) self.enc_outs, self.enc_states = bidirectional_dynamic_rnn( cell_fw=cell_fw, cell_bw=cell_bw, inputs=self.emb_enc_inps, sequence_length=self.enc_lens, dtype=dtype, parallel_iterations=16, scope=scope) fw_s, bw_s = self.enc_states self.enc_states = [] for f, b in zip(fw_s, bw_s): if isinstance(f, LSTMStateTuple): self.enc_states.append( LSTMStateTuple(tf.concat([f.c, b.c], axis=1), tf.concat([f.h, b.h], axis=1))) else: self.enc_states.append(tf.concat([f, b], 1)) self.enc_outs = tf.concat([self.enc_outs[0], self.enc_outs[1]], axis=2) mem_size = 2 * conf.num_units enc_state_size = 2 * conf.num_units else: with variable_scope.variable_scope("encoder", dtype=dtype) as scope: cell = CreateMultiRNNCell(conf.cell_model, conf.num_units, conf.num_layers, conf.output_keep_prob) self.enc_outs, self.enc_states = dynamic_rnn( cell=cell, inputs=self.emb_enc_inps, sequence_length=self.enc_lens, parallel_iterations=16, scope=scope, dtype=dtype) mem_size = conf.num_units enc_state_size = conf.num_units self.enc_outs = tf.expand_dims(self.enc_outs, -1) with variable_scope.variable_scope("cnn", dtype=dtype, reuse=None) as scope: feature_map = FeatureMatrix(conf.conv_conf, self.enc_outs, scope=scope, dtype=dtype) vec = tf.contrib.layers.flatten(feature_map) with variable_scope.variable_scope("fc", dtype=dtype, reuse=False) as scope: fc_out = FC(inputs=vec, h_size=conf.fc_h_size, o_size=conf.tag_num, act=relu) self.outputs = fc_out if not for_deploy: #self.tags = tf.Print(self.tags, [self.tags], message="tags", summarize=10000) loss = tf.losses.softmax_cross_entropy(self.tags, self.outputs) see_loss = loss tf.summary.scalar("loss", see_loss) self.summary_ops = tf.summary.merge_all() self.update = self.backprop(loss) self.train_outputs_map["loss"] = see_loss self.train_outputs_map["update"] = self.update self.fo_outputs_map["loss"] = see_loss self.debug_outputs_map["loss"] = see_loss 
self.debug_outputs_map["outputs"] = self.outputs, self.debug_outputs_map["update"] = self.update #saver self.trainable_params.extend(tf.trainable_variables()) self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep) else: if variants == "": self.infer_outputs_map["tags"] = tf.nn.softmax(self.outputs) else: pass #saver self.trainable_params.extend(tf.trainable_variables()) self.saver = tf.train.Saver(max_to_keep=conf.max_to_keep) # Exporter for serving self.model_exporter = exporter.Exporter(self.saver) inputs = { "enc_inps:0": self.enc_str_inps, "enc_lens:0": self.enc_lens } outputs = self.infer_outputs_map self.model_exporter.init(tf.get_default_graph().as_graph_def(), named_graph_signatures={ "inputs": exporter.generic_signature(inputs), "outputs": exporter.generic_signature(outputs) }) graphlg.info("Graph done") graphlg.info("") return
def main(): # Get hyperparameters if FLAGS.enable_colored_log: import coloredlogs coloredlogs.install() logging.basicConfig(level=logging.INFO) INPUT_FILE_FORMAT = FLAGS.input_file_format if INPUT_FILE_FORMAT not in ["tfrecord", "csv"]: logging.error("Unknow input file format: {}".format(INPUT_FILE_FORMAT)) exit(1) FEATURE_SIZE = FLAGS.feature_size LABEL_SIZE = FLAGS.label_size EPOCH_NUMBER = FLAGS.epoch_number if EPOCH_NUMBER <= 0: EPOCH_NUMBER = None BATCH_THREAD_NUMBER = FLAGS.batch_thread_number MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE MODE = FLAGS.mode MODEL = FLAGS.model CHECKPOINT_PATH = FLAGS.checkpoint_path if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists( CHECKPOINT_PATH): os.makedirs(CHECKPOINT_PATH) CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt" LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH) OUTPUT_PATH = FLAGS.output_path if not OUTPUT_PATH.startswith("fds://") and not os.path.exists(OUTPUT_PATH): os.makedirs(OUTPUT_PATH) pprint.PrettyPrinter().pprint(FLAGS.__flags) # Process TFRecoreds files def read_and_decode_tfrecord(filename_queue): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) features = tf.parse_single_example( serialized_example, features={ "label": tf.FixedLenFeature([], tf.float32), "features": tf.FixedLenFeature([FEATURE_SIZE], tf.float32), }) label = features["label"] features = features["features"] return label, features def read_and_decode_csv(filename_queue): # TODO: Not generic for all datasets reader = tf.TextLineReader() key, value = reader.read(filename_queue) # Default values, in case of empty columns. Also specifies the type of the # decoded result. #record_defaults = [[1], [1], [1], [1], [1]] record_defaults = [[1], [1.0], [1.0], [1.0], [1.0]] col1, col2, col3, col4, col5 = tf.decode_csv( value, record_defaults=record_defaults) label = col1 features = tf.stack([col2, col3, col4, col4]) return label, features # Read TFRecords files for training filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.train_file), num_epochs=EPOCH_NUMBER) if INPUT_FILE_FORMAT == "tfrecord": label, features = read_and_decode_tfrecord(filename_queue) elif INPUT_FILE_FORMAT == "csv": label, features = read_and_decode_csv(filename_queue) batch_labels, batch_features = tf.train.shuffle_batch( [label, features], batch_size=FLAGS.batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) # Read TFRecords file for validatioin validate_filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.validate_file), num_epochs=EPOCH_NUMBER) if INPUT_FILE_FORMAT == "tfrecord": validate_label, validate_features = read_and_decode_tfrecord( validate_filename_queue) elif INPUT_FILE_FORMAT == "csv": validate_label, validate_features = read_and_decode_csv( validate_filename_queue) validate_batch_labels, validate_batch_features = tf.train.shuffle_batch( [validate_label, validate_features], batch_size=FLAGS.validate_batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) # Define the model input_units = FEATURE_SIZE output_units = LABEL_SIZE model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()] def full_connect(inputs, weights_shape, biases_shape, is_train=True): weights = tf.get_variable( "weights", weights_shape, initializer=tf.random_normal_initializer()) biases = 
tf.get_variable( "biases", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.matmul(inputs, weights) + biases if FLAGS.enable_bn and is_train: mean, var = tf.nn.moments(layer, axes=[0]) scale = tf.get_variable( "scale", biases_shape, initializer=tf.random_normal_initializer()) shift = tf.get_variable( "shift", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.nn.batch_normalization(layer, mean, var, shift, scale, FLAGS.bn_epsilon) return layer def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True): layer = full_connect(inputs, weights_shape, biases_shape, is_train) layer = tf.nn.relu(layer) return layer def customized_inference(inputs, is_train=True): hidden1_units = 128 hidden2_units = 32 hidden3_units = 8 with tf.variable_scope("input"): layer = full_connect_relu(inputs, [input_units, hidden1_units], [hidden1_units], is_train) with tf.variable_scope("layer0"): layer = full_connect_relu(layer, [hidden1_units, hidden2_units], [hidden2_units], is_train) with tf.variable_scope("layer1"): layer = full_connect_relu(layer, [hidden2_units, hidden3_units], [hidden3_units], is_train) if FLAGS.enable_dropout and is_train: layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob) with tf.variable_scope("output"): layer = full_connect(layer, [hidden3_units, output_units], [output_units], is_train) return layer def dnn_inference(inputs, is_train=True): with tf.variable_scope("input"): layer = full_connect_relu(inputs, [input_units, model_network_hidden_units[0]], [model_network_hidden_units[0]], is_train) for i in range(len(model_network_hidden_units) - 1): with tf.variable_scope("layer{}".format(i)): layer = full_connect_relu(layer, [ model_network_hidden_units[i], model_network_hidden_units[i + 1] ], [model_network_hidden_units[i + 1]], is_train) with tf.variable_scope("output"): layer = full_connect(layer, [model_network_hidden_units[-1], output_units], [output_units], is_train) return layer def lr_inference(inputs, is_train=True): with tf.variable_scope("lr"): layer = full_connect(inputs, [input_units, output_units], [output_units]) return layer def wide_and_deep_inference(inputs, is_train=True): return lr_inference(inputs, is_train) + dnn_inference(inputs, is_train) def cnn_inference(inputs, is_train=True): # TODO: Change if validate_batch_size is different # [BATCH_SIZE, 512 * 512 * 1] -> [BATCH_SIZE, 512, 512, 1] inputs = tf.reshape(inputs, [FLAGS.batch_size, 512, 512, 1]) # [BATCH_SIZE, 512, 512, 1] -> [BATCH_SIZE, 128, 128, 8] with tf.variable_scope("conv0"): weights = tf.get_variable( "weights", [3, 3, 1, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable( "bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d( inputs, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool( layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME") # [BATCH_SIZE, 128, 128, 8] -> [BATCH_SIZE, 32, 32, 8] with tf.variable_scope("conv1"): weights = tf.get_variable( "weights", [3, 3, 8, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable( "bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d( layer, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool( layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME") # [BATCH_SIZE, 32, 32, 8] -> [BATCH_SIZE, 8, 8, 8] with tf.variable_scope("conv2"): weights = 
tf.get_variable( "weights", [3, 3, 8, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable( "bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d( layer, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool( layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME") # [BATCH_SIZE, 8, 8, 8] -> [BATCH_SIZE, 8 * 8 * 8] layer = tf.reshape(layer, [-1, 8 * 8 * 8]) # [BATCH_SIZE, 8 * 8 * 8] -> [BATCH_SIZE, LABEL_SIZE] with tf.variable_scope("output"): weights = tf.get_variable( "weights", [8 * 8 * 8, LABEL_SIZE], initializer=tf.random_normal_initializer()) bias = tf.get_variable( "bias", [LABEL_SIZE], initializer=tf.random_normal_initializer()) layer = tf.add(tf.matmul(layer, weights), bias) return layer def inference(inputs, is_train=True): if MODEL == "dnn": return dnn_inference(inputs, is_train) elif MODEL == "lr": return lr_inference(inputs, is_train) elif MODEL == "wide_and_deep": return wide_and_deep_inference(inputs, is_train) elif MODEL == "customized": return customized_inference(inputs, is_train) elif MODEL == "cnn": return cnn_inference(inputs, is_train) else: logging.error("Unknown model, exit now") exit(1) logging.info("Use the model: {}, model network: {}".format( MODEL, FLAGS.model_network)) logits = inference(batch_features, True) batch_labels = tf.to_int64(batch_labels) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=batch_labels) loss = tf.reduce_mean(cross_entropy, name="loss") global_step = tf.Variable(0, name="global_step", trainable=False) if FLAGS.enable_lr_decay: logging.info( "Enable learning rate decay rate: {}".format(FLAGS.lr_decay_rate)) starter_learning_rate = FLAGS.learning_rate learning_rate = tf.train.exponential_decay( starter_learning_rate, global_step, 100000, FLAGS.lr_decay_rate, staircase=True) else: learning_rate = FLAGS.learning_rate optimizer = get_optimizer(FLAGS.optimizer, learning_rate) train_op = optimizer.minimize(loss, global_step=global_step) tf.get_variable_scope().reuse_variables() # Define accuracy op for train data train_accuracy_logits = inference(batch_features, False) train_softmax = tf.nn.softmax(train_accuracy_logits) train_correct_prediction = tf.equal( tf.argmax(train_softmax, 1), batch_labels) train_accuracy = tf.reduce_mean( tf.cast(train_correct_prediction, tf.float32)) # Define auc op for train data batch_labels = tf.cast(batch_labels, tf.int32) sparse_labels = tf.reshape(batch_labels, [-1, 1]) derived_size = tf.shape(batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(axis=1, values=[indices, sparse_labels]) outshape = tf.stack([derived_size, LABEL_SIZE]) new_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax, new_batch_labels) # Define accuracy op for validate data validate_accuracy_logits = inference(validate_batch_features, False) validate_softmax = tf.nn.softmax(validate_accuracy_logits) validate_batch_labels = tf.to_int64(validate_batch_labels) validate_correct_prediction = tf.equal( tf.argmax(validate_softmax, 1), validate_batch_labels) validate_accuracy = tf.reduce_mean( tf.cast(validate_correct_prediction, tf.float32)) # Define auc op for validate data validate_batch_labels = tf.cast(validate_batch_labels, tf.int32) sparse_labels = tf.reshape(validate_batch_labels, [-1, 1]) derived_size = tf.shape(validate_batch_labels)[0] indices = 
tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(axis=1, values=[indices, sparse_labels]) outshape = tf.stack([derived_size, LABEL_SIZE]) new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax, new_validate_batch_labels) # Define inference op inference_features = tf.placeholder("float", [None, FEATURE_SIZE]) inference_logits = inference(inference_features, False) inference_softmax = tf.nn.softmax(inference_logits) inference_op = tf.argmax(inference_softmax, 1) keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) model_signature = { "inputs": exporter.generic_signature({ "keys": keys_placeholder, "features": inference_features }), "outputs": exporter.generic_signature({ "keys": keys, "softmax": inference_softmax, "prediction": inference_op }) } # Initialize saver and summary saver = tf.train.Saver() tf.summary.scalar("loss", loss) tf.summary.scalar("train_accuracy", train_accuracy) tf.summary.scalar("train_auc", train_auc) tf.summary.scalar("validate_accuracy", validate_accuracy) tf.summary.scalar("validate_auc", validate_auc) summary_op = tf.summary.merge_all() init_op = [ tf.global_variables_initializer(), tf.local_variables_initializer() ] # Create session to run with tf.Session() as sess: logging.info("Start to run with mode: {}".format(MODE)) writer = tf.summary.FileWriter(OUTPUT_PATH, sess.graph) sess.run(init_op) if MODE == "train": # Restore session and start queue runner restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = datetime.datetime.now() try: while not coord.should_stop(): if FLAGS.benchmark_mode: sess.run(train_op) else: _, step = sess.run([train_op, global_step]) # Print state while training if step % FLAGS.steps_to_validate == 0: loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run( [ loss, train_accuracy, train_auc, validate_accuracy, validate_auc, summary_op ]) end_time = datetime.datetime.now() logging.info( "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}". 
format(end_time - start_time, step, loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value)) writer.add_summary(summary_value, step) saver.save(sess, CHECKPOINT_FILE, global_step=step) start_time = end_time except tf.errors.OutOfRangeError: if FLAGS.benchmark_mode: print("Finished training for benchmark") exit(0) else: # Export the model after training export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) finally: coord.request_stop() coord.join(threads) elif MODE == "export": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) # Export the model export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) elif MODE == "savedmodel": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) logging.info( "Export the saved model to {}".format(FLAGS.saved_model_path)) export_path_base = FLAGS.saved_model_path export_path = os.path.join( compat.as_bytes(export_path_base), compat.as_bytes(str(FLAGS.model_version))) model_signature = signature_def_utils.build_signature_def( inputs={ "keys": utils.build_tensor_info(keys_placeholder), "features": utils.build_tensor_info(inference_features) }, outputs={ "keys": utils.build_tensor_info(keys), "softmax": utils.build_tensor_info(inference_softmax), "prediction": utils.build_tensor_info(inference_op) }, method_name=signature_constants.PREDICT_METHOD_NAME) try: builder = saved_model_builder.SavedModelBuilder(export_path) builder.add_meta_graph_and_variables( sess, [tag_constants.SERVING], clear_devices=True, signature_def_map={ signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: model_signature, }, legacy_init_op=tf.group( tf.initialize_all_tables(), name="legacy_init_op")) builder.save() except Exception as e: logging.error("Failed to export saved model, exception: {}".format(e)) elif MODE == "inference": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) # Load inference test data inference_result_file_name = FLAGS.inference_result_file inference_test_file_name = FLAGS.inference_test_file inference_data = np.genfromtxt(inference_test_file_name, delimiter=",") inference_data_features = inference_data[:, 0:9] inference_data_labels = inference_data[:, 9] # Run inference start_time = datetime.datetime.now() prediction, prediction_softmax = sess.run( [inference_op, inference_softmax], feed_dict={inference_features: inference_data_features}) end_time = datetime.datetime.now() # Compute accuracy label_number = len(inference_data_labels) correct_label_number = 0 for i in range(label_number): if inference_data_labels[i] == prediction[i]: correct_label_number += 1 accuracy = float(correct_label_number) / label_number # Compute auc y_true = np.array(inference_data_labels) y_score = prediction_softmax[:, 1] fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=1) auc = metrics.auc(fpr, tpr) logging.info("[{}] Inference accuracy: {}, auc: {}".format( end_time - start_time, accuracy, auc)) # Save result into the file np.savetxt(inference_result_file_name, prediction_softmax, delimiter=",") logging.info( "Save result to file: {}".format(inference_result_file_name))
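The trainer above calls two helpers that are defined elsewhere in the project, restore_session_from_checkpoint and get_optimizer. A minimal sketch of what they are assumed to look like, inferred purely from the call sites (the function bodies and the supported optimizer names are assumptions, not taken from the original source):

# Hypothetical helper: restore the session from the latest checkpoint if one exists.
def restore_session_from_checkpoint(sess, saver, latest_checkpoint):
  if latest_checkpoint:
    logging.info("Restore session from checkpoint: {}".format(latest_checkpoint))
    saver.restore(sess, latest_checkpoint)
    return True
  return False

# Hypothetical helper: map an optimizer name to a tf.train optimizer instance.
def get_optimizer(optimizer_name, learning_rate):
  optimizers = {
      "sgd": tf.train.GradientDescentOptimizer,
      "adadelta": tf.train.AdadeltaOptimizer,
      "adagrad": tf.train.AdagradOptimizer,
      "adam": tf.train.AdamOptimizer,
      "rmsprop": tf.train.RMSPropOptimizer,
  }
  if optimizer_name not in optimizers:
    logging.error("Unknown optimizer: {}".format(optimizer_name))
    exit(1)
  return optimizers[optimizer_name](learning_rate)

Returning False when no checkpoint is found matches how the export and inference branches treat the helper's result as a success flag.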
def main(): parseArgs(args) args.log_dir_path = args.output + os.path.sep + 'test_' + os.path.split(args.checkpoint)[1] args.log_prefix = args.a0_model_name + '_' if not os.path.exists(args.log_dir_path): os.makedirs(args.log_dir_path) if args.save_graph: args.message("Resize batchsize to 1 for serving...") if args.extra_wdict is not None: args.wdict_path = args.extra_wdict args.testfnms = [args.extra_testfnms] args.message("Loading extra word dictionary from " + args.wdict_path) configproto = tf.ConfigProto() configproto.gpu_options.allow_growth = True configproto.allow_soft_placement = True test_batchsize = args.batchsize with tf.Graph().as_default(), tf.Session(config=configproto) as sess: model = graph_moudle.Model() model.init_global_step() vt, vs, vo = model.model_setup() tf.initialize_all_variables().run() args.message('Loading trained model ' + str(args.checkpoint)) model.saver.restore(sess, args.checkpoint) args.write_variables(vt) if args.save_graph: if args.save_only_trainable: exporter_saver = tf.train.Saver(var_list=tf.trainable_variables(), sharded=True) else: exporter_saver = tf.train.Saver(sharded=True) online_feed_dict = {'q': model.query_sent_in, 'qm': model.query_sent_mask, 't': model.title_sent_in, 'tm': model.title_sent_mask, 'is_training': model.is_training, 'keep_prob': model.keep_prob} online_fetch_dict = {'score': model.score} model_exporter = exporter.Exporter(exporter_saver) model_exporter.init(sess.graph.as_graph_def(), named_graph_signatures={ 'inputs': exporter.generic_signature(online_feed_dict), 'outputs': exporter.generic_signature(online_fetch_dict)}) model_exporter.export(args.log_dir_path, tf.constant(0), sess) args.message("Successfully export graph to path:" + args.log_dir_path) #tf.train.write_graph(sess.graph_def, args.log_dir_path, 'graph.pbtxt', False) return args.write_args(args) #args.testfnms = ['/search/odin/data/strict_anchor_data/demo'] for test_file in args.testfnms: print('Test ' + str(test_file)) f = open(test_file) pairs = f.readlines() n_test = len(pairs) n_batches = int(n_test/test_batchsize) tpair_loss, tacc, tacc01 = 0.0, 0.0, 0.0 for i in range(n_batches): data_proc_slice = model.data_proc(pairs[i*test_batchsize: (i+1)*test_batchsize]) pair_loss, acc, acc01, score = \ model.run_epoch(sess, data_proc_slice, False) tpair_loss, tacc, tacc01 = tpair_loss + pair_loss, tacc + acc, tacc01 + acc01 out_str = "%f %f %f" % (pair_loss, acc, acc01) print(out_str) out_str = "Test " + str(test_file) + " with checkpoint " + args.checkpoint args.message(out_str) n_batches = float(n_batches) out_str = "pair loss:%f acc:%f acc01:%f" \ % (tpair_loss / n_batches, tacc / n_batches, tacc01 / n_batches) args.message(out_str) f.close()
def train(mnist): x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input') y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input') regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE) y = inference(x, regularizer) global_step = tf.Variable(0, trainable=False) variable_averages = \ tf.train.ExponentialMovingAverage \ (MOVING_AVERAGE_DECAY, global_step) variables_averages_op = \ variable_averages.apply \ (tf.trainable_variables()) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits \ (logits=y, labels=tf.argmax(y_, 1)) cross_entropy_mean = tf.reduce_mean(cross_entropy) loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses')) learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY, staircase=True) train_step = \ tf.train.GradientDescentOptimizer(learning_rate) \ .minimize(loss, global_step=global_step) with tf.control_dependencies([train_step, variables_averages_op]): train_op = tf.no_op(name='train') saver = tf.train.Saver() with tf.Session() as sess: tf.global_variables_initializer().run() export = exporter.Exporter(saver) for i in range(TRAINING_STEPS): xs, ys = mnist.train.next_batch(BATCH_SIZE) _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={ x: xs, y_: ys }) if i % 1000 == 0: print( "After %d training step(s), loss on training batch is %g." % (step, loss_value)) saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step) saver.export_meta_graph(os.path.join(MODEL_SAVE_PATH, MODEL_NAME) + ".json", as_text=True) export.init( named_graph_signatures={ "inputs": exporter.generic_signature({"input_matrix": x}), "outputs": exporter.generic_signature({"output_label": tf.argmax(y, 1)}), "regress": exporter.regression_signature(x, y) }) export.export(MODEL_SAVE_PATH + MODEL_NAME, tf.constant(123), sess)
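Because train() folds an ExponentialMovingAverage update into train_op, a serving process that wants the averaged weights must restore each shadow variable into its original variable name. A minimal sketch of that restore step, assuming the same MOVING_AVERAGE_DECAY, MODEL_SAVE_PATH, and inference() as above:

# Sketch: restore moving-average (shadow) values in place of the raw weights.
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
with tf.Session() as sess:
  ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
  if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)

This is the same variables_to_restore() pattern the Inception export below uses before building its Exporter.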
def Export(export_dir, use_checkpoint_v2): with tf.Session() as sess: # Make model parameters a&b variables instead of constants to # exercise the variable reloading mechanisms. a = tf.Variable(0.5, name="a") b = tf.Variable(2.0, name="b") # Create a placeholder for serialized tensorflow.Example messages to be fed. serialized_tf_example = tf.placeholder(tf.string, name="tf_example") # Parse the tensorflow.Example looking for a feature named "x" with a single # floating point value. feature_configs = {"x": tf.FixedLenFeature([1], dtype=tf.float32),} tf_example = tf.parse_example(serialized_tf_example, feature_configs) # Use tf.identity() to assign name x = tf.identity(tf_example["x"], name="x") # Calculate, y = a*x + b y = tf.add(tf.multiply(a, x), b, name="y") # Setup a standard Saver for our variables. save = tf.train.Saver( { "a": a, "b": b }, sharded=True, write_version=tf.train.SaverDef.V2 if use_checkpoint_v2 else tf.train.SaverDef.V1) # asset_path contains the base directory of assets used in training (e.g. # vocabulary files). original_asset_path = tf.constant("/tmp/original/export/assets") # Ops reading asset files should reference the asset_path tensor # which stores the original asset path at training time and the # overridden assets directory at restore time. asset_path = tf.Variable(original_asset_path, name="asset_path", trainable=False, collections=[]) assign_asset_path = asset_path.assign(original_asset_path) # Use a fixed global step number. global_step_tensor = tf.Variable(123, name="global_step") # Create a RegressionSignature for our input and output. regression_signature = exporter.regression_signature( input_tensor=serialized_tf_example, # Use tf.identity here because we export two signatures here. # Otherwise only graph for one of the signatures will be loaded # (whichever is created first) during serving. output_tensor=tf.identity(y)) named_graph_signature = { "inputs": exporter.generic_signature({"x": x}), "outputs": exporter.generic_signature({"y": y}) } # Create two filename assets and corresponding tensors. # TODO(b/26254158) Consider adding validation of file existence as well as # hashes (e.g. sha1) for consistency. original_filename1 = tf.constant("hello1.txt") tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, original_filename1) filename1 = tf.Variable(original_filename1, name="filename1", trainable=False, collections=[]) assign_filename1 = filename1.assign(original_filename1) original_filename2 = tf.constant("hello2.txt") tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, original_filename2) filename2 = tf.Variable(original_filename2, name="filename2", trainable=False, collections=[]) assign_filename2 = filename2.assign(original_filename2) # Init op contains a group of all variables that we assign. init_op = tf.group(assign_asset_path, assign_filename1, assign_filename2) # CopyAssets is used as a callback during export to copy files to the # given export directory. def CopyAssets(filepaths, export_path): print("copying asset files to: %s" % export_path) for filepath in filepaths: print("copying asset file: %s" % filepath) # Run an export. tf.global_variables_initializer().run() export = exporter.Exporter(save) export.init( sess.graph.as_graph_def(), init_op=init_op, default_graph_signature=regression_signature, named_graph_signatures=named_graph_signature, assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS), assets_callback=CopyAssets) export.export(export_dir, global_step_tensor, sess)
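A call such as the following would be expected to write a versioned export (version 123, from the fixed global step) containing the graph, the variables, and copies of the two asset files; the directory name is illustrative only:

# Hypothetical usage: with a=0.5 and b=2.0 the exported model computes y = 0.5*x + 2.
Export("/tmp/half_plus_two_export", use_checkpoint_v2=True)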
def export(): # Create index->synset mapping synsets = [] with open(SYNSET_FILE) as f: synsets = f.read().splitlines() # Create synset->metadata mapping texts = {} with open(METADATA_FILE) as f: for line in f.read().splitlines(): parts = line.split('\t') assert len(parts) == 2 texts[parts[0]] = parts[1] with tf.Graph().as_default(): # Build inference model. # Please refer to Tensorflow inception model for details. # Input transformation. jpegs = tf.placeholder(tf.string) images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32) # Run inference. logits, _ = inception_model.inference(images, NUM_CLASSES + 1) # Transform output to topK result. values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES) # Create a constant string Tensor where the i'th element is # the human readable class description for the i'th index. # Note that the 0th index is an unused background class # (see inception model definition code). class_descriptions = ['unused background'] for s in synsets: class_descriptions.append(texts[s]) class_tensor = tf.constant(class_descriptions) classes = tf.contrib.lookup.index_to_string(tf.to_int64(indices), mapping=class_tensor) # Restore variables from training checkpoint. variable_averages = tf.train.ExponentialMovingAverage( inception_model.MOVING_AVERAGE_DECAY) variables_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variables_to_restore) with tf.Session() as sess: # Restore variables from training checkpoints. ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) # Assuming model_checkpoint_path looks something like: # /my-favorite-path/imagenet_train/model.ckpt-0, # extract global_step from it. global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] print('Successfully loaded model from %s at step=%s.' % (ckpt.model_checkpoint_path, global_step)) else: print('No checkpoint file found at %s' % FLAGS.checkpoint_dir) return # Export inference model. init_op = tf.group(tf.initialize_all_tables(), name='init_op') model_exporter = exporter.Exporter(saver) model_exporter.init(init_op=init_op, named_graph_signatures={ 'inputs': exporter.generic_signature({'images': jpegs}), 'outputs': exporter.generic_signature({'classes': classes, 'scores': values})}) model_exporter.export(FLAGS.export_dir, tf.constant(global_step), sess) print('Successfully exported model to %s' % FLAGS.export_dir)
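Once this Inception model is exported and loaded by a model server, a gRPC client can feed raw JPEG bytes to the "images" input of the generic signature. A minimal client sketch against the era-appropriate TensorFlow Serving APIs; the host, port, model name, and image path are assumptions:

from grpc.beta import implementations
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2

channel = implementations.insecure_channel("localhost", 9000)  # assumed address
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
request = predict_pb2.PredictRequest()
request.model_spec.name = "inception"  # assumed model name
with open("/tmp/cat.jpg", "rb") as f:  # assumed test image
  request.inputs["images"].CopyFrom(
      tf.contrib.util.make_tensor_proto([f.read()], shape=[1]))
result = stub.Predict(request, 10.0)  # 10-second timeout
print(result.outputs["classes"])
print(result.outputs["scores"])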
def main(): # Pre-process hyperparameters FEATURE_SIZE = FLAGS.feature_size LABEL_SIZE = FLAGS.label_size EPOCH_NUMBER = FLAGS.epoch_number if EPOCH_NUMBER <= 0: EPOCH_NUMBER = None BATCH_THREAD_NUMBER = FLAGS.batch_thread_number MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE MODE = FLAGS.mode MODEL = FLAGS.model CHECKPOINT_PATH = FLAGS.checkpoint_path if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists( CHECKPOINT_PATH): os.makedirs(CHECKPOINT_PATH) CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt" LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH) OUTPUT_PATH = FLAGS.output_path if not OUTPUT_PATH.startswith("fds://") and not os.path.exists( OUTPUT_PATH): os.makedirs(OUTPUT_PATH) # Process TFRecords files def read_and_decode(filename_queue): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) features = tf.parse_single_example( serialized_example, features={ "label": tf.FixedLenFeature([], tf.float32), "features": tf.FixedLenFeature([FEATURE_SIZE], tf.float32), }) label = features["label"] features = features["features"] return label, features # Read TFRecords files for training filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.train_tfrecords_file), num_epochs=EPOCH_NUMBER) label, features = read_and_decode(filename_queue) batch_labels, batch_features = tf.train.shuffle_batch( [label, features], batch_size=FLAGS.batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) # Read TFRecords file for validation validate_filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.validate_tfrecords_file), num_epochs=EPOCH_NUMBER) validate_label, validate_features = read_and_decode( validate_filename_queue) validate_batch_labels, validate_batch_features = tf.train.shuffle_batch( [validate_label, validate_features], batch_size=FLAGS.validate_batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) # Define the model input_units = FEATURE_SIZE output_units = LABEL_SIZE model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()] def full_connect(inputs, weights_shape, biases_shape, is_train=True): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.matmul(inputs, weights) + biases if FLAGS.enable_bn and is_train: mean, var = tf.nn.moments(layer, axes=[0]) scale = tf.get_variable("scale", biases_shape, initializer=tf.random_normal_initializer()) shift = tf.get_variable("shift", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.nn.batch_normalization(layer, mean, var, shift, scale, FLAGS.bn_epsilon) return layer def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True): layer = full_connect(inputs, weights_shape, biases_shape, is_train) layer = tf.nn.relu(layer) return layer def customized_inference(inputs, is_train=True): hidden1_units = 128 hidden2_units = 32 hidden3_units = 8 with tf.variable_scope("input"): layer = full_connect_relu(inputs, [input_units, hidden1_units], [hidden1_units], is_train) with tf.variable_scope("layer0"): layer = full_connect_relu(layer, [hidden1_units, hidden2_units], [hidden2_units], is_train) with tf.variable_scope("layer1"): layer = full_connect_relu(layer, [hidden2_units, 
hidden3_units], [hidden3_units], is_train) if FLAGS.enable_dropout and is_train: layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob) with tf.variable_scope("output"): layer = full_connect(layer, [hidden3_units, output_units], [output_units], is_train) return layer def dnn_inference(inputs, is_train=True): with tf.variable_scope("input"): layer = full_connect_relu( inputs, [input_units, model_network_hidden_units[0]], [model_network_hidden_units[0]], is_train) for i in range(len(model_network_hidden_units) - 1): with tf.variable_scope("layer{}".format(i)): layer = full_connect_relu(layer, [ model_network_hidden_units[i], model_network_hidden_units[i + 1] ], [model_network_hidden_units[i + 1]], is_train) with tf.variable_scope("output"): layer = full_connect( layer, [model_network_hidden_units[-1], output_units], [output_units], is_train) return layer def lr_inference(inputs, is_train=True): with tf.variable_scope("lr"): layer = full_connect(inputs, [input_units, output_units], [output_units]) return layer def wide_and_deep_inference(inputs, is_train=True): return lr_inference(inputs, is_train) + dnn_inference(inputs, is_train) def cnn_inference(inputs, is_train=True): # TODO: Change if validate_batch_size is different # [BATCH_SIZE, 512 * 512 * 1] -> [BATCH_SIZE, 512, 512, 1] inputs = tf.reshape(inputs, [FLAGS.batch_size, 512, 512, 1]) # [BATCH_SIZE, 512, 512, 1] -> [BATCH_SIZE, 128, 128, 8] with tf.variable_scope("conv0"): weights = tf.get_variable( "weights", [3, 3, 1, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool(layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME") # [BATCH_SIZE, 128, 128, 8] -> [BATCH_SIZE, 32, 32, 8] with tf.variable_scope("conv1"): weights = tf.get_variable( "weights", [3, 3, 8, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(layer, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool(layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME") # [BATCH_SIZE, 32, 32, 8] -> [BATCH_SIZE, 8, 8, 8] with tf.variable_scope("conv2"): weights = tf.get_variable( "weights", [3, 3, 8, 8], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [8], initializer=tf.random_normal_initializer()) layer = tf.nn.conv2d(layer, weights, strides=[1, 1, 1, 1], padding="SAME") layer = tf.nn.bias_add(layer, bias) layer = tf.nn.relu(layer) layer = tf.nn.max_pool(layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME") # [BATCH_SIZE, 8, 8, 8] -> [BATCH_SIZE, 8 * 8 * 8] layer = tf.reshape(layer, [-1, 8 * 8 * 8]) # [BATCH_SIZE, 8 * 8 * 8] -> [BATCH_SIZE, LABEL_SIZE] with tf.variable_scope("output"): weights = tf.get_variable( "weights", [8 * 8 * 8, LABEL_SIZE], initializer=tf.random_normal_initializer()) bias = tf.get_variable("bias", [LABEL_SIZE], initializer=tf.random_normal_initializer()) layer = tf.add(tf.matmul(layer, weights), bias) return layer def inference(inputs, is_train=True): if MODEL == "dnn": return dnn_inference(inputs, is_train) elif MODEL == "lr": return lr_inference(inputs, is_train) elif MODEL == "wide_and_deep": return wide_and_deep_inference(inputs, is_train) elif MODEL == "customized": return 
customized_inference(inputs, is_train) elif MODEL == "cnn": return cnn_inference(inputs, is_train) else: print("Unknown model, exit now") exit(1) print("Use the model: {}, model network: {}".format( MODEL, FLAGS.model_network)) logits = inference(batch_features, True) batch_labels = tf.to_int64(batch_labels) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( logits, batch_labels) loss = tf.reduce_mean(cross_entropy, name="loss") global_step = tf.Variable(0, name="global_step", trainable=False) if FLAGS.enable_lr_decay: print("Enable learning rate decay rate: {}".format( FLAGS.lr_decay_rate)) starter_learning_rate = FLAGS.learning_rate learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100000, FLAGS.lr_decay_rate, staircase=True) else: learning_rate = FLAGS.learning_rate optimizer = get_optimizer(FLAGS.optimizer, learning_rate) train_op = optimizer.minimize(loss, global_step=global_step) tf.get_variable_scope().reuse_variables() # Define accuracy op for train data train_accuracy_logits = inference(batch_features, False) train_softmax = tf.nn.softmax(train_accuracy_logits) train_correct_prediction = tf.equal(tf.argmax(train_softmax, 1), batch_labels) train_accuracy = tf.reduce_mean( tf.cast(train_correct_prediction, tf.float32)) # Define auc op for train data batch_labels = tf.cast(batch_labels, tf.int32) sparse_labels = tf.reshape(batch_labels, [-1, 1]) derived_size = tf.shape(batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(1, [indices, sparse_labels]) outshape = tf.pack([derived_size, LABEL_SIZE]) new_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax, new_batch_labels) # Define accuracy op for validate data validate_accuracy_logits = inference(validate_batch_features, False) validate_softmax = tf.nn.softmax(validate_accuracy_logits) validate_batch_labels = tf.to_int64(validate_batch_labels) validate_correct_prediction = tf.equal(tf.argmax(validate_softmax, 1), validate_batch_labels) validate_accuracy = tf.reduce_mean( tf.cast(validate_correct_prediction, tf.float32)) # Define auc op for validate data validate_batch_labels = tf.cast(validate_batch_labels, tf.int32) sparse_labels = tf.reshape(validate_batch_labels, [-1, 1]) derived_size = tf.shape(validate_batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(1, [indices, sparse_labels]) outshape = tf.pack([derived_size, LABEL_SIZE]) new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, validate_auc = tf.contrib.metrics.streaming_auc( validate_softmax, new_validate_batch_labels) # Define inference op inference_features = tf.placeholder("float", [None, FEATURE_SIZE]) inference_logits = inference(inference_features, False) inference_softmax = tf.nn.softmax(inference_logits) inference_op = tf.argmax(inference_softmax, 1) keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) model_signature = { "inputs": exporter.generic_signature({ "keys": keys_placeholder, "features": inference_features }), "outputs": exporter.generic_signature({ "keys": keys, "softmax": inference_softmax, "prediction": inference_op }) } # Initialize saver and summary saver = tf.train.Saver() tf.scalar_summary("loss", loss) tf.scalar_summary("train_accuracy", train_accuracy) tf.scalar_summary("train_auc", train_auc) tf.scalar_summary("validate_accuracy", validate_accuracy) 
tf.scalar_summary("validate_auc", validate_auc) summary_op = tf.merge_all_summaries() # Create session to run with tf.Session() as sess: print("Start to run with mode: {}".format(MODE)) writer = tf.train.SummaryWriter(OUTPUT_PATH, sess.graph) sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) if MODE == "train": # Restore session and start queue runner restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = datetime.datetime.now() try: while not coord.should_stop(): _, loss_value, step = sess.run( [train_op, loss, global_step]) # Print state while training if step % FLAGS.steps_to_validate == 0: train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run( [ train_accuracy, train_auc, validate_accuracy, validate_auc, summary_op ]) end_time = datetime.datetime.now() print( "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}" .format(end_time - start_time, step, loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value)) writer.add_summary(summary_value, step) saver.save(sess, CHECKPOINT_FILE, global_step=step) start_time = end_time except tf.errors.OutOfRangeError: # Export the model after training export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) finally: coord.request_stop() coord.join(threads) elif MODE == "export": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): print("No checkpoint found, exit now") exit(1) # Export the model export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) elif MODE == "inference": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): print("No checkpoint found, exit now") exit(1) # Load inference test data inference_result_file_name = FLAGS.inference_result_file inference_test_file_name = FLAGS.inference_test_file inference_data = np.genfromtxt(inference_test_file_name, delimiter=",") inference_data_features = inference_data[:, 0:9] inference_data_labels = inference_data[:, 9] # Run inference start_time = datetime.datetime.now() prediction, prediction_softmax = sess.run( [inference_op, inference_softmax], feed_dict={inference_features: inference_data_features}) end_time = datetime.datetime.now() # Compute accuracy label_number = len(inference_data_labels) correct_label_number = 0 for i in range(label_number): if inference_data_labels[i] == prediction[i]: correct_label_number += 1 accuracy = float(correct_label_number) / label_number # Compute auc expected_labels = np.array(inference_data_labels) predict_labels = prediction_softmax[:, 0] fpr, tpr, thresholds = metrics.roc_curve(expected_labels, predict_labels, pos_label=0) auc = metrics.auc(fpr, tpr) print("[{}] Inference accuracy: {}, auc: {}".format( end_time - start_time, accuracy, auc)) # Save result into the file np.savetxt(inference_result_file_name, prediction, delimiter=",") print("Save result to file: {}".format(inference_result_file_name))
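Both trainers build dense one-hot labels for tf.contrib.metrics.streaming_auc by concatenating row indices with the sparse labels and calling tf.sparse_to_dense. On TensorFlow versions that provide tf.one_hot, the same tensor can be produced in one line (a sketch, using the same LABEL_SIZE as above):

# Equivalent one-hot construction for the AUC metric.
new_batch_labels = tf.one_hot(batch_labels, LABEL_SIZE, on_value=1.0, off_value=0.0)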
def doBasicsOneExportPath(self, export_path, clear_devices=False, global_step=GLOBAL_STEP, sharded=True): # Build a graph with 2 parameter nodes on different devices. tf.reset_default_graph() with tf.Session( target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: # v2 is an unsaved variable derived from v0 and v1. It is used to # exercise the ability to run an init op when restoring a graph. with sess.graph.device("/cpu:0"): v0 = tf.Variable(10, name="v0") with sess.graph.device("/cpu:1"): v1 = tf.Variable(20, name="v1") v2 = tf.Variable(1, name="v2", trainable=False, collections=[]) assign_v2 = tf.assign(v2, tf.add(v0, v1)) init_op = tf.group(assign_v2, name="init_op") tf.add_to_collection("v", v0) tf.add_to_collection("v", v1) tf.add_to_collection("v", v2) global_step_tensor = tf.Variable(global_step, name="global_step") named_tensor_bindings = {"logical_input_A": v0, "logical_input_B": v1} signatures = { "foo": exporter.regression_signature(input_tensor=v0, output_tensor=v1), "generic": exporter.generic_signature(named_tensor_bindings) } asset_filepath_orig = os.path.join(tf.test.get_temp_dir(), "hello42.txt") asset_file = tf.constant(asset_filepath_orig, name="filename42") tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, asset_file) with gfile.FastGFile(asset_filepath_orig, "w") as f: f.write("your data here") assets_collection = tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS) ignored_asset = os.path.join(tf.test.get_temp_dir(), "ignored.txt") with gfile.FastGFile(ignored_asset, "w") as f: f.write("additional data here") tf.initialize_all_variables().run() # Run an export. save = tf.train.Saver({"v0": v0, "v1": v1}, restore_sequentially=True, sharded=sharded) export = exporter.Exporter(save) export.init(sess.graph.as_graph_def(), init_op=init_op, clear_devices=clear_devices, default_graph_signature=exporter.classification_signature( input_tensor=v0), named_graph_signatures=signatures, assets_collection=assets_collection) export.export(export_path, global_step_tensor, sess, exports_to_keep=gc.largest_export_versions(2)) # Restore graph. compare_def = tf.get_default_graph().as_graph_def() tf.reset_default_graph() with tf.Session( target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: save = tf.train.import_meta_graph( os.path.join(export_path, constants.VERSION_FORMAT_SPECIFIER % global_step, constants.META_GRAPH_DEF_FILENAME)) self.assertIsNotNone(save) meta_graph_def = save.export_meta_graph() collection_def = meta_graph_def.collection_def # Validate custom graph_def. graph_def_any = collection_def[constants.GRAPH_KEY].any_list.value self.assertEquals(len(graph_def_any), 1) graph_def = tf.GraphDef() graph_def_any[0].Unpack(graph_def) if clear_devices: for node in compare_def.node: node.device = "" self.assertProtoEquals(compare_def, graph_def) # Validate init_op. init_ops = collection_def[constants.INIT_OP_KEY].node_list.value self.assertEquals(len(init_ops), 1) self.assertEquals(init_ops[0], "init_op") # Validate signatures. 
signatures_any = collection_def[constants.SIGNATURES_KEY].any_list.value self.assertEquals(len(signatures_any), 1) signatures = manifest_pb2.Signatures() signatures_any[0].Unpack(signatures) default_signature = signatures.default_signature self.assertEqual( default_signature.classification_signature.input.tensor_name, "v0:0") bindings = signatures.named_signatures["generic"].generic_signature.map self.assertEquals(bindings["logical_input_A"].tensor_name, "v0:0") self.assertEquals(bindings["logical_input_B"].tensor_name, "v1:0") read_foo_signature = ( signatures.named_signatures["foo"].regression_signature) self.assertEquals(read_foo_signature.input.tensor_name, "v0:0") self.assertEquals(read_foo_signature.output.tensor_name, "v1:0") # Validate the assets. assets_any = collection_def[constants.ASSETS_KEY].any_list.value self.assertEquals(len(assets_any), 1) asset = manifest_pb2.AssetFile() assets_any[0].Unpack(asset) assets_path = os.path.join(export_path, constants.VERSION_FORMAT_SPECIFIER % global_step, constants.ASSETS_DIRECTORY, "hello42.txt") asset_contents = gfile.GFile(assets_path).read() self.assertEqual(asset_contents, "your data here") self.assertEquals("hello42.txt", asset.filename) self.assertEquals("filename42:0", asset.tensor_binding.tensor_name) ignored_asset_path = os.path.join(export_path, constants.VERSION_FORMAT_SPECIFIER % global_step, constants.ASSETS_DIRECTORY, "ignored.txt") self.assertFalse(gfile.Exists(ignored_asset_path)) # Validate graph restoration. if sharded: save.restore(sess, os.path.join( export_path, constants.VERSION_FORMAT_SPECIFIER % global_step, constants.VARIABLES_FILENAME_PATTERN)) else: save.restore(sess, os.path.join( export_path, constants.VERSION_FORMAT_SPECIFIER % global_step, constants.VARIABLES_FILENAME)) self.assertEqual(10, tf.get_collection("v")[0].eval()) self.assertEqual(20, tf.get_collection("v")[1].eval()) tf.get_collection(constants.INIT_OP_KEY)[0].run() self.assertEqual(30, tf.get_collection("v")[2].eval())
def main(): # Get hyperparameters if FLAGS.enable_colored_log: import coloredlogs coloredlogs.install() logging.basicConfig(level=logging.INFO) FEATURE_SIZE = FLAGS.feature_size LABEL_SIZE = FLAGS.label_size EPOCH_NUMBER = FLAGS.epoch_number if EPOCH_NUMBER <= 0: EPOCH_NUMBER = None BATCH_THREAD_NUMBER = FLAGS.batch_thread_number MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE MODE = FLAGS.mode MODEL = FLAGS.model OPTIMIZER = FLAGS.optimizer CHECKPOINT_PATH = FLAGS.checkpoint_path if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists( CHECKPOINT_PATH): os.makedirs(CHECKPOINT_PATH) CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt" LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH) OUTPUT_PATH = FLAGS.output_path if not OUTPUT_PATH.startswith("fds://") and not os.path.exists(OUTPUT_PATH): os.makedirs(OUTPUT_PATH) pprint.PrettyPrinter().pprint(FLAGS.__flags) # Read TFRecords files for training def read_and_decode(filename_queue): reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) return serialized_example # Read TFRecords files for training filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.train_tfrecords_file), num_epochs=EPOCH_NUMBER) serialized_example = read_and_decode(filename_queue) batch_serialized_example = tf.train.shuffle_batch( [serialized_example], batch_size=FLAGS.batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) features = tf.parse_example(batch_serialized_example, features={ "label": tf.FixedLenFeature([], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) batch_labels = features["label"] batch_ids = features["ids"] batch_values = features["values"] # Read TFRecords file for validation validate_filename_queue = tf.train.string_input_producer( tf.train.match_filenames_once(FLAGS.validate_tfrecords_file), num_epochs=EPOCH_NUMBER) validate_serialized_example = read_and_decode(validate_filename_queue) validate_batch_serialized_example = tf.train.shuffle_batch( [validate_serialized_example], batch_size=FLAGS.validate_batch_size, num_threads=BATCH_THREAD_NUMBER, capacity=BATCH_CAPACITY, min_after_dequeue=MIN_AFTER_DEQUEUE) validate_features = tf.parse_example( validate_batch_serialized_example, features={ "label": tf.FixedLenFeature([], tf.float32), "ids": tf.VarLenFeature(tf.int64), "values": tf.VarLenFeature(tf.float32), }) validate_batch_labels = validate_features["label"] validate_batch_ids = validate_features["ids"] validate_batch_values = validate_features["values"] # Define the model input_units = FEATURE_SIZE output_units = LABEL_SIZE model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()] def full_connect(inputs, weights_shape, biases_shape, is_train=True): with tf.device("/cpu:0"): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.matmul(inputs, weights) + biases if FLAGS.enable_bn and is_train: mean, var = tf.nn.moments(layer, axes=[0]) scale = tf.get_variable("scale", biases_shape, initializer=tf.random_normal_initializer()) shift = tf.get_variable("shift", biases_shape, initializer=tf.random_normal_initializer()) layer = tf.nn.batch_normalization(layer, mean, var, shift, scale, FLAGS.bn_epsilon) return layer def sparse_full_connect(sparse_ids, 
sparse_values, weights_shape, biases_shape, is_train=True): weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer()) return tf.nn.embedding_lookup_sparse( weights, sparse_ids, sparse_values, combiner="sum") + biases def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True): return tf.nn.relu(full_connect(inputs, weights_shape, biases_shape, is_train)) def customized_inference(sparse_ids, sparse_values, is_train=True): hidden1_units = 128 hidden2_units = 32 hidden3_units = 8 with tf.variable_scope("input"): sparse_layer = sparse_full_connect(sparse_ids, sparse_values, [input_units, hidden1_units], [hidden1_units], is_train) layer = tf.nn.relu(sparse_layer) with tf.variable_scope("layer0"): layer = full_connect_relu(layer, [hidden1_units, hidden2_units], [hidden2_units], is_train) with tf.variable_scope("layer1"): layer = full_connect_relu(layer, [hidden2_units, hidden3_units], [hidden3_units], is_train) if FLAGS.enable_dropout and is_train: layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob) with tf.variable_scope("output"): layer = full_connect(layer, [hidden3_units, output_units], [output_units], is_train) return layer def dnn_inference(sparse_ids, sparse_values, is_train=True): with tf.variable_scope("input"): sparse_layer = sparse_full_connect( sparse_ids, sparse_values, [input_units, model_network_hidden_units[0]], [model_network_hidden_units[0]], is_train) layer = tf.nn.relu(sparse_layer) for i in range(len(model_network_hidden_units) - 1): with tf.variable_scope("layer{}".format(i)): layer = full_connect_relu(layer, [ model_network_hidden_units[i], model_network_hidden_units[i + 1] ], [model_network_hidden_units[i + 1]], is_train) with tf.variable_scope("output"): layer = full_connect(layer, [model_network_hidden_units[-1], output_units], [output_units], is_train) return layer def lr_inference(sparse_ids, sparse_values, is_train=True): with tf.variable_scope("logistic_regression"): layer = sparse_full_connect(sparse_ids, sparse_values, [input_units, output_units], [output_units]) return layer def wide_and_deep_inference(sparse_ids, sparse_values, is_train=True): return lr_inference(sparse_ids, sparse_values, is_train) + dnn_inference( sparse_ids, sparse_values, is_train) def inference(sparse_ids, sparse_values, is_train=True): if MODEL == "dnn": return dnn_inference(sparse_ids, sparse_values, is_train) elif MODEL == "lr": return lr_inference(sparse_ids, sparse_values, is_train) elif MODEL == "wide_and_deep": return wide_and_deep_inference(sparse_ids, sparse_values, is_train) elif MODEL == "customized": return customized_inference(sparse_ids, sparse_values, is_train) else: logging.error("Unknown model, exit now") exit(1) logging.info("Use the model: {}, model network: {}".format( MODEL, FLAGS.model_network)) logits = inference(batch_ids, batch_values, True) batch_labels = tf.to_int64(batch_labels) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=batch_labels) loss = tf.reduce_mean(cross_entropy, name="loss") global_step = tf.Variable(0, name="global_step", trainable=False) if FLAGS.enable_lr_decay: logging.info("Enable learning rate decay rate: {}".format( FLAGS.lr_decay_rate)) starter_learning_rate = FLAGS.learning_rate learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100000, FLAGS.lr_decay_rate, staircase=True) else: learning_rate = FLAGS.learning_rate optimizer = 
get_optimizer(FLAGS.optimizer, learning_rate) train_op = optimizer.minimize(loss, global_step=global_step) tf.get_variable_scope().reuse_variables() # Define accuracy op for train data train_accuracy_logits = inference(batch_ids, batch_values, False) train_softmax = tf.nn.softmax(train_accuracy_logits) train_correct_prediction = tf.equal( tf.argmax(train_softmax, 1), batch_labels) train_accuracy = tf.reduce_mean(tf.cast(train_correct_prediction, tf.float32)) # Define auc op for train data batch_labels = tf.cast(batch_labels, tf.int32) sparse_labels = tf.reshape(batch_labels, [-1, 1]) derived_size = tf.shape(batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(axis=1, values=[indices, sparse_labels]) outshape = tf.stack([derived_size, LABEL_SIZE]) new_train_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax, new_train_batch_labels) # Define accuracy op for validate data validate_accuracy_logits = inference(validate_batch_ids, validate_batch_values, False) validate_softmax = tf.nn.softmax(validate_accuracy_logits) validate_batch_labels = tf.to_int64(validate_batch_labels) validate_correct_prediction = tf.equal( tf.argmax(validate_softmax, 1), validate_batch_labels) validate_accuracy = tf.reduce_mean(tf.cast(validate_correct_prediction, tf.float32)) # Define auc op for validate data validate_batch_labels = tf.cast(validate_batch_labels, tf.int32) sparse_labels = tf.reshape(validate_batch_labels, [-1, 1]) derived_size = tf.shape(validate_batch_labels)[0] indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1]) concated = tf.concat(axis=1, values=[indices, sparse_labels]) outshape = tf.stack([derived_size, LABEL_SIZE]) new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0) _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax, new_validate_batch_labels) # Define inference op sparse_index = tf.placeholder(tf.int64, [None, 2]) sparse_ids = tf.placeholder(tf.int64, [None]) sparse_values = tf.placeholder(tf.float32, [None]) sparse_shape = tf.placeholder(tf.int64, [2]) inference_ids = tf.SparseTensor(sparse_index, sparse_ids, sparse_shape) inference_values = tf.SparseTensor(sparse_index, sparse_values, sparse_shape) inference_logits = inference(inference_ids, inference_values, False) inference_softmax = tf.nn.softmax(inference_logits) inference_op = tf.argmax(inference_softmax, 1) keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1]) keys = tf.identity(keys_placeholder) model_signature = { "inputs": exporter.generic_signature({"keys": keys_placeholder, "indexs": sparse_index, "ids": sparse_ids, "values": sparse_values, "shape": sparse_shape}), "outputs": exporter.generic_signature({"keys": keys, "softmax": inference_softmax, "prediction": inference_op}) } # Initialize saver and summary saver = tf.train.Saver() tf.summary.scalar("loss", loss) tf.summary.scalar("train_accuracy", train_accuracy) tf.summary.scalar("train_auc", train_auc) tf.summary.scalar("validate_accuracy", validate_accuracy) tf.summary.scalar("validate_auc", validate_auc) summary_op = tf.summary.merge_all() init_op = [tf.global_variables_initializer(), tf.local_variables_initializer( )] # Create session to run with tf.Session() as sess: logging.info("Start to run with mode: {}".format(MODE)) writer = tf.summary.FileWriter(OUTPUT_PATH, sess.graph) sess.run(init_op) if MODE == "train": # Restore session and start queue runner restore_session_from_checkpoint(sess, saver, 
LATEST_CHECKPOINT) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord, sess=sess) start_time = datetime.datetime.now() try: while not coord.should_stop(): _, loss_value, step = sess.run([train_op, loss, global_step]) # Print state while training if step % FLAGS.steps_to_validate == 0: train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run( [train_accuracy, train_auc, validate_accuracy, validate_auc, summary_op]) end_time = datetime.datetime.now() logging.info( "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".format( end_time - start_time, step, loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value)) writer.add_summary(summary_value, step) saver.save(sess, CHECKPOINT_FILE, global_step=step) start_time = end_time except tf.errors.OutOfRangeError: # Export the model after training export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) finally: coord.request_stop() coord.join(threads) elif MODE == "export": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) # Export the model export_model(sess, saver, model_signature, FLAGS.model_path, FLAGS.model_version) elif MODE == "savedmodel": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) logging.info("Export the saved model to {}".format( FLAGS.saved_model_path)) export_path_base = FLAGS.saved_model_path export_path = os.path.join( compat.as_bytes(export_path_base), compat.as_bytes(str(FLAGS.model_version))) model_signature = signature_def_utils.build_signature_def( inputs={ "keys": utils.build_tensor_info(keys_placeholder), "indexs": utils.build_tensor_info(sparse_index), "ids": utils.build_tensor_info(sparse_ids), "values": utils.build_tensor_info(sparse_values), "shape": utils.build_tensor_info(sparse_shape) }, outputs={ "keys": utils.build_tensor_info(keys), "softmax": utils.build_tensor_info(inference_softmax), "prediction": utils.build_tensor_info(inference_op) }, method_name=signature_constants.PREDICT_METHOD_NAME) try: builder = saved_model_builder.SavedModelBuilder(export_path) builder.add_meta_graph_and_variables( sess, [tag_constants.SERVING], clear_devices=True, signature_def_map={ signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: model_signature, }, legacy_init_op=tf.group(tf.initialize_all_tables(), name="legacy_init_op")) builder.save() except Exception as e: logging.error("Failed to export saved model, exception: {}".format(e)) elif MODE == "inference": if not restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT): logging.error("No checkpoint found, exit now") exit(1) # Load inference test data inference_result_file_name = "./inference_result.txt" inference_test_file_name = "./data/a8a_test.libsvm" labels = [] feature_ids = [] feature_values = [] feature_index = [] ins_num = 0 for line in open(inference_test_file_name, "r"): tokens = line.split(" ") labels.append(int(tokens[0])) feature_num = 0 for feature in tokens[1:]: feature_id, feature_value = feature.split(":") feature_ids.append(int(feature_id)) feature_values.append(float(feature_value)) feature_index.append([ins_num, feature_num]) feature_num += 1 ins_num += 1 # Run inference start_time = datetime.datetime.now() prediction, prediction_softmax = sess.run( [inference_op, inference_softmax], 
feed_dict={sparse_index: feature_index, sparse_ids: feature_ids, sparse_values: feature_values, sparse_shape: [ins_num, FEATURE_SIZE]}) end_time = datetime.datetime.now() # Compute accuracy label_number = len(labels) correct_label_number = 0 for i in range(label_number): if labels[i] == prediction[i]: correct_label_number += 1 accuracy = float(correct_label_number) / label_number # Compute auc expected_labels = np.array(labels) predict_labels = prediction_softmax[:, 0] fpr, tpr, thresholds = metrics.roc_curve(expected_labels, predict_labels, pos_label=0) auc = metrics.auc(fpr, tpr) logging.info("[{}] Inference accuracy: {}, auc: {}".format( end_time - start_time, accuracy, auc)) # Save result into the file np.savetxt(inference_result_file_name, prediction_softmax, delimiter=",") logging.info("Save result to file: {}".format( inference_result_file_name))
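For reference, the inference branch feeds the sparse placeholders with a COO-style encoding of the LibSVM file. A tiny worked example of the feed produced by the parsing loop for two hypothetical rows, "1 3:0.5 7:1.0" and "0 2:2.0":

feature_index = [[0, 0], [0, 1], [1, 0]]  # [instance number, position within instance]
feature_ids = [3, 7, 2]                   # ids from the id:value pairs
feature_values = [0.5, 1.0, 2.0]          # values from the id:value pairs
# sparse_shape is then fed as [2, FEATURE_SIZE], since ins_num == 2.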