def _call_train(self,
                _sentinel=None,  # pylint: disable=invalid-name
                input_fn=None,
                steps=None,
                hooks=None,
                max_steps=None,
                saving_listeners=None):
  if _sentinel is not None:
    raise ValueError("_call_train should be called with keyword args only")

  # Estimator in core cannot work with monitors. We need to convert them
  # to hooks. For Estimator in contrib, it is converted internally. So, it is
  # safe to convert for both cases.
  hooks = monitors.replace_monitors_with_hooks(hooks, self._estimator)
  if self._core_estimator_used:
    return self._estimator.train(
        input_fn=input_fn,
        steps=steps,
        max_steps=max_steps,
        hooks=hooks,
        saving_listeners=saving_listeners)
  else:
    return self._estimator.fit(
        input_fn=input_fn, steps=steps, max_steps=max_steps, monitors=hooks)
def _call_train(self,
                _sentinel=None,  # pylint: disable=invalid-name
                input_fn=None,
                steps=None,
                hooks=None,
                max_steps=None):
  if _sentinel is not None:
    raise ValueError("_call_train should be called with keyword args only")

  # Estimator in core cannot work with monitors. We need to convert them
  # to hooks. For Estimator in contrib, it is converted internally. So, it is
  # safe to convert for both cases.
  hooks = monitors.replace_monitors_with_hooks(hooks, self._estimator)
  if self._core_estimator_used:
    return self._estimator.train(
        input_fn=input_fn, steps=steps, max_steps=max_steps, hooks=hooks)
  else:
    return self._estimator.fit(
        input_fn=input_fn, steps=steps, max_steps=max_steps, monitors=hooks)
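# For context: replace_monitors_with_hooks() is what lets deprecated
# contrib.learn monitors run under a core Estimator, whose train() only
# accepts SessionRunHooks. A minimal usage sketch, assuming a TF 1.x install
# with contrib available; my_estimator, my_train_input_fn and
# my_eval_input_fn are placeholders, not names from the snippets above:
import tensorflow as tf
from tensorflow.contrib.learn.python.learn import monitors as monitor_lib

val_monitor = tf.contrib.learn.monitors.ValidationMonitor(
    input_fn=my_eval_input_fn,  # placeholder eval input_fn
    every_n_steps=100)
# Wraps the monitor in a hook adapter; core Estimator.train accepts the result.
hooks = monitor_lib.replace_monitors_with_hooks([val_monitor], my_estimator)
my_estimator.train(input_fn=my_train_input_fn, hooks=hooks)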
def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
        monitors=None, max_steps=None):
  """See trainable.Trainable."""
  # TODO(roumposg): Remove when deprecated monitors are removed.
  hooks = monitor_lib.replace_monitors_with_hooks(monitors, self)
  self._estimator.fit(
      x=x,
      y=y,
      input_fn=input_fn,
      steps=steps,
      batch_size=batch_size,
      monitors=hooks,
      max_steps=max_steps)
  return self
def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
        monitors=None, max_steps=None):
  """See trainable.Trainable."""
  hooks = monitor_lib.replace_monitors_with_hooks(monitors, self)
  self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps,
                      batch_size=batch_size, monitors=hooks,
                      max_steps=max_steps)
  return self
def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
        monitors=None, max_steps=None):
  """See trainable.Trainable. Note: Labels must be integer class indices."""
  # TODO(roumposg): Remove when deprecated monitors are removed.
  hooks = monitor_lib.replace_monitors_with_hooks(monitors, self)
  if self._additional_run_hook:
    hooks.append(self._additional_run_hook)
  self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps,
                      batch_size=batch_size, monitors=hooks,
                      max_steps=max_steps)
  return self
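# The variant above appends a plain SessionRunHook after conversion; that
# works because replace_monitors_with_hooks() passes SessionRunHook instances
# through unchanged and only wraps deprecated monitors in an adapter. A hedged
# sketch of the equivalent mixed-list call (imports as in the sketch after the
# _call_train variants; my_estimator and my_eval_input_fn are placeholders):
mixed = [
    tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=my_eval_input_fn, every_n_steps=100),  # gets adapted
    tf.train.StopAtStepHook(last_step=10000),  # already a hook; passed through
]
hooks = monitor_lib.replace_monitors_with_hooks(mixed, my_estimator)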
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  processors = {"ner": NerProcessor}
  if not FLAGS.do_train and not FLAGS.do_eval:
    raise ValueError("At least one of `do_train` or `do_eval` must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))
  processor = processors[task_name]()
  label_list = processor.get_labels()
  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list) + 1,
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    filed_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenizer,
        train_file)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    # lms_hook = LMSSessionRunHook({'optimizer_hook'})
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    # Build an eval input_fn here as well, so a ValidationMonitor can run
    # periodic evaluation during training.
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    filed_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)
    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d", len(eval_examples))
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
    eval_steps = None
    if FLAGS.use_tpu:
      eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)
    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)
    list_of_monitors_and_hooks = [
        tf.contrib.learn.monitors.ValidationMonitor(
            input_fn=eval_input_fn, eval_steps=eval_steps, every_n_steps=500)
    ]
    hooks = monitor_lib.replace_monitors_with_hooks(list_of_monitors_and_hooks,
                                                    estimator)
    estimator.train(
        input_fn=train_input_fn, max_steps=num_train_steps, hooks=hooks)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    filed_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)
    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d", len(eval_examples))
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
    eval_steps = None
    if FLAGS.use_tpu:
      eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)
    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)
    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    with open(output_eval_file, "w") as writer:
      tf.logging.info("***** Eval results *****")
      for key in sorted(result.keys()):
        tf.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.do_predict:
    token_path = os.path.join(FLAGS.output_dir, "token_test.txt")
    with open(os.path.join(FLAGS.output_dir, 'label2id.pkl'), 'rb') as rf:
      label2id = pickle.load(rf)
      id2label = {value: key for key, value in label2id.items()}
    if os.path.exists(token_path):
      os.remove(token_path)
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    filed_based_convert_examples_to_features(
        predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
        predict_file, mode="test")
    tf.logging.info("***** Running prediction *****")
    tf.logging.info("  Num examples = %d", len(predict_examples))
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)
    if FLAGS.use_tpu:
      # Warning: According to tpu_estimator.py, prediction on TPU is an
      # experimental feature and hence not supported here.
      raise ValueError("Prediction in TPU not supported")
    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn)
    output_predict_file = os.path.join(FLAGS.output_dir, "label_test.txt")
    with open(output_predict_file, 'w') as writer:
      writer.write("word gold_label predict_label" + "\n")
      for ppi, rr in enumerate(result):
        tf.logging.info(rr)
        prediction = rr["predictions"]
        gold_label = rr["gold_label"]
        origin_id = rr["origin_id"]
        origin_words = tokenizer.convert_ids_to_tokens(origin_id)
        output_line = predict_examples[ppi].text + "\n"
        # Keep only the non-padding label positions, then drop the
        # [CLS]/[SEP] slots at either end.
        for idi, id in enumerate(
            prediction[[i for i, e in enumerate(prediction) if e != 0]][1:-1]):
          output_line = (output_line + origin_words[idi + 1] + "__" +
                         id2label[gold_label[idi + 1]] + "__" +
                         id2label[id] + "\n")
        writer.write(output_line + "\n")
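# The ValidationMonitor in the training branch above only evaluates every
# 500 steps. If early stopping were also wanted, the monitor accepts the
# arguments below. A hedged sketch reusing eval_input_fn/eval_steps from the
# script; the round count of 5 is an arbitrary example value:
validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
    input_fn=eval_input_fn,
    eval_steps=eval_steps,
    every_n_steps=500,
    early_stopping_metric="loss",
    early_stopping_metric_minimize=True,
    early_stopping_rounds=5)
hooks = monitor_lib.replace_monitors_with_hooks([validation_monitor],
                                                estimator)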
def main(model_dir='/tmp/mnist_networks/Q4'):
  params = {
      # Q1b
      'batch_size': 100,
      # Q1c
      'steps': 5000,
      # Q1d
      'learning_rate': 0.01,
      # Q1e
      'iter_prints': 250,
      # Q1f / Q5b
      'optimizer': 'sgb',
      # Q2
      'input_layer_dim': [-1, 28, 28, 1],
      'conv_dim': [5, 5],
      'pool_dim': [2, 2],
      'dense_units': 1024,
      # Q3
      'dropout_rate': 0.4,
      # Q4a
      'valid_prec': 0.2,
      # Q4b
      'early_stopping_rounds': 3,
      # Q5a
      # 'lr_reduce_every_n': 400,
      # 'lr_reduce_by': 0.5
  }

  # Q1a - Load training and eval data
  mnist = tf.contrib.learn.datasets.load_dataset("mnist")
  train_data = mnist.train.images  # Returns np.array
  train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
  eval_data = mnist.test.images  # Returns np.array
  eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

  # Q4a
  valid_len = int((len(train_data) + len(eval_data)) * params['valid_prec'])
  ind = np.arange(eval_data.shape[0])
  np.random.shuffle(ind)
  train_ind, valid_ind = ind[:len(ind) - valid_len], ind[len(ind) - valid_len:]
  train_data, train_labels, valid_data, valid_labels = \
      train_data[train_ind], train_labels[train_ind], \
      train_data[valid_ind], train_labels[valid_ind]

  # Create the Estimator
  mnist_classifier = tf.estimator.Estimator(
      model_fn=cnn_model_fn,
      params=params,
      config=tf.contrib.learn.RunConfig(
          save_checkpoints_steps=1, save_summary_steps=250),
      model_dir=model_dir)

  # Train the model
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": train_data},
      y=train_labels,
      batch_size=params['batch_size'],
      num_epochs=None,
      shuffle=True)

  # Q4a
  valid_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": valid_data},
      y=valid_labels,
      batch_size=params['batch_size'],
      num_epochs=1,
      shuffle=False)

  # Q4b
  validation_monitor = ValidationMonitor(
      every_n_steps=1,
      input_fn=valid_input_fn,
      early_stopping_metric="loss",
      early_stopping_metric_minimize=True,
      early_stopping_rounds=params['early_stopping_rounds'])
  list_of_monitors_and_hooks = [validation_monitor]
  hooks = replace_monitors_with_hooks(list_of_monitors_and_hooks,
                                      mnist_classifier)
  mnist_classifier.train(
      input_fn=train_input_fn, steps=params['steps'], hooks=hooks)

  # Q1f - Evaluate the model and print results
  eval_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False)
  eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
  print(eval_results)
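# On newer TF 1.x (tf.estimator.experimental from ~1.13; tf.contrib.estimator
# on ~1.10-1.12), the same early stopping can be done without deprecated
# monitors. A hedged sketch reusing mnist_classifier, train_input_fn,
# valid_input_fn and params from above; the hook reads written eval metrics,
# so evaluation must run alongside training, hence train_and_evaluate. The
# step budget of 1000 is an arbitrary example value:
early_stopping = tf.estimator.experimental.stop_if_no_decrease_hook(
    mnist_classifier,
    metric_name='loss',
    max_steps_without_decrease=1000)
tf.estimator.train_and_evaluate(
    mnist_classifier,
    tf.estimator.TrainSpec(train_input_fn, max_steps=params['steps'],
                           hooks=[early_stopping]),
    tf.estimator.EvalSpec(valid_input_fn))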
def _train_model(self,
                 input_fn,
                 steps,
                 feed_fn=None,
                 init_op=None,
                 init_feed_fn=None,
                 init_fn=None,
                 device_fn=None,
                 monitors=None,
                 log_every_steps=100,
                 fail_on_nan_loss=True,
                 max_steps=None):
  # TODO(wicke): Remove this once Model and associated code are gone.
  if hasattr(self._config, 'execution_mode'):
    if self._config.execution_mode not in ('all', 'train'):
      return

    # Stagger startup of worker sessions based on task id.
    sleep_secs = min(
        self._config.training_worker_max_startup_secs,
        self._config.task *
        self._config.training_worker_session_startup_stagger_secs)
    if sleep_secs:
      logging.info('Waiting %d secs before starting task %d.', sleep_secs,
                   self._config.task)
      time.sleep(sleep_secs)

  # Device allocation
  device_fn = device_fn or self._device_fn

  self._graph = ops.Graph()
  with self._graph.as_default() as g, g.device(device_fn):
    random_seed.set_random_seed(self._config.tf_random_seed)
    global_step = contrib_framework.create_global_step(g)
    features, labels = input_fn()
    self._check_inputs(features, labels)

    # The default return type of _get_train_ops is ModelFnOps. But there are
    # some subclasses of tf.contrib.learn.Estimator which override this
    # method and use the legacy signature, namely _get_train_ops returns a
    # (train_op, loss) tuple. The following else-statement code covers these
    # cases, but will soon be deleted after the subclasses are updated.
    # TODO(b/32664904): Update subclasses and delete the else-statement.
    train_ops = self._get_train_ops(features, labels)
    if isinstance(train_ops, ModelFnOps):  # Default signature
      train_op = train_ops.train_op
      loss_op = train_ops.loss
    else:  # Legacy signature
      if len(train_ops) != 2:
        raise ValueError('Expected a tuple of train_op and loss, got {}'.
                         format(train_ops))
      train_op = train_ops[0]
      loss_op = train_ops[1]

    hooks = monitor_lib.replace_monitors_with_hooks(monitors, self)

    ops.add_to_collection(ops.GraphKeys.LOSSES, loss_op)
    return graph_actions._monitored_train(  # pylint: disable=protected-access
        graph=g,
        output_dir=self._model_dir,
        train_op=train_op,
        loss_op=loss_op,
        global_step_tensor=global_step,
        init_op=init_op,
        init_feed_dict=init_feed_fn() if init_feed_fn is not None else None,
        init_fn=init_fn,
        log_every_steps=log_every_steps,
        supervisor_is_chief=self.config.is_chief,
        supervisor_master=self._config.master,
        supervisor_save_model_secs=self._config.save_checkpoints_secs,
        supervisor_save_model_steps=self._config.save_checkpoints_steps,
        supervisor_save_summaries_steps=self._config.save_summary_steps,
        keep_checkpoint_max=self._config.keep_checkpoint_max,
        feed_fn=feed_fn,
        steps=steps,
        fail_on_nan_loss=fail_on_nan_loss,
        hooks=hooks,
        max_steps=max_steps)
def _train_model(self,
                 input_fn,
                 steps,
                 feed_fn=None,
                 init_op=None,
                 init_feed_fn=None,
                 init_fn=None,
                 device_fn=None,
                 monitors=None,
                 log_every_steps=100,
                 fail_on_nan_loss=True,
                 max_steps=None):
  # TODO(wicke): Remove this once Model and associated code are gone.
  if hasattr(self._config, 'execution_mode'):
    if self._config.execution_mode not in ('all', 'train'):
      return

    # Stagger startup of worker sessions based on task id.
    sleep_secs = min(
        self._config.training_worker_max_startup_secs,
        self._config.task_id *
        self._config.training_worker_session_startup_stagger_secs)
    if sleep_secs:
      logging.info('Waiting %d secs before starting task %d.', sleep_secs,
                   self._config.task_id)
      time.sleep(sleep_secs)

  # Device allocation
  device_fn = device_fn or self._device_fn

  self._graph = ops.Graph()
  with self._graph.as_default() as g, g.device(device_fn):
    random_seed.set_random_seed(self._config.tf_random_seed)
    global_step = contrib_framework.create_global_step(g)
    features, labels = input_fn()
    self._check_inputs(features, labels)

    # The default return type of _get_train_ops is ModelFnOps. But there are
    # some subclasses of tf.contrib.learn.Estimator which override this
    # method and use the legacy signature, namely _get_train_ops returns a
    # (train_op, loss) tuple. The following else-statement code covers these
    # cases, but will soon be deleted after the subclasses are updated.
    # TODO(b/32664904): Update subclasses and delete the else-statement.
    train_ops = self._get_train_ops(features, labels)
    if isinstance(train_ops, model_fn_lib.ModelFnOps):  # Default signature
      train_op = train_ops.train_op
      loss_op = train_ops.loss
    else:  # Legacy signature
      if len(train_ops) != 2:
        raise ValueError('Expected a tuple of train_op and loss, got {}'.
                         format(train_ops))
      train_op = train_ops[0]
      loss_op = train_ops[1]

    hooks = monitor_lib.replace_monitors_with_hooks(monitors, self)

    ops.add_to_collection(ops.GraphKeys.LOSSES, loss_op)
    return graph_actions._monitored_train(  # pylint: disable=protected-access
        graph=g,
        output_dir=self._model_dir,
        train_op=train_op,
        loss_op=loss_op,
        global_step_tensor=global_step,
        init_op=init_op,
        init_feed_dict=init_feed_fn() if init_feed_fn is not None else None,
        init_fn=init_fn,
        log_every_steps=log_every_steps,
        supervisor_is_chief=self.config.is_chief,
        supervisor_master=self._config.master,
        supervisor_save_model_secs=self._config.save_checkpoints_secs,
        supervisor_save_model_steps=self._config.save_checkpoints_steps,
        supervisor_save_summaries_steps=self._config.save_summary_steps,
        keep_checkpoint_max=self._config.keep_checkpoint_max,
        feed_fn=feed_fn,
        steps=steps,
        fail_on_nan_loss=fail_on_nan_loss,
        hooks=hooks,
        max_steps=max_steps)
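# The else-branch in both _train_model variants exists for subclasses whose
# _get_train_ops still returns a bare (train_op, loss) tuple instead of
# ModelFnOps. A hedged sketch of what such a legacy override looks like;
# LegacyEstimator is a hypothetical subclass, not TF code, and constructor
# details are omitted:
import tensorflow as tf

class LegacyEstimator(tf.contrib.learn.Estimator):  # hypothetical subclass
  def _get_train_ops(self, features, labels):
    loss = tf.losses.mean_squared_error(labels, features['x'])
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(
        loss, global_step=tf.train.get_global_step())
    return train_op, loss  # legacy tuple, handled by the else-branch above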
def trainDNNTF2(A, Cl, A_test, Cl_test, Root):
    printInfo()
    import tensorflow as tf
    import tensorflow.contrib.learn as skflow
    from sklearn import preprocessing
    from tensorflow.contrib.learn.python.learn import monitors as monitor_lib

    if dnntfDef.logCheckpoint == True:
        tf.logging.set_verbosity(tf.logging.INFO)

    if dnntfDef.alwaysRetrain == False:
        model_directory = Root + "/DNN-TF_" + str(len(
            dnntfDef.hidden_layers)) + "HL_" + str(dnntfDef.hidden_layers[0])
        print("\n  Training model saved in: ", model_directory, "\n")
    else:
        dnntfDef.alwaysImprove = True
        model_directory = None
        print("\n  Training model not saved\n")

    #**********************************************
    ''' Initialize Estimator and training data '''
    #**********************************************
    print(' Initializing TensorFlow...')
    tf.reset_default_graph()

    totA = np.vstack((A, A_test))
    totCl = np.append(Cl, Cl_test)
    numTotClasses = np.unique(totCl).size

    le = preprocessing.LabelEncoder()
    totCl2 = le.fit_transform(totCl)
    Cl2 = le.transform(Cl)
    Cl2_test = le.transform(Cl_test)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array(A)},
        y=np.array(Cl2),
        num_epochs=None,
        shuffle=dnntfDef.shuffleTrain)

    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array(A_test)},
        y=np.array(Cl2_test),
        num_epochs=1,
        shuffle=dnntfDef.shuffleTest)

    validation_monitor = [
        skflow.monitors.ValidationMonitor(
            input_fn=test_input_fn,
            eval_steps=1,
            every_n_steps=dnntfDef.valMonitorSecs)
    ]

    feature_columns = [
        tf.feature_column.numeric_column("x", shape=[totA.shape[1]])
    ]

    #**********************************************
    ''' Define learning rate '''
    #**********************************************
    if dnntfDef.learning_rate_decay == False:
        learning_rate = dnntfDef.learning_rate
    else:
        learning_rate = tf.train.exponential_decay(
            dnntfDef.learning_rate,
            tf.Variable(0, trainable=False),
            dnntfDef.learning_rate_decay_steps,
            dnntfDef.learning_rate_decay_rate,
            staircase=True)

    clf = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=dnntfDef.hidden_layers,
        optimizer=dnntfDef.optimizer,
        n_classes=numTotClasses,
        activation_fn=dnntfDef.activationFn,
        model_dir=model_directory,
        config=tf.estimator.RunConfig().replace(
            save_summary_steps=dnntfDef.timeCheckpoint),
        dropout=dnntfDef.dropout_perc)

    hooks = monitor_lib.replace_monitors_with_hooks(validation_monitor, clf)

    #**********************************************
    ''' Define parameters for savedmodel '''
    #**********************************************
    feature_spec = {'x': tf.FixedLenFeature([numTotClasses], tf.float32)}

    def serving_input_receiver_fn():
        serialized_tf_example = tf.placeholder(
            dtype=tf.string, shape=[None], name='input_tensors')
        receiver_tensors = {'inputs': serialized_tf_example}
        features = tf.parse_example(serialized_tf_example, feature_spec)
        return tf.estimator.export.ServingInputReceiver(features,
                                                        receiver_tensors)

    print("\n  Number of global steps:", dnntfDef.trainingSteps)

    #**********************************************
    ''' Train '''
    #**********************************************
    if dnntfDef.alwaysImprove == True or os.path.exists(
            model_directory) is False:
        print(" (Re-)training using dataset: ", Root, "\n")
        clf.train(
            input_fn=train_input_fn, steps=dnntfDef.trainingSteps, hooks=hooks)
        print(" Exporting savedmodel in: ", Root, "\n")
        clf.export_savedmodel(model_directory, serving_input_receiver_fn)
    else:
        print(" Retrieving training model from: ", model_directory, "\n")

    accuracy_score = clf.evaluate(input_fn=test_input_fn, steps=1)
    printInfo()
    print('\n  ==================================')
    print('  \033[1mtf.DNNCl\033[0m - Accuracy')
    print('  ==================================')
    print("\n  Accuracy: {:.2f}%".format(100 * accuracy_score["accuracy"]))
    print("  Loss: {:.2f}".format(accuracy_score["loss"]))
    print("  Global step: {:.2f}\n".format(accuracy_score["global_step"]))
    print('  ==================================\n')

    return clf, le
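# The monitor in trainDNNTF2 only does periodic evaluation (no early
# stopping), so the non-contrib equivalent on newer TF 1.x would be
# train_and_evaluate. A hedged sketch reusing clf, train_input_fn and
# test_input_fn from the function above:
train_spec = tf.estimator.TrainSpec(
    input_fn=train_input_fn, max_steps=dnntfDef.trainingSteps)
eval_spec = tf.estimator.EvalSpec(
    input_fn=test_input_fn, steps=1, throttle_secs=dnntfDef.valMonitorSecs)
tf.estimator.train_and_evaluate(clf, train_spec, eval_spec)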