def __init__(self, embedding_dim):
    """Create the encoder's two dense layers and its checkpoint object.

    Args:
        embedding_dim: Number of units of the second dense layer; the
            first dense layer gets twice as many units.
    """
    super(DNN_Encoder, self).__init__()
    self.embedding_dim = embedding_dim

    hidden_units = embedding_dim * 2
    self.fc1 = tf.keras.layers.Dense(hidden_units)
    self.fc2 = tf.keras.layers.Dense(embedding_dim)

    # The checkpoint groups a newly created Adam optimizer, this encoder
    # and the global step under the keys used below.
    adam = tf.train.AdamOptimizer()
    step = tf.train.get_or_create_global_step()
    self.checkpoint = tfe.Checkpoint(
        optimizer=adam,
        encoder=self,
        optimizer_step=step,
    )
def __init__(self, model, checkpoint_path):
    """Store ``model`` and restore ``model.net`` from the newest checkpoint.

    Args:
        model: Object whose ``net`` attribute is the network to restore.
        checkpoint_path: Directory handed to ``tf.train.latest_checkpoint``
            to locate the checkpoint to load.
    """
    self.model = model

    # Disabled in this version (kept for reference):
    # self.learning_rate = tfe.Variable(0.001)
    # self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    # self.global_step = tf.train.get_or_create_global_step()

    # Only the network is restored; the Checkpoint object itself is not
    # kept on the instance.
    net_checkpoint = tfe.Checkpoint(model=self.model.net)
    newest = tf.train.latest_checkpoint(checkpoint_path)
    net_checkpoint.restore(newest)
def predict(self):
    """Run the saved LSTM model on the prediction data and log the results.

    Loads the data set at ``FLAGS.predictpath``, rebuilds the model and
    optimizer with the same checkpoint keys used below (``optimizer``,
    ``model``, ``optimizer_step``), restores the latest checkpoint found in
    ``FLAGS.logdir`` and writes to stderr:

    * the arg-max class of every output row, and
    * the (up to) three highest-scoring classes of the last output row.

    Nothing is returned; all results go to ``sys.stderr``.
    """
    # NOTE(review): eager execution has to be enabled before any other
    # TensorFlow op is created -- confirm no caller builds ops first.
    tf.enable_eager_execution()

    pre_data = Datasets(FLAGS.predictpath)
    sys.stderr.write("pre_data.predice--------- :%s\n" % (pre_data.predict,))

    train_data = _divide_into_batches(pre_data.predict, 1)

    learning_rate = tf.contrib.eager.Variable(0.00001, name="learning_rate")
    model = LSTMModel(
        pre_data.vocab_size(),
        FLAGS.embedding_dim,
        FLAGS.hidden_dim,
        FLAGS.num_layers,
        0,
        False,
        0)
    # The optimizer is never stepped here; it is only created so that the
    # Checkpoint below has an ``optimizer`` entry to restore into.
    optimizer = tf.train.AdamOptimizer(
        learning_rate,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-08,
        use_locking=False,
        name='Adam'
    )
    checkpoint = tfe.Checkpoint(
        optimizer=optimizer,
        model=model,
        optimizer_step=tf.train.get_or_create_global_step())
    checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir))

    sys.stderr.write(
        "train_data--------------------------- :%s \n" % (train_data,))

    # Feed every row of the batched data in a single forward pass.
    inputs = train_data[0:train_data.shape[0], :]
    inp = tf.constant(inputs)
    sys.stderr.write("inp------------------- :%s \n" % (inp,))

    out = model(inp, training=False)
    sys.stderr.write("out------------------- :%s \n" % (out,))

    pred_class_index = tf.argmax(out, 1, output_type=tf.int64).numpy()
    sys.stderr.write("pred_class_index :%s \n" % (pred_class_index,))
    for i in pred_class_index:
        sys.stderr.write(
            "pred_num------------------index:%d- :%s \n"
            % (i, pre_data.get_key(i)))

    num_rows, num_classes = out.shape
    last_index = num_rows - 1
    last_predict = out[last_index]
    # Report the index that is actually shown (the original printed the
    # row count next to the last row).
    sys.stderr.write(
        "out[%d]-------------- :%s \n" % (last_index, last_predict))

    # Top-N of the last row by repeated arg-max.  The winner is masked with
    # -inf so it can never be selected again; the previous mask value of
    # -1 was re-selected whenever the remaining scores were below -1.
    masked = tf.constant([float("-inf")], dtype=last_predict.dtype)
    top_n = min(3, int(num_classes))
    for i in range(top_n):
        max_operator = tf.argmax(
            last_predict, 0, output_type=tf.int64).numpy()
        sys.stderr.write(
            "i th:%d-----operator: %d------nums :%s \n"
            % (i, max_operator, pre_data.get_key(max_operator)))
        part1 = last_predict[:max_operator]
        part2 = last_predict[max_operator + 1:]
        last_predict = tf.concat([part1, masked, part2], axis=0)
def dice_eval(self, dataset_validation, checkpoint_path):
    """Restore the network and collect per-sample dice values.

    Args:
        dataset_validation: Object providing ``size()`` and callable as
            ``dataset_validation(1)``, which yields an ``(x, y, mask)``
            triple.
        checkpoint_path: Checkpoint passed to ``Checkpoint.restore``.

    Returns:
        ``np.array`` with one entry per sample: the result of
        ``self.model.evaluation_dice`` with its first element dropped.
    """
    restorer = tfe.Checkpoint(model=self.model.net)
    self.checkpoint = restorer
    restorer.restore(checkpoint_path)

    def _dice_of_next_sample():
        # Pull exactly one sample and evaluate it.
        sample_x, sample_y, sample_mask = dataset_validation(1)
        batch = {'x': sample_x, 'y': sample_y, 'mask': sample_mask}
        return self.model.evaluation_dice(batch)[1:]

    sample_count = dataset_validation.size()
    return np.array([_dice_of_next_sample() for _ in range(sample_count)])
def __init__(self, embedding_dim, units, vocab_size):
    """Create the decoder's layers and its checkpoint object.

    Args:
        embedding_dim: Output size of the embedding layer.
        units: Size passed to the ``gru`` helper and to the first dense
            layer.
        vocab_size: Input size of the embedding layer and number of units
            of the final dense layer.
    """
    super(RNN_Decoder, self).__init__()

    self.embedding_dim = embedding_dim
    self.units = units
    self.vocab_size = vocab_size

    # Layers are created in the same order as before: embedding,
    # recurrent layer, then the two dense layers.
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = gru(self.units)
    self.fc1 = tf.keras.layers.Dense(self.units)
    self.fc2 = tf.keras.layers.Dense(vocab_size)

    # The checkpoint groups a newly created Adam optimizer, this decoder
    # and the global step under the keys used below.
    adam = tf.train.AdamOptimizer()
    step = tf.train.get_or_create_global_step()
    self.checkpoint = tfe.Checkpoint(
        optimizer=adam,
        decoder=self,
        optimizer_step=step,
    )
def main(_):
    """Train ``PTBModel`` with SGD, checkpointing on improved eval loss.

    Each epoch trains on the training split and evaluates on the
    validation split.  When the eval loss improves, a checkpoint is saved
    (if ``FLAGS.logdir`` is set); otherwise the learning rate is divided
    by four.

    Raises:
        ValueError: If ``FLAGS.data_path`` is empty.
    """
    tf.enable_eager_execution()

    if not FLAGS.data_path:
        raise ValueError("Must specify --data-path")

    dataset = Datasets(FLAGS.data_path)
    train_data = _divide_into_batches(dataset.train, FLAGS.batch_size)
    eval_data = _divide_into_batches(dataset.valid, 10)

    have_gpu = tfe.num_gpus() > 0
    use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu
    device_name = "/device:GPU:0" if have_gpu else None

    with tf.device(device_name):
        # The learning rate lives in a Variable so that it is stored in the
        # checkpoint and training can resume with the last saved value.
        learning_rate = tfe.Variable(20.0, name="learning_rate")
        model = PTBModel(
            dataset.vocab_size(),
            FLAGS.embedding_dim,
            FLAGS.hidden_dim,
            FLAGS.num_layers,
            FLAGS.dropout,
            use_cudnn_rnn)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)

        # GradientDescentOptimizer has no state of its own, but listing it
        # here means a stateful optimizer can be swapped in later.
        checkpoint = tfe.Checkpoint(
            learning_rate=learning_rate,
            model=model,
            optimizer=optimizer)

        # Variables that already exist (learning_rate) are restored right
        # away; the rest are restored when they get created, provided a
        # checkpoint exists.
        latest = tf.train.latest_checkpoint(FLAGS.logdir)
        checkpoint.restore(latest)
        sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())

        best_loss = None
        for _ in range(FLAGS.epoch):
            train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
            eval_loss = evaluate(model, eval_data)

            improved = not best_loss or eval_loss < best_loss
            if not improved:
                # No progress this epoch: shrink the step size.
                learning_rate.assign(learning_rate / 4.0)
                sys.stderr.write(
                    "eval_loss did not reduce in this epoch, "
                    "changing learning rate to %f for the next epoch\n"
                    % learning_rate.numpy())
                continue

            if FLAGS.logdir:
                checkpoint.save(os.path.join(FLAGS.logdir, "ckpt"))
            best_loss = eval_loss
def pseudo_generation(self, dataset, checkpoint_path, weight=1.0,
                      print_i=False):
    """Predict every sample of ``dataset`` and return the weighted stack.

    Args:
        dataset: Object providing ``size()`` and callable as
            ``dataset(1)``, which yields a 3-tuple whose first element is
            the model input.
        checkpoint_path: Checkpoint passed to ``Checkpoint.restore``.
        weight: Factor the stacked predictions are multiplied by.
        print_i: When true, print the loop index on every tenth sample.

    Returns:
        The per-sample results of ``self.model.predict`` concatenated
        along axis 0 and multiplied by ``weight``.

    Raises:
        ValueError: If ``dataset.size()`` is 0 (previously this surfaced as
            an opaque ``TypeError`` from ``None * weight``).
    """
    self.checkpoint = tfe.Checkpoint(model=self.model.net)
    self.checkpoint.restore(checkpoint_path)

    # Collect the pieces and join them once; concatenating inside the loop
    # re-copies everything gathered so far on every iteration.
    chunks = []
    for i in range(dataset.size()):
        if print_i and (i + 1) % 10 == 0:
            print(i)
        x, _, _ = dataset(1)
        chunks.append(self.model.predict(x))

    if not chunks:
        raise ValueError('dataset is empty; nothing to predict')

    # A single prediction is passed through untouched, exactly as before.
    if len(chunks) == 1:
        probs = chunks[0]
    else:
        probs = np.concatenate(chunks, axis=0)
    return probs * weight
def results_eval(self, dataset_validation, checkpoint_path):
    """Restore the network and gather per-sample evaluation metrics.

    Args:
        dataset_validation: Object providing ``size()`` and callable as
            ``dataset_validation(1)``, which yields an ``(x, y, mask)``
            triple.
        checkpoint_path: Checkpoint passed to ``Checkpoint.restore``.

    Returns:
        Dict mapping ``'dice'``, ``'iou'``, ``'acc'``, ``'sensitivity'``
        and ``'specificity'`` to a list holding that metric for every
        sample, in evaluation order.
    """
    restorer = tfe.Checkpoint(model=self.model.net)
    self.checkpoint = restorer
    restorer.restore(checkpoint_path)

    sample_count = dataset_validation.size()

    metric_names = ('dice', 'iou', 'acc', 'sensitivity', 'specificity')
    eval_results = {name: [] for name in metric_names}

    for _ in range(sample_count):
        sample_x, sample_y, sample_mask = dataset_validation(1)
        batch = {'x': sample_x, 'y': sample_y, 'mask': sample_mask}
        sample_metrics = self.model.evaluation(batch)
        for name in metric_names:
            eval_results[name].append(sample_metrics[name])

    return eval_results
def main(_):
    """Train ``LSTMModel`` with Adam, logging eval metrics once per epoch.

    Each epoch trains on the training split, evaluates on the validation
    split and writes ``epoch_acc`` / ``epoch_loss`` summaries to
    ``FLAGS.logdir``.  When the eval loss or the eval accuracy improves on
    its best value so far, a checkpoint is saved (if ``FLAGS.logdir`` is
    set); otherwise the learning rate is multiplied by 0.95.

    Raises:
        ValueError: If ``FLAGS.data_path`` is empty.
    """
    tf.enable_eager_execution()

    if not FLAGS.data_path:
        raise ValueError("Must specify --data-path")

    corpus = Datasets(FLAGS.data_path)
    train_data = _divide_into_batches(corpus.train, FLAGS.batch_size)
    eval_data = _divide_into_batches(corpus.valid, 10)

    have_gpu = tfe.num_gpus() > 0
    use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

    with tf.device("/device:GPU:0" if have_gpu else None):
        # learning_rate is a Variable so it can be decayed in place between
        # epochs.
        # NOTE(review): it is not passed to the Checkpoint below, so a
        # decayed value does not appear to survive a restart -- confirm
        # whether that is intended.
        learning_rate = tf.contrib.eager.Variable(0.001, name="learning_rate")
        model = LSTMModel(
            corpus.vocab_size(),
            FLAGS.embedding_dim,
            FLAGS.hidden_dim,
            FLAGS.num_layers,
            FLAGS.dropout,
            use_cudnn_rnn,
            0.5)
        optimizer = tf.train.AdamOptimizer(
            learning_rate,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-08,
            use_locking=False,
            name='Adam'
        )

        # One handle on the global step: it is stored in the checkpoint and
        # advanced once per epoch below to index the summaries.
        global_step = tf.train.get_or_create_global_step()
        checkpoint = tfe.Checkpoint(
            optimizer=optimizer,
            model=model,
            optimizer_step=global_step)

        # Existing variables are restored now; variables created later are
        # restored on creation if a checkpoint exists.
        checkpoint.restore(tf.train.latest_checkpoint(FLAGS.logdir))
        sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())

        best_loss = None
        best_accuracy = 0.0

        writer = tf.contrib.summary.create_file_writer(FLAGS.logdir)
        writer.set_as_default()

        for _ in range(FLAGS.epoch):
            train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
            eval_loss, eval_accuracy = evaluate(model, eval_data)

            global_step.assign_add(1)
            with tf.contrib.summary.record_summaries_every_n_global_steps(1):
                tf.contrib.summary.scalar('epoch_acc', eval_accuracy)
                tf.contrib.summary.scalar('epoch_loss', eval_loss)

            # Track the two records independently.  Previously both were
            # overwritten whenever either one improved, so a "best" value
            # could be replaced by a worse one.
            loss_improved = best_loss is None or bool(eval_loss < best_loss)
            accuracy_improved = bool(eval_accuracy > best_accuracy)

            if loss_improved or accuracy_improved:
                if FLAGS.logdir:
                    checkpoint.save(os.path.join(FLAGS.logdir, "ckpt"))
                if loss_improved:
                    best_loss = eval_loss
                if accuracy_improved:
                    best_accuracy = eval_accuracy
            else:
                learning_rate.assign(learning_rate * 0.95)
                sys.stderr.write(
                    "eval_loss did not reduce in this epoch, "
                    "changing learning rate to %f for the next epoch\n"
                    % learning_rate.numpy())

            sys.stderr.write("one epoch,best_loss: :%f \n" % (best_loss))
def main():
    """Train the MNIST model in eager mode, checkpointing periodically.

    Loads the data via ``load_data``, prints a summary of its dimensions,
    pre-processes the training split and trains for a fixed number of
    epochs with Adam.  A checkpoint is written every ``save_step`` global
    steps and once more if training is interrupted with Ctrl-C.
    """
    # Logging split.
    print('\n{}'.format(60 * '-'))

    # Load data & split into training & testing sets.
    train, test = load_data()
    X_train, y_train = train
    _, y_test = test

    # Number of training/testing samples.
    n_train, n_test = y_train.shape[0], y_test.shape[0]
    print('{:,} train samples\t&\t{:,} testing samples'
          .format(n_train, n_test))

    # Image dimensions.
    img_shape = X_train.shape[1:]
    img_size, img_depth = img_shape[0], 1
    img_size_flat = img_size * img_size * img_depth
    print("Image = Shape: {}\tSize: {}\tDepth: {}\tFlat: {}"
          .format(img_shape, img_size, img_depth, img_size_flat))

    # Output dimensions.
    classes = np.unique(y_train)
    num_classes = len(classes)
    print('Labels = Classes: {}\tLength: {}'.format(classes, num_classes))

    # Logging split.
    print('{}\n'.format(60 * '-'))

    # Only the training split is pre-processed and batched; the test split
    # is used above for its sample count only.
    X_train, y_train = pre_process(X_train, y_train)
    data_train = process_data(X_train, y_train,
                              batch_size=128, buffer_size=1000)

    epochs = 5
    save_path = './saved/mnist-eager/model'
    save_step = 500
    learning_rate = 1e-2
    banner = 55 * '-'

    model = Model()
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    global_step = tf.train.get_or_create_global_step()

    saver = tfe.Checkpoint(model=model,
                           optimizer=optimizer,
                           global_step=global_step)

    print('{0}\n\t\tTRAINING STARTED!\n{0}\n'.format(banner))

    for epoch in range(epochs):
        try:
            for batch, (features, labels) in enumerate(data_train):
                # Calculate the derivative of loss w.r.t. model variables.
                grads = compute_grads(model, features, labels)
                optimizer.apply_gradients(zip(grads, model.variables),
                                          global_step=global_step)

                # Loss of the same batch, measured after the update.
                loss = loss_func(model=model, features=features,
                                 labels=labels)

                # Log training progress.
                print(('\rEpoch: {:,}\tStep: {:,}\tBatch: {:,}'
                       '\tLoss: {:.3f}').format(epoch + 1,
                                                global_step.numpy(),
                                                batch + 1,
                                                loss.numpy()),
                      end='')

                if global_step.numpy() % save_step == 0:
                    print('\nSaving model to {}'.format(save_path))
                    saver.save(save_path)

        except KeyboardInterrupt:
            # The separator used to be built from an empty string and so
            # printed nothing.
            print('\n{}\nTraining interrupted by user'.format(banner))
            saver.save(file_prefix=save_path)
            print('Model saved to {}'.format(save_path))
            break

    # !- End epochs.
    print('\n\n{0}\n\t\tTRAINING ENDED!\n{0}\n'.format(banner))
def train(self, data_provider, epochs, output_path, restore=False,
          train_summary=True, validation_summary=True, base_ckpt=None,
          eval_iter=5, save_best_ckpt=False, min_delta=None,
          max_patience=3, min_epochs=4):
    """Run the training loop with periodic validation and early stopping.

    The global step is used as the epoch counter: it is reset to 0 at the
    start and incremented once at the end of every epoch.

    Args:
        data_provider: Object whose ``get('train')`` and
            ``get('validation')`` return the two data sets.  The training
            set is called with the batch size and yields ``(x, y, mask)``.
        epochs: Training continues while the global step is below this.
        output_path: Root directory; ``summary``, ``prediction`` and
            ``checkpoint`` sub-directories are created below it.
        restore: When false, the three sub-directories are deleted first.
            When true, the network is restored from
            ``checkpoint/best_ckpt``.
        train_summary: Write the averaged training metrics each epoch.
        validation_summary: Write validation metrics and images whenever
            validation runs.
        base_ckpt: If given, copied into the checkpoint directory with
            ``U.copytree``.
        eval_iter: Validation runs when ``(epoch + 1) % eval_iter == 0``.
        save_best_ckpt: When true, ``best_ckpt`` is written only when the
            validation loss improves; when false, it is written every
            epoch.
        min_delta: Minimum loss improvement that resets the patience
            counter.  ``None`` leaves the counter untouched.
        max_patience: Training stops once the patience counter exceeds
            this (and ``min_epochs`` is satisfied).
        min_epochs: Early stopping is only considered once the global step
            has reached ``min_epochs - 1``.

    Returns:
        The global step at which the lowest validation loss was seen, or 0
        if no validation result ever improved on the initial value.
    """
    # make dir
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    summary_path = output_path + '/summary'
    prediction_path = output_path + '/prediction'
    checkpoint_path = output_path + '/checkpoint'
    # A fresh (non-restored) run starts from empty output directories.
    if not restore:
        shutil.rmtree(summary_path, ignore_errors=True)
        shutil.rmtree(checkpoint_path, ignore_errors=True)
        shutil.rmtree(prediction_path, ignore_errors=True)
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    if not os.path.exists(prediction_path):
        os.makedirs(prediction_path)
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    if base_ckpt is not None:
        U.copytree(base_ckpt, checkpoint_path)

    # init global_step
    global_step = tf.train.get_or_create_global_step()
    global_step.assign(0)

    # restore
    # Only the network (self.model.net) is tracked by this checkpoint.
    self.checkpoint = tfe.Checkpoint(model=self.model.net)
    if restore:
        self.checkpoint.restore(checkpoint_path + '/best_ckpt')
        # global_step.assign(0)

    # summary writer
    train_writer = tf.contrib.summary.create_file_writer(summary_path + '/train')
    validation_writer = tf.contrib.summary.create_file_writer(
        summary_path + '/validation')

    dataset_train = data_provider.get('train')
    dataset_validation = data_provider.get('validation')

    # Store predictions once before any training step.
    # NOTE(review): this call is made even when dataset_validation is
    # None, although the loop below guards against None -- confirm that
    # _store_prediction accepts None.
    summary, imgs = self._store_prediction(dataset_validation, '%s/_init' % prediction_path)

    # Per-epoch dice histories; filled below but not read in this method.
    train_dice = []
    validation_dice = []
    best_loss = float('inf')
    best_epoch = 0
    patience = 0

    while global_step < epochs:
        train_evaluation = None
        for i in range(self.training_iters):
            batch_x, batch_y, batch_mask = dataset_train(self.batch_size)
            feed_dict = {'x': batch_x, 'y': batch_y, 'mask': batch_mask}
            grads = self.model.get_grads(feed_dict)
            self.optimizer.apply_gradients(
                zip(grads, self.model.net.variables))
            # if i % 5 == 0:
            #     print(' --iter %d'%i)
            # Evaluate the same batch after the update; the 'prediction'
            # entry is dropped before accumulating.
            sub_eval = self.model.evaluation(feed_dict)
            sub_eval.pop('prediction')
            # presumably sums the metric dicts key by key -- U.add_dict is
            # not visible here.
            train_evaluation = U.add_dict(train_evaluation, sub_eval)
            # print('step: %d'%i)

        print('epoch %d -- ' % global_step, end='')
        # presumably turns the accumulated sums into per-iteration
        # averages -- U.div_dict is not visible here.
        train_evaluation = U.div_dict(train_evaluation, self.training_iters)
        train_evaluation['learning rate'] = self.learning_rate.numpy()
        output_str = U.eval_to_str(train_evaluation)
        print('train %s' % output_str)
        if train_summary:
            self.write_summary(train_evaluation, train_writer)
        train_dice.append(train_evaluation['dice'])

        # Validate every eval_iter-th epoch (1-based).
        if dataset_validation is not None and (global_step.numpy() + 1) % eval_iter == 0:
            summary, imgs = self._store_prediction(
                dataset_validation,
                '%s/epoch_%d' % (prediction_path, global_step))
            validation_dice.append(summary['dice'])
            if validation_summary:
                self.write_summary(summary, validation_writer)
                self.write_image_summary(imgs, validation_writer)

            # early stopping
            curr_loss = summary['loss']
            if min_delta is not None:
                # Patience resets only on an improvement larger than
                # min_delta; anything else counts against it.
                if best_loss - curr_loss > min_delta:
                    patience = 0
                else:
                    patience += 1
            if curr_loss < best_loss:
                best_loss = curr_loss
                best_epoch = global_step.numpy()
                # save best result
                if save_best_ckpt:
                    self.checkpoint.write(checkpoint_path + '/best_ckpt')
                    print('best ckpt saved')
            # Leaving here skips the per-epoch write and the step
            # increment below.
            if global_step >= min_epochs - 1 and patience > max_patience:
                break

        # Without best-only saving, 'best_ckpt' is overwritten every epoch
        # and therefore holds the latest weights.
        if not save_best_ckpt:
            self.checkpoint.write(checkpoint_path + '/best_ckpt')
            print('ckpt saved')

        # save model
        # if global_step.numpy() % 10 == 0:
        #     self.checkpoint.save(checkpoint_path+'/ckpt')
        global_step.assign_add(1)

    # finish
    # self.checkpoint.write(checkpoint_path+'/final_ckpt')
    # final_dice = validation_dice[-1]
    return best_epoch
def restore(self, ckpt_path):
    """Restore ``self.model.net`` from ``ckpt_path``.

    Args:
        ckpt_path: Checkpoint passed to ``Checkpoint.restore``.  Must not
            be ``None``.

    Raises:
        AssertionError: If ``ckpt_path`` is ``None``.
    """
    if ckpt_path is None:
        # Raised explicitly instead of via ``assert False`` so the check
        # is not stripped when Python runs with -O; exception type and
        # message are the same as before.
        raise AssertionError('no ckpt!')

    self.checkpoint = tfe.Checkpoint(model=self.model.net)
    self.checkpoint.restore(ckpt_path)
def ckpt_restore(self, checkpoint_path):
    """Create a checkpoint for ``self.model.net`` and restore it.

    Args:
        checkpoint_path: Checkpoint passed to ``Checkpoint.restore``.
    """
    net_checkpoint = tfe.Checkpoint(model=self.model.net)
    # Keep the checkpoint object on the instance, as before.
    self.checkpoint = net_checkpoint
    net_checkpoint.restore(checkpoint_path)