def __init__(self, learning_rate, noise_level, input_layer_size, class_count,
             encoder_layer_definitions, denoising_cost_multipliers):
    """Assemble the graph pieces this object exposes as attributes.

    Args:
        learning_rate: Stored unchanged as ``self.learning_rate``.
        noise_level: Forwarded to ``_ForwardPass`` as ``noise_level``.
        input_layer_size: Forwarded to ``_Placeholders``.
        class_count: Forwarded to ``_Placeholders``; must equal the first
            element of the last entry of ``encoder_layer_definitions``.
        encoder_layer_definitions: Forwarded to ``_ForwardPass``.
        denoising_cost_multipliers: Stored unchanged as
            ``self.denoising_cost_multipliers``.

    Raises:
        AssertionError: If ``class_count`` differs from
            ``encoder_layer_definitions[-1][0]``.
    """
    last_layer_definition = encoder_layer_definitions[-1]
    assert class_count == last_layer_definition[0]

    self.learning_rate = learning_rate
    self.denoising_cost_multipliers = denoising_cost_multipliers

    # Inputs first, then the forward pass that consumes them.
    self.placeholders = _Placeholders(input_layer_size, class_count)
    self.output = _ForwardPass(
        self.placeholders,
        noise_level=noise_level,
        encoder_layer_definitions=encoder_layer_definitions,
    )

    # Ops derived from the placeholders and the forward pass.
    self.accuracy_measure = self._accuracy_measure(
        self.placeholders, self.output)
    self.supervised_train_step = self._supervised_train_step(
        self.placeholders, self.output)
    self.unsupervised_train_step = self._unsupervised_train_step(
        self.placeholders, self.output)

    # One merged summary op per summary collection key.
    self.unsupervised_summaries = tf.merge_all_summaries("unsupervised")
    self.supervised_summaries = tf.merge_all_summaries("supervised")
    self.test_summaries = tf.merge_all_summaries("test")

    self.saver = tf.train.Saver()
def _setup_summary_writer(self, logdir):
    """Register the loss summary and open a timestamped summary writer.

    Args:
        logdir: Parent directory; the writer logs into a sub-directory
            named after the current time (``%Y-%m-%d_%H-%M-%S``).
    """
    # Scalar summary tracking the model loss.
    tf.scalar_summary("loss", self._model_loss)

    # Build the op that merges every registered summary.
    # NOTE(review): the returned op is not kept anywhere in this method.
    tf.merge_all_summaries()

    # Each call gets its own run directory under `logdir`.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    run_dir = os.path.join(logdir, timestamp)
    self._summary_writer = tf.train.SummaryWriter(
        run_dir, graph_def=self._session.graph_def)
def _set_model(self, model):
    """Attach `model`, register histogram summaries and open the writer.

    Histograms are registered only when ``self.histogram_freq`` is truthy
    and ``self.merged`` is still falsy. The merged summary op and the
    summary writer are (re)created on every call.

    Args:
        model: Model whose ``get_config()['name']`` is ``'Sequential'``
            or ``'Graph'``.

    Raises:
        Exception: If histograms are requested and the model type is
            neither ``'Sequential'`` nor ``'Graph'``.
    """
    import tensorflow as tf
    import keras.backend.tensorflow_backend as KTF

    self.model = model
    self.sess = KTF.get_session()

    if self.histogram_freq and not self.merged:
        model_kind = self.model.get_config()['name']
        if model_kind == 'Sequential':
            # Key the layers by their configured name.
            named_layers = {}
            for layer in self.model.layers:
                named_layers[layer.get_config()['name']] = layer
        elif model_kind == 'Graph':
            named_layers = self.model.nodes
        else:
            raise Exception('Unrecognized model:',
                            self.model.get_config()['name'])

        for layer_name in named_layers:
            layer = named_layers[layer_name]
            if hasattr(layer, 'W'):
                tf.histogram_summary('{}_W'.format(layer_name), layer.W)
            if hasattr(layer, 'b'):
                tf.histogram_summary('{}_b'.format(layer_name), layer.b)
            if hasattr(layer, 'get_output'):
                tf.histogram_summary('{}_out'.format(layer_name),
                                     layer.get_output())

    self.merged = tf.merge_all_summaries()
    self.writer = tf.train.SummaryWriter(self.log_dir, self.sess.graph_def)
def evaluate():
    """Evaluate CIFAR-10 repeatedly, 20 `eval_once` calls per pass.

    Loops until ``FLAGS.run_once`` is set, sleeping
    ``FLAGS.eval_interval_secs`` between passes.
    """
    with tf.Graph().as_default() as g:
        # Input pipeline: test split when FLAGS.eval_data == 'test'.
        use_test_split = FLAGS.eval_data == 'test'
        images, labels = cifar10.inputs(eval_data=use_test_split)

        # Logits from the inference model, and the top-1 correctness op.
        logits = cifar10.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Evaluate with the moving-average versions of the variables.
        ema = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Summary op built from the graph's summary collection.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

        keep_going = True
        while keep_going:
            for i in range(20):
                eval_once(saver, summary_writer, top_k_op, summary_op, i)
            if FLAGS.run_once:
                keep_going = False
            else:
                time.sleep(FLAGS.eval_interval_secs)
def __init__(self, optimizer, categories, num_of_terms, num_of_hidden_nodes):
    """Build a one-hidden-layer softmax classifier graph.

    Args:
        optimizer: Object whose ``minimize(loss)`` creates the training op.
        categories: Sized collection; its length is the output width.
        num_of_terms: Width of the input placeholder.
        num_of_hidden_nodes: Width of the hidden layer.

    Attributes created: ``input_ph``, ``supervisor_ph``, ``output_op``,
    ``loss_op``, ``training_op``, ``accuracy_op`` and ``summary_op``.
    """
    self.optimizer = optimizer
    self.categories = categories
    self.num_of_categories = len(self.categories)
    self.num_of_terms = num_of_terms
    self.num_of_hidden_nodes = num_of_hidden_nodes

    self.input_ph = tf.placeholder(
        tf.float32, [None, self.num_of_terms], name="input")
    self.supervisor_ph = tf.placeholder(
        tf.float32, [None, self.num_of_categories], name="supervisor")

    with tf.name_scope("inference"):
        weight1_var = tf.Variable(
            tf.truncated_normal(
                [self.num_of_terms, self.num_of_hidden_nodes], stddev=0.1),
            name="weight1")
        weight2_var = tf.Variable(
            tf.truncated_normal(
                [self.num_of_hidden_nodes, self.num_of_categories],
                stddev=0.1),
            name="weight2")
        bias1_var = tf.Variable(
            tf.zeros([self.num_of_hidden_nodes]), name="bias1")
        bias2_var = tf.Variable(
            tf.zeros([self.num_of_categories]), name="bias2")

        hidden_op = tf.nn.relu(
            tf.matmul(self.input_ph, weight1_var) + bias1_var)
        self.output_op = tf.nn.softmax(
            tf.matmul(hidden_op, weight2_var) + bias2_var)

    with tf.name_scope("loss"):
        # Clip before the log: a softmax output that underflows to 0 would
        # otherwise give log(0) = -inf and a NaN loss (0 * -inf).
        safe_output = tf.clip_by_value(self.output_op, 1e-10, 1.0)
        cross_entropy = -tf.reduce_sum(
            self.supervisor_ph * tf.log(safe_output))
        # L2 penalty on both weight matrices, weighted by lambda_2.
        l2_sqr = tf.nn.l2_loss(weight1_var) + tf.nn.l2_loss(weight2_var)
        lambda_2 = 0.01
        self.loss_op = cross_entropy + lambda_2 * l2_sqr
        tf.scalar_summary("loss", self.loss_op)

    with tf.name_scope("training"):
        self.training_op = self.optimizer.minimize(self.loss_op)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(
            tf.argmax(self.output_op, 1), tf.argmax(self.supervisor_ph, 1))
        self.accuracy_op = tf.reduce_mean(
            tf.cast(correct_prediction, "float"))
        tf.scalar_summary("accuracy", self.accuracy_op)

    self.summary_op = tf.merge_all_summaries()
def time_tensorflow_run(session, target, info_string):
    """Run `target` repeatedly and print per-batch timing statistics.

    The first 10 runs are burn-in and are excluded from the statistics;
    the following ``FLAGS.num_batches`` runs are timed. When
    ``FLAGS.enable_trace`` is set, the last burn-in run is executed with
    full tracing and the trace is written to ``tf_trace.ctf``.

    Args:
        session: Session used to run `target`.
        target: Op or tensor passed to ``session.run``.
        info_string: Label included in the final summary line.
    """
    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in xrange(FLAGS.num_batches + num_steps_burn_in):
        trace_this_step = FLAGS.enable_trace and i == num_steps_burn_in - 1
        run_options = None
        run_metadata = None
        if trace_this_step:
            run_options = config_pb2.RunOptions(
                trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            # Dump the graph next to the trace; close the writer right away
            # so the event file is flushed and the handle is not leaked.
            writer = tf.train.SummaryWriter(
                '/home/minjie/tmp/pipeline', session.graph)
            writer.close()

        # Run session
        start_time = time.time()
        _ = session.run(target, options=run_options,
                        run_metadata=run_metadata)
        duration = time.time() - start_time

        if trace_this_step:
            tl = tf.python.client.timeline.Timeline(run_metadata.step_stats)
            ctf = tl.generate_chrome_trace_format()
            with open('tf_trace.ctf', 'w') as f:
                f.write(ctf)

        # `>=` so that exactly FLAGS.num_batches samples are accumulated;
        # with `>` the first timed step was dropped while the totals were
        # still divided by FLAGS.num_batches, biasing the mean downward.
        if i >= num_steps_burn_in:
            if not i % 10:
                print ('%s: step %d, duration = %.3f speed = %.3f images/sec' %
                       (datetime.now(), i - num_steps_burn_in, duration,
                        FLAGS.batch_size / duration))
            total_duration += duration
            total_duration_squared += duration * duration

    mn = total_duration / FLAGS.num_batches
    vr = total_duration_squared / FLAGS.num_batches - mn * mn
    # Rounding can push a near-zero variance slightly below zero.
    sd = math.sqrt(max(vr, 0.0))
    print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
           (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
def testSummariesAreFlushedToDiskWithoutGlobalStep(self):
    """Summaries written by an evaluation without a global step reach disk."""
    output_dir = os.path.join(
        self.get_temp_dir(), 'flush_test_no_global_step')
    # Start from a clean directory (for running on jenkins).
    if tf.gfile.Exists(output_dir):
        tf.gfile.DeleteRecursively(output_dir)

    names_to_metrics, names_to_updates = self._create_names_to_metrics(
        self._predictions, self._labels)

    # One scalar summary per metric, tagged with the metric's name.
    for metric_name, metric_tensor in names_to_metrics.items():
        tf.scalar_summary(metric_name, metric_tensor)

    summary_writer = tf.train.SummaryWriter(output_dir)

    initial_op = tf.group(tf.initialize_all_variables(),
                          tf.initialize_local_variables())
    eval_op = tf.group(*names_to_updates.values())

    with self.test_session() as sess:
        slim.evaluation.evaluation(
            sess,
            initial_op=initial_op,
            eval_op=eval_op,
            summary_op=tf.merge_all_summaries(),
            summary_writer=summary_writer)

        # Read the metric values while the session is still the default.
        names_to_values = {}
        for metric_name in names_to_metrics:
            names_to_values[metric_name] = (
                names_to_metrics[metric_name].eval())

    self._verify_summaries(output_dir, names_to_values)
def evaluate():
    """Build the SVHN evaluation graph and run a single `eval_once` pass."""
    with tf.Graph().as_default():
        # Input pipeline.
        images, labels = inputs()

        # Logits from the inference model.
        logits = svhn.inference(images)

        # Top-1 correctness and the predicted class index.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        top_k_predict_op = tf.argmax(logits, 1)

        # Evaluate with the moving-average versions of the variables.
        ema = tf.train.ExponentialMovingAverage(svhn.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Summary op built from the graph's summary collection.
        summary_op = tf.merge_all_summaries()

        current_graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(
            FLAGS.eval_dir, graph_def=current_graph_def)

        # The original loop broke out unconditionally after one pass.
        eval_once(saver, summary_writer, top_k_op, top_k_predict_op,
                  summary_op, images)
def train(self, data=0, steps=-1, dropout=None, display_step=10, test_step=200, batch_size=10, do_resume=False):
    """Run the training loop with periodic reporting, testing and saving.

    Args:
        data: If truthy, replaces ``self.data``.
        steps: Number of steps to run; ``-1`` means 9999999.
        dropout: Value fed to ``self.keep_prob`` while training; any falsy
            value is replaced by ``1.`` (keep all).
        display_step: Report loss/accuracy every this many steps.
        test_step: Call ``self.test(step)`` every this many steps.
        batch_size: Passed to ``self.next_batch``.
        do_resume: If true and a checkpoint exists in ``checkpoint_dir``,
            restore it before training.

    Returns:
        None. Returns early (after printing a message) when the loss
        becomes NaN.
    """
    if data:
        self.data = data
    steps = 9999999 if steps == -1 else steps
    session = self.session
    tf.add_check_numerics_ops()

    # Newer TensorFlow exposes these under tf.summary; fall back to the
    # old names only when the new attribute is missing.
    try:
        self.summaries = tf.summary.merge_all()
    except AttributeError:
        self.summaries = tf.merge_all_summaries()
    try:
        self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)
    except AttributeError:
        self.summary_writer = tf.train.SummaryWriter(current_logdir(), session.graph)

    if not dropout:
        dropout = 1.  # keep all
    x = self.x
    y = self.y
    keep_prob = self.keep_prob

    try:
        saver = tf.train.Saver(tf.global_variables())
    except AttributeError:
        saver = tf.train.Saver(tf.all_variables())
    snapshot = self.name + str(get_last_tensorboard_run_nr())
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)

    # Initialize BEFORE restoring: running the initializer after
    # saver.restore() would overwrite the restored values.
    try:
        session.run([tf.global_variables_initializer()])
    except AttributeError:
        session.run([tf.initialize_all_variables()])
    if do_resume and checkpoint:
        print("LOADING " + checkpoint + " !!!")
        saver.restore(session, checkpoint)

    step = 0  # show first
    while step < steps:
        batch_xs, batch_ys = self.next_batch(batch_size, session)
        # Fit training using batch data
        feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout, self.train_phase: True}
        loss, _ = session.run([self.cost, self.optimizer], feed_dict=feed_dict)
        if step % display_step == 0:
            seconds = int(time.time()) - start
            # Calculate batch accuracy with dropout disabled.
            feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
            acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
            # Training summaries are deliberately not written here; only
            # test summaries are, for a smoother curve.
            print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(step, loss, acc, seconds), end=' ')
        if str(loss) == "nan":
            return print("\nLoss gradiant explosion, exiting!!!")  # restore!
        if step % test_step == 0:
            self.test(step)
        if step % save_step == 0 and step > 0:
            print("SAVING snapshot %s" % snapshot)
            saver.save(session, checkpoint_dir + snapshot + ".ckpt", self.global_step)
        step += 1
    print("\nOptimization Finished!")
    self.test(step, number=10000)  # final test
def evaluate():
    """Evaluate the model in a loop, restoring averaged weights each pass.

    Trainable variables are restored from their moving-average names, all
    other variables from their own op names. Loops until
    ``FLAGS.run_once`` is set, sleeping ``FLAGS.eval_interval_secs``
    between passes.
    """
    with tf.Graph().as_default():
        # Load the evaluation data.
        # NOTE(review): the file name contains "train" - confirm this is
        # the intended evaluation set.
        images, labels = data_inputs.inputs('data/train_kirin_norm_32.tfrecords')
        logits = model.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
        # Iterate over ALL variables: looping over the trainable ones only
        # made the `else` branch unreachable and left non-trainable
        # variables out of the saver. The trainable list is fetched once.
        trainable = tf.trainable_variables()
        variables_to_restore = {}
        for v in tf.all_variables():
            if v in trainable:
                restore_name = variable_averages.average_name(v)
            else:
                restore_name = v.op.name
            variables_to_restore[restore_name] = v
        saver = tf.train.Saver(variables_to_restore)

        summary_op = tf.merge_all_summaries()
        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph_def=graph_def)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def __train(train_image, train_label, model, batch_size=10, max_steps=200):
    """Train `model` with mini-batches and log a summary after every step.

    Args:
        train_image: Sliceable, sized collection of training inputs.
        train_label: Sliceable collection of labels aligned with
            `train_image`.
        model: Tuple ``(images_placeholder, labels_placeholder, keep_prob,
            train_op, acc)``.
        batch_size: Number of examples per mini-batch.
        max_steps: Number of passes over the training data.

    Returns:
        The session holding the trained variables.
    """
    (images_placeholder, labels_placeholder, keep_prob, train_op, acc) = model

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(TRAIN_DIR, sess.graph_def)

    num_examples = len(train_image)
    for step in range(max_steps):
        # Stepping by batch_size visits the same batch starts as
        # ceil(n / batch_size) iterations, without relying on true
        # division (which breaks range() under Python 2).
        for batch in range(0, num_examples, batch_size):
            sess.run(train_op, feed_dict={
                images_placeholder: train_image[batch:batch + batch_size],
                labels_placeholder: train_label[batch:batch + batch_size],
                keep_prob: 0.5})

        # Accuracy and summaries use the same full-dataset feed, so fetch
        # them in one run instead of two forward passes.
        full_feed = {
            images_placeholder: train_image,
            labels_placeholder: train_label,
            keep_prob: 1.0}
        train_accuracy, summary_str = sess.run(
            [acc, summary_op], feed_dict=full_feed)
        print("step %d, training accuracy %g" % (step, train_accuracy))
        summary_writer.add_summary(summary_str, step)

    # Make sure buffered events reach disk before handing back the session.
    summary_writer.flush()
    return sess
def _set_model(self, model):
    """Attach `model`, register histogram summaries and open the writer.

    Histograms are registered only when ``self.histogram_freq`` is truthy
    and ``self.merged`` is ``None``. The writer receives the session
    graph only when ``self.write_graph`` is truthy.

    Args:
        model: Model exposing a ``layers`` sequence.
    """
    import tensorflow as tf
    import keras.backend.tensorflow_backend as KTF

    self.model = model
    self.sess = KTF.get_session()

    if self.histogram_freq and self.merged is None:
        for current in self.model.layers:
            # Tags are built by formatting the layer object itself.
            if hasattr(current, 'W'):
                tf.histogram_summary('{}_W'.format(current), current.W)
            if hasattr(current, 'b'):
                tf.histogram_summary('{}_b'.format(current), current.b)
            if hasattr(current, 'output'):
                tf.histogram_summary('{}_out'.format(current), current.output)

    self.merged = tf.merge_all_summaries()

    if not self.write_graph:
        self.writer = tf.train.SummaryWriter(self.log_dir)
    elif parse_version(tf.__version__) >= parse_version('0.8.0'):
        # From 0.8.0 on, the writer is given the graph object.
        self.writer = tf.train.SummaryWriter(self.log_dir, self.sess.graph)
    else:
        self.writer = tf.train.SummaryWriter(self.log_dir,
                                             self.sess.graph_def)
def start_session(self):
    """Build both models, create a session and initialize all variables.

    Also records the weight-matrix names and, when ``self.tensorboard``
    is truthy, sets up the merged summaries and a summary writer.

    Returns:
        The newly created ``tf.Session``.
    """
    # Names of the weight matrices, layer by layer.
    self.input_layer_mats = ["W_input", "b_input"]
    self.hidden_layer_mats = [
        prefix + str(index)
        for index in range(self.num_hidden)
        for prefix in ("W", "b")
    ]
    self.output_layer_mats = ["W_output", "b_output"]
    self.weight_mats = (self.input_layer_mats
                        + self.hidden_layer_mats
                        + self.output_layer_mats)

    with tf.variable_scope("network"):
        self.create_model_trainable()
    with tf.variable_scope("target"):
        self.create_model_target()

    session = tf.Session()

    # TensorBoard init
    if not self.tensorboard:
        self.summary_writer = None
    else:
        self.merged_summaries = tf.merge_all_summaries()
        now = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
        self.summary_writer = tf.train.SummaryWriter(
            './outputs/' + now + '/', session.graph)

    session.run(tf.initialize_all_variables())
    return session
def __init__(self, config):
    """Build a GRU sequence classifier graph from `config`.

    Args:
        config: Object providing ``batch_size``, ``max_seq_len``,
            ``output_dim``, ``hidden_state_dim``, ``vocab_size`` and
            ``embedding_dim``.
    """
    self.config = config

    # Inputs: a batch of token-id sequences and one label per sequence.
    self.input = tf.placeholder(
        'int32', [self.config.batch_size, config.max_seq_len], name='input')
    self.labels = tf.placeholder(
        'int64', [self.config.batch_size], name='labels')
    self.labels_one_hot = tf.one_hot(
        indices=self.labels, depth=config.output_dim,
        on_value=1.0, off_value=0.0, axis=-1)

    self.gru = GRUCell(config.hidden_state_dim)

    # Embedding table initialized uniformly in [-1, 1).
    embedding_init = tf.random_uniform(
        [config.vocab_size, config.embedding_dim], -1.0, 1.0)
    embeddings_we = tf.get_variable(
        'word_embeddings', initializer=embedding_init)
    embed_input = tf.nn.embedding_lookup(embeddings_we, self.input)
    self.emb = embed_input

    # Split along axis 1 into one tensor per position, dropping that axis.
    inputs = []
    for position_slice in tf.split(1, config.max_seq_len, embed_input):
        inputs.append(tf.squeeze(position_slice, squeeze_dims=[1]))

    _, last_slu_state = tf.nn.rnn(
        cell=self.gru, inputs=inputs, dtype=tf.float32)

    # Project the final state onto the label space.
    projection_init = tf.random_uniform(
        [config.hidden_state_dim, config.output_dim], -1.0, 1.0)
    w_project = tf.get_variable('project2labels', initializer=projection_init)
    logits_bo = tf.matmul(last_slu_state, w_project)
    self.logits = logits_bo
    tf.histogram_summary('logits', logits_bo)

    self.probabilities = tf.nn.softmax(logits_bo)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits_bo, self.labels_one_hot)
    self.loss = tf.reduce_mean(per_example_loss)
    self.predict = tf.nn.softmax(logits_bo)

    # TensorBoard
    predicted_labels = tf.argmax(self.predict, 1)
    is_correct = tf.equal(predicted_labels, self.labels)
    self.accuracy = tf.reduce_mean(
        tf.cast(is_correct, 'float32'), name='accuracy')
    tf.scalar_summary('CCE loss', self.loss)
    tf.scalar_summary('Accuracy', self.accuracy)
    self.tb_info = tf.merge_all_summaries()
def build_graph(self):
    """Create the global step, the model, and (in train mode) the train op.

    The merged summary op is stored in ``self.summaries`` in every mode.
    """
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    self._build_model()

    training = self.mode == 'train'
    if training:
        self._build_train_op()

    self.summaries = tf.merge_all_summaries()
def evaluate(tfrecord_file_paths, theme):
    """Build the evaluation graph for `theme` and run one `eval_once` pass.

    Args:
        tfrecord_file_paths: Forwarded to ``distorted_inputs``.
        theme: Names the workspace; summaries go to
            ``workspace/<theme>/eval``.
    """
    eval_dir = 'workspace/{}/eval'.format(theme)
    with tf.Graph().as_default() as g:
        images, labels = distorted_inputs(
            tfrecord_file_paths=tfrecord_file_paths)
        resized = tf.image.resize_images(
            images, cifar10.IMAGE_SIZE, cifar10.IMAGE_SIZE)
        logits = cifar10.inference(resized)

        # Top-1 correctness op.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Trainable variables are restored from their moving-average
        # names, everything else from its own op name.
        ema = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
        trainable = tf.trainable_variables()
        variables_to_restore = {}
        for variable in tf.all_variables():
            if variable in trainable:
                key = ema.average_name(variable)
            else:
                key = variable.op.name
            variables_to_restore[key] = variable
        saver = tf.train.Saver(variables_to_restore)

        # Summary op built from the graph's summary collection.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(eval_dir, g)

        eval_once(theme, saver, summary_writer, top_k_op, summary_op)
def evaluate():
    """Evaluate the CNN on the CPU, repeating until ``FLAGS.run_once``."""
    with tf.Graph().as_default() as g, tf.device("/cpu:0"):
        # Sequences and labels for evaluation.
        sequences, labels = model.inputs_eval(way=0)

        # Logits from the inference model; dropout is fed at run time.
        dropout_keep_prob = tf.placeholder(tf.float32, name='drop_out')
        logits = model.inference(
            sequences, inputs.DOC_LEN, inputs.VOC_LEN, dropout_keep_prob)
        print("doclen %s, voclen %s\n"%(inputs.DOC_LEN, inputs.VOC_LEN))

        # Top-1 correctness op.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # All variables are restored under their own names (no moving
        # averages are used here).
        saver = tf.train.Saver(tf.all_variables())

        # Summary op built from the graph's summary collection.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(EVAL_DIR, g)

        finished = False
        while not finished:
            eval_once(saver, summary_writer, top_k_op, summary_op,
                      dropout_keep_prob)
            if FLAGS.run_once:
                print("eval only once, stope eval")
                finished = True
            else:
                print("sleep for {} seconds".format(FLAGS.eval_interval_secs))
                time.sleep(FLAGS.eval_interval_secs)
def testSummaries(self):
    """Scalar, histogram, image and audio summaries can be built and written.

    The temporary log directory is removed and the writer closed even if
    one of the steps raises.
    """
    with self.cached_session() as s:
        var = tf.Variable([1, 2, 3], dtype=tf.float32)
        s.run(tf.initialize_all_variables())

        # Dummy image with a leading batch axis and a trailing channel axis.
        x, y = np.meshgrid(np.linspace(-10, 10, 256),
                           np.linspace(-10, 10, 256))
        image = np.sin(x**2 + y**2) / np.sqrt(x**2 + y**2) * .5 + .5
        image = image[None, :, :, None]

        # make a dummy sound
        freq = 440  # A = 440Hz
        sampling_frequency = 11000
        audio = np.sin(
            2 * np.pi * np.linspace(0, 1, sampling_frequency) * freq)
        audio = audio[None, :, None]

        test_dir = tempfile.mkdtemp()
        try:
            # test summaries
            writer = tf.train.SummaryWriter(test_dir)
            try:
                summaries = [
                    tf.scalar_summary("scalar_var", var[0]),
                    tf.scalar_summary("scalar_reduce_var",
                                      tf.reduce_sum(var)),
                    tf.histogram_summary("var_histogram", var),
                    tf.image_summary("sin_image", image),
                    tf.audio_summary("sin_wave", audio, sampling_frequency),
                ]
                run_summaries = s.run(summaries)
                writer.add_summary(
                    s.run(tf.merge_summary(inputs=run_summaries)))
                # This is redundant, but we want to be able to rewrite
                # the command
                writer.add_summary(s.run(tf.merge_all_summaries()))
            finally:
                writer.close()
        finally:
            # Do not leave the temp directory behind when the test fails.
            shutil.rmtree(test_dir)
def initialize_session(sess, task_params):
    """Restore or initialize variables and start the input machinery.

    Args:
        sess: Session to initialize.
        task_params: Mapping with the keys ``'verbose'``,
            ``'restore_from_checkpoint'``, ``'save_path'`` (read only when
            restoring) and ``'summaries_dir'``.

    Returns:
        Tuple ``(coord, threads, saver, train_writer, summaries)``.
    """
    verbose = task_params['verbose']
    if verbose:
        print("Initalizing tensorflow session ...")

    saver = tf.train.Saver()
    if task_params['restore_from_checkpoint']:
        saver.restore(
            sess=sess,
            save_path=task_params['save_path'])
        if verbose:
            print("Restoring variables from '{}'".format(task_params['save_path']))
    else:
        sess.run(tf.initialize_all_variables())

    # A default Saver does not store local variables, so they have to be
    # initialized on the restore path as well as on a fresh start.
    sess.run(tf.initialize_local_variables())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # One log directory per run, named after the start time.
    logdir = task_params['summaries_dir'] + '/train_' + time.strftime("%Y%m%d_%H-%M-%S")
    train_writer = tf.train.SummaryWriter(logdir=logdir, graph=sess.graph)
    summaries = tf.merge_all_summaries()

    return coord, threads, saver, train_writer, summaries
def __init__(self):
    """Create the session, emulator, replay DB and model, then load weights.

    Reads the module-level ``settings`` mapping and stores the emulator's
    action count in it under ``'num_actions'``. Restores the latest
    checkpoint found under ``networks`` when one exists.
    """
    self.session = tf.InteractiveSession()

    self.emulator = Emulator(settings)
    settings['num_actions'] = len(self.emulator.actions)
    self.replay = ReplayDB(settings)

    with tf.variable_scope('model'):
        self.model = Model(settings)

    self.summary = tf.merge_all_summaries()
    self.writer = tf.train.SummaryWriter(
        'summary-log', self.session.graph_def)

    self.session.run(tf.initialize_all_variables())

    self.saver = tf.train.Saver(max_to_keep=1000000)
    checkpoint = tf.train.get_checkpoint_state("networks")
    checkpoint_found = checkpoint and checkpoint.model_checkpoint_path
    if not checkpoint_found:
        print("Unable to load checkpoint")
    else:
        self.saver.restore(self.session, checkpoint.model_checkpoint_path)
        print("Loaded checkpoint: {}".format(
            checkpoint.model_checkpoint_path))

    self.summary_cnt = 0
    self.episode_cnt = 0
    # Start the timer from the model's current global step value.
    self.timer = self.session.run(self.model.global_step)
    self.no_op = tf.no_op()
def __init__(self, state_dim, action_dim, batch_size=64, gamma=0.9,
             buffer_size=1024 * 1024, initial_epsilon=0.5,
             final_epsilon=0.01, logdir='/data/log'):
    """Set up hyper-parameters, the Q-network, summaries and the session.

    Args:
        state_dim: Stored as ``self.state_dim``.
        action_dim: Stored as ``self.action_dim``.
        batch_size: Stored as ``self.batch_size``.
        gamma: Stored as ``self.gamma``.
        buffer_size: Stored as ``self.buffer_size``.
        initial_epsilon: Starting value of ``self.epsilon``.
        final_epsilon: Stored as ``self.final_epsilon``.
        logdir: Directory for the summary writer.
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.replay_buffer = deque()
    self.time_step = 0
    self.epsilon = self.initial_epsilon = initial_epsilon
    self.final_epsilon = final_epsilon
    self.batch_size = batch_size
    self.gamma = gamma
    # Was `= batch_size`, which silently ignored the buffer_size argument.
    self.buffer_size = buffer_size

    self.create_Q_network()
    self.create_training_method()

    # Reward is fed from outside so it can be plotted as a scalar.
    self.reward = tf.placeholder(tf.float32)
    tf.scalar_summary("reward", self.reward)
    self.merged = tf.merge_all_summaries()

    self.session = tf.InteractiveSession()
    self.summary_writer = tf.train.SummaryWriter(logdir, self.session.graph)
    self.session.run(tf.initialize_all_variables())
def __init__(self, log_dir='./logs', max_queue=10, flush_secs=120):
    """Create the merged summary op and a summary writer without a graph.

    Args:
        log_dir: Directory the writer logs into.
        max_queue: Forwarded to ``tf.train.SummaryWriter``.
        flush_secs: Forwarded to ``tf.train.SummaryWriter``.
    """
    self.log_dir = log_dir
    self.merged = tf.merge_all_summaries()

    writer_options = {
        'max_queue': max_queue,
        'flush_secs': flush_secs,
        'graph_def': None,
    }
    self.writer = tf.train.SummaryWriter(self.log_dir, **writer_options)
def evaluate():
    """Evaluate the model repeatedly using moving-average variables.

    Loops until ``FLAGS.run_once`` is set, sleeping
    ``FLAGS.eval_interval_secs`` between passes.
    """
    with tf.Graph().as_default():
        # Test split when FLAGS.eval_data == 'test'.
        use_test_split = FLAGS.eval_data == 'test'
        images, labels = model.inputs(eval_data=use_test_split)

        logits = model.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving-average versions of the variables.
        ema = tf.train.ExponentialMovingAverage(model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        summary_op = tf.merge_all_summaries()
        current_graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(
            FLAGS.eval_dir, graph_def=current_graph_def)

        done = False
        while not done:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                done = True
            else:
                time.sleep(FLAGS.eval_interval_secs)
def _initialize_tf_utilities_and_ops(self, restore_previous_model):
    """Initialize TensorFlow operations.

    tf operations: summaries, init operations, saver, summary_writer.
    Restore a previously trained model if the flag restore_previous_model
    is true.

    :param restore_previous_model: if true, a previous trained model with
        the same name of this model is restored from disk to continue
        training.
    """
    self.tf_merged_summaries = tf.merge_all_summaries()
    init_op = tf.initialize_all_variables()
    self.tf_saver = tf.train.Saver()

    self.tf_session.run(init_op)

    if restore_previous_model:
        print('Restore previously model from %s' % self.model_path)
        self.tf_saver.restore(self.tf_session, self.model_path)

    # Retrieve run identifier: one more than the largest existing "runN".
    run_id = 0
    for entry in os.listdir(self.tf_summary_dir):
        if entry[:3] != 'run':
            continue
        try:
            existing_id = int(entry[3:])
        except ValueError:
            # Entries such as "run_old" or "running" are not run
            # directories; skip them instead of crashing.
            continue
        if existing_id > run_id:
            run_id = existing_id
    run_id += 1

    run_dir = os.path.join(self.tf_summary_dir, 'run' + str(run_id))
    print('Tensorboard logs dir for this run is %s' % (run_dir))

    self.tf_summary_writer = tf.train.SummaryWriter(
        run_dir, self.tf_session.graph)
def drawGraph(self, n_row, n_latent, n_col):
    """Build the matrix-decomposition graph under the 'matDecomp' scope.

    Args:
        n_row: Row count of factor ``A`` (and width of the index input).
        n_latent: Shared inner dimension of the factors ``A`` and ``B``.
        n_col: Column count of factor ``B`` (and width of ``p`` / ``c``).
    """
    with tf.name_scope('matDecomp'):
        # Inputs: targets, per-entry weights, regularization strength and
        # the row-selection matrix.
        self._p = tf.placeholder(tf.float32, shape=[None, n_col])
        self._c = tf.placeholder(tf.float32, shape=[None, n_col])
        self._lambda = tf.placeholder(tf.float32)
        self._index = tf.placeholder(tf.float32, shape=[None, n_row])

        # The two factors.
        self._A = tf.Variable(tf.truncated_normal([n_row, n_latent]))
        self._B = tf.Variable(tf.truncated_normal([n_latent, n_col]))

        selected_rows = tf.matmul(self._index, self._A)
        self._h = tf.matmul(selected_rows, self._B)

        # Weighted mean squared error between targets and reconstruction.
        squared_error = tf.squared_difference(self._p, self._h)
        weighted_loss = tf.reduce_mean(tf.mul(self._c, squared_error))
        self._weighted_loss = weighted_loss

        # Sum of squared entries of both factors, divided by their total
        # number of entries and scaled by lambda.
        l2_A = tf.reduce_sum(tf.square(self._A))
        l2_B = tf.reduce_sum(tf.square(self._B))
        n_w = tf.constant(n_row * n_latent + n_latent * n_col, tf.float32)
        l2 = tf.truediv(tf.add(l2_A, l2_B), n_w)
        reg_term = tf.mul(self._lambda, l2)
        self._loss = tf.add(weighted_loss, reg_term)

        # Validation score: weighted count of thresholded predictions
        # equal to the targets, restricted by the mask.
        self._mask = tf.placeholder(tf.float32, shape=[n_row, n_col])
        one = tf.constant(1, tf.float32)
        reconstruction = tf.matmul(self._A, self._B)
        pred = tf.cast(tf.greater_equal(reconstruction, one), tf.float32)
        matches = tf.cast(tf.equal(pred, self._p), tf.float32)
        cor = tf.mul(matches, self._c)
        self._vali_err = tf.reduce_sum(tf.mul(cor, self._mask))

        # Save only the variables created under this scope.
        scoped_variables = []
        for variable in tf.all_variables():
            if variable.name.find('matDecomp') != -1:
                scoped_variables.append(variable)
        self._saver = tf.train.Saver(scoped_variables)

        tf.scalar_summary('training_weighted_loss_l2', self._loss)
        tf.scalar_summary('validation_weighted_loss', self._weighted_loss)
        # NOTE(review): the merged op is only bound to a local here.
        merged = tf.merge_all_summaries()
def run_training():
    """Train the MNIST model, logging every 100 steps.

    A checkpoint is saved and the model evaluated on all three data sets
    every 1000 steps and after the last step.
    """
    data_sets = data_mnist.read_data_sets()
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Model, loss, train op and evaluation op.
        logits = mnist.inference(
            images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver()

        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # Start the training loop.
        last_step = FLAGS.max_steps - 1
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(
                data_sets.train, images_placeholder, labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if (step + 1) % 1000 == 0 or step == last_step:
                checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=step)
                # Evaluate on the train, validation and test sets in turn.
                for data_set in (data_sets.train,
                                 data_sets.validation,
                                 data_sets.test):
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, data_set)
def evaluate(dataset, model, summary_path, read_checkpoint_path):
    """Evaluate checkpoints in a loop and write test-error summaries.

    The loop ends when ``FLAGS.run_once`` is set or when `_eval_once`
    returns ``FLAGS.training_steps``.

    Args:
        dataset: Provides ``evaluation_inputs()`` and ``num_classes``.
        model: Provides ``inference(images, num_classes, False)``.
        summary_path: Directory for the summary writer.
        read_checkpoint_path: Forwarded to ``_eval_once``.
    """
    with tf.Graph().as_default():
        # input and evaluation procedure
        images, true_labels = dataset.evaluation_inputs()
        predictions = model.inference(images, dataset.num_classes, False)
        top_k_op = _in_top_k(predictions, true_labels)

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=None)

        # The test error is fed from outside through this placeholder.
        test_err = tf.placeholder(tf.float32, shape=[], name='test_err')
        # FIXME test error averaged starts at 0
        test_err_avg_op = _add_test_error_summary(test_err)

        # Create the merged summary op under a control dependency on the
        # averaged test-error op.
        with tf.control_dependencies([test_err_avg_op]):
            summary_op = tf.merge_all_summaries()

        current_graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(
            summary_path, current_graph_def)

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            last = None
            finished = False
            while not finished:
                last = _eval_once(sess, coord, last, saver,
                                  read_checkpoint_path, summary_writer,
                                  top_k_op, summary_op, test_err)
                if FLAGS.run_once or last == FLAGS.training_steps:
                    finished = True
                else:
                    time.sleep(FLAGS.eval_interval_secs)

            coord.request_stop()
            coord.join(threads)
def run_training(self, sess, eval_correct, train_op, loss):
    """Run ``self.num_steps`` training steps with periodic logging.

    Every 5000 steps the loss is printed and a summary written; every
    10000 steps and after the last step a checkpoint is saved and the
    model evaluated on the current training batch and the validation set.

    Args:
        sess: Session used for every run.
        eval_correct: Op forwarded to ``self.do_eval``.
        train_op: Training op.
        loss: Loss tensor fetched alongside `train_op`.
    """
    summary_op = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter(self.train_dir, graph=sess.graph)
    saver = tf.train.Saver()

    feed_dict = self.fill_feed_dict(
        self.train_dataset, self.train_labels, 0)
    final_step = self.num_steps - 1
    for step in range(self.num_steps):
        start_time = time.time()
        _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
        # The feed for the NEXT step is prepared here, so the summary and
        # evaluation below see that next batch.
        feed_dict = self.fill_feed_dict(
            self.train_dataset, self.train_labels, step + 1)
        duration = time.time() - start_time

        if step % 5000 == 0:
            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            summary_str = sess.run(summary_op, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)

        if (step + 1) % 10000 == 0 or step == final_step:
            saver.save(sess, self.train_dir, global_step=step)
            print('Training Data Eval:')
            self.do_eval(sess, eval_correct,
                         feed_dict[self.images_placeholder],
                         feed_dict[self.labels_placeholder])
            print('Validation Data Eval:')
            self.do_eval(sess, eval_correct,
                         self.valid_dataset, self.valid_labels)
def evaluate(eval_data, model_path, global_step):
    """Build the CIFAR-100 evaluation graph and run one `eval_once` pass.

    Args:
        eval_data: Forwarded to ``data_utils.inputs`` and ``eval_once``.
        model_path: Forwarded to ``eval_once``.
        global_step: Forwarded to ``eval_once``.
    """
    with tf.Graph().as_default() as g:
        # Batches of images and labels.
        images, labels = data_utils.inputs(
            eval_data=eval_data,
            data_dir=FLAGS.data_dir,
            batch_size=FLAGS.batch_size)

        # Logits from the inference model, normalized with a softmax.
        logits = inference(images)
        logits_norm = tf.nn.softmax(logits)

        # Top-1 correctness op on the normalized scores.
        top_k_op = tf.nn.in_top_k(logits_norm, labels, 1)

        # Restore the moving-average versions of the variables.
        ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Summary op built from the graph's summary collection.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

        eval_once(eval_data, model_path, global_step, saver,
                  summary_writer, top_k_op, summary_op)
def main(_):
    """Start this task as a parameter server or as a worker.

    A ``ps`` task blocks in ``server.join()``. A ``worker`` task builds a
    small model, then repeatedly runs the loss on random data under a
    Supervisor until it should stop or the step reaches 1000000.
    """
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Cluster description and the server for the local task.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        # Assigns ops to the local worker by default.
        device_setter = tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % FLAGS.task_index,
            cluster=cluster)
        with tf.device(device_setter):
            # Build model...
            x = tf.placeholder("float", [10, 10], name="x")
            y = tf.placeholder("float", [10, 1], name="y")
            initial_w = np.zeros((10, 1))
            w = tf.Variable(initial_w, name="w", dtype="float32")
            loss = tf.pow(tf.add(y, -tf.matmul(x, w)), 2, name="loss")
            global_step = tf.Variable(0)

            saver = tf.train.Saver()
            summary_op = tf.merge_all_summaries()
            init_op = tf.initialize_all_variables()

        # The supervisor handles session initialization, restoring from a
        # checkpoint, and closing when done or when an error occurs.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="/tmp/train_logs",
                                 init_op=init_op,
                                 summary_op=summary_op,
                                 saver=saver,
                                 global_step=global_step,
                                 save_model_secs=600)

        with sv.managed_session(server.target) as sess:
            # Loop until the supervisor shuts down or 1000000 steps have
            # completed.
            step = 0
            while not sv.should_stop() and step < 1000000:
                # Each step is fed a fresh random batch.
                feed = {
                    x: np.random.rand(10, 10),
                    y: np.random.rand(10).reshape(-1, 1),
                }
                _, step = sess.run([loss, global_step], feed)
                print("job_name: %s; task_index: %s; step: %d" % (FLAGS.job_name,FLAGS.task_index,step))

        # Ask for all the services to stop.
        sv.stop()
def run_training():
    """Train MNIST for FLAGS.max_steps steps and export the final model.

    Builds the graph (placeholders, inference, loss, training and evaluation
    ops), records JSON descriptions of the input and output tensor names in
    the 'inputs' / 'outputs' graph collections, runs the training loop with
    periodic summaries, checkpoints and evaluations, and finally saves the
    model under FLAGS.model_dir.
    """
    # Training, validation and test sets, downloaded to a temp directory.
    data_sets = input_data.read_data_sets(tempfile.mkdtemp(), FLAGS.fake_data)

    # Everything below is built into a fresh default graph.
    with tf.Graph().as_default():
        # Placeholders for the example keys, the images and the labels.
        keys_placeholder, images_placeholder, labels_placeholder = (
            placeholder_inputs())

        # Mark the inputs.
        inputs = {
            'key': keys_placeholder.name,
            'image': images_placeholder.name
        }
        tf.add_to_collection('inputs', json.dumps(inputs))

        # Predictions from the inference model, and the loss on top of them.
        logits = mnist.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)

        # The identity op gives the key an output tensor that can be fetched.
        keys = tf.identity(keys_placeholder)

        # Predicted digit = index of the highest logit; softmax turns the
        # logits into a probability distribution over the digits.
        prediction = tf.argmax(logits, 1)
        scores = tf.nn.softmax(logits)

        # Mark the outputs.
        outputs = {
            'key': keys.name,
            'prediction': prediction.name,
            'scores': scores.name
        }
        tf.add_to_collection('outputs', json.dumps(outputs))

        # Gradient computation/application and the evaluation op.
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        # Merged summary op.  The fallback covers TensorFlow builds without
        # tf.contrib.deprecated; drop it once TensorFlow 0.12 is standard.
        try:
            summary_op = tf.contrib.deprecated.merge_all_summaries()
        except AttributeError:
            summary_op = tf.merge_all_summaries()

        init = tf.initialize_all_variables()
        saver = tf.train.Saver()
        sess = tf.Session()

        # Writer for the summaries and the graph; same version fallback.
        try:
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
        except AttributeError:
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                    sess.graph)

        # With the graph fully built, initialise the variables.
        sess.run(init)

        for step in xrange(FLAGS.max_steps):
            step_started = time.time()

            # Feed the next batch of training images and labels.
            batch_feed = fill_feed_dict(data_sets.train, images_placeholder,
                                        labels_placeholder)

            # One optimisation step; train_op's result is discarded and the
            # loss value is kept for reporting.
            _, loss_value = sess.run([train_op, loss], feed_dict=batch_feed)

            elapsed = time.time() - step_started

            # Every 100 steps: report progress and update the events file.
            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, elapsed))
                summary_writer.add_summary(
                    sess.run(summary_op, feed_dict=batch_feed), step)
                summary_writer.flush()

            # Every 1000 steps and on the last step: checkpoint, then
            # evaluate on all three data sets.
            is_last_step = (step + 1) == FLAGS.max_steps
            if (step + 1) % 1000 == 0 or is_last_step:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'checkpoint'),
                           global_step=step)
                evaluations = (
                    ('Training Data Eval:', data_sets.train),
                    ('Validation Data Eval:', data_sets.validation),
                    ('Test Data Eval:', data_sets.test),
                )
                for title, data_set in evaluations:
                    print(title)
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, data_set)

        # Export the model so that it can be loaded later for predictions.
        file_io.create_dir(FLAGS.model_dir)
        saver.save(sess, os.path.join(FLAGS.model_dir, 'export'))
def train_model(self, sess, max_iters, restore=False):
    """Run the network training loop.

    Builds the losses, summaries and optimizer, initialises the variables,
    optionally loads pretrained weights or resumes from the latest
    checkpoint in ``self.output_dir``, then iterates from the restored
    iteration up to ``max_iters``, writing summaries, periodic image
    summaries and snapshots.

    NOTE: this module targets Python 2.  ``print(...),`` with a trailing
    comma is the Python 2 idiom that suppresses the newline.

    Args:
        sess: TensorFlow session used for every run/eval call.
        max_iters: Iteration index at which training stops (exclusive).
        restore: When true, resume from the latest checkpoint found in
            ``self.output_dir`` instead of loading the pretrained model.

    Raises:
        RuntimeError: if the pretrained model cannot be loaded, or if no
            checkpoint is found / it cannot be restored when resuming.
    """
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    loss, cross_entropy, loss_box, rpn_cross_entropy, rpn_loss_box = \
        self.net.build_loss()

    # scalar summary
    tf.scalar_summary('rpn_rgs_loss', rpn_loss_box)
    tf.scalar_summary('rpn_cls_loss', rpn_cross_entropy)
    tf.scalar_summary('cls_loss', cross_entropy)
    tf.scalar_summary('rgs_loss', loss_box)
    tf.scalar_summary('loss', loss)
    summary_op = tf.merge_all_summaries()

    # image writer
    # NOTE: this image is independent to summary_op
    log_image, log_image_data, log_image_name = self.build_image_summary()

    # optimizer
    # `lr` is created for every solver because the training loop assigns
    # and logs it unconditionally; it used to exist only in the momentum
    # branch, which made the Adam and RMS solvers fail with a NameError.
    # Adam and RMS keep using the constant base rate, so for them `lr` is
    # a logged value only.
    base_lr = cfg.TRAIN.LEARNING_RATE
    lr = tf.Variable(base_lr, trainable=False)
    if cfg.TRAIN.SOLVER == 'Adam':
        opt = tf.train.AdamOptimizer(base_lr)
    elif cfg.TRAIN.SOLVER == 'RMS':
        opt = tf.train.RMSPropOptimizer(base_lr)
    else:
        opt = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM)

    global_step = tf.Variable(0, trainable=False)

    with_clip = False  # developer switch: clip gradients by global norm
    if with_clip:
        tvars = tf.trainable_variables()
        grads, norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), 1.0)
        train_op = opt.apply_gradients(zip(grads, tvars),
                                       global_step=global_step)
    else:
        train_op = opt.minimize(loss, global_step=global_step)

    # Build the two learning-rate assignments once.  Creating tf.assign
    # inside the loop would add a new op to the graph on every iteration.
    set_base_lr = tf.assign(lr, base_lr)
    set_decayed_lr = tf.assign(lr, base_lr * cfg.TRAIN.GAMMA)

    # initialize variables
    sess.run(tf.initialize_all_variables())
    restore_iter = 0

    # load vgg16
    if self.pretrained_model is not None and not restore:
        print('Loading pretrained model weights from {:s}'.format(
            self.pretrained_model))
        try:
            self.net.load(self.pretrained_model, sess, True)
        except Exception as err:
            # Raising a plain string is itself a TypeError and hides the
            # real cause, so raise a proper exception carrying it.
            raise RuntimeError('Check your pretrained model {} ({!r})'.format(
                self.pretrained_model, err))

    # resuming a trainer
    if restore:
        ckpt = tf.train.get_checkpoint_state(self.output_dir)
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise RuntimeError('No checkpoint to restore in {}'.format(
                self.output_dir))
        ckpt_path = ckpt.model_checkpoint_path
        print('Restoring from {}...'.format(ckpt_path)),
        try:
            self.saver.restore(sess, ckpt_path)
            # The iteration number is the last '_'-separated token of the
            # checkpoint file stem.
            stem = os.path.splitext(os.path.basename(ckpt_path))[0]
            restore_iter = int(stem.split('_')[-1])
            sess.run(global_step.assign(restore_iter))
        except Exception as err:
            raise RuntimeError('Check your pretrained {} ({!r})'.format(
                ckpt_path, err))
        print('done')

    last_snapshot_iter = -1
    timer = Timer()
    cur_iter = None  # stays None when the loop body never runs
    for cur_iter in range(restore_iter, max_iters):
        # learning rate: base rate before STEPSIZE, decayed rate afterwards
        if cur_iter >= cfg.TRAIN.STEPSIZE:
            sess.run(set_decayed_lr)
        else:
            sess.run(set_base_lr)

        # get one batch
        timer.tic()
        blobs = data_layer.forward()

        if (cur_iter + 1) % (cfg.TRAIN.DISPLAY) == 0:
            print('image: %s' % (blobs['im_name'])),

        feed_dict = {
            self.net.data: blobs['data'],
            self.net.im_info: blobs['im_info'],
            self.net.keep_prob: 0.5,
            self.net.gt_boxes: blobs['gt_boxes'],
            self.net.gt_ishard: blobs['gt_ishard'],
            self.net.dontcare_areas: blobs['dontcare_areas']
        }

        loss_fetches = [
            rpn_cross_entropy, rpn_loss_box, cross_entropy, loss_box,
            summary_op
        ]
        res_fetches = [
            self.net.get_output('cls_prob'),   # FRCNN class prob
            self.net.get_output('bbox_pred'),  # FRCNN rgs output
            self.net.get_output('rois')        # RPN rgs output
        ]

        if _DEBUG:
            # Debug mode fetches the RPN score tensors as well and, as
            # before, does not run train_op.  Profiling can be added here
            # by passing tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            # and a tf.RunMetadata to sess.run and dumping a
            # timeline.Timeline (needs libcupti.so on LD_LIBRARY_PATH).
            fetch_list = loss_fetches + res_fetches + [
                self.net.get_output('rpn_cls_score_reshape'),
                self.net.get_output('rpn_cls_prob_reshape')
            ]
            (rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value,
             loss_box_value, summary_str,
             cls_prob, bbox_pred, rois,
             rpn_cls_score_reshape_np, rpn_cls_prob_reshape_np) = sess.run(
                 fetches=fetch_list, feed_dict=feed_dict)
        else:
            fetch_list = loss_fetches + [train_op] + res_fetches
            (rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value,
             loss_box_value, summary_str, _,
             cls_prob, bbox_pred, rois) = sess.run(
                 fetches=fetch_list, feed_dict=feed_dict)

        # Evaluate against the explicit session rather than relying on
        # `sess` being installed as the default session.
        self.writer.add_summary(summary=summary_str,
                                global_step=global_step.eval(session=sess))

        _diff_time = timer.toc(average=False)

        # image summary
        if (cur_iter) % cfg.TRAIN.LOG_IMAGE_ITERS == 0:
            # plus mean
            ori_im = np.squeeze(blobs['data']) + cfg.PIXEL_MEANS
            ori_im = ori_im.astype(dtype=np.uint8, copy=False)
            ori_im = _draw_gt_to_image(ori_im, blobs['gt_boxes'],
                                       blobs['gt_ishard'])
            ori_im = _draw_dontcare_to_image(ori_im, blobs['dontcare_areas'])
            # draw rects
            if cfg.TRAIN.BBOX_REG and cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
                # Undo the target normalisation of the box regression.
                bbox_pred = bbox_pred * np.tile(self.bbox_stds, (bbox_pred.shape[0], 1)) + \
                    np.tile(self.bbox_means, (bbox_pred.shape[0], 1))
            boxes, scores = _process_boxes_scores(cls_prob, bbox_pred, rois,
                                                  blobs['im_info'][0][2],
                                                  ori_im.shape)
            res = nms_wrapper(scores, boxes, threshold=0.7)
            image = cv2.cvtColor(_draw_boxes_to_image(ori_im, res),
                                 cv2.COLOR_BGR2RGB)
            log_image_name_str = ('%06d_' % cur_iter) + blobs['im_name']
            log_image_summary_op = sess.run(
                log_image,
                feed_dict={log_image_name: log_image_name_str,
                           log_image_data: image})
            self.writer.add_summary(
                log_image_summary_op,
                global_step=global_step.eval(session=sess))

        if (cur_iter) % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f' %
                  (cur_iter, max_iters,
                   rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value,
                   rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value,
                   loss_box_value, lr.eval(session=sess)))
            print('speed: {:.3f}s / iter'.format(_diff_time))

        if (cur_iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = cur_iter
            self.snapshot(sess, cur_iter)

    # Final snapshot, unless the last iteration already produced one or no
    # iteration ran at all (restore_iter >= max_iters).
    if cur_iter is not None and last_snapshot_iter != cur_iter:
        self.snapshot(sess, cur_iter)
def learn_model(trainingset_path, model_path, model_restored_path=None,
                learning_rate=None, verbose=1):
    """Train the encoder/decoder segmentation network and save it.

    The hyperparameters, the test-score history ("evolution"), a text
    report and the TensorFlow checkpoint are all written into
    ``model_path``.

    Args:
        trainingset_path: Passed to ``input_data`` to build the train and
            test data providers.
        model_path: Output folder; created when it does not exist.
        model_restored_path: Optional folder holding ``model.ckpt`` and
            ``evolution.pkl`` of a previous run to resume from.
        learning_rate: Adam learning rate; any falsy value selects 0.0005.
        verbose: 0 prints only the per-epoch scores, >= 1 additionally
            prints the score of every test patch, 2 additionally prints
            the minibatch loss/accuracy every ``display_step`` steps.
    """
    if not learning_rate:
        learning_rate = 0.0005

    # Score history, evaluated on the test set once per epoch.
    loss_history = []
    epoch_history = []
    accuracy_history = []
    report = ''

    # Only recorded in the hyperparameters file and in the report.
    restore = True

    # Results and models
    folder_model = model_path
    if not os.path.exists(folder_model):
        os.makedirs(folder_model)

    display_step = 100
    save_step = 600

    # Network parameters
    image_size = 256
    n_input = image_size * image_size
    n_classes = 2
    dropout = 0.75
    depth = 6

    hyperparameters = {
        'depth': depth,
        'dropout': dropout,
        'image_size': image_size,
        'model_restored_path': model_restored_path,
        'restore': restore
    }
    with open(folder_model + '/hyperparameters.pkl', 'wb') as handle:
        pickle.dump(hyperparameters, handle)

    # Optimization parameters
    batch_size = 1
    training_iters = 500000
    epoch_size = 200

    report += '\n\n---Savings---'
    report += '\n Model saved in : ' + folder_model
    report += '\n\n---PARAMETERS---\n'
    report += 'learning_rate : ' + str(learning_rate) + '; \n batch_size : ' + str(batch_size) + ';\n depth : ' + str(depth) \
        + ';\n epoch_size: ' + str(epoch_size) + ';\n dropout : ' + str(dropout) + ';\n restore : ' + str(restore) \
        + ';\n (if model restored) restored_model :' + str(model_restored_path)

    data_train = input_data(trainingset_path=trainingset_path, type='train')
    data_test = input_data(trainingset_path=trainingset_path, type='test')

    # Graph input
    x = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size))
    y = tf.placeholder(tf.float32, shape=(batch_size * n_input, n_classes))
    keep_prob = tf.placeholder(tf.float32)

    def he_std(fan_in_channels):
        # Standard deviation sqrt(2 / (9 * channels)) for a 3x3 kernel.
        return math.sqrt(2.0 / (9.0 * float(fan_in_channels)))

    def conv2d(x, W, b, strides=1):
        # Conv2D wrapper, with bias and relu activation
        x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1],
                         padding='SAME')
        x = tf.nn.bias_add(x, b)
        return tf.nn.relu(x)

    def maxpool2d(x, k=2):
        # MaxPool2D wrapper
        return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                              padding='SAME')

    def conv_net(x, weights, biases, dropout, image_size=image_size):
        # NOTE(review): `dropout` is accepted but never applied here.
        # Reshape input picture
        x = tf.reshape(x, shape=[-1, image_size, image_size, 1])
        data_temp = x
        data_temp_size = [image_size]
        relu_results = []

        # contraction: two convolutions then a 2x2 max-pool per level
        for i in range(depth):
            conv1 = conv2d(data_temp, weights['wc1'][i], biases['bc1'][i])
            conv2 = conv2d(conv1, weights['wc2'][i], biases['bc2'][i])
            relu_results.append(conv2)
            conv2 = maxpool2d(conv2, k=2)
            # Sizes are integers; `//` keeps them so on Python 2 and 3.
            data_temp_size.append(data_temp_size[-1] // 2)
            data_temp = conv2

        conv1 = conv2d(data_temp, weights['wb1'], biases['bb1'])
        conv2 = conv2d(conv1, weights['wb2'], biases['bb2'])
        data_temp_size.append(data_temp_size[-1])
        data_temp = conv2

        # expansion: upsample, convolve, concatenate with the matching
        # contraction output, then two convolutions
        for i in range(depth):
            data_temp = tf.image.resize_images(data_temp,
                                               data_temp_size[-1] * 2,
                                               data_temp_size[-1] * 2)
            upconv = conv2d(data_temp, weights['upconv'][i],
                            biases['upconv'][i])
            data_temp_size.append(data_temp_size[-1] * 2)
            skip_size = data_temp_size[depth - i - 1]
            upconv_concat = tf.concat(
                concat_dim=3,
                values=[
                    tf.slice(relu_results[depth - i - 1], [0, 0, 0, 0],
                             [-1, skip_size, skip_size, -1]),
                    upconv
                ])
            conv1 = conv2d(upconv_concat, weights['we1'][i], biases['be1'][i])
            conv2 = conv2d(conv1, weights['we2'][i], biases['be2'][i])
            data_temp = conv2

        finalconv = tf.nn.conv2d(conv2, weights['finalconv'],
                                 strides=[1, 1, 1, 1], padding='SAME')
        # One row of 2 class scores per pixel.
        final_result = tf.reshape(
            finalconv,
            tf.TensorShape([
                finalconv.get_shape().as_list()[0] * data_temp_size[-1] *
                data_temp_size[-1], 2
            ]))
        return final_result

    weights = {
        'wc1': [], 'wc2': [], 'we1': [], 'we2': [],
        'upconv': [], 'finalconv': [], 'wb1': [], 'wb2': []
    }
    biases = {
        'bc1': [], 'bc2': [], 'be1': [], 'be2': [],
        'finalconv_b': [], 'bb1': [], 'bb2': [], 'upconv': []
    }

    # The creation order and the names of the variables below determine
    # the keys written to the checkpoint; keep both stable.

    # Contraction
    for i in range(depth):
        if i == 0:
            num_features_init = 1
            num_features = 64
        else:
            num_features = num_features_init * 2

        weights['wc1'].append(
            tf.Variable(tf.random_normal(
                [3, 3, num_features_init, num_features],
                stddev=he_std(num_features_init)), name='wc1-%s' % i))
        weights['wc2'].append(
            tf.Variable(tf.random_normal(
                [3, 3, num_features, num_features],
                stddev=he_std(num_features)), name='wc2-%s' % i))
        biases['bc1'].append(
            tf.Variable(tf.random_normal(
                [num_features], stddev=he_std(num_features)),
                name='bc1-%s' % i))
        biases['bc2'].append(
            tf.Variable(tf.random_normal(
                [num_features], stddev=he_std(num_features)),
                name='bc2-%s' % i))

        num_features_init = num_features

    # Bottom of the network
    num_features = num_features_init * 2
    weights['wb1'] = tf.Variable(tf.random_normal(
        [3, 3, num_features_init, num_features],
        stddev=he_std(num_features_init)), name='wb1-%s' % i)
    weights['wb2'] = tf.Variable(tf.random_normal(
        [3, 3, num_features, num_features],
        stddev=he_std(num_features)), name='wb2-%s' % i)
    # NOTE(review): 'bb1' is created under the name 'bb2-%s' (looks like a
    # copy/paste slip).  Left as is on purpose: renaming it would change
    # the checkpoint keys and break restoring existing models.
    biases['bb1'] = tf.Variable(tf.random_normal([num_features]),
                                name='bb2-%s' % i)
    biases['bb2'] = tf.Variable(tf.random_normal([num_features]),
                                name='bb2-%s' % i)

    num_features_init = num_features

    # Expansion
    for i in range(depth):
        num_features = num_features_init // 2
        weights['upconv'].append(
            tf.Variable(tf.random_normal(
                [2, 2, num_features_init, num_features]),
                name='upconv-%s' % i))
        biases['upconv'].append(
            tf.Variable(tf.random_normal([num_features]),
                        name='bupconv-%s' % i))
        weights['we1'].append(
            tf.Variable(tf.random_normal(
                [3, 3, num_features_init, num_features],
                stddev=he_std(num_features_init)), name='we1-%s' % i))
        weights['we2'].append(
            tf.Variable(tf.random_normal(
                [3, 3, num_features, num_features],
                stddev=he_std(num_features)), name='we2-%s' % i))
        biases['be1'].append(
            tf.Variable(tf.random_normal(
                [num_features], stddev=he_std(num_features)),
                name='be1-%s' % i))
        biases['be2'].append(
            tf.Variable(tf.random_normal(
                [num_features], stddev=he_std(num_features)),
                name='be2-%s' % i))

        num_features_init = num_features

    weights['finalconv'] = tf.Variable(tf.random_normal(
        [1, 1, num_features, n_classes]), name='finalconv-%s' % i)
    biases['finalconv_b'] = tf.Variable(tf.random_normal([n_classes]),
                                        name='bfinalconv-%s' % i)

    # Construct model
    pred = conv_net(x, weights, biases, keep_prob)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
    tf.scalar_summary('Loss', cost)

    # Kept although nothing reads it: it is part of the saved variables.
    index = tf.Variable(0, trainable=False)

    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    mask = tf.argmax(pred, 1)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    init = tf.initialize_all_variables()
    saver = tf.train.Saver(tf.all_variables())
    summary_op = tf.merge_all_summaries()

    # Launch the graph
    report += '\n\n---Intermediary results---\n'

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        last_epoch = 0
        if model_restored_path:
            folder_restored_model = model_restored_path
            saver.restore(sess, folder_restored_model + "/model.ckpt")

            # The history was pickled in binary mode, so read it the same
            # way, and close the file once loaded.
            # NOTE: pickle can execute arbitrary code while loading; only
            # restore from folders produced by a trusted run.
            with open(folder_restored_model + '/evolution.pkl',
                      'rb') as handle:
                evolution_restored = pickle.load(handle)
            restored_steps = evolution_restored["steps"]
            # An empty history means no epoch was completed before saving.
            if restored_steps:
                last_epoch = restored_steps[-1]
        else:
            sess.run(init)
        print('training start')

        step = 1
        epoch = 1 + last_epoch

        while step * batch_size < training_iters:
            batch_x, batch_y = data_train.next_batch(batch_size, rnd=True,
                                                     augmented_data=True)
            sess.run(optimizer, feed_dict={
                x: batch_x,
                y: batch_y,
                keep_prob: dropout
            })

            if step % display_step == 0:
                # Calculate batch loss and accuracy
                loss, acc, p = sess.run(
                    [cost, accuracy, pred],
                    feed_dict={
                        x: batch_x,
                        y: batch_y,
                        keep_prob: 1.,
                        index: step * batch_size
                    })
                # NOTE(review): both values are unused below; the calls are
                # kept in case read_batch has side effects — confirm.
                prediction = data_train.read_batch(p, batch_size)[0, :, :, 0]
                ground_truth = data_train.read_batch(batch_y,
                                                     batch_size)[0, :, :, 0]

                if verbose == 2:
                    outputs = "Iter " + str(step * batch_size) + ", Minibatch Loss= " + \
                        "{:.6f}".format(loss) + ", Training Accuracy= " + \
                        "{:.5f}".format(acc)
                    print(outputs)

            if step % epoch_size == 0:
                # End of an epoch: score the whole test set.
                patch_accuracies = []
                patch_losses = []
                print(epoch)
                data_test.set_batch_start()
                print(data_test.batch_start)
                for i in range(data_test.set_size):
                    batch_x, batch_y = data_test.next_batch(
                        batch_size, rnd=False, augmented_data=False)
                    loss, acc = sess.run([cost, accuracy], feed_dict={
                        x: batch_x,
                        y: batch_y,
                        keep_prob: 1.
                    })
                    patch_accuracies.append(acc)
                    patch_losses.append(loss)

                    if verbose >= 1:
                        print('--\nAccuracy on patch' + str(i) + ': ' +
                              str(acc))
                        print('Loss on patch' + str(i) + ': ' + str(loss))

                accuracy_history.append(np.mean(patch_accuracies))
                loss_history.append(np.mean(patch_losses))
                epoch_history.append(epoch)

                output_2 = '\n----\n Epoch: ' + str(epoch)
                output_2 += '\n Accuracy: ' + str(np.mean(patch_accuracies)) + ';'
                output_2 += '\n Loss: ' + str(np.mean(patch_losses)) + ';'
                print('\n\n----Scores on test:---' + output_2)
                report += output_2
                epoch += 1

            if step % save_step == 0:
                evolution = {
                    'loss': loss_history,
                    'steps': epoch_history,
                    'accuracy': accuracy_history
                }
                with open(folder_model + '/evolution.pkl', 'wb') as handle:
                    pickle.dump(evolution, handle)
                save_path = saver.save(sess, folder_model + "/model.ckpt")
                print("Model saved in file: %s" % save_path)

                with open(folder_model + "/report.txt", 'w') as report_file:
                    report_file.write(report)

            step += 1

        # Final save once the iteration budget is exhausted.
        save_path = saver.save(sess, folder_model + "/model.ckpt")
        evolution = {
            'loss': loss_history,
            'steps': epoch_history,
            'accuracy': accuracy_history
        }
        with open(folder_model + '/evolution.pkl', 'wb') as handle:
            pickle.dump(evolution, handle)
        print("Model saved in file: %s" % save_path)
        print("Optimization Finished!")
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1)) accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) validation_accuracy = tf.placeholder("float") tf.scalar_summary('validation_accuracy', validation_accuracy) train_accuracy = tf.placeholder("float") tf.scalar_summary('train_accuracy', train_accuracy) train_loss = tf.placeholder("float") tf.scalar_summary('train_loss', train_loss) init = tf.initialize_all_variables() with tf.Session() as sess: merged = tf.merge_all_summaries() writer = tf.train.SummaryWriter('tensorboard/imdb_gru', sess.graph) sess.run(init) step = 0 step_all = 0 # train while step_all * batch_size <= training_iters: index_start = step*batch_size index_end = index_start+batch_size batch_x = X_train[index_start:index_end] batch_y = y_train[index_start:index_end] sess.run(optimizer, feed_dict={x: batch_x, y: batch_y}) # display_step if step_all % display_step == 0: # validation
def __init__(self, args, infer=False):
    """Build the character-level RNN language-model graph.

    Args:
        args: Hyperparameter object.  The attributes read here are
            ``model``, ``rnn_size``, ``num_layers``, ``batch_size``,
            ``seq_length``, ``vocab_size``, ``learning_rate`` and
            ``grad_clip``.  When ``infer`` is true, ``batch_size`` and
            ``seq_length`` are overwritten with 1 on this object.
        infer: True when the graph is built for sampling rather than
            training.

    Raises:
        Exception: if ``args.model`` is not 'rnn', 'gru' or 'lstm'.
    """
    self.args = args
    if infer:
        # Sampling handles one character at a time: no batching, no BPTT.
        args.batch_size = 1
        args.seq_length = 1

    # Map the requested model type to the matching rnn_cell class.
    cell_class_names = {
        'rnn': 'BasicRNNCell',
        'gru': 'GRUCell',
        'lstm': 'BasicLSTMCell',
    }
    if args.model not in cell_class_names:
        raise Exception("model type not supported: {}".format(args.model))
    cell_fn = getattr(rnn_cell, cell_class_names[args.model])

    # One layer of rnn_size units, repeated num_layers times in a stack.
    # ([layer] * n puts the same Python object into the list n times.)
    single_layer = cell_fn(args.rnn_size)
    self.cell = rnn_cell.MultiRNNCell([single_layer] * args.num_layers)

    # Integer (not float) placeholders of shape batch_size x seq_length:
    # the input characters and the targets they are scored against.
    batch_shape = [args.batch_size, args.seq_length]
    self.input_data = tf.placeholder(tf.int32, batch_shape)
    self.targets = tf.placeholder(tf.int32, batch_shape)

    # All-zero state, usable to reset the network state at any time.
    self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        # Output projection from the top layer (rnn_size) to the
        # vocabulary (vocab_size): weight matrix plus bias vector.
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            # Trainable embedding from character ids to the input of the
            # lowest layer.
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            # The lookup is batch_size x seq_length x rnn_size.  Split it
            # along the time axis into seq_length tensors of
            # batch_size x 1 x rnn_size, then drop the axis of size 1.
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)
            per_step = tf.split(1, args.seq_length, embedded)
            inputs = [tf.squeeze(step_input, [1]) for step_input in per_step]

    def feed_previous(prev, _):
        """Turn the previous output into the next embedded input.

        NOTE(review): only handed to the decoder when ``infer`` is true,
        and seq_length is 1 in that case, so the decoder presumably never
        calls it — confirm against seq2seq.rnn_decoder.
        """
        # Project the previous output to character logits, take the most
        # likely character (argmax, no sampling) and embed it again.
        char_logits = tf.matmul(prev, softmax_w) + softmax_b
        best_char = tf.stop_gradient(tf.argmax(char_logits, 1))
        return tf.nn.embedding_lookup(embedding, best_char)

    # Unroll the network.  `outputs` holds one tensor per time step with
    # the raw top-layer activations (not yet projected to characters).
    loop_function = feed_previous if infer else None
    outputs, self.final_state = seq2seq.rnn_decoder(
        inputs,
        self.initial_state,
        self.cell,
        loop_function=loop_function,
        scope='rnnlm')

    # Join the per-step outputs side by side, then reshape so that every
    # row is a single rnn_size vector, ordered batch-major:
    # (batch 0, step 0), (batch 0, step 1), ..., (batch 1, step 0), ...
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])

    # Project every row to vocabulary space:
    # [(batch_size * seq_length) x vocab_size].
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    # Probabilities over the vocabulary, same shape as the logits.
    self.probs = tf.nn.softmax(self.logits)

    # Per-position loss of the logits against the flattened targets (laid
    # out in the same batch-major order), with a weight of 1 everywhere.
    flat_targets = tf.reshape(self.targets, [-1])
    unit_weights = tf.ones([args.batch_size * args.seq_length])
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [flat_targets],
        [unit_weights],
        args.vocab_size)

    # Mean loss over all positions: the quantity the optimizer minimises.
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    tf.scalar_summary("cost", self.cost)

    # Non-trainable bookkeeping variables: learning rate, fraction of
    # epochs completed, seconds elapsed.
    self.lr = tf.Variable(args.learning_rate, trainable=False)
    self.global_epoch_fraction = tf.Variable(0.0, trainable=False)
    self.global_seconds_elapsed = tf.Variable(0.0, trainable=False)

    # Gradients of the cost with respect to every trainable variable,
    # clipped by their global norm.
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)

    # Adam driven by the learning-rate variable; train_op is the op a
    # training session runs on each step.
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    self.summary_op = tf.merge_all_summaries()
def train():
    """Train the CIFAR-10 model for ``FLAGS.max_steps`` steps.

    Builds the input pipeline, inference graph, loss and training op in a
    fresh graph and runs the training loop.  Progress is printed every 10
    steps, the merged summaries are written every 100 steps, and a
    checkpoint is saved every 1000 steps and after the last step.

    Raises:
        AssertionError: If the loss evaluates to NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Inputs -> logits -> loss -> parameter-update op.
        images, labels = cifar10.distorted_inputs()
        logits = cifar10.inference(images)
        loss = cifar10.loss(logits, labels)
        train_op = cifar10.train(loss, global_step)

        # Saver, merged summary op and initializer are created before the
        # session so that they cover every variable/summary built above.
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)

        # The input pipeline is queue based; its runners must be started
        # before the first training step.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        progress_fmt = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')

        for step in xrange(FLAGS.max_steps):
            tic = time.time()
            _, loss_value = sess.run([train_op, loss])
            elapsed = time.time() - tic

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                throughput = FLAGS.batch_size / elapsed
                print (progress_fmt % (datetime.now(), step, loss_value,
                                       throughput, float(elapsed)))

            if step % 100 == 0:
                summary_writer.add_summary(sess.run(summary_op), step)

            # Periodic checkpoint, plus one after the final step.
            is_last_step = (step + 1) == FLAGS.max_steps
            if step % 1000 == 0 or is_last_step:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=step)
# Script fragment: defines the accuracy metric, then opens a session and the
# train/test summary writers.  `correct_prediction` is defined before this
# fragment, and the code that uses `batch_size` / `n_epochs` follows it.
with tf.name_scope('accuracy'):
    # Mean of the predictions cast to float32.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.scalar_summary('accuracy', accuracy)
# config = tf.ConfigProto()
# config.gpu_options.allocator_type = 'BFC'
# Initialize all vars
print("Functions ready")
## TODO: Determine if I need this or .global_variables_initializer()
init = tf.initialize_all_variables()
saver = tf.train.Saver()
with tf.Session() as sess:
    # NOTE(review): only summaries registered under the 'CAE' collection key
    # are merged here; the 'accuracy' scalar above is created without an
    # explicit collection -- confirm it is meant to be included.
    merged = tf.merge_all_summaries(key='CAE')
    # Timestamp suffix gives each run its own pair of log directories.
    timestr = time.strftime("%y%m%d-%H%M%S")
    train_writer = tf.train.SummaryWriter(
        './tensorflow_logs/train_vd_' + timestr + '/', sess.graph)
    test_writer = tf.train.SummaryWriter(
        './tensorflow_logs/test_vd_' + timestr + '/', sess.graph)
    sess.run(init)
    # mean_img = np.mean(mnist.train.images, axis=0)
    #mean_img = np.zeros((149760))
    # Fit all training data
    #batch_size = len(testlabels)/4
    batch_size = 16
    # n_epochs = 140
    n_epochs = 1000
# Script fragment: cross-entropy loss, Adam training step, accuracy metric
# and a short loop.  `diff`, `y_conv`, `y_`, `x`, `keep_prob` and `mnist`
# are defined before this fragment.
with tf.name_scope('total'):
    # Negated sum over all elements of `diff`.
    cross_entropy = -tf.reduce_sum(diff)
tf.scalar_summary('cross entropy', cross_entropy)
with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        # Compares the arg-max along axis 1 of the prediction and the label.
        correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                      tf.argmax(y_, 1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.scalar_summary('accuracy', accuracy)
summary_op = tf.merge_all_summaries()
init = tf.initialize_all_variables()
with tf.Session() as sess:
    train_writer = tf.train.SummaryWriter('log', graph=sess.graph)
    init.run()
    for i in range(10):
        batch = mnist.train.next_batch(50)
        # With range(10) this condition holds only for i == 0.
        if i % 10 == 0:
            # keep_prob is fed as 1.0 for this accuracy evaluation.
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 1.0
            })
def run_training():
    '''Build, train, evaluate and export the fully connected network.

    The topology is controlled by ``FLAGS.hidden1`` / ``FLAGS.hidden2``
    (0 means "layer absent").  Every 100 steps the loss is printed, the
    merged summary is written and the validation set is evaluated.  After
    training the test set is evaluated, the weights/biases are saved via
    ``util.save_config`` and the network output for
    ``data_sets.input_data`` is written with ``np.savetxt``.

    Raises:
        AssertionError: If a data-type flag is neither 'float' nor 'int',
            or if ``hidden2`` is set while ``hidden1`` is 0.
    '''
    # sanity check
    allowed_types = ('float', 'int')
    assert FLAGS.input_data_type in allowed_types
    assert FLAGS.output_data_type in allowed_types

    # import the dataset
    data_sets = dataset.Datasets(FLAGS.data_dir, FLAGS.separate_file,
                                 FLAGS.input_data_type,
                                 FLAGS.output_data_type)
    # for hotspot training (disabled):
    #   data_sets = dataset.Datasets(FLAGS.data_dir, FLAGS.separate_file,
    #                                FLAGS.input_data_type,
    #                                FLAGS.output_data_type,
    #                                FLAGS.tile_size, FLAGS.num_maps)

    with tf.Graph().as_default():
        input_pl, golden_pl = util.generate_placeholder(
            data_sets.num_in_neuron, data_sets.num_out_neuron,
            FLAGS.batch_size, FLAGS.input_data_type,
            FLAGS.output_data_type)

        # Build the hidden stack first, remembering the tensor and width
        # that feed the output layer; the output layer is then created once.
        if FLAGS.hidden1 == 0:
            assert (FLAGS.hidden2 == 0)
            feed_tensor, feed_width = input_pl, data_sets.num_in_neuron
        else:
            hidden1 = util.layer('hidden1', input_pl,
                                 data_sets.num_in_neuron, FLAGS.hidden1,
                                 util.fast_sigmoid)
            feed_tensor, feed_width = hidden1, FLAGS.hidden1
            if FLAGS.hidden2 != 0:
                hidden2 = util.layer('hidden2', hidden1, FLAGS.hidden1,
                                     FLAGS.hidden2, util.fast_sigmoid)
                feed_tensor, feed_width = hidden2, FLAGS.hidden2
        outputs = util.layer('output_layer', feed_tensor, feed_width,
                             data_sets.num_out_neuron, None)

        # loss / train op / accumulated error for one batch of data
        loss = util.loss(outputs, golden_pl, FLAGS.benchmark)
        train_op = util.training(loss, FLAGS.learning_rate)
        error = util.error(outputs, golden_pl, FLAGS.benchmark)

        # summary - not necessary
        summary = tf.merge_all_summaries()
        init = tf.initialize_all_variables()
        sess = tf.Session()
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)

        # everything built, run init
        sess.run(init)

        for step in xrange(FLAGS.max_steps):
            feed_dict = util.fill_feed_dict(data_sets.train, input_pl,
                                            golden_pl, FLAGS.batch_size)
            sess.run(train_op, feed_dict=feed_dict)

            # Reporting happens only on every 100th step.
            if step % 100 != 0:
                continue

            batch_loss = sess.run(loss, feed_dict=feed_dict)
            print('step %d: loss = %.2f' % (step, batch_loss))

            summary_writer.add_summary(
                sess.run(summary, feed_dict=feed_dict), step)
            summary_writer.flush()

            # (training-data evaluation is disabled)
            print('validation data evaluation')
            util.do_eval(sess, error, input_pl, golden_pl,
                         FLAGS.batch_size, data_sets.validate)

        # final accuracy
        print('test data evaluation')
        util.do_eval(sess, error, input_pl, golden_pl, FLAGS.batch_size,
                     data_sets.test)

        # File name encodes the layer sizes: <in>_<h1>_<h2>_<out>.txt
        savefile = "%s_%s_%s_%s.txt" % (data_sets.num_in_neuron,
                                        FLAGS.hidden1, FLAGS.hidden2,
                                        data_sets.num_out_neuron)

        # save weights and biases
        util.save_config(sess, NUM_LAYERS, FLAGS.config_dir, savefile)

        # save trained output; the original (unbatched) input data is fed
        output_save = sess.run(outputs,
                               feed_dict={input_pl: data_sets.input_data})
        np.savetxt(FLAGS.data_dir + "train_result/" + savefile, output_save,
                   delimiter=" ")
def main():
    """Train the three-convolution classifier, report accuracy and save it.

    If a command-line argument is present it is opened with ``np.load`` and
    its arrays (keys of the form ``arr_<n>``, ordered by ``n``) are assigned
    to the model parameters after initialization.

    Every 50 steps the loss summary is written and minibatch/validation
    accuracy is printed.  The summary is fetched in the same ``session.run``
    call as the training step: the previous code issued a second run that
    included ``optimizer`` and therefore applied an extra, unintended
    parameter update on every logged step.

    Returns:
        The list of current parameter values if training is interrupted
        with Ctrl-C (they are also written to ``weights.npz``); ``None``
        when training runs to completion.
    """
    if len(sys.argv) > 1:
        f = np.load(sys.argv[1])
        # f.files has unordered keys ['arr_8', 'arr_9', 'arr_6'...]
        # Sorting keys by the numeric suffix restores the saved order.
        initial_weights = [
            f[n] for n in sorted(f.files, key=lambda s: int(s[4:]))
        ]
    else:
        initial_weights = None

    # read input data
    dataset, labels = read_data()
    train_dataset, train_labels = reformat(dataset[0], labels[0])
    valid_dataset, valid_labels = reformat(dataset[1], labels[1])
    test_dataset, test_labels = reformat(dataset[2], labels[2])
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Valid set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)

    # Training model
    graph = tf.Graph()
    with graph.as_default():
        # Variables
        w_conv1 = tf.Variable(
            tf.truncated_normal(
                [FLAGS.patch_size, FLAGS.patch_size, FLAGS.num_channels, 48],
                stddev=0.1))
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[48]))
        w_conv2 = tf.Variable(
            tf.truncated_normal([FLAGS.patch_size, FLAGS.patch_size, 48, 64],
                                stddev=0.1))
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]))
        w_conv3 = tf.Variable(
            tf.truncated_normal([FLAGS.patch_size, FLAGS.patch_size, 64, 128],
                                stddev=0.1))
        b_conv3 = tf.Variable(tf.constant(0.1, shape=[128]))
        w_fc1 = tf.Variable(
            tf.truncated_normal([16 * 4 * 128, 2048], stddev=0.1))
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[2048]))
        w_fc2 = tf.Variable(tf.truncated_normal([2048, FLAGS.num_classes]))
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[FLAGS.num_classes]))

        # Params, in the order expected by the saved weight file.
        params = [
            w_conv1, b_conv1, w_conv2, b_conv2, w_conv3, b_conv3, w_fc1,
            b_fc1, w_fc2, b_fc2
        ]

        # Initial weights
        if initial_weights is not None:
            assert len(params) == len(initial_weights)
            assign_ops = [w.assign(v) for w, v in zip(params, initial_weights)]

        # Input data
        tf_train_dataset = tf.placeholder(
            tf.float32,
            shape=(FLAGS.batch_size, FLAGS.image_width, FLAGS.image_height,
                   FLAGS.num_channels))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(FLAGS.batch_size,
                                                FLAGS.num_classes))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Training computation
        logits = model(tf_train_dataset, w_conv1, b_conv1, w_conv2, b_conv2,
                       w_conv3, b_conv3, w_fc1, b_fc1, w_fc2, b_fc2)
        with tf.name_scope('loss'):
            loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits, tf_train_labels))
            tf.scalar_summary('loss', loss)
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)

        # Predictions for the training, validation, and test data
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(
            model(tf_valid_dataset, w_conv1, b_conv1, w_conv2, b_conv2,
                  w_conv3, b_conv3, w_fc1, b_fc1, w_fc2, b_fc2))
        test_prediction = tf.nn.softmax(
            model(tf_test_dataset, w_conv1, b_conv1, w_conv2, b_conv2,
                  w_conv3, b_conv3, w_fc1, b_fc1, w_fc2, b_fc2))

        # Merge all summaries
        merged = tf.merge_all_summaries()
        train_writer = tf.train.SummaryWriter(FLAGS.train_dir + '/train')

        # Add ops to save and restore all the variables
        saver = tf.train.Saver()

    # Do training
    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        if initial_weights is not None:
            session.run(assign_ops)
            print('initialized by pre-learned values')
        else:
            print('initialized')

        for step in range(FLAGS.max_steps):
            # Slide a batch-sized window over the training set, wrapping
            # around before the window would run off the end.
            offset = (step * FLAGS.batch_size) % (train_labels.shape[0] -
                                                  FLAGS.batch_size)
            batch_data = train_dataset[offset:(offset +
                                               FLAGS.batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + FLAGS.batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }

            # On logged steps the summary is fetched together with the
            # training step so the optimizer runs exactly once per batch.
            log_this_step = step % 50 == 0
            fetches = [optimizer, loss, train_prediction]
            if log_this_step:
                fetches.append(merged)

            try:
                results = session.run(fetches, feed_dict=feed_dict)
                if log_this_step:
                    l, predictions, summary = results[1:]
                    train_writer.add_summary(summary, step)
                    print('Minibatch loss at step %d: %f' % (step, l))
                    print('Minibatch accuracy: %.1f%%' %
                          accuracy(predictions, batch_labels))
                    print('Validation accuracy: %.1f%%' %
                          accuracy(valid_prediction.eval(), valid_labels))
            except KeyboardInterrupt:
                # Ctrl-C: dump the current parameters and stop early.
                last_weights = [p.eval() for p in params]
                np.savez("weights.npz", *last_weights)
                return last_weights

        print('Test accuracy: %.1f%%' %
              accuracy(test_prediction.eval(), test_labels))

        # Save the variables to disk.
        save_path = saver.save(session, "model.ckpt")
        print("Model saved in file: %s" % save_path)
def make_model(model_options):
    """Build one model replica per GPU and combine their outputs.

    ``model_options['gpus']`` is a comma-separated list of device strings.
    A replica is built on each device (sharing variables after the first),
    and the per-replica outputs are concatenated along axis 0 on the CPU.
    In 'train' mode the per-replica gradients are averaged, clipped and
    applied by the two optimizers created by ``make_optimizers``.

    Changes from the previous version: gradients are tested with
    ``is not None`` instead of ``!= None``, the unused ``enumerate`` index
    is gone, and the diagnostics use the function form of ``print`` with a
    single formatted argument so the output is the same on Python 2 and 3.

    Args:
        model_options: dict of options; reads 'gpus', 'mode' and
            'grad_clip', and gains the key 'n_gpus'.

    Returns:
        Tuple ``(model_options, model_vars)``.
    """
    gpus = model_options['gpus'].split(',')
    print('Trying to use GPUs: %s' % (gpus,))
    model_options['n_gpus'] = len(gpus)

    model_vars_list = []
    model_vars = make_input(model_options)
    is_training = model_options['mode'] == 'train'
    if is_training:
        model_vars = make_optimizers(model_options, model_vars)

    for gpu in gpus:
        with tf.device(gpu):
            # The name scope is derived from the last character of the
            # device string.
            with tf.name_scope('GPU' + gpu[-1]):
                model_vars_list.append(model(model_options, model_vars))
                # Later replicas reuse the variables of the first one.
                tf.get_variable_scope().reuse_variables()

    with tf.device('/cpu:0'):
        model_vars['imp_val'] = tf.concat(
            0, [tmp['imp_val'] for tmp in model_vars_list], name='Imp_Val')
        model_vars['imp_val_softmax'] = tf.concat(
            0, [tmp['imp_val_softmax'] for tmp in model_vars_list])
        model_vars['prediction_adascan'] = tf.concat(
            0, [tmp['prediction_adascan'] for tmp in model_vars_list],
            name='Prediction_Adascan')

        if model_options['mode'] == 'test':
            model_vars['minibatch_names'] = tf.concat(
                0, [tmp['minibatch_names'] for tmp in model_vars_list])
            model_vars['minibatch_labels'] = tf.concat(
                0, [tmp['minibatch_labels'] for tmp in model_vars_list])

        if is_training:
            # Losses are reported from the last replica only.
            model_vars['ce_adascan'] = model_vars_list[-1]['ce_adascan']
            model_vars['total_loss_adascan'] = model_vars_list[-1][
                'total_loss_adascan']
            tf.scalar_summary('adascan/train', model_vars['ce_adascan'])
            tf.histogram_summary('adascan/imp_val_softmax',
                                 model_vars['imp_val_softmax'])
            tf.histogram_summary('adascan/imp_val', model_vars['imp_val'])

            model_vars['grads_vgg'] = average_gradients(
                [tmp['grads_vgg'] for tmp in model_vars_list])
            model_vars['grads_adascan'] = average_gradients(
                [tmp['grads_adascan'] for tmp in model_vars_list])

            print("Trainable Variables : Adascan")
            all_grads = model_vars['grads_vgg'] + model_vars['grads_adascan']
            for (grad, var) in all_grads:
                # Identity test: comparison operators on tensors are not a
                # reliable way to detect a missing gradient.
                if grad is not None:
                    print(var.name)

            train_step_adascan = model_vars['opt_adascan'].apply_gradients(
                grad_clip(model_vars['grads_adascan'],
                          clip=model_options['grad_clip']))
            train_step_vgg = model_vars['opt_vgg'].apply_gradients(
                grad_clip(model_vars['grads_vgg'],
                          clip=model_options['grad_clip']))
            model_vars['train_step_adascan'] = train_step_adascan
            model_vars['train_step_vgg'] = train_step_vgg

        model_vars['merged_summaries'] = tf.merge_all_summaries()

    return model_options, model_vars
def qlearning():
    """Train the Q-network on the game until the stopping rule fires.

    Builds the network, cost (squared TD error plus ``L2_REG``-weighted
    regularization), summaries and Adam training op, then loops forever:
    pick an action, step the environment, record the transition and, every
    ``TRAIN_EVERY`` steps once more than ``MINI_BATCH_SIZE`` transitions
    exist, call ``train``.  Epsilon decays linearly to
    ``FINAL_EPSILON_GREEDY`` over ``EPSILON_STEPS`` steps.

    Changes from the previous version: the Python-2-only print statements
    are replaced by single-argument ``print(...)`` calls that produce the
    same text on Python 2 and 3, the duplicated ``episode_lengths``
    initialization and the unused ``loss`` local are removed, and the
    emptiness test on ``costs`` uses truthiness.

    Returns:
        Tuple ``(tf_sess, tf_output_layer)`` once more than 1000 loop
        iterations have run and the mean of the recent episode lengths
        exceeds ``THRESHOLD``.
    """
    tf.reset_default_graph()
    tf_sess = tf.Session()

    tf_output_layer, l2_reg = create_network((g.NUM_PLAYERS) * 2, NUM_HIDDEN,
                                             NUM_ACTIONS)
    tf_action = tf.placeholder("float", [None, NUM_ACTIONS], name='action')
    tf_target = tf.placeholder("float", [None], name='target')
    # Multiplying by the action placeholder and summing over axis 1 selects
    # the network output that corresponds to the action taken.
    tf_q_for_action = tf.reduce_sum(tf.mul(tf_output_layer, tf_action),
                                    reduction_indices=1)

    with tf.name_scope('cost'):
        tf_cost = tf.reduce_mean(tf.square(tf_target - tf_q_for_action)) + \
            l2_reg * L2_REG
        tf.scalar_summary('cost', tf_cost)
        tf.scalar_summary('l2_reg', l2_reg)

    with tf.name_scope('avg_reward'):
        tf_rewards = tf.placeholder("float", [None], name='rewards')
        tf_avg_reward = tf.reduce_mean(tf_rewards)
        tf.scalar_summary('avg_reward', tf_avg_reward)

    with tf.name_scope('train_op'):
        tf_train_operation = \
            tf.train.AdamOptimizer(INITIAL_LEARNING_RATE).minimize(tf_cost)

    merged = tf.merge_all_summaries()

    # Give this run of the program an identifier built from the first five
    # fields of the current UTC time, joined with '-'.
    identifier = str(time.gmtime()[0:5])
    identifier = identifier.replace('(', '').replace(')', '')
    identifier = identifier.replace(' ', '-').replace(',', '')

    # Summary writing is switched off; flip to True to log for TensorBoard.
    summarise = False
    if summarise:
        train_writer = tf.train.SummaryWriter('train-' + identifier,
                                              tf_sess.graph)

    tf_sess.run(tf.initialize_all_variables())

    epsilon_greedy = INITIAL_EPSILON_GREEDY
    transitions = deque()
    episode_lengths = []
    ep_index = 0

    game = g.Game()
    game.set_render_or_not(False)
    current_state = game.reset()

    keep_prob = 0.5
    last_nonzero_rewards = []
    t_step = 0
    successful_t_steps = 0
    costs = []

    # Record transitions
    while True:
        action = compute_action(tf_sess, tf_output_layer, keep_prob,
                                current_state, epsilon_greedy)
        obs, reward, terminal = game.step_environment(action)
        next_state = obs

        # Keep bounded windows of recent rewards and episode lengths.
        last_nonzero_rewards.append(reward)
        last_nonzero_rewards = last_nonzero_rewards[-500:]
        episode_lengths = episode_lengths[-200:]

        transitions.append({
            'state': current_state,
            'next_state': next_state,
            'action': action,
            'reward': reward,
            'terminal': terminal
        })

        if terminal or successful_t_steps >= MAX_EPISODE_LENGTH:
            current_state = game.reset()
            episode_lengths.append(successful_t_steps)
            successful_t_steps = 0
        else:
            current_state = next_state
            successful_t_steps += 1

        if len(transitions) > MINI_BATCH_SIZE and t_step % TRAIN_EVERY == 0:
            summary, cost, l2_reg_cost = train(
                tf_sess, tf_train_operation, tf_cost, l2_reg,
                tf_output_layer, merged, transitions,
                last_nonzero_rewards[-500:], KEEP_PROB)
            costs.append(cost)
            costs = costs[-500:]
            if t_step % 100 == 0:
                if summarise:
                    train_writer.add_summary(summary, t_step)

        t_step = t_step + 1

        # Linear epsilon decay, clamped at the final value.
        epsilon_greedy = epsilon_greedy - \
            (INITIAL_EPSILON_GREEDY - FINAL_EPSILON_GREEDY) / float(EPSILON_STEPS)
        epsilon_greedy = max(FINAL_EPSILON_GREEDY, epsilon_greedy)

        avg_nonzero_reward = np.mean(last_nonzero_rewards)
        if (ep_index % 100) == 0:
            print("Average nonzero reward %s Std: %s Average ep length: %s" %
                  (avg_nonzero_reward, np.std(last_nonzero_rewards),
                   np.mean(episode_lengths)))
            if costs:
                print("Average cost %s l2_reg %s" %
                      (np.mean(costs), l2_reg_cost * L2_REG))
            print("Playing randomly with prob %s" % (epsilon_greedy,))

        ep_index = ep_index + 1
        if ep_index > 1000:
            # NOTE(review): the test is on mean episode length while the
            # message reports the average reward -- confirm which quantity
            # the stopping rule is meant to use.
            if np.mean(episode_lengths) > THRESHOLD:
                print("Min reward over last 500 is %s > %s , so finished training" %
                      (avg_nonzero_reward, THRESHOLD))
                return tf_sess, tf_output_layer
def build_summaries(self):
    """Register the episode-reward scalar summary and merge all summaries.

    The summary tag is ``"Reward_"`` followed by ``self.model_name`` and it
    tracks ``self.episode_reward``.

    Returns:
        Tuple ``(summary_ops, summary_vars)``: the op returned by
        ``tf.merge_all_summaries()`` and a one-element list holding
        ``self.episode_reward``.
    """
    reward_tag = "Reward_" + self.model_name
    tf.scalar_summary(reward_tag, self.episode_reward)
    return tf.merge_all_summaries(), [self.episode_reward]
def evaluate(train_dir):
    """
    Load the model and run evaluation.

    Current Version runs the evaluation defined in network.evaluation
    and prints the output to std out.

    The process now terminates with ``raise SystemExit(1)`` when no
    checkpoint is found.  The previous ``exit(1)`` relied on a helper that
    the ``site`` module injects for interactive use and that is absent when
    ``site`` is not loaded; the exit status is unchanged.

    Parameters
    ----------
    train_dir : str
        Path to a directory which includes a folder model_files.
        This folder has to include a params.py, input.py and a network.py

    Raises
    ------
    SystemExit
        With status 1 if ``train_dir`` contains no checkpoint.
    """
    target_dir = os.path.join(train_dir, "model_files")
    params = imp.load_source("params", os.path.join(target_dir, "params.py"))
    data_input = imp.load_source("input",
                                 os.path.join(target_dir, "input.py"))
    network = imp.load_source("network",
                              os.path.join(target_dir, "network.py"))

    with tf.Graph().as_default():
        # Retrieve images and labels
        eval_data = FLAGS.eval_data == 'test'
        images, labels = data_input.inputs(eval_data=eval_data,
                                           data_dir=utils.cfg.data_dir,
                                           batch_size=params.batch_size)

        # Generate the keep-probability placeholder.
        keep_prob = utils.placeholder_inputs(params.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = network.inference(images, keep_prob)

        # Add to the Graph the Ops for loss calculation.  The result is not
        # used below; the call is kept so the graph is built as before.
        loss = network.loss(logits, labels)

        # Calculate predictions (not used below; kept for the same reason).
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = network.evaluation(logits, labels)

        # Build the summary operation based on the TF collection of
        # Summaries.
        summary_op = tf.merge_all_summaries()

        # Create a saver used to restore the trained variables.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Run the Op to initialize the variables.
        init = tf.initialize_all_variables()
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        ckpt = tf.train.get_checkpoint_state(train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("No checkpoints found!")
            raise SystemExit(1)

        print("Doing Evaluation with lots of data.")
        utils.do_eval(sess=sess,
                      eval_correct=eval_correct,
                      keep_prob=keep_prob,
                      num_examples=params.num_examples_per_epoch_for_eval,
                      params=params,
                      name="eval")
def train(from_checkpoint=False):
    """Train (or fine-tune) the model for up to ``FLAGS.max_steps`` steps.

    Args:
        from_checkpoint: If true, try to resume from the latest checkpoint
            in ``train_dir``.  If none is found, or if false, ``train_dir``
            is wiped and recreated and the weights in ``weights_all`` are
            loaded from the latest checkpoint in ``loader_dir``.

    Raises:
        Exception: If the loader checkpoint is needed but not found.
        AssertionError: If the loss evaluates to NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Inputs -> logits -> loss -> parameter-update op.
        _, images, oneds, labels = model.distorted_inputs()
        use_dropout = FLAGS.architecture == '5layer_concat_dropout'
        _, _, logits, weights_all = model.inference(images, oneds,
                                                    dropout=use_dropout)
        loss = model.loss(logits, labels)
        train_op = model.train(loss, global_step)

        # `saver` covers every variable; `loader` only the weights returned
        # by inference, used to seed fine-tuning.
        saver = tf.train.Saver(tf.all_variables())
        loader = tf.train.Saver(weights_all)

        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        resumed = False
        if from_checkpoint:
            resume_state = tf.train.get_checkpoint_state(train_dir)
            if resume_state and resume_state.model_checkpoint_path:
                saver.restore(sess, resume_state.model_checkpoint_path)
                global_step_cnt = global_step.eval(session=sess)
                print('Resuming from checkpoint: step %d' % global_step_cnt)
                resumed = True
            else:
                print('Checkpoint not found')

        if not resumed:
            print('Training (fine-tuning) from loader: vanilla 5 layer model')
            # Start from an empty training directory.
            if tf.gfile.Exists(train_dir):
                tf.gfile.DeleteRecursively(train_dir)
            tf.gfile.MakeDirs(train_dir)

            loader_state = tf.train.get_checkpoint_state(loader_dir)
            if not (loader_state and loader_state.model_checkpoint_path):
                raise Exception('Loader not found')
            # Initialize everything, then overwrite the loaded weights.
            sess.run(init)
            loader.restore(sess, loader_state.model_checkpoint_path)
            global_step_cnt = 0

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(train_dir, sess.graph)

        progress_fmt = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')

        for step in xrange(global_step_cnt, FLAGS.max_steps):
            tic = time.time()
            _, loss_value = sess.run([train_op, loss])
            elapsed = time.time() - tic

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                throughput = FLAGS.batch_size / elapsed
                print (progress_fmt % (datetime.now(), step, loss_value,
                                       throughput, float(elapsed)))

            if step % 100 == 0:
                summary_writer.add_summary(sess.run(summary_op), step)

            # Periodic checkpoint, plus one after the final step.
            is_last_step = (step + 1) == FLAGS.max_steps
            if step % 1000 == 0 or is_last_step:
                saver.save(sess, os.path.join(train_dir, 'model.ckpt'),
                           global_step=step)
def build_summaries(self):
    """Create a reward variable, attach a scalar summary and merge.

    A new ``tf.Variable`` initialized to ``0.`` is created on every call;
    its summary tag is ``"Reward_"`` followed by ``self.model_name``.

    Returns:
        Tuple ``(summary_ops, summary_vars)``: the op returned by
        ``tf.merge_all_summaries()`` and a one-element list holding the
        reward variable.
    """
    reward_var = tf.Variable(0.)
    tf.scalar_summary("Reward_" + self.model_name, reward_var)
    merged_op = tf.merge_all_summaries()
    return merged_op, [reward_var]
def run_training():
    """Build the graph and run a single training step on one image.

    The graph, loss and training op are built, one hard-coded image is fed
    through one ``train_op`` step, and the merged summaries for that step
    are written under a per-run, per-host log directory.

    Any ``Exception`` is reported with a traceback instead of propagating.
    The session and summary writer are now released in a ``finally``
    clause, so they are closed on success, on error and on interruption
    alike.  The local formerly named ``input`` no longer shadows the
    builtin.
    """
    sess = None
    summary_writer = None
    try:
        # Tell TensorFlow that the model will be built into a fresh Graph.
        with tf.Graph().as_default():
            # Build a Graph that computes predictions from the inference
            # model.
            image_input, net_out = inference_graph()
            print("Graph built! continuing...")

            # Add to the Graph the Ops for loss calculation.
            loss = lossF(net_out)

            # Add to the Graph the Ops that calculate and apply gradients.
            train_op = training(loss, 0.001)

            print("Merging summaries continuing...")
            summary_op = tf.merge_all_summaries()

            print("Initialize variables...")
            init = tf.initialize_all_variables()

            print("Starting session...")
            sess = tf.Session()

            print("Creating SummaryWritter...")
            # One log directory per run: <timestamp>-<hostname>.
            summary_name = datetime.now().strftime("%Y_%B_%d_%H_%M_%S")
            summary_name = "%s-%s" % (summary_name, socket.gethostname())
            summary_dir = os.path.join("/Users/boyander/test-tf",
                                       summary_name)

            # Run the Op to initialize the variables.
            sess.run(init)

            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.train.SummaryWriter(summary_dir, sess.graph)
            print("Started SummaryWriter -> %s" % summary_dir)

            # expand_dims adds a leading axis of size 1 to the loaded image.
            feed_dict = {
                image_input: np.expand_dims(mpimg.imread(
                    '/Volumes/Bahia/kitti-dataset/sequences/00/image_2/000000.png'
                ), axis=0)
            }
            sess.run(train_op, feed_dict=feed_dict)
            summary_str = sess.run(summary_op, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, 0)
            summary_writer.flush()
            # read validation batch
    except Exception as e:
        print("Exception on TRAIN: %s" % e)
        traceback.print_exc()
    finally:
        if summary_writer is not None:
            summary_writer.close()
        if sess is not None:
            sess.close()
def run_training(hyper_param, model):
    '''Train the RSVP CNN for ``FLAGS.max_steps`` steps.

    Every ``check_step`` steps the loss is printed and summaries are
    written; whenever ``step + 1`` is a multiple of ``check_step``, and
    after the final step, a checkpoint is saved and the model is evaluated
    on the training, validation and test sets (results go to the CSV
    writers).

    Args:
        hyper_param: mapping read for the keys 'layer' and 'feat'
        model: passed as ``cnn_id`` to the network selector and to the CSV
            writer factory
    '''
    # initialize the summary to write
    csv_writer_acc, csv_writer_auc = autorun_util.csv_writer(
        model, hyper_param['feat'])

    # Training, validation and test sets of RSVP.
    data_sets = rsvp_input_data.read_data_sets(EEG_DATA_MAT,
                                               FLAGS.fake_data,
                                               reshape_t=False)

    with tf.Graph().as_default():
        images_placeholder, labels_placeholder, keep_prob = \
            placeholder_inputs(FLAGS.batch_size)

        # Network selected by the hyper-parameters, then loss / train op /
        # evaluation op on top of its logits.
        logits = autorun_infer.select_running_cnn(
            images_placeholder, keep_prob,
            layer=hyper_param['layer'],
            feat=hyper_param['feat'],
            cnn_id=model)
        loss = rsvp_quick_cnn_model.loss(logits, labels_placeholder)
        train_op = rsvp_quick_cnn_model.training(loss, FLAGS.learning_rate)
        eval_correct = rsvp_quick_cnn_model.evaluation(logits,
                                                       labels_placeholder)

        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver()
        sess = tf.Session()

        init = tf.initialize_all_variables()
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            tic = time.time()

            # 0.5 is the value passed through to the keep_prob placeholder
            # for training batches.
            feed_dict = fill_feed_dict(data_sets.train, 0.5,
                                       images_placeholder,
                                       labels_placeholder, keep_prob)

            # The train op's own result is discarded; only the loss value
            # is kept.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            elapsed = time.time() - tic

            if step % check_step == 0:
                print('Step %d: loss = %.4f (%.3f sec)' %
                      (step, loss_value, elapsed))
                summary_writer.add_summary(
                    sess.run(summary_op, feed_dict=feed_dict), step)

            completed = step + 1
            if completed % check_step == 0 or completed == FLAGS.max_steps:
                saver.save(sess, FLAGS.train_dir, global_step=step)
                splits = (
                    ('Training Data Eval:', data_sets.train),
                    ('Validation Data Eval:', data_sets.validation),
                    ('Test Data Eval:', data_sets.test),
                )
                for heading, split in splits:
                    print(heading)
                    do_eval(sess, eval_correct, logits, images_placeholder,
                            labels_placeholder, keep_prob, split,
                            csv_writer_acc, csv_writer_auc)

        # turn off writers after finish
        for csv_out in (csv_writer_acc, csv_writer_auc):
            if csv_out is not None:
                csv_out.close()
def main():
    """Train a WaveNet model from the command-line configuration.

    Reads the arguments and the WaveNet parameter JSON, builds the audio
    reader, network, loss and optimizer, optionally restores a checkpoint,
    and runs the training loop, writing summaries every step and saving a
    checkpoint every ``args.checkpoint_every`` steps and on exit.

    Fixes over the previous version:

    * The computed ``silence_threshold`` (``None`` when the argument is not
      above ``EPSILON``) is now what is passed to ``AudioReader``.  Before,
      the raw argument was passed and the computed value was unused, so a
      near-zero threshold did not map to ``None`` as the comment describes.
    * The final save is guarded against ``step`` still being ``None`` (the
      loop never ran), which raised ``TypeError`` on Python 3.
    * The unused ``logdir_root`` local is removed.
    """
    args = get_arguments()

    try:
        directories = validate_directories(args)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    with open(args.wavenet_params, 'r') as f:
        wavenet_params = json.load(f)

    # Create coordinator.
    coord = tf.train.Coordinator()

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold
        # near zero.
        silence_threshold = args.silence_threshold if args.silence_threshold > \
            EPSILON else None
        gc_enabled = args.gc_channels is not None
        reader = AudioReader(
            args.data_dir,
            coord,
            sample_rate=wavenet_params['sample_rate'],
            gc_enabled=gc_enabled,
            receptive_field=WaveNetModel.calculate_receptive_field(
                wavenet_params["filter_width"],
                wavenet_params["dilations"],
                wavenet_params["scalar_input"],
                wavenet_params["initial_filter_width"]),
            sample_size=args.sample_size,
            silence_threshold=silence_threshold)
        audio_batch = reader.dequeue(args.batch_size)
        if gc_enabled:
            gc_id_batch = reader.dequeue_gc(args.batch_size)
        else:
            gc_id_batch = None

    # Create network.
    net = WaveNetModel(
        batch_size=args.batch_size,
        dilations=wavenet_params["dilations"],
        filter_width=wavenet_params["filter_width"],
        residual_channels=wavenet_params["residual_channels"],
        dilation_channels=wavenet_params["dilation_channels"],
        skip_channels=wavenet_params["skip_channels"],
        quantization_channels=wavenet_params["quantization_channels"],
        use_biases=wavenet_params["use_biases"],
        scalar_input=wavenet_params["scalar_input"],
        initial_filter_width=wavenet_params["initial_filter_width"],
        histograms=args.histograms,
        global_condition_channels=args.gc_channels,
        global_condition_cardinality=reader.gc_category_cardinality)

    # A strength of 0 is passed on as None.
    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None
    loss = net.loss(input_batch=audio_batch,
                    global_condition_batch=gc_id_batch,
                    l2_regularization_strength=args.l2_regularization_strength)
    optimizer = optimizer_factory[args.optimizer](
        learning_rate=args.learning_rate,
        momentum=args.momentum)
    trainable = tf.trainable_variables()
    optim = optimizer.minimize(loss, var_list=trainable)

    # Set up logging for TensorBoard.
    writer = tf.train.SummaryWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.merge_all_summaries()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.initialize_all_variables()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.trainable_variables())

    try:
        saved_global_step = load(saver, sess, restore_from)
        if is_overwritten_training or saved_global_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            saved_global_step = -1
    except:
        # Deliberately catches everything: the message is printed and the
        # original exception is re-raised unchanged.
        print("Something went wrong while restoring checkpoint. "
              "We will terminate training to avoid accidentally overwriting "
              "the previous model.")
        raise

    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    reader.start_threads(sess)

    step = None
    try:
        last_saved_step = saved_global_step
        for step in range(saved_global_step + 1, args.num_steps):
            start_time = time.time()
            if args.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                print('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                summary, loss_value, _ = sess.run(
                    [summaries, loss, optim],
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary, step)
                writer.add_run_metadata(run_metadata,
                                        'step_{:04d}'.format(step))
                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                summary, loss_value, _ = sess.run([summaries, loss, optim])
                writer.add_summary(summary, step)

            duration = time.time() - start_time
            print('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

    except KeyboardInterrupt:
        # Introduce a line break after ^C is displayed so save message
        # is on its own line.
        print()
    finally:
        # `step` stays None when the loop body never executed; there is
        # nothing new to save in that case.
        if step is not None and step > last_saved_step:
            save(saver, sess, logdir, step)
        coord.request_stop()
        coord.join(threads)
def train(argv=None):
    """Train the Bi-LSTM -> highway -> CNN text classifier and evaluate it.

    The train and test sets returned by ``dependency_load_data`` are
    concatenated, reshuffled with a fixed seed and split again so that the
    first ``Test_Size`` samples form the evaluation set.  The graph is then
    built and the training loop runs over the batches produced by
    ``data_helpers.batch_iter``.  Every ``EVAL_FREQUENCY``-th batch is not
    trained on; an evaluation pass over the held-out set runs instead and
    may decay the learning rate.

    Args:
        argv: Unused.
    """
    # load data
    print("Loading data ... ")
    x_train, y_train = dependency_load_data.load_train_data()
    x_test, y_test = dependency_load_data.load_test_data()

    # concatenate and shuffle .
    x_sum = numpy.concatenate((x_train, x_test))
    y_sum = numpy.concatenate((y_train, y_test))
    numpy.random.seed(10)
    shuffle_indices = numpy.random.permutation(numpy.arange(len(y_sum)))
    x_shuffled = x_sum[shuffle_indices]
    y_shuffled = y_sum[shuffle_indices]

    # split to train and test .
    # x=[N_Samples,max_document_length,EMBEDDING_SIZE]
    # y=[N_Samples,NUM_CLASSES]
    x_train = x_shuffled[Test_Size:]
    y_train = y_shuffled[Test_Size:]
    x_test = x_shuffled[:Test_Size]
    y_test = y_shuffled[:Test_Size]

    print(x_train.shape)
    print(x_test.shape)
    print("exception words : " +
          str(dependency_load_data.get_exception_number()))

    # Period (in global steps) used by the learning-rate decay check.
    steps_each_check = 500

    # input
    # input is sentence
    train_data_node = tf.placeholder(
        tf.float32, shape=(None, NUM_STEPS, EMBEDDING_SIZE))
    train_labels_node = tf.placeholder(tf.float32, shape=(None, NUM_CLASSES))
    dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    filter_sizes = [2, 3, 4, 5, 6]
    filter_numbers = [300, 200, 150, 100, 100]

    # full connected - softmax layer,
    fc1_weights = tf.Variable(
        tf.truncated_normal([sum(filter_numbers), 100],
                            stddev=0.1,
                            seed=SEED,
                            dtype=tf.float32))
    fc1_biases = tf.Variable(tf.constant(0.01, shape=[100], dtype=tf.float32))
    fc2_weights = tf.Variable(
        tf.truncated_normal([100, NUM_CLASSES],
                            stddev=0.1,
                            seed=SEED,
                            dtype=tf.float32))
    fc2_biases = tf.Variable(
        tf.constant(0.01, shape=[NUM_CLASSES], dtype=tf.float32))

    # model
    def model(x):
        """Map a batch of embedded sentences to unnormalised class scores."""
        # Current data input shape: (batch_size, n_steps, n_input)
        x = tf.transpose(x, [1, 0, 2])
        # (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, EMBEDDING_SIZE])
        # get a list of 'n_steps' tensors of shape (batch_size, n_input)
        x = tf.split(0, NUM_STEPS, x)

        # Bi-directional LSTM
        fw_cell = tf.nn.rnn_cell.LSTMCell(
            num_hidden, forget_bias=1.0, state_is_tuple=True)
        fw_cell = tf.nn.rnn_cell.DropoutWrapper(
            fw_cell, output_keep_prob=dropout_keep_prob)
        bw_cell = tf.nn.rnn_cell.LSTMCell(
            num_hidden, forget_bias=1.0, state_is_tuple=True)
        bw_cell = tf.nn.rnn_cell.DropoutWrapper(
            bw_cell, output_keep_prob=dropout_keep_prob)

        if rnn_layer > 1:
            fw_cell = tf.nn.rnn_cell.MultiRNNCell([fw_cell] * rnn_layer)
            bw_cell = tf.nn.rnn_cell.MultiRNNCell([bw_cell] * rnn_layer)

        # output = [batch_size,num_hidden*2]
        # outputs of Bi-directional LSTM to highway
        outputs, fw_final_state, bw_final_state = tf.nn.bidirectional_rnn(
            fw_cell, bw_cell, x, dtype=tf.float32)

        # Highway
        # convert to [batch_size,num_steps,num_hidden*2]
        hw_input = tf.transpose(tf.pack(outputs, axis=0), [1, 0, 2])
        # convert to [batch_size x num_steps,num_hidden*2]
        hw_input = tf.reshape(hw_input, [-1, num_hidden * 2])
        size = hw_input.get_shape()[1]  # size = num_hidden*2
        # tf.tanh
        # hw_output=[batch_size x num_steps,num_hidden*2]
        hw_output = highways(hw_input, size)

        # convert to [batch_size,num_steps,num_hidden*2]
        hw_output = tf.reshape(hw_output, [-1, NUM_STEPS, num_hidden * 2])

        # expand dim , cnn_input=[batch_size,num_steps,num_hidden*2,1]
        cnn_input = tf.expand_dims(hw_output, -1)

        # CNN
        pooled_outputs = []
        for idx, filter_size in enumerate(filter_sizes):
            conv = conv2d(cnn_input, filter_numbers[idx], filter_size,
                          num_hidden * 2, name="kernel%d" % idx)
            # 1-max pooling,leave a tensor of shape[batch_size,1,1,num_filters]
            pool = tf.nn.max_pool(
                conv,
                ksize=[1, max_document_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID')
            # Squeeze only the two pooled axes.  A bare tf.squeeze(pool)
            # would also drop the batch axis for a single-sample batch and
            # break the concat along axis 1 below.
            pooled_outputs.append(tf.squeeze(pool, [1, 2]))

        if len(filter_sizes) > 1:
            cnn_output = tf.concat(1, pooled_outputs)
        else:
            cnn_output = pooled_outputs[0]

        # add dropout
        cnn_output = tf.nn.dropout(cnn_output, dropout_keep_prob)

        # fc1 layer
        hidden = tf.matmul(cnn_output, fc1_weights) + fc1_biases
        # fc2 layer
        fc_output = tf.matmul(hidden, fc2_weights) + fc2_biases

        return fc_output

    # Training computation
    # [batch_size,num_classes]
    logits = model(train_data_node)
    # add value clip to logits
    # NOTE(review): this clamps the raw logits (not probabilities) into
    # [1e-10, 1.0] before the cross-entropy; confirm that this is intended.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            tf.clip_by_value(logits, 1e-10, 1.0), train_labels_node))
    regularization = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                      tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    loss += 0.01 * regularization

    tf.scalar_summary('loss', loss)

    # optimizer
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # learning_rate=tf.train.exponential_decay(start_learning_rate,global_step,5000,0.5,staircase=True)
    learning_rate = tf.Variable(start_learning_rate, name="learning_rate")
    tf.scalar_summary('lr', learning_rate)

    # Build the learning-rate update op once.  Calling
    # learning_rate.assign(value) at every decay would add a new node to the
    # graph each time.
    new_learning_rate = tf.placeholder(
        tf.float32, shape=[], name="new_learning_rate")
    update_learning_rate = learning_rate.assign(new_learning_rate)

    # adamoptimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate)

    grads_and_vars = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    # Evaluate model
    train_predict = tf.argmax(logits, 1)
    train_label = tf.argmax(train_labels_node, 1)
    # train accuracy
    train_correct_pred = tf.equal(train_predict, train_label)
    train_accuracy = tf.reduce_mean(tf.cast(train_correct_pred, tf.float32))
    tf.scalar_summary('acc', train_accuracy)

    merged = tf.merge_all_summaries()

    def compute_index(y_label, y_predict):
        """Print accuracy, recall and F1 for macro, micro and weighted averaging."""
        for average in ("macro", "micro", "weighted"):
            print("{}: acc {:g}, recall {:g}, f1 {:g} ".format(
                average,
                accuracy_score(y_label, y_predict),
                recall_score(y_label, y_predict, average=average),
                f1_score(y_label, y_predict, average=average)))

    def dev_step(x_batch, y_batch, best_test_loss, sess):
        """Evaluate one batch without training and maybe decay the learning rate.

        Returns the best evaluation loss to carry into the next call.
        """
        feed_dict = {
            train_data_node: x_batch,
            train_labels_node: y_batch,
            dropout_keep_prob: 1.0
        }
        # Run the graph and fetch some of the nodes.
        # test dont apply train_op (train_op is update gradient).
        summary, step, losses, lr, acc, y_label, y_predict = sess.run(
            [
                merged, global_step, loss, learning_rate, train_accuracy,
                train_label, train_predict
            ],
            feed_dict=feed_dict)
        test_writer.add_summary(summary, step)
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, lr {:g} ,acc {:g}".format(
            time_str, step, losses, lr, acc))
        # print("{}: step {}, loss {:g} ,acc {:g}".format(time_str, step, losses,acc))
        # compute index
        compute_index(y_label, y_predict)

        new_best_test_loss = best_test_loss
        # decide if need to decay learning rate
        if (step % steps_each_check < 100) and (step > 100):
            loss_delta = (best_test_loss
                          if best_test_loss is not None else 0) - losses
            if best_test_loss is not None and loss_delta < decay_delta:
                print(
                    'validation loss did not improve enough, decay learning rate'
                )
                # Decay, but never below the configured floor.
                current_learning_rate = max(lr * learning_rate_decay,
                                            min_learning_rate)
                if current_learning_rate == min_learning_rate:
                    print('It is already the smallest learning rate.')
                sess.run(update_learning_rate,
                         feed_dict={new_learning_rate: current_learning_rate})
                print('new learning rate is: ', current_learning_rate)
            else:
                # update
                new_best_test_loss = losses

        return new_best_test_loss

    # run the training
    with tf.Session() as sess:
        train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train',
                                              sess.graph)
        test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test')
        try:
            tf.initialize_all_variables().run()
            print('Initialized!')
            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), BATCH_SIZE, NUM_EPOCHS)
            # batch count
            batch_count = 0
            best_test_loss = None
            train_fetches = [
                train_op, merged, global_step, loss, train_accuracy
            ]
            # Training loop.For each batch...
            for batch in batches:
                batch_count += 1
                if batch_count % EVAL_FREQUENCY == 0:
                    print("\nEvaluation:")
                    best_test_loss = dev_step(x_test, y_test, best_test_loss,
                                              sess)
                    print("")
                    continue

                x_batch, y_batch = zip(*batch)
                feed_dict = {
                    train_data_node: x_batch,
                    train_labels_node: y_batch,
                    dropout_keep_prob: 0.5
                }
                # Run the graph and fetch some of the nodes.
                if batch_count % META_FREQUENCY == 99:
                    # Traced run: also record run metadata for this step.
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    _, summary, step, losses, acc = sess.run(
                        train_fetches,
                        feed_dict=feed_dict,
                        options=run_options,
                        run_metadata=run_metadata)
                    train_writer.add_run_metadata(run_metadata,
                                                  'step%03d' % step)
                    log_format = "{}: step {}, loss {:g},acc {:g}"
                else:
                    _, summary, step, losses, acc = sess.run(
                        train_fetches, feed_dict=feed_dict)
                    log_format = "{}: step {}, loss {:g}, acc {:g}"

                train_writer.add_summary(summary, step)
                time_str = datetime.datetime.now().isoformat()
                print(log_format.format(time_str, step, losses, acc))
        finally:
            # Flush and release the event files even if training fails.
            train_writer.close()
            test_writer.close()
def main(args):
    """Train the RNN spam/ham text classifier and checkpoint it every epoch.

    Reads hyper-parameters from ``FLAGS``, vectorises the cleaned texts with a
    ``VocabularyProcessor`` (saved under ``FLAGS.storage_folder``), trains on
    an 80/20 shuffled split, writes TensorBoard summaries to ``tensorboard/``
    and saves ``model.ckpt`` in the storage folder after every epoch.

    Args:
        args: Unused inside this function.
    """
    # Set verbosity to get more information from TensorFlow
    tf.logging.set_verbosity(tf.logging.INFO)

    # Create tensorboard folder if not exists
    if not os.path.exists('tensorboard'):
        os.makedirs('tensorboard')

    # Set model parameters
    storage_folder = FLAGS.storage_folder
    learning_rate = FLAGS.learning_rate
    epochs = FLAGS.epochs
    batch_size = FLAGS.batch_size
    max_sequence_length = FLAGS.max_sequence_length
    rnn_size = FLAGS.rnn_size
    embedding_size = FLAGS.embedding_size
    min_word_frequency = FLAGS.min_word_frequency

    # Get text->spam/ham data
    x_data, y_data = get_data()

    # Clean texts
    x_data = [clean_text(x) for x in x_data]

    # Change texts into numeric vectors
    vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        max_sequence_length, min_frequency=min_word_frequency)
    text_processed = np.array(list(vocab_processor.fit_transform(x_data)))

    # Save vocab processor (for loading and future evaluation)
    vocab_processor.save(os.path.join(storage_folder, "vocab"))

    # Shuffle and split data
    y_data = np.array([1 if x == 'ham' else 0 for x in y_data])
    shuffled_ix = np.random.permutation(np.arange(len(y_data)))
    x_shuffled = text_processed[shuffled_ix]
    y_shuffled = y_data[shuffled_ix]

    # Split train/test set
    ix_cutoff = int(len(y_shuffled) * 0.80)
    x_train, x_test = x_shuffled[:ix_cutoff], x_shuffled[ix_cutoff:]
    y_train, y_test = y_shuffled[:ix_cutoff], y_shuffled[ix_cutoff:]
    vocab_size = len(vocab_processor.vocabulary_)

    with tf.Graph().as_default():
        sess = tf.Session()
        # Define placeholders
        x_data_ph = tf.placeholder(tf.int32, [None, max_sequence_length],
                                   name='x_data_ph')
        y_output_ph = tf.placeholder(tf.int32, [None], name='y_output_ph')
        dropout_keep_prob = tf.placeholder(tf.float32,
                                           name='dropout_keep_prob')

        # Define Model
        rnn_model_outputs = rnn_model(x_data_ph, max_sequence_length,
                                      vocab_size, embedding_size, rnn_size,
                                      dropout_keep_prob)

        # Prediction
        # Although we won't use the following operation, we declare and name
        # the probability outputs so that we can recall them later for
        # evaluation
        rnn_prediction = tf.nn.softmax(rnn_model_outputs,
                                       name="probability_outputs")

        # Loss function
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            rnn_model_outputs, y_output_ph)
        # Remember that for this loss function, logits=float32, labels=int32
        loss = tf.reduce_mean(losses, name="loss")

        # Model Accuracy Operation
        accuracy = tf.reduce_mean(
            get_accuracy(rnn_model_outputs, y_output_ph), name="accuracy")

        # Add scalar summaries for Tensorboard
        with tf.name_scope('Scalar_Summaries'):
            tf.scalar_summary('Loss', loss)
            tf.scalar_summary('Accuracy', accuracy)

        # Declare Optimizer/train step
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_step = optimizer.minimize(loss)

        # Declare summary merging operation
        summary_op = tf.merge_all_summaries()

        # Create a graph/Variable saving/loading operations
        saver = tf.train.Saver()

        init = tf.initialize_all_variables()
        sess.run(init)

        # One writer for the whole run, attached to the graph that was
        # actually built.  Opening a writer per batch creates a new event
        # file on every step.
        summary_writer = tf.train.SummaryWriter('tensorboard', sess.graph)
        # Monotonic step for the summaries; the batch index restarts at zero
        # each epoch and would overwrite earlier points.
        global_step = 0

        try:
            # Start training
            for epoch in range(epochs):
                # Shuffle training data
                shuffled_ix = np.random.permutation(np.arange(len(x_train)))
                x_train = x_train[shuffled_ix]
                y_train = y_train[shuffled_ix]

                # Ceiling division: no empty trailing batch when the
                # training-set size is a multiple of batch_size.
                num_batches = (len(x_train) + batch_size - 1) // batch_size

                for i in range(num_batches):
                    # Select train data
                    min_ix = i * batch_size
                    max_ix = min(len(x_train), (i + 1) * batch_size)
                    x_train_batch = x_train[min_ix:max_ix]
                    y_train_batch = y_train[min_ix:max_ix]

                    # Run train step
                    train_dict = {
                        x_data_ph: x_train_batch,
                        y_output_ph: y_train_batch,
                        dropout_keep_prob: 0.5
                    }
                    _, summary = sess.run([train_step, summary_op],
                                          feed_dict=train_dict)
                    summary_writer.add_summary(summary, global_step)
                    global_step += 1

                # Run loss and accuracy for training (on the last batch)
                temp_train_loss, temp_train_acc = sess.run(
                    [loss, accuracy], feed_dict=train_dict)

                test_dict = {
                    x_data_ph: x_test,
                    y_output_ph: y_test,
                    dropout_keep_prob: 1.0
                }
                temp_test_loss, temp_test_acc = sess.run(
                    [loss, accuracy], feed_dict=test_dict)

                # Print Epoch Summary
                print('Epoch: {}, Test Loss: {:.2}, Test Acc: {:.2}'.format(
                    epoch + 1, temp_test_loss, temp_test_acc))

                # Save model every epoch
                saver.save(sess, os.path.join(storage_folder, "model.ckpt"))
        finally:
            summary_writer.close()
def train_model(self, sess, max_iters):
    """Network training loop.

    Builds the network and its losses, sets up a momentum optimizer,
    optionally loads pretrained weights, then runs ``max_iters`` updates with
    periodic summaries and snapshots.

    Args:
        sess: TensorFlow session used for every graph execution.
        max_iters: Number of training iterations to run.

    Raises:
        ValueError: If ``self.pretrained_model`` is set but ends in neither
            ``.npy`` nor ``.ckpt``.
    """
    data_layer = RoIDataLayer(self.imdb, self.bbox_means, self.bbox_stds)

    # a multi-process data runner
    data_runner = self.get_data_runner(sess, data_layer)

    inputs = data_runner.get_inputs()
    inputs['num_classes'] = self.imdb.num_classes
    inputs['num_predicates'] = self.imdb.num_predicates
    inputs['n_iter'] = cfg.TRAIN.INFERENCE_ITER

    self.net = get_network(self.net_name)(inputs)
    self.net.setup()

    # get network-defined losses
    ops = self.net.losses()

    # multitask loss
    loss_list = [ops[k] for k in ops if k.startswith('loss')]
    ops['loss_total'] = losses.total_loss_and_summaries(
        loss_list, 'total_loss')

    # optimizer
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    momentum = cfg.TRAIN.MOMENTUM
    ops['train'] = tf.train.MomentumOptimizer(lr, momentum).minimize(
        ops['loss_total'])

    # Build the decay op once instead of adding a new assign node to the
    # graph every time the step size is reached.
    decay_lr = tf.assign(lr, cfg.TRAIN.LEARNING_RATE * cfg.TRAIN.GAMMA)

    ops_summary = dict(ops)

    # merge summaries
    ops_summary['summary'] = tf.merge_all_summaries()

    train_writer = tf.train.SummaryWriter(self.tf_log, sess.graph)

    self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=None)

    sess.run(tf.initialize_all_variables())
    # data_runner.start_threads(sess, n_threads=10)
    data_runner.start_processes(sess, n_processes=3)

    # initialize variables
    if self.pretrained_model is not None:
        print('Loading pretrained model '
              'weights from {:s}'.format(self.pretrained_model))
        if self.pretrained_model.endswith('.npy'):
            self.net.load(self.pretrained_model, sess, load_fc=True)
        elif self.pretrained_model.endswith('.ckpt'):
            self.saver.restore(sess, self.pretrained_model)
        else:
            # A bare ``raise`` has no active exception to re-raise here.
            raise ValueError('Unsupported pretrained weights format')

    last_snapshot_iter = -1
    step = -1
    timer = Timer()
    iter_timer = Timer()

    # Training loop
    for step in range(max_iters):
        # learning rate
        iter_timer.tic()
        if (step + 1) % cfg.TRAIN.STEPSIZE == 0:
            sess.run(decay_lr)

        # Make one SGD update
        feed_dict = data_runner.get_feed_batch()
        feed_dict[self.net.keep_prob] = 0.5
        timer.tic()
        if (step + 1) % cfg.TRAIN.SUMMARY_FREQ == 0:
            ops_value = sess.run(ops_summary, feed_dict=feed_dict)
            train_writer.add_summary(ops_value['summary'], step)
        else:
            ops_value = sess.run(ops, feed_dict=feed_dict)
        timer.toc()

        # Read the rate through the session passed in, so no default
        # session is required.
        stats = 'iter: %d / %d, lr: %f' % (step + 1, max_iters, sess.run(lr))
        for k in ops_value:
            if k.startswith('loss'):
                stats += ', %s: %4f' % (k, ops_value[k])
        print(stats)

        iter_timer.toc()

        if (step + 1) % (10 * cfg.TRAIN.DISPLAY_FREQ) == 0:
            print('speed: {:.3f}s / iter'.format(timer.average_time))
            print('iter speed: {:.3f}s / iter'.format(
                iter_timer.average_time))

        if (step + 1) % cfg.TRAIN.SNAPSHOT_FREQ == 0:
            last_snapshot_iter = step
            self.snapshot(sess, step)

    # Final snapshot, unless the last iteration was just saved or no
    # iteration ran at all.
    if step >= 0 and last_snapshot_iter != step:
        self.snapshot(sess, step)
def __init__(self, is_training, config):
    """Build the RNN classifier graph.

    :param is_training: when false, construction stops after the
        predictions; no cost or optimizer is created.
    :param config: hyper-parameter object; this method reads ``batch_size``,
        ``num_steps``, ``vocab_size``, ``classes``, ``embedding_size``,
        ``init_scale`` and ``get_summary`` from it and passes it on to
        ``getRNN``.
    :rtype: model to train or evaluate
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._input_length = tf.placeholder(tf.int32, batch_size)
    self._targets = tf.placeholder(tf.float32, [batch_size, config.classes])

    # The embedding matrix is not trainable in this graph.
    self.embedding = tf.get_variable(
        "embedding", [vocab_size, config.embedding_size],
        dtype=data_type(), trainable=False)
    inputs_embedded = tf.nn.embedding_lookup(self.embedding,
                                             self._input_data)

    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    self._output, forward, backward = self.getRNN(
        config, inputs_embedded, initializer, is_training)
    # Flatten both returned tensors per example and join them into one
    # feature vector.
    rnn_output = tf.concat([tf.reshape(forward, [batch_size, -1]),
                            tf.reshape(backward, [batch_size, -1])], 1)

    if config.get_summary:
        variable_summaries(self._output, config.__class__.__name__)

    with tf.variable_scope("logistic_regression", reuse=None,
                           initializer=initializer):
        # logistic_w = tf.get_variable("_W", [config.num_units * config.num_layers * 4, config.classes], dtype=data_type())
        logistic_w = tf.get_variable(
            "_W", [rnn_output.get_shape().as_list()[1], config.classes],
            dtype=data_type())
        logistic_b = tf.get_variable("_b", [config.classes],
                                     dtype=data_type())

        # Construct a linear model
        self._logits = tf.add(tf.matmul(rnn_output, logistic_w), logistic_b)

        if config.get_summary:
            variable_summaries(logistic_w, "logistic_w")
            variable_summaries(logistic_b, "logistic_b")

    # Always define the attribute so readers never hit an AttributeError.
    # tf.summary.merge_all replaces tf.merge_all_summaries, which does not
    # exist in the TensorFlow versions that accept the
    # tf.concat(values, axis) argument order used above.
    self._merged_summary = None
    if config.get_summary:
        self._merged_summary = tf.summary.merge_all()

    self._predictions = tf.nn.softmax(self._logits)

    # No need for loss or gradients if not training
    if not is_training:
        return

    # Minimize error using cross entropy.  log_softmax on the logits is the
    # same quantity as log(softmax(logits)) but cannot produce log(0).
    # NOTE(review): the targets are passed through a softmax before being
    # used as the reference distribution; confirm that this is intended.
    log_predictions = tf.nn.log_softmax(self._logits)
    self._cost = tf.reduce_mean(
        -tf.reduce_sum(tf.nn.softmax(self._targets) * log_predictions,
                       reduction_indices=1))

    # Gradient Descent
    # self._optimizer = tf.train.GradientDescentOptimizer(config.learning_rate).minimize(self._cost)
    # tvars = tf.trainable_variables()
    # grads, _ = tf.clip_by_global_norm(tf.gradients(self._cost, tvars), 1)
    # self._optimizer = self._optimizer.apply_gradients(zip(grads, tvars))

    optimizer = tf.train.AdamOptimizer(1e-3)
    gradients, variables = zip(*optimizer.compute_gradients(self._cost))
    # Clip by global norm (3.0) before applying the update.
    gradients, _ = tf.clip_by_global_norm(gradients, 3.0)
    self._optimizer = optimizer.apply_gradients(zip(gradients, variables))
prevDim = STATE_SIZE prevOut = state_input Q = DeterministicMLP("Q", STATE_SIZE, Q_NET_SIZES, DISCRETIZATION, LEARNING_RATE, GAMMA, True, 0.0, 0.0) Q_target = DeterministicMLP("Q_target", STATE_SIZE, Q_NET_SIZES, DISCRETIZATION, LEARNING_RATE, GAMMA, True, 0.0, 0.0) # training procedure y_estimate = tf.placeholder(tf.float32, [None, DISCRETIZATION], name="y_estimate") saver = tf.train.Saver() init = tf.initialize_all_variables() summary = tf.merge_all_summaries() sess = tf.Session() logger = tf.train.SummaryWriter(OUT_DIR, sess.graph) # initialize variables (and target network) sess.run(init) Ws, bs = Q.get_weights() Q_target.assign(sess, Ws, bs) # initialize environment env = gym.make(ENVIRONMENT) # initialize replay buffer R = ReplayBuffer(STATE_SIZE, ACTION_SIZE, BUFFER_SIZE)
def __init__(self, env, task, visualise, config):
    """
    Set up an A3C worker: a shared ("global") network, a worker-local copy,
    the policy-gradient loss on the local copy, and the ops that push local
    gradients to the shared parameters and pull the shared parameters back.

    Some of the wiring below exists only because of the way TensorFlow
    handles data parallelism; apart from that we define the model, its
    inputs, and how one policy-gradient step is computed.
    """
    # TODO: make A3C accept config
    self.env = env
    self.task = task

    worker_device = "/job:worker/task:{}/cpu:0".format(task)
    shared_device = tf.train.replica_device_setter(
        1, worker_device=worker_device)

    with tf.device(shared_device):
        with tf.variable_scope("global"):
            self.network = I2A(config)
            self.global_step = tf.get_variable(
                "global_step", [], tf.int32,
                initializer=tf.constant_initializer(0, dtype=tf.int32),
                trainable=False)

    with tf.device(worker_device):
        with tf.variable_scope("local"):
            pi = I2A(config)
            self.local_network = pi
            pi.global_step = self.global_step

        # Externally fed inputs: the actions, the advantages (as calculated
        # in process_rollout) and the returns.
        self.ac = tf.placeholder(tf.float32, [None, env.action_space.n],
                                 name="ac")
        self.adv = tf.placeholder(tf.float32, [None], name="adv")
        self.r = tf.placeholder(tf.float32, [None], name="r")

        log_prob_tf = tf.nn.log_softmax(pi.logits)
        prob_tf = tf.nn.softmax(pi.logits)

        # "Policy gradients" loss: its derivative is the policy gradient.
        pi_loss = - tf.reduce_sum(
            tf.reduce_sum(log_prob_tf * self.ac, [1]) * self.adv)

        # Value-function loss.
        vf_loss = 0.5 * tf.reduce_sum(tf.square(pi.vf - self.r))
        entropy = - tf.reduce_sum(prob_tf * log_prob_tf)

        bs = tf.to_float(tf.shape(pi.x)[0])
        self.loss = pi_loss + 0.5 * vf_loss - entropy * 0.01

        # 20 is the number of "local steps": how many timesteps the policy
        # runs before the parameters are updated.  More local steps lower
        # the variance of the policy-gradient estimate but make parameter
        # updates less frequent, which slows learning.  Values much smaller
        # than 20 were found to make the algorithm harder to tune.
        self.runner = RunnerThread(env, pi, 20, visualise)

        grads = tf.gradients(self.loss, pi.var_list)

        # Choose the summary API once, then emit the same summaries either way.
        if use_tf12_api:
            scalar_summary = tf.summary.scalar
            image_summary = tf.summary.image
            merge_summaries = tf.summary.merge_all
        else:
            scalar_summary = tf.scalar_summary
            image_summary = tf.image_summary
            merge_summaries = tf.merge_all_summaries

        scalar_summary("model/policy_loss", pi_loss / bs)
        scalar_summary("model/value_loss", vf_loss / bs)
        scalar_summary("model/entropy", entropy / bs)
        image_summary("model/state", pi.x)
        scalar_summary("model/grad_global_norm", tf.global_norm(grads))
        scalar_summary("model/var_global_norm", tf.global_norm(pi.var_list))
        self.summary_op = merge_summaries()

        grads, _ = tf.clip_by_global_norm(grads, 40.0)

        # Copy weights from the parameter server to the local model.
        sync_ops = [
            local_var.assign(shared_var)
            for local_var, shared_var in zip(pi.var_list,
                                             self.network.var_list)
        ]
        self.sync = tf.group(*sync_ops)

        # Local gradients are applied to the shared variables.
        grads_and_vars = list(zip(grads, self.network.var_list))
        inc_step = self.global_step.assign_add(tf.shape(pi.x)[0])

        # Each worker has a different set of adam optimizer parameters.
        opt = tf.train.AdamOptimizer(1e-4)
        apply_op = opt.apply_gradients(grads_and_vars)
        self.train_op = tf.group(apply_op, inc_step)
        self.summary_writer = None
        self.local_steps = 0
log("Couldn't restore the session properly, falling back to default initialization." ) log('##############################################################################' ) sess.run(tf.initialize_all_variables()) else: log("Data processing OK, creating network...") sess.run(tf.initialize_all_variables()) # Summaries for loss and accuracy loss_summary = tf.scalar_summary("Training loss", cross_entropy) valid_loss_summary = tf.scalar_summary("Validation loss", valid_mean_loss) valid_accuracy_summary = tf.scalar_summary("Validation accuracy", valid_mean_accuracy) summary_writer = tf.train.SummaryWriter(SUMMARY_DIR, sess.graph) tf.merge_all_summaries() log("=======================================================") # Training if FLAGS.train: log("Starting training...") # Batches batches = data_helpers.batch_iter(zip(x_train, y_train), FLAGS.batch_size, FLAGS.epochs) test_batches = list( data_helpers.batch_iter(zip(x_test, y_test), FLAGS.batch_size, 1)) my_batch = batches.next() # To use with human_readable_output() # Pretty-printing variables global_step = 0
def main(argv=None):  # pylint: disable=unused-argument
    """Train the triplet-loss embedding network with per-epoch validation.

    Picks (or creates) the log and model directories, stores training info,
    builds the inference/loss/train graph and then, for every epoch, trains,
    validates on the validation and training sets, and periodically saves a
    checkpoint.

    Args:
        argv: Command-line arguments recorded with the training info; may be
            None, in which case an empty command line is recorded.

    Raises:
        ValueError: If ``FLAGS.model_name`` is set but no checkpoint is found
            in the model directory.
    """
    if FLAGS.model_name:
        subdir = FLAGS.model_name
        preload_model = True
    else:
        subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
        preload_model = False

    log_dir = os.path.join(os.path.expanduser(FLAGS.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):
        # Create the log directory (and missing parents) if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(FLAGS.models_base_dir),
                             subdir)
    if not os.path.isdir(model_dir):
        # Create the model directory (and missing parents) if it doesn't exist
        os.makedirs(model_dir)

    # Store some git revision info in a text file in the log directory.
    # ``argv`` defaults to None, which str.join would reject.
    src_path, _ = os.path.split(os.path.realpath(__file__))
    store_training_info(src_path, log_dir, ' '.join(argv or []))

    np.random.seed(seed=FLAGS.seed)
    dataset = facenet.get_dataset(FLAGS.data_dir)
    train_set, validation_set = facenet.split_dataset(
        dataset, FLAGS.train_set_fraction, FLAGS.split_mode)

    print('Model directory: %s' % model_dir)

    with tf.Graph().as_default():
        tf.set_random_seed(FLAGS.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for input images
        images_placeholder = tf.placeholder(
            tf.float32,
            shape=(None, FLAGS.image_size, FLAGS.image_size, 3),
            name='input')

        # Placeholder for phase_train
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        # Build the inference graph
        embeddings = network.inference(
            images_placeholder, FLAGS.pool_type, FLAGS.use_lrn,
            FLAGS.keep_probability, phase_train=phase_train_placeholder)

        # Split example embeddings into anchor, positive and negative
        anchor, positive, negative = tf.split(0, 3, embeddings)

        # Calculate triplet loss
        loss = facenet.triplet_loss(anchor, positive, negative, FLAGS.alpha)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters
        train_op, _ = facenet.train(loss, global_step, FLAGS.optimizer,
                                    FLAGS.learning_rate,
                                    FLAGS.moving_average_decay)

        # Create a saver
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=0)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(log_dir, sess.graph)

        with sess.as_default():

            if preload_model:
                ckpt = tf.train.get_checkpoint_state(model_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    raise ValueError('Checkpoint not found')

            # Training and validation loop
            for epoch in range(FLAGS.max_nrof_epochs):
                # Train for one epoch
                step = train(sess, train_set, epoch, images_placeholder,
                             phase_train_placeholder, global_step, embeddings,
                             loss, train_op, summary_op, summary_writer)

                # Store the state of the random number generator so the
                # reseeding below does not disturb the training sequence.
                rng_state = np.random.get_state()
                # Test on validation set
                np.random.seed(seed=FLAGS.seed)
                validate(sess, validation_set, epoch, images_placeholder,
                         phase_train_placeholder, global_step, embeddings,
                         loss, 'validation', summary_writer)
                # Test on training set
                np.random.seed(seed=FLAGS.seed)
                validate(sess, train_set, epoch, images_placeholder,
                         phase_train_placeholder, global_step, embeddings,
                         loss, 'training', summary_writer)
                # Restore state of the random number generator
                np.random.set_state(rng_state)

                last_epoch = epoch == FLAGS.max_nrof_epochs - 1
                if (epoch % FLAGS.checkpoint_period == 0) or last_epoch:
                    # Save the model checkpoint
                    print('Saving checkpoint')
                    checkpoint_path = os.path.join(model_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
# expanded_mean = tf.expand_dims( tf.reduce_mean(x,0), 0 ) # expanded_mean = tf.pad( expanded_mean, [[0,n-1], [0,0], [0,0], [0,0],[0,0]]) # mean_diff = expanded_mean - tf.reshape(x, shape=[-1, 56, 64, 48, CHANNELS],) # cost_to_mean = tf.reduce_sum(tf.square(self_diff)) # tf.scalar_summary('Cost_to_Mean', cost_to_mean, collections=['CAE']) # Initialize all vars print("Functions ready") ## TODO: Determine if I need this or .gloval_variables_initializer() init = tf.initialize_all_variables() saver = tf.train.Saver() with tf.Session() as sess: merged = tf.merge_all_summaries(key='CAE') img_merged = tf.merge_all_summaries(key='CAE_img') timestr = time.strftime("%y%m%d-%H%M%S") train_writer = tf.train.SummaryWriter( './tensorflow_logs/train_32-256_mch_elu_' + timestr + '/', sess.graph) test_writer = tf.train.SummaryWriter( './tensorflow_logs/test_32-256_mch_elu_' + timestr + '/', sess.graph) init = tf.initialize_all_variables() sess.run(init) # batch_size = 16 batch_size = len(testlabels) n_epochs = 7000 epoch_i = 0 if LOAD_FEATURES:
def main(argv=None):
    """Build the GAN graph, then either visualise a restored model or train.

    If a checkpoint exists in ``FLAGS.logs_dir`` it is restored, sample
    images are written and the function returns.  Otherwise the
    discriminator and generator are trained for ``MAX_ITERATIONS`` steps,
    with summaries every 10 steps and a checkpoint every 500.

    Args:
        argv: Unused.
    """
    import os  # local import: only needed to build the checkpoint path

    print("Setting up image reader...")
    train_images, valid_images, test_images = flowers.read_dataset(
        FLAGS.data_dir)
    # image_options = {"crop": True, "crop_size": MODEL_IMAGE_SIZE, "resize": True, "resize_size": IMAGE_SIZE}
    # dataset_reader = dataset.BatchDatset(train_images, image_options)
    # images = tf.placeholder(tf.float32, [FLAGS.batch_size, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS])
    filename_queue = tf.train.string_input_producer(train_images)
    images = read_input_queue(filename_queue)
    train_phase = tf.placeholder(tf.bool)
    z_vec = tf.placeholder(tf.float32, [None, FLAGS.z_dim], name="z")

    print("Setting up network model...")
    tf.histogram_summary("z", z_vec)
    tf.image_summary("image_real", images, max_images=1)
    gen_images = generator(z_vec, train_phase)
    tf.image_summary("image_generated", gen_images, max_images=3)

    with tf.variable_scope("discriminator") as scope:
        discriminator_real_prob, logits_real, feature_real = discriminator(
            images, train_phase)
        utils.add_activation_summary(
            tf.identity(discriminator_real_prob, name='disc_real_prob'))
        # Second call shares the discriminator variables created above.
        scope.reuse_variables()
        discriminator_fake_prob, logits_fake, feature_fake = discriminator(
            gen_images, train_phase)
        utils.add_activation_summary(
            tf.identity(discriminator_fake_prob, name='disc_fake_prob'))

    discriminator_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits_real,
                                                tf.ones_like(logits_real)))
    discrimintator_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits_fake,
                                                tf.zeros_like(logits_fake)))
    discriminator_loss = discrimintator_loss_fake + discriminator_loss_real
    gen_loss_1 = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits_fake,
                                                tf.ones_like(logits_fake)))
    # Feature-matching term between real and generated discriminator features.
    gen_loss_2 = tf.reduce_mean(
        tf.nn.l2_loss(feature_real - feature_fake)) / (IMAGE_SIZE * IMAGE_SIZE)
    gen_loss = gen_loss_1 + 0.1 * gen_loss_2

    tf.scalar_summary("Discriminator_loss_real", discriminator_loss_real)
    tf.scalar_summary("Discrimintator_loss_fake", discrimintator_loss_fake)
    tf.scalar_summary("Discriminator_loss", discriminator_loss)
    tf.scalar_summary("Generator_loss", gen_loss)

    train_variables = tf.trainable_variables()
    generator_variables = [
        v for v in train_variables if v.name.startswith("generator")
    ]
    # print(map(lambda x: x.op.name, generator_variables))
    discriminator_variables = [
        v for v in train_variables if v.name.startswith("discriminator")
    ]
    # print(map(lambda x: x.op.name, discriminator_variables))
    generator_train_op = train(gen_loss, generator_variables)
    discriminator_train_op = train(discriminator_loss,
                                   discriminator_variables)

    for v in train_variables:
        utils.add_to_regularization_and_summary(var=v)

    def visualize():
        """Generate and save ``count`` images that vary one latent component."""
        count = 10
        # z_feed = 10.0 * np.random.randn(count, FLAGS.z_dim)
        z_feed = np.tile(
            np.random.uniform(-1.0, 1.0,
                              size=(1, FLAGS.z_dim)).astype(np.float32),
            (count, 1))
        # NOTE(review): index 75 is hard-coded and requires FLAGS.z_dim > 75.
        z_feed[:, 75] = sorted(10.0 * np.random.randn(count))
        image = sess.run(gen_images,
                         feed_dict={z_vec: z_feed, train_phase: False})

        for iii in xrange(count):
            print(image.shape)
            utils.save_image(image[iii, :, :, :], IMAGE_SIZE, FLAGS.logs_dir,
                             name=str(iii))
            print("Saving image" + str(iii))

    sess = tf.Session()
    summary_op = tf.merge_all_summaries()
    saver = tf.train.Saver()
    summary_writer = tf.train.SummaryWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.initialize_all_variables())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
        visualize()
        return

    # Join with a separator so checkpoints land inside logs_dir, which is
    # where get_checkpoint_state looks above, even when the flag has no
    # trailing slash.
    checkpoint_path = os.path.join(FLAGS.logs_dir, "model.ckpt")

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess, coord)
    try:
        for itr in xrange(MAX_ITERATIONS):
            batch_z = np.random.uniform(
                -1.0, 1.0,
                size=[FLAGS.batch_size, FLAGS.z_dim]).astype(np.float32)
            # feed_dict = {images: dataset_reader.next_batch(FLAGS.batch_size), z_vec: batch_z, train_phase: True}
            feed_dict = {z_vec: batch_z, train_phase: True}

            # One discriminator update followed by two generator updates.
            sess.run(discriminator_train_op, feed_dict=feed_dict)
            sess.run(generator_train_op, feed_dict=feed_dict)
            sess.run(generator_train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                g_loss_val, d_loss_val, summary_str = sess.run(
                    [gen_loss, discriminator_loss, summary_op],
                    feed_dict=feed_dict)
                print("Step: %d, generator loss: %g, discriminator_loss: %g" %
                      (itr, g_loss_val, d_loss_val))
                summary_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                saver.save(sess, checkpoint_path, global_step=itr)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    except KeyboardInterrupt:
        print("Ending Training...")
    finally:
        coord.request_stop()

    # Wait for threads to finish.
    coord.join(threads)
def create_graph(self):
    """Build the TensorFlow graph for acting, target estimation and training.

    Reads from ``self``:
        state_size: tuple appended to ``(None,)`` to shape the state placeholders.
        action_size: width of the action mask / depth of the one-hot encoding.
        qnet, target_qnet: callables applied to a state tensor; ``qnet`` must
            also expose ``variables()``.
        qnet_optimizer: object providing ``compute_gradients`` and
            ``apply_gradients``.
        discount_factor: multiplier applied to the next-state value.
        DoubleDQN: truthy to select the next action with ``qnet`` and evaluate
            it with ``target_qnet``; falsy to take the max over ``target_qnet``.

    Creates on ``self``:
        Placeholders ``state``, ``next_state``, ``next_state_mask``, ``rewards``
        and ``action_mask``; tensors ``q_values``, ``predicted_actions``,
        ``next_q_values_targetqnet``, ``next_max_q_values``, ``target_q_values``,
        ``y``, ``error`` and ``loss`` (plus ``next_q_values_qnet``,
        ``next_selected_actions`` and ``next_selected_actions_onehot`` in the
        Double DQN branch); ops ``qnet_optimize``, ``hard_copy_to_target`` and
        ``summarize``.
    """
    # Pick action given state -> action = argmax( qnet(state) )
    with tf.name_scope("pick_action"):
        self.state = tf.placeholder(tf.float32, (None,) + self.state_size, name="state")
        self.q_values = tf.identity(self.qnet(self.state), name="q_values")
        self.predicted_actions = tf.argmax(self.q_values, dimension=1, name="predicted_actions")
        # Save max q-values to track learning.
        # NOTE(review): the summarized value is a scalar (mean over the batch of
        # the per-sample max), so a scalar summary may have been intended --
        # confirm before changing, since it alters what is logged.
        tf.histogram_summary("Q values", tf.reduce_mean(tf.reduce_max(self.q_values, 1)))

    # Predict target future reward: r + gamma * max_a'[ Q'(s') ]
    with tf.name_scope("estimating_future_rewards"):
        # DQN vs DoubleDQN (DDQN)
        # In DQN the target is
        #   y_i^DQN = r + gamma * max_a' Q_target(next_state, a')
        # In DoubleDQN it's changed to
        #   y_i^DDQN = r + gamma * Q_target(next_state, argmax_a' Q(next_state, a'))
        # In practice, we use the actual QNet (non target) to select the action
        # for the next state, but then use its Q value estimated using the
        # target network.
        self.next_state = tf.placeholder(tf.float32, (None,) + self.state_size, name="next_state")
        # 0 for terminal states
        self.next_state_mask = tf.placeholder(tf.float32, (None,), name="next_state_mask")
        self.rewards = tf.placeholder(tf.float32, (None,), name="rewards")

        self.next_q_values_targetqnet = tf.stop_gradient(
            self.target_qnet(self.next_state), name="next_q_values_targetqnet")

        if self.DoubleDQN:
            # DoubleDQN
            # Call form of print: same output on Python 2, valid on Python 3.
            print("Double DQN")
            self.next_q_values_qnet = tf.stop_gradient(
                self.qnet(self.next_state), name="next_q_values_qnet")
            self.next_selected_actions = tf.argmax(self.next_q_values_qnet, dimension=1)
            self.next_selected_actions_onehot = tf.one_hot(
                indices=self.next_selected_actions, depth=self.action_size)
            # The one-hot product followed by a row sum picks, per sample, the
            # target-network value of the action chosen by qnet.
            self.next_max_q_values = tf.stop_gradient(
                tf.reduce_sum(
                    tf.mul(self.next_q_values_targetqnet, self.next_selected_actions_onehot),
                    reduction_indices=[1, ])
                * self.next_state_mask)
        else:
            # DQN
            print("Regular DQN")
            self.next_max_q_values = tf.reduce_max(
                self.next_q_values_targetqnet, reduction_indices=[1, ]) * self.next_state_mask

        self.target_q_values = self.rewards + self.discount_factor * self.next_max_q_values

    # Gradient descent
    with tf.name_scope("optimization_step"):
        # action that was selected
        self.action_mask = tf.placeholder(tf.float32, (None, self.action_size), name="action_mask")
        self.y = tf.reduce_sum(self.q_values * self.action_mask, reduction_indices=[1, ])

        ## ERROR CLIPPING AS IN NATURE'S PAPER
        # Quadratic for |error| <= 1 and linear beyond it:
        #   0.5 * e^2        when e <= 1
        #   e - 0.5          when e >  1
        self.error = tf.abs(self.y - self.target_q_values)
        quadratic_part = tf.clip_by_value(self.error, 0.0, 1.0)
        linear_part = self.error - quadratic_part
        self.loss = tf.reduce_mean(0.5 * tf.square(quadratic_part) + linear_part)

        # Clip each available gradient to norm 10; pairs whose gradient is None
        # are passed through untouched.
        qnet_gradients = [
            (tf.clip_by_norm(grad, 10), var) if grad is not None else (grad, var)
            for grad, var in self.qnet_optimizer.compute_gradients(
                self.loss, self.qnet.variables())
        ]
        self.qnet_optimize = self.qnet_optimizer.apply_gradients(qnet_gradients)

    with tf.name_scope("target_network_update"):
        self.hard_copy_to_target = DQN.copy_to_target_network(self.qnet, self.target_qnet)

    self.summarize = tf.merge_all_summaries()