def train(self, data=0, steps=-1, dropout=None, display_step=10, test_step=200,
          batch_size=10, do_resume=False):
    if data:
        self.data = data
    steps = 9999999 if steps == -1 else steps
    session = self.session
    tf.add_check_numerics_ops()  # abort on the first Inf/NaN tensor in the graph
    try:
        self.summaries = tf.summary.merge_all()  # TF >= 1.0
    except AttributeError:
        self.summaries = tf.merge_all_summaries()  # TF < 1.0
    try:
        self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)
    except AttributeError:
        self.summary_writer = tf.train.SummaryWriter(current_logdir(), session.graph)
    if not dropout:
        dropout = 1.0  # keep all units
    x = self.x
    y = self.y
    keep_prob = self.keep_prob
    try:
        saver = tf.train.Saver(tf.global_variables())
    except AttributeError:
        saver = tf.train.Saver(tf.all_variables())
    snapshot = self.name + str(get_last_tensorboard_run_nr())
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if do_resume and checkpoint:
        print("LOADING " + checkpoint + " !!!")
        saver.restore(session, checkpoint)
    try:
        session.run([tf.global_variables_initializer()])
    except AttributeError:
        session.run([tf.initialize_all_variables()])
    start = int(time.time())  # was missing in the original; used below for elapsed time
    step = 0  # show first
    while step < steps:
        batch_xs, batch_ys = self.next_batch(batch_size, session)
        # Fit training using batch data
        feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout,
                     self.train_phase: True}
        loss, _ = session.run([self.cost, self.optimizer], feed_dict=feed_dict)
        if step % display_step == 0:
            seconds = int(time.time()) - start
            # Calculate batch accuracy and loss with dropout disabled
            feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
            acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
            # self.summary_writer.add_summary(summary, step)  # only test summaries, for a smoother curve
            print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(
                step, loss, acc, seconds), end=' ')
            if str(loss) == "nan":
                print("\nLoss gradient explosion, exiting!!!")  # could restore a checkpoint here
                return
        if step % test_step == 0:
            self.test(step)
        if step % save_step == 0 and step > 0:
            print("SAVING snapshot %s" % snapshot)
            saver.save(session, checkpoint_dir + snapshot + ".ckpt", self.global_step)
        step += 1
    print("\nOptimization Finished!")
    self.test(step, number=10000)  # final test
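The try/except pairs above bridge the pre-1.0 and 1.x TensorFlow API renames (tf.merge_all_summaries became tf.summary.merge_all, tf.all_variables became tf.global_variables, and so on). A small hedged sketch of the same fallback as a standalone helper, with a hypothetical name and an explicit AttributeError instead of a bare except:

import tensorflow as tf

def compat_merge_all_summaries():
    # TF >= 1.0 exposes tf.summary.merge_all; older releases use
    # tf.merge_all_summaries, which was removed in 1.0.
    try:
        return tf.summary.merge_all()
    except AttributeError:
        return tf.merge_all_summaries()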
def __init_output(self):
    with tf.variable_scope('output'):
        # Losses
        self.regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        self.cross_entropy_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.y, name='loss'))
        self.loss = self.regularization_loss + self.cross_entropy_loss

        # Optimizer
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.args.learning_rate)
            self.train_op = self.optimizer.minimize(self.loss)
            # This is for debugging NaNs. Check TensorFlow documentation.
            self.check_op = tf.add_check_numerics_ops()

        # Output and Metrics
        self.y_out_softmax = tf.nn.softmax(self.logits)  # softmax normalizes the logits into class probabilities
        self.y_out_argmax = tf.argmax(
            self.y_out_softmax, axis=-1, output_type=tf.int32)  # argmax gives the predicted class
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.y, self.y_out_argmax), tf.float32))  # accuracy

    # Record the metrics
    with tf.name_scope('train-summary-per-iteration'):
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.accuracy)
        self.summaries_merged = tf.summary.merge_all()
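A minimal usage sketch for the graph built above. The names `net` (an instance of this class), `net.x`, `batch_x`, and `batch_y` are assumptions, not part of the snippet: fetching check_op alongside train_op makes the step fail fast with the offending op's name in an InvalidArgumentError as soon as any tensor becomes Inf or NaN.

# net, net.x, batch_x, batch_y are assumed names for illustration only
_, _, loss_val = sess.run(
    [net.train_op, net.check_op, net.loss],
    feed_dict={net.x: batch_x, net.y: batch_y})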
def build_computation_graphs(self):
    self.model.declare_params(self.param_init_function)
    self.tf_nodes = {}
    to_build = {k: v for k, v in self.model_hypers_to_build_graph.iteritems()
                if k in self.data.get_hypers_names()}
    for model_hypers, build_graph in to_build.iteritems():
        print("Construct forward graph... ", end="")
        forward_time_start = time.time()
        inputs, outputs = build_graph(self.model)
        loss, display_loss, output_placeholders, mask_placeholders, loss_nodes = \
            self.construct_loss(outputs)
        print("done in %.2fs." % (time.time() - forward_time_start))

        optimizer = self.make_optimizer()
        gradient_time_start = time.time()
        print("Construct gradient graph... ", end="")
        grads_and_vars = self.compute_gradients(optimizer, loss)
        print("done in %.2fs." % (time.time() - gradient_time_start))

        gradient_apply_time_start = time.time()
        print("Construct apply gradient graph... ", end="")
        train_op = self.apply_update(optimizer, grads_and_vars)
        print("done in %.2fs." % (time.time() - gradient_apply_time_start))

        if self.do_debug:
            check_time_start = time.time()
            print("Construct check numerics graph... ", end="")
            self.check_ops.append(tf.add_check_numerics_ops())
            print("done in %.2fs." % (time.time() - check_time_start))

        if self.make_log:
            self.summary_nodes["train"] = tf.scalar_summary('train_loss', display_loss)
            self.summary_nodes["validate"] = tf.scalar_summary('validate_loss', display_loss)
            self.summary_nodes["params"] = []
            for p_name, p_node in self.model.params.iteritems():
                n_elements = p_node.get_shape()[0].value
                for i in range(n_elements):
                    self.summary_nodes["params"].append(
                        tf.scalar_summary('%s/%i' % (p_name, i), p_node[i]))

        placeholders = {}
        placeholders.update(inputs)
        placeholders.update(output_placeholders)
        placeholders.update(mask_placeholders)
        self.tf_nodes[model_hypers] = {
            "inputs": inputs,
            "outputs": outputs,
            "placeholders": placeholders,
            "loss_nodes": loss_nodes,
            "loss": loss,
            "display_loss": display_loss,
            "grads_and_vars": grads_and_vars,
            "train_op": train_op,
        }
def testBoth(self):
    with self.test_session(graph=tf.Graph()):
        t1 = tf.constant([1.0, 0.0])
        t2 = tf.constant([0.0, 0.0])
        a = tf.div(t1, t2)
        check = tf.add_check_numerics_ops()
        a = control_flow_ops.with_dependencies([check], a)
        with self.assertRaisesOpError("Inf and NaN"):
            a.eval()
def testNaN(self):
    for use_gpu in [True, False]:
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()):
            t1 = tf.constant(0.0)
            t2 = tf.constant(0.0)
            a = tf.div(t1, t2)
            check = tf.add_check_numerics_ops()
            a = control_flow_ops.with_dependencies([check], a)
            with self.assertRaisesOpError("NaN"):
                a.eval()
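The two tests above show the canonical graph-mode wiring: build all numeric ops first, call tf.add_check_numerics_ops() afterwards (it only guards ops that already exist in the graph), then attach the returned group op as a control dependency of whatever you evaluate. A self-contained sketch with made-up constants:

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

graph = tf.Graph()
with graph.as_default():
    t1 = tf.constant([1.0, 2.0])
    t2 = tf.constant([2.0, 4.0])
    quotient = tf.div(t1, t2)            # finite here; zeros in t2 would trip the check
    check = tf.add_check_numerics_ops()  # must come after the ops it should guard
    quotient = control_flow_ops.with_dependencies([check], quotient)

with tf.Session(graph=graph) as sess:
    print(sess.run(quotient))  # [0.5 0.5]; raises InvalidArgumentError on Inf/NaN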
def create_model(sess, dataset, forward_only):
    start_time = time.time()

    # initializer = tf.random_normal_initializer(0.0, 0.1)
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope("model", initializer=initializer):
        model = LASModel(
            dataset, FLAGS.batch_size, FLAGS.features_width, FLAGS.features_len_max,
            FLAGS.vocab_size, FLAGS.embedding_size, FLAGS.tokens_len_max,
            FLAGS.encoder_cell_size, FLAGS.decoder_cell_size,
            FLAGS.attention_embedding_size, FLAGS.max_gradient_norm,
            FLAGS.learning_rate)

    tf.add_check_numerics_ops()

    sess.run(tf.initialize_all_variables())
    tf.train.start_queue_runners(sess=sess)

    print('create_model graph time %f' % (time.time() - start_time))
    return model
def train(args):
    fnames = glob.glob('../mp3/*.mp3')[:1]
    traces = [util.loadf(fname) for fname in fnames]
    traces = np.hstack(traces)
    dirname = 'save-vrnn'
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    with open(os.path.join(dirname, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    model = VRNN(args)
    # load previously trained model if applicable
    ckpt = tf.train.get_checkpoint_state(dirname)
    if ckpt:
        model.load_model(dirname)

    with tf.Session() as sess:
        summary_writer = tf.train.SummaryWriter(
            'logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph)
        check = tf.add_check_numerics_ops()
        merged = tf.merge_all_summaries()
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        start = time.time()
        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            state = model.initial_state
            for b in xrange(100):
                if (e * 100 + b) % int(traces.shape[0] / (args.chunk_samples * args.batch_size)) == 0:
                    data, _, _ = util.load_augment_data(traces, args.chunk_samples)
                    print "Refreshed data"
                # synthetic ramp signals are fed instead of audio batches
                slopes = 10 * np.random.random((1, 1, 2 * args.chunk_samples)) + 1
                x = slopes * np.arange(args.seq_length)[np.newaxis, :, np.newaxis] - 1
                y = slopes * np.arange(args.seq_length)[np.newaxis, :, np.newaxis]
                y[:, :, args.chunk_samples:] = 0.
                x[:, :, args.chunk_samples:] = 0.
                feed = {model.input_data: x, model.target_data: y}
                train_loss, _, cr, summary, sigma = sess.run(
                    [model.cost, model.train_op, check, merged, model.sigma], feed)
                summary_writer.add_summary(summary, e * 100 + b)
                if (e * 100 + b) % args.save_every == 0 and ((e * 100 + b) > 0):
                    checkpoint_path = os.path.join('save', 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * 100 + b)
                    print "model saved to {}".format(checkpoint_path)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}/{:.3f}" \
                    .format(e * 100 + b, args.num_epochs * 100, e,
                            args.chunk_samples * train_loss, end - start,
                            (sigma[:, 200:]).mean(axis=0).mean(axis=0),
                            (sigma[:, :200]).mean(axis=0).mean(axis=0))
                start = time.time()
def train(args):
    fnames = glob.glob('../mp3/*01*.mp3')[:1]
    traces = [util.loadf(fname) for fname in fnames]
    with open(os.path.join('save', 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    model = Model(args)

    with tf.Session() as sess:
        summary_writer = tf.train.SummaryWriter(
            'logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph)
        check = tf.add_check_numerics_ops()
        merged = tf.merge_all_summaries()
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        start = time.time()
        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            state = model.initial_state.eval()
            for b in xrange(100):
                if b % 25 == 0:
                    data, _, _ = util.load_augment_data(traces[0], args.chunk_samples)
                x, y = next_batch(data, args)
                feed = {model.input_data: x, model.target_data: y,
                        model.initial_state: state}
                train_loss, state, _, cr, summary, sigma = sess.run(
                    [model.cost, model.final_state, model.train_op, check, merged,
                     model.sigma], feed)
                summary_writer.add_summary(summary, e * 100 + b)
                if (e * 100 + b) % args.save_every == 0 and ((e * 100 + b) > 0):
                    checkpoint_path = os.path.join('save', 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * 100 + b)
                    print "model saved to {}".format(checkpoint_path)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}, std = {}" \
                    .format(e * 100 + b, args.num_epochs * 100, e, train_loss,
                            end - start, np.sqrt(sigma).mean(axis=0).mean(axis=0))
                start = time.time()
            x, y = next_val_batch(data, args)
            feed = {model.input_data: x, model.target_data: y,
                    model.initial_state: state}
            test_loss, state = sess.run([model.cost, model.final_state], feed)
            end = time.time()
            print ">> {}/{} (epoch {}), test_loss = {:.3f}, time/batch = {:.3f}" \
                .format(e * 100 + b, args.num_epochs * 100, e, test_loss, end - start)
            start = time.time()
def __init__(self, batch_size=1, z_dim=8, net_size=384, learning_rate=0.01,
             keep_prob=1.0, loss_mode=1, chunk_samples=1024):
    """
    Args:
        batch_size: the size of a batch; should be specified before training
        z_dim: (optional) dimension of the latent variable Z
        net_size: number of nodes in each hidden layer
        keep_prob: dropout keep probability
        loss_mode: 1 -> "L2", 2 -> "Bernoulli"
    """
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.z_dim = z_dim
    self.net_size = net_size
    self.keep_prob = keep_prob
    self.loss_mode = loss_mode
    self.chunk_samples = chunk_samples
    self.x_dim = self.chunk_samples
    self.n_points = self.x_dim

    # tf Graph input: a batch of chunks
    self.x_raw = tf.placeholder(tf.float32, [batch_size, self.chunk_samples])
    self.lamb = tf.placeholder(tf.float32, [])
    # distorting the raw data is left to the DataLoader class
    self.x = self.x_raw

    # Create the autoencoder network
    self._create_network()
    # Define the loss (variational upper bound) and the corresponding optimizer
    self._create_loss_optimizer()
    self.check = tf.add_check_numerics_ops()

    # Initialize the TensorFlow variables and launch the session
    init = tf.initialize_all_variables()
    self.sess = tf.InteractiveSession()
    self.sess.run(init)
    self.saver = tf.train.Saver(tf.all_variables())
def optimize_elbo(node, steps=200, adam_rate=0.1, debug=False, return_session=False):
    """
    Convenience function to optimize an ELBO and return the breakdown of
    the final bound as well as the estimated posterior.
    """
    elbo, sample_stochastic, decompose_elbo, inspect_posterior = construct_elbo(node)

    try:
        train_step = tf.train.AdamOptimizer(adam_rate).minimize(-elbo)
    except ValueError as e:
        print e
        steps = 0

    init = tf.initialize_all_variables()
    if debug:
        debug_ops = tf.add_check_numerics_ops()

    sess = tf.Session()
    sess.run(init)
    for i in range(steps):
        fd = sample_stochastic()
        if debug:
            sess.run(debug_ops, feed_dict=fd)
        sess.run(train_step, feed_dict=fd)
        elbo_val = sess.run(elbo, feed_dict=fd)
        print i, elbo_val

    fd = sample_stochastic()
    elbo_terms = decompose_elbo(sess, fd)
    posterior = inspect_posterior(sess, fd)
    if return_session:
        return elbo_terms, posterior, sess, fd
    else:
        sess.close()
        return elbo_terms, posterior
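One detail worth flagging in the loop above: running debug_ops in its own sess.run call re-executes the forward pass. A sketch of the cheaper variant, using the same names as the function above, that fetches the checks and the training step in a single call:

if debug:
    sess.run([debug_ops, train_step], feed_dict=fd)
else:
    sess.run(train_step, feed_dict=fd)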
def train(args, model):
    dirname = 'save-vrnn'
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    with open(os.path.join(dirname, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    ckpt = tf.train.get_checkpoint_state(dirname)
    n_batches = 100
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(
            'logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph)
        check = tf.add_check_numerics_ops()
        merged = tf.summary.merge_all()
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        if ckpt:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print "Loaded model"
        start = time.time()
        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            state = model.initial_state_c, model.initial_state_h
            for b in xrange(n_batches):
                x, y = next_batch(args)
                feed = {model.input_data: x, model.target_data: y}
                train_loss, _, cr, summary, sigma, mu, input, target = sess.run(
                    [model.cost, model.train_op, check, merged, model.sigma,
                     model.mu, model.flat_input, model.target], feed)
                summary_writer.add_summary(summary, e * n_batches + b)
                if (e * n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0):
                    checkpoint_path = os.path.join(dirname, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * n_batches + b)
                    print "model saved to {}".format(checkpoint_path)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \
                    .format(e * n_batches + b, args.num_epochs * n_batches, e,
                            args.chunk_samples * train_loss, end - start,
                            sigma.mean(axis=0).mean(axis=0))
                start = time.time()
def train(self, adam_rate=0.1, steps=10000, print_interval=50, logdir=None,
          display_dict=None, sess=None):
    if display_dict is None or len(display_dict) == 0:
        print_names = []
        print_vars = []
    else:
        print_names, print_vars = zip(*display_dict.items())
    print_names = ["elbo"] + list(print_names)
    print_vars = [self.elbo] + list(print_vars)

    debug = tf.add_check_numerics_ops()
    train_step = tf.train.AdamOptimizer(adam_rate).minimize(-self.elbo)
    init = tf.initialize_all_variables()

    if sess is None:
        sess = tf.Session()
    if logdir is not None:
        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter(logdir, sess.graph_def)

    sess.run(init)
    for i in range(steps):
        fd = self.sample_stochastic_inputs()
        if i % print_interval == 0:
            print_vals = sess.run(print_vars, feed_dict=fd)
            print_str = " ".join(["%s %.4f" % (n, v)
                                  for (n, v) in zip(print_names, print_vals)])
            print ("step %d " % i) + print_str
            if logdir is not None:
                summary_str = sess.run(merged, feed_dict=fd)
                writer.add_summary(summary_str, i)
        sess.run(debug, feed_dict=fd)
        sess.run(train_step, feed_dict=fd)
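A caveat that applies to this snippet and the others in this collection, per the TensorFlow documentation: tf.add_check_numerics_ops() works only in graph mode and raises a ValueError if the graph contains control flow such as tf.while_loop or tf.cond. On TF 1.15+ and 2.x the recommended replacement is a single global switch:

import tensorflow as tf

tf.debugging.enable_check_numerics()  # also covers eager execution and tf.function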
def build_model(self):
    """Defines the GP model.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.
    """
    logging.info("Initializing model %s.", self.name)
    self.global_step = tf.train.get_or_create_global_step()

    # Define state for the model (inputs, etc.)
    self.x_train = tf.get_variable(
        "training_data",
        initializer=tf.ones([self.hparams.batch_size, self.n_in], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.y_train = tf.get_variable(
        "training_labels",
        initializer=tf.zeros([self.hparams.batch_size, 1], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.weights_train = tf.get_variable(
        "weights_train",
        initializer=tf.ones([self.hparams.batch_size, self.n_out], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_op = tf.assign(self.x_train, self.x_in, validate_shape=False)
    self.input_w_op = tf.assign(self.weights_train, self.weights,
                                validate_shape=False)

    self.input_std = tf.get_variable(
        "data_standard_deviation",
        initializer=tf.ones([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=False)
    self.input_mean = tf.get_variable(
        "data_mean",
        initializer=tf.zeros([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=True)

    # GP Hyperparameters
    self.noise = tf.get_variable("noise", initializer=tf.cast(0.0, dtype=tf.float64))
    self.amplitude = tf.get_variable("amplitude",
                                     initializer=tf.cast(1.0, dtype=tf.float64))
    self.amplitude_linear = tf.get_variable(
        "linear_amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.length_scales = tf.get_variable(
        "length_scales", initializer=tf.zeros([1, self.n_in], dtype=tf.float64))
    self.length_scales_lin = tf.get_variable(
        "length_scales_linear",
        initializer=tf.zeros([1, self.n_in], dtype=tf.float64))

    # Latent embeddings of the different outputs for task covariance
    self.task_vectors = tf.get_variable(
        "latent_task_vectors",
        initializer=tf.random_normal([self.n_out, self.task_latent_dim],
                                     dtype=tf.float64))

    # Normalize outputs across each dimension.
    # Since we have different numbers of observations across each task, we
    # normalize by their respective counts.
    index_counts = self.atleast_2d(tf.reduce_sum(self.weights, axis=0), self.n_out)
    index_counts = tf.where(index_counts > 0, index_counts,
                            tf.ones(tf.shape(index_counts), dtype=tf.float64))
    self.mean_op = tf.assign(self.input_mean,
                             tf.reduce_sum(self.y, axis=0) / index_counts)
    self.var_op = tf.assign(
        self.input_std,
        tf.sqrt(1e-4 + tf.reduce_sum(
            tf.square(self.y - tf.reduce_sum(self.y, axis=0) / index_counts),
            axis=0) / index_counts))

    with tf.control_dependencies([self.var_op]):
        y_normed = self.atleast_2d((self.y - self.input_mean) / self.input_std,
                                   self.n_out)
        y_normed = self.atleast_2d(tf.boolean_mask(y_normed, self.weights > 0), 1)
    self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False)

    # Observation noise
    alpha = tf.nn.softplus(self.noise) + 1e-6

    # Covariance
    with tf.control_dependencies([self.input_op, self.input_w_op, self.out_op]):
        self.self_cov = (self.cov(self.x_in, self.x_in) *
                         self.task_cov(self.weights, self.weights) +
                         tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64) * alpha)

    self.chol = tf.cholesky(self.self_cov)
    self.kinv = tf.cholesky_solve(self.chol,
                                  tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64))

    self.input_inv = tf.Variable(
        tf.eye(self.hparams.batch_size, dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_cov_op = tf.assign(self.input_inv, self.kinv, validate_shape=False)

    # Log determinant from the diagonal entries of the Cholesky factor
    with tf.control_dependencies([self.input_cov_op]):
        logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.chol) + 1e-16))

    # Log marginal likelihood
    self.marginal_ll = -tf.reduce_sum(
        -0.5 * tf.matmul(tf.transpose(y_normed), tf.matmul(self.kinv, y_normed)) -
        0.5 * logdet - 0.5 * self.n * np.log(2 * np.pi))

    zero = tf.cast(0., dtype=tf.float64)
    one = tf.cast(1., dtype=tf.float64)
    standard_normal = tfd.Normal(loc=zero, scale=one)

    # Loss is the marginal likelihood plus priors on the hyperparameters
    self.loss = tf.reduce_sum(
        self.marginal_ll -
        (standard_normal.log_prob(self.amplitude) +
         standard_normal.log_prob(tf.exp(self.noise)) +
         standard_normal.log_prob(self.amplitude_linear) +
         tfd.Normal(loc=zero, scale=one * 10.).log_prob(self.task_vectors)))

    # Optimizer for hyperparameters
    optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr)
    vars_to_optimize = [
        self.amplitude, self.length_scales, self.length_scales_lin,
        self.amplitude_linear, self.noise, self.input_mean
    ]
    if self.learn_embeddings:
        vars_to_optimize.append(self.task_vectors)
    grads = optimizer.compute_gradients(self.loss, vars_to_optimize)
    self.train_op = optimizer.apply_gradients(grads, global_step=self.global_step)

    # Predictions for test data
    self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x)

    # Create TensorBoard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter(
        "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph)
    self.check = tf.add_check_numerics_ops()
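For reference, the quantity that self.marginal_ll encodes above, written out in NumPy on synthetic data (K and y below are made up, not the model's tensors): the GP log marginal likelihood log p(y) = -1/2 y^T K^{-1} y - 1/2 log|K| - n/2 log(2*pi), with both the solve and the log determinant read off the Cholesky factor.

import numpy as np

n = 5
rng = np.random.RandomState(0)
A = rng.randn(n, n)
K = A.dot(A.T) + 1e-2 * np.eye(n)   # synthetic positive-definite covariance
y = rng.randn(n, 1)

L = np.linalg.cholesky(K)
alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))  # K^{-1} y via two triangular solves
logdet = 2.0 * np.sum(np.log(np.diag(L)))            # log|K| from the Cholesky diagonal
log_ml = (-0.5 * y.T.dot(alpha) - 0.5 * logdet - 0.5 * n * np.log(2 * np.pi)).item()
print(log_ml)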
def main(_):
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already have
    # training data of your own, use `--data_url= ` on the command line to avoid
    # downloading.
    model_settings = models.prepare_model_settings(
        len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, FLAGS.wanted_words.split(','),
        FLAGS.validation_percentage, FLAGS.testing_percentage, model_settings)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000.
    training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    fingerprint_input = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')

    logits, dropout_prob = models.create_model(
        fingerprint_input,
        model_settings,
        FLAGS.model_architecture,
        FLAGS.model_size_info,
        is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.placeholder(
        tf.float32, [None, label_count], name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=ground_truth_input, logits=logits))
    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope('train'), tf.control_dependencies(update_ops), \
            tf.control_dependencies(control_dependencies):
        learning_rate_input = tf.placeholder(
            tf.float32, [], name='learning_rate_input')
        train_op = tf.train.AdamOptimizer(learning_rate_input)
        train_step = slim.learning.create_train_op(cross_entropy_mean, train_op)
        # train_step = tf.train.GradientDescentOptimizer(
        #     learning_rate_input).minimize(cross_entropy_mean)

    predicted_indices = tf.argmax(logits, 1)
    expected_indices = tf.argmax(ground_truth_input, 1)
    correct_prediction = tf.equal(predicted_indices, expected_indices)
    confusion_matrix = tf.confusion_matrix(
        expected_indices, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')

    tf.global_variables_initializer().run()

    # Parameter counts
    params = tf.trainable_variables()
    num_params = sum(map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
    print('Total number of Parameters: ', num_params)

    start_step = 1
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save list of words.
    with gfile.GFile(
            os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'),
            'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # Training loop.
    best_accuracy = 0
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is.
        training_steps_sum = 0
        for i in range(len(training_steps_list)):
            training_steps_sum += training_steps_list[i]
            if training_step <= training_steps_sum:
                learning_rate_value = learning_rates_list[i]
                break
        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)
        # train_std = 11.558333964158848
        # train_mean = -1.5683672671004598
        # train_fingerprints = (train_fingerprints - train_mean)/train_std
        # train_fingerprints += 1
        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries, evaluation_step, cross_entropy_mean, train_step,
                increment_global_step
            ],
            feed_dict={
                fingerprint_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                dropout_prob: 1.0
            })
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info('Step #%d: rate %f, accuracy %.2f%%, cross entropy %f' %
                        (training_step, learning_rate_value, train_accuracy * 100,
                         cross_entropy_value))
        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            for i in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(FLAGS.batch_size, i, model_settings,
                                             0.0, 0.0, 0, 'validation', sess))
                # val_std = 20.91701306351207
                # val_mean = -3.0561562801250295
                # validation_fingerprints = (validation_fingerprints - val_mean)/val_std
                # Run a validation step and capture training summaries for
                # TensorBoard with the `merged` op.
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                validation_writer.add_summary(validation_summary, training_step)
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.2f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))
            # Save the model checkpoint when validation accuracy improves
            if total_accuracy > best_accuracy:
                best_accuracy = total_accuracy
                checkpoint_path = os.path.join(
                    FLAGS.train_dir, 'best',
                    FLAGS.model_architecture + '_' +
                    str(int(best_accuracy * 10000)) + '.ckpt')
                tf.logging.info('Saving best model to "%s-%d"', checkpoint_path,
                                training_step)
                saver.save(sess, checkpoint_path, global_step=training_step)
            tf.logging.info('So far the best validation accuracy is %.2f%%' %
                            (best_accuracy * 100))

    set_size = audio_processor.set_size('testing')
    tf.logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for i in xrange(0, set_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix
    tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.logging.info('Final test accuracy = %.2f%% (N=%d)' %
                    (total_accuracy * 100, set_size))
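The learning-rate lookup at the top of the training loop above walks the cumulative step counts. The same logic as a small standalone function (pure Python, mirroring the flag semantics described in the comment):

def staged_learning_rate(step, steps_list, rates_list):
    """Return the rate for `step`, e.g. steps_list=[10000, 3000], rates_list=[1e-3, 1e-4]."""
    total = 0
    for n_steps, rate in zip(steps_list, rates_list):
        total += n_steps
        if step <= total:
            return rate
    return rates_list[-1]  # past the schedule: keep the final rate

assert staged_learning_rate(10000, [10000, 3000], [1e-3, 1e-4]) == 1e-3
assert staged_learning_rate(10001, [10000, 3000], [1e-3, 1e-4]) == 1e-4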
def main(_):
    best_acc = 0
    best_step = 0
    best_acc_istrain = 0
    best_step_istrain = 0
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already have
    # training data of your own, use `--data_url= ` on the command line to avoid
    # downloading.
    model_settings = models.prepare_model_settings(
        len(input_data_filler.prepare_words_list_my(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
    audio_processor = input_data_filler.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, FLAGS.wanted_words.split(','),
        FLAGS.validation_percentage, FLAGS.testing_percentage, model_settings)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000.
    training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    ##############################################
    ########### TensorFlow graph modules #########
    fingerprint_input = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')

    ########### model creation ##########
    istrain = tf.placeholder(tf.bool, name='istrain')
    logits = models.create_model(
        fingerprint_input,
        model_settings,
        FLAGS.model_architecture,
        is_training=istrain)

    # Define loss and optimizer
    ########### ground truth ##########
    ground_truth_input = tf.placeholder(
        tf.float32, [None, label_count], name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    ########### cross-entropy computation ##########
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=ground_truth_input, logits=logits))
    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    ########### learning rate, accuracy, confusion matrix ##########
    # learning_rate_input     learning-rate input (tf.placeholder)
    # train_step              training op (optimizer)
    # predicted_indices       predicted class indices
    # expected_indices        expected class indices
    # correct_prediction      per-example correctness
    # confusion_matrix        confusion matrix
    # evaluation_step         batch accuracy at each stage
    # global_step             global training step
    # increment_global_step   increments the global step
    learning_rate_input = tf.placeholder(tf.float32, [], name='learning_rate_input')
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(
            learning_rate_input).minimize(cross_entropy_mean)

    predicted_indices = tf.argmax(logits, 1)
    expected_indices = tf.argmax(ground_truth_input, 1)
    correct_prediction = tf.equal(predicted_indices, expected_indices)
    confusion_matrix = tf.confusion_matrix(
        expected_indices, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    acc = tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=None)  # default keeps only 5 checkpoint files

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
    merged_summaries = tf.summary.merge_all()
    validation_merged_summaries = tf.summary.merge(
        [tf.get_collection(tf.GraphKeys.SUMMARIES, 'accuracy'),
         tf.get_collection(tf.GraphKeys.SUMMARIES, 'cross_entropy')])
    test_summaries = tf.summary.merge([acc])
    test_summaries_istrain = tf.summary.merge([acc])
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')
    test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test')
    test_istrain_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test_istrain')
    tf.global_variables_initializer().run()

    start_step = 1
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save list of words.
    with gfile.GFile(
            os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'),
            'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # model1: fc
    # model2: conv, ~940k parameters
    # model3: low_latency_conv, comparable to model1
    # model4: 750k parameters

    #############################################
    ################# main loop #################
    #############################################
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is.
        ####### automatic learning-rate decay #######
        if training_step < 12000 + 1:
            learning_rate_value = learning_rates_list[0] * 0.02 ** (training_step / 12000)
        else:
            learning_rate_value = learning_rates_list[0] * 0.02  # 0.015 12000

        # Pull the audio samples we'll use for training.
        ####### load data through the audio processor #######
        # get_data(self, how_many, offset, model_settings, background_frequency,
        #          background_volume_range, time_shift, mode, sess)
        train_fingerprints, train_ground_truth = audio_processor.get_data_my(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)
        # Run the graph with this batch of training data.
        train_fingerprints = np_round_and_clip(train_fingerprints)
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries, evaluation_step, cross_entropy_mean, train_step,
                increment_global_step
            ],
            feed_dict={
                fingerprint_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                istrain: True
            })
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                        (training_step, learning_rate_value, train_accuracy * 100,
                         cross_entropy_value))
        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            ##################################################################
            ###### accumulate validation accuracy and confusion matrix ######
            for i in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data_my(FLAGS.batch_size, i, model_settings,
                                                0.0, 0.0, 0, 'validation', sess))
                # Run a validation step and capture training summaries for
                # TensorBoard with the `merged` op.
                validation_fingerprints = np_round_and_clip(validation_fingerprints)
                validation_summaries, validation_accuracy, conf_matrix = sess.run(
                    [validation_merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        istrain: True
                    })
                validation_writer.add_summary(validation_summaries, training_step)
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

            ############################################################
            ###### accumulate test accuracy and confusion matrix ######
            set_size = audio_processor.set_size('testing')
            tf.logging.info('set_size=%d', set_size)
            test_fingerprints, test_ground_truth = audio_processor.get_data_my(
                -1, 0, model_settings, 0.0, 0.0, 0, 'testing', sess)
            test_fingerprints = np_round_and_clip(test_fingerprints)
            final_summary, test_accuracy, conf_matrix = sess.run(
                [test_summaries, evaluation_step, confusion_matrix],
                feed_dict={
                    fingerprint_input: test_fingerprints,
                    ground_truth_input: test_ground_truth,
                    istrain: False
                })
            final_summary_istrain, test_accuracy_istrain = sess.run(
                [test_summaries_istrain, evaluation_step],
                feed_dict={
                    fingerprint_input: test_fingerprints,
                    ground_truth_input: test_ground_truth,
                    istrain: True
                })
            if test_accuracy > best_acc:
                best_acc = test_accuracy
                best_step = training_step
            if test_accuracy_istrain > best_acc_istrain:
                best_acc_istrain = test_accuracy_istrain
                best_step_istrain = training_step
            test_writer.add_summary(final_summary, training_step)
            test_istrain_writer.add_summary(final_summary_istrain, training_step)
            tf.logging.info('Confusion Matrix:\n %s' % (conf_matrix))
            tf.logging.info('test accuracy = %.1f%% (N=%d)' %
                            (test_accuracy * 100, 6882))
            tf.logging.info('test_istrain accuracy = %.1f%% (N=%d)' %
                            (test_accuracy_istrain * 100, 6882))
            tf.logging.info('Best test accuracy before now = %.1f%% (N=%d)' %
                            (best_acc * 100, 6882) + ' at step of ' + str(best_step))
            tf.logging.info('Best test_istrain accuracy before now = %.1f%% (N=%d)' %
                            (best_acc_istrain * 100, 6882) + ' at step of ' +
                            str(best_step_istrain))

        # Save the model checkpoint periodically.
        if (training_step % FLAGS.save_step_interval == 0 or
                training_step == training_steps_max):
            checkpoint_path = os.path.join(
                FLAGS.train_dir + '/' + FLAGS.model_architecture,
                FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)

        print_line = ('Best test accuracy before now = %.1f%% (N=%d)' %
                      (best_acc * 100, 6882) + ' at step of ' + str(best_step) + '\n' +
                      'Best test_istrain accuracy before now = %.1f%% (N=%d)' %
                      (best_acc_istrain * 100, 6882) + ' at step of ' +
                      str(best_step_istrain))
        if training_step == training_steps_max:
            with open(FLAGS.train_dir + '/' + FLAGS.model_architecture +
                      '/details.txt', 'w') as f:
                f.write(print_line)
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
        # GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            images, ratioImages, labels = gl.inputs(False)

        check_op = tf.add_check_numerics_ops()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        a = tf.Print(images.shape, [images.shape])
        logits = gl.inference(images, ratioImages)

        # Calculate loss.
        loss = gl.loss_depart(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = gl.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                                  '%.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                       tf.train.NanTensorHook(loss),
                       _LoggerHook()],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement)) as mon_sess:
            buffer_labels = []
            buffer_logits = []
            while not mon_sess.should_stop():
                _0, _1, np_labels, np_logits = mon_sess.run(
                    [train_op, check_op, labels, logits])
                buffer_labels.append(np_labels)
                buffer_logits.append(np_logits)
            np.save('train_playground/np_labels.npy', np.array(buffer_labels))
            np.save('train_playground/np_logits.npy', np.array(buffer_logits))
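The snippet above combines two safeguards: graph-wide CheckNumerics ops (check_op) and tf.train.NanTensorHook, which watches a single loss tensor and raises (or, with fail_on_nan_loss=False, merely logs) when it becomes NaN. A minimal hedged sketch of the hook-only variant, assuming `loss` and `train_op` are already built:

hooks = [
    tf.train.StopAtStepHook(last_step=100000),
    tf.train.NanTensorHook(loss, fail_on_nan_loss=True),  # watches only the loss
]
with tf.train.MonitoredTrainingSession(hooks=hooks) as mon_sess:
    while not mon_sess.should_stop():
        mon_sess.run(train_op)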
def __init__(self, batch_size, sN, sL, qL, vocab_size, embed_size, hidden_size,
             learning_rate=5e-3, optim_type='Adam', attention_type='bilinear',
             attention_layer=3, glove=False, train_glove=False, max_norm=6):
    """
    sN: sentence number
    sL: sentence length
    qL: query length

    Placeholders:
        passage  [batch_size, sN, sL]
        p_len    [batch_size, sN]
        p_idf    [batch_size, sN, sL]
        query    [batch_size, qL]
        q_len    [batch_size]
        q_idf    [batch_size, qL]
        answer   [batch_size, sN]
        dropout  scalar
    """
    self.create_placeholder(batch_size, sN, sL, qL)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(learning_rate, global_step, 1000, 0.95)
    self.lr_sum = tf.scalar_summary('learning_rate', learning_rate)

    self.emb = tf.get_variable("emb", [vocab_size, embed_size],
                               trainable=(not glove or train_glove))
    embed_p = tf.nn.embedding_lookup(self.emb, self.passage, name='embed_p')  # N,sN,sL,E
    embed_q = tf.nn.embedding_lookup(self.emb, self.query, name='embed_q')    # N,qL,E
    self.embed_sum = tf.histogram_summary("embed", self.emb)

    with tf.name_scope('BoW'):
        wt_p = tf.expand_dims(self.p_wt, -1)
        bow_p = tf.reduce_sum(embed_p * wt_p, 2, name='bow_p')  # N, sN, E
        epsilon = 1e-5
        denominator = tf.to_float(tf.expand_dims(self.p_len, -1)) + epsilon
        # bow_p = tf.div(bow_p, denominator, name='true_bow_p')  # N, sN, 1

        wt_q = tf.expand_dims(self.q_wt, -1)
        bow_q = tf.reduce_sum(embed_q * wt_q, 1, name='bow_q')  # N, E
        denominator = tf.to_float(tf.expand_dims(self.q_len, -1)) + epsilon
        # bow_q = tf.div(bow_q, denominator, name='true_bow_q')  # N, 1

    p_rep = bow_p
    q_rep = bow_q

    sN_mask = tf.to_float(self.p_len > 0, name='sN_mask')  # N, sN
    sN_count = tf.reduce_sum(sN_mask, 1)
    self.sN_mask = sN_mask
    self.sN_count = sN_count
    sN_count = tf.to_int64(sN_count, name='sN_count')

    with tf.name_scope('REP_dropout'):
        q_rep = tf.nn.dropout(q_rep, self.dropout)
        p_rep = tf.nn.dropout(p_rep, self.dropout)

    p_rep = tf.unpack(p_rep, axis=1)
    atten = self.apply_attention(attention_type, embed_size / 2, sN, p_rep, q_rep,
                                 layer=attention_layer)
    atten = atten - tf.reduce_min(atten, [1], keep_dims=True)
    atten = tf.mul(atten, sN_mask, name='unnormalized_attention')

    self.score = atten  # N, sN
    self.alignment = tf.nn.softmax(atten, name='alignment')
    self.loss = tf.nn.softmax_cross_entropy_with_logits(self.score, self.answer,
                                                        name='loss')
    self.prediction = tf.argmax(self.score, 1)
    self.answer_id = tf.argmax(self.answer, 1)
    self.correct_prediction = tf.equal(self.prediction, self.answer_id)  # N
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32),
                                   name='accuracy')

    self.optim = self.get_optimizer(optim_type, learning_rate)
    gvs = self.optim.compute_gradients(self.loss)
    with tf.name_scope('clip_norm'):
        self.gvs = [(tf.clip_by_norm(g, max_norm), v) for g, v in gvs]
    self.train_op = self.optim.apply_gradients(self.gvs, global_step=global_step,
                                               name='train_op')
    self.check_op = tf.add_check_numerics_ops()

    tsum, vsum = self.create_summary(add_gv_sum=True)
    self.train_summary = tf.merge_summary(tsum)
    self.validate_summary = tf.merge_summary(vsum)

    # store params
    self.p_rep = p_rep
    self.q_rep = q_rep
    self.embed_p = embed_p
    self.embed_q = embed_q
    self.global_step = global_step
    self.origin_gv = gvs
    self.learning_rate = learning_rate
def build_model(self):
    """Defines the GP model.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.
    """
    tf.logging.info("Initializing model %s.", self.name)
    self.global_step = tf.train.get_or_create_global_step()

    # Define state for the model (inputs, etc.)
    self.x_train = tf.get_variable(
        "training_data",
        initializer=tf.ones([self.hparams.batch_size, self.n_in], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.y_train = tf.get_variable(
        "training_labels",
        initializer=tf.zeros([self.hparams.batch_size, 1], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.weights_train = tf.get_variable(
        "weights_train",
        initializer=tf.ones([self.hparams.batch_size, self.n_out], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_op = tf.assign(self.x_train, self.x_in, validate_shape=False)
    self.input_w_op = tf.assign(self.weights_train, self.weights,
                                validate_shape=False)

    self.input_std = tf.get_variable(
        "data_standard_deviation",
        initializer=tf.ones([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=False)
    self.input_mean = tf.get_variable(
        "data_mean",
        initializer=tf.zeros([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=True)

    # GP Hyperparameters
    self.noise = tf.get_variable("noise", initializer=tf.cast(0.0, dtype=tf.float64))
    self.amplitude = tf.get_variable("amplitude",
                                     initializer=tf.cast(1.0, dtype=tf.float64))
    self.amplitude_linear = tf.get_variable(
        "linear_amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.length_scales = tf.get_variable(
        "length_scales", initializer=tf.zeros([1, self.n_in], dtype=tf.float64))
    self.length_scales_lin = tf.get_variable(
        "length_scales_linear",
        initializer=tf.zeros([1, self.n_in], dtype=tf.float64))

    # Latent embeddings of the different outputs for task covariance
    self.task_vectors = tf.get_variable(
        "latent_task_vectors",
        initializer=tf.random_normal([self.n_out, self.task_latent_dim],
                                     dtype=tf.float64))

    # Normalize outputs across each dimension.
    # Since we have different numbers of observations across each task, we
    # normalize by their respective counts.
    index_counts = self.atleast_2d(tf.reduce_sum(self.weights, axis=0), self.n_out)
    index_counts = tf.where(index_counts > 0, index_counts,
                            tf.ones(tf.shape(index_counts), dtype=tf.float64))
    self.mean_op = tf.assign(self.input_mean,
                             tf.reduce_sum(self.y, axis=0) / index_counts)
    self.var_op = tf.assign(
        self.input_std,
        tf.sqrt(1e-4 + tf.reduce_sum(
            tf.square(self.y - tf.reduce_sum(self.y, axis=0) / index_counts),
            axis=0) / index_counts))

    with tf.control_dependencies([self.var_op]):
        y_normed = self.atleast_2d((self.y - self.input_mean) / self.input_std,
                                   self.n_out)
        y_normed = self.atleast_2d(tf.boolean_mask(y_normed, self.weights > 0), 1)
    self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False)

    # Observation noise
    alpha = tf.nn.softplus(self.noise) + 1e-6

    # Covariance
    with tf.control_dependencies([self.input_op, self.input_w_op, self.out_op]):
        self.self_cov = (self.cov(self.x_in, self.x_in) *
                         self.task_cov(self.weights, self.weights) +
                         tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64) * alpha)

    self.chol = tf.cholesky(self.self_cov)
    self.kinv = tf.cholesky_solve(self.chol,
                                  tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64))

    self.input_inv = tf.Variable(
        tf.eye(self.hparams.batch_size, dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_cov_op = tf.assign(self.input_inv, self.kinv, validate_shape=False)

    # Log determinant from the diagonal entries of the Cholesky factor
    with tf.control_dependencies([self.input_cov_op]):
        logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.chol) + 1e-16))

    # Log marginal likelihood
    self.marginal_ll = -tf.reduce_sum(
        -0.5 * tf.matmul(tf.transpose(y_normed), tf.matmul(self.kinv, y_normed)) -
        0.5 * logdet - 0.5 * self.n * np.log(2 * np.pi))

    zero = tf.cast(0., dtype=tf.float64)
    one = tf.cast(1., dtype=tf.float64)
    standard_normal = tfd.Normal(loc=zero, scale=one)

    # Loss is the marginal likelihood plus priors on the hyperparameters
    self.loss = tf.reduce_sum(
        self.marginal_ll -
        (standard_normal.log_prob(self.amplitude) +
         standard_normal.log_prob(tf.exp(self.noise)) +
         standard_normal.log_prob(self.amplitude_linear) +
         tfd.Normal(loc=zero, scale=one * 10.).log_prob(self.task_vectors)))

    # Optimizer for hyperparameters
    optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr)
    vars_to_optimize = [
        self.amplitude, self.length_scales, self.length_scales_lin,
        self.amplitude_linear, self.noise, self.input_mean
    ]
    if self.learn_embeddings:
        vars_to_optimize.append(self.task_vectors)
    grads = optimizer.compute_gradients(self.loss, vars_to_optimize)
    self.train_op = optimizer.apply_gradients(grads, global_step=self.global_step)

    # Predictions for test data
    self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x)

    # Create TensorBoard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter(
        "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph)
    self.check = tf.add_check_numerics_ops()
def create(parameters): print('Creating the neural network model.') tf.reset_default_graph() # tf Graph input x = tf.placeholder(tf.float32, shape=(None, parameters['n_steps'], parameters['n_input']), name='input') x = tf.verify_tensor_all_finite(x, "X not finite!") y = tf.placeholder(tf.float32, shape=(None, parameters['n_output']), name='expected_output') y = tf.verify_tensor_all_finite(y, "Y not finite!") #x = tf.Print(x, [x], "X: ") #y = tf.Print(y, [y], "Y: ") lstm_state_size = np.sum(parameters['lstm_layers']) * 2 # Note: Batch size is the first dimension in istate. istate = tf.placeholder(tf.float32, shape=(None, lstm_state_size), name='internal_state') lr = tf.placeholder(tf.float32, name='learning_rate') # The target to track itself and its peers, each with x, y input_size = (parameters['n_peers'] + 1) * 2 inputToRnn = parameters['input_layer'] if (parameters['input_layer'] == None): inputToRnn = parameters['n_input'] cells = [rnn_cell.LSTMCell(l, parameters['lstm_layers'][i-1] if (i > 0) else inputToRnn, num_proj=parameters['lstm_layers'][i], cell_clip=parameters['lstm_clip'], use_peepholes=True) for i,l in enumerate(parameters['lstm_layers'])] # TODO: GRUCell support here. # cells = [rnn_cell.GRUCell(l, parameters['lstm_layers'][i-1] if (i > 0) else inputToRnn) for i,l in enumerate(parameters['lstm_layers'])] model = { 'input_weights': tf.Variable(tf.random_normal( [input_size, parameters['input_layer']]), name='input_weights'), 'input_bias': tf.Variable(tf.random_normal([parameters['input_layer']]), name='input_bias'), 'output_weights': tf.Variable(tf.random_normal([parameters['lstm_layers'][-1], # 6 = 2 sigma, 2 mean, weight, rho parameters['n_mixtures'] * 6]), name='output_weights'), # We need to put at least the standard deviation output biases to about 5 to prevent zeros and infinities. # , mean = 5.0, stddev = 3.0 'output_bias': tf.Variable(tf.random_normal([parameters['n_mixtures'] * 6]), name='output_bias'), 'rnn_cell': rnn_cell.MultiRNNCell(cells), 'lr': lr, 'x': x, 'y': y, 'keep_prob': tf.placeholder(tf.float32), 'istate': istate } # if (parameters['input_layer'] <> None): #model['input_weights'] = tf.Print(model['input_weights'], [model['input_weights']], "Input weights: ", summarize=100) #model['input_bias'] = tf.Print(model['input_bias'], [model['input_bias']], "Input bias: ", summarize=100) model['input_weights'] = tf.verify_tensor_all_finite(model['input_weights'], "Input weights not finite!") model['input_bias'] = tf.verify_tensor_all_finite(model['input_bias'], "Input bias not finite!") #model['output_weights'] = tf.Print(model['output_weights'], [model['output_weights']], "Output weights: ", summarize=100) #model['output_bias'] = tf.Print(model['output_bias'], [model['output_bias']], "Output bias: ", summarize=100) model['output_weights'] = tf.verify_tensor_all_finite(model['output_weights'], "Output weights not finite!") model['output_bias'] = tf.verify_tensor_all_finite(model['output_bias'], "Output bias not finite!") pred = RNN(parameters, x, model, istate) tvars = tf.trainable_variables() avars = tf.all_variables() # Define loss and optimizer # We will take 1 m as the arbitrary goal post to be happy with the error. 
# The delta error is squared to emphasize its importance (delta errors are much smaller than errors in absolute positions). n_mixtures = parameters['n_mixtures'] batch_size = parameters['batch_size'] cost = mixture_loss(pred[0], y, n_mixtures, batch_size) # Clipping the gradients; a list comprehension (rather than map) keeps this working under Python 3, where map() returns an iterator. gradients = [tf.to_float(g) for g in tf.gradients(cost, tvars, aggregation_method=2)] grads, _ = tf.clip_by_global_norm(gradients, parameters['clip_gradients']) optimizer = tf.train.AdamOptimizer(learning_rate=parameters['learning_rate']) train_op = optimizer.apply_gradients(zip(grads, tvars)) tf.add_check_numerics_ops() model['pred'] = pred[0] model['last_state'] = pred[1] model['cost'] = cost model['optimizer'] = train_op return model
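The clip-then-apply pattern above generalizes beyond this model; a minimal sketch (hypothetical toy cost, not the mixture loss) of clipping gradients by their global norm before handing them to the optimizer:

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 4])
w = tf.Variable(tf.random_normal([4, 1]))
cost = tf.reduce_mean(tf.square(tf.matmul(x, w)))

tvars = tf.trainable_variables()
grads = tf.gradients(cost, tvars)
# Rescales the whole gradient list when its joint L2 norm exceeds clip_norm,
# so the update direction is preserved while its size is bounded.
clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = tf.train.AdamOptimizer(1e-3).apply_gradients(list(zip(clipped, tvars)))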
def FaceVerification(img_path1, img_path2): img_lm_1, img_rm_1, img_nose_1, img_le_1, img_re_1 = face_region(img_path1) img_lm_2, img_rm_2, img_nose_2, img_le_2, img_re_2 = face_region(img_path2) tf.reset_default_graph() sess = tf.InteractiveSession() test_right_mouth = bundle(img_rm_1, img_rm_2) test_left_mouth = bundle(img_lm_1, img_lm_2) test_right_eye = bundle(img_re_1, img_re_2) test_left_eye = bundle(img_le_1, img_le_2) test_nose = bundle(img_nose_1, img_nose_2) x_image_nose = tf.placeholder(tf.float32, [8, None, 31, 31, 6], name='x') x_image_left_mouth = tf.placeholder(tf.float32, [8, None, 31, 31, 6], name='x') x_image_right_mouth = tf.placeholder(tf.float32, [8, None, 31, 31, 6], name='x') x_image_left_eye = tf.placeholder(tf.float32, [8, None, 31, 31, 6], name='x') x_image_right_eye = tf.placeholder(tf.float32, [8, None, 31, 31, 6], name='x') # drop out #keep_prob = tf.placeholder(tf.float32) # left mouth part # first convolutional layer W_conv1_left_mouth = weight_variable([4, 4, 6, 20], 'W_conv1_left_mouth') b_conv1_left_mouth = bias_variable([20], 'b_conv1_left_mouth') # second convolutional layer W_conv2_left_mouth = weight_variable([3, 3, 20, 40], 'W_conv2_left_mouth') b_conv2_left_mouth = bias_variable([40], 'b_conv2_left_mouth') # third convolutional layer W_conv3_left_mouth = weight_variable([3, 3, 40, 60], 'W_conv3_left_mouth') b_conv3_left_mouth = bias_variable([60], 'b_conv3_left_mouth') # forth convolutional layer W_conv4_left_mouth = weight_variable([2, 2, 60, 80], 'W_conv4_left_mouth') b_conv4_left_mouth = bias_variable([80], 'b_conv4_left_mouth') # densely connected layer W_fc1_left_mouth = weight_variable([1 * 1 * 80, 80], 'W_fc1_left_mouth') b_fc1_left_mouth = bias_variable([80], 'b_fc1_left_mouth') # right mouth part # first convolutional layer W_conv1_right_mouth = weight_variable([4, 4, 6, 20], 'W_conv1_right_mouth') b_conv1_right_mouth = bias_variable([20], 'b_conv1_right_mouth') # second convolutional layer W_conv2_right_mouth = weight_variable([3, 3, 20, 40], 'W_conv2_right_mouth') b_conv2_right_mouth = bias_variable([40], 'b_conv2_right_mouth') # third convolutional layer W_conv3_right_mouth = weight_variable([3, 3, 40, 60], 'W_conv3_right_mouth') b_conv3_right_mouth = bias_variable([60], 'b_conv3_right_mouth') # forth convolutional layer W_conv4_right_mouth = weight_variable([2, 2, 60, 80], 'W_conv4_right_mouth') b_conv4_right_mouth = bias_variable([80], 'b_conv4_right_mouth') # densely connected layer W_fc1_right_mouth = weight_variable([1 * 1 * 80, 80], 'W_fc1_right_mouth') b_fc1_right_mouth = bias_variable([80], 'b_fc1_right_mouth') # left eye part # first convolutional layer W_conv1_left_eye = weight_variable([4, 4, 6, 20], 'W_conv1_left_eye') b_conv1_left_eye = bias_variable([20], 'b_conv1_left_eye') # second convolutional layer W_conv2_left_eye = weight_variable([3, 3, 20, 40], 'W_conv2_left_eye') b_conv2_left_eye = bias_variable([40], 'b_conv2_left_eye') # third convolutional layer W_conv3_left_eye = weight_variable([3, 3, 40, 60], 'W_conv3_left_eye') b_conv3_left_eye = bias_variable([60], 'b_conv3_left_eye') # forth convolutional layer W_conv4_left_eye = weight_variable([2, 2, 60, 80], 'W_conv4_left_eye') b_conv4_left_eye = bias_variable([80], 'b_conv4_left_eye') # densely connected layer W_fc1_left_eye = weight_variable([1 * 1 * 80, 80], 'W_fc1_left_eye') b_fc1_left_eye = bias_variable([80], 'b_fc1_left_eye') # right eye part # first convolutional layer W_conv1_right_eye = weight_variable([4, 4, 6, 20], 'W_conv1_right_eye') b_conv1_right_eye = 
bias_variable([20], 'b_conv1_right_eye') # second convolutional layer W_conv2_right_eye = weight_variable([3, 3, 20, 40], 'W_conv2_right_eye') b_conv2_right_eye = bias_variable([40], 'b_conv2_right_eye') # third convolutional layer W_conv3_right_eye = weight_variable([3, 3, 40, 60], 'W_conv3_right_eye') b_conv3_right_eye = bias_variable([60], 'b_conv3_right_eye') # forth convolutional layer W_conv4_right_eye = weight_variable([2, 2, 60, 80], 'W_conv4_right_eye') b_conv4_right_eye = bias_variable([80], 'b_conv4_right_eye') # densely connected layer W_fc1_right_eye = weight_variable([1 * 1 * 80, 80], 'W_fc1_right_eye') b_fc1_right_eye = bias_variable([80], 'b_fc1_right_eye') # nose part # first convolutional layer W_conv1_nose = weight_variable([4, 4, 6, 20], 'W_conv1_nose') b_conv1_nose = bias_variable([20], 'b_conv1_nose') # second convolutional layer W_conv2_nose = weight_variable([3, 3, 20, 40], 'W_conv2_nose') b_conv2_nose = bias_variable([40], 'b_conv2_nose') # third convolutional layer W_conv3_nose = weight_variable([3, 3, 40, 60], 'W_conv3_nose') b_conv3_nose = bias_variable([60], 'b_conv3_nose') # forth convolutional layer W_conv4_nose = weight_variable([2, 2, 60, 80], 'W_conv4_nose') b_conv4_nose = bias_variable([80], 'b_conv4_nose') # densely connected layer W_fc1_nose = weight_variable([1 * 1 * 80, 80], 'W_fc1_nose') b_fc1_nose = bias_variable([80], 'b_fc1_nose') h_fc1_drop_left_mouth = CNN_Computaion2( x_image_left_mouth, W_conv1_left_mouth, b_conv1_left_mouth, W_conv2_left_mouth, b_conv2_left_mouth, W_conv3_left_mouth, b_conv3_left_mouth, W_conv4_left_mouth, b_conv4_left_mouth, W_fc1_left_mouth, b_fc1_left_mouth) h_fc1_drop_right_mouth = CNN_Computaion2( x_image_right_mouth, W_conv1_right_mouth, b_conv1_right_mouth, W_conv2_right_mouth, b_conv2_right_mouth, W_conv3_right_mouth, b_conv3_right_mouth, W_conv4_right_mouth, b_conv4_right_mouth, W_fc1_right_mouth, b_fc1_right_mouth) h_fc1_drop_left_eye = CNN_Computaion2(x_image_left_eye, W_conv1_left_eye, b_conv1_left_eye, W_conv2_left_eye, b_conv2_left_eye, W_conv3_left_eye, b_conv3_left_eye, W_conv4_left_eye, b_conv4_left_eye, W_fc1_left_eye, b_fc1_left_eye) h_fc1_drop_right_eye = CNN_Computaion2( x_image_right_eye, W_conv1_right_eye, b_conv1_right_eye, W_conv2_right_eye, b_conv2_right_eye, W_conv3_right_eye, b_conv3_right_eye, W_conv4_right_eye, b_conv4_right_eye, W_fc1_right_eye, b_fc1_right_eye) h_fc1_drop_nose = CNN_Computaion2(x_image_nose, W_conv1_nose, b_conv1_nose, W_conv2_nose, b_conv2_nose, W_conv3_nose, b_conv3_nose, W_conv4_nose, b_conv4_nose, W_fc1_nose, b_fc1_nose) # RBM implentation h_fc1_drop = tf.concat([ h_fc1_drop_left_mouth, h_fc1_drop_right_mouth, h_fc1_drop_left_eye, h_fc1_drop_right_eye, h_fc1_drop_nose ], 1) W_hidden = weight_variable([3200, 8], 'W_hidden', 0.5 / tf.sqrt(3200.0)) b_hidden = bias_variable([8], 'b_hidden') h_dc = tf.matmul(h_fc1_drop, W_hidden) + b_hidden W_output_1 = weight_variable([8], 'W_output_1', 0.5 / tf.sqrt(8.0)) W_output_2 = weight_variable([8], 'W_output_2', 0.5 / tf.sqrt(8.0)) b_output = bias_variable([2], 'b_output') y_ = tf.placeholder(tf.float32, [None, 2], name='y') Probability_part1 = tf.exp(h_dc + W_output_1) + 1 Probability_part2 = tf.exp(h_dc + W_output_2) + 1 Probability_part3 = tf.transpose(tf.stack( [Probability_part1, Probability_part2], 2), perm=[1, 0, 2]) Probability_part5 = tf.reduce_sum(tf.multiply(Probability_part3, [1.0, 0.0]), 2, keep_dims=True) Probability_pos_numerator = tf.reduce_sum( tf.multiply([1.0, 0.0], tf.exp(b_output))) Probability_pos_denominator = 
tf.reduce_sum( tf.multiply( tf.reduce_prod(tf.div(Probability_part3, Probability_part5), 0), tf.exp(b_output)), 1) Probability_pos = tf.div(Probability_pos_numerator, Probability_pos_denominator) Probability_part6 = tf.reduce_sum(tf.multiply(Probability_part3, [0.0, 1.0]), 2, keep_dims=True) Probability_neg_numerator = tf.reduce_sum( tf.multiply([0.0, 1.0], tf.exp(b_output))) Probability_neg_denominator = tf.reduce_sum( tf.multiply( tf.reduce_prod(tf.div(Probability_part3, Probability_part6), 0), tf.exp(b_output)), 1) Probability_neg = tf.div(Probability_neg_numerator, Probability_neg_denominator) Probability_dis = tf.stack([Probability_pos, Probability_neg], 1) check_op = tf.add_check_numerics_ops() prediction = tf.argmin(Probability_dis, 1) # set check point init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess.run(init_op) saver = tf.train.Saver() check_dir = './check_point' ckpt = tf.train.get_checkpoint_state(check_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) prediction_array = sess.run(prediction, feed_dict={ x_image_nose: test_nose, x_image_left_eye: test_left_eye, x_image_right_eye: test_right_eye, x_image_left_mouth: test_left_mouth, x_image_right_mouth: test_right_mouth }) sess.close() return prediction_array[0]
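The restore step above follows the standard checkpoint idiom; a minimal sketch of just that piece (the './check_point' directory is taken from the snippet, the variable is a placeholder for a real model):

import tensorflow as tf

v = tf.Variable(0.0, name='v')
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))
    ckpt = tf.train.get_checkpoint_state('./check_point')
    if ckpt and ckpt.model_checkpoint_path:
        # Restoring replaces the freshly initialized values with saved ones.
        saver.restore(sess, ckpt.model_checkpoint_path)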
def main_gpu(arg): print('Running main') print('--==>', dict(arg) ) arg.act_name = arg.act.__name__ results = {'train_errors':[], 'cv_errors':[],'test_errors':[]} path, errors_pretty, mdl_dir, json_file = set_experiment_folders(arg) set_tensorboard(arg) ## Data sets and task print( '----====> TASK NAME: %s' % arg.data_file_name ) (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(arg) N_frac = arg.N_frac X_train, Y_train, X_cv, Y_cv, X_test, Y_test = X_train[:N_frac,:], Y_train[:N_frac,:], X_cv[:N_frac,:], Y_cv[:N_frac,:], X_test[:N_frac,:], Y_test[:N_frac,:] if arg.data_normalize == 'normalize_input': X_train, X_cv, X_test = preprocessing.scale(X_train), preprocessing.scale(X_cv), preprocessing.scale(X_test) (N_train,D) = X_train.shape (N_test,D_out) = Y_test.shape print( '(N_train,D) = ', (N_train,D) ) print( '(N_test,D_out) = ', (N_test,D_out) ) ## phase_train = tf.placeholder(tf.bool, name='phase_train') if arg.bn else None arg.steps = arg.get_steps(arg) arg.M = arg.get_batch_size(arg) arg.log_learning_rate = arg.get_log_learning_rate(arg) arg.starter_learning_rate = arg.get_start_learning_rate(arg) print( '++> starter_learning_rate ', arg.starter_learning_rate ) ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) arg.decay_rate = arg.get_decay_rate(arg) arg.decay_steps = arg.get_decay_steps(arg) if arg.optimization_alg == 'GD': pass elif arg.optimization_alg=='Momentum': arg.use_nesterov = arg.get_use_nesterov() arg.momentum = arg.get_momentum(arg) print('arg.use_nesterov', arg.use_nesterov) print('arg.momentum', arg.momentum) elif arg.optimization_alg == 'Adadelta': arg.rho = arg.get_rho(arg) print('arg.rho', arg.rho) elif arg.optimization_alg == 'Adagrad': #only has learning rate pass elif arg.optimization_alg == 'Adam': arg.beta1 = arg.get_beta1(arg) arg.beta2 = arg.get_beta2(arg) print('arg.beta1', arg.beta1) print('arg.beta2', arg.beta2) elif arg.optimization_alg == 'RMSProp': arg.decay = arg.get_decay(arg) arg.momentum = arg.get_momentum(arg) print('arg.decay', arg.decay) print('arg.momentum', arg.momentum) else: pass ############################## # if data_file_name == 'task_MNIST_flat_auto_encoder': # PCA_errors = {12:24.8254684915, 48:9.60052317906, 96:4.72118325768} # if len(units_list) == 1: # k = units_list[0] # else: # k = units_list[0] * len(units_list) # if not k in PCA_errors.keys(): # print( 'COMPUTING PCA... k = ', k) # X_reconstruct_pca, _, _ = mtf. 
get_reconstruction_pca(X_train,k=units_list[0]) # pca_error = mtf.report_l2_loss(Y=X_train,Y_pred=X_reconstruct_pca) # PCA_errors[k] = pca_error # else: # pca_error = PCA_errors[k] # print( '*************> PCA error: ', pca_error) # else: # pca_error = None # rbf_error = None # # hbf1_error = None # if model == 'hbf': # #error, Y_pred, Kern, C, subsampled_data_points = report_RBF_error_from_data(X_train, dims, stddev) # if len(units_list) > 1: # k = units_list[0]*len(units_list) # print( 'RBF units = ', k) # nb_units = [None, k] # rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1]) # print( rbf_error) # hbf1={12:26.7595} # if k in hbf1.keys(): # hbf1_error = hbf1[k] # else: # nb_units = dims # rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1]) ## pca_error = None rbf_error = None hbf1_error = None ## Make Model if arg.mdl == 'standard_nn': arg.dims = [D]+arg.units+[D_out] arg.mu_init_list = arg.get_W_mu_init(arg) arg.std_init_list = arg.get_W_std_init(arg) arg.b_init = arg.get_b_init(arg) float_type = tf.float64 x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D nb_layers = len(arg.dims)-1 nb_hidden_layers = nb_layers-1 (inits_C,inits_W,inits_b) = mtf.get_initilizations_standard_NN(init_type=arg.init_type,dims=arg.dims,mu=arg.mu_init_list,std=arg.std_init_list,b_init=arg.b_init, X_train=X_train, Y_train=Y_train) with tf.name_scope("standardNN") as scope: mdl = mtf.build_standard_NN(arg, x,arg.dims,(None,inits_W,inits_b),phase_train,arg.trainable_bn) mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0]) inits_S = inits_b elif arg.mdl == 'hbf': arg.dims = [D]+arg.units+[D_out] trainable_S = True if (arg.trainable_S=='train_S') else False arg.b_init = arg.get_b_init(arg) arg.S_init = arg.b_init float_type = tf.float64 #arg.mu , arg.std = arg.get_W_mu_init(arg), arg.get_W_std_init(arg) x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D (inits_C,inits_W,inits_S,rbf_error) = mtf.get_initilizations_HBF(init_type=arg.init_type,dims=arg.dims,mu=arg.mu,std=arg.std,b_init=arg.b_init,S_init=arg.S_init, X_train=X_train, Y_train=Y_train, train_S_type=arg.train_S_type) #print(inits_W) nb_layers = len(arg.dims)-1 nb_hidden_layers = nb_layers-1 with tf.name_scope("HBF") as scope: mdl = mtf.build_HBF2(x,arg.dims,(inits_C,inits_W,inits_S),phase_train,arg.trainable_bn,trainable_S) mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0]) elif arg.mdl == 'binary_tree_4D': pass elif arg.mdl == 'binary_tree_4D_conv_hidden_layer': print( 'binary_tree_4D' ) inits_S = None pca_error, rbf_error = None, None float_type = tf.float32 # Data sizes needed for reshaping N_cv, N_test = X_cv.shape[0], X_test.shape[0] # reshape data sets X_train, X_cv, X_test = X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1) x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input') # arg.stride_convd1, arg.filter_size = 2, 2 #fixed for Binary Tree BT #arg.mean, arg.stddev = arg.get_W_mu_init(arg), arg.get_W_std_init(arg) with tf.name_scope("build_binary_model") as scope: mdl = mtf.build_binary_tree_4D_hidden_layer(x,arg,phase_train=phase_train) arg.dims = [D]+[arg.nb_filters]+[arg.nb_final_hidden_units]+[D_out] elif arg.mdl == 'binary_tree_4D_conv_hidden_layer_automatic': print( arg.scope_name ) inits_S = None pca_error, rbf_error = None, None float_type = tf.float32 # N_cv, N_test = X_cv.shape[0], X_test.shape[0] X_train, X_cv, X_test = 
X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1) x = tf.placeholder(tf.float32, shape=[None,1,D,1], name='x-input') #[M, 1, D, 1] # with tf.name_scope("mdl"+arg.scope_name) as scope: mdl = mtf.bt_mdl_conv(arg,x) arg.dims = [D]+arg.F[1:]+[D_out] elif arg.mdl == 'binary_tree_8D_conv_hidden_layer': print( arg.scope_name ) inits_S = None pca_error, rbf_error = None, None float_type = tf.float32 # N_cv, N_test = X_cv.shape[0], X_test.shape[0] X_train, X_cv, X_test = X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1) x = tf.placeholder(tf.float32, shape=[None,1,D,1], name='x-input') #[M, 1, D, 1] # with tf.name_scope("mdl"+arg.scope_name) as scope: mdl = mtf.bt_mdl_conv(arg,x) arg.dims = [D]+arg.F[1:]+[D_out] ## Output and Loss y = mdl y_ = tf.placeholder(float_type, shape=[None, D_out]) # (M x D) with tf.name_scope("L2_loss") as scope: l2_loss = tf.reduce_sum( tf.reduce_mean(tf.square(y_-y), 0) ) #l2_loss = (2.0/N_train)*tf.nn.l2_loss(y_-y) #l2_loss = tf.reduce_mean(tf.square(y_-y)) nb_params = count_number_trainable_params(y) results['nb_params'] = nb_params print( '---> nb_params ', nb_params ) ## with tf.name_scope("train") as scope: # If the argument staircase is True, then global_step / decay_steps is an integer division and the decayed earning rate follows a staircase function. ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) global_step = tf.Variable(0, trainable=False) learning_rate = tf.train.exponential_decay(learning_rate=arg.starter_learning_rate, global_step=global_step,decay_steps=arg.decay_steps, decay_rate=arg.decay_rate, staircase=arg.staircase) # Passing global_step to minimize() will increment it at each step. if arg.optimization_alg == 'GD': opt = tf.train.GradientDescentOptimizer(learning_rate) elif arg.optimization_alg == 'Momentum': opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=arg.momentum,use_nesterov=arg.use_nesterov) elif arg.optimization_alg == 'Adadelta': opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate, rho=arg.rho, epsilon=1e-08, use_locking=False, name='Adadelta') elif arg.optimization_alg == 'Adam': opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=arg.beta1, beta2=arg.beta2, epsilon=1e-08, name='Adam') elif arg.optimization_alg == 'Adagrad': opt = tf.train.AdagradOptimizer(learning_rate) elif arg.optimization_alg == 'RMSProp': opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=arg.decay, momentum=arg.momentum, epsilon=1e-10, name='RMSProp') ## TODO if arg.re_train == 're_train' and arg.data_file_name == 'hrushikesh': print( 'data_file_name: ', data_file_name) print( 're_train: ', re_train) var_list = [v for v in tf.all_variables() if v.name == 'C:0'] #train_step = opt.minimize(l2_loss, var_list=var_list) else: train_step = opt.minimize(l2_loss, global_step=global_step) ## with tf.name_scope('learning_rate'): learning_rate_scalar_summary = tf.scalar_summary("learning_rate", learning_rate) with tf.name_scope("l2_loss") as scope: ls_scalar_summary = tf.scalar_summary("l2_loss", l2_loss) if arg.data_file_name == 'task_MNIST_flat_auto_encoder': with tf.name_scope('input_reshape'): x_image = tf.to_float(x, name='ToFloat') image_shaped_input_x = tf.reshape(x_image, [-1, 28, 28, 1]) # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None) tf.image_summary('input', image_shaped_input_x, 10) with tf.name_scope('reconstruct'): y_image = tf.to_float(y, name='ToFloat') 
image_shaped_input_y = tf.reshape(y_image, [-1, 28, 28, 1]) # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None) tf.image_summary('reconstruct', image_shaped_input_y, 10) def register_all_variables_and_grads(y): all_vars = tf.all_variables() grad_vars = opt.compute_gradients(y,all_vars) #[ (gradient,variable) ] for (dldw,v) in grad_vars: if dldw is not None: prefix_name = 'derivative_'+v.name suffix_text = 'dJd'+v.name #mtf.put_summaries(var=tf.sqrt( tf.reduce_sum(tf.square(dldw)) ),prefix_name=prefix_name,suffix_text=suffix_text) mtf.put_summaries(var=tf.abs(dldw),prefix_name=prefix_name,suffix_text='_abs_'+suffix_text) tf.histogram_summary('hist'+prefix_name, dldw) register_all_variables_and_grads(y) ## TRAIN if phase_train is not None: #DO BN feed_dict_train = {x:X_train, y_:Y_train, phase_train: False} feed_dict_cv = {x:X_cv, y_:Y_cv, phase_train: False} feed_dict_test = {x:X_test, y_:Y_test, phase_train: False} else: #Don't do BN feed_dict_train = {x:X_train, y_:Y_train} feed_dict_cv = {x:X_cv, y_:Y_cv} feed_dict_test = {x:X_test, y_:Y_test} def get_batch_feed(X, Y, M, phase_train): mini_batch_indices = np.random.randint(M,size=M) Xminibatch = X[mini_batch_indices,:] # ( M x D^(0) ) Yminibatch = Y[mini_batch_indices,:] # ( M x D^(L) ) if phase_train is not None: #DO BN feed_dict = {x: Xminibatch, y_: Yminibatch, phase_train: True} else: #Don't do BN feed_dict = {x: Xminibatch, y_: Yminibatch} return feed_dict def print_messages(*args): for i, msg in enumerate(args): print('>%s'%msg, flush=True) if arg.use_tensorboard: if tf.gfile.Exists('/tmp/mdl_logs'): tf.gfile.DeleteRecursively('/tmp/mdl_logs') tf.gfile.MakeDirs('/tmp/mdl_logs') tf.add_check_numerics_ops() # Add ops to save and restore all the variables. if arg.mdl_save: saver = tf.train.Saver(max_to_keep=arg.max_to_keep) start_time = time.time() print() #file_for_error = './ray_error_file.txt' if arg.save_config_args: arg_dict = dict(arg).copy() arg_dict = get_remove_functions_from_dict(arg_dict) pickle.dump( arg_dict, open( "pickle-slurm-%s_%s.p"%(arg.slurm_jobid,arg.slurm_array_task_id) , "wb" ) ) #with open('json-slurm-%s_%s.json', 'w+') as f_json: # json.dump(results,f_json,sort_keys=True, indent=2, separators=(',', ': ')) with open(path+errors_pretty, 'w+') as f_err_msgs: #with open(file_for_error, 'w+') as f_err_msgs: #with tf.Session() as sess: sess = tf.Session() ## prepare writers and fetches if arg.use_tensorboard: merged = tf.merge_all_summaries() #writer = tf.train.SummaryWriter(tensorboard_data_dump, sess.graph) train_writer = tf.train.SummaryWriter(arg.tensorboard_data_dump_train, sess.graph) test_writer = tf.train.SummaryWriter(arg.tensorboard_data_dump_test, sess.graph) ## fetches_train = [merged, l2_loss] fetches_cv = l2_loss fetches_test = [merged, l2_loss] else: fetches_train = l2_loss fetches_cv = l2_loss fetches_test = l2_loss sess.run( tf.initialize_all_variables() ) for i in range(arg.steps): ## Create fake data for y = W.x + b where W = 2, b = 0 #(batch_xs, batch_ys) = get_batch_feed(X_train, Y_train, M, phase_train) feed_dict_batch = get_batch_feed(X_train, Y_train, arg.M, phase_train) ## Train if i%arg.report_error_freq == 0: if arg.use_tensorboard: (summary_str_train,train_error) = sess.run(fetches=fetches_train, feed_dict=feed_dict_train) cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv) (summary_str_test,test_error) = sess.run(fetches=fetches_test, feed_dict=feed_dict_test) train_writer.add_summary(summary_str_train, i) test_writer.add_summary(summary_str_test, i) else:
train_error = sess.run(fetches=fetches_train, feed_dict=feed_dict_train) cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv) test_error = sess.run(fetches=fetches_test, feed_dict=feed_dict_test) current_learning_rate = sess.run(fetches=learning_rate) loss_msg = "=> Mdl*%s*-units%s, task: %s, step %d/%d, train err %g, cv err: %g test err %g"%(arg.mdl,arg.dims,arg.data_file_name,i,arg.steps,train_error,cv_error,test_error) mdl_info_msg = "Act: %s, Opt:%s, BN %s, BN_trainable: %s After%d/%d iteration,Init: %s, current_learning_rate %s, M %s, decay_rate %s, decay_steps %s, nb_params %s" % (arg.act.__name__,arg.optimization_alg,arg.bn,arg.trainable_bn,i,arg.steps,arg.init_type,current_learning_rate,arg.M,arg.decay_rate,arg.decay_steps,nb_params) errors_to_beat = 'BEAT: hbf1_error: %s RBF error: %s PCA error: %s '%(hbf1_error, rbf_error,pca_error) print_messages(loss_msg, mdl_info_msg, errors_to_beat) print('S: ', inits_S, flush=True) print() # store results results['train_errors'].append( float(train_error) ) results['cv_errors'].append( float(cv_error) ) results['test_errors'].append( float(test_error) ) # write errors to pretty print f_err_msgs.write(loss_msg+'\n') f_err_msgs.write(mdl_info_msg+'\n') if any_is_NaN(train_error,cv_error,test_error): # if its a nan make sure to stop script print('nan_found') break if arg.mdl_save: save_path = saver.save(sess, path+mdl_dir+'/model.ckpt',global_step=i) if arg.use_tensorboard: sess.run(fetches=[merged,train_step], feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) else: sess.run(fetches=train_step, feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) sess.close() _, best_train, best_cv, best_test = arg.get_errors_from(results) results['best_train'], results['best_cv'], results['best_test'] = best_train, best_cv, best_test print('End of main') git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']) results['git_hash'] = str(git_hash) #results['tf_rand_seed'] = tf_rand_seed # seconds = (time.time() - start_time) minutes = seconds/ 60 hours = minutes/ 60 print("--- %s seconds ---" % seconds ) print("--- %s minutes ---" % minutes ) print("--- %s hours ---" % hours ) ## dump results to JSON results['seconds'] = seconds results['minutes'] = minutes results['hours'] = hours #print results #results['arg'] = arg arg_dict = dict(arg) arg_dict = get_remove_functions_from_dict(arg_dict) results['arg_dict'] = arg_dict with open(path+json_file, 'w+') as f_json: print('Writing Json') print('path+json_file', path+json_file) json.dump(results,f_json,indent=2, separators=(',', ': ')) print( '\a') #makes beep #print(results) print('get_errors_from: ', arg.get_errors_from.__name__) print('best results: train, cv, test: ', best_train, best_cv, best_test )
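The NaN early-exit above relies on a helper like `any_is_NaN`, whose definition is not shown in the snippet; a plausible sketch of it (an assumption, not the original implementation):

import math

def any_is_NaN(*values):
    # True if any scalar error came back as NaN; training should stop then,
    # since later iterates will typically stay NaN.
    return any(math.isnan(float(v)) for v in values)

print(any_is_NaN(0.12, 0.15, float('nan')))  # True -> break out of the loop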
def main(_): NUM_INPUTS = 4 NUM_CLASSES = 9 # the data, split between train and test sets x_train, y_train, x_test, y_test = generate_simulated_data() x_train = x_train.astype('uint8') x_test = x_test.astype('uint8') print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = y_train.astype('int64') y_test = y_test.astype('int64') tf.logging.set_verbosity(tf.logging.INFO) sess = tf.InteractiveSession() # Figure out the learning rates for each training phase. Since it's often # effective to have high learning rates at the start of training, followed by # lower levels towards the end, the number of steps and learning rates can be # specified as comma-separated lists to define the rate at each stage. For # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001 # will run 13,000 training loops in total, with a rate of 0.001 for the first # 10,000, and 0.0001 for the final 3,000. training_steps_list = list( map(int, FLAGS.how_many_training_steps.split(','))) learning_rates_list = list(map(float, FLAGS.learning_rate.split(','))) if len(training_steps_list) != len(learning_rates_list): raise Exception( '--how_many_training_steps and --learning_rate must be equal length ' 'lists, but are %d and %d long instead' % (len(training_steps_list), len(learning_rates_list))) input_placeholder = tf.placeholder(tf.float32, [None, NUM_INPUTS], name='graph_input') if FLAGS.quantize: input_min, input_max = 0, 256 graph_input = tf.fake_quant_with_min_max_args(input_placeholder, input_min, input_max) else: graph_input = input_placeholder logits, dropout_prob = models.create_three_fc_model(graph_input, NUM_INPUTS, 20, 20, NUM_CLASSES, is_training=True) # Define loss and optimizer ground_truth_input = tf.placeholder(tf.int64, [None], name='groundtruth_input') # Optionally we can add runtime checks to spot when NaNs or other symptoms of # numerical errors start occurring during training. control_dependencies = [] if FLAGS.check_nans: checks = tf.add_check_numerics_ops() control_dependencies = [checks] # Create the back propagation and training evaluation machinery in the graph. 
with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( labels=ground_truth_input, logits=logits) if FLAGS.quantize: tf.contrib.quantize.create_training_graph(quant_delay=0) with tf.name_scope('train'), tf.control_dependencies(control_dependencies): learning_rate_input = tf.placeholder(tf.float32, [], name='learning_rate_input') train_step = tf.train.GradientDescentOptimizer( learning_rate_input).minimize(cross_entropy_mean) predicted_indices = tf.argmax(logits, 1) correct_prediction = tf.equal(predicted_indices, ground_truth_input) confusion_matrix = tf.confusion_matrix(ground_truth_input, predicted_indices, num_classes=NUM_CLASSES) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) with tf.get_default_graph().name_scope('eval'): tf.summary.scalar('cross_entropy', cross_entropy_mean) tf.summary.scalar('accuracy', evaluation_step) global_step = tf.train.get_or_create_global_step() increment_global_step = tf.assign(global_step, global_step + 1) saver = tf.train.Saver(tf.global_variables()) # Merge all the summaries and write them out to /tmp/retrain_logs (by default) merged_summaries = tf.summary.merge_all(scope='eval') train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph) validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation') tf.global_variables_initializer().run() start_step = 1 tf.logging.info('Training from step: %d ', start_step) # Save graph.pbtxt. tf.train.write_graph(sess.graph_def, FLAGS.train_dir, FLAGS.model_architecture + '.pbtxt') # Training loop. training_steps_max = np.sum(training_steps_list) for training_step in xrange(start_step, training_steps_max + 1): # Figure out what the current learning rate is. training_steps_sum = 0 for i in range(len(training_steps_list)): training_steps_sum += training_steps_list[i] if training_step <= training_steps_sum: learning_rate_value = learning_rates_list[i] break # Pull the audio samples we'll use for training. index = (training_step * FLAGS.batch_size) % x_train.shape[0] train_fingerprints = x_train[index:index + FLAGS.batch_size] train_ground_truth = y_train[index:index + FLAGS.batch_size] # Run the graph with this batch of training data. train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run( [ merged_summaries, evaluation_step, cross_entropy_mean, train_step, increment_global_step, ], feed_dict={ graph_input: train_fingerprints, ground_truth_input: train_ground_truth, learning_rate_input: learning_rate_value, dropout_prob: 0.5 }) train_writer.add_summary(train_summary, training_step) tf.logging.info( 'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' % (training_step, learning_rate_value, train_accuracy * 100, cross_entropy_value)) is_last_step = (training_step == training_steps_max) if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step: set_size = y_test.shape[0] total_accuracy = 0 total_conf_matrix = None for i in xrange(0, set_size, FLAGS.batch_size): validation_fingerprints = x_test[i:i + FLAGS.batch_size] validation_ground_truth = y_test[i:i + FLAGS.batch_size] # Run a validation step and capture training summaries for TensorBoard # with the `merged` op. 
validation_summary, validation_accuracy, conf_matrix = sess.run( [merged_summaries, evaluation_step, confusion_matrix], feed_dict={ graph_input: validation_fingerprints, ground_truth_input: validation_ground_truth, dropout_prob: 1.0 }) validation_writer.add_summary(validation_summary, training_step) batch_size = min(FLAGS.batch_size, set_size - i) total_accuracy += (validation_accuracy * batch_size) / set_size if total_conf_matrix is None: total_conf_matrix = conf_matrix else: total_conf_matrix += conf_matrix tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix)) tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' % (training_step, total_accuracy * 100, set_size)) # Save the model checkpoint periodically. if (training_step % FLAGS.save_step_interval == 0 or training_step == training_steps_max): checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '.ckpt') tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step) saver.save(sess, checkpoint_path, global_step=training_step)
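The `--check_nans` wiring earlier in this example hangs the numerics check off the train step; a minimal sketch of that control-dependency pattern in isolation (toy model, hypothetical names):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
labels = tf.placeholder(tf.int64, [None])
logits = tf.layers.dense(x, 2)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

checks = tf.add_check_numerics_ops()
with tf.control_dependencies([checks]):
    # Every run of train_step now also runs the CheckNumerics ops.
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)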
def initialize(self, n_iter=1000, n_print=None, scale=None, auto_transform=True, logdir=None, log_timestamp=True, log_vars=None, debug=False, optimizer=None, var_list=None, use_prettytensor=False, global_step=None, n_samples=1, kl_scaling=None, maxnorm=5.): if kl_scaling is None: kl_scaling = {} if n_samples <= 0: raise ValueError( "n_samples should be greater than zero: {}".format(n_samples)) self.n_samples = n_samples self.kl_scaling = kl_scaling # from inference.py self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale self.transformations = {} if auto_transform: latent_vars = self.latent_vars.copy() self.latent_vars = {} self.latent_vars_unconstrained = {} for z, qz in six.iteritems(latent_vars): if hasattr(z, 'support') and hasattr(qz, 'support') and \ z.support != qz.support and qz.support != 'point': z_unconstrained = transform(z) self.transformations[z] = z_unconstrained if qz.support == "points": qz_unconstrained = qz else: qz_unconstrained = transform(qz) self.latent_vars_unconstrained[ z_unconstrained] = qz_unconstrained if z_unconstrained != z: qz_constrained = transform( qz_unconstrained, bijectors.Invert(z_unconstrained.bijector)) try: qz_constrained.params = \ z_unconstrained.bijector.inverse( qz_unconstrained.params) except: pass else: qz_constrained = qz_unconstrained self.latent_vars[z] = qz_constrained else: self.latent_vars[z] = qz self.latent_vars_unconstrained[z] = qz del latent_vars if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.expanduser(logdir) logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._summary_key = tf.get_default_graph().unique_name("summaries") self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() self.reset = [tf.variables_initializer([self.t])] # from variational_inference.py if var_list is None: var_list = set() trainables = tf.trainable_variables() for z, qz in six.iteritems(self.latent_vars): var_list.update(get_variables(z, collection=trainables)) var_list.update(get_variables(qz, collection=trainables)) for x, qx in six.iteritems(self.data): if isinstance(x, RandomVariable) and \ not isinstance(qx, RandomVariable): var_list.update(get_variables(x, collection=trainables)) var_list = list(var_list) self.loss, grads_and_vars = self.build_loss_and_gradients(var_list) clipped_grads_and_vars = [] for grad, var in grads_and_vars: if "kernel" in var.name or "bias" in var.name: clipped_grads_and_vars.append((tf.clip_by_norm(grad, maxnorm, axes=[0]), var)) else: clipped_grads_and_vars.append((grad, var)) # for grad, var in grads_and_vars: # clipped_grads_and_vars.append( # (tf.clip_by_value(grad, -1000., 1000.), var)) del grads_and_vars if self.logging: tf.summary.scalar("loss", self.loss, collections=[self._summary_key]) for grad, var in clipped_grads_and_vars: tf.summary.histogram("gradient/" + var.name.replace(':', '/'), grad, collections=[self._summary_key]) tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'), tf.norm(grad), collections=[self._summary_key]) self.summarize = 
tf.summary.merge_all(key=self._summary_key) if optimizer is None and global_step is None: global_step = tf.Variable(0, trainable=False, name="global_step") if isinstance(global_step, tf.Variable): starter_learning_rate = 0.1 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 100, 0.9, staircase=True) else: learning_rate = 0.01 # Build optimizer. if optimizer is None: optimizer = tf.train.AdamOptimizer(learning_rate) elif isinstance(optimizer, str): if optimizer == 'gradientdescent': optimizer = tf.train.GradientDescentOptimizer(learning_rate) elif optimizer == 'adadelta': optimizer = tf.train.AdadeltaOptimizer(learning_rate) elif optimizer == 'adagrad': optimizer = tf.train.AdagradOptimizer(learning_rate) elif optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9) elif optimizer == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate) elif optimizer == 'ftrl': optimizer = tf.train.FtrlOptimizer(learning_rate) elif optimizer == 'rmsprop': optimizer = tf.train.RMSPropOptimizer(learning_rate) else: raise ValueError('Optimizer class not found:', optimizer) elif not isinstance(optimizer, tf.train.Optimizer): raise TypeError( "Optimizer must be str, tf.train.Optimizer, or None.") with tf.variable_scope(None, default_name="optimizer") as scope: if not use_prettytensor: self.train = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step) else: import prettytensor as pt self.train = pt.apply_optimizer(optimizer, losses=[self.loss], global_step=global_step, var_list=var_list) self.reset.append( tf.variables_initializer( tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope.name)))
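The selective clipping above touches only variables whose names contain "kernel" or "bias"; a minimal sketch of the same filter on a toy model (`maxnorm = 5.` mirrors the default argument above):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3])
y = tf.layers.dense(x, 1)  # creates variables named 'dense/kernel' and 'dense/bias'
loss = tf.reduce_mean(tf.square(y))

opt = tf.train.AdamOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss)
maxnorm = 5.
clipped_grads_and_vars = [
    # Clip along axis 0 (per output column), as in the snippet above.
    (tf.clip_by_norm(g, maxnorm, axes=[0]), v)
    if ('kernel' in v.name or 'bias' in v.name) else (g, v)
    for g, v in grads_and_vars]
train = opt.apply_gradients(clipped_grads_and_vars)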
def main(arg): results = {'train_errors':[], 'cv_errors':[],'test_errors':[]} path, errors_pretty, mdl_dir, json_file = set_experiment_folders(arg) # try to make directory, if it exists do NOP mtf.make_and_check_dir(path=path) mtf.make_and_check_dir(path=path+mdl_dir) # JSON results structure #results_dic = mtf.fill_results_dic_with_np_seed(np_rnd_seed=np.random.get_state(), results=results) ## Data sets and task print( '----====> TASK NAME: %s' % arg.task_name ) (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(arg.task_name) if arg.data_normalize == 'normalize_input': X_train, X_cv, X_test = preprocessing.scale(X_train), preprocessing.scale(X_cv), preprocessing.scale(X_test) (N_train,D) = X_train.shape (N_test,D_out) = Y_test.shape print( '(N_train,D) = ', (N_train,D) ) print( '(N_test,D_out) = ', (N_test,D_out) ) ## units_list = arg.units_list dims = [D]+arg.units_list+[D_out] mu = arg.W_mu_init(dims, arg) std = arg.W_std_init(dims, arg) b_init = arg.b_init() phase_train = tf.placeholder(tf.bool, name='phase_train') if arg.bn else None steps = np.random.randint(low=arg.steps_low ,high=arg.steps_high) M = np.random.randint(low=arg.M_low , high=arg.M_high) arg.M = M log_learning_rate = np.random.uniform(low=arg.low_log_const_learning_rate, high=arg.high_log_const_learning_rate) starter_learning_rate = 10**log_learning_rate print( '++> starter_learning_rate ', starter_learning_rate ) ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) decay_rate = np.random.uniform(low=arg.decay_rate_low, high=arg.decay_rate_high) arg.decay_steps_high decay_steps = np.random.randint(low=arg.decay_steps_low(arg), high=arg.decay_steps_high(arg) ) staircase = arg.staircase if optimization_alg == 'GD': pass elif optimization_alg=='Momentum': use_nesterov = arg.use_nesterov momentum=np.random.uniform(low=0.1, high=0.99) results['momentum']=float(momentum) elif optimization_alg == 'Adadelta': rho=np.random.uniform(low=0.4, high=0.99) results['rho']=float(rho) elif optimization_alg == 'Adagrad': #only has learning rate pass elif optimization_alg == 'Adam': beta1 = arg.get_beta1(arg) beta2 = arg.get_beta2(arg) results['beta1']=float(beta1) results['beta2']=float(beta2) elif optimization_alg == 'RMSProp': decay = np.random.uniform(low=arg.decay_loc,high=arg.decay_high) momentum = np.random.uniform(low=arg.momentum_low,high=arg.momentum_high) results['decay']=float(decay) results['momentum']=float(momentum) else: pass results['range_learning_rate'] = [low_const_learning_rate, high_const_learning_rate] ############################## if task_name == 'task_MNIST_flat_auto_encoder': PCA_errors = {12:24.8254684915, 48:9.60052317906, 96:4.72118325768} if len(units_list) == 1: k = units_list[0] else: k = units_list[0] * len(units_list) if not k in PCA_errors.keys(): print( 'COMPUTING PCA... k = ', k) X_reconstruct_pca, _, _ = mtf. 
get_reconstruction_pca(X_train,k=units_list[0]) pca_error = mtf.report_l2_loss(Y=X_train,Y_pred=X_reconstruct_pca) PCA_errors[k] = pca_error else: pca_error = PCA_errors[k] print( '*************> PCA error: ', pca_error) else: pca_error = None rbf_error = None hbf1_error = None if model == 'hbf': #error, Y_pred, Kern, C, subsampled_data_points = report_RBF_error_from_data(X_train, dims, stddev) if len(units_list) > 1: k = units_list[0]*len(units_list) print( 'RBF units = ', k) nb_units = [None, k] rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1]) print( rbf_error) hbf1={12:26.7595} if k in hbf1.keys(): hbf1_error = hbf1[k] else: nb_units = dims rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1]) S_init = b_init ## ## Make Model nb_layers = len(dims)-1 nb_hidden_layers = nb_layers-1 print( '-----> Running model: %s. (nb_hidden_layers = %d, nb_layers = %d)' % (model,nb_hidden_layers,nb_layers) ) print( '-----> Units: %s)' % (dims) ) if model == 'standard_nn': rbf_error = None #tensorboard_data_dump = '/tmp/standard_nn_logs' float_type = tf.float64 x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D (inits_C,inits_W,inits_b) = mtf.get_initilizations_standard_NN(init_type=init_type,dims=dims,mu=mu,std=std,b_init=b_init,S_init=S_init, X_train=X_train, Y_train=Y_train) with tf.name_scope("standardNN") as scope: mdl = mtf.build_standard_NN(x,dims,(inits_C,inits_W,inits_b),phase_train,trainable_bn) mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0]) inits_S = inits_b elif model == 'hbf': trainable_S = True if (arg.trainable_S=='train_S') else False #tensorboard_data_dump = '/tmp/hbf_logs' float_type = tf.float64 x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D (inits_C,inits_W,inits_S,rbf_error) = mtf.get_initilizations_HBF(init_type=init_type,dims=dims,mu=mu,std=std,b_init=b_init,S_init=S_init, X_train=X_train, Y_train=Y_train, train_S_type=train_S_type) print(inits_W) with tf.name_scope("HBF") as scope: mdl = mtf.build_HBF2(x,dims,(inits_C,inits_W,inits_S),phase_train,trainable_bn,trainable_S) mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0]) elif model == 'binary_tree_4D_conv': print( 'binary_tree_4D') #tensorboard_data_dump = '/tmp/hbf_logs' inits_S = None pca_error = None rbf_error = None float_type = tf.float32 # things that need reshaping N_cv = X_cv.shape[0] N_test = X_test.shape[0] # X_train = X_train.reshape(N_train,1,D,1) #Y_train = Y_train.reshape(N_train,1,D,1) X_cv = X_cv.reshape(N_cv,1,D,1) #Y_cv = Y_cv.reshape(N_cv,1,D,1) X_test = X_test.reshape(N_test,1,D,1) #Y_test = Y_test.reshape(N_test,1,D,1) x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input') # filter_size = 2 #fixed for Binary Tree BT #nb_filters = nb_filters mean, stddev = bn_tree_init_stats stddev = float( np.random.uniform(low=0.001, high=stddev) ) print( 'stddev', stddev) x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input') with tf.name_scope("build_binary_model") as scope: mdl = mtf.build_binary_tree(x,filter_size,nb_filters,mean,stddev,stride_convd1=2,phase_train=phase_train,trainable_bn=trainable_bn) # dims = [D]+[nb_filters]+[D_out] results['nb_filters'] = nb_filters elif model == 'binary_tree_D8': #tensorboard_data_dump = '/tmp/hbf_logs' inits_S = None pca_error = None rbf_error = None float_type = tf.float32 # things that need reshaping N_cv = X_cv.shape[0] N_test = X_test.shape[0] # X_train = 
X_train.reshape(N_train,1,D,1) #Y_train = Y_train.reshape(N_train,1,D,1) X_cv = X_cv.reshape(N_cv,1,D,1) #Y_cv = Y_cv.reshape(N_cv,1,D,1) X_test = X_test.reshape(N_test,1,D,1) #Y_test = Y_test.reshape(N_test,1,D,1) x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input') # filter_size = 2 #fixed for Binary Tree BT nb_filters1,nb_filters2 = nb_filters mean1,stddev1,mean2,stddev2,mean3,stddev3 = bn_tree_init_stats x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input') with tf.name_scope("binary_tree_D8") as scope: mdl = mtf.build_binary_tree_8D(x,nb_filters1,nb_filters2,mean1,stddev1,mean2,stddev2,mean3,stddev3,stride_conv1=2) # dims = [D]+nb_filters+[D_out] results['nb_filters'] = nb_filters ## Output and Loss y = mdl y_ = tf.placeholder(float_type, shape=[None, D_out]) # (M x D) with tf.name_scope("L2_loss") as scope: l2_loss = tf.reduce_sum( tf.reduce_mean(tf.square(y_-y), 0) ) #l2_loss = (2.0/N_train)*tf.nn.l2_loss(y_-y) #l2_loss = tf.reduce_mean(tf.square(y_-y)) ## with tf.name_scope("train") as scope: # starter_learning_rate = 0.0000001 # decay_rate = 0.9 # decay_steps = 100 # staircase = True # decay_steps = 10000000 # staircase = False # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) global_step = tf.Variable(0, trainable=False) learning_rate = tf.train.exponential_decay(learning_rate=starter_learning_rate, global_step=global_step,decay_steps=decay_steps, decay_rate=decay_rate, staircase=staircase) # Passing global_step to minimize() will increment it at each step. if optimization_alg == 'GD': opt = tf.train.GradientDescentOptimizer(learning_rate) elif optimization_alg == 'Momentum': opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=momentum,use_nesterov=use_nesterov) elif optimization_alg == 'Adadelta': opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate, rho=rho, epsilon=1e-08, use_locking=False, name='Adadelta') elif optimization_alg == 'Adam': opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=1e-08, name='Adam') elif optimization_alg == 'Adagrad': opt = tf.train.AdagradOptimizer(learning_rate) elif optimization_alg == 'RMSProp': opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=decay, momentum=momentum, epsilon=1e-10, name='RMSProp') ## if re_train == 're_train' and task_name == 'hrushikesh': print( 'task_name: ', task_name) print( 're_train: ', re_train) var_list = [v for v in tf.all_variables() if v.name == 'C:0'] #train_step = opt.minimize(l2_loss, var_list=var_list) else: train_step = opt.minimize(l2_loss, global_step=global_step) ## with tf.name_scope("l2_loss") as scope: ls_scalar_summary = tf.scalar_summary("l2_loss", l2_loss) if task_name == 'task_MNIST_flat_auto_encoder': with tf.name_scope('input_reshape'): x_image = tf.to_float(x, name='ToFloat') image_shaped_input_x = tf.reshape(x_image, [-1, 28, 28, 1]) # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None) tf.image_summary('input', image_shaped_input_x, 10) with tf.name_scope('reconstruct'): y_image = tf.to_float(y, name='ToFloat') image_shaped_input_y = tf.reshape(y_image, [-1, 28, 28, 1]) # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None) tf.image_summary('reconstruct', image_shaped_input_y, 10) def register_all_variables_and_grads(y): all_vars = tf.all_variables() grad_vars = opt.compute_gradients(y,all_vars) #[ (gradient,variable) ] for (dldw,v) in grad_vars: if dldw is not None: prefix_name = 'derivative_'+v.name suffix_text =
'dJd'+v.name #mtf.put_summaries(var=tf.sqrt( tf.reduce_sum(tf.square(dldw)) ),prefix_name=prefix_name,suffix_text=suffix_text) mtf.put_summaries(var=tf.abs(dldw),prefix_name=prefix_name,suffix_text='_abs_'+suffix_text) tf.histogram_summary('hist'+prefix_name, dldw) register_all_variables_and_grads(y) ## TRAIN if phase_train is not None: #DO BN feed_dict_train = {x:X_train, y_:Y_train, phase_train: False} feed_dict_cv = {x:X_cv, y_:Y_cv, phase_train: False} feed_dict_test = {x:X_test, y_:Y_test, phase_train: False} else: #Don't do BN feed_dict_train = {x:X_train, y_:Y_train} feed_dict_cv = {x:X_cv, y_:Y_cv} feed_dict_test = {x:X_test, y_:Y_test} def get_batch_feed(X, Y, M, phase_train): mini_batch_indices = np.random.randint(M,size=M) Xminibatch = X[mini_batch_indices,:] # ( M x D^(0) ) Yminibatch = Y[mini_batch_indices,:] # ( M x D^(L) ) if phase_train is not None: #DO BN feed_dict = {x: Xminibatch, y_: Yminibatch, phase_train: True} else: #Don't do BN feed_dict = {x: Xminibatch, y_: Yminibatch} return feed_dict def print_messages(*args): for i, msg in enumerate(args): print('>',msg) if use_tensorboard: if tf.gfile.Exists('/tmp/mdl_logs'): tf.gfile.DeleteRecursively('/tmp/mdl_logs') tf.gfile.MakeDirs('/tmp/mdl_logs') tf.add_check_numerics_ops() # Add ops to save and restore all the variables. if mdl_save: saver = tf.train.Saver(max_to_keep=max_to_keep) start_time = time.time() file_for_error = './ray_error_file.txt' #with open(path+errors_pretty, 'w+') as f_err_msgs: with open(file_for_error, 'w+') as f_err_msgs: with tf.Session() as sess: ## prepare writers and fetches if use_tensorboard: merged = tf.merge_all_summaries() #writer = tf.train.SummaryWriter(tensorboard_data_dump, sess.graph) train_writer = tf.train.SummaryWriter(tensorboard_data_dump_train, sess.graph) test_writer = tf.train.SummaryWriter(tensorboard_data_dump_test, sess.graph) ## fetches_train = [merged, l2_loss] fetches_cv = l2_loss fetches_test = [merged, l2_loss] else: fetches_train = l2_loss fetches_cv = l2_loss fetches_test = l2_loss sess.run( tf.initialize_all_variables() ) for i in range(steps): ## Create fake data for y = W.x + b where W = 2, b = 0 #(batch_xs, batch_ys) = get_batch_feed(X_train, Y_train, M, phase_train) feed_dict_batch = get_batch_feed(X_train, Y_train, M, phase_train) ## Train if i%report_error_freq == 0: if use_tensorboard: (summary_str_train,train_error) = sess.run(fetches=fetches_train, feed_dict=feed_dict_train) cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv) (summary_str_test,test_error) = sess.run(fetches=fetches_test, feed_dict=feed_dict_test) train_writer.add_summary(summary_str_train, i) test_writer.add_summary(summary_str_test, i) else: train_error = sess.run(fetches=fetches_train, feed_dict=feed_dict_train) cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv) test_error = sess.run(fetches=fetches_test, feed_dict=feed_dict_test) loss_msg = "Mdl*%s%s*-units%s, task: %s, step %d/%d, train err %g, cv err: %g test err %g"%(model,nb_hidden_layers,dims,task_name,i,steps,train_error,cv_error,test_error) mdl_info_msg = "Opt:%s, BN %s, BN_trainable: %s After%d/%d iteration,Init: %s" % (optimization_alg,bn,trainable_bn,i,steps,init_type) errors_to_beat = 'BEAT: hbf1_error: %s RBF error: %s PCA error: %s '%(hbf1_error, rbf_error,pca_error) print_messages(loss_msg, mdl_info_msg, errors_to_beat) #sys.stdout.flush() loss_msg+="\n" mdl_info_msg+="\n" errors_to_beat+="\n" print( 'S: ', inits_S) # store results #print type(train_error) results['train_errors'].append( 
float(train_error) ) #print type(cv_error) results['cv_errors'].append( float(cv_error) ) #print type(test_error) results['test_errors'].append( float(test_error) ) # write errors to pretty print f_err_msgs.write(loss_msg) f_err_msgs.write(mdl_info_msg) # save mdl if mdl_save: save_path = saver.save(sess, path+mdl_dir+'/model.ckpt',global_step=i) if use_tensorboard: sess.run(fetches=[merged,train_step], feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) else: sess.run(fetches=train_step, feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']) mtf.load_results_dic(results,git_hash=git_hash,dims=dims,mu=mu,std=std,init_constant=init_constant,b_init=b_init,S_init=S_init,\ init_type=init_type,model=model,bn=bn,path=path,\ tensorboard_data_dump_test=tensorboard_data_dump_test,tensorboard_data_dump_train=tensorboard_data_dump_train,\ report_error_freq=report_error_freq,steps=steps,M=M,optimization_alg=optimization_alg,\ starter_learning_rate=starter_learning_rate,decay_rate=decay_rate,staircase=staircase) ## results['job_name'] = job_name results['slurm_jobid'] = slurm_jobid results['slurm_array_task_id'] = slurm_array_task_id #results['tf_rand_seed'] = tf_rand_seed results['date'] = date results['bn'] = bn results['trainable_bn'] = trainable_bn seconds = (time.time() - start_time) minutes = seconds/ 60 hours = minutes/ 60 print("--- %s seconds ---" % seconds ) print("--- %s minutes ---" % minutes ) print("--- %s hours ---" % hours ) ## dump results to JSON results['seconds'] = seconds results['minutes'] = minutes results['hours'] = hours #print results with open(path+json_file, 'w+') as f_json: json.dump(results,f_json,sort_keys=True, indent=2, separators=(',', ': ')) print( '\a') #makes beep
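Both training scripts in this example build the same decayed learning-rate schedule, decayed_lr = starter_lr * decay_rate ** (global_step / decay_steps); a minimal sketch of it in isolation (hypothetical constants):

import tensorflow as tf

global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(
    learning_rate=0.01,      # starter learning rate
    global_step=global_step,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True)          # integer division -> stepwise decay
loss = tf.square(tf.Variable(1.0))
# Passing global_step to minimize() increments it on every update.
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)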
def build_fast_forward_pass(self, step=0.003): self.check_op = tf.add_check_numerics_ops() computations = [] bob = 1 if self.node_layers[0][0].t == 'b': bob = 2 with tf.name_scope("input"): self.input = tf.placeholder(dtype=tf.float64, shape=(None, max(self.input_order) + 1, bob), name='Input') # self.input = tf.placeholder(dtype=tf.float64, # shape=(len(self.input_order)*2), name='Input') #the input to be appended to each layer input_splits = [] self.conz = tf.placeholder(shape=[1], dtype=tf.float64) #compute the input weights = [] with tf.name_scope("projection"): n = tf.constant(0.0001, dtype=tf.float64) for L in range(len(self.weights)): if L != 0: drop = tf.round( tf.random_uniform(self.weights[L].get_shape(), self.conz, 1.0, dtype=tf.float64)) weights.append( tf.add( tf.nn.relu(tf.subtract(self.weights[L] * drop, n)), n)) else: weights.append( tf.add(tf.nn.relu(tf.subtract(self.weights[L], n)), n)) with tf.name_scope('nomralization'): self.sum_of_weights = [ tf.segment_sum(x, y) if x.get_shape()[0] > 0 else None for x, y in zip(weights, self.inds) ] sum_of_weights = self.sum_of_weights self.norm_weights = [ tf.div(x, tf.gather(y, z)) if x.get_shape()[0] > 0 else None for x, y, z in zip(weights, self.sum_of_weights, self.inds) ] with tf.name_scope('LEAFS_' + str(len(self.input_order))): input_gather = tf.reshape(tf.transpose( tf.gather(tf.transpose(self.input, (1, 0, 2)), self.input_swap), (1, 0, 2)), shape=(-1, len(self.input_order) * bob)) self.counting.append(input_gather) if self.node_layers[0][0].t == 'b': #if contiuous input_computation_w = tf.multiply(input_gather, weights[0]) input_computation_s = tf.transpose( tf.segment_sum(tf.transpose(input_computation_w), self.inds[0])) input_computation_n = tf.log( tf.div(input_computation_s, sum_of_weights[0])) computations.append(input_computation_n) else: pi = tf.constant(np.pi, tf.float64) mus = self.cont[0] sigs = tf.nn.relu(self.cont[1] - 0.01 ) + 0.01 #sigma can't be smaller than 0.01 #gassian formula input_computation_g = tf.div( tf.exp( tf.negative( tf.div(tf.square(input_gather - mus), 2 * tf.multiply(sigs, sigs)))), tf.sqrt(2 * pi) * sigs) + 0.000001 input_computation_n = tf.log(input_computation_g) computations.append(input_computation_n) #split the input computation and figure out which one goes in each layer j = 0 for i in range(len(self.input_layers)): a = tf.constant(j) b = self.input_layers[i] input_splits.append( tf.slice(input_computation_n, [0, a], [-1, b])) j += b current_computation = input_splits[0] for i in range(len(self.node_layers[1:])): L = i + 1 #the layer number if self.weights[L].get_shape()[0] == 0: #product with tf.name_scope("PRD" + str(self.inds[L].get_shape()[0])): #do a segment sum in the log domain current_computation = tf.transpose( tf.segment_sum(tf.transpose(current_computation), self.inds[L])) else: with tf.name_scope("SUM" + str(self.inds[L].get_shape()[0])): self.counting.append( current_computation) #stats for counting and cccp #get the max at each node maxes = tf.transpose( tf.segment_max(tf.transpose(current_computation), self.inds[L])) back_maxes = tf.transpose( tf.gather(tf.transpose(maxes), self.inds[L])) #sub the max at each node current_computation = tf.subtract(current_computation, back_maxes) #get out of log domain current_computation = tf.exp(current_computation) #multiply by weights current_computation = tf.multiply(current_computation, weights[L]) #compute sum node current_computation = tf.transpose( tf.segment_sum(tf.transpose(current_computation), self.inds[L])) #normalize 
current_computation = tf.div(current_computation, sum_of_weights[L]) #re-add the maxes that we took out after entering log domain current_computation = tf.add(tf.log(current_computation), maxes) #concatenate with inputs for the next layer current_computation = tf.concat( 1, [current_computation, input_splits[L]]) #shuffle so that next node is ready current_computation = tf.transpose( tf.gather(tf.transpose(current_computation), self.shuffle[L])) computations.append(current_computation) with tf.name_scope('root_node'): self.output = current_computation with tf.name_scope('loss'): if self.multiclass: self.labels = tf.placeholder(shape=(None, len(self.node_layers[-1])), dtype=tf.float64) self.loss = -tf.reduce_mean( tf.multiply(self.output, 0.1 * (self.labels - 1) + self.labels)) else: self.loss = -tf.reduce_mean(self.output) self.loss_summary = tf.scalar_summary(self.summ, self.loss) self.opt_val = self.optimizer(0.001).minimize(self.loss) self.computations = computations
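The sum layers above implement a log-sum-exp with per-node max subtraction so `tf.exp` cannot overflow; a minimal sketch of that trick alone (weights and normalization omitted; `inds` maps each child column to its parent sum node):

import tensorflow as tf

log_vals = tf.constant([[-1000.0, -1001.0, -2.0, -3.0]])  # batch x children
inds = tf.constant([0, 0, 1, 1])  # child -> parent sum node

maxes = tf.transpose(tf.segment_max(tf.transpose(log_vals), inds))
back_maxes = tf.transpose(tf.gather(tf.transpose(maxes), inds))
# After subtracting each segment's max, values lie in (-inf, 0], so exp() is safe.
stable = tf.exp(log_vals - back_maxes)
summed = tf.transpose(tf.segment_sum(tf.transpose(stable), inds))
log_out = tf.log(summed) + maxes  # log-sum-exp per node, no overflow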
def initialize(self, n_iter=1000, n_print=None, n_minibatch=None, scale=None, logdir=None, debug=False): """Initialize inference algorithm. Parameters ---------- n_iter : int, optional Number of iterations for algorithm. n_print : int, optional Number of iterations for each print progress. To suppress print progress, specify 0. Default is ``int(n_iter / 10)``. n_minibatch : int, optional Number of samples for data subsampling. Default is to use all the data. ``n_minibatch`` is available only for TensorFlow, Python, and PyMC3 model wrappers; use ``scale`` for Edward's language. All data must be passed in as NumPy arrays. For subsampling details, see ``tf.train.slice_input_producer`` and ``tf.train.batch``. scale : dict of RandomVariable to tf.Tensor, optional A scalar value to scale computation for any random variable that it is bound to. For example, this is useful for scaling computations with respect to local latent variables. logdir : str, optional Directory where event file will be written. For details, see ``tf.train.SummaryWriter``. Default is to write nothing. debug : bool, optional If True, add checks for ``NaN`` and ``Inf`` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 10) else: self.n_print = n_print self.t = tf.Variable(0, trainable=False) self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale self.n_minibatch = n_minibatch if n_minibatch is not None and \ self.model_wrapper is not None and \ not isinstance(self.model_wrapper, StanModel): # Re-assign data to batch tensors, with size given by # ``n_minibatch``. Don't do this for random variables in data. dict_rv = {} dict_data = {} for key, value in six.iteritems(self.data): if isinstance(value, RandomVariable): dict_rv[key] = value else: dict_data[key] = value values = list(six.itervalues(dict_data)) slices = tf.train.slice_input_producer(values) # By default use as many threads as CPUs. batches = tf.train.batch(slices, n_minibatch, num_threads=multiprocessing.cpu_count()) if not isinstance(batches, list): # ``tf.train.batch`` returns tf.Tensor if ``slices`` is a # list of size 1. batches = [batches] self.data = {key: value for key, value in zip(six.iterkeys(dict_data), batches)} self.data.update(dict_rv) if logdir is not None: self.logging = True self.train_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph()) self.summarize = tf.merge_all_summaries() else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops()
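A minimal sketch of how a `debug` flag like the one above is meant to be consumed (an assumption; the corresponding update loop is not shown in this example): fetch the check op next to the train op so every step is validated.

import tensorflow as tf

debug = True
v = tf.Variable(1.0)
train = tf.assign_add(v, -0.1)
op_check = tf.add_check_numerics_ops() if debug else None

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    fetches = [train, op_check] if debug else [train]
    sess.run(fetches)  # raises InvalidArgumentError if any tensor is NaN/Inf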
def initialize(self, n_iter=1000, n_print=None, scale=None, auto_transform=True, logdir=None, log_timestamp=True, log_vars=None, debug=False): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Any derived class of `Inference` **must** implement this method. No methods which build ops should be called outside `initialize()`. Args: n_iter: int. Number of iterations for algorithm when calling `run()`. Alternatively if controlling inference manually, it is the expected number of calls to `update()`; this number determines tracking information during the print progress. n_print: int. Number of iterations for each print progress. To suppress print progress, then specify 0. Default is `int(n_iter / 100)`. scale: dict of RandomVariable to tf.Tensor. A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. auto_transform: bool. Whether to automatically transform continuous latent variables of unequal support to be on the unconstrained space. It is only applied if the argument is `True`, the latent variable pair are `ed.RandomVariable`s with the `support` attribute, the supports are both continuous and unequal. logdir: str. Directory where event file will be written. For details, see `tf.summary.FileWriter`. Default is to log nothing. log_timestamp: bool. If True (and `logdir` is specified), create a subdirectory of `logdir` to save the specific run results. The subdirectory's name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'. log_vars: list. Specifies the list of variables to log after each `n_print` steps. If None, will log all variables. If `[]`, no variables will be logged. `logdir` must be specified for variables to be logged. debug: bool. If True, add checks for `NaN` and `Inf` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale # map from original latent vars to unconstrained versions self.transformations = {} if auto_transform: latent_vars = self.latent_vars.copy() # latent_vars maps original latent vars to constrained Q's. # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's. 
self.latent_vars = {} self.latent_vars_unconstrained = {} for z, qz in six.iteritems(latent_vars): if hasattr(z, 'support') and hasattr(qz, 'support') and \ z.support != qz.support and qz.support != 'point': # transform z to an unconstrained space z_unconstrained = transform(z) self.transformations[z] = z_unconstrained # make sure we also have a qz that covers the unconstrained space if qz.support == "points": qz_unconstrained = qz else: qz_unconstrained = transform(qz) self.latent_vars_unconstrained[z_unconstrained] = qz_unconstrained # additionally construct the transformation of qz # back into the original constrained space if z_unconstrained != z: qz_constrained = transform( qz_unconstrained, bijectors.Invert(z_unconstrained.bijector)) try: # attempt to pushforward the params of Empirical distributions qz_constrained.params = z_unconstrained.bijector.inverse( qz_unconstrained.params) except: # qz_unconstrained is not an Empirical distribution pass else: qz_constrained = qz_unconstrained self.latent_vars[z] = qz_constrained else: self.latent_vars[z] = qz self.latent_vars_unconstrained[z] = qz del latent_vars if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.expanduser(logdir) logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._summary_key = tf.get_default_graph().unique_name("summaries") self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() # Store reset ops which user can call. Subclasses should append # any ops needed to reset internal variables in inference. self.reset = [tf.variables_initializer([self.t])]
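# A sketch of the constrained/unconstrained round trip performed above, written
# directly against TF 1.x bijectors. Softplus is just an illustrative choice of
# support-mapping bijector; in the real code the bijector comes from each
# variable's support, so treat this as an assumption-laden example:
import tensorflow as tf
ds = tf.contrib.distributions
tfb = ds.bijectors

qz = ds.Gamma(concentration=2.0, rate=1.0)  # positive support, (0, inf)
# Map to the unconstrained space R with the inverse softplus.
qz_unconstrained = ds.TransformedDistribution(
    distribution=qz, bijector=tfb.Invert(tfb.Softplus()))
# Map back into the original constrained space.
qz_constrained = ds.TransformedDistribution(
    distribution=qz_unconstrained, bijector=tfb.Softplus())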
sess = tf.Session()

# Build the input nodes and the VGG16 network
imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
target = tf.placeholder("float", [None, 100])
vgg = vgg16(imgs, '../model/vgg16_weights.npz', sess)
print('VGG network created')

# Loss function and optimizer
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=vgg.fc3l, labels=target))
print([_.name for _ in vgg.parameters])
optimizer = tf.train.MomentumOptimizer(learning_rate=0.001,
                                       momentum=0.9).minimize(loss)
check_op = tf.add_check_numerics_ops()

# Evaluation
correct_prediction = tf.equal(tf.argmax(vgg.fc3l, 1), tf.argmax(target, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
num_correct_preds = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))

# Load pretrained weights and initialize variables
sess.run(tf.global_variables_initializer())
vgg.load_initial_weights(sess)
print([_.name for _ in vgg.parameters])
for v in tf.trainable_variables():
    print("Trainable variables", v)
print('Starting training')
val_batch_size = 10
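# check_op only fires when it is actually fetched; a common pattern is to run it
# together with the train step so any NaN/Inf raises immediately. Sketch, with
# batch_xs/batch_ys standing in for a real minibatch (not from the source):
try:
    _, _, batch_loss = sess.run([optimizer, check_op, loss],
                                feed_dict={imgs: batch_xs, target: batch_ys})
except tf.errors.InvalidArgumentError as e:
    print('Numerics check failed: %s' % e.message)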
def _model_fn(self, features, labels, mode, params): control_dependencies = [] checks = tf.add_check_numerics_ops() control_dependencies = [checks] input_frequency_size = self._dct_coefficient_count input_time_size = self._spectrogram_length first_filter_width = 8 first_filter_height = 20 first_filter_count = 64 second_filter_width = 4 second_filter_height = 10 second_filter_count = 64 fingerprint_input = features[self._feature_type.FEATURE_1] labels = tf.cast(labels, tf.int64) tf.logging.info( "=====> fingerprint_input {}".format(fingerprint_input)) tf.logging.info("=====> labels {}".format(labels)) fingerprint_4d = tf.reshape( fingerprint_input, [-1, input_time_size, input_frequency_size, 1]) tf.logging.info("=====> fingerprint_4d {}".format(fingerprint_4d)) first_weights = tf.Variable( tf.truncated_normal([ first_filter_height, first_filter_width, 1, first_filter_count ], stddev=0.01)) first_bias = tf.Variable(tf.zeros([first_filter_count])) first_conv = tf.nn.conv2d(fingerprint_4d, first_weights, [1, 1, 1, 1], 'SAME') + first_bias tf.logging.info("=====> first_conv {}".format(first_conv)) first_relu = tf.nn.relu(first_conv) if mode != ModeKeys.INFER: first_dropout = tf.nn.dropout(first_relu, self.sr_config._keep_prob) else: first_dropout = first_relu max_pool = tf.nn.max_pool(first_dropout, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME') second_weights = tf.Variable( tf.truncated_normal([ second_filter_height, second_filter_width, first_filter_count, second_filter_count ], stddev=0.01)) second_bias = tf.Variable(tf.zeros([second_filter_count])) second_conv = tf.nn.conv2d(max_pool, second_weights, [1, 1, 1, 1], 'SAME') + second_bias second_relu = tf.nn.relu(second_conv) tf.logging.info("=====> second_conv {}".format(second_conv)) if mode != ModeKeys.INFER: second_dropout = tf.nn.dropout(second_relu, self.sr_config._keep_prob) else: second_dropout = second_relu second_conv_shape = second_dropout.get_shape() second_conv_output_width = second_conv_shape[2] second_conv_output_height = second_conv_shape[1] second_conv_element_count = int(second_conv_output_width * second_conv_output_height * second_filter_count) flattened_second_conv = tf.reshape(second_dropout, [-1, second_conv_element_count]) final_fc_weights = tf.Variable( tf.truncated_normal( [second_conv_element_count, self.sr_config._num_classes], stddev=0.01)) final_fc_bias = tf.Variable(tf.zeros([self.sr_config._num_classes])) logits = tf.matmul(flattened_second_conv, final_fc_weights) + final_fc_bias tf.logging.info("=====> logits {}".format(logits)) if mode != ModeKeys.INFER: with tf.name_scope('cross_entropy'): losses = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) tf.logging.info("=====> losses {}".format(losses)) classes = tf.argmax(logits, 1) predictions = {"classes": classes} tf.logging.info("=====> classes {}".format(classes)) # Loss, training and eval operations are not needed during inference. 
loss = None train_op = None eval_metric_ops = {} if mode != ModeKeys.INFER: with tf.name_scope( 'train-optimization' ): #, tf.control_dependencies(control_dependencies): global_step = tf.train.get_global_step() learning_rate = self.sr_config._learning_rate train_op = tf.contrib.layers.optimize_loss( loss=losses, global_step=global_step, optimizer=tf.train.GradientDescentOptimizer, learning_rate=learning_rate) loss = losses correct_prediction = tf.equal(predictions["classes"], labels) confusion_matrix = tf.confusion_matrix( labels, predictions["classes"], num_classes=self.sr_config._num_classes) eval_metric_ops = { 'Accuracy': tf.metrics.accuracy(labels=tf.cast(labels, tf.int32), predictions=predictions["classes"], name='accuracy'), 'Precision': tf.metrics.precision(labels=tf.cast(labels, tf.int32), predictions=predictions["classes"], name='Precision'), 'Recall': tf.metrics.recall(labels=tf.cast(labels, tf.int32), predictions=predictions["classes"], name='Recall') } return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops, # training_hooks=self.hooks )
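# In the model_fn above, the `checks` op is built but the control-dependency hookup
# is commented out, so the numerics check never actually runs during training. A
# hedged sketch of wiring it in, reusing the snippet's own names:
with tf.control_dependencies(control_dependencies):
    train_op = tf.contrib.layers.optimize_loss(
        loss=losses,
        global_step=global_step,
        optimizer=tf.train.GradientDescentOptimizer,
        learning_rate=learning_rate)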
def inference(image, bounding_box, bbox_type, bbox_character): def weight_variable(shape): initial = tf.truncated_normal(shape, stddev=0.1) initial /= 10.0 return tf.Variable(initial) def bias_variable(shape): initial = tf.constant(0.01, shape=shape) return tf.Variable(initial) def conv2d(x, W): return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME') def max_pool_2x2(x): return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') ''' input must be list type. each elment in list is np.ndarray type which assign 1 hangul. ''' input_data = [] for row in bounding_box: input_row = [] for x1,y1,x2,y2 in row: input_row.append(load_english_dataset.read_inference_data(image[x1:x2, y1:y2])) input_data.append(input_row) sess = tf.InteractiveSession() x = tf.placeholder("float", shape=[1,32*32*1]) #[32*32*1] y_ = tf.placeholder("float", shape=[1,class_size]) #[66], equal to number of Hangul x_image = tf.reshape(x, [-1,32,32,1]) W_conv0 = weight_variable([3,3,1,16]) b_conv0 = bias_variable([16]) h_conv0 = tf.nn.relu(conv2d(x_image, W_conv0) + b_conv0) #h_conv0=[1,32*32*16] W_conv1 = weight_variable([3,3,16,32]) b_conv1 = bias_variable([32]) #x_image = tf.reshape(x, [-1,32,32,1]) h_conv1 = tf.nn.relu(conv2d(h_conv0, W_conv1) + b_conv1) #h_conv1=[1,32*32*32] h_pool1 = max_pool_2x2(h_conv1) #h_pool1=[1,16*16*32] W_conv2 = weight_variable([3,3,32,64]) b_conv2 = bias_variable([64]) h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) #h_conv2=[16*16*64] h_pool2 = max_pool_2x2(h_conv2) #h_pool2=[8*8*64] W_conv3 = weight_variable([3,3,64,64]) b_conv3 = bias_variable([64]) h_conv3 = tf.nn.relu(conv2d(h_pool2,W_conv3) + b_conv3) #h_conv3=[8*8*64] W_fc1 = weight_variable([8*8*64, 1024]) b_fc1 = bias_variable([1024]) h_conv3_flat = tf.reshape(h_conv3, [-1, 8*8*64]) h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1) keep_prob = tf.placeholder("float") h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) # [66] is number of Hangul W_fc2 = weight_variable([1024,512]) b_fc2 = bias_variable([512]) h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop,W_fc2) + b_fc2) h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob) W_fc3 = weight_variable([512,class_size]) b_fc3 = bias_variable([class_size]) y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3) cross_entropy = -tf.reduce_sum(y_*tf.log(y_conv + 1e-7)) global_step = tf.Variable(0, trainable=False) train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy) correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(y_conv,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) sess.run(tf.initialize_all_variables()) saver = tf.train.Saver(tf.all_variables()) start_time = time.time() check_op = tf.add_check_numerics_ops() print '*************************************************' check_file = checkpoint_dir + 'hangul_shortcut.ckpt' saver.restore(sess, check_file) i = 0 for row in input_data: j = -1 for input_box in row: """ only takes hangul character. 
""" j += 1 if bbox_type[i][j] > 6 : continue part_image_list = divide_image(image_orig=input_box, type_of_box=int(bbox_type[i][j])) part_image_list = getting_zero_padding_image(partition_image=part_image_list) concated_list = [] for part_image in part_image_list: part_image_label = sess.run(tf.argmax(y_conv, 1), feed_dict={x:part_image, keep_prob:1.0}) concated_list.extend(part_image_label) a = raw_input() print 'concated_list is >>> ', print concated_list bbox_character[i][j] = concat.concatenate(hangul_type=int(bbox_type[i][j]), concat_list=concated_list) i += 1 bbox_character_file = open(pickle_path + 'bbox+_character.txt', 'w') pickle.dump(bbox_character, bbox_character_file) bbox_character_file.close() sess.close() return concated_list
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001, num_epochs=400000, minibatch_size=32, print_cost=True): """ Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX. Arguments: X_train -- training set, of shape (input size = 12288, number of training examples = 1080) Y_train -- test set, of shape (output size = 6, number of training examples = 1080) X_test -- training set, of shape (input size = 12288, number of training examples = 120) Y_test -- test set, of shape (output size = 6, number of test examples = 120) learning_rate -- learning rate of the optimization num_epochs -- number of epochs of the optimization loop minibatch_size -- size of a minibatch print_cost -- True to print the cost every 100 epochs Returns: parameters -- parameters learnt by the model. They can then be used to predict. """ ops.reset_default_graph() ( n_x, m ) = X_train.shape # (n_x: input size, m : number of examples in the train set) n_y = Y_train.shape[0] # n_y : output size costs = [] # To keep track of the cost # Create Placeholders of shape (n_x, n_y) X, Y = create_placeholders(n_x, n_y) # Initialize parameters parameters = initialize_parameters() # Forward propagation: Build the forward propagation in the tensorflow graph Z3 = forward_propagation(X, parameters) # Z3 = tf.Print(Z3,[tf.shape(Z3)], message="my Z-values:") # Y = tf.Print(Y,[tf.shape(Y)], message="my Y-values:") # Cost function: Add cost function to tensorflow graph cost = compute_cost(Z3, Y) # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer. optimizer = tf.train.AdamOptimizer( learning_rate=learning_rate).minimize(cost) # Initialize all the variables init = tf.global_variables_initializer() check_op = tf.add_check_numerics_ops() # Start the session to compute the tensorflow graph saver = tf.train.Saver() with tf.Session() as sess: # Run the initialization sess.run(init) # saver.restore(sess, "./model.ckpt") # Do the training loop for epoch in range(num_epochs): epoch_cost = 0. # Defines a cost related to an epoch num_minibatches = int( m / minibatch_size ) # number of minibatches of size minibatch_size in the train set minibatches = random_mini_batches(X_train, Y_train, minibatch_size) for minibatch in minibatches: # Select a minibatch (minibatch_X, minibatch_Y) = minibatch # IMPORTANT: The line that runs the graph on a minibatch. # Run the session to execute the "optimizer" and the "cost", the feedict should contain a minibatch for (X,Y). 
                ### START CODE HERE ### (1 line)
                # print(minibatch_X.shape)
                _, minibatch_cost, _ = sess.run(
                    [optimizer, cost, check_op],
                    feed_dict={X: minibatch_X, Y: minibatch_Y})
                ### END CODE HERE ###
                # print(minibatch_cost)
                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost and checkpoint the model every 100 epochs
            if print_cost and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
                save_path = saver.save(sess, "./model.ckpt")
                print("Model saved in file: %s" % save_path)
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        # plt.plot(np.squeeze(costs))
        # plt.ylabel('cost')
        # plt.xlabel('iterations (per tens)')
        # plt.title("Learning rate =" + str(learning_rate))
        # plt.show()

        # save the learned parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions
        # correct_prediction = tf.sqrt(tf.squared_difference(Z3, Y))
        # Calculate accuracy on the test set
        # accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # Mean relative error between targets and predictions
        accuracy = tf.reduce_mean(tf.div(tf.abs(tf.subtract(Y, Z3)), Y))
        print("Train Error:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Error:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
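# The relative-error metric above divides by Y, so any zero-valued target produces
# an Inf that check_op would flag. A hedged variant with an epsilon floor in the
# denominator (the epsilon value is illustrative, not from the source):
epsilon = 1e-8
rel_error = tf.reduce_mean(tf.abs(Y - Z3) / tf.maximum(tf.abs(Y), epsilon))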
def tf_record_CNN_spherical(tone_version, itd_tones, ild_tones, manually_added, freq_label, sam_tones, transposed_tones, precedence_effect, narrowband_noise, all_positions_bkgd, background_textures, testing, branched, zero_padded, stacked_channel, model_version, num_epochs, train_path_pattern, bkgd_train_path_pattern, arch_ID, config_array, files, num_files, newpath, regularizer, SNR_max=40, SNR_min=5): bkgd_training_paths = glob.glob(bkgd_train_path_pattern) training_paths = glob.glob(train_path_pattern) ###Do not change parameters below unless altering network### BKGD_SIZE = [78, 48000] STIM_SIZE = [78, 89999] TONE_SIZE = [78, 59099] ITD_TONE_SIZE = [78, 39690] if zero_padded: STIM_SIZE = [78, 48000] if stacked_channel: STIM_SIZE = [39, 48000, 2] BKGD_SIZE = [39, 48000, 2] n_classes_localization = 504 n_classes_recognition = 780 localization_bin_resolution = 5 #Optimization Params batch_size = 16 learning_rate = 1e-3 loss_exponent = 12 loss_scale = 2**loss_exponent bn_training_state = True dropout_training_state = True training_state = True #Queue parameters dequeue_min = 8 dequeue_min_main = 10 #num_epochs = None #Change for network precision,must match input data type filter_dtype = tf.float32 padding = 'VALID' #Downsampling Params sr = 48000 cochleagram_sr = 8000 post_rectify = True #Display interval training statistics display_step = 25 #Changes how often data is saved to numpy arrays when dataset is large write_step = 15625 #250k examples #write_step = 25 #250k examples if itd_tones: TONE_SIZE = ITD_TONE_SIZE if ild_tones: itd_tones = True if testing: bn_training_state = False dropout_training_state = False training_state = False num_epochs = 1 #Using these values because 5/40 are the standard training SNRs if not (SNR_min > 30 or SNR_max > 40): SNR_max = 35.0 SNR_min = 30.0 print("Testing SNR(dB): Max: " + str(SNR_max) + "Min: " + str(SNR_min)) #mean_subbands = np.load("mean_subband_51400.npy")/51400 #tf_mean_subbands = tf.constant(mean_subbands,dtype=filter_dtype) def check_speed(): for i in range(30): sess.run(subbands_batch) start_time = time.time() for _ in range(30): time.sleep(0.5) print(time.time() - start_time) print("Len:", sess.run('example_queue/random_shuffle_queue_Size:0')) with tf.device("/cpu:0"): ###READING QUEUE MACHINERY### def add_labels(paths): return [(stim, stim.split('_')[-1].split('a')[0]) for stim in paths] def rms(wav): square = tf.square(wav) mean_val = tf.reduce_mean(square) return tf.sqrt(mean_val) def combine_signal_and_noise_stacked_channel(signals, backgrounds, delay, sr, cochleagram_sr, post_rectify): tensor_dict_fg = {} tensor_dict_bkgd = {} tensor_dict = {} snr = tf.random_uniform([], minval=SNR_min, maxval=SNR_max, name="snr_gen") for path1 in backgrounds: if path1 == 'train/image': background = backgrounds['train/image'] else: tensor_dict_bkgd[path1] = backgrounds[path1] for path in signals: if path == 'train/image': signal = signals['train/image'] sig_len = signal.shape[1] - delay sig = tf.slice(signal, [0, 0, 0], [39, sig_len, 2]) max_val = tf.reduce_max(sig) sig_rms = rms(tf.reduce_sum(sig, [0, 2])) sig = tf.div(sig, sig_rms) #sig = tf.Print(sig, [tf.reduce_max(sig)],message="\nMax SIG:") sf = tf.pow(tf.constant(10, dtype=tf.float32), tf.div(snr, tf.constant(20, dtype=tf.float32))) bak_rms = rms(tf.reduce_sum(background, [0, 2])) #bak_rms = tf.Print(bak_rms, [tf.reduce_max(bak_rms)],message="\nNoise RMS:") sig_rms = rms(tf.reduce_sum(sig, [0, 2])) scaling_factor = tf.div(tf.div(sig_rms, bak_rms), sf) #scaling_factor = tf.Print(scaling_factor, 
[scaling_factor],message="\nScaling Factor:") noise = tf.scalar_mul(scaling_factor, background) #noise = tf.Print(noise, [tf.reduce_max(noise)],message="\nMax Noise:") front = tf.slice(noise, [0, 0, 0], [39, delay, 2]) middle = tf.slice(noise, [0, delay, 0], [39, sig_len, 2]) end = tf.slice(noise, [0, (delay + int(sig_len)), 0], [39, -1, 2]) middle_added = tf.add(middle, sig) new_sig = tf.concat([front, middle_added, end], 1) #new_sig = sig rescale_factor = tf.div(max_val, tf.reduce_max(new_sig)) #rescale_factor = tf.Print(rescale_factor, [rescale_factor],message="\nRescaling Factor:") new_sig = tf.scalar_mul(rescale_factor, new_sig) new_sig_rectified = tf.nn.relu(new_sig) new_sig_reshaped = tf.reshape(new_sig_rectified, [39, 48000, 2]) #new_sig_reshaped = tf.reshape(new_sig,[72,30000,1]) #return (signal, background,noise,new_sig_reshaped) tensor_dict_fg[path] = new_sig_reshaped else: tensor_dict_fg[path] = signals[path] tensor_dict[0] = tensor_dict_fg tensor_dict[1] = tensor_dict_bkgd return tensor_dict def combine_signal_and_noise(signals, backgrounds, delay, sr, cochleagram_sr, post_rectify): tensor_dict_fg = {} tensor_dict_bkgd = {} tensor_dict = {} snr = tf.random_uniform([], minval=SNR_min, maxval=SNR_max, name="snr_gen") for path1 in backgrounds: if path1 == 'train/image': background = backgrounds['train/image'] else: tensor_dict_bkgd[path1] = backgrounds[path1] for path in signals: if path == 'train/image': signal = signals['train/image'] sig_len = signal.shape[1] - delay sig = tf.slice(signal, [0, 0], [78, sig_len]) max_val = tf.reduce_max(sig) sig_rms = rms(tf.reduce_sum(sig, 0)) sig = tf.div(sig, sig_rms) #sig = tf.Print(sig, [tf.reduce_max(sig)],message="\nMax SIG:") sf = tf.pow(tf.constant(10, dtype=tf.float32), tf.div(snr, tf.constant(20, dtype=tf.float32))) bak_rms = rms(tf.reduce_sum(background, 0)) #bak_rms = tf.Print(bak_rms, [tf.reduce_max(bak_rms)],message="\nNoise RMS:") sig_rms = rms(tf.reduce_sum(sig, 0)) scaling_factor = tf.div(tf.div(sig_rms, bak_rms), sf) #scaling_factor = tf.Print(scaling_factor, [scaling_factor],message="\nScaling Factor:") noise = tf.scalar_mul(scaling_factor, background) #noise = tf.Print(noise, [tf.reduce_max(noise)],message="\nMax Noise:") front = tf.slice(noise, [0, 0], [78, delay]) middle = tf.slice(noise, [0, delay], [78, sig_len]) end = tf.slice(noise, [0, (delay + int(sig_len))], [78, -1]) middle_added = tf.add(middle, sig) new_sig = tf.concat([front, middle_added, end], 1) #new_sig = sig rescale_factor = tf.div(max_val, tf.reduce_max(new_sig)) #rescale_factor = tf.Print(rescale_factor, [rescale_factor],message="\nRescaling Factor:") new_sig = tf.scalar_mul(rescale_factor, new_sig) new_sig_rectified = tf.nn.relu(new_sig) new_sig_reshaped = tf.reshape(new_sig_rectified, [72, 48000, 1]) #new_sig_reshaped = tf.reshape(new_sig,[72,30000,1]) #return (signal, background,noise,new_sig_reshaped) tensor_dict_fg[path] = new_sig_reshaped else: tensor_dict_fg[path] = signals[path] tensor_dict[0] = tensor_dict_fg tensor_dict[1] = tensor_dict_bkgd return tensor_dict #Best to read https://www.tensorflow.org/api_guides/python/reading_data#Reading_from_files ###READING QUEUE MACHINERY### #Best to read https://www.tensorflow.org/api_guides/python/reading_data#Reading_from_files options = tf.python_io.TFRecordOptions( tf.python_io.TFRecordCompressionType.GZIP) is_bkgd = False first = training_paths[0] for example in tf.python_io.tf_record_iterator(first, options=options): result = tf.train.Example.FromString(example) break jsonMessage = 
MessageToJson(tf.train.Example.FromString(example)) jsdict = json.loads(jsonMessage) feature = parse_nested_dictionary(jsdict, is_bkgd) dataset = build_tfrecords_iterator(num_epochs, train_path_pattern, is_bkgd, feature, narrowband_noise, manually_added, STIM_SIZE, localization_bin_resolution, stacked_channel) ###READING QUEUE MACHINERY### # Create a list of filenames and pass it to a queue bkgd_filename_queue = tf.train.string_input_producer( bkgd_training_paths, shuffle=True, capacity=len(bkgd_training_paths)) # Define a reader and read the next record options = tf.python_io.TFRecordOptions( tf.python_io.TFRecordCompressionType.GZIP) bkgd_reader = tf.TFRecordReader(options=options) _, bkgd_serialized_example = bkgd_reader.read(bkgd_filename_queue) is_bkgd = True bkgd_first = bkgd_training_paths[0] for bkgd_example in tf.python_io.tf_record_iterator(bkgd_first, options=options): bkgd_result = tf.train.Example.FromString(bkgd_example) break bkgd_jsonMessage = MessageToJson( tf.train.Example.FromString(bkgd_example)) bkgd_jsdict = json.loads(bkgd_jsonMessage) bkgd_feature = parse_nested_dictionary(bkgd_jsdict, is_bkgd) dataset_bkgd = build_tfrecords_iterator( num_epochs, bkgd_train_path_pattern, is_bkgd, bkgd_feature, narrowband_noise, manually_added, BKGD_SIZE, localization_bin_resolution, stacked_channel) new_dataset = tf.data.Dataset.zip((dataset, dataset_bkgd)) #SNR = tf.random_uniform([],minval=SNR_min,maxval=SNR_max,name="snr_gen") if stacked_channel: new_dataset = new_dataset.map( lambda x, y: combine_signal_and_noise_stacked_channel( x, y, 0, 48000, 8000, post_rectify=True)) else: new_dataset = new_dataset.map( lambda x, y: combine_signal_and_noise( x, y, 0, 48000, 8000, post_rectify=True)) batch_sizes = tf.constant(16, dtype=tf.int64) new_dataset = new_dataset.shuffle(buffer_size=200).batch( batch_size=batch_sizes, drop_remainder=True) #combined_iter = new_dataset.make_one_shot_iterator() combined_iter = new_dataset.make_initializable_iterator() combined_iter_dict = collections.OrderedDict() combined_iter_dict = combined_iter.get_next() if background_textures: bkgd_metadata = [ combined_iter_dict[1]['train/azim'], combined_iter_dict[1]['train/elev'] ] ###END READING QUEUE MACHINERY### def make_downsample_filt_tensor(SR=16000, ENV_SR=200, WINDOW_SIZE=1001, beta=5.0, pycoch_downsamp=False): """ Make the sinc filter that will be used to downsample the cochleagram Parameters ---------- SR : int raw sampling rate of the audio signal ENV_SR : int end sampling rate of the envelopes WINDOW_SIZE : int the size of the downsampling window (should be large enough to go to zero on the edges). beta : float kaiser window shape parameter pycoch_downsamp : Boolean if true, uses a slightly different downsampling function Returns ------- downsample_filt_tensor : tensorflow tensor, tf.float32 a tensor of shape [0 WINDOW_SIZE 0 0] the sinc windows with a kaiser lowpass filter that is applied while downsampling the cochleagram """ DOWNSAMPLE = SR / ENV_SR if not pycoch_downsamp: downsample_filter_times = np.arange(-WINDOW_SIZE / 2, int(WINDOW_SIZE / 2)) downsample_filter_response_orig = np.sinc( downsample_filter_times / DOWNSAMPLE) / DOWNSAMPLE downsample_filter_window = signallib.kaiser(WINDOW_SIZE, beta) downsample_filter_response = downsample_filter_window * downsample_filter_response_orig else: max_rate = DOWNSAMPLE f_c = 1. / max_rate # cutoff of FIR filter (rel. 
to Nyquist) half_len = 10 * max_rate # reasonable cutoff for our sinc-like function if max_rate != 1: downsample_filter_response = signallib.firwin(2 * half_len + 1, f_c, window=('kaiser', beta)) else: # just in case we aren't downsampling -- I think this should work? downsample_filter_response = zeros(2 * half_len + 1) downsample_filter_response[half_len + 1] = 1 # Zero-pad our filter to put the output samples at the center # n_pre_pad = int((DOWNSAMPLE - half_len % DOWNSAMPLE)) # n_post_pad = 0 # n_pre_remove = (half_len + n_pre_pad) // DOWNSAMPLE # We should rarely need to do this given our filter lengths... # while _output_len(len(h) + n_pre_pad + n_post_pad, x.shape[axis], # up, down) < n_out + n_pre_remove: # n_post_pad += 1 # downsample_filter_response = np.concatenate((np.zeros(n_pre_pad), downsample_filter_response, np.zeros(n_post_pad))) downsample_filt_tensor = tf.constant(downsample_filter_response, tf.float32) downsample_filt_tensor = tf.expand_dims(downsample_filt_tensor, 0) downsample_filt_tensor = tf.expand_dims(downsample_filt_tensor, 2) downsample_filt_tensor = tf.expand_dims(downsample_filt_tensor, 3) return downsample_filt_tensor def downsample(signal, current_rate, new_rate, window_size, beta, post_rectify=True): downsample = current_rate / new_rate message = ("The current downsample rate {} is " "not an integer. Only integer ratios " "between current and new sampling rates " "are supported".format(downsample)) assert (current_rate % new_rate == 0), message message = ("New rate must be less than old rate for this " "implementation to work!") assert (new_rate < current_rate), message # make the downsample tensor downsample_filter_tensor = make_downsample_filt_tensor( current_rate, new_rate, window_size, pycoch_downsamp=False) downsampled_signal = tf.nn.conv2d(signal, downsample_filter_tensor, strides=[1, 1, downsample, 1], padding='SAME', name='conv2d_cochleagram_raw') if post_rectify: downsampled_signal = tf.nn.relu(downsampled_signal) return downsampled_signal def put_kernels_on_grid(kernel, pad=1): '''Visualize conv. filters as an image (mostly for the 1st layer). Arranges filters into a grid, with some paddings between adjacent filters. Args: kernel: tensor of shape [Y, X, NumChannels, NumKernels] pad: number of black pixels around each filter (between them) Return: Tensor of shape [1, (Y+2*pad)*grid_Y, (X+2*pad)*grid_X, NumChannels]. ''' # get shape of the grid. NumKernels == grid_Y * grid_X def factorization(n): for i in range(int(sqrt(float(n))), 0, -1): if n % i == 0: if i == 1: print('Who would enter a prime number of filters') return (i, int(n / i)) (grid_Y, grid_X) = factorization(kernel.get_shape()[3].value) print('grid: %d = (%d, %d)' % (kernel.get_shape()[3].value, grid_Y, grid_X)) x_min = tf.reduce_min(kernel) x_max = tf.reduce_max(kernel) kernel = (kernel - x_min) / (x_max - x_min) # pad X and Y x = tf.pad(kernel, tf.constant([[pad, pad], [pad, pad], [0, 0], [0, 0]]), mode='CONSTANT') # X and Y dimensions, w.r.t. padding Y = kernel.get_shape()[0] + 2 * pad X = kernel.get_shape()[1] + 2 * pad x = tf.pad(kernel, tf.constant([[pad, pad], [pad, pad], [0, 0], [0, 0]]), mode='CONSTANT') # X and Y dimensions, w.r.t. 
padding Y = kernel.get_shape()[0] + 2 * pad X = kernel.get_shape()[1] + 2 * pad channels = kernel.get_shape()[2] # put NumKernels to the 1st dimension x = tf.transpose(x, (3, 0, 1, 2)) # organize grid on Y axis x = tf.reshape(x, tf.stack([grid_X, Y * grid_Y, X, channels])) # switch X and Y axes x = tf.transpose(x, (0, 2, 1, 3)) # organize grid on X axis x = tf.reshape(x, tf.stack([1, X * grid_X, Y * grid_Y, channels])) # back to normal order (not combining with the next step for clarity) x = tf.transpose(x, (2, 1, 3, 0)) # to tf.image_summary order [batch_size, height, width, channels], # where in this case batch_size == 1 x = tf.transpose(x, (3, 0, 1, 2)) # scaling to [0, 255] is not necessary for tensorboard return x #Many lines are commented out to allow for quick architecture changes #TODO:This should be abstracted to arcitectures are defined by some sort of #config dictionary or file def gradients_with_loss_scaling(loss, loss_scale): """Gradient calculation with loss scaling to improve numerical stability when training with float16. """ grads = [ (grad[0] / loss_scale, grad[1]) for grad in tf.train.AdamOptimizer( learning_rate=learning_rate, epsilon=1e-4).compute_gradients( loss * loss_scale, colocate_gradients_with_ops=True) ] return grads def float32_variable_storage_getter( getter, name, shape=None, dtype=None, initializer=tf.contrib.layers.xavier_initializer(uniform=False), regularizer=None, trainable=True, *args, **kwargs): storage_dtype = tf.float32 if trainable else dtype variable = getter(name, shape, dtype=storage_dtype, initializer=initializer, regularizer=regularizer, trainable=trainable, *args, **kwargs) if trainable and dtype != tf.float32: variable = tf.cast(variable, dtype) return variable strides = 1 time_stride = 1 freq_stride = 2 time_pool = 4 freq_pool = 1 k = 2 k_wide = 8 # config_array=[[["/gpu:0"],['conv',[2,50,32],[2,1]],['relu'],['pool',[1,4]]],[["/gpu:1"],['conv',[4,20,64],[1,1]],['bn'],['relu'],['pool',[1,4]],['conv',[8,8,128],[1,1]],['bn'],['relu'],['pool',[1,4]],['conv',[8,8,256],[1,1]],['bn'],['relu'],['pool',[1,8]],['fc',512],['fc_bn'],['fc_relu'],['dropout'],['out',]]] #[L_channel,R_channel] = tf.unstack(subbands_batch,axis=3) [L_channel, R_channel] = tf.unstack(combined_iter_dict[0]['train/image'], axis=3) concat_for_downsample = tf.concat([L_channel, R_channel], axis=0) reshaped_for_downsample = tf.expand_dims(concat_for_downsample, axis=3) #hard coding filter shape based on previous experimentation new_sig_downsampled = downsample(reshaped_for_downsample, sr, cochleagram_sr, window_size=4097, beta=10.06, post_rectify=post_rectify) downsampled_squeezed = tf.squeeze(new_sig_downsampled) [L_channel_downsampled, R_channel_downsampled] = tf.split(downsampled_squeezed, num_or_size_splits=2, axis=0) downsampled_reshaped = tf.stack( [L_channel_downsampled, R_channel_downsampled], axis=3) new_sig_nonlin = tf.pow(downsampled_reshaped, 0.3) # print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,scope='fp32_storage')) # print(subbands_batch) ####TMEPORARY OVERRIDE#### #branched = False net = NetBuilder() if branched: out, out2 = net.build(config_array, new_sig_nonlin, training_state, dropout_training_state, filter_dtype, padding, n_classes_localization, n_classes_recognition, branched, regularizer) else: out = net.build(config_array, new_sig_nonlin, training_state, dropout_training_state, filter_dtype, padding, n_classes_localization, n_classes_recognition, branched, regularizer) if regularizer is not None: reg_term = tf.contrib.layers.apply_regularization( 
regularizer, (tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))) combined_dict = collections.OrderedDict() combined_dict_fg = collections.OrderedDict() combined_dict_bkgd = collections.OrderedDict() for k, v in combined_iter_dict[0].items(): if k != 'train/image' and k != 'train/image_height' and k != 'train/image_width': combined_dict_fg[k] = combined_iter_dict[0][k] for k, v in combined_iter_dict[1].items(): if k != 'train/image' and k != 'train/image_height' and k != 'train/image_width': combined_dict_bkgd[k] = combined_iter_dict[1][k] combined_dict[0] = combined_dict_fg combined_dict[1] = combined_dict_bkgd ##Fully connected Layer 2 #wd2 = tf.get_variable('wd2',[512,512],filter_dtype) #dense_bias2 = tf.get_variable('wb6',[512],filter_dtype) #fc2 = tf.add(tf.matmul(fc1_do, wd2), dense_bias2) #fc2 = tf.nn.relu(fc2) #fc2_do = tf.layers.dropout(fc2,training=dropout_training_state) # Construct model #fix labels dimension to be one less that logits dimension #Testing small subbatch if sam_tones or transposed_tones: labels_batch_cost_sphere = tf.squeeze( tf.zeros_like(combined_dict[0]['train/carrier_freq'])) elif precedence_effect: labels_batch_cost_sphere = tf.squeeze( tf.zeros_like(combined_dict[0]['train/start_sample'])) else: labels_batch_cost = tf.squeeze(combined_dict[0]['train/azim']) #labels_batch_cost = tf.squeeze(subbands_batch_labels,axis=[1,2]) if not tone_version: labels_batch_sphere = tf.add( tf.scalar_mul(tf.constant(36, dtype=tf.int32), combined_dict[0]['train/elev']), combined_dict[0]['train/azim']) else: labels_batch_sphere = combined_dict[0]['train/azim'] labels_batch_cost_sphere = tf.squeeze(labels_batch_sphere) # Define loss and optimizer # On r1.1 reduce mean doees not work(returns nans) with float16 vals if branched: cost1 = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=out, labels=labels_batch_cost_sphere)) cost2 = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=out2, labels=combined_dict[0]['train/class_num'])) cost = cost1 + cost2 else: cost = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=out, labels=labels_batch_cost_sphere)) if regularizer is not None: cost = tf.add(cost, reg_term) #cost = tf.Print(cost, [labels],message="\nLabel:",summarize=32) cond_dist = tf.nn.softmax(out) if branched: cond_dist2 = tf.nn.softmax(out2) #cost = tf.Print(cost, [tf.argmax(out, 1)],message="\nOut:",summarize=32) # init_op = tf.group(tf.global_variables_initializer(), # tf.local_variables_initializer()) # config = tf.ConfigProto(allow_soft_placement=True, # inter_op_parallelism_threads=0, intra_op_parallelism_threads=0) # sess = tf.Session(config=config) # sess.run(init_op) # coord = tf.train.Coordinator() # threads = tf.train.start_queue_runners(sess=sess,coord=coord) # print(sess.run(cost)) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): update_grads = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-4).minimize(cost) # Evaluate model correct_pred = tf.equal(tf.argmax(out, 1), tf.cast(labels_batch_cost_sphere, tf.int64)) accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) top_k = tf.nn.top_k(out, 5) if branched: correct_pred2 = tf.equal( tf.argmax(out2, 1), tf.cast(combined_dict[0]['train/class_num'], tf.int64)) accuracy2 = tf.reduce_mean(tf.cast(correct_pred2, tf.float32)) top_k2 = tf.nn.top_k(out2, 5) #test_pred = conv_net(tf.cast(test_images,tf.float32),weights,biases) #correct_pred = tf.equal(tf.argmax(test_pred, 1), 
tf.cast(test_labels,tf.int64)) #test_acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) ##Check special cases(made by hand in testdata.json #custom_pred = conv_net(tf_test_data,weights,biases) #custom_correct_pred = tf.equal(tf.argmax(custom_pred, 1), tf.cast(tf_test_label,tf.int64)) #custom_test_acc = tf.reduce_mean(tf.cast(custom_correct_pred, tf.float32)) # Initializing the variables # # Check_op seems to take up a lot of space on the GPU check_op = tf.add_check_numerics_ops() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) # Launch the graph #with tf.Session() as sess: #run_metadata = tf.RunMetadata() config = tf.ConfigProto(allow_soft_placement=True, inter_op_parallelism_threads=0, intra_op_parallelism_threads=0) sess = tf.Session(config=config) sess.run(init_op) if branched: print("Class Labels:" + str(sess.run(combined_dict[0]['train/class_num']))) # ##This code allows for tracing ops acorss GPUs, you often have to run it twice # ##to get sensible traces # # #sess.run(optimizer,options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), # # run_metadata=run_metadata) # #from tensorflow.python.client import timeline # #trace = timeline.Timeline(step_stats=run_metadata.step_stats) # #trace_file.close() ##Used to write out stimuli examples # # low_lim=30 # hi_lim=20000 # sr=48000 # sample_factor=1 # scale = 0.1 # i=0 # pad_factor = None # #invert subbands # n = int(np.floor(erb.freq2erb(hi_lim) - erb.freq2erb(low_lim)) - 1) # sess.run(combined_iter.initializer) # subbands_test,az_label,elev_label = sess.run([combined_iter_dict[0]['train/image'],combined_iter_dict[0]['train/azim'],combined_iter_dict[0]['train/elev']]) # # filts, hz_cutoffs, freqs=erb.make_erb_cos_filters_nx(subbands_test.shape[2],sr, n,low_lim,hi_lim, sample_factor,pad_factor=pad_factor,full_filter=True) # # filts_no_edges = filts[1:-1] # for batch_iter in range(3): # for stim_iter in range(16): # subbands_l=subbands_test[stim_iter,:,:,0] # subbands_r=subbands_test[stim_iter,:,:,1] # wavs = np.zeros([subbands_test.shape[2],2]) # wavs[:,0] = sb.collapse_subbands(subbands_l,filts_no_edges).astype(np.float32) # wavs[:,1] = sb.collapse_subbands(subbands_r,filts_no_edges).astype(np.float32) # max_val = wavs.max() # rescaled_wav = wavs/max_val*scale # name = "stim_{}_{}az_{}elev.wav".format(stim_iter+batch_iter*16,int(az_label[stim_iter])*5,int(elev_label[stim_iter])*5) # name_with_path = newpath+'/'+name # write(name_with_path,sr,rescaled_wav) # pdb.set_trace() # subbands_test,az_label,elev_label = sess.run([combined_iter_dict[0]['train/image'],combined_iter_dict[0]['train/azim'],combined_iter_dict[0]['train/elev']]) if not testing: sess.run(combined_iter.initializer) saver = tf.train.Saver(max_to_keep=None) learning_curve = [] errors_count = 0 try: step = 1 sess.graph.finalize() while True: #sess.run([optimizer,check_op]) try: if step == 1: if not num_files == 1: #latest_addition = max(files, key=os.path.getctime) file_list = [] for elem in files: if (elem.split("/")[-1] ).split(".")[0] == 'model': file_list.append(elem) latest_addition = max(file_list, key=os.path.getctime) latest_addition_name = latest_addition.split( ".")[-2] saver.restore( sess, newpath + "/model." 
+ latest_addition_name) step = int(latest_addition_name.split("-")[1]) else: sess.run(update_grads) else: sess.run(update_grads) # sess.run(update_grads) except tf.errors.InvalidArgumentError as e: print(e.message) errors_count += 1 continue if step % display_step == 0: # Calculate batch loss and accuracy loss, acc, az = sess.run( [cost, accuracy, combined_dict[0]['train/azim']]) #print("Batch Labels: ",az) print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \ "{:.6f}".format(loss) + ", Training Accuracy= " + \ "{:.5f}".format(acc)) if step % 5000 == 0: print("Checkpointing Model...") retry_count = 0 while True: try: saver.save(sess, newpath + '/model.ckpt', global_step=step, write_meta_graph=False) break except ValueError as e: if retry_count > 36: print( "Maximum wait time reached(6H). Terminating Program." ) raise e from None print( "Checkpointing failed. Retrying in 10 minutes..." ) time.sleep(600) retry_count += 1 learning_curve.append([int(step * batch_size), float(acc)]) print("Checkpoint Complete") #Just for testing the model/call_model if step == 300000: print("Break!") break step += 1 except tf.errors.OutOfRangeError: print("Out of Range Error. Optimization Finished") except tf.errors.DataLossError as e: print("Corrupted file found!!") pdb.set_trace() except tf.errors.ResourceExhaustedError as e: gpu = e.message print("Out of memory error") error = "Out of memory error" with open(newpath + '/train_error.json', 'w') as f: json.dump(arch_ID, f) json.dump(error, f) json.dump(gpu, f) return False finally: print(errors_count) print("Training stopped.") with open(newpath + '/curve_no_resample_w_cutoff_vary_loc.json', 'w') as f: json.dump(learning_curve, f) if testing: ##Testing loop for stim in model_version: sess.run(combined_iter.initializer) print("Starting model version: ", stim) batch_acc = [] batch_acc2 = [] batch_conditional = [] batch_conditional2 = [] saver = tf.train.Saver(max_to_keep=None) #saver.restore(sess,newpath+"/model.ckpt-"+str(model_version)) saver.restore(sess, newpath + "/model.ckpt-" + str(stim)) step = 0 try: eval_vars = list(combined_dict[0].values()) eval_keys = list(combined_dict[0].keys()) while True: pred, cd, e_vars = sess.run( [correct_pred, cond_dist, eval_vars]) array_len = len(e_vars) if isinstance(e_vars, list): e_vars = list(zip(*e_vars)) batch_conditional += [(cond, var) for cond, var in zip(cd, e_vars)] batch_acc += [(pd, ev) for pd, ev in zip(pred, e_vars)] else: e_vars = np.array([np.squeeze(x) for x in e_vars]) split = np.vsplit(e_vars, array_len) batch_conditional += [ (cond, var) for cond, var in zip(cd, e_vars.T) ] split.insert(0, pred) batch_acc += np.dstack(split).tolist()[0] if branched: pred2, cd2, e_vars2 = sess.run(correct_pred2, cond_dist2, eval_vars) e_vars2 = np.squeeze(e_vars2) array_len2 = len(e_vars2) split2 = np.vsplit(e_vars2, array_len2) split2.insert(0, pred2) batch_conditional2 += [ (cond, var) for cond, var in zip(cd2, e_vars, T) ] batch_acc2 += np.dstack(split2).tolist()[0] step += 1 if step % display_step == 0: print("Iter " + str(step * batch_size)) #if not tone_version: # print("Current Accuracy:",sum(batch_acc)/len(batch_acc)) if (step + 1) % write_step == 0: print("writing batch data at step: {}".format(step)) write_batch_data(newpath, train_path_pattern, stim, batch_acc, batch_conditional, eval_keys, step) print("Data written") batch_acc = [] batch_conditional = [] if step == 500000: print("Break!") break except tf.errors.ResourceExhaustedError: print("Out of memory error") error = "Out of memory error" with 
open(newpath + '/test_error_{}.json'.format(stim), 'w') as f: json.dump(arch_ID, f) json.dump(error, f) except tf.errors.OutOfRangeError: print("Out of Range Error. Optimization Finished") finally: if tone_version: np.save(newpath + '/plot_array_test_{}.npy'.format(stim), batch_acc) np.save( newpath + '/batch_conditional_test_{}.npy'.format(stim), batch_conditional) acc_corr = [pred[0] for pred in batch_acc] acc_accuracy = sum(acc_corr) / len(acc_corr) if branched: np.save( newpath + '/plot_array_test_{}_2.npy'.format(stim), batch_acc2) np.save( newpath + '/batch_conditional_test_{}_2.npy'.format(stim), batch_conditional2) acc_corr2 = [pred2[0] for pred2 in batch_acc2] acc_accuracy2 = sum(acc_corr2) / len(acc_corr2) with open(newpath + '/accuracies_itd_{}.json'.format(stim), 'w') as f: json.dump(acc_accuracy, f) if branched: json.dump(acc_accuracy2, f) elif (sam_tones or transposed_tones or precedence_effect or narrowband_noise): if train_path_pattern.split("/")[-2] == 'testset': stimuli_name = 'testset_' + train_path_pattern.split( "/")[-3] else: stimuli_name = train_path_pattern.split("/")[-2] np.save( newpath + '/batch_array_{}_iter{}.npy'.format( stimuli_name, stim), batch_acc) np.save( newpath + '/batch_conditional_{}_iter{}.npy'.format( stimuli_name, stim), batch_conditional) acc_corr = [pred[0] for pred in batch_acc] acc_accuracy = sum(acc_corr) / len(acc_corr) if branched: np.save( newpath + '/plot_array_test_{}_2.npy'.format(stim), batch_acc2) np.save( newpath + '/batch_conditional_test_{}_2.npy'.format(stim), batch_conditional2) acc_corr2 = [pred2[0] for pred2 in batch_acc2] acc_accuracy2 = sum(acc_corr2) / len(acc_corr2) with open( newpath + '/accuracies_test_{}_iter{}.json'.format( stimuli_name, stim), 'w') as f: json.dump(acc_accuracy, f) if branched: json.dump(acc_accuracy2, f) with open( newpath + '/keys_test_{}_iter{}.json'.format( stimuli_name, stim), 'w') as f: json.dump(eval_keys, f) else: if train_path_pattern.split("/")[-2] == 'testset': stimuli_name = 'testset_' + train_path_pattern.split( "/")[-3] else: stimuli_name = train_path_pattern.split("/")[-2] np.save( newpath + '/plot_array_padded_{}_iter{}.npy'.format( stimuli_name, stim), batch_acc) np.save( newpath + '/batch_conditional_{}_iter{}.npy'.format( stimuli_name, stim), batch_conditional) acc_corr = [pred[0] for pred in batch_acc] acc_accuracy = sum(acc_corr) / len(acc_corr) if branched: np.save( newpath + '/plot_array_stim_vary_env_{}_2.npy'.format(stim), batch_acc2) np.save( newpath + '/batch_conditional_test_{}_2.npy'.format(stim), batch_conditional2) acc_corr2 = [pred2[0] for pred2 in batch_acc2] acc_accuracy2 = sum(acc_corr2) / len(acc_corr2) with open( newpath + '/accuracies_test_{}_iter{}.json'.format( stimuli_name, stim), 'w') as f: json.dump(acc_accuracy, f) if branched: json.dump(acc_accuracy2, f) with open( newpath + '/keys_test_{}_iter{}.json'.format( stimuli_name, stim), 'w') as f: json.dump(eval_keys, f) #acc= sess.run(test_acc) #print("Test Accuracy= "+"{:.5f}".format(acc)) #customs = sess.run(custom_test_acc) #correct_pred = sess.run(custom_correct_pred) #with open('custom_out2.json', 'w') as f: # json.dump([test_data_img,correct_pred.tolist()],f) #print("ACC for special cases:") #print(customs) #first_layer = sess.run(weights['wc1']) #activation1, activation2 = sess.run([conv1,conv3]) #with open('activations.json','w') as f: # json.dump([activation1.tolist(),activation2.tolist()],f) #tf.get_variable_scope().reuse_variables() #first_layer = [var for var in tf.global_variables() if 
var.op.name=="wc1"][0] #second_layer = [var for var in tf.global_variables() if var.op.name=="wc2"][0] #weights_image = put_kernels_on_grid(first_layer) #weights_image2 = put_kernels_on_grid(second_layer) #np_weights1, np_weights2 = sess.run([weights_image,weights_image2]) #with open('conv1weights.json','w') as f: # json.dump([np_weights1.tolist(),np_weights2.tolist()],f) # sess.close() tf.reset_default_graph()
def approximate(namePrePrefix, pointSubset, pointList): kdTree = cKDTree(pointSubset) #We'll pick points by picking a random (spherical normal) offset #from randomly-chosen points in the given point list variance = 100000.0 mean_vec = np.array([0.0, 0.0, 0.0], dtype=np.float32) covar_mat = np.array([[variance, 0, 0], [0, variance, 0], [0, 0, variance]], dtype=np.float32) #The name prefix for all variable scopes namePrefix = namePrePrefix + "Metric" in_points = tf.placeholder(tf.float32, [None, 3], name=(namePrefix + "In")) small_points = in_points * 0.001 crossterm_one = small_points[:, 0] * small_points[:, 1] crossterm_two = small_points[:, 1] * small_points[:, 2] crossterm_three = small_points[:, 0] * small_points[:, 2] crossterms = tf.stack([crossterm_one, crossterm_two, crossterm_three], axis=1) poly_aug_in_points = tf.concat([small_points, tf.square(small_points), crossterms], axis=1) approx_norm_network = approxNetwork(poly_aug_in_points, False, namePrefix=namePrefix) approx_norm_out = tf.identity(approx_norm_network, name=(namePrefix + "Out")) target_norms = tf.placeholder(tf.float32, [None, 1]) with tf.name_scope('loss'): loss = tf.losses.absolute_difference(approx_norm_out, tf.square(target_norms * .001)) with tf.name_scope('adam_optimizer'): train_step = tf.train.GradientDescentOptimizer(train_step_size).minimize(loss) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2) check = tf.add_check_numerics_ops() start = time.time() with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: sess.run(tf.global_variables_initializer()) saver = tf.train.Saver() batchNum = 0 start = time.time() num_exact = int(float(BATCH_SIZE) * zero_hammer_prob) num_fuzzed = BATCH_SIZE - num_exact for i in range(training_iters): #Pick a random collection of points on the target manifold exactPoints = randomRows(pointSubset, num_exact) #Pick a random collection of points from the input point list fuzzedPoints = randomRows(pointList, num_fuzzed) #Compute normally-distributed offsets for them offsets = np.random.multivariate_normal(mean_vec, covar_mat, size=num_fuzzed) fuzzedPoints = fuzzedPoints + offsets allPoints = np.vstack((exactPoints, fuzzedPoints)) #Great, now for each fuzzed point, compute the actual distances to the original point cloud actualDistances, _ = kdTree.query(allPoints) actualDistances = np.reshape(actualDistances, (BATCH_SIZE, 1)) #Okay, now run a training step batchNum += 1 sess.run([train_step, check], feed_dict={in_points : allPoints, target_norms : actualDistances}) if (i % VIEW_AFTER == 0): train_loss = loss.eval(feed_dict={in_points : allPoints, target_norms : actualDistances}) print("Batches per second: ", batchNum / (time.time() - start)) train_loss = math.sqrt(train_loss) * 1000.0 print("Step %d, training loss %g mm" % (i, train_loss)) saver.save(sess, "./" + namePrefix + "/" + namePrefix)
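# The regression targets above are exact nearest-neighbor distances to the point
# cloud; a standalone sketch of that target construction with scipy's cKDTree
# (array names and sizes are illustrative):
import numpy as np
from scipy.spatial import cKDTree

cloud = np.random.rand(1000, 3).astype(np.float32)           # stand-in point cloud
tree = cKDTree(cloud)
queries = cloud[:16] + np.random.normal(scale=0.01, size=(16, 3))
distances, _ = tree.query(queries)                           # exact NN distances
targets = distances.reshape(-1, 1)                           # shape of target_norms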
def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None, log_timestamp=True, log_vars=None, debug=False): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Any derived class of ``Inference`` **must** implement this method. No methods which build ops should be called outside ``initialize()``. Parameters ---------- n_iter : int, optional Number of iterations for algorithm. n_print : int, optional Number of iterations for each print progress. To suppress print progress, then specify 0. Default is ``int(n_iter / 100)``. scale : dict of RandomVariable to tf.Tensor, optional A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. logdir : str, optional Directory where event file will be written. For details, see ``tf.summary.FileWriter``. Default is to log nothing. log_timestamp : bool, optional If True (and ``logdir`` is specified), create a subdirectory of ``logdir`` to save the specific run results. The subdirectory's name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'. log_vars : list, optional Specifies the list of variables to log after each ``n_print`` steps. If None, will log all variables. If ``[]``, no variables will be logged. ``logdir`` must be specified for variables to be logged. debug : bool, optional If True, add checks for ``NaN`` and ``Inf`` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) self.summarize = tf.summary.merge_all() else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() # Store reset ops which user can call. Subclasses should append # any ops needed to reset internal variables in inference. self.reset = [tf.variables_initializer([self.t])]
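# Usage sketch for the debug flag above, assuming an Edward-style driver loop
# (`inference` is a hypothetical Inference instance): with debug=True each call
# to update() also evaluates op_check, so a NaN/Inf fails fast.
inference.initialize(n_iter=1000, debug=True)
tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
inference.finalize()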
def train(): _get_control_params() if not os.path.exists(modelParams['dataDir']): raise ValueError("No such data directory %s" % modelParams['dataDir']) #meanImgFi1000le = os.path.join(FLAGS.dataDir, "meta") #if not os.path.isfile(meanImgFile): # raise ValueError("Warning, no meta file found at %s" % meanImgFile) #else: # with open(meanImgFile, "r") as inMeanFile: # meanInfo = json.load(inMeanFile) # # meanImg = meanInfo['mean'] # # # also load the target output sizes # params['targSz'] = meanInfo["targSz"] #_setupLogging(os.path.join(modelParams['trainLogDir'], "genlog")) with tf.Graph().as_default(): # BGR to RGB #params['meanImg'] = tf.constant(meanImg, dtype=tf.float32) # track the number of train calls (basically number of batches processed) globalStep = tf.get_variable('globalStep', [], initializer=tf.constant_initializer(0), trainable=False) # Get images and transformation for model_cnn. imagesOrig, images, pOrig, tHAB, tfrecFileIDs = data_input.inputs( **modelParams) # Build a Graph that computes the HAB predictions from the # inference model. pHAB = model_cnn.inference(images, **modelParams) # Calculate loss. loss = model_cnn.loss(pHAB, tHAB, **modelParams) # Create a saver. saver = tf.train.Saver(tf.global_variables()) # Build an initialization operation to run below. #init = tf.initialize_all_variables() init = tf.global_variables_initializer() opCheck = tf.add_check_numerics_ops() # Start running operations on the Graph. config = tf.ConfigProto( log_device_placement=modelParams['logDevicePlacement']) config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 sess = tf.Session(config=config) #sess = tf_debug.LocalCLIDebugWrapperSession(sess) #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan) sess.run(init) # restore a saver. saver = tf.train.Saver(tf.global_variables()) saver.restore(sess, modelParams['trainLogDir'] + '/model.ckpt-89999') # Start the queue runners. 
tf.train.start_queue_runners(sess=sess) HABperPixelsum = 0 durationSum = 0 ######### USE LATEST STATE TO WARP IMAGES if modelParams['writeWarpedImages']: stepsForOneDataRound = int((modelParams['numExamples'] / modelParams['activeBatchSize'])) + 1 print('Warping images with batch size %d in %d steps' % (modelParams['activeBatchSize'], stepsForOneDataRound)) for step in xrange(stepsForOneDataRound): startTime = time.time() evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, evlossValue = sess.run( [ imagesOrig, images, pOrig, tHAB, pHAB, tfrecFileIDs, loss ]) durationSum += (time.time() - startTime) HABRES = evtHAB - evpHAB HABperPixel = 0 for i in xrange(modelParams['activeBatchSize']): H = np.asarray([[ HABRES[i][0], HABRES[i][1], HABRES[i][2], HABRES[i][3] ], [ HABRES[i][4], HABRES[i][5], HABRES[i][6], HABRES[i][7] ]], np.float32) HABperPixel += np.sqrt((H * H).sum(axis=0)).mean() HABperPixel = HABperPixel / modelParams['activeBatchSize'] HABperPixelsum += HABperPixel #### put imageA, warpped imageB by pHAB, HAB-pHAB as new HAB, changed fileaddress tfrecFileIDs data_output.output(evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, **modelParams) # Print Progress Info if ((step % FLAGS.ProgressStepReportStep) == 0) or ((step + 1) == stepsForOneDataRound): print( 'Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins' % ((100 * step) / stepsForOneDataRound, HABperPixelsum / (step + 1), durationSum / 60, (((durationSum * stepsForOneDataRound) / (step + 1)) / 60) - (durationSum / 60))) print( 'Average training loss = %.2f - Average time per sample= %.2f s, Steps = %d' % (HABperPixelsum / step, durationSum / (step * modelParams['activeBatchSize']), step))
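# The commented-out tfdbg lines above are an alternative to add_check_numerics_ops:
# wrap the session and filter runs for the first tensor containing Inf or NaN.
from tensorflow.python import debug as tf_debug

sess = tf_debug.LocalCLIDebugWrapperSession(sess)
sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
# Subsequent sess.run(...) calls drop into the CLI; `run -f has_inf_or_nan`
# stops at the first op that produces an Inf or NaN value.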
def main(_=None): FLAGS = flags.FLAGS if FLAGS.save_model: FLAGS.batch_size = 1 FLAGS.overwrite = False config = FLAGS FLAGS.__dict__['config'] = config FLAGS.logdir = FLAGS.logdir.format(name=FLAGS.name) logdir = FLAGS.logdir save_model_dir = "saved_model/" logging.info('logdir: %s', logdir) if os.path.exists(logdir) and FLAGS.overwrite and not FLAGS.save_model: logging.info( '"overwrite" is set to True. Deleting logdir at "%s".', logdir) shutil.rmtree(logdir) if os.path.exists(save_model_dir) and FLAGS.save_model: logging.info( '"save_model" is set to True. Deleting save_model_dir at "%s".', save_model_dir) shutil.rmtree(save_model_dir) with tf.Graph().as_default(): model_dict = model_config.get(FLAGS) lr = model_dict.lr opt = model_dict.opt model = model_dict.model lr = tf.convert_to_tensor(lr) tf.summary.scalar('learning_rate', lr) global_step = tf.train.get_or_create_global_step() train_dataset, test_dataset = getDataset(config) target, res = model.make_target(train_dataset) gvs = opt.compute_gradients(target) suppress_inf_and_nans = (config.grad_value_clip > 0 or config.grad_norm_clip > 0) report = tools.gradient_summaries(gvs, suppress_inf_and_nans) valid_report = dict() gvs = tools.clip_gradients(gvs, value_clip=config.grad_value_clip, norm_clip=config.grad_norm_clip) try: report.update(model.make_report(train_dataset)) valid_report.update(model.make_report(test_dataset)) except AttributeError: logging.warning( 'Model %s has no "make_report" method.', str(model)) raise report = tools.scalar_logs(report, config.ema, 'train', global_update=config.global_ema_update) report['lr'] = lr valid_report = tools.scalar_logs( valid_report, config.ema, 'valid', global_update=config.global_ema_update) reports_keys = sorted(report.keys()) def _format(k): if k in ('lr', 'learning_rate'): return '.2E' return '.3f' report_template = ', '.join(['{}: {}{}:{}{}'.format( k, '{', k, _format(k), '}') for k in reports_keys]) logging.info('Trainable variables:') tools.log_variables_by_scope() # inspect gradients for g, v in gvs: if g is None: logging.warning('No gradient for variable: %s.', v.name) tools.log_num_params() update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if FLAGS.check_numerics: update_ops += [tf.add_check_numerics_ops()] with tf.control_dependencies(update_ops): train_step = opt.apply_gradients(gvs, global_step=global_step) sess_config = tf.ConfigProto() sess_config.gpu_options.allow_growth = True if config.save_model: saver = tf.train.Saver() with tf.Session() as sess: saver.restore(sess, logdir + "/best.ckpt") signature = tf.saved_model.signature_def_utils.build_signature_def(res.input, res.output, 'sig_inout') builder = tf.saved_model.builder.SavedModelBuilder("saved_model/") builder.add_meta_graph_and_variables(sess, ['car_match_model'], {'sig_inout': signature}) builder.save() else: with tf.train.SingularMonitoredSession( hooks=create_hooks(FLAGS), checkpoint_dir=logdir, config=sess_config) as sess: train_itr, _ = sess.run([global_step, update_ops]) train_tensors = [global_step, train_step] report_tensors = [report, valid_report] all_tensors = report_tensors + train_tensors valid_loss = 1e5 while train_itr < config.max_train_steps: if train_itr % config.report_loss_steps == 0: report_vals, valid_report_vals, train_itr, _ = sess.run( all_tensors) this_loss = valid_report_vals['loss'] if this_loss < valid_loss and tf.train.latest_checkpoint(logdir) is not None: ends = ['.data-00000-of-00001', '.index', '.meta'] for end in ends: shutil.copy(tf.train.latest_checkpoint(logdir) + end, 
logdir + "/best.ckpt" + end) logging.info('') logging.info('train:') logging.info('#%s: %s', train_itr, report_template.format(**report_vals)) logging.info('valid:') valid_logs = dict(report_vals) valid_logs.update(valid_report_vals) logging.info('#%s: %s', train_itr, report_template.format(**valid_logs)) vals_to_check = list(report_vals.values()) if (np.isnan(vals_to_check).any() or np.isnan(vals_to_check).any()): logging.fatal('NaN in reports: %s; breaking...', report_template.format(**report_vals)) else: train_itr, _ = sess.run(train_tensors)
def train(args, model, data): dirname = 'save-vrnn/' + args.appliance if not os.path.exists(dirname): os.makedirs(dirname) with open(os.path.join(dirname, 'config.pkl'), 'w') as f: cPickle.dump(args, f) ckpt = tf.train.get_checkpoint_state( dirname ) #check if there exists a previously trained model in the checkpoint Xtrain, ytrain = data train = Iterator(Xtrain, ytrain, batch_size=args.batch_size, n_steps=args.seq_length) #to split data into batches n_batches = train.nbatches Xtrain, ytrain = train.get_split() mae = [] mse = [] with tf.Session() as sess: summary_writer = tf.summary.FileWriter( 'logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph) check = tf.add_check_numerics_ops() merged = tf.summary.merge_all() tf.global_variables_initializer().run( ) #initialize all variables in the graph as defined saver = tf.train.Saver(tf.global_variables()) if ckpt: saver.restore( sess, ckpt.model_checkpoint_path) #restore previously saved model print "Loaded model" start = time.time() state_c = None state_h = None for e in xrange(args.num_epochs): #assign learning rate sess.run( tf.assign(model.lr, args.learning_rate * (args.decay_rate**e))) #get the initial state of lstm cell state = model.initial_state_c, model.initial_state_h mae.append([]) mse.append([]) for b in xrange(n_batches): x = Xtrain[b] y = ytrain[b] feed = { model.input_x: x, model.input_y: y, model.target_data: y } # input data : x and y ; target data : y #train the model on this batch of data train_loss, _, cr, summary, sigma, mu, inp, target, state_c, state_h, pred = sess.run( [ model.cost, model.train_op, check, merged, model.sigma, model.mu, model.flat_input, model.target, model.final_state_c, model.final_state_h, model.output ], feed) summary_writer.add_summary(summary, e * n_batches + b) #the output from the model is in the shape [50000,1] reshape to 3D (batch_size, time_steps, n_app) pred = np.array(np.reshape(pred, [250, 200, -1])).astype(float) label = np.array(y).astype(float) #compute mae and mse for the output mae_i = np.reshape(np.absolute((label - pred)), [ -1, ]).mean() mse_i = np.reshape((label - pred)**2, [ -1, ]).mean() mae[e].append(mae_i) mse[e].append(mse_i) #save the model after every 800 (monitoring_freq) epochs if (e * n_batches + b) % args.save_every == 0 and ( (e * n_batches + b) > 0): checkpoint_path = os.path.join( dirname, 'model_' + str(args.num_epochs) + '_' + str(args.learning_rate) + '.ckpt') saver.save(sess, checkpoint_path, global_step=e * n_batches + b) print "model saved to {}".format(checkpoint_path) end = time.time() print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \ .format(e * n_batches + b, args.num_epochs * n_batches, e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0)) start = time.time() #the average mae,mse values in every epoch print "Epoch {}, mae = {:.3f}, mse = {:.3f}".format( e, sum(mae[e]) / len(mae[e]), sum(mse[e]) / len(mse[e])) #path to save the final model checkpoint_path = os.path.join( dirname, 'final_model_' + str(args.num_epochs) + '_' + str(args.learning_rate) + '.ckpt') saver2 = tf.train.Saver() saver2.save(sess, checkpoint_path) print "model saved to {}".format(checkpoint_path)
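# The epoch loop above re-creates a tf.assign node inside sess.run on every
# epoch, which silently grows the graph. A hedged sketch of the usual
# alternative (illustrative names, not part of the code above): build one
# assign op fed through a placeholder and reuse it.
import tensorflow as tf

lr = tf.Variable(0.001, trainable=False, name='lr')
new_lr = tf.placeholder(tf.float32, [], name='new_lr')
set_lr = tf.assign(lr, new_lr)  # built once, fed many times

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(3):
        sess.run(set_lr, feed_dict={new_lr: 0.001 * (0.97 ** e)})
        print(sess.run(lr))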
def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None, log_timestamp=True, log_vars=None, debug=False): """Initialize inference algorithm. It initializes hyperparameters and builds ops for the algorithm's computation graph. Any derived class of `Inference` **must** implement this method. No methods which build ops should be called outside `initialize()`. Args: n_iter: int, optional. Number of iterations for algorithm when calling `run()`. Alternatively if controlling inference manually, it is the expected number of calls to `update()`; this number determines tracking information during the print progress. n_print: int, optional. Number of iterations for each print progress. To suppress print progress, then specify 0. Default is `int(n_iter / 100)`. scale: dict of RandomVariable to tf.Tensor, optional. A tensor to scale computation for any random variable that it is binded to. Its shape must be broadcastable; it is multiplied element-wise to the random variable. For example, this is useful for mini-batch scaling when inferring global variables, or applying masks on a random variable. logdir: str, optional. Directory where event file will be written. For details, see `tf.summary.FileWriter`. Default is to log nothing. log_timestamp: bool, optional. If True (and `logdir` is specified), create a subdirectory of `logdir` to save the specific run results. The subdirectory's name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'. log_vars: list, optional. Specifies the list of variables to log after each `n_print` steps. If None, will log all variables. If `[]`, no variables will be logged. `logdir` must be specified for variables to be logged. debug: bool, optional. If True, add checks for `NaN` and `Inf` to all computations in the graph. May result in substantially slower execution times. """ self.n_iter = n_iter if n_print is None: self.n_print = int(n_iter / 100) else: self.n_print = n_print self.progbar = Progbar(self.n_iter) self.t = tf.Variable(0, trainable=False, name="iteration") self.increment_t = self.t.assign_add(1) if scale is None: scale = {} elif not isinstance(scale, dict): raise TypeError("scale must be a dict object.") self.scale = scale if logdir is not None: self.logging = True if log_timestamp: logdir = os.path.join( logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S")) self._set_log_variables(log_vars) self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph()) self.summarize = tf.summary.merge_all() else: self.logging = False self.debug = debug if self.debug: self.op_check = tf.add_check_numerics_ops() # Store reset ops which user can call. Subclasses should append # any ops needed to reset internal variables in inference. self.reset = [tf.variables_initializer([self.t])]
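# Sketch of the log_timestamp behaviour documented above: when enabled, each
# run writes to a logdir/YYYYMMDD_HHMMSS subdirectory so successive runs do
# not overwrite each other's event files. The base path is illustrative.
import os
from datetime import datetime
import tensorflow as tf

logdir = os.path.join('/tmp/log',
                      datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))
train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
train_writer.close()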
def main(_): # We want to see all the logging messages for this tutorial. tf.logging.set_verbosity(tf.logging.INFO) # Start a new TensorFlow session. sess = tf.InteractiveSession() label_count = 10 # Figure out the learning rates for each training phase. Since it's often # effective to have high learning rates at the start of training, followed by # lower levels towards the end, the number of steps and learning rates can be # specified as comma-separated lists to define the rate at each stage. For # example --how_many_training_epochs=10000,3000 --learning_rate=0.001,0.0001 # will run 13,000 training loops in total, with a rate of 0.001 for the first # 10,000, and 0.0001 for the final 3,000. training_epochs_list = list( map(int, FLAGS.how_many_training_epochs.split(','))) learning_rates_list = list(map(float, FLAGS.learning_rate.split(','))) if len(training_epochs_list) != len(learning_rates_list): raise Exception( '--how_many_training_epochs and --learning_rate must be equal length ' 'lists, but are %d and %d long instead' % (len(training_epochs_list), len(learning_rates_list))) input_xs = tf.placeholder(tf.float32, [None, 784], name='input_xs') logits, dropout_prob = models.create_model(input_xs, label_count, FLAGS.model_architecture, is_training=True) # Define loss and optimizer ground_truth_input = tf.placeholder(tf.float32, [None, 10], name='groundtruth_input') # Optionally we can add runtime checks to spot when NaNs or other symptoms of # numerical errors start occurring during training. control_dependencies = [] if FLAGS.check_nans: checks = tf.add_check_numerics_ops() control_dependencies = [checks] # Create the back propagation and training evaluation machinery in the graph. with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.softmax_cross_entropy( onehot_labels=ground_truth_input, logits=logits) tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'), tf.control_dependencies(control_dependencies): learning_rate_input = tf.placeholder(tf.float32, [], name='learning_rate_input') momentum = tf.placeholder(tf.float32, [], name='momentum') # train_step = tf.train.GradientDescentOptimizer(learning_rate_input).minimize(cross_entropy_mean) train_step = tf.train.MomentumOptimizer( learning_rate_input, momentum, use_nesterov=True).minimize(cross_entropy_mean) # train_step = tf.train.AdamOptimizer(learning_rate_input).minimize(cross_entropy_mean) # train_step = tf.train.AdadeltaOptimizer(learning_rate_input).minimize(cross_entropy_mean) # train_step = tf.train.RMSPropOptimizer(learning_rate_input, momentum).minimize(cross_entropy_mean) predicted_indices = tf.argmax(logits, 1) correct_prediction = tf.equal(predicted_indices, tf.argmax(ground_truth_input, 1)) confusion_matrix = tf.confusion_matrix(tf.argmax(ground_truth_input, 1), predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.summary.scalar('accuracy', evaluation_step) global_step = tf.train.get_or_create_global_step() increment_global_step = tf.assign(global_step, global_step + 1) saver = tf.train.Saver(tf.global_variables()) # Merge all the summaries and write them out to /tmp/retrain_logs (by default) merged_summaries = tf.summary.merge_all() train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph) validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation') tf.global_variables_initializer().run() start_epoch = 1 start_checkpoint_epoch = 0 if FLAGS.start_checkpoint: 
    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
    tmp = FLAGS.start_checkpoint
    tmp = tmp.split('-')
    tmp.reverse()
    start_checkpoint_epoch = int(tmp[0])
    start_epoch = start_checkpoint_epoch + 1

  # calculate training epochs max
  training_epochs_max = np.sum(training_epochs_list)
  # Run training only when the start_checkpoint epoch differs from
  # training_epochs_max.
  if start_checkpoint_epoch != training_epochs_max:
    tf.logging.info('Training from epoch: %d ', start_epoch)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Training epoch
    for training_epoch in xrange(start_epoch, training_epochs_max + 1):
      # Figure out what the current learning rate is.
      training_epochs_sum = 0
      for i in range(len(training_epochs_list)):
        training_epochs_sum += training_epochs_list[i]
        if training_epoch <= training_epochs_sum:
          learning_rate_value = learning_rates_list[i]
          break

      set_size = mnist.train.num_examples
      for i in xrange(0, set_size, FLAGS.batch_size):
        # Pull the image samples we'll use for training.
        train_batch_xs, train_batch_ys = mnist.train.next_batch(
            FLAGS.batch_size)
        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries, evaluation_step, cross_entropy_mean,
                train_step, increment_global_step
            ],
            feed_dict={
                input_xs: train_batch_xs,
                ground_truth_input: train_batch_ys,
                learning_rate_input: learning_rate_value,
                momentum: 0.95,
                dropout_prob: 0.5
            })
        train_writer.add_summary(train_summary, i)
        tf.logging.info(
            'Epoch #%d, Step #%d: rate %f, accuracy %.1f%%, cross entropy %f'
            % (training_epoch, i, learning_rate_value, train_accuracy * 100,
               cross_entropy_value))

        is_last_step = ((set_size - i) / FLAGS.batch_size <= 1)
        if is_last_step:
          set_size = mnist.validation.num_examples
          total_accuracy = 0
          total_conf_matrix = None
          for i in xrange(0, set_size, FLAGS.batch_size):
            validation_batch_xs, validation_batch_ys = \
                mnist.validation.next_batch(FLAGS.batch_size)
            # Run a validation step and capture training summaries for
            # TensorBoard with the `merged` op.
            validation_summary, validation_accuracy, conf_matrix = sess.run(
                [merged_summaries, evaluation_step, confusion_matrix],
                feed_dict={
                    input_xs: validation_batch_xs,
                    ground_truth_input: validation_batch_ys,
                    dropout_prob: 1.0
                })
            validation_writer.add_summary(validation_summary, training_epoch)
            batch_size = min(FLAGS.batch_size, set_size - i)
            total_accuracy += (validation_accuracy * batch_size) / set_size
            if total_conf_matrix is None:
              total_conf_matrix = conf_matrix
            else:
              total_conf_matrix += conf_matrix
          tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
          tf.logging.info('Epoch %d: Validation accuracy = %.1f%% (N=%d)' %
                          (training_epoch, total_accuracy * 100, set_size))

      # Save the model checkpoint periodically.
      if (training_epoch % FLAGS.save_step_interval == 0
          or training_epoch == training_epochs_max):
        checkpoint_path = os.path.join(FLAGS.train_dir,
                                       FLAGS.model_architecture + '.ckpt')
        tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_epoch)
        saver.save(sess, checkpoint_path, global_step=training_epoch)

  # For testing
  set_size = mnist.test.num_examples
  tf.logging.info('test size=%d', set_size)
  total_accuracy = 0
  total_conf_matrix = None
  for i in xrange(0, set_size, FLAGS.batch_size):
    test_batch_xs, test_batch_ys = mnist.test.next_batch(FLAGS.batch_size)
    test_accuracy, conf_matrix = sess.run(
        [evaluation_step, confusion_matrix],
        feed_dict={
            input_xs: test_batch_xs,
            ground_truth_input: test_batch_ys,
            dropout_prob: 1.0
        })
    batch_size = min(FLAGS.batch_size, set_size - i)
    total_accuracy += (test_accuracy * batch_size) / set_size
    if total_conf_matrix is None:
      total_conf_matrix = conf_matrix
    else:
      total_conf_matrix += conf_matrix
  tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
  tf.logging.info('Final test accuracy = %.1f%% (N=%d)' %
                  (total_accuracy * 100, set_size))
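# Minimal sketch of the batch-weighted accumulation used by the validation and
# test loops above: the final batch can be smaller than FLAGS.batch_size, so
# each batch's accuracy contributes in proportion to its true size.
def weighted_average(batch_values, batch_sizes):
    total = float(sum(batch_sizes))
    return sum(v * s for v, s in zip(batch_values, batch_sizes)) / total

print(weighted_average([0.9, 0.8, 1.0], [100, 100, 37]))  # ~0.873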
def main(): N = 10000 d = 250 alpha = np.ones((d),) alpha[d/2:] = 10.0 sigma2 = 1.0 X = np.random.rand(N, d) w, y = simulate(X, alpha, sigma2) batch_size = 64 batch_X = tf.placeholder(tf.float32, (batch_size, d), name="X") batch_y = tf.placeholder(tf.float32, (batch_size, ), name="y") mf = bf.mean_field.MeanFieldInference(linear_ard_joint_density, batch_X=batch_X, batch_y=batch_y, N=N) a0 = 1.0 b0 = 1.0 c0 = 1.0 d0 = 1.0 alpha_default = np.ones((d,), dtype=np.float32) * a0/b0 mf.add_latent("alpha", 1/np.sqrt(alpha_default), 1e-6 * np.ones((d,), dtype=np.float32), bf.transforms.exp_reciprocal, shape=(d,)) sigma2_default = np.array(d0/(c0+1)).astype(np.float32) mf.add_latent("sigma2", np.sqrt(sigma2_default), 1e-6, bf.transforms.square, shape=()) mf.add_latent("w", tf.random_normal([d,], stddev=1.0, dtype=tf.float32), 1e-6 * np.ones((d,), dtype=np.float32), shape=(d,)) elbo = mf.build_stochastic_elbo(n_eps=5) sigma2s = mf.get_posterior_samples("sigma2") #alphas = mf.get_posterior_samples("alpha") alpha_mean_var = mf.latents["alpha"]["q_mean"] alpha_stddev_var = mf.latents["alpha"]["q_stddev"] alpha_var = mf.latents["alpha"]["samples"][0] train_step = tf.train.AdamOptimizer(0.01).minimize(-elbo) debug = tf.add_check_numerics_ops() init = tf.initialize_all_variables() merged = tf.merge_all_summaries() sess = tf.Session() writer = tf.train.SummaryWriter("/tmp/ard_logs", sess.graph_def) sess.run(init) for i, batch_xs, batch_ys in batch_generator(X, y, 64, max_steps=20000): fd = mf.sample_stochastic_inputs() fd[batch_X] = batch_xs fd[batch_y] = batch_ys (elbo_val, sigma2s_val, alpha_mean, alpha_stddev, alpha_val) = sess.run([elbo, sigma2s, alpha_mean_var, alpha_stddev_var, alpha_var], feed_dict=fd) print "step %d elbo %.2f sigma2 %.2f " % (i, elbo_val, np.mean(sigma2s_val)) summary_str = sess.run(merged, feed_dict=fd) writer.add_summary(summary_str, i) try: sess.run(debug, feed_dict=fd) except: bad = ~np.isfinite(alpha_val) print alpha_mean[bad] print alpha_stddev[bad] print alpha_val[bad] sess.run(train_step, feed_dict = fd)
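# Sketch of the inspection step in the except-block above: mask the fetched
# values with np.isfinite to locate exactly which entries went non-finite.
# The array here is a stand-in for alpha_val.
import numpy as np

alpha_val = np.array([1.0, np.nan, 2.0, np.inf], np.float32)
bad = ~np.isfinite(alpha_val)
print(np.where(bad)[0])  # indices of the offending entries
print(alpha_val[bad])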
def main(_): image_filenames, label_filenames = camvid.get_filename_list(image_dir) val_image_filenames, val_label_filenames = camvid.get_filename_list(val_dir) with tf.Graph().as_default(): with tf.device('/cpu:0'): # config = tf.ConfigProto(device_count = {'GPU': 0}) config = tf.ConfigProto() config.gpu_options.allocator_type = 'BFC' sess = tf.InteractiveSession(config=config) train_data = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL], name='train_data') train_labels = tf.placeholder(tf.int64, shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 1], name='train_labels') is_training = tf.placeholder(tf.bool, name='is_training') images, labels = camvid.CamVidInputs(image_filenames, label_filenames, BATCH_SIZE) val_images, val_labels = camvid.CamVidInputs(val_image_filenames, val_label_filenames, BATCH_SIZE) logits = segnet_vgg.inference(train_data, is_training, NUM_CLASSES) total_loss = loss(logits, train_labels) train_op = train(total_loss) check_op = tf.add_check_numerics_ops() merged_summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter('train', sess.graph) saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) if not os.path.exists(LOG_DIR): os.makedirs(LOG_DIR) checkpoint_path = os.path.join(LOG_DIR, "segnet.ckpt") sess.run(tf.global_variables_initializer()) # Start the queue runners. coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) for i in range(EPOCH): image_batch, label_batch = sess.run([images, labels]) feed_dict = { train_data: image_batch, train_labels: label_batch, is_training: True } _, _, _, summary = sess.run([train_op, total_loss, check_op, merged_summary_op], feed_dict=feed_dict) if i % 10 == 0: print("Start validating...") val_images_batch, val_labels_batch = sess.run([val_images, val_labels]) loss_value = total_loss.eval(feed_dict={train_data: val_images_batch, train_labels: val_labels_batch, is_training: True}) print("Epoch: %d, Loss: %g" % (i, loss_value)) saver.save(sess, checkpoint_path) # write logs at every iteration summary_writer.add_summary(summary, i) coord.request_stop() coord.join(threads)
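# Minimal standalone sketch (illustrative toy queue) of the TF 1.x queue-runner
# lifecycle used above: start threads under a Coordinator, consume batches,
# then request a stop and join the threads before exiting.
import tensorflow as tf

index_queue = tf.train.range_input_producer(10, shuffle=False)
next_index = index_queue.dequeue()

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in range(5):
        print(sess.run(next_index))
    coord.request_stop()
    coord.join(threads)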
def main(_): # We want to see all the logging messages for this tutorial. tf.logging.set_verbosity(tf.logging.INFO) # Start a new TensorFlow session. sess = tf.InteractiveSession() model_settings = models.prepare_model_settings( len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))), FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms, FLAGS.window_stride_ms, FLAGS.dct_coefficient_count) audio_processor = input_data.AudioProcessor(FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage, FLAGS.unknown_percentage, FLAGS.wanted_words.split(','), FLAGS.validation_percentage, FLAGS.testing_percentage, model_settings) fingerprint_size = model_settings['fingerprint_size'] label_count = model_settings['label_count'] time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000) training_steps = FLAGS.how_many_training_steps learning_rate = FLAGS.learning_rate # ----------------------------------------------------------------------- # -----------------------------Placeholder------------------------------- # ----------------------------------------------------------------------- fingerprint_input = tf.placeholder(tf.float32, [None, fingerprint_size], name='fingerprint_input') logits, dropout_prob = models.create_model(fingerprint_input, model_settings, FLAGS.model_architecture, is_training=True) # Define loss and optimizer ground_truth_input = tf.placeholder(tf.int64, [None], name='groundtruth_input') # Optionally we can add runtime checks to spot when NaNs or other symptoms of # numerical errors start occurring during training. control_dependencies = [] if FLAGS.check_nans: checks = tf.add_check_numerics_ops() control_dependencies = [checks] # ----------------------------------------------------------------------- # -----------------Back propagation and training evaluation-------------- # ----------------------------------------------------------------------- # Create the back propagation and training evaluation machinery in the graph. with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( labels=ground_truth_input, logits=logits) tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'), tf.control_dependencies(control_dependencies): train_step = tf.train.AdamOptimizer(learning_rate).minimize( cross_entropy_mean) predicted_indices = tf.argmax(logits, 1) correct_prediction = tf.equal(predicted_indices, ground_truth_input) confusion_matrix = tf.confusion_matrix(ground_truth_input, predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.summary.scalar('accuracy', evaluation_step) global_step = tf.train.get_or_create_global_step() increment_global_step = tf.assign(global_step, global_step + 1) saver = tf.train.Saver(tf.global_variables()) # Merge all the summaries and write them out to /tmp/retrain_logs (by default) merged_summaries = tf.summary.merge_all() train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph) validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation') tf.global_variables_initializer().run() start_step = 1 if FLAGS.start_checkpoint: models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint) start_step = global_step.eval(session=sess) tf.logging.info('Training from step: %d ', start_step) # Save graph.pbtxt. tf.train.write_graph(sess.graph_def, FLAGS.train_dir, FLAGS.model_architecture + '.pbtxt') # Save list of words. 
with gfile.GFile( os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'), 'w') as f: f.write('\n'.join(audio_processor.words_list)) # ----------------------------------------------------------------------- # -----------------Training and validation------------------------------- # ----------------------------------------------------------------------- # Training loop. training_steps_max = training_steps # Print the local time of beginning training beg_time = datetime.datetime.now() print("Beginning time : " + str(beg_time)) for training_step in xrange(start_step, training_steps_max + 1): # Pull the audio samples we'll use for training. train_fingerprints, train_ground_truth = audio_processor.get_data( FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency, FLAGS.background_volume, time_shift_samples, 'training', sess) # Run the graph with this batch of training data. train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run( [ merged_summaries, evaluation_step, cross_entropy_mean, train_step, increment_global_step ], feed_dict={ fingerprint_input: train_fingerprints, ground_truth_input: train_ground_truth, dropout_prob: 0.5 }) train_writer.add_summary(train_summary, training_step) tf.logging.info( 'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' % (training_step, learning_rate, train_accuracy * 100, cross_entropy_value)) is_last_step = (training_step == training_steps_max) # Validation if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step: set_size = audio_processor.set_size('validation') total_accuracy = 0 total_conf_matrix = None for i in xrange(0, set_size, FLAGS.batch_size): validation_fingerprints, validation_ground_truth = ( audio_processor.get_data(FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'validation', sess)) # Run a validation step and capture training summaries for TensorBoard # with the `merged` op. validation_summary, validation_accuracy, conf_matrix = sess.run( [merged_summaries, evaluation_step, confusion_matrix], feed_dict={ fingerprint_input: validation_fingerprints, ground_truth_input: validation_ground_truth, dropout_prob: 1.0 }) validation_writer.add_summary(validation_summary, training_step) batch_size = min(FLAGS.batch_size, set_size - i) total_accuracy += (validation_accuracy * batch_size) / set_size if total_conf_matrix is None: total_conf_matrix = conf_matrix else: total_conf_matrix += conf_matrix tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix)) tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' % (training_step, total_accuracy * 100, set_size)) # Save the model checkpoint periodically. 
if (training_step % FLAGS.save_step_interval == 0 or training_step == training_steps_max): checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '.ckpt') tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step) saver.save(sess, checkpoint_path, global_step=training_step) # Print the local time of ending training print("Beginning time : " + str(beg_time)) print("Ending time : " + str(datetime.datetime.now())) # ----------------------------------------------------------------------- # ------------------------------Test------------------------------------- # ----------------------------------------------------------------------- set_size = audio_processor.set_size('testing') tf.logging.info('set_size=%d', set_size) total_accuracy = 0 total_conf_matrix = None for i in xrange(0, set_size, FLAGS.batch_size): test_fingerprints, test_ground_truth = audio_processor.get_data( FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess) test_accuracy, conf_matrix = sess.run( [evaluation_step, confusion_matrix], feed_dict={ fingerprint_input: test_fingerprints, ground_truth_input: test_ground_truth, dropout_prob: 1.0 }) batch_size = min(FLAGS.batch_size, set_size - i) total_accuracy += (test_accuracy * batch_size) / set_size if total_conf_matrix is None: total_conf_matrix = conf_matrix else: total_conf_matrix += conf_matrix tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix)) tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % (total_accuracy * 100, set_size))
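# Sketch of the periodic checkpointing above: passing global_step to
# Saver.save appends the step to the path, producing files such as
# conv.ckpt-4000. The path below is illustrative.
import tensorflow as tf

v = tf.Variable(0)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    path = saver.save(sess, '/tmp/conv.ckpt', global_step=4000)
    print(path)  # /tmp/conv.ckpt-4000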
def main(): if (len(sys.argv) == 1): raise NameError('[ERROR] No dataset key') if (sys.argv[1] == 'imagenetval'): FLAGS.updates_per_epoch = 49000 FLAGS.num_test_batches = 1000 FLAGS.in_featdir = 'data/featslist/imagenetval/' FLAGS.in_lvdir = 'data/output/imagenetval/' elif (sys.argv[1] == 'lfw'): FLAGS.updates_per_epoch = 12233 FLAGS.num_test_batches = 1000 FLAGS.in_featdir = 'data/featslist/lfw/' FLAGS.in_lvdir = 'data/output/lfw/' elif (sys.argv[1] == 'church'): FLAGS.updates_per_epoch = 125227 FLAGS.num_test_batches = 1000 FLAGS.in_featdir = 'data/featslist/church/' FLAGS.in_lvdir = 'data/output/church/' else: raise NameError('[ERROR] Incorrect dataset key') data_loader = zhangfeats_loader(os.path.join(FLAGS.in_featdir, 'list.train.txt'), \ os.path.join(FLAGS.in_featdir, 'list.test.txt'),\ os.path.join(FLAGS.in_lvdir, 'lv_color_train.mat.npy'),\ os.path.join(FLAGS.in_lvdir, 'lv_color_test.mat.npy')) #Inputs lf = layer_factory() input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.feats_height, \ FLAGS.feats_width, FLAGS.feats_nch]) output_gt_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.hidden_size]) is_training = tf.placeholder(tf.bool) keep_prob = tf.placeholder(tf.float32) #Inference with tf.variable_scope('Inference', reuse=False): output_activ = cnn_feedforward(lf, input_tensor, is_training, keep_prob, reuse=False) with tf.variable_scope('Inference', reuse=True): output_test_activ = cnn_feedforward(lf, input_tensor, is_training, keep_prob, reuse=True) #Loss and gradient descent step loss, _, _, _ = compute_gmm_loss(output_gt_tensor, output_activ, summ=True) loss_test, pi_test, mu_test, sigma_test = compute_gmm_loss( output_gt_tensor, output_test_activ) train_step = optimize(loss, FLAGS.lr) #Standard steps check_nan_op = tf.add_check_numerics_ops() init = tf.global_variables_initializer() saver = tf.train.Saver(max_to_keep=0) summary_op = tf.summary.merge_all() gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) train_writer = tf.summary.FileWriter( os.path.join(FLAGS.in_lvdir, 'logs_mdn'), sess.graph) sess.run(init) if (FLAGS.is_train): for epoch in range(FLAGS.max_epoch): training_loss = 0. data_loader.random_reset() for i in range(FLAGS.updates_per_epoch): batch, batch_gt = data_loader.train_next_batch( FLAGS.batch_size) feed_dict = {input_tensor:batch, output_gt_tensor:batch_gt, \ is_training:True, keep_prob:.75} _, _, loss_value, summary_str = sess.run(\ [check_nan_op, train_step, loss, summary_op], \ feed_dict) train_writer.add_summary(summary_str, epoch * FLAGS.updates_per_epoch + i) training_loss = training_loss + loss_value print('[DEBUG] Epoch# %d, Loss: %f' % (epoch, \ (training_loss*1.)/FLAGS.updates_per_epoch)) save_chkpt(saver, epoch, sess, os.path.join(FLAGS.in_lvdir, 'models_mdn'), \ prefix='model_%d_exp' % FLAGS.nmix) else: load_chkpt(saver, sess, os.path.join(FLAGS.in_lvdir, 'models_mdn')) test_loss = 0. 
        data_loader.reset()
        lv_test_codes = np.zeros((0, (FLAGS.hidden_size + 1 + 1) * FLAGS.nmix), dtype='f')
        for i in range(FLAGS.num_test_batches):
            batch, batch_gt = data_loader.test_next_batch(FLAGS.batch_size)
            feed_dict = {input_tensor: batch, output_gt_tensor: batch_gt,
                         is_training: False, keep_prob: 1.}
            _, loss_value, output_pi, output_mu, output_sigma = \
                sess.run([check_nan_op, loss_test, pi_test, mu_test, sigma_test], feed_dict)
            test_loss = test_loss + loss_value
            output = np.concatenate((output_mu, output_sigma, output_pi), axis=1)
            lv_test_codes = np.concatenate((lv_test_codes, output), axis=0)
        print('[DEBUG] Test Loss: %f' % ((test_loss * 1.) / FLAGS.num_test_batches))
        np.save(os.path.join(FLAGS.in_lvdir, 'lv_color_mdn_test.mat'), lv_test_codes)
        print(lv_test_codes.shape)
    sess.close()
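# Sketch of the GMM-code packing above, with toy sizes: concatenating mu
# ([N, hidden_size*nmix]), sigma ([N, nmix]) and pi ([N, nmix]) row-wise
# yields the [N, (hidden_size + 1 + 1) * nmix] matrix that lv_test_codes
# accumulates. The shapes here are assumptions consistent with that allocation.
import numpy as np

n, hidden_size, nmix = 2, 3, 4
mu = np.zeros((n, hidden_size * nmix), np.float32)
sigma = np.ones((n, nmix), np.float32)
pi = np.full((n, nmix), 1.0 / nmix, np.float32)
codes = np.concatenate((mu, sigma, pi), axis=1)
assert codes.shape == (n, (hidden_size + 1 + 1) * nmix)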
def train(): """Train fish_cubes for a number of steps.""" # Get the sets of images and labels for training, validation, and # test if FLAGS.num_epochs: num_epochs = FLAGS.num_epochs else: num_epochs = None # Track global step across multiple iterations. This is updated in # the optimizer. with tf.variable_scope('control'): global_step = tf.get_variable('global_step', dtype=tf.int32, initializer=0, trainable=False) # seed provides the mechanism to control the shuffling which takes place reading input seed = tf.placeholder(tf.int64, shape=()) # Generate placeholders for the images and labels. iterator = input_data.input_pipeline_binary( FLAGS.data_dir, FLAGS.batch_size, fake_data=FLAGS.fake_data, num_epochs=num_epochs, read_threads=FLAGS.read_threads, shuffle_size=FLAGS.shuffle_size, num_expected_examples=FLAGS.num_examples, seed=seed) image_path, label_path, images, labels = iterator.get_next() if FLAGS.verbose: print_op = tf.print("images and labels this batch: ", image_path, label_path, labels) else: print_op = tf.constant('No printing') if FLAGS.random_rotation: images, labels = harmonics.apply_random_rotation(images, labels) # Build a Graph that computes predictions from the inference model. logits = topology.inference(images, FLAGS.network_pattern) # Add to the Graph the Ops for loss calculation. loss = topology.binary_loss(logits, labels) print('loss: ', loss) if FLAGS.check_numerics: if FLAGS.random_rotation: sys.exit('check_numerics is not compatible with random_rotation') check_numerics_op = tf.add_check_numerics_ops() else: check_numerics_op = tf.constant('not checked') var_pfx_map = {'cnn': 'cnn/', 'classifier': 'image_binary_classifier/'} if len(FLAGS.starting_snapshot): keys = FLAGS.snapshot_load.split(',') if FLAGS.snapshot_load else [ 'all' ] keys = [k.strip() for k in keys] if 'all' in keys: vars_to_load = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) else: assert all([k in var_pfx_map for k in keys]), 'unknown key to load: %s' % key vars_to_load = [global_step] for k in keys: vars_to_load.extend([ v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if v.name.startswith(var_pfx_map[k]) ]) if FLAGS.reset_global_step: vars_to_load.remove(global_step) else: vars_to_load = [] vars_to_hold_constant = [] # empty list means hold nothing constant if FLAGS.hold_constant is not None: keys = [k.strip() for k in FLAGS.hold_constant.split(',')] assert all([k in var_pfx_map for k in keys]), 'unknown key to hold constant: %s' % key for k in keys: vars_to_hold_constant.extend([ v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) if v.name.startswith(var_pfx_map[k]) ]) print('not subject to training: %s' % [v.name for v in vars_to_hold_constant]) if FLAGS.starting_snapshot and len(FLAGS.starting_snapshot): vars_in_snapshot = [ k for k in (pywrap_tensorflow.NewCheckpointReader( FLAGS.starting_snapshot).get_variable_to_shape_map()) ] else: vars_in_snapshot = [] vars_in_snapshot = set(vars_in_snapshot) print('vars in snapshot: %s' % vars_in_snapshot) if FLAGS.optimizer == 'Adam': optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate, epsilon=0.1) elif FLAGS.optimizer == 'SGD': optimizer = tf.train.GradientDescentOptimizer( learning_rate=FLAGS.learning_rate) else: raise RuntimeError('Unimplemented optimizer %s was requested' % FLAGS.optimizer) train_op = topology.training(loss, FLAGS.learning_rate, exclude=vars_to_hold_constant, optimizer=optimizer) # Also load any variables the optimizer created for variables we want to load vars_to_load.extend([ 
optimizer.get_slot(var, name) for name in optimizer.get_slot_names() for var in vars_to_load ]) vars_to_load = [var for var in vars_to_load if var is not None] vars_to_load = list(set(vars_to_load)) # remove duplicates # Filter vars to load based on what is in the checkpoint in_vars = [] out_vars = [] for var in vars_to_load: if get_cpt_name(var) in vars_in_snapshot: in_vars.append(var) else: out_vars.append(var) if out_vars: print( 'WARNING: cannot load the following vars because they are not in the snapshot: %s' % [var.name for var in out_vars]) if in_vars: print('loading from checkpoint: %s' % [var.name for var in in_vars]) tf.train.init_from_checkpoint( FLAGS.starting_snapshot, {get_cpt_name(var): var for var in in_vars}) # Try making histograms of *everything* for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES): if var.name.startswith('cnn') or var.name.startswith( 'image_binary_classifier'): tf.summary.histogram(var.name, var) # Create a saver for writing training checkpoints. saver = tf.train.Saver(max_to_keep=10) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Create a session for running operations in the Graph. sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.verbose)) # Create the graph, etc. # we either have no snapshot and must initialize everything, or we do have a snapshot # and have already set appropriate vars to be initialized from it init_op = tf.variables_initializer( tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)) sess.run(init_op) # Instantiate a SummaryWriter to output summaries and the Graph. summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) loss_value = -1.0 # avoid a corner case where it is unset on error duration = 0.0 # ditto num_chk = None # ditto #sess = tf_debug.LocalCLIDebugWrapperSession(sess) # Loop through training epochs for epoch in range(num_epochs): try: sess.run(iterator.initializer, feed_dict={seed: epoch}) saver.save(sess, FLAGS.log_dir + 'cnn', global_step=global_step) last_save_epoch = 0 while True: # Run training steps or whatever start_time = time.time() _, loss_value, num_chk, _, gstp = sess.run( [train_op, loss, check_numerics_op, print_op, global_step]) duration = time.time() - start_time # Write the summaries and print an overview fairly often. if ((gstp + 1) % 100 == 0 or gstp < 10): # Print status to stdout. print( 'Global step %d epoch %d: numerics = %s, batch mean loss = %.2f (%.3f sec)' % (gstp, epoch, num_chk, loss_value.mean(), duration)) # Update the events file. summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, gstp) summary_writer.flush() # Save a checkpoint periodically. if (epoch + 1) % 100 == 0 and epoch != last_save_epoch: # If log_dir is /tmp/cnn/ then checkpoints are saved in that # directory, prefixed with 'cnn'. print('saving checkpoint at global step %d, epoch %s' % (gstp, epoch)) saver.save(sess, FLAGS.log_dir + 'cnn', global_step=global_step) last_save_epoch = epoch except tf.errors.OutOfRangeError as e: print('Finished epoch {}'.format(epoch)) # finally: # # When done, ask the threads to stop. # coord.request_stop() # print('Final Step %d: numerics = %s, loss = %.2f (%.3f sec)' # % (step, num_chk, loss_value, duration)) # summary_str = sess.run(summary_op, num_chk) # summary_writer.add_summary(summary_str, step) # summary_writer.flush() # Wait for threads to finish. 
    # coord.join(threads, stop_grace_period=10)
    print('Final Step %d: numerics = %s, batch mean loss = %.2f (%.3f sec)' %
          (gstp, num_chk, loss_value.mean(), duration))
    try:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, gstp)
        summary_writer.flush()
    except tf.errors.OutOfRangeError:
        print('No final summary to write')
    sess.close()
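# Hedged sketch of the partial-restore mechanism used above:
# tf.train.init_from_checkpoint maps checkpoint names to variables, and the
# values are applied when the variables' initializers run. get_cpt_name in the
# code above is assumed to behave like the v.op.name lookup used here.
import tensorflow as tf

def restore_subset(snapshot_path, variables):
    """Initialize `variables` from a checkpoint instead of their defaults."""
    tf.train.init_from_checkpoint(snapshot_path,
                                  {v.op.name: v for v in variables})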
def main(_): num_features = 40 # 40 num_time_steps=98 # We want to see all the logging messages for this tutorial. tf.logging.set_verbosity(tf.logging.INFO) # Start a new TensorFlow session. sess = tf.InteractiveSession() # Begin by making sure we have the training data we need. If you already have # training data of your own, use `--data_url= ` on the command line to avoid # downloading. model_settings = models.prepare_model_settings( len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))), FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms, FLAGS.window_stride_ms, FLAGS.dct_coefficient_count) audio_processor = input_data.AudioProcessor( FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage, FLAGS.unknown_percentage, FLAGS.wanted_words.split(','), FLAGS.validation_percentage, FLAGS.testing_percentage, model_settings) fingerprint_size = model_settings['fingerprint_size'] label_count = model_settings['label_count'] time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000) # Figure out the learning rates for each training phase. Since it's often # effective to have high learning rates at the start of training, followed by # lower levels towards the end, the number of steps and learning rates can be # specified as comma-separated lists to define the rate at each stage. For # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001 # will run 13,000 training loops in total, with a rate of 0.001 for the first # 10,000, and 0.0001 for the final 3,000. training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(','))) learning_rates_list = list(map(float, FLAGS.learning_rate.split(','))) if len(training_steps_list) != len(learning_rates_list): raise Exception( '--how_many_training_steps and --learning_rate must be equal length ' 'lists, but are %d and %d long instead' % (len(training_steps_list), len(learning_rates_list))) if FLAGS.model_architecture != 'rnn' and FLAGS.model_architecture != 'rnn_s': fingerprint_input = tf.placeholder(tf.float32, [None, fingerprint_size], name='fingerprint_input') else: num_audio_features = num_features time_steps = fingerprint_size/num_audio_features fingerprint_input = tf.placeholder(tf.float32, [time_steps, None, num_audio_features], name='fingerprint_input') print('hello world',fingerprint_input.get_shape()) logits, dropout_prob = models.create_model( fingerprint_input, model_settings, FLAGS.model_architecture, is_training=True) # Define loss and optimizer ground_truth_input = tf.placeholder( tf.int64, [None], name='groundtruth_input') # Optionally we can add runtime checks to spot when NaNs or other symptoms of # numerical errors start occurring during training. control_dependencies = [] if FLAGS.check_nans: checks = tf.add_check_numerics_ops() control_dependencies = [checks] # Create the back propagation and training evaluation machinery in the graph. 
with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( labels=ground_truth_input, logits=logits) tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'), tf.control_dependencies(control_dependencies): learning_rate_input = tf.placeholder( tf.float32, [], name='learning_rate_input') train_step = tf.train.GradientDescentOptimizer( learning_rate_input).minimize(cross_entropy_mean) predicted_indices = tf.argmax(logits, 1) correct_prediction = tf.equal(predicted_indices, ground_truth_input) confusion_matrix = tf.confusion_matrix( ground_truth_input, predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.summary.scalar('accuracy', evaluation_step) global_step = tf.train.get_or_create_global_step() increment_global_step = tf.assign(global_step, global_step + 1) saver = tf.train.Saver(tf.global_variables()) # Merge all the summaries and write them out to /tmp/retrain_logs (by default) merged_summaries = tf.summary.merge_all() train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/rnn3/train', sess.graph) validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/rnn3/validation') tf.global_variables_initializer().run() start_step = 1 if FLAGS.start_checkpoint: models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint) start_step = global_step.eval(session=sess) tf.logging.info('Training from step: %d ', start_step) # Save graph.pbtxt. tf.train.write_graph(sess.graph_def, FLAGS.train_dir, FLAGS.model_architecture + '.pbtxt') # Save list of words. with gfile.GFile( os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'), 'w') as f: f.write('\n'.join(audio_processor.words_list)) # Training loop. training_steps_max = np.sum(training_steps_list) for training_step in xrange(start_step, training_steps_max + 1): # Figure out what the current learning rate is. training_steps_sum = 0 for i in range(len(training_steps_list)): training_steps_sum += training_steps_list[i] if training_step <= training_steps_sum: learning_rate_value = learning_rates_list[i] break # Pull the audio samples we'll use for training. train_fingerprints, train_ground_truth = audio_processor.get_data( FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency, FLAGS.background_volume, time_shift_samples, 'training', sess) #print('data raw [examples][data]=',train_fingerprints[:3,0:2] ,train_fingerprints[:3,280:283]) #*** if we are using RNN change shape of train_fingerprints: if FLAGS.model_architecture == 'rnn' or FLAGS.model_architecture == 'rnn_s': #print('train_fingerprints shape before transform=',train_fingerprints.shape)#(100, 3920) shape_data = train_fingerprints.shape train_fingerprints = np.reshape(train_fingerprints, [shape_data[0],num_time_steps,-1]) train_fingerprints = np.transpose(train_fingerprints,(1,0,2))#(14, 100, 280) (time_step,batch_size,feature_size) #print('processed data =', train_fingerprints[:2,:3,:8]) #Test passed #print('train_fingerprints shape After transform=',train_fingerprints.shape) # Run the graph with this batch of training data. 
train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run( [ merged_summaries, evaluation_step, cross_entropy_mean, train_step, increment_global_step ], feed_dict={ fingerprint_input: train_fingerprints, ground_truth_input: train_ground_truth, learning_rate_input: learning_rate_value, dropout_prob: 0.5 }) train_writer.add_summary(train_summary, training_step) tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' % (training_step, learning_rate_value, train_accuracy * 100, cross_entropy_value)) is_last_step = (training_step == training_steps_max) if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step: set_size = audio_processor.set_size('validation') total_accuracy = 0 total_conf_matrix = None for i in xrange(0, set_size, FLAGS.batch_size): validation_fingerprints, validation_ground_truth = ( audio_processor.get_data(FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'validation', sess)) if FLAGS.model_architecture == 'rnn' or FLAGS.model_architecture == 'rnn_s': #tf.reshape(validation_fingerprints, [None,num_features,-1]) #tf.transpose(validation_fingerprints,[2,0,1]) print('validation_fingerprints shape before transform=',validation_fingerprints.shape) shape_data = validation_fingerprints.shape validation_fingerprints = np.reshape(validation_fingerprints, [shape_data[0],num_time_steps,-1]) validation_fingerprints = np.transpose(validation_fingerprints,(1,0,2)) print('test_fingerprints shape After transform=',validation_fingerprints.shape) # Run a validation step and capture training summaries for TensorBoard # with the `merged` op. validation_summary, validation_accuracy, conf_matrix = sess.run( [merged_summaries, evaluation_step, confusion_matrix], feed_dict={ fingerprint_input: validation_fingerprints, ground_truth_input: validation_ground_truth, dropout_prob: 1.0 }) validation_writer.add_summary(validation_summary, training_step) batch_size = min(FLAGS.batch_size, set_size - i) total_accuracy += (validation_accuracy * batch_size) / set_size if total_conf_matrix is None: total_conf_matrix = conf_matrix else: total_conf_matrix += conf_matrix tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix)) tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' % (training_step, total_accuracy * 100, set_size)) # Save the model checkpoint periodically. 
    if (training_step % FLAGS.save_step_interval == 0
        or training_step == training_steps_max):
      checkpoint_path = os.path.join(FLAGS.train_dir,
                                     FLAGS.model_architecture + '.ckpt')
      tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
      saver.save(sess, checkpoint_path, global_step=training_step)

  set_size = audio_processor.set_size('testing')
  tf.logging.info('set_size=%d', set_size)
  total_accuracy = 0
  total_conf_matrix = None
  for i in xrange(0, set_size, FLAGS.batch_size):
    test_fingerprints, test_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
    # Reshape to time-major form before feeding the RNN architectures.
    if FLAGS.model_architecture == 'rnn' or FLAGS.model_architecture == 'rnn_s':
      #tf.reshape(test_fingerprints, [None,num_features,-1])
      #tf.transpose(test_fingerprints,[2,0,1])
      print('test_fingerprints shape before transform=', test_fingerprints.shape)
      shape_data = test_fingerprints.shape
      test_fingerprints = np.reshape(test_fingerprints,
                                     [shape_data[0], num_time_steps, -1])
      test_fingerprints = np.transpose(test_fingerprints, (1, 0, 2))
      print('test_fingerprints shape After transform=', test_fingerprints.shape)
    test_accuracy, conf_matrix = sess.run(
        [evaluation_step, confusion_matrix],
        feed_dict={
            fingerprint_input: test_fingerprints,
            ground_truth_input: test_ground_truth,
            dropout_prob: 1.0
        })
    batch_size = min(FLAGS.batch_size, set_size - i)
    total_accuracy += (test_accuracy * batch_size) / set_size
    if total_conf_matrix is None:
      total_conf_matrix = conf_matrix
    else:
      total_conf_matrix += conf_matrix
  tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
  tf.logging.info('Final test accuracy = %.1f%% (N=%d)' %
                  (total_accuracy * 100, set_size))
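# Sketch of the batch-major to time-major transform that the RNN branches
# above apply: [batch, time*features] -> [batch, time, features] ->
# [time, batch, features]. The sizes match the num_features/num_time_steps
# constants above.
import numpy as np

batch, num_time_steps, num_features = 4, 98, 40
flat = np.zeros((batch, num_time_steps * num_features), np.float32)
seq = flat.reshape(batch, num_time_steps, -1).transpose(1, 0, 2)
assert seq.shape == (num_time_steps, batch, num_features)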
def train(self, model, dataset, re_epochs=0, re_steps=0, debug_NaN=False): """ Train the model on a particular dataset. """ if debug_NaN: # Add checking ops self.check_op = tf.add_check_numerics_ops() # Parameters log file if model.config.saving: model.parameters_log() # Save points of the kernel to file self.save_kernel_points(model, 0) if model.config.saving: # Training log file with open(join(model.saving_path, 'training.txt'), "w") as file: file.write( 'Steps out_loss reg_loss point_loss train_accuracy time memory\n' ) # Killing file (simply delete this file when you want to stop the training) if not exists(join(model.saving_path, 'running_PID.txt')): with open(join(model.saving_path, 'running_PID.txt'), "w") as file: file.write('Launched with PyCharm') # Train loop variables t0 = time.time() self.training_step = re_steps self.training_epoch = re_epochs print('restored at epoch: ', self.training_epoch) mean_dt = np.zeros(2) last_display = t0 self.training_preds = np.zeros(0) self.training_labels = np.zeros(0) epoch_n = 1 mean_epoch_n = 0 if self.training_epoch != 0: op = self.learning_rate.assign( tf.multiply(self.learning_rate, model.config.lr_decays[self.training_epoch])) print('setting learning rate') self.sess.run(op) # Initialise iterator with train data self.sess.run(dataset.train_init_op) # Start loop while self.training_epoch < model.config.max_epoch: try: # Run one step of the model. t = [time.time()] ops = [ self.train_op, model.output_loss, model.regularization_loss, model.offsets_loss, model.logits, model.labels, model.class_logits, model.inputs, #model.inputs['batch_ind'], self.accuracy ] # If NaN appears in a training, use this debug block if debug_NaN: all_values = self.sess.run( ops + [self.check_op] + list(dataset.flat_inputs), {model.dropout_prob: 0.5}) L_out, L_reg, L_p, probs, labels, acc = all_values[1:7] if np.isnan(L_reg) or np.isnan(L_out): input_values = all_values[8:] self.debug_nan(model, input_values, probs) a = 1 / 0 else: # Run normal #_, L_out, L_reg, L_p, probs, labels, class_prob, cloud_label, acc = self.sess.run(ops, {model.dropout_prob: 0.5}) _, L_out, L_reg, L_p, probs, labels, class_logits, inputs, acc = self.sess.run( ops, {model.dropout_prob: model.config.dropout_prob}) t += [time.time()] # Stack prediction for training confusion if model.config.network_model == 'classification': self.training_preds = np.hstack( (self.training_preds, np.argmax(probs, axis=1))) self.training_labels = np.hstack( (self.training_labels, labels)) t += [time.time()] # Average timing mean_dt = 0.95 * mean_dt + 0.05 * (np.array(t[1:]) - np.array(t[:-1])) # Console display (only one per second) if (t[-1] - last_display) > 1.0: last_display = t[-1] message = 'Step {:08d} L_out={:5.3f} L_reg={:5.3f} L_p={:5.3f} Acc={:4.2f} ' \ '---{:8.2f} ms/batch (Averaged)' print( message.format(self.training_step, L_out, L_reg, L_p, acc, 1000 * mean_dt[0], 1000 * mean_dt[1])) # Log file if model.config.saving: process = psutil.Process(os.getpid()) with open(join(model.saving_path, 'training.txt'), "a") as file: message = '{:d} {:.3f} {:.3f} {:.3f} {:.2f} {:.2f} {:.1f}\n' file.write( message.format(self.training_step, L_out, L_reg, L_p, acc, t[-1] - t0, process.memory_info().rss * 1e-6)) # Check kill signal (running_PID.txt deleted) if model.config.saving and not exists( join(model.saving_path, 'running_PID.txt')): break if model.config.dataset.startswith( 'ShapeNetPart') or model.config.dataset.startswith( 'ModelNet'): if model.config.epoch_steps and epoch_n > model.config.epoch_steps: 
raise tf.errors.OutOfRangeError(None, None, '') except tf.errors.OutOfRangeError: # End of train dataset, update average of epoch steps mean_epoch_n += (epoch_n - mean_epoch_n) / (self.training_epoch + 1) epoch_n = 0 self.int = int(np.floor(mean_epoch_n)) model.config.epoch_steps = int(np.floor(mean_epoch_n)) if model.config.saving: model.parameters_log() # Snapshot if model.config.saving and (self.training_epoch + 1 ) % model.config.snapshot_gap == 0: # Tensorflow snapshot snapshot_directory = join(model.saving_path, 'snapshots') if not exists(snapshot_directory): makedirs(snapshot_directory) self.saver.save(self.sess, snapshot_directory + '/snap', global_step=self.training_step + 1) # Save points self.save_kernel_points(model, self.training_epoch) # Update learning rate if self.training_epoch in model.config.lr_decays: op = self.learning_rate.assign( tf.multiply( self.learning_rate, model.config.lr_decays[self.training_epoch])) self.sess.run(op) # Increment self.training_epoch += 1 # Validation if model.config.network_model == 'classification': self.validation_error(model, dataset) elif model.config.network_model == 'segmentation': self.segment_validation_error(model, dataset) elif model.config.network_model == 'multi_segmentation': self.multi_validation_error(model, dataset) elif model.config.network_model == 'cloud_segmentation': self.cloud_validation_error(model, dataset) else: raise ValueError( 'No validation method implemented for this network type' ) self.training_preds = np.zeros(0) self.training_labels = np.zeros(0) # Reset iterator on training data self.sess.run(dataset.train_init_op) except tf.errors.InvalidArgumentError as e: print('Caught a NaN error :') print(e.error_code) print(e.message) print(e.op) print(e.op.name) print([t.name for t in e.op.inputs]) print([t.name for t in e.op.outputs]) a = 1 / 0 # Increment steps self.training_step += 1 epoch_n += 1 # Remove File for kill signal if exists(join(model.saving_path, 'running_PID.txt')): remove(join(model.saving_path, 'running_PID.txt')) self.sess.close()
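# Standalone sketch of the NaN-hunting handler above: fetch the check op,
# catch tf.errors.InvalidArgumentError, and inspect the failing op and its
# input tensors, as the except-branch does. The op name is illustrative.
import tensorflow as tf

bad = tf.sqrt(tf.constant(-1.0), name='bad_sqrt')  # evaluates to NaN
check_op = tf.add_check_numerics_ops()

with tf.Session() as sess:
    try:
        sess.run([bad, check_op])
    except tf.errors.InvalidArgumentError as e:
        print(e.message)
        print(e.op.name)
        print([t.name for t in e.op.inputs])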
fix_lengths=False, threshold=threshold) # lengths = tf.identity(lengths) grad, = tf.gradients(tf.boolean_mask(lengths, hit), scale_factor) with tf.Session(graph=graph) as sess: # l0, g = sess.run((lengths, grad)) # print(l0, g) l0, g0, h0 = sess.run((lengths, grad, hit)) graph = tf.Graph() with graph.as_default(): sdf, eye, directions, scale_factor = build_graph() # lengths, passed = render.get_linearized_solution_lengths( # eye, directions, l0, sdf) lengths = render.fix_length_gradient(eye, directions, l0, sdf) # lengths, passed = render.fix_length_gradient( # eye, directions, l0, sdf) # passed = tf.Print( # passed, # [tf.reduce_sum(tf.cast(tf.logical_and(h0, passed), tf.uint8))]) # hit = tf.logical_and(h0, tf.logical_not(passed)) lengths = tf.minimum(max_length, lengths) grad, = tf.gradients(tf.boolean_mask(lengths, h0), scale_factor) with tf.control_dependencies([tf.add_check_numerics_ops()]): grad = tf.identity(grad) # with tf.Session(graph=graph) as sess: l1, g = sess.run((lengths, grad)) print(np.max(np.abs(l0 - l1)), g, np.abs((g - g0) / g0))
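# Hedged sketch of the gating pattern above: wrapping a tensor in
# tf.control_dependencies([tf.add_check_numerics_ops()]) forces every numeric
# check in the graph to run (and pass) before the wrapped value is produced.
import tensorflow as tf

x = tf.Variable([1.0, 2.0])
y = tf.reduce_sum(tf.square(x))
grad, = tf.gradients(y, x)
with tf.control_dependencies([tf.add_check_numerics_ops()]):
    grad = tf.identity(grad)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grad))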
def train_ichikawa_2(self, data=0, steps=-1, dropout=None, display_step=10,
                     test_step=100, batch_size=10, resume=save_step,
                     ckpt_name="SSL.ckpt", start_ckpt="ichikawa.ckpt"):  # epochs=-1,
    print("learning_rate: %f" % self.learning_rate)
    if data: self.data = data
    steps = 9999999 if steps < 0 else steps
    session = self.session
    # with tf.device(_cpu):
    # t = tf.verify_tensor_all_finite(t, msg)
    tf.add_check_numerics_ops()
    self.summaries = tf.summary.merge_all()
    self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)
    if not dropout: dropout = 1.  # keep all
    x = self.x
    y = self.y
    keep_prob = self.keep_prob
    if not resume or not self.resume(session):
        session.run([tf.global_variables_initializer()])
    saver_c = tf.train.Saver([v for v in tf.all_variables()
                              if not v.name.startswith("model/prediction/Dense_10")])
    saver = tf.train.Saver(tf.global_variables())
    saver_c.restore(session, start_ckpt)
    snapshot = self.name + str(get_last_tensorboard_run_nr())
    step = 0  # show first
    #check = session.graph.get_tensor_by_name("model/conv/filters:0")
    #w_sh = tf.assign(check,tf.random_normal([3, 3, 1, 64]))
    #session.run(w_sh)
    w_0 = tf.Variable(tf.random_normal([3, 3, 1, 64]))
    w_1 = tf.Variable(tf.random_normal([3, 3, 64, 12]))
    w_2 = tf.Variable(tf.random_normal([1, 1, 76, 76]))
    w_3 = tf.Variable(tf.random_normal([3, 3, 76, 12]))
    w_4 = tf.Variable(tf.random_normal([1, 1, 88, 88]))
    w_5 = tf.Variable(tf.random_normal([3, 3, 88, 12]))
    session.run([tf.global_variables_initializer()])
    saver_c.restore(session, start_ckpt)
    # For each conv layer: w_sh_i snapshots the restored filters into w_i, and
    # w_s_i writes w_i back over the live filters, so running all w_s_i after
    # every training step keeps those layers frozen at their restored values.
    check_0 = session.graph.get_tensor_by_name("model/conv/filters:0")
    w_sh_0 = tf.assign(w_0, check_0)
    w_s_0 = tf.assign(check_0, w_0)
    session.run(w_sh_0)  # snapshot; running w_s_0 here would overwrite the restored filters with noise
    check_1 = session.graph.get_tensor_by_name("model/conv_1/filters:0")
    w_sh_1 = tf.assign(w_1, check_1)
    w_s_1 = tf.assign(check_1, w_1)
    session.run(w_sh_1)
    check_2 = session.graph.get_tensor_by_name("model/conv_2/filters:0")
    w_sh_2 = tf.assign(w_2, check_2)
    w_s_2 = tf.assign(check_2, w_2)
    session.run(w_sh_2)
    check_3 = session.graph.get_tensor_by_name("model/conv_3/filters:0")
    w_sh_3 = tf.assign(w_3, check_3)
    w_s_3 = tf.assign(check_3, w_3)
    session.run(w_sh_3)
    check_4 = session.graph.get_tensor_by_name("model/conv_4/filters:0")
    w_sh_4 = tf.assign(w_4, check_4)
    w_s_4 = tf.assign(check_4, w_4)
    session.run(w_sh_4)
    check_5 = session.graph.get_tensor_by_name("model/conv_5/filters:0")
    w_sh_5 = tf.assign(w_5, check_5)
    w_s_5 = tf.assign(check_5, w_5)
    session.run(w_sh_5)
    while step < steps:
        batch_xs, batch_ys = self.next_batch(batch_size, session)
        # batch_xs=np.array(batch_xs).reshape([-1]+self.input_shape)
        # print("step %d \r" % step)# end=' ')
        # tf.train.shuffle_batch_join(example_list, batch_size, capacity=min_queue_size + batch_size * 16, min_queue_size)

        # Fit training using batch data
        feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout, self.train_phase: True}
        loss, _ = session.run([self.cost, self.optimize], feed_dict=feed_dict)
        session.run([w_s_0, w_s_1, w_s_2, w_s_3, w_s_4, w_s_5])  # undo updates to the frozen conv layers
        if step % display_step == 0:
            seconds = int(time.time()) - start
            # Calculate batch accuracy, loss
            feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
            acc = session.run(self.accuracy, feed_dict=feed)
            # acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
            # self.summary_writer.add_summary(summary, step) # only test summaries for smoother curve and SPEED!
            print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".
                  format(step, loss, acc, seconds), end=' ')
            if str(loss) == "nan":
                return print("\nLoss gradient explosion, exiting!!!")  # restore!
        if step % test_step == 0: self.test(step)
        if step % save_step == 0 and step > 0:
            print("SAVING snapshot %s" % snapshot)
            saver.save(session, checkpoint_dir + "/" + snapshot + ".ckpt", self.global_step)
        step += 1
    print("\nOptimization Finished!")
    saver.save(session, ckpt_name)
    self.test(step, number=10000)  # final test
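# A hedged sketch of the freeze-by-reassignment trick used above (names are
# illustrative, not the original graph): snapshot a restored filter into a side
# variable once, then assign it back after every training step so the
# optimizer's update to that layer is undone each time.
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    filters = tf.get_variable('conv/filters', initializer=tf.random_normal([3, 3, 1, 8]))
    w_snap = tf.Variable(tf.zeros([3, 3, 1, 8]))
    take_snapshot = tf.assign(w_snap, filters)     # run once, after restoring
    restore_snapshot = tf.assign(filters, w_snap)  # run after each train step

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(take_snapshot)
    # ... run a training step that may modify `filters` ...
    sess.run(restore_snapshot)  # the layer is effectively frozen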
def accuracy_test(self, data=0, steps=-1, dropout=None, display_step=10,
                  test_step=100, batch_size=10, resume=save_step,
                  ckpt_name="SSL.ckpt"):  # epochs=-1,
    print("learning_rate: %f" % self.learning_rate)
    if data: self.data = data
    steps = 9999999 if steps < 0 else steps
    session = self.session
    # with tf.device(_cpu):
    # t = tf.verify_tensor_all_finite(t, msg)
    tf.add_check_numerics_ops()
    self.summaries = tf.summary.merge_all()
    self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)
    if not dropout: dropout = 1.  # keep all
    x = self.x
    y = self.y
    keep_prob = self.keep_prob
    if not resume or not self.resume(session):
        session.run([tf.global_variables_initializer()])
    #saver_c = tf.train.Saver([v for v in tf.all_variables() if not(v.name.startswith("model/prediction/Dense_10"))])
    saver = tf.train.Saver(tf.global_variables())
    print(ckpt_name)
    saver.restore(session, ckpt_name)
    snapshot = self.name + str(get_last_tensorboard_run_nr())
    step = 0  # show first
    while step < steps:
        batch_xs, batch_ys = self.next_batch(batch_size, session)
        # batch_xs=np.array(batch_xs).reshape([-1]+self.input_shape)
        # print("step %d \r" % step)# end=' ')
        # tf.train.shuffle_batch_join(example_list, batch_size, capacity=min_queue_size + batch_size * 16, min_queue_size)

        # Evaluate on batch data (inference only, so dropout off and train_phase False)
        feed_dict = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
        #print(session.run([self.target,self.output],feed_dict=feed_dict))
        #print(session.run([self.output],feed_dict=feed_dict))
        output = session.run([self.output, self.target], feed_dict=feed_dict)
        #output = session.run(self.train_phase,feed_dict=feed_dict)
        print(output)
        if output[0][0][0] < output[0][0][1]:
            print("mae")    # Japanese: "front"
        else:
            print("usiro")  # Japanese: "behind"
        #loss, _ = session.run([self.cost, self.optimize], feed_dict=feed_dict)
        #if step % display_step == 0:
        #seconds = int(time.time()) - start
        # Calculate batch accuracy, loss
        #feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
        #acc = session.run(self.accuracy, feed_dict=feed)
        # acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
        # self.summary_writer.add_summary(summary, step) # only test summaries for smoother curve and SPEED!
        #print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(step, loss, acc, seconds), end=' ')
        #if str(loss) == "nan": return print("\nLoss gradient explosion, exiting!!!")  # restore!
        #if step % test_step == 0: self.test(step)
        if step % save_step == 0 and step > 0:
            print("SAVING snapshot %s" % snapshot)
            saver.save(session, checkpoint_dir + "/" + snapshot + ".ckpt", self.global_step)
        step += 1
    print("\nOptimization Finished!")
    #saver.save(session, ckpt_name)
    self.test(step, number=10000)  # final test
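# A hedged sketch of the partial-restore pattern used in the two functions
# above (the filtered saver_c): restore every variable from a checkpoint except
# a freshly initialized head, using a Saver built over a filtered variable
# list. The variable names and checkpoint path below are illustrative.
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    backbone = tf.get_variable('model/conv/filters', [3, 3, 1, 64])
    head = tf.get_variable('model/prediction/Dense_10/W', [64, 10])

    head_prefix = 'model/prediction/Dense_10'  # scope of the layer to re-train
    restore_vars = [v for v in tf.global_variables()
                    if not v.name.startswith(head_prefix)]
    saver_restore = tf.train.Saver(restore_vars)        # loads the backbone only
    saver_full = tf.train.Saver(tf.global_variables())  # saves everything later

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())  # the head keeps fresh weights
    # saver_restore.restore(sess, 'pretrained.ckpt')  # backbone gets pretrained ones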
def __init__(self, path_to_model): g = tf.Graph() with g.as_default(): train_attention = True initialize_random = False train_we = True with open( settings.BASE_DIR + str("/Prediction/ML_model/dataset/dataset_mcgm_clean/word_index_map_mcgm.pickle" ), "rb") as myFile: self.word_index_map = pickle.load(myFile, encoding='latin1') if not initialize_random: # load pre-trained word embedding. with open( settings.BASE_DIR + "/Prediction/ML_model/dataset/dataset_mcgm_clean/word_vectors_mcgm.pickle", "rb") as myFile: word_vectors = pickle.load(myFile, encoding='latin1') word_vectors = np.asarray(word_vectors).astype(np.float32) for i in range(len(word_vectors) - 1): word_vectors[i] /= (la.norm((word_vectors[i]))) # # for i in range(len(word_vectors) - 1): # print np.max(np.abs(word_vectors[i])) vocab_size = len(word_vectors) embedding_dim = 300 learning_rate = 1e-3 # decay_factor = 0.99 self.max_padded_sentence_length = 35 batch_size = 100 iterations = 200 highest_val_acc = 0 self.last_index = len(word_vectors) - 1 def init_weight(shape, name): initial = tf.truncated_normal(shape, stddev=0.1, name=name, dtype=tf.float32) return tf.Variable(initial) def init_bias(shape, name): initial = tf.truncated_normal(shape=shape, stddev=0.1, name=name, dtype=tf.float32) return tf.Variable(initial) if initialize_random: # Initial embedding initialized randomly embedding_init = tf.Variable(tf.truncated_normal( shape=[vocab_size, embedding_dim], stddev=0.1, dtype=tf.float32), trainable=train_we, name="word_embedding") else: # Initial embedding initialized by word2vec vectors embedding_init = tf.Variable(tf.constant( word_vectors, shape=[vocab_size, embedding_dim]), trainable=train_we, name="word_embedding") config = projector.ProjectorConfig() # It will hold tensor of size [batch_size, max_padded_sentence_length] self.X = tf.placeholder(tf.int32, [None, self.max_padded_sentence_length]) # Word embedding lookup word_embeddings = tf.nn.embedding_lookup(embedding_init, self.X) if train_attention: in_size = tf.shape(word_embeddings)[0] reshaped_w_e = tf.reshape( word_embeddings, [in_size * self.max_padded_sentence_length, embedding_dim]) print(reshaped_w_e) no_of_nurons_h1 = 512 Wa = init_weight([embedding_dim, no_of_nurons_h1], 'Wa') ba = init_bias([no_of_nurons_h1], 'ba') ya = tf.nn.relu(tf.matmul(reshaped_w_e, Wa) + ba) # Hidden layer of size 512 no_of_nurons_h2 = 512 Wa1 = init_weight([no_of_nurons_h1, no_of_nurons_h2], 'Wa1') ba1 = init_bias([no_of_nurons_h2], 'ba1') ya1 = tf.nn.relu(tf.matmul(ya, Wa1) + ba1) Wa2 = init_weight([no_of_nurons_h2, 1], 'Wa2') ba2 = init_bias([1], 'ba2') # Output layer of the neural network. ya2 = tf.matmul(ya1, Wa2) + ba2 attention_reshaped = tf.reshape( ya2, [in_size, self.max_padded_sentence_length]) attention_softmaxed = tf.nn.softmax(attention_reshaped) attention_expanded = tf.expand_dims(attention_softmaxed, axis=2) # Attention based weighted averaging of word vectors. 
                sentence_embedding = tf.reduce_sum(tf.multiply(word_embeddings, attention_expanded), axis=1)
            else:
                # Simply average out word embeddings to create the sentence embedding
                sentence_embedding = tf.reduce_mean(word_embeddings, axis=1)

            def get_batches(X, Y, bsize):
                for i in range(0, len(X) - bsize + 1, bsize):
                    indices = slice(i, i + bsize)
                    yield X[indices], Y[indices]

            input_layer_size = embedding_dim
            output_layer_size = 165

            # Hidden layer of size 512
            no_of_nurons_h1 = 512
            W = init_weight([input_layer_size, no_of_nurons_h1], 'W')
            b = init_bias([no_of_nurons_h1], 'b')
            y = tf.nn.relu(tf.matmul(sentence_embedding, W) + b)

            # Hidden layer of size 512
            no_of_nurons_h2 = 512
            W1 = init_weight([no_of_nurons_h1, no_of_nurons_h2], 'W1')
            b1 = init_bias([no_of_nurons_h2], 'b1')
            y1 = tf.nn.relu(tf.matmul(y, W1) + b1)

            W2 = init_weight([no_of_nurons_h2, output_layer_size], 'W2')
            b2 = init_bias([output_layer_size], 'b2')
            # Output layer of the neural network.
            y2 = tf.matmul(y1, W2) + b2

            # It will hold the true (one-hot) label for the current batch;
            # softmax_cross_entropy_with_logits needs float labels.
            y_ = tf.placeholder(tf.float32, shape=[None, output_layer_size])

            check_op = tf.add_check_numerics_ops()

            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=y2, labels=y_))
            train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
            # train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
            correct_prediction = tf.equal(tf.argmax(y2, 1), tf.argmax(y_, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            self.probs = tf.nn.softmax(y2)
            predicted_lables = tf.argmax(self.probs, 1)
            correct_lables = tf.argmax(y_, 1)
            variables_names = [v.name for v in tf.trainable_variables()]
            init = tf.global_variables_initializer()
            self.sess = tf.Session()
            self.sess.run(init)
            saver = tf.train.Saver()
            # Restore the best model to calculate the test accuracy.
            saver.restore(self.sess, path_to_model)
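# A compact sketch of the attention pooling built above (not the original
# graph): score each token with a small dense layer, softmax the scores over
# the sentence, and take the weighted sum of word vectors. Shapes are
# illustrative (batch 2, length 5, dim 8).
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    emb = tf.random_normal([2, 5, 8])        # [batch, length, dim]
    flat = tf.reshape(emb, [-1, 8])          # [batch*length, dim]
    scores = tf.layers.dense(flat, 1)        # one attention score per token
    scores = tf.reshape(scores, [2, 5])      # [batch, length]
    weights = tf.nn.softmax(scores)          # attention distribution over tokens
    sentence = tf.reduce_sum(emb * tf.expand_dims(weights, 2), axis=1)  # [batch, dim]

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(sentence).shape)          # (2, 8)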
print("len(outH1) %d"% len(outH1)) ####Optimizing print("building loss") logits3d = tf.stack(logits) loss = tf.reduce_mean(ctc.ctc_loss(logits3d, targetY, seqLengths)) out = tf.identity(loss, 'ctc_loss_mean') optimizer = tf.train.MomentumOptimizer(learningRate, momentum).minimize(loss) ####Evaluating print("building Evaluation") logitsMaxTest = tf.slice(tf.argmax(logits3d, 2), [0, 0], [seqLengths[0], 1]) predictions = tf.to_int32(ctc.ctc_beam_search_decoder(logits3d, seqLengths)[0][0]) reduced_sum = tf.reduce_sum(tf.edit_distance(predictions, targetY, normalize=False)) errorRate = reduced_sum / tf.to_float(tf.size(targetY.values)) check_op = tf.add_check_numerics_ops() print("done building graph") ####Run session with tf.Session(graph=graph) as session: try: merged = tf.summary.merge_all() except: merged = tf.summary.merge_all() try:writer = tf.summary.FileWriter("/tmp/basic_new", session.graph) except: writer = tf.summary.FileWriter("/tmp/basic_new", session.graph) try:saver = tf.train.Saver() # defaults to saving all variables except: print("tf.train.Saver() broken in tensorflow 0.12") saver = tf.train.Saver(tf.global_variables())# WTF stupid API breaking ckpt = tf.train.get_checkpoint_state('./checkpoints') start = 0
def model_fn(features, labels, params, mode, scope=None): embedding_size = params['embedding_size'] num_blocks = params['num_blocks'] vocab_size = params['vocab_size'] debug = params['debug'] story = features['story'] query = features['query'] batch_size = tf.shape(story)[0] normal_initializer = tf.random_normal_initializer(stddev=0.1) ones_initializer = tf.constant_initializer(1.0) # PReLU activations have their alpha parameters initialized to 1 # so they may be identity before training. activation = partial(prelu, initializer=ones_initializer) with tf.variable_scope(scope, 'EntityNetwork', initializer=normal_initializer): # Embeddings # The embedding mask forces the special "pad" embedding to zeros. embedding_params = tf.get_variable('embedding_params', [vocab_size, embedding_size]) embedding_mask = tf.constant([0 if i == 0 else 1 for i in range(vocab_size)], dtype=tf.float32, shape=[vocab_size, 1]) story_embedding = tf.nn.embedding_lookup(embedding_params * embedding_mask, story) query_embedding = tf.nn.embedding_lookup(embedding_params * embedding_mask, query) # Input Module encoded_story = get_input_encoding(story_embedding, ones_initializer, 'StoryEncoding') encoded_query = get_input_encoding(query_embedding, ones_initializer, 'QueryEncoding') # Memory Module # We define the keys outside of the cell so they may be used for state initialization. keys = [tf.get_variable('key_{}'.format(j), [embedding_size]) for j in range(num_blocks)] cell = DynamicMemoryCell(num_blocks, embedding_size, keys, initializer=normal_initializer, activation=activation) # Recurrence initial_state = cell.zero_state(batch_size, tf.float32) sequence_length = get_sequence_length(encoded_story) _, last_state = tf.nn.dynamic_rnn(cell, encoded_story, sequence_length=sequence_length, initial_state=initial_state) # Output Module output = get_output(last_state, encoded_query, num_blocks=num_blocks, vocab_size=vocab_size, initializer=normal_initializer, activation=activation) prediction = tf.argmax(output, 1) # Training loss = get_loss(output, labels, mode) train_op = get_train_op(loss, params, mode) if debug: tf.contrib.layers.summarize_tensor(sequence_length, 'sequence_length') tf.contrib.layers.summarize_tensor(encoded_story, 'encoded_story') tf.contrib.layers.summarize_tensor(encoded_query, 'encoded_query') tf.contrib.layers.summarize_tensor(last_state, 'last_state') tf.contrib.layers.summarize_tensor(output, 'output') tf.contrib.layers.summarize_variables() tf.add_check_numerics_ops() return prediction, loss, train_op
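# Sketch of the pad-masking idiom from the model_fn above: multiply the
# embedding matrix by a [vocab, 1] mask that zeroes row 0, so the PAD token
# always embeds to zeros and its row receives no gradient. Sizes are
# illustrative.
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    vocab_size, dim = 10, 4
    params = tf.get_variable('embedding_params', [vocab_size, dim])
    mask = tf.constant([0 if i == 0 else 1 for i in range(vocab_size)],
                       dtype=tf.float32, shape=[vocab_size, 1])
    ids = tf.constant([[0, 3, 5]])                  # token 0 is PAD
    embedded = tf.nn.embedding_lookup(params * mask, ids)

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(embedded)[0, 0])                 # all zeros for the PAD token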
def set_model(self, model): self.model = model self.sess = K.get_session() self.check_num = tf.add_check_numerics_ops()
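# Related lighter-weight idiom than the callback above: instead of checking the
# whole graph with tf.add_check_numerics_ops(), wrap an individual tensor in
# tf.check_numerics(), which passes values through unchanged but raises on
# NaN/Inf with your message attached. Illustrative example:
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    x = tf.constant([1.0, 0.0])
    y = tf.check_numerics(1.0 / x, message='division blew up')  # Inf at index 1

with tf.Session(graph=graph) as sess:
    try:
        sess.run(y)
    except tf.errors.InvalidArgumentError as e:
        print(e.message)  # contains 'division blew up'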
def train(args, model,data,val_data): dirname = 'save-vrnn/' if not os.path.exists(dirname): os.makedirs(dirname) with open(os.path.join(dirname, 'config.pkl'), 'w') as f: cPickle.dump(args, f) ckpt = tf.train.get_checkpoint_state(dirname) #check if there exists a previously trained model in the checkpoint Xtrain,ytrain = data Xval, yval = val_data shape1 = np.shape(Xtrain) df1 = pd.DataFrame(np.reshape(Xtrain,(shape1[0],-1))) shape2 = np.shape(ytrain) df2 = pd.DataFrame(np.reshape(ytrain,(shape2[0],-1))) print("\nXtrain") print(df1.describe()) print('\nytrain') print(df2.describe()) train = Iterator(Xtrain,ytrain,batch_size = args.batch_size,n_steps=args.seq_length,shape_diff=True) #to split data into batches n_batches = train.nbatches Xtrain,ytrain = train.get_split() #split validation data into batches validate = Iterator(Xval,yval,batch_size = args.batch_size,n_steps=args.seq_length,shape_diff=True) val_nbatches = validate.nbatches Xval, yval = validate.get_split() myFile = open(dirname+'/outputValidation.csv', 'w') writer = csv.writer(myFile) writer.writerows([["Epoch","Train_Loss","MAE","MSE"]]) mae = [] mse = [] with tf.Session() as sess: summary_writer = tf.summary.FileWriter('logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph) check = tf.add_check_numerics_ops() merged = tf.summary.merge_all() tf.global_variables_initializer().run() #initialize all variables in the graph as defined saver = tf.train.Saver(tf.global_variables()) if ckpt: saver.restore(sess, ckpt.model_checkpoint_path) #restore previously saved model print "Loaded model" start = time.time() state_c = None state_h = None logs = [] for e in xrange(args.num_epochs): #assign learning rate sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e))) #get the initial state of lstm cell state = model.initial_state_c, model.initial_state_h mae.append([]) mse.append([]) prior_mean = [] ## phi_mean = [] ## if((e+1)%10 != 0): for b in xrange(n_batches): x = Xtrain[b] y = ytrain[b] feed = {model.input_x: x, model.input_y: y, model.target_data: y} # input data : x and y ; target data : y #train the model on this batch of data train_loss, _, cr, summary, sigma, mu, inp, target, state_c, state_h, pred, prior_mu, phi_mu = sess.run( [model.cost, model.train_op, check, merged, model.sigma, model.mu, model.flat_input, model.target, model.final_state_c, model.final_state_h, model.output, model.prior_mu, model.phi_mu], feed) ## prior_mean.append(prior_mu) ## phi_mean.append(phi_mu) ## summary_writer.add_summary(summary, e * n_batches + b) pred = np.concatenate(pred, axis=1) sigma = np.concatenate(sigma, axis=1) mu = np.concatenate(mu, axis=1) #the output from the model is in the shape [50000,1] reshape to 3D (batch_size, time_steps, n_app) pred = np.array(np.reshape(pred, [args.batch_size,args.seq_length,-1])).astype(float) label = np.array(y).astype(float) #compute mae and mse for the output mae_i = np.reshape(np.absolute((label - pred)),[-1,]).mean() mse_i = np.reshape((label - pred)**2,[-1,]).mean() mae[e].append(mae_i) mse[e].append(mse_i) #save the model after every 800 (monitoring_freq) epochs if (e * n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0): checkpoint_path = os.path.join(dirname, 'model_'+str(args.num_epochs)+'_'+str(args.learning_rate)+'.ckpt') saver.save(sess, checkpoint_path, global_step=e * n_batches + b) print "model saved to {}".format(checkpoint_path) end = time.time() print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \ .format(e * 
n_batches + b, args.num_epochs * n_batches, e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0)) start = time.time() else: #pass validation data print("\nValidation Data\n") loss = 0 for b in xrange(val_nbatches): x = Xval[b] y = yval[b] feed = {model.input_x: x, model.input_y: y, model.target_data: y} # input data : x and y ; target data : y #train the model on this batch of data train_loss, cr, summary, sigma, mu, inp, target, state_c, state_h, pred = sess.run( [model.cost, check, merged, model.sigma, model.mu, model.flat_input, model.target, model.final_state_c, model.final_state_h, model.output], feed) loss += train_loss summary_writer.add_summary(summary, e * n_batches + b) pred = np.concatenate(pred, axis=1) sigma = np.concatenate(sigma, axis=1) mu = np.concatenate(mu, axis=1) #the output from the model is in the shape [50000,1] reshape to 3D (batch_size, time_steps, n_app) pred = np.array(np.reshape(pred, [args.batch_size,args.seq_length,-1])).astype(float) label = np.array(y).astype(float) #compute mae and mse for the output mae_i = np.reshape(np.absolute((label - pred)),[-1,]).mean() mse_i = np.reshape((label - pred)**2,[-1,]).mean() mae[e].append(mae_i) mse[e].append(mse_i) #save the model after every 800 (monitoring_freq) epochs if (e * n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0): checkpoint_path = os.path.join(dirname, 'model_'+str(args.num_epochs)+'_'+str(args.learning_rate)+'.ckpt') saver.save(sess, checkpoint_path, global_step=e * n_batches + b) print "model saved to {}".format(checkpoint_path) end = time.time() print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \ .format(e * n_batches + b, args.num_epochs * n_batches, e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0)) start = time.time() logs.append([e,train_loss/val_nbatches,sum(mae[e])/len(mae[e]), sum(mse[e])/len(mse[e])]) #the average mae,mse values in every epoch print "Epoch {}, mae = {:.3f}, mse = {:.3f}".format(e, sum(mae[e])/len(mae[e]), sum(mse[e])/len(mse[e])) print("prior_mu_mean:",np.mean(prior_mean)) print("phi_mu_mean: ",np.mean(phi_mean)) writer.writerows(logs) #path to save the final model checkpoint_path = os.path.join(dirname, 'final_model_'+str(args.num_epochs)+'_'+str(args.learning_rate)+'.ckpt') saver2 = tf.train.Saver() saver2.save(sess, checkpoint_path) print "model saved to {}".format(checkpoint_path)
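# Sketch of the per-batch error computation used in both branches above:
# reshape the flat model output back to [batch, time, features] and reduce with
# numpy. Shapes below are illustrative, not the VRNN's real dimensions.
import numpy as np

batch_size, seq_length = 4, 10
pred = np.random.randn(batch_size * seq_length, 1)
pred = np.reshape(pred, [batch_size, seq_length, -1]).astype(float)
label = np.random.randn(batch_size, seq_length, 1)

mae = np.reshape(np.absolute(label - pred), [-1]).mean()
mse = np.reshape((label - pred) ** 2, [-1]).mean()
print('MAE=%.3f MSE=%.3f' % (mae, mse))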
def main(_): # We want to see all the logging messages for this tutorial. tf.logging.set_verbosity(tf.logging.INFO) # Start a new TensorFlow session. sess = tf.InteractiveSession() # Begin by making sure we have the training data we need. If you already have # training data of your own, use `--data_url= ` on the command line to avoid # downloading. model_settings = models.prepare_model_settings( len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))), FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms, FLAGS.window_stride_ms, FLAGS.dct_coefficient_count) audio_processor = input_data.AudioProcessor( FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage, FLAGS.unknown_percentage, FLAGS.wanted_words.split(','), FLAGS.validation_percentage, FLAGS.testing_percentage, model_settings) fingerprint_size = model_settings['fingerprint_size'] label_count = model_settings['label_count'] time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000) # Figure out the learning rates for each training phase. Since it's often # effective to have high learning rates at the start of training, followed by # lower levels towards the end, the number of steps and learning rates can be # specified as comma-separated lists to define the rate at each stage. For # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001 # will run 13,000 training loops in total, with a rate of 0.001 for the first # 10,000, and 0.0001 for the final 3,000. training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(','))) learning_rates_list = list(map(float, FLAGS.learning_rate.split(','))) if len(training_steps_list) != len(learning_rates_list): raise Exception( '--how_many_training_steps and --learning_rate must be equal length ' 'lists, but are %d and %d long instead' % (len(training_steps_list), len(learning_rates_list))) fingerprint_input = tf.placeholder( tf.float32, [None, fingerprint_size], name='fingerprint_input') logits, dropout_prob = models.create_model( fingerprint_input, model_settings, FLAGS.model_architecture, is_training=True) # Define loss and optimizer ground_truth_input = tf.placeholder( tf.float32, [None, label_count], name='groundtruth_input') # Optionally we can add runtime checks to spot when NaNs or other symptoms of # numerical errors start occurring during training. control_dependencies = [] if FLAGS.check_nans: checks = tf.add_check_numerics_ops() control_dependencies = [checks] # Create the back propagation and training evaluation machinery in the graph. 
with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( labels=ground_truth_input, logits=logits)) tf.summary.scalar('cross_entropy', cross_entropy_mean) with tf.name_scope('train'), tf.control_dependencies(control_dependencies): learning_rate_input = tf.placeholder( tf.float32, [], name='learning_rate_input') train_step = tf.train.GradientDescentOptimizer( learning_rate_input).minimize(cross_entropy_mean) predicted_indices = tf.argmax(logits, 1) expected_indices = tf.argmax(ground_truth_input, 1) correct_prediction = tf.equal(predicted_indices, expected_indices) confusion_matrix = tf.confusion_matrix(expected_indices, predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.summary.scalar('accuracy', evaluation_step) global_step = tf.train.get_or_create_global_step() increment_global_step = tf.assign(global_step, global_step + 1) saver = tf.train.Saver(tf.global_variables()) # Merge all the summaries and write them out to /tmp/retrain_logs (by default) merged_summaries = tf.summary.merge_all() train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph) validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation') tf.global_variables_initializer().run() start_step = 1 if FLAGS.start_checkpoint: models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint) start_step = global_step.eval(session=sess) tf.logging.info('Training from step: %d ', start_step) # Save graph.pbtxt. tf.train.write_graph(sess.graph_def, FLAGS.train_dir, FLAGS.model_architecture + '.pbtxt') # Save list of words. with gfile.GFile( os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'), 'w') as f: f.write('\n'.join(audio_processor.words_list)) # Training loop. training_steps_max = np.sum(training_steps_list) for training_step in xrange(start_step, training_steps_max + 1): # Figure out what the current learning rate is. training_steps_sum = 0 for i in range(len(training_steps_list)): training_steps_sum += training_steps_list[i] if training_step <= training_steps_sum: learning_rate_value = learning_rates_list[i] break # Pull the audio samples we'll use for training. train_fingerprints, train_ground_truth = audio_processor.get_data( FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency, FLAGS.background_volume, time_shift_samples, 'training', sess) # Run the graph with this batch of training data. train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run( [ merged_summaries, evaluation_step, cross_entropy_mean, train_step, increment_global_step ], feed_dict={ fingerprint_input: train_fingerprints, ground_truth_input: train_ground_truth, learning_rate_input: learning_rate_value, dropout_prob: 0.5 }) train_writer.add_summary(train_summary, training_step) tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' % (training_step, learning_rate_value, train_accuracy * 100, cross_entropy_value)) is_last_step = (training_step == training_steps_max) if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step: set_size = audio_processor.set_size('validation') total_accuracy = 0 total_conf_matrix = None for i in xrange(0, set_size, FLAGS.batch_size): validation_fingerprints, validation_ground_truth = ( audio_processor.get_data(FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'validation', sess)) # Run a validation step and capture training summaries for TensorBoard # with the `merged` op. 
validation_summary, validation_accuracy, conf_matrix = sess.run( [merged_summaries, evaluation_step, confusion_matrix], feed_dict={ fingerprint_input: validation_fingerprints, ground_truth_input: validation_ground_truth, dropout_prob: 1.0 }) validation_writer.add_summary(validation_summary, training_step) batch_size = min(FLAGS.batch_size, set_size - i) total_accuracy += (validation_accuracy * batch_size) / set_size if total_conf_matrix is None: total_conf_matrix = conf_matrix else: total_conf_matrix += conf_matrix tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix)) tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' % (training_step, total_accuracy * 100, set_size)) # Save the model checkpoint periodically. if (training_step % FLAGS.save_step_interval == 0 or training_step == training_steps_max): checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '.ckpt') tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step) saver.save(sess, checkpoint_path, global_step=training_step) set_size = audio_processor.set_size('testing') tf.logging.info('set_size=%d', set_size) total_accuracy = 0 total_conf_matrix = None for i in xrange(0, set_size, FLAGS.batch_size): test_fingerprints, test_ground_truth = audio_processor.get_data( FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess) test_accuracy, conf_matrix = sess.run( [evaluation_step, confusion_matrix], feed_dict={ fingerprint_input: test_fingerprints, ground_truth_input: test_ground_truth, dropout_prob: 1.0 }) batch_size = min(FLAGS.batch_size, set_size - i) total_accuracy += (test_accuracy * batch_size) / set_size if total_conf_matrix is None: total_conf_matrix = conf_matrix else: total_conf_matrix += conf_matrix tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix)) tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % (total_accuracy * 100, set_size))
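# A small sketch (not from speech_commands itself) of the staged learning-rate
# selection implemented in the training loop above: walk the cumulative step
# counts and pick the first stage the current step falls into, mirroring
# --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001.
def pick_learning_rate(training_step, steps_list, rates_list):
    total = 0
    for steps, rate in zip(steps_list, rates_list):
        total += steps
        if training_step <= total:
            return rate
    return rates_list[-1]  # past the schedule: keep the final rate

assert pick_learning_rate(10000, [10000, 3000], [0.001, 0.0001]) == 0.001
assert pick_learning_rate(10001, [10000, 3000], [0.001, 0.0001]) == 0.0001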
def train():
    """Train the face embedding network for a number of steps."""
    dataset = facenet.get_dataset(FLAGS.data_dir)
    train_set, test_set = facenet.split_dataset(dataset, 0.9)
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for input images
        images_placeholder = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, 96, 96, 3), name='Input')

        # Build a Graph that computes the logits predictions from the inference model
        embeddings = facenet.inference_no_batch_norm_deeper(images_placeholder, tf.constant(True))
        #embeddings = facenet.inference(images_placeholder, tf.constant(False))

        # Split example embeddings into anchor, positive and negative
        #a, p, n = tf.split(0, 3, embeddings)

        # Calculate triplet loss
        loss = facenet.triplet_loss_modified(embeddings)

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op, grads = facenet.train(loss, global_step)

        # Create a saver
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        check_num = tf.add_check_numerics_ops()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph_def=sess.graph_def)

        epoch = 1
        with sess.as_default():
            while epoch < FLAGS.max_nrof_epochs:
                batch_number = 0
                while batch_number < FLAGS.epoch_size:
                    print('Loading new data')
                    image_data, num_per_class = facenet.load_data(train_set)

                    print('Selecting suitable triplets for training')
                    start_time = time.time()
                    emb_list = []
                    # Run a forward pass for the sampled images
                    nrof_examples_per_epoch = FLAGS.people_per_batch * FLAGS.images_per_person
                    nrof_batches_per_epoch = int(np.floor(nrof_examples_per_epoch / FLAGS.batch_size))
                    #for i in xrange(nrof_batches_per_epoch):
                        #feed_dict = facenet.get_batch(images_placeholder, image_data, i)
                        #emb_list += sess.run([embeddings], feed_dict=feed_dict)
                    #emb_array = np.vstack(emb_list)  # Stack the embeddings to a nrof_examples_per_epoch x 128 matrix
                    ## Select triplets based on the embeddings
                    #apn, nrof_random_negs, nrof_triplets = facenet.select_triplets(emb_array, num_per_class, image_data)
                    #duration = time.time() - start_time
                    #print('(nrof_random_negs, nrof_triplets) = (%d, %d): time=%.3f seconds' % (nrof_random_negs, nrof_triplets, duration))

                    count = 0
                    # while count<nrof_triplets*3 and batch_number<FLAGS.epoch_size:
                    while batch_number < FLAGS.epoch_size:
                        start_time = time.time()
                        # feed_dict = facenet.get_batch(images_placeholder, apn, batch_number)
                        feed_dict = facenet.get_batch(images_placeholder, image_data, batch_number)
                        grad_tensors, grad_vars = zip(*grads)
                        # Evaluate the loss together with every gradient tensor so the
                        # printed error is defined and NaNs can be traced per variable.
                        values = sess.run((loss,) + grad_tensors, feed_dict=feed_dict)
                        err, grads_eval = values[0], values[1:]
                        for gt, gv in zip(grads_eval, grad_vars):
                            print('%40s: %6d %6f %6f' % (gv.op.name, np.sum(np.isnan(gt)), np.max(gt), np.min(gt)))
                        duration = time.time() - start_time
                        print('Epoch: [%d][%d/%d]\tTime %.3f\ttripErr %2.3f' % (epoch, batch_number, FLAGS.epoch_size, duration, err))
                        batch_number += 1
                        count += FLAGS.batch_size
                epoch += 1

        # Save the model checkpoint periodically.
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=epoch * FLAGS.epoch_size + batch_number)
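# A hedged sketch of the gradient-inspection loop above, factored into a helper
# (not the original project's code): evaluate each gradient tensor and report
# its NaN count and value range, which localizes the first layer where training
# blows up. The session, grads_and_vars, and feed_dict are supplied by the caller.
import numpy as np

def report_nan_gradients(sess, grads_and_vars, feed_dict):
    grad_tensors, grad_vars = zip(*grads_and_vars)
    grads_eval = sess.run(grad_tensors, feed_dict=feed_dict)
    for gt, gv in zip(grads_eval, grad_vars):
        print('%40s: nan=%6d max=%6f min=%6f'
              % (gv.op.name, np.sum(np.isnan(gt)), np.max(gt), np.min(gt)))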