def train(self, data=0, steps=-1, dropout=None, display_step=10, test_step=200, batch_size=10,
	          do_resume=False):  # epochs=-1,
		if data: self.data = data
		steps = 9999999 if steps == -1 else steps
		session = self.session
		# with tf.device(_cpu):

		# import tensorflow.contrib.layers as layers
		# t = tf.verify_tensor_all_finite(t, msg)
		tf.add_check_numerics_ops()
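		# NOTE: the grouped check op returned here is discarded, so these checks
		# only run if that op is fetched or wired in as a control dependency.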
		try:
			self.summaries = tf.summary.merge_all()
		except AttributeError:  # older TF API
			self.summaries = tf.merge_all_summaries()
		try:
			self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)  #
		except AttributeError:  # older TF API
			self.summary_writer = tf.train.SummaryWriter(current_logdir(), session.graph)  #
		if not dropout: dropout = 1.  # keep all
		x = self.x
		y = self.y
		keep_prob = self.keep_prob
		try:
			saver = tf.train.Saver(tf.global_variables())
		except AttributeError:  # older TF API
			saver = tf.train.Saver(tf.all_variables())
		snapshot = self.name + str(get_last_tensorboard_run_nr())
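		# checkpoint_dir (below) and save_step (used in the loop) appear to be
		# module-level settings in the source repo.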
		checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
		if do_resume and checkpoint:
			print("LOADING " + checkpoint + " !!!")
			saver.restore(session, checkpoint)
		try:
			session.run([tf.global_variables_initializer()])
		except AttributeError:  # older TF API
			session.run([tf.initialize_all_variables()])
		step = 0  # show first
		start = int(time.time())  # wall-clock reference for the timing printout below
		while step < steps:
			batch_xs, batch_ys = self.next_batch(batch_size, session)
			# print("step %d \r" % step)# end=' ')

			# tf.train.shuffle_batch_join(example_list, batch_size, capacity=min_queue_size + batch_size * 16, min_queue_size)
			# Fit training using batch data
			feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout, self.train_phase: True}
			loss, _ = session.run([self.cost, self.optimizer], feed_dict=feed_dict)
			if step % display_step == 0:
				seconds = int(time.time()) - start
				# Calculate batch accuracy, loss
				feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
				acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
				# self.summary_writer.add_summary(summary, step) # only test summaries for smoother curve
				print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(step, loss, acc, seconds), end=' ')
				if str(loss) == "nan": return print("\nLoss gradient explosion, exiting!!!")  # restore!
			if step % test_step == 0: self.test(step)
			if step % save_step == 0 and step > 0:
				print("SAVING snapshot %s" % snapshot)
				saver.save(session, checkpoint_dir + snapshot + ".ckpt", self.global_step)

			step += 1
		print("\nOptimization Finished!")
		self.test(step, number=10000)  # final test
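
Note: the call to tf.add_check_numerics_ops() near the top of this example discards the returned op, so its checks never actually execute. A minimal sketch of the usual pattern, attaching the checks as a control dependency so they fire whenever the guarded tensor is evaluated (the same pattern the test cases in Examples #4 and #5 use; TF1 graph mode assumed):

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

t1 = tf.constant([1.0, 0.0])
t2 = tf.constant([0.0, 0.0])
out = tf.div(t1, t2)                  # produces Inf and NaN
check = tf.add_check_numerics_ops()   # one grouped op validating every float tensor in the graph
out = control_flow_ops.with_dependencies([check], out)

with tf.Session() as sess:
    sess.run(out)  # raises InvalidArgumentError reporting "Inf and NaN"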
Example #2
    def __init_output(self):
        with tf.variable_scope('output'):
            # Losses
            self.regularization_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
            self.cross_entropy_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.y, name='loss'))
            self.loss = self.regularization_loss + self.cross_entropy_loss

            # Optimizer
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.args.learning_rate)
                self.train_op = self.optimizer.minimize(self.loss)
                # This is for debugging NaNs. Check TensorFlow documentation.
                self.check_op = tf.add_check_numerics_ops()

            # Output and Metrics
            self.y_out_softmax = tf.nn.softmax(self.logits)  # softmax-normalized class probabilities
            self.y_out_argmax = tf.argmax(self.y_out_softmax, axis=-1, output_type=tf.int32)  # argmax gives the predicted class
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.y, self.y_out_argmax), tf.float32))  # accuracy
        # Log metrics
        with tf.name_scope('train-summary-per-iteration'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('acc', self.accuracy)
            self.summaries_merged = tf.summary.merge_all()
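
A minimal sketch of how the ops defined above might be fetched during training; sess, the batch arrays, and the model.x / model.y placeholder names are assumptions, not part of the original:

# Hypothetical training step for the graph built in __init_output.
# Fetching check_op together with train_op forces every CheckNumerics op to run,
# so the step fails fast on the first Inf/NaN instead of silently diverging.
_, _, loss_val, acc_val = sess.run(
    [model.train_op, model.check_op, model.loss, model.accuracy],
    feed_dict={model.x: batch_x, model.y: batch_y})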
Example #3
    def build_computation_graphs(self):
        self.model.declare_params(self.param_init_function)

        self.tf_nodes = {}
        to_build = {k:v for k, v in self.model_hypers_to_build_graph.iteritems() 
                        if k in self.data.get_hypers_names()}

        for model_hypers, build_graph in to_build.iteritems():
            print ("Construct forward graph... ", end="")

            forward_time_start = time.time()
            inputs, outputs = build_graph(self.model)
            loss, display_loss, output_placeholders, mask_placeholders, loss_nodes = \
                self.construct_loss(outputs)
            print ("done in %.2fs." % (time.time() - forward_time_start))

            optimizer = self.make_optimizer()

            gradient_time_start = time.time()
            print ("Construct gradient graph... ", end="")
            grads_and_vars = self.compute_gradients(optimizer, loss)
            print ("done in %.2fs." % (time.time() - gradient_time_start))

            gradient_apply_time_start = time.time()
            print ("Construct apply gradient graph... ", end="")
            train_op = self.apply_update(optimizer, grads_and_vars)
            print ("done in %.2fs." % (time.time() - gradient_apply_time_start))

            if self.do_debug:
                check_time_start = time.time()
                print ("Construct check numerics graph... ", end="")
                self.check_ops.append(tf.add_check_numerics_ops())
                print ("done in %.2fs." % (time.time() - check_time_start))

            if self.make_log:
                self.summary_nodes["train"] = tf.scalar_summary('train_loss', display_loss)
                self.summary_nodes["validate"] = tf.scalar_summary('validate_loss', display_loss)
                self.summary_nodes["params"] = []
                for p_name, p_node in self.model.params.iteritems():
                    n_elements = p_node.get_shape()[0].value
                    for i in range(n_elements):
                        self.summary_nodes["params"].append(
                            tf.scalar_summary('%s/%i' % (p_name, i), p_node[i]))


            placeholders = {}
            placeholders.update(inputs)
            placeholders.update(output_placeholders)
            placeholders.update(mask_placeholders)
            self.tf_nodes[model_hypers] = {
                "inputs": inputs,
                "outputs": outputs,
                "placeholders": placeholders,
                "loss_nodes": loss_nodes,
                "loss": loss,
                "display_loss": display_loss,
                "grads_and_vars": grads_and_vars,
                "train_op": train_op
            }
Example #4
 def testBoth(self):
     with self.test_session(graph=tf.Graph()):
         t1 = tf.constant([1.0, 0.0])
         t2 = tf.constant([0.0, 0.0])
         a = tf.div(t1, t2)
         check = tf.add_check_numerics_ops()
         a = control_flow_ops.with_dependencies([check], a)
         with self.assertRaisesOpError("Inf and NaN"):
             a.eval()
Example #5
 def testNaN(self):
   for use_gpu in [True, False]:
     with self.test_session(use_gpu=use_gpu, graph=tf.Graph()):
       t1 = tf.constant(0.0)
       t2 = tf.constant(0.0)
       a = tf.div(t1, t2)
       check = tf.add_check_numerics_ops()
       a = control_flow_ops.with_dependencies([check], a)
       with self.assertRaisesOpError("NaN"):
         a.eval()
Example #6
def create_model(sess, dataset, forward_only):
  start_time = time.time()

  #initializer = tf.random_normal_initializer(0.0, 0.1)
  initializer = tf.random_uniform_initializer(-0.1, 0.1)
  with tf.variable_scope("model", initializer=initializer):
    model = LASModel(
        dataset, FLAGS.batch_size, FLAGS.features_width, FLAGS.features_len_max,
        FLAGS.vocab_size, FLAGS.embedding_size, FLAGS.tokens_len_max,
        FLAGS.encoder_cell_size, FLAGS.decoder_cell_size,
        FLAGS.attention_embedding_size, FLAGS.max_gradient_norm,
        FLAGS.learning_rate)

  tf.add_check_numerics_ops()
  sess.run(tf.initialize_all_variables())
  tf.train.start_queue_runners(sess=sess)

  print('create_model graph time %f' % (time.time() - start_time))

  return model
Example #7
def train(args):
    fnames = glob.glob('../mp3/*.mp3')[:1]
    traces = [util.loadf(fname) for fname in fnames]
    traces = np.hstack(traces)
    dirname = 'save-vrnn'
    if not os.path.exists(dirname):
      os.makedirs(dirname)

    with open(os.path.join(dirname, 'config.pkl'), 'w') as f:
      cPickle.dump(args, f)

    model = VRNN(args)
    # load previously trained model if applicable
    ckpt = tf.train.get_checkpoint_state(dirname)
    if ckpt:
      model.load_model(dirname)

    with tf.Session() as sess:
        summary_writer = tf.train.SummaryWriter('logs/'+datetime.now().isoformat().replace(':','-'), sess.graph)
        check = tf.add_check_numerics_ops()
        merged = tf.merge_all_summaries()
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        start = time.time()
        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            state = model.initial_state
            for b in xrange(100):
                #t0 = np.random.randn(args.batch_size,1,(args.chunk_samples))
                #x = np.sin(2*np.pi*(np.arange(args.seq_length)[np.newaxis,:,np.newaxis]/30.+t0)) + np.random.randn(args.batch_size,args.seq_length,(args.chunk_samples))*0.1
                #y = np.sin(2*np.pi*(np.arange(1,args.seq_length+1)[np.newaxis,:,np.newaxis]/30.+t0)) + np.random.randn(args.batch_size,args.seq_length,(args.chunk_samples))*0.1
                if (e * 100 + b)%int(traces.shape[0]/(args.chunk_samples*args.batch_size)) == 0:
                    data, _, _ = util.load_augment_data(traces,args.chunk_samples)
                    print "Refreshed data"
                #x,y = next_batch(data,args)
                slopes = 10*np.random.random((1,1,2*args.chunk_samples))+1
                x,y = (slopes*np.arange(args.seq_length)[np.newaxis,:,np.newaxis])-1,(slopes*np.arange(args.seq_length)[np.newaxis,:,np.newaxis])
                y[:,:,args.chunk_samples:] = 0.
                x[:,:,args.chunk_samples:] = 0.
                feed = {model.input_data: x, model.target_data: y}
                train_loss, _, cr, summary, sigma = sess.run([model.cost, model.train_op, check, merged, model.sigma], feed)
                #train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                summary_writer.add_summary(summary, e * 100 + b)
                if (e * 100 + b) % args.save_every == 0 and ((e * 100 + b) > 0):
                    checkpoint_path = os.path.join('save', 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * 100 + b)
                    print "model saved to {}".format(checkpoint_path)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}/{:.3f}" \
                    .format(e * 100 + b,
                            args.num_epochs * 100,
                            e, args.chunk_samples*train_loss, end - start, (sigma[:,200:]).mean(axis=0).mean(axis=0),(sigma[:,:200]).mean(axis=0).mean(axis=0))
                start = time.time()
Example #8
def train(args):
    fnames = glob.glob('../mp3/*01*.mp3')[:1]
    traces = [util.loadf(fname) for fname in fnames]
    with open(os.path.join('save', 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    model = Model(args)

    with tf.Session() as sess:
        summary_writer = tf.train.SummaryWriter('logs/'+datetime.now().isoformat().replace(':','-'), sess.graph)
        check = tf.add_check_numerics_ops()
        merged = tf.merge_all_summaries()
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        start = time.time()
        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            state = model.initial_state.eval()
            for b in xrange(100):
                #t0 = np.random.randn(args.batch_size,1,(args.chunk_samples))
                #x = np.sin(2*np.pi*(np.arange(args.seq_length)[np.newaxis,:,np.newaxis]/30.+t0)) + np.random.randn(args.batch_size,args.seq_length,(args.chunk_samples))*0.1
                #y = np.sin(2*np.pi*(np.arange(1,args.seq_length+1)[np.newaxis,:,np.newaxis]/30.+t0)) + np.random.randn(args.batch_size,args.seq_length,(args.chunk_samples))*0.1
                if b%25 == 0:
                    data, _, _ = util.load_augment_data(traces[0],args.chunk_samples)
                x,y = next_batch(data,args)
                feed = {model.input_data: x, model.target_data: y, model.initial_state: state}
                train_loss, state, _, cr, summary, sigma = sess.run([model.cost, model.final_state, model.train_op, check, merged, model.sigma], feed)
                #train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                summary_writer.add_summary(summary, e * 100 + b)
                if (e * 100 + b) % args.save_every == 0 and ((e * 100 + b) > 0):
                    checkpoint_path = os.path.join('save', 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * 100 + b)
                    print "model saved to {}".format(checkpoint_path)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}, std = {}" \
                    .format(e * 100 + b,
                            args.num_epochs * 100,
                            e, train_loss, end - start, np.sqrt(sigma).mean(axis=0).mean(axis=0))
                start = time.time()

            x,y = next_val_batch(data,args)
            feed = {model.input_data: x, model.target_data: y, model.initial_state: state}
            test_loss, state = sess.run([model.cost, model.final_state], feed)
            end = time.time()
            print ">> {}/{} (epoch {}), test_loss = {:.3f}, time/batch = {:.3f}" \
                .format(e * 100 + b,
                        args.num_epochs * 100,
                        e, test_loss, end - start)
            start = time.time()
Example #9
  def __init__(self, batch_size=1,
                z_dim=8, net_size = 384,
                learning_rate = 0.01, keep_prob = 1.0, loss_mode = 1, chunk_samples = 1024):
    """

    Args:
        sess: TensorFlow session
        batch_size: The size of batch. Should be specified before training.
        z_dim: (optional) Dimension of dim for Z. [20]
        net_size: number of nodes in each hidden layer
        keep_prob: dropout keep probability
        loss_mode: 1 -> "L2" or 2 -> "Bournoulli"
    """

    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.z_dim = z_dim
    self.net_size = net_size
    self.keep_prob = keep_prob
    self.loss_mode = loss_mode
    self.chunk_samples = chunk_samples

    self.x_dim = self.chunk_samples
    self.n_points = self.x_dim 

    # tf Graph batch of image (batch_size, height, width, depth)
    self.x_raw = tf.placeholder(tf.float32, [batch_size, self.chunk_samples])
    self.lamb = tf.placeholder(tf.float32, [])

    # distort raw data (decided in the end to leave this task to DataLoader class)
    self.x = self.x_raw

    # Create autoencoder network
    self._create_network()
    # Define loss function based variational upper-bound and
    # corresponding optimizer
    self._create_loss_optimizer()

    self.check = tf.add_check_numerics_ops()
    # Initializing the tensor flow variables
    init = tf.initialize_all_variables()

    # Launch the session
    self.sess = tf.InteractiveSession()
    self.sess.run(init)
    self.saver = tf.train.Saver(tf.all_variables())
Example #10
def optimize_elbo(node, steps=200, adam_rate=0.1, debug=False, return_session=False):
    """
    Convenience function to optimize an ELBO and return the breakdown of the final bound as well
    as the estimated posterior. 
    """
    
    elbo, sample_stochastic, decompose_elbo, inspect_posterior = construct_elbo(node)

    try:
        train_step = tf.train.AdamOptimizer(adam_rate).minimize(-elbo)
    except ValueError as e:
        print e
        steps = 0
                                
    init = tf.initialize_all_variables()

    if debug:
        debug_ops = tf.add_check_numerics_ops()

    
    sess = tf.Session()
    sess.run(init)
    for i in range(steps):
        fd = sample_stochastic()

        if debug:
            sess.run(debug_ops, feed_dict = fd)

        sess.run(train_step, feed_dict = fd)
        
        elbo_val = sess.run((elbo), feed_dict=fd)
        print i, elbo_val

        
    fd = sample_stochastic()    
    elbo_terms = decompose_elbo(sess, fd)
    posterior = inspect_posterior(sess, fd)

    if return_session:
        return elbo_terms, posterior, sess, fd
    else:
        sess.close()
        return elbo_terms, posterior
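
A hypothetical call to the helper above (assumes node was built with the same library's model-construction code):

# debug=True runs the check-numerics op each step, so optimization aborts
# with an InvalidArgumentError as soon as any tensor goes Inf/NaN.
elbo_terms, posterior = optimize_elbo(node, steps=100, adam_rate=0.05, debug=True)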
Example #11
def train(args, model):
    dirname = 'save-vrnn'
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    with open(os.path.join(dirname, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    ckpt = tf.train.get_checkpoint_state(dirname)
    n_batches = 100
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter('logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph)
        check = tf.add_check_numerics_ops()
        merged = tf.summary.merge_all()
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        if ckpt:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print "Loaded model"
        start = time.time()
        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            state = model.initial_state_c, model.initial_state_h
            for b in xrange(n_batches):
                x, y = next_batch(args)
                feed = {model.input_data: x, model.target_data: y}
                train_loss, _, cr, summary, sigma, mu, input, target= sess.run(
                        [model.cost, model.train_op, check, merged, model.sigma, model.mu, model.flat_input, model.target],
                                                             feed)
                summary_writer.add_summary(summary, e * n_batches + b)
                if (e * n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0):
                    checkpoint_path = os.path.join(dirname, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * n_batches + b)
                    print "model saved to {}".format(checkpoint_path)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \
                    .format(e * n_batches + b,
                            args.num_epochs * n_batches,
                            e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0))
                start = time.time()
Example #12
    def train(self, adam_rate=0.1, steps=10000,
              print_interval=50, logdir=None,
              display_dict=None, sess=None):
        
        if display_dict is None or len(display_dict)==0:
            print_names = []
            print_vars = []
        else:
            print_names, print_vars = zip(*display_dict.items())
        print_names = ["elbo",] + list(print_names)
        print_vars = [self.elbo,] + list(print_vars)

        debug = tf.add_check_numerics_ops()
        train_step = tf.train.AdamOptimizer(adam_rate).minimize(-self.elbo)
        init = tf.initialize_all_variables()

        if sess is None:
            sess = tf.Session()

        if logdir is not None:
            merged = tf.merge_all_summaries()
            writer = tf.train.SummaryWriter(logdir, sess.graph_def)

        sess.run(init)
        for i in range(steps):
            fd = self.sample_stochastic_inputs()
            
            if i % print_interval == 0:
                print_vals  = sess.run(print_vars, feed_dict=fd)
                print_str = " ".join(["%s %.4f" % (n, v) for (n, v) in zip(print_names, print_vals)])
                print ("step %d " % i) + print_str

                if logdir is not None:
                    summary_str = sess.run(merged, feed_dict=fd)
                    writer.add_summary(summary_str, i)

            sess.run(debug, feed_dict=fd)
            sess.run(train_step, feed_dict = fd)
Example #13
  def build_model(self):
    """Defines the GP model.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.
    """

    logging.info("Initializing model %s.", self.name)
    self.global_step = tf.train.get_or_create_global_step()

    # Define state for the model (inputs, etc.)
    self.x_train = tf.get_variable(
        "training_data",
        initializer=tf.ones(
            [self.hparams.batch_size, self.n_in], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.y_train = tf.get_variable(
        "training_labels",
        initializer=tf.zeros([self.hparams.batch_size, 1], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.weights_train = tf.get_variable(
        "weights_train",
        initializer=tf.ones(
            [self.hparams.batch_size, self.n_out], dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_op = tf.assign(self.x_train, self.x_in, validate_shape=False)
    self.input_w_op = tf.assign(
        self.weights_train, self.weights, validate_shape=False)

    self.input_std = tf.get_variable(
        "data_standard_deviation",
        initializer=tf.ones([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=False)
    self.input_mean = tf.get_variable(
        "data_mean",
        initializer=tf.zeros([1, self.n_out], dtype=tf.float64),
        dtype=tf.float64,
        trainable=True)

    # GP Hyperparameters
    self.noise = tf.get_variable(
        "noise", initializer=tf.cast(0.0, dtype=tf.float64))
    self.amplitude = tf.get_variable(
        "amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.amplitude_linear = tf.get_variable(
        "linear_amplitude", initializer=tf.cast(1.0, dtype=tf.float64))
    self.length_scales = tf.get_variable(
        "length_scales", initializer=tf.zeros([1, self.n_in], dtype=tf.float64))
    self.length_scales_lin = tf.get_variable(
        "length_scales_linear",
        initializer=tf.zeros([1, self.n_in], dtype=tf.float64))

    # Latent embeddings of the different outputs for task covariance
    self.task_vectors = tf.get_variable(
        "latent_task_vectors",
        initializer=tf.random_normal(
            [self.n_out, self.task_latent_dim], dtype=tf.float64))

    # Normalize outputs across each dimension
    # Since we have different numbers of observations across each task, we
    # normalize by their respective counts.
    index_counts = self.atleast_2d(tf.reduce_sum(self.weights, axis=0),
                                   self.n_out)
    index_counts = tf.where(index_counts > 0, index_counts,
                            tf.ones(tf.shape(index_counts), dtype=tf.float64))
    self.mean_op = tf.assign(self.input_mean,
                             tf.reduce_sum(self.y, axis=0) / index_counts)
    self.var_op = tf.assign(
        self.input_std, tf.sqrt(1e-4 + tf.reduce_sum(tf.square(
            self.y - tf.reduce_sum(self.y, axis=0) / index_counts), axis=0)
                                / index_counts))

    with tf.control_dependencies([self.var_op]):
      y_normed = self.atleast_2d(
          (self.y - self.input_mean) / self.input_std, self.n_out)
      y_normed = self.atleast_2d(tf.boolean_mask(y_normed, self.weights > 0), 1)
    self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False)

    # Observation noise
    alpha = tf.nn.softplus(self.noise) + 1e-6

    # Covariance
    with tf.control_dependencies([self.input_op, self.input_w_op, self.out_op]):
      self.self_cov = (self.cov(self.x_in, self.x_in) *
                       self.task_cov(self.weights, self.weights) +
                       tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64) * alpha)

    self.chol = tf.cholesky(self.self_cov)
    self.kinv = tf.cholesky_solve(self.chol, tf.eye(tf.shape(self.x_in)[0],
                                                    dtype=tf.float64))

    self.input_inv = tf.Variable(
        tf.eye(self.hparams.batch_size, dtype=tf.float64),
        validate_shape=False,
        trainable=False)
    self.input_cov_op = tf.assign(self.input_inv, self.kinv,
                                  validate_shape=False)

    # Log determinant by taking the singular values along the diagonal
    # of self.chol
    with tf.control_dependencies([self.input_cov_op]):
      logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.chol) + 1e-16))

    # Log Marginal likelihood
    self.marginal_ll = -tf.reduce_sum(-0.5 * tf.matmul(
        tf.transpose(y_normed), tf.matmul(self.kinv, y_normed)) - 0.5 * logdet -
                                      0.5 * self.n * np.log(2 * np.pi))
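    # This is the negative log marginal likelihood,
    #   -log p(y|X) = 0.5 * y^T K^{-1} y + 0.5 * log|K| + 0.5 * n * log(2*pi),
    # with log|K| = 2 * sum(log(diag(chol))) taken from the Cholesky factor above.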

    zero = tf.cast(0., dtype=tf.float64)
    one = tf.cast(1., dtype=tf.float64)
    standard_normal = tfd.Normal(loc=zero, scale=one)

    # Loss is marginal likelihood and priors
    self.loss = tf.reduce_sum(
        self.marginal_ll -
        (standard_normal.log_prob(self.amplitude) +
         standard_normal.log_prob(tf.exp(self.noise)) +
         standard_normal.log_prob(self.amplitude_linear) +
         tfd.Normal(loc=zero, scale=one * 10.).log_prob(
             self.task_vectors))
    )

    # Optimizer for hyperparameters
    optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr)
    vars_to_optimize = [
        self.amplitude, self.length_scales, self.length_scales_lin,
        self.amplitude_linear, self.noise, self.input_mean
    ]

    if self.learn_embeddings:
      vars_to_optimize.append(self.task_vectors)
    grads = optimizer.compute_gradients(self.loss, vars_to_optimize)
    self.train_op = optimizer.apply_gradients(grads,
                                              global_step=self.global_step)

    # Predictions for test data
    self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x)

    # create tensorboard metrics
    self.create_summaries()
    self.summary_writer = tf.summary.FileWriter("{}/graph_{}".format(
        FLAGS.logdir, self.name), self.sess.graph)
    self.check = tf.add_check_numerics_ops()
Example #14
def main(_):
  # We want to see all the logging messages for this tutorial.
  tf.logging.set_verbosity(tf.logging.INFO)

  # Start a new TensorFlow session.
  sess = tf.InteractiveSession()

  # Begin by making sure we have the training data we need. If you already have
  # training data of your own, use `--data_url= ` on the command line to avoid
  # downloading.
  model_settings = models.prepare_model_settings(
      len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
      FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
      FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
  audio_processor = input_data.AudioProcessor(
      FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
      FLAGS.unknown_percentage,
      FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
      FLAGS.testing_percentage, model_settings)
  fingerprint_size = model_settings['fingerprint_size']
  label_count = model_settings['label_count']
  time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)
  # Figure out the learning rates for each training phase. Since it's often
  # effective to have high learning rates at the start of training, followed by
  # lower levels towards the end, the number of steps and learning rates can be
  # specified as comma-separated lists to define the rate at each stage. For
  # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
  # will run 13,000 training loops in total, with a rate of 0.001 for the first
  # 10,000, and 0.0001 for the final 3,000.
  training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
  learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
  if len(training_steps_list) != len(learning_rates_list):
    raise Exception(
        '--how_many_training_steps and --learning_rate must be equal length '
        'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                   len(learning_rates_list)))

  fingerprint_input = tf.placeholder(
      tf.float32, [None, fingerprint_size], name='fingerprint_input')

  logits, dropout_prob = models.create_model(
      fingerprint_input,
      model_settings,
      FLAGS.model_architecture,
      FLAGS.model_size_info,
      is_training=True)

  # Define loss and optimizer
  ground_truth_input = tf.placeholder(
      tf.float32, [None, label_count], name='groundtruth_input')

  # Optionally we can add runtime checks to spot when NaNs or other symptoms of
  # numerical errors start occurring during training.
  control_dependencies = []
  if FLAGS.check_nans:
    checks = tf.add_check_numerics_ops()
    control_dependencies = [checks]

  # Create the back propagation and training evaluation machinery in the graph.
  with tf.name_scope('cross_entropy'):
    cross_entropy_mean = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_input, logits=logits))
  tf.summary.scalar('cross_entropy', cross_entropy_mean)

  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.name_scope('train'), tf.control_dependencies(update_ops), tf.control_dependencies(control_dependencies):
    learning_rate_input = tf.placeholder(
        tf.float32, [], name='learning_rate_input')
    train_op = tf.train.AdamOptimizer(
        learning_rate_input)
    train_step = slim.learning.create_train_op(cross_entropy_mean, train_op)
#    train_step = tf.train.GradientDescentOptimizer(
#        learning_rate_input).minimize(cross_entropy_mean)
  predicted_indices = tf.argmax(logits, 1)
  expected_indices = tf.argmax(ground_truth_input, 1)
  correct_prediction = tf.equal(predicted_indices, expected_indices)
  confusion_matrix = tf.confusion_matrix(
      expected_indices, predicted_indices, num_classes=label_count)
  evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  tf.summary.scalar('accuracy', evaluation_step)

  global_step = tf.train.get_or_create_global_step()
  increment_global_step = tf.assign(global_step, global_step + 1)

  saver = tf.train.Saver(tf.global_variables())

  # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
  merged_summaries = tf.summary.merge_all()
  train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                       sess.graph)
  validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')

  tf.global_variables_initializer().run()

  # Parameter counts
  params = tf.trainable_variables()
  num_params = sum(map(lambda t: np.prod(tf.shape(t.value()).eval()), params))
  print('Total number of Parameters: ', num_params)

  start_step = 1

  if FLAGS.start_checkpoint:
    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
    start_step = global_step.eval(session=sess)

  tf.logging.info('Training from step: %d ', start_step)

  # Save graph.pbtxt.
  tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                       FLAGS.model_architecture + '.pbtxt')

  # Save list of words.
  with gfile.GFile(
      os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'),
      'w') as f:
    f.write('\n'.join(audio_processor.words_list))

  # Training loop.
  best_accuracy = 0
  training_steps_max = np.sum(training_steps_list)
  for training_step in xrange(start_step, training_steps_max + 1):
    # Figure out what the current learning rate is.
    training_steps_sum = 0
    for i in range(len(training_steps_list)):
      training_steps_sum += training_steps_list[i]
      if training_step <= training_steps_sum:
        learning_rate_value = learning_rates_list[i]
        break
    # Pull the audio samples we'll use for training.
    train_fingerprints, train_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
        FLAGS.background_volume, time_shift_samples, 'training', sess)
    
    # train_std = 11.558333964158848 
    # train_mean = -1.5683672671004598
    # train_fingerprints = (train_fingerprints - train_mean)/train_std
    # train_fingerprints += 1
    
    # Run the graph with this batch of training data.
    train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
        [
            merged_summaries, evaluation_step, cross_entropy_mean, train_step,
            increment_global_step
        ],
        feed_dict={
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 1.0
        })
    train_writer.add_summary(train_summary, training_step)
    tf.logging.info('Step #%d: rate %f, accuracy %.2f%%, cross entropy %f' %
                    (training_step, learning_rate_value, train_accuracy * 100,
                     cross_entropy_value))
    is_last_step = (training_step == training_steps_max)
    if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
      set_size = audio_processor.set_size('validation')
      total_accuracy = 0
      total_conf_matrix = None
      for i in xrange(0, set_size, FLAGS.batch_size):
        validation_fingerprints, validation_ground_truth = (
            audio_processor.get_data(FLAGS.batch_size, i, model_settings, 0.0,
            0.0, 0, 'validation', sess))
        # val_std = 20.91701306351207 
        # val_mean = -3.0561562801250295
        # validation_fingerprints = (validation_fingerprints - val_mean)/val_std
        # Run a validation step and capture training summaries for TensorBoard
        # with the `merged` op.
        validation_summary, validation_accuracy, conf_matrix = sess.run(
            [merged_summaries, evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: validation_fingerprints,
                ground_truth_input: validation_ground_truth,
                dropout_prob: 1.0
            })
        validation_writer.add_summary(validation_summary, training_step)
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (validation_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
          total_conf_matrix = conf_matrix
        else:
          total_conf_matrix += conf_matrix
      tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
      tf.logging.info('Step %d: Validation accuracy = %.2f%% (N=%d)' %
                      (training_step, total_accuracy * 100, set_size))

      # Save the model checkpoint when validation accuracy improves
      if total_accuracy > best_accuracy:
        best_accuracy = total_accuracy
        checkpoint_path = os.path.join(FLAGS.train_dir, 'best',
                                       FLAGS.model_architecture + '_'+ str(int(best_accuracy*10000)) + '.ckpt')
        tf.logging.info('Saving best model to "%s-%d"', checkpoint_path, training_step)
        saver.save(sess, checkpoint_path, global_step=training_step)
      tf.logging.info('So far the best validation accuracy is %.2f%%' % (best_accuracy*100))

  set_size = audio_processor.set_size('testing')
  tf.logging.info('set_size=%d', set_size)
  total_accuracy = 0
  total_conf_matrix = None
  for i in xrange(0, set_size, FLAGS.batch_size):
    test_fingerprints, test_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
    test_accuracy, conf_matrix = sess.run(
        [evaluation_step, confusion_matrix],
        feed_dict={
            fingerprint_input: test_fingerprints,
            ground_truth_input: test_ground_truth,
            dropout_prob: 1.0
        })
    batch_size = min(FLAGS.batch_size, set_size - i)
    total_accuracy += (test_accuracy * batch_size) / set_size
    if total_conf_matrix is None:
      total_conf_matrix = conf_matrix
    else:
      total_conf_matrix += conf_matrix
  tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
  tf.logging.info('Final test accuracy = %.2f%% (N=%d)' % (total_accuracy * 100,
                                                           set_size))
Example #15
def main(_):
  best_acc = 0
  best_step = 0
  best_acc_istrain = 0
  best_step_istrain = 0
  # We want to see all the logging messages for this tutorial.
  tf.logging.set_verbosity(tf.logging.INFO)

  # Start a new TensorFlow session.
  sess = tf.InteractiveSession()

  # Begin by making sure we have the training data we need. If you already have
  # training data of your own, use `--data_url= ` on the command line to avoid
  # downloading.
  model_settings = models.prepare_model_settings(
      len(input_data_filler.prepare_words_list_my(FLAGS.wanted_words.split(','))),
      FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
      FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
  audio_processor = input_data_filler.AudioProcessor(
      FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
      FLAGS.unknown_percentage,
      FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
      FLAGS.testing_percentage, model_settings)
  fingerprint_size = model_settings['fingerprint_size']
  label_count = model_settings['label_count']
  time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)
  # Figure out the learning rates for each training phase. Since it's often
  # effective to have high learning rates at the start of training, followed by
  # lower levels towards the end, the number of steps and learning rates can be
  # specified as comma-separated lists to define the rate at each stage. For
  # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
  # will run 13,000 training loops in total, with a rate of 0.001 for the first
  # 10,000, and 0.0001 for the final 3,000.
  training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
  learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
  if len(training_steps_list) != len(learning_rates_list):
    raise Exception(
        '--how_many_training_steps and --learning_rate must be equal length '
        'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                   len(learning_rates_list)))
##############################################
  ############ TensorFlow modules ##########

  fingerprint_input = tf.placeholder(
      tf.float32, [None, fingerprint_size], name='fingerprint_input')

  # ############ Model creation ##########
  istrain = tf.placeholder(tf.bool, name='istrain')
  logits= models.create_model(
      fingerprint_input,
      model_settings,
      FLAGS.model_architecture,
      is_training=istrain)
  ############ Model creation ##########
  # logits, dropout_prob= models.create_model(
  #     fingerprint_input,
  #     model_settings,
  #     FLAGS.model_architecture,
  #     is_training=True)
  # Define loss and optimizer

  ############ Ground-truth placeholder ##########
  ground_truth_input = tf.placeholder(
      tf.float32, [None, label_count], name='groundtruth_input')

  # Optionally we can add runtime checks to spot when NaNs or other symptoms of
  # numerical errors start occurring during training.
  control_dependencies = []
  if FLAGS.check_nans:
    checks = tf.add_check_numerics_ops()
    control_dependencies = [checks]
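  # NOTE: this control_dependencies list is never attached to the graph below
  # (only update_ops is used), so with check_nans the checks are effectively dead code.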

  # Create the back propagation and training evaluation machinery in the graph.
  ############ Cross-entropy computation ##########
  # with tf.name_scope('cross_entropy'):
  #   cross_entropy_mean = tf.reduce_mean(
  #       tf.nn.softmax_cross_entropy_with_logits(
  #           labels=ground_truth_input, logits=logits)) + beta*loss_norm
  with tf.name_scope('cross_entropy'):
    cross_entropy_mean = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_input, logits=logits))
  tf.summary.scalar('cross_entropy', cross_entropy_mean)

  ############ Learning rate, accuracy, confusion matrix ##########
  # learning_rate_input    learning-rate input (tf.placeholder)
  # train_step             training op (optimizer)
  # predicted_indices      predicted class indices
  # expected_indices       expected (ground-truth) class indices
  # correct_prediction     correct-prediction mask
  # confusion_matrix       confusion matrix
  # evaluation_step        classification accuracy (per evaluation)
  # global_step            global training step
  # increment_global_step  global-step increment

  learning_rate_input = tf.placeholder(
      tf.float32, [], name='learning_rate_input')
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(
        learning_rate_input).minimize(cross_entropy_mean)
  # with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
  #   learning_rate_input = tf.placeholder(
  #       tf.float32, [], name='learning_rate_input')
  #  # train_step = tf.train.GradientDescentOptimizer(
  #     #  learning_rate_input).minimize(cross_entropy_mean)
  #   with tf.control_dependencies(update_ops):
  #       train_step = tf.train.AdamOptimizer(
  #           learning_rate_input).minimize(cross_entropy_mean)
  predicted_indices = tf.argmax(logits, 1)
  expected_indices = tf.argmax(ground_truth_input, 1)
  correct_prediction = tf.equal(predicted_indices, expected_indices)
  confusion_matrix = tf.confusion_matrix(
      expected_indices, predicted_indices, num_classes=label_count)
  evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  acc = tf.summary.scalar('accuracy', evaluation_step)

  global_step = tf.train.get_or_create_global_step()
  increment_global_step = tf.assign(global_step, global_step + 1)


  saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)  # keep every checkpoint (TF default is 5)

  # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
  merged_summaries = tf.summary.merge_all()
  validation_merged_summaries = tf.summary.merge([tf.get_collection(tf.GraphKeys.SUMMARIES,'accuracy'),tf.get_collection(tf.GraphKeys.SUMMARIES,'cross_entropy')])
  test_summaries = tf.summary.merge([acc])
  test_summaries_istrain = tf.summary.merge([acc])
  train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                       sess.graph)
  validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')
  test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test')
  test_istrain_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test_istrain')
  tf.global_variables_initializer().run()

  start_step = 1

  if FLAGS.start_checkpoint:
    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
    start_step = global_step.eval(session=sess)

  tf.logging.info('Training from step: %d ', start_step)

  # Save graph.pbtxt.
  tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                       FLAGS.model_architecture + '.pbtxt')

  # Save list of words.
  with gfile.GFile(
      os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'),
      'w') as f:
    f.write('\n'.join(audio_processor.words_list))
###
  # model1: fc
  # model2: conv: ~940k parameters
  # model3: low_latency_conv: roughly the same size as model1
  # model4: 750k
  # Training loop.
    #############################################
    ########           Main loop           ######
    #############################################
  training_steps_max = np.sum(training_steps_list)
  for training_step in xrange(start_step, training_steps_max + 1):
    # Figure out what the current learning rate is.
    #######       Automatic learning-rate decay      #######
    if training_step <12000+1:
        learning_rate_value = learning_rates_list[0]*0.02**(training_step/12000)
    else:
        learning_rate_value = learning_rates_list[0]*0.02    #0.015 12000
    training_steps_sum = 0
    # for i in range(len(training_steps_list)):
    #   training_steps_sum += training_steps_list[i]
    #   if training_step <= training_steps_sum:
    #     learning_rate_value = learning_rates_list[i]
    #     break

    # Pull the audio samples we'll use for training.
    #######       Load data via the audio processor      ##################################
    ##get_data(self, how_many, offset, model_settings, background_frequency,
    ##         background_volume_range, time_shift, mode, sess)
    ########################################################################
    train_fingerprints, train_ground_truth = audio_processor.get_data_my(
        FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
        FLAGS.background_volume, time_shift_samples, 'training', sess)
    #mid = np.abs(np.max(train_fingerprints) + np.min(train_fingerprints)) / 2
    #half = np.max(train_fingerprints) - np.min(train_fingerprints)
    #train_fingerprints = ((train_fingerprints + mid) / half * 255).astype(int)
    ####    Input normalization   ####
    # train_fingerprints=input_normalization(train_fingerprints)
    # Run the graph with this batch of training data.
    train_fingerprints = np_round_and_clip(train_fingerprints)

    train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
        [
            merged_summaries, evaluation_step, cross_entropy_mean, train_step,
            increment_global_step
        ],
        feed_dict={
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            istrain:True
        })
    train_writer.add_summary(train_summary, training_step)
    tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                    (training_step, learning_rate_value, train_accuracy * 100,
                     cross_entropy_value))
    is_last_step = (training_step == training_steps_max)
    if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
      set_size = audio_processor.set_size('validation')
      total_accuracy = 0
      total_conf_matrix = None
      #############################################
      ######## Accumulate validation-set accuracy and confusion matrix ######
      for i in xrange(0, set_size, FLAGS.batch_size):
        validation_fingerprints, validation_ground_truth = (
            audio_processor.get_data_my(FLAGS.batch_size, i, model_settings, 0.0,
                                     0.0, 0, 'validation', sess))
        #mid = np.abs(np.max(validation_fingerprints) + np.min(validation_fingerprints)) / 2
       # half = np.max(validation_fingerprints) - np.min(validation_fingerprints)
        #validation_fingerprints = ((validation_fingerprints + mid) / half * 255).astype(int)
        # ####    Input normalization   ####
        # validation_fingerprints = input_normalization(validation_fingerprints)
        # Run a validation step and capture training summaries for TensorBoard
        # with the `merged` op.
        validation_fingerprints = np_round_and_clip(validation_fingerprints)

        validation_summaries, validation_accuracy, conf_matrix = sess.run(
            [validation_merged_summaries, evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: validation_fingerprints,
                ground_truth_input: validation_ground_truth,
                istrain: True
            })
        validation_writer.add_summary(validation_summaries, training_step)
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (validation_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
          total_conf_matrix = conf_matrix
        else:
          total_conf_matrix += conf_matrix

      tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
      tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                      (training_step, total_accuracy * 100, set_size))

      #############################################
      ########  Compute test-set accuracy and confusion matrix  ######
      set_size = audio_processor.set_size('testing')
      tf.logging.info('set_size=%d', set_size)
      test_fingerprints, test_ground_truth = audio_processor.get_data_my(
        -1, 0, model_settings, 0.0, 0.0, 0, 'testing', sess)
      #mid = np.abs(np.max(test_fingerprints) + np.min(test_fingerprints)) / 2
      #half = np.max(test_fingerprints) - np.min(test_fingerprints)
      #test_fingerprints = ((test_fingerprints + mid) / half * 255).astype(int)
      test_fingerprints = np_round_and_clip(test_fingerprints)

      final_summary,test_accuracy, conf_matrix = sess.run(
          [test_summaries,evaluation_step, confusion_matrix],
          feed_dict={
              fingerprint_input: test_fingerprints,
              ground_truth_input: test_ground_truth,
              istrain : False
          })
      final_summary_istrain,test_accuracy_istrain= sess.run(
          [test_summaries_istrain,evaluation_step],
          feed_dict={
              fingerprint_input: test_fingerprints,
              ground_truth_input: test_ground_truth,
              istrain : True
          })
      if test_accuracy > best_acc:
          best_acc = test_accuracy
          best_step = training_step
      if test_accuracy_istrain > best_acc_istrain:
          best_acc_istrain = test_accuracy_istrain
          best_step_istrain = training_step
      test_writer.add_summary(final_summary, training_step)
      test_istrain_writer.add_summary(final_summary_istrain, training_step)
      tf.logging.info('Confusion Matrix:\n %s' % (conf_matrix))
      tf.logging.info('test accuracy = %.1f%% (N=%d)' % (test_accuracy * 100,6882))
      tf.logging.info('test_istrain accuracy = %.1f%% (N=%d)' % (test_accuracy_istrain * 100,6882))

      tf.logging.info('Best test accuracy before now = %.1f%% (N=%d)' % (best_acc * 100,6882) + '  at step of ' + str(best_step))
      tf.logging.info('Best test_istrain accuracy before now = %.1f%% (N=%d)' % (best_acc_istrain * 100,6882) + '  at step of ' + str(best_step_istrain))
    # Save the model checkpoint periodically.
    if (training_step % FLAGS.save_step_interval == 0 or
        training_step == training_steps_max):
      checkpoint_path = os.path.join(FLAGS.train_dir + '/'+FLAGS.model_architecture,
                                     FLAGS.model_architecture + '.ckpt')
      tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
      saver.save(sess, checkpoint_path, global_step=training_step)
    print_line = 'Best test accuracy before now = %.1f%% (N=%d)' % (best_acc * 100,6882) + '  at step of ' + str(best_step) + '\n' + \
                 'Best test_istrain accuracy before now = %.1f%% (N=%d)' % (best_acc_istrain * 100,6882) + '  at step of ' + str(best_step_istrain)
    if training_step == training_steps_max:
        with open(FLAGS.train_dir + '/' +FLAGS.model_architecture+ '/details.txt', 'w') as f:
            f.write(print_line)
Example #16
def train():
  """Train CIFAR-10 for a number of steps."""

  with tf.Graph().as_default():
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Get images and labels for CIFAR-10.
    # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
    # GPU and resulting in a slow down.
    with tf.device('/cpu:0'):
      images, ratioImages, labels = gl.inputs(False)
    check_op = tf.add_check_numerics_ops()
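    # check_op is fetched together with train_op in the loop below; the
    # NanTensorHook passed to MonitoredTrainingSession adds a second NaN guard on the loss.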
    # Build a Graph that computes the logits predictions from the
    # inference model.
    a = tf.Print(images.shape, [images.shape])
    logits = gl.inference(images, ratioImages)

    # Calculate loss.
    # loss = gl.loss_2(logits, labels)
    loss = gl.loss_depart(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = gl.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """Logs loss and runtime."""

      def begin(self):
        self._step = -1
        self._start_time = time.time()

      def before_run(self, run_context):
        self._step += 1
        return tf.train.SessionRunArgs(loss)  # Asks for loss value.

      def after_run(self, run_context, run_values):
        if self._step % FLAGS.log_frequency == 0:
          current_time = time.time()
          duration = current_time - self._start_time
          self._start_time = current_time

          loss_value = run_values.results
          examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
          sec_per_batch = float(duration / FLAGS.log_frequency)

          format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
          print (format_str % (datetime.now(), self._step, loss_value,
                               examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:
      buffer_labels = []
      buffer_logits = []
      counter = 0
      conv1_buffer = []
      conv2_buffer = []
      conv3_buffer = []
      conv4_buffer = []
      while not mon_sess.should_stop():
#        mon_sess = tfdbg.LocalCLIDebugWrapperSession(mon_sess)
#        mon_sess.add_tensor_filter("has_inf_or_nan", tfdbg.has_nan_or_inf)
        _0, _1, np_labels, np_logits = mon_sess.run([train_op, check_op, labels, logits])
        # print(conv1.shape, conv2.shape, conv3.shape, conv4.shape)
        # conv1, conv2, conv3, conv4 = mon_sess.run([conv1, conv2, conv3, conv4])
        buffer_labels.append(np_labels)
        buffer_logits.append(np_logits)
        # conv1_buffer.append(conv1)
        # conv2_buffer.append(conv2)
        # conv3_buffer.append(conv3)
        # conv4_buffer.append(conv4)
        # counter = counter+1
        # if counter%10==0:
        #   f_index = int(counter/1000)
        #   np.save('train_playground/conv1/%d.npy'%(f_index), np.array(conv1_buffer))
        #   np.save('train_playground/conv2/%d.npy'%(f_index), np.array(conv2_buffer))
        #   np.save('train_playground/conv3/%d.npy'%(f_index), np.array(conv3_buffer))
        #   np.save('train_playground/conv4/%d.npy'%(f_index), np.array(conv4_buffer))
      np.save('train_playground/np_labels.npy', np.array(buffer_labels))
      np.save('train_playground/np_logits.npy', np.array(buffer_logits))
Example #17
    def __init__(self, batch_size, sN, sL, qL,
                 vocab_size, embed_size, hidden_size,
                 learning_rate=5e-3,
                 optim_type='Adam',
                 attention_type='bilinear',
                 attention_layer=3,
                 glove=False,
                 train_glove=False,
                 max_norm=6):
        """
        sN: sentence number 
        sL: sentence length
        qL: query length

        Placeholders
        # passage [batch_size, sN, sL]
        # p_len   [batch_size, sN]
        # p_idf   [batch_size, sN, sL]
        # query   [batch_size, qL]
        # q_len   [batch_size]
        # q_idf   [batch_size, qL]
        # answer  [batch_size, sN]
        # dropout scalar
        """

        self.create_placeholder(batch_size, sN, sL, qL)

        global_step = tf.Variable(0, name='global_step', trainable=False)
        learning_rate = tf.train.exponential_decay(
            learning_rate, global_step, 1000, 0.95)
        self.lr_sum = tf.scalar_summary('learning_rate', learning_rate)

        self.emb = tf.get_variable(
            "emb", [vocab_size, embed_size], trainable=(not glove or train_glove))
        embed_p = tf.nn.embedding_lookup(
            self.emb, self.passage, name='embed_p')  # N,sN,sL,E
        embed_q = tf.nn.embedding_lookup(
            self.emb, self.query, name='embed_q')  # N,qL,E
        self.embed_sum = tf.histogram_summary("embed", self.emb)

        with tf.name_scope('BoW'):
            wt_p = tf.expand_dims( self.p_wt, -1 )
            bow_p = tf.reduce_sum( embed_p*wt_p, 2, name='bow_p' ) # N, sN, E
            epsilon = 1e-5
            denominator = tf.to_float(tf.expand_dims( self.p_len, -1 )) + epsilon
            # bow_p = tf.div( bow_p, denominator, name= 'true_bow_p' ) # N, sN, 1

            wt_q = tf.expand_dims( self.q_wt, -1 ) 
            bow_q = tf.reduce_sum( embed_q*wt_q, 1, name='bow_q') # N, E
            denominator = tf.to_float(tf.expand_dims( self.q_len, -1 )) + epsilon
            # bow_q = tf.div( bow_q, denominator, name='true_bow_q' ) # N, 1

            p_rep = bow_p
            q_rep = bow_q

        sN_mask = tf.to_float(self.p_len > 0, name='sN_mask')  # N, sN
        sN_count = tf.reduce_sum(sN_mask, 1)
        self.sN_mask = sN_mask
        self.sN_count = sN_count
        sN_count = tf.to_int64(sN_count, name='sN_count')
        # self.sn_c_print = tf.Print(sN_count, [sN_count, sN_mask], message='sn count, sn mask', first_n=50)

        with tf.name_scope('REP_dropout'):
            q_rep = tf.nn.dropout(q_rep, self.dropout)
            p_rep = tf.nn.dropout(p_rep, self.dropout)

        p_rep = tf.unpack(p_rep, axis=1)
        atten = self.apply_attention(
            attention_type, embed_size // 2, sN, p_rep, q_rep, layer=attention_layer)

        atten = atten - tf.reduce_min(atten, [1], keep_dims=True)
        atten = tf.mul(atten, sN_mask, name='unnormalized_attention')

        self.score = atten  # N, sN
        self.alignment = tf.nn.softmax(atten, name='alignment')

        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            self.score, self.answer, name='loss')

        self.prediction = tf.argmax(self.score, 1)
        self.answer_id = tf.argmax(self.answer, 1)
        self.correct_prediction = tf.equal(
            self.prediction, self.answer_id)  # N
        self.accuracy = tf.reduce_mean(
            tf.cast(self.correct_prediction, tf.float32), name='accuracy')

        self.optim = self.get_optimizer(optim_type, learning_rate)
        gvs = self.optim.compute_gradients(self.loss)
        with tf.name_scope('clip_norm'):
            self.gvs = [(tf.clip_by_norm(g, max_norm), v) for g, v in gvs]

        self.train_op = self.optim.apply_gradients(
            self.gvs, global_step=global_step, name='train_op')
        self.check_op = tf.add_check_numerics_ops()

        tsum, vsum = self.create_summary(add_gv_sum=True)
        self.train_summary = tf.merge_summary(tsum)
        self.validate_summary = tf.merge_summary(vsum)

        # store param =======================
        self.p_rep = p_rep
        self.q_rep = q_rep
        self.embed_p = embed_p
        self.embed_q = embed_q
        self.global_step = global_step
        self.origin_gv = gvs
        self.learning_rate = learning_rate
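
The docstring above lists the placeholder shapes, but create_placeholder itself is not shown. A hypothetical sketch of what it would define, matching the names and shapes the model uses later (self.passage, self.p_len, self.p_wt, self.query, self.q_len, self.q_wt, self.answer, self.dropout); the dtypes are assumptions:

    def create_placeholder(self, batch_size, sN, sL, qL):
        # Shapes follow the docstring: passage [N, sN, sL], query [N, qL], etc.
        self.passage = tf.placeholder(tf.int32, [batch_size, sN, sL], name='passage')
        self.p_len = tf.placeholder(tf.int32, [batch_size, sN], name='p_len')
        self.p_wt = tf.placeholder(tf.float32, [batch_size, sN, sL], name='p_wt')
        self.query = tf.placeholder(tf.int32, [batch_size, qL], name='query')
        self.q_len = tf.placeholder(tf.int32, [batch_size], name='q_len')
        self.q_wt = tf.placeholder(tf.float32, [batch_size, qL], name='q_wt')
        self.answer = tf.placeholder(tf.float32, [batch_size, sN], name='answer')
        self.dropout = tf.placeholder(tf.float32, name='dropout')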
Example #18
    def build_model(self):
        """Defines the GP model.

    The loss is computed for partial feedback settings (bandits), so only
    the observed outcome is backpropagated (see weighted loss).
    Selects the optimizer and, finally, it also initializes the graph.
    """

        tf.logging.info("Initializing model %s.", self.name)
        self.global_step = tf.train.get_or_create_global_step()

        # Define state for the model (inputs, etc.)
        self.x_train = tf.get_variable(
            "training_data",
            initializer=tf.ones([self.hparams.batch_size, self.n_in],
                                dtype=tf.float64),
            validate_shape=False,
            trainable=False)
        self.y_train = tf.get_variable("training_labels",
                                       initializer=tf.zeros(
                                           [self.hparams.batch_size, 1],
                                           dtype=tf.float64),
                                       validate_shape=False,
                                       trainable=False)
        self.weights_train = tf.get_variable(
            "weights_train",
            initializer=tf.ones([self.hparams.batch_size, self.n_out],
                                dtype=tf.float64),
            validate_shape=False,
            trainable=False)
        self.input_op = tf.assign(self.x_train,
                                  self.x_in,
                                  validate_shape=False)
        self.input_w_op = tf.assign(self.weights_train,
                                    self.weights,
                                    validate_shape=False)

        self.input_std = tf.get_variable("data_standard_deviation",
                                         initializer=tf.ones([1, self.n_out],
                                                             dtype=tf.float64),
                                         dtype=tf.float64,
                                         trainable=False)
        self.input_mean = tf.get_variable("data_mean",
                                          initializer=tf.zeros(
                                              [1, self.n_out],
                                              dtype=tf.float64),
                                          dtype=tf.float64,
                                          trainable=True)

        # GP Hyperparameters
        self.noise = tf.get_variable("noise",
                                     initializer=tf.cast(0.0,
                                                         dtype=tf.float64))
        self.amplitude = tf.get_variable("amplitude",
                                         initializer=tf.cast(1.0,
                                                             dtype=tf.float64))
        self.amplitude_linear = tf.get_variable("linear_amplitude",
                                                initializer=tf.cast(
                                                    1.0, dtype=tf.float64))
        self.length_scales = tf.get_variable("length_scales",
                                             initializer=tf.zeros(
                                                 [1, self.n_in],
                                                 dtype=tf.float64))
        self.length_scales_lin = tf.get_variable("length_scales_linear",
                                                 initializer=tf.zeros(
                                                     [1, self.n_in],
                                                     dtype=tf.float64))

        # Latent embeddings of the different outputs for task covariance
        self.task_vectors = tf.get_variable(
            "latent_task_vectors",
            initializer=tf.random_normal([self.n_out, self.task_latent_dim],
                                         dtype=tf.float64))

        # Normalize outputs across each dimension
        # Since we have different numbers of observations across each task, we
        # normalize by their respective counts.
        index_counts = self.atleast_2d(tf.reduce_sum(self.weights, axis=0),
                                       self.n_out)
        index_counts = tf.where(
            index_counts > 0, index_counts,
            tf.ones(tf.shape(index_counts), dtype=tf.float64))
        self.mean_op = tf.assign(self.input_mean,
                                 tf.reduce_sum(self.y, axis=0) / index_counts)
        self.var_op = tf.assign(
            self.input_std,
            tf.sqrt(1e-4 + tf.reduce_sum(tf.square(
                self.y - tf.reduce_sum(self.y, axis=0) / index_counts),
                                         axis=0) / index_counts))

        with tf.control_dependencies([self.var_op]):
            y_normed = self.atleast_2d(
                (self.y - self.input_mean) / self.input_std, self.n_out)
            y_normed = self.atleast_2d(
                tf.boolean_mask(y_normed, self.weights > 0), 1)
        self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False)

        # Observation noise
        alpha = tf.nn.softplus(self.noise) + 1e-6

        # Covariance
        with tf.control_dependencies(
            [self.input_op, self.input_w_op, self.out_op]):
            self.self_cov = (
                self.cov(self.x_in, self.x_in) *
                self.task_cov(self.weights, self.weights) +
                tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64) * alpha)

        self.chol = tf.cholesky(self.self_cov)
        self.kinv = tf.cholesky_solve(
            self.chol, tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64))

        self.input_inv = tf.Variable(tf.eye(self.hparams.batch_size,
                                            dtype=tf.float64),
                                     validate_shape=False,
                                     trainable=False)
        self.input_cov_op = tf.assign(self.input_inv,
                                      self.kinv,
                                      validate_shape=False)

        # Log determinant from the diagonal entries of the Cholesky factor
        # self.chol
        with tf.control_dependencies([self.input_cov_op]):
            logdet = 2.0 * tf.reduce_sum(
                tf.log(tf.diag_part(self.chol) + 1e-16))

        # Negative log marginal likelihood (minimized through the loss below)
        self.marginal_ll = -tf.reduce_sum(
            -0.5 *
            tf.matmul(tf.transpose(y_normed), tf.matmul(self.kinv, y_normed)) -
            0.5 * logdet - 0.5 * self.n * np.log(2 * np.pi))

        zero = tf.cast(0., dtype=tf.float64)
        one = tf.cast(1., dtype=tf.float64)
        standard_normal = tfd.Normal(loc=zero, scale=one)

        # Loss is marginal likelihood and priors
        self.loss = tf.reduce_sum(self.marginal_ll - (
            standard_normal.log_prob(self.amplitude) +
            standard_normal.log_prob(tf.exp(self.noise)) +
            standard_normal.log_prob(self.amplitude_linear) +
            tfd.Normal(loc=zero, scale=one * 10.).log_prob(self.task_vectors)))

        # Optimizer for hyperparameters
        optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr)
        vars_to_optimize = [
            self.amplitude, self.length_scales, self.length_scales_lin,
            self.amplitude_linear, self.noise, self.input_mean
        ]

        if self.learn_embeddings:
            vars_to_optimize.append(self.task_vectors)
        grads = optimizer.compute_gradients(self.loss, vars_to_optimize)
        self.train_op = optimizer.apply_gradients(grads,
                                                  global_step=self.global_step)

        # Predictions for test data
        self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x)

        # create tensorboard metrics
        self.create_summaries()
        self.summary_writer = tf.summary.FileWriter(
            "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph)
        self.check = tf.add_check_numerics_ops()
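
The count-based normalization above (mean_op and var_op) is easier to follow outside the graph. A plain NumPy sketch of the same arithmetic, with hypothetical arrays where weights is a one-hot task indicator per row (like var_op, it does not mask unobserved rows when summing squares):

import numpy as np

y = np.array([[1.0, 0.0], [3.0, 0.0], [0.0, 5.0]])        # observed outcomes per task
weights = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]])  # one-hot task indicators
counts = np.maximum(weights.sum(axis=0), 1.0)             # per-task counts: [2., 1.]
mean = y.sum(axis=0) / counts                             # per-task means: [2., 5.]
std = np.sqrt(1e-4 + ((y - mean) ** 2).sum(axis=0) / counts)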
def create(parameters):
    print('Creating the neural network model.')
    tf.reset_default_graph()
    # tf Graph input
    x = tf.placeholder(tf.float32, shape=(None, parameters['n_steps'], parameters['n_input']), name='input')
    x = tf.verify_tensor_all_finite(x, "X not finite!")
    y = tf.placeholder(tf.float32, shape=(None, parameters['n_output']), name='expected_output')
    y = tf.verify_tensor_all_finite(y, "Y not finite!")
    #x = tf.Print(x, [x], "X: ")
    #y = tf.Print(y, [y], "Y: ")
    lstm_state_size = np.sum(parameters['lstm_layers']) * 2
    # Note: Batch size is the first dimension in istate.
    istate = tf.placeholder(tf.float32, shape=(None, lstm_state_size), name='internal_state')
    lr = tf.placeholder(tf.float32, name='learning_rate')

    # The target to track itself and its peers, each with x, y
    input_size = (parameters['n_peers'] + 1) * 2
    inputToRnn = parameters['input_layer']
    if parameters['input_layer'] is None:
        inputToRnn = parameters['n_input']

    cells = [rnn_cell.LSTMCell(l, parameters['lstm_layers'][i-1] if (i > 0) else inputToRnn,
                               num_proj=parameters['lstm_layers'][i],
                               cell_clip=parameters['lstm_clip'],
                               use_peepholes=True) for i,l in enumerate(parameters['lstm_layers'])] 
    # TODO: GRUCell support here.
    # cells = [rnn_cell.GRUCell(l, parameters['lstm_layers'][i-1] if (i > 0) else inputToRnn) for i,l in enumerate(parameters['lstm_layers'])]
    model = {
        'input_weights': tf.Variable(tf.random_normal(
            [input_size, parameters['input_layer']]), name='input_weights'),
        'input_bias': tf.Variable(tf.random_normal([parameters['input_layer']]), name='input_bias'),
        'output_weights': tf.Variable(tf.random_normal([parameters['lstm_layers'][-1],
                                                        # 6 = 2 sigma, 2 mean, weight, rho
                                                        parameters['n_mixtures'] * 6]),
                                      name='output_weights'),
        # The standard-deviation output biases may need to be initialized to about 5
        # (e.g. mean=5.0, stddev=3.0) to prevent zeros and infinities.
        'output_bias': tf.Variable(tf.random_normal([parameters['n_mixtures'] * 6]),
                                   name='output_bias'),
        'rnn_cell': rnn_cell.MultiRNNCell(cells),
        'lr': lr,
        'x': x,
        'y': y,
        'keep_prob': tf.placeholder(tf.float32),
        'istate': istate
    }
    # if parameters['input_layer'] is not None:

    #model['input_weights'] = tf.Print(model['input_weights'], [model['input_weights']], "Input weights: ", summarize=100)
    #model['input_bias'] = tf.Print(model['input_bias'], [model['input_bias']], "Input bias: ", summarize=100)
    model['input_weights'] = tf.verify_tensor_all_finite(model['input_weights'], "Input weights not finite!")
    model['input_bias'] = tf.verify_tensor_all_finite(model['input_bias'], "Input bias not finite!")
    #model['output_weights'] = tf.Print(model['output_weights'], [model['output_weights']], "Output weights: ", summarize=100)
    #model['output_bias'] = tf.Print(model['output_bias'], [model['output_bias']], "Output bias: ", summarize=100)
    model['output_weights'] = tf.verify_tensor_all_finite(model['output_weights'], "Output weights not finite!")
    model['output_bias'] = tf.verify_tensor_all_finite(model['output_bias'], "Output bias not finite!")
    
    pred = RNN(parameters, x, model, istate)
    
    tvars = tf.trainable_variables()
    avars = tf.all_variables()
    
    # Define loss and optimizer
    # We take 1 m as an arbitrary goal post for an acceptable error.
    # The delta error is squared to emphasize its importance (delta errors are much
    # smaller than absolute position errors).
    n_mixtures = parameters['n_mixtures']
    batch_size = parameters['batch_size']
    
    cost = mixture_loss(pred[0], y, n_mixtures, batch_size)

    # Clipping the gradients
    gradients = [tf.to_float(g) for g in tf.gradients(cost, tvars, aggregation_method=2)]
    grads, _ = tf.clip_by_global_norm(gradients, parameters['clip_gradients'])
    optimizer = tf.train.AdamOptimizer(learning_rate = parameters['learning_rate'])

    train_op = optimizer.apply_gradients(zip(grads, tvars))
    tf.add_check_numerics_ops()
    
    model['pred'] = pred[0]
    model['last_state'] = pred[1]
    model['cost'] = cost
    model['optimizer'] = train_op
    
    return model
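
The output layer above packs 6 values per mixture component ("2 sigma, 2 mean, weight, rho"). A hedged sketch of how such an output could be unpacked into parameters, assuming a mu1/mu2/sigma1/sigma2/weight/rho ordering (a hypothetical helper, not the mixture_loss actually used above):

def split_mixture_params(output, n_mixtures):
    # output: [batch, n_mixtures * 6]
    out = tf.reshape(output, [-1, n_mixtures, 6])
    mu1, mu2, sigma1, sigma2, weight, rho = tf.unstack(out, axis=2)
    sigma1 = tf.exp(sigma1)         # keep standard deviations positive
    sigma2 = tf.exp(sigma2)
    weight = tf.nn.softmax(weight)  # mixture weights sum to 1
    rho = tf.tanh(rho)              # correlation constrained to (-1, 1)
    return mu1, mu2, sigma1, sigma2, weight, rho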
Example #20
def FaceVerification(img_path1, img_path2):
    img_lm_1, img_rm_1, img_nose_1, img_le_1, img_re_1 = face_region(img_path1)
    img_lm_2, img_rm_2, img_nose_2, img_le_2, img_re_2 = face_region(img_path2)

    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    test_right_mouth = bundle(img_rm_1, img_rm_2)
    test_left_mouth = bundle(img_lm_1, img_lm_2)
    test_right_eye = bundle(img_re_1, img_re_2)
    test_left_eye = bundle(img_le_1, img_le_2)
    test_nose = bundle(img_nose_1, img_nose_2)

    x_image_nose = tf.placeholder(tf.float32, [8, None, 31, 31, 6], name='x')
    x_image_left_mouth = tf.placeholder(tf.float32, [8, None, 31, 31, 6],
                                        name='x')
    x_image_right_mouth = tf.placeholder(tf.float32, [8, None, 31, 31, 6],
                                         name='x')
    x_image_left_eye = tf.placeholder(tf.float32, [8, None, 31, 31, 6],
                                      name='x')
    x_image_right_eye = tf.placeholder(tf.float32, [8, None, 31, 31, 6],
                                       name='x')

    # drop out
    #keep_prob = tf.placeholder(tf.float32)

    # left mouth part
    # first convolutional layer
    W_conv1_left_mouth = weight_variable([4, 4, 6, 20], 'W_conv1_left_mouth')
    b_conv1_left_mouth = bias_variable([20], 'b_conv1_left_mouth')
    # second convolutional layer
    W_conv2_left_mouth = weight_variable([3, 3, 20, 40], 'W_conv2_left_mouth')
    b_conv2_left_mouth = bias_variable([40], 'b_conv2_left_mouth')
    # third convolutional layer
    W_conv3_left_mouth = weight_variable([3, 3, 40, 60], 'W_conv3_left_mouth')
    b_conv3_left_mouth = bias_variable([60], 'b_conv3_left_mouth')
    # fourth convolutional layer
    W_conv4_left_mouth = weight_variable([2, 2, 60, 80], 'W_conv4_left_mouth')
    b_conv4_left_mouth = bias_variable([80], 'b_conv4_left_mouth')
    # densely connected layer
    W_fc1_left_mouth = weight_variable([1 * 1 * 80, 80], 'W_fc1_left_mouth')
    b_fc1_left_mouth = bias_variable([80], 'b_fc1_left_mouth')
    # right mouth part
    # first convolutional layer
    W_conv1_right_mouth = weight_variable([4, 4, 6, 20], 'W_conv1_right_mouth')
    b_conv1_right_mouth = bias_variable([20], 'b_conv1_right_mouth')
    # second convolutional layer
    W_conv2_right_mouth = weight_variable([3, 3, 20, 40],
                                          'W_conv2_right_mouth')
    b_conv2_right_mouth = bias_variable([40], 'b_conv2_right_mouth')
    # third convolutional layer
    W_conv3_right_mouth = weight_variable([3, 3, 40, 60],
                                          'W_conv3_right_mouth')
    b_conv3_right_mouth = bias_variable([60], 'b_conv3_right_mouth')
    # fourth convolutional layer
    W_conv4_right_mouth = weight_variable([2, 2, 60, 80],
                                          'W_conv4_right_mouth')
    b_conv4_right_mouth = bias_variable([80], 'b_conv4_right_mouth')
    # densely connected layer
    W_fc1_right_mouth = weight_variable([1 * 1 * 80, 80], 'W_fc1_right_mouth')
    b_fc1_right_mouth = bias_variable([80], 'b_fc1_right_mouth')
    # left eye part
    # first convolutional layer
    W_conv1_left_eye = weight_variable([4, 4, 6, 20], 'W_conv1_left_eye')
    b_conv1_left_eye = bias_variable([20], 'b_conv1_left_eye')
    # second convolutional layer
    W_conv2_left_eye = weight_variable([3, 3, 20, 40], 'W_conv2_left_eye')
    b_conv2_left_eye = bias_variable([40], 'b_conv2_left_eye')
    # third convolutional layer
    W_conv3_left_eye = weight_variable([3, 3, 40, 60], 'W_conv3_left_eye')
    b_conv3_left_eye = bias_variable([60], 'b_conv3_left_eye')
    # fourth convolutional layer
    W_conv4_left_eye = weight_variable([2, 2, 60, 80], 'W_conv4_left_eye')
    b_conv4_left_eye = bias_variable([80], 'b_conv4_left_eye')
    # densely connected layer
    W_fc1_left_eye = weight_variable([1 * 1 * 80, 80], 'W_fc1_left_eye')
    b_fc1_left_eye = bias_variable([80], 'b_fc1_left_eye')
    # right eye part
    # first convolutional layer
    W_conv1_right_eye = weight_variable([4, 4, 6, 20], 'W_conv1_right_eye')
    b_conv1_right_eye = bias_variable([20], 'b_conv1_right_eye')
    # second convolutional layer
    W_conv2_right_eye = weight_variable([3, 3, 20, 40], 'W_conv2_right_eye')
    b_conv2_right_eye = bias_variable([40], 'b_conv2_right_eye')
    # third convolutional layer
    W_conv3_right_eye = weight_variable([3, 3, 40, 60], 'W_conv3_right_eye')
    b_conv3_right_eye = bias_variable([60], 'b_conv3_right_eye')
    # fourth convolutional layer
    W_conv4_right_eye = weight_variable([2, 2, 60, 80], 'W_conv4_right_eye')
    b_conv4_right_eye = bias_variable([80], 'b_conv4_right_eye')
    # densely connected layer
    W_fc1_right_eye = weight_variable([1 * 1 * 80, 80], 'W_fc1_right_eye')
    b_fc1_right_eye = bias_variable([80], 'b_fc1_right_eye')
    # nose part
    # first convolutional layer
    W_conv1_nose = weight_variable([4, 4, 6, 20], 'W_conv1_nose')
    b_conv1_nose = bias_variable([20], 'b_conv1_nose')
    # second convolutional layer
    W_conv2_nose = weight_variable([3, 3, 20, 40], 'W_conv2_nose')
    b_conv2_nose = bias_variable([40], 'b_conv2_nose')
    # third convolutional layer
    W_conv3_nose = weight_variable([3, 3, 40, 60], 'W_conv3_nose')
    b_conv3_nose = bias_variable([60], 'b_conv3_nose')
    # fourth convolutional layer
    W_conv4_nose = weight_variable([2, 2, 60, 80], 'W_conv4_nose')
    b_conv4_nose = bias_variable([80], 'b_conv4_nose')
    # densely connected layer
    W_fc1_nose = weight_variable([1 * 1 * 80, 80], 'W_fc1_nose')
    b_fc1_nose = bias_variable([80], 'b_fc1_nose')

    h_fc1_drop_left_mouth = CNN_Computaion2(
        x_image_left_mouth, W_conv1_left_mouth, b_conv1_left_mouth,
        W_conv2_left_mouth, b_conv2_left_mouth, W_conv3_left_mouth,
        b_conv3_left_mouth, W_conv4_left_mouth, b_conv4_left_mouth,
        W_fc1_left_mouth, b_fc1_left_mouth)
    h_fc1_drop_right_mouth = CNN_Computaion2(
        x_image_right_mouth, W_conv1_right_mouth, b_conv1_right_mouth,
        W_conv2_right_mouth, b_conv2_right_mouth, W_conv3_right_mouth,
        b_conv3_right_mouth, W_conv4_right_mouth, b_conv4_right_mouth,
        W_fc1_right_mouth, b_fc1_right_mouth)
    h_fc1_drop_left_eye = CNN_Computaion2(x_image_left_eye, W_conv1_left_eye,
                                          b_conv1_left_eye, W_conv2_left_eye,
                                          b_conv2_left_eye, W_conv3_left_eye,
                                          b_conv3_left_eye, W_conv4_left_eye,
                                          b_conv4_left_eye, W_fc1_left_eye,
                                          b_fc1_left_eye)
    h_fc1_drop_right_eye = CNN_Computaion2(
        x_image_right_eye, W_conv1_right_eye, b_conv1_right_eye,
        W_conv2_right_eye, b_conv2_right_eye, W_conv3_right_eye,
        b_conv3_right_eye, W_conv4_right_eye, b_conv4_right_eye,
        W_fc1_right_eye, b_fc1_right_eye)
    h_fc1_drop_nose = CNN_Computaion2(x_image_nose, W_conv1_nose, b_conv1_nose,
                                      W_conv2_nose, b_conv2_nose, W_conv3_nose,
                                      b_conv3_nose, W_conv4_nose, b_conv4_nose,
                                      W_fc1_nose, b_fc1_nose)

    # RBM implementation
    h_fc1_drop = tf.concat([
        h_fc1_drop_left_mouth, h_fc1_drop_right_mouth, h_fc1_drop_left_eye,
        h_fc1_drop_right_eye, h_fc1_drop_nose
    ], 1)

    W_hidden = weight_variable([3200, 8], 'W_hidden', 0.5 / tf.sqrt(3200.0))
    b_hidden = bias_variable([8], 'b_hidden')
    h_dc = tf.matmul(h_fc1_drop, W_hidden) + b_hidden

    W_output_1 = weight_variable([8], 'W_output_1', 0.5 / tf.sqrt(8.0))
    W_output_2 = weight_variable([8], 'W_output_2', 0.5 / tf.sqrt(8.0))

    b_output = bias_variable([2], 'b_output')

    y_ = tf.placeholder(tf.float32, [None, 2], name='y')
    Probability_part1 = tf.exp(h_dc + W_output_1) + 1
    Probability_part2 = tf.exp(h_dc + W_output_2) + 1
    Probability_part3 = tf.transpose(tf.stack(
        [Probability_part1, Probability_part2], 2),
                                     perm=[1, 0, 2])

    Probability_part5 = tf.reduce_sum(tf.multiply(Probability_part3,
                                                  [1.0, 0.0]),
                                      2,
                                      keep_dims=True)
    Probability_pos_numerator = tf.reduce_sum(
        tf.multiply([1.0, 0.0], tf.exp(b_output)))
    Probability_pos_denominator = tf.reduce_sum(
        tf.multiply(
            tf.reduce_prod(tf.div(Probability_part3, Probability_part5), 0),
            tf.exp(b_output)), 1)
    Probability_pos = tf.div(Probability_pos_numerator,
                             Probability_pos_denominator)

    Probability_part6 = tf.reduce_sum(tf.multiply(Probability_part3,
                                                  [0.0, 1.0]),
                                      2,
                                      keep_dims=True)
    Probability_neg_numerator = tf.reduce_sum(
        tf.multiply([0.0, 1.0], tf.exp(b_output)))
    Probability_neg_denominator = tf.reduce_sum(
        tf.multiply(
            tf.reduce_prod(tf.div(Probability_part3, Probability_part6), 0),
            tf.exp(b_output)), 1)
    Probability_neg = tf.div(Probability_neg_numerator,
                             Probability_neg_denominator)

    Probability_dis = tf.stack([Probability_pos, Probability_neg], 1)
    check_op = tf.add_check_numerics_ops()
    prediction = tf.argmin(Probability_dis, 1)
    # set check point
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    saver = tf.train.Saver()
    check_dir = './check_point'
    ckpt = tf.train.get_checkpoint_state(check_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

    prediction_array = sess.run(prediction,
                                feed_dict={
                                    x_image_nose: test_nose,
                                    x_image_left_eye: test_left_eye,
                                    x_image_right_eye: test_right_eye,
                                    x_image_left_mouth: test_left_mouth,
                                    x_image_right_mouth: test_right_mouth
                                })
    sess.close()
    return prediction_array[0]
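
Note that check_op above is built but never fetched, so the numeric checks never actually run; to activate them it would have to be passed to sess.run alongside the prediction, e.g. with the same feeds as above:

_, prediction_array = sess.run([check_op, prediction],
                               feed_dict={
                                   x_image_nose: test_nose,
                                   x_image_left_eye: test_left_eye,
                                   x_image_right_eye: test_right_eye,
                                   x_image_left_mouth: test_left_mouth,
                                   x_image_right_mouth: test_right_mouth})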
def main_gpu(arg):
    print('Running main')
    print('--==>', dict(arg) )
    arg.act_name = arg.act.__name__
    results = {'train_errors':[], 'cv_errors':[],'test_errors':[]}

    path, errors_pretty, mdl_dir, json_file = set_experiment_folders(arg)
    set_tensorboard(arg)

    ## Data sets and task
    print( '----====> TASK NAME: %s' % arg.data_file_name )
    (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(arg)

    N_frac = arg.N_frac
    X_train, Y_train, X_cv, Y_cv, X_test, Y_test = X_train[:N_frac,:], Y_train[:N_frac,:], X_cv[:N_frac,:], Y_cv[:N_frac,:], X_test[:N_frac,:], Y_test[:N_frac,:]

    if arg.data_normalize == 'normalize_input':
        X_train, X_cv, X_test = preprocessing.scale(X_train), preprocessing.scale(X_cv), preprocessing.scale(X_test)

    (N_train,D) = X_train.shape
    (N_test,D_out) = Y_test.shape
    print( '(N_train,D) = ', (N_train,D) )
    print( '(N_test,D_out) = ', (N_test,D_out) )

    ##
    phase_train = tf.placeholder(tf.bool, name='phase_train') if arg.bn else  None

    arg.steps = arg.get_steps(arg)
    arg.M = arg.get_batch_size(arg)

    arg.log_learning_rate = arg.get_log_learning_rate(arg)
    arg.starter_learning_rate = arg.get_start_learning_rate(arg)
    print( '++> starter_learning_rate ', arg.starter_learning_rate )

    ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
    arg.decay_rate = arg.get_decay_rate(arg)
    arg.decay_steps = arg.get_decay_steps(arg)

    if arg.optimization_alg == 'GD':
        pass
    elif arg.optimization_alg=='Momentum':
        arg.use_nesterov = arg.get_use_nesterov()
        arg.momentum = arg.get_momentum(arg)
        print('arg.use_nesterov', arg.use_nesterov)
        print('arg.momentum', arg.momentum)
    elif arg.optimization_alg == 'Adadelta':
        arg.rho = arg.get_rho(arg)
        print('arg.rho', arg.rho)
    elif arg.optimization_alg == 'Adagrad':
        #only has learning rate
        pass
    elif arg.optimization_alg == 'Adam':
        arg.beta1 = arg.get_beta1(arg)
        arg.beta2 = arg.get_beta2(arg)
        print('arg.beta1', arg.beta1)
        print('arg.beta2', arg.beta2)
    elif arg.optimization_alg == 'RMSProp':
        arg.decay = arg.get_decay(arg)
        arg.momentum = arg.get_momentum(arg)
        print('arg.decay', arg.decay)
        print('arg.momentum', arg.momentum)
    else:
        pass

    ##############################
    # if data_file_name == 'task_MNIST_flat_auto_encoder':
    #     PCA_errors = {12:24.8254684915, 48:9.60052317906, 96:4.72118325768}
    #     if len(units_list) == 1:
    #         k = units_list[0]
    #     else:
    #         k = units_list[0] * len(units_list)
    #     if not k in PCA_errors.keys():
    #         print( 'COMPUTING PCA... k = ', k)
    #         X_reconstruct_pca, _, _ = mtf. get_reconstruction_pca(X_train,k=units_list[0])
    #         pca_error = mtf.report_l2_loss(Y=X_train,Y_pred=X_reconstruct_pca)
    #         PCA_errors[k] = pca_error
    #     else:
    #         pca_error = PCA_errors[k]
    #     print( '*************> PCA error: ', pca_error)
    # else:
    #     pca_error = None
    #     rbf_error = None
    #
    # hbf1_error = None
    # if model == 'hbf':
    #     #error, Y_pred, Kern, C, subsampled_data_points = report_RBF_error_from_data(X_train, dims, stddev)
    #     if len(units_list) > 1:
    #         k = units_list[0]*len(units_list)
    #         print( 'RBF units = ', k)
    #         nb_units = [None, k]
    #         rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1])
    #         print( rbf_error)
    #         hbf1={12:26.7595}
    #         if k in hbf1.keys():
    #             hbf1_error = hbf1[k]
    #     else:
    #         nb_units = dims
    #         rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1])
    ##

    pca_error = None
    rbf_error = None
    hbf1_error = None
    ## Make Model
    if arg.mdl == 'standard_nn':
        arg.dims = [D]+arg.units+[D_out]
        arg.mu_init_list = arg.get_W_mu_init(arg)
        arg.std_init_list = arg.get_W_std_init(arg)

        arg.b_init = arg.get_b_init(arg)
        float_type = tf.float64
        x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D

        nb_layers = len(arg.dims)-1
        nb_hidden_layers = nb_layers-1
        (inits_C,inits_W,inits_b) = mtf.get_initilizations_standard_NN(init_type=arg.init_type,dims=arg.dims,mu=arg.mu_init_list,std=arg.std_init_list,b_init=arg.b_init, X_train=X_train, Y_train=Y_train)
        with tf.name_scope("standardNN") as scope:
            mdl = mtf.build_standard_NN(arg, x,arg.dims,(None,inits_W,inits_b),phase_train,arg.trainable_bn)
            mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0])
        inits_S = inits_b
    elif arg.mdl == 'hbf':
        arg.dims = [D]+arg.units+[D_out]
        trainable_S = True if (arg.trainable_S=='train_S') else False
        arg.b_init = arg.get_b_init(arg)
        arg.S_init = arg.b_init
        float_type = tf.float64
        #arg.mu , arg.std = arg.get_W_mu_init(arg), arg.get_W_std_init(arg)
        x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D
        (inits_C,inits_W,inits_S,rbf_error) = mtf.get_initilizations_HBF(init_type=arg.init_type,dims=arg.dims,mu=arg.mu,std=arg.std,b_init=arg.b_init,S_init=arg.S_init, X_train=X_train, Y_train=Y_train, train_S_type=arg.train_S_type)
        #print(inits_W)
        nb_layers = len(arg.dims)-1
        nb_hidden_layers = nb_layers-1
        with tf.name_scope("HBF") as scope:
            mdl = mtf.build_HBF2(x,arg.dims,(inits_C,inits_W,inits_S),phase_train,arg.trainable_bn,trainable_S)
            mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0])
    elif arg.mdl == 'binary_tree_4D':
        pass
    elif arg.mdl == 'binary_tree_4D_conv_hidden_layer':
        print( 'binary_tree_4D_conv_hidden_layer' )
        inits_S = None
        pca_error, rbf_error = None, None
        float_type = tf.float32
        # Data sizes needed for reshaping
        N_cv, N_test = X_cv.shape[0], X_test.shape[0]
        # reshape data sets
        X_train, X_cv, X_test = X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1)
        x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input')
        #
        arg.stride_convd1, arg.filter_size = 2, 2 #fixed for Binary Tree BT
        #arg.mean, arg.stddev = arg.get_W_mu_init(arg), arg.get_W_std_init(arg)
        with tf.name_scope("build_binary_model") as scope:
            mdl = mtf.build_binary_tree_4D_hidden_layer(x,arg,phase_train=phase_train)
        arg.dims = [D]+[arg.nb_filters]+[arg.nb_final_hidden_units]+[D_out]
    elif arg.mdl == 'binary_tree_4D_conv_hidden_layer_automatic':
        print( arg.scope_name )
        inits_S = None
        pca_error, rbf_error = None, None
        float_type = tf.float32
        #
        N_cv, N_test = X_cv.shape[0], X_test.shape[0]
        X_train, X_cv, X_test = X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1)
        x = tf.placeholder(tf.float32, shape=[None,1,D,1], name='x-input') #[M, 1, D, 1]
        #
        with tf.name_scope("mdl"+arg.scope_name) as scope:
            mdl = mtf.bt_mdl_conv(arg,x)
        arg.dims = [D]+arg.F[1:]+[D_out]
    elif arg.mdl == 'binary_tree_8D_conv_hidden_layer':
        print( arg.scope_name )
        inits_S = None
        pca_error, rbf_error = None, None
        float_type = tf.float32
        #
        N_cv, N_test = X_cv.shape[0], X_test.shape[0]
        X_train, X_cv, X_test = X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1)
        x = tf.placeholder(tf.float32, shape=[None,1,D,1], name='x-input') #[M, 1, D, 1]
        #
        with tf.name_scope("mdl"+arg.scope_name) as scope:
            mdl = mtf.bt_mdl_conv(arg,x)
        arg.dims = [D]+arg.F[1:]+[D_out]

    ## Output and Loss
    y = mdl
    y_ = tf.placeholder(float_type, shape=[None, D_out]) # (M x D_out)
    with tf.name_scope("L2_loss") as scope:
        l2_loss = tf.reduce_sum( tf.reduce_mean(tf.square(y_-y), 0) )
        #l2_loss = (2.0/N_train)*tf.nn.l2_loss(y_-y)
        #l2_loss = tf.reduce_mean(tf.square(y_-y))

    nb_params = count_number_trainable_params(y)
    results['nb_params'] = nb_params
    print( '---> nb_params ', nb_params )


    ##

    with tf.name_scope("train") as scope:
        # If the argument staircase is True, then global_step / decay_steps is an integer division and the decayed learning rate follows a staircase function.
        ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(learning_rate=arg.starter_learning_rate, global_step=global_step,decay_steps=arg.decay_steps, decay_rate=arg.decay_rate, staircase=arg.staircase)
        # Passing global_step to minimize() will increment it at each step.
        if arg.optimization_alg == 'GD':
            opt = tf.train.GradientDescentOptimizer(learning_rate)
        elif arg.optimization_alg == 'Momentum':
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=arg.momentum,use_nesterov=arg.use_nesterov)
        elif arg.optimization_alg == 'Adadelta':
            opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate, rho=arg.rho, epsilon=1e-08, use_locking=False, name='Adadelta')
        elif arg.optimization_alg == 'Adam':
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=arg.beta1, beta2=arg.beta2, epsilon=1e-08, name='Adam')
        elif arg.optimization_alg == 'Adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate)
        elif arg.optimization_alg == 'RMSProp':
            opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=arg.decay, momentum=arg.momentum, epsilon=1e-10, name='RMSProp')

    ## TODO
    if arg.re_train == 're_train' and arg.data_file_name == 'hrushikesh':
        print( 'data_file_name: ', arg.data_file_name)
        print( 're_train: ', arg.re_train)
        var_list = [v for v in tf.all_variables() if v.name == 'C:0']
        #train_step = opt.minimize(l2_loss, var_list=var_list)
    else:
        train_step = opt.minimize(l2_loss, global_step=global_step)

    ##
    with tf.name_scope('learning_rate'):
        learning_rate_scalar_summary = tf.scalar_summary("learning_rate", learning_rate)

    with tf.name_scope("l2_loss") as scope:
        ls_scalar_summary = tf.scalar_summary("l2_loss", l2_loss)

    if arg.data_file_name == 'task_MNIST_flat_auto_encoder':
        with tf.name_scope('input_reshape'):
            x_image = tf.to_float(x, name='ToFloat')
            image_shaped_input_x = tf.reshape(x_image, [-1, 28, 28, 1])
            # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
            tf.image_summary('input', image_shaped_input_x, 10)

        with tf.name_scope('reconstruct'):
            y_image = tf.to_float(y, name='ToFloat')
            image_shaped_input_y = tf.reshape(y_image, [-1, 28, 28, 1])
            # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
            tf.image_summary('reconstruct', image_shaped_input_y, 10)

    def register_all_variables_and_grads(y):
        all_vars = tf.all_variables()
        grad_vars = opt.compute_gradients(y,all_vars) #[ (gradient,variable) ]
        for (dldw,v) in grad_vars:
            if dldw is not None:
                prefix_name = 'derivative_'+v.name
                suffix_text = 'dJd'+v.name
                #mtf.put_summaries(var=tf.sqrt( tf.reduce_sum(tf.square(dldw)) ),prefix_name=prefix_name,suffix_text=suffix_text)
                mtf.put_summaries(var=tf.abs(dldw),prefix_name=prefix_name,suffix_text='_abs_'+suffix_text)
                tf.histogram_summary('hist'+prefix_name, dldw)


    register_all_variables_and_grads(y)
    ## TRAIN
    if phase_train is not None:
        #DO BN
        feed_dict_train = {x:X_train, y_:Y_train, phase_train: False}
        feed_dict_cv = {x:X_cv, y_:Y_cv, phase_train: False}
        feed_dict_test = {x:X_test, y_:Y_test, phase_train: False}
    else:
        #Don't do BN
        feed_dict_train = {x:X_train, y_:Y_train}
        feed_dict_cv = {x:X_cv, y_:Y_cv}
        feed_dict_test = {x:X_test, y_:Y_test}

    def get_batch_feed(X, Y, M, phase_train):
        mini_batch_indices = np.random.randint(X.shape[0], size=M)  # M random row indices from the full data set
        Xminibatch =  X[mini_batch_indices,:] # ( M x D^(0) )
        Yminibatch = Y[mini_batch_indices,:] # ( M x D^(L) )
        if phase_train is not None:
            #DO BN
            feed_dict = {x: Xminibatch, y_: Yminibatch, phase_train: True}
        else:
            #Don't do BN
            feed_dict = {x: Xminibatch, y_: Yminibatch}
        return feed_dict

    def print_messages(*args):
        for i, msg in enumerate(args):
            print('>%s'%msg, flush=True)

    if arg.use_tensorboard:
        if tf.gfile.Exists('/tmp/mdl_logs'):
            tf.gfile.DeleteRecursively('/tmp/mdl_logs')
        tf.gfile.MakeDirs('/tmp/mdl_logs')

    tf.add_check_numerics_ops()

    # Add ops to save and restore all the variables.
    if arg.mdl_save:
        saver = tf.train.Saver(max_to_keep=arg.max_to_keep)
    start_time = time.time()
    print()
    #file_for_error = './ray_error_file.txt'
    if arg.save_config_args:
        arg_dict = dict(arg).copy()
        arg_dict = get_remove_functions_from_dict(arg_dict)
        pickle.dump( arg_dict, open( "pickle-slurm-%s_%s.p"%(arg.slurm_jobid,arg.slurm_array_task_id) , "wb" ) )
        #with open('json-slurm-%s_%s.json', 'w+') as f_json:
        #    json.dump(results,f_json,sort_keys=True, indent=2, separators=(',', ': '))
    with open(path+errors_pretty, 'w+') as f_err_msgs:
    #with open(file_for_error, 'w+') as f_err_msgs:
        #with tf.Session() as sess:
        sess = tf.Session()
        ## prepare writers and fetches
        if arg.use_tensorboard:
            merged = tf.merge_all_summaries()
            #writer = tf.train.SummaryWriter(tensorboard_data_dump, sess.graph)
            train_writer = tf.train.SummaryWriter(arg.tensorboard_data_dump_train, sess.graph)
            test_writer = tf.train.SummaryWriter(arg.tensorboard_data_dump_test, sess.graph)
            ##
            fetches_train = [merged, l2_loss]
            fetches_cv = l2_loss
            fetches_test = [merged, l2_loss]
        else:
            fetches_train = l2_loss
            fetches_cv = l2_loss
            fetches_test = l2_loss

        sess.run( tf.initialize_all_variables() )
        for i in range(arg.steps):
            ## Create fake data for y = W.x + b where W = 2, b = 0
            #(batch_xs, batch_ys) = get_batch_feed(X_train, Y_train, M, phase_train)
            feed_dict_batch = get_batch_feed(X_train, Y_train, arg.M, phase_train)
            ## Train
            if i%arg.report_error_freq == 0:
                if arg.use_tensorboard:
                    (summary_str_train,train_error) = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)
                    cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv)
                    (summary_str_test,test_error) = sess.run(fetches=fetches_test, feed_dict=feed_dict_test)

                    train_writer.add_summary(summary_str_train, i)
                    test_writer.add_summary(summary_str_test, i)
                else:
                    train_error = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)
                    cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv)
                    test_error = sess.run(fetches=fetches_test, feed_dict=feed_dict_test)

                current_learning_rate = sess.run(fetches=learning_rate)
                loss_msg = "=> Mdl*%s*-units%s, task: %s, step %d/%d, train err %g, cv err: %g test err %g"%(arg.mdl,arg.dims,arg.data_file_name,i,arg.steps,train_error,cv_error,test_error)
                mdl_info_msg = "Act: %s, Opt:%s, BN %s, BN_trainable: %s After%d/%d iteration,Init: %s, current_learning_rate %s, M %s, decay_rate %s, decay_steps %s, nb_params %s" % (arg.act.__name__,arg.optimization_alg,arg.bn,arg.trainable_bn,i,arg.steps,arg.init_type,current_learning_rate,arg.M,arg.decay_rate,arg.decay_steps,nb_params)
                errors_to_beat = 'BEAT: hbf1_error: %s RBF error: %s PCA error: %s '%(hbf1_error, rbf_error,pca_error)

                print_messages(loss_msg, mdl_info_msg, errors_to_beat)
                print('S: ', inits_S, flush=True)
                print()

                # store results
                results['train_errors'].append( float(train_error) )
                results['cv_errors'].append( float(cv_error) )
                results['test_errors'].append( float(test_error) )
                # write errors to pretty print
                f_err_msgs.write(loss_msg+'\n')
                f_err_msgs.write(mdl_info_msg+'\n')
                if any_is_NaN(train_error,cv_error,test_error):
                    # if its a nan make sure to stop script
                    print('nan_found')
                    break
                if arg.mdl_save:
                    save_path = saver.save(sess, path+mdl_dir+'/model.ckpt',global_step=i)
            if arg.use_tensorboard:
                sess.run(fetches=[merged,train_step], feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
            else:
                sess.run(fetches=train_step, feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})


    sess.close()
    _, best_train, best_cv, best_test =  arg.get_errors_from(results)
    results['best_train'], results['best_cv'], results['best_test'] = best_train, best_cv, best_test
    print('End of main')

    git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    results['git_hash'] = str(git_hash)
    #results['tf_rand_seed'] = tf_rand_seed
    #
    seconds = (time.time() - start_time)
    minutes = seconds/ 60
    hours = minutes/ 60
    print("--- %s seconds ---" % seconds )
    print("--- %s minutes ---" % minutes )
    print("--- %s hours ---" % hours )
    ## dump results to JSON
    results['seconds'] = seconds
    results['minutes'] = minutes
    results['hours'] = hours
    #print results
    #results['arg'] = arg
    arg_dict = dict(arg)
    arg_dict = get_remove_functions_from_dict(arg_dict)
    results['arg_dict'] = arg_dict
    with open(path+json_file, 'w+') as f_json:
        print('Writing Json')
        print('path+json_file', path+json_file)
        json.dump(results,f_json,indent=2, separators=(',', ': '))
    print( '\a') #makes beep
    #print(results)
    print('get_errors_from: ', arg.get_errors_from.__name__)
    print('best results: train, cv, test: ', best_train, best_cv, best_test )
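
The decay schedule used above follows decayed_learning_rate = learning_rate * decay_rate ** (global_step / decay_steps). A small standalone check of that formula with hypothetical numbers:

import tensorflow as tf

global_step = tf.Variable(500, trainable=False)
lr = tf.train.exponential_decay(learning_rate=0.1, global_step=global_step,
                                decay_steps=1000, decay_rate=0.95, staircase=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(lr))  # 0.1 * 0.95 ** (500 / 1000) ~= 0.0975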
def main(_):
    NUM_INPUTS = 4
    NUM_CLASSES = 9

    # the data, split between train and test sets
    x_train, y_train, x_test, y_test = generate_simulated_data()

    x_train = x_train.astype('uint8')
    x_test = x_test.astype('uint8')
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = y_train.astype('int64')
    y_test = y_test.astype('int64')

    tf.logging.set_verbosity(tf.logging.INFO)
    sess = tf.InteractiveSession()

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000.
    training_steps_list = list(
        map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' %
            (len(training_steps_list), len(learning_rates_list)))

    input_placeholder = tf.placeholder(tf.float32, [None, NUM_INPUTS],
                                       name='graph_input')
    if FLAGS.quantize:
        input_min, input_max = 0, 256
        graph_input = tf.fake_quant_with_min_max_args(input_placeholder,
                                                      input_min, input_max)
    else:
        graph_input = input_placeholder

    logits, dropout_prob = models.create_three_fc_model(graph_input,
                                                        NUM_INPUTS,
                                                        20,
                                                        20,
                                                        NUM_CLASSES,
                                                        is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.placeholder(tf.int64, [None],
                                        name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)
    if FLAGS.quantize:
        tf.contrib.quantize.create_training_graph(quant_delay=0)
    with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
        learning_rate_input = tf.placeholder(tf.float32, [],
                                             name='learning_rate_input')
        train_step = tf.train.GradientDescentOptimizer(
            learning_rate_input).minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(ground_truth_input,
                                           predicted_indices,
                                           num_classes=NUM_CLASSES)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.get_default_graph().name_scope('eval'):
        tf.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
    merged_summaries = tf.summary.merge_all(scope='eval')
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                              '/validation')

    tf.global_variables_initializer().run()

    start_step = 1
    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is.
        training_steps_sum = 0
        for i in range(len(training_steps_list)):
            training_steps_sum += training_steps_list[i]
            if training_step <= training_steps_sum:
                learning_rate_value = learning_rates_list[i]
                break

        # Pull the samples we'll use for training.
        index = (training_step * FLAGS.batch_size) % x_train.shape[0]
        train_fingerprints = x_train[index:index + FLAGS.batch_size]
        train_ground_truth = y_train[index:index + FLAGS.batch_size]
        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries,
                evaluation_step,
                cross_entropy_mean,
                train_step,
                increment_global_step,
            ],
            feed_dict={
                graph_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                dropout_prob: 0.5
            })
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info(
            'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
            (training_step, learning_rate_value, train_accuracy * 100,
             cross_entropy_value))

        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = y_test.shape[0]
            total_accuracy = 0
            total_conf_matrix = None
            for i in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints = x_test[i:i + FLAGS.batch_size]
                validation_ground_truth = y_test[i:i + FLAGS.batch_size]
                # Run a validation step and capture training summaries for TensorBoard
                # with the `merged` op.
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        graph_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                validation_writer.add_summary(validation_summary,
                                              training_step)
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

        # Save the model checkpoint periodically.
        if (training_step % FLAGS.save_step_interval == 0
                or training_step == training_steps_max):
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path,
                            training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)
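
The staged learning-rate lookup in the training loop above can be tested in isolation. A pure-Python sketch of the same logic, using the stage lists from the comment near the top of main (a hypothetical helper):

def staged_learning_rate(training_step, steps_list, rates_list):
    # Return the rate for the stage that training_step falls into.
    steps_sum = 0
    for steps, rate in zip(steps_list, rates_list):
        steps_sum += steps
        if training_step <= steps_sum:
            return rate
    return rates_list[-1]

assert staged_learning_rate(9999, [10000, 3000], [0.001, 0.0001]) == 0.001
assert staged_learning_rate(12000, [10000, 3000], [0.001, 0.0001]) == 0.0001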
Example #23
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   global_step=None,
                   n_samples=1,
                   kl_scaling=None,
                   maxnorm=5.):

        if kl_scaling is None:
            kl_scaling = {}
        if n_samples <= 0:
            raise ValueError(
                "n_samples should be greater than zero: {}".format(n_samples))

        self.n_samples = n_samples
        self.kl_scaling = kl_scaling

        # from inference.py
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")
        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")
        self.scale = scale

        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:
                            qz_constrained.params = \
                                    z_unconstrained.bijector.inverse(
                                        qz_unconstrained.params)
                        except:
                            pass
                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        self.reset = [tf.variables_initializer([self.t])]

        # from variational_inference.py
        if var_list is None:
            var_list = set()
            trainables = tf.trainable_variables()
            for z, qz in six.iteritems(self.latent_vars):
                var_list.update(get_variables(z, collection=trainables))
                var_list.update(get_variables(qz, collection=trainables))

            for x, qx in six.iteritems(self.data):
                if isinstance(x, RandomVariable) and \
                        not isinstance(qx, RandomVariable):
                    var_list.update(get_variables(x, collection=trainables))

        var_list = list(var_list)

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        clipped_grads_and_vars = []
        for grad, var in grads_and_vars:
            if "kernel" in var.name or "bias" in var.name:
                clipped_grads_and_vars.append((tf.clip_by_norm(grad,
                                                               maxnorm,
                                                               axes=[0]), var))
            else:
                clipped_grads_and_vars.append((grad, var))
        # for grad, var in grads_and_vars:
        #     clipped_grads_and_vars.append(
        #         (tf.clip_by_value(grad, -1000., 1000.), var))
        del grads_and_vars

        if self.logging:
            tf.summary.scalar("loss",
                              self.loss,
                              collections=[self._summary_key])
            for grad, var in clipped_grads_and_vars:
                tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                                     grad,
                                     collections=[self._summary_key])
                tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'),
                                  tf.norm(grad),
                                  collections=[self._summary_key])
            # summaries use self._summary_key, which only exists when logging
            self.summarize = tf.summary.merge_all(key=self._summary_key)

        if optimizer is None and global_step is None:
            global_step = tf.Variable(0, trainable=False, name="global_step")

        if isinstance(global_step, tf.Variable):
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
        else:
            learning_rate = 0.01

        # Build optimizer.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate)
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(learning_rate)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate)
            else:
                raise ValueError('Optimizer class not found: {}'.format(optimizer))
        elif not isinstance(optimizer, tf.train.Optimizer):
            raise TypeError(
                "Optimizer must be str, tf.train.Optimizer, or None.")

        with tf.variable_scope(None, default_name="optimizer") as scope:
            if not use_prettytensor:
                self.train = optimizer.apply_gradients(clipped_grads_and_vars,
                                                       global_step=global_step)
            else:
                import prettytensor as pt
                self.train = pt.apply_optimizer(optimizer,
                                                losses=[self.loss],
                                                global_step=global_step,
                                                var_list=var_list)

        self.reset.append(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope=scope.name)))
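# The column-wise gradient clipping above caps the L2 norm of each column of
# every kernel/bias gradient. A minimal sketch of that semantics (TF 1.x
# assumed; the values are illustrative, not from the original code):
import tensorflow as tf

_g = tf.constant([[3.0, 0.0],
                  [4.0, 0.5]])                  # toy gradient for a 2x2 kernel
_clipped = tf.clip_by_norm(_g, 2.5, axes=[0])   # cap each column's norm at 2.5
with tf.Session() as _sess:
    print(_sess.run(_clipped))                  # column 0: norm 5.0 -> scaled
                                                # to [1.5, 2.0]; column 1 kept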
def main(arg):
    results = {'train_errors':[], 'cv_errors':[],'test_errors':[]}

    path, errors_pretty, mdl_dir, json_file = set_experiment_folders(arg)

    # try to make directory, if it exists do NOP
    mtf.make_and_check_dir(path=path)
    mtf.make_and_check_dir(path=path+mdl_dir)
    # JSON results structure
    #results_dic = mtf.fill_results_dic_with_np_seed(np_rnd_seed=np.random.get_state(), results=results)
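    # NOTE: this script references many bare names below (optimization_alg,
    # task_name, model, init_type, bn, trainable_bn, use_tensorboard,
    # re_train, mdl_save, report_error_freq, nb_filters, ...). In the original
    # project they presumably arrive as attributes of `arg` or as module
    # globals; a hedged unpacking sketch (only arg.task_name and arg.bn are
    # demonstrably attributes elsewhere in this function, the rest are
    # assumptions):
    task_name = arg.task_name
    bn = arg.bn
    optimization_alg = arg.optimization_alg  # assumed attribute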

    ## Data sets and task
    print( '----====> TASK NAME: %s' % arg.task_name )
    (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(arg.task_name)
    if arg.data_normalize == 'normalize_input':
        X_train, X_cv, X_test = preprocessing.scale(X_train), preprocessing.scale(X_cv), preprocessing.scale(X_test)

    (N_train,D) = X_train.shape
    (N_test,D_out) = Y_test.shape
    print( '(N_train,D) = ', (N_train,D) )
    print( '(N_test,D_out) = ', (N_test,D_out) )

    ##
    units_list = arg.units_list
    dims = [D]+arg.units_list+[D_out]
    mu = arg.W_mu_init(dims, arg)
    std = arg.W_std_init(dims, arg)

    b_init = arg.b_init()

    phase_train = tf.placeholder(tf.bool, name='phase_train') if arg.bn else  None

    steps = np.random.randint(low=arg.steps_low ,high=arg.steps_high)
    M = np.random.randint(low=arg.M_low , high=arg.M_high)
    arg.M = M

    log_learning_rate = np.random.uniform(low=arg.low_log_const_learning_rate, high=arg.high_log_const_learning_rate)
    starter_learning_rate = 10**log_learning_rate
    print( '++> starter_learning_rate ', starter_learning_rate )

    ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
    decay_rate = np.random.uniform(low=arg.decay_rate_low, high=arg.decay_rate_high)
    decay_steps = np.random.randint(low=arg.decay_steps_low(arg), high=arg.decay_steps_high(arg) )
    staircase = arg.staircase
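    # Worked example of the decay schedule with starter_learning_rate=0.1,
    # decay_rate=0.9, decay_steps=100:
    #   step 0   -> 0.100
    #   step 150 -> 0.1 * 0.9**1.5 ~= 0.0854  (staircase=False, continuous)
    #   step 150 -> 0.1 * 0.9**1   =  0.090   (staircase=True, discrete jumps)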

    if optimization_alg == 'GD':
        pass
    elif optimization_alg=='Momentum':
        use_nesterov = arg.use_nesterov
        momentum=np.random.uniform(low=0.1, high=0.99)
        results['momentum']=float(momentum)
    elif optimization_alg == 'Adadelta':
        rho=np.random.uniform(low=0.4, high=0.99)
        results['rho']=float(rho)
    elif optimization_alg == 'Adagrad':
        #only has learning rate
        pass
    elif optimization_alg == 'Adam':
        beta1 = arg.get_beta1(arg)
        beta2 = arg.get_beta2(arg)
        results['beta1']=float(beta1)
        results['beta2']=float(beta2)
    elif optimization_alg == 'RMSProp':
        decay = np.random.uniform(low=arg.decay_loc,high=arg.decay_high)
        momentum = np.random.uniform(low=arg.momentum_low,high=arg.momentum_high)
        results['decay']=float(decay)
        results['momentum']=float(momentum)
    else:
        pass
    results['range_learning_rate'] = [arg.low_log_const_learning_rate, arg.high_log_const_learning_rate]

    ##############################
    if task_name == 'task_MNIST_flat_auto_encoder':
        PCA_errors = {12:24.8254684915, 48:9.60052317906, 96:4.72118325768}
        if len(units_list) == 1:
            k = units_list[0]
        else:
            k = units_list[0] * len(units_list)
        if k not in PCA_errors:
            print( 'COMPUTING PCA... k = ', k)
            X_reconstruct_pca, _, _ = mtf.get_reconstruction_pca(X_train, k=k)
            pca_error = mtf.report_l2_loss(Y=X_train,Y_pred=X_reconstruct_pca)
            PCA_errors[k] = pca_error
        else:
            pca_error = PCA_errors[k]
        print( '*************> PCA error: ', pca_error)
    else:
        pca_error = None
        rbf_error = None

    S_init = b_init  # set before the HBF branch below, which reads S_init[1]

    hbf1_error = None
    if model == 'hbf':
        #error, Y_pred, Kern, C, subsampled_data_points = report_RBF_error_from_data(X_train, dims, stddev)
        if len(units_list) > 1:
            k = units_list[0]*len(units_list)
            print( 'RBF units = ', k)
            nb_units = [None, k]
            rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1])
            print( rbf_error)
            hbf1={12:26.7595}
            if k in hbf1.keys():
                hbf1_error = hbf1[k]
        else:
            nb_units = dims
            rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1])

    ##

    ## Make Model
    nb_layers = len(dims)-1
    nb_hidden_layers = nb_layers-1
    print( '-----> Running model: %s. (nb_hidden_layers = %d, nb_layers = %d)' % (model,nb_hidden_layers,nb_layers) )
    print( '-----> Units: %s' % (dims) )
    if model == 'standard_nn':
        rbf_error = None
        #tensorboard_data_dump = '/tmp/standard_nn_logs'
        float_type = tf.float64
        x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D
        (inits_C,inits_W,inits_b) = mtf.get_initilizations_standard_NN(init_type=init_type,dims=dims,mu=mu,std=std,b_init=b_init,S_init=S_init, X_train=X_train, Y_train=Y_train)
        with tf.name_scope("standardNN") as scope:
            mdl = mtf.build_standard_NN(x,dims,(inits_C,inits_W,inits_b),phase_train,trainable_bn)
            mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0])
        inits_S = inits_b
    elif model == 'hbf':
        trainable_S = True if (arg.trainable_S=='train_S') else False
        #tensorboard_data_dump = '/tmp/hbf_logs'
        float_type = tf.float64
        x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D
        (inits_C,inits_W,inits_S,rbf_error) = mtf.get_initilizations_HBF(init_type=init_type,dims=dims,mu=mu,std=std,b_init=b_init,S_init=S_init, X_train=X_train, Y_train=Y_train, train_S_type=train_S_type)
        print(inits_W)
        with tf.name_scope("HBF") as scope:
            mdl = mtf.build_HBF2(x,dims,(inits_C,inits_W,inits_S),phase_train,trainable_bn,trainable_S)
            mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0])
    elif model == 'binary_tree_4D_conv':
        print( 'binary_tree_4D')
        #tensorboard_data_dump = '/tmp/hbf_logs'
        inits_S = None
        pca_error = None
        rbf_error = None
        float_type = tf.float32
        # things that need reshaping
        N_cv = X_cv.shape[0]
        N_test = X_test.shape[0]
        #
        X_train = X_train.reshape(N_train,1,D,1)
        #Y_train = Y_train.reshape(N_train,1,D,1)
        X_cv = X_cv.reshape(N_cv,1,D,1)
        #Y_cv = Y_cv.reshape(N_cv,1,D,1)
        X_test = X_test.reshape(N_test,1,D,1)
        #Y_test = Y_test.reshape(N_test,1,D,1)
        x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input')
        #
        filter_size = 2 #fixed for Binary Tree BT
        #nb_filters = nb_filters
        mean, stddev = bn_tree_init_stats
        stddev = float( np.random.uniform(low=0.001, high=stddev) )
        print( 'stddev', stddev)
        with tf.name_scope("build_binary_model") as scope:
            mdl = mtf.build_binary_tree(x,filter_size,nb_filters,mean,stddev,stride_convd1=2,phase_train=phase_train,trainable_bn=trainable_bn)
        #
        dims = [D]+[nb_filters]+[D_out]
        results['nb_filters'] = nb_filters
    elif model == 'binary_tree_D8':
        #tensorboard_data_dump = '/tmp/hbf_logs'
        inits_S = None
        pca_error = None
        rbf_error = None
        float_type = tf.float32
        # things that need reshaping
        N_cv = X_cv.shape[0]
        N_test = X_test.shape[0]
        #
        X_train = X_train.reshape(N_train,1,D,1)
        #Y_train = Y_train.reshape(N_train,1,D,1)
        X_cv = X_cv.reshape(N_cv,1,D,1)
        #Y_cv = Y_cv.reshape(N_cv,1,D,1)
        X_test = X_test.reshape(N_test,1,D,1)
        #Y_test = Y_test.reshape(N_test,1,D,1)
        x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input')
        #
        filter_size = 2 #fixed for Binary Tree BT
        nb_filters1,nb_filters2 = nb_filters
        mean1,stddev1,mean2,stddev2,mean3,stddev3 = bn_tree_init_stats
        with tf.name_scope("binary_tree_D8") as scope:
            mdl = mtf.build_binary_tree_8D(x,nb_filters1,nb_filters2,mean1,stddev1,mean2,stddev2,mean3,stddev3,stride_conv1=2)
        #
        dims = [D]+nb_filters+[D_out]
        results['nb_filters'] = nb_filters

    ## Output and Loss
    y = mdl
    y_ = tf.placeholder(float_type, shape=[None, D_out]) # (M x D)
    with tf.name_scope("L2_loss") as scope:
        l2_loss = tf.reduce_sum( tf.reduce_mean(tf.square(y_-y), 0) )
        #l2_loss = (2.0/N_train)*tf.nn.l2_loss(y_-y)
        #l2_loss = tf.reduce_mean(tf.square(y_-y))

    ##

    with tf.name_scope("train") as scope:
        # starter_learning_rate = 0.0000001
        # decay_rate = 0.9
        # decay_steps = 100
        # staircase = True
        # decay_steps = 10000000
        # staircase = False
        # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(learning_rate=starter_learning_rate, global_step=global_step,decay_steps=decay_steps, decay_rate=decay_rate, staircase=staircase)

        # Passing global_step to minimize() will increment it at each step.
        if optimization_alg == 'GD':
            opt = tf.train.GradientDescentOptimizer(learning_rate)
        elif optimization_alg == 'Momentum':
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=momentum,use_nesterov=use_nesterov)
        elif optimization_alg == 'Adadelta':
            opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate, rho=rho, epsilon=1e-08, use_locking=False, name='Adadelta')
        elif optimization_alg == 'Adam':
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=1e-08, name='Adam')
        elif optimization_alg == 'Adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate)
        elif optimization_alg == 'RMSProp':
            opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=decay, momentum=momentum, epsilon=1e-10, name='RMSProp')

    ##
    if re_train == 're_train' and task_name == 'hrushikesh':
        print( 'task_name: ', task_name)
        print( 're_train: ', re_train)
        var_list = [v for v in tf.all_variables() if v.name == 'C:0']
        train_step = opt.minimize(l2_loss, var_list=var_list)  # only re-train C
    else:
        train_step = opt.minimize(l2_loss, global_step=global_step)

    ##
    with tf.name_scope("l2_loss") as scope:
        ls_scalar_summary = tf.scalar_summary("l2_loss", l2_loss)

    if task_name == 'task_MNIST_flat_auto_encoder':
        with tf.name_scope('input_reshape'):
            x_image = tf.to_float(x, name='ToFloat')
            image_shaped_input_x = tf.reshape(x_image, [-1, 28, 28, 1])
            # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
            tf.image_summary('input', image_shaped_input_x, 10)

        with tf.name_scope('reconstruct'):
            y_image = tf.to_float(y, name='ToFloat')
            image_shaped_input_y = tf.reshape(y_image, [-1, 28, 28, 1])
            # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
            tf.image_summary('reconstruct', image_shaped_input_y, 10)

    def register_all_variables_and_grads(y):
        all_vars = tf.all_variables()
        grad_vars = opt.compute_gradients(y,all_vars) #[ (gradient,variable) ]
        for (dldw,v) in grad_vars:
            if dldw is not None:
                prefix_name = 'derivative_'+v.name
                suffix_text = 'dJd'+v.name
                #mtf.put_summaries(var=tf.sqrt( tf.reduce_sum(tf.square(dldw)) ),prefix_name=prefix_name,suffix_text=suffix_text)
                mtf.put_summaries(var=tf.abs(dldw),prefix_name=prefix_name,suffix_text='_abs_'+suffix_text)
                tf.histogram_summary('hist'+prefix_name, dldw)

    register_all_variables_and_grads(y)
    ## TRAIN
    if phase_train is not None:
        #DO BN
        feed_dict_train = {x:X_train, y_:Y_train, phase_train: False}
        feed_dict_cv = {x:X_cv, y_:Y_cv, phase_train: False}
        feed_dict_test = {x:X_test, y_:Y_test, phase_train: False}
    else:
        #Don't do BN
        feed_dict_train = {x:X_train, y_:Y_train}
        feed_dict_cv = {x:X_cv, y_:Y_cv}
        feed_dict_test = {x:X_test, y_:Y_test}

    def get_batch_feed(X, Y, M, phase_train):
        # sample a minibatch of M indices uniformly from the full data set
        mini_batch_indices = np.random.randint(X.shape[0], size=M)
        Xminibatch =  X[mini_batch_indices,:] # ( M x D^(0) )
        Yminibatch = Y[mini_batch_indices,:] # ( M x D^(L) )
        if phase_train is not None:
            #DO BN
            feed_dict = {x: Xminibatch, y_: Yminibatch, phase_train: True}
        else:
            #Don't do BN
            feed_dict = {x: Xminibatch, y_: Yminibatch}
        return feed_dict

    def print_messages(*args):
        for i, msg in enumerate(args):
            print('>',msg)

    if use_tensorboard:
        if tf.gfile.Exists('/tmp/mdl_logs'):
            tf.gfile.DeleteRecursively('/tmp/mdl_logs')
        tf.gfile.MakeDirs('/tmp/mdl_logs')

    tf.add_check_numerics_ops()

    # Add ops to save and restore all the variables.
    if mdl_save:
        saver = tf.train.Saver(max_to_keep=max_to_keep)
    start_time = time.time()
    file_for_error = './ray_error_file.txt'
    #with open(path+errors_pretty, 'w+') as f_err_msgs:
    with open(file_for_error, 'w+') as f_err_msgs:
        with tf.Session() as sess:
            ## prepare writers and fetches
            if use_tensorboard:
                merged = tf.merge_all_summaries()
                #writer = tf.train.SummaryWriter(tensorboard_data_dump, sess.graph)
                train_writer = tf.train.SummaryWriter(tensorboard_data_dump_train, sess.graph)
                test_writer = tf.train.SummaryWriter(tensorboard_data_dump_test, sess.graph)
                ##
                fetches_train = [merged, l2_loss]
                fetches_cv = l2_loss
                fetches_test = [merged, l2_loss]
            else:
                fetches_train = l2_loss
                fetches_cv = l2_loss
                fetches_test = l2_loss

            sess.run( tf.initialize_all_variables() )
            for i in range(steps):
                ## Sample a training minibatch
                #(batch_xs, batch_ys) = get_batch_feed(X_train, Y_train, M, phase_train)
                feed_dict_batch = get_batch_feed(X_train, Y_train, M, phase_train)
                ## Train
                if i%report_error_freq == 0:
                    if use_tensorboard:
                        (summary_str_train,train_error) = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)
                        cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv)
                        (summary_str_test,test_error) = sess.run(fetches=fetches_test, feed_dict=feed_dict_test)

                        train_writer.add_summary(summary_str_train, i)
                        test_writer.add_summary(summary_str_test, i)
                    else:
                        train_error = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)
                        cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv)
                        test_error = sess.run(fetches=fetches_test, feed_dict=feed_dict_test)

                    loss_msg = "Mdl*%s%s*-units%s, task: %s, step %d/%d, train err %g, cv err: %g test err %g"%(model,nb_hidden_layers,dims,task_name,i,steps,train_error,cv_error,test_error)
                    mdl_info_msg = "Opt:%s, BN %s, BN_trainable: %s After %d/%d iterations, Init: %s" % (optimization_alg,bn,trainable_bn,i,steps,init_type)
                    errors_to_beat = 'BEAT: hbf1_error: %s RBF error: %s PCA error: %s '%(hbf1_error, rbf_error,pca_error)
                    print_messages(loss_msg, mdl_info_msg, errors_to_beat)
                    #sys.stdout.flush()
                    loss_msg+="\n"
                    mdl_info_msg+="\n"
                    errors_to_beat+="\n"

                    print( 'S: ', inits_S)
                    # store results
                    #print type(train_error)
                    results['train_errors'].append( float(train_error) )
                    #print type(cv_error)
                    results['cv_errors'].append( float(cv_error) )
                    #print type(test_error)
                    results['test_errors'].append( float(test_error) )
                    # write errors to pretty print
                    f_err_msgs.write(loss_msg)
                    f_err_msgs.write(mdl_info_msg)
                    # save mdl
                    if mdl_save:
                        save_path = saver.save(sess, path+mdl_dir+'/model.ckpt',global_step=i)
                if use_tensorboard:
                    sess.run(fetches=[merged,train_step], feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
                else:
                    sess.run(fetches=train_step, feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    mtf.load_results_dic(results,git_hash=git_hash,dims=dims,mu=mu,std=std,init_constant=init_constant,b_init=b_init,S_init=S_init,\
        init_type=init_type,model=model,bn=bn,path=path,\
        tensorboard_data_dump_test=tensorboard_data_dump_test,tensorboard_data_dump_train=tensorboard_data_dump_train,\
        report_error_freq=report_error_freq,steps=steps,M=M,optimization_alg=optimization_alg,\
        starter_learning_rate=starter_learning_rate,decay_rate=decay_rate,staircase=staircase)

    ##
    results['job_name'] = job_name
    results['slurm_jobid'] = slurm_jobid
    results['slurm_array_task_id'] = slurm_array_task_id
    #results['tf_rand_seed'] = tf_rand_seed
    results['date'] = date
    results['bn'] = bn
    results['trainable_bn'] = trainable_bn

    seconds = (time.time() - start_time)
    minutes = seconds / 60
    hours = minutes / 60
    print("--- %s seconds ---" % seconds )
    print("--- %s minutes ---" % minutes )
    print("--- %s hours ---" % hours )
    ## dump results to JSON
    results['seconds'] = seconds
    results['minutes'] = minutes
    results['hours'] = hours
    #print results
    with open(path+json_file, 'w+') as f_json:
        json.dump(results,f_json,sort_keys=True, indent=2, separators=(',', ': '))
    print( '\a') #makes beep
Exemple #25
    def build_fast_forward_pass(self, step=0.003):
        self.check_op = tf.add_check_numerics_ops()
        computations = []
        # channels per input variable: 2 when the leaves are binary
        # indicators ('b'), 1 otherwise
        bob = 1
        if self.node_layers[0][0].t == 'b':
            bob = 2
        with tf.name_scope("input"):
            self.input = tf.placeholder(dtype=tf.float64,
                                        shape=(None, max(self.input_order) + 1,
                                               bob),
                                        name='Input')
        # self.input = tf.placeholder(dtype=tf.float64,
        #                                  shape=(len(self.input_order)*2), name='Input')
        #the input to be appended to each layer
        input_splits = []
        self.conz = tf.placeholder(shape=[1], dtype=tf.float64)
        #compute the input
        weights = []
        with tf.name_scope("projection"):
            n = tf.constant(0.0001, dtype=tf.float64)  # small positive floor for weights
            for L in range(len(self.weights)):
                if L != 0:
                    drop = tf.round(
                        tf.random_uniform(self.weights[L].get_shape(),
                                          self.conz,
                                          1.0,
                                          dtype=tf.float64))
                    weights.append(
                        tf.add(
                            tf.nn.relu(tf.subtract(self.weights[L] * drop, n)),
                            n))
                else:
                    weights.append(
                        tf.add(tf.nn.relu(tf.subtract(self.weights[L], n)), n))

        with tf.name_scope('normalization'):
            self.sum_of_weights = [
                tf.segment_sum(x, y) if x.get_shape()[0] > 0 else None
                for x, y in zip(weights, self.inds)
            ]
            sum_of_weights = self.sum_of_weights
            self.norm_weights = [
                tf.div(x, tf.gather(y, z)) if x.get_shape()[0] > 0 else None
                for x, y, z in zip(weights, self.sum_of_weights, self.inds)
            ]

        with tf.name_scope('LEAFS_' + str(len(self.input_order))):
            input_gather = tf.reshape(tf.transpose(
                tf.gather(tf.transpose(self.input, (1, 0, 2)),
                          self.input_swap), (1, 0, 2)),
                                      shape=(-1, len(self.input_order) * bob))
            self.counting.append(input_gather)
            if self.node_layers[0][0].t == 'b':  # binary (indicator) leaves
                input_computation_w = tf.multiply(input_gather, weights[0])
                input_computation_s = tf.transpose(
                    tf.segment_sum(tf.transpose(input_computation_w),
                                   self.inds[0]))
                input_computation_n = tf.log(
                    tf.div(input_computation_s, sum_of_weights[0]))
                computations.append(input_computation_n)
            else:
                pi = tf.constant(np.pi, tf.float64)
                mus = self.cont[0]
                sigs = tf.nn.relu(self.cont[1] - 0.01
                                  ) + 0.01  #sigma can't be smaller than 0.01
                # Gaussian density exp(-(x-mu)^2/(2*sigma^2)) / (sqrt(2*pi)*sigma),
                # plus 1e-6 so the log below never sees an underflowed zero
                input_computation_g = tf.div(
                    tf.exp(
                        tf.negative(
                            tf.div(tf.square(input_gather - mus),
                                   2 * tf.multiply(sigs, sigs)))),
                    tf.sqrt(2 * pi) * sigs) + 0.000001
                input_computation_n = tf.log(input_computation_g)
                computations.append(input_computation_n)

            # split the input computation and figure out which slice goes
            # into each layer
            j = 0
            for i in range(len(self.input_layers)):
                a = tf.constant(j)
                b = self.input_layers[i]
                input_splits.append(
                    tf.slice(input_computation_n, [0, a], [-1, b]))
                j += b

        current_computation = input_splits[0]

        for i in range(len(self.node_layers[1:])):
            L = i + 1  #the layer number

            if self.weights[L].get_shape()[0] == 0:  #product
                with tf.name_scope("PRD" + str(self.inds[L].get_shape()[0])):
                    #do a segment sum in the log domain
                    current_computation = tf.transpose(
                        tf.segment_sum(tf.transpose(current_computation),
                                       self.inds[L]))

            else:
                with tf.name_scope("SUM" + str(self.inds[L].get_shape()[0])):
                    self.counting.append(
                        current_computation)  #stats for counting and cccp

                    #get the max at each node
                    maxes = tf.transpose(
                        tf.segment_max(tf.transpose(current_computation),
                                       self.inds[L]))
                    back_maxes = tf.transpose(
                        tf.gather(tf.transpose(maxes), self.inds[L]))

                    #sub the max at each node
                    current_computation = tf.subtract(current_computation,
                                                      back_maxes)
                    #get out of log domain
                    current_computation = tf.exp(current_computation)
                    #multiply by weights
                    current_computation = tf.multiply(current_computation,
                                                      weights[L])
                    #compute sum node
                    current_computation = tf.transpose(
                        tf.segment_sum(tf.transpose(current_computation),
                                       self.inds[L]))
                    #normalize
                    current_computation = tf.div(current_computation,
                                                 sum_of_weights[L])
                    #re-add the maxes that we took out after entering log domain
                    current_computation = tf.add(tf.log(current_computation),
                                                 maxes)
                    #concatenate with inputs for the next layer
                    current_computation = tf.concat(
                        1, [current_computation, input_splits[L]])
                    #shuffle so that next node is ready
                    current_computation = tf.transpose(
                        tf.gather(tf.transpose(current_computation),
                                  self.shuffle[L]))

            computations.append(current_computation)
        with tf.name_scope('root_node'):
            self.output = current_computation
        with tf.name_scope('loss'):
            if self.multiclass:
                self.labels = tf.placeholder(shape=(None,
                                                    len(self.node_layers[-1])),
                                             dtype=tf.float64)
                self.loss = -tf.reduce_mean(
                    tf.multiply(self.output, 0.1 *
                                (self.labels - 1) + self.labels))
            else:
                self.loss = -tf.reduce_mean(self.output)
            self.loss_summary = tf.scalar_summary(self.summ, self.loss)
        self.opt_val = self.optimizer(step).minimize(self.loss)
        self.computations = computations
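# The max-subtraction inside the SUM layers above is the standard
# log-sum-exp trick; a minimal numpy sketch of the same stabilization
# (illustrative only, not part of the original class):
import numpy as np

def _weighted_logsumexp(log_vals, weights):
    """log(sum_i w_i * exp(log_vals_i)) without underflow/overflow."""
    m = np.max(log_vals)
    return m + np.log(np.sum(weights * np.exp(log_vals - m)))

# exp(-1000) underflows to 0.0 in float64, yet the stabilized form is exact:
print(_weighted_logsumexp(np.array([-1000.0, -1001.0]), np.array([0.5, 0.5])))
# ~ -1000.38 rather than -inf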
Exemple #26
  def initialize(self, n_iter=1000, n_print=None, n_minibatch=None, scale=None,
                 logdir=None, debug=False):
    """Initialize inference algorithm.

    Parameters
    ----------
    n_iter : int, optional
      Number of iterations for algorithm.
    n_print : int, optional
      Number of iterations for each print progress. To suppress print
      progress, then specify 0. Default is ``int(n_iter / 10)``.
    n_minibatch : int, optional
      Number of samples for data subsampling. Default is to use all
      the data. ``n_minibatch`` is available only for TensorFlow,
      Python, and PyMC3 model wrappers; use ``scale`` for Edward's
      language. All data must be passed in as NumPy arrays. For
      subsampling details, see ``tf.train.slice_input_producer`` and
      ``tf.train.batch``.
    scale : dict of RandomVariable to tf.Tensor, optional
      A scalar value to scale computation for any random variable that
      it is bound to. For example, this is useful for scaling
      computations with respect to local latent variables.
    logdir : str, optional
      Directory where event file will be written. For details,
      see ``tf.train.SummaryWriter``. Default is to write nothing.
    debug : bool, optional
      If True, add checks for ``NaN`` and ``Inf`` to all computations
      in the graph. May result in substantially slower execution
      times.
    """
    self.n_iter = n_iter
    if n_print is None:
      self.n_print = int(n_iter / 10)
    else:
      self.n_print = n_print

    self.t = tf.Variable(0, trainable=False)
    self.increment_t = self.t.assign_add(1)

    if scale is None:
      scale = {}
    elif not isinstance(scale, dict):
      raise TypeError("scale must be a dict object.")

    self.scale = scale
    self.n_minibatch = n_minibatch
    if n_minibatch is not None and \
       self.model_wrapper is not None and \
       not isinstance(self.model_wrapper, StanModel):
      # Re-assign data to batch tensors, with size given by
      # ``n_minibatch``. Don't do this for random variables in data.
      dict_rv = {}
      dict_data = {}
      for key, value in six.iteritems(self.data):
        if isinstance(value, RandomVariable):
          dict_rv[key] = value
        else:
          dict_data[key] = value

      values = list(six.itervalues(dict_data))
      slices = tf.train.slice_input_producer(values)
      # By default use as many threads as CPUs.
      batches = tf.train.batch(slices, n_minibatch,
                               num_threads=multiprocessing.cpu_count())
      if not isinstance(batches, list):
        # ``tf.train.batch`` returns tf.Tensor if ``slices`` is a
        # list of size 1.
        batches = [batches]

      self.data = {key: value for key, value in
                   zip(six.iterkeys(dict_data), batches)}
      self.data.update(dict_rv)

    if logdir is not None:
      self.logging = True
      self.train_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph())
      self.summarize = tf.merge_all_summaries()
    else:
      self.logging = False

    self.debug = debug
    if self.debug:
      self.op_check = tf.add_check_numerics_ops()
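# A typical driver loop for an `initialize()` like the one above (Edward's
# documented manual-control pattern; the sketch assumes an already-constructed
# `inference` object):
#
#   inference.initialize(n_iter=1000, logdir='log', debug=True)
#   tf.global_variables_initializer().run()
#   for _ in range(inference.n_iter):
#       info_dict = inference.update()
#       inference.print_progress(info_dict)
#   inference.finalize()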
Exemple #27
  def initialize(self, n_iter=1000, n_print=None, scale=None,
                 auto_transform=True, logdir=None, log_timestamp=True,
                 log_vars=None, debug=False):
    """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int.
        Number of iterations for each print progress. To suppress print
        progress, then specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor.
        A tensor to scale computation for any random variable that it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      auto_transform: bool.
        Whether to automatically transform continuous latent variables
        of unequal support to be on the unconstrained space. It is
        only applied if the argument is `True`, the latent variable
        pair are `ed.RandomVariable`s with the `support` attribute,
        and the supports are both continuous and unequal.
      logdir: str.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
    self.n_iter = n_iter
    if n_print is None:
      self.n_print = int(n_iter / 100)
    else:
      self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False, name="iteration")

    self.increment_t = self.t.assign_add(1)

    if scale is None:
      scale = {}
    elif not isinstance(scale, dict):
      raise TypeError("scale must be a dict object.")

    self.scale = scale

    # map from original latent vars to unconstrained versions
    self.transformations = {}
    if auto_transform:
      latent_vars = self.latent_vars.copy()
      # latent_vars maps original latent vars to constrained Q's.
      # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's.
      self.latent_vars = {}
      self.latent_vars_unconstrained = {}
      for z, qz in six.iteritems(latent_vars):
        if hasattr(z, 'support') and hasattr(qz, 'support') and \
                z.support != qz.support and qz.support != 'point':

          # transform z to an unconstrained space
          z_unconstrained = transform(z)
          self.transformations[z] = z_unconstrained

          # make sure we also have a qz that covers the unconstrained space
          if qz.support == "points":
            qz_unconstrained = qz
          else:
            qz_unconstrained = transform(qz)
          self.latent_vars_unconstrained[z_unconstrained] = qz_unconstrained

          # additionally construct the transformation of qz
          # back into the original constrained space
          if z_unconstrained != z:
            qz_constrained = transform(
                qz_unconstrained, bijectors.Invert(z_unconstrained.bijector))

            try:  # attempt to pushforward the params of Empirical distributions
              qz_constrained.params = z_unconstrained.bijector.inverse(
                  qz_unconstrained.params)
            except:  # qz_unconstrained is not an Empirical distribution
              pass

          else:
            qz_constrained = qz_unconstrained

          self.latent_vars[z] = qz_constrained
        else:
          self.latent_vars[z] = qz
          self.latent_vars_unconstrained[z] = qz
      del latent_vars

    if logdir is not None:
      self.logging = True
      if log_timestamp:
        logdir = os.path.expanduser(logdir)
        logdir = os.path.join(
            logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

      self._summary_key = tf.get_default_graph().unique_name("summaries")
      self._set_log_variables(log_vars)
      self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    else:
      self.logging = False

    self.debug = debug
    if self.debug:
      self.op_check = tf.add_check_numerics_ops()

    # Store reset ops which user can call. Subclasses should append
    # any ops needed to reset internal variables in inference.
    self.reset = [tf.variables_initializer([self.t])]

sess = tf.Session()

# Build the input placeholders and the VGG16 network
imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
target = tf.placeholder("float", [None, 100])
vgg = vgg16(imgs, '../model/vgg16_weights.npz', sess)
print('VGG network created')

# Loss function and optimizer
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=vgg.fc3l,
                                            labels=target))
print([_.name for _ in vgg.parameters])
optimizer = tf.train.MomentumOptimizer(learning_rate=0.001,
                                       momentum=0.9).minimize(loss)
check_op = tf.add_check_numerics_ops()

# Evaluation
correct_prediction = tf.equal(tf.argmax(vgg.fc3l, 1), tf.argmax(target, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
num_correct_preds = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))

# Load pretrained weights and initialize the remaining variables
sess.run(tf.global_variables_initializer())
vgg.load_initial_weights(sess)
print([_.name for _ in vgg.parameters])
for v in tf.trainable_variables():
    print("Trainable variables", v)
print('Starting training')

val_batch_size = 10
    def _model_fn(self, features, labels, mode, params):

        checks = tf.add_check_numerics_ops()  # assert every tensor stays finite
        control_dependencies = [checks]

        input_frequency_size = self._dct_coefficient_count
        input_time_size = self._spectrogram_length

        first_filter_width = 8
        first_filter_height = 20
        first_filter_count = 64

        second_filter_width = 4
        second_filter_height = 10
        second_filter_count = 64

        fingerprint_input = features[self._feature_type.FEATURE_1]
        labels = tf.cast(labels, tf.int64)

        tf.logging.info(
            "=====> fingerprint_input {}".format(fingerprint_input))
        tf.logging.info("=====> labels {}".format(labels))

        fingerprint_4d = tf.reshape(
            fingerprint_input, [-1, input_time_size, input_frequency_size, 1])

        tf.logging.info("=====> fingerprint_4d {}".format(fingerprint_4d))

        first_weights = tf.Variable(
            tf.truncated_normal([
                first_filter_height, first_filter_width, 1, first_filter_count
            ],
                                stddev=0.01))
        first_bias = tf.Variable(tf.zeros([first_filter_count]))

        first_conv = tf.nn.conv2d(fingerprint_4d, first_weights, [1, 1, 1, 1],
                                  'SAME') + first_bias

        tf.logging.info("=====> first_conv {}".format(first_conv))

        first_relu = tf.nn.relu(first_conv)

        if mode != ModeKeys.INFER:
            first_dropout = tf.nn.dropout(first_relu,
                                          self.sr_config._keep_prob)
        else:
            first_dropout = first_relu

        max_pool = tf.nn.max_pool(first_dropout, [1, 2, 2, 1], [1, 2, 2, 1],
                                  'SAME')

        second_weights = tf.Variable(
            tf.truncated_normal([
                second_filter_height, second_filter_width, first_filter_count,
                second_filter_count
            ],
                                stddev=0.01))
        second_bias = tf.Variable(tf.zeros([second_filter_count]))
        second_conv = tf.nn.conv2d(max_pool, second_weights, [1, 1, 1, 1],
                                   'SAME') + second_bias
        second_relu = tf.nn.relu(second_conv)

        tf.logging.info("=====> second_conv {}".format(second_conv))

        if mode != ModeKeys.INFER:
            second_dropout = tf.nn.dropout(second_relu,
                                           self.sr_config._keep_prob)
        else:
            second_dropout = second_relu

        second_conv_shape = second_dropout.get_shape()
        second_conv_output_width = second_conv_shape[2]
        second_conv_output_height = second_conv_shape[1]
        second_conv_element_count = int(second_conv_output_width *
                                        second_conv_output_height *
                                        second_filter_count)
        flattened_second_conv = tf.reshape(second_dropout,
                                           [-1, second_conv_element_count])

        final_fc_weights = tf.Variable(
            tf.truncated_normal(
                [second_conv_element_count, self.sr_config._num_classes],
                stddev=0.01))

        final_fc_bias = tf.Variable(tf.zeros([self.sr_config._num_classes]))
        logits = tf.matmul(flattened_second_conv,
                           final_fc_weights) + final_fc_bias

        tf.logging.info("=====> logits {}".format(logits))

        if mode != ModeKeys.INFER:
            with tf.name_scope('cross_entropy'):
                losses = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                                logits=logits)
            tf.logging.info("=====> losses {}".format(losses))
        classes = tf.argmax(logits, 1)
        predictions = {"classes": classes}

        tf.logging.info("=====> classes {}".format(classes))

        # Loss, training and eval operations are not needed during inference.
        loss = None
        train_op = None
        eval_metric_ops = {}

        if mode != ModeKeys.INFER:
            with tf.name_scope(
                    'train-optimization'
            ):  #, tf.control_dependencies(control_dependencies):
                global_step = tf.train.get_global_step()
                learning_rate = self.sr_config._learning_rate
                train_op = tf.contrib.layers.optimize_loss(
                    loss=losses,
                    global_step=global_step,
                    optimizer=tf.train.GradientDescentOptimizer,
                    learning_rate=learning_rate)

                loss = losses

            correct_prediction = tf.equal(predictions["classes"], labels)
            confusion_matrix = tf.confusion_matrix(
                labels,
                predictions["classes"],
                num_classes=self.sr_config._num_classes)

            eval_metric_ops = {
                'Accuracy':
                tf.metrics.accuracy(labels=tf.cast(labels, tf.int32),
                                    predictions=predictions["classes"],
                                    name='accuracy'),
                'Precision':
                tf.metrics.precision(labels=tf.cast(labels, tf.int32),
                                     predictions=predictions["classes"],
                                     name='Precision'),
                'Recall':
                tf.metrics.recall(labels=tf.cast(labels, tf.int32),
                                  predictions=predictions["classes"],
                                  name='Recall')
            }

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            # training_hooks=self.hooks
        )
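# For reference, a minimal self-contained demonstration (TF 1.x) of what
# `tf.add_check_numerics_ops()` buys you in the examples above: running the
# check op alongside a computation raises InvalidArgumentError as soon as any
# tensor becomes NaN/Inf, instead of letting it propagate silently.
import tensorflow as tf

_x = tf.placeholder(tf.float32, [None])
_loss = tf.reduce_mean(tf.log(_x))      # log(0) -> -inf
_check = tf.add_check_numerics_ops()    # asserts every tensor is finite

with tf.Session() as _sess:
    # Raises InvalidArgumentError because log(0) = -inf trips the check op.
    _sess.run([_loss, _check], feed_dict={_x: [0.0, 1.0]})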
Exemple #30
def inference(image, bounding_box, bbox_type, bbox_character):


    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        initial /= 10.0
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.01, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


    '''
    input must be a list.
    each element in the list is an np.ndarray representing one Hangul character.
    '''

    input_data = []

    for row in bounding_box:
        input_row = []
        for x1,y1,x2,y2 in row:
            input_row.append(load_english_dataset.read_inference_data(image[x1:x2, y1:y2]))
        input_data.append(input_row)


    sess = tf.InteractiveSession()
    x = tf.placeholder("float", shape=[1,32*32*1]) #[32*32*1]
    y_ = tf.placeholder("float", shape=[1,class_size]) #[66], equal to number of Hangul

    x_image = tf.reshape(x, [-1,32,32,1])

    W_conv0 = weight_variable([3,3,1,16])
    b_conv0 = bias_variable([16])

    h_conv0 = tf.nn.relu(conv2d(x_image, W_conv0) + b_conv0) #h_conv0=[1,32*32*16]

    W_conv1 = weight_variable([3,3,16,32])
    b_conv1 = bias_variable([32])

    #x_image = tf.reshape(x, [-1,32,32,1])

    h_conv1 = tf.nn.relu(conv2d(h_conv0, W_conv1) + b_conv1) #h_conv1=[1,32*32*32]
    h_pool1 = max_pool_2x2(h_conv1) #h_pool1=[1,16*16*32]

    W_conv2 = weight_variable([3,3,32,64])
    b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) #h_conv2=[16*16*64]
    h_pool2 = max_pool_2x2(h_conv2) #h_pool2=[8*8*64]


    W_conv3 = weight_variable([3,3,64,64])
    b_conv3 = bias_variable([64])

    h_conv3 = tf.nn.relu(conv2d(h_pool2,W_conv3) + b_conv3) #h_conv3=[8*8*64]

    W_fc1 = weight_variable([8*8*64, 1024])
    b_fc1 = bias_variable([1024])

    h_conv3_flat = tf.reshape(h_conv3, [-1, 8*8*64])

    h_fc1 = tf.nn.relu(tf.matmul(h_conv3_flat, W_fc1) + b_fc1)

    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)


    # [66] is number of Hangul
    W_fc2 = weight_variable([1024,512])
    b_fc2 = bias_variable([512])

    h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop,W_fc2) + b_fc2)

    h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

    W_fc3 = weight_variable([512,class_size])
    b_fc3 = bias_variable([class_size])

    y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)

    cross_entropy = -tf.reduce_sum(y_*tf.log(y_conv + 1e-7))  # 1e-7 guards log(0)

    global_step = tf.Variable(0, trainable=False)
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(y_conv,1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    sess.run(tf.initialize_all_variables())

    saver = tf.train.Saver(tf.all_variables())

    start_time = time.time()

    check_op = tf.add_check_numerics_ops()

    print('*************************************************')
    check_file = checkpoint_dir + 'hangul_shortcut.ckpt'
    saver.restore(sess, check_file)

    i = 0
    for row in input_data:
        j = -1
        for input_box in row:
            """
            only takes hangul character.
            """
            j += 1
            if bbox_type[i][j] > 6 : continue


            part_image_list = divide_image(image_orig=input_box, type_of_box=int(bbox_type[i][j]))
            part_image_list = getting_zero_padding_image(partition_image=part_image_list)


            concated_list = []
            for part_image in part_image_list:
                part_image_label = sess.run(tf.argmax(y_conv, 1), feed_dict={x:part_image, keep_prob:1.0})

                concated_list.extend(part_image_label)

            input()  # debug pause; press Enter to continue


            print('concated_list is >>> ', concated_list)
            bbox_character[i][j] = concat.concatenate(hangul_type=int(bbox_type[i][j]), concat_list=concated_list)
        i += 1

    bbox_character_file = open(pickle_path + 'bbox+_character.txt', 'w')
    pickle.dump(bbox_character, bbox_character_file)
    bbox_character_file.close()

    sess.close()

    return concated_list
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          learning_rate=0.0001,
          num_epochs=400000,
          minibatch_size=32,
          print_cost=True):
    """
    Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.

    Arguments:
    X_train -- training set, of shape (input size = 12288, number of training examples = 1080)
    Y_train -- test set, of shape (output size = 6, number of training examples = 1080)
    X_test -- training set, of shape (input size = 12288, number of training examples = 120)
    Y_test -- test set, of shape (output size = 6, number of test examples = 120)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    ops.reset_default_graph()
    (
        n_x, m
    ) = X_train.shape  # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0]  # n_y : output size
    costs = []  # To keep track of the cost
    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Z3 = tf.Print(Z3,[tf.shape(Z3)], message="my Z-values:")
    # Y = tf.Print(Y,[tf.shape(Y)], message="my Y-values:")
    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    check_op = tf.add_check_numerics_ops()
    # Start the session to compute the tensorflow graph
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Run the initialization
        sess.run(init)
        # saver.restore(sess, "./model.ckpt")

        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.  # Defines a cost related to an epoch
            num_minibatches = int(
                m / minibatch_size
            )  # number of minibatches of size minibatch_size in the train set
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost"; the feed_dict should contain a minibatch for (X, Y).
                ### START CODE HERE ### (1 line)
                # print(minibatch_X.shape)
                _, minibatch_cost, _ = sess.run([optimizer, cost, check_op],
                                                feed_dict={
                                                    X: minibatch_X,
                                                    Y: minibatch_Y
                                                })
                ### END CODE HERE ###
                # print(minibatch_cost)
                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every 100 epochs and checkpoint the model
            if print_cost and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
                save_path = saver.save(sess, "./model.ckpt")
                print("Model saved in file: %s" % save_path)
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        # plt.plot(np.squeeze(costs))
        # plt.ylabel('cost')
        # plt.xlabel('iterations (per tens)')
        # plt.title("Learning rate =" + str(learning_rate))
        # plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions
        # correct_prediction = tf.sqrt(tf.squared_difference(Z3, Y))

        # Calculate accuracy on the test set
        # accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # mean relative error |Y - Z3| / Y (lower is better)
        accuracy = tf.reduce_mean(tf.div(tf.abs(tf.subtract(Y, Z3)), Y))
        print("Train Error:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Error:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
def tf_record_CNN_spherical(tone_version,
                            itd_tones,
                            ild_tones,
                            manually_added,
                            freq_label,
                            sam_tones,
                            transposed_tones,
                            precedence_effect,
                            narrowband_noise,
                            all_positions_bkgd,
                            background_textures,
                            testing,
                            branched,
                            zero_padded,
                            stacked_channel,
                            model_version,
                            num_epochs,
                            train_path_pattern,
                            bkgd_train_path_pattern,
                            arch_ID,
                            config_array,
                            files,
                            num_files,
                            newpath,
                            regularizer,
                            SNR_max=40,
                            SNR_min=5):

    bkgd_training_paths = glob.glob(bkgd_train_path_pattern)
    training_paths = glob.glob(train_path_pattern)

    ###Do not change parameters below unless altering network###

    BKGD_SIZE = [78, 48000]
    STIM_SIZE = [78, 89999]
    TONE_SIZE = [78, 59099]
    ITD_TONE_SIZE = [78, 39690]
    if zero_padded:
        STIM_SIZE = [78, 48000]

    if stacked_channel:
        STIM_SIZE = [39, 48000, 2]
        BKGD_SIZE = [39, 48000, 2]
    n_classes_localization = 504
    n_classes_recognition = 780
    localization_bin_resolution = 5

    #Optimization Params
    batch_size = 16
    learning_rate = 1e-3
    loss_exponent = 12
    loss_scale = 2**loss_exponent
    bn_training_state = True
    dropout_training_state = True
    training_state = True
    #Queue parameters
    dequeue_min = 8
    dequeue_min_main = 10
    #num_epochs = None
    #Change for network precision; must match input data type
    filter_dtype = tf.float32
    padding = 'VALID'

    #Downsampling Params
    sr = 48000
    cochleagram_sr = 8000
    post_rectify = True

    #Display interval for training statistics
    display_step = 25
    #Changes how often data is saved to numpy arrays when dataset is large
    write_step = 15625  #250k examples
    #write_step = 25 #250k examples

    if itd_tones:
        TONE_SIZE = ITD_TONE_SIZE

    if ild_tones:
        itd_tones = True

    if testing:
        bn_training_state = False
        dropout_training_state = False
        training_state = False
        num_epochs = 1
        #Using these values because 5/40 are the standard training SNRs
        if not (SNR_min > 30 or SNR_max > 40):
            SNR_max = 35.0
            SNR_min = 30.0
        print("Testing SNR(dB): Max: " + str(SNR_max) + "Min: " + str(SNR_min))

    #mean_subbands = np.load("mean_subband_51400.npy")/51400
    #tf_mean_subbands = tf.constant(mean_subbands,dtype=filter_dtype)
    def check_speed():
        for i in range(30):
            sess.run(subbands_batch)
        start_time = time.time()
        for _ in range(30):
            time.sleep(0.5)
            print(time.time() - start_time)
            print("Len:",
                  sess.run('example_queue/random_shuffle_queue_Size:0'))

    with tf.device("/cpu:0"):
        ###READING QUEUE MACHINERY###

        def add_labels(paths):
            return [(stim, stim.split('_')[-1].split('a')[0])
                    for stim in paths]

        def rms(wav):
            square = tf.square(wav)
            mean_val = tf.reduce_mean(square)
            return tf.sqrt(mean_val)

        def combine_signal_and_noise_stacked_channel(signals, backgrounds,
                                                     delay, sr, cochleagram_sr,
                                                     post_rectify):
            tensor_dict_fg = {}
            tensor_dict_bkgd = {}
            tensor_dict = {}
            snr = tf.random_uniform([],
                                    minval=SNR_min,
                                    maxval=SNR_max,
                                    name="snr_gen")
            for path1 in backgrounds:
                if path1 == 'train/image':
                    background = backgrounds['train/image']
                else:
                    tensor_dict_bkgd[path1] = backgrounds[path1]
            for path in signals:
                if path == 'train/image':
                    signal = signals['train/image']
                    sig_len = signal.shape[1] - delay
                    sig = tf.slice(signal, [0, 0, 0], [39, sig_len, 2])
                    max_val = tf.reduce_max(sig)
                    sig_rms = rms(tf.reduce_sum(sig, [0, 2]))
                    sig = tf.div(sig, sig_rms)
                    #sig = tf.Print(sig, [tf.reduce_max(sig)],message="\nMax SIG:")
                    sf = tf.pow(tf.constant(10, dtype=tf.float32),
                                tf.div(snr, tf.constant(20, dtype=tf.float32)))
                    bak_rms = rms(tf.reduce_sum(background, [0, 2]))
                    #bak_rms = tf.Print(bak_rms, [tf.reduce_max(bak_rms)],message="\nNoise RMS:")
                    sig_rms = rms(tf.reduce_sum(sig, [0, 2]))
                    scaling_factor = tf.div(tf.div(sig_rms, bak_rms), sf)
                    #scaling_factor = tf.Print(scaling_factor, [scaling_factor],message="\nScaling Factor:")
                    noise = tf.scalar_mul(scaling_factor, background)
                    #noise = tf.Print(noise, [tf.reduce_max(noise)],message="\nMax Noise:")
                    front = tf.slice(noise, [0, 0, 0], [39, delay, 2])
                    middle = tf.slice(noise, [0, delay, 0], [39, sig_len, 2])
                    end = tf.slice(noise, [0, (delay + int(sig_len)), 0],
                                   [39, -1, 2])
                    middle_added = tf.add(middle, sig)
                    new_sig = tf.concat([front, middle_added, end], 1)
                    #new_sig = sig
                    rescale_factor = tf.div(max_val, tf.reduce_max(new_sig))
                    #rescale_factor = tf.Print(rescale_factor, [rescale_factor],message="\nRescaling Factor:")
                    new_sig = tf.scalar_mul(rescale_factor, new_sig)
                    new_sig_rectified = tf.nn.relu(new_sig)
                    new_sig_reshaped = tf.reshape(new_sig_rectified,
                                                  [39, 48000, 2])
                    #new_sig_reshaped = tf.reshape(new_sig,[72,30000,1])
                    #return (signal, background,noise,new_sig_reshaped)
                    tensor_dict_fg[path] = new_sig_reshaped
                else:
                    tensor_dict_fg[path] = signals[path]
            tensor_dict[0] = tensor_dict_fg
            tensor_dict[1] = tensor_dict_bkgd
            return tensor_dict

        def combine_signal_and_noise(signals, backgrounds, delay, sr,
                                     cochleagram_sr, post_rectify):
            tensor_dict_fg = {}
            tensor_dict_bkgd = {}
            tensor_dict = {}
            snr = tf.random_uniform([],
                                    minval=SNR_min,
                                    maxval=SNR_max,
                                    name="snr_gen")
            for path1 in backgrounds:
                if path1 == 'train/image':
                    background = backgrounds['train/image']
                else:
                    tensor_dict_bkgd[path1] = backgrounds[path1]
            for path in signals:
                if path == 'train/image':
                    signal = signals['train/image']
                    sig_len = signal.shape[1] - delay
                    sig = tf.slice(signal, [0, 0], [78, sig_len])
                    max_val = tf.reduce_max(sig)
                    sig_rms = rms(tf.reduce_sum(sig, 0))
                    sig = tf.div(sig, sig_rms)
                    #sig = tf.Print(sig, [tf.reduce_max(sig)],message="\nMax SIG:")
                    sf = tf.pow(tf.constant(10, dtype=tf.float32),
                                tf.div(snr, tf.constant(20, dtype=tf.float32)))
                    bak_rms = rms(tf.reduce_sum(background, 0))
                    #bak_rms = tf.Print(bak_rms, [tf.reduce_max(bak_rms)],message="\nNoise RMS:")
                    sig_rms = rms(tf.reduce_sum(sig, 0))
                    scaling_factor = tf.div(tf.div(sig_rms, bak_rms), sf)
                    #scaling_factor = tf.Print(scaling_factor, [scaling_factor],message="\nScaling Factor:")
                    noise = tf.scalar_mul(scaling_factor, background)
                    #noise = tf.Print(noise, [tf.reduce_max(noise)],message="\nMax Noise:")
                    front = tf.slice(noise, [0, 0], [78, delay])
                    middle = tf.slice(noise, [0, delay], [78, sig_len])
                    end = tf.slice(noise, [0, (delay + int(sig_len))],
                                   [78, -1])
                    middle_added = tf.add(middle, sig)
                    new_sig = tf.concat([front, middle_added, end], 1)
                    #new_sig = sig
                    rescale_factor = tf.div(max_val, tf.reduce_max(new_sig))
                    #rescale_factor = tf.Print(rescale_factor, [rescale_factor],message="\nRescaling Factor:")
                    new_sig = tf.scalar_mul(rescale_factor, new_sig)
                    new_sig_rectified = tf.nn.relu(new_sig)
                    # row count must match the 78-row slices above
                    new_sig_reshaped = tf.reshape(new_sig_rectified,
                                                  [78, 48000, 1])
                    #new_sig_reshaped = tf.reshape(new_sig,[72,30000,1])
                    #return (signal, background,noise,new_sig_reshaped)
                    tensor_dict_fg[path] = new_sig_reshaped
                else:
                    tensor_dict_fg[path] = signals[path]
            tensor_dict[0] = tensor_dict_fg
            tensor_dict[1] = tensor_dict_bkgd
            return tensor_dict
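
        # Why the scaling above hits the requested SNR: with sf = 10^(snr/20)
        # and scaling_factor = (sig_rms / bak_rms) / sf, the scaled noise has
        # rms = bak_rms * scaling_factor = sig_rms / sf, so
        # 20*log10(sig_rms / noise_rms) = 20*log10(sf) = snr dB.
        # A simplified NumPy sketch of that bookkeeping (hypothetical helper,
        # defined here but never called; the real code computes rms on
        # channel-summed tensors):
        def _snr_scale_check(sig, bak, snr_db):
            import numpy as np
            rms = lambda w: np.sqrt(np.mean(np.square(w)))
            sf = 10.0 ** (snr_db / 20.0)
            noise = ((rms(sig) / rms(bak)) / sf) * bak
            # returns snr_db up to floating-point error
            return 20.0 * np.log10(rms(sig) / rms(noise))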

        #Best to read https://www.tensorflow.org/api_guides/python/reading_data#Reading_from_files

        options = tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.GZIP)
        is_bkgd = False
        first = training_paths[0]
        for example in tf.python_io.tf_record_iterator(first, options=options):
            result = tf.train.Example.FromString(example)
            break

        jsonMessage = MessageToJson(tf.train.Example.FromString(example))
        jsdict = json.loads(jsonMessage)
        feature = parse_nested_dictionary(jsdict, is_bkgd)

        dataset = build_tfrecords_iterator(num_epochs, train_path_pattern,
                                           is_bkgd, feature, narrowband_noise,
                                           manually_added, STIM_SIZE,
                                           localization_bin_resolution,
                                           stacked_channel)

        ###READING QUEUE MACHINERY###

        # Create a list of filenames and pass it to a queue
        bkgd_filename_queue = tf.train.string_input_producer(
            bkgd_training_paths,
            shuffle=True,
            capacity=len(bkgd_training_paths))
        # Define a reader and read the next record
        options = tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.GZIP)
        bkgd_reader = tf.TFRecordReader(options=options)
        _, bkgd_serialized_example = bkgd_reader.read(bkgd_filename_queue)

        is_bkgd = True
        bkgd_first = bkgd_training_paths[0]
        for bkgd_example in tf.python_io.tf_record_iterator(bkgd_first,
                                                            options=options):
            bkgd_result = tf.train.Example.FromString(bkgd_example)
            break

        bkgd_jsonMessage = MessageToJson(
            tf.train.Example.FromString(bkgd_example))
        bkgd_jsdict = json.loads(bkgd_jsonMessage)
        bkgd_feature = parse_nested_dictionary(bkgd_jsdict, is_bkgd)

        dataset_bkgd = build_tfrecords_iterator(
            num_epochs, bkgd_train_path_pattern, is_bkgd, bkgd_feature,
            narrowband_noise, manually_added, BKGD_SIZE,
            localization_bin_resolution, stacked_channel)

        new_dataset = tf.data.Dataset.zip((dataset, dataset_bkgd))

        #SNR = tf.random_uniform([],minval=SNR_min,maxval=SNR_max,name="snr_gen")

        if stacked_channel:
            new_dataset = new_dataset.map(
                lambda x, y: combine_signal_and_noise_stacked_channel(
                    x, y, 0, 48000, 8000, post_rectify=True))
        else:
            new_dataset = new_dataset.map(
                lambda x, y: combine_signal_and_noise(
                    x, y, 0, 48000, 8000, post_rectify=True))
        batch_sizes = tf.constant(batch_size, dtype=tf.int64)
        new_dataset = new_dataset.shuffle(buffer_size=200).batch(
            batch_size=batch_sizes, drop_remainder=True)
        #combined_iter = new_dataset.make_one_shot_iterator()
        combined_iter = new_dataset.make_initializable_iterator()
        combined_iter_dict = combined_iter.get_next()

        if background_textures:
            bkgd_metadata = [
                combined_iter_dict[1]['train/azim'],
                combined_iter_dict[1]['train/elev']
            ]

    ###END READING QUEUE MACHINERY###

    def make_downsample_filt_tensor(SR=16000,
                                    ENV_SR=200,
                                    WINDOW_SIZE=1001,
                                    beta=5.0,
                                    pycoch_downsamp=False):
        """
        Make the sinc filter that will be used to downsample the cochleagram
        Parameters
        ----------
        SR : int
            raw sampling rate of the audio signal
        ENV_SR : int
            end sampling rate of the envelopes
        WINDOW_SIZE : int
            the size of the downsampling window (should be large enough to go to zero on the edges).
        beta : float
            kaiser window shape parameter
        pycoch_downsamp : Boolean
            if true, uses a slightly different downsampling function
        Returns
        -------
        downsample_filt_tensor : tensorflow tensor, tf.float32
            a tensor of shape [1, WINDOW_SIZE, 1, 1] containing the sinc window with a kaiser lowpass filter that is applied while downsampling the cochleagram
        """
        DOWNSAMPLE = SR / ENV_SR
        if not pycoch_downsamp:
            downsample_filter_times = np.arange(-WINDOW_SIZE / 2,
                                                int(WINDOW_SIZE / 2))
            downsample_filter_response_orig = np.sinc(
                downsample_filter_times / DOWNSAMPLE) / DOWNSAMPLE
            downsample_filter_window = signallib.kaiser(WINDOW_SIZE, beta)
            downsample_filter_response = downsample_filter_window * downsample_filter_response_orig
        else:
            max_rate = DOWNSAMPLE
            f_c = 1. / max_rate  # cutoff of FIR filter (rel. to Nyquist)
            half_len = 10 * max_rate  # reasonable cutoff for our sinc-like function
            if max_rate != 1:
                downsample_filter_response = signallib.firwin(2 * half_len + 1,
                                                              f_c,
                                                              window=('kaiser',
                                                                      beta))
            else:  # just in case we aren't downsampling -- I think this should work?
                downsample_filter_response = np.zeros(int(2 * half_len + 1))
                downsample_filter_response[int(half_len) + 1] = 1

            # Zero-pad our filter to put the output samples at the center
            # n_pre_pad = int((DOWNSAMPLE - half_len % DOWNSAMPLE))
            # n_post_pad = 0
            # n_pre_remove = (half_len + n_pre_pad) // DOWNSAMPLE
            # We should rarely need to do this given our filter lengths...
            # while _output_len(len(h) + n_pre_pad + n_post_pad, x.shape[axis],
            #                  up, down) < n_out + n_pre_remove:
            #     n_post_pad += 1
            # downsample_filter_response = np.concatenate((np.zeros(n_pre_pad), downsample_filter_response, np.zeros(n_post_pad)))

        downsample_filt_tensor = tf.constant(downsample_filter_response,
                                             tf.float32)
        downsample_filt_tensor = tf.expand_dims(downsample_filt_tensor, 0)
        downsample_filt_tensor = tf.expand_dims(downsample_filt_tensor, 2)
        downsample_filt_tensor = tf.expand_dims(downsample_filt_tensor, 3)

        return downsample_filt_tensor

    def downsample(signal,
                   current_rate,
                   new_rate,
                   window_size,
                   beta,
                   post_rectify=True):
        downsample = current_rate / new_rate
        message = ("The current downsample rate {} is "
                   "not an integer. Only integer ratios "
                   "between current and new sampling rates "
                   "are supported".format(downsample))

        assert (current_rate % new_rate == 0), message
        message = ("New rate must be less than old rate for this "
                   "implementation to work!")
        assert (new_rate < current_rate), message
        # make the downsample tensor
        downsample_filter_tensor = make_downsample_filt_tensor(
            current_rate, new_rate, window_size, pycoch_downsamp=False)
        downsampled_signal = tf.nn.conv2d(signal,
                                          downsample_filter_tensor,
                                          strides=[1, 1, int(downsample), 1],
                                          padding='SAME',
                                          name='conv2d_cochleagram_raw')
        if post_rectify:
            downsampled_signal = tf.nn.relu(downsampled_signal)

        return downsampled_signal
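
    # A minimal usage sketch for the two helpers above (hypothetical shapes,
    # kept commented out so nothing extra runs inside this function):
    #   sig = tf.random_uniform([16, 78, 48000, 1])    # [batch, freq, time, 1]
    #   env = downsample(sig, current_rate=48000, new_rate=8000,
    #                    window_size=4097, beta=10.06)  # -> [16, 78, 8000, 1]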

    def put_kernels_on_grid(kernel, pad=1):
        '''Visualize conv. filters as an image (mostly for the 1st layer).
      Arranges filters into a grid, with some paddings between adjacent filters.
      Args:
        kernel:            tensor of shape [Y, X, NumChannels, NumKernels]
        pad:               number of black pixels around each filter (between them)
      Return:
        Tensor of shape [1, (Y+2*pad)*grid_Y, (X+2*pad)*grid_X, NumChannels].
      '''

        # get shape of the grid. NumKernels == grid_Y * grid_X
        def factorization(n):
            for i in range(int(sqrt(float(n))), 0, -1):
                if n % i == 0:
                    if i == 1:
                        print('Who would enter a prime number of filters')
                    return (i, int(n / i))

        (grid_Y, grid_X) = factorization(kernel.get_shape()[3].value)
        print('grid: %d = (%d, %d)' %
              (kernel.get_shape()[3].value, grid_Y, grid_X))

        x_min = tf.reduce_min(kernel)
        x_max = tf.reduce_max(kernel)
        kernel = (kernel - x_min) / (x_max - x_min)

        # pad X and Y
        x = tf.pad(kernel,
                   tf.constant([[pad, pad], [pad, pad], [0, 0], [0, 0]]),
                   mode='CONSTANT')

        # X and Y dimensions, w.r.t. padding
        Y = kernel.get_shape()[0] + 2 * pad
        X = kernel.get_shape()[1] + 2 * pad

        channels = kernel.get_shape()[2]

        # put NumKernels to the 1st dimension
        x = tf.transpose(x, (3, 0, 1, 2))
        # organize grid on Y axis
        x = tf.reshape(x, tf.stack([grid_X, Y * grid_Y, X, channels]))

        # switch X and Y axes
        x = tf.transpose(x, (0, 2, 1, 3))
        # organize grid on X axis
        x = tf.reshape(x, tf.stack([1, X * grid_X, Y * grid_Y, channels]))

        # back to normal order (not combining with the next step for clarity)
        x = tf.transpose(x, (2, 1, 3, 0))

        # to tf.image_summary order [batch_size, height, width, channels],
        #   where in this case batch_size == 1
        x = tf.transpose(x, (3, 0, 1, 2))

        # scaling to [0, 255] is not necessary for tensorboard
        return x
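
    # Hedged usage sketch for put_kernels_on_grid (commented out; assumes a
    # first-layer kernel tensor of shape [Y, X, NumChannels, NumKernels]):
    #   grid = put_kernels_on_grid(first_layer_kernel)
    #   tf.summary.image('conv1/filters', grid, max_outputs=1)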

    #Many lines are commented out to allow for quick architecture changes
    #TODO: This should be abstracted so that architectures are defined by some
    #sort of config dictionary or file

    def gradients_with_loss_scaling(loss, loss_scale):
        """Gradient calculation with loss scaling to improve numerical stability
        when training with float16.
        """

        grads = [
            (grad[0] / loss_scale, grad[1]) for grad in tf.train.AdamOptimizer(
                learning_rate=learning_rate, epsilon=1e-4).compute_gradients(
                    loss * loss_scale, colocate_gradients_with_ops=True)
        ]
        return grads
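
    # Hedged usage sketch for the loss-scaling helper above (commented out).
    # Note it builds its own AdamOptimizer internally and returns gradients
    # already divided by loss_scale, so they can be applied as-is:
    #   scaled_grads = gradients_with_loss_scaling(cost, loss_scale)
    #   train_op = tf.train.AdamOptimizer(learning_rate,
    #                                     epsilon=1e-4).apply_gradients(scaled_grads)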

    def float32_variable_storage_getter(
            getter,
            name,
            shape=None,
            dtype=None,
            initializer=tf.contrib.layers.xavier_initializer(uniform=False),
            regularizer=None,
            trainable=True,
            *args,
            **kwargs):
        storage_dtype = tf.float32 if trainable else dtype
        variable = getter(name,
                          shape,
                          dtype=storage_dtype,
                          initializer=initializer,
                          regularizer=regularizer,
                          trainable=trainable,
                          *args,
                          **kwargs)
        if trainable and dtype != tf.float32:
            variable = tf.cast(variable, dtype)
        return variable
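
    # Hedged usage sketch for the fp32-storage getter above (commented out;
    # this is the usual mixed-precision pattern: variables live in float32 but
    # are cast to the compute dtype, e.g. tf.float16, when read):
    #   with tf.variable_scope('fp32_storage',
    #                          custom_getter=float32_variable_storage_getter):
    #       out = net.build(...)  # build the model with filter_dtype=tf.float16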

    strides = 1
    time_stride = 1
    freq_stride = 2
    time_pool = 4
    freq_pool = 1
    k = 2
    k_wide = 8

    #    config_array=[[["/gpu:0"],['conv',[2,50,32],[2,1]],['relu'],['pool',[1,4]]],[["/gpu:1"],['conv',[4,20,64],[1,1]],['bn'],['relu'],['pool',[1,4]],['conv',[8,8,128],[1,1]],['bn'],['relu'],['pool',[1,4]],['conv',[8,8,256],[1,1]],['bn'],['relu'],['pool',[1,8]],['fc',512],['fc_bn'],['fc_relu'],['dropout'],['out',]]]

    #[L_channel,R_channel] = tf.unstack(subbands_batch,axis=3)
    [L_channel, R_channel] = tf.unstack(combined_iter_dict[0]['train/image'],
                                        axis=3)
    concat_for_downsample = tf.concat([L_channel, R_channel], axis=0)
    reshaped_for_downsample = tf.expand_dims(concat_for_downsample, axis=3)

    #hard coding filter shape based on previous experimentation
    new_sig_downsampled = downsample(reshaped_for_downsample,
                                     sr,
                                     cochleagram_sr,
                                     window_size=4097,
                                     beta=10.06,
                                     post_rectify=post_rectify)
    downsampled_squeezed = tf.squeeze(new_sig_downsampled)
    [L_channel_downsampled,
     R_channel_downsampled] = tf.split(downsampled_squeezed,
                                       num_or_size_splits=2,
                                       axis=0)
    downsampled_reshaped = tf.stack(
        [L_channel_downsampled, R_channel_downsampled], axis=3)
    new_sig_nonlin = tf.pow(downsampled_reshaped, 0.3)
    # print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,scope='fp32_storage'))
    # print(subbands_batch)

    ####TEMPORARY OVERRIDE####

    #branched = False
    net = NetBuilder()
    if branched:
        out, out2 = net.build(config_array, new_sig_nonlin, training_state,
                              dropout_training_state, filter_dtype, padding,
                              n_classes_localization, n_classes_recognition,
                              branched, regularizer)
    else:
        out = net.build(config_array, new_sig_nonlin, training_state,
                        dropout_training_state, filter_dtype, padding,
                        n_classes_localization, n_classes_recognition,
                        branched, regularizer)

    if regularizer is not None:
        reg_term = tf.contrib.layers.apply_regularization(
            regularizer,
            (tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)))

    combined_dict = collections.OrderedDict()
    combined_dict_fg = collections.OrderedDict()
    combined_dict_bkgd = collections.OrderedDict()
    for k, v in combined_iter_dict[0].items():
        if k != 'train/image' and k != 'train/image_height' and k != 'train/image_width':
            combined_dict_fg[k] = combined_iter_dict[0][k]
    for k, v in combined_iter_dict[1].items():
        if k != 'train/image' and k != 'train/image_height' and k != 'train/image_width':
            combined_dict_bkgd[k] = combined_iter_dict[1][k]
    combined_dict[0] = combined_dict_fg
    combined_dict[1] = combined_dict_bkgd

    ##Fully connected Layer 2
    #wd2 = tf.get_variable('wd2',[512,512],filter_dtype)
    #dense_bias2 = tf.get_variable('wb6',[512],filter_dtype)
    #fc2 = tf.add(tf.matmul(fc1_do, wd2), dense_bias2)
    #fc2 = tf.nn.relu(fc2)
    #fc2_do = tf.layers.dropout(fc2,training=dropout_training_state)

    # Construct model
    #fix labels dimension to be one less that logits dimension

    #Testing small subbatch
    if sam_tones or transposed_tones:
        labels_batch_cost_sphere = tf.squeeze(
            tf.zeros_like(combined_dict[0]['train/carrier_freq']))
    elif precedence_effect:
        labels_batch_cost_sphere = tf.squeeze(
            tf.zeros_like(combined_dict[0]['train/start_sample']))
    else:
        labels_batch_cost = tf.squeeze(combined_dict[0]['train/azim'])
        #labels_batch_cost = tf.squeeze(subbands_batch_labels,axis=[1,2])
        if not tone_version:
            labels_batch_sphere = tf.add(
                tf.scalar_mul(tf.constant(36, dtype=tf.int32),
                              combined_dict[0]['train/elev']),
                combined_dict[0]['train/azim'])
        else:
            labels_batch_sphere = combined_dict[0]['train/azim']
        labels_batch_cost_sphere = tf.squeeze(labels_batch_sphere)

    # Define loss and optimizer
    # On r1.1, reduce_mean does not work (returns NaNs) with float16 values

    if branched:
        cost1 = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=out, labels=labels_batch_cost_sphere))
        cost2 = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=out2, labels=combined_dict[0]['train/class_num']))
        cost = cost1 + cost2
    else:
        cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=out, labels=labels_batch_cost_sphere))

    if regularizer is not None:
        cost = tf.add(cost, reg_term)

    #cost = tf.Print(cost, [labels],message="\nLabel:",summarize=32)

    cond_dist = tf.nn.softmax(out)
    if branched:
        cond_dist2 = tf.nn.softmax(out2)

    #cost = tf.Print(cost, [tf.argmax(out, 1)],message="\nOut:",summarize=32)

#     init_op = tf.group(tf.global_variables_initializer(),
#                        tf.local_variables_initializer())
#     config = tf.ConfigProto(allow_soft_placement=True,
#                             inter_op_parallelism_threads=0, intra_op_parallelism_threads=0)
#     sess = tf.Session(config=config)
#     sess.run(init_op)
#     coord = tf.train.Coordinator()
#     threads = tf.train.start_queue_runners(sess=sess,coord=coord)
#     print(sess.run(cost))

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        update_grads = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                              epsilon=1e-4).minimize(cost)

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(out, 1),
                            tf.cast(labels_batch_cost_sphere, tf.int64))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    top_k = tf.nn.top_k(out, 5)

    if branched:
        correct_pred2 = tf.equal(
            tf.argmax(out2, 1),
            tf.cast(combined_dict[0]['train/class_num'], tf.int64))
        accuracy2 = tf.reduce_mean(tf.cast(correct_pred2, tf.float32))

        top_k2 = tf.nn.top_k(out2, 5)
    #test_pred = conv_net(tf.cast(test_images,tf.float32),weights,biases)
    #correct_pred = tf.equal(tf.argmax(test_pred, 1), tf.cast(test_labels,tf.int64))
    #test_acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    ##Check special cases (made by hand in testdata.json)
    #custom_pred = conv_net(tf_test_data,weights,biases)
    #custom_correct_pred = tf.equal(tf.argmax(custom_pred, 1), tf.cast(tf_test_label,tf.int64))
    #custom_test_acc = tf.reduce_mean(tf.cast(custom_correct_pred, tf.float32))

    # Initializing the variables
    #
    # Check_op seems to take up a lot of space on the GPU
    check_op = tf.add_check_numerics_ops()

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Launch the graph
    #with tf.Session() as sess:
    #run_metadata = tf.RunMetadata()
    config = tf.ConfigProto(allow_soft_placement=True,
                            inter_op_parallelism_threads=0,
                            intra_op_parallelism_threads=0)
    sess = tf.Session(config=config)
    sess.run(init_op)
    if branched:
        print("Class Labels:" +
              str(sess.run(combined_dict[0]['train/class_num'])))

#     ##This code allows for tracing ops across GPUs; you often have to run it twice
#     ##to get sensible traces
#
#     #sess.run(optimizer,options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
#     #                     run_metadata=run_metadata)
#     #from tensorflow.python.client import timeline
#     #trace = timeline.Timeline(step_stats=run_metadata.step_stats)
#     #trace_file.close()

##Used to write out stimuli examples
#
#    low_lim=30
#    hi_lim=20000
#    sr=48000
#    sample_factor=1
#    scale = 0.1
#    i=0
#    pad_factor = None
#    #invert subbands
#    n = int(np.floor(erb.freq2erb(hi_lim) - erb.freq2erb(low_lim)) - 1)
#    sess.run(combined_iter.initializer)
#    subbands_test,az_label,elev_label = sess.run([combined_iter_dict[0]['train/image'],combined_iter_dict[0]['train/azim'],combined_iter_dict[0]['train/elev']])
#
#    filts, hz_cutoffs, freqs=erb.make_erb_cos_filters_nx(subbands_test.shape[2],sr, n,low_lim,hi_lim, sample_factor,pad_factor=pad_factor,full_filter=True)
#
#    filts_no_edges = filts[1:-1]
#    for batch_iter in range(3):
#        for stim_iter in range(16):
#            subbands_l=subbands_test[stim_iter,:,:,0]
#            subbands_r=subbands_test[stim_iter,:,:,1]
#            wavs = np.zeros([subbands_test.shape[2],2])
#            wavs[:,0] = sb.collapse_subbands(subbands_l,filts_no_edges).astype(np.float32)
#            wavs[:,1] = sb.collapse_subbands(subbands_r,filts_no_edges).astype(np.float32)
#            max_val = wavs.max()
#            rescaled_wav = wavs/max_val*scale
#            name = "stim_{}_{}az_{}elev.wav".format(stim_iter+batch_iter*16,int(az_label[stim_iter])*5,int(elev_label[stim_iter])*5)
#            name_with_path = newpath+'/'+name
#            write(name_with_path,sr,rescaled_wav)
#        pdb.set_trace()
#        subbands_test,az_label,elev_label = sess.run([combined_iter_dict[0]['train/image'],combined_iter_dict[0]['train/azim'],combined_iter_dict[0]['train/elev']])

    if not testing:
        sess.run(combined_iter.initializer)
        saver = tf.train.Saver(max_to_keep=None)
        learning_curve = []
        errors_count = 0
        try:
            step = 1
            sess.graph.finalize()
            while True:
                #sess.run([optimizer,check_op])
                try:
                    if step == 1:
                        if num_files != 1:
                            #latest_addition = max(files, key=os.path.getctime)
                            file_list = []
                            for elem in files:
                                if (elem.split("/")[-1]
                                    ).split(".")[0] == 'model':
                                    file_list.append(elem)
                            latest_addition = max(file_list,
                                                  key=os.path.getctime)
                            latest_addition_name = latest_addition.split(
                                ".")[-2]
                            saver.restore(
                                sess,
                                newpath + "/model." + latest_addition_name)
                            step = int(latest_addition_name.split("-")[1])
                        else:
                            sess.run(update_grads)
                    else:
                        sess.run(update_grads)
#                    sess.run(update_grads)
                except tf.errors.InvalidArgumentError as e:
                    print(e.message)
                    errors_count += 1
                    continue
                if step % display_step == 0:
                    # Calculate batch loss and accuracy
                    loss, acc, az = sess.run(
                        [cost, accuracy, combined_dict[0]['train/azim']])
                    #print("Batch Labels: ",az)
                    print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                          "{:.6f}".format(loss) + ", Training Accuracy= " + \
                          "{:.5f}".format(acc))
                if step % 5000 == 0:
                    print("Checkpointing Model...")
                    retry_count = 0
                    while True:
                        try:
                            saver.save(sess,
                                       newpath + '/model.ckpt',
                                       global_step=step,
                                       write_meta_graph=False)
                            break
                        except ValueError as e:
                            if retry_count > 36:
                                print(
                                    "Maximum wait time reached (6 hours). Terminating program."
                                )
                                raise e from None
                            print(
                                "Checkpointing failed. Retrying in 10 minutes..."
                            )
                            time.sleep(600)
                            retry_count += 1
                    learning_curve.append([int(step * batch_size), float(acc)])
                    print("Checkpoint Complete")

                #Just for testing the model/call_model
                if step == 300000:
                    print("Break!")
                    break
                step += 1
        except tf.errors.OutOfRangeError:
            print("Out of Range Error. Optimization Finished")
        except tf.errors.DataLossError as e:
            print("Corrupted file found!!")
            pdb.set_trace()
        except tf.errors.ResourceExhaustedError as e:
            gpu = e.message
            print("Out of memory error")
            error = "Out of memory error"
            with open(newpath + '/train_error.json', 'w') as f:
                json.dump(arch_ID, f)
                json.dump(error, f)
                json.dump(gpu, f)
            return False
        finally:
            print(errors_count)
            print("Training stopped.")

        with open(newpath + '/curve_no_resample_w_cutoff_vary_loc.json',
                  'w') as f:
            json.dump(learning_curve, f)

    if testing:
        ##Testing loop
        for stim in model_version:
            sess.run(combined_iter.initializer)
            print("Starting model version: ", stim)
            batch_acc = []
            batch_acc2 = []
            batch_conditional = []
            batch_conditional2 = []
            saver = tf.train.Saver(max_to_keep=None)
            #saver.restore(sess,newpath+"/model.ckpt-"+str(model_version))
            saver.restore(sess, newpath + "/model.ckpt-" + str(stim))
            step = 0
            try:
                eval_vars = list(combined_dict[0].values())
                eval_keys = list(combined_dict[0].keys())
                while True:
                    pred, cd, e_vars = sess.run(
                        [correct_pred, cond_dist, eval_vars])
                    array_len = len(e_vars)
                    if isinstance(e_vars, list):
                        e_vars = list(zip(*e_vars))
                        batch_conditional += [(cond, var)
                                              for cond, var in zip(cd, e_vars)]
                        batch_acc += [(pd, ev) for pd, ev in zip(pred, e_vars)]
                    else:
                        e_vars = np.array([np.squeeze(x) for x in e_vars])
                        split = np.vsplit(e_vars, array_len)
                        batch_conditional += [
                            (cond, var) for cond, var in zip(cd, e_vars.T)
                        ]
                        split.insert(0, pred)
                        batch_acc += np.dstack(split).tolist()[0]

                    if branched:
                        pred2, cd2, e_vars2 = sess.run(
                            [correct_pred2, cond_dist2, eval_vars])
                        e_vars2 = np.squeeze(e_vars2)
                        array_len2 = len(e_vars2)
                        split2 = np.vsplit(e_vars2, array_len2)
                        split2.insert(0, pred2)
                        batch_conditional2 += [
                            (cond, var) for cond, var in zip(cd2, e_vars2.T)
                        ]
                        batch_acc2 += np.dstack(split2).tolist()[0]

                    step += 1
                    if step % display_step == 0:
                        print("Iter " + str(step * batch_size))
                        #if not tone_version:
                        #    print("Current Accuracy:",sum(batch_acc)/len(batch_acc))
                    if (step + 1) % write_step == 0:
                        print("writing batch data at step: {}".format(step))
                        write_batch_data(newpath, train_path_pattern, stim,
                                         batch_acc, batch_conditional,
                                         eval_keys, step)
                        print("Data written")
                        batch_acc = []
                        batch_conditional = []
                    if step == 500000:
                        print("Break!")
                        break
            except tf.errors.ResourceExhaustedError:
                print("Out of memory error")
                error = "Out of memory error"
                with open(newpath + '/test_error_{}.json'.format(stim),
                          'w') as f:
                    json.dump(arch_ID, f)
                    json.dump(error, f)
            except tf.errors.OutOfRangeError:
                print("Out of Range Error. Optimization Finished")

            finally:
                if tone_version:
                    np.save(newpath + '/plot_array_test_{}.npy'.format(stim),
                            batch_acc)
                    np.save(
                        newpath +
                        '/batch_conditional_test_{}.npy'.format(stim),
                        batch_conditional)
                    acc_corr = [pred[0] for pred in batch_acc]
                    acc_accuracy = sum(acc_corr) / len(acc_corr)
                    if branched:
                        np.save(
                            newpath + '/plot_array_test_{}_2.npy'.format(stim),
                            batch_acc2)
                        np.save(
                            newpath +
                            '/batch_conditional_test_{}_2.npy'.format(stim),
                            batch_conditional2)
                        acc_corr2 = [pred2[0] for pred2 in batch_acc2]
                        acc_accuracy2 = sum(acc_corr2) / len(acc_corr2)
                    with open(newpath + '/accuracies_itd_{}.json'.format(stim),
                              'w') as f:
                        json.dump(acc_accuracy, f)
                        if branched:
                            json.dump(acc_accuracy2, f)
                elif (sam_tones or transposed_tones or precedence_effect
                      or narrowband_noise):
                    if train_path_pattern.split("/")[-2] == 'testset':
                        stimuli_name = 'testset_' + train_path_pattern.split(
                            "/")[-3]
                    else:
                        stimuli_name = train_path_pattern.split("/")[-2]
                    np.save(
                        newpath + '/batch_array_{}_iter{}.npy'.format(
                            stimuli_name, stim), batch_acc)
                    np.save(
                        newpath + '/batch_conditional_{}_iter{}.npy'.format(
                            stimuli_name, stim), batch_conditional)
                    acc_corr = [pred[0] for pred in batch_acc]
                    acc_accuracy = sum(acc_corr) / len(acc_corr)
                    if branched:
                        np.save(
                            newpath + '/plot_array_test_{}_2.npy'.format(stim),
                            batch_acc2)
                        np.save(
                            newpath +
                            '/batch_conditional_test_{}_2.npy'.format(stim),
                            batch_conditional2)
                        acc_corr2 = [pred2[0] for pred2 in batch_acc2]
                        acc_accuracy2 = sum(acc_corr2) / len(acc_corr2)
                    with open(
                            newpath + '/accuracies_test_{}_iter{}.json'.format(
                                stimuli_name, stim), 'w') as f:
                        json.dump(acc_accuracy, f)
                        if branched:
                            json.dump(acc_accuracy2, f)
                    with open(
                            newpath + '/keys_test_{}_iter{}.json'.format(
                                stimuli_name, stim), 'w') as f:
                        json.dump(eval_keys, f)

                else:
                    if train_path_pattern.split("/")[-2] == 'testset':
                        stimuli_name = 'testset_' + train_path_pattern.split(
                            "/")[-3]
                    else:
                        stimuli_name = train_path_pattern.split("/")[-2]
                    np.save(
                        newpath + '/plot_array_padded_{}_iter{}.npy'.format(
                            stimuli_name, stim), batch_acc)
                    np.save(
                        newpath + '/batch_conditional_{}_iter{}.npy'.format(
                            stimuli_name, stim), batch_conditional)
                    acc_corr = [pred[0] for pred in batch_acc]
                    acc_accuracy = sum(acc_corr) / len(acc_corr)
                    if branched:
                        np.save(
                            newpath +
                            '/plot_array_stim_vary_env_{}_2.npy'.format(stim),
                            batch_acc2)
                        np.save(
                            newpath +
                            '/batch_conditional_test_{}_2.npy'.format(stim),
                            batch_conditional2)
                        acc_corr2 = [pred2[0] for pred2 in batch_acc2]
                        acc_accuracy2 = sum(acc_corr2) / len(acc_corr2)
                    with open(
                            newpath + '/accuracies_test_{}_iter{}.json'.format(
                                stimuli_name, stim), 'w') as f:
                        json.dump(acc_accuracy, f)
                        if branched:
                            json.dump(acc_accuracy2, f)
                    with open(
                            newpath + '/keys_test_{}_iter{}.json'.format(
                                stimuli_name, stim), 'w') as f:
                        json.dump(eval_keys, f)

    #acc= sess.run(test_acc)
    #print("Test Accuracy= "+"{:.5f}".format(acc))
    #customs = sess.run(custom_test_acc)
    #correct_pred = sess.run(custom_correct_pred)
    #with open('custom_out2.json', 'w') as f:
    #    json.dump([test_data_img,correct_pred.tolist()],f)
    #print("ACC for special cases:")
    #print(customs)
    #first_layer = sess.run(weights['wc1'])
    #activation1, activation2 = sess.run([conv1,conv3])
    #with open('activations.json','w') as f:
    #    json.dump([activation1.tolist(),activation2.tolist()],f)
    #tf.get_variable_scope().reuse_variables()
    #first_layer = [var for var in tf.global_variables() if var.op.name=="wc1"][0]
    #second_layer = [var for var in tf.global_variables() if var.op.name=="wc2"][0]
    #weights_image = put_kernels_on_grid(first_layer)
    #weights_image2 = put_kernels_on_grid(second_layer)
    #np_weights1, np_weights2 = sess.run([weights_image,weights_image2])
    #with open('conv1weights.json','w') as f:
    #    json.dump([np_weights1.tolist(),np_weights2.tolist()],f)
    #
    sess.close()
    tf.reset_default_graph()
Exemple #33
def approximate(namePrePrefix, pointSubset, pointList):
    kdTree = cKDTree(pointSubset)
    #We'll pick points by picking a random (spherical normal) offset
    #from randomly-chosen points in the given point list
    variance = 100000.0
    mean_vec = np.array([0.0, 0.0, 0.0], dtype=np.float32)
    covar_mat = np.array([[variance, 0, 0], [0, variance, 0], [0, 0, variance]], dtype=np.float32)

    #The name prefix for all variable scopes
    namePrefix = namePrePrefix + "Metric"


    in_points = tf.placeholder(tf.float32, [None, 3], name=(namePrefix + "In"))

    small_points = in_points * 0.001
    
    crossterm_one = small_points[:, 0] * small_points[:, 1]
    crossterm_two = small_points[:, 1] * small_points[:, 2]
    crossterm_three = small_points[:, 0] * small_points[:, 2]
    crossterms = tf.stack([crossterm_one, crossterm_two, crossterm_three], axis=1)


    poly_aug_in_points = tf.concat([small_points, tf.square(small_points), crossterms], axis=1)
    approx_norm_network = approxNetwork(poly_aug_in_points, False, namePrefix=namePrefix)
    approx_norm_out = tf.identity(approx_norm_network, name=(namePrefix + "Out"))

    target_norms = tf.placeholder(tf.float32, [None, 1])

    with tf.name_scope('loss'):
        loss = tf.losses.absolute_difference(approx_norm_out, tf.square(target_norms * .001))

    with tf.name_scope('adam_optimizer'):
        # NB: despite the scope name, this uses plain gradient descent
        train_step = tf.train.GradientDescentOptimizer(train_step_size).minimize(loss)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    check = tf.add_check_numerics_ops()

    start = time.time()
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()

        batchNum = 0
        start = time.time()
        num_exact = int(float(BATCH_SIZE) * zero_hammer_prob)
        num_fuzzed = BATCH_SIZE - num_exact

        for i in range(training_iters):
            #Pick a random collection of points on the target manifold
            exactPoints = randomRows(pointSubset, num_exact)
            #Pick a random collection of points from the input point list
            fuzzedPoints = randomRows(pointList, num_fuzzed)
            #Compute normally-distributed offsets for them
            offsets = np.random.multivariate_normal(mean_vec, covar_mat, size=num_fuzzed)
            fuzzedPoints = fuzzedPoints + offsets

            allPoints = np.vstack((exactPoints, fuzzedPoints))
           
            #Great, now for each fuzzed point, compute the actual distances to the original point cloud
            actualDistances, _ = kdTree.query(allPoints)
            actualDistances = np.reshape(actualDistances, (BATCH_SIZE, 1))

            #Okay, now run a training step
            batchNum += 1

            sess.run([train_step, check], feed_dict={in_points : allPoints, target_norms : actualDistances})

            if (i % VIEW_AFTER == 0):
                train_loss = loss.eval(feed_dict={in_points : allPoints, target_norms : actualDistances})
                print("Batches per second: ", batchNum / (time.time() - start))
                train_loss = math.sqrt(train_loss) * 1000.0
                print("Step %d, training loss %g mm" % (i, train_loss))
        saver.save(sess, "./" + namePrefix + "/" + namePrefix)
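
# Hedged note on the unit bookkeeping above: inputs and target distances are
# scaled by 0.001 before entering the network, and the loss compares squared
# scaled distances, so math.sqrt(train_loss) * 1000.0 converts the loss back
# to roughly the original units (reported here as mm).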
Exemple #34
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of ``Inference`` **must** implement this method.
    No methods which build ops should be called outside ``initialize()``.

    Parameters
    ----------
    n_iter : int, optional
      Number of iterations for algorithm.
    n_print : int, optional
      Number of iterations for each print progress. To suppress print
      progress, then specify 0. Default is ``int(n_iter / 100)``.
    scale : dict of RandomVariable to tf.Tensor, optional
      A tensor to scale computation for any random variable that it is
      bound to. Its shape must be broadcastable; it is multiplied
      element-wise to the random variable. For example, this is useful
      for mini-batch scaling when inferring global variables, or
      applying masks on a random variable.
    logdir : str, optional
      Directory where event file will be written. For details,
      see ``tf.summary.FileWriter``. Default is to log nothing.
    log_timestamp : bool, optional
      If True (and ``logdir`` is specified), create a subdirectory of
      ``logdir`` to save the specific run results. The subdirectory's
      name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
    log_vars : list, optional
      Specifies the list of variables to log after each ``n_print``
      steps. If None, will log all variables. If ``[]``, no variables
      will be logged. ``logdir`` must be specified for variables to be
      logged.
    debug : bool, optional
      If True, add checks for ``NaN`` and ``Inf`` to all computations
      in the graph. May result in substantially slower execution
      times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
            self.summarize = tf.summary.merge_all()
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        # Store reset ops which user can call. Subclasses should append
        # any ops needed to reset internal variables in inference.
        self.reset = [tf.variables_initializer([self.t])]
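
    # Hedged usage sketch (assuming the Edward library, where this method
    # lives on Inference subclasses; debug=True wires tf.add_check_numerics_ops
    # into every update):
    #   inference = ed.KLqp({z: qz}, data={x: x_train})
    #   inference.initialize(n_iter=1000, logdir='log', debug=True)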
Exemple #35
def train():
    _get_control_params()

    if not os.path.exists(modelParams['dataDir']):
        raise ValueError("No such data directory %s" % modelParams['dataDir'])

    #meanImgFile = os.path.join(FLAGS.dataDir, "meta")
    #if not os.path.isfile(meanImgFile):
    #    raise ValueError("Warning, no meta file found at %s" % meanImgFile)
    #else:
    #    with open(meanImgFile, "r") as inMeanFile:
    #        meanInfo = json.load(inMeanFile)
    #
    #    meanImg = meanInfo['mean']
    #
    #    # also load the target output sizes
    #    params['targSz'] = meanInfo["targSz"]

    #_setupLogging(os.path.join(modelParams['trainLogDir'], "genlog"))

    with tf.Graph().as_default():
        # BGR to RGB
        #params['meanImg'] = tf.constant(meanImg, dtype=tf.float32)

        # track the number of train calls (basically number of batches processed)
        globalStep = tf.get_variable('globalStep', [],
                                     initializer=tf.constant_initializer(0),
                                     trainable=False)

        # Get images and transformation for model_cnn.
        imagesOrig, images, pOrig, tHAB, tfrecFileIDs = data_input.inputs(
            **modelParams)

        # Build a Graph that computes the HAB predictions from the
        # inference model.
        pHAB = model_cnn.inference(images, **modelParams)

        # Calculate loss.
        loss = model_cnn.loss(pHAB, tHAB, **modelParams)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build an initialization operation to run below.
        #init = tf.initialize_all_variables()
        init = tf.global_variables_initializer()

        opCheck = tf.add_check_numerics_ops()
        # Start running operations on the Graph.
        config = tf.ConfigProto(
            log_device_placement=modelParams['logDevicePlacement'])
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)

        #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
        #sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init)

        # restore a saver.
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, modelParams['trainLogDir'] + '/model.ckpt-89999')

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        HABperPixelsum = 0
        durationSum = 0
        ######### USE LATEST STATE TO WARP IMAGES
        if modelParams['writeWarpedImages']:
            stepsForOneDataRound = int((modelParams['numExamples'] /
                                        modelParams['activeBatchSize'])) + 1
            print('Warping images with batch size %d in %d steps' %
                  (modelParams['activeBatchSize'], stepsForOneDataRound))
            for step in xrange(stepsForOneDataRound):
                startTime = time.time()
                evImagesOrig, evImages, evPOrig, evtHAB, evpHAB, evtfrecFileIDs, evlossValue = sess.run(
                    [
                        imagesOrig, images, pOrig, tHAB, pHAB, tfrecFileIDs,
                        loss
                    ])
                durationSum += (time.time() - startTime)
                HABRES = evtHAB - evpHAB
                HABperPixel = 0
                for i in xrange(modelParams['activeBatchSize']):
                    H = np.asarray([[
                        HABRES[i][0], HABRES[i][1], HABRES[i][2], HABRES[i][3]
                    ], [
                        HABRES[i][4], HABRES[i][5], HABRES[i][6], HABRES[i][7]
                    ]], np.float32)
                    HABperPixel += np.sqrt((H * H).sum(axis=0)).mean()
                HABperPixel = HABperPixel / modelParams['activeBatchSize']
                HABperPixelsum += HABperPixel
                #### put imageA, warpped imageB by pHAB, HAB-pHAB as new HAB, changed fileaddress tfrecFileIDs
                data_output.output(evImagesOrig, evImages, evPOrig, evtHAB,
                                   evpHAB, evtfrecFileIDs, **modelParams)
                # Print Progress Info
                if ((step % FLAGS.ProgressStepReportStep)
                        == 0) or ((step + 1) == stepsForOneDataRound):
                    print(
                        'Progress: %.2f%%, Loss: %.2f, Elapsed: %.2f mins, Training Completion in: %.2f mins'
                        %
                        ((100 * step) / stepsForOneDataRound, HABperPixelsum /
                         (step + 1), durationSum / 60,
                         (((durationSum * stepsForOneDataRound) /
                           (step + 1)) / 60) - (durationSum / 60)))
            print(
                'Average training loss = %.2f - Average time per sample= %.2f s, Steps = %d'
                % (HABperPixelsum / step, durationSum /
                   (step * modelParams['activeBatchSize']), step))
Exemple #36
def main(_=None):
    FLAGS = flags.FLAGS
    if FLAGS.save_model:
        FLAGS.batch_size = 1
        FLAGS.overwrite = False
    config = FLAGS
    FLAGS.__dict__['config'] = config

    FLAGS.logdir = FLAGS.logdir.format(name=FLAGS.name)

    logdir = FLAGS.logdir
    save_model_dir = "saved_model/"
    logging.info('logdir: %s', logdir)

    if os.path.exists(logdir) and FLAGS.overwrite and not FLAGS.save_model:
        logging.info(
            '"overwrite" is set to True. Deleting logdir at "%s".', logdir)
        shutil.rmtree(logdir)
    if os.path.exists(save_model_dir) and FLAGS.save_model:
        logging.info(
            '"save_model" is set to True. Deleting save_model_dir at "%s".', save_model_dir)
        shutil.rmtree(save_model_dir)

    with tf.Graph().as_default():
        model_dict = model_config.get(FLAGS)

        lr = model_dict.lr
        opt = model_dict.opt
        model = model_dict.model

        lr = tf.convert_to_tensor(lr)
        tf.summary.scalar('learning_rate', lr)

        global_step = tf.train.get_or_create_global_step()
        train_dataset, test_dataset = getDataset(config)
        target, res = model.make_target(train_dataset)

        gvs = opt.compute_gradients(target)

        suppress_inf_and_nans = (config.grad_value_clip > 0
                                 or config.grad_norm_clip > 0)
        report = tools.gradient_summaries(gvs, suppress_inf_and_nans)
        valid_report = dict()

        gvs = tools.clip_gradients(gvs, value_clip=config.grad_value_clip,
                                   norm_clip=config.grad_norm_clip)

        try:
            report.update(model.make_report(train_dataset))
            valid_report.update(model.make_report(test_dataset))
        except AttributeError:
            logging.warning(
                'Model %s has no "make_report" method.', str(model))
            raise

        report = tools.scalar_logs(report, config.ema, 'train',
                                   global_update=config.global_ema_update)
        report['lr'] = lr
        valid_report = tools.scalar_logs(
            valid_report, config.ema, 'valid',
            global_update=config.global_ema_update)

        reports_keys = sorted(report.keys())

        def _format(k):
            if k in ('lr', 'learning_rate'):
                return '.2E'
            return '.3f'

        report_template = ', '.join(['{}: {}{}:{}{}'.format(
            k, '{', k, _format(k), '}') for k in reports_keys])
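        # e.g. for keys ('loss', 'lr') this yields 'loss: {loss:.3f}, lr: {lr:.2E}'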

        logging.info('Trainable variables:')
        tools.log_variables_by_scope()

        # inspect gradients
        for g, v in gvs:
            if g is None:
                logging.warning('No gradient for variable: %s.', v.name)

        tools.log_num_params()

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if FLAGS.check_numerics:
            update_ops += [tf.add_check_numerics_ops()]

        with tf.control_dependencies(update_ops):
            train_step = opt.apply_gradients(gvs, global_step=global_step)

        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True

        if config.save_model:
            saver = tf.train.Saver()
            with tf.Session() as sess:
                saver.restore(sess, logdir + "/best.ckpt")
                signature = tf.saved_model.signature_def_utils.build_signature_def(
                    res.input, res.output, 'sig_inout')
                builder = tf.saved_model.builder.SavedModelBuilder("saved_model/")
                builder.add_meta_graph_and_variables(sess, ['car_match_model'], {'sig_inout': signature})
                builder.save()
        else:
            with tf.train.SingularMonitoredSession(
                    hooks=create_hooks(FLAGS),
                    checkpoint_dir=logdir, config=sess_config) as sess:

                train_itr, _ = sess.run([global_step, update_ops])
                train_tensors = [global_step, train_step]
                report_tensors = [report, valid_report]
                all_tensors = report_tensors + train_tensors
                valid_loss = 1e5

                while train_itr < config.max_train_steps:

                    if train_itr % config.report_loss_steps == 0:
                        report_vals, valid_report_vals, train_itr, _ = sess.run(
                            all_tensors)
                        this_loss = valid_report_vals['loss']
                        if this_loss < valid_loss and tf.train.latest_checkpoint(logdir) is not None:
                            ends = ['.data-00000-of-00001', '.index', '.meta']
                            for end in ends:
                                shutil.copy(tf.train.latest_checkpoint(logdir) + end, logdir + "/best.ckpt" + end)

                        logging.info('')
                        logging.info('train:')
                        logging.info('#%s: %s', train_itr,
                                     report_template.format(**report_vals))

                        logging.info('valid:')
                        valid_logs = dict(report_vals)
                        valid_logs.update(valid_report_vals)
                        logging.info('#%s: %s', train_itr,
                                     report_template.format(**valid_logs))

                        vals_to_check = list(report_vals.values())
                        if (np.isnan(vals_to_check).any()
                                or np.isinf(vals_to_check).any()):
                            logging.fatal('NaN in reports: %s; breaking...',
                                          report_template.format(**report_vals))
                    else:
                        train_itr, _ = sess.run(train_tensors)
Exemple #37
0
def train(args, model, data):
    dirname = 'save-vrnn/' + args.appliance
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    with open(os.path.join(dirname, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    ckpt = tf.train.get_checkpoint_state(
        dirname
    )  # check whether a previously trained model exists in the checkpoint directory

    Xtrain, ytrain = data
    train = Iterator(Xtrain,
                     ytrain,
                     batch_size=args.batch_size,
                     n_steps=args.seq_length)  #to split data into batches
    n_batches = train.nbatches
    Xtrain, ytrain = train.get_split()
    mae = []
    mse = []
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(
            'logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph)
        check = tf.add_check_numerics_ops()
        merged = tf.summary.merge_all()
        tf.global_variables_initializer().run(
        )  #initialize all variables in the graph as defined
        saver = tf.train.Saver(tf.global_variables())
        if ckpt:
            saver.restore(
                sess,
                ckpt.model_checkpoint_path)  #restore previously saved model
            print "Loaded model"
        start = time.time()
        state_c = None
        state_h = None
        for e in xrange(args.num_epochs):
            #assign learning rate
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))

            #get the initial state of lstm cell
            state = model.initial_state_c, model.initial_state_h
            mae.append([])
            mse.append([])
            for b in xrange(n_batches):
                x = Xtrain[b]
                y = ytrain[b]
                feed = {
                    model.input_x: x,
                    model.input_y: y,
                    model.target_data: y
                }  # input data : x and y ; target data : y

                #train the model on this batch of data
                train_loss, _, cr, summary, sigma, mu, inp, target, state_c, state_h, pred = sess.run(
                    [
                        model.cost, model.train_op, check, merged, model.sigma,
                        model.mu, model.flat_input, model.target,
                        model.final_state_c, model.final_state_h, model.output
                    ], feed)

                summary_writer.add_summary(summary, e * n_batches + b)

                #the output from the model is in the shape [50000,1] reshape to 3D (batch_size, time_steps, n_app)
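                #250 and 200 appear to hard-code batch_size and seq_length (250 * 200 = 50000 outputs)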
                pred = np.array(np.reshape(pred, [250, 200, -1])).astype(float)
                label = np.array(y).astype(float)

                #compute mae and mse for the output
                mae_i = np.reshape(np.absolute((label - pred)), [
                    -1,
                ]).mean()
                mse_i = np.reshape((label - pred)**2, [
                    -1,
                ]).mean()

                mae[e].append(mae_i)
                mse[e].append(mse_i)

                #save the model every `args.save_every` training steps
                if (e * n_batches + b) % args.save_every == 0 and (
                    (e * n_batches + b) > 0):
                    checkpoint_path = os.path.join(
                        dirname, 'model_' + str(args.num_epochs) + '_' +
                        str(args.learning_rate) + '.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * n_batches + b)
                    print "model saved to {}".format(checkpoint_path)

                end = time.time()

                print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \
                    .format(e * n_batches + b,
                            args.num_epochs * n_batches,
                            e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0))
                start = time.time()

            #the average mae,mse values in every epoch
            print "Epoch {}, mae = {:.3f}, mse = {:.3f}".format(
                e,
                sum(mae[e]) / len(mae[e]),
                sum(mse[e]) / len(mse[e]))

        #path to save the final model
        checkpoint_path = os.path.join(
            dirname, 'final_model_' + str(args.num_epochs) + '_' +
            str(args.learning_rate) + '.ckpt')

        saver2 = tf.train.Saver()
        saver2.save(sess, checkpoint_path)

        print "model saved to {}".format(checkpoint_path)
Exemple #38
0
  def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None,
                 log_timestamp=True, log_vars=None, debug=False):
    """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int, optional.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int, optional.
        Number of iterations for each print progress. To suppress print
        progress, then specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        A tensor to scale computation for any random variable that it is
        binded to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      logdir: str, optional.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool, optional.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
    self.n_iter = n_iter
    if n_print is None:
      self.n_print = int(n_iter / 100)
    else:
      self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False, name="iteration")

    self.increment_t = self.t.assign_add(1)

    if scale is None:
      scale = {}
    elif not isinstance(scale, dict):
      raise TypeError("scale must be a dict object.")

    self.scale = scale

    if logdir is not None:
      self.logging = True
      if log_timestamp:
        logdir = os.path.join(
            logdir, datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

      self._set_log_variables(log_vars)
      self.train_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
      self.summarize = tf.summary.merge_all()
    else:
      self.logging = False

    self.debug = debug
    if self.debug:
      self.op_check = tf.add_check_numerics_ops()

    # Store reset ops which user can call. Subclasses should append
    # any ops needed to reset internal variables in inference.
    self.reset = [tf.variables_initializer([self.t])]
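
# Hedged usage sketch for initialize() above (the docstring matches the Edward
# library's Inference API; the toy regression below is illustrative, not from
# the original source). debug=True wires tf.add_check_numerics_ops() into every
# update so the first NaN/Inf aborts the run.
import numpy as np
import tensorflow as tf
import edward as ed
from edward.models import Normal

x_train = np.random.randn(50, 1).astype(np.float32)
y_train = (2.0 * x_train[:, 0] + 0.1 * np.random.randn(50)).astype(np.float32)

w = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(tf.constant(x_train), w), scale=tf.ones(50))
qw = Normal(loc=tf.get_variable("qw_loc", [1]),
            scale=tf.nn.softplus(tf.get_variable("qw_scale", [1])))

inference = ed.KLqp({w: qw}, data={y: y_train})
inference.initialize(n_iter=200, n_print=20, debug=True)

sess = ed.get_session()
sess.run(tf.global_variables_initializer())
for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
inference.finalize()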
Exemple #39
0
def main(_):
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    label_count = 10

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_epochs=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000.
    training_epochs_list = list(
        map(int, FLAGS.how_many_training_epochs.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_epochs_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_epochs and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' %
            (len(training_epochs_list), len(learning_rates_list)))

    input_xs = tf.placeholder(tf.float32, [None, 784], name='input_xs')
    logits, dropout_prob = models.create_model(input_xs,
                                               label_count,
                                               FLAGS.model_architecture,
                                               is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.placeholder(tf.float32, [None, 10],
                                        name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.softmax_cross_entropy(
            onehot_labels=ground_truth_input, logits=logits)
    tf.summary.scalar('cross_entropy', cross_entropy_mean)
    with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
        learning_rate_input = tf.placeholder(tf.float32, [],
                                             name='learning_rate_input')
        momentum = tf.placeholder(tf.float32, [], name='momentum')
        # train_step = tf.train.GradientDescentOptimizer(learning_rate_input).minimize(cross_entropy_mean)
        train_step = tf.train.MomentumOptimizer(
            learning_rate_input, momentum,
            use_nesterov=True).minimize(cross_entropy_mean)
        # train_step = tf.train.AdamOptimizer(learning_rate_input).minimize(cross_entropy_mean)
        # train_step = tf.train.AdadeltaOptimizer(learning_rate_input).minimize(cross_entropy_mean)
        # train_step = tf.train.RMSPropOptimizer(learning_rate_input, momentum).minimize(cross_entropy_mean)

    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices,
                                  tf.argmax(ground_truth_input, 1))
    confusion_matrix = tf.confusion_matrix(tf.argmax(ground_truth_input, 1),
                                           predicted_indices,
                                           num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                              '/validation')

    tf.global_variables_initializer().run()

    start_epoch = 1
    start_checkpoint_epoch = 0
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        tmp = FLAGS.start_checkpoint
        tmp = tmp.split('-')
        tmp.reverse()
        start_checkpoint_epoch = int(tmp[0])
        start_epoch = start_checkpoint_epoch + 1

    # calculate training epochs max
    training_epochs_max = np.sum(training_epochs_list)

    # run training only when start_checkpoint differs from training_epochs_max
    if start_checkpoint_epoch != training_epochs_max:
        tf.logging.info('Training from epoch: %d ', start_epoch)

        # Save graph.pbtxt.
        tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                             FLAGS.model_architecture + '.pbtxt')

    # Training epoch
    for training_epoch in xrange(start_epoch, training_epochs_max + 1):
        # Figure out what the current learning rate is.
        training_epochs_sum = 0
        for i in range(len(training_epochs_list)):
            training_epochs_sum += training_epochs_list[i]
            if training_epoch <= training_epochs_sum:
                learning_rate_value = learning_rates_list[i]
                break

        set_size = mnist.train.num_examples
        for i in xrange(0, set_size, FLAGS.batch_size):
            # Pull the image samples we'll use for training.
            train_batch_xs, train_batch_ys = mnist.train.next_batch(
                FLAGS.batch_size)
            # Run the graph with this batch of training data.
            train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
                [
                    merged_summaries, evaluation_step, cross_entropy_mean,
                    train_step, increment_global_step
                ],
                feed_dict={
                    input_xs: train_batch_xs,
                    ground_truth_input: train_batch_ys,
                    learning_rate_input: learning_rate_value,
                    momentum: 0.95,
                    dropout_prob: 0.5
                })
            train_writer.add_summary(train_summary, i)
            tf.logging.info(
                'Epoch #%d, Step #%d: rate %f, accuracy %.1f%%, cross entropy %f'
                % (training_epoch, i, learning_rate_value,
                   train_accuracy * 100, cross_entropy_value))

            is_last_step = ((set_size - i) / FLAGS.batch_size <= 1)
            if is_last_step:
                set_size = mnist.validation.num_examples
                total_accuracy = 0
                total_conf_matrix = None
                for i in xrange(0, set_size, FLAGS.batch_size):
                    validation_batch_xs, validation_batch_ys = \
                      mnist.validation.next_batch(FLAGS.batch_size)
                    # Run a validation step and capture training summaries for TensorBoard
                    # with the `merged` op.
                    validation_summary, validation_accuracy, conf_matrix = sess.run(
                        [merged_summaries, evaluation_step, confusion_matrix],
                        feed_dict={
                            input_xs: validation_batch_xs,
                            ground_truth_input: validation_batch_ys,
                            dropout_prob: 1.0
                        })
                    validation_writer.add_summary(validation_summary,
                                                  training_epoch)
                    batch_size = min(FLAGS.batch_size, set_size - i)
                    total_accuracy += (validation_accuracy *
                                       batch_size) / set_size
                    if total_conf_matrix is None:
                        total_conf_matrix = conf_matrix
                    else:
                        total_conf_matrix += conf_matrix

                tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
                tf.logging.info(
                    'Epoch %d: Validation accuracy = %.1f%% (N=%d)' %
                    (training_epoch, total_accuracy * 100, set_size))

        # Save the model checkpoint periodically.
        if (training_epoch % FLAGS.save_step_interval == 0
                or training_epoch == training_epochs_max):
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path,
                            training_epoch)
            saver.save(sess, checkpoint_path, global_step=training_epoch)

    # For testing
    set_size = mnist.test.num_examples
    tf.logging.info('test size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for i in xrange(0, set_size, FLAGS.batch_size):
        test_batch_xs, test_batch_ys = mnist.test.next_batch(FLAGS.batch_size)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                input_xs: test_batch_xs,
                ground_truth_input: test_batch_ys,
                dropout_prob: 1.0
            })
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix

    tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.logging.info('Final test accuracy = %.1f%% (N=%d)' %
                    (total_accuracy * 100, set_size))
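
# Hedged helper sketch: the stage lookup implemented inline above, factored out
# for clarity. It mirrors the comma-separated --how_many_training_epochs /
# --learning_rate convention; the function name is illustrative only.
def lr_for_epoch(epoch, epochs_list, rates_list):
    """Return the rate for `epoch`; e.g. epochs_list=[10000, 3000] and
    rates_list=[0.001, 0.0001] give 0.001 up to epoch 10000, then 0.0001."""
    boundary = 0
    for n, rate in zip(epochs_list, rates_list):
        boundary += n
        if epoch <= boundary:
            return rate
    return rates_list[-1]  # past the schedule: keep the final rate

assert lr_for_epoch(10000, [10000, 3000], [0.001, 0.0001]) == 0.001
assert lr_for_epoch(10001, [10000, 3000], [0.001, 0.0001]) == 0.0001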
Exemple #40
0
def main():

    N = 10000
    d = 250
    alpha = np.ones((d),)
    alpha[d/2:] = 10.0
    sigma2 = 1.0
    X = np.random.rand(N, d)
    w, y = simulate(X, alpha, sigma2)

    batch_size = 64
    batch_X = tf.placeholder(tf.float32, (batch_size, d), name="X")
    batch_y = tf.placeholder(tf.float32, (batch_size, ), name="y")

    mf = bf.mean_field.MeanFieldInference(linear_ard_joint_density, 
                                          batch_X=batch_X, 
                                          batch_y=batch_y,
                                          N=N)

    a0 = 1.0
    b0 = 1.0
    c0 = 1.0
    d0 = 1.0
    
    alpha_default = np.ones((d,), dtype=np.float32) * a0/b0
    mf.add_latent("alpha", 
                  1/np.sqrt(alpha_default), 
                  1e-6 * np.ones((d,), dtype=np.float32), 
                  bf.transforms.exp_reciprocal,
                  shape=(d,))
    sigma2_default = np.array(d0/(c0+1)).astype(np.float32)
    mf.add_latent("sigma2", 
                  np.sqrt(sigma2_default), 
                  1e-6, 
                  bf.transforms.square,
                  shape=())
    mf.add_latent("w", 
                  tf.random_normal([d,], stddev=1.0, dtype=tf.float32),
                  1e-6 * np.ones((d,), dtype=np.float32),
                  shape=(d,))
    

    
    elbo = mf.build_stochastic_elbo(n_eps=5)
    sigma2s = mf.get_posterior_samples("sigma2")
    #alphas = mf.get_posterior_samples("alpha")
    alpha_mean_var = mf.latents["alpha"]["q_mean"]
    alpha_stddev_var = mf.latents["alpha"]["q_stddev"]
    alpha_var = mf.latents["alpha"]["samples"][0]
    
    train_step = tf.train.AdamOptimizer(0.01).minimize(-elbo)
    debug = tf.add_check_numerics_ops()
    init = tf.initialize_all_variables()
    merged = tf.merge_all_summaries()
    
    sess = tf.Session()
    writer = tf.train.SummaryWriter("/tmp/ard_logs", sess.graph_def)
    sess.run(init)
    
    for i, batch_xs, batch_ys in batch_generator(X, y, 64, max_steps=20000):
        fd = mf.sample_stochastic_inputs()
        fd[batch_X] = batch_xs
        fd[batch_y] = batch_ys

        (elbo_val, sigma2s_val, alpha_mean, alpha_stddev, alpha_val) = sess.run([elbo, sigma2s, alpha_mean_var, alpha_stddev_var, alpha_var], feed_dict=fd)
        
        print "step %d elbo %.2f sigma2 %.2f " % (i, elbo_val, np.mean(sigma2s_val))

        summary_str = sess.run(merged, feed_dict=fd)
        writer.add_summary(summary_str, i)


        try:
            sess.run(debug, feed_dict=fd)
        except tf.errors.InvalidArgumentError:  # the check-numerics op raises this on NaN/Inf
            bad = ~np.isfinite(alpha_val)
            print alpha_mean[bad]
            print alpha_stddev[bad]
            print alpha_val[bad]
            
        sess.run(train_step, feed_dict = fd)
Exemple #41
0
def main(_):
    image_filenames, label_filenames = camvid.get_filename_list(image_dir)
    val_image_filenames, val_label_filenames = camvid.get_filename_list(val_dir)

    with tf.Graph().as_default():
        with tf.device('/cpu:0'):
            # config = tf.ConfigProto(device_count = {'GPU': 0})
            config = tf.ConfigProto()
            config.gpu_options.allocator_type = 'BFC'
            sess = tf.InteractiveSession(config=config)

            train_data = tf.placeholder(tf.float32,
                                        shape=[BATCH_SIZE,
                                               IMAGE_HEIGHT,
                                               IMAGE_WIDTH,
                                               IMAGE_CHANNEL],
                                        name='train_data')
            train_labels = tf.placeholder(tf.int64,
                                          shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 1],
                                          name='train_labels')
            is_training = tf.placeholder(tf.bool, name='is_training')

            images, labels = camvid.CamVidInputs(image_filenames,
                                                 label_filenames,
                                                 BATCH_SIZE)
            val_images, val_labels = camvid.CamVidInputs(val_image_filenames,
                                                         val_label_filenames,
                                                         BATCH_SIZE)

            logits = segnet_vgg.inference(train_data, is_training, NUM_CLASSES)
            total_loss = loss(logits, train_labels)
            train_op = train(total_loss)
            check_op = tf.add_check_numerics_ops()

            merged_summary_op = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter('train', sess.graph)
            saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
            if not os.path.exists(LOG_DIR):
                os.makedirs(LOG_DIR)
            checkpoint_path = os.path.join(LOG_DIR, "segnet.ckpt")

            sess.run(tf.global_variables_initializer())

            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            for i in range(EPOCH):
                image_batch, label_batch = sess.run([images, labels])
                feed_dict = {
                    train_data: image_batch,
                    train_labels: label_batch,
                    is_training: True
                }
                _, _, _, summary = sess.run([train_op, total_loss, check_op, merged_summary_op],
                                            feed_dict=feed_dict)
                if i % 10 == 0:
                    print("Start validating...")
                    val_images_batch, val_labels_batch = sess.run([val_images, val_labels])
                    loss_value = total_loss.eval(feed_dict={train_data: val_images_batch,
                                                            train_labels: val_labels_batch,
                                                            is_training: True})
                    print("Epoch: %d, Loss: %g" % (i, loss_value))
                    saver.save(sess, checkpoint_path)
                # write logs at every iteration
                summary_writer.add_summary(summary, i)

            coord.request_stop()
            coord.join(threads)
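
# Hedged mini-sketch of the queue-runner lifecycle used above: the Coordinator
# plus start_queue_runners launch the input threads and shut them down cleanly.
# The toy input_producer stands in for the CamVid input pipeline.
import tensorflow as tf

queue = tf.train.input_producer([1.0, 2.0, 3.0])
value = queue.dequeue()

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print(sess.run(value), sess.run(value))
    coord.request_stop()
    coord.join(threads)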
Exemple #42
0
def main(_):

    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    model_settings = models.prepare_model_settings(
        len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)

    audio_processor = input_data.AudioProcessor(FLAGS.data_url, FLAGS.data_dir,
                                                FLAGS.silence_percentage,
                                                FLAGS.unknown_percentage,
                                                FLAGS.wanted_words.split(','),
                                                FLAGS.validation_percentage,
                                                FLAGS.testing_percentage,
                                                model_settings)

    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    training_steps = FLAGS.how_many_training_steps
    learning_rate = FLAGS.learning_rate

    # -----------------------------------------------------------------------
    # -----------------------------Placeholder-------------------------------
    # -----------------------------------------------------------------------

    fingerprint_input = tf.placeholder(tf.float32, [None, fingerprint_size],
                                       name='fingerprint_input')

    logits, dropout_prob = models.create_model(fingerprint_input,
                                               model_settings,
                                               FLAGS.model_architecture,
                                               is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.placeholder(tf.int64, [None],
                                        name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # -----------------------------------------------------------------------
    # -----------------Back propagation and training evaluation--------------
    # -----------------------------------------------------------------------

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)

    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            cross_entropy_mean)

    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(ground_truth_input,
                                           predicted_indices,
                                           num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                              '/validation')

    tf.global_variables_initializer().run()

    start_step = 1

    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save list of words.
    with gfile.GFile(
            os.path.join(FLAGS.train_dir,
                         FLAGS.model_architecture + '_labels.txt'), 'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # -----------------------------------------------------------------------
    # -----------------Training and validation-------------------------------
    # -----------------------------------------------------------------------

    # Training loop.
    training_steps_max = training_steps

    # Print the local time of beginning training
    beg_time = datetime.datetime.now()
    print("Beginning time : " + str(beg_time))

    for training_step in xrange(start_step, training_steps_max + 1):

        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)

        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries, evaluation_step, cross_entropy_mean,
                train_step, increment_global_step
            ],
            feed_dict={
                fingerprint_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                dropout_prob: 0.5
            })

        train_writer.add_summary(train_summary, training_step)
        tf.logging.info(
            'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
            (training_step, learning_rate, train_accuracy * 100,
             cross_entropy_value))
        is_last_step = (training_step == training_steps_max)

        # Validation
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:

            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None

            for i in xrange(0, set_size, FLAGS.batch_size):

                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(FLAGS.batch_size, i,
                                             model_settings, 0.0, 0.0, 0,
                                             'validation', sess))

                # Run a validation step and capture training summaries for TensorBoard
                # with the `merged` op.
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })

                validation_writer.add_summary(validation_summary,
                                              training_step)
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size

                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix

            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

        # Save the model checkpoint periodically.
        if (training_step % FLAGS.save_step_interval == 0
                or training_step == training_steps_max):
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')

            tf.logging.info('Saving to "%s-%d"', checkpoint_path,
                            training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)

    # Print the local time of ending training
    print("Beginning time : " + str(beg_time))
    print("Ending time : " + str(datetime.datetime.now()))

    # -----------------------------------------------------------------------
    # ------------------------------Test-------------------------------------
    # -----------------------------------------------------------------------

    set_size = audio_processor.set_size('testing')
    tf.logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None

    for i in xrange(0, set_size, FLAGS.batch_size):

        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)

        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })

        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (test_accuracy * batch_size) / set_size

        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix

    tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.logging.info('Final test accuracy = %.1f%% (N=%d)' %
                    (total_accuracy * 100, set_size))
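
# Hedged mini-sketch of the check_nans gating used above: the train op is made
# to depend on tf.add_check_numerics_ops(), so a step aborts with an
# InvalidArgumentError as soon as any tensor goes NaN/Inf. The toy loss is
# chosen so gradient descent drives it into log(<=0).
import tensorflow as tf

x = tf.Variable(2.0)
loss = tf.log(x)  # becomes NaN once x goes non-positive
checks = tf.add_check_numerics_ops()
with tf.control_dependencies([checks]):
    train_op = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    try:
        for _ in range(10):
            sess.run(train_op)
    except tf.errors.InvalidArgumentError:
        print('check-numerics tripped on a NaN/Inf tensor')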
Exemple #43
0
def main():

    if (len(sys.argv) == 1):
        raise NameError('[ERROR] No dataset key')
    if (sys.argv[1] == 'imagenetval'):
        FLAGS.updates_per_epoch = 49000
        FLAGS.num_test_batches = 1000
        FLAGS.in_featdir = 'data/featslist/imagenetval/'
        FLAGS.in_lvdir = 'data/output/imagenetval/'
    elif (sys.argv[1] == 'lfw'):
        FLAGS.updates_per_epoch = 12233
        FLAGS.num_test_batches = 1000
        FLAGS.in_featdir = 'data/featslist/lfw/'
        FLAGS.in_lvdir = 'data/output/lfw/'
    elif (sys.argv[1] == 'church'):
        FLAGS.updates_per_epoch = 125227
        FLAGS.num_test_batches = 1000
        FLAGS.in_featdir = 'data/featslist/church/'
        FLAGS.in_lvdir = 'data/output/church/'
    else:
        raise NameError('[ERROR] Incorrect dataset key')

    data_loader = zhangfeats_loader(os.path.join(FLAGS.in_featdir, 'list.train.txt'), \
     os.path.join(FLAGS.in_featdir, 'list.test.txt'),\
     os.path.join(FLAGS.in_lvdir, 'lv_color_train.mat.npy'),\
     os.path.join(FLAGS.in_lvdir, 'lv_color_test.mat.npy'))

    #Inputs
    lf = layer_factory()
    input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.feats_height, \
      FLAGS.feats_width, FLAGS.feats_nch])
    output_gt_tensor = tf.placeholder(tf.float32,
                                      [FLAGS.batch_size, FLAGS.hidden_size])
    is_training = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)

    #Inference
    with tf.variable_scope('Inference', reuse=False):
        output_activ = cnn_feedforward(lf,
                                       input_tensor,
                                       is_training,
                                       keep_prob,
                                       reuse=False)

    with tf.variable_scope('Inference', reuse=True):
        output_test_activ = cnn_feedforward(lf,
                                            input_tensor,
                                            is_training,
                                            keep_prob,
                                            reuse=True)

    #Loss and gradient descent step
    loss, _, _, _ = compute_gmm_loss(output_gt_tensor, output_activ, summ=True)
    loss_test, pi_test, mu_test, sigma_test = compute_gmm_loss(
        output_gt_tensor, output_test_activ)

    train_step = optimize(loss, FLAGS.lr)

    #Standard steps
    check_nan_op = tf.add_check_numerics_ops()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=0)
    summary_op = tf.summary.merge_all()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    train_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.in_lvdir, 'logs_mdn'), sess.graph)

    sess.run(init)

    if (FLAGS.is_train):
        for epoch in range(FLAGS.max_epoch):
            training_loss = 0.

            data_loader.random_reset()
            for i in range(FLAGS.updates_per_epoch):
                batch, batch_gt = data_loader.train_next_batch(
                    FLAGS.batch_size)
                feed_dict = {input_tensor:batch, output_gt_tensor:batch_gt, \
                 is_training:True, keep_prob:.75}
                _, _, loss_value, summary_str = sess.run(\
                 [check_nan_op, train_step, loss, summary_op], \
                 feed_dict)
                train_writer.add_summary(summary_str,
                                         epoch * FLAGS.updates_per_epoch + i)
                training_loss = training_loss + loss_value

            print('[DEBUG] Epoch# %d, Loss: %f' %
                  (epoch, (training_loss * 1.) / FLAGS.updates_per_epoch))

            save_chkpt(saver, epoch, sess, os.path.join(FLAGS.in_lvdir, 'models_mdn'), \
                 prefix='model_%d_exp' % FLAGS.nmix)
    else:
        load_chkpt(saver, sess, os.path.join(FLAGS.in_lvdir, 'models_mdn'))

    test_loss = 0.
    data_loader.reset()
    lv_test_codes = np.zeros((0, (FLAGS.hidden_size + 1 + 1) * FLAGS.nmix),
                             dtype='f')
    for i in range(FLAGS.num_test_batches):
        batch, batch_gt = data_loader.test_next_batch(FLAGS.batch_size)
        feed_dict = {input_tensor:batch, output_gt_tensor:batch_gt, \
         is_training:False, keep_prob:1.}
        _, loss_value, output_pi, output_mu, output_sigma = \
         sess.run([check_nan_op, loss_test, pi_test, mu_test, sigma_test], feed_dict)

        test_loss = test_loss + loss_value
        output = np.concatenate((output_mu, output_sigma, output_pi), axis=1)
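        # row layout presumably: mu (hidden_size*nmix) | sigma (nmix) | pi (nmix), matching the (hidden_size+1+1)*nmix width above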
        lv_test_codes = np.concatenate((lv_test_codes, output), axis=0)

    print('[DEBUG] Test Loss: %f' %
          ((test_loss * 1.) / FLAGS.num_test_batches))
    np.save(os.path.join(FLAGS.in_lvdir, 'lv_color_mdn_test.mat'),
            lv_test_codes)
    print(lv_test_codes.shape)

    sess.close()
Exemple #44
0
def train():
    """Train fish_cubes for a number of steps."""
    # Get the sets of images and labels for training, validation, and
    # test
    if FLAGS.num_epochs:
        num_epochs = FLAGS.num_epochs
    else:
        num_epochs = None

    # Track global step across multiple iterations.  This is updated in
    # the optimizer.
    with tf.variable_scope('control'):
        global_step = tf.get_variable('global_step',
                                      dtype=tf.int32,
                                      initializer=0,
                                      trainable=False)

    # seed provides the mechanism to control the shuffling which takes place reading input
    seed = tf.placeholder(tf.int64, shape=())

    # Generate placeholders for the images and labels.
    iterator = input_data.input_pipeline_binary(
        FLAGS.data_dir,
        FLAGS.batch_size,
        fake_data=FLAGS.fake_data,
        num_epochs=num_epochs,
        read_threads=FLAGS.read_threads,
        shuffle_size=FLAGS.shuffle_size,
        num_expected_examples=FLAGS.num_examples,
        seed=seed)
    image_path, label_path, images, labels = iterator.get_next()

    if FLAGS.verbose:
        print_op = tf.print("images and labels this batch: ", image_path,
                            label_path, labels)
    else:
        print_op = tf.constant('No printing')

    if FLAGS.random_rotation:
        images, labels = harmonics.apply_random_rotation(images, labels)

    # Build a Graph that computes predictions from the inference model.
    logits = topology.inference(images, FLAGS.network_pattern)

    # Add to the Graph the Ops for loss calculation.
    loss = topology.binary_loss(logits, labels)
    print('loss: ', loss)

    if FLAGS.check_numerics:
        if FLAGS.random_rotation:
            sys.exit('check_numerics is not compatible with random_rotation')
        check_numerics_op = tf.add_check_numerics_ops()
    else:
        check_numerics_op = tf.constant('not checked')

    var_pfx_map = {'cnn': 'cnn/', 'classifier': 'image_binary_classifier/'}

    if len(FLAGS.starting_snapshot):
        keys = FLAGS.snapshot_load.split(',') if FLAGS.snapshot_load else [
            'all'
        ]
        keys = [k.strip() for k in keys]
        if 'all' in keys:
            vars_to_load = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        else:
            assert all([k in var_pfx_map
                        for k in keys]), 'unknown key to load: %s' % keys
            vars_to_load = [global_step]
            for k in keys:
                vars_to_load.extend([
                    v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
                    if v.name.startswith(var_pfx_map[k])
                ])
        if FLAGS.reset_global_step:
            vars_to_load.remove(global_step)
    else:
        vars_to_load = []

    vars_to_hold_constant = []  # empty list means hold nothing constant
    if FLAGS.hold_constant is not None:
        keys = [k.strip() for k in FLAGS.hold_constant.split(',')]
        assert all([k in var_pfx_map
                    for k in keys]), 'unknown key to hold constant: %s' % keys
        for k in keys:
            vars_to_hold_constant.extend([
                v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                if v.name.startswith(var_pfx_map[k])
            ])
    print('not subject to training: %s' %
          [v.name for v in vars_to_hold_constant])

    if FLAGS.starting_snapshot and len(FLAGS.starting_snapshot):
        vars_in_snapshot = [
            k for k in (pywrap_tensorflow.NewCheckpointReader(
                FLAGS.starting_snapshot).get_variable_to_shape_map())
        ]
    else:
        vars_in_snapshot = []
    vars_in_snapshot = set(vars_in_snapshot)
    print('vars in snapshot: %s' % vars_in_snapshot)

    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                           epsilon=0.1)
    elif FLAGS.optimizer == 'SGD':
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=FLAGS.learning_rate)
    else:
        raise RuntimeError('Unimplemented optimizer %s was requested' %
                           FLAGS.optimizer)
    train_op = topology.training(loss,
                                 FLAGS.learning_rate,
                                 exclude=vars_to_hold_constant,
                                 optimizer=optimizer)

    # Also load any variables the optimizer created for variables we want to load
    vars_to_load.extend([
        optimizer.get_slot(var, name) for name in optimizer.get_slot_names()
        for var in vars_to_load
    ])
    vars_to_load = [var for var in vars_to_load if var is not None]
    vars_to_load = list(set(vars_to_load))  # remove duplicates

    # Filter vars to load based on what is in the checkpoint
    in_vars = []
    out_vars = []
    for var in vars_to_load:
        if get_cpt_name(var) in vars_in_snapshot:
            in_vars.append(var)
        else:
            out_vars.append(var)
    if out_vars:
        print(
            'WARNING: cannot load the following vars because they are not in the snapshot: %s'
            % [var.name for var in out_vars])
    if in_vars:
        print('loading from checkpoint: %s' % [var.name for var in in_vars])
        tf.train.init_from_checkpoint(
            FLAGS.starting_snapshot,
            {get_cpt_name(var): var
             for var in in_vars})

    # Try making histograms of *everything*
    for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        if var.name.startswith('cnn') or var.name.startswith(
                'image_binary_classifier'):
            tf.summary.histogram(var.name, var)

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=10)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    # Create a session for running operations in the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.verbose))

    # Create the graph, etc.
    # we either have no snapshot and must initialize everything, or we do have a snapshot
    # and have already set appropriate vars to be initialized from it
    init_op = tf.variables_initializer(
        tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
    sess.run(init_op)

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

    loss_value = -1.0  # avoid a corner case where it is unset on error
    duration = 0.0  # ditto
    num_chk = None  # ditto
    #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # Loop through training epochs
    for epoch in range(num_epochs):
        try:
            sess.run(iterator.initializer, feed_dict={seed: epoch})
            saver.save(sess, FLAGS.log_dir + 'cnn', global_step=global_step)
            last_save_epoch = 0

            while True:
                # Run training steps or whatever
                start_time = time.time()
                _, loss_value, num_chk, _, gstp = sess.run(
                    [train_op, loss, check_numerics_op, print_op, global_step])
                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                if ((gstp + 1) % 100 == 0 or gstp < 10):
                    # Print status to stdout.
                    print(
                        'Global step %d epoch %d: numerics = %s, batch mean loss = %.2f (%.3f sec)'
                        % (gstp, epoch, num_chk, loss_value.mean(), duration))
                    # Update the events file.
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, gstp)
                    summary_writer.flush()

                # Save a checkpoint periodically.
                if (epoch + 1) % 100 == 0 and epoch != last_save_epoch:
                    # If log_dir is /tmp/cnn/ then checkpoints are saved in that
                    # directory, prefixed with 'cnn'.
                    print('saving checkpoint at global step %d, epoch %s' %
                          (gstp, epoch))
                    saver.save(sess,
                               FLAGS.log_dir + 'cnn',
                               global_step=global_step)
                    last_save_epoch = epoch

        except tf.errors.OutOfRangeError as e:
            print('Finished epoch {}'.format(epoch))


#         finally:
#             # When done, ask the threads to stop.
#             coord.request_stop()
#             print('Final Step %d: numerics = %s, loss = %.2f (%.3f sec)'
#                   % (step, num_chk, loss_value, duration))
#             summary_str = sess.run(summary_op, num_chk)
#             summary_writer.add_summary(summary_str, step)
#             summary_writer.flush()

# Wait for threads to finish.
#        coord.join(threads, stop_grace_period=10)

    print('Final Step %d: numerics = %s, batch mean loss = %.2f (%.3f sec)' %
          (gstp, num_chk, loss_value.mean(), duration))
    try:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, gstp)
        summary_writer.flush()
    except tf.errors.OutOfRangeError as e:
        print('No final summary to write')

    sess.close()
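
# Hedged sketch of the partial-restore pattern above: tf.train.init_from_checkpoint
# rewrites matching variables' initializers, so the later variables_initializer()
# call pulls some values from the snapshot and leaves the rest at their defaults.
# The /tmp path and scope names below are illustrative only.
import os
import tensorflow as tf

ckpt_dir = '/tmp/partial_restore_demo'
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)

# 1) build and save a tiny checkpoint containing a cnn/ scope
g1 = tf.Graph()
with g1.as_default():
    with tf.variable_scope('cnn'):
        tf.get_variable('w', shape=[2], initializer=tf.ones_initializer())
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = saver.save(sess, os.path.join(ckpt_dir, 'model.ckpt'))

# 2) in a fresh graph, load only cnn/ from the snapshot; classifier/ keeps defaults
g2 = tf.Graph()
with g2.as_default():
    with tf.variable_scope('cnn'):
        w = tf.get_variable('w', shape=[2], initializer=tf.zeros_initializer())
    with tf.variable_scope('classifier'):
        tf.get_variable('b', shape=[2], initializer=tf.zeros_initializer())
    tf.train.init_from_checkpoint(ckpt, {'cnn/': 'cnn/'})
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(w))  # [1. 1.] from the checkpoint, not zeros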
Exemple #45
0
def main(_):
  num_features = 40
  num_time_steps = 98
  # We want to see all the logging messages for this tutorial.
  tf.logging.set_verbosity(tf.logging.INFO)

  # Start a new TensorFlow session.
  sess = tf.InteractiveSession()

  # Begin by making sure we have the training data we need. If you already have
  # training data of your own, use `--data_url= ` on the command line to avoid
  # downloading.
  model_settings = models.prepare_model_settings(
      len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
      FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
      FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
  audio_processor = input_data.AudioProcessor(
      FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
      FLAGS.unknown_percentage,
      FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
      FLAGS.testing_percentage, model_settings)
  fingerprint_size = model_settings['fingerprint_size']
  label_count = model_settings['label_count']
  time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)
  # Figure out the learning rates for each training phase. Since it's often
  # effective to have high learning rates at the start of training, followed by
  # lower levels towards the end, the number of steps and learning rates can be
  # specified as comma-separated lists to define the rate at each stage. For
  # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
  # will run 13,000 training loops in total, with a rate of 0.001 for the first
  # 10,000, and 0.0001 for the final 3,000.
  training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
  learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
  if len(training_steps_list) != len(learning_rates_list):
    raise Exception(
        '--how_many_training_steps and --learning_rate must be equal length '
        'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                   len(learning_rates_list)))
  if FLAGS.model_architecture != 'rnn' and FLAGS.model_architecture != 'rnn_s':
      fingerprint_input = tf.placeholder(tf.float32, [None, fingerprint_size], name='fingerprint_input')
  else:
      num_audio_features = num_features
      time_steps = fingerprint_size // num_audio_features  # integer count of time steps
      fingerprint_input = tf.placeholder(tf.float32, [time_steps, None, num_audio_features], name='fingerprint_input')
      print('fingerprint_input shape: %s' % fingerprint_input.get_shape())

  logits, dropout_prob = models.create_model(
      fingerprint_input,
      model_settings,
      FLAGS.model_architecture,
      is_training=True)

  # Define loss and optimizer
  ground_truth_input = tf.placeholder(
      tf.int64, [None], name='groundtruth_input')

  # Optionally we can add runtime checks to spot when NaNs or other symptoms of
  # numerical errors start occurring during training.
  control_dependencies = []
  if FLAGS.check_nans:
    checks = tf.add_check_numerics_ops()
    control_dependencies = [checks]

  # Create the back propagation and training evaluation machinery in the graph.
  with tf.name_scope('cross_entropy'):
    cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
        labels=ground_truth_input, logits=logits)
  tf.summary.scalar('cross_entropy', cross_entropy_mean)
  with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
    learning_rate_input = tf.placeholder(
        tf.float32, [], name='learning_rate_input')
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate_input).minimize(cross_entropy_mean)
  predicted_indices = tf.argmax(logits, 1)
  correct_prediction = tf.equal(predicted_indices, ground_truth_input)
  confusion_matrix = tf.confusion_matrix(
      ground_truth_input, predicted_indices, num_classes=label_count)
  evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  tf.summary.scalar('accuracy', evaluation_step)

  global_step = tf.train.get_or_create_global_step()
  increment_global_step = tf.assign(global_step, global_step + 1)

  saver = tf.train.Saver(tf.global_variables())

  # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
  merged_summaries = tf.summary.merge_all()
  train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/rnn3/train',
                                       sess.graph)
  validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/rnn3/validation')

  tf.global_variables_initializer().run()

  start_step = 1

  if FLAGS.start_checkpoint:
    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
    start_step = global_step.eval(session=sess)

  tf.logging.info('Training from step: %d ', start_step)

  # Save graph.pbtxt.
  tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                       FLAGS.model_architecture + '.pbtxt')

  # Save list of words.
  with gfile.GFile(
      os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'),
      'w') as f:
    f.write('\n'.join(audio_processor.words_list))

  # Training loop.
  training_steps_max = np.sum(training_steps_list)
  for training_step in xrange(start_step, training_steps_max + 1):
    # Figure out what the current learning rate is.
    training_steps_sum = 0
    for i in range(len(training_steps_list)):
      training_steps_sum += training_steps_list[i]
      if training_step <= training_steps_sum:
        learning_rate_value = learning_rates_list[i]
        break
    # Pull the audio samples we'll use for training.
    train_fingerprints, train_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
        FLAGS.background_volume, time_shift_samples, 'training', sess)
    # If we are using an RNN, reshape train_fingerprints from
    # (batch_size, fingerprint_size) to (time_steps, batch_size, feature_size),
    # e.g. (100, 3920) -> (14, 100, 280).
    if FLAGS.model_architecture == 'rnn' or FLAGS.model_architecture == 'rnn_s':
        shape_data = train_fingerprints.shape
        train_fingerprints = np.reshape(train_fingerprints, [shape_data[0], num_time_steps, -1])
        train_fingerprints = np.transpose(train_fingerprints, (1, 0, 2))

    # Run the graph with this batch of training data.
    train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
        [
            merged_summaries, evaluation_step, cross_entropy_mean, train_step,
            increment_global_step
        ],
        feed_dict={
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 0.5
        })
    train_writer.add_summary(train_summary, training_step)
    tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                    (training_step, learning_rate_value, train_accuracy * 100,
                     cross_entropy_value))
    is_last_step = (training_step == training_steps_max)
    if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
      set_size = audio_processor.set_size('validation')
      total_accuracy = 0
      total_conf_matrix = None
      for i in xrange(0, set_size, FLAGS.batch_size):
        validation_fingerprints, validation_ground_truth = (
            audio_processor.get_data(FLAGS.batch_size, i, model_settings, 0.0,
                                     0.0, 0, 'validation', sess))
        if FLAGS.model_architecture == 'rnn' or FLAGS.model_architecture == 'rnn_s':
            # Reshape validation data the same way as the training data:
            # (batch_size, fingerprint_size) -> (time_steps, batch_size, feature_size).
            shape_data = validation_fingerprints.shape
            validation_fingerprints = np.reshape(validation_fingerprints, [shape_data[0], num_time_steps, -1])
            validation_fingerprints = np.transpose(validation_fingerprints, (1, 0, 2))
            
        # Run a validation step and capture training summaries for TensorBoard
        # with the `merged` op.
        validation_summary, validation_accuracy, conf_matrix = sess.run(
            [merged_summaries, evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: validation_fingerprints,
                ground_truth_input: validation_ground_truth,
                dropout_prob: 1.0
            })
        validation_writer.add_summary(validation_summary, training_step)
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (validation_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
          total_conf_matrix = conf_matrix
        else:
          total_conf_matrix += conf_matrix
      tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
      tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                      (training_step, total_accuracy * 100, set_size))

    # Save the model checkpoint periodically.
    if (training_step % FLAGS.save_step_interval == 0 or
        training_step == training_steps_max):
      checkpoint_path = os.path.join(FLAGS.train_dir,
                                     FLAGS.model_architecture + '.ckpt')
      tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
      saver.save(sess, checkpoint_path, global_step=training_step)

  set_size = audio_processor.set_size('testing')
  tf.logging.info('set_size=%d', set_size)
  total_accuracy = 0
  total_conf_matrix = None
  for i in xrange(0, set_size, FLAGS.batch_size):
    test_fingerprints, test_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
    # If we are using an RNN, reshape the test batch *before* running the
    # graph: (batch_size, fingerprint_size) -> (time_steps, batch_size, feature_size).
    if FLAGS.model_architecture == 'rnn' or FLAGS.model_architecture == 'rnn_s':
        shape_data = test_fingerprints.shape
        test_fingerprints = np.reshape(test_fingerprints, [shape_data[0], num_time_steps, -1])
        test_fingerprints = np.transpose(test_fingerprints, (1, 0, 2))
    test_accuracy, conf_matrix = sess.run(
        [evaluation_step, confusion_matrix],
        feed_dict={
            fingerprint_input: test_fingerprints,
            ground_truth_input: test_ground_truth,
            dropout_prob: 1.0
        })

    batch_size = min(FLAGS.batch_size, set_size - i)
    total_accuracy += (test_accuracy * batch_size) / set_size
    if total_conf_matrix is None:
      total_conf_matrix = conf_matrix
    else:
      total_conf_matrix += conf_matrix
  tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
  tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % (total_accuracy * 100,
                                                           set_size))
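
The staged learning-rate logic above is easy to test in isolation. A minimal standalone sketch (the function name learning_rate_for_step is mine, not part of the script): given the comma-separated step and rate lists, return the rate that applies at a given step.

def learning_rate_for_step(step, steps_list, rates_list):
    # piecewise-constant schedule: rates_list[i] applies while step is
    # within the first sum(steps_list[:i+1]) steps
    steps_sum = 0
    for phase_steps, rate in zip(steps_list, rates_list):
        steps_sum += phase_steps
        if step <= steps_sum:
            return rate
    return rates_list[-1]  # past the schedule: keep the final rate

assert learning_rate_for_step(10000, [10000, 3000], [0.001, 0.0001]) == 0.001
assert learning_rate_for_step(10001, [10000, 3000], [0.001, 0.0001]) == 0.0001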
Exemple #46
0
    def train(self, model, dataset, re_epochs=0, re_steps=0, debug_NaN=False):
        """
        Train the model on a particular dataset.
        """

        if debug_NaN:
            # Add checking ops
            self.check_op = tf.add_check_numerics_ops()

        # Parameters log file
        if model.config.saving:
            model.parameters_log()

        # Save points of the kernel to file
        self.save_kernel_points(model, 0)

        if model.config.saving:
            # Training log file
            with open(join(model.saving_path, 'training.txt'), "w") as file:
                file.write(
                    'Steps out_loss reg_loss point_loss train_accuracy time memory\n'
                )

            # Killing file (simply delete this file when you want to stop the training)
            if not exists(join(model.saving_path, 'running_PID.txt')):
                with open(join(model.saving_path, 'running_PID.txt'),
                          "w") as file:
                    file.write('Launched with PyCharm')

        # Train loop variables
        t0 = time.time()
        self.training_step = re_steps
        self.training_epoch = re_epochs
        print('restored at epoch: ', self.training_epoch)
        mean_dt = np.zeros(2)
        last_display = t0
        self.training_preds = np.zeros(0)
        self.training_labels = np.zeros(0)
        epoch_n = 1
        mean_epoch_n = 0

        if self.training_epoch != 0:

            op = self.learning_rate.assign(
                tf.multiply(self.learning_rate,
                            model.config.lr_decays[self.training_epoch]))
            print('setting learning rate')
            self.sess.run(op)

        # Initialise iterator with train data
        self.sess.run(dataset.train_init_op)

        # Start loop
        while self.training_epoch < model.config.max_epoch:

            try:
                # Run one step of the model.
                t = [time.time()]
                ops = [
                    self.train_op,
                    model.output_loss,
                    model.regularization_loss,
                    model.offsets_loss,
                    model.logits,
                    model.labels,
                    model.class_logits,
                    model.inputs,
                    #model.inputs['batch_ind'],
                    self.accuracy
                ]

                # If NaN appears in a training, use this debug block
                if debug_NaN:
                    all_values = self.sess.run(
                        ops + [self.check_op] + list(dataset.flat_inputs),
                        {model.dropout_prob: 0.5})
                    # ops above = [train_op, L_out, L_reg, L_p, logits, labels,
                    # class_logits, inputs, accuracy]; the flat_inputs follow the check op.
                    _, L_out, L_reg, L_p, probs, labels, class_logits, inputs, acc = all_values[:9]
                    if np.isnan(L_reg) or np.isnan(L_out):
                        input_values = all_values[10:]
                        self.debug_nan(model, input_values, probs)
                        raise ValueError('NaN loss detected; debug inputs dumped')

                else:
                    # Run normal
                    #_, L_out, L_reg, L_p, probs, labels, class_prob, cloud_label, acc = self.sess.run(ops, {model.dropout_prob: 0.5})
                    _, L_out, L_reg, L_p, probs, labels, class_logits, inputs, acc = self.sess.run(
                        ops, {model.dropout_prob: model.config.dropout_prob})

                t += [time.time()]

                # Stack prediction for training confusion
                if model.config.network_model == 'classification':
                    self.training_preds = np.hstack(
                        (self.training_preds, np.argmax(probs, axis=1)))
                    self.training_labels = np.hstack(
                        (self.training_labels, labels))
                t += [time.time()]

                # Average timing
                mean_dt = 0.95 * mean_dt + 0.05 * (np.array(t[1:]) -
                                                   np.array(t[:-1]))

                # Console display (only one per second)
                if (t[-1] - last_display) > 1.0:
                    last_display = t[-1]
                    message = 'Step {:08d} L_out={:5.3f} L_reg={:5.3f} L_p={:5.3f} Acc={:4.2f} ' \
                              '---{:8.2f} {:8.2f} ms/batch (Averaged)'
                    print(
                        message.format(self.training_step, L_out, L_reg, L_p,
                                       acc, 1000 * mean_dt[0],
                                       1000 * mean_dt[1]))

                # Log file
                if model.config.saving:
                    process = psutil.Process(os.getpid())
                    with open(join(model.saving_path, 'training.txt'),
                              "a") as file:
                        message = '{:d} {:.3f} {:.3f} {:.3f} {:.2f} {:.2f} {:.1f}\n'
                        file.write(
                            message.format(self.training_step, L_out, L_reg,
                                           L_p, acc, t[-1] - t0,
                                           process.memory_info().rss * 1e-6))

                # Check kill signal (running_PID.txt deleted)
                if model.config.saving and not exists(
                        join(model.saving_path, 'running_PID.txt')):
                    break

                # These datasets have no natural end-of-epoch signal, so force
                # the epoch-end handling below after epoch_steps batches.
                if model.config.dataset.startswith(
                        'ShapeNetPart') or model.config.dataset.startswith(
                            'ModelNet'):
                    if model.config.epoch_steps and epoch_n > model.config.epoch_steps:
                        raise tf.errors.OutOfRangeError(None, None, '')

            except tf.errors.OutOfRangeError:

                # End of train dataset, update average of epoch steps
                mean_epoch_n += (epoch_n -
                                 mean_epoch_n) / (self.training_epoch + 1)
                epoch_n = 0
                self.int = int(np.floor(mean_epoch_n))
                model.config.epoch_steps = int(np.floor(mean_epoch_n))
                if model.config.saving:
                    model.parameters_log()

                # Snapshot
                if model.config.saving and (self.training_epoch + 1
                                            ) % model.config.snapshot_gap == 0:

                    # Tensorflow snapshot
                    snapshot_directory = join(model.saving_path, 'snapshots')
                    if not exists(snapshot_directory):
                        makedirs(snapshot_directory)
                    self.saver.save(self.sess,
                                    snapshot_directory + '/snap',
                                    global_step=self.training_step + 1)

                    # Save points
                    self.save_kernel_points(model, self.training_epoch)

                # Update learning rate
                if self.training_epoch in model.config.lr_decays:
                    op = self.learning_rate.assign(
                        tf.multiply(
                            self.learning_rate,
                            model.config.lr_decays[self.training_epoch]))
                    self.sess.run(op)

                # Increment
                self.training_epoch += 1

                # Validation
                if model.config.network_model == 'classification':
                    self.validation_error(model, dataset)
                elif model.config.network_model == 'segmentation':
                    self.segment_validation_error(model, dataset)
                elif model.config.network_model == 'multi_segmentation':
                    self.multi_validation_error(model, dataset)
                elif model.config.network_model == 'cloud_segmentation':
                    self.cloud_validation_error(model, dataset)
                else:
                    raise ValueError(
                        'No validation method implemented for this network type'
                    )

                self.training_preds = np.zeros(0)
                self.training_labels = np.zeros(0)

                # Reset iterator on training data
                self.sess.run(dataset.train_init_op)

            except tf.errors.InvalidArgumentError as e:

                print('Caught a NaN error :')
                print(e.error_code)
                print(e.message)
                print(e.op)
                print(e.op.name)
                print([t.name for t in e.op.inputs])
                print([t.name for t in e.op.outputs])

                raise  # re-raise after printing the NaN diagnostics

            # Increment steps
            self.training_step += 1
            epoch_n += 1

        # Remove File for kill signal
        if exists(join(model.saving_path, 'running_PID.txt')):
            remove(join(model.saving_path, 'running_PID.txt'))
        self.sess.close()
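
The recurring pattern in these examples is worth isolating: hang tf.add_check_numerics_ops() on the training op via a control dependency, so any NaN/Inf tensor fails fast with an InvalidArgumentError naming the offending op. A minimal TF1-style sketch (the names x, w, loss, train_op are illustrative, not from the code above):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
w = tf.Variable(tf.random_normal([4, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))
# check every float tensor created so far for NaN/Inf at run time
check_op = tf.add_check_numerics_ops()
with tf.control_dependencies([check_op]):
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

Note that tf.add_check_numerics_ops() only covers ops that already exist when it is called, so ops added afterwards (here, the gradient ops created by minimize) are not checked.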
Exemple #47
0
        fix_lengths=False,
        threshold=threshold)
    # lengths = tf.identity(lengths)
    grad, = tf.gradients(tf.boolean_mask(lengths, hit), scale_factor)

with tf.Session(graph=graph) as sess:
    # l0, g = sess.run((lengths, grad))
    # print(l0, g)
    l0, g0, h0 = sess.run((lengths, grad, hit))

graph = tf.Graph()
with graph.as_default():
    sdf, eye, directions, scale_factor = build_graph()
    # lengths, passed = render.get_linearized_solution_lengths(
    #     eye, directions, l0, sdf)
    lengths = render.fix_length_gradient(eye, directions, l0, sdf)
    # lengths, passed = render.fix_length_gradient(
    #     eye, directions, l0, sdf)
    # passed = tf.Print(
    #     passed,
    #     [tf.reduce_sum(tf.cast(tf.logical_and(h0, passed), tf.uint8))])
    # hit = tf.logical_and(h0, tf.logical_not(passed))
    lengths = tf.minimum(max_length, lengths)
    grad, = tf.gradients(tf.boolean_mask(lengths, h0), scale_factor)
    with tf.control_dependencies([tf.add_check_numerics_ops()]):
        grad = tf.identity(grad)
#
with tf.Session(graph=graph) as sess:
    l1, g = sess.run((lengths, grad))
    print(np.max(np.abs(l0 - l1)), g, np.abs((g - g0) / g0))
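
The fragment above compares an analytic gradient against a reference value; the same sanity check can be done with a central finite difference. A toy sketch (f and the evaluation point are stand-ins for whatever scalar map is being debugged):

import numpy as np

def numeric_grad(f, x0, eps=1e-5):
    # central difference: O(eps**2) error
    return (f(x0 + eps) - f(x0 - eps)) / (2 * eps)

f = lambda s: s * np.sin(s)
analytic = np.sin(2.0) + 2.0 * np.cos(2.0)
print(abs(analytic - numeric_grad(f, 2.0)))  # should be tiny, ~1e-11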
Exemple #48
0
    def train_ichikawa_2(self,
                         data=0,
                         steps=-1,
                         dropout=None,
                         display_step=10,
                         test_step=100,
                         batch_size=10,
                         resume=save_step,
                         ckpt_name="SSL.ckpt",
                         start_ckpt="ichikawa.ckpt"):  # epochs=-1,
        print("learning_rate: %f" % self.learning_rate)

        if data: self.data = data
        steps = 9999999 if steps < 0 else steps
        session = self.session
        # with tf.device(_cpu):
        # t = tf.verify_tensor_all_finite(t, msg)
        tf.add_check_numerics_ops()
        self.summaries = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(current_logdir(),
                                                    session.graph)
        if not dropout: dropout = 1.  # keep all
        x = self.x
        y = self.y
        keep_prob = self.keep_prob
        if not resume or not self.resume(session):
            session.run([tf.global_variables_initializer()])
        saver_c = tf.train.Saver([
            v for v in tf.all_variables()
            if not (v.name.startswith("model/prediction/Dense_10"))
        ])
        saver = tf.train.Saver(tf.global_variables())
        saver_c.restore(session, "ichikawa.ckpt")
        snapshot = self.name + str(get_last_tensorboard_run_nr())
        step = 0  # show first

        #check = session.graph.get_tensor_by_name("model/conv/filters:0")
        #w_sh = tf.assign(check,tf.random_normal([3, 3, 1, 64]))
        #session.run(w_sh)

        # Shadow copies of the six conv filter banks. The w_sh_* ops snapshot
        # the restored checkpoint weights into these variables; the w_s_* ops
        # write them back into the graph after every training step, which
        # effectively freezes those layers.
        w_0 = tf.Variable(tf.random_normal([3, 3, 1, 64]))
        w_1 = tf.Variable(tf.random_normal([3, 3, 64, 12]))
        w_2 = tf.Variable(tf.random_normal([1, 1, 76, 76]))
        w_3 = tf.Variable(tf.random_normal([3, 3, 76, 12]))
        w_4 = tf.Variable(tf.random_normal([1, 1, 88, 88]))
        w_5 = tf.Variable(tf.random_normal([3, 3, 88, 12]))

        session.run([tf.global_variables_initializer()])
        saver_c.restore(session, "ichikawa.ckpt")

        check_0 = session.graph.get_tensor_by_name("model/conv/filters:0")
        w_sh_0 = tf.assign(w_0, check_0)
        w_s_0 = tf.assign(check_0, w_0)
        session.run(w_s_0)  # note: runs w_s_0, so layer 0 is overwritten with the fresh random w_0

        check_1 = session.graph.get_tensor_by_name("model/conv_1/filters:0")
        w_sh_1 = tf.assign(w_1, check_1)
        w_s_1 = tf.assign(check_1, w_1)
        session.run(w_sh_1)

        check_2 = session.graph.get_tensor_by_name("model/conv_2/filters:0")
        w_sh_2 = tf.assign(w_2, check_2)
        w_s_2 = tf.assign(check_2, w_2)
        session.run(w_sh_2)

        check_3 = session.graph.get_tensor_by_name("model/conv_3/filters:0")
        w_sh_3 = tf.assign(w_3, check_3)
        w_s_3 = tf.assign(check_3, w_3)
        session.run(w_sh_3)

        check_4 = session.graph.get_tensor_by_name("model/conv_4/filters:0")
        w_sh_4 = tf.assign(w_4, check_4)
        w_s_4 = tf.assign(check_4, w_4)
        session.run(w_sh_4)

        check_5 = session.graph.get_tensor_by_name("model/conv_5/filters:0")
        w_sh_5 = tf.assign(w_5, check_5)
        w_s_5 = tf.assign(check_5, w_5)
        session.run(w_sh_5)

        while step < steps:
            batch_xs, batch_ys = self.next_batch(batch_size, session)
            # batch_xs=np.array(batch_xs).reshape([-1]+self.input_shape)
            # print("step %d \r" % step)# end=' ')
            # tf.train.shuffle_batch_join(example_list, batch_size, capacity=min_queue_size + batch_size * 16, min_queue_size)
            # Fit training using batch data
            feed_dict = {
                x: batch_xs,
                y: batch_ys,
                keep_prob: dropout,
                self.train_phase: True
            }
            loss, _ = session.run([self.cost, self.optimize],
                                  feed_dict=feed_dict)
            session.run([w_s_0, w_s_1, w_s_2, w_s_3, w_s_4, w_s_5])
            if step % display_step == 0:
                seconds = int(time.time()) - start
                # Calculate batch accuracy, loss
                feed = {
                    x: batch_xs,
                    y: batch_ys,
                    keep_prob: 1.,
                    self.train_phase: False
                }
                acc = session.run(self.accuracy, feed_dict=feed)
                # acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
                # self.summary_writer.add_summary(summary, step) # only test summaries for smoother curve and SPEED!
                print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".
                      format(step, loss, acc, seconds),
                      end=' ')
                if str(loss) == "nan":
                    return print(
                        "\nLoss gradient explosion, exiting!!!")  # restore!
            if step % test_step == 0: self.test(step)
            if step % save_step == 0 and step > 0:
                print("SAVING snapshot %s" % snapshot)
                saver.save(session, checkpoint_dir + "/" + snapshot + ".ckpt",
                           self.global_step)

            step += 1
        print("\nOptimization Finished!")
        saver.save(session, ckpt_name)
        self.test(step, number=10000)  # final test
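
The assign-pair dance in train_ichikawa_2 reduces to a simple idea: snapshot a restored tensor into a shadow variable, then write the snapshot back after every optimizer step so the layer stays frozen. A one-layer sketch under that reading (variable names are mine):

import tensorflow as tf

graph_w = tf.Variable(tf.random_normal([3, 3, 1, 64]))  # stands for a restored filter bank
shadow_w = tf.Variable(tf.zeros([3, 3, 1, 64]))
snapshot_op = tf.assign(shadow_w, graph_w)  # take the snapshot once, after restore
freeze_op = tf.assign(graph_w, shadow_w)    # undo any update after each step

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(snapshot_op)
    # ... in the training loop, right after running the optimizer:
    sess.run(freeze_op)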
Exemple #49
0
    def accuracy_test(self,
                      data=0,
                      steps=-1,
                      dropout=None,
                      display_step=10,
                      test_step=100,
                      batch_size=10,
                      resume=save_step,
                      ckpt_name="SSL.ckpt"):  # epochs=-1,
        print("learning_rate: %f" % self.learning_rate)
        if data: self.data = data
        steps = 9999999 if steps < 0 else steps
        session = self.session
        # with tf.device(_cpu):
        # t = tf.verify_tensor_all_finite(t, msg)
        tf.add_check_numerics_ops()
        self.summaries = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(current_logdir(),
                                                    session.graph)
        if not dropout: dropout = 1.  # keep all
        x = self.x
        y = self.y
        keep_prob = self.keep_prob
        if not resume or not self.resume(session):
            session.run([tf.global_variables_initializer()])
        #saver_c = tf.train.Saver([v for v in tf.all_variables() if not(v.name.startswith("model/prediction/Dense_10"))])
        saver = tf.train.Saver(tf.global_variables())
        print(ckpt_name)
        saver.restore(session, ckpt_name)
        snapshot = self.name + str(get_last_tensorboard_run_nr())
        step = 0  # show first

        while step < steps:
            batch_xs, batch_ys = self.next_batch(batch_size, session)
            # batch_xs=np.array(batch_xs).reshape([-1]+self.input_shape)
            # print("step %d \r" % step)# end=' ')
            # tf.train.shuffle_batch_join(example_list, batch_size, capacity=min_queue_size + batch_size * 16, min_queue_size)
            # Fit training using batch data
            feed_dict = {
                x: batch_xs,
                y: batch_ys,
                keep_prob: dropout,
                self.train_phase: True
            }
            #print(session.run([self.target,self.output],feed_dict=feed_dict))
            #print(session.run([self.output],feed_dict=feed_dict))
            output = session.run([self.output, self.target],
                                 feed_dict=feed_dict)
            #output = session.run(self.train_phase,feed_dict=feed_dict)
            print(output)

            if output[0][0][0] < output[0][0][1]:
                print("mae")  # Japanese: "front"
            else:
                print("usiro")  # Japanese: "back" (ushiro)

            #loss, _ = session.run([self.cost, self.optimize], feed_dict=feed_dict)
            #if step % display_step == 0:
            #seconds = int(time.time()) - start
            # Calculate batch accuracy, loss
            #feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
            #acc = session.run(self.accuracy, feed_dict=feed)
            # acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
            # self.summary_writer.add_summary(summary, step) # only test summaries for smoother curve and SPEED!
            #print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(step, loss, acc, seconds), end=' ')
            #if str(loss) == "nan": return print("\nLoss gradient explosion, exiting!!!")  # restore!
            #if step % test_step == 0: self.test(step)
            if step % save_step == 0 and step > 0:
                print("SAVING snapshot %s" % snapshot)
                saver.save(session, checkpoint_dir + "/" + snapshot + ".ckpt",
                           self.global_step)

            step += 1
        print("\nOptimization Finished!")
        #saver.save(session, ckpt_name)
        self.test(step, number=10000)  # final test
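
Both methods above rely on restoring everything except a re-trained output layer. The core of that pattern, as a hedged sketch (the scope and checkpoint names are copied from the code above purely for illustration):

import tensorflow as tf

restore_vars = [v for v in tf.global_variables()
                if not v.name.startswith('model/prediction/Dense_10')]
partial_saver = tf.train.Saver(restore_vars)  # saver over the shared layers only
# partial_saver.restore(session, 'ichikawa.ckpt')  # the excluded layer keeps its fresh init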
Exemple #50
0
    def __init__(self, path_to_model):
        g = tf.Graph()
        with g.as_default():
            train_attention = True
            initialize_random = False
            train_we = True

            with open(
                    settings.BASE_DIR +
                    str("/Prediction/ML_model/dataset/dataset_mcgm_clean/word_index_map_mcgm.pickle"
                        ), "rb") as myFile:
                self.word_index_map = pickle.load(myFile, encoding='latin1')

            if not initialize_random:

                # load pre-trained word embedding.
                with open(
                        settings.BASE_DIR +
                        "/Prediction/ML_model/dataset/dataset_mcgm_clean/word_vectors_mcgm.pickle",
                        "rb") as myFile:
                    word_vectors = pickle.load(myFile, encoding='latin1')

                word_vectors = np.asarray(word_vectors).astype(np.float32)

                for i in range(len(word_vectors) - 1):
                    word_vectors[i] /= (la.norm((word_vectors[i])))

            #
            # for i in range(len(word_vectors) - 1):
            #     print np.max(np.abs(word_vectors[i]))

            vocab_size = len(word_vectors)
            embedding_dim = 300
            learning_rate = 1e-3
            # decay_factor = 0.99
            self.max_padded_sentence_length = 35
            batch_size = 100
            iterations = 200
            highest_val_acc = 0
            self.last_index = len(word_vectors) - 1

            def init_weight(shape, name):
                initial = tf.truncated_normal(shape,
                                              stddev=0.1,
                                              name=name,
                                              dtype=tf.float32)
                return tf.Variable(initial)

            def init_bias(shape, name):
                initial = tf.truncated_normal(shape=shape,
                                              stddev=0.1,
                                              name=name,
                                              dtype=tf.float32)
                return tf.Variable(initial)

            if initialize_random:

                # Initial embedding initialized randomly
                embedding_init = tf.Variable(tf.truncated_normal(
                    shape=[vocab_size, embedding_dim],
                    stddev=0.1,
                    dtype=tf.float32),
                                             trainable=train_we,
                                             name="word_embedding")

            else:

                # Initial embedding initialized by word2vec vectors
                embedding_init = tf.Variable(tf.constant(
                    word_vectors, shape=[vocab_size, embedding_dim]),
                                             trainable=train_we,
                                             name="word_embedding")

            config = projector.ProjectorConfig()

            # It will hold tensor of size [batch_size, max_padded_sentence_length]
            self.X = tf.placeholder(tf.int32,
                                    [None, self.max_padded_sentence_length])

            # Word embedding lookup
            word_embeddings = tf.nn.embedding_lookup(embedding_init, self.X)

            if train_attention:

                in_size = tf.shape(word_embeddings)[0]

                reshaped_w_e = tf.reshape(
                    word_embeddings,
                    [in_size * self.max_padded_sentence_length, embedding_dim])

                print(reshaped_w_e)

                no_of_nurons_h1 = 512
                Wa = init_weight([embedding_dim, no_of_nurons_h1], 'Wa')
                ba = init_bias([no_of_nurons_h1], 'ba')
                ya = tf.nn.relu(tf.matmul(reshaped_w_e, Wa) + ba)

                # Hidden layer of size 512
                no_of_nurons_h2 = 512
                Wa1 = init_weight([no_of_nurons_h1, no_of_nurons_h2], 'Wa1')
                ba1 = init_bias([no_of_nurons_h2], 'ba1')
                ya1 = tf.nn.relu(tf.matmul(ya, Wa1) + ba1)

                Wa2 = init_weight([no_of_nurons_h2, 1], 'Wa2')
                ba2 = init_bias([1], 'ba2')

                # Output layer of the neural network.
                ya2 = tf.matmul(ya1, Wa2) + ba2

                attention_reshaped = tf.reshape(
                    ya2, [in_size, self.max_padded_sentence_length])

                attention_softmaxed = tf.nn.softmax(attention_reshaped)

                attention_expanded = tf.expand_dims(attention_softmaxed,
                                                    axis=2)

                # Attention based weighted averaging of word vectors.
                sentence_embedding = tf.reduce_sum(tf.multiply(
                    word_embeddings, attention_expanded),
                                                   axis=1)

            else:

                # Simply Average out word embedding to create sentence embedding
                sentence_embedding = tf.reduce_mean(word_embeddings, axis=1)

            def get_batches(X, Y, bsize):
                for i in range(0, len(X) - bsize + 1, bsize):
                    indices = slice(i, i + bsize)
                    yield X[indices], Y[indices]

            input_layer_size = embedding_dim
            output_layer_size = 165

            # Hidden layer of size 512
            no_of_nurons_h1 = 512
            W = init_weight([input_layer_size, no_of_nurons_h1], 'W')
            b = init_bias([no_of_nurons_h1], 'b')
            y = tf.nn.relu(tf.matmul(sentence_embedding, W) + b)

            # Hidden layer of size 512
            no_of_nurons_h2 = 512
            W1 = init_weight([no_of_nurons_h1, no_of_nurons_h2], 'W1')
            b1 = init_bias([no_of_nurons_h2], 'b1')
            y1 = tf.nn.relu(tf.matmul(y, W1) + b1)

            W2 = init_weight([no_of_nurons_h2, output_layer_size], 'W2')
            b2 = init_bias([output_layer_size], 'b2')

            # Output layer of the neural network.
            y2 = tf.matmul(y1, W2) + b2

            # It will hold the true label for current batch
            y_ = tf.placeholder(tf.int32, shape=[None, output_layer_size])

            check_op = tf.add_check_numerics_ops()

            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=y2, labels=y_))

            train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
            # train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
            correct_prediction = tf.equal(tf.argmax(y2, 1), tf.argmax(y_, 1))

            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            self.probs = tf.nn.softmax(y2)
            predicted_lables = tf.argmax(self.probs, 1)

            correct_lables = tf.argmax(y_, 1)

            variables_names = [v.name for v in tf.trainable_variables()]

            init = tf.global_variables_initializer()
            self.sess = tf.Session()
            self.sess.run(init)

            saver = tf.train.Saver()

            # Restore the best model to calculate the test accuracy.
            saver.restore(self.sess, path_to_model)
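
The attention branch in Exemple #50 is a standard softmax pooling over word vectors. Stripped to its essentials with tf.layers (a sketch, not the author's exact layers; the axis argument needs TF 1.5+):

import tensorflow as tf

word_embeddings = tf.placeholder(tf.float32, [None, 35, 300])  # [batch, time, dim]
hidden = tf.layers.dense(word_embeddings, 512, tf.nn.relu)
scores = tf.layers.dense(hidden, 1)                  # one scalar score per word
alpha = tf.nn.softmax(scores, axis=1)                # weights sum to 1 over time
sentence_embedding = tf.reduce_sum(word_embeddings * alpha, axis=1)  # [batch, dim]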
	print("len(outH1) %d"% len(outH1))
	####Optimizing
	print("building loss")
	logits3d = tf.stack(logits)
	loss = tf.reduce_mean(ctc.ctc_loss(logits3d, targetY, seqLengths))
	out = tf.identity(loss, 'ctc_loss_mean')
	optimizer = tf.train.MomentumOptimizer(learningRate, momentum).minimize(loss)

	####Evaluating
	print("building Evaluation")
	logitsMaxTest = tf.slice(tf.argmax(logits3d, 2), [0, 0], [seqLengths[0], 1])
	predictions = tf.to_int32(ctc.ctc_beam_search_decoder(logits3d, seqLengths)[0][0])
	reduced_sum = tf.reduce_sum(tf.edit_distance(predictions, targetY, normalize=False))
	errorRate = reduced_sum / tf.to_float(tf.size(targetY.values))

	check_op = tf.add_check_numerics_ops()
print("done building graph")

####Run session
with tf.Session(graph=graph) as session:
	try: merged = tf.summary.merge_all()
	except: merged = tf.merge_all_summaries()  # pre-1.0 fallback
	try: writer = tf.summary.FileWriter("/tmp/basic_new", session.graph)
	except: writer = tf.train.SummaryWriter("/tmp/basic_new", session.graph)  # pre-1.0 fallback
	try:saver = tf.train.Saver()  # defaults to saving all variables
	except:
		print("tf.train.Saver() broken in tensorflow 0.12")
		saver = tf.train.Saver(tf.global_variables())# WTF stupid API breaking
	ckpt = tf.train.get_checkpoint_state('./checkpoints')

	start = 0
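
The truncated CTC fragment above builds the loss, a beam-search decode, and an edit-distance error rate. A self-contained sketch of the evaluation half (shapes and the class count are illustrative; the logits are time-major as in the fragment):

import tensorflow as tf

logits3d = tf.placeholder(tf.float32, [None, None, 30])  # [max_time, batch, classes]
seq_lengths = tf.placeholder(tf.int32, [None])
targetY = tf.sparse_placeholder(tf.int32)                # true label sequences

decoded, _ = tf.nn.ctc_beam_search_decoder(logits3d, seq_lengths)
predictions = tf.to_int32(decoded[0])
# label error rate: total edit distance over total number of target symbols
error_rate = (tf.reduce_sum(tf.edit_distance(predictions, targetY, normalize=False))
              / tf.to_float(tf.size(targetY.values)))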
def model_fn(features, labels, params, mode, scope=None):
    embedding_size = params['embedding_size']
    num_blocks = params['num_blocks']
    vocab_size = params['vocab_size']
    debug = params['debug']

    story = features['story']
    query = features['query']

    batch_size = tf.shape(story)[0]

    normal_initializer = tf.random_normal_initializer(stddev=0.1)
    ones_initializer = tf.constant_initializer(1.0)

    # PReLU activations have their alpha parameters initialized to 1
    # so they may be identity before training.
    activation = partial(prelu, initializer=ones_initializer)

    with tf.variable_scope(scope, 'EntityNetwork', initializer=normal_initializer):
        # Embeddings
        # The embedding mask forces the special "pad" embedding to zeros.
        embedding_params = tf.get_variable('embedding_params', [vocab_size, embedding_size])
        embedding_mask = tf.constant([0 if i == 0 else 1 for i in range(vocab_size)],
            dtype=tf.float32,
            shape=[vocab_size, 1])

        story_embedding = tf.nn.embedding_lookup(embedding_params * embedding_mask, story)
        query_embedding = tf.nn.embedding_lookup(embedding_params * embedding_mask, query)

        # Input Module
        encoded_story = get_input_encoding(story_embedding, ones_initializer, 'StoryEncoding')
        encoded_query = get_input_encoding(query_embedding, ones_initializer, 'QueryEncoding')

        # Memory Module
        # We define the keys outside of the cell so they may be used for state initialization.
        keys = [tf.get_variable('key_{}'.format(j), [embedding_size]) for j in range(num_blocks)]

        cell = DynamicMemoryCell(num_blocks, embedding_size, keys,
            initializer=normal_initializer,
            activation=activation)

        # Recurrence
        initial_state = cell.zero_state(batch_size, tf.float32)
        sequence_length = get_sequence_length(encoded_story)
        _, last_state = tf.nn.dynamic_rnn(cell, encoded_story,
            sequence_length=sequence_length,
            initial_state=initial_state)

        # Output Module
        output = get_output(last_state, encoded_query,
            num_blocks=num_blocks,
            vocab_size=vocab_size,
            initializer=normal_initializer,
            activation=activation)
        prediction = tf.argmax(output, 1)

        # Training
        loss = get_loss(output, labels, mode)
        train_op = get_train_op(loss, params, mode)

        if debug:
            tf.contrib.layers.summarize_tensor(sequence_length, 'sequence_length')
            tf.contrib.layers.summarize_tensor(encoded_story, 'encoded_story')
            tf.contrib.layers.summarize_tensor(encoded_query, 'encoded_query')
            tf.contrib.layers.summarize_tensor(last_state, 'last_state')
            tf.contrib.layers.summarize_tensor(output, 'output')
            tf.contrib.layers.summarize_variables()

            tf.add_check_numerics_ops()

        return prediction, loss, train_op
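
One detail in model_fn worth isolating is the embedding mask, which forces the pad token (index 0) to a zero vector no matter what training does to embedding_params. A minimal sketch with toy sizes:

import tensorflow as tf

vocab_size, embedding_size = 100, 32
params = tf.get_variable('embedding_params', [vocab_size, embedding_size])
mask = tf.constant([0.0 if i == 0 else 1.0 for i in range(vocab_size)],
                   dtype=tf.float32, shape=[vocab_size, 1])
ids = tf.placeholder(tf.int32, [None, None])
embedded = tf.nn.embedding_lookup(params * mask, ids)  # row 0 is always zeros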
Exemple #53
0
 def set_model(self, model):
     self.model = model
     self.sess = K.get_session()
     self.check_num = tf.add_check_numerics_ops()
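
The callback fragment above only stores the check op; running tf.add_check_numerics_ops() on its own would need the batch feeds, which plain Keras callbacks do not expose. A hypothetical alternative with the same goal that avoids extra graph runs is to scan the weights after each batch (assumes the standalone keras package; with tf.keras, import from tensorflow.keras.callbacks instead):

import numpy as np
from keras.callbacks import Callback

class StopOnNaNWeights(Callback):
    def on_batch_end(self, batch, logs=None):
        # self.model is set by Keras before training starts
        for w in self.model.get_weights():
            if not np.all(np.isfinite(w)):
                print('Non-finite weight detected at batch %d' % batch)
                self.model.stop_training = True
                return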
def train(args, model,data,val_data):
    dirname = 'save-vrnn/'
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    with open(os.path.join(dirname, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)

    ckpt = tf.train.get_checkpoint_state(dirname) #check if there exists a previously trained model in the checkpoint

    Xtrain,ytrain = data
    Xval, yval = val_data


    shape1 = np.shape(Xtrain)
    df1 = pd.DataFrame(np.reshape(Xtrain,(shape1[0],-1)))
    shape2 = np.shape(ytrain)
    df2 = pd.DataFrame(np.reshape(ytrain,(shape2[0],-1)))
    print("\nXtrain")
    print(df1.describe())
    print('\nytrain')
    print(df2.describe())

    train = Iterator(Xtrain,ytrain,batch_size = args.batch_size,n_steps=args.seq_length,shape_diff=True) #to split data into batches
    n_batches = train.nbatches
    Xtrain,ytrain = train.get_split()

    

    #split validation data into batches
    validate = Iterator(Xval,yval,batch_size = args.batch_size,n_steps=args.seq_length,shape_diff=True)
    val_nbatches = validate.nbatches
    Xval, yval = validate.get_split()

    myFile = open(dirname+'/outputValidation.csv', 'w')
    writer = csv.writer(myFile)
    writer.writerows([["Epoch","Train_Loss","MAE","MSE"]])

    mae = []
    mse = []
    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter('logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph)
        check = tf.add_check_numerics_ops()
        merged = tf.summary.merge_all()
        tf.global_variables_initializer().run() #initialize all variables in the graph as defined
        saver = tf.train.Saver(tf.global_variables())
        if ckpt:
            saver.restore(sess, ckpt.model_checkpoint_path) #restore previously saved model
            print "Loaded model"
        start = time.time()
        state_c = None
        state_h = None

        logs = [] 
        for e in xrange(args.num_epochs):
            #assign learning rate 
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e))) 

            #get the initial state of lstm cell 
            state = model.initial_state_c, model.initial_state_h
            mae.append([])
            mse.append([])

            prior_mean = []
            phi_mean = []
            if (e + 1) % 10 != 0:  # train for nine epochs, validate on every tenth
                for b in xrange(n_batches):
                    x = Xtrain[b]
                    y = ytrain[b]
                    # input data: x and y; target data: y
                    feed = {model.input_x: x, model.input_y: y, model.target_data: y}

                    # train the model on this batch of data
                    train_loss, _, cr, summary, sigma, mu, inp, target, state_c, state_h, pred, prior_mu, phi_mu = sess.run(
                        [model.cost, model.train_op, check, merged, model.sigma, model.mu,
                         model.flat_input, model.target, model.final_state_c,
                         model.final_state_h, model.output, model.prior_mu, model.phi_mu], feed)

                    prior_mean.append(prior_mu)
                    phi_mean.append(phi_mu)

                    summary_writer.add_summary(summary, e * n_batches + b)

                    pred = np.concatenate(pred, axis=1)
                    sigma = np.concatenate(sigma, axis=1)
                    mu = np.concatenate(mu, axis=1)

                    # the model output is flat; reshape to 3D (batch_size, time_steps, n_app)
                    pred = np.array(np.reshape(pred, [args.batch_size, args.seq_length, -1])).astype(float)
                    label = np.array(y).astype(float)

                    # compute mae and mse for the output
                    mae_i = np.reshape(np.absolute(label - pred), [-1]).mean()
                    mse_i = np.reshape((label - pred) ** 2, [-1]).mean()

                    mae[e].append(mae_i)
                    mse[e].append(mse_i)

                    # save the model every args.save_every global steps
                    if (e * n_batches + b) % args.save_every == 0 and (e * n_batches + b) > 0:
                        checkpoint_path = os.path.join(dirname, 'model_' + str(args.num_epochs) + '_' + str(args.learning_rate) + '.ckpt')
                        saver.save(sess, checkpoint_path, global_step=e * n_batches + b)
                        print "model saved to {}".format(checkpoint_path)

                    end = time.time()

                    print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \
                        .format(e * n_batches + b,
                                args.num_epochs * n_batches,
                                e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0))
                    start = time.time()
            else:  # run the validation data through the model instead
                print("\nValidation Data\n")
                loss = 0
                for b in xrange(val_nbatches):
                    x = Xval[b]
                    y = yval[b]
                    # input data: x and y; target data: y
                    feed = {model.input_x: x, model.input_y: y, model.target_data: y}

                    # evaluate the model on this batch of validation data (no train_op)
                    train_loss, cr, summary, sigma, mu, inp, target, state_c, state_h, pred = sess.run(
                        [model.cost, check, merged, model.sigma, model.mu, model.flat_input,
                         model.target, model.final_state_c, model.final_state_h, model.output],
                        feed)
                    loss += train_loss
                    summary_writer.add_summary(summary, e * n_batches + b)

                    pred = np.concatenate(pred, axis=1)
                    sigma = np.concatenate(sigma, axis=1)
                    mu = np.concatenate(mu, axis=1)

                    # the model output is flat; reshape to 3D (batch_size, time_steps, n_app)
                    pred = np.array(np.reshape(pred, [args.batch_size, args.seq_length, -1])).astype(float)
                    label = np.array(y).astype(float)

                    # compute mae and mse for the output
                    mae_i = np.reshape(np.absolute(label - pred), [-1]).mean()
                    mse_i = np.reshape((label - pred) ** 2, [-1]).mean()

                    mae[e].append(mae_i)
                    mse[e].append(mse_i)

                    # save the model every args.save_every global steps
                    if (e * n_batches + b) % args.save_every == 0 and (e * n_batches + b) > 0:
                        checkpoint_path = os.path.join(dirname, 'model_' + str(args.num_epochs) + '_' + str(args.learning_rate) + '.ckpt')
                        saver.save(sess, checkpoint_path, global_step=e * n_batches + b)
                        print "model saved to {}".format(checkpoint_path)

                    end = time.time()

                    print "{}/{} (epoch {}), val_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \
                        .format(e * n_batches + b,
                                args.num_epochs * n_batches,
                                e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0))
                    start = time.time()
                # average validation loss over the epoch (not just the last batch)
                logs.append([e, loss / val_nbatches, sum(mae[e]) / len(mae[e]), sum(mse[e]) / len(mse[e])])

            # the average mae, mse values in every epoch
            print "Epoch {}, mae = {:.3f}, mse = {:.3f}".format(e, sum(mae[e]) / len(mae[e]), sum(mse[e]) / len(mse[e]))

            print("prior_mu_mean:", np.mean(prior_mean))
            print("phi_mu_mean: ", np.mean(phi_mean))

        writer.writerows(logs)

        # path to save the final model
        checkpoint_path = os.path.join(dirname, 'final_model_' + str(args.num_epochs) + '_' + str(args.learning_rate) + '.ckpt')

        saver2 = tf.train.Saver()
        saver2.save(sess, checkpoint_path)

        print "model saved to {}".format(checkpoint_path)
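
One caveat in the epoch loop above: sess.run(tf.assign(model.lr, ...)) builds a new assign op every epoch, so the graph grows as training runs. A leaner sketch builds the assign op once and feeds the value (the names lr, new_lr, set_lr are mine, not from the snippet):

import tensorflow as tf

lr = tf.Variable(0.001, trainable=False)
new_lr = tf.placeholder(tf.float32, [])
set_lr = tf.assign(lr, new_lr)

# inside the epoch loop:
# sess.run(set_lr, {new_lr: args.learning_rate * (args.decay_rate ** e)})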
Exemple #55
0
def main(_):
  # We want to see all the logging messages for this tutorial.
  tf.logging.set_verbosity(tf.logging.INFO)

  # Start a new TensorFlow session.
  sess = tf.InteractiveSession()

  # Begin by making sure we have the training data we need. If you already have
  # training data of your own, use `--data_url= ` on the command line to avoid
  # downloading.
  model_settings = models.prepare_model_settings(
      len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
      FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
      FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
  audio_processor = input_data.AudioProcessor(
      FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
      FLAGS.unknown_percentage,
      FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
      FLAGS.testing_percentage, model_settings)
  fingerprint_size = model_settings['fingerprint_size']
  label_count = model_settings['label_count']
  time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)
  # Figure out the learning rates for each training phase. Since it's often
  # effective to have high learning rates at the start of training, followed by
  # lower levels towards the end, the number of steps and learning rates can be
  # specified as comma-separated lists to define the rate at each stage. For
  # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
  # will run 13,000 training loops in total, with a rate of 0.001 for the first
  # 10,000, and 0.0001 for the final 3,000.
  training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
  learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
  if len(training_steps_list) != len(learning_rates_list):
    raise Exception(
        '--how_many_training_steps and --learning_rate must be equal length '
        'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                   len(learning_rates_list)))

  fingerprint_input = tf.placeholder(
      tf.float32, [None, fingerprint_size], name='fingerprint_input')

  logits, dropout_prob = models.create_model(
      fingerprint_input,
      model_settings,
      FLAGS.model_architecture,
      is_training=True)

  # Define loss and optimizer
  ground_truth_input = tf.placeholder(
      tf.float32, [None, label_count], name='groundtruth_input')

  # Optionally we can add runtime checks to spot when NaNs or other symptoms of
  # numerical errors start occurring during training.
  control_dependencies = []
  if FLAGS.check_nans:
    checks = tf.add_check_numerics_ops()
    control_dependencies = [checks]

  # Create the back propagation and training evaluation machinery in the graph.
  with tf.name_scope('cross_entropy'):
    cross_entropy_mean = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_input, logits=logits))
  tf.summary.scalar('cross_entropy', cross_entropy_mean)
  with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
    learning_rate_input = tf.placeholder(
        tf.float32, [], name='learning_rate_input')
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate_input).minimize(cross_entropy_mean)
  predicted_indices = tf.argmax(logits, 1)
  expected_indices = tf.argmax(ground_truth_input, 1)
  correct_prediction = tf.equal(predicted_indices, expected_indices)
  confusion_matrix = tf.confusion_matrix(expected_indices, predicted_indices, num_classes=label_count)
  evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  tf.summary.scalar('accuracy', evaluation_step)

  global_step = tf.train.get_or_create_global_step()
  increment_global_step = tf.assign(global_step, global_step + 1)

  saver = tf.train.Saver(tf.global_variables())

  # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
  merged_summaries = tf.summary.merge_all()
  train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                       sess.graph)
  validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')

  tf.global_variables_initializer().run()

  start_step = 1

  if FLAGS.start_checkpoint:
    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
    start_step = global_step.eval(session=sess)

  tf.logging.info('Training from step: %d ', start_step)

  # Save graph.pbtxt.
  tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                       FLAGS.model_architecture + '.pbtxt')

  # Save list of words.
  with gfile.GFile(
      os.path.join(FLAGS.train_dir, FLAGS.model_architecture + '_labels.txt'),
      'w') as f:
    f.write('\n'.join(audio_processor.words_list))

  # Training loop.
  training_steps_max = np.sum(training_steps_list)
  for training_step in xrange(start_step, training_steps_max + 1):
    # Figure out what the current learning rate is.
    training_steps_sum = 0
    for i in range(len(training_steps_list)):
      training_steps_sum += training_steps_list[i]
      if training_step <= training_steps_sum:
        learning_rate_value = learning_rates_list[i]
        break
    # Pull the audio samples we'll use for training.
    train_fingerprints, train_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
        FLAGS.background_volume, time_shift_samples, 'training', sess)
    # Run the graph with this batch of training data.
    train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
        [
            merged_summaries, evaluation_step, cross_entropy_mean, train_step,
            increment_global_step
        ],
        feed_dict={
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 0.5
        })
    train_writer.add_summary(train_summary, training_step)
    tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                    (training_step, learning_rate_value, train_accuracy * 100,
                     cross_entropy_value))
    is_last_step = (training_step == training_steps_max)
    if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
      set_size = audio_processor.set_size('validation')
      total_accuracy = 0
      total_conf_matrix = None
      for i in xrange(0, set_size, FLAGS.batch_size):
        validation_fingerprints, validation_ground_truth = (
            audio_processor.get_data(FLAGS.batch_size, i, model_settings, 0.0,
                                     0.0, 0, 'validation', sess))
        # Run a validation step and capture training summaries for TensorBoard
        # with the `merged` op.
        validation_summary, validation_accuracy, conf_matrix = sess.run(
            [merged_summaries, evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: validation_fingerprints,
                ground_truth_input: validation_ground_truth,
                dropout_prob: 1.0
            })
        validation_writer.add_summary(validation_summary, training_step)
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (validation_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
          total_conf_matrix = conf_matrix
        else:
          total_conf_matrix += conf_matrix
      tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
      tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                      (training_step, total_accuracy * 100, set_size))

    # Save the model checkpoint periodically.
    if (training_step % FLAGS.save_step_interval == 0 or
        training_step == training_steps_max):
      checkpoint_path = os.path.join(FLAGS.train_dir,
                                     FLAGS.model_architecture + '.ckpt')
      tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
      saver.save(sess, checkpoint_path, global_step=training_step)

  set_size = audio_processor.set_size('testing')
  tf.logging.info('set_size=%d', set_size)
  total_accuracy = 0
  total_conf_matrix = None
  for i in xrange(0, set_size, FLAGS.batch_size):
    test_fingerprints, test_ground_truth = audio_processor.get_data(
        FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
    test_accuracy, conf_matrix = sess.run(
        [evaluation_step, confusion_matrix],
        feed_dict={
            fingerprint_input: test_fingerprints,
            ground_truth_input: test_ground_truth,
            dropout_prob: 1.0
        })
    batch_size = min(FLAGS.batch_size, set_size - i)
    total_accuracy += (test_accuracy * batch_size) / set_size
    if total_conf_matrix is None:
      total_conf_matrix = conf_matrix
    else:
      total_conf_matrix += conf_matrix
  tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
  tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % (total_accuracy * 100,
                                                           set_size))
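
The evaluation loops above share one bit of bookkeeping: accuracy is weighted by the actual batch size (the last batch may be short) and integer confusion matrices are summed. As a standalone sketch of just that logic:

import numpy as np

def aggregate(batch_results, set_size):
    # batch_results: iterable of (accuracy, confusion_matrix, batch_size)
    total_accuracy, total_conf = 0.0, None
    for acc, conf, bsize in batch_results:
        total_accuracy += acc * bsize / set_size
        total_conf = conf if total_conf is None else total_conf + conf
    return total_accuracy, total_conf

acc, conf = aggregate([(1.0, np.eye(2, dtype=int), 100),
                       (0.5, np.eye(2, dtype=int), 50)], 150)
print(acc)  # 0.833...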
def train():
  dataset = facenet.get_dataset(FLAGS.data_dir)
  train_set, test_set = facenet.split_dataset(dataset, 0.9)
  
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Placeholder for input images
    images_placeholder = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, 96, 96, 3), name='Input')
    
    # Build a Graph that computes the logits predictions from the inference model
    embeddings = facenet.inference_no_batch_norm_deeper(images_placeholder, tf.constant(True))
    #embeddings = facenet.inference(images_placeholder, tf.constant(False))
    
    # Split example embeddings into anchor, positive and negative
    #a, p, n = tf.split(0, 3, embeddings)

    # Calculate triplet loss
    loss = facenet.triplet_loss_modified(embeddings)

    # Build a Graph that trains the model with one batch of examples and updates the model parameters
    train_op, grads = facenet.train(loss, global_step)
    
    # Create a saver
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()
    
    check_num = tf.add_check_numerics_ops()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph_def=sess.graph_def)
    
    epoch = 1
    
    with sess.as_default():

      while epoch<FLAGS.max_nrof_epochs:
        batch_number = 0
        while batch_number<FLAGS.epoch_size:
          print('Loading new data')
          image_data, num_per_class = facenet.load_data(train_set)
      
          print('Selecting suitable triplets for training')
          start_time = time.time()
          emb_list = []
          # Run a forward pass for the sampled images
          nrof_examples_per_epoch = FLAGS.people_per_batch*FLAGS.images_per_person
          nrof_batches_per_epoch = int(np.floor(nrof_examples_per_epoch/FLAGS.batch_size))
          #for i in xrange(nrof_batches_per_epoch):
            #feed_dict = facenet.get_batch(images_placeholder, image_data, i)
            #emb_list += sess.run([embeddings], feed_dict=feed_dict)
          #emb_array = np.vstack(emb_list)  # Stack the embeddings to a nrof_examples_per_epoch x 128 matrix
          ## Select triplets based on the embeddings
          #apn, nrof_random_negs, nrof_triplets = facenet.select_triplets(emb_array, num_per_class, image_data)
          #duration = time.time() - start_time
          #print('(nrof_random_negs, nrof_triplets) = (%d, %d): time=%.3f seconds' % (nrof_random_negs, nrof_triplets, duration))
    
          count = 0
  #        while count<nrof_triplets*3 and batch_number<FLAGS.epoch_size:
          while batch_number<FLAGS.epoch_size:
            start_time = time.time()
   #         feed_dict = facenet.get_batch(images_placeholder, apn, batch_number)
            feed_dict = facenet.get_batch(images_placeholder, image_data, batch_number)
            
            grad_tensors, grad_vars = zip(*grads)
            grads_eval  = sess.run(grad_tensors, feed_dict=feed_dict)
            for gt, gv in zip(grads_eval, grad_vars):
              print('%40s: %6d %6f  %6f' % (gv.op.name, np.sum(np.isnan(gt)), np.max(gt), np.min(gt)))
            
            # this debug loop never runs train_op, so evaluate the loss
            # explicitly to have a value for the log line below
            err = sess.run(loss, feed_dict=feed_dict)
            duration = time.time() - start_time

            print('Epoch: [%d][%d/%d]\tTime %.3f\ttripErr %2.3f' % (epoch, batch_number, FLAGS.epoch_size, duration, err))
            batch_number+=1
            count+=FLAGS.batch_size
        epoch+=1

      # Save the model checkpoint periodically.
      checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
      saver.save(sess, checkpoint_path, global_step=epoch*FLAGS.epoch_size+batch_number)
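
The per-variable gradient printout in the loop above is a handy NaN-hunting tool on its own. A compact sketch of the same idea (using np.nanmax/np.nanmin so a few NaNs do not hide the value range):

import numpy as np

def report_grads(grad_arrays, grad_vars):
    # grad_arrays: evaluated gradient ndarrays; grad_vars: the matching tf.Variables
    for g, v in zip(grad_arrays, grad_vars):
        print('%40s: nan=%6d  max=%10.4f  min=%10.4f'
              % (v.op.name, np.sum(np.isnan(g)), np.nanmax(g), np.nanmin(g)))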