def validate(self, sess):
    """Accumulate recognition and orientation accuracy over the validation set.

    The two accuracy op bundles are created on first use, reset, and then
    updated once per validation batch. Nothing is returned; the accumulated
    values live in ``self._acc_ops.metric`` and
    ``self._acc_orient_ops.metric``.

    When ``self.tf_record_prefix`` is set, batches are read through a
    queue-based TF Record pipeline. The queue runner threads started here
    are always stopped again, even if evaluating a batch raises.

    Args:
        sess: session used to run every op.
    """
    if self._acc_ops is None:
        self._acc_ops = self._init_acc_ops()
    if self._acc_orient_ops is None:
        self._acc_orient_ops = self._init_acc_orient_ops()
    sess.run(self._acc_ops.reset)
    sess.run(self._acc_orient_ops.reset)
    num_batches_val = int(self.data.validation.num_examples/self.batch_size_val)
    use_tf_records = self.tf_record_prefix is not None

    # TODO remove hardcoded init
    # The orientation set does not change between batches: compute it once.
    rots = rotation_rad(-60, 60, 15)
    num_orients = len(rots)
    center_one_hot = None
    if not self.do_augment_rot:
        # Without augmentation every sample is labelled with the middle entry
        # of the orientation list. Floor division keeps the index an integer
        # on both Python 2 and Python 3.
        center_one_hot = dense_to_one_hot(
            np.zeros((self.batch_size_val,), dtype=int) + (num_orients // 2),
            num_orients)

    coord = None
    threads = None
    if use_tf_records:
        img, label, label_orient = MNIST.read_and_decode_ops(
            self.data.validation.path,
            one_hot=self.data.validation.one_hot,
            num_orientations=len(self.data.validation.orientations))
        batch_xs_op, batch_ys_op, batch_os_op = tf.train.batch(
            [img, label, label_orient],
            batch_size=self.batch_size_val,
            capacity=2000,
            num_threads=8)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in xrange(num_batches_val):
            if use_tf_records:
                # The orientation label stored in the record is read but not
                # used below; orientation targets come from the branch that
                # follows.
                batch_xs, batch_ys, batch_os = sess.run(
                    [batch_xs_op, batch_ys_op, batch_os_op])
            else:
                batch_xs, batch_ys = self.data.validation.next_batch(
                    self.batch_size_val, shuffle=False)
            batch_xs_in = batch_xs
            if self.do_augment_rot:
                # NOTE(review): requested anew for every batch; if this
                # builds graph ops on each call the graph keeps growing --
                # confirm.
                augment_op, batch_os2 = self.rotation_ops_multiset_val(3)
                orients_dense = np.array([rots.index(o) for o in batch_os2])
                batch_os_one_hot = dense_to_one_hot(orients_dense, num_orients)
                batch_xs_in = sess.run(augment_op, feed_dict={self.x: batch_xs})
            else:
                batch_os_one_hot = center_one_hot
            # Only the side effect of the update ops matters here.
            sess.run(
                [self._acc_ops.metric,
                 self._acc_ops.update,
                 self._acc_orient_ops.metric,
                 self._acc_orient_ops.update,
                 ],
                feed_dict={self.x: batch_xs_in,
                           self.y_: batch_ys,
                           self.orient_: batch_os_one_hot,
                           })
    finally:
        if coord is not None:
            coord.request_stop()
            coord.join(threads)
def _init_data_mnist(self):
    """Return the MNIST data sets, in memory or backed by TF Record files.

    If ``self.tf_record_prefix`` is None the data is read with
    ``MNIST.read_data_sets``. Otherwise a description of the record set
    (seed, one-hot flag, orientations, validation size) is built, hashed
    with Adler-32 and written as YAML next to the records; the hash is part
    of both file names. The records come from ``MNIST.to_tf_record``.

    Returns:
        Whatever ``MNIST.read_data_sets`` / ``MNIST.to_tf_record`` return.
    """
    if self.tf_record_prefix is None:
        return MNIST.read_data_sets(self.data_dir,
                                    one_hot=True,
                                    validation_size=self.validation_size,
                                    seed=self.data_seed)

    tf_record_descr = {}
    tf_record_descr['data_seed'] = self.data_seed
    tf_record_descr['one_hot'] = True
    tf_record_descr['orientations'] = sorted(np.rad2deg(rotation_rad(-60, 60, 15)))
    tf_record_descr['validation_size'] = self.validation_size

    # Canonical "key-value" pairs in key order, so the hash does not depend
    # on dict ordering.
    descr_parts = []
    for key in sorted(tf_record_descr.keys()):
        descr_parts.append('-'.join([key, str(tf_record_descr[key])]))
    descr_str = '_'.join(descr_parts)
    descr_hash = zlib.adler32(descr_str)
    self.logger.debug("TF Record description (hash:%s):'%s'" % (descr_hash, descr_str))

    tf_record_name = '%s_%s' % (self.tf_record_prefix, descr_hash)
    fpath_tf_record_descr = os.path.join(self.data_dir, tf_record_name + '.yml')
    self.logger.debug("Save TF Record description (hash:%s) to %s" % (descr_hash, fpath_tf_record_descr))
    descr_yaml = yaml.dump(tf_record_descr)
    with open(fpath_tf_record_descr, 'w') as h:
        h.write(descr_yaml)

    # NOTE(review): 'validation_size' is part of the description (and so of
    # the hash) but is not forwarded to to_tf_record -- confirm this is
    # intended.
    fpath_tf_record = os.path.join(self.data_dir, tf_record_name + '.tfrecords')
    data = MNIST.to_tf_record(fpath_tf_record,
                              self.data_dir,
                              one_hot=tf_record_descr['one_hot'],
                              orientations=tf_record_descr['orientations'],
                              seed=tf_record_descr['data_seed'])
    split_paths = []
    for field in data._fields:
        split_paths.append(':'.join([field, str(getattr(data, field).path)]))
    self.logger.info("Data will be loaded from TF Records: %s" % ', '.join(split_paths))
    return data
def validate(self, sess):
    """Accumulate recognition and orientation accuracy over the validation set.

    The two accuracy op bundles are created on first use, reset, and then
    updated once per validation batch. Nothing is returned; the accumulated
    values live in ``self._acc_ops.metric`` and
    ``self._acc_orient_ops.metric``.

    When ``self.tf_record_prefix`` is set, the batch ops are built from the
    validation TF Record file and served by queue runners that are started
    and stopped inside this call. Without rotation augmentation every
    sample gets the middle entry of the orientation list as its target.

    Args:
        sess: session used to run every op.
    """
    if self._acc_ops is None:
        self._acc_ops = self._init_acc_ops()
    if self._acc_orient_ops is None:
        self._acc_orient_ops = self._init_acc_orient_ops()
    sess.run(self._acc_ops.reset)
    sess.run(self._acc_orient_ops.reset)
    num_batches_val = int(self.data.validation.num_examples / self.batch_size_val)
    use_tf_records = self.tf_record_prefix is not None

    # TODO remove hardcoded init
    rots = rotation_rad(-60, 60, 15)
    num_orients = len(rots)
    center_one_hot = None
    if not self.do_augment_rot:
        # The feed below always needs an orientation target; without
        # augmentation use the middle of the orientation list. Floor
        # division keeps the index an integer on Python 2 and Python 3.
        center_one_hot = dense_to_one_hot(
            np.zeros((self.batch_size_val,), dtype=int) + (num_orients // 2),
            num_orients)

    coord = None
    threads = None
    if use_tf_records:
        # The loop below reads from these ops, so they must exist before it
        # starts.
        img, label, label_orient = MNIST.read_and_decode_ops(
            self.data.validation.path,
            one_hot=self.data.validation.one_hot,
            num_orientations=len(self.data.validation.orientations))
        batch_xs_op, batch_ys_op, batch_os_op = tf.train.batch(
            [img, label, label_orient],
            batch_size=self.batch_size_val,
            capacity=2000,
            num_threads=8)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in xrange(num_batches_val):
            if use_tf_records:
                # The stored orientation label is read but not used below.
                batch_xs, batch_ys, batch_os = sess.run(
                    [batch_xs_op, batch_ys_op, batch_os_op])
            else:
                batch_xs, batch_ys = self.data.validation.next_batch(
                    self.batch_size_val, shuffle=False)
            batch_xs_in = batch_xs
            if self.do_augment_rot:
                # NOTE(review): requested anew for every batch; if this
                # builds graph ops on each call the graph keeps growing --
                # confirm.
                augment_op, batch_os2 = self.rotation_ops_multiset_val(3)
                orients_dense = np.array([rots.index(o) for o in batch_os2])
                batch_os_one_hot = dense_to_one_hot(orients_dense, num_orients)
                batch_xs_in = sess.run(augment_op, feed_dict={self.x: batch_xs})
            else:
                batch_os_one_hot = center_one_hot
            # Only the side effect of the update ops matters here.
            sess.run(
                [self._acc_ops.metric,
                 self._acc_ops.update,
                 self._acc_orient_ops.metric,
                 self._acc_orient_ops.update,
                 ],
                feed_dict={self.x: batch_xs_in,
                           self.y_: batch_ys,
                           self.orient_: batch_os_one_hot,
                           })
    finally:
        if coord is not None:
            coord.request_stop()
            coord.join(threads)
def learn(self, sess):
    """Train the model and validate it after every epoch.

    Per epoch: run ``self.num_batches_train`` optimisation steps, call
    ``self.validate``, read both accuracy metrics, save a checkpoint and
    update the result records. Training stops early once the tracked
    accuracy has stayed within 1e-5 of the mean of its last three values.

    Queue runner threads (TF Record mode) are stopped and both summary
    writers are closed when this method exits, whether it returns normally
    or raises.

    Args:
        sess: session used to run every op.

    Returns:
        tuple: ``(result, result_orient)`` for recognition and orientation
        accuracy. Each exposes ``name``, ``max``, ``last``, ``history``
        (deque of the last three values) and ``epoch_last``.
    """
    if self.y_ is None:
        self.logger.info("Define placeholder for ground truth. Dims: %d" % self.model.n_nodes[-1])
        self.y_ = tf.placeholder("float", [None, self.model.n_nodes[-1]])
    suffix = ''
    if self.do_finetune:
        self.logger.info("Finetuning!")
        suffix += 'finetune'
    dir_train = self.dirpath('train', suffix=suffix)
    dir_val = self.dirpath('validation', suffix=suffix)
    summary_writer_train = tf.summary.FileWriter(dir_train, sess.graph)
    summary_writer_val = tf.summary.FileWriter(dir_val)
    coord = None
    threads = None
    try:
        cost, loss = self._cost_loss(self.dirname('train', suffix=suffix))
        vars_new = None
        if not self.do_finetune:
            # limit optimizer vars if not finetuning
            vars_new = self.model.vars_new()
        optimizer = setup_optimizer(cost, self.learning_rate, var_list=vars_new)
        if not self.do_finetune:
            # Queried again after the optimizer has been set up.
            # NOTE(review): presumably to pick up variables the optimizer
            # added -- confirm.
            vars_new = self.model.vars_new()
        self.init_vars(sess, vars_new)
        summaries_merged_train = self._merge_summaries_scalars([cost, loss])

        if self._acc_ops is None:
            self._acc_ops = self._init_acc_ops()
        sess.run(self._acc_ops.reset)
        if self._acc_orient_ops is None:
            self._acc_orient_ops = self._init_acc_orient_ops()
        sess.run(self._acc_orient_ops.reset)
        summaries_merged_val = self._merge_summaries_scalars(
            [self._acc_ops.metric, self._acc_orient_ops.metric])

        use_tf_records = self.tf_record_prefix is not None
        if use_tf_records:
            img, label, label_orient = MNIST.read_and_decode_ops(
                self.data.train.path,
                one_hot=self.data.train.one_hot,
                num_orientations=len(self.data.train.orientations))
            batch_xs_op, batch_ys_op, batch_os_op = tf.train.shuffle_batch(
                [img, label, label_orient],
                batch_size=self.batch_size_train,
                capacity=2000,
                min_after_dequeue=1000,
                num_threads=8)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # TODO remove hardcoded init
        # The orientation set is the same for every batch: compute it once.
        rots = rotation_rad(-60, 60, 15)
        num_orients = len(rots)
        center_one_hot = None
        if not self.do_augment_rot:
            # Without augmentation every sample is labelled with the middle
            # entry of the orientation list. Floor division keeps the index
            # an integer on both Python 2 and Python 3.
            center_one_hot = dense_to_one_hot(
                np.zeros((self.batch_size_train,), dtype=int) + (num_orients // 2),
                num_orients)

        self._init_saver()
        itr_exp = 0
        # The namedtuple *classes* themselves are used as mutable records:
        # attributes are assigned on them and the classes are returned.
        # Kept as-is because callers receive these objects.
        result_fields = ['max', 'last', 'name', 'history', 'epoch_last']
        result = collections.namedtuple('Result', result_fields)
        result_orient = collections.namedtuple('Result', result_fields)
        result.name = self._acc_ops.metric.name
        result.max = 0
        result.history = collections.deque(maxlen=3)
        result_orient.name = self._acc_orient_ops.metric.name
        result_orient.max = 0
        result_orient.history = collections.deque(maxlen=3)

        for epoch in xrange(self.training_epochs):
            self.logger.info("Start %s epoch %d, step %d" % (suffix, epoch, itr_exp))
            # Loop over all batches
            for itr_epoch in xrange(self.num_batches_train):
                if use_tf_records:
                    # The stored orientation label is read but not used below.
                    batch_xs, batch_ys, batch_os = sess.run(
                        [batch_xs_op, batch_ys_op, batch_os_op])
                else:
                    batch_xs, batch_ys = self.data.train.next_batch(self.batch_size_train)
                batch_xs_in = batch_xs
                if self.do_augment_rot:
                    augment_op, batch_os2 = self.rotation_ops_multiset_train(3)
                    batch_xs_in = sess.run(augment_op, feed_dict={self.x: batch_xs})
                    orients_dense = np.array([rots.index(o) for o in batch_os2])
                    batch_os_one_hot = dense_to_one_hot(orients_dense, num_orients)
                else:
                    batch_os_one_hot = center_one_hot
                _, _, sess_summary = sess.run(
                    [optimizer, cost, summaries_merged_train],
                    feed_dict={self.x: batch_xs_in,
                               self.y_: batch_ys,
                               self.orient_: batch_os_one_hot})
                if self.is_time_to_track_train(itr_exp):
                    summary_writer_train.add_summary(sess_summary, itr_exp)
                itr_exp += 1

            self.validate(sess)
            # run metric op one more time, data in feed dict is dummy data,
            # does not influence metric
            acc, acc_orient, sess_summary = sess.run(
                [self._acc_ops.metric,
                 self._acc_orient_ops.metric,
                 summaries_merged_val],
                feed_dict={self.x: batch_xs,
                           self.y_: batch_ys,
                           self.orient_: batch_os_one_hot})
            if self.is_time_to_track_val(itr_exp):
                summary_writer_val.add_summary(sess_summary, itr_exp)
            self.logger.debug("validation accuracy after %s step %d: %f" % (suffix, itr_exp, acc))
            self.logger.debug("validation orientation accuracy after %s step %d: %f" % (suffix, itr_exp, acc_orient))
            fpath_save = os.path.join(dir_train, self._get_save_name())
            self.logger.debug("Save model at %s step %d to '%s'" % (suffix, itr_exp, fpath_save))
            self.saver.save(sess, fpath_save, global_step=itr_exp)

            result.last = acc
            result.epoch_last = epoch
            result.max = max(result.max, result.last)
            result.history.append(result.last)
            result_orient.last = acc_orient
            result_orient.max = max(result_orient.max, result_orient.last)
            result_orient.history.append(result_orient.last)
            result_orient.epoch_last = epoch

            # Early stopping: a full history whose mean equals the latest
            # value (within 1e-5) means the metric has plateaued.
            if self.do_task_recognition:
                if len(result.history) == result.history.maxlen and np.absolute(np.mean(result.history)-result.last) < 1e-5:
                    self.logger.debug("Validation accuracy not changing anymore. Stop iterating.")
                    break
            elif self.do_task_orientation:
                if len(result_orient.history) == result_orient.history.maxlen and np.absolute(np.mean(result_orient.history)-result_orient.last) < 1e-5:
                    self.logger.debug("Validation orientation accuracy not changing anymore. Stop iterating.")
                    break
    finally:
        try:
            if coord is not None:
                coord.request_stop()
                coord.join(threads)
        finally:
            # Closing flushes pending events to disk.
            summary_writer_train.close()
            summary_writer_val.close()
    self.logger.info("Classification %s Optimization Finished!" % suffix)
    return result, result_orient
def run(run_name, args):
    """Train an autoencoder, then a classifier on top of it, then fine-tune.

    Creates the run directory if needed, sets up the module-level logger,
    loads every config in ``args.fpath_cfg_list`` and writes a copy of each
    into the run directory as ``config_<index>.yml``. The first config
    drives the ``AERunner``/``SAE``, the second the ``MLPRunner``/``MLP``
    built on the autoencoder's representation. The MLP runner's ``learn``
    is called twice, the second time with ``do_finetune`` set.

    Args:
        run_name: name of the run; used for the default run directory and
            stored in every config.
        args: parsed options; reads ``run_dir``, ``log_dir``,
            ``logger_name``, ``logger_name_none``, ``fpath_cfg_list``,
            ``fpath_meta``, ``dir_checkpoints`` and
            ``per_process_gpu_memory_fraction``.

    Returns:
        tuple: ``(result_ae, result_mlp, result_mlp_orient,
        result_mlp_fine, result_mlp_fine_orient, tasks)``.
    """
    global logger
    run_dir = args.run_dir
    if run_dir is None:
        run_dir = os.path.join(args.log_dir, run_name)
    run_dir_already_exists = os.path.isdir(run_dir)
    if not run_dir_already_exists:
        os.makedirs(run_dir)

    fpath_log = os.path.join(args.log_dir, 'log.txt')
    # Indexing with the flag picks None when logger_name_none is set.
    logger = lu.setup_logging(fpath_log,
                              name=[args.logger_name, None][args.logger_name_none])
    run_dir_msg = "Created run directory %s"
    if run_dir_already_exists:
        run_dir_msg = "Found run directory %s"
    logger.debug(run_dir_msg, run_dir)
    logger.info("Starting run %s" % run_name)

    cfg_list = []
    logger.debug("Got %d config files." % len(args.fpath_cfg_list))
    for cfg_idx, fpath_src in enumerate(args.fpath_cfg_list):
        logger.debug("Loading config from %s" % fpath_src)
        loaded = load_config(fpath_src, logger)
        loaded['log_dir'] = os.path.expanduser(args.log_dir)
        loaded['run_name'] = run_name
        loaded['run_dir'] = os.path.expanduser(run_dir)
        fpath_dst = os.path.join(run_dir, 'config_%d.yml' % cfg_idx)
        logger.debug("Write config %s to %s" % (os.path.basename(fpath_src), fpath_dst))
        with open(fpath_dst, 'w') as h:
            h.write(yaml.dump(loaded))
        cfg_list.append(loaded)

    reuse = args.fpath_meta is not None and args.dir_checkpoints is not None
    if reuse:
        trained_model = tf.train.import_meta_graph(args.fpath_meta)

    ae_cfg = cfg_list[0]
    ae_runner = AERunner(ae_cfg)
    # Flattened input size: product of all image dimensions but the first.
    n_input = 1
    for dim in ae_runner.data.train.images.shape[1:]:
        n_input = n_input * dim

    session_config = tf.ConfigProto()
    logger.debug('per_process_gpu_memory_fraction set to %f' % args.per_process_gpu_memory_fraction)
    session_config.gpu_options.per_process_gpu_memory_fraction = args.per_process_gpu_memory_fraction

    graph = tf.Graph()
    with graph.as_default():
        in_op = tf.placeholder("float", [None, n_input])
        sae_params = {
            'in_op': in_op,
            'reuse': reuse,
            'prefix': ae_cfg['prefix'],
            'do_denoising': ae_cfg['do_denoising'],
            'input_noise_std': ae_cfg['input_noise_std'],
        }
        ae_runner.model = SAE(sae_params)

        mlp_cfg = cfg_list[1]
        mlp_runner = MLPRunner(mlp_cfg)
        result_mlp, result_mlp_orient = None, None
        result_mlp_fine, result_mlp_fine_orient = None, None
        tasks = []
        for task_name, enabled in (('recognition', mlp_runner.do_task_recognition),
                                   ('orientation', mlp_runner.do_task_orientation)):
            if enabled:
                tasks.append(task_name)

        # Launch the graph
        with tf.Session(graph=graph, config=session_config) as sess:
            if reuse:
                checkpoint = tf.train.latest_checkpoint(args.dir_checkpoints)
                trained_model.restore(sess, checkpoint)
            result_ae = ae_runner.learn(sess)

            n_classes = mlp_runner.data.train.labels.shape[-1]
            representation = ae_runner.model.representation
            classifier_params = {
                'n_nodes': [n_classes],
                'n_input': representation.get_shape()[-1].value,
                'prefix': mlp_cfg['prefix'],
                'branch': mlp_cfg.get('branch', len(mlp_cfg['n_nodes'])),
                'logger_name': mlp_cfg['logger_name'],
            }
            net = MLP(classifier_params)
            net.x = representation
            net.build()
            mlp_runner.x = sae_params['in_op']
            mlp_runner.orient_ = tf.placeholder(
                "float", shape=[None, len(rotation_rad(-60, 60, 15))])
            mlp_runner.model = net

            result_mlp, result_mlp_orient = mlp_runner.learn(sess)
            # Second pass with fine-tuning enabled.
            mlp_runner.do_finetune = True
            result_mlp_fine, result_mlp_fine_orient = mlp_runner.learn(sess)

    logger.info("Finished run %s" % run_name)
    lu.close_logging(logger)
    return (result_ae,
            result_mlp, result_mlp_orient,
            result_mlp_fine, result_mlp_fine_orient,
            tasks)
def run(run_name, args):
    """Build and train an MLP classifier for one run.

    Creates the run directory if needed, sets up the module-level logger,
    loads every config in ``args.fpath_cfg_list`` and writes a copy of each
    into the run directory as ``config_<index>.yml``. Only the first config
    is used to build the ``Runner`` and the ``MLP``.

    Args:
        run_name: name of the run; used for the default run directory and
            stored in every config.
        args: parsed options; reads ``run_dir``, ``log_dir``,
            ``logger_name``, ``logger_name_none``, ``fpath_cfg_list`` and
            ``per_process_gpu_memory_fraction``.

    Returns:
        tuple: ``(result, result_orient, tasks)``.
    """
    global logger
    run_dir = args.run_dir
    if run_dir is None:
        run_dir = os.path.join(args.log_dir, run_name)
    run_dir_already_exists = os.path.isdir(run_dir)
    if not run_dir_already_exists:
        os.makedirs(run_dir)

    fpath_log = os.path.join(args.log_dir, 'log.txt')
    # Indexing with the flag picks None when logger_name_none is set.
    logger = lu.setup_logging(fpath_log,
                              name=[args.logger_name, None][args.logger_name_none])
    run_dir_msg = "Created run directory %s"
    if run_dir_already_exists:
        run_dir_msg = "Found run directory %s"
    logger.debug(run_dir_msg, run_dir)
    logger.info("Starting run %s" % run_name)

    cfg_list = []
    logger.debug("Got %d config files." % len(args.fpath_cfg_list))
    for cfg_idx, fpath_src in enumerate(args.fpath_cfg_list):
        logger.debug("Loading config from %s" % fpath_src)
        loaded = load_config(fpath_src, logger)
        loaded['log_dir'] = os.path.expanduser(args.log_dir)
        loaded['run_name'] = run_name
        loaded['run_dir'] = os.path.expanduser(run_dir)
        fname_src = os.path.basename(fpath_src)
        fpath_dst = os.path.join(run_dir, 'config_%d.yml' % cfg_idx)
        logger.debug("Write config %s to %s" % (fname_src, fpath_dst))
        with open(fpath_dst, 'w') as h:
            h.write(yaml.dump(loaded))
        cfg_list.append(loaded)

    cfg = cfg_list[0]
    mlp_runner = Runner(cfg)
    # Flattened input size: product of all image dimensions but the first.
    n_input = 1
    for dim in mlp_runner.data.train.images.shape[1:]:
        n_input = n_input * dim

    result = None
    tasks = []
    for task_name, enabled in (('recognition', mlp_runner.do_task_recognition),
                               ('orientation', mlp_runner.do_task_orientation)):
        if enabled:
            tasks.append(task_name)

    session_config = tf.ConfigProto()
    logger.debug('per_process_gpu_memory_fraction set to %f' % args.per_process_gpu_memory_fraction)
    session_config.gpu_options.per_process_gpu_memory_fraction = args.per_process_gpu_memory_fraction

    # Launch the graph
    graph = tf.Graph()
    with graph.as_default():
        with tf.Session(graph=graph, config=session_config) as sess:
            n_classes = mlp_runner.data.train.labels.shape[-1]
            # The decision layer is appended to the configured layer sizes
            # in place, so the default branch index below is computed on the
            # extended list.
            cfg['n_nodes'].append(n_classes)
            # subtract additional because of decision layer
            branch = cfg.get('branch', len(cfg['n_nodes']) - 1)
            classifier_params = {
                'n_nodes': cfg['n_nodes'],
                'n_input': n_input,
                'prefix': cfg['prefix'],
                'branch': branch,
                'logger_name': cfg['logger_name'],
            }
            net = MLP(classifier_params)
            net.x = tf.placeholder("float", [None, n_input])
            net.build()
            mlp_runner.model = net
            mlp_runner.x = net.x
            n_orientations = len(rotation_rad(-60, 60, 15))
            mlp_runner.orient_ = tf.placeholder(
                "float", shape=[None, n_orientations])
            result, result_orient = mlp_runner.learn(sess)

    logger.info("Finished run %s" % run_name)
    lu.close_logging(logger)
    return result, result_orient, tasks