def init_C():
    with tf.Graph().as_default():
        # tf always returns the final batch, even if it holds fewer than
        # batch_size samples.
        indices, images, labels = clothing1M.inputs(eval_data=False,
                                                    batch_size=FLAGS.batch_size)
        is_training = tf.placeholder(tf.bool)
        logits = clothing1M.inference_resnet_own(images, training=is_training)
        labels_ = tf.nn.softmax(logits)

        # Restore the moving average version of the learned variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            clothing1M.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.init_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found')
                return

            inds = []
            preds = []
            annotations = []
            n_iter = 0

            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord,
                                                 daemon=True, start=True))
            while not coord.should_stop():
                try:
                    ind, pred, annotation = sess.run(
                        [indices, labels_, labels],
                        feed_dict={is_training: False})
                    inds.append(ind)
                    preds.append(pred)
                    annotations.append(annotation)
                    n_iter += 1
                    if n_iter % 100 == 0:
                        print('Iters: %d' % n_iter)
                except Exception as e:  # pylint: disable=broad-except
                    coord.request_stop(e)
            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)

            inds = np.concatenate(inds, axis=0)
            preds = np.concatenate(preds, axis=0)
            annotations = np.concatenate(annotations, axis=0)

            # Count (predicted label, noisy label) pairs; the extra row is
            # the outlier class.
            est_C = np.zeros((clothing1M.NUM_CLASSES + 1,
                              clothing1M.NUM_CLASSES))
            for i in xrange(annotations.shape[0]):
                label_ = np.argmax(preds[i])
                label = annotations[i]
                est_C[label_][label] += 1
            return inds, preds, annotations, est_C
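# A minimal numpy sketch (not part of the original pipeline) of how the
# counting matrix est_C returned by init_C() maps to a row-stochastic
# transition estimate via additive (Dirichlet) smoothing, mirroring the
# (C + alpha) / rowsum update used inside train() below. The helper name
# counts_to_transition is hypothetical.
def counts_to_transition(C, alpha=1.0):
    """Row-normalize a (K+1, K) counting matrix into transition probabilities."""
    C = np.asarray(C, dtype=np.float64)
    # Additive smoothing keeps zero-count rows well-defined.
    smoothed = C + alpha
    return smoothed / np.sum(smoothed, axis=1, keepdims=True)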
def evaluate():
    """Eval Clothing1M for a number of steps and dump detected outliers."""
    with tf.Graph().as_default() as g:
        # Get images and labels for Clothing1M.
        eval_data = FLAGS.eval_data == 'test'
        # Set eval_batch_size to a number that divides the total number of
        # eval examples, to stay compatible with tf.data.Iterator.
        eval_batch_size = FLAGS.batch_size
        indices, images, labels, ori_images = clothing1M.inputs(
            eval_data=eval_data, batch_size=eval_batch_size, ORI=True)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = clothing1M.inference_resnet_own(images, training=False)
        preds = tf.nn.softmax(logits)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            clothing1M.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        results_list = dict()
        threshold = FLAGS.threshold

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
            else:
                print('No checkpoint file found')
                return

            max_steps = int(
                math.ceil(clothing1M.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL /
                          FLAGS.batch_size))
            for step in xrange(max_steps):
                if step % 100 == 0:
                    print('step: %d,' % step,
                          'number of outliers: %d' % len(results_list))
                res = sess.run([indices, preds, labels, ori_images])
                # Keep every example whose outlier-class probability (the
                # last softmax output) exceeds the threshold.
                for ind in xrange(res[0].shape[0]):
                    if res[1][ind][-1] > threshold:
                        results_list[res[0][ind]] = [
                            res[1][ind], res[2][ind], res[3][ind]
                        ]

            with open('outlier.pkl', 'wb') as w:
                pickle.dump(results_list, w)
            for ind in results_list.keys():
                img = Image.fromarray(results_list[ind][-1].astype(np.uint8))
                img.save(os.path.join(FLAGS.outlier_dir, str(ind) + '.jpg'))
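# A small usage sketch (an assumption, not part of the original scripts) for
# reloading the outlier.pkl dump written above. Each value is
# [softmax prediction, noisy label, original image array], keyed by example
# index; load_outliers is a hypothetical helper name.
def load_outliers(path='outlier.pkl'):
    with open(path, 'rb') as f:
        results = pickle.load(f)
    # Print a few entries to sanity-check the dump.
    for ind, (pred, label, ori_image) in sorted(results.items())[:5]:
        print('index %d: noisy label %d, outlier prob %.3f'
              % (ind, label, pred[-1]))
    return results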
def estimation_T():
    with tf.Graph().as_default():
        indices, images, labels = clothing1M.inputs(eval_data=False,
                                                    batch_size=FLAGS.batch_size)
        is_training = tf.placeholder(tf.bool)
        logits = clothing1M.inference_resnet_own(images, training=is_training)
        labels_ = tf.nn.softmax(logits)

        # Restore the moving average version of the learned variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            clothing1M.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.init_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found')
                return

            preds = []
            annotations = []

            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord,
                                                 daemon=True, start=True))
            while not coord.should_stop():
                try:
                    pred, annotation = sess.run(
                        [labels_, labels], feed_dict={is_training: False})
                    preds.append(pred)
                    annotations.append(annotation)
                except Exception as e:  # pylint: disable=broad-except
                    coord.request_stop(e)
            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)

            preds = np.concatenate(preds, axis=0)
            annotations = np.concatenate(annotations, axis=0)

            # Type-II estimation: accumulate each example's full softmax at
            # the column of its noisy label, then row-normalize.
            unnormal_est_T = np.zeros((clothing1M.NUM_CLASSES + 1,
                                       clothing1M.NUM_CLASSES))
            for i in xrange(annotations.shape[0]):
                label = annotations[i]
                unnormal_est_T[:, label] += preds[i]
            unnormal_est_T_sum = np.sum(unnormal_est_T, axis=1)
            est_T = unnormal_est_T / unnormal_est_T_sum[:, None]
            return est_T
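# A toy worked example (not part of the original scripts) of the Type-II
# accumulation above, assuming NUM_CLASSES = 2 so the softmax has K + 1 = 3
# outputs (the last being the outlier class). Each example adds its softmax
# vector to the column of its noisy label; rows are normalized at the end.
def _toy_type2_estimation():
    preds_toy = np.array([[0.7, 0.2, 0.1],
                          [0.1, 0.8, 0.1]])    # softmax over K + 1 outputs
    labels_toy = np.array([0, 1])              # noisy labels in {0, ..., K-1}
    T_unnorm = np.zeros((3, 2))
    for p, y in zip(preds_toy, labels_toy):
        T_unnorm[:, y] += p
    T_toy = T_unnorm / T_unnorm.sum(axis=1, keepdims=True)
    print(T_toy)  # each row sums to 1, e.g. first row = [0.875, 0.125]
    return T_toy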
def evaluate():
    """Eval Clothing1M for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for Clothing1M.
        eval_data = FLAGS.eval_data == 'test'
        # Set eval_batch_size to a number that divides the total number of
        # eval examples, to stay compatible with tf.data.Iterator.
        eval_batch_size = FLAGS.batch_size
        indices, images, labels = clothing1M.inputs(eval_data=eval_data,
                                                    batch_size=eval_batch_size)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        is_training = tf.placeholder(tf.bool)
        logits = clothing1M.inference_resnet_own(images, training=is_training)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            clothing1M.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        for i in variables_to_restore.keys():
            print(i)
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op,
                      is_training, logits, labels)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
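# eval_once() is called above but not defined in this section. The sketch
# below follows the standard TF-1.x queue-runner evaluation loop (as in the
# CIFAR-10 tutorial) and is an assumption about its contents, not the
# original implementation; it relies on the module-level imports (tf, np,
# math, datetime) already used above.
def eval_once(saver, summary_writer, top_k_op, summary_op, is_training,
              logits, labels):
    """Restore the latest checkpoint and report top-1 precision once."""
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                '-')[-1]
        else:
            print('No checkpoint file found')
            return

        coord = tf.train.Coordinator()
        threads = []
        try:
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord,
                                                 daemon=True, start=True))
            num_iter = int(
                math.ceil(clothing1M.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL /
                          FLAGS.batch_size))
            true_count = 0
            total_sample_count = num_iter * FLAGS.batch_size
            step = 0
            while step < num_iter and not coord.should_stop():
                predictions = sess.run(top_k_op,
                                       feed_dict={is_training: False})
                true_count += np.sum(predictions)
                step += 1

            precision = true_count / float(total_sample_count)
            print('%s: precision @ 1 = %.4f' % (datetime.now(), precision))

            summary = tf.Summary()
            summary.ParseFromString(
                sess.run(summary_op, feed_dict={is_training: False}))
            summary.value.add(tag='Precision @ 1', simple_value=precision)
            summary_writer.add_summary(summary, global_step)
        except Exception as e:  # pylint: disable=broad-except
            coord.request_stop(e)
        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)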
def train(infer_z, noisy_y, C):
    """Train Clothing1M for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for Clothing1M.
        # Force input pipeline to CPU:0 to avoid operations sometimes ending
        # up on GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            indices, images, labels = clothing1M.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        is_training = tf.placeholder(tf.bool)
        logits = clothing1M.inference_resnet_own(images, training=is_training)
        preds = tf.nn.softmax(logits)

        # Approximate Gibbs sampling: resample the latent clean label z from
        # the posterior proportional to p(z|x) * T[z, y].
        T = tf.placeholder(
            tf.float32,
            shape=[clothing1M.NUM_CLASSES + 1, clothing1M.NUM_CLASSES],
            name='transition')
        unnorm_probs = preds * tf.gather(tf.transpose(T, [1, 0]), labels)
        probs = unnorm_probs / tf.reduce_sum(unnorm_probs, axis=1,
                                             keepdims=True)
        sampler = OneHotCategorical(probs=probs)
        labels_ = tf.stop_gradient(tf.argmax(sampler.sample(), axis=1))

        # Calculate loss.
        loss = clothing1M.loss(logits, labels_)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = clothing1M.train(loss, global_step)

        # Calculate prediction.
        # acc_op contains (accuracy, update_op), so it is the cumulative
        # accuracy when the session runs acc_op; to inspect the accuracy of a
        # single batch, run acc_op[0] only.
        acc_op = tf.metrics.accuracy(labels, tf.argmax(logits, axis=1))
        tf.summary.scalar('training accuracy', acc_op[0])

        # Build a scaffold for MonitoredTrainingSession to restore only the
        # variables we wish to initialize from the checkpoint.
        variables_to_restore = []
        variables_to_restore += tf.trainable_variables()
        variables_to_restore += [
            g for g in tf.global_variables()
            if 'moving_mean' in g.name or 'moving_variance' in g.name
        ]
        for var in variables_to_restore:
            print(var.name)
        ckpt = tf.train.get_checkpoint_state(FLAGS.init_dir)
        init_assign_op, init_feed_dict = tf.contrib.framework.assign_from_checkpoint(
            ckpt.model_checkpoint_path, variables_to_restore)

        def InitAssignFn(scaffold, sess):
            sess.run(init_assign_op, init_feed_dict)

        scaffold = tf.train.Scaffold(saver=tf.train.Saver(),
                                     init_fn=InitAssignFn)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(
                    tf.get_collection('losses')[0])  # Asks for loss value.
            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = ('%s: step %d, loss = %.6f (%.1f examples/sec; '
                                  '%.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        max_steps = int(
            math.ceil(clothing1M.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                      FLAGS.num_epochs / FLAGS.batch_size))
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                scaffold=scaffold,
                hooks=[
                    tf.train.StopAtStepHook(last_step=max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                save_checkpoint_secs=60,
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement,
                    gpu_options=gpu_options)) as mon_sess:
            alpha = 1.0
            C_init = C.copy()
            trans_init = (C_init + alpha) / np.sum(C_init + alpha, axis=1,
                                                   keepdims=True)
            warming_up_step_1 = 20000
            warming_up_step_2 = 40000
            step = 0
            freq_step = 50000

            # Warming-up transitions: first a smoothed identity matrix, then
            # the transition matrix loaded from FLAGS.transition.
            with open(FLAGS.transition, 'rb') as f:
                data = pickle.load(f)
            trans_warming_1 = np.concatenate(
                [np_smoothing_eye(clothing1M.NUM_CLASSES, delta=0.05),
                 np.ones([1, clothing1M.NUM_CLASSES]) / clothing1M.NUM_CLASSES],
                axis=0)
            trans_warming_2 = data[0].copy()
            trans = data[0].copy()
            exemplars = []

            while not mon_sess.should_stop():
                alpha = 1.0
                if (step >= warming_up_step_2) and (step % freq_step == 0):
                    trans = (C + alpha) / np.sum(C + alpha, axis=1,
                                                 keepdims=True)
                if step < warming_up_step_1:
                    res = mon_sess.run(
                        [train_op, acc_op, global_step, indices, labels, labels_],
                        feed_dict={is_training: True, T: trans_warming_1})
                elif step < warming_up_step_2:
                    res = mon_sess.run(
                        [train_op, acc_op, global_step, indices, labels, labels_],
                        feed_dict={is_training: True, T: trans_warming_2})
                else:
                    res = mon_sess.run(
                        [train_op, acc_op, global_step, indices, labels, labels_],
                        feed_dict={is_training: True, T: trans})

                # Update the counting matrix with the freshly sampled labels.
                for i in xrange(res[3].shape[0]):
                    ind = res[3][i]
                    assert noisy_y[ind] == res[4][i]
                    C[infer_z[ind]][res[4][i]] -= 1
                    assert C[infer_z[ind]][noisy_y[ind]] >= 0
                    infer_z[ind] = res[5][i]
                    C[infer_z[ind]][res[4][i]] += 1

                step = res[2]
                if step % 1000 == 0:
                    print('Counting matrix\n', C)
                    print('Initial counting matrix\n', C_init)
                    print('Transition matrix\n', trans)
                    print('Initial transition matrix\n', trans_init)
                if step % 20000 == 0:
                    # Snapshot copies, so later mutations of infer_z and C do
                    # not retroactively change earlier exemplars.
                    exemplars.append([list(infer_z.keys()),
                                      list(infer_z.values()), C.copy()])
                    with open('varC_learnt_%s.pkl' % FLAGS.transition[:-4],
                              'wb') as w:
                        pickle.dump(exemplars, w)
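# np_smoothing_eye() is used in train() above but not defined in this
# section. A minimal sketch under the assumption that it returns a
# label-smoothed identity matrix: 1 - delta on the diagonal, with the
# remaining delta mass spread uniformly over each row's off-diagonal entries.
def np_smoothing_eye(num_classes, delta=0.05):
    """Return a (num_classes, num_classes) smoothed identity; rows sum to 1."""
    off_diag = delta / (num_classes - 1)
    eye = np.full((num_classes, num_classes), off_diag)
    np.fill_diagonal(eye, 1.0 - delta)
    return eye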
def train():
    """Train Clothing1M for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for Clothing1M.
        # Force input pipeline to CPU:0 to avoid operations sometimes ending
        # up on GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            indices, images, labels = clothing1M.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        is_training = tf.placeholder(tf.bool)
        logits = clothing1M.inference_resnet_own(images, training=is_training)

        # Calculate loss.
        loss = clothing1M.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = clothing1M.train(loss, global_step)

        # Calculate prediction.
        # acc_op contains (accuracy, update_op), so it is the cumulative
        # accuracy when the session runs acc_op; to inspect the accuracy of a
        # single batch, run acc_op[0] only.
        acc_op = tf.metrics.accuracy(labels, tf.argmax(logits, axis=1))
        tf.summary.scalar('training accuracy', acc_op[0])

        # Build a scaffold for MonitoredTrainingSession to restore only the
        # variables we wish to initialize from the checkpoint (the final
        # dense layer is excluded and trained from scratch).
        variables_to_restore = []
        variables_to_restore += [
            var for var in tf.trainable_variables() if 'dense' not in var.name
        ]
        variables_to_restore += [
            g for g in tf.global_variables()
            if 'moving_mean' in g.name or 'moving_variance' in g.name
        ]
        for var in variables_to_restore:
            print(var.name)
        ckpt = tf.train.get_checkpoint_state(FLAGS.init_dir)
        init_assign_op, init_feed_dict = tf.contrib.framework.assign_from_checkpoint(
            ckpt.model_checkpoint_path, variables_to_restore)

        def InitAssignFn(scaffold, sess):
            sess.run(init_assign_op, init_feed_dict)

        scaffold = tf.train.Scaffold(saver=tf.train.Saver(),
                                     init_fn=InitAssignFn)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(
                    tf.get_collection('losses')[0])  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = ('%s: step %d, loss = %.6f (%.1f examples/sec; '
                                  '%.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        max_steps = int(
            math.ceil(clothing1M.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                      FLAGS.num_epochs / FLAGS.batch_size))
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                scaffold=scaffold,
                hooks=[
                    tf.train.StopAtStepHook(last_step=max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                save_checkpoint_secs=60,
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement,
                    gpu_options=gpu_options)) as mon_sess:
            while not mon_sess.should_stop():
                res = mon_sess.run([train_op, acc_op, global_step],
                                   feed_dict={is_training: True})
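# A hypothetical driver (an assumption, not part of the original scripts)
# showing how init_C() seeds the state consumed by the Gibbs-sampling variant
# train(infer_z, noisy_y, C) defined earlier: infer_z maps each example index
# to its current inferred clean label, noisy_y maps it to its observed noisy
# label, and C counts (inferred, noisy) pairs.
def main(argv=None):  # pylint: disable=unused-argument
    inds, preds, annotations, est_C = init_C()
    infer_z = {}
    noisy_y = {}
    for i in xrange(inds.shape[0]):
        infer_z[inds[i]] = np.argmax(preds[i])
        noisy_y[inds[i]] = annotations[i]
    train(infer_z, noisy_y, est_C)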