def run_demo():
    """Run one traced AlexNet forward pass, then save a checkpoint and a trace.

    A random batch of shape (1, 224, 224, 3) is fed to ``model.input_images``
    and ``model.output`` is evaluated and printed.  Afterwards the variables
    are saved to ``FLAGS.save_model_path`` and the Chrome trace of the step
    is written to ``./models/timeline.json``.

    ``run_options`` and ``run_metadata`` are not defined in this function.
    NOTE(review): presumably module-level ``tf.RunOptions`` /
    ``tf.RunMetadata`` objects -- confirm against the rest of the file.
    """
    import os  # local import: only needed to create the trace directory

    images = np.random.rand(1, 224, 224, 3)
    model = alex_model.AlexNet()

    # Built after the model: both only cover variables that already exist
    # in the graph at construction time.
    saver = tf.train.Saver()
    init = tf.compat.v1.global_variables_initializer()

    # Expose one GPU device when requested, none otherwise.
    config = tf.ConfigProto(device_count={'GPU': 1 if FLAGS.use_gpu else 0})
    config.gpu_options.allow_growth = True

    sess = InteractiveSession(config=config)
    try:
        sess.run(init)
        output_value = sess.run([model.output],
                                feed_dict={model.input_images: images},
                                options=run_options,
                                run_metadata=run_metadata)
        print(output_value)
        saver.save(sess, save_path=FLAGS.save_model_path)

        # Dump the step statistics collected in run_metadata as a Chrome trace.
        trace = timeline.Timeline(
            run_metadata.step_stats).generate_chrome_trace_format()
        trace_path = './models/timeline.json'
        os.makedirs(os.path.dirname(trace_path), exist_ok=True)
        with open(trace_path, 'w') as f:
            f.write(trace)
    finally:
        # The original never closed the session; release it even when the
        # run or the file writes fail.
        sess.close()
class predicter(object):
    """Runs the restored YOLOv2 network over the 'test' dataset split."""

    def __init__(self):
        """Build the network, open a session and restore ``cfg.EVAL.WEIGHT``."""
        self.initial_weight = cfg.EVAL.WEIGHT
        self.time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                  time.localtime(time.time()))
        self.moving_ave_decay = cfg.YOLOv2.MOVING_AVE_DECAY
        self.eval_logdir = "./data/logs/eval"
        self.evalset = dataset.Dataset('test')
        self.output_dir = cfg.EVAL.OUTPUT_PRED_PATH
        self.img_anchors = loader.load_anchors(cfg.IMG.ANCHORS)

        with tf.name_scope('model'):
            self.model = yolov2_network.YOLOv2Network()
            self.net = self.model.load()
            self.img_pred = self.net['img_pred']

        # Let the GPU allocation grow on demand instead of being reserved
        # up front.
        config = ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = InteractiveSession(config=config)

        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, self.initial_weight)
        self.timer = timer.Timer()

    @staticmethod
    def _reset_dir(path):
        """Delete ``path`` if it exists, then recreate it empty.

        Missing parent directories are created as well, so a fresh
        ``output_dir`` no longer makes the directory creation fail.
        """
        if os.path.exists(path):
            shutil.rmtree(path)
        os.makedirs(path)

    def predict(self):
        """Run inference on every sample of ``self.evalset``.

        The two result folders under ``self.output_dir`` are emptied first.
        For each sample the raw prediction map is parsed into boxes and
        filtered with non-maximum suppression.
        """
        self._reset_dir(os.path.join(self.output_dir, "img_imshow_result/"))
        self._reset_dir(os.path.join(self.output_dir, "img_result/"))

        # NOTE(review): neither dumped_json nor the final img_bboxes is
        # consumed in the visible part of this method.
        dumped_json = {"tps": [], "fps": [], "fns": []}

        num_samples = len(self.evalset)
        for step in range(num_samples):
            print(step, "/", num_samples)
            eval_result = self.evalset.load()
            # Only element [0][0] of the fetched prediction is used.
            img_pred = self.sess.run(self.img_pred,
                                     feed_dict={
                                         self.net["img_input"]: eval_result[0],
                                         self.net["trainable"]: False
                                     })[0][0]
            img_bboxes = postprocess.parse_img_predmap(img_pred,
                                                       self.img_anchors)
            img_bboxes = postprocess.img_nms(img_bboxes,
                                             cfg.IMG.IOU_THRESHOLDS)
class MasterNetwork:
    """Shared policy/value network plus the training queue that feeds it.

    Samples are pushed with ``train_push`` and consumed in batches by
    ``optimize``; both touch ``train_queue`` and therefore synchronise on
    ``lock_queue``.
    """

    # Parallel lists: s, a, r, s', s' terminal mask.
    train_queue = [[], [], [], [], []]
    lock_queue = threading.Lock()

    def __init__(self):
        """Create the session, build model and loss graph, freeze the graph."""
        # allow_growth avoids the GPU memory over-usage error discussed in
        # https://github.com/tensorflow/tensorflow/issues/24828
        config = ConfigProto()
        config.gpu_options.allow_growth = True
        # Exactly one session: the original opened a second, unused one.
        self.session = InteractiveSession(config=config)
        Keras.set_session(self.session)
        Keras.manual_variable_initialization(True)

        # The model has to exist before the loss graph that wraps it.
        self.model = self._build_model()
        self.graph = self._build_graph(self.model)

        self.session.run(tf.global_variables_initializer())
        self.default_graph = tf.get_default_graph()
        self.default_graph.finalize()  # avoid later modifications

    def _build_model(self):
        """Build the Keras model with a policy head and a value head.

        The shape comments assume IMAGE_SIZE == (96, 96, 4), as stated in
        the original comments -- TODO confirm against the constant.

        Returns:
            The Keras ``Model`` with outputs ``[out_actions, out_values]``.
        """
        l_input = Input(shape=IMAGE_SIZE)  # (?, 96, 96, 4)

        # conv 1: kernel (16, 16, 4, 16) -> (?, 41, 41, 16)
        x = Convolution2D(16, (16, 16), strides=(2, 2),
                          activation='relu')(l_input)
        # conv 2: kernel (8, 8, 16, 32) -> (?, 17, 17, 32)
        x = Convolution2D(32, (8, 8), strides=(2, 2), activation='relu')(x)
        x = Flatten()(x)  # (?, 9248)
        x = Dense(256, activation='relu')(x)  # (?, 256)
        l_dense = Dense(16, activation='relu')(x)  # (?, 16)

        # Actions need a proper probability distribution, hence softmax.
        out_actions = Dense(NUM_ACTIONS, activation='softmax')(l_dense)
        out_values = Dense(1, activation='linear')(l_dense)  # (?, 1)

        model = Model(inputs=[l_input], outputs=[out_actions, out_values])
        model._make_predict_function()  # has to be initialised before threading
        return model

    def _build_graph(self, model):
        """Build the TF graph that defines the losses and the training op.

        The policy loss is the negation of the objective function J:
            L_pi = -(1/n) * sum[A(s_i, a_i) * log pi(a_i | s_i)]
        The value loss is the squared error of the estimated value V(s0)
        towards the n-step return:
            L_V = (1/n) * sum[e_i^2]

        Args:
            model: Keras model returning ``(policy, value)`` for a state batch.

        Returns:
            Tuple ``(s_t, a_t, r_t, minimize)``: the three placeholders and
            the optimizer step.
        """
        # Placeholders hold a whole batch; the first dimension is the batch.
        s_t = tf.placeholder(tf.float32,
                             shape=(None, IMAGE_SIZE[0], IMAGE_SIZE[1],
                                    IMAGE_SIZE[2]))
        a_t = tf.placeholder(tf.float32, shape=(None, NUM_ACTIONS))
        r_t = tf.placeholder(tf.float32, shape=(None, 1))  # discounted n-step reward

        p, v = model(s_t)

        # p * a_t (a_t one-hot) summed over the action axis leaves the
        # probability of the action actually taken.  The small constant
        # prevents log(0).
        log_prob = tf.log(
            tf.reduce_sum(p * a_t, axis=1, keepdims=True) + 1e-10)

        # r_t approximates Q(s, a), so r_t - v is the advantage.
        advantage = r_t - v

        # The advantage is treated as a constant in the policy gradient.
        loss_policy = -log_prob * tf.stop_gradient(advantage)
        # With Q approximated by r_t the value error equals the advantage.
        loss_value = LOSS_V * tf.square(advantage)
        # sum(p * log p) is the negative entropy of the policy.
        entropy = LOSS_ENTROPY * tf.reduce_sum(
            p * tf.log(p + 1e-10), axis=1, keepdims=True)

        # The averaging over the batch skipped above happens here in one step.
        loss_total = tf.reduce_mean(loss_policy + loss_value + entropy)

        optimizer = tf.train.RMSPropOptimizer(LEARNING_RATE, decay=.99)
        minimize = optimizer.minimize(loss_total)
        return s_t, a_t, r_t, minimize

    def optimize(self):
        """Drain the training queue and run one optimisation step on it.

        Returns immediately (after yielding the thread) when fewer than
        MIN_BATCH samples are queued.  Several optimizer threads may call
        this concurrently.
        """
        if len(self.train_queue[0]) < MIN_BATCH:
            time.sleep(0)  # yield to other threads
            return

        # Swap the queue out under the lock so that a concurrent
        # train_push cannot interleave with the hand-over.
        with self.lock_queue:
            # Another thread may have drained the queue in the meantime.
            if len(self.train_queue[0]) < MIN_BATCH:
                return
            # s_mask is 0 where s_ is a dummy inserted for a terminal
            # state and 1 otherwise.
            s, a, r, s_, s_mask = self.train_queue
            self.train_queue = [[], [], [], [], []]

        # Transform the sample lists into numpy blocks.
        s = np.array(s)
        a = np.vstack(a)
        r = np.vstack(r)
        s_ = np.array(s_)
        s_mask = np.vstack(s_mask)

        if len(s) > 5 * MIN_BATCH:
            print("Opimizer alert! Minimizing batch of {}".format(len(s)))

        # Queued rewards only cover the first n steps; add the discounted
        # value of the last state, masked to 0 where s_ is terminal.
        v = self.predict_v(s_)
        r = r + GAMMA_N * v * s_mask

        s_t, a_t, r_t, minimize = self.graph
        self.session.run(minimize, feed_dict={s_t: s, a_t: a, r_t: r})

    def train_push(self, s, a, r, s_):
        """Append one (s, a, r, s') sample to the training queue.

        When ``s_`` is None a dummy state (NONE_STATE) is queued instead and
        flagged with a 0. in the terminal mask; otherwise the mask is 1.
        """
        with self.lock_queue:
            self.train_queue[0].append(s)
            self.train_queue[1].append(a)
            self.train_queue[2].append(r)
            if s_ is None:
                self.train_queue[3].append(NONE_STATE)
                self.train_queue[4].append(0.)
            else:
                self.train_queue[3].append(s_)
                self.train_queue[4].append(1.)

    def predict(self, s):
        """Return ``(p, v)``: policy and value predictions for states ``s``."""
        with self.default_graph.as_default():
            p, v = self.model.predict(s)
            return p, v

    def predict_p(self, s):
        """Return only the policy prediction for states ``s``."""
        return self.predict(s)[0]

    def predict_v(self, s):
        """Return only the value prediction for states ``s``."""
        return self.predict(s)[1]
def main():
    """Parse the command line, build the WaveNet graphs and run training.

    A checkpoint is restored when one is found.  The loop then runs
    training steps, saving a checkpoint every ``--checkpoint_every`` steps
    and calling ``eval_step`` every ``--eval_every`` steps.  The loop is
    left by raising an exception once ``hparams.num_steps`` is reached;
    the handler logs it and asks the coordinator to stop.
    """
    session_config = ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = InteractiveSession(config=session_config)

    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError(
                'Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    # Comma-separated list of data directories.
    DATA_DIRECTORY = './/data//moon,.//data//son'
    parser.add_argument('--data_dir',
                        type=str,
                        default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    LOGDIR = None
    parser.add_argument(
        '--logdir',
        type=str,
        default=LOGDIR,
        help='Directory in which to store the logging information for '
        'TensorBoard. If the model already exists, it will restore the '
        'state and will continue training. Cannot use with --logdir_root '
        'and --restore_from.')
    parser.add_argument(
        '--logdir_root',
        type=str,
        default=None,
        help='Root directory to place the logging output and generated '
        'model. These are stored under the dated subdirectory of '
        '--logdir_root. Cannot use with --logdir.')
    parser.add_argument(
        '--restore_from',
        type=str,
        default=None,
        help='Directory in which to restore the model from. This creates '
        'the new model under the dated directory in --logdir_root. Cannot '
        'use with --logdir.')

    CHECKPOINT_EVERY = 1000  # checkpoint save interval
    parser.add_argument(
        '--checkpoint_every',
        type=int,
        default=CHECKPOINT_EVERY,
        help='How many steps to save each checkpoint after. Default: ' +
        str(CHECKPOINT_EVERY) + '.')
    parser.add_argument('--eval_every',
                        type=int,
                        default=8,
                        help='Steps between eval on test data')

    # Options that can be supplied on the command line.  Kept under its
    # own name instead of rebinding the session config.
    args = parser.parse_args()
    args.data_dir = args.data_dir.split(",")

    try:
        directories = validate_directories(args, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(args.data_dir)

    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold
        # near zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        gc_enable = True  # before: num_speakers > 1; now: always True

        # Original note: the reader cuts the wav files into inputs of size
        # (receptive_field + sample_size); the leading receptive_field
        # samples are padded or taken from the previous piece.
        reader = DataFeederWavenet(coord,
                                   args.data_dir,
                                   batch_size=hparams.wavenet_batch_size,
                                   gc_enable=gc_enable,
                                   test_mode=False)

        # A second feeder for testing; it fetches just one file.
        reader_test = DataFeederWavenet(coord,
                                        args.data_dir,
                                        batch_size=1,
                                        gc_enable=gc_enable,
                                        test_mode=True,
                                        queue_size=1)

        audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id

    # Create train network.
    net = create_network(hparams,
                         hparams.wavenet_batch_size,
                         num_speakers,
                         is_training=True)
    net.add_loss(
        input_batch=audio_batch,
        local_condition=lc_batch,
        global_condition_batch=gc_id_batch,
        l2_regularization_strength=hparams.l2_regularization_strength,
        upsample_type=hparams.upsample_type)
    net.add_optimizer(hparams, global_step)

    run_metadata = tf.RunMetadata()

    # Initialise with the current initializer ops; the
    # tf.initialize_all_variables / tf.initialize_local_variables aliases
    # used before are deprecated.
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess.run(init)

    # Saver for storing checkpoints of the model; max_to_keep bounds the
    # number of checkpoints kept.
    saver = tf.train.Saver(var_list=tf.global_variables(),
                           max_to_keep=hparams.max_checkpoints)

    try:
        start_step = load(saver, sess, restore_from)  # checkpoint load
        if is_overwritten_training or start_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore the counter is reset for new or overwritten
            # trainings.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)
            start_step = 0
    except Exception:
        print(
            "Something went wrong while restoring checkpoint. We will terminate training to avoid accidentally overwriting the previous model."
        )
        raise

    reader.start_in_session(sess, start_step)
    reader_test.start_in_session(sess, start_step)

    # Create the test network.  Original note: because it creates queues,
    # it is built only after the session restore.
    net_test = create_network(hparams, 1, num_speakers, is_training=False)

    if hparams.scalar_input:
        samples = tf.placeholder(tf.float32,
                                 shape=[net_test.batch_size, None])
        waveform = 2 * np.random.rand(net_test.batch_size).reshape(
            net_test.batch_size, -1) - 1
    else:
        # samples: mu-law encoded values before one-hot conversion,
        # shape (batch_size, length).
        samples = tf.placeholder(tf.int32, shape=[net_test.batch_size, None])
        waveform = np.random.randint(hparams.quantization_channels,
                                     size=net_test.batch_size).reshape(
                                         net_test.batch_size, -1)

    upsampled_local_condition = tf.placeholder(
        tf.float32, shape=[net_test.batch_size, hparams.num_mels])
    speaker_id = tf.placeholder(tf.int32, shape=[net_test.batch_size])

    # Original note: applies the Fast WaveNet Generation Algorithm
    # (arXiv 1611.09482).
    next_sample = net_test.predict_proba_incremental(
        samples, upsampled_local_condition, speaker_id)

    sess.run(net_test.queue_initializer)

    # Three placeholders are used for testing: samples, speaker_id and
    # upsampled_local_condition.
    # Pull one test mel-spectrogram and keep it fixed.  Original note:
    # otherwise the reader thread keeps supplying new data.  reader_test
    # is not used after this point.
    mel_input_test, speaker_id_test = sess.run(
        [reader_test.local_condition, reader_test.speaker_id])

    with tf.variable_scope('wavenet', reuse=tf.AUTO_REUSE):
        upsampled_local_condition_data = net_test.create_upsample(
            mel_input_test, upsample_type=hparams.upsample_type)
        # Evaluated once; the result is later passed to eval_step, which
        # feeds it to the upsampled_local_condition placeholder.
        upsampled_local_condition_data_ = sess.run(
            upsampled_local_condition_data)

    start_step = sess.run(global_step)
    step = last_saved_step = start_step
    try:
        while not coord.should_stop():
            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize],
                    options=run_options,
                    run_metadata=run_metadata)

                tl = timeline.Timeline(run_metadata.step_stats)
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % args.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            if step % args.eval_every == 0:
                eval_step(sess, logdir, step, waveform,
                          upsampled_local_condition_data_, speaker_id_test,
                          mel_input_test, samples, speaker_id,
                          upsampled_local_condition, next_sample)

            if step >= hparams.num_steps:
                # An error message is printed, but stopping here is the
                # intended way to end training.
                raise Exception('End xxx~~~yyy')
    except Exception as e:
        print('finally')
        log('Exiting due to exception: %s' % e, slack=True)
        traceback.print_exc()
        coord.request_stop(e)
class Trainer(object):
    """Two-stage YOLOv2 training.

    The first stage optimises only the variables under the
    ``yolov2_headnet`` scope; the second stage optimises all trainable
    variables.
    """

    def __init__(self):
        """Read the config, open the session and build the training graph."""
        self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
        self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
        self.first_stage_epochs = cfg.TRAIN.FRIST_STAGE_EPOCHS
        self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
        self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
        self.initial_weight = cfg.TRAIN.PRETRAIN_WEIGHT
        self.time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                  time.localtime(time.time()))
        self.moving_ave_decay = cfg.YOLOv2.MOVING_AVE_DECAY
        self.train_logdir = "./data/log/train"
        self.trainset = dataset.Dataset('train')
        self.valset = dataset.Dataset('val')
        self.steps_per_period = len(self.trainset)

        config = ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = InteractiveSession(config=config)
        self.timer = timer.Timer()

        with tf.name_scope('model'):
            self.model = yolov2_network.YOLOv2Network()
            self.net = self.model.load()
            # Snapshot of the variables that exist right after the network
            # is built; this is the set the loader restores.
            self.net_var = tf.global_variables()
            self.loss = self.net["yolov2_loss"]

        with tf.name_scope('learn_rate'):
            self.global_step = tf.Variable(1.0,
                                           dtype=tf.float64,
                                           trainable=False,
                                           name='global_step')
            warmup_steps = tf.constant(self.warmup_periods *
                                       self.steps_per_period,
                                       dtype=tf.float64,
                                       name='warmup_steps')
            train_steps = tf.constant(
                (self.first_stage_epochs + self.second_stage_epochs) *
                self.steps_per_period,
                dtype=tf.float64,
                name='train_steps')
            # Linear ramp up to learn_rate_init during warm-up, then a
            # cosine decay from learn_rate_init down to learn_rate_end.
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: self.global_step / warmup_steps * self.
                learn_rate_init,
                false_fn=lambda: self.learn_rate_end + 0.5 *
                (self.learn_rate_init - self.learn_rate_end) * (1 + tf.cos(
                    (self.global_step - warmup_steps) /
                    (train_steps - warmup_steps) * np.pi)))
            global_step_update = tf.assign_add(self.global_step, 1.0)

        with tf.name_scope("define_weight_decay"):
            moving_ave = tf.train.ExponentialMovingAverage(
                self.moving_ave_decay).apply(tf.trainable_variables())

        with tf.name_scope("define_first_stage_train"):
            # Stage one only touches variables whose top-level scope is
            # "yolov2_headnet".
            self.first_stage_trainable_var_list = [
                var for var in tf.trainable_variables()
                if str(var.op.name).split('/')[0] == "yolov2_headnet"
            ]
            first_stage_optimizer = tf.train.AdamOptimizer(
                self.learn_rate).minimize(
                    self.loss, var_list=self.first_stage_trainable_var_list)
            # The nested control dependencies make the no-op run after the
            # UPDATE_OPS collection, the optimizer step, the step counter
            # and the moving-average update.
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies(
                    [first_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_frozen_variables = tf.no_op()

        with tf.name_scope("define_second_stage_train"):
            second_stage_trainable_var_list = tf.trainable_variables()
            second_stage_optimizer = tf.train.AdamOptimizer(
                self.learn_rate).minimize(
                    self.loss, var_list=second_stage_trainable_var_list)
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies(
                    [second_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_all_variables = tf.no_op()

        with tf.name_scope('loader_and_saver'):
            self.loader = tf.train.Saver(self.net_var)
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        with tf.name_scope('summary'):
            tf.summary.scalar("learn_rate", self.learn_rate)
            tf.summary.scalar("yolov2_loss", self.net["yolov2_loss"])
            tf.summary.scalar("img_obj_loss", self.net["img_obj_loss"])
            tf.summary.scalar("img_cls_loss", self.net["img_cls_loss"])
            tf.summary.scalar("img_bbox_loss", self.net["img_bbox_loss"])

            logdir = "../logs/tensorboard"
            self._reset_dir(logdir)
            self.write_op = tf.summary.merge_all()
            self.summary_writer = tf.summary.FileWriter(
                logdir, graph=self.sess.graph)

        img_pred_dir = cfg.YOLOv2.LOG_DIR + "/pred/img_pred/"
        self._reset_dir(img_pred_dir)

    @staticmethod
    def _reset_dir(path):
        """Delete ``path`` if it exists, then recreate it empty.

        ``os.makedirs`` is used so that missing parent directories no
        longer make the directory creation fail.
        """
        if os.path.exists(path):
            shutil.rmtree(path)
        os.makedirs(path)

    def _validate_and_save(self, epoch, train_epoch_loss):
        """Evaluate the whole validation set and save a checkpoint."""
        val_epoch_loss = []
        print("valing...")
        for _ in range(len(self.valset)):
            val_data = self.valset.load()
            val_step_loss, img_pred = self.sess.run(
                [self.loss, self.net['img_pred']],
                feed_dict={
                    self.net["img_input"]: val_data[0],
                    self.net["img_label"]: val_data[1],
                    self.net["img_loss_scale"]: cfg.IMG.LOSS_SCALE,
                    self.net["trainable"]: False
                })
            val_epoch_loss.append(val_step_loss)

        print("saving...")
        train_epoch_loss_m, val_epoch_loss_m = np.mean(
            train_epoch_loss), np.mean(val_epoch_loss)
        ckpt_file = "../checkpoint/yolov2_val_loss=%.4f.ckpt" % val_epoch_loss_m
        log_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                 time.localtime(time.time()))
        print(
            "======> Epoch:%2d Time: %s Train loss: %.2f val loss: %.2f Saving %s ..."
            % (epoch, log_time, train_epoch_loss_m, val_epoch_loss_m,
               ckpt_file))
        self.saver.save(self.sess, ckpt_file, global_step=epoch)

    def train(self):
        """Run both training stages.

        Pretrained weights are restored when possible; if the restore
        fails the first (frozen) stage is skipped and training starts from
        scratch.  Validation and a checkpoint are triggered every
        ``cfg.TRAIN.SAVING_STEPS`` global steps.
        """
        self.sess.run(tf.global_variables_initializer())
        try:
            print('=> Restoring weights from: %s ... ' % self.initial_weight)
            self.loader.restore(self.sess, self.initial_weight)
        except Exception:
            # Best-effort restore, but no longer swallowing
            # KeyboardInterrupt / SystemExit like the bare except did.
            print('=> %s does not exist !!!' % self.initial_weight)
            print('=> Now it starts to train YOLOv2 from scratch ...')
            self.first_stage_epochs = 0

        for epoch in range(
                1, 1 + self.first_stage_epochs + self.second_stage_epochs):
            if epoch <= self.first_stage_epochs:
                train_op = self.train_op_with_frozen_variables
            else:
                train_op = self.train_op_with_all_variables

            train_epoch_loss = []
            last_log_time = time.time()
            print(
                "======================================> Epoch:%d <======================================"
                % epoch)

            for _ in range(self.steps_per_period):
                train_data = self.trainset.load()
                _, summary, train_step_loss, global_step_val = self.sess.run(
                    [train_op, self.write_op, self.loss, self.global_step],
                    feed_dict={
                        self.net["img_input"]: train_data[0],
                        self.net["img_label"]: train_data[1],
                        self.net["img_loss_scale"]: cfg.IMG.LOSS_SCALE,
                        self.net["trainable"]: True
                    })
                train_epoch_loss.append(train_step_loss)
                self.summary_writer.add_summary(summary, global_step_val)

                if global_step_val % cfg.YOLOv2.PRINTING_STEPS == 0:
                    log_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                             time.localtime(time.time()))
                    print(
                        "======> Epoch:%2d Time:%s step: %d/%d During Time: %.2f Train loss: %.2f"
                        % (epoch, log_time,
                           global_step_val % self.steps_per_period,
                           self.steps_per_period,
                           time.time() - last_log_time,
                           np.mean(train_epoch_loss[-cfg.YOLOv2.
                                                    PRINTING_STEPS:])))
                    last_log_time = time.time()

                if global_step_val % cfg.TRAIN.SAVING_STEPS == 0:
                    self._validate_and_save(epoch, train_epoch_loss)

            # NOTE(review): the source was flattened, so it is not certain
            # whether this save ran once per epoch or once after the last
            # epoch.  Per-epoch is assumed here -- confirm.
            print("saving...")
            save_time = time.asctime(time.localtime(time.time()))
            ckpt_file = "../checkpoint/yolov2_last_epoch-%s.ckpt" % save_time
            self.saver.save(self.sess, ckpt_file, global_step=epoch)
z, mn, sd = encoder(X_in, rate) dec = decoder(z, rate) MSE = tf.reduce_sum(tf.squared_difference(dec, Y_flat), 1) latent_loss = -0.5 * tf.reduce_sum( 1.0 + 2.0 * sd - tf.square(mn) - tf.exp(2.0 * sd), 1) loss = tf.reduce_mean(latent_loss + MSE) optimizer = tf.train.AdamOptimizer(0.0001) training_op = optimizer.minimize(loss) saver = tf.train.Saver() init = tf.global_variables_initializer() sess = InteractiveSession(config=config) sess.run(init) # Data loading, merging and testing data, sample_rate = get_data(file_arr) def rep(data): """converts data to length * num_channels than splits by real and complex part""" total = np.zeros(shape=[len(data) * 2, len(data[0])]) for i in range(len(data)): total[2 * i - 1, :] = data[i, :].real total[2 * i, :] = data[i, :].imag return total def invrep(d):
from tensorflow.compat.v1 import ConfigProto from tensorflow.compat.v1 import InteractiveSession import json import time import numpy as np import tensorflow as tf import os TIMEOUT = 10 #10 seconds config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) init = tf.global_variables_initializer() session.run(tf.global_variables_initializer()) session.run(tf.local_variables_initializer()) def main(args): mode = args if(mode == "camera"): camera_recog() elif mode == "input": create_manual_data(); else: raise ValueError("Unimplemented mode") ''' Description: Images from Video Capture -> detect faces' regions -> crop those faces and align them -> each cropped face is categorized in 3 types: Center, Left, Right -> Extract 128D vectors( face features)
# Location and name of the trained model to load.
model_path = "".join([root_model_path, model_dir, "/"])
model_name = 'my_model'  # base name of the .meta file
model_ind = -1  # index into the checkpoint list; -1 picks the last entry

# Directory of the prediction results: the file name of data_path without
# its last four characters, joined with the model directory name.
data_stem = data_path.split('/')[-1][:-4]
pred_res_path = "".join(['./predict_res/', data_stem, "-", model_dir, "/"])
if SAVE_FLAG and not os.path.isdir(pred_res_path):
    os.makedirs(pred_res_path)

# In[]
# Load the model: open a session, import the saved graph definition and
# restore the selected checkpoint when a checkpoint state is found.
sess = InteractiveSession(config=config)
sess.run(tf.global_variables_initializer())
meta_file = model_path + model_name + '.meta'
restorer = tf.train.import_meta_graph(meta_file)
ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt:
    ckpt_states = ckpt.all_model_checkpoint_paths
    restorer.restore(sess, ckpt_states[model_ind])

# Look up the tensors of the imported graph by name.
graph = tf.get_default_graph()
inputs_ = graph.get_tensor_by_name("inputs/inputs_:0")
mask_prob = graph.get_tensor_by_name("inputs/Placeholder_1:0")
targets_ = graph.get_tensor_by_name("inputs/targets_:0")
keep_prob = graph.get_tensor_by_name("inputs/Placeholder:0")  # for dropout
class Model:
    """
    Model Class for Model Abstraction, Class implementation is similar to the
    Tensorflow Sequential and Functional Model Using Graph

    Please refer to: https://www.tensorflow.org/api_docs/python/tf/Graph
    for more details
    """

    def __init__(self,
                 commands,
                 input_size=1960,
                 first_conv_filter=128,
                 second_conv_filter=64,
                 model_dir="model",
                 frequency_size=40,
                 time_size=49,
                 sess=False,
                 preprocess="micro",
                 training=True):
        """
        Initialization of variables and Tensor Session

        Args:
            commands: list of command labels; list position is the class id.
            input_size: size of the flattened input fingerprint.
            first_conv_filter: number of filters of the first convolution.
            second_conv_filter: number of filters of the second convolution.
            model_dir: directory used for checkpoints and graph dumps.
            frequency_size: number of columns the fingerprint is reshaped to.
            time_size: number of rows the fingerprint is reshaped to.
            sess: an open session to reuse; a new one is created when this
                is falsy or already closed.
            preprocess: "micro" selects ``Micro_process()``, anything else
                selects ``run_mfcc()``.
            training: forwarded to ``_build``.

        Raises:
            AssertionError: if `commands` is not a list or `model_dir` is not
                a string.
        """
        # Validate before a session is opened or any graph op is created, so
        # bad arguments cannot leave a half-built model behind.
        assert isinstance(commands, list), " Commands type should be a list "
        assert isinstance(
            model_dir, str), "model directory should be a string object"

        self.check_session(sess)
        self.commands = commands
        self.commands_dic = self.create_commands(self.commands)
        self._softmax_layer, self._dropout_placeholder = self._build(
            input_size, first_conv_filter, second_conv_filter,
            frequency_size, time_size, training)
        self._model_dir = model_dir
        self._input_size = input_size
        self._loaded = False
        self._start_step = 0
        self._global_step = tf.compat.v1.train.get_or_create_global_step()
        self._save_step = 1
        if preprocess == "micro":
            self.mfcc, self.wav_filename_placeholder = Micro_process()
        else:
            self.mfcc, self.wav_filename_placeholder = run_mfcc()
        # init=True only builds the loss/optimizer/metric ops and initialises
        # the variables; no training step is executed.
        self.train(learn_rate=[0, 0],
                   dropout_rate=0,
                   save_step=0,
                   batch_size=0,
                   eval_step=0,
                   training_time=0,
                   rate_step=0,
                   display_step=0,
                   train_data=0,
                   Validation_data=0,
                   init=True)

    def _build(self,
               input_size,
               first_conv_filter,
               second_conv_filter,
               frequency_size,
               time_size,
               training,
               input_1d=False):
        """
        This a private protected Method to Build the Model Layer in graph

        Args:
            input_size: Size of the flattened input default to 1960
            first_conv_filter : Size of filter for first convolutional layer
            second_conv_filter : Size of filter for second convolutional layer
            frequency_size : Size of MFCC rows. Refer to feature extraction
                for run_MFCC method
            time_size : Size of MFCC cols
            training: when true the network reads from a fresh
                ``fingerprint_input`` placeholder and applies dropout; when
                false it reads from `input_1d` and dropout is skipped.
            input_1d: input tensor used when `training` is false.

        returns:
            ``(logits, dropout_rate_placeholder)`` when `training` is true,
            otherwise just ``logits``.
        """
        dropout_rate = tf.compat.v1.placeholder(tf.float32,
                                                name='dropout_rate')
        if training:
            # Only the training graph owns the fingerprint placeholder. The
            # inference build must not overwrite it, otherwise predict() and
            # evaluate() would feed a placeholder that is not connected to
            # the training ops they run.
            self._fingerprint_input = tf.compat.v1.placeholder(
                tf.float32, [None, input_size], name='fingerprint_input')
            network_input = self._fingerprint_input
        else:
            network_input = input_1d
        # [batch_size, input_size] -> [batch_size, rows, cols, channel]
        input_4d = tf.reshape(network_input,
                              [-1, time_size, frequency_size, 1])

        # AUTO_REUSE lets a second build (see save_pb_model) share the very
        # same variables instead of creating new ones.
        with tf.compat.v1.variable_scope("first_weights",
                                         reuse=tf.compat.v1.AUTO_REUSE):
            first_weights = tf.compat.v1.get_variable(
                name='first_weights',
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.01),
                shape=[20, 8, 1, first_conv_filter])
        with tf.compat.v1.variable_scope("first_bias",
                                         reuse=tf.compat.v1.AUTO_REUSE):
            first_bias = tf.compat.v1.get_variable(
                name='first_bias',
                initializer=tf.compat.v1.zeros_initializer,
                shape=[
                    first_conv_filter,
                ])
        # First convolution layer.
        first_conv = tf.nn.conv2d(input=input_4d,
                                  filters=first_weights,
                                  strides=[1, 1, 1, 1],
                                  padding='SAME') + first_bias
        first_relu = tf.nn.relu(first_conv)
        if training:
            first_dropout = tf.nn.dropout(first_relu, rate=dropout_rate)
        else:
            first_dropout = first_relu
        max_pool = tf.nn.max_pool2d(input=first_dropout,
                                    ksize=[1, 2, 2, 1],
                                    strides=[1, 2, 2, 1],
                                    padding='SAME')

        with tf.compat.v1.variable_scope("second_weights",
                                         reuse=tf.compat.v1.AUTO_REUSE):
            second_weights = tf.compat.v1.get_variable(
                name='second_weights',
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.01),
                shape=[10, 4, first_conv_filter, second_conv_filter])
        with tf.compat.v1.variable_scope("second_bias",
                                         reuse=tf.compat.v1.AUTO_REUSE):
            second_bias = tf.compat.v1.get_variable(
                name='second_bias',
                initializer=tf.compat.v1.zeros_initializer,
                shape=[
                    second_conv_filter,
                ])
        second_conv = tf.nn.conv2d(input=max_pool,
                                   filters=second_weights,
                                   strides=[1, 1, 1, 1],
                                   padding='SAME') + second_bias
        second_relu = tf.nn.relu(second_conv)
        if training:
            second_dropout = tf.nn.dropout(second_relu, rate=dropout_rate)
        else:
            second_dropout = second_relu

        # Flatten the convolution output for the final dense layer.
        conv_shape = second_dropout.get_shape()
        conv_output_width = conv_shape[2]
        conv_output_height = conv_shape[1]
        conv_element_count = int(conv_output_width * conv_output_height *
                                 second_conv_filter)
        flattened_second_conv = tf.reshape(second_dropout,
                                           [-1, conv_element_count])
        label_count = len(self.commands_dic)
        with tf.compat.v1.variable_scope("softmax_weights",
                                         reuse=tf.compat.v1.AUTO_REUSE):
            softmax_weights = tf.compat.v1.get_variable(
                name='softmax_weights',
                initializer=tf.compat.v1.truncated_normal_initializer(
                    stddev=0.01),
                shape=[conv_element_count, label_count])
        with tf.compat.v1.variable_scope("softmax_bias",
                                         reuse=tf.compat.v1.AUTO_REUSE):
            softmax_bias = tf.compat.v1.get_variable(
                name='softmax_bias',
                initializer=tf.compat.v1.zeros_initializer,
                shape=[label_count])
        # Raw logits; softmax itself is applied by the loss / by the caller.
        softmax_layer = tf.matmul(flattened_second_conv,
                                  softmax_weights) + softmax_bias
        if training:
            return softmax_layer, dropout_rate
        return softmax_layer

    def train(self,
              learn_rate,
              dropout_rate,
              save_step,
              batch_size,
              eval_step,
              training_time,
              rate_step,
              display_step,
              train_data,
              Validation_data,
              init=False):
        """
        Build the training ops and, unless `init` is true, run the loop.

        Args:
            learn_rate: two-element list; the first value is used until step
                `rate_step`, the second from then on.
            dropout_rate: dropout rate fed while training.
            save_step: a checkpoint is written every `save_step` steps and
                at the last step.
            batch_size: training batch size (validation uses four times it).
            eval_step: validation is run every `eval_step` steps.
            training_time: step number at which training stops.
            rate_step: step at which the second learning rate takes over.
            display_step: training metrics are printed/recorded every
                `display_step` steps.
            train_data: sequence of training wav paths.
            Validation_data: sequence of validation wav paths.
            init: when true only the ops are created and variables
                initialised; nothing is trained and None is returned.

        Returns:
            A history dict with keys "categorical_accuracy", "loss",
            "val_categorical_accuracy" and "val_loss" (None when `init`).

        Raises:
            AssertionError: if `learn_rate` is not a list.
        """
        assert isinstance(learn_rate, list),\
            "Learn Rate should be a List to be used. e.g [.001, .0001]"
        self._save_step = save_step
        self._training_time = training_time

        self._ground_truth_input = tf.compat.v1.placeholder(
            tf.int64, [None], name='groundtruth_input')
        with tf.compat.v1.name_scope('cross_entropy'):
            self._cross_entropy_mean = tf.compat.v1.losses.sparse_softmax_cross_entropy(
                labels=self._ground_truth_input, logits=self._softmax_layer)
        learning_rate_input = tf.compat.v1.placeholder(
            tf.float32, [], name='learning_rate_input')
        train_step = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate_input).minimize(self._cross_entropy_mean)
        self._predicted = tf.argmax(input=self._softmax_layer, axis=1)
        correct_prediction = tf.equal(self._predicted,
                                      self._ground_truth_input)
        self._evaluation_step = tf.reduce_mean(
            input_tensor=tf.cast(correct_prediction, tf.float32))
        saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())

        if self._loaded is False and self._start_step == 0:
            self._global_step = tf.compat.v1.train.get_or_create_global_step()
            # Run on the model's own session rather than on whatever session
            # happens to be the process-wide default.
            tf.compat.v1.global_variables_initializer().run(
                session=self._sess)
        increment_global_step = tf.compat.v1.assign(self._global_step,
                                                    self._global_step + 1)

        if init is False:
            tf.io.write_graph(self._sess.graph_def, self._model_dir,
                              "model" + '.pbtxt')
            with gfile.GFile(
                    os.path.join(self._model_dir, "commands" + '_labels.txt'),
                    'wb') as f:
                f.write('\n'.join(self.commands))

        if training_time <= self._start_step and self._loaded is True:
            print(
                f"Checkpoint Loaded has been trained to {self._start_step} epochs,\
                \n New Trainig starts from {self._start_step}, Please increase Training_time to train model"
            )

        if init is False:
            if tf.config.list_physical_devices('GPU'):
                strategy = tf.distribute.MirroredStrategy()
            else:  # use default strategy
                strategy = tf.distribute.get_strategy()
            with strategy.scope():
                history = {
                    "categorical_accuracy": [],
                    "loss": [],
                    "val_categorical_accuracy": [],
                    "val_loss": []
                }
                learning_rate = learn_rate[0]
                # `range`, not the Python 2 `xrange`: the file already relies
                # on Python 3 syntax (f-strings).
                for training_step in range(self._start_step, training_time):
                    if training_step == int(rate_step):
                        learning_rate = learn_rate[1]
                    x_train, y_train = self.get_next_batch(
                        batch_size, train_data)
                    train_accuracy, cross_entropy_value, _, _ = self._sess.run(
                        [
                            self._evaluation_step,
                            self._cross_entropy_mean,
                            train_step,
                            increment_global_step,
                        ],
                        feed_dict={
                            self._fingerprint_input: x_train,
                            self._ground_truth_input: y_train,
                            learning_rate_input: learning_rate,
                            self._dropout_placeholder: dropout_rate
                        })
                    if training_step % int(display_step) == 0:
                        print(
                            'Step #%d: learning rate %f, accuracy %.1f%%, cross entropy %f'
                            % (training_step, learning_rate,
                               train_accuracy * 100, cross_entropy_value))
                        history["categorical_accuracy"].append(train_accuracy)
                        history["loss"].append(cross_entropy_value)
                    if training_step % int(eval_step) == 0:
                        x_val, y_val = self.get_next_batch(
                            batch_size * 4, Validation_data)
                        validation_accuracy, val_crossentropy_value = self._sess.run(
                            [self._evaluation_step, self._cross_entropy_mean],
                            feed_dict={
                                self._fingerprint_input: x_val,
                                self._ground_truth_input: y_val,
                                self._dropout_placeholder: 0.0
                            })
                        history["val_categorical_accuracy"].append(
                            validation_accuracy)
                        history["val_loss"].append(val_crossentropy_value)
                        print(
                            'Step %d: Validation accuracy = %.1f%% (Val Size=%d), Validation loss = %f'
                            % (training_step, validation_accuracy * 100,
                               batch_size * 4, val_crossentropy_value))
                    is_last_step = training_step == training_time - 1
                    if (training_step % int(save_step) == 0) or is_last_step:
                        path_to_save = os.path.join(
                            self._model_dir, "model_checkpoint" + '.ckpt')
                        # The final checkpoint is labelled with the number of
                        # completed steps rather than the last step index.
                        checkpoint_step = (training_time
                                           if is_last_step else training_step)
                        saver.save(self._sess,
                                   path_to_save,
                                   global_step=checkpoint_step)
                    self._start_step = self._global_step.eval(
                        session=self._sess)
                return history

    @staticmethod
    def _new_session():
        """Open an InteractiveSession, with GPU memory growth on CUDA builds."""
        if tf.test.is_built_with_cuda():
            config = tf.compat.v1.ConfigProto()
            config.gpu_options.allow_growth = True
            return tf.compat.v1.InteractiveSession(config=config)
        # Reached on non-CUDA builds: use the fully qualified name so this
        # path does not depend on an import made only in the CUDA branch.
        return tf.compat.v1.InteractiveSession()

    def check_session(self, sess=False):
        """
        Store the session the model will use in ``self._sess``.

        Args:
            sess: an existing session. It is reused when it is still open;
                a new session is created when it is falsy or closed.
        """
        if not sess or sess._closed:
            self._sess = self._new_session()
        else:
            self._sess = sess

    def create_commands(self, commands):
        """
        Build both lookup tables for the command labels.

        Sets ``self.dic_commands`` (label -> index) as a side effect.

        Returns:
            dict mapping index -> label.
        """
        commands_dic = dict(enumerate(commands))
        self.dic_commands = {
            label: index
            for index, label in enumerate(commands)
        }
        return commands_dic

    def get_next_batch(self, batch_size, file_path):
        """
        Draw a random batch of features and labels.

        `file_path` is shuffled IN PLACE and its first `batch_size` entries
        are run through the feature extraction op.

        Returns:
            ``(features, labels)`` stacked as numpy arrays; every feature is
            flattened to 1-D.

        Raises:
            IndexError: if `batch_size` exceeds ``len(file_path)``.
        """
        data = []
        labels = []
        np.random.shuffle(file_path)
        for i in range(batch_size):
            wav_path = file_path[i]
            features = self._sess.run(
                self.mfcc,
                feed_dict={self.wav_filename_placeholder: wav_path})
            data.append(features.flatten())
            labels.append(self.dic_commands[get_label(wav_path)])
        return np.stack(data), np.stack(labels)

    def predict(self, input_data):
        """
        Classify a batch of fingerprints (dropout disabled).

        Returns:
            ``(class_indices, class_labels)``.
        """
        predicted = self._sess.run([self._predicted],
                                   feed_dict={
                                       self._fingerprint_input: input_data,
                                       self._dropout_placeholder: 0.0
                                   })
        return predicted[0], [
            self.commands_dic[n.item()] for n in predicted[0]
        ]

    def evaluate(self, input_data, labels, verbose=1):
        """
        Compute accuracy and cross-entropy on a labelled batch.

        Args:
            input_data: batch of flattened fingerprints.
            labels: integer class ids matching `input_data`.
            verbose: print the metrics when truthy.

        Returns:
            ``(accuracy, cross_entropy)``.
        """
        validation_accuracy, val_crossentropy_value = self._sess.run(
            [self._evaluation_step, self._cross_entropy_mean],
            feed_dict={
                self._fingerprint_input: input_data,
                self._ground_truth_input: labels,
                self._dropout_placeholder: 0.0
            })
        if verbose:
            print('Validation accuracy = %.1f%%, Validation loss = %f' %
                  (validation_accuracy * 100, val_crossentropy_value))
        return validation_accuracy, val_crossentropy_value

    def load_checkpoint(self, path=0):
        """
        Restore variables from a checkpoint.

        Args:
            path: checkpoint prefix to restore. With the default ``0`` the
                path is derived from the model directory, the current start
                step and the save interval.

        Returns:
            True after a successful restore; None when no default path can
            be derived (for example when the save interval is still 0
            because the model was never trained).
        """
        if path == 0:
            try:
                last = int(
                    self._start_step // self._save_step) * self._save_step
                path = os.path.join(self._model_dir,
                                    "model_checkpoint" + '.ckpt-' + str(last))
            except (ZeroDivisionError, TypeError):
                # Only the failures this computation can produce are caught;
                # anything else should surface instead of being swallowed.
                print(
                    "Check point Path does not Exist, pass path as Arguiment or train for a number of epochs"
                )
                return
        saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())
        saver.restore(self._sess, path)
        self._start_step = self._global_step.eval(session=self._sess)
        self._loaded = True
        return True

    def save_pb_model(self,
                      file_name,
                      first_conv_filter=128,
                      second_conv_filter=64,
                      frequency_size=40,
                      time_size=49,
                      last_checkpoint=True):
        """
        Save Model For Inference

        Rebuilds the network in inference mode on top of the feature
        extraction output, restores a checkpoint and writes a SavedModel.

        Args:
            file_name: export directory of the SavedModel.
            first_conv_filter: must match the value used for training.
            second_conv_filter: must match the value used for training.
            frequency_size: must match the value used for training.
            time_size: must match the value used for training.
            last_checkpoint: a checkpoint path to restore, or any non-path
                value (True/False) to restore the last saved checkpoint.
        """
        input_1d = tf.reshape(self.mfcc, [-1, self._input_size])
        softmax_layer = self._build(self._input_size,
                                    first_conv_filter,
                                    second_conv_filter,
                                    frequency_size,
                                    time_size,
                                    training=False,
                                    input_1d=input_1d)
        output = tf.nn.softmax(softmax_layer, name='labels_softmax')
        if isinstance(last_checkpoint, (str, os.PathLike)):
            # An explicit path was given: restore exactly that checkpoint.
            self.load_checkpoint(path=os.fspath(last_checkpoint))
        else:
            # Should load from last saved checkpoint
            self.load_checkpoint()
        build = tf.compat.v1.saved_model.builder.SavedModelBuilder(file_name)
        info_inputs = {
            'input':
            tf.compat.v1.saved_model.utils.build_tensor_info(input_1d)
        }
        info_outputs = {
            'predictions':
            tf.compat.v1.saved_model.utils.build_tensor_info(output)
        }
        signature = (
            tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
                inputs=info_inputs,
                outputs=info_outputs,
                method_name=tf.compat.v1.saved_model.signature_constants.
                PREDICT_METHOD_NAME))
        build.add_meta_graph_and_variables(
            self._sess,
            [tf.compat.v1.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.compat.v1.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                signature,
            },
        )
        build.save()
def main(image_dir="./", net_loc="../cnn_mnist_10c.h5"):
    """Report classifier-feature distances (FCD) for real and generated digits.

    Loads every ``*.png`` under `image_dir` as generated data, loads the
    classifier stored at `net_loc`, and prints the FCD of the real train
    split, the real test split and the generated images. Features are taken
    from the classifier layer named "flatten_1".

    Args:
        image_dir: directory containing the generated ``*.png`` images.
        net_loc: path of the saved Keras classifier.

    The function does not return: it ends the process with ``sys.exit(0)``
    after the session has been closed.
    """
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    K.set_session(session)
    try:
        print(image_dir)
        print(net_loc)

        imcollection = np.array(imread_collection(f"{image_dir}/*.png"))
        # Add a trailing channel axis.
        net_generated_data = np.expand_dims(imcollection, 3)

        x_real_train, x_real_test = keras_extract_mnist_digits()
        num_samples = min(len(net_generated_data), len(x_real_test))

        x_real_train = x_real_train / 255
        x_real_test = x_real_test / 255
        net_generated_data = net_generated_data / 255

        np.random.shuffle(x_real_train)
        np.random.shuffle(x_real_test)
        np.random.shuffle(net_generated_data)

        x_real_train = x_real_train[:num_samples]
        x_real_test = x_real_test[:num_samples]
        # The generated set must be cut to the same size as well; otherwise
        # the length check below fails whenever there are more generated
        # images than real test images.
        net_generated_data = net_generated_data[:num_samples]

        full_classifier = keras.models.load_model(net_loc)
        req_layer = "flatten_1"
        classifier = Model(
            inputs=full_classifier.input,
            outputs=full_classifier.get_layer(req_layer).output,
        )

        print("Calculating FCD for train data")
        fcd_train = compute_real_fcd(x_real_train, classifier)
        print("Calculating FCD for test data")
        fcd_test = compute_real_fcd(x_real_test, classifier)

        print(
            f"samples = {num_samples} train fcd = {fcd_train:.3g} test fcd = {fcd_test:.3g}"
        )

        net_real_data = x_real_train
        assert len(net_generated_data) == len(net_real_data)

        # Quick sanity statistics: both sets should live in the same range.
        print(
            np.max(net_generated_data),
            np.min(net_generated_data),
            f"{np.std(net_generated_data):.3f}",
            f"{np.mean(net_generated_data):.3f}",
        )
        print(
            np.max(net_real_data),
            np.min(net_real_data),
            f"{np.std(net_real_data):.3f}",
            f"{np.mean(net_real_data):.3f}",
        )

        real_act = classifier.predict(net_real_data)
        print(real_act.shape)
        gen_act = classifier.predict(net_generated_data)

        print("Calculating FCD for generated data")
        fcd_tensor = diagonal_only_frechet_classifier_distance_from_activations(
            tf.convert_to_tensor(real_act), tf.convert_to_tensor(gen_act))
        fcd = session.run(fcd_tensor)
        print(f"fcd = {fcd:.3g}")
    finally:
        # Release the session even if loading or evaluation fails.
        session.close()

    # NOTE: the repeated-shuffle FCD loop that used to follow this call could
    # never run (it came after the unconditional exit) and has been removed.
    sys.exit(0)
def _collect_meta_data(images_root, masks_root, clean_mask_path, num_classes=2):
    """List ``[image_path, one_hot_label, mask_path]`` for every png under `images_root`.

    Each immediate subdirectory of `images_root` is one class; classes get
    numeric labels in alphabetical order, starting with 0. Label 0 always
    uses `clean_mask_path`; other labels use the same-named file under
    ``masks_root/<class>/``.

    Raises:
        FileNotFoundError: if `images_root` or a class directory cannot be
            listed.
    """
    def _listing(directory):
        # os.walk yields nothing for a directory it cannot read.
        entry = next(os.walk(directory), None)
        if entry is None:
            raise FileNotFoundError(directory)
        return entry

    meta_data = []
    classes = sorted(_listing(images_root)[1])
    for label, class_name in enumerate(classes):
        class_dir = os.path.join(images_root, class_name)
        for sample in _listing(class_dir)[2]:
            if not sample.endswith('png'):
                continue
            one_hot = np.zeros(num_classes)
            one_hot[label] = 1
            if label == 0:
                mask_path = clean_mask_path
            else:
                mask_path = os.path.join(masks_root, class_name, sample)
            meta_data.append(
                [os.path.join(class_dir, sample), one_hot, mask_path])
    return meta_data


def main():
    """Train and evaluate the FCN model, checkpointing after every epoch.

    Builds train/test input pipelines over images, one-hot labels and masks,
    restores the "model1322" checkpoint, then alternates one pass over the
    training data and one pass over the test data for NUM_EPOCHS epochs.
    After each epoch the checkpoint is saved and a frozen pb is created.
    """
    #Each subdirectory will get a numeric label by alphabetical order, starting with 0
    clean_mask_path = '/data/fm_tools/autofm/wholeImagesBacon/clean_mask.png'
    dataset_path_train = "/data/fm_tools/autofm/wholeImagesBacon/train"
    dataset_path_train_masks = "/data/fm_tools/autofm/wholeImagesBacon/masks/train"
    dataset_path_test = "/data/fm_tools/autofm/wholeImagesBacon/test"
    dataset_path_test_masks = "/data/fm_tools/autofm/wholeImagesBacon/masks/test"

    train_meta_data = _collect_meta_data(dataset_path_train,
                                         dataset_path_train_masks,
                                         clean_mask_path)
    test_meta_data = _collect_meta_data(dataset_path_test,
                                        dataset_path_test_masks,
                                        clean_mask_path)

    # We are shuffling here as well as letting tf.dataset shuffle because the
    # tf.dataset shuffle cannot do a uniform shuffle on large datasets
    random.shuffle(train_meta_data)
    random.shuffle(test_meta_data)

    # Create dataset pipeline. We are using a tensorflow reinitializable
    # iterator, which allows us to switch between test and train datasets at
    # train time by running the matching initializer op.
    #
    # The columns are pulled out with comprehensions instead of
    # np.array(meta)[:, k]: the rows mix strings with arrays, and recent
    # NumPy versions refuse to build an array from such ragged rows.
    #Train
    image_ds_train = tf.data.Dataset.from_tensor_slices(
        [row[0] for row in train_meta_data])
    image_ds_train = image_ds_train.map(preprocess_train, num_parallel_calls=12)
    mask_ds_train = tf.data.Dataset.from_tensor_slices(
        [row[2] for row in train_meta_data])
    mask_ds_train = mask_ds_train.map(preprocess_train_mask,
                                      num_parallel_calls=12)
    label_ds_train = tf.data.Dataset.from_tensor_slices(
        np.stack([row[1] for row in train_meta_data], axis=0))

    #Test
    image_ds_test = tf.data.Dataset.from_tensor_slices(
        [row[0] for row in test_meta_data])
    image_ds_test = image_ds_test.map(preprocess_test, num_parallel_calls=12)
    mask_ds_test = tf.data.Dataset.from_tensor_slices(
        [row[2] for row in test_meta_data])
    mask_ds_test = mask_ds_test.map(preprocess_test_mask, num_parallel_calls=12)
    label_ds_test = tf.data.Dataset.from_tensor_slices(
        np.stack([row[1] for row in test_meta_data], axis=0))

    dataset_train = tf.data.Dataset.zip(
        (image_ds_train, label_ds_train, mask_ds_train))
    dataset_train = dataset_train.shuffle(buffer_size=1000)  #buffer_size determines uniformity of the shuffle
    dataset_train = dataset_train.batch(BATCH_SIZE)
    dataset_train = dataset_train.prefetch(buffer_size=AUTOTUNE)

    dataset_test = tf.data.Dataset.zip(
        (image_ds_test, label_ds_test, mask_ds_test))
    dataset_test = dataset_test.shuffle(buffer_size=1000)  #buffer_size determines uniformity of the shuffle
    dataset_test = dataset_test.batch(BATCH_SIZE)
    dataset_test = dataset_test.prefetch(buffer_size=AUTOTUNE)

    # NOTE(review): types come from the train set but shapes from the test
    # set. Left as is because the model may depend on the static shapes of
    # the test pipeline -- confirm both pipelines yield the same shapes.
    iterator = tf.data.Iterator.from_structure(dataset_train.output_types,
                                               dataset_test.output_shapes)
    X_BATCH, Y_BATCH, Z_BATCH = iterator.get_next()
    training_init_op = iterator.make_initializer(dataset_train)
    validation_init_op = iterator.make_initializer(dataset_test)

    X_BATCH = tf.identity(X_BATCH, "images_input")
    Y_BATCH = tf.identity(Y_BATCH, "labels_input")
    Z_BATCH = tf.identity(Z_BATCH, "masks_input")

    network = FCN_Model(X_BATCH, Z_BATCH)  #X_BATCH is images Y_BATCH is labels
    # Lookup kept only for its check: it fails early when the graph has no
    # operation named "Softmax".
    tf.get_default_graph().get_operation_by_name("Softmax")

    NUM_EPOCHS = 2000
    learning_rate = 5e-4
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(network.loss)

    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=500)
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    sess = InteractiveSession(config=config)

    with sess.graph.as_default():
        sess.run(tf.compat.v1.global_variables_initializer())
        sess.run(tf.compat.v1.local_variables_initializer())
        # Restoring after the initializers overwrites the fresh values of
        # the trainable variables with the checkpointed ones.
        saver.restore(sess, "model1322")

    print("Training started")
    for epoch in range(NUM_EPOCHS):
        #Train epoch
        #Set to use train data
        sess.run(training_init_op)
        train_losses = []
        train_accuracies = []
        try:
            while True:
                train_loss, _, accuracy = sess.run(
                    [network.loss, optimizer, network.accuracy])
                train_losses.append(train_loss)
                train_accuracies.append(accuracy[0])
                print("Train accuracy =", "%.3f" % accuracy[0],
                      "Train loss =", train_loss, end='\r')
        except tf.errors.OutOfRangeError:
            #This looks like a hack but this is the correct way to tell when an epoch is complete
            pass

        print("Epoch", epoch, "complete. Train accuracy for epoch was",
              np.mean(train_accuracies), "train loss was",
              np.mean(train_losses))

        #Test epoch
        #Set to use test_data
        sess.run(validation_init_op)
        test_accuracies = []
        try:
            while True:
                pred, softmax, images, masks, test_loss, accuracy = sess.run([
                    network.predictions, network.softmax,
                    network.original_image, network.mask, network.loss,
                    network.accuracy
                ])
                print("Test accuracy =", accuracy[0], end='\r')
                test_accuracies.append(accuracy[0])
                # Show the first sample of the batch for visual inspection.
                cv2.imshow("prediction",
                           np.squeeze(pred[0]).astype(np.uint8) * 255)
                cv2.imshow(
                    "softmax",
                    (np.squeeze(softmax[0][:, :, 1]) * 255).astype(np.uint8))
                cv2.imshow("mask", np.squeeze(masks[0]).astype(np.uint8) * 255)
                # [..., ::-1] reverses the channel order for display.
                cv2.imshow("image",
                           np.squeeze(images[0]).astype(np.uint8)[..., ::-1])
                cv2.waitKey(1)
                print("Test accuracy =", "%.3f" % accuracy[0], "Test loss =",
                      test_loss, end='\r')
        except tf.errors.OutOfRangeError:
            pass

        print("Test accuracy after epoch was", np.mean(test_accuracies))
        saver.save(sess, "model1322")
        freeze.create_pb("model1322")
        print("Checkpoint saved and pb created.")