def freeze_slots(self, features):
    assert not self._frozen, "Already finalized"
    if self._config_run:
        raise ConfigRunError()
    self._sparse_v2opt = {}
    bias_config = self._get_bias_slot_configs()
    if bias_config:
        bias_weights = self._bias_embedding.weights
        for i, opt in enumerate(bias_config['optimizers']):
            for j in range(self._num_shards):
                self._sparse_v2opt[bias_weights[i][j]] = opt
    vec_config = self._get_vec_slot_configs()
    if vec_config:
        vec_weights = self._vec_embedding.weights
        for i, opt in enumerate(vec_config['optimizers']):
            for j in range(self._num_shards):
                self._sparse_v2opt[vec_weights[i][j]] = opt
        # Collect the per-slice placeholders and their widths, split the fused
        # vector tensor accordingly, and swap the splits into the placeholders.
        placeholders = []
        dims = []
        for slot_id, _, _, _ in vec_config['slot_list']:
            fc = self._feature_column_v1s[slot_id]
            for sslice in fc.feature_slot.feature_slices:
                dims.append(sslice.len)
                placeholders.append(fc.get_vector(sslice))
        vec_split = tf.split(self._vec_tensor, dims, axis=1)
        ge.swap_ts(vec_split, placeholders)
    for slot in self._feature_slots.values():
        slot._frozen = True
    self._frozen = True
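# The method above rewires a fused embedding tensor into many per-slice
# placeholders via split-and-swap. Below is a minimal, self-contained sketch of
# just that step; the names and shapes (ph_a, ph_b, fused) are illustrative
# assumptions, not part of the original codebase. Assumes TF 1.x, where
# swap_ts lives in tf.contrib.graph_editor.
import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

g = tf.Graph()
with g.as_default():
    # The downstream graph was built against per-slice placeholders...
    ph_a = tf.placeholder(tf.float32, [None, 3], name='slice_a')
    ph_b = tf.placeholder(tf.float32, [None, 5], name='slice_b')
    downstream = tf.concat([ph_a, ph_b], axis=1, name='downstream')

    # ...and a single fused tensor later provides all slices at once.
    fused = tf.placeholder(tf.float32, [None, 8], name='fused')
    splits = tf.split(fused, [3, 5], axis=1)

    # After the swap, `downstream` consumes the split outputs instead of the
    # placeholders, so feeding `fused` drives the whole graph.
    ge.swap_ts(splits, [ph_a, ph_b])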
def edit_graph(self, graph):
    sgv0 = [as_tftensor(x) for x in self._placeholders]
    sgv1 = self.dequeue_op
    # Feed each placeholder from the queue when queue_cond holds, otherwise
    # fall back to the placeholder itself.
    sgv2 = [
        tf.cond(self.queue_cond, lambda: x, lambda: y)
        for x, y in zip(sgv1, sgv0)
    ]
    # Protect the enqueue op and the freshly built cond ops so their own
    # inputs are not rerouted (which would create a cycle).
    graph_editor.swap_ts(sgv0, sgv2,
                         cannot_modify=[self.enqueue_op] + [x.op for x in sgv2])
def main():
    """
    Run the script from repository root directory
    """
    args = doParsing()
    print(args)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=True)) as sess:

        metagraphPath, checkpointPath = getModelPaths(args.modelDir,
                                                      args.checkpointStep)

        print("Loading metagraph")
        saver = tf.train.import_meta_graph(metagraphPath)

        print("Restoring model")
        saver.restore(sess, checkpointPath)
        print("Checkpoint loaded")

        # Optional save of tensorboard to see tensor names
        if args.tensorboardDir is not None:
            train_writer = tf.summary.FileWriter(args.tensorboardDir)
            train_writer.add_graph(sess.graph)

        print("Removing random and adding an input placeholder")
        randomTensor = sess.graph.get_tensor_by_name("random_uniform" + ":0")
        explicitInput = tf.placeholder(shape=randomTensor.shape, name="input",
                                       dtype=tf.float32)
        ge.swap_ts(randomTensor, explicitInput)

        # Save metagraph
        tf.train.write_graph(sess.graph.as_graph_def(), "",
                             os.path.join(args.frozenModelDir, "model_graph.pb"),
                             False)
        print("Metagraph saved")

        # Freeze graph (graphdef plus parameters); this keeps in the graph
        # only the layers needed to produce the output_node_names
        print("Freezing graph...")
        freeze_graph(input_graph=args.frozenModelDir + "/model_graph.pb",
                     input_saver="",
                     input_binary=True,
                     input_checkpoint=checkpointPath,
                     output_node_names="Tanh",
                     restore_op_name="save/restore_all",
                     filename_tensor_name="save/Const:0",
                     output_graph=args.frozenModelDir + "/graph.pb",
                     clear_devices=True,
                     initializer_nodes="")
def test_swap(self):
    ge.swap_ts([self.a0, self.b0], [self.a1, self.b1])
    self.assertTrue(match.OpMatcher("c0").input_ops("a1", "b1")(self.c0.op))
    self.assertTrue(match.OpMatcher("c1").input_ops("a0", "b0")(self.c1.op))
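# A standalone demonstration of the behavior this test asserts: after
# ge.swap_ts(ts0, ts1), consumers of ts0 read from ts1 and vice versa.
# This sketch is not part of the original test suite; it assumes a TF 1.x
# environment where tf.contrib.graph_editor is available.
import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

g = tf.Graph()
with g.as_default():
    a0 = tf.constant(1.0, name='a0')
    b0 = tf.constant(2.0, name='b0')
    a1 = tf.constant(3.0, name='a1')
    b1 = tf.constant(4.0, name='b1')
    c0 = tf.add(a0, b0, name='c0')  # consumes a0, b0 before the swap
    c1 = tf.add(a1, b1, name='c1')  # consumes a1, b1 before the swap
    ge.swap_ts([a0, b0], [a1, b1])

with tf.Session(graph=g) as sess:
    # c0 now computes a1 + b1, c1 now computes a0 + b0
    print(sess.run([c0, c1]))  # [7.0, 3.0]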
def train(self, X_train, Y_train, X_val, Y_val, max_epochs, batch_size,
          learning_rate_init, reg_param=0, learning_rate_decay_type='inverse',
          learning_rate_decay_parameter=10, early_stopping=True,
          save_path='./UNet', reset_parameters=False,
          check_val_every_n_batches=5, seed=0, data_on_GPU=True):
    '''
    Trains the network on the given data, provided as numpy arrays. It is
    assumed all preprocessing has already been done, including shuffling and
    splitting of the data into training/validation sets.
    '''
    # (Re)load base graph from file
    print('Loading graph...')
    saver = self.load_graph(save_path)

    with self.G.as_default():
        print('Inserting data augmentation operations...')
        # Record dataset dimensions before the arrays are (possibly) deleted
        m_train, height, width, n_channels = X_train.shape
        m_val = X_val.shape[0]
        m = m_train + m_val
        n_classes = Y_train.shape[-1]
        train_data_shape = X_train.shape

        # Load data onto the GPU and replace the input placeholder with an
        # index into the on-GPU data (if applicable)
        if data_on_GPU:
            X_train_t = tf.constant(X_train, dtype=tf.uint8)
            X_val_t = tf.constant(X_val, dtype=tf.uint8)
            Y_train_t = tf.constant(Y_train, dtype=tf.bool)
            Y_val_t = tf.constant(Y_val, dtype=tf.bool)
            del X_train, X_val, Y_train, Y_val
            train_idx = tf.placeholder_with_default([0], shape=[None])
            X_train_t = tf.gather(X_train_t, train_idx, axis=0)
            Y_train_t = tf.gather(Y_train_t, train_idx, axis=0)
        else:
            X_train_t = tf.placeholder_with_default(
                np.zeros([0, height, width, n_channels], dtype=np.uint8),
                shape=self.input.shape, name='X_train_input')
            X_val_t = tf.placeholder_with_default(
                np.zeros([0, height, width, n_channels], dtype=np.uint8),
                shape=self.input.shape, name='X_val_input')
            Y_train_t = tf.placeholder_with_default(
                np.zeros([0, height, width, n_classes], dtype=np.bool_),
                shape=self.labels.shape, name='Y_train_input')
            Y_val_t = tf.placeholder_with_default(
                np.zeros([0, height, width, n_classes], dtype=np.bool_),
                shape=self.labels.shape, name='Y_val_input')

        # Insert data augmentation steps into the graph
        train_or_val_idx = tf.placeholder(dtype=tf.int32, shape=[None])
        X_train_aug, Y_train_aug = self.data_augmentation(X_train_t, Y_train_t)
        X = tf.cast(tf.gather(tf.concat([X_train_aug, X_val_t], axis=0),
                              train_or_val_idx), tf.float32)
        Y = tf.cast(tf.gather(tf.concat([Y_train_aug, Y_val_t], axis=0),
                              train_or_val_idx), tf.float32)
        ge.swap_ts([X, Y], [self.input, self.labels])  # Use X and Y from now on!

        # Write to log file
        with open(str(save_path) + '.log', 'w+') as fo:
            fo.write('Training log\n\n')
            fo.write('Dataset metrics:\n')
            fo.write('Training data shape: {}\n'.format(train_data_shape))
            fo.write('Validation set size: {}\n'.format(m_val))
            fo.write('Hyperparameters:\n')
            fo.write('Batch size: {}\n'.format(batch_size))
            fo.write('Learning rate: {}\n'.format(learning_rate_init))
            fo.write('Learning rate annealed every N epochs: {}\n'.format(
                learning_rate_decay_parameter))
            fo.write('Learning rate anneal type: {}\n'.format(
                learning_rate_decay_type))
            fo.write('Regularization parameter: {}\n'.format(reg_param))
            fo.write('Logging frequency: {} global steps\n'.format(
                check_val_every_n_batches))
            fo.write('Random seed: {}\n'.format(seed))

        # Initialize control flow variables and (empty) log files
        best_val_loss = np.inf
        global_step = 0
        for log_name in ['_val_loss', '_train_loss', '_learning_rate']:
            with open(str(save_path) + log_name + '.log', 'w+') as fo:
                fo.write('')

        # Start tensorflow session; reset parameters or reload checkpoint
        print('Starting tensorflow session...')
        with tf.Session() as sess:
            if reset_parameters:
                saver = tf.train.Saver()
                sess.run(tf.global_variables_initializer())
            else:
                saver.restore(sess, save_path)
                uninitialized_vars = []
                for var in tf.global_variables():
                    try:
                        sess.run(var)
                    except tf.errors.FailedPreconditionError:
                        uninitialized_vars.append(var)
                sess.run(tf.variables_initializer(uninitialized_vars))

            # Iterate over training epochs
            for epoch in range(max_epochs):
                if learning_rate_decay_type == 'inverse':
                    learning_rate = learning_rate_init / (
                        1 + epoch / learning_rate_decay_parameter)
                elif learning_rate_decay_type == 'constant':
                    learning_rate = learning_rate_init
                elif learning_rate_decay_type == 'exponential':
                    learning_rate = learning_rate_init * np.exp(
                        -epoch / learning_rate_decay_parameter)
                else:
                    raise Exception('Unknown learning rate decay function')

                # Iterate over batches
                n_batches = math.ceil(m_train / batch_size)
                for b in range(n_batches):
                    train_idx_i = b * batch_size
                    train_idx_f = min((b + 1) * batch_size, m_train)
                    if data_on_GPU:
                        feed_dict = {
                            train_idx: range(train_idx_i, train_idx_f),
                            train_or_val_idx: range(train_idx_f - train_idx_i),
                            self.learning_rate: learning_rate,
                            self.reg_param: reg_param
                        }
                    else:
                        feed_dict = {
                            X_train_t: X_train[train_idx_i:train_idx_f],
                            Y_train_t: Y_train[train_idx_i:train_idx_f],
                            train_or_val_idx: range(train_idx_f - train_idx_i),
                            self.learning_rate: learning_rate,
                            self.reg_param: reg_param
                        }
                    train_loss, _ = sess.run([self.loss, self.train_op],
                                             feed_dict=feed_dict)
                    print('Epoch {}, batch {}/{}: loss={:.3e}'.format(
                        epoch + 1, b, n_batches, train_loss))
                    if np.isnan(train_loss) or np.isinf(train_loss):
                        print('Detected nan, exiting training')
                        return

                    if (global_step % check_val_every_n_batches) == 0:
                        if data_on_GPU:
                            # Index 0 is the (default) training sample; the
                            # validation set occupies indices 1..m_val
                            feed_dict = {
                                train_or_val_idx: range(1, m_val + 1),
                                self.reg_param: reg_param
                            }
                        else:
                            feed_dict = {
                                X_val_t: X_val,
                                Y_val_t: Y_val,
                                train_or_val_idx: range(m_val),
                                self.reg_param: reg_param
                            }
                        val_loss = sess.run(self.loss, feed_dict=feed_dict)
                        if early_stopping and (val_loss < best_val_loss):
                            best_val_loss = val_loss
                            print('New best validation loss: {:.3e}! Saving...'
                                  .format(val_loss))
                            saver.save(sess, save_path, write_meta_graph=False)

                        # Write to logs every time the validation set is run
                        with open(str(save_path) + '_train_loss.log', 'a') as fo:
                            fo.write(str(train_loss) + '\n')
                        with open(str(save_path) + '_val_loss.log', 'a') as fo:
                            fo.write(str(val_loss) + '\n')
                        with open(str(save_path) + '_learning_rate.log', 'a') as fo:
                            fo.write(str(learning_rate) + '\n')
                        u.plot_metrics(str(save_path))
                    global_step += 1
def train(self, X_train, Y_train, X_val, Y_val, max_epochs, batch_size,
          learning_rate_init, reg_param=0, learning_rate_decay_type='inverse',
          learning_rate_decay_parameter=10, keep_prob=[], early_stopping=True,
          save_path=Path('./UNet'), reset_parameters=False,
          val_checks_per_epoch=10, seed=None, data_on_GPU=True):
    '''
    Trains the network on the given data, provided as numpy arrays. It is
    assumed all preprocessing has already been done, including shuffling and
    splitting of the data into training/validation sets.
    '''
    assert isinstance(save_path, Path), 'save_path needs to be a pathlib Path'
    check_val_every_n_batches = math.ceil(
        X_train.shape[0] / batch_size / val_checks_per_epoch)

    # (Re)load base graph from file
    print('Loading graph...')
    saver = self.load_graph(save_path)

    with self.G.as_default():
        # Set the seed
        if seed is None:
            seed = int(time.time())
        tf.set_random_seed(seed)
        np.random.seed(seed)

        print('Inserting data augmentation operations...')
        # Get dataset size/statistics before the arrays are (possibly) deleted
        m_train, height, width, n_channels = X_train.shape
        m_val = X_val.shape[0]
        m = m_train + m_val
        n_classes = Y_train.shape[-1]
        train_data_shape = X_train.shape
        X_train_mean = np.mean(X_train)
        X_val_mean = np.mean(X_val)
        X_mean = (m_train * X_train_mean + m_val * X_val_mean) / m
        X_train_var = u.var(X_train, X_mean)
        X_val_var = u.var(X_val, X_mean)
        X_std = np.sqrt((m_train * X_train_var + m_val * X_val_var) / m)

        # Load data onto the GPU and replace the input placeholder with an
        # index into the on-GPU data (if applicable)
        if data_on_GPU:
            print('Loading X_train to GPU...')
            X_train_t = tf.constant(X_train, dtype=tf.uint8)
            del X_train
            print('Loading X_val to GPU...')
            X_val_t = tf.constant(X_val, dtype=tf.uint8)
            del X_val
            print('Loading Y_train to GPU...')
            Y_train_t = tf.constant(Y_train, dtype=tf.bool)
            del Y_train
            print('Loading Y_val to GPU...')
            Y_val_t = tf.constant(Y_val, dtype=tf.bool)
            del Y_val
            train_idx = tf.placeholder_with_default([0], shape=[None])
            X_train_t = tf.gather(X_train_t, train_idx, axis=0)
            Y_train_t = tf.gather(Y_train_t, train_idx, axis=0)
        else:
            X_train_t = tf.placeholder_with_default(
                np.zeros([0, height, width, n_channels], dtype=np.uint8),
                shape=self.input.shape, name='X_train_input')
            X_val_t = tf.placeholder_with_default(
                np.zeros([0, height, width, n_channels], dtype=np.uint8),
                shape=self.input.shape, name='X_val_input')
            Y_train_t = tf.placeholder_with_default(
                np.zeros([0, height, width, n_classes], dtype=np.bool_),
                shape=self.labels.shape, name='Y_train_input')
            Y_val_t = tf.placeholder_with_default(
                np.zeros([0, height, width, n_classes], dtype=np.bool_),
                shape=self.labels.shape, name='Y_val_input')

        # Insert data augmentation steps into the graph
        train_or_val_idx = tf.placeholder(dtype=tf.int32, shape=[None])
        X_train_aug, Y_train_aug, X_val_aug, Y_val_aug = self.data_augmentation(
            X_train_t, Y_train_t, X_val_t, Y_val_t)
        X = (tf.cast(tf.gather(tf.concat([X_train_aug, X_val_aug], axis=0),
                               train_or_val_idx), tf.float32) - X_mean) / X_std
        Y = tf.cast(tf.gather(tf.concat([Y_train_aug, Y_val_aug], axis=0),
                              train_or_val_idx), tf.float32)
        ge.swap_ts([X, Y], [self.input, self.labels])  # Use X and Y from now on!

        # Add metrics
        prob = tf.sigmoid(self.output[:, :, :, 0])
        Y_bool = tf.cast(Y[:, :, :, 0], tf.bool)
        is_over_thresh = (prob > 0.5)
        is_equal = tf.equal(is_over_thresh, Y_bool)
        intersection = tf.reduce_sum(
            tf.cast(tf.logical_and(is_over_thresh, Y_bool), tf.float32))
        union = tf.reduce_sum(
            tf.cast(tf.logical_or(is_over_thresh, Y_bool), tf.float32))
        acc = tf.reduce_mean(tf.cast(is_equal, tf.float32))
        conf = tf.reduce_mean(2 * tf.abs(prob - 0.5) *
                              tf.cast(is_equal, tf.float32))
        IOU = intersection / union

        # Write to log file
        with open(str(save_path) + '.log', 'w+') as fo:
            fo.write('Training log\n\n')
            fo.write('Dataset metrics:\n')
            fo.write('Training data shape: {}\n'.format(train_data_shape))
            fo.write('Validation set size: {}\n'.format(m_val))
            fo.write('X_mean: {}\n'.format(X_mean))
            fo.write('X_std: {}\n\n'.format(X_std))
            fo.write('Hyperparameters:\n')
            fo.write('Batch size: {}\n'.format(batch_size))
            fo.write('Learning rate: {}\n'.format(learning_rate_init))
            fo.write('Learning rate decay parameter: {}\n'.format(
                learning_rate_decay_parameter))
            fo.write('Learning rate decay type: {}\n'.format(
                learning_rate_decay_type))
            fo.write('Regularization parameter: {}\n'.format(reg_param))
            for n in range(len(keep_prob)):
                fo.write('Dropout keep prob. group {}: {:.2f}\n'.format(
                    n, keep_prob[n]))
            fo.write('Logging frequency: {} global steps\n'.format(
                check_val_every_n_batches))
            fo.write('Random seed: {}\n'.format(seed))

        # Initialize control flow variables and log files
        best_val_conf = 0
        global_step = 0
        io_mode = 'w+' if reset_parameters else 'a+'
        log_names = ['_val_accuracy', '_val_loss', '_val_confidence',
                     '_val_IOU', '_train_accuracy', '_train_loss',
                     '_train_confidence', '_train_IOU', '_learning_rate']
        for log_name in log_names:
            with open(str(save_path) + log_name + '.log', io_mode) as fo:
                fo.write('')

        # Start tensorflow session; reset parameters or reload checkpoint
        print('Starting tensorflow session...')
        with tf.Session() as sess:
            if reset_parameters:
                sess.run(tf.global_variables_initializer())
            else:
                try:
                    saver.restore(sess, str(save_path))
                except Exception:
                    sess.run(tf.global_variables_initializer())
                uninitialized_vars = []
                for var in tf.global_variables():
                    try:
                        sess.run(var)
                    except tf.errors.FailedPreconditionError:
                        uninitialized_vars.append(var)
                sess.run(tf.variables_initializer(uninitialized_vars))

            # Iterate over training epochs
            for epoch in range(max_epochs):
                if learning_rate_decay_type == 'inverse':
                    learning_rate = learning_rate_init / (
                        1 + epoch / learning_rate_decay_parameter)
                elif learning_rate_decay_type == 'constant':
                    learning_rate = learning_rate_init
                elif learning_rate_decay_type == 'exponential':
                    learning_rate = learning_rate_init * np.exp(
                        -epoch / learning_rate_decay_parameter)
                else:
                    raise Exception('Unknown learning rate decay function')

                # Iterate over batches
                n_batches = int(math.ceil(m_train / batch_size))
                for b in range(n_batches):
                    train_idx_i = b * batch_size
                    train_idx_f = min((b + 1) * batch_size, m_train)
                    if data_on_GPU:
                        feed_dict = {
                            train_idx: range(train_idx_i, train_idx_f),
                            train_or_val_idx: range(train_idx_f - train_idx_i),
                            self.learning_rate: learning_rate,
                            self.reg_param: reg_param
                        }
                    else:
                        feed_dict = {
                            X_train_t: X_train[train_idx_i:train_idx_f],
                            Y_train_t: Y_train[train_idx_i:train_idx_f],
                            train_or_val_idx: range(train_idx_f - train_idx_i),
                            self.learning_rate: learning_rate,
                            self.reg_param: reg_param
                        }
                    feed_dict.update(
                        {self.keep_prob_dict[i]: kp
                         for i, kp in enumerate(keep_prob)})
                    train_loss, train_acc, train_conf, train_IOU, _ = sess.run(
                        [self.loss, acc, conf, IOU, self.train_op],
                        feed_dict=feed_dict)
                    print('Epoch {}, batch {}/{}: loss={:.3e}, acc={:.3f}, '
                          'IOU={:.3f}'.format(epoch + 1, b, n_batches,
                                              train_loss, train_acc, train_IOU))
                    if np.isnan(train_loss) or np.isinf(train_loss):
                        print('Detected nan, exiting training')
                        return

                    if ((global_step % check_val_every_n_batches) == 0
                            and global_step != 0):
                        if data_on_GPU:
                            # Index 0 is the (default) training sample; the
                            # validation set occupies indices 1..m_val
                            feed_dict = {
                                train_or_val_idx: range(1, m_val + 1),
                                self.reg_param: reg_param
                            }
                        else:
                            feed_dict = {
                                X_val_t: X_val,
                                Y_val_t: Y_val,
                                train_or_val_idx: range(m_val),
                                self.reg_param: reg_param
                            }
                        val_loss, val_acc, val_conf, val_IOU = sess.run(
                            [self.loss, acc, conf, IOU], feed_dict=feed_dict)
                        print('Validation set: loss={:.3e}, acc={:.3f}, '
                              'IOU={:.3f}'.format(val_loss, val_acc, val_IOU))
                        if early_stopping and (val_conf > best_val_conf):
                            best_val_conf = val_conf
                            print('New best validation confidence: {:.3e}! '
                                  'Saving...'.format(val_conf))
                            saver.save(sess, str(save_path),
                                       write_meta_graph=False)

                        # Write to logs every time the validation set is run
                        for log_name, value in [
                                ('_train_loss', train_loss),
                                ('_train_accuracy', train_acc),
                                ('_train_confidence', train_conf),
                                ('_train_IOU', train_IOU),
                                ('_val_accuracy', val_acc),
                                ('_val_loss', val_loss),
                                ('_val_confidence', val_conf),
                                ('_val_IOU', val_IOU),
                                ('_learning_rate', learning_rate)]:
                            with open(str(save_path) + log_name + '.log',
                                      'a') as fo:
                                fo.write(str(value) + '\n')

                    # Iterate global step
                    global_step += 1

                # Save if not using early stopping
                if not early_stopping:
                    saver.save(sess, str(save_path), write_meta_graph=False)

                # Plot an example of how the algorithm is doing on the task
                if not data_on_GPU:
                    x = X_val[0]
                    feed_dict = {
                        X_val_t: np.expand_dims(x, axis=0),
                        Y_val_t: np.expand_dims(Y_val[0], axis=0),
                        train_or_val_idx: range(1),
                        self.reg_param: reg_param
                    }
                    x, y = [a.squeeze() for a in
                            sess.run([X_val_aug, prob], feed_dict=feed_dict)]
                    plt.ioff()
                    plt.figure('Progress pic')
                    plt.clf()
                    plt.imshow((x - np.min(x)) / (np.max(x) - np.min(x)))
                    plt.imshow(y, alpha=0.4)
                    (save_path.parent / 'ProgressPics').mkdir(exist_ok=True)
                    plt.savefig(str(save_path.parent) +
                                '/ProgressPics/ProgressPic{}.png'.format(epoch))
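# Both train() variants above use the same rewiring pattern: build new
# input/augmentation tensors next to a reloaded graph, then swap_ts them into
# the graph's original placeholders so every downstream op consumes the new
# pipeline. A minimal sketch of that pattern follows; the names (model_in,
# model_out, raw, pipeline) are illustrative assumptions, not from the
# original code, and a TF 1.x contrib environment is assumed.
import numpy as np
import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

g = tf.Graph()
with g.as_default():
    # Stand-in for a reloaded model: a placeholder feeding some ops.
    model_in = tf.placeholder(tf.float32, shape=[None, 4], name='model_in')
    model_out = tf.reduce_sum(model_in, name='model_out')

    # New front end: raw uint8 data, cast and normalized in-graph.
    raw = tf.placeholder(tf.uint8, shape=[None, 4], name='raw')
    pipeline = (tf.cast(raw, tf.float32) - 127.5) / 127.5

    # Reroute: consumers of model_in (here, model_out) now read `pipeline`.
    ge.swap_ts([pipeline], [model_in])

with tf.Session(graph=g) as sess:
    x = np.full([2, 4], 255, dtype=np.uint8)
    print(sess.run(model_out, feed_dict={raw: x}))  # 8.0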
def edit_graph(self, graph):
    sgv0 = [as_tftensor(x) for x in self._placeholders]
    sgv1 = self.dequeue_op
    graph_editor.swap_ts(sgv0, sgv1, can_modify=self._editable_operations)
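# The two edit_graph() variants constrain the rewiring in opposite ways: the
# earlier one blacklists ops with `cannot_modify` (the enqueue op and the new
# tf.cond ops), while this one whitelists the only ops swap_ts may touch with
# `can_modify`. A minimal sketch of the `can_modify` effect; the names (keep,
# move) are illustrative, and a TF 1.x contrib environment is assumed.
import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

g = tf.Graph()
with g.as_default():
    t0 = tf.constant(1.0, name='t0')
    t1 = tf.constant(2.0, name='t1')
    keep = tf.negative(t0, name='keep')  # consumer that must stay on t0
    move = tf.identity(t0, name='move')  # consumer allowed to be rerouted

    # Only `move`'s op is whitelisted, so `keep` still reads t0 afterwards.
    ge.swap_ts([t0], [t1], can_modify=[move.op])

with tf.Session(graph=g) as sess:
    print(sess.run([keep, move]))  # [-1.0, 2.0]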