def play(args):
    """Render episodes played by a DQN agent restored from a checkpoint.

    The network weights are loaded from ``args.ckpt_file`` inside
    ``args.ckpt_dir`` or, when no file is given, from the latest checkpoint
    found in ``args.ckpt_dir``. ``args.num_eps`` episodes are then played and
    rendered on screen.

    Args:
        args: Options object. Reads ``env``, ``frame_height``,
            ``frame_width``, ``frames_per_state``, ``ckpt_dir``,
            ``ckpt_file``, ``num_eps``, ``max_initial_random_steps`` and
            ``max_ep_length``. It is also handed to ``StateBuffer`` and
            ``reset_env_and_state_buffer``.
    """
    # Create environment
    env = gym.make(args.env)
    num_actions = env.action_space.n
    state_buf = StateBuffer(args)

    # Define input placeholders
    state_ph = tf.placeholder(
        tf.uint8,
        (None, args.frame_height, args.frame_width, args.frames_per_state))

    # Instantiate DQN network
    DQN = DeepQNetwork(num_actions, state_ph, scope='DQN_main')
    DQN_predict_op = DQN.predict()

    # Create session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    try:
        # Load ckpt file
        loader = tf.train.Saver()
        if args.ckpt_file is not None:
            ckpt = args.ckpt_dir + '/' + args.ckpt_file
        else:
            ckpt = tf.train.latest_checkpoint(args.ckpt_dir)
        loader.restore(sess, ckpt)
        print('%s restored.\n\n' % ckpt)

        for _episode in range(args.num_eps):
            # Reset environment and state buffer for next episode
            reset_env_and_state_buffer(env, state_buf, args)
            step = 0
            ep_done = False
            initial_steps = np.random.randint(
                1, args.max_initial_random_steps + 1)

            while not ep_done:
                # Slow playback down so the rendered game can be followed.
                time.sleep(0.05)
                env.render()

                # Random actions for the first few steps give every episode
                # a different starting point; afterwards follow the action
                # with the highest Q-value under the restored policy.
                if step < initial_steps:
                    action = env.action_space.sample()
                else:
                    state = np.expand_dims(state_buf.get_state(), 0)
                    action = sess.run(DQN_predict_op, {state_ph: state})

                frame, _, ep_terminal, _ = env.step(action)
                frame = preprocess_image(frame, args.frame_width,
                                         args.frame_height)
                state_buf.add(frame)
                step += 1

                # Episode ends on a terminal state or at the step limit.
                if ep_terminal or step == args.max_ep_length:
                    ep_done = True
    finally:
        # Release the session and the render window even if playback fails.
        sess.close()
        env.close()
def __getitem__(self, index):
    """Return the sample stored at row ``index`` of the dataframe.

    The row supplies an image id and a label string. The image is read from
    ``self.imgs_path + image_id + ".jpg"``, preprocessed, and the mask and
    regression targets are built from the raw image, the label string and
    ``self.camera_matrix``.

    Args:
        index: Row position in ``self.dataframe.values``.

    Returns:
        tuple: ``(img, mask, regr)`` where ``img`` and ``regr`` have had
        axis 2 rolled to the front and ``mask`` is returned as produced by
        ``get_mask_and_regr``.

    Raises:
        OSError: If the image file cannot be read.
    """
    # get entry from dataframe
    image_id, labels_string = self.dataframe.values[index]

    # open image
    image_path = self.imgs_path + image_id + ".jpg"
    img0 = cv2.imread(image_path)
    if img0 is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise OSError("Could not read image: {}".format(image_path))

    img = preprocess_image(img0)
    # NOTE(review): the original author flagged uncertainty about whether
    # this HWC -> CHW move is required after loading with OpenCV - confirm
    # against the layout preprocess_image returns.
    img = np.rollaxis(img, 2, 0)

    mask, regr = get_mask_and_regr(img0, labels_string, self.camera_matrix)
    # move the last axis of the regression target to the front as well
    regr = np.rollaxis(regr, 2, 0)

    return img, mask, regr
def predict_video(sess, video_file, video_out_file):
    """Run the detector on every frame of a video and write the result.

    Each frame is written to ``data/cache.jpg`` (``preprocess_image`` is
    called with a file path), passed through the model, annotated with
    ``draw_boxes`` and re-read from ``data/cache_out.jpg``. The annotated
    frames are then saved to ``video_out_file`` with ``imageio.mimsave``.

    Args:
        sess: Session used to evaluate ``scores``, ``boxes`` and ``classes``.
        video_file: Path of the input video.
        video_out_file: Path of the annotated output video.
    """
    frame_cache = "data/cache.jpg"
    annotated_cache = os.path.join("data", "cache_out.jpg")

    # The colours are derived from the class list only, so build them once
    # instead of once per frame.
    colors = generate_colors(class_names)

    frames = []
    video_in = imageio.get_reader(video_file)
    try:
        for image in tqdm(video_in):
            imageio.imwrite(frame_cache, image)
            image_in, image_data = preprocess_image(
                frame_cache,
                model_image_size=(608, 608),
                image_shape=(720, 1280),
                type="video")
            out_scores, out_boxes, out_classes = sess.run(
                [scores, boxes, classes],
                feed_dict={
                    yolo_model.input: image_data,
                    K.learning_phase(): 0
                })
            draw_boxes(image_in, out_scores, out_boxes, out_classes,
                       class_names, colors)
            image_in.save(annotated_cache, quality=90)
            frames.append(imageio.imread(annotated_cache))
    finally:
        # Release the underlying video file/decoder even if a frame fails.
        video_in.close()

    imageio.mimsave(video_out_file, frames)
def predict_image(sess, image_in_file, image_out_file):
    """Detect objects in one image, save the annotated copy and show it.

    Args:
        sess: Session used to evaluate ``scores``, ``boxes`` and ``classes``.
        image_in_file: Path of the image to analyse.
        image_out_file: Path the annotated image is saved to.

    Returns:
        tuple: ``(out_scores, out_boxes, out_classes)`` as returned by
        ``sess.run``.
    """
    image, image_data = preprocess_image(image_in_file,
                                         model_image_size=(608, 608))

    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={
            yolo_model.input: image_data,
            K.learning_phase(): 0
        })

    print('Found {} boxes for {}'.format(len(out_boxes), image_in_file))

    # Generate colors for drawing bounding boxes.
    colors = generate_colors(class_names)
    # Draw bounding boxes on the image file
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
    # Save the predicted bounding box on the image
    image.save(image_out_file, quality=90)

    # Display the results in the notebook. scipy.misc.imread was removed in
    # SciPy 1.2, so read the file back with imageio as predict_video does.
    output_image = imageio.imread(image_out_file)
    imshow(output_image)
    pylab.show()

    return out_scores, out_boxes, out_classes
def test(args):
    """Evaluate a restored DQN agent and record its average episode reward.

    Plays ``args.num_eps_test`` episodes with the network restored from
    ``args.ckpt_file`` (or the latest checkpoint in ``args.ckpt_dir``),
    prints progress to stdout, writes the mean reward as a TensorBoard
    summary under ``args.log_dir`` and, when ``args.results_file`` is set,
    appends a result line to that file inside ``args.results_dir``.

    Args:
        args: Options object. Reads ``env``, ``random_seed``,
            ``frame_height``, ``frame_width``, ``frames_per_state``,
            ``ckpt_dir``, ``ckpt_file``, ``log_dir``, ``num_eps_test``,
            ``max_initial_random_steps``, ``render``, ``max_ep_length``,
            ``results_file`` and ``results_dir``. It is also handed to
            ``StateBuffer`` and ``reset_env_and_state_buffer``.
    """
    # Create environment
    env = gym.make(args.env)
    num_actions = env.action_space.n

    # Set random seeds for reproducibility
    env.seed(args.random_seed)
    np.random.seed(args.random_seed)
    tf.set_random_seed(args.random_seed)

    # Initialise state buffer
    state_buf = StateBuffer(args)

    # Define input placeholders
    state_ph = tf.placeholder(
        tf.uint8,
        (None, args.frame_height, args.frame_width, args.frames_per_state))

    # Instantiate DQN network
    DQN = DeepQNetwork(num_actions, state_ph, scope='DQN_main')
    DQN_predict_op = DQN.predict()

    # Create session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    summary_writer = None
    try:
        # Load ckpt file
        loader = tf.train.Saver()
        if args.ckpt_file is not None:
            ckpt = args.ckpt_dir + '/' + args.ckpt_file
        else:
            ckpt = tf.train.latest_checkpoint(args.ckpt_dir)
        loader.restore(sess, ckpt)
        sys.stdout.write('%s restored.\n\n' % ckpt)
        sys.stdout.flush()

        # The text after the last '-' in the checkpoint name is used as the
        # training step/episode label for the logged results.
        ckpt_split = ckpt.split('-')
        train_ep = ckpt_split[-1]

        # Create summary writer to write summaries to disk
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)
        summary_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

        # Create summary op to save episode reward to Tensorboard log
        reward_var = tf.Variable(0.0, trainable=False)
        tf.summary.scalar("Average Test Reward", reward_var)
        summary_op = tf.summary.merge_all()

        ## Begin testing
        env.reset()
        rewards = []

        for test_ep in range(args.num_eps_test):
            # Reset environment and state buffer for next episode
            reset_env_and_state_buffer(env, state_buf, args)
            ep_reward = 0
            step = 0
            ep_done = False
            initial_steps = np.random.randint(
                1, args.max_initial_random_steps + 1)

            sys.stdout.write('\n')
            sys.stdout.flush()

            while not ep_done:
                if args.render:
                    env.render()
                else:
                    env.render(mode='rgb_array')

                # Random actions for the first few steps give every episode
                # a different starting point; afterwards follow the action
                # with the highest Q-value under the restored policy.
                if step < initial_steps:
                    test_action = env.action_space.sample()
                else:
                    test_state = np.expand_dims(state_buf.get_state(), 0)
                    test_action = sess.run(DQN_predict_op,
                                           {state_ph: test_state})

                test_frame, test_reward, test_ep_terminal, _ = env.step(
                    test_action)
                test_frame = preprocess_image(test_frame, args.frame_width,
                                              args.frame_height)
                state_buf.add(test_frame)

                ep_reward += test_reward
                step += 1

                sys.stdout.write(
                    '\x1b[2K\rTest episode {:d}/{:d} \t Steps = {:d} \t Reward = {:.2f}'
                    .format(test_ep, args.num_eps_test, step, ep_reward))
                sys.stdout.flush()

                # Episode ends on a terminal state or at the step limit.
                if test_ep_terminal or step == args.max_ep_length:
                    rewards.append(ep_reward)
                    ep_done = True

        mean_reward = np.mean(rewards)
        error_reward = ss.sem(rewards)

        sys.stdout.write(
            '\n\nTesting complete \t Average reward = {:.2f} +/- {:.2f} /ep \n\n'.
            format(mean_reward, error_reward))
        sys.stdout.flush()

        # Log average episode reward for Tensorboard visualisation
        summary_str = sess.run(summary_op, {reward_var: mean_reward})
        summary_writer.add_summary(summary_str, train_ep)

        # Write results to file
        if args.results_file is not None:
            if not os.path.exists(args.results_dir):
                os.makedirs(args.results_dir)
            # The context manager flushes and closes the handle.
            with open(args.results_dir + '/' + args.results_file,
                      'a') as output_file:
                output_file.write(
                    'Training Episode {}: \t Average reward = {:.2f} +/- {:.2f} /ep \n\n'
                    .format(train_ep, mean_reward, error_reward))
            sys.stdout.write('Results saved to file \n\n')
            sys.stdout.flush()
    finally:
        # Closing the writer flushes the pending summary event to disk.
        if summary_writer is not None:
            summary_writer.close()
        sess.close()
        env.close()
def _exploration_rate(current_step, exp_rate_start, exp_rate_end,
                      exp_step_end):
    """Return the epsilon for ``current_step`` under linear annealing.

    Epsilon moves linearly from ``exp_rate_start`` at step 0 to
    ``exp_rate_end`` at ``exp_step_end`` and stays at ``exp_rate_end``
    afterwards.
    """
    if current_step < exp_step_end:
        slope = (exp_rate_end - exp_rate_start) / float(exp_step_end)
        # The intercept is the configured start rate (it used to be a
        # hard-coded 1, which was only right for exp_rate_start == 1).
        return current_step * slope + exp_rate_start
    return exp_rate_end


def _update_target_network(from_scope, to_scope):
    """Build ops assigning the trainable vars of one scope to another."""
    from_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  from_scope)
    to_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, to_scope)
    return [
        to_var.assign(from_var) for from_var, to_var in zip(from_vars, to_vars)
    ]


def train(args):
    """Train a DQN agent with experience replay and a target network.

    The replay memory is first filled with ``args.initial_replay_mem_size``
    random-action transitions. Each training step then plays
    ``args.train_frequency`` environment steps with an epsilon-greedy policy,
    samples a minibatch and runs one optimisation step. Summaries, target
    network updates and checkpoints are produced every
    ``args.save_log_step``, ``args.update_target_step`` and
    ``args.save_ckpt_step`` steps respectively.

    Args:
        args: Options object. Reads ``env``, ``frame_height``,
            ``frame_width``, ``frames_per_state``, ``learning_rate``,
            ``epsilon_start``, ``epsilon_end``, ``epsilon_step_end``,
            ``ckpt_dir``, ``ckpt_file``, ``log_dir``,
            ``initial_replay_mem_size``, ``render``, ``num_steps_train``,
            ``train_frequency``, ``max_ep_steps``, ``discount_rate``,
            ``save_log_step``, ``update_target_step`` and
            ``save_ckpt_step``. It is also handed to ``ReplayMemory``,
            ``StateBuffer`` and ``reset_env_and_state_buffer``.
    """
    # Create environment
    env = gym.make(args.env)
    num_actions = env.action_space.n

    # Initialise replay memory and state buffer
    replay_mem = ReplayMemory(args)
    state_buf = StateBuffer(args)

    # Define input placeholders
    state_ph = tf.placeholder(
        tf.uint8,
        (None, args.frame_height, args.frame_width, args.frames_per_state))
    action_ph = tf.placeholder(tf.int32, (None))
    target_ph = tf.placeholder(tf.float32, (None))

    # Instantiate DQN network.
    # Note: One scope cannot be the prefix of another scope (e.g. cannot name
    # this scope 'DQN' and target network scope 'DQN_target', as a search for
    # vars in 'DQN' scope will return both networks' vars)
    DQN = DeepQNetwork(num_actions,
                       state_ph,
                       action_ph,
                       target_ph,
                       args.learning_rate,
                       scope='DQN_main')
    DQN_predict_op = DQN.predict()
    DQN_train_step_op = DQN.train_step()

    # Instantiate DQN target network
    DQN_target = DeepQNetwork(num_actions, state_ph, scope='DQN_target')
    update_target_op = _update_target_network('DQN_main', 'DQN_target')

    # Create session
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Add summaries for Tensorboard visualisation
    tf.summary.scalar('Loss', DQN.loss)
    reward_var = tf.Variable(0.0, trainable=False)
    tf.summary.scalar("Episode Reward", reward_var)
    epsilon_var = tf.Variable(args.epsilon_start, trainable=False)
    tf.summary.scalar("Exploration Rate", epsilon_var)
    summary_op = tf.summary.merge_all()

    # Define saver for saving model ckpts
    model_name = 'model.ckpt'
    checkpoint_path = os.path.join(args.ckpt_dir, model_name)
    if not os.path.exists(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)
    saver = tf.train.Saver(max_to_keep=201)

    # Create summary writer to write summaries to disk
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    summary_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

    # Load ckpt file if given
    if args.ckpt_file is not None:
        loader = tf.train.Saver()  # Restore all variables from ckpt
        ckpt = args.ckpt_dir + '/' + args.ckpt_file
        # The number after the last '-' in the file name is the step the
        # checkpoint was saved at; training resumes from there.
        ckpt_split = ckpt.split('-')
        step_str = ckpt_split[-1]
        start_step = int(step_str)
        loader.restore(sess, ckpt)
    else:
        start_step = 0
        sess.run(tf.global_variables_initializer())
        sess.run(update_target_op)

    ## Begin training
    env.reset()

    ep_steps = 0
    episode_reward = 0
    episode_rewards = []
    duration_values = []
    # Last average that could be computed; logged again when no episode
    # finished between two log steps.
    average_reward = 0.0

    # Initially populate replay memory by taking random actions
    sys.stdout.write('\nPopulating replay memory with random actions...\n')
    sys.stdout.flush()

    for random_step in range(1, args.initial_replay_mem_size + 1):
        if args.render:
            env.render()
        else:
            env.render(mode='rgb_array')

        action = env.action_space.sample()
        frame, reward, terminal, _ = env.step(action)
        frame = preprocess_image(frame, args.frame_width, args.frame_height)
        replay_mem.add(action, reward, frame, terminal)

        if terminal:
            env.reset()

        sys.stdout.write('\x1b[2K\rStep {:d}/{:d}'.format(
            random_step, args.initial_replay_mem_size))
        sys.stdout.flush()

    # Begin training process
    reset_env_and_state_buffer(env, state_buf, args)
    sys.stdout.write('\n\nTraining...\n\n')
    sys.stdout.flush()

    for train_step in range(start_step + 1, args.num_steps_train + 1):
        start_time = time.time()

        # Epsilon depends only on the training step, so compute it once per
        # step rather than once per environment step.
        epsilon = _exploration_rate(train_step, args.epsilon_start,
                                    args.epsilon_end, args.epsilon_step_end)

        # Run 'train_frequency' iterations in the game for every training step
        for _ in range(0, args.train_frequency):
            ep_steps += 1

            if args.render:
                env.render()
            else:
                env.render(mode='rgb_array')

            # Use an epsilon-greedy policy to select action
            if random.random() < epsilon:
                # Choose random action
                action = env.action_space.sample()
            else:
                # Choose action with highest Q-value according to network's
                # current policy
                current_state = np.expand_dims(state_buf.get_state(), 0)
                action = sess.run(DQN_predict_op, {state_ph: current_state})

            # Take action and store experience
            frame, reward, terminal, _ = env.step(action)
            frame = preprocess_image(frame, args.frame_width,
                                     args.frame_height)
            state_buf.add(frame)
            replay_mem.add(action, reward, frame, terminal)
            episode_reward += reward

            if terminal or ep_steps == args.max_ep_steps:
                # Collect total reward of episode
                episode_rewards.append(episode_reward)
                # Reset episode reward and episode steps counters
                episode_reward = 0
                ep_steps = 0
                # Reset environment and state buffer for next episode
                reset_env_and_state_buffer(env, state_buf, args)

        ## Training step
        # Get minibatch from replay mem
        (states_batch, actions_batch, rewards_batch, next_states_batch,
         terminals_batch) = replay_mem.getMinibatch()

        # Calculate target by passing next states through the target network
        # and finding max future Q
        future_Q = sess.run(DQN_target.output, {state_ph: next_states_batch})
        max_future_Q = np.max(future_Q, axis=1)
        # Q values of the terminal states is 0 by definition
        max_future_Q[terminals_batch] = 0
        targets = rewards_batch + (max_future_Q * args.discount_rate)

        # Execute training step
        if train_step % args.save_log_step == 0:
            # Train and save logs. If no episode ended since the last log
            # step, keep the previous average instead of dividing by zero.
            if episode_rewards:
                average_reward = sum(episode_rewards) / len(episode_rewards)
            summary_str, _ = sess.run(
                [summary_op, DQN_train_step_op], {
                    state_ph: states_batch,
                    action_ph: actions_batch,
                    target_ph: targets,
                    reward_var: average_reward,
                    epsilon_var: epsilon
                })
            summary_writer.add_summary(summary_str, train_step)
            # Reset rewards buffer
            episode_rewards = []
        else:
            # Just train
            _ = sess.run(
                DQN_train_step_op, {
                    state_ph: states_batch,
                    action_ph: actions_batch,
                    target_ph: targets
                })

        # Update target networks
        if train_step % args.update_target_step == 0:
            sess.run(update_target_op)

        # Calculate time per step and display progress to console
        duration = time.time() - start_time
        duration_values.append(duration)
        ave_duration = sum(duration_values) / float(len(duration_values))

        sys.stdout.write('\x1b[2K\rStep {:d}/{:d} \t ({:.3f} s/step)'.format(
            train_step, args.num_steps_train, ave_duration))
        sys.stdout.flush()

        # Save checkpoint
        if train_step % args.save_ckpt_step == 0:
            saver.save(sess, checkpoint_path, global_step=train_step)
            sys.stdout.write('\n Checkpoint saved\n')
            sys.stdout.flush()

            # Reset time calculation
            duration_values = []
agg_image_name = (image_name + "_agg_tm" + str(time_tag(time.time())) +
                  IMAGE_FILE_SUFFIX)

# Load the original image and bring it into the preprocessed domain.
global_original_image = cv2.imread(
    SERVER_LOCAL_ORIGINAL_IMAGES_PATH + image_name + '.jpg', 1)
preprocess_global_original_image = preprocess_image(global_original_image)

# Collect the noise each local adversarial image adds to the original.
server_local_adv_noise = []
for adv_image_name in adv_image_name_list:
    local_adv_image = cv2.imread(
        SERVER_LOCAL_ADV_IMAGES_PATH + adv_image_name + '.jpg', 1)
    # Preprocess the adversarial image itself (not the original again) so
    # both operands of the subtraction are in the same domain.
    preprocess_local_adv_image = preprocess_image(local_adv_image)
    server_local_adv_noise.append(preprocess_local_adv_image -
                                  preprocess_global_original_image)

# aggregation
global_aggregated_image = aggregate_adv_noise(
    preprocess_global_original_image, server_local_adv_noise)
recreated_global_aggregated_image = recreate_image(global_aggregated_image)

# Save the aggregated image to local storage first
cv2.imwrite(SERVER_LOCAL_AGGREGATED_IMAGES_PATH + agg_image_name,
            recreated_global_aggregated_image)
import numpy as np

from ssd import SSD300
from utils.utils import preprocess_image, read_image, resize_image

# Build the SSD300 network and load its trained weights.
weights_filename = '../trained_models/model_checkpoints/weights.hdf5'
model = SSD300()
model.load_weights(weights_filename)

# The network's spatial input size is taken from its input shape.
image_size = model.input_shape[1:3]

# Read one image, resize it to the network input size, cast it to float32,
# add a leading batch axis and apply the project's preprocessing.
image_filename = '../images/008745.jpg'
image_array = resize_image(read_image(image_filename), image_size)
image_array = preprocess_image(
    np.expand_dims(image_array.astype('float32'), 0))

# Run the model and drop singleton axes from its output.
predictions = np.squeeze(model.predict([image_array]))

# Columns from index 4 onwards are treated as per-class scores; rows whose
# best-scoring class index is not 0 are marked in the mask.
predicted_classes = predictions[:, 4:]
best_classes = np.argmax(predicted_classes, axis=1)
positive_mask = best_classes != 0
######################################################################
# Adversarial image generation starts here.
#
# Inputs for the current batch:
#   image_list      - the images
#   label_list      - their labels
#   identifier_list - their identifiers
#
# Outputs expected by the code that follows:
#   adv_image_list      - generated adversarial images, AS A NUMPY ARRAY
#   adv_label_list      - original labels of the batch
#   adv_identifier_list - identifiers of the batch
#
# As long as the image order is left untouched,
# "adv_identifier_list == identifier_list" and "adv_label_list == label_list".

# Turn the batch into network inputs.
_preprocessed_batch = [
    preprocess_image(np.array(image)) for image in image_list
]
x = torch.stack(_preprocessed_batch)
y = torch.from_numpy(np.array(label_list)).long()

# Attack set-up for the local model and device (toy example).
Generate_Adv = GenAdv(net, device, F.cross_entropy)
adv_image_list, adv_label_list = Generate_Adv.generate_adv(x, y)

# Map the transformed adversarial tensors back to images.
_recreated_batch = [recreate_image(adv_image) for adv_image in adv_image_list]
adv_image_list = np.stack(_recreated_batch)
adv_label_list = adv_label_list.numpy().astype(int)
adv_identifier_list = np.array(identifier_list)

# File name under which the adversarial batch is stored locally,
# e.g. "adv0_tm425635841.h5".
adv_file_name = f"adv{str(adv_num)}_tm{str(time_tag(time.time()))}.h5"
def test_kitti_depth(data_list_file,
                     img_dir,
                     height,
                     width,
                     restore_dp_model,
                     save_dir,
                     depth_test_split='eigen',
                     num_input_threads=8):
    """Predict depth for every listed image and save the stack as ``.npy``.

    Images are read through ``BasicDataset``, resized to
    ``(height, width)``, preprocessed and fed to ``D_Net``. Depth is the
    reciprocal of the first predicted disparity. All predictions are
    written to ``<save_dir>/test_kitti.npy``.

    Args:
        data_list_file: File listing the images to evaluate.
        img_dir: Directory the images are loaded from.
        height: Image height fed to the network.
        width: Image width fed to the network.
        restore_dp_model: Checkpoint the network weights are restored from.
        save_dir: Output directory (created if missing).
        depth_test_split: Not used in this function; kept for callers.
        num_input_threads: Passed as ``num_parallel_calls`` to the iterator.
    """
    print('[Info] Evaluate kitti depth...')
    print('[Info] Reading datalist from:', data_list_file)
    print('[Info] Loading images from:', img_dir)
    print('[Info] Reshaing image to size: ({}, {})'.format(height, width))

    dataset = BasicDataset(img_height=height,
                           img_width=width,
                           data_list_file=data_list_file,
                           img_dir=img_dir)
    iterator = dataset.create_one_shot_iterator_depth_kitti(
        dataset.data_list, num_parallel_calls=num_input_threads)

    img1 = iterator.get_next()
    img1 = tf.image.resize_images(img1, [height, width], method=0)
    img1 = preprocess_image(img1)

    with tf.variable_scope(tf.get_variable_scope()):
        pred_disp_1, _ = D_Net(img1, is_training=False, reuse=False)
        # Depth is the reciprocal of disparity; only the first entry of
        # the list is evaluated below.
        pred_depth_1 = [1. / disp for disp in pred_disp_1]
        pred_depth_1 = pred_depth_1[0]

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    restore_vars = tf.trainable_variables()
    # Add batchnorm variables.
    bn_vars = [
        v for v in tf.global_variables()
        if 'moving_mean' in v.op.name or 'moving_variance' in v.op.name
        or 'mu' in v.op.name or 'sigma' in v.op.name
    ]
    restore_vars.extend(bn_vars)

    print('[Info] Restoring model:', restore_dp_model)
    init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
        restore_dp_model, restore_vars)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False))
    try:
        sess.run(iterator.initializer)
        sess.run(init_assign_op, init_feed_dict)

        depth_path = "%s/depth" % save_dir
        if not os.path.exists(depth_path):
            os.mkdir(depth_path)

        pred_all = []
        print("[Info] Data number:", dataset.data_num)
        start_time = time.time()
        for _ in range(dataset.data_num):
            np_pred_depth_1 = sess.run(pred_depth_1)
            # Keep the first batch item and first channel as a 2-D map.
            np_pred_depth_1 = np_pred_depth_1[0, :, :, 0]
            pred_all.append(np_pred_depth_1)
        print("[Info] FPS: %.3f" %
              (dataset.data_num / (time.time() - start_time)))
    finally:
        # Release the session's resources even if inference fails.
        sess.close()

    save_path = save_dir + '/test_kitti'
    print('[Info] Saving to {}.npy'.format(save_path))
    np.save(save_path, pred_all)
def predictcmnd(self, image):
    """Crop and align an ID-card image, then detect its information fields.

    Two requests are sent through ``self.stub``: one to
    ``cropper_cmnd_model`` to locate the card, and one to
    ``detector_cmnd_model`` on the aligned image.

    Args:
        image: Input passed to ``preprocess_image`` together with
            ``Cropper.TARGET_SIZE``.

    Returns:
        The detector's ``info_images`` attribute, or ``None`` when the
        cropper reports status ``-1`` or when either model request fails.
    """
    # ---- Crop and align id card image ----
    request = predict_pb2.PredictRequest()
    # model_name
    request.model_spec.name = "cropper_cmnd_model"
    # signature name, default is 'serving_default'
    request.model_spec.signature_name = "serving_default"

    # preprocess image
    img, original_image, original_width, original_height = preprocess_image(
        image, Cropper.TARGET_SIZE)
    if img.ndim == 3:
        # add a leading batch axis
        img = np.expand_dims(img, axis=0)

    # request to cropper model
    request.inputs["input_1"].CopyFrom(
        tf.make_tensor_proto(img, dtype=np.float32, shape=img.shape))
    try:
        result = self.stub.Predict(request, 10.0)
        result = result.outputs["tf_op_layer_concat_14"].float_val
        result = np.array(result).reshape((-1, 9))
    except Exception as e:
        print("Cropper cmnd = ", e)
        # Without a cropper result there is nothing valid to continue with.
        return None

    cropper = Cropper()
    cropper.set_best_bboxes(result,
                            original_width=original_width,
                            original_height=original_height,
                            iou_threshold=0.5)

    # Query the cropper verdict once and branch on it.
    # NOTE(review): assumes respone_client has no side effects that relied
    # on it being called twice - confirm against Cropper.
    cropper_status = cropper.respone_client(threshold_idcard=0.8)
    if cropper_status == -1:
        # response to client if image is invalid
        return None
    if cropper_status == 0:
        # no cropping: use the original image as the aligned image
        aligned_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    else:
        cropper.set_image(original_image=original_image)
        # output of cropper part
        aligned_image = cropper.image_output
        cv2.imwrite('storage/c.jpg', aligned_image)
        aligned_image = cv2.cvtColor(aligned_image, cv2.COLOR_BGR2RGB)

    # ---- Detect informations in aligned image ----
    # preprocess aligned image
    original_height, original_width, _ = aligned_image.shape
    img = cv2.resize(aligned_image, Detector.TARGET_SIZE)
    img = np.float32(img / 255.)

    # model_name
    request.model_spec.name = "detector_cmnd_model"
    # signature name, default is 'serving_default'
    request.model_spec.signature_name = "serving_default"
    if img.ndim == 3:
        img = np.expand_dims(img, axis=0)

    # new request to detector model
    request.inputs["input_1"].CopyFrom(
        tf.make_tensor_proto(img, dtype=np.float32, shape=img.shape))
    try:
        result = self.stub.Predict(request, 10.0)
        result = result.outputs["tf_op_layer_concat_14"].float_val
        result = np.array(result).reshape((-1, 13))
    except Exception as e:
        print("Detect cmnd = ", e)
        # Do not fall through with the stale cropper result.
        return None

    detector = Detector()
    detector.set_best_bboxes(result,
                             original_width=original_width,
                             original_height=original_height,
                             iou_threshold=0.5)
    detector.set_info_images(original_image=aligned_image)

    # output of detector part
    return detector.info_images
def train(self,
          train_mode=None,
          retrain=False,
          cont_model=None,
          restore_flow_model=None):
    """Build the multi-GPU training graph and run the training loop.

    Args:
        train_mode: Stored in ``self.mode``. ``"train_flow"`` and
            ``"train_dp"`` select how the input batch is loaded and split;
            ``self.mode`` also selects which variables are trained
            (``'train_flow'``, ``'train_dp'`` or ``'train_all'``).
        retrain: When true, ``global_step`` is reset to 0 after restoring.
        cont_model: Checkpoint to continue training from. When ``None``,
            weights are restored from ``restore_flow_model`` in the
            ``'train_dp'`` and ``'train_all'`` modes.
        restore_flow_model: Checkpoint holding pretrained weights.
    """
    # Fix every random source so that runs are repeatable.
    seed = 8964
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    self.mode = train_mode
    """ Step 1. Loading the training data """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        # Staircase exponential decay of the learning rate.
        lr_decay = tf.train.exponential_decay(self.initial_learning_rate,
                                              global_step,
                                              decay_steps=self.decay_steps,
                                              decay_rate=self.decay_rate,
                                              staircase=True)
        optim = tf.train.AdamOptimizer(lr_decay, self.beta1)
        # One list of (gradient, variable) pairs per GPU tower.
        tower_grads = []

        # NOTE(review): `loader` stays undefined for any other
        # self.dataset value - confirm only these two are supported.
        if self.dataset == 'airsim':
            loader = AirSim_DataLoader(
                dataset_dir=self.dataset_config['img_dir'],
                img_height=self.img_height,
                img_width=self.img_width,
                batch_size=self.batch_size,
                num_scales=self.num_scales,
                num_source=self.num_source,
                ext=self.dataset_config['ext'],
                mode=train_mode)
        elif self.dataset == 'kitti':
            loader = DataLoader(dataset_dir=self.dataset_config['img_dir'],
                                img_height=self.img_height,
                                img_width=self.img_width,
                                batch_size=self.batch_size,
                                num_scales=self.num_scales,
                                num_source=self.num_source,
                                ext=self.dataset_config['ext'],
                                mode=train_mode)

        if train_mode == "train_flow":
            self.tgt_image, self.src_image_stack = loader.load_train_batch()
            # Depth inputs: Feed photometric augmented image, [-1, 1]
            self.tgt_image = preprocess_image(self.tgt_image)
            self.src_image_stack = preprocess_image(self.src_image_stack)

            # Split the batch along axis 0 into one slice per GPU.
            split_tgt_image = tf.split(axis=0,
                                       num_or_size_splits=self.num_gpus,
                                       value=self.tgt_image)
            split_src_image_stack = tf.split(
                axis=0,
                num_or_size_splits=self.num_gpus,
                value=self.src_image_stack)
            # This mode loads no normalised images or projection matrices;
            # placeholders keep the per-tower indexing below valid.
            split_tgt_image_norm = [None] * self.num_gpus
            split_src_image_stack_norm = [None] * self.num_gpus
            split_cam2pix = [None] * self.num_gpus
            split_pix2cam = [None] * self.num_gpus
        elif train_mode == "train_dp":
            self.image_stack, self.image_stack_norm, self.proj_cam2pix, self.proj_pix2cam = loader.load_train_batch()

            # Frames are stacked along the channel axis, 3 channels each;
            # the middle frame is the target and the rest are sources.
            if self.num_source == 2:
                """ 3 frames """
                self.tgt_image = self.image_stack[:, :, :, 3:6]
                src0_image = self.image_stack[:, :, :, 0:3]
                src1_image = self.image_stack[:, :, :, 6:9]
                self.src_image_stack = tf.concat([src0_image, src1_image],
                                                 axis=3)

                self.tgt_image_norm = self.image_stack_norm[:, :, :, 3:6]
                src0_image_norm = self.image_stack_norm[:, :, :, 0:3]
                src1_image_norm = self.image_stack_norm[:, :, :, 6:9]
                self.src_image_stack_norm = tf.concat(
                    [src0_image_norm, src1_image_norm], axis=3)
            elif self.num_source == 4:
                """ 5 frames """
                self.tgt_image = self.image_stack[:, :, :, 6:9]
                src0_image = self.image_stack[:, :, :, 0:3]
                src1_image = self.image_stack[:, :, :, 3:6]
                src2_image = self.image_stack[:, :, :, 9:12]
                src3_image = self.image_stack[:, :, :, 12:15]
                self.src_image_stack = tf.concat(
                    [src0_image, src1_image, src2_image, src3_image], axis=3)

                self.tgt_image_norm = self.image_stack_norm[:, :, :, 6:9]
                src0_image_norm = self.image_stack_norm[:, :, :, 0:3]
                src1_image_norm = self.image_stack_norm[:, :, :, 3:6]
                src2_image_norm = self.image_stack_norm[:, :, :, 9:12]
                src3_image_norm = self.image_stack_norm[:, :, :, 12:15]
                self.src_image_stack_norm = tf.concat([
                    src0_image_norm, src1_image_norm, src2_image_norm,
                    src3_image_norm
                ],
                                                      axis=3)

            # Split every input along axis 0 into one slice per GPU.
            split_tgt_image = tf.split(axis=0,
                                       num_or_size_splits=self.num_gpus,
                                       value=self.tgt_image)
            split_src_image_stack = tf.split(
                axis=0,
                num_or_size_splits=self.num_gpus,
                value=self.src_image_stack)
            split_tgt_image_norm = tf.split(
                axis=0,
                num_or_size_splits=self.num_gpus,
                value=self.tgt_image_norm)
            split_src_image_stack_norm = tf.split(
                axis=0,
                num_or_size_splits=self.num_gpus,
                value=self.src_image_stack_norm)
            split_cam2pix = tf.split(axis=0,
                                     num_or_size_splits=self.num_gpus,
                                     value=self.proj_cam2pix)  # K
            split_pix2cam = tf.split(axis=0,
                                     num_or_size_splits=self.num_gpus,
                                     value=self.proj_pix2cam)  # K_inverse

        # Summaries registered so far (outside the towers).
        summaries_cpu = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                          tf.get_variable_scope().name)
        """ Step 2. Building model """
        # The model class depends on how many source frames are used.
        if self.num_source == 2:
            print("[!] Loading the model for 3 frames...")
            from model.model_3frames import Model
        elif self.num_source == 4:
            print("[!] Loading the model for 5 frames...")
            from model.model_5frames import Model

        with tf.variable_scope(tf.get_variable_scope()) as vs:
            print('variable_scope(vs):', vs.name)
            for i in xrange(self.num_gpus):  #0 1
                with tf.device('/gpu:%d' % i):
                    # The last tower is named "model"; the others are
                    # named "tower_<i>".
                    if i == self.num_gpus - 1:  #1
                        scopename = "model"
                    else:
                        scopename = '%s_%d' % ("tower", i)
                    with tf.name_scope(scopename) as ns:
                        if i == 0:
                            # Build models: the first tower creates the
                            # variables (reuse_scope=False).
                            model = Model(
                                split_tgt_image[i],
                                split_src_image_stack[i],
                                split_tgt_image_norm[i],
                                split_src_image_stack_norm[i],
                                split_cam2pix[i],
                                split_pix2cam[i],
                                batch_size=self.batch_size_per_gpu,
                                img_height=self.img_height,
                                img_width=self.img_width,
                                mode=train_mode,
                                reuse_scope=False,
                                scope=vs)
                            # Group the trainable variables by sub-network
                            # using scope-name patterns.
                            var_pose = list(
                                set(
                                    tf.get_collection(
                                        tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope=".*pose_net.*")))
                            var_depth = list(
                                set(
                                    tf.get_collection(
                                        tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope=
                                        ".*(depth_net|feature_net_disp).*")
                                ))
                            var_flow = list(
                                set(
                                    tf.get_collection(
                                        tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope=
                                        ".*(flow_net|feature_net_flow).*"))
                            )

                            # Variables optimised in the current mode.
                            if self.mode == 'train_flow':
                                var_train_list = var_flow
                            elif self.mode == 'train_dp':
                                var_train_list = var_pose + var_depth
                            elif self.mode == 'train_all':
                                var_train_list = var_pose + var_depth + var_flow
                        else:
                            # Later towers reuse the first tower's variables.
                            model = Model(
                                split_tgt_image[i],
                                split_src_image_stack[i],
                                split_tgt_image_norm[i],
                                split_src_image_stack_norm[i],
                                split_cam2pix[i],
                                split_pix2cam[i],
                                batch_size=self.batch_size_per_gpu,
                                img_height=self.img_height,
                                img_width=self.img_width,
                                mode=train_mode,
                                reuse_scope=True,
                                scope=vs)

                        # Parameter Count: sum of element counts per group.
                        param_total = tf.reduce_sum([
                            tf.reduce_prod(tf.shape(v))
                            for v in var_train_list
                        ])
                        param_depth = tf.reduce_sum([
                            tf.reduce_prod(tf.shape(v)) for v in var_depth
                        ])
                        param_pose = tf.reduce_sum([
                            tf.reduce_prod(tf.shape(v)) for v in var_pose
                        ])
                        param_flow = tf.reduce_sum([
                            tf.reduce_prod(tf.shape(v)) for v in var_flow
                        ])

                        # get loss
                        loss = model.losses

                        # Retain the summaries from the final tower.
                        if i == self.num_gpus - 1:
                            summaries = tf.get_collection(
                                tf.GraphKeys.SUMMARIES, ns)

                        # Calculate the gradients for the batch of data on
                        # this tower.
                        grads = optim.compute_gradients(
                            loss, var_list=var_train_list)

                        # Keep track of the gradients across all towers.
                        tower_grads.append(grads)

        grads = average_gradients(tower_grads)

        # Apply the gradients to adjust the shared variables; the ops in
        # the UPDATE_OPS collection run before the gradient step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            apply_gradient_op = optim.apply_gradients(
                grads, global_step=global_step)

        # Create a saver.
        saver = tf.train.Saver(max_to_keep=50)

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries + summaries_cpu)

        # Make training session.
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        sess = tf.Session(config=config)

        summary_writer = tf.summary.FileWriter(logdir='/'.join(
            [self.summary_dir, 'train', self.model_name]),
                                               graph=sess.graph,
                                               flush_secs=10)

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # Report parameter counts in millions.
        print("[Info] Model size: {:.5f}M".format(
            sess.run(param_total) / 1000000.0))
        print("[Info] Depth net size: {:.5f}M".format(
            sess.run(param_depth) / 1000000.0))
        print("[Info] Pose net size: {:.5f}M".format(
            sess.run(param_pose) / 1000000.0))
        print("[Info] Flow net size: {:.5f}M".format(
            sess.run(param_flow) / 1000000.0))

        if cont_model != None:
            """ Continue training from a checkpoint """
            print("[Info] Continue training. Restoreing:", cont_model)
            # This saver replaces the one above and is also the one used
            # for the checkpoints saved in the training loop.
            saver = tf.train.Saver(max_to_keep=10)
            saver.restore(sess, cont_model)
        else:
            if self.mode == 'train_dp':
                # Restore only the flow-related model variables.
                print("[Info] Restoreing pretrained flow weights from:",
                      restore_flow_model)
                saver_flow = tf.train.Saver(tf.get_collection(
                    tf.GraphKeys.MODEL_VARIABLES,
                    scope=".*(flow_net|feature_net_flow).*"),
                                            max_to_keep=1)
                saver_flow.restore(sess, restore_flow_model)
            elif self.mode == 'train_all':
                # Restore every global variable except those whose scope
                # matches the Adam patterns.
                print("[Info] Restoreing:", restore_flow_model)
                saver_rest = tf.train.Saver(list(
                    set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)) -
                    set(
                        tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope=".*(Adam_1|Adam).*"))),
                                            max_to_keep=1)
                saver_rest.restore(sess, restore_flow_model)

        if retrain:
            sess.run(global_step.assign(0))

        # Resume the iteration counter from the current global step.
        start_itr = global_step.eval(session=sess)
        tf.train.start_queue_runners(sess)
        """ Step 3. Training """
        steps_per_epoch = loader.steps_per_epoch
        last_summary_time = time.time()
        start_time = time.time()
        for itr in range(start_itr, self.iter_steps):
            fetches = {
                'train_op': apply_gradient_op,
                'grads': grads,
                'global_step': global_step,
                'lr_decay': lr_decay
            }
            # Summaries and the loss are fetched only on the iterations
            # where they are written or printed.
            if np.mod(itr, self.write_summary_interval) == 0:
                fetches['summary_str'] = summary_op
                fetches['summary_scalar_str'] = model.summ_op

            if np.mod(itr, self.display_log_interval) == 0:
                fetches['loss'] = loss

            results = sess.run(fetches)
            gs = results['global_step']

            if np.mod(itr, self.write_summary_interval) == 0:
                summary_writer.add_summary(results['summary_scalar_str'],
                                           itr)
                summary_writer.add_summary(results['summary_str'], itr)

            if np.mod(itr, self.display_log_interval) == 0:
                # NOTE(review): under Python 2 without true division,
                # gs / steps_per_epoch would floor before ceil - confirm.
                train_epoch = math.ceil(gs / steps_per_epoch)
                train_step = gs - (train_epoch - 1) * steps_per_epoch
                # Wall time elapsed since the previous log line.
                this_cycle = time.time() - last_summary_time
                last_summary_time += this_cycle
                print('Epoch: [%2d] [%5d/%5d] total steps:[%6d] lr:[%2.8f] time: %4.2fs (%ds total) loss: %.3f' % \
                    (train_epoch, train_step, steps_per_epoch, gs, results['lr_decay'], this_cycle, time.time() - start_time, results['loss']))

            # Save a checkpoint whenever itr is a multiple of
            # steps_per_epoch (this includes itr == 0).
            if np.mod(itr, steps_per_epoch) == 0:
                print('[Info] Saving checkpoint to %s ...' %
                      self.checkpoint_dir)
                saver.save(sess,
                           '/'.join([
                               self.checkpoint_dir, self.model_name, 'model'
                           ]),
                           global_step=gs)