def main():
    object_dir = os.path.join(dataset_dir, dataset)
    f_rgb, f_label, f_line = [], [], []
    for line in open(split_file, 'r').readlines():
        line = line[:-1]  # remove '\n'
        f_rgb.append(os.path.join(object_dir, 'image_2', line + '.png'))
        f_label.append(os.path.join(object_dir, 'label_2', line + '.txt'))
        f_line.append(line)
    width, height = 64, 64
    max_error = 0.1
    jitter = 0.1
    hue = 0.1
    saturation = 1.5
    exposure = 1.5
    for load_index in range(1, 5):  # range(len(f_rgb)):
        warn("{} / {}".format(load_index, len(f_rgb)))
        img = cv2.imread(f_rgb[load_index])
        save_dir = os.path.join(dataset_dir, dataset)
        org_imgs, cropped_img, ious = image_augmentation(
            f_rgb[load_index], f_label[load_index],
            width, height, jitter, hue, saturation, exposure)
        save_file = os.path.join(test_img_save_dir, '{}.png'.format(f_line[load_index]))
        label = np.array([line for line in open(f_label[load_index], 'r').readlines()])
        # (N', 4) x_min, y_min, x_max, y_max
        gt_box2d = label_to_gt_box2d(np.array(label)[np.newaxis, :],
                                     cls=cfg.DETECT_OBJ, coordinate='lidar')[0]
        img = draw_bbox2d_on_image(img, gt_box2d)
        cv2.imwrite(save_file, img)
        for index in range(len(cropped_img)):
            save_file = os.path.join(test_img_save_dir,
                                     '{}_{}.png'.format(f_line[load_index], index))
            cv2.imwrite(save_file, cropped_img[index])
def main():
    ratio = 0.85  # 0.85 of the files for training, the remaining 0.15 for validation
    cur_dir = get_cur_dir()
    dataset_dir = os.path.join(cur_dir, 'data/object/training')
    # img_dir = os.path.join(dataset_dir, 'image_2')
    files = glob.glob(os.path.join(dataset_dir, 'image_2', '*.png'))
    files.sort()
    files = [file.split('/')[-1].split('.')[-2] for file in files]
    np.random.shuffle(files)
    warn("file: {}".format(files))
    warn("total : {}".format(len(files)))
    num_train = int(ratio * len(files))
    train_set = files[:num_train]
    valid_set = files[num_train:]
    warn("train: {}".format(len(train_set)))
    warn("valid: {}".format(len(valid_set)))
    nt = len(train_set)
    nv = len(valid_set)
    with open('trainset.txt', 'w+') as f:
        for idx in range(nt):
            f.write(train_set[idx] + '\n')
    with open('validset.txt', 'w+') as f:
        for idx in range(nv):
            f.write(valid_set[idx] + '\n')
    warn("total: {}".format(nt + nv))
def classifier_net(self, z1, z2, feat_size, latent_dim, cls_L, cls_batch_per_gpu):
    with tf.variable_scope("classifier") as scope:
        z1 = tf.reshape(z1, (cls_batch_per_gpu, -1, latent_dim))
        z2 = tf.reshape(z2, (cls_batch_per_gpu, -1, latent_dim))
        warn("z1: {}".format(np.shape(z1)))
        z_diff = U.sum(z1 - z2, axis=1) / cls_L
        warn("z_diff: {}".format(np.shape(z_diff)))
        x = U.dense(z_diff, feat_size, 'cls_fc1', U.normc_initializer(1.0))
        return x
def load_checkpoints(load_requested=True, checkpoint_dir=get_cur_dir()):
    saver = tf.train.Saver(max_to_keep=None)
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    chkpoint_num = 0
    if checkpoint and checkpoint.model_checkpoint_path and load_requested == True:
        saver.restore(get_session(), checkpoint.model_checkpoint_path)
        chk_file = checkpoint.model_checkpoint_path.split('/')
        chk_file = chk_file[-1]
        chk_file = chk_file.split('-')
        chkpoint_num = int(chk_file[-1])
        warn("loaded checkpoint: {0}".format(checkpoint.model_checkpoint_path))
    else:
        warn("Could not find old checkpoint")
        if not os.path.exists(checkpoint_dir):
            mkdir_p(checkpoint_dir)
    return saver, chkpoint_num
def fill_queue(self):
    load_index = self.load_index
    self.load_index += 1
    if self.load_index >= self.num_file:
        if not self.is_testset:  # test set just ends
            if self.require_shuffle:
                self.shuffle_dataset()
            load_index = 0
            self.load_index = load_index + 1
        else:
            self.work_exit.value = True
    labels, tag, rgb = [], [], []
    width, height = 64, 64
    max_error = 0.1
    jitter = 0.1
    hue = 0.1
    saturation = 1.5
    exposure = 1.5
    cropped_imgs, confs = image_augmentation(
        self.f_rgb[load_index], self.f_label[load_index],
        width, height, jitter, hue, saturation, exposure, self.ratioPosNeg)
    try:
        for idx in range(len(cropped_imgs)):
            self.dataset_queue.put_nowait((cropped_imgs[idx], confs[idx]))
        load_index += 1
    except:
        warn("fail")
        if not self.is_testset:  # test set just ends
            self.load_index = 0
            if self.require_shuffle:
                self.shuffle_dataset()
        else:
            self.work_exit.value = True
def tf_points_in_boxes(boxes, points):
    # boxes: N,4,2
    # points: N,NP,2
    # return: N,NP boolean indicating whether each point lies inside its box
    boxes_x = boxes[:, :, 0]  # N,4
    boxes_y = boxes[:, :, 1]  # N,4
    min_x = tf.reduce_min(boxes_x, axis=1)  # N,
    max_x = tf.reduce_max(boxes_x, axis=1)
    min_y = tf.reduce_min(boxes_y, axis=1)
    max_y = tf.reduce_max(boxes_y, axis=1)
    points_x = points[:, :, 0]  # N,NP
    points_y = points[:, :, 1]
    min_x = tf.expand_dims(min_x, -1)
    max_x = tf.expand_dims(max_x, -1)
    min_y = tf.expand_dims(min_y, -1)
    max_y = tf.expand_dims(max_y, -1)
    minx = tf.greater_equal(points_x, min_x)  # N,NP
    maxx = tf.less_equal(points_x, max_x)
    miny = tf.greater_equal(points_y, min_y)
    maxy = tf.less_equal(points_y, max_y)
    x_cond = tf.logical_and(minx, maxx)
    y_cond = tf.logical_and(miny, maxy)
    points_in_boxes = tf.logical_and(x_cond, y_cond)
    # inside = tf.ones_like(points_x)
    # outside = tf.zeros_like(points_x)
    # points_in_boxes = tf.where(rec_cond, inside, outside)
    warn("points_in_boxes: {}".format(np.shape(points_in_boxes)))
    return points_in_boxes
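# --- A minimal NumPy sanity check (not part of the pipeline) for the
# axis-aligned membership test above, using hypothetical example data:
# one box spanning [0,2]x[0,2] and two query points.
import numpy as np

boxes = np.array([[[0., 0.], [2., 0.], [2., 2.], [0., 2.]]])  # (N=1, 4, 2)
points = np.array([[[1., 1.], [3., 3.]]])                     # (N=1, NP=2, 2)
min_xy = boxes.min(axis=1, keepdims=True)                     # (1, 1, 2)
max_xy = boxes.max(axis=1, keepdims=True)
inside = np.all((points >= min_xy) & (points <= max_xy), axis=-1)
print(inside)  # [[ True False]]: (1,1) lies inside, (3,3) does not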
def image_augmentation(f_rgb, f_label, width, height, jitter, hue, saturation, exposure):
    rgb_imgs = []
    ious = []
    org_imgs = []
    label = np.array([line for line in open(f_label, 'r').readlines()])
    # (N', 4) x_min, y_min, x_max, y_max
    gt_box2d = label_to_gt_box2d(np.array(label)[np.newaxis, :],
                                 cls=cfg.DETECT_OBJ, coordinate='lidar')[0]
    img = cv2.imread(f_rgb)
    warn("img value: {}".format(img[:3, :3, :3]))
    img_height, img_width = img.shape[:2]
    for idx in range(len(gt_box2d)):
        box = gt_box2d[idx]
        x_min, y_min, x_max, y_max = box
        x_min = int(x_min)
        y_min = int(y_min)
        x_max = int(x_max)
        y_max = int(y_max)
        ori_img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max, x_min:x_max], (64, 64))
        org_imgs.append(ori_img)
        box_height = y_max - y_min
        box_width = x_max - x_min
        dx = int(jitter * box_width) + 1
        dy = int(jitter * box_height) + 1
        lx = np.random.randint(-dx, dx)
        ly = np.random.randint(-dy, dy)
        lw = np.random.randint(-dx, dx)
        lh = np.random.randint(-dy, dy)
        x = (x_max + x_min) / 2.0 + lx
        y = (y_max + y_min) / 2.0 + ly
        box_height = box_height + lh
        box_width = box_width + lw
        x_min = int(max(0, x - box_width / 2.0))
        x_max = int(min(img_width, x + box_width / 2.0))
        y_min = int(max(0, y - box_height / 2.0))
        y_max = int(min(img_height, y + box_height / 2.0))
        flip = np.random.randint(1, 10000) % 2
        img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max, x_min:x_max], (width, height))
        if flip:
            img = cv2.flip(img, 1)
        img = random_distort_image(img, hue, saturation, exposure)
        # for each ground-truth crop, calculate IoU against its original location and size
        iou = bbox_iou(box, (x_min, y_min, x_max, y_max), x1y1x2y2=True)
        rgb_imgs.append(img)
        ious.append(iou)
    # Randomly generate the same number (x4) of background candidates that will have
    # low or zero IoU. After generating each new box, its IoU against every gt_box2d
    # is calculated and used as the regression target: a low IoU means the box is
    # empty, background, or badly located; a high IoU means the box aligns well with
    # a 3D bounding box. This is the strategy for this simple, mini 2D classifier.
    for idx in range(len(gt_box2d) * 4):
        x = np.random.randint(0, img_width)
        y = np.random.randint(0, img_height)
        h = np.random.randint(40, 200)
        w = np.random.randint(40, 200)
        x_min = int(max(0, x - w / 2.0))
        x_max = int(min(img_width, x + w / 2.0))
        y_min = int(max(0, y - h / 2.0))
        y_max = int(min(img_height, y + h / 2.0))
        max_iou = 0
        for gt_idx in range(len(gt_box2d)):
            box = gt_box2d[gt_idx]
            iou = bbox_iou(box, (x_min, y_min, x_max, y_max), x1y1x2y2=True)
            if iou > max_iou:
                max_iou = iou
        img = cv2.resize(cv2.imread(f_rgb)[y_min:y_max, x_min:x_max], (width, height))
        if flip:
            img = cv2.flip(img, 1)
        img = random_distort_image(img, hue, saturation, exposure)
        rgb_imgs.append(img)
        ious.append(max_iou)  # best IoU over all ground-truth boxes, per the comment above
    return org_imgs, rgb_imgs, ious
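# --- bbox_iou is defined elsewhere in this repo; it is assumed to compute the
# standard intersection-over-union of two axis-aligned boxes given in
# (x_min, y_min, x_max, y_max) form when x1y1x2y2=True. A hedged sketch of that
# behavior, for reference only:
def bbox_iou_sketch(box1, box2):
    # overlap rectangle, clamped at zero when the boxes are disjoint
    iw = max(0.0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    ih = max(0.0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    inter = iw * ih
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return inter / (area1 + area2 - inter)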
def train_net(model, img_dir, max_iter=100000, check_every_n=20,
              save_model_freq=1000, batch_size=128):
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")
    mean_loss1 = U.mean(model.match_error)
    mean_loss2 = U.mean(model.reconst_error1)
    mean_loss3 = U.mean(model.reconst_error2)
    decoded_img = [model.reconst1, model.reconst2]
    weight_loss = [1, 1, 1]
    compute_losses = U.function([img1, img2], [mean_loss1, mean_loss2, mean_loss3])
    lr = 0.00001
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon=0.01 / batch_size)
    all_var_list = model.get_trainable_variables()
    img1_var_list = [v for v in all_var_list
                     if v.name.split("/")[1].startswith("proj1")
                     or v.name.split("/")[1].startswith("unproj1")]
    img2_var_list = [v for v in all_var_list
                     if v.name.split("/")[1].startswith("proj2")
                     or v.name.split("/")[1].startswith("unproj2")]
    img1_loss = mean_loss1 + mean_loss2
    img2_loss = mean_loss1 + mean_loss3
    optimize_expr1 = optimizer.minimize(img1_loss, var_list=img1_var_list)
    optimize_expr2 = optimizer.minimize(img2_loss, var_list=img2_var_list)
    img1_train = U.function([img1, img2], [mean_loss1, mean_loss2, mean_loss3],
                            updates=[optimize_expr1])
    img2_train = U.function([img1, img2], [mean_loss1, mean_loss2, mean_loss3],
                            updates=[optimize_expr2])
    get_reconst_img = U.function([img1, img2], decoded_img)
    U.initialize()
    name = "test"
    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, "chkfiles")
    log_save_dir = os.path.join(cur_dir, "log")
    test_img_saver_dir = os.path.join(cur_dir, "test_images")
    saver, chk_file_num = U.load_checkpoints(load_requested=True, checkpoint_dir=chk_save_dir)
    test_img_saver = Img_Saver(test_img_saver_dir)
    meta_saved = False
    iter_log = []
    loss1_log = []
    loss2_log = []
    loss3_log = []
    training_images_list = read_dataset(img_dir)
    for num_iter in range(chk_file_num + 1, max_iter):
        header("******* {}th iter: Img {} side *******".format(num_iter, num_iter % 2 + 1))
        idx = random.sample(range(len(training_images_list)), batch_size)
        batch_files = [training_images_list[i] for i in idx]
        [images1, images2] = load_image(dir_name=img_dir, img_names=batch_files)
        img1, img2 = images1, images2
        if num_iter % 2 == 0:
            [loss1, loss2, loss3] = img1_train(img1, img2)
        elif num_iter % 2 == 1:
            [loss1, loss2, loss3] = img2_train(img1, img2)
        warn("match_error: {}".format(loss1))
        warn("reconst_err1: {}".format(loss2))
        warn("reconst_err2: {}".format(loss3))
        warn("num_iter: {} check: {}".format(num_iter, check_every_n))
        if num_iter % check_every_n == 1:
            idx = random.sample(range(len(training_images_list)), 10)
            test_batch_files = [training_images_list[i] for i in idx]
            [images1, images2] = load_image(dir_name=img_dir, img_names=test_batch_files)
            [reconst1, reconst2] = get_reconst_img(images1, images2)
            for img_idx in range(len(images1)):
                sub_dir = "iter_{}".format(num_iter)
                save_img = np.squeeze(images1[img_idx])
                save_img = Image.fromarray(save_img)
                img_file_name = "{}_ori_2d.jpg".format(test_batch_files[img_idx])
                test_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                save_img = np.squeeze(images2[img_idx])
                save_img = Image.fromarray(save_img)
                img_file_name = "{}_ori_3d.jpg".format(test_batch_files[img_idx])
                test_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                save_img = np.squeeze(reconst1[img_idx])
                save_img = Image.fromarray(save_img)
                img_file_name = "{}_rec_2d.jpg".format(test_batch_files[img_idx])
                test_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                save_img = np.squeeze(reconst2[img_idx])
                save_img = Image.fromarray(save_img)
                img_file_name = "{}_rec_3d.jpg".format(test_batch_files[img_idx])
                test_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
        if num_iter > 11 and num_iter % save_model_freq == 1:
            if meta_saved == True:
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint',
                           global_step=num_iter, write_meta_graph=False)
            else:
                print "Save meta graph"
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint',
                           global_step=num_iter, write_meta_graph=True)
                meta_saved = True
def tf_calculate_rotation_iou(boxes_corner_a, boxes_center_a, boxes_corner_b, boxes_center_b):
    # boxes_corner_a : predicted corner boxes => N,8,3
    # boxes_corner_b : ground truth corner boxes => N,8,3
    # boxes_center_a : predicted center boxes => N,7
    # boxes_center_b : ground truth center boxes => N,7

    # (1) get max boundaries
    # input: boxes_corner_a[N, 8, 3], boxes_corner_b[N, 8, 3]; only x and y are used
    # output: max boundaries: 2 * (N,4,2) => 2 boxes, 4 points, x and y
    boxes_standup_a, boxes_standup_b = tf_corner_to_standup(boxes_corner_a, boxes_corner_b)
    # (2) distribute points over the maximum boundaries
    # number of points to be distributed: 20 x 20 => 400
    # output: grid_points (NP, 2); NP: number of points, 2: x, y
    grid_points = tf_random_gen_points()
    grid_points = tf.expand_dims(grid_points, 0)
    N = tf.shape(boxes_standup_a)[0]
    warn("grid_points: {}".format(np.shape(grid_points)))
    grid_points = tf.tile(grid_points, [N, 1, 1])  # N,400,2
    warn("grid_points: {}".format(np.shape(grid_points)))
    # (3) first rotation
    # input: grid_points (N,NP,2), boxes_standup_a (N,4,2), boxes_standup_b (N,4,2), rotation: (N,)
    angle_a = boxes_center_a[:, 6]  # (N,)
    rot_a = np.pi / 2 - angle_a  # (N,)
    rot_boxes_standup_a = tf_rot_points(boxes_standup_a, rot_a)
    rot_points_grid = tf_rot_points(grid_points, rot_a)
    # (3-1) select points
    points_in_boxes_a = tf_points_in_boxes(rot_boxes_standup_a, rot_points_grid)  # N,NP
    warn("boxes_center_a: {}".format(np.shape(boxes_center_a)))
    warn("angle_a: {}".format(np.shape(angle_a)))
    warn("rot_a: {}".format(np.shape(rot_a)))
    # (4) second rotation
    angle_b = boxes_center_b[:, 6]  # (N,)
    rot_b = np.pi / 2 - angle_b  # (N,)
    rot_boxes_standup_b = tf_rot_points(boxes_standup_b, rot_b)
    rot_points_grid = tf_rot_points(grid_points, rot_b)
    # (4-1) select points
    points_in_boxes_b = tf_points_in_boxes(rot_boxes_standup_b, rot_points_grid)  # N,NP
    warn("boxes_center_b: {}".format(np.shape(boxes_center_b)))
    warn("angle_b: {}".format(np.shape(angle_b)))
    warn("rot_b: {}".format(np.shape(rot_b)))
    # (4-2) select points that fall inside both boxes
    points_in_intersection = tf.logical_and(points_in_boxes_a, points_in_boxes_b)
    warn("points_in_intersection: {}".format(np.shape(points_in_intersection)))
    # (5) calculate the ratio of the number of points in the intersection
    #     to the number of points in the union
    inside = tf.ones_like(points_in_boxes_a, tf.float32)
    outside = tf.zeros_like(points_in_boxes_a, tf.float32)
    num_points_in_boxes_a = tf.where(points_in_boxes_a, inside, outside)
    warn("num_points_in_boxes_a 1: {}".format(np.shape(num_points_in_boxes_a)))
    num_points_in_boxes_a = tf.reduce_sum(num_points_in_boxes_a, axis=1)  # N,
    warn("num_points_in_boxes_a 2: {}".format(np.shape(num_points_in_boxes_a)))
    num_points_in_boxes_b = tf.where(points_in_boxes_b, inside, outside)
    num_points_in_boxes_b = tf.reduce_sum(num_points_in_boxes_b, axis=1)
    num_points_in_intersection = tf.where(points_in_intersection, inside, outside)  # N,400
    warn("num_points_in_intersection 1: {}".format(np.shape(num_points_in_intersection)))
    num_points_in_intersection = tf.reduce_sum(num_points_in_intersection, axis=1)  # N,
    num_points_in_union = num_points_in_boxes_a + num_points_in_boxes_b - num_points_in_intersection
    iou = tf.divide(num_points_in_intersection, num_points_in_union)
    warn("iou: {}".format(np.shape(iou)))
    return iou
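# --- The function above is a Monte-Carlo IoU estimate: scatter grid points,
# test membership in each rotation-compensated box, and take
# |A ∩ B| / (|A| + |B| - |A ∩ B|) over point counts. A standalone NumPy sketch
# of the same estimate on two hypothetical unit squares offset by 0.5
# (exact IoU = 0.25 / 1.75 ≈ 0.143):
import numpy as np

rng = np.random.RandomState(0)
pts = rng.uniform(0.0, 2.0, size=(100000, 2))  # samples covering both boxes
in_a = np.all((pts >= [0.0, 0.0]) & (pts <= [1.0, 1.0]), axis=1)
in_b = np.all((pts >= [0.5, 0.5]) & (pts <= [1.5, 1.5]), axis=1)
inter = np.sum(in_a & in_b)
union = np.sum(in_a) + np.sum(in_b) - inter
print(float(inter) / union)  # ≈ 0.143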
def __init__(self, input, alpha=1.5, beta=1, sigma=3, training=True, name=''):
    # scale = [batchsize, 10, 400/200, 352/240, 128] should be the output of the feature learning network
    self.input = input
    self.training = training
    # groundtruth (target) - each anchor box, represented as Δx, Δy, Δz, Δl, Δw, Δh, rotation
    self.targets = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 14])
    # => wip: add confidence (iou) here for yolo style
    # => pos_equal_one is actually conf_mask in yolo code
    # self.conf_target = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2])
    # positive anchors equal one and others equal zero (2 anchors per position)
    self.pos_equal_one = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2])
    self.pos_equal_one_sum = tf.placeholder(tf.float32, [None, 1, 1, 1])
    self.pos_equal_one_for_reg = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 14])
    # negative anchors equal one and others equal zero
    self.neg_equal_one = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2])
    self.neg_equal_one_sum = tf.placeholder(tf.float32, [None, 1, 1, 1])

    with tf.variable_scope('MiddleAndRPN_' + name):
        # convolutional middle layers
        temp_conv = ConvMD(3, 128, 64, 3, (2, 1, 1), (1, 1, 1), self.input, name='conv1')
        temp_conv = ConvMD(3, 64, 64, 3, (1, 1, 1), (0, 1, 1), temp_conv, name='conv2')
        temp_conv = ConvMD(3, 64, 64, 3, (2, 1, 1), (1, 1, 1), temp_conv, name='conv3')
        temp_conv = tf.transpose(temp_conv, perm=[0, 2, 3, 4, 1])
        temp_conv = tf.reshape(temp_conv, [-1, cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 128])  # => batch, 400, 352, 128

        # rpn
        # block1:
        temp_conv = ConvMD(2, 128, 128, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv4')  # => batch, 400, 352, 128
        temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size=2, strides=2, name='maxpool1')  # => batch, 200, 176, 128
        temp_conv = ConvMD(2, 128, 256, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv5')  # => batch, 200, 176, 256
        temp_conv = ConvMD(2, 256, 128, 1, (1, 1), (0, 0), temp_conv, training=self.training, name='conv6')  # => batch, 200, 176, 128
        temp_conv = ConvMD(2, 128, 256, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv7')  # => batch, 200, 176, 256
        temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size=2, strides=2, name='maxpool2')  # => batch, 100, 88, 256
        temp_conv = ConvMD(2, 256, 512, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv8')  # => batch, 100, 88, 512
        temp_conv = ConvMD(2, 512, 128, 1, (1, 1), (0, 0), temp_conv, training=self.training, name='conv9')  # => batch, 100, 88, 128
        route_1 = ConvMD(2, 128, 256, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv10')  # => batch, 100, 88, 256
        temp_conv = ConvMD(2, 256, 128, 1, (1, 1), (0, 0), route_1, training=self.training, name='conv11')  # => batch, 100, 88, 128
        temp_conv = ConvMD(2, 128, 256, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv12')  # => batch, 100, 88, 256
        temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size=2, strides=2, name='maxpool3')  # => batch, 50, 44, 256
        temp_conv = ConvMD(2, 256, 512, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv13')  # => batch, 50, 44, 512
        temp_conv = ConvMD(2, 512, 256, 1, (1, 1), (0, 0), temp_conv, training=self.training, name='conv14')  # => batch, 50, 44, 256
        route_2 = ConvMD(2, 256, 512, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv15')  # => batch, 50, 44, 512
        temp_conv = ConvMD(2, 256, 64, 3, (1, 1), (1, 1), route_1, training=self.training, name='conv16')  # => batch, 100, 88, 64
        temp_conv = Reorg(2, temp_conv, name='reorg1')  # => batch, 50, 44, 256
        temp_conv = tf.concat([temp_conv, route_2], axis=-1, name='concat1')  # => batch, 50, 44, 768
        temp_conv = ConvMD(2, 768, 128, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv17')  # => batch, 50, 44, 128
        p_map = ConvMD(2, 128, 2, 1, (1, 1), (0, 0), temp_conv, training=self.training, name='conv18')
        r_map = ConvMD(2, 128, 14, 1, (1, 1), (0, 0), temp_conv, training=self.training, activation=False, name='conv19')
        warn("rmap shape:{}".format(np.shape(r_map)))
        self.p_pos = tf.sigmoid(p_map)
        self.output_shape = [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH]
        x_pos_0 = tf.expand_dims(tf.sigmoid(r_map[..., 0]), -1)
        y_pos_0 = tf.expand_dims(tf.sigmoid(r_map[..., 1]), -1)
        x_pos_1 = tf.expand_dims(tf.sigmoid(r_map[..., 7]), -1)
        y_pos_1 = tf.expand_dims(tf.sigmoid(r_map[..., 8]), -1)
        r_map = tf.concat([x_pos_0, y_pos_0, r_map[:, :, :, 2:7],
                           x_pos_1, y_pos_1, r_map[:, :, :, 9:14]], axis=-1)
        warn("rmap shape:{}".format(np.shape(r_map)))

        # TODO: sometimes still get inf cls loss
        # wip: change to yolo style
        object_scale = 1.0
        non_object_scale = 1.0
        # self.cls_loss = object_scale * (self.pos_equal_one * tf.square(self.p_pos - self.conf_target)) / self.pos_equal_one_sum \
        #     + non_object_scale * self.neg_equal_one * tf.square(self.p_pos - self.conf_target) / self.neg_equal_one_sum
        # self.cls_loss = tf.reduce_sum(self.cls_loss)
        self.cls_loss = alpha * (-self.pos_equal_one * tf.log(self.p_pos + small_addon_for_BCE)) / self.pos_equal_one_sum \
            + beta * (-self.neg_equal_one * tf.pow(self.p_pos, 2.0) * tf.log(1 - self.p_pos + small_addon_for_BCE)) / self.neg_equal_one_sum
        self.cls_loss = tf.reduce_sum(self.cls_loss)
        # alpha_tf = 0.25
        # gamma = 2
        # pred_pt = tf.where(tf.equal(self.pos_equal_one, 1.0), self.p_pos, 1.0 - self.p_pos)
        # alpha_t = tf.scalar_mul(alpha_tf, tf.ones_like(self.pos_equal_one, dtype=tf.float32))
        # alpha_t = tf.where(tf.equal(self.pos_equal_one, 1.0), alpha_t, 1.0 - alpha_t)
        # self.focal_loss = tf.reduce_sum(-alpha_t * tf.pow(1.0 - pred_pt, gamma) * tf.log(pred_pt + small_addon_for_BCE))

        self.reg_loss = smooth_l1(r_map * self.pos_equal_one_for_reg,
                                  self.targets * self.pos_equal_one_for_reg,
                                  sigma) / self.pos_equal_one_sum
        self.reg_loss = tf.reduce_sum(self.reg_loss)
        self.corner_loss = tf.cond(
            tf.equal(tf.shape(self.targets * self.pos_equal_one_for_reg)[0], 0),
            lambda: return_zero(),
            lambda: cal_volume_loss(r_map * self.pos_equal_one_for_reg,
                                    self.targets * self.pos_equal_one_for_reg,
                                    self.pos_equal_one))
        # self.corner_loss = tf.reduce_sum(self.corner_loss)
        self.loss = tf.reduce_sum(1.0 * self.cls_loss) + tf.reduce_sum(10.0 * self.corner_loss)

        self.delta_output = r_map
        self.prob_output = self.p_pos
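# --- A scalar NumPy sketch of the classification loss built above: positives
# contribute alpha * -log(p), negatives contribute beta * -p^2 * log(1 - p)
# (the p^2 factor down-weights easy negatives, focal-loss style). The value of
# small_addon_for_BCE is assumed here; it is defined elsewhere in this file.
import numpy as np

small_addon_for_BCE = 1e-6  # assumed stability constant
alpha, beta = 1.5, 1.0
p = np.array([0.9, 0.1])    # predicted objectness for two anchors
pos = np.array([1.0, 0.0])  # pos_equal_one mask
neg = np.array([0.0, 1.0])  # neg_equal_one mask
cls_loss = np.sum(alpha * (-pos * np.log(p + small_addon_for_BCE)) / pos.sum()
                  + beta * (-neg * p ** 2 * np.log(1.0 - p + small_addon_for_BCE)) / neg.sum())
print(cls_loss)  # small value: both anchors are classified confidently and correctly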
def mgpu_train_net(models, num_gpus, mode, img_dir, dataset, chkfile_name, logfile_name,
                   validatefile_name, entangled_feat, max_epoch=300, check_every_n=500,
                   loss_check_n=10, save_model_freq=5, batch_size=512, lr=0.001):
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")
    feat_cls = U.get_placeholder_cached(name="feat_cls")

    # batch size must be a multiple of ntowers (# of GPUs)
    ntowers = len(models)
    tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
    tf.assert_equal(tf.floormod(tf.shape(img1)[0], ntowers), 0)
    img1splits = tf.split(img1, ntowers, 0)
    img2splits = tf.split(img2, ntowers, 0)

    tower_vae_loss = []
    tower_latent_z1_tp = []
    tower_latent_z2_tp = []
    tower_losses = []
    tower_siam_max = []
    tower_reconst1 = []
    tower_reconst2 = []
    tower_cls_loss = []
    for gid, model in enumerate(models):
        with tf.name_scope('gpu%d' % gid) as scope:
            with tf.device('/gpu:%d' % gid):
                vae_loss = U.mean(model.vaeloss)
                latent_z1_tp = model.latent_z1
                latent_z2_tp = model.latent_z2
                losses = [U.mean(model.vaeloss),
                          U.mean(model.siam_loss),
                          U.mean(model.kl_loss1),
                          U.mean(model.kl_loss2),
                          U.mean(model.reconst_error1),
                          U.mean(model.reconst_error2)]
                siam_max = U.mean(model.max_siam_loss)
                cls_loss = U.mean(model.cls_loss)
                tower_vae_loss.append(vae_loss)
                tower_latent_z1_tp.append(latent_z1_tp)
                tower_latent_z2_tp.append(latent_z2_tp)
                tower_losses.append(losses)
                tower_siam_max.append(siam_max)
                tower_reconst1.append(model.reconst1)
                tower_reconst2.append(model.reconst2)
                tower_cls_loss.append(cls_loss)
                tf.summary.scalar('Total Loss', losses[0])
                tf.summary.scalar('Siam Loss', losses[1])
                tf.summary.scalar('kl1_loss', losses[2])
                tf.summary.scalar('kl2_loss', losses[3])
                tf.summary.scalar('reconst_err1', losses[4])
                tf.summary.scalar('reconst_err2', losses[5])
                tf.summary.scalar('Siam Max', siam_max)

    vae_loss = U.mean(tower_vae_loss)
    siam_max = U.mean(tower_siam_max)
    latent_z1_tp = tf.concat(tower_latent_z1_tp, 0)
    latent_z2_tp = tf.concat(tower_latent_z2_tp, 0)
    model_reconst1 = tf.concat(tower_reconst1, 0)
    model_reconst2 = tf.concat(tower_reconst2, 0)
    cls_loss = U.mean(tower_cls_loss)
    losses = [[] for _ in range(len(losses))]
    for tl in tower_losses:
        for i, l in enumerate(tl):
            losses[i].append(l)
    losses = [U.mean(l) for l in losses]
    siam_normal = losses[1] / entangled_feat
    tf.summary.scalar('total/Total Loss', losses[0])
    tf.summary.scalar('total/Siam Loss', losses[1])
    tf.summary.scalar('total/kl1_loss', losses[2])
    tf.summary.scalar('total/kl2_loss', losses[3])
    tf.summary.scalar('total/reconst_err1', losses[4])
    tf.summary.scalar('total/reconst_err2', losses[5])
    tf.summary.scalar('total/Siam Normal', siam_normal)
    tf.summary.scalar('total/Siam Max', siam_max)
    compute_losses = U.function([img1, img2], vae_loss)

    all_var_list = model.get_trainable_variables()
    vae_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("vae")]
    cls_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("cls")]
    warn("{}".format(all_var_list))
    warn("==========================")
    warn("{}".format(vae_var_list))
    # warn("==========================")
    # warn("{}".format(cls_var_list))

    # with tf.device('/cpu:0'):
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon=0.01 / batch_size)
    optimize_expr1 = optimizer.minimize(vae_loss, var_list=vae_var_list)
    feat_cls_optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    optimize_expr2 = feat_cls_optimizer.minimize(cls_loss, var_list=cls_var_list)
    merged = tf.summary.merge_all()
    train = U.function([img1, img2],
                       [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5],
                        latent_z1_tp, latent_z2_tp, merged],
                       updates=[optimize_expr1])
    get_reconst_img = U.function([img1, img2],
                                 [model_reconst1, model_reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))
    train_writer = U.summary_writer(dir=log_save_dir)
    U.initialize()
    saver, chk_file_epoch_num = U.load_checkpoints(load_requested=True, checkpoint_dir=chk_save_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir=validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir=validate_img_saver_dir)  # black and white, temporary usage
    else:
        warn("Unknown dataset Error")

    warn("dataset: {}".format(dataset))
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")

    meta_saved = False
    if mode == 'train':
        for epoch_idx in range(chk_file_epoch_num + 1, max_epoch):
            t_epoch_start = time.time()
            num_batch = manager.get_len()
            for batch_idx in range(num_batch):
                if dataset == 'chairs' or dataset == 'celeba':
                    idx = random.sample(range(n_total_train_data), 2 * batch_size)
                    batch_files = [training_images_list[i] for i in idx]
                    [images1, images2] = load_image(dir_name=img_dir, img_names=batch_files)
                elif dataset == 'dsprites':
                    [images1, images2] = manager.get_next()
                img1, img2 = images1, images2
                [l1, l2, _, _] = get_reconst_img(img1, img2)
                [loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)
                if batch_idx % 50 == 1:
                    header("******* epoch: {}/{} batch: {}/{} *******".format(
                        epoch_idx, max_epoch, batch_idx, num_batch))
                    warn("Total Loss: {}".format(loss0))
                    warn("Siam loss: {}".format(loss1))
                    warn("kl1_loss: {}".format(loss2))
                    warn("kl2_loss: {}".format(loss3))
                    warn("reconst_err1: {}".format(loss4))
                    warn("reconst_err2: {}".format(loss5))
                if batch_idx % check_every_n == 1:
                    if dataset == 'chairs' or dataset == 'celeba':
                        idx = random.sample(range(len(training_images_list)), 2 * 5)
                        validate_batch_files = [training_images_list[i] for i in idx]
                        [images1, images2] = load_image(dir_name=img_dir, img_names=validate_batch_files)
                    elif dataset == 'dsprites':
                        [images1, images2] = manager.get_next()
                    [reconst1, reconst2, _, _] = get_reconst_img(images1, images2)
                    if dataset == 'chairs':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                    elif dataset == 'celeba':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                    elif dataset == 'dsprites':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_ori.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_rec.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                if batch_idx % loss_check_n == 1:
                    train_writer.add_summary(summary, batch_idx)
            t_epoch_end = time.time()
            t_epoch_run = t_epoch_end - t_epoch_start
            if dataset == 'dsprites':
                t_check = manager.sample_size / t_epoch_run
                warn("==========================================")
                warn("Run {}th epoch in {} sec: {} images / sec".format(epoch_idx + 1, t_epoch_run, t_check))
                warn("==========================================")
            if meta_saved == True:
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint',
                           global_step=epoch_idx, write_meta_graph=False)
            else:
                print "Save meta graph"
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint',
                           global_step=epoch_idx, write_meta_graph=True)
                meta_saved = True
def train_net(model, manager, chkfile_name, logfile_name, validatefile_name, entangled_feat,
              max_iter=6000001, check_every_n=1000, loss_check_n=10,
              save_model_freq=5000, batch_size=32):
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")
    # Testing
    # img_test = U.get_placeholder_cached(name="img_test")
    # reconst_tp = U.get_placeholder_cached(name="reconst_tp")
    vae_loss = U.mean(model.vaeloss)
    latent_z1_tp = model.latent_z1
    latent_z2_tp = model.latent_z2
    losses = [U.mean(model.vaeloss),
              U.mean(model.siam_loss),
              U.mean(model.kl_loss1),
              U.mean(model.kl_loss2),
              U.mean(model.reconst_error1),
              U.mean(model.reconst_error2)]
    siam_normal = losses[1] / entangled_feat
    siam_max = U.mean(model.max_siam_loss)
    tf.summary.scalar('Total Loss', losses[0])
    tf.summary.scalar('Siam Loss', losses[1])
    tf.summary.scalar('kl1_loss', losses[2])
    tf.summary.scalar('kl2_loss', losses[3])
    tf.summary.scalar('reconst_err1', losses[4])
    tf.summary.scalar('reconst_err2', losses[5])
    tf.summary.scalar('Siam Normal', siam_normal)
    tf.summary.scalar('Siam Max', siam_max)
    # decoded_img = [model.reconst1, model.reconst2]
    compute_losses = U.function([img1, img2], vae_loss)
    lr = 0.005
    optimizer = tf.train.AdagradOptimizer(learning_rate=lr)
    all_var_list = model.get_trainable_variables()
    # img1_var_list = [v for v in all_var_list if v.name.split("/")[1].startswith("proj1")
    #                  or v.name.split("/")[1].startswith("unproj1")]
    img1_var_list = all_var_list
    optimize_expr1 = optimizer.minimize(vae_loss, var_list=img1_var_list)
    merged = tf.summary.merge_all()
    train = U.function([img1, img2],
                       [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5],
                        latent_z1_tp, latent_z2_tp, merged],
                       updates=[optimize_expr1])
    get_reconst_img = U.function([img1, img2],
                                 [model.reconst1_mean, model.reconst2_mean, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])
    # testing
    # test = U.function([img_test], model.latent_z_test)
    # test_reconst = U.function([reconst_tp], [model.reconst_test])
    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    # test_img_saver_dir = os.path.join(cur_dir, "test_images")
    # testing_img_dir = os.path.join(cur_dir, "dataset/test_img")
    train_writer = U.summary_writer(dir=log_save_dir)
    U.initialize()
    saver, chk_file_num = U.load_checkpoints(load_requested=True, checkpoint_dir=chk_save_dir)
    validate_img_saver = BW_Img_Saver(validate_img_saver_dir)
    # testing
    # test_img_saver = Img_Saver(test_img_saver_dir)
    meta_saved = False
    iter_log = []
    loss1_log = []
    loss2_log = []
    loss3_log = []
    training_images_list = manager.imgs  # read_dataset(img_dir)
    n_total_train_data = len(training_images_list)
    # testing_images_list = read_dataset(testing_img_dir)
    # n_total_testing_data = len(testing_images_list)
    training = True
    testing = False
    if training == True:
        for num_iter in range(chk_file_num + 1, max_iter):
            header("******* {}th iter: *******".format(num_iter))
            idx = random.sample(range(n_total_train_data), 2 * batch_size)
            batch_files = idx
            [images1, images2] = manager.get_images(indices=idx)
            img1, img2 = images1, images2
            [l1, l2, _, _] = get_reconst_img(img1, img2)
            [loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)
            warn("Total Loss: {}".format(loss0))
            warn("Siam loss: {}".format(loss1))
            warn("kl1_loss: {}".format(loss2))
            warn("kl2_loss: {}".format(loss3))
            warn("reconst_err1: {}".format(loss4))
            warn("reconst_err2: {}".format(loss5))
            if num_iter % check_every_n == 1:
                header("******* {}th iter: *******".format(num_iter))
                idx = random.sample(range(len(training_images_list)), 2 * 5)
                [images1, images2] = manager.get_images(indices=idx)
                [reconst1, reconst2, _, _] = get_reconst_img(images1, images2)
                # for i in range(len(latent1[0])):
                #     print "{} th: {:.2f}".format(i, np.mean(np.abs(latent1[:, i] - latent2[:, i])))
                for img_idx in range(len(images1)):
                    sub_dir = "iter_{}".format(num_iter)
                    save_img = images1[img_idx].reshape(64, 64)
                    save_img = save_img.astype(np.float32)
                    img_file_name = "{}_ori.jpg".format(img_idx)
                    validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                    save_img = reconst1[img_idx].reshape(64, 64)
                    save_img = save_img.astype(np.float32)
                    img_file_name = "{}_rec.jpg".format(img_idx)
                    validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
            if num_iter % loss_check_n == 1:
                train_writer.add_summary(summary, num_iter)
            if num_iter > 11 and num_iter % save_model_freq == 1:
                if meta_saved == True:
                    saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint',
                               global_step=num_iter, write_meta_graph=False)
                else:
                    print "Save meta graph"
                    saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint',
                               global_step=num_iter, write_meta_graph=True)
                    meta_saved = True
def __init__(self, object_dir='.', queue_size=20, require_shuffle=False,
             is_testset=True, batch_size=1, use_multi_process_num=0,
             split_file='', valid_file='', multi_gpu_sum=1):
    assert (use_multi_process_num >= 0)
    self.object_dir = object_dir
    self.is_testset = is_testset
    self.use_multi_process_num = use_multi_process_num if not self.is_testset else 1
    self.require_shuffle = require_shuffle if not self.is_testset else False
    self.batch_size = batch_size if not self.is_testset else 1
    self.split_file = split_file
    self.valid_file = valid_file
    self.multi_gpu_sum = multi_gpu_sum
    self.progress = 0

    if self.split_file != '':
        # use split file
        _tag = []
        self.f_rgb, self.f_lidar, self.f_label, self.f_calib = [], [], [], []
        self.f_voxel = []
        for line in open(self.split_file, 'r').readlines():
            line = line[:-1]  # remove '\n'
            _tag.append(line)
            self.f_rgb.append(os.path.join(self.object_dir, 'image_2', line + '.png'))
            self.f_lidar.append(os.path.join(self.object_dir, 'velodyne', line + '.bin'))
            self.f_label.append(os.path.join(self.object_dir, 'label_2', line + '.txt'))
            self.f_calib.append(os.path.join(self.object_dir, 'calib', line + '.txt'))
        self.data_tag = _tag  # keep the split tags; the warn/assert below rely on it
        self.f_rgb_valid = []
        self.f_lidar_valid = []
        self.f_label_valid = []
        self.f_calib_valid = []
        for line in open(self.valid_file, 'r').readlines():
            line = line[:-1]  # remove '\n'
            self.f_rgb_valid.append(os.path.join(self.object_dir, 'image_2', line + '.png'))
            self.f_lidar_valid.append(os.path.join(self.object_dir, 'velodyne', line + '.bin'))
            self.f_label_valid.append(os.path.join(self.object_dir, 'label_2', line + '.txt'))
            self.f_calib_valid.append(os.path.join(self.object_dir, 'calib', line + '.txt'))
        self.data_tag_valid = [name.split('/')[-1].split('.')[-2]
                               for name in self.f_label_valid]
    else:
        self.f_rgb = glob.glob(os.path.join(self.object_dir, 'image_2', '*.png'))
        self.f_rgb.sort()
        self.f_lidar = glob.glob(os.path.join(self.object_dir, 'velodyne', '*.bin'))
        self.f_lidar.sort()
        self.f_label = glob.glob(os.path.join(self.object_dir, 'label_2', '*.txt'))
        self.f_label.sort()
        self.f_calib = glob.glob(os.path.join(self.object_dir, 'calib', '*.txt'))
        self.f_calib.sort()
        self.data_tag = [name.split('/')[-1].split('.')[-2] for name in self.f_label]

    # assert(len(self.f_rgb) == len(self.f_lidar) == len(self.f_label) == len(self.data_tag))
    warn("{} {} {} {}".format(len(self.f_label), len(self.data_tag),
                              len(self.f_lidar), len(self.f_calib)))
    assert (len(self.f_label) == len(self.data_tag) == len(self.f_rgb) == len(self.f_lidar))
    self.dataset_size = len(self.f_label)
    self.validset_size = len(self.f_label_valid)
    self.already_extract_data = 0
    self.cur_frame_info = ''

    if self.require_shuffle:
        self.shuffle_dataset()
    self.queue_size = queue_size
    # must use the queue provided by the multiprocessing module
    # (only this one can be shared between processes)
    self.dataset_queue = Queue()
    self.load_index = 0
    if self.use_multi_process_num == 0:
        self.loader_worker = [threading.Thread(target=self.loader_worker_main,
                                               args=(self.batch_size,))]
    else:
        self.loader_worker = [Process(target=self.loader_worker_main,
                                      args=(self.batch_size,))
                              for i in range(self.use_multi_process_num)]
    self.work_exit = Value('i', 0)
    [i.start() for i in self.loader_worker]  # this operation is not thread-safe
    self.rgb_shape = (cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH, 3)
def load_specified_train(self, load_indices=None):
    # Load without data augmentation
    labels, tag, voxel, doubled_voxel, rgb, raw_lidar, calib = [], [], [], [], [], [], []
    voxel_size = np.array([cfg.VOXEL_Z_SIZE, cfg.VOXEL_Y_SIZE, cfg.VOXEL_X_SIZE],
                          dtype=np.float32)
    double_voxel_size = 2 * voxel_size
    if load_indices is None:
        load_indices = np.random.randint(len(self.f_rgb), size=self.batch_size)
    for load_index in load_indices:
        try:
            t0 = time.time()
            rgb.append(cv2.resize(cv2.imread(self.f_rgb[load_index]),
                                  (cfg.IMAGE_WIDTH, cfg.IMAGE_HEIGHT)))
            lidar = np.fromfile(self.f_lidar[load_index], dtype=np.float32).reshape((-1, 4))
            calib_file = self.f_lidar[load_index].replace('velodyne', 'calib').replace('bin', 'txt')
            lidar = clip_by_projection(lidar, calib_file, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
            raw_lidar.append(lidar)
            calib.append(read_calib_mat(self.f_calib[load_index]))
            labels.append([line for line in open(self.f_label[load_index], 'r').readlines()])
            tag.append(self.data_tag[load_index])
            voxel.append(voxelize(file=self.f_lidar[load_index], lidar=lidar,
                                  voxel_size=voxel_size, T=cfg.VOXEL_POINT_COUNT))
            doubled_voxel.append(voxelize(file=self.f_lidar[load_index], lidar=lidar,
                                          voxel_size=double_voxel_size, T=cfg.VOXEL_POINT_COUNT))
            t1 = time.time()
        except:
            warn("Load Specified: Loading Error!! {}".format(tag))

    # only for voxel -> [gpu, k_single_batch, ...]
    vox_feature, vox_number, vox_coordinate = [], [], []
    single_batch_size = int(self.batch_size / self.multi_gpu_sum)
    for idx in range(self.multi_gpu_sum):
        _, per_vox_feature, per_vox_number, per_vox_coordinate = build_input(
            voxel[idx * single_batch_size:(idx + 1) * single_batch_size])
        vox_feature.append(per_vox_feature)
        vox_number.append(per_vox_number)
        vox_coordinate.append(per_vox_coordinate)
    doubled_vox_feature, doubled_vox_number, doubled_vox_coordinate = [], [], []
    for idx in range(self.multi_gpu_sum):
        _, per_vox_feature, per_vox_number, per_vox_coordinate = build_input(
            doubled_voxel[idx * single_batch_size:(idx + 1) * single_batch_size])
        doubled_vox_feature.append(per_vox_feature)
        doubled_vox_number.append(per_vox_number)
        doubled_vox_coordinate.append(per_vox_coordinate)

    ret = (np.array(tag), np.array(labels),
           np.array(vox_feature), np.array(vox_number), np.array(vox_coordinate),
           np.array(doubled_vox_feature), np.array(doubled_vox_number),
           np.array(doubled_vox_coordinate),
           np.array(rgb), np.array(raw_lidar), np.array(calib))
    return ret
def __init__(self,
             cls='Car',
             single_batch_size=2,  # batch_size_per_gpu
             learning_rate=0.001,
             max_gradient_norm=5.0,
             alpha=1.5,
             beta=1,
             is_train=True,
             avail_gpus=['0']):
    # hyper parameters and status
    self.cls = cls
    self.single_batch_size = single_batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
    self.global_step = tf.Variable(1, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)
    self.alpha = alpha
    self.beta = beta
    self.avail_gpus = avail_gpus

    lr = tf.train.exponential_decay(self.learning_rate, self.global_step, 10000, 0.96)

    # build graph
    # input placeholders
    self.imgs = []
    self.confs = []
    # => wip: add confidence (iou) here for yolo style
    # => pos_equal_one is actually conf_mask in yolo code
    # self.conf_target = []
    self.prob_output = []
    self.opt = tf.train.AdamOptimizer(lr)
    self.gradient_norm = []
    self.tower_grads = []
    self.batch_loss = []

    with tf.variable_scope(tf.get_variable_scope()):
        for idx, dev in enumerate(self.avail_gpus):
            # must use name scope here since we do not want to create new variables
            with tf.device('/gpu:{}'.format(dev)), tf.name_scope('gpu_{}'.format(dev)):
                # graph
                vggnet = vgg(training=is_train, batch_size=self.single_batch_size, name='vgg')
                tf.get_variable_scope().reuse_variables()
                # input
                self.imgs.append(vggnet.imgs)
                self.confs.append(vggnet.conf)
                # output
                prob_output = vggnet.prob
                # loss and grad
                self.loss = vggnet.loss
                self.params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, self.params)
                clipped_gradients, gradient_norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.prob_output.append(prob_output)
                self.tower_grads.append(clipped_gradients)
                self.gradient_norm.append(gradient_norm)
                self.batch_loss.append(self.loss)

    # loss and optimizer
    # self.xxxloss is only the loss for the lowest tower
    with tf.device('/gpu:{}'.format(self.avail_gpus[0])):
        self.grads = average_gradients(self.tower_grads)
        self.update = self.opt.apply_gradients(zip(self.grads, self.params),
                                               global_step=self.global_step)
        self.gradient_norm = tf.group(*self.gradient_norm)

    self.prob_output = tf.concat(self.prob_output, axis=0)
    warn("batch loss1: {}".format(np.shape(self.batch_loss)))
    self.batch_loss = tf.reduce_sum(self.batch_loss)
    warn("batch loss2: {}".format(np.shape(self.batch_loss)))

    # # for predict and image summary
    # self.rgb = tf.placeholder(tf.uint8, [None, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH, 3])
    # self.bv = tf.placeholder(tf.uint8, [
    #     None, cfg.BV_LOG_FACTOR * cfg.INPUT_HEIGHT, cfg.BV_LOG_FACTOR * cfg.INPUT_WIDTH, 3])
    # self.bv_heatmap = tf.placeholder(tf.uint8, [
    #     None, cfg.BV_LOG_FACTOR * cfg.FEATURE_HEIGHT, cfg.BV_LOG_FACTOR * cfg.FEATURE_WIDTH, 3])
    # self.boxes2d = tf.placeholder(tf.float32, [None, 4])
    # self.boxes2d_scores = tf.placeholder(tf.float32, [None])
    # # NMS(2D)
    # with tf.device('/gpu:{}'.format(self.avail_gpus[0])):
    #     self.box2d_ind_after_nms = tf.image.non_max_suppression(
    #         self.boxes2d, self.boxes2d_scores,
    #         max_output_size=cfg.RPN_NMS_POST_TOPK, iou_threshold=cfg.RPN_NMS_THRESH)

    # summary and saver
    self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                max_to_keep=10,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)
    # the histogram list is concatenated so that tf.summary.merge receives a flat list
    self.train_summary = tf.summary.merge(
        [tf.summary.scalar('train/loss', self.loss)] +
        # tf.summary.scalar('train/reg_loss', self.reg_loss),
        # tf.summary.scalar('train/cls_loss', self.cls_loss),
        [tf.summary.histogram(each.name, each) for each in self.params])
    self.validate_summary = tf.summary.merge(
        [tf.summary.scalar('validate/loss', self.loss)])
def __init__(self, training, batch_size, name=''):
    super(vgg, self).__init__()
    self.training = training
    # scalar
    self.batch_size = batch_size
    # [ΣK, 64, 64, 3]
    self.imgs = tf.placeholder(tf.float32, [None, 64, 64, 3], name='img')
    self.conf = tf.placeholder(tf.float32, [None], name='conf')
    start_time = time.time()
    print("build model started")

    # rgb_scaled = rgb * 255.0
    # Convert RGB to BGR
    red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=self.imgs)
    assert red.get_shape().as_list()[1:] == [64, 64, 1]
    assert green.get_shape().as_list()[1:] == [64, 64, 1]
    assert blue.get_shape().as_list()[1:] == [64, 64, 1]
    x = tf.concat(axis=3, values=[
        blue - VGG_MEAN[0],
        green - VGG_MEAN[1],
        red - VGG_MEAN[2],
    ])

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE) as scope:
        temp_conv = ConvMD(2, 3, 16, 3, (1, 1), (1, 1), x, training=self.training, name='conv1_1')  # => [None, 64, 64, 16]
        temp_conv = ConvMD(2, 16, 16, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv1_2')  # => [None, 64, 64, 16]
        temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size=2, strides=2, name='maxpool1')  # => [None, 32, 32, 16]
        temp_conv = ConvMD(2, 16, 16, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv2_1')  # => [None, 32, 32, 16]
        temp_conv = ConvMD(2, 16, 16, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv2_2')  # => [None, 32, 32, 16]
        temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size=2, strides=2, name='maxpool2')  # => [None, 16, 16, 16]
        temp_conv = ConvMD(2, 16, 16, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv3_1')  # => [None, 16, 16, 16]
        temp_conv = ConvMD(2, 16, 16, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv3_2')  # => [None, 16, 16, 16]
        temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size=2, strides=2, name='maxpool3')  # => [None, 8, 8, 16]
        warn("shape: {}".format(np.shape(temp_conv)))
        temp = tf.layers.flatten(temp_conv, name='flatten')  # => [None, 8 * 8 * 16]
        warn("shape: {}".format(np.shape(temp)))
        temp = tf.nn.relu(self.dense(temp, 32, 'dense1'))
        warn("shape: {}".format(np.shape(temp)))
        self.prob = tf.nn.sigmoid(self.dense(temp, 1, 'prob'))
        warn("shape: {}".format(np.shape(self.prob)))
        # squeeze [None, 1] -> [None] so the BCE terms align elementwise with
        # self.conf instead of broadcasting to [None, None]
        prob_flat = tf.squeeze(self.prob, axis=[1])
        self.loss = tf.reduce_mean(
            -self.conf * tf.log(prob_flat + small_addon_for_BCE)
            - (1 - self.conf) * tf.log(1 - prob_flat + small_addon_for_BCE))
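# --- A minimal usage sketch for the classifier above (a sketch only, assuming
# VGG_MEAN, small_addon_for_BCE, and ConvMD are defined earlier in this file;
# the input images and IoU targets are random placeholders):
import numpy as np
import tensorflow as tf

net = vgg(training=False, batch_size=4, name='vgg_demo')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    imgs = np.random.rand(4, 64, 64, 3).astype(np.float32)  # [K, 64, 64, 3]
    confs = np.random.rand(4).astype(np.float32)            # IoU targets in [0, 1]
    prob, loss = sess.run([net.prob, net.loss],
                          feed_dict={net.imgs: imgs, net.conf: confs})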
def mgpu_classifier_train_net(models, num_gpus, cls_batch_per_gpu, cls_L, mode, img_dir,
                              dataset, chkfile_name, logfile_name, validatefile_name,
                              entangled_feat, max_epoch=300, check_every_n=500,
                              loss_check_n=10, save_model_freq=5, batch_size=512, lr=0.001):
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")
    feat_cls = U.get_placeholder_cached(name="feat_cls")

    # batch size must be a multiple of ntowers (# of GPUs)
    ntowers = len(models)
    tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
    tf.assert_equal(tf.floormod(tf.shape(img1)[0], ntowers), 0)
    img1splits = tf.split(img1, ntowers, 0)
    img2splits = tf.split(img2, ntowers, 0)

    tower_vae_loss = []
    tower_latent_z1_tp = []
    tower_latent_z2_tp = []
    tower_losses = []
    tower_siam_max = []
    tower_reconst1 = []
    tower_reconst2 = []
    tower_cls_loss = []
    for gid, model in enumerate(models):
        with tf.name_scope('gpu%d' % gid) as scope:
            with tf.device('/gpu:%d' % gid):
                vae_loss = U.mean(model.vaeloss)
                latent_z1_tp = model.latent_z1
                latent_z2_tp = model.latent_z2
                losses = [U.mean(model.vaeloss),
                          U.mean(model.siam_loss),
                          U.mean(model.kl_loss1),
                          U.mean(model.kl_loss2),
                          U.mean(model.reconst_error1),
                          U.mean(model.reconst_error2)]
                siam_max = U.mean(model.max_siam_loss)
                cls_loss = U.mean(model.cls_loss)
                tower_vae_loss.append(vae_loss)
                tower_latent_z1_tp.append(latent_z1_tp)
                tower_latent_z2_tp.append(latent_z2_tp)
                tower_losses.append(losses)
                tower_siam_max.append(siam_max)
                tower_reconst1.append(model.reconst1)
                tower_reconst2.append(model.reconst2)
                tower_cls_loss.append(cls_loss)
                tf.summary.scalar('Cls Loss', cls_loss)

    vae_loss = U.mean(tower_vae_loss)
    siam_max = U.mean(tower_siam_max)
    latent_z1_tp = tf.concat(tower_latent_z1_tp, 0)
    latent_z2_tp = tf.concat(tower_latent_z2_tp, 0)
    model_reconst1 = tf.concat(tower_reconst1, 0)
    model_reconst2 = tf.concat(tower_reconst2, 0)
    cls_loss = U.mean(tower_cls_loss)
    losses = [[] for _ in range(len(losses))]
    for tl in tower_losses:
        for i, l in enumerate(tl):
            losses[i].append(l)
    losses = [U.mean(l) for l in losses]
    siam_normal = losses[1] / entangled_feat
    tf.summary.scalar('total/cls_loss', cls_loss)
    compute_losses = U.function([img1, img2], vae_loss)

    all_var_list = model.get_trainable_variables()
    vae_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("vae")]
    cls_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("cls")]
    warn("{}".format(all_var_list))
    warn("=======================")
    warn("{}".format(vae_var_list))
    warn("=======================")
    warn("{}".format(cls_var_list))

    # with tf.device('/cpu:0'):
    # optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon=0.01/batch_size)
    # optimize_expr1 = optimizer.minimize(vae_loss, var_list=vae_var_list)
    feat_cls_optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    optimize_expr2 = feat_cls_optimizer.minimize(cls_loss, var_list=cls_var_list)
    merged = tf.summary.merge_all()
    # train = U.function([img1, img2],
    #                    [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5],
    #                     latent_z1_tp, latent_z2_tp, merged], updates=[optimize_expr1])
    classifier_train = U.function([img1, img2, feat_cls],
                                  [cls_loss, latent_z1_tp, latent_z2_tp, merged],
                                  updates=[optimize_expr2])
    get_reconst_img = U.function([img1, img2],
                                 [model_reconst1, model_reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    cls_logfile_name = 'cls_{}'.format(logfile_name)
    cls_log_save_dir = os.path.join(cur_dir, cls_logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))
    cls_train_writer = U.summary_writer(dir=cls_log_save_dir)
    U.initialize()
    saver, chk_file_epoch_num = U.load_checkpoints(load_requested=True, checkpoint_dir=chk_save_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir=validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir=validate_img_saver_dir)  # black and white, temporary usage
    else:
        warn("Unknown dataset Error")

    warn("dataset: {}".format(dataset))
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")

    meta_saved = False
    cls_train_iter = 10000
    for cls_train_i in range(cls_train_iter):
        if dataset == 'dsprites':
            # At every step, train the classifier and check the result
            # (1) load image pairs that share one fixed latent factor
            num_img_pair = cls_L * num_gpus * cls_batch_per_gpu
            feat = np.random.randint(len(manager.latents_sizes) - 1,
                                     size=num_gpus * cls_batch_per_gpu)
            [images1, images2] = manager.get_image_fixed_feat_batch(feat, num_img_pair)
            # (2) feed the pairs and the factor labels through the classifier
            [classification_loss, _, _, summary] = classifier_train(images1, images2, feat)
            if cls_train_i % 100 == 0:
                warn("cls loss {}: {}".format(cls_train_i, classification_loss))
                cls_train_writer.add_summary(summary, cls_train_i)
def tf_corner_to_standup(boxes_corner_a, boxes_corner_b):
    # boxes_corner_a: (N,8,3); only the 4 bottom corners and the x, y coordinates are used
    boxes_a_x = boxes_corner_a[:, 0:4, 0]  # N,4
    boxes_a_y = boxes_corner_a[:, 0:4, 1]
    boxes_b_x = boxes_corner_b[:, 0:4, 0]
    boxes_b_y = boxes_corner_b[:, 0:4, 1]
    warn("boxes_a_x: {}".format(np.shape(boxes_a_x)))
    x_axis = tf.concat([boxes_a_x, boxes_b_x], axis=1)  # N,8
    y_axis = tf.concat([boxes_a_y, boxes_b_y], axis=1)  # N,8
    warn("x_axis: {}".format(np.shape(x_axis)))
    min_x = tf.reduce_min(x_axis, axis=1)  # N,
    min_y = tf.reduce_min(y_axis, axis=1)
    max_x = tf.reduce_max(x_axis, axis=1)
    max_y = tf.reduce_max(y_axis, axis=1)
    warn("min_x: {}".format(np.shape(min_x)))
    translation_x = tf.tile(tf.expand_dims(min_x, -1), [1, 4])
    translation_y = tf.tile(tf.expand_dims(min_y, -1), [1, 4])
    warn("translation_x: {}".format(np.shape(translation_x)))
    boxes_a_x = boxes_a_x - translation_x
    boxes_b_x = boxes_b_x - translation_x
    boxes_a_y = boxes_a_y - translation_y
    boxes_b_y = boxes_b_y - translation_y
    len_x = max_x - min_x  # N,
    len_y = max_y - min_y  # N,
    warn("len_x: {}".format(np.shape(len_x)))
    len_square = tf.maximum(len_x, len_y)  # N,
    warn("len_square 1: {}".format(np.shape(len_square)))
    len_square = tf.tile(tf.expand_dims(len_square, -1), [1, 4])  # N,4
    warn("len_square 2: {}".format(np.shape(len_square)))
    boxes_a_x = tf.divide(boxes_a_x, len_square)  # N,4
    boxes_b_x = tf.divide(boxes_b_x, len_square)
    boxes_a_y = tf.divide(boxes_a_y, len_square)
    boxes_b_y = tf.divide(boxes_b_y, len_square)
    boxes_a = tf.stack([boxes_a_x, boxes_a_y], axis=-1)  # N,4,2
    boxes_b = tf.stack([boxes_b_x, boxes_b_y], axis=-1)  # N,4,2
    warn("boxes_a: {}".format(np.shape(boxes_a)))
    # standup_boxes = tf.stack([min_x, min_y, max_x, max_y], axis=1)  # N,4
    return boxes_a, boxes_b
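# --- A small NumPy check of the shared-frame normalization above: both corner
# sets are shifted by the joint (min_x, min_y) and divided by the longer side
# of their joint bounding box, so every coordinate lands in [0, 1]
# (hypothetical corner data):
import numpy as np

a = np.array([[0., 0.], [4., 0.], [4., 2.], [0., 2.]])  # box A corners (x, y)
b = np.array([[2., 1.], [6., 1.], [6., 3.], [2., 3.]])  # box B corners
xy = np.concatenate([a, b], axis=0)
mins = xy.min(axis=0)
scale = (xy.max(axis=0) - mins).max()  # longer side of the joint extent
print((a - mins) / scale)
print((b - mins) / scale)  # all values fall inside the unit square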
def train_net(model, mode, img_dir, dataset, chkfile_name, logfile_name, validatefile_name, entangled_feat,
              max_epoch=300, check_every_n=500, loss_check_n=10, save_model_freq=5, batch_size=512, lr=0.001):
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")

    vae_loss = U.mean(model.vaeloss)
    latent_z1_tp = model.latent_z1
    latent_z2_tp = model.latent_z2

    losses = [U.mean(model.vaeloss),
              U.mean(model.siam_loss),
              U.mean(model.kl_loss1),
              U.mean(model.kl_loss2),
              U.mean(model.reconst_error1),
              U.mean(model.reconst_error2)]
    siam_normal = losses[1] / entangled_feat
    siam_max = U.mean(model.max_siam_loss)

    tf.summary.scalar('Total Loss', losses[0])
    tf.summary.scalar('Siam Loss', losses[1])
    tf.summary.scalar('kl1_loss', losses[2])
    tf.summary.scalar('kl2_loss', losses[3])
    tf.summary.scalar('reconst_err1', losses[4])
    tf.summary.scalar('reconst_err2', losses[5])
    tf.summary.scalar('Siam Normal', siam_normal)
    tf.summary.scalar('Siam Max', siam_max)

    compute_losses = U.function([img1, img2], vae_loss)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon=0.01 / batch_size)

    all_var_list = model.get_trainable_variables()
    img1_var_list = all_var_list
    optimize_expr1 = optimizer.minimize(vae_loss, var_list=img1_var_list)
    merged = tf.summary.merge_all()
    train = U.function([img1, img2],
                       [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5],
                        latent_z1_tp, latent_z2_tp, merged],
                       updates=[optimize_expr1])
    get_reconst_img = U.function([img1, img2], [model.reconst1, model.reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))

    train_writer = U.summary_writer(dir=log_save_dir)

    U.initialize()
    saver, chk_file_epoch_num = U.load_checkpoints(load_requested=True, checkpoint_dir=chk_save_dir)

    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir=validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir=validate_img_saver_dir)  # black-and-white, temporary usage
    else:
        warn("Unknown dataset Error")
        # break

    warn(img_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")
        # break

    meta_saved = False

    if mode == 'train':
        for epoch_idx in range(chk_file_epoch_num + 1, max_epoch):
            t_epoch_start = time.time()
            # `manager` only exists for dsprites; for the image-folder datasets
            # derive the batch count from the file list (each batch samples
            # 2 * batch_size files to form image pairs).
            if dataset == 'dsprites':
                num_batch = manager.get_len()
            else:
                num_batch = n_total_train_data // (2 * batch_size)
            for batch_idx in range(num_batch):
                if dataset == 'chairs' or dataset == 'celeba':
                    idx = random.sample(range(n_total_train_data), 2 * batch_size)
                    batch_files = [training_images_list[i] for i in idx]
                    [images1, images2] = load_image(dir_name=img_dir, img_names=batch_files)
                elif dataset == 'dsprites':
                    [images1, images2] = manager.get_next()
                img1, img2 = images1, images2
                [l1, l2, _, _] = get_reconst_img(img1, img2)
                [loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)

                if batch_idx % 50 == 1:
                    header("******* epoch: {}/{} batch: {}/{} *******".format(epoch_idx, max_epoch, batch_idx, num_batch))
                    warn("Total Loss: {}".format(loss0))
                    warn("Siam loss: {}".format(loss1))
                    warn("kl1_loss: {}".format(loss2))
                    warn("kl2_loss: {}".format(loss3))
                    warn("reconst_err1: {}".format(loss4))
                    warn("reconst_err2: {}".format(loss5))

                if batch_idx % check_every_n == 1:
                    if dataset == 'chairs' or dataset == 'celeba':
                        idx = random.sample(range(len(training_images_list)), 2 * 5)
                        validate_batch_files = [training_images_list[i] for i in idx]
                        [images1, images2] = load_image(dir_name=img_dir, img_names=validate_batch_files)
                    elif dataset == 'dsprites':
                        [images1, images2] = manager.get_next()
                    [reconst1, reconst2, _, _] = get_reconst_img(images1, images2)

                    if dataset == 'chairs':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                    elif dataset == 'celeba':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                    elif dataset == 'dsprites':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)
                            # save_img = images1[img_idx].reshape(64, 64)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_ori.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)
                            # save_img = reconst1[img_idx].reshape(64, 64)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_rec.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir=sub_dir)

                if batch_idx % loss_check_n == 1:
                    train_writer.add_summary(summary, batch_idx)

            t_epoch_end = time.time()
            t_epoch_run = t_epoch_end - t_epoch_start
            if dataset == 'dsprites':
                t_check = manager.sample_size / t_epoch_run
                warn("==========================================")
                warn("Ran epoch {} in {} sec: {} images / sec".format(epoch_idx + 1, t_epoch_run, t_check))
                warn("==========================================")

            # if epoch_idx % save_model_freq == 0:
            if meta_saved:
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint',
                           global_step=epoch_idx, write_meta_graph=False)
            else:
                print("Save meta graph")
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint',
                           global_step=epoch_idx, write_meta_graph=True)
                meta_saved = True

    # Testing
    elif mode == 'test':
        # NOTE: this branch assumes `test` (image -> latent), `test_reconst`
        # (latent -> reconstruction), and `test_img_saver` are provided; none of
        # them are defined in this file.
        test_file_name = testing_images_list[0]
        test_img = load_single_img(dir_name=testing_img_dir, img_name=test_file_name)
        test_feature = 31
        test_variation = np.arange(-5, 5, 0.1)
        z = test(test_img)
        for idx in range(len(test_variation)):
            z_test = np.copy(z)
            z_test[0, test_feature] = z_test[0, test_feature] + test_variation[idx]
            reconst_test = test_reconst(z_test)
            test_save_img = np.squeeze(reconst_test[0])
            test_save_img = Image.fromarray(test_save_img)
            img_file_name = "test_feat_{}_var_({}).png".format(test_feature, test_variation[idx])
            test_img_saver.save(test_save_img, img_file_name, sub_dir=None)
        reconst_test = test_reconst(z)
        test_save_img = np.squeeze(reconst_test[0])
        test_save_img = Image.fromarray(test_save_img)
        img_file_name = "test_feat_{}_var_original.png".format(test_feature)
        test_img_saver.save(test_save_img, img_file_name, sub_dir=None)
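# --- illustrative sketch, not from the original source ---
# The test branch of train_net calls `test`, `test_reconst`, and
# `test_img_saver`, which are not defined in this file. One plausible way to
# build them with the same U.function pattern used above, assuming the model
# exposes a latent placeholder `model.z` feeding a decoder output
# `model.reconst_from_z` (both names are hypothetical):
#
#   test = U.function([img1], latent_z1_tp)                     # image -> latent
#   test_reconst = U.function([model.z], model.reconst_from_z)  # latent -> image
#   test_img_saver = Img_Saver(Img_dir=test_img_saver_dir)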
def main():
    # Base: https://openreview.net/pdf?id=Sy2fzU9gl

    # (1) Parse arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset')             # chairs, celeba, dsprites
    parser.add_argument('--mode')                # train, test
    parser.add_argument('--disentangled_feat', type=int)
    parser.add_argument('--num_gpus', type=int, default=1)
    args = parser.parse_args()
    dataset = args.dataset
    mode = args.mode
    disentangled_feat = args.disentangled_feat

    chkfile_name = "chk_{}_{}".format(dataset, disentangled_feat)
    logfile_name = "log_{}_{}".format(dataset, disentangled_feat)
    validatefile_name = "val_{}_{}".format(dataset, disentangled_feat)

    # (2) Dataset.
    if dataset == 'chairs':
        dir_name = "/dataset/chairs/training_img"
    elif dataset == 'celeba':
        dir_name = 'temporarily not available'
    elif dataset == 'dsprites':
        dir_name = '/dataset/dsprites'  # dummy; the dsprites dataset goes through DataManager instead
    else:
        header("Unknown dataset name")

    cur_dir = get_cur_dir()
    cur_dir = osp.join(cur_dir, 'dataset')
    cur_dir = osp.join(cur_dir, 'chairs')
    img_dir = osp.join(cur_dir, 'training_img')  # only used for the chairs dataset

    # (3) Experiment configuration and disentangled_feat, following beta-VAE
    # (https://openreview.net/pdf?id=Sy2fzU9gl).
    if dataset == 'chairs':
        latent_dim = 32
        loss_weight = {'siam': 50000.0, 'kl': 30000.0}
        batch_size = 32
        max_epoch = 300
        lr = 0.0001
    elif dataset == 'celeba':
        latent_dim = 32
        loss_weight = {'siam': 1000.0, 'kl': 30000.0}
        batch_size = 512
        max_epoch = 300
        lr = 0.0001
    elif dataset == 'dsprites':
        latent_dim = 10
        loss_weight = {'siam': 1.0, 'kl': 1.0}
        batch_size = 1024
        max_epoch = 300
        lr = 0.001
        # shape, rotation, size, x, y; unclear why the paper (p.6) lists only
        # 4 features -- needs further checking.
        feat_size = 5
        cls_batch_per_gpu = 15
        cls_L = 10
    entangled_feat = latent_dim - disentangled_feat

    # (4) Open the TensorFlow session. The optimal configuration is still open:
    # a single-threaded session should not be required here.
    # IMPORTANT: if we stop using a single-threaded session, this must change!
    # sess = U.single_threaded_session()
    sess = U.mgpu_session()
    sess.__enter__()
    set_global_seeds(0)
    num_gpus = args.num_gpus

    # Model setting
    # (5) Import the model (merged into models.py).
    # Only celeba has RGB channels; the others are black and white.
    if dataset == 'chairs':
        import models
        mynet = models.mymodel(name="mynet", img_shape=[64, 64, 1], latent_dim=latent_dim,
                               disentangled_feat=disentangled_feat, mode=mode, loss_weight=loss_weight)
        mynets = [mynet]  # keep the single model in a list so the calls below work for every dataset
    elif dataset == 'celeba':
        import models
        mynet = models.mymodel(name="mynet", img_shape=[64, 64, 3], latent_dim=latent_dim,
                               disentangled_feat=disentangled_feat, mode=mode, loss_weight=loss_weight)
        mynets = [mynet]
    elif dataset == 'dsprites':
        import models
        img_shape = [None, 64, 64, 1]
        img1 = U.get_placeholder(name="img1", dtype=tf.float32, shape=img_shape)
        img2 = U.get_placeholder(name="img2", dtype=tf.float32, shape=img_shape)
        feat_cls = U.get_placeholder(name="feat_cls", dtype=tf.int32, shape=None)
        # NOTE: these assert ops are created but never run; in TF1 they only take
        # effect when wired into the graph via control dependencies.
        tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
        tf.assert_equal(tf.floormod(tf.shape(img1)[0], num_gpus), 0)
        tf.assert_equal(tf.floormod(tf.shape(feat_cls)[0], num_gpus), 0)
        img1splits = tf.split(img1, num_gpus, 0)
        img2splits = tf.split(img2, num_gpus, 0)
        feat_cls_splits = tf.split(feat_cls, num_gpus, 0)
        mynets = []
        with tf.variable_scope(tf.get_variable_scope()):
            for gid in range(num_gpus):
                with tf.name_scope('gpu%d' % gid) as scope:
                    with tf.device('/gpu:%d' % gid):
                        mynet = models.mymodel(
                            name="mynet", img1=img1splits[gid], img2=img2splits[gid],
                            img_shape=img_shape[1:], latent_dim=latent_dim,
                            disentangled_feat=disentangled_feat, mode=mode,
                            loss_weight=loss_weight, feat_cls=feat_cls_splits[gid],
                            feat_size=feat_size, cls_L=cls_L,
                            cls_batch_per_gpu=cls_batch_per_gpu)
                        mynets.append(mynet)
                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()
    else:
        header("Unknown model name")

    # (6) Train or test the model.
    # Testing by adding noise to a latent feature is not merged yet; it will be finished soon.
    if mode == 'train':
        mgpu_train_net(models=mynets, num_gpus=num_gpus, mode=mode, img_dir=img_dir, dataset=dataset,
                       chkfile_name=chkfile_name, logfile_name=logfile_name,
                       validatefile_name=validatefile_name, entangled_feat=entangled_feat,
                       max_epoch=max_epoch, batch_size=batch_size, lr=lr)
        # train_net(model=mynets[0], mode=mode, img_dir=img_dir, dataset=dataset, chkfile_name=chkfile_name, logfile_name=logfile_name, validatefile_name=validatefile_name, entangled_feat=entangled_feat, max_epoch=max_epoch, batch_size=batch_size, lr=lr)
    elif mode == 'classifier_train':
        warn("Classifier Train")
        mgpu_classifier_train_net(models=mynets, num_gpus=num_gpus, cls_batch_per_gpu=cls_batch_per_gpu,
                                  cls_L=cls_L, mode=mode, img_dir=img_dir, dataset=dataset,
                                  chkfile_name=chkfile_name, logfile_name=logfile_name,
                                  validatefile_name=validatefile_name, entangled_feat=entangled_feat,
                                  max_epoch=max_epoch, batch_size=batch_size, lr=lr)
    elif mode == 'test':
        header("Need to be merged")
    else:
        header("Unknown mode name")
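# --- illustrative usage, not from the original source ---
# Assuming this script is saved as main.py (hypothetical name), training runs
# would look like:
#   python main.py --dataset dsprites --mode train --disentangled_feat 5 --num_gpus 2
#   python main.py --dataset chairs --mode train --disentangled_feat 8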
import os
import time
import sys
import tensorflow as tf
from itertools import count
from misc_util import get_cur_dir, warn, mkdir_p
import cv2
from utils.utils import label_to_gt_box2d, bbox_iou, random_distort_image, draw_bbox2d_on_image
import numpy as np
from config import cfg
# from data_aug import image_augmentation

cur_dir = get_cur_dir()
dataset_dir = os.path.join(cur_dir, 'data/object')
warn("dataset_dir: {}".format(dataset_dir))
dataset = 'training'
split_file = 'trainset.txt'
test_img_save_dir = 'test_img'
test_img_save_dir = os.path.join(cur_dir, test_img_save_dir)
mkdir_p(test_img_save_dir)


def image_augmentation(f_rgb, f_label, width, height, jitter, hue, saturation, exposure):
    rgb_imgs = []
    ious = []
    org_imgs = []
    label = np.array([line for line in open(f_label, 'r').readlines()])
    gt_box2d = label_to_gt_box2d(np.array(label)[np.newaxis, :], cls=cfg.DETECT_OBJ,
                                 coordinate='lidar')[0]  # (N', 4) x_min, y_min, x_max, y_max
    img = cv2.imread(f_rgb)
def cal_volume_loss(delta_a, delta_b, mask):
    loss = 0.0
    sigma = 3.0
    anchors = tf_cal_anchors()
    batch_boxes3d_a = tf_delta_to_boxes3d(delta_a, anchors)                # prediction
    batch_boxes3d_b = tf_delta_to_boxes3d(delta_b, anchors)                # ground truth
    batch_boxes3d_b_flipped = tf_delta_to_boxes3d(delta_b, anchors, True)  # ground truth, flipped

    mask = tf.reshape(mask, [-1, cfg.FEATURE_WIDTH * cfg.FEATURE_HEIGHT * 2])  # mask.reshape((batch_size, -1))
    ind = tf.equal(mask[:, :], 1.0)

    batch_boxes3d_a = tf.reshape(batch_boxes3d_a, [-1, 7])
    batch_boxes3d_b = tf.reshape(batch_boxes3d_b, [-1, 7])
    batch_boxes3d_b_flipped = tf.reshape(batch_boxes3d_b_flipped, [-1, 7])
    ind = tf.reshape(ind, [-1])

    center_boxes3d_a = tf.boolean_mask(batch_boxes3d_a, ind)  # (N, 7)
    center_boxes3d_b = tf.boolean_mask(batch_boxes3d_b, ind)
    center_boxes3d_b_flipped = tf.boolean_mask(batch_boxes3d_b_flipped, ind)

    corner_boxes3d_a = tf_center_to_corner_box3d(center_boxes3d_a, coordinate='lidar')
    corner_boxes3d_b = tf_center_to_corner_box3d(center_boxes3d_b, coordinate='lidar')
    iou = tf_calculate_rotation_iou(corner_boxes3d_a, center_boxes3d_a, corner_boxes3d_b, center_boxes3d_b)
    corner_boxes3d_b_flipped = tf_center_to_corner_box3d(center_boxes3d_b_flipped, coordinate='lidar')
    warn("smooth loss: {}".format(np.shape(loss)))

    # Per-box smooth-L1 against the ground truth and its flipped version; keep the smaller one.
    loss = tf.minimum(tf.reduce_sum(smooth_l1(corner_boxes3d_a, corner_boxes3d_b, sigma), [1, 2]),
                      tf.reduce_sum(smooth_l1(corner_boxes3d_a, corner_boxes3d_b_flipped, sigma), [1, 2]))
    warn("loss : {}".format(np.shape(loss)))
    warn("iou : {}".format(np.shape(iou)))
    a = tf.pow(1.0 - iou, 2.0)
    warn("pow iou: {}".format(np.shape(a)))
    loss = tf.reduce_sum(loss * tf.pow(1.0 - iou, 2.0))  # weight each box's loss by (1 - IoU)^2
    a = tf.reduce_sum(smooth_l1(corner_boxes3d_a, corner_boxes3d_b, sigma), [1, 2])
    warn("one loss: {}".format(np.shape(a)))
    warn("loss : {}".format(np.shape(loss)))
    # TODO: this reduction may be wrong -- instead of reduce_sum per box and then
    # taking the minimum, take the elementwise minimum first and then reduce_sum.

    # Normalize by the number of ground-truth/prediction boxes; without this the
    # loss grows with the number of boxes.
    divider = tf.maximum(tf.shape(corner_boxes3d_a)[0], 1)
    divider = tf.cast(divider, dtype=tf.float32)
    loss = tf.divide(loss, divider)
    return loss
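# --- illustrative sketch, not from the original source ---
# smooth_l1 is called above but not defined in this section. A common
# sigma-parameterized smooth-L1 (as used in Faster R-CNN / VoxelNet-style
# implementations) that matches the call signature smooth_l1(pred, target,
# sigma) and returns per-element losses for reduction by the caller:
def smooth_l1(deltas, targets, sigma=3.0):
    sigma2 = sigma * sigma
    diffs = tf.subtract(deltas, targets)
    # 1.0 where |diff| < 1/sigma^2 (quadratic region), 0.0 elsewhere (linear region)
    smooth_l1_signs = tf.cast(tf.less(tf.abs(diffs), 1.0 / sigma2), tf.float32)
    smooth_l1_option1 = tf.multiply(diffs, diffs) * 0.5 * sigma2   # 0.5 * (sigma * x)^2
    smooth_l1_option2 = tf.abs(diffs) - 0.5 / sigma2               # |x| - 0.5 / sigma^2
    return (tf.multiply(smooth_l1_option1, smooth_l1_signs)
            + tf.multiply(smooth_l1_option2, 1.0 - smooth_l1_signs))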
def cal_rpn_target(labels, feature_map_shape, anchors, cls='Car', calib_mats=None, coordinate='lidar'):
    # Input:
    #   labels: (N, N')
    #   feature_map_shape: (w, l)
    #   anchors: (w, l, 2, 7)
    # Output:
    #   pos_equal_one (N, w, l, 2)
    #   neg_equal_one (N, w, l, 2)
    #   targets (N, w, l, 14)
    # Attention: IoU is computed on the bird's-eye view.
    batch_size = labels.shape[0]
    # for idx in range(batch_size):
    #     warn("{} {}".format(idx, calib_mats[idx]))
    batch_gt_boxes3d = label_to_gt_box3d(labels, cls=cls, coordinate=coordinate, calib_mats=calib_mats)
    # Defined in eq. (1) in section 2.2.
    anchors_reshaped = anchors.reshape(-1, 7)
    # anchors_d = np.sqrt(anchors_reshaped[:, 4]**2 + anchors_reshaped[:, 5]**2)
    pos_equal_one = np.zeros((batch_size, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2))
    neg_equal_one = np.ones((batch_size, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2))
    targets = np.zeros((batch_size, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 14))

    anchor_origin = np.array([[0, 0, cfg.ANCHOR_W, cfg.ANCHOR_L],
                              [0, 0, cfg.ANCHOR_W, cfg.ANCHOR_L]])
    anchor_standup_2d_origin = anchor_to_standup_box2d(anchor_origin)
    anchor_rot = [0, 90 / 180 * np.pi]
    anchor_d = np.sqrt(cfg.ANCHOR_W**2 + cfg.ANCHOR_L**2)
    # warn("shape: {}".format(batch_gt_boxes3d[0][0, [0, 1, 4, 5, 6]]))

    for batch_id in range(batch_size):
        t0 = time.time()
        for t in range(len(batch_gt_boxes3d[batch_id])):
            gx, gy, gz, gh, gw, gl, gr = batch_gt_boxes3d[batch_id][t, [0, 1, 2, 3, 4, 5, 6]]
            if (gx > cfg.X_MAX) or (gx < cfg.X_MIN) or (gy > cfg.Y_MAX) or (gy < cfg.Y_MIN):
                warn("*** illegal data removed: {:.2f} {:.2f} ***".format(gx, gy))
                continue
            gx_ratio = (gx - cfg.X_MIN) / (cfg.X_MAX - cfg.X_MIN) * (cfg.FEATURE_WIDTH - 1)
            gy_ratio = (gy - cfg.Y_MIN) / (cfg.Y_MAX - cfg.Y_MIN) * (cfg.FEATURE_HEIGHT - 1)
            gi = int(gx_ratio)
            gj = int(gy_ratio)
            gt_box_origin = np.array([[0, 0, gw, gl, gr]])
            gt_standup_2d_origin = corner_to_standup_box2d(
                center_to_corner_box2d(gt_box_origin, coordinate=coordinate, calib_mat=calib_mats[batch_id]))
            # warn("anchor: {}".format(gt_standup_2d_origin))

            # Pick the anchor rotation (0 or 90 degrees) with the best bird's-eye-view IoU.
            best_iou = 0
            best_anchor = 0
            for anchor in range(len(anchor_origin)):
                iou = bbox_iou(anchor_standup_2d_origin[anchor], gt_standup_2d_origin[0], x1y1x2y2=True)
                if iou > best_iou:
                    best_iou = iou
                    best_anchor = anchor
                # warn("{} : iou : {}".format(anchor, iou))
                # warn("shape iou: {}".format(np.shape(iou)))
            # best_anchor_id = np.argmax(iou.T, axis=1)

            index_x = gi
            index_y = gj
            index_z = best_anchor
            # warn("{} : gx {:.2f} gy {:.2f} gx_ratio {:.2f} gy_ratio {:.2f} gw {:.2f} gl {:.2f} [ gi {} gj {} anchor {} ] iou {:.2f}".format(t, gx, gy, gx_ratio, gy_ratio, gw, gl, gi, gj, best_anchor, best_iou))
            pos_equal_one[batch_id, index_y, index_x, best_anchor] = 1
            neg_equal_one[batch_id, index_y, index_x, best_anchor] = 0
            targets[batch_id, index_y, index_x, np.array(index_z) * 7] = gx_ratio - gi
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 1] = gy_ratio - gj
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 2] = (gz - cfg.ANCHOR_Z) / cfg.ANCHOR_H
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 3] = np.log(gh / cfg.ANCHOR_H)
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 4] = np.log(gw / cfg.ANCHOR_W)
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 5] = np.log(gl / cfg.ANCHOR_L)
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 6] = gr - anchor_rot[best_anchor]
        # t1 = time.time()
        # warn("time for rpn : {}".format(t1 - t0))
        # warn("feature map :{} ".format(np.shape(targets)))

        # Legacy dense anchor-vs-GT assignment, kept for reference; it was a
        # bottleneck and is replaced by the per-ground-truth loop above.
        # # BOTTLENECK
        # anchors_standup_2d = anchor_to_standup_box2d(
        #     anchors_reshaped[:, [0, 1, 4, 5]])
        # # warn("anchor gt: {}".format(anchors_standup_2d[0:4]))
        # # BOTTLENECK
        # gt_standup_2d = corner_to_standup_box2d(center_to_corner_box2d(
        #     batch_gt_boxes3d[batch_id][:, [0, 1, 4, 5, 6]], coordinate=coordinate))
        # iou = bbox_overlaps(
        #     np.ascontiguousarray(anchors_standup_2d).astype(np.float32),
        #     np.ascontiguousarray(gt_standup_2d).astype(np.float32),
        # )
        # # find the anchor with the highest IoU (IoU should also be > 0)
        # id_highest = np.argmax(iou.T, axis=1)
        # id_highest_gt = np.arange(iou.T.shape[0])
        # mask = iou.T[id_highest_gt, id_highest] > 0
        # id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask]
        # # find anchors with iou > cfg.XXX_POS_IOU
        # id_pos, id_pos_gt = np.where(iou > cfg.RPN_POS_IOU)
        # # find anchors with iou < cfg.XXX_NEG_IOU
        # id_neg = np.where(np.sum(iou < cfg.RPN_NEG_IOU, axis=1) == iou.shape[1])[0]
        # id_pos = np.concatenate([id_pos, id_highest])
        # id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt])
        # # TODO: uniquify the array in a more scientific way
        # id_pos, index = np.unique(id_pos, return_index=True)
        # id_pos_gt = id_pos_gt[index]
        # id_neg.sort()
        # # calculate the targets and set the equal-one masks
        # index_x, index_y, index_z = np.unravel_index(id_pos, (*feature_map_shape, 2))
        # pos_equal_one[batch_id, index_x, index_y, index_z] = 1
        # for k in range(len(index_x)):
        #     warn("x {} y {} z {}".format(index_x[k], index_y[k], index_z[k]))
        # # warn("x: {}".format(index_x))
        # # warn("y: {}".format(index_y))
        # # ATTENTION: index_z should be np.array
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7] = (
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 0] - anchors_reshaped[id_pos, 0]) / anchors_d[id_pos]
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 1] = (
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 1] - anchors_reshaped[id_pos, 1]) / anchors_d[id_pos]
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 2] = (
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 2] - anchors_reshaped[id_pos, 2]) / cfg.ANCHOR_H
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 3] = np.log(
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 3] / anchors_reshaped[id_pos, 3])
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 4] = np.log(
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 4] / anchors_reshaped[id_pos, 4])
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 5] = np.log(
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 5] / anchors_reshaped[id_pos, 5])
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 6] = (
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 6] - anchors_reshaped[id_pos, 6])
        # index_x, index_y, index_z = np.unravel_index(id_neg, (*feature_map_shape, 2))
        # neg_equal_one[batch_id, index_x, index_y, index_z] = 1
        # # avoid a box being pos/neg at the same time
        # index_x, index_y, index_z = np.unravel_index(id_highest, (*feature_map_shape, 2))
        # neg_equal_one[batch_id, index_x, index_y, index_z] = 0
    return pos_equal_one, neg_equal_one, targets