Example #1
def main():

	object_dir = os.path.join(dataset_dir, dataset)
	f_rgb, f_label, f_line = [], [], []
	for line in open(split_file, 'r').readlines():
		line = line[:-1] # remove '\n'
		f_rgb.append(os.path.join(object_dir, 'image_2', line+'.png'))
		f_label.append(os.path.join(object_dir, 'label_2', line+'.txt'))
		f_line.append(line)

	width, height = 64, 64
	max_error = 0.1

	jitter = 0.1
	hue = 0.1
	saturation = 1.5
	exposure = 1.5


	for load_index in range(1, 5):  # debug subset; a full pass would use range(len(f_rgb))
		warn("{} / {}".format(load_index, len(f_rgb)))
		img = cv2.imread(f_rgb[load_index])
		save_dir = os.path.join(dataset_dir, dataset)
		org_imgs, cropped_img, ious = image_augmentation(f_rgb[load_index], f_label[load_index], width, height, jitter, hue, saturation, exposure)
		save_file = os.path.join(test_img_save_dir, '{}.png'.format(f_line[load_index]))

		label = np.array([line for line in open(f_label[load_index], 'r').readlines()])
		gt_box2d = label_to_gt_box2d(np.array(label)[np.newaxis, :], cls=cfg.DETECT_OBJ, coordinate='lidar')[0]  # (N', 4) x_min, y_min, x_max, y_max

		img = draw_bbox2d_on_image(img, gt_box2d)

		cv2.imwrite(save_file, img)
		for index in range(len(cropped_img)):
			save_file = os.path.join(test_img_save_dir, '{}_{}.png'.format(f_line[load_index], index))
			cv2.imwrite(save_file, cropped_img[index])
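Note: this example depends on a label_to_gt_box2d helper that is not shown. As a reference point, a minimal sketch of the assumed behavior for a single KITTI label_2 file follows (the name parse_kitti_2d_boxes and the exact filtering are assumptions, not the repo's code):

import numpy as np

def parse_kitti_2d_boxes(label_lines, cls='Car'):
    """Return an (N, 4) float array of [x_min, y_min, x_max, y_max] for cls."""
    boxes = []
    for line in label_lines:
        fields = line.strip().split(' ')
        if fields[0] != cls:
            continue
        # KITTI label columns 4..7 hold the 2D bbox: left, top, right, bottom
        boxes.append([float(v) for v in fields[4:8]])
    return np.array(boxes, dtype=np.float32).reshape(-1, 4)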
Example #2
def main():
    ratio = 0.85  # 0.85 for training, 0.15 for validation
    cur_dir = get_cur_dir()
    dataset_dir = os.path.join(cur_dir, 'data/object/training')
    # img_dir = os.path.join(dataset_dir, 'image_2')
    files = glob.glob(os.path.join(dataset_dir, 'image_2', '*.png'))
    files.sort()
    files = [file.split('/')[-1].split('.')[-2] for file in files]
    np.random.shuffle(files)
    warn("file: {}".format(files))
    warn("total : {}".format(len(files)))
    num_train = int(ratio * len(files))

    train_set = files[:num_train]
    valid_set = files[num_train:]

    warn("train: {}".format(len(train_set)))
    warn("valid: {}".format(len(valid_set)))

    nt = len(train_set)
    nv = len(valid_set)

    with open('trainset.txt', 'w+') as f:
        for idx in range(nt):
            f.write(train_set[idx] + '\n')
    with open('validset.txt', 'w+') as f:
        for idx in range(nv):
            f.write(valid_set[idx] + '\n')

    warn("total: {}".format(nt + nv))
Example #3
    def classifier_net(self, z1, z2, feat_size, latent_dim, cls_L,
                       cls_batch_per_gpu):
        with tf.variable_scope("classifier") as scope:
            z1 = tf.reshape(z1, (cls_batch_per_gpu, -1, latent_dim))
            z2 = tf.reshape(z2, (cls_batch_per_gpu, -1, latent_dim))
            warn("z1: {}".format(np.shape(z1)))
            z_diff = U.sum(z1 - z2, axis=1) / cls_L
            warn("z_diff: {}".format(np.shape(z_diff)))
            x = U.dense(z_diff, feat_size, 'cls_fc1', U.normc_initializer(1.0))
        return x
Example #4
def load_checkpoints(load_requested = True, checkpoint_dir = get_cur_dir()):
    saver = tf.train.Saver(max_to_keep = None)
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    chkpoint_num = 0
    if checkpoint and checkpoint.model_checkpoint_path and load_requested:
        saver.restore(get_session(), checkpoint.model_checkpoint_path)
        chk_file = checkpoint.model_checkpoint_path.split('/')
        chk_file = chk_file[-1]
        chk_file = chk_file.split('-')
        chkpoint_num = int(chk_file[-1])
        warn("loaded checkpoint: {0}".format(checkpoint.model_checkpoint_path))
    else:
        warn("Could not find old checkpoint")
        if not os.path.exists(checkpoint_dir):
            mkdir_p(checkpoint_dir)
    return saver, chkpoint_num
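Note: a typical call site for this helper, assuming the get_session()/graph setup used elsewhere in this codebase (a sketch, not taken from the source):

max_iter = 100000
saver, start_step = load_checkpoints(load_requested=True, checkpoint_dir='chkfiles')
for num_iter in range(start_step + 1, max_iter):
    ...  # run training; call saver.save(...) periodically to extend the checkpoint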
Example #5
    def fill_queue(self):
        # warn("fill_queue here")
        load_index = self.load_index
        self.load_index += 1
        if self.load_index >= self.num_file:
            if not self.is_testset:  # the test set just ends (no wrap-around)
                if self.require_shuffle:
                    self.shuffle_dataset()
                load_index = 0
                self.load_index = load_index + 1
            else:
                self.work_exit.value = True

        labels, tag, rgb = [], [], []

        width, height = 64, 64
        max_error = 0.1

        jitter = 0.1
        hue = 0.1
        saturation = 1.5
        exposure = 1.5

        # img = cv2.imread(f_rgb[load_index])
        # save_dir = os.path.join(dataset_dir, dataset)
        # warn("before img aug")
        cropped_imgs, confs = image_augmentation(self.f_rgb[load_index],
                                                 self.f_label[load_index],
                                                 width, height, jitter, hue,
                                                 saturation, exposure,
                                                 self.ratioPosNeg)
        # warn("num img: {} num confs: {}".format(len(cropped_imgs), len(confs)))

        try:
            for idx in range(len(cropped_imgs)):
                self.dataset_queue.put_nowait((cropped_imgs[idx], confs[idx]))
            # warn("inserted: {}".format(len(cropped_imgs)))
            load_index += 1
        except:  # most likely queue.Full raised by put_nowait on a full queue
            warn("fail")
            if not self.is_testset:  # the test set just ends (no wrap-around)
                self.load_index = 0
                if self.require_shuffle:
                    self.shuffle_dataset()
            else:
                self.work_exit.value = True
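Note: the bare except above most likely guards against a full queue; put_nowait on a multiprocessing.Queue raises queue.Full from the standard queue module. A small, deterministic illustration with queue.Queue (same exception type):

import queue

q = queue.Queue(maxsize=2)  # multiprocessing.Queue raises the same queue.Full
q.put_nowait('a')
q.put_nowait('b')
try:
    q.put_nowait('c')  # the third item does not fit
except queue.Full:
    print("queue full; dropping sample")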
Example #6
def tf_points_in_boxes(boxes, points):
    # boxes: N,4,2
    # points: N,NP,2
    # return: N,NP boolean mask indicating whether each point lies inside the corresponding box

    boxes_x = boxes[:,:,0] # N,4
    boxes_y = boxes[:,:,1] # N,4

    min_x = tf.reduce_min(boxes_x, axis=1)    # N,
    max_x = tf.reduce_max(boxes_x, axis=1)
    min_y = tf.reduce_min(boxes_y, axis=1)
    max_y = tf.reduce_max(boxes_y, axis=1)   


    points_x = points[:,:,0] # N,NP
    points_y = points[:,:,1]

    min_x = tf.expand_dims(min_x, -1)
    max_x = tf.expand_dims(max_x, -1)
    min_y = tf.expand_dims(min_y, -1)
    max_y = tf.expand_dims(max_y, -1)   

    minx = tf.greater_equal(points_x, min_x) # N,NP
    maxx = tf.less_equal(points_x, max_x)
    miny = tf.greater_equal(points_y, min_y)
    maxy = tf.less_equal(points_y, max_y)

    x_cond = tf.logical_and(minx, maxx)
    y_cond = tf.logical_and(miny, maxy)
    points_in_boxes = tf.logical_and(x_cond, y_cond)

    # inside = tf.ones_like(points_x)
    # outside = tf.zeros_like(points_x)

    # points_in_boxes = tf.where(rec_cond, inside, outside)
    warn("points_in_boxes: {}".format(np.shape(points_in_boxes)))

    return points_in_boxes
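Note: the same axis-aligned containment test written with NumPy broadcasting, as a compact reference for the TF version above (a sketch mirroring its shapes):

import numpy as np

def np_points_in_boxes(boxes, points):
    """boxes: (N, 4, 2) corner sets; points: (N, NP, 2) -> (N, NP) bool."""
    min_xy = boxes.min(axis=1, keepdims=True)          # (N, 1, 2)
    max_xy = boxes.max(axis=1, keepdims=True)          # (N, 1, 2)
    inside = (points >= min_xy) & (points <= max_xy)   # (N, NP, 2)
    return inside.all(axis=-1)                         # (N, NP)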
Example #7
def image_augmentation(f_rgb, f_label, width, height, jitter, hue, saturation, exposure):
	rgb_imgs = []
	ious = []
	org_imgs = []
	label = np.array([line for line in open(f_label, 'r').readlines()])
	gt_box2d = label_to_gt_box2d(np.array(label)[np.newaxis, :], cls=cfg.DETECT_OBJ, coordinate='lidar')[0]  # (N', 4) x_min, y_min, x_max, y_max

	src_img = cv2.imread(f_rgb)  # read once; every crop below slices this array
	warn("img value: {}".format(src_img[:3, :3, :3]))

	# warn("{} shape: {}".format(f_rgb, src_img.shape))
	img_height, img_width = src_img.shape[:2]
	# warn("height: {}, width: {}".format(img_height, img_width))

	for idx in range(len(gt_box2d)):
		box = gt_box2d[idx]
		# warn("box {}: {}".format(idx, box))
		x_min, y_min, x_max, y_max = box
		x_min = int(x_min)
		y_min = int(y_min)
		x_max = int(x_max)
		y_max = int(y_max)

		ori_img = cv2.resize(src_img[y_min:y_max, x_min:x_max], (64, 64))
		org_imgs.append(ori_img)

		box_height = y_max - y_min
		box_width = x_max - x_min

		dx = int(jitter * box_width) + 1
		dy = int(jitter * box_height) + 1

		# warn("dx : {} dy : {}".format(dx, dy))

		lx = np.random.randint(-dx, dx)
		ly = np.random.randint(-dy, dy)

		lw = np.random.randint(-dx, dx)
		lh = np.random.randint(-dy, dy)

		x = (x_max + x_min)/2.0 + lx
		y = (y_max + y_min)/2.0 + ly
		box_height = box_height + lh
		box_width = box_width + lw

		x_min = int(max(0, x - box_width/2.0))
		x_max = int(min(img_width, x + box_width/2.0))
		y_min = int(max(0, y - box_height/2.0))
		y_max = int(min(img_height, y + box_height/2.0))


		flip = np.random.randint(2)  # 0 or 1: flip half of the crops horizontally

		img = cv2.resize(src_img[y_min:y_max, x_min:x_max], (width, height))

		if flip:
		    img = cv2.flip(img, 1)
		img = random_distort_image(img, hue, saturation, exposure)
		# for ground truth img, calculate iou with its original location, size

		iou = bbox_iou(box, (x_min, y_min, x_max, y_max), x1y1x2y2=True)


		rgb_imgs.append(img)
		ious.append(iou)



	# Randomly generate background candidates (here 4x the number of gt boxes) that will have low or zero iou.
	# After generating the new boxes, calculate the iou against each of the gt_box2d entries;
	# this iou is what the network will be trained to infer.
	# If the inferred iou is low, the bounding box is empty, background, or falsely located.
	# If the inferred iou is high, the bounding box is correctly inferred from the 3D bounding boxes.
	# This is the strategy taken for a simple, mini 2D classifier.

	for idx in range(len(gt_box2d)*4):
		x = np.random.randint(0, img_width)
		y = np.random.randint(0, img_height)
		h = np.random.randint(40, 200)
		w = np.random.randint(40, 200)
		x_min = int(max(0, x - w/2.0))
		x_max = int(min(img_width, x + w/2.0))
		y_min = int(max(0, y - h/2.0))
		y_max = int(min(img_height, y + h/2.0))

		max_iou = 0

		for gt_idx in range(len(gt_box2d)):
			box = gt_box2d[gt_idx]
			iou = bbox_iou(box, (x_min, y_min, x_max, y_max), x1y1x2y2=True)
			if iou > max_iou:
				max_iou = iou

		img = cv2.resize(src_img[y_min:y_max, x_min:x_max], (width, height))
		flip = np.random.randint(2)  # sample flip independently for each background crop
		if flip:
			img = cv2.flip(img, 1)
		img = random_distort_image(img, hue, saturation, exposure)
		rgb_imgs.append(img)
		ious.append(max_iou)


	return org_imgs, rgb_imgs, ious
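Note: bbox_iou is a helper defined elsewhere; for x1y1x2y2=True it is assumed to compute the standard corner-format IoU, roughly like this sketch:

def bbox_iou_xyxy(box_a, box_b):
    """Boxes as (x_min, y_min, x_max, y_max); returns IoU in [0, 1]."""
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0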
Example #8
def train_net(model, img_dir, max_iter = 100000, check_every_n = 20, save_model_freq = 1000, batch_size = 128):
	img1 = U.get_placeholder_cached(name="img1")
	img2 = U.get_placeholder_cached(name="img2")

	mean_loss1 = U.mean(model.match_error)
	mean_loss2 = U.mean(model.reconst_error1)
	mean_loss3 = U.mean(model.reconst_error2)

	decoded_img = [model.reconst1, model.reconst2]

	weight_loss = [1, 1, 1]

	compute_losses = U.function([img1, img2], [mean_loss1, mean_loss2, mean_loss3])
	lr = 0.00001
	optimizer=tf.train.AdamOptimizer(learning_rate=lr, epsilon = 0.01/batch_size)

	all_var_list = model.get_trainable_variables()

	img1_var_list = [v for v in all_var_list if v.name.split("/")[1].startswith("proj1") or v.name.split("/")[1].startswith("unproj1")]
	img2_var_list = [v for v in all_var_list if v.name.split("/")[1].startswith("proj2") or v.name.split("/")[1].startswith("unproj2")]


	img1_loss = mean_loss1 + mean_loss2
	img2_loss = mean_loss1 + mean_loss3

	optimize_expr1 = optimizer.minimize(img1_loss, var_list=img1_var_list)
	optimize_expr2 = optimizer.minimize(img2_loss, var_list=img2_var_list)

	img1_train = U.function([img1, img2], [mean_loss1, mean_loss2, mean_loss3], updates = [optimize_expr1])
	img2_train = U.function([img1, img2], [mean_loss1, mean_loss2, mean_loss3], updates = [optimize_expr2])

	get_reconst_img = U.function([img1, img2], decoded_img)

	U.initialize()

	name = "test"
	cur_dir = get_cur_dir()
	chk_save_dir = os.path.join(cur_dir, "chkfiles")
	log_save_dir = os.path.join(cur_dir, "log")
	test_img_saver_dir = os.path.join(cur_dir, "test_images")

	saver, chk_file_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
	test_img_saver = Img_Saver(test_img_saver_dir)

	meta_saved = False

	iter_log = []
	loss1_log = []
	loss2_log = []
	loss3_log = []

	training_images_list = read_dataset(img_dir)

	for num_iter in range(chk_file_num+1, max_iter):
		header("******* {}th iter: Img {} side *******".format(num_iter, num_iter%2 + 1))

		idx = random.sample(range(len(training_images_list)), batch_size)
		batch_files = [training_images_list[i] for i in idx]
		[images1, images2] = load_image(dir_name = img_dir, img_names = batch_files)
		img1, img2 = images1, images2
		# args = images1, images2
		if num_iter%2 == 0:
			[loss1, loss2, loss3] = img1_train(img1, img2)
		elif num_iter%2 == 1:
			[loss1, loss2, loss3] = img2_train(img1, img2)		
		warn("match_error: {}".format(loss1))
		warn("reconst_err1: {}".format(loss2))
		warn("reconst_err2: {}".format(loss3))
		warn("num_iter: {} check: {}".format(num_iter, check_every_n))
		if num_iter % check_every_n == 1:
			idx = random.sample(range(len(training_images_list)), 10)
			test_batch_files = [training_images_list[i] for i in idx]
			[images1, images2] = load_image(dir_name = img_dir, img_names = test_batch_files)
			[reconst1, reconst2] = get_reconst_img(images1, images2)
			for img_idx in range(len(images1)):
				sub_dir = "iter_{}".format(num_iter)

				save_img = np.squeeze(images1[img_idx])
				save_img = Image.fromarray(save_img)
				img_file_name = "{}_ori_2d.jpg".format(test_batch_files[img_idx])				
				test_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

				save_img = np.squeeze(images2[img_idx])
				save_img = Image.fromarray(save_img)
				img_file_name = "{}_ori_3d.jpg".format(test_batch_files[img_idx])				
				test_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

				save_img = np.squeeze(reconst1[img_idx])
				save_img = Image.fromarray(save_img)
				img_file_name = "{}_rec_2d.jpg".format(test_batch_files[img_idx])				
				test_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

				save_img = np.squeeze(reconst2[img_idx])
				save_img = Image.fromarray(save_img)
				img_file_name = "{}_rec_3d.jpg".format(test_batch_files[img_idx])				
				test_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

		if num_iter > 11 and num_iter % save_model_freq == 1:
			if meta_saved:
				saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = num_iter, write_meta_graph = False)
			else:
				print("Save meta graph")
				saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = num_iter, write_meta_graph = True)
				meta_saved = True
Example #9
def tf_calculate_rotation_iou(boxes_corner_a, boxes_center_a, boxes_corner_b, boxes_center_b):
    # boxes_corner_a : predicted corner boxes => N,8,3
    # boxes_corner_b : ground truth boxes     => N,8,3
    # boxes_center_a : predicted center boxes => N,7
    # boxes_center_b : ground truth boxes => N,7

    # (1) get max boundaries

    # input: boxes_corner_a [N, 8, 3] and boxes_corner_b [N, 8, 3]; only x and y are used
    # output: max_boundaries: 2 * (N,4,2) => 2 boxes, 4 points, x and y 

    boxes_standup_a, boxes_standup_b = tf_corner_to_standup(boxes_corner_a, boxes_corner_b)


    # (2) distribute points onto maximum boundaries
    # number of points to be distributed: 20 x 20 => 400
    # output: num_of_points: (NP, 2): NP: number of points, 2: x, y

    grid_points = tf_random_gen_points()
    grid_points = tf.expand_dims(grid_points, 0)

    N = tf.shape(boxes_standup_a)[0]
    warn("grid_points: {}".format(np.shape(grid_points)))

    grid_points = tf.tile(grid_points, [N,1,1]) # N,400,2
    warn("grid_points: {}".format(np.shape(grid_points)))

    # (3) first rotation

    # input: num_of_points:(N,NP,2), boxes_corner_a:(N,4,2), and boxes_corner_b:(N,4,2), rotation: N

    angle_a = boxes_center_a[:,6] # (N,)
    rot_a = np.pi/2 - angle_a # (N,)
    rot_boxes_standup_a = tf_rot_points(boxes_standup_a, rot_a)
    rot_points_grid = tf_rot_points(grid_points, rot_a)
    # (3-1) select points
    points_in_boxes_a = tf_points_in_boxes(rot_boxes_standup_a, rot_points_grid) # N,NP
    warn("boxes_center_a: {}".format(np.shape(boxes_center_a)))
    warn("angle_a: {}".format(np.shape(angle_a)))

    warn("rot_a: {}".format(np.shape(rot_a)))

    # (4) second rotation
    angle_b = boxes_center_b[:,6] # (N,)
    rot_b = np.pi/2 - angle_b # (N,)
    rot_boxes_standup_b = tf_rot_points(boxes_standup_b, rot_b)
    rot_points_grid = tf_rot_points(grid_points, rot_b)
    # (4-1) select points
    points_in_boxes_b = tf_points_in_boxes(rot_boxes_standup_b, rot_points_grid) # N,NP
    warn("boxes_center_b: {}".format(np.shape(boxes_center_b)))
    warn("angle_b: {}".format(np.shape(angle_b)))

    warn("rot_b: {}".format(np.shape(rot_b)))    # (4-1) select points

    points_in_intersection = tf.logical_and(points_in_boxes_a, points_in_boxes_b)

    warn("points_in_intersection: {}".format(np.shape(points_in_intersection)))

    # (5) calculate ratio between number of points in union and number of points in intersection


    inside = tf.ones_like(points_in_boxes_a, tf.float32)
    outside = tf.zeros_like(points_in_boxes_a, tf.float32)
    num_points_in_boxes_a = tf.where(points_in_boxes_a, inside, outside)
    warn("num_points_in_boxes_a 1: {}".format(np.shape(num_points_in_boxes_a)))
    num_points_in_boxes_a = tf.reduce_sum(num_points_in_boxes_a, axis=1) # N,
    warn("num_points_in_boxes_a 2: {}".format(np.shape(num_points_in_boxes_a)))
    num_points_in_boxes_b = tf.where(points_in_boxes_b, inside, outside)
    num_points_in_boxes_b = tf.reduce_sum(num_points_in_boxes_b, axis=1)
    num_points_in_intersection = tf.where(points_in_intersection, inside, outside)
    warn("num_points_in_intersection 1: {}".format(np.shape(num_points_in_intersection))) # N,400
    num_points_in_intersection = tf.reduce_sum(num_points_in_intersection, axis=1) # N,

    num_points_in_union = num_points_in_boxes_a + num_points_in_boxes_b - num_points_in_intersection

    iou = tf.divide(num_points_in_intersection, num_points_in_union) 
    warn("iou: {}".format(np.shape(iou)))

    return iou
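Note: the point-sampling approximation above can be prototyped in plain NumPy for a single pair of rotated rectangles. This sketch (box format assumed as center x, y, width, length, yaw) samples a grid over the joint bounding box and counts containment after rotating into each box's frame:

import numpy as np

def approx_rotated_iou(box_a, box_b, n=40):
    def corners(b):
        cx, cy, w, l, yaw = b
        local = np.array([[w, l], [w, -l], [-w, -l], [-w, l]]) / 2.0
        c, s = np.cos(yaw), np.sin(yaw)
        return local @ np.array([[c, -s], [s, c]]).T + [cx, cy]
    pts = np.vstack([corners(box_a), corners(box_b)])
    xs = np.linspace(pts[:, 0].min(), pts[:, 0].max(), n)
    ys = np.linspace(pts[:, 1].min(), pts[:, 1].max(), n)
    grid = np.stack(np.meshgrid(xs, ys), -1).reshape(-1, 2)
    def inside(b):
        cx, cy, w, l, yaw = b
        c, s = np.cos(-yaw), np.sin(-yaw)
        local = (grid - [cx, cy]) @ np.array([[c, -s], [s, c]]).T
        return (np.abs(local[:, 0]) <= w / 2) & (np.abs(local[:, 1]) <= l / 2)
    in_a, in_b = inside(box_a), inside(box_b)
    union = np.count_nonzero(in_a | in_b)
    return np.count_nonzero(in_a & in_b) / union if union else 0.0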
Example #10
    def __init__(self, input, alpha=1.5, beta=1, sigma=3, training=True, name=''):
        # scale = [batchsize, 10, 400/200, 352/240, 128] should be the output of feature learning network
        self.input = input  
        self.training = training
        # ground truth (target) for each anchor box, represented as Δx, Δy, Δz, Δl, Δw, Δh, rotation
        self.targets = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 14]) 
        # => wip: add confidence(iou) here for yolo style
        # => pos_equal_one is actually conf_mask in yolo code
        # self.conf_target = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2]) 
        # positive anchors equal one and the others equal zero (2 anchors per position)
        self.pos_equal_one = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2])
        self.pos_equal_one_sum = tf.placeholder(tf.float32, [None, 1, 1, 1])
        self.pos_equal_one_for_reg = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 14])
        # negative anchors equal one and the others equal zero
        self.neg_equal_one = tf.placeholder(tf.float32, [None, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2])
        self.neg_equal_one_sum = tf.placeholder(tf.float32, [None, 1, 1, 1])

        with tf.variable_scope('MiddleAndRPN_' + name):
            # convolutional middle layers
            temp_conv = ConvMD(3, 128, 64, 3, (2, 1, 1), (1, 1, 1), self.input, name='conv1')
            temp_conv = ConvMD(3, 64, 64, 3, (1, 1, 1), (0, 1, 1), temp_conv, name='conv2')
            temp_conv = ConvMD(3, 64, 64, 3, (2, 1, 1), (1, 1, 1), temp_conv, name='conv3')
            temp_conv = tf.transpose(temp_conv, perm = [0, 2, 3, 4, 1])
            temp_conv = tf.reshape(temp_conv, [-1, cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 128])
            # => batch, 400, 352, 128

            #rpn
            #block1:
            temp_conv = ConvMD(2, 128, 128, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv4')
            # => batch, 400, 352, 128
            temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size = 2, strides = 2, name = 'maxpool1')
            # => batch, 200, 176, 128
            temp_conv = ConvMD(2, 128, 256, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv5')
            # => batch, 200, 176, 256
            temp_conv = ConvMD(2, 256, 128, 1, (1, 1), (0, 0), temp_conv, training=self.training, name='conv6')
            # => batch, 200, 176, 128
            temp_conv = ConvMD(2, 128, 256, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv7')
            # => batch, 200, 176, 256
            temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size = 2, strides = 2, name = 'maxpool2')
            # => batch, 100, 88, 256
            temp_conv = ConvMD(2, 256, 512, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv8')
            # => batch, 100, 88, 512
            temp_conv = ConvMD(2, 512, 128, 1, (1, 1), (0, 0), temp_conv, training=self.training, name='conv9')
            # => batch, 100, 88, 128
            route_1 = ConvMD(2, 128, 256, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv10')
            # => batch, 100, 88, 256

            temp_conv = ConvMD(2, 256, 128, 1, (1, 1), (0, 0), route_1, training=self.training, name='conv11')
            # => batch, 100, 88, 128
            temp_conv = ConvMD(2, 128, 256, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv12')
            # => batch, 100, 88, 256
            temp_conv = tf.layers.max_pooling2d(temp_conv, pool_size = 2, strides = 2, name = 'maxpool3')
            # => batch, 50, 44, 256
            temp_conv = ConvMD(2, 256, 512, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv13')
            # => batch, 50, 44, 512
            temp_conv = ConvMD(2, 512, 256, 1, (1, 1), (0, 0), temp_conv, training=self.training, name='conv14')
            # => batch, 50, 44, 256
            route_2 = ConvMD(2, 256, 512, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv15')
            # => batch, 50, 44, 512

            temp_conv = ConvMD(2, 256, 64, 3, (1, 1), (1, 1), route_1, training=self.training, name='conv16')
            # warn("shape: {}".format(np.shape(temp_conv)))
            # => batch, 100, 88, 64
            temp_conv = Reorg(2, temp_conv, name = 'reorg1')
            # => batch, 50, 44, 256
            temp_conv = tf.concat([temp_conv, route_2], axis = -1, name = 'concat1')
            # => batch, 50, 44, 768
            temp_conv = ConvMD(2, 768, 128, 3, (1, 1), (1, 1), temp_conv, training=self.training, name='conv17')
            # => batch, 50, 44, 128
            p_map = ConvMD(2, 128, 2, 1, (1, 1), (0, 0), temp_conv, training=self.training, name='conv18')
            r_map = ConvMD(2, 128, 14, 1, (1, 1), (0, 0), temp_conv, training=self.training, activation = False, name='conv19')
            warn("rmap shape:{}".format(np.shape(r_map)))

            self.p_pos = tf.sigmoid(p_map)
            self.output_shape = [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH]

            x_pos_0 = tf.expand_dims(tf.sigmoid(r_map[..., 0]), -1)
            y_pos_0 = tf.expand_dims(tf.sigmoid(r_map[..., 1]), -1)
            x_pos_1 = tf.expand_dims(tf.sigmoid(r_map[..., 7]), -1)
            y_pos_1 = tf.expand_dims(tf.sigmoid(r_map[..., 8]), -1)

            r_map = tf.concat([x_pos_0, y_pos_0, r_map[:,:,:,2:7], x_pos_1, y_pos_1, r_map[:,:,:,9:14]], axis=-1)         

            warn("rmap shape:{}".format(np.shape(r_map)))
        
            # TODO: sometimes still get inf cls loss
            # wip: change to yolo style

            object_scale = 1.0
            non_object_scale = 1.0


            # self.cls_loss = object_scale * (self.pos_equal_one * tf.square(self.p_pos - self.conf_target)) / self.pos_equal_one_sum\
            #                 + non_object_scale * self.neg_equal_one * tf.square(self.p_pos - self.conf_target) / self.neg_equal_one_sum
            # self.cls_loss = tf.reduce_sum(self.cls_loss)

            self.cls_loss = alpha * (-self.pos_equal_one * tf.log(self.p_pos + small_addon_for_BCE)) / self.pos_equal_one_sum \
             + beta * (-self.neg_equal_one * tf.pow(self.p_pos, 2.0) * tf.log(1 - self.p_pos + small_addon_for_BCE)) / self.neg_equal_one_sum
            self.cls_loss = tf.reduce_sum(self.cls_loss)

            # alpha_tf = 0.25
            # gamma = 2
            # pred_pt = tf.where(tf.equal(self.pos_equal_one, 1.0), self.p_pos, 1.0 - self.p_pos)
            # alpha_t = tf.scalar_mul(alpha_tf, tf.ones_like(self.pos_equal_one, dtype=tf.float32))
            # alpha_t = tf.where(tf.equal(self.pos_equal_one, 1.0), alpha_t, 1.0 - alpha_t)

            # self.focal_loss = tf.reduce_sum(-alpha_t * tf.pow(1.0 - pred_pt, gamma) * tf.log(pred_pt + small_addon_for_BCE))

            self.reg_loss = smooth_l1(r_map * self.pos_equal_one_for_reg, self.targets * self.pos_equal_one_for_reg, sigma) / self.pos_equal_one_sum
            self.reg_loss = tf.reduce_sum(self.reg_loss)
            self.corner_loss = tf.cond(tf.equal(tf.shape(self.targets * self.pos_equal_one_for_reg)[0], 0), lambda: return_zero(), \
                lambda: cal_volume_loss(r_map * self.pos_equal_one_for_reg, self.targets * self.pos_equal_one_for_reg, self.pos_equal_one))
            # self.corner_loss = tf.reduce_sum(self.corner_loss)
            self.loss = tf.reduce_sum(1.0 * self.cls_loss) + tf.reduce_sum(10.0*self.corner_loss)

            self.delta_output = r_map 
            self.prob_output = self.p_pos
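Note: smooth_l1, return_zero, and cal_volume_loss are repo helpers not shown here. For reference, the standard sigma-parameterized Smooth-L1 that such a helper usually implements can be sketched as (an assumption, not the repo's definition):

import tensorflow as tf

def smooth_l1_sketch(deltas, targets, sigma):
    # 0.5 * (sigma * x)^2 where |x| < 1 / sigma^2, else |x| - 0.5 / sigma^2
    sigma2 = sigma ** 2
    diff = deltas - targets
    abs_diff = tf.abs(diff)
    small = tf.cast(tf.less(abs_diff, 1.0 / sigma2), tf.float32)
    return (small * 0.5 * sigma2 * tf.square(diff)
            + (1.0 - small) * (abs_diff - 0.5 / sigma2))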
Example #11
def mgpu_train_net(models, num_gpus, mode, img_dir, dataset, chkfile_name, logfile_name, validatefile_name, entangled_feat, max_epoch = 300, check_every_n = 500, loss_check_n = 10, save_model_freq = 5, batch_size = 512, lr = 0.001):
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")

    feat_cls = U.get_placeholder_cached(name="feat_cls")

    # batch size must be a multiple of ntowers (the number of GPUs);
    # NOTE: in TF1 these assert ops only take effect when wrapped in control dependencies
    ntowers = len(models)
    tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
    tf.assert_equal(tf.floormod(tf.shape(img1)[0], ntowers), 0)

    img1splits = tf.split(img1, ntowers, 0)
    img2splits = tf.split(img2, ntowers, 0)

    tower_vae_loss = []
    tower_latent_z1_tp = []
    tower_latent_z2_tp = []
    tower_losses = []
    tower_siam_max = []
    tower_reconst1 = []
    tower_reconst2 = []
    tower_cls_loss = []
    for gid, model in enumerate(models):
        with tf.name_scope('gpu%d' % gid) as scope:
            with tf.device('/gpu:%d' % gid):

                vae_loss = U.mean(model.vaeloss)
                latent_z1_tp = model.latent_z1
                latent_z2_tp = model.latent_z2
                losses = [U.mean(model.vaeloss),
                          U.mean(model.siam_loss),
                          U.mean(model.kl_loss1),
                          U.mean(model.kl_loss2),
                          U.mean(model.reconst_error1),
                          U.mean(model.reconst_error2),
                          ]
                siam_max = U.mean(model.max_siam_loss)
                cls_loss = U.mean(model.cls_loss)

                tower_vae_loss.append(vae_loss)
                tower_latent_z1_tp.append(latent_z1_tp)
                tower_latent_z2_tp.append(latent_z2_tp)
                tower_losses.append(losses)
                tower_siam_max.append(siam_max)
                tower_reconst1.append(model.reconst1)
                tower_reconst2.append(model.reconst2)
                tower_cls_loss.append(cls_loss)

                tf.summary.scalar('Total Loss', losses[0])
                tf.summary.scalar('Siam Loss', losses[1])
                tf.summary.scalar('kl1_loss', losses[2])
                tf.summary.scalar('kl2_loss', losses[3])
                tf.summary.scalar('reconst_err1', losses[4])
                tf.summary.scalar('reconst_err2', losses[5])
                tf.summary.scalar('Siam Max', siam_max)

    vae_loss = U.mean(tower_vae_loss)
    siam_max = U.mean(tower_siam_max)
    latent_z1_tp = tf.concat(tower_latent_z1_tp, 0)
    latent_z2_tp = tf.concat(tower_latent_z2_tp, 0)
    model_reconst1 = tf.concat(tower_reconst1, 0)
    model_reconst2 = tf.concat(tower_reconst2, 0)
    cls_loss = U.mean(tower_cls_loss)

    losses = [[] for _ in range(len(losses))]  # one accumulator per loss term; `losses` here leaks from the last tower iteration
    for tl in tower_losses:
        for i, l in enumerate(tl):
            losses[i].append(l)

    losses = [U.mean(l) for l in losses]
    siam_normal = losses[1] / entangled_feat

    tf.summary.scalar('total/Total Loss', losses[0])
    tf.summary.scalar('total/Siam Loss', losses[1])
    tf.summary.scalar('total/kl1_loss', losses[2])
    tf.summary.scalar('total/kl2_loss', losses[3])
    tf.summary.scalar('total/reconst_err1', losses[4])
    tf.summary.scalar('total/reconst_err2', losses[5])
    tf.summary.scalar('total/Siam Normal', siam_normal)
    tf.summary.scalar('total/Siam Max', siam_max)

    compute_losses = U.function([img1, img2], vae_loss)

    all_var_list = model.get_trainable_variables()
    vae_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("vae")]
    cls_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("cls")]

    warn("{}".format(all_var_list))
    warn("==========================")
    warn("{}".format(vae_var_list))
    # warn("==========================")
    # warn("{}".format(cls_var_list))

    # with tf.device('/cpu:0'):
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon = 0.01/batch_size)
    optimize_expr1 = optimizer.minimize(vae_loss, var_list=vae_var_list)

    feat_cls_optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    optimize_expr2 = feat_cls_optimizer.minimize(cls_loss, var_list=cls_var_list)


    merged = tf.summary.merge_all()
    train = U.function([img1, img2],
                        [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5], latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr1])


    get_reconst_img = U.function([img1, img2], [model_reconst1, model_reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))

    train_writer = U.summary_writer(dir = log_save_dir)

    U.initialize()

    saver, chk_file_epoch_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir = validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir = validate_img_saver_dir) # Black and White, temporary usage
    else:
        warn("Unknown dataset Error")
        # break

    warn("dataset: {}".format(dataset))
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")
        # break

    meta_saved = False

    if mode == 'train':
        for epoch_idx in range(chk_file_epoch_num+1, max_epoch):
            t_epoch_start = time.time()
            num_batch = manager.get_len()  # NOTE: manager only exists when dataset == 'dsprites' (see above)

            for batch_idx in range(num_batch):
                if dataset == 'chairs' or dataset == 'celeba':
                    idx = random.sample(range(n_total_train_data), 2*batch_size)
                    batch_files = [training_images_list[i] for i in idx]
                    [images1, images2] = load_image(dir_name = img_dir, img_names = batch_files)
                elif dataset == 'dsprites':
                    [images1, images2] = manager.get_next()
                img1, img2 = images1, images2
                [l1, l2, _, _] = get_reconst_img(img1, img2)

                [loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)

                if batch_idx % 50 == 1:
                    header("******* epoch: {}/{} batch: {}/{} *******".format(epoch_idx, max_epoch, batch_idx, num_batch))
                    warn("Total Loss: {}".format(loss0))
                    warn("Siam loss: {}".format(loss1))
                    warn("kl1_loss: {}".format(loss2))
                    warn("kl2_loss: {}".format(loss3))
                    warn("reconst_err1: {}".format(loss4))
                    warn("reconst_err2: {}".format(loss5))

                if batch_idx % check_every_n == 1:
                    if dataset == 'chairs' or dataset == 'celeba':
                        idx = random.sample(range(len(training_images_list)), 2*5)
                        validate_batch_files = [training_images_list[i] for i in idx]
                        [images1, images2] = load_image(dir_name = img_dir, img_names = validate_batch_files)
                    elif dataset == 'dsprites':
                        [images1, images2] = manager.get_next()

                    [reconst1, reconst2, _, _] = get_reconst_img(images1, images2)

                    if dataset == 'chairs':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)
                    elif dataset == 'celeba':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)
                    elif dataset == 'dsprites':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)

                            # save_img = images1[img_idx].reshape(64, 64)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_ori.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            # save_img = reconst1[img_idx].reshape(64, 64)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_rec.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                if batch_idx % loss_check_n == 1:
                    train_writer.add_summary(summary, batch_idx)

            t_epoch_end = time.time()
            t_epoch_run = t_epoch_end - t_epoch_start
            if dataset == 'dsprites':
                t_check = manager.sample_size / t_epoch_run

                warn("==========================================")
                warn("Run {} th epoch in {} sec: {} images / sec".format(epoch_idx+1, t_epoch_run, t_check))
                warn("==========================================")


            if meta_saved:
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = epoch_idx, write_meta_graph = False)
            else:
                print("Save meta graph")
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = epoch_idx, write_meta_graph = True)
                meta_saved = True
Example #12
def train_net(model, manager, chkfile_name, logfile_name, validatefile_name, entangled_feat, max_iter = 6000001, check_every_n = 1000, loss_check_n = 10, save_model_freq = 5000, batch_size = 32):
	img1 = U.get_placeholder_cached(name="img1")
	img2 = U.get_placeholder_cached(name="img2")


	# Testing
	# img_test = U.get_placeholder_cached(name="img_test")
	# reconst_tp = U.get_placeholder_cached(name="reconst_tp")


	vae_loss = U.mean(model.vaeloss)

	latent_z1_tp = model.latent_z1
	latent_z2_tp = model.latent_z2

	losses = [U.mean(model.vaeloss),
			U.mean(model.siam_loss),
			U.mean(model.kl_loss1), 
			U.mean(model.kl_loss2), 
			U.mean(model.reconst_error1), 
			U.mean(model.reconst_error2), 
			]

	siam_normal = losses[1]/entangled_feat		
	siam_max = U.mean(model.max_siam_loss)

	tf.summary.scalar('Total Loss', losses[0])
	tf.summary.scalar('Siam Loss', losses[1])
	tf.summary.scalar('kl1_loss', losses[2])
	tf.summary.scalar('kl2_loss', losses[3])
	tf.summary.scalar('reconst_err1', losses[4])
	tf.summary.scalar('reconst_err2', losses[5])
	tf.summary.scalar('Siam Normal', siam_normal)
	tf.summary.scalar('Siam Max', siam_max)

	# decoded_img = [model.reconst1, model.reconst2]


	compute_losses = U.function([img1, img2], vae_loss)
	lr = 0.005
	optimizer=tf.train.AdagradOptimizer(learning_rate=lr)

	all_var_list = model.get_trainable_variables()

	# print all_var_list
	img1_var_list = all_var_list
	#[v for v in all_var_list if v.name.split("/")[1].startswith("proj1") or v.name.split("/")[1].startswith("unproj1")]
	optimize_expr1 = optimizer.minimize(vae_loss, var_list=img1_var_list)
	merged = tf.summary.merge_all()
	train = U.function([img1, img2], 
						[losses[0], losses[1], losses[2], losses[3], losses[4], losses[5], latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr1])
	get_reconst_img = U.function([img1, img2], [model.reconst1_mean, model.reconst2_mean, latent_z1_tp, latent_z2_tp])
	get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])


	# testing
	# test = U.function([img_test], model.latent_z_test)
	# test_reconst = U.function([reconst_tp], [model.reconst_test])

	cur_dir = get_cur_dir()
	chk_save_dir = os.path.join(cur_dir, chkfile_name)
	log_save_dir = os.path.join(cur_dir, logfile_name)
	validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
	# test_img_saver_dir = os.path.join(cur_dir, "test_images")
	# testing_img_dir = os.path.join(cur_dir, "dataset/test_img")
	
	train_writer = U.summary_writer(dir = log_save_dir)


	U.initialize()

	saver, chk_file_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
	validate_img_saver = BW_Img_Saver(validate_img_saver_dir)

	# testing
	# test_img_saver = Img_Saver(test_img_saver_dir)

	meta_saved = False

	iter_log = []
	loss1_log = []
	loss2_log = []
	loss3_log = []

	training_images_list = manager.imgs
	# read_dataset(img_dir)
	n_total_train_data = len(training_images_list)

	# testing_images_list = read_dataset(testing_img_dir)
	# n_total_testing_data = len(testing_images_list)

	training = True
	testing = False

	if training:
		for num_iter in range(chk_file_num+1, max_iter):
			header("******* {}th iter: *******".format(num_iter))

			idx = random.sample(range(n_total_train_data), 2*batch_size)
			batch_files = idx
			# print batch_files
			[images1, images2] = manager.get_images(indices = idx)
			img1, img2 = images1, images2
			[l1, l2, _, _] = get_reconst_img(img1, img2)

			[loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)	

			warn("Total Loss: {}".format(loss0))
			warn("Siam loss: {}".format(loss1))
			warn("kl1_loss: {}".format(loss2))
			warn("kl2_loss: {}".format(loss3))
			warn("reconst_err1: {}".format(loss4))
			warn("reconst_err2: {}".format(loss5))

			# warn("num_iter: {} check: {}".format(num_iter, check_every_n))
			# warn("Total Loss: {}".format(loss6))
			if num_iter % check_every_n == 1:
				header("******* {}th iter: *******".format(num_iter))
				idx = random.sample(range(len(training_images_list)), 2*5)
				[images1, images2] = manager.get_images(indices = idx)
				[reconst1, reconst2, _, _] = get_reconst_img(images1, images2)
				# for i in range(len(latent1[0])):
				# 	print "{} th: {:.2f}".format(i, np.mean(np.abs(latent1[:, i] - latent2[:, i])))
				for img_idx in range(len(images1)):
					sub_dir = "iter_{}".format(num_iter)

					save_img = images1[img_idx].reshape(64, 64)
					save_img = save_img.astype(np.float32)
					img_file_name = "{}_ori.jpg".format(img_idx)				
					validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

					save_img = reconst1[img_idx].reshape(64, 64)
					save_img = save_img.astype(np.float32)
					img_file_name = "{}_rec.jpg".format(img_idx)				
					validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

			if num_iter % loss_check_n == 1:
				train_writer.add_summary(summary, num_iter)

			if num_iter > 11 and num_iter % save_model_freq == 1:
				if meta_saved:
					saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = num_iter, write_meta_graph = False)
				else:
					print("Save meta graph")
					saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = num_iter, write_meta_graph = True)
					meta_saved = True
Example #13
    def __init__(self,
                 object_dir='.',
                 queue_size=20,
                 require_shuffle=False,
                 is_testset=True,
                 batch_size=1,
                 use_multi_process_num=0,
                 split_file='',
                 valid_file='',
                 multi_gpu_sum=1):
        assert (use_multi_process_num >= 0)
        self.object_dir = object_dir
        self.is_testset = is_testset
        self.use_multi_process_num = use_multi_process_num if not self.is_testset else 1
        self.require_shuffle = require_shuffle if not self.is_testset else False
        self.batch_size = batch_size if not self.is_testset else 1
        self.split_file = split_file
        self.valid_file = valid_file
        self.multi_gpu_sum = multi_gpu_sum
        self.progress = 0

        # warn("dir: {}".format(self.object_dir))

        if self.split_file != '':
            # use split file
            _tag = []
            self.f_rgb, self.f_lidar, self.f_label, self.f_calib = [], [], [], []
            self.f_voxel = []
            for line in open(self.split_file, 'r').readlines():
                line = line[:-1]  # remove '\n'
                _tag.append(line)
                self.f_rgb.append(
                    os.path.join(self.object_dir, 'image_2', line + '.png'))
                self.f_lidar.append(
                    os.path.join(self.object_dir, 'velodyne', line + '.bin'))
                self.f_label.append(
                    os.path.join(self.object_dir, 'label_2', line + '.txt'))
                self.f_calib.append(
                    os.path.join(self.object_dir, 'calib', line + '.txt'))

            self.f_rgb_valid = []
            self.f_lidar_valid = []
            self.f_label_valid = []
            self.f_calib_valid = []

            for line in open(self.valid_file, 'r').readlines():
                line = line[:-1]  # remove '\n'
                self.f_rgb_valid.append(
                    os.path.join(self.object_dir, 'image_2', line + '.png'))
                self.f_lidar_valid.append(
                    os.path.join(self.object_dir, 'velodyne', line + '.bin'))
                self.f_label_valid.append(
                    os.path.join(self.object_dir, 'label_2', line + '.txt'))
                self.f_calib_valid.append(
                    os.path.join(self.object_dir, 'calib', line + '.txt'))

            # build the tag list once, after the loop
            self.data_tag_valid = [
                name.split('/')[-1].split('.')[-2]
                for name in self.f_label_valid
            ]

        else:
            self.f_rgb = glob.glob(
                os.path.join(self.object_dir, 'image_2', '*.png'))
            self.f_rgb.sort()
            self.f_lidar = glob.glob(
                os.path.join(self.object_dir, 'velodyne', '*.bin'))
            self.f_lidar.sort()
            self.f_label = glob.glob(
                os.path.join(self.object_dir, 'label_2', '*.txt'))
            self.f_label.sort()
            self.f_calib = glob.glob(
                os.path.join(self.object_dir, 'calib', '*.txt'))
            self.f_calib.sort()

        self.data_tag = [
            name.split('/')[-1].split('.')[-2] for name in self.f_label
        ]
        # assert(len(self.f_rgb) == len(self.f_lidar) == len(self.f_label) == len(self.data_tag))
        warn("{} {} {} {}".format(len(self.f_label), len(self.data_tag),
                                  len(self.f_lidar), len(self.f_calib)))
        assert (len(self.f_label) == len(self.data_tag) == len(self.f_rgb) ==
                len(self.f_lidar))
        self.dataset_size = len(self.f_label)
        self.validset_size = len(self.f_label_valid) if self.split_file != '' else 0  # f_label_valid only exists in the split-file branch
        self.already_extract_data = 0
        self.cur_frame_info = ''

        # warn("Dataset total length: {}".format(len(self.f_label)))
        if self.require_shuffle:
            self.shuffle_dataset()

        self.queue_size = queue_size
        # must use the Queue from the multiprocessing module (only it can be shared across processes)
        self.dataset_queue = Queue()

        self.load_index = 0
        if self.use_multi_process_num == 0:
            self.loader_worker = [
                threading.Thread(target=self.loader_worker_main,
                                 args=(self.batch_size, ))
            ]
        else:
            self.loader_worker = [
                Process(target=self.loader_worker_main,
                        args=(self.batch_size, ))
                for i in range(self.use_multi_process_num)
            ]
        self.work_exit = Value('i', 0)
        [i.start() for i in self.loader_worker]

        # This operation is not thread-safe
        self.rgb_shape = (cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH, 3)
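Note: a hypothetical consumer of this loader on the training side; the class name KittiLoader is illustrative, and only dataset_queue and work_exit appear in the snippet above:

loader = KittiLoader(object_dir='./data/object/training',
                     split_file='trainset.txt', valid_file='validset.txt',
                     require_shuffle=True, is_testset=False, batch_size=2)
while not loader.work_exit.value:
    sample = loader.dataset_queue.get()  # blocks until a worker enqueues
    ...  # feed the sample to the network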
Example #14
    def load_specified_train(self, load_indices=None):
        # Load without data augmentation
        labels, tag, voxel, doubled_voxel, rgb, raw_lidar, calib = [], [], [], [], [], [], []
        voxel_size = np.array(
            [cfg.VOXEL_Z_SIZE, cfg.VOXEL_Y_SIZE, cfg.VOXEL_X_SIZE],
            dtype=np.float32)
        double_voxel_size = 2 * voxel_size

        if load_indices is None:
            load_indices = np.random.randint(len(self.f_rgb),
                                             size=self.batch_size)

        for load_index in load_indices:
            try:
                t0 = time.time()
                rgb.append(
                    cv2.resize(cv2.imread(self.f_rgb[load_index]),
                               (cfg.IMAGE_WIDTH, cfg.IMAGE_HEIGHT)))
                lidar = np.fromfile(self.f_lidar[load_index],
                                    dtype=np.float32).reshape((-1, 4))

                calib_file = self.f_lidar[load_index].replace(
                    'velodyne', 'calib').replace('bin', 'txt')
                lidar = clip_by_projection(lidar, calib_file, cfg.IMAGE_HEIGHT,
                                           cfg.IMAGE_WIDTH)

                raw_lidar.append(lidar)
                calib.append(read_calib_mat(self.f_calib[load_index]))

                labels.append([
                    line for line in open(self.f_label[load_index],
                                          'r').readlines()
                ])
                tag.append(self.data_tag[load_index])
                voxel.append(
                    voxelize(file=self.f_lidar[load_index],
                             lidar=lidar,
                             voxel_size=voxel_size,
                             T=cfg.VOXEL_POINT_COUNT))
                doubled_voxel.append(
                    voxelize(file=self.f_lidar[load_index],
                             lidar=lidar,
                             voxel_size=double_voxel_size,
                             T=cfg.VOXEL_POINT_COUNT))
                t1 = time.time()
                # warn("load success")

            except:  # loading/voxelization failed for this frame
                warn("Load Specified: Loading Error!! {}".format(
                    self.data_tag[load_index]))

        # only for voxel -> [gpu, k_single_batch, ...]
        vox_feature, vox_number, vox_coordinate = [], [], []

        single_batch_size = int(self.batch_size / self.multi_gpu_sum)
        for idx in range(self.multi_gpu_sum):
            # warn("single")
            _, per_vox_feature, per_vox_number, per_vox_coordinate = build_input(
                voxel[idx * single_batch_size:(idx + 1) * single_batch_size])
            vox_feature.append(per_vox_feature)
            vox_number.append(per_vox_number)
            vox_coordinate.append(per_vox_coordinate)

        doubled_vox_feature, doubled_vox_number, doubled_vox_coordinate = [], [], []
        for idx in range(self.multi_gpu_sum):
            # warn("doubled")
            _, per_vox_feature, per_vox_number, per_vox_coordinate = build_input(
                doubled_voxel[idx * single_batch_size:(idx + 1) *
                              single_batch_size])
            doubled_vox_feature.append(per_vox_feature)
            doubled_vox_number.append(per_vox_number)
            doubled_vox_coordinate.append(per_vox_coordinate)

        ret = (np.array(tag), np.array(labels), np.array(vox_feature),
               np.array(vox_number), np.array(vox_coordinate),
               np.array(doubled_vox_feature), np.array(doubled_vox_number),
               np.array(doubled_vox_coordinate), np.array(rgb),
               np.array(raw_lidar), np.array(calib))

        return ret
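Note: voxelize and build_input come from the repo's preprocessing code and are not shown. The core of VoxelNet-style voxelization is mapping each point to an integer voxel coordinate; a rough sketch under assumed arguments (range_min is hypothetical):

import numpy as np

def voxel_coords(lidar, voxel_size, range_min):
    """lidar: (P, 4) x, y, z, reflectance -> (P, 3) integer (z, y, x) voxel indices."""
    zyx = lidar[:, [2, 1, 0]]  # reorder to match the (z, y, x) cfg ordering above
    return np.floor((zyx - range_min) / voxel_size).astype(np.int32)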
Example #15
    def __init__(
            self,
            cls='Car',
            single_batch_size=2,  # batch_size_per_gpu
            learning_rate=0.001,
            max_gradient_norm=5.0,
            alpha=1.5,
            beta=1,
            is_train=True,
            avail_gpus=['0']):
        # hyper parameters and status
        self.cls = cls
        self.single_batch_size = single_batch_size
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.global_step = tf.Variable(1, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)
        self.alpha = alpha
        self.beta = beta
        self.avail_gpus = avail_gpus

        lr = tf.train.exponential_decay(self.learning_rate, self.global_step,
                                        10000, 0.96)

        # build graph
        # input placeholders

        self.imgs = []
        self.confs = []
        # => wip: add confidence(iou) here for yolo style
        # => pos_equal_one is actually conf_mask in yolo code
        # self.conf_target = []

        self.prob_output = []
        self.opt = tf.train.AdamOptimizer(lr)
        self.gradient_norm = []
        self.tower_grads = []
        self.batch_loss = []
        with tf.variable_scope(tf.get_variable_scope()):
            for idx, dev in enumerate(self.avail_gpus):
                with tf.device('/gpu:{}'.format(dev)), tf.name_scope(
                        'gpu_{}'.format(dev)):
                    # must use name scope here since we do not want to create new variables
                    # graph
                    vggnet = vgg(training=is_train,
                                 batch_size=self.single_batch_size,
                                 name='vgg')

                    tf.get_variable_scope().reuse_variables()

                    # input
                    self.imgs.append(vggnet.imgs)
                    self.confs.append(vggnet.conf)

                    # output
                    prob_output = vggnet.prob

                    # loss and grad
                    self.loss = vggnet.loss
                    self.params = tf.trainable_variables()
                    gradients = tf.gradients(self.loss, self.params)
                    clipped_gradients, gradient_norm = tf.clip_by_global_norm(
                        gradients, max_gradient_norm)

                    self.prob_output.append(prob_output)
                    self.tower_grads.append(clipped_gradients)
                    self.gradient_norm.append(gradient_norm)
                    self.batch_loss.append(self.loss)

        # loss and optimizer
        # self.xxxloss is only the loss for the lowest tower
        with tf.device('/gpu:{}'.format(self.avail_gpus[0])):
            self.grads = average_gradients(self.tower_grads)
            self.update = self.opt.apply_gradients(
                zip(self.grads, self.params), global_step=self.global_step)
            self.gradient_norm = tf.group(*self.gradient_norm)

        self.prob_output = tf.concat(self.prob_output, axis=0)

        warn("batch loss1: {}".format(np.shape(self.batch_loss)))
        self.batch_loss = tf.reduce_sum(self.batch_loss)
        warn("batch loss2: {}".format(np.shape(self.batch_loss)))

        # # for predict and image summary
        # self.rgb = tf.placeholder(tf.uint8, [None, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH, 3])

        # self.bv = tf.placeholder(tf.uint8, [
        #                          None, cfg.BV_LOG_FACTOR * cfg.INPUT_HEIGHT, cfg.BV_LOG_FACTOR * cfg.INPUT_WIDTH, 3])
        # self.bv_heatmap = tf.placeholder(tf.uint8, [
        #     None, cfg.BV_LOG_FACTOR * cfg.FEATURE_HEIGHT, cfg.BV_LOG_FACTOR * cfg.FEATURE_WIDTH, 3])
        # self.boxes2d = tf.placeholder(tf.float32, [None, 4])
        # self.boxes2d_scores = tf.placeholder(tf.float32, [None])

        # # NMS(2D)
        # with tf.device('/gpu:{}'.format(self.avail_gpus[0])):
        #     self.box2d_ind_after_nms = tf.image.non_max_suppression(self.boxes2d, self.boxes2d_scores, max_output_size=cfg.RPN_NMS_POST_TOPK, iou_threshold=cfg.RPN_NMS_THRESH)

        # summary and saver
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=10,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)

        self.train_summary = tf.summary.merge([
            tf.summary.scalar('train/loss', self.loss),
            # tf.summary.scalar('train/reg_loss', self.reg_loss),
            # tf.summary.scalar('train/cls_loss', self.cls_loss),
        ] + [tf.summary.histogram(each.name, each) for each in self.params])

        self.validate_summary = tf.summary.merge(
            [tf.summary.scalar('validate/loss', self.loss)])
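Note: average_gradients follows the classic TF1 multi-tower pattern; since the code above zips its result with self.params, each tower entry is assumed to be a plain list of gradient tensors. A compact sketch:

import tensorflow as tf

def average_gradients_sketch(tower_grads):
    """tower_grads: one list of gradient tensors per GPU -> averaged list."""
    averaged = []
    for grads in zip(*tower_grads):              # group one variable's gradients
        stacked = tf.stack(list(grads), axis=0)  # (n_towers, ...)
        averaged.append(tf.reduce_mean(stacked, axis=0))
    return averaged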
Example #16
    def __init__(self, training, batch_size, name=''):
        super(vgg, self).__init__()
        self.training = training

        # scalar
        self.batch_size = batch_size
        # [ΣK, 64, 64, 3]
        self.imgs = tf.placeholder(tf.float32, [None, 64, 64, 3], name='img')
        self.conf = tf.placeholder(tf.float32, [None], name='conf')
        start_time = time.time()
        print("build model started")
        # rgb_scaled = rgb * 255.0

        # Convert RGB to BGR
        red, green, blue = tf.split(axis=3,
                                    num_or_size_splits=3,
                                    value=self.imgs)
        assert red.get_shape().as_list()[1:] == [64, 64, 1]
        assert green.get_shape().as_list()[1:] == [64, 64, 1]
        assert blue.get_shape().as_list()[1:] == [64, 64, 1]

        x = tf.concat(axis=3,
                      values=[
                          blue - VGG_MEAN[0],
                          green - VGG_MEAN[1],
                          red - VGG_MEAN[2],
                      ])

        with tf.variable_scope(name, reuse=tf.AUTO_REUSE) as scope:
            temp_conv = ConvMD(2,
                               3,
                               16,
                               3, (1, 1), (1, 1),
                               x,
                               training=self.training,
                               name='conv1_1')
            # => [None, 64, 64, 16]
            temp_conv = ConvMD(2,
                               16,
                               16,
                               3, (1, 1), (1, 1),
                               temp_conv,
                               training=self.training,
                               name='conv1_2')
            # => [None, 64, 64, 16]
            temp_conv = tf.layers.max_pooling2d(temp_conv,
                                                pool_size=2,
                                                strides=2,
                                                name='maxpool1')
            # => [None, 32, 32, 16]
            temp_conv = ConvMD(2,
                               16,
                               16,
                               3, (1, 1), (1, 1),
                               temp_conv,
                               training=self.training,
                               name='conv2_1')
            # => [None, 32, 32, 16]
            temp_conv = ConvMD(2,
                               16,
                               16,
                               3, (1, 1), (1, 1),
                               temp_conv,
                               training=self.training,
                               name='conv2_2')
            # => [None, 32, 32, 16]
            temp_conv = tf.layers.max_pooling2d(temp_conv,
                                                pool_size=2,
                                                strides=2,
                                                name='maxpool2')
            # => [None, 16, 16, 16]
            temp_conv = ConvMD(2,
                               16,
                               16,
                               3, (1, 1), (1, 1),
                               temp_conv,
                               training=self.training,
                               name='conv3_1')
            # => [None, 16, 16, 16]
            temp_conv = ConvMD(2,
                               16,
                               16,
                               3, (1, 1), (1, 1),
                               temp_conv,
                               training=self.training,
                               name='conv3_2')
            # => [None, 16, 16, 16]
            temp_conv = tf.layers.max_pooling2d(temp_conv,
                                                pool_size=2,
                                                strides=2,
                                                name='maxpool3')
            # => [None, 8, 8, 16]
            warn("shape: {}".format(np.shape(temp_conv)))
            temp = tf.layers.flatten(temp_conv, name='flatten')
            # => [None, 64 * 16]
            warn("shape: {}".format(np.shape(temp)))
            temp = tf.nn.relu(self.dense(temp, 32, 'dense1'))
            warn("shape: {}".format(np.shape(temp)))
            self.prob = tf.nn.sigmoid(self.dense(temp, 1, 'prob'))
            warn("shape: {}".format(np.shape(self.prob)))
            self.loss = tf.reduce_mean(
                -self.conf * tf.log(self.prob + small_addon_for_BCE) -
                (1 - self.conf) * tf.log(1 - self.prob + small_addon_for_BCE))
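
# Note: `small_addon_for_BCE` is not defined in this listing; it is presumably a
# small constant such as 1e-6 that keeps tf.log() away from log(0). A hedged,
# self-contained sketch of the same binary cross-entropy, plus a numerically
# safer variant computed from logits (all names here are illustrative):
import tensorflow as tf

small_addon_for_BCE = 1e-6  # assumed value; the original constant is not shown

def bce_from_prob(conf, prob):
    # clipped-log BCE, as in the loss above
    return tf.reduce_mean(-conf * tf.log(prob + small_addon_for_BCE)
                          - (1 - conf) * tf.log(1 - prob + small_addon_for_BCE))

def bce_from_logits(conf, logits):
    # stable alternative: let TF fold the sigmoid into the cross-entropy
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=conf, logits=logits))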
Esempio n. 17
0
def mgpu_classifier_train_net(models, num_gpus, cls_batch_per_gpu, cls_L, mode, img_dir, dataset, chkfile_name, logfile_name, validatefile_name, entangled_feat, max_epoch = 300, check_every_n = 500, loss_check_n = 10, save_model_freq = 5, batch_size = 512, lr = 0.001):
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")

    feat_cls = U.get_placeholder_cached(name="feat_cls")

    # batch size must be multiples of ntowers (# of GPUs)
    ntowers = len(models)
    # NOTE: these assert ops are documentation only; in graph mode they have no
    # effect unless they are explicitly run or attached as control dependencies
    tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
    tf.assert_equal(tf.floormod(tf.shape(img1)[0], ntowers), 0)

    img1splits = tf.split(img1, ntowers, 0)
    img2splits = tf.split(img2, ntowers, 0)

    tower_vae_loss = []
    tower_latent_z1_tp = []
    tower_latent_z2_tp = []
    tower_losses = []
    tower_siam_max = []
    tower_reconst1 = []
    tower_reconst2 = []
    tower_cls_loss = []
    for gid, model in enumerate(models):
        with tf.name_scope('gpu%d' % gid) as scope:
            with tf.device('/gpu:%d' % gid):

                vae_loss = U.mean(model.vaeloss)
                latent_z1_tp = model.latent_z1
                latent_z2_tp = model.latent_z2
                losses = [U.mean(model.vaeloss),
                          U.mean(model.siam_loss),
                          U.mean(model.kl_loss1),
                          U.mean(model.kl_loss2),
                          U.mean(model.reconst_error1),
                          U.mean(model.reconst_error2),
                          ]
                siam_max = U.mean(model.max_siam_loss)
                cls_loss = U.mean(model.cls_loss)

                tower_vae_loss.append(vae_loss)
                tower_latent_z1_tp.append(latent_z1_tp)
                tower_latent_z2_tp.append(latent_z2_tp)
                tower_losses.append(losses)
                tower_siam_max.append(siam_max)
                tower_reconst1.append(model.reconst1)
                tower_reconst2.append(model.reconst2)
                tower_cls_loss.append(cls_loss)

                tf.summary.scalar('Cls Loss', cls_loss)

    vae_loss = U.mean(tower_vae_loss)
    siam_max = U.mean(tower_siam_max)
    latent_z1_tp = tf.concat(tower_latent_z1_tp, 0)
    latent_z2_tp = tf.concat(tower_latent_z2_tp, 0)
    model_reconst1 = tf.concat(tower_reconst1, 0)
    model_reconst2 = tf.concat(tower_reconst2, 0)
    cls_loss = U.mean(tower_cls_loss)

    # regroup per-tower losses: losses[i] collects loss type i across all towers
    losses = [[] for _ in range(len(losses))]
    for tl in tower_losses:
        for i, l in enumerate(tl):
            losses[i].append(l)

    losses = [U.mean(l) for l in losses]
    siam_normal = losses[1] / entangled_feat

    tf.summary.scalar('total/cls_loss', cls_loss)

    compute_losses = U.function([img1, img2], vae_loss)

    all_var_list = model.get_trainable_variables()
    vae_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("vae")]
    cls_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("cls")]
    warn("{}".format(all_var_list))
    warn("=======================")
    warn("{}".format(vae_var_list))
    warn("=======================")
    warn("{}".format(cls_var_list))

    # with tf.device('/cpu:0'):
    # optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon = 0.01/batch_size)
    # optimize_expr1 = optimizer.minimize(vae_loss, var_list=vae_var_list)

    feat_cls_optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    optimize_expr2 = feat_cls_optimizer.minimize(cls_loss, var_list=cls_var_list)

    merged = tf.summary.merge_all()
    # train = U.function([img1, img2],
    #                     [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5], latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr1])

    classifier_train = U.function([img1, img2, feat_cls],
                        [cls_loss, latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr2])

    get_reconst_img = U.function([img1, img2], [model_reconst1, model_reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    cls_logfile_name = 'cls_{}'.format(logfile_name)
    cls_log_save_dir = os.path.join(cur_dir, cls_logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))

    cls_train_writer = U.summary_writer(dir = cls_log_save_dir)

    U.initialize()

    saver, chk_file_epoch_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir = validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir = validate_img_saver_dir) # Black and White, temporary usage
    else:
        warn("Unknown dataset Error")
        # break

    warn("dataset: {}".format(dataset))
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")
        # break

    meta_saved = False

    cls_train_iter = 10000
    for cls_train_i in range(cls_train_iter):
        # warn("Train:{}".format(cls_train_i))
        if dataset == 'dsprites':
            # At every epoch, train classifier and check result
            # (1) Load images
            num_img_pair = cls_L * num_gpus * cls_batch_per_gpu
            # warn("{} {} {}".format(len(manager.latents_sizes)-1, num_gpus, cls_batch_per_gpu))
            feat = np.random.randint(len(manager.latents_sizes)-1, size = num_gpus * cls_batch_per_gpu)
            [images1, images2] = manager.get_image_fixed_feat_batch(feat, num_img_pair)

            # warn("images shape:{}".format(np.shape(images1)))

            # (2) Input PH images
            [classification_loss, _, _, summary] = classifier_train(images1, images2, feat)
            if cls_train_i % 100 == 0:
                warn("cls loss {}: {}".format(cls_train_i, classification_loss))

            cls_train_writer.add_summary(summary, cls_train_i)
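
# For context: the U.function helper used throughout (a baselines-style tf_util
# wrapper) turns placeholders plus output tensors into a plain Python callable.
# A minimal sketch of the idea, assuming a default session is active:
import tensorflow as tf

def make_function(inputs, outputs, updates=()):
    update_op = tf.group(*updates) if updates else tf.no_op()
    def run(*values):
        feed = dict(zip(inputs, values))
        results = tf.get_default_session().run(list(outputs) + [update_op],
                                               feed_dict=feed)
        return results[:-1]  # drop the update op's (None) result
    return run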
Esempio n. 18
0
def tf_corner_to_standup(boxes_corner_a, boxes_corner_b):
    # boxes_corner_a: (N,8,3)

    boxes_a_x = boxes_corner_a[:,0:4,0] # N,4
    boxes_a_y = boxes_corner_a[:,0:4,1]
    boxes_b_x = boxes_corner_b[:,0:4,0]
    boxes_b_y = boxes_corner_b[:,0:4,1]

    warn("boxes_a_x: {}".format(np.shape(boxes_a_x)))

    x_axis = tf.concat([boxes_a_x, boxes_b_x], axis=1) # N,8
    y_axis = tf.concat([boxes_a_y, boxes_b_y], axis=1) # N,8

    warn("x_axis: {}".format(np.shape(x_axis)))


    min_x = tf.reduce_min(x_axis, axis=1)    # N
    min_y = tf.reduce_min(y_axis, axis=1)
    max_x = tf.reduce_max(x_axis, axis=1)
    max_y = tf.reduce_max(y_axis, axis=1)

    warn("min_x: {}".format(np.shape(min_x)))

    translation_x = tf.tile(tf.expand_dims(min_x, -1), [1,4])
    translation_y = tf.tile(tf.expand_dims(min_y, -1), [1,4])
    warn("translation_x: {}".format(np.shape(translation_x)))

    boxes_a_x = boxes_a_x - translation_x
    boxes_b_x = boxes_b_x - translation_x
    boxes_a_y = boxes_a_y - translation_y
    boxes_b_y = boxes_b_y - translation_y

    len_x = max_x - min_x
    len_y = max_y - min_y # N

    warn("len_x: {}".format(np.shape(len_x)))

    len_square = tf.maximum(len_x, len_y)    # N
    warn("len_square 1: {}".format(np.shape(len_square)))
    len_square = tf.tile(tf.expand_dims(len_square, -1), [1,4]) # N,4
    warn("len_square 2: {}".format(np.shape(len_square)))
   
    boxes_a_x = tf.divide(boxes_a_x, len_square) # N,4
    boxes_b_x = tf.divide(boxes_b_x, len_square)
    boxes_a_y = tf.divide(boxes_a_y, len_square)
    boxes_b_y = tf.divide(boxes_b_y, len_square)

    boxes_a = tf.stack([boxes_a_x, boxes_a_y], axis=-1) # N,4,2
    boxes_b = tf.stack([boxes_b_x, boxes_b_y], axis=-1) # N,4,2
    warn("boxes_a_x: {}".format(np.shape(boxes_a)))

    # standup_boxes = tf.stack([min_x, min_y, max_x, max_y], axis=1) # N,4

    return boxes_a, boxes_b
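
# The commented-out `standup_boxes` line above suggests an axis-aligned 2D IoU
# over (min_x, min_y, max_x, max_y) boxes; a hedged sketch of that computation:
import tensorflow as tf

def tf_standup_iou(boxes_a, boxes_b):
    # boxes_*: (N, 4) as x_min, y_min, x_max, y_max
    inter_w = tf.maximum(0.0, tf.minimum(boxes_a[:, 2], boxes_b[:, 2])
                              - tf.maximum(boxes_a[:, 0], boxes_b[:, 0]))
    inter_h = tf.maximum(0.0, tf.minimum(boxes_a[:, 3], boxes_b[:, 3])
                              - tf.maximum(boxes_a[:, 1], boxes_b[:, 1]))
    inter = inter_w * inter_h
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    return inter / tf.maximum(area_a + area_b - inter, 1e-8)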
Esempio n. 19
0
def train_net(model, mode, img_dir, dataset, chkfile_name, logfile_name, validatefile_name, entangled_feat, max_epoch = 300, check_every_n = 500, loss_check_n = 10, save_model_freq = 5, batch_size = 512, lr = 0.001):
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")

    vae_loss = U.mean(model.vaeloss)

    latent_z1_tp = model.latent_z1
    latent_z2_tp = model.latent_z2

    losses = [U.mean(model.vaeloss),
            U.mean(model.siam_loss),
            U.mean(model.kl_loss1),
            U.mean(model.kl_loss2),
            U.mean(model.reconst_error1),
            U.mean(model.reconst_error2),
            ]

    siam_normal = losses[1]/entangled_feat
    siam_max = U.mean(model.max_siam_loss)

    tf.summary.scalar('Total Loss', losses[0])
    tf.summary.scalar('Siam Loss', losses[1])
    tf.summary.scalar('kl1_loss', losses[2])
    tf.summary.scalar('kl2_loss', losses[3])
    tf.summary.scalar('reconst_err1', losses[4])
    tf.summary.scalar('reconst_err2', losses[5])
    tf.summary.scalar('Siam Normal', siam_normal)
    tf.summary.scalar('Siam Max', siam_max)



    compute_losses = U.function([img1, img2], vae_loss)
    optimizer=tf.train.AdamOptimizer(learning_rate=lr, epsilon = 0.01/batch_size)

    all_var_list = model.get_trainable_variables()


    img1_var_list = all_var_list
    optimize_expr1 = optimizer.minimize(vae_loss, var_list=img1_var_list)
    merged = tf.summary.merge_all()
    train = U.function([img1, img2],
                        [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5], latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr1])
    get_reconst_img = U.function([img1, img2], [model.reconst1, model.reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))

    train_writer = U.summary_writer(dir = log_save_dir)

    U.initialize()

    saver, chk_file_epoch_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir = validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir = validate_img_saver_dir) # Black and White, temporary usage
    else:
        warn("Unknown dataset Error")
        # break

    warn(img_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")
        # break

    meta_saved = False

    if mode == 'train':
        for epoch_idx in range(chk_file_epoch_num+1, max_epoch):
            t_epoch_start = time.time()
            # NOTE: `manager` exists only for dsprites; for chairs/celeba derive
            # the batch count from the size of the training set instead
            if dataset == 'dsprites':
                num_batch = manager.get_len()
            else:
                num_batch = n_total_train_data // batch_size

            for batch_idx in range(num_batch):
                if dataset == 'chairs' or dataset == 'celeba':
                    idx = random.sample(range(n_total_train_data), 2*batch_size)
                    batch_files = [training_images_list[i] for i in idx]
                    [images1, images2] = load_image(dir_name = img_dir, img_names = batch_files)
                elif dataset == 'dsprites':
                    [images1, images2] = manager.get_next()
                img1, img2 = images1, images2
                [l1, l2, _, _] = get_reconst_img(img1, img2)  # debug forward pass; the outputs are unused

                [loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)

                if batch_idx % 50 == 1:
                    header("******* epoch: {}/{} batch: {}/{} *******".format(epoch_idx, max_epoch, batch_idx, num_batch))
                    warn("Total Loss: {}".format(loss0))
                    warn("Siam loss: {}".format(loss1))
                    warn("kl1_loss: {}".format(loss2))
                    warn("kl2_loss: {}".format(loss3))
                    warn("reconst_err1: {}".format(loss4))
                    warn("reconst_err2: {}".format(loss5))

                if batch_idx % check_every_n == 1:
                    if dataset == 'chairs' or dataset == 'celeba':
                        idx = random.sample(range(len(training_images_list)), 2*5)
                        validate_batch_files = [training_images_list[i] for i in idx]
                        [images1, images2] = load_image(dir_name = img_dir, img_names = validate_batch_files)
                    elif dataset == 'dsprites':
                        [images1, images2] = manager.get_next()

                    [reconst1, reconst2, _, _] = get_reconst_img(images1, images2)

                    if dataset == 'chairs':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)
                    elif dataset == 'celeba':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)
                    elif dataset == 'dsprites':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)

                            # save_img = images1[img_idx].reshape(64, 64)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_ori.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            # save_img = reconst1[img_idx].reshape(64, 64)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_rec.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                if batch_idx % loss_check_n == 1:
                    train_writer.add_summary(summary, batch_idx)

            t_epoch_end = time.time()
            t_epoch_run = t_epoch_end - t_epoch_start
            if dataset == 'dsprites':
                t_check = manager.sample_size / t_epoch_run

                warn("==========================================")
                warn("Run {} th epoch in {} sec: {} images / sec".format(epoch_idx+1, t_epoch_run, t_check))
                warn("==========================================")

            # if epoch_idx % save_model_freq == 0:
            if meta_saved == True:
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = epoch_idx, write_meta_graph = False)
            else:
                print "Save  meta graph"
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = epoch_idx, write_meta_graph = True)
                meta_saved = True

    # Testing
    elif mode == 'test':
        # NOTE: `test`, `test_reconst`, and `test_img_saver` are assumed to be
        # defined elsewhere; `testing_images_list` exists only for chairs/celeba
        test_file_name = testing_images_list[0]
        test_img = load_single_img(dir_name = testing_img_dir, img_name = test_file_name)
        test_feature = 31
        test_variation = np.arange(-5, 5, 0.1)

        z = test(test_img)
        for idx in range(len(test_variation)):
            z_test = np.copy(z)
            z_test[0, test_feature] = z_test[0, test_feature] + test_variation[idx]
            reconst_test = test_reconst(z_test)
            test_save_img = np.squeeze(reconst_test[0])
            test_save_img = Image.fromarray(test_save_img)
            img_file_name = "test_feat_{}_var_({}).png".format(test_feature, test_variation[idx])
            test_img_saver.save(test_save_img, img_file_name, sub_dir = None)
        reconst_test = test_reconst(z)
        test_save_img = np.squeeze(reconst_test[0])
        test_save_img = Image.fromarray(test_save_img)
        img_file_name = "test_feat_{}_var_original.png".format(test_feature)
        test_img_saver.save(test_save_img, img_file_name, sub_dir = None)
Esempio n. 20
0
def main():

    # Base: https://openreview.net/pdf?id=Sy2fzU9gl

    # (1) parse arguments

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset')  # chairs, celeba, dsprites
    parser.add_argument('--mode')  # train, test
    parser.add_argument('--disentangled_feat', type=int)
    parser.add_argument('--num_gpus', type=int, default=1)
    args = parser.parse_args()

    dataset = args.dataset
    mode = args.mode
    disentangled_feat = args.disentangled_feat
    chkfile_name = "chk_{}_{}".format(dataset, disentangled_feat)
    logfile_name = "log_{}_{}".format(dataset, disentangled_feat)
    validatefile_name = "val_{}_{}".format(dataset, disentangled_feat)

    # (2) Dataset

    if dataset == 'chairs':
        dir_name = "/dataset/chairs/training_img"
    elif dataset == 'celeba':
        dir_name = 'temporarily not available'
    elif dataset == 'dsprites':
        dir_name = '/dataset/dsprites'  # dummy path; the dsprites data is loaded through DataManager instead
    else:
        header("Unknown dataset name")

    cur_dir = get_cur_dir()
    cur_dir = osp.join(cur_dir, 'dataset')
    cur_dir = osp.join(cur_dir, 'chairs')
    img_dir = osp.join(cur_dir, 'training_img')  # This is for chairs

    # (3) Set the experiment configuration and disentangled_feat, following beta-VAE (https://openreview.net/pdf?id=Sy2fzU9gl)

    if dataset == 'chairs':
        latent_dim = 32
        loss_weight = {'siam': 50000.0, 'kl': 30000.0}
        batch_size = 32
        max_epoch = 300
        lr = 0.0001
    elif dataset == 'celeba':
        latent_dim = 32
        loss_weight = {'siam': 1000.0, 'kl': 30000.0}
        batch_size = 512
        max_epoch = 300
        lr = 0.0001
    elif dataset == 'dsprites':
        latent_dim = 10
        loss_weight = {'siam': 1.0, 'kl': 1.0}
        batch_size = 1024
        max_epoch = 300
        lr = 0.001
        feat_size = 5  # shape, rotation, size, x, y. Unclear why the paper (p. 6) lists only 4 features; needs further checking.
        cls_batch_per_gpu = 15
        cls_L = 10

    entangled_feat = latent_dim - disentangled_feat
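    # e.g. for dsprites (latent_dim = 10) with --disentangled_feat 2, entangled_feat = 10 - 2 = 8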

    # (4) Open a TensorFlow session. The optimal configuration is still to be
    # found, since a single-threaded session is not actually required here.
    # Important: if we stop using a single-threaded session, this must change!

    # sess = U.single_threaded_session()
    sess = U.mgpu_session()
    sess.__enter__()
    set_global_seeds(0)

    num_gpus = args.num_gpus

    # Model Setting

    # (5) Import model, merged into models.py
    # only celeba has RGB channels; the others are black and white.

    if dataset == 'chairs':
        import models
        mynet = models.mymodel(name="mynet",
                               img_shape=[64, 64, 1],
                               latent_dim=latent_dim,
                               disentangled_feat=disentangled_feat,
                               mode=mode,
                               loss_weight=loss_weight)
    elif dataset == 'celeba':
        import models
        mynet = models.mymodel(name="mynet",
                               img_shape=[64, 64, 3],
                               latent_dim=latent_dim,
                               disentangled_feat=disentangled_feat,
                               mode=mode,
                               loss_weight=loss_weight)
    elif dataset == 'dsprites':
        import models

        img_shape = [None, 64, 64, 1]
        img1 = U.get_placeholder(name="img1",
                                 dtype=tf.float32,
                                 shape=img_shape)
        img2 = U.get_placeholder(name="img2",
                                 dtype=tf.float32,
                                 shape=img_shape)

        feat_cls = U.get_placeholder(name="feat_cls",
                                     dtype=tf.int32,
                                     shape=None)

        tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
        tf.assert_equal(tf.floormod(tf.shape(img1)[0], num_gpus), 0)

        tf.assert_equal(tf.floormod(tf.shape(feat_cls)[0], num_gpus), 0)

        img1splits = tf.split(img1, num_gpus, 0)
        img2splits = tf.split(img2, num_gpus, 0)

        feat_cls_splits = tf.split(feat_cls, num_gpus, 0)

        mynets = []
        with tf.variable_scope(tf.get_variable_scope()):
            for gid in range(num_gpus):
                with tf.name_scope('gpu%d' % gid) as scope:
                    with tf.device('/gpu:%d' % gid):
                        mynet = models.mymodel(
                            name="mynet",
                            img1=img1splits[gid],
                            img2=img2splits[gid],
                            img_shape=img_shape[1:],
                            latent_dim=latent_dim,
                            disentangled_feat=disentangled_feat,
                            mode=mode,
                            loss_weight=loss_weight,
                            feat_cls=feat_cls_splits[gid],
                            feat_size=feat_size,
                            cls_L=cls_L,
                            cls_batch_per_gpu=cls_batch_per_gpu)
                        mynets.append(mynet)
                # Reuse variables for the next tower.
                tf.get_variable_scope().reuse_variables()

    else:
        header("Unknown model name")

    # (6) Train or test the model
    # Testing by adding noise to the latent features is not merged yet; it will be finished soon.

    if mode == 'train':
        mgpu_train_net(models=mynets,
                       num_gpus=num_gpus,
                       mode=mode,
                       img_dir=img_dir,
                       dataset=dataset,
                       chkfile_name=chkfile_name,
                       logfile_name=logfile_name,
                       validatefile_name=validatefile_name,
                       entangled_feat=entangled_feat,
                       max_epoch=max_epoch,
                       batch_size=batch_size,
                       lr=lr)
        # train_net(model=mynets[0], mode = mode, img_dir = img_dir, dataset = dataset, chkfile_name = chkfile_name, logfile_name = logfile_name, validatefile_name = validatefile_name, entangled_feat = entangled_feat, max_epoch = max_epoch, batch_size = batch_size, lr = lr)
    elif mode == 'classifier_train':
        warn("Classifier Train")
        mgpu_classifier_train_net(models=mynets,
                                  num_gpus=num_gpus,
                                  cls_batch_per_gpu=cls_batch_per_gpu,
                                  cls_L=cls_L,
                                  mode=mode,
                                  img_dir=img_dir,
                                  dataset=dataset,
                                  chkfile_name=chkfile_name,
                                  logfile_name=logfile_name,
                                  validatefile_name=validatefile_name,
                                  entangled_feat=entangled_feat,
                                  max_epoch=max_epoch,
                                  batch_size=batch_size,
                                  lr=lr)

    elif mode == 'test':
        header("Need to be merged")
    else:
        header("Unknown mode name")
Esempio n. 21
0
import os
import time
import sys
import tensorflow as tf
from itertools import count
from misc_util import get_cur_dir, warn, mkdir_p
import cv2
from utils.utils import label_to_gt_box2d, bbox_iou, random_distort_image, draw_bbox2d_on_image
import numpy as np
from config import cfg
# from data_aug import image_augmentation

cur_dir = get_cur_dir()

dataset_dir = os.path.join(cur_dir, 'data/object')
warn("dataset_dir: {}".format(dataset_dir))
dataset = 'training'
split_file = 'trainset.txt'

test_img_save_dir = 'test_img'
test_img_save_dir = os.path.join(cur_dir, test_img_save_dir)
mkdir_p(test_img_save_dir)

def image_augmentation(f_rgb, f_label, width, height, jitter, hue, saturation, exposure):
	rgb_imgs = []
	ious = []
	org_imgs = []
	label = np.array([line for line in open(f_label, 'r').readlines()])
	gt_box2d = label_to_gt_box2d(np.array(label)[np.newaxis, :], cls=cfg.DETECT_OBJ, coordinate='lidar')[0]  # (N', 4) x_min, y_min, x_max, y_max

	img = cv2.imread(f_rgb)
Esempio n. 22
0
def cal_volume_loss(delta_a, delta_b, mask):
    loss = 0.0
    sigma = 3.0
    anchors = tf_cal_anchors()
    batch_boxes3d_a = tf_delta_to_boxes3d(delta_a, anchors) # prediction
    batch_boxes3d_b = tf_delta_to_boxes3d(delta_b, anchors) # ground truth
    batch_boxes3d_b_flipped = tf_delta_to_boxes3d(delta_b, anchors, True) # ground truth flipped

    mask = tf.reshape(mask, [-1, cfg.FEATURE_WIDTH*cfg.FEATURE_HEIGHT*2])#mask.reshape((batch_size, -1))

    ind = tf.equal(mask[:, :], 1.0)
    batch_boxes3d_a = tf.reshape(batch_boxes3d_a, [-1, 7])
    batch_boxes3d_b = tf.reshape(batch_boxes3d_b, [-1, 7])
    batch_boxes3d_b_flipped = tf.reshape(batch_boxes3d_b_flipped, [-1, 7])
    ind = tf.reshape(ind, [-1])

    center_boxes3d_a = tf.boolean_mask(batch_boxes3d_a, ind) # N, 7
    center_boxes3d_b = tf.boolean_mask(batch_boxes3d_b, ind)
    center_boxes3d_b_flipped = tf.boolean_mask(batch_boxes3d_b_flipped, ind)

    corner_boxes3d_a = tf_center_to_corner_box3d(center_boxes3d_a, coordinate='lidar') 
    corner_boxes3d_b = tf_center_to_corner_box3d(center_boxes3d_b, coordinate='lidar') 

    iou = tf_calculate_rotation_iou(corner_boxes3d_a, center_boxes3d_a, corner_boxes3d_b, center_boxes3d_b)


    corner_boxes3d_b_flipped = tf_center_to_corner_box3d(center_boxes3d_b_flipped, coordinate='lidar') 


    warn("smooth loss: {}".format(np.shape(loss)))
    loss = tf.minimum(tf.reduce_sum(smooth_l1(corner_boxes3d_a, corner_boxes3d_b, sigma), [1,2]), \
                    tf.reduce_sum(smooth_l1(corner_boxes3d_a, corner_boxes3d_b_flipped, sigma), [1,2]))

    warn("loss : {}".format(np.shape(loss)))
    warn("iou : {}".format(np.shape(iou)))
    a = tf.pow(1.0-iou, 2.0)
    warn("pow iou: {}".format(np.shape(a)))
    loss = tf.reduce_sum(loss * tf.pow(1.0-iou, 2.0))
    a = tf.reduce_sum(smooth_l1(corner_boxes3d_a, corner_boxes3d_b, sigma), [1,2])
    warn("one loss: {}".format(np.shape(a)))
    warn("loss : {}".format(np.shape(loss)))
    # * TODO: this loss is probably computed incorrectly: take the element-wise
    # * minimum first and only then reduce_sum, rather than reduce_sum followed
    # * by minimum as above.


    divider = tf.maximum(tf.shape(corner_boxes3d_a)[0], 1) # normalize by the number of ground-truth/prediction boxes
    # without this normalization, the loss would grow with the number of boxes
    divider = tf.cast(divider, dtype=tf.float32)
    loss = tf.divide(loss, divider)
    return loss
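
# `smooth_l1` is referenced above but not defined in this listing. The usual
# sigma-parameterized smooth-L1 (as in Faster R-CNN / VoxelNet-style code) is
# sketched below under that assumption:
import tensorflow as tf

def smooth_l1(deltas, targets, sigma=3.0):
    sigma2 = sigma * sigma
    diffs = deltas - targets
    # quadratic branch for |diff| < 1/sigma^2, linear branch otherwise
    quadratic_mask = tf.cast(tf.less(tf.abs(diffs), 1.0 / sigma2), tf.float32)
    quadratic = 0.5 * diffs * diffs * sigma2
    linear = tf.abs(diffs) - 0.5 / sigma2
    return quadratic * quadratic_mask + linear * (1.0 - quadratic_mask)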
Esempio n. 23
0
def cal_rpn_target(labels, feature_map_shape, anchors, cls='Car', calib_mats=None, coordinate='lidar'):
    # Input:
    #   labels: (N, N')
    #   feature_map_shape: (w, l)
    #   anchors: (w, l, 2, 7)
    # Output:
    #   pos_equal_one (N, w, l, 2)
    #   neg_equal_one (N, w, l, 2)
    #   targets (N, w, l, 14)
    # attention: cal IoU on birdview
    batch_size = labels.shape[0]
    # for idx in range(batch_size):
    #     warn("{} {}".format(idx, calib_mats[idx]))
    batch_gt_boxes3d = label_to_gt_box3d(labels, cls=cls, coordinate=coordinate, calib_mats=calib_mats)
    # defined in eq(1) in 2.2
    anchors_reshaped = anchors.reshape(-1, 7)
    # anchors_d = np.sqrt(anchors_reshaped[:, 4]**2 + anchors_reshaped[:, 5]**2)
    pos_equal_one = np.zeros((batch_size, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2))
    neg_equal_one = np.ones((batch_size, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2))
    targets = np.zeros((batch_size, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 14))

    anchor_origin = np.array([[0, 0, cfg.ANCHOR_W, cfg.ANCHOR_L],[0, 0, cfg.ANCHOR_W, cfg.ANCHOR_L]])
    anchor_standup_2d_origin = anchor_to_standup_box2d(anchor_origin)
    anchor_rot = [0, 90.0 / 180 * np.pi]  # float division, so this is pi/2 under Python 2 as well
    anchor_d = np.sqrt(cfg.ANCHOR_W**2 + cfg.ANCHOR_L**2)
    # warn("shape: {}".format(batch_gt_boxes3d[0][0, [0, 1, 4, 5, 6]]))
    for batch_id in range(batch_size):
        t0 = time.time()
        for t in range(len(batch_gt_boxes3d[batch_id])):
            gx, gy, gz, gh, gw, gl, gr = batch_gt_boxes3d[batch_id][t, [0, 1, 2, 3, 4, 5, 6]]

            if (gx > cfg.X_MAX) or (gx < cfg.X_MIN) or (gy > cfg.Y_MAX) or (gy < cfg.Y_MIN):
                warn("***  illegal data removed: {:.2f} {:.2f} ***".format(gx, gy))
                continue

            gx_ratio = (gx - cfg.X_MIN) / (cfg.X_MAX - cfg.X_MIN) * (cfg.FEATURE_WIDTH-1)
            gy_ratio = (gy - cfg.Y_MIN) / (cfg.Y_MAX - cfg.Y_MIN) * (cfg.FEATURE_HEIGHT-1)
            gi = int(gx_ratio)
            gj = int(gy_ratio)

            gt_box_origin = np.array([[0, 0, gw, gl, gr]])
            gt_standup_2d_origin = corner_to_standup_box2d(center_to_corner_box2d(gt_box_origin, coordinate=coordinate, calib_mat=calib_mats[batch_id]))
            # warn("anchor: {}".format(gt_standup_2d_origin))
            best_iou = 0
            best_anchor = 0
            for anchor in range(len(anchor_origin)):
                iou = bbox_iou(anchor_standup_2d_origin[anchor], gt_standup_2d_origin[0], x1y1x2y2 = True)
                if iou > best_iou:
                    best_iou = iou
                    best_anchor = anchor 
                # warn("{} : iou : {}".format(anchor, iou))
            # warn("shape iou: {}".format(np.shape(iou)))
            # best_anchor_id = np.argmax(iou.T, axis=1)
            
            index_x = gi
            index_y = gj
            index_z = best_anchor

            # warn("{} : gx {:.2f} gy {:.2f} gx_ratio {:.2f} gy_ratio {:.2f} gw {:.2f} gl {:.2f} [ gi {} gj {} anchor {} ] iou {:.2f}".format(t, gx, gy, gx_ratio, gy_ratio, gw, gl, gi, gj, best_anchor, best_iou))

            pos_equal_one[batch_id, index_y, index_x, best_anchor] = 1
            neg_equal_one[batch_id, index_y, index_x, best_anchor] = 0

            targets[batch_id, index_y, index_x, np.array(index_z) * 7] = gx_ratio - gi 
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 1] = gy_ratio - gj
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 2] = (gz - cfg.ANCHOR_Z) / cfg.ANCHOR_H
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 3] = np.log(gh / cfg.ANCHOR_H)
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 4] = np.log(gw / cfg.ANCHOR_W)
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 5] = np.log(gl / cfg.ANCHOR_L)
            targets[batch_id, index_y, index_x, np.array(index_z) * 7 + 6] = (gr - anchor_rot[best_anchor])
        # t1 = time.time()
        # # warn("time for rpn : {}".format(t1-t0))
        # # warn("feature map :{} ".format(np.shape(targets)))

        # # BOTTLENECK
        # anchors_standup_2d = anchor_to_standup_box2d(
        #     anchors_reshaped[:, [0, 1, 4, 5]])
        # # warn("anchor gt: {}".format(anchors_standup_2d[0:4]))
        # # BOTTLENECK
        # gt_standup_2d = corner_to_standup_box2d(center_to_corner_box2d(
        #     batch_gt_boxes3d[batch_id][:, [0, 1, 4, 5, 6]], coordinate=coordinate))

        # iou = bbox_overlaps(
        #     np.ascontiguousarray(anchors_standup_2d).astype(np.float32),
        #     np.ascontiguousarray(gt_standup_2d).astype(np.float32),
        # )

        # # find anchor with highest iou(iou should also > 0)
        # id_highest = np.argmax(iou.T, axis=1)
        # id_highest_gt = np.arange(iou.T.shape[0])
        # mask = iou.T[id_highest_gt, id_highest] > 0
        # id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask]

        # # find anchor iou > cfg.XXX_POS_IOU
        # id_pos, id_pos_gt = np.where(iou > cfg.RPN_POS_IOU)

        # # find anchor iou < cfg.XXX_NEG_IOU
        # id_neg = np.where(np.sum(iou < cfg.RPN_NEG_IOU,
        #                          axis=1) == iou.shape[1])[0]

        # id_pos = np.concatenate([id_pos, id_highest])
        # id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt])

        # # TODO: uniquify the array in a more scientific way
        # id_pos, index = np.unique(id_pos, return_index=True)
        # id_pos_gt = id_pos_gt[index]
        # id_neg.sort()

        # # cal the target and set the equal one
        # index_x, index_y, index_z = np.unravel_index(
        #     id_pos, (*feature_map_shape, 2))
        # pos_equal_one[batch_id, index_x, index_y, index_z] = 1

        # for k in range(len(index_x)):
        #     warn("x {} y {} z {}".format(index_x[k], index_y[k], index_z[k]))
        # # warn("x: {}".format(index_x))
        # # warn("y: {}".format(index_y))

        # # ATTENTION: index_z should be np.array
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7] = (
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 0] - anchors_reshaped[id_pos, 0]) / anchors_d[id_pos]
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 1] = (
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 1] - anchors_reshaped[id_pos, 1]) / anchors_d[id_pos]
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 2] = (
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 2] - anchors_reshaped[id_pos, 2]) / cfg.ANCHOR_H
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 3] = np.log(
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 3] / anchors_reshaped[id_pos, 3])
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 4] = np.log(
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 4] / anchors_reshaped[id_pos, 4])
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 5] = np.log(
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 5] / anchors_reshaped[id_pos, 5])
        # targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 6] = (
        #     batch_gt_boxes3d[batch_id][id_pos_gt, 6] - anchors_reshaped[id_pos, 6])

        # index_x, index_y, index_z = np.unravel_index(
        #     id_neg, (*feature_map_shape, 2))
        # neg_equal_one[batch_id, index_x, index_y, index_z] = 1
        # # to avoid a box be pos/neg in the same time
        # index_x, index_y, index_z = np.unravel_index(
        #     id_highest, (*feature_map_shape, 2))
        # neg_equal_one[batch_id, index_x, index_y, index_z] = 0

    return pos_equal_one, neg_equal_one, targets
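
# For reference, the per-cell encoding written into `targets` above can be
# inverted as follows (a hedged sketch; on the TF side, tf_delta_to_boxes3d
# elsewhere in this codebase plays this role):
import numpy as np

def decode_rpn_target(t, gi, gj, anchor_rot_k, cfg):
    # t: the 7 regression values for one positive cell/anchor;
    # anchor_rot_k: the matching entry of anchor_rot
    gx = (t[0] + gi) / (cfg.FEATURE_WIDTH - 1) * (cfg.X_MAX - cfg.X_MIN) + cfg.X_MIN
    gy = (t[1] + gj) / (cfg.FEATURE_HEIGHT - 1) * (cfg.Y_MAX - cfg.Y_MIN) + cfg.Y_MIN
    gz = t[2] * cfg.ANCHOR_H + cfg.ANCHOR_Z
    gh = np.exp(t[3]) * cfg.ANCHOR_H
    gw = np.exp(t[4]) * cfg.ANCHOR_W
    gl = np.exp(t[5]) * cfg.ANCHOR_L
    gr = t[6] + anchor_rot_k
    return gx, gy, gz, gh, gw, gl, gr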