def predict(prediction_dir, label_dir, image_dir, calibration_file):
    """Regress 3D box parameters for labelled 2D detections and write them out.

    For every non-hidden file in ``image_dir`` the label file with the same
    stem and a ``.txt`` extension is read from ``label_dir``.  Each label row
    whose class is listed in ``cfg.KITTI_cat`` is cropped from the image,
    passed through the network and updated with the regressed dimensions,
    alpha, global rotation and translation.  The updated row, with a constant
    ``1.0`` appended, is written to a same-named ``.txt`` file in
    ``prediction_dir``.

    Args:
        prediction_dir: directory the prediction ``.txt`` files are written to.
        label_dir: directory holding one label ``.txt`` file per image.
        image_dir: directory of input images.
        calibration_file: calibration text file; the line containing ``P2``
            is parsed into the 3x4 matrix used for every image.
    """
    # compile the model and load the trained weights
    model = nn.network()
    model.load_weights('3dbox_weights_mob.hdf5')

    dims_avg, _ = KITTILoader(subset='tracklet').get_average_dimension()

    val_imgs = sorted(
        [im for im in os.listdir(image_dir) if not im.startswith('.')])

    # Parse the P2 matrix once; it is shared by all images.
    P2 = np.array([])
    with open(calibration_file) as calib:
        for line in calib:
            if 'P2' in line:
                # split() without an argument tolerates repeated spaces.
                values = [float(v) for v in line.split()[1:]]
                P2 = np.reshape(np.asarray(values), (3, 4))

    for img_name in tqdm(val_imgs):
        # Swap only the real extension; str.replace('png', 'txt') would also
        # rewrite a "png" inside the stem and leave non-PNG names untouched.
        stem = os.path.splitext(img_name)[0]
        image_file = os.path.join(image_dir, img_name)
        label_file = os.path.join(label_dir, stem + '.txt')
        prediction_file = os.path.join(prediction_dir, stem + '.txt')

        # Both handles are closed when the block exits, even on error.
        with open(prediction_file, 'w') as pred_out, \
                open(label_file) as labels:
            img = cv2.imread(image_file)
            img = np.array(img, dtype='float32')

            for line in labels:
                obj = detectionInfo(line.strip().split(' '))
                xmin = int(obj.xmin)
                xmax = int(obj.xmax)
                ymin = int(obj.ymin)
                ymax = int(obj.ymax)

                if obj.name not in cfg.KITTI_cat:
                    continue
                # A zero-width or zero-height box cannot be cropped.
                if xmin == xmax or ymin == ymax:
                    continue

                # 2D detection area
                patch = img[ymin:ymax, xmin:xmax]
                try:
                    # NOTE(review): cv2.resize takes dsize as (width, height);
                    # the argument order only matters if norm_h != norm_w.
                    # Confirm it matches the training preprocessing.
                    patch = cv2.resize(patch, (cfg.norm_h, cfg.norm_w))
                except cv2.error:
                    continue
                # This variant scales pixel values by 1/255.
                patch /= 255.0
                # add a leading batch axis for model.predict
                patch = np.expand_dims(patch, 0)

                prediction = model.predict(patch)
                dim = prediction[0][0]
                bin_anchor = prediction[1][0]
                bin_confidence = prediction[2][0]

                # The network output is added to the per-class average size.
                dims = dims_avg[obj.name] + dim
                obj.h, obj.w, obj.l = np.array([round(d, 2) for d in dims])

                # update with the predicted alpha
                obj.alpha = recover_angle(bin_anchor, bin_confidence, cfg.bin)

                # compute global and local orientation
                obj.rot_global, rot_local = compute_orientaion(P2, obj)

                # compute and update translation, (x, y, z)
                obj.tx, obj.ty, obj.tz = translation_constraints(
                    P2, obj, rot_local)

                # output prediction label
                output_line = obj.member_to_list()
                output_line.append(1.0)
                pred_out.write(
                    ' '.join([str(item) for item in output_line]) + '\n')
def predict(args):
    """Regress 3D box parameters for the test images and write label files.

    Images are listed from the module-level ``test_image_dir``.  For each one,
    the label, calibration and prediction paths are built from the image stem
    plus ``.txt`` under ``test_label_dir``, ``test_calib_path`` and
    ``prediction_path`` (plain string concatenation, so those prefixes are
    expected to end with a path separator).  Each label row whose class is in
    ``cfg().KITTI_cat`` is updated with the regressed dimensions, alpha,
    global rotation and translation and written out with a constant ``1.0``
    appended.  The average wall-clock time per image is printed at the end.

    Args:
        args: object with attribute ``w`` (path of the weights file to load)
            and ``a``; when ``a == 'training'`` only the images after the
            ``cfg().split`` fraction of the sorted listing are processed,
            otherwise all of them.
    """
    # compile the model and load the trained weights
    model = nn.network()
    model.load_weights(args.w)

    dims_avg, _ = KITTILoader(subset='training').get_average_dimension()

    # list all the validation images
    all_imgs = sorted(os.listdir(test_image_dir))
    if args.a == 'training':
        val_index = int(len(all_imgs) * cfg().split)
        val_imgs = all_imgs[val_index:]
    else:
        val_imgs = all_imgs

    start_time = time.time()

    for img_name in val_imgs:
        # Swap only the real extension; str.replace('png', 'txt') would also
        # rewrite a "png" inside the stem and leave non-PNG names untouched.
        stem = os.path.splitext(img_name)[0]
        image_file = test_image_dir + img_name
        label_file = test_label_dir + stem + '.txt'
        prediction_file = prediction_path + stem + '.txt'
        calibration_file = test_calib_path + stem + '.txt'

        # write the prediction file
        with open(prediction_file, 'w') as pred_out:
            img = cv2.imread(image_file)
            img = np.array(img, dtype='float32')

            # Per-image calibration: parse the line containing "P2".
            P2 = np.array([])
            with open(calibration_file) as calib:
                for line in calib:
                    if 'P2' in line:
                        # split() tolerates repeated spaces.
                        values = [float(v) for v in line.split()[1:]]
                        P2 = np.reshape(np.asarray(values), (3, 4))

            with open(label_file) as labels:
                for line in labels:
                    obj = detectionInfo(line.strip().split(' '))
                    xmin = int(obj.xmin)
                    xmax = int(obj.xmax)
                    ymin = int(obj.ymin)
                    ymax = int(obj.ymax)

                    if obj.name not in cfg().KITTI_cat:
                        continue
                    # A zero-width or zero-height box cannot be cropped.
                    if xmin == xmax or ymin == ymax:
                        continue

                    # 2D detection area
                    patch = img[ymin:ymax, xmin:xmax]
                    # NOTE(review): cv2.resize takes dsize as (width, height);
                    # the order only matters if norm_h != norm_w. Confirm it
                    # matches the training preprocessing.
                    patch = cv2.resize(patch, (cfg().norm_h, cfg().norm_w))
                    # Per-channel offsets; presumably the ImageNet BGR means
                    # used during training -- TODO confirm.
                    patch -= np.array([[[103.939, 116.779, 123.68]]])
                    # add a leading batch axis for model.predict
                    patch = np.expand_dims(patch, 0)

                    prediction = model.predict(patch)
                    dim = prediction[0][0]
                    bin_anchor = prediction[1][0]
                    bin_confidence = prediction[2][0]

                    # The network output is added to the per-class average.
                    dims = dims_avg[obj.name] + dim
                    obj.h, obj.w, obj.l = np.array(
                        [round(d, 2) for d in dims])

                    # update with the predicted alpha
                    obj.alpha = recover_angle(
                        bin_anchor, bin_confidence, cfg().bin)

                    # compute global and local orientation
                    obj.rot_global, rot_local = compute_orientaion(P2, obj)

                    # compute and update translation, (x, y, z)
                    obj.tx, obj.ty, obj.tz = translation_constraints(
                        P2, obj, rot_local)

                    # output prediction label
                    output_line = obj.member_to_list()
                    output_line.append(1.0)
                    pred_out.write(
                        ' '.join([str(item) for item in output_line]) + '\n')

        print('Write predicted labels for: ' + str(img_name))

    end_time = time.time()
    # An empty image list would otherwise raise ZeroDivisionError here.
    if val_imgs:
        process_time = (end_time - start_time) / len(val_imgs)
        print(process_time)
def train():
    """Train the network on the KITTI 'training' subset with a stepped LR.

    The loader's image data is passed through ``orientation_confidence_flip``,
    split at the ``cfg().split`` fraction into training and validation
    generators, and fitted for 500 epochs with Adam (lr=0.0001).  A checkpoint
    is written every epoch, TensorBoard logs go to ``logs/`` and the learning
    rate is multiplied by 0.8 on every tenth epoch after epoch 0.
    """
    loader = KITTILoader(subset='training')
    dim_avg, _ = loader.get_average_dimension()
    samples = orientation_confidence_flip(loader.image_data, dim_avg)

    model = nn.network()

    # Constructed but not included in the callbacks list passed to
    # fit_generator below.
    early_stop = callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=10,
        mode='min',
        verbose=1)
    checkpoint = callbacks.ModelCheckpoint(
        'model{epoch:08d}.hdf5',
        monitor='val_loss',
        verbose=1,
        save_best_only=False,
        mode='min',
        period=1)
    tensorboard = callbacks.TensorBoard(
        log_dir='logs/',
        histogram_freq=0,
        write_graph=True,
        write_images=False)

    # train / validation split
    total = len(samples)
    n_train = int(cfg().split * total)
    train_gen = data_gen(samples[:n_train])
    valid_gen = data_gen(samples[n_train:total])

    print("READY FOR TRAINING")

    # number of batches per epoch for each generator
    train_steps = int(np.ceil(n_train / cfg().batch_size))
    valid_steps = int(np.ceil((total - n_train) / cfg().batch_size))

    adam = optimizer.Adam(lr=0.0001)

    # multi-task learning: one loss and one weight per named output
    head_losses = {
        'dimensions': 'mean_squared_error',
        'orientation': orientation_loss,
        'confidence': 'categorical_crossentropy',
    }
    head_weights = {
        'dimensions': 1.,
        'orientation': 10.,
        'confidence': 5.,
    }
    model.compile(optimizer=adam, loss=head_losses, loss_weights=head_weights)

    print("####################################################")
    print(K.get_value(model.optimizer.lr))

    def scheduler(epoch):
        """Scale the optimizer's lr by 0.8 on epochs 10, 20, ... and return it."""
        if epoch != 0 and epoch % 10 == 0:
            old_lr = K.get_value(model.optimizer.lr)
            K.set_value(model.optimizer.lr, old_lr * .8)
            print("lr changed to {}".format(old_lr * .8))
        print("lr = ", K.get_value(model.optimizer.lr))
        return K.get_value(model.optimizer.lr)

    lr_sched = callbacks.LearningRateScheduler(scheduler)

    model.fit_generator(
        generator=train_gen,
        steps_per_epoch=train_steps,
        epochs=500,
        verbose=1,
        validation_data=valid_gen,
        validation_steps=valid_steps,
        shuffle=True,
        callbacks=[checkpoint, tensorboard, lr_sched],
        max_queue_size=3)
def predict(args):
    """Predict 3D boxes for the test images and display them on screen.

    Images are listed from the module-level ``test_image_dir``; label,
    calibration and prediction paths are built from the image stem plus
    ``.txt`` under ``test_label_dir``, ``test_calib_path`` and
    ``prediction_path`` (plain string concatenation).  For each label row
    whose class is in ``cfg().KITTI_cat`` the yaw is recovered from the
    network's bin outputs and the viewing-ray angle, a 3D box is generated
    with ``gen_3D_box`` and drawn onto the image with ``draw_3D_box``.  Each
    image is then shown in a window until a key is pressed.  The average
    wall-clock time per image is printed at the end.

    Args:
        args: object with attribute ``w`` (path of the weights file to load)
            and ``a``; when ``a == 'training'`` only the images after the
            ``cfg().split`` fraction of the sorted listing are processed,
            otherwise all of them.

    Raises:
        ValueError: if a calibration file has no line containing ``P2``.
    """
    # compile the model and load the trained weights
    model = nn.network()
    model.load_weights(args.w)

    dims_avg, _ = KITTILoader(subset='training').get_average_dimension()
    print("dims_avg = ", dims_avg)

    # list all the validation images
    all_imgs = sorted(os.listdir(test_image_dir))
    if args.a == 'training':
        val_index = int(len(all_imgs) * cfg().split)
        val_imgs = all_imgs[val_index:]
    else:
        val_imgs = all_imgs

    start_time = time.time()

    for img_name in val_imgs:
        # Swap only the real extension; str.replace('png', 'txt') would also
        # rewrite a "png" inside the stem and leave non-PNG names untouched.
        stem = os.path.splitext(img_name)[0]
        image_file = test_image_dir + img_name
        label_file = test_label_dir + stem + '.txt'
        prediction_file = prediction_path + stem + '.txt'
        calibration_file = test_calib_path + stem + '.txt'

        # NOTE(review): the original opened the prediction file for writing
        # but never wrote to it. The create/truncate side effect is kept;
        # confirm whether it should be dropped or the labels written here.
        with open(prediction_file, 'w'):
            pass

        # Only the RGB image is loaded: the depth-map input belongs to the
        # two-input model variant, which this single-input path does not use.
        img = cv2.imread(image_file)
        img = np.array(img, dtype='float32')

        # Per-image calibration: parse the line containing "P2".
        P2 = np.array([])
        with open(calibration_file) as calib:
            for line in calib:
                if 'P2' in line:
                    # split() tolerates repeated spaces.
                    values = [float(v) for v in line.split()[1:]]
                    P2 = np.reshape(np.asarray(values), (3, 4))
        if P2.size == 0:
            raise ValueError(
                'no P2 entry found in calibration file: ' + calibration_file)

        # The original referenced cam_to_img / fx / u0 without assigning
        # them; they are taken from the parsed matrix here.
        cam_to_img = P2
        fx = P2[0][0]
        u0 = P2[0][2]

        with open(label_file) as labels:
            for line in labels:
                obj = detectionInfo(line.strip().split(' '))
                xmin = int(obj.xmin)
                xmax = int(obj.xmax)
                ymin = int(obj.ymin)
                ymax = int(obj.ymax)

                if obj.name not in cfg().KITTI_cat:
                    continue
                # A zero-width or zero-height box cannot be cropped.
                if xmin == xmax or ymin == ymax:
                    continue

                # np.float was removed in NumPy 1.24; the builtin float is
                # the same dtype (float64).
                box_2D = np.asarray([xmin, ymin, xmax, ymax], dtype=float)

                # 2D detection area of the RGB image
                patch = img[ymin:ymax, xmin:xmax]
                # NOTE(review): cv2.resize takes dsize as (width, height);
                # the order only matters if norm_h != norm_w.
                patch = cv2.resize(patch, (cfg().norm_h, cfg().norm_w))
                # Per-channel offsets; presumably the ImageNet BGR means
                # used during training -- TODO confirm.
                patch -= np.array([[[103.939, 116.779, 123.68]]])
                # add a leading batch axis for model.predict
                patch = np.expand_dims(patch, 0)

                prediction = model.predict([patch])

                # The original passed an unassigned `dims` to gen_3D_box;
                # use the per-class average plus the regressed output, as the
                # sibling predict() variants do.
                dims = dims_avg[obj.name] + prediction[0][0]

                # Angle of the ray through the box centre, mapped to (0, pi).
                # For positive fx, arctan2 equals arctan(fx / dx) shifted by
                # pi when negative, and is defined when the centre is at u0.
                box2d_center_x = (xmin + xmax) / 2.0
                theta_ray = np.arctan2(fx, box2d_center_x - u0)

                # Take the most confident bin and its (cos, sin) pair.
                max_anc = np.argmax(prediction[2][0])
                anchors = prediction[1][0][max_anc]
                if anchors[1] > 0:
                    angle_offset = np.arccos(anchors[0])
                else:
                    angle_offset = -np.arccos(anchors[0])

                bin_num = prediction[2][0].shape[0]
                wedge = 2. * np.pi / bin_num
                theta_loc = angle_offset + max_anc * wedge
                theta = theta_loc + theta_ray

                # object's yaw angle
                yaw = np.pi / 2 - theta

                points2D = gen_3D_box(yaw, dims, cam_to_img, box_2D)
                draw_3D_box(img, points2D)

        # cv2.imshow multiplies float images by 255, so the 0-255 float32
        # array must be converted back to uint8 for display.
        cv2.imshow('f', img.astype(np.uint8))
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    end_time = time.time()
    # An empty image list would otherwise raise ZeroDivisionError here.
    if val_imgs:
        process_time = (end_time - start_time) / len(val_imgs)
        print(process_time)
def train():
    """Train the network on the KITTI 'training' subset with SGD.

    The loader's image data is passed through ``orientation_confidence_flip``,
    split at the ``cfg().split`` fraction into training and validation
    generators, and fitted for up to 500 epochs with SGD (lr=0.0001,
    momentum=0.9).  Early stopping watches ``val_loss``, the best model is
    saved to ``3dbox_weights_mob.hdf5`` and TensorBoard logs go to ``logs/``.
    """
    loader = KITTILoader(subset='training')
    dim_avg, _ = loader.get_average_dimension()
    samples = orientation_confidence_flip(loader.image_data, dim_avg)

    model = nn.network()

    early_stop = callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=10,
        mode='min',
        verbose=1)
    checkpoint = callbacks.ModelCheckpoint(
        '3dbox_weights_mob.hdf5',
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=1)
    tensorboard = callbacks.TensorBoard(
        log_dir='logs/',
        histogram_freq=0,
        write_graph=True,
        write_images=False)

    # train / validation split
    total = len(samples)
    n_train = int(cfg().split * total)
    train_gen = data_gen(samples[:n_train])
    valid_gen = data_gen(samples[n_train:total])

    # number of batches per epoch for each generator
    train_steps = int(np.ceil(n_train / cfg().batch_size))
    valid_steps = int(np.ceil((total - n_train) / cfg().batch_size))

    sgd = optimizer.SGD(lr=0.0001, momentum=0.9)

    # multi-task learning: one loss and one weight per named output
    head_losses = {
        'dimensions': 'mean_squared_error',
        'orientation': orientation_loss,
        'confidence': 'binary_crossentropy',
    }
    head_weights = {
        'dimensions': 1.,
        'orientation': 10.,
        'confidence': 5.,
    }
    model.compile(optimizer=sgd, loss=head_losses, loss_weights=head_weights)

    model.fit_generator(
        generator=train_gen,
        steps_per_epoch=train_steps,
        epochs=500,
        verbose=1,
        validation_data=valid_gen,
        validation_steps=valid_steps,
        shuffle=True,
        callbacks=[early_stop, checkpoint, tensorboard],
        max_queue_size=3)
def train():
    """Train the network with SGD, then export it in three formats.

    Training mirrors the plain SGD setup: data from the KITTI 'training'
    subset, a ``cfg().split`` train/validation split, early stopping on
    ``val_loss``, best-only checkpointing and TensorBoard logging.  After
    fitting, the model is written as a TensorFlow SavedModel directory, a
    Keras ``.h5`` file and a binary frozen-graph ``.pb`` file, each named
    with the configured ``norm_h`` x ``norm_w`` input size.
    """

    def _sized(template):
        # Fill the two "{}" slots with the configured input size.
        return template.format(cfg().norm_h, cfg().norm_w)

    loader = KITTILoader(subset='training')
    dim_avg, _ = loader.get_average_dimension()
    samples = orientation_confidence_flip(loader.image_data, dim_avg)

    model = nn.network()

    early_stop = callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=10,
        mode='min',
        verbose=1)
    checkpoint = callbacks.ModelCheckpoint(
        _sized('3dbox_mbnv2_{}x{}_float32.hdf5'),
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
        period=1)
    tensorboard = callbacks.TensorBoard(
        log_dir='logs/',
        histogram_freq=0,
        write_graph=True,
        write_images=False)

    # train / validation split
    total = len(samples)
    n_train = int(cfg().split * total)
    train_gen = data_gen(samples[:n_train])
    valid_gen = data_gen(samples[n_train:total])

    # number of batches per epoch for each generator
    train_steps = int(np.ceil(n_train / cfg().batch_size))
    valid_steps = int(np.ceil((total - n_train) / cfg().batch_size))

    sgd = optimizer.SGD(lr=0.0001, momentum=0.9)

    # multi-task learning: one loss and one weight per named output
    head_losses = {
        'dimensions': 'mean_squared_error',
        'orientation': orientation_loss,
        'confidence': 'binary_crossentropy',
    }
    head_weights = {
        'dimensions': 1.,
        'orientation': 10.,
        'confidence': 5.,
    }
    model.compile(optimizer=sgd, loss=head_losses, loss_weights=head_weights)

    model.fit_generator(
        generator=train_gen,
        steps_per_epoch=train_steps,
        epochs=500,
        verbose=1,
        validation_data=valid_gen,
        validation_steps=valid_steps,
        shuffle=True,
        callbacks=[early_stop, checkpoint, tensorboard],
        max_queue_size=3)

    # Export 1/3: SavedModel directory.
    tf.saved_model.save(model, _sized('saved_model_{}x{}'))

    # Export 2/3: Keras HDF5 file.
    model.save(_sized('3dbox_mbnv2_{}x{}_float32.h5'))

    # Export 3/3: frozen graph. Wrap the model in a tf.function, trace it
    # for the model's first input spec, then turn variables into constants.
    input_spec = tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype)
    full_model = tf.function(lambda inputs: model(inputs))
    full_model = full_model.get_concrete_function(inputs=(input_spec))
    frozen_func = convert_variables_to_constants_v2(
        full_model, lower_control_flow=False)
    # The returned GraphDef is not used; the call is kept as in the original.
    frozen_func.graph.as_graph_def()
    tf.io.write_graph(
        graph_or_graph_def=frozen_func.graph,
        logdir=".",
        name=_sized("3dbox_mbnv2_{}x{}_float32.pb"),
        as_text=False)