from flask import flash, render_template
# SubmitForm, CopyForm, KeyFrame, CopyDetector and packge_result come from the project's own modules


def submit():
    form = SubmitForm()
    if form.validate_on_submit():
        flash('submit requested for url = ' + str(form.url.data))
        kf = KeyFrame(form.url.data)
        download_flag, info = kf.download_video()
        if not download_flag:
            return render_template('submit.html', title='Submit', form=form, error=info)
        entropy_images_path, info = kf.entropy_based()
        if not entropy_images_path:
            return render_template('submit.html', title='Submit', form=form, error=info)
        gray_pixel_images_path, info = kf.gray_pixel_based()
        if not gray_pixel_images_path:
            return render_template('submit.html', title='Submit', form=form, error=info)
        images_path = packge_result(entropy_images_path, gray_pixel_images_path)
        return render_template('result.html', title='Result', images_path=images_path)
    return render_template('submit.html', title='Submit', form=form, error="")
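# NOTE: the helper below is NOT the repo's packge_result; it is a minimal sketch of what a
# result-packaging step could look like, assuming the result template simply needs one flat,
# order-preserving list of image paths coming from both key-frame extraction methods.
def package_result_sketch(entropy_images_path, gray_pixel_images_path):
    """Merge the two key-frame path lists, dropping duplicates while keeping order."""
    merged = []
    for path in entropy_images_path + gray_pixel_images_path:
        if path not in merged:
            merged.append(path)
    return merged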
def copy():
    form = CopyForm()
    if form.validate_on_submit():
        flash('copy check requested for url a = ' + str(form.url_a.data) + ', url b = ' + str(form.url_b.data))
        kf_a = KeyFrame(form.url_a.data)
        download_flag, info = kf_a.download_video()
        if not download_flag:
            return render_template('copy.html', title='Submit', form=form, error=info)
        kf_b = KeyFrame(form.url_b.data)
        download_flag, info = kf_b.download_video()
        if not download_flag:
            return render_template('copy.html', title='Submit', form=form, error=info)
        cd = CopyDetector()
        # fast path: byte-exact copies are caught by comparing MD5 digests
        result_flag = cd.md5_check(kf_a.video, kf_b.video)
        if not result_flag:
            # MD5 digests differ: fall back to a SIFT comparison of the extracted key frames
            print("MD5 check did not match")
            gray_pixel_images_path, info = kf_a.gray_pixel_based()
            gray_pixel_images_path, info = kf_b.gray_pixel_based()
            result_flag = cd.sift_check(kf_a.absolute_keyframs_list, kf_b.absolute_keyframs_list)
        detect_result = "SAME" if result_flag else "DIFF"
        print(detect_result)
        return render_template('copy_result.html', title='Result', result=detect_result)
    return render_template('copy.html', title='Detect', form=form, error="")
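# A minimal sketch of the MD5 fast path used above, assuming md5_check simply compares the
# digests of the two downloaded video files; the real CopyDetector implementation may differ.
import hashlib

def md5_check_sketch(video_path_a, video_path_b, chunk_size=8192):
    """Return True if the two files have identical MD5 digests (i.e. are byte-exact copies)."""
    digests = []
    for path in (video_path_a, video_path_b):
        md5 = hashlib.md5()
        with open(path, 'rb') as f:
            # hash in chunks so large videos do not have to fit in memory
            for chunk in iter(lambda: f.read(chunk_size), b''):
                md5.update(chunk)
        digests.append(md5.hexdigest())
    return digests[0] == digests[1]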
def track(self, img, frame_id, timestamp=None):
    Printer.cyan('@tracking')
    time_start = time.time()

    # check that the image size is coherent with the camera params
    print("img.shape: ", img.shape)
    print("camera ", self.camera.height, " x ", self.camera.width)
    assert img.shape[0:2] == (self.camera.height, self.camera.width)
    if timestamp is not None:
        print('timestamp: ', timestamp)

    self.timer_main_track.start()

    # build current frame
    self.timer_frame.start()
    f_cur = Frame(img, self.camera, timestamp=timestamp)
    self.f_cur = f_cur
    print("frame: ", f_cur.id)
    self.timer_frame.refresh()

    # reset indexes of matches
    self.idxs_ref = []
    self.idxs_cur = []

    if self.state == SlamState.NO_IMAGES_YET:
        # push first frame in the initializer
        self.intializer.init(f_cur)
        self.state = SlamState.NOT_INITIALIZED
        return  # EXIT (jump to second frame)

    if self.state == SlamState.NOT_INITIALIZED:
        # try to initialize
        initializer_output, intializer_is_ok = self.intializer.initialize(f_cur, img)
        if intializer_is_ok:
            kf_ref = initializer_output.kf_ref
            kf_cur = initializer_output.kf_cur
            # add the two initialized frames in the map
            self.map.add_frame(kf_ref)  # add first frame in map and update its frame id
            self.map.add_frame(kf_cur)  # add second frame in map and update its frame id
            # add the two initialized frames as keyframes in the map
            self.map.add_keyframe(kf_ref)  # add first keyframe in map and update its kid
            self.map.add_keyframe(kf_cur)  # add second keyframe in map and update its kid
            kf_ref.init_observations()
            kf_cur.init_observations()
            # add points in map
            new_pts_count, _, _ = self.map.add_points(
                initializer_output.pts, None, kf_cur, kf_ref,
                initializer_output.idxs_cur, initializer_output.idxs_ref,
                img, do_check=False)
            Printer.green("map: initialized %d new points" % (new_pts_count))
            # update covisibility graph connections
            kf_ref.update_connections()
            kf_cur.update_connections()
            # update tracking info
            self.f_cur = kf_cur
            self.f_cur.kf_ref = kf_ref
            self.kf_ref = kf_cur   # set reference keyframe
            self.kf_last = kf_cur  # set last added keyframe
            self.map.local_map.update(self.kf_ref)
            self.state = SlamState.OK
            self.update_tracking_history()
            self.motion_model.update_pose(kf_cur.timestamp, kf_cur.position, kf_cur.quaternion)
            # after initialization you cannot use the motion model for next frame pose prediction
            # (time ids of initialized poses may not be consecutive)
            self.motion_model.is_ok = False
            self.intializer.reset()
            if kUseDynamicDesDistanceTh:
                self.descriptor_distance_sigma = self.dyn_config.update_descriptor_stat(
                    kf_ref, kf_cur, initializer_output.idxs_ref, initializer_output.idxs_cur)
        return  # EXIT (jump to next frame)

    # get previous frame in map as reference
    f_ref = self.map.get_frame(-1)
    #f_ref_2 = self.map.get_frame(-2)
    self.f_ref = f_ref

    # add current frame f_cur to map
    self.map.add_frame(f_cur)
    self.f_cur.kf_ref = self.kf_ref

    # reset pose state flag
    self.pose_is_ok = False

    with self.map.update_lock:
        # check for map point replacements in previous frame f_ref
        # (some points might have been replaced by local mapping during point fusion)
        self.f_ref.check_replaced_map_points()

        if kUseDynamicDesDistanceTh:
            print('descriptor_distance_sigma: ', self.descriptor_distance_sigma)
            self.local_mapping.descriptor_distance_sigma = self.descriptor_distance_sigma

        # update (velocity) old motion model
        # c1=ref_ref, c2=ref, c3=cur; c=cur, r=ref
        #self.velocity = np.dot(f_ref.pose, inv_T(f_ref_2.pose))  # Tc2c1 = Tc2w * Twc1 (predicted Tcr)
        #self.predicted_pose = g2o.Isometry3d(np.dot(self.velocity, f_ref.pose))  # Tc3w = Tc2c1 * Tc2w (predicted Tcw)

        # set initial guess for current pose optimization
        if kUseMotionModel and self.motion_model.is_ok:
            print('using motion model for next pose prediction')
            # update f_ref pose according to its reference keyframe
            # (the pose of the reference keyframe could be updated by local mapping)
            self.f_ref.update_pose(self.tracking_history.relative_frame_poses[-1] * self.f_ref.kf_ref.isometry3d)
            # predict pose by using motion model
            self.predicted_pose, _ = self.motion_model.predict_pose(timestamp, self.f_ref.position, self.f_ref.orientation)
            f_cur.update_pose(self.predicted_pose)
        else:
            print('setting f_cur.pose <-- f_ref.pose')
            # use reference frame pose as initial guess
            f_cur.update_pose(f_ref.pose)

        # track camera motion from f_ref to f_cur
        self.track_previous_frame(f_ref, f_cur)

        if not self.pose_is_ok:
            # if the previous track didn't go well, track the camera motion from kf_ref to f_cur
            self.track_keyframe(self.kf_ref, f_cur)

        # now, having a better estimate of f_cur pose, we can find more map point matches:
        # find matches between {local map points} (points in the local map) and {unmatched keypoints of f_cur}
        if self.pose_is_ok:
            self.track_local_map(f_cur)
    # end block {with self.map.update_lock:}

    # TODO: add relocalization

    # HACK: since local mapping is not fast enough in python (and tracking is not in real-time),
    # give local mapping more time to process stuff
    self.wait_for_local_mapping()  # N.B.: this must be outside the `with self.map.update_lock:` block

    with self.map.update_lock:
        # update slam state
        if self.pose_is_ok:
            self.state = SlamState.OK
        else:
            self.state = SlamState.LOST
            Printer.red('tracking failure')

        # update motion model state
        self.motion_model.is_ok = self.pose_is_ok

        if self.pose_is_ok:  # if tracking was successful
            # update motion model
            self.motion_model.update_pose(timestamp, f_cur.position, f_cur.quaternion)
            f_cur.clean_vo_map_points()

            # do we need a new KeyFrame?
            need_new_kf = self.need_new_keyframe(f_cur)
            if need_new_kf:
                Printer.green('adding new KF with frame id % d: ' % (f_cur.id))
                if kLogKFinfoToFile:
                    self.kf_info_logger.info('adding new KF with frame id % d: ' % (f_cur.id))
                kf_new = KeyFrame(f_cur, img)
                self.kf_last = kf_new
                self.kf_ref = kf_new
                f_cur.kf_ref = kf_new
                self.local_mapping.push_keyframe(kf_new)
                if not kLocalMappingOnSeparateThread:
                    self.local_mapping.do_local_mapping()
            else:
                Printer.yellow('NOT KF')

            # From ORBSLAM2:
            # Clean outliers once keyframe generation has been managed:
            # we allow points with high innovation (considered outliers by the Huber function)
            # to pass to the new keyframe, so that bundle adjustment will finally decide
            # if they are outliers or not. We don't want the next frame to estimate its position
            # with those points, so we discard them in the frame.
            f_cur.clean_outlier_map_points()

        if self.f_cur.kf_ref is None:
            self.f_cur.kf_ref = self.kf_ref

        self.update_tracking_history()  # must stay after having updated the slam state (self.state)

        Printer.green("map: %d points, %d keyframes" % (self.map.num_points(), self.map.num_keyframes()))
        #self.update_history()

    self.timer_main_track.refresh()

    duration = time.time() - time_start
    print('tracking duration: ', duration)
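# A minimal sketch of the constant-velocity prediction mirrored by the commented-out
# velocity lines in track() above (Tc2c1 = Tc2w * Twc1, Tc3w = Tc2c1 * Tc2w), assuming
# 4x4 homogeneous world-to-camera pose matrices; the repo's MotionModel instead works
# with positions/quaternions and timestamps.
import numpy as np

def predict_pose_constant_velocity(T_ref_ref, T_ref):
    """Predict the next pose as T_next = (T_ref * T_ref_ref^-1) * T_ref."""
    velocity = T_ref @ np.linalg.inv(T_ref_ref)  # relative motion between the last two poses
    return velocity @ T_ref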
def initialize(self, f_cur, img_cur):
    if self.num_failures > kNumOfFailuresAfterWichNumMinTriangulatedPointsIsHalved:
        self.num_min_triangulated_points = 0.5 * Parameters.kInitializerNumMinTriangulatedPoints
        self.num_failures = 0
        Printer.orange('Initializer: halved min num triangulated features to ', self.num_min_triangulated_points)

    # prepare the output
    out = InitializerOutput()
    is_ok = False

    #print('num frames: ', len(self.frames))

    # if too many frames have passed, move the current idx_f_ref forward;
    # this is just one very simple policy that can be used
    if self.f_ref is not None:
        if f_cur.id - self.f_ref.id > kMaxIdDistBetweenIntializingFrames:
            self.f_ref = self.frames[-1]  # take last frame in the buffer
            #self.idx_f_ref = len(self.frames)-1  # take last frame in the buffer
            #self.idx_f_ref = self.frames.index(self.f_ref)  # since we are using a deque, the code of the previous commented line is not valid anymore
            #print('*** idx_f_ref:',self.idx_f_ref)
    #self.f_ref = self.frames[self.idx_f_ref]
    f_ref = self.f_ref
    #print('ref fid: ',self.f_ref.id,', curr fid: ', f_cur.id, ', idxs_ref: ', self.idxs_ref)

    # append current frame
    self.frames.append(f_cur)

    # if the current frames do not have enough features, exit
    if len(f_ref.kps) < self.num_min_features or len(f_cur.kps) < self.num_min_features:
        Printer.red('Initializer: ko - not enough features!')
        self.num_failures += 1
        return out, is_ok

    # find keypoint matches
    idxs_cur, idxs_ref = match_frames(f_cur, f_ref, kFeatureMatchRatioTestInitializer)

    #print('|------------')
    #print('deque ids: ', [f.id for f in self.frames])
    #print('initializing frames ', f_cur.id, ', ', f_ref.id)
    #print("# keypoint matches: ", len(idxs_cur))

    Trc = self.estimatePose(f_ref.kpsn[idxs_ref], f_cur.kpsn[idxs_cur])
    Tcr = inv_T(Trc)  # Tcr w.r.t. ref frame
    f_ref.update_pose(np.eye(4))
    f_cur.update_pose(Tcr)

    # remove outliers from keypoint matches by using the mask computed with inter-frame pose estimation
    mask_idxs = (self.mask_match.ravel() == 1)
    self.num_inliers = sum(mask_idxs)
    #print('# keypoint inliers: ', self.num_inliers )
    idx_cur_inliers = idxs_cur[mask_idxs]
    idx_ref_inliers = idxs_ref[mask_idxs]

    # create a temp map for initializing
    map = Map()
    f_ref.reset_points()
    f_cur.reset_points()
    #map.add_frame(f_ref)
    #map.add_frame(f_cur)
    kf_ref = KeyFrame(f_ref)
    kf_cur = KeyFrame(f_cur, img_cur)
    map.add_keyframe(kf_ref)
    map.add_keyframe(kf_cur)

    pts3d, mask_pts3d = triangulate_normalized_points(
        kf_cur.Tcw, kf_ref.Tcw, kf_cur.kpsn[idx_cur_inliers], kf_ref.kpsn[idx_ref_inliers])

    new_pts_count, mask_points, _ = map.add_points(
        pts3d, mask_pts3d, kf_cur, kf_ref, idx_cur_inliers, idx_ref_inliers, img_cur,
        do_check=True, cos_max_parallax=Parameters.kCosMaxParallaxInitializer)
    #print("# triangulated points: ", new_pts_count)

    if new_pts_count > self.num_min_triangulated_points:
        err = map.optimize(verbose=False, rounds=20, use_robust_kernel=True)
        print("init optimization error^2: %f" % err)

        num_map_points = len(map.points)
        print("# map points: %d" % num_map_points)
        is_ok = num_map_points > self.num_min_triangulated_points

        out.pts = pts3d[mask_points]
        out.kf_cur = kf_cur
        out.idxs_cur = idx_cur_inliers[mask_points]
        out.kf_ref = kf_ref
        out.idxs_ref = idx_ref_inliers[mask_points]

        # set scene median depth to equal desired_median_depth
        desired_median_depth = Parameters.kInitializerDesiredMedianDepth
        median_depth = kf_cur.compute_points_median_depth(out.pts)
        depth_scale = desired_median_depth / median_depth
        print('forcing current median depth ', median_depth, ' to ', desired_median_depth)

        out.pts[:, :3] = out.pts[:, :3] * depth_scale  # scale points
        tcw = kf_cur.tcw * depth_scale  # scale initial baseline
        kf_cur.update_translation(tcw)

    map.delete()

    if is_ok:
        Printer.green('Initializer: ok!')
    else:
        self.num_failures += 1
        #Printer.red('Initializer: ko!')
    print('|------------')
    return out, is_ok
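# A minimal sketch of the median-depth normalization performed at the end of initialize(),
# assuming pts3d rows are world-coordinate points (first three columns x, y, z) and depth is
# measured along the z-axis of kf_cur's camera frame; the repo obtains median_depth via
# kf_cur.compute_points_median_depth instead.
import numpy as np

def median_depth_scale(pts3d, Rcw, tcw, desired_median_depth):
    """Return the scale factor mapping the current median scene depth to the desired one."""
    depths = (Rcw[2, :] @ pts3d[:, :3].T) + tcw[2]  # z-coordinates in the camera frame
    median_depth = np.median(depths)
    return desired_median_depth / median_depth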
import glob
import multiprocessing
import os
import random

import keras
import numpy as np
from sklearn.utils import shuffle
# KeyFrame, Align, Video, get_list, select_augmentation and random_seed come from the project's own modules


class Generator(keras.callbacks.Callback):
    def __init__(self, dataset_path, minibatch_size, img_c, img_w, img_h, frames_n, absolute_max_string_len=32):
        self.dataset_path = dataset_path
        self.minibatch_size = minibatch_size
        self.img_c = img_c
        self.img_w = img_w
        self.img_h = img_h
        self.frames_n = frames_n
        self.absolute_max_string_len = absolute_max_string_len
        self.train_index = multiprocessing.Value('i', 0)
        self.val_index = multiprocessing.Value('i', 0)
        self.train_epoch = multiprocessing.Value('i', -1)
        self.keyframe = KeyFrame()
        self.train_path = os.path.join(self.dataset_path, 'train')
        self.val_path = os.path.join(self.dataset_path, 'val')
        self.align_path = os.path.join(self.dataset_path, 'align')
        self.build_dataset()

    @property
    def training_size(self):
        return len(self.train_list)

    @property
    def default_training_steps(self):
        return self.training_size / self.minibatch_size

    @property
    def validation_size(self):
        return len(self.val_list)

    @property
    def default_validation_steps(self):
        return self.validation_size / self.minibatch_size

    def prepare_vidlist(self, path):
        video_list = []
        for video_path in glob.glob(path):
            l = len(os.listdir(video_path))
            if l == 75:
                video_list.append(video_path)
            else:
                print("Error loading video: " + video_path + " has fewer than 75 frames (" + str(l) + ")")
        return video_list

    def prepare_align(self, video_list):
        align_dict = {}
        for video_path in video_list:
            video_id = os.path.splitext(video_path)[0].split('/')[-1]
            align_path = os.path.join(self.align_path, video_id) + ".align"
            align_dict[video_id] = Align(self.absolute_max_string_len).from_file(align_path)
        return align_dict

    def build_dataset(self):
        print("\nLoading data...")
        self.train_list = self.prepare_vidlist(os.path.join(self.train_path, '*', '*'))
        self.val_list = self.prepare_vidlist(os.path.join(self.val_path, '*', '*'))
        self.align_dict = self.prepare_align(self.train_list + self.val_list)
        self.steps_per_epoch = self.default_training_steps
        print("Number of training videos = ", self.training_size)
        print("Number of validation videos = ", self.validation_size)
        print("Steps per epoch ", round(self.steps_per_epoch))
        print("")
        np.random.shuffle(self.train_list)

    def get_batch(self, index, size, train):
        if train:
            video_list = self.train_list
        else:
            video_list = self.val_list

        X_data_path = get_list(video_list, index, size)
        X_data = []
        Y_data = []
        label_length = []
        input_length = []
        source_str = []
        for path in X_data_path:
            video = Video().from_frames(path)
            align = self.align_dict[path.split('/')[-1]]
            if train:
                video = self.keyframe.extract(video)
                # augment roughly one training sample out of three
                to_be_augmented = random.choice([True, False, False])
                if to_be_augmented:
                    video.data = select_augmentation(video.data)
            X_data.append(video.data)
            Y_data.append(align.padded_label)
            label_length.append(align.label_length)
            input_length.append(video.length)
            source_str.append(align.sentence)

        source_str = np.array(source_str)
        label_length = np.array(label_length)
        input_length = np.array(input_length)
        Y_data = np.array(Y_data)
        X_data = np.array(X_data).astype(np.float32) / 255

        X_data, Y_data, input_length, label_length, source_str = shuffle(
            X_data, Y_data, input_length, label_length, source_str, random_state=random_seed)

        inputs = {'the_input': X_data,
                  'the_labels': Y_data,
                  'input_length': input_length,
                  'label_length': label_length,
                  'source_str': source_str}
        outputs = {'ctc': np.zeros([size])}
        return (inputs, outputs)
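# A hedged usage sketch of wiring this Generator into Keras CTC training. The dataset path,
# image dimensions and the `model` object are assumptions, and next_train()/next_val() are
# hypothetical generator wrappers around get_batch() (they are not defined in this section).
#
# gen = Generator(dataset_path='data/GRID', minibatch_size=32,
#                 img_c=3, img_w=100, img_h=50, frames_n=75)
# model.fit_generator(generator=gen.next_train(),
#                     steps_per_epoch=round(gen.default_training_steps),
#                     validation_data=gen.next_val(),
#                     validation_steps=round(gen.default_validation_steps),
#                     callbacks=[gen],  # the class is a keras.callbacks.Callback, so it can track epochs
#                     epochs=20)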