def audio_process(video, video_path, models_to_apply):
    """Extract the video's audio track to a wav file and analyse it via RPC.

    Stores the relative wav path on `video`, runs the requested audio models
    through the RPC service on port 50052, persists the parsed result with
    `audio_postproc`, and returns the parsed response.
    """
    # Derive "<video basename>.wav" from the stored video file name.
    wav_name = str(video.video_file).split("/")[-1].split(".")[0] + ".wav"
    rel_audio_path = osp.join("audios", wav_name)
    video.audio_path = rel_audio_path
    abs_audio_path = osp.join(settings.MEDIA_ROOT, rel_audio_path)

    # Extract wav file from the source video.
    audio_decode(video_path, abs_audio_path)

    # rpc call
    print("Processing wav file through rpc call...")
    with RpcClient(port="50052") as rpc_client:
        raw_response, fr = rpc_client.service_request(
            abs_audio_path.encode(), models_to_apply)
    audio_response = json.loads(raw_response)
    fr = int(fr)

    # Save audio result
    audio_postproc(video, audio_response, fr)
    return audio_response
def search_product(time_stamp, video_name, host):
    """Search for products appearing around `time_stamp` in `video_name`.

    Clears previously retrieved product images, queries the product-search
    RPC service on port 50054, copies the matched images into the
    server-searchable directory, and rewrites each result's IMAGE field to a
    URL under MEDIA_URL. Returns the result list, or None on any failure.
    """
    retrieved_path = "images/product/retrieved"
    try:
        video_path = osp.join(settings.MEDIA_ROOT, video_name)

        # Delete images from previous search
        retrieved_abs = osp.join(settings.MEDIA_ROOT, retrieved_path)
        for img in os.listdir(retrieved_abs):
            if img.endswith(".jpg"):
                os.remove(osp.join(retrieved_abs, img))

        # Request rpc service
        with RpcClient(50054) as rpc_client:
            raw_response, _ = rpc_client.service_request(
                str(time_stamp).encode(), video_path)
        response = json.loads(raw_response)

        # Original image paths as reported by the service.
        src_path_list = [res["IMAGE"] for res in response]

        # Image path searchable by the server
        stem = video_name.split("/")[-1].split(".")[0]
        dst_file_list = [
            osp.join(
                retrieved_path,
                "_".join([stem, str(time_stamp), "top", str(rank + 1)]) + ".jpg")
            for rank in range(len(response))
        ]

        # Change the image path in each result to point to the destination.
        for entry, dst_file in zip(response, dst_file_list):
            entry["IMAGE"] = "http://" + host + osp.join(
                settings.MEDIA_URL, dst_file)

        # Copy image from path outside the project to searchable path
        for src, dst_file in zip(src_path_list, dst_file_list):
            copyfile(src, osp.join(settings.MEDIA_ROOT, dst_file))
        return response
    # Capture any exception and return None
    except Exception as e:
        print(e)
        return None
def visual_process(video, video_path, models_to_apply):
    """Run every decoded frame of the video through the visual RPC service.

    Counts frames, streams JPEG-encoded frames to the service on port 50051,
    detects shot boundaries, and hands everything to `visual_postproc`.
    Returns the list of per-frame parsed responses.
    """
    # Read frame count through imageio
    # TODO: add get_frame_cnt function to Hysia decoder
    with imageio.get_reader(video_path, "ffmpeg") as vid:
        frame_cnt = vid.count_frames()
    video.frame_cnt = frame_cnt

    # Initiate Hysia GPU decoder and start a decoding thread.
    decoder = PyDecoder.Decoder(settings.DECODING_HARDWARE)
    decoder.ingestVideo(video_path)
    decoder.decode()

    # rpc call
    print("Processing frames through rpc call...")
    visual_responses = []
    with tqdm.tqdm(total=frame_cnt, unit="frames") as pbar:
        with RpcClient(port="50051") as rpc_client:
            while True:
                frame = decoder.fetchFrame()
                # decoder returns empty frame once it reaches the end of video
                if frame.size == 0:
                    break
                _, frame_encoded = cv2.imencode('.jpg', frame)
                json_string, _ = rpc_client.service_request(
                    buf=frame_encoded.tobytes(), meta=models_to_apply)
                visual_responses.append(json.loads(json_string))
                pbar.update(1)

    # Get scene splits
    print("Splitting scenes...")
    shot_detector = Shot_Detector()
    splits_frame, splits_ms = shot_detector.detect(video_path)

    # Processing statistics and save frames
    print(
        "Computing statistics, extracting features and saving frame results..."
    )
    visual_postproc(video, video_path, visual_responses, splits_frame,
                    splits_ms)
    return visual_responses
def search_scene(query_img, target_videos):
    """Search scenes matching a query image (and optional text) via RPC.

    Queries the scene-search service on port 50053, copies matched frame
    images into "images/temp", groups the results by video name (largest
    group first), and annotates each result with the scene's highest price.
    Returns the grouped result lists, or None on any failure.
    """
    try:
        img_abs_path = osp.join(settings.MEDIA_ROOT, str(query_img.img))
        # Request rpc service
        with RpcClient(50053) as rpc_client:
            # Request meta data
            meta = json.dumps({
                "text": query_img.text,
                "target_videos": target_videos
            })
            response, _ = rpc_client.service_request(img_abs_path.encode(),
                                                     meta)
        response = json.loads(response)

        # Source image paths: absolute paths are used as-is, relative ones
        # are resolved against MEDIA_ROOT.
        src_path_list = [
            res["IMAGE"] if res["IMAGE"].startswith("/") else osp.join(
                settings.MEDIA_ROOT, res["IMAGE"]) for res in response
        ]
        # Image path searchable by the server
        dst_file_list = [
            osp.join(
                "images/temp",
                "_".join([str(query_img.owner.username), "top",
                          str(i + 1)]) + ".jpg")
            for i, _ in enumerate(response)
        ]

        # Group the search result by video name
        grouped = list()
        name2idx = dict()
        for i in range(len(response)):
            # Change the image path in response to point to the destination
            response[i]["IMAGE"] = dst_file_list[i]
            # Change key name to make it accessible by django template
            # language. BUGFIX: use a membership test rather than truthiness
            # so a start time of 0 (a scene beginning at t=0) is not dropped.
            if "START TIME" in response[i]:
                response[i]["START_TIME"] = response[i]["START TIME"]
                response[i]["END_TIME"] = response[i]["END TIME"]
            # TODO re_index TVQA
            response[i]["TV_NAME"] = response[i]["TV_NAME"].split("/")[-1]
            # Perform grouping
            if response[i]["TV_NAME"] in name2idx:
                grouped[name2idx[response[i]["TV_NAME"]]].append(response[i])
            else:
                grouped.append([response[i]])
                name2idx[response[i]["TV_NAME"]] = len(grouped) - 1
            # Add scenes highest price
            response[i]["HIGHEST_PRICE"] = Scene.objects.get(
                pk=int(response[i]["SCENE_ID"])).highest_price

        # Sort according to group size
        grouped.sort(key=len, reverse=True)

        # Absolute image list
        dst_path_list = [
            osp.join(settings.MEDIA_ROOT, f) for f in dst_file_list
        ]
        # Copy image from path outside the project to searchable path
        for src, dst in zip(src_path_list, dst_path_list):
            copyfile(src, dst)
        return grouped
    # Capture any exception and return None
    except Exception as e:
        # BUGFIX: report the failure (as sibling search_product does) instead
        # of swallowing the exception silently — `e` was previously unused.
        print(e)
        return None
def visual_postproc(video, video_path, json_response, splits_frame,
                    splits_ms):
    """Persist per-frame visual results and build per-scene artifacts.

    For each decoded frame: saves its RPC response as a json file, records a
    Frame row, accumulates per-scene detection-class counts, and — on each
    scene's middle frame — extracts a scene feature via the RPC client on
    port 50055. Finally pickles the scene feature list and writes the
    statistics json, storing both paths on `video`.
    """
    # Middle frame of each scene; features are extracted there as the
    # scene's representative frame. BUGFIX/perf: use a set — membership is
    # tested once per frame, and a list made that O(#scenes) each time.
    middle_frame = set()
    middle_time = {}
    for i in range(len(splits_ms) - 1):
        temp = math.floor((splits_frame[i] + splits_frame[i + 1]) / 2.0)
        middle_frame.add(temp)
        middle_time[temp] = [splits_ms[i], splits_ms[i + 1]]

    decoder = PyDecoder.Decoder(settings.DECODING_HARDWARE)
    decoder.ingestVideo(video_path)
    decoder.decode()

    cur_scene_idx = -1
    cur_scene_end = -1
    scene_end = video.frame_cnt
    statistics = list()
    scene_list = list()

    # Instantiate feature client to avoid repetitive connection establishment
    rpc_client = RpcClient(port="50055")
    try:
        with tqdm.tqdm(total=scene_end, unit="frames") as pbar:
            # Bulk saving to increase speed
            with transaction.atomic():
                try:
                    frame_idx = 0
                    while True:
                        img = decoder.fetchFrame()
                        # Empty frame signals the end of the video.
                        if img.size == 0:
                            break
                        # Generate json path wrt video path
                        filename = video_path.split('/')[-1].split(
                            '.')[0] + '_' + str(frame_idx) + '.json'
                        json_path = osp.join(settings.MEDIA_ROOT, 'json',
                                             filename)
                        decoded = json_response[frame_idx]
                        # Save json
                        with open(json_path, 'w') as fw:
                            json.dump(decoded, fw)
                        frame_serializer = FrameSerializer(data={
                            "video": video.pk,
                            'json_path': json_path
                        })
                        frame = None
                        # TODO: this does not work when json_path (aka. video
                        # name) is longer than certain characters
                        if frame_serializer.is_valid():
                            frame = frame_serializer.save()
                        # Switch scene once past the current scene's end.
                        if frame_idx > cur_scene_end:
                            cur_scene_idx += 1
                            cur_scene_end = splits_frame[
                                cur_scene_idx + 1] if cur_scene_idx < len(
                                    splits_frame) - 1 else scene_end - 1
                            statistics.append({
                                "start_frame": splits_frame[cur_scene_idx],
                                "end_frame": cur_scene_end,
                                "cur_scene_statistics": {}
                            })
                        # Update per-scene detection-class counts.
                        if decoded.get("detection_classes_names"):
                            counts = statistics[cur_scene_idx][
                                "cur_scene_statistics"]
                            for cls in decoded["detection_classes_names"]:
                                counts[cls] = counts.get(cls, 0) + 1
                        # Extract feature on the scene's middle frame.
                        if frame_idx in middle_frame:
                            # Create new scene
                            index_scene_feature(rpc_client, video, img, frame,
                                                frame_idx, middle_time,
                                                scene_list)
                        # Update pbar
                        pbar.update(1)
                        frame_idx += 1
                except imageio.core.format.CannotReadFrameError:
                    print("io error caught")
    finally:
        # BUGFIX: close the feature client even when an unexpected exception
        # escapes the decode loop — the original only closed it on the
        # success path, leaking the connection.
        rpc_client.close()

    # Save the pickle file
    pkl_name = str(
        video.video_file).split("/")[-1].split(".")[0] + "_index.pkl"
    pkl_path = osp.join(settings.MEDIA_ROOT, "multi_features", pkl_name)
    with open(pkl_path, "wb") as f:
        pickle.dump(scene_list, f)
    video.pkl_path = pkl_path

    # Save the statistics
    statistics_name = str(
        video.video_file).split("/")[-1].split(".")[0] + "_statistics.json"
    statistics_path = osp.join(settings.MEDIA_ROOT,
                               "statistics/" + statistics_name)
    with open(statistics_path, "w") as f:
        json.dump(statistics, f)
    video.statistics = statistics_path
    return