def input_from_local(session, log, project_string_id, http_input, file, directory_id):
    """
    Create an Input() record from a locally uploaded file and run it
    through media processing.

    Writes the upload to a fresh temp directory, validates the file
    type and size, then either processes it immediately (the current
    default) or enqueues a PrioritizedItem for deferred processing.

    :param session: DB session
    :param log: accumulating log dict, returned unchanged
    :param project_string_id: project to attach the Input to
    :param http_input: request payload dict; may contain
        'instance_list' and/or 'frame_packet_map'
    :param file: uploaded file object (Flask-style, has .filename and .stream)
    :param directory_id: target dataset directory
    :return: (success: bool, log, input: Input)
    """
    # TODO review how we want to handle header options
    # Especially if needs to be outside of function for python requests...
    # immediate_mode = request.headers['immediate_mode']
    # Issues to be careful with ie string treatment of 'True' vs True...
    immediate_mode = True

    input = Input()
    input.directory_id = directory_id

    # Use .get() so a payload without these optional keys
    # doesn't raise KeyError.
    if http_input.get('instance_list'):
        input.instance_list = {}
        input.instance_list['list'] = http_input['instance_list']

    if http_input.get('frame_packet_map'):
        input.frame_packet_map = http_input['frame_packet_map']

    # only need to make temp dir if file doesn't already exist...
    original_filename = secure_filename(file.filename)  # http://flask.pocoo.org/docs/0.12/patterns/fileuploads/
    input.extension = os.path.splitext(original_filename)[1].lower()
    input.original_filename = os.path.split(original_filename)[1]
    input.temp_dir = tempfile.mkdtemp()
    # original_filename already ends with the extension, so don't append
    # input.extension a second time (previously produced "name.jpg.jpg").
    input.temp_dir_path_and_filename = os.path.join(
        input.temp_dir, original_filename)

    project = Project.get(session, project_string_id)
    input.project = project

    input.media_type = Process_Media.determine_media_type(input.extension)
    if not input.media_type:
        input.status = "failed"
        input.status_text = "Invalid file type: " + input.extension
        return False, log, input

    session.add(input)
    session.flush()

    with open(input.temp_dir_path_and_filename, "wb") as f:
        f.write(file.stream.read())

    # For LOCAL not normal upload
    file_size_limit = 9 * 1024 * 1024 * 1024

    file_size = os.path.getsize(
        input.temp_dir_path_and_filename)  # gets size in bytes

    if file_size > file_size_limit:
        input.status = "failed"
        input.status_text = "Exceeded max file size"
        return False, log, input

    if immediate_mode == True or immediate_mode is None:
        # Leave this as a direct call for time being, as we pass
        # the input back to thing on front end
        process_media = Process_Media(session=session, input=input)
        result = process_media.main_entry()

        # Always return input along with file?
        if result == True:
            return True, log, input
        if result == False:
            return False, log, input

    # Default: deferred processing via the queue
    priority = 100
    item = PrioritizedItem(
        priority=priority,
        input_id=input.id,
        media_type=input.media_type)
    add_item_to_queue(item)

    return True, log, input
def enqueue_packet(project_string_id,
                   session,
                   media_url = None,
                   media_type = None,
                   file_id = None,
                   file_name = None,
                   job_id = None,
                   batch_id = None,
                   directory_id = None,
                   source_directory_id = None,
                   instance_list = None,
                   video_split_duration = None,
                   frame_packet_map = None,
                   remove_link = None,
                   add_link = None,
                   copy_instance_list = None,
                   commit_input = False,
                   task_id = None,
                   video_parent_length = None,
                   type = None,
                   task_action = None,
                   external_map_id = None,
                   original_filename = None,
                   external_map_action = None,
                   enqueue_immediately = False,
                   mode = None,
                   allow_duplicates = False,
                   extract_labels_from_batch = False):
    """
    Creates Input() object and enqueues it for media processing.

    Returns the Input() object that was created.

    Notes:
    * `diffgram_input.type` is always set to "from_url"; the `type`
      parameter is currently unused (kept for caller compatibility).
      TODO(review): wire it up or remove it.
    * If neither PROCESS_MEDIA_ENQUEUE_LOCALLY_IMMEDIATELY nor
      `enqueue_immediately` applies, the Input is marked
      processing_deferred instead of being queued.

    :param commit_input: commit the session before queueing (only in the
        immediate-enqueue path)
    :param extract_labels_from_batch: when a batch_id is given, copy the
        batch's pre-labeled instance list onto the Input
    :return: Input
    """
    diffgram_input = Input()
    project = Project.get(session, project_string_id)
    diffgram_input.file_id = file_id
    diffgram_input.task_id = task_id
    diffgram_input.batch_id = batch_id
    diffgram_input.video_parent_length = video_parent_length
    diffgram_input.remove_link = remove_link
    diffgram_input.add_link = add_link
    diffgram_input.copy_instance_list = copy_instance_list
    diffgram_input.external_map_id = external_map_id
    diffgram_input.original_filename = original_filename
    diffgram_input.external_map_action = external_map_action
    diffgram_input.task_action = task_action
    diffgram_input.mode = mode
    diffgram_input.project = project
    diffgram_input.media_type = media_type
    diffgram_input.type = "from_url"
    diffgram_input.url = media_url
    diffgram_input.video_split_duration = video_split_duration
    diffgram_input.allow_duplicates = allow_duplicates

    if instance_list:
        diffgram_input.instance_list = {}
        diffgram_input.instance_list['list'] = instance_list

    if frame_packet_map:
        diffgram_input.frame_packet_map = frame_packet_map

    session.add(diffgram_input)
    session.flush()

    if batch_id and extract_labels_from_batch:
        upload_tools = Upload(session = session, project = project, request = None)
        upload_tools.extract_instance_list_from_batch(input = diffgram_input,
                                                      input_batch_id = batch_id,
                                                      file_name = file_name)

    # Expect temp dir to be None here.
    # because each machine should assign it's own temp dir
    # Something else to consider for future here!
    # Once this is part of input, it will be smoothly handled at right time as part of
    # processing queue
    diffgram_input.job_id = job_id

    # Process media handles checking if the directory id is valid
    diffgram_input.directory_id = directory_id
    diffgram_input.source_directory_id = source_directory_id

    # Capture the id after flush() so it survives a later commit/expire.
    diffgram_input_id = diffgram_input.id

    if settings.PROCESS_MEDIA_ENQUEUE_LOCALLY_IMMEDIATELY is True or enqueue_immediately:
        if commit_input:
            regular_methods.commit_with_rollback(session = session)
        item = PrioritizedItem(
            priority = 10000,  # individual frames have a priority here.
            input_id = diffgram_input_id,
            media_type = media_type)
        add_item_to_queue(item)
    else:
        diffgram_input.processing_deferred = True  # Default

    return diffgram_input
def add_frame_to_queue(self, frame, index: int, original_filename: str, project: Project, directory_id, video, length, video_parent_file: File, global_frame_number=None, initial_global_frame=None): """ Where frame is: a HxWxN np.array, where N=1 for mask clips and N=3 for RGB clips. https://zulko.github.io/moviepy/ref/VideoClip/VideoClip.html Careful we don't have self. context here Cautions * We purposely do not not pass the job id, since we only want to original video to be added to the job Question, is it correct we create input class in part to maintain same concepts / format even for video frames? Answer: For example see frame_end_number is used to pass information Makes more sense to have it all in there then the PrioritizedItem() thing long term Also thinking in terms of logging And yes of course, then it's complete reuse of the component Jan 20, 2020 Note we purposely do NOT commit this as it creates unneeded db overhead, so instead we only use it as a local object to maintain consistency of design which means we do NOT want to add to add it a sesion ie self.session.add(input) """ input = Input() # Use input for class attributes, # but don't add it to the session for video? # TODO use File.new() for consistency here (ie as we add new things) # Single frame naming input.original_filename = original_filename + "_" + str(index) input.extension = ".jpg" input.media_type = "frame" input.temp_dir = tempfile.mkdtemp() input.project = project input.directory_id = directory_id input.parent_file_id = video_parent_file.id input.frame_packet_map = self.input.frame_packet_map # caution length is estimated. frame_count # is calculated as we roll through this so can't use it yet # Question: clarity on difference between numbers. # (I know estimate but still.) 
input.video_parent_length = length input.parent_input_id = self.input.id input.project_id = self.project.id # This is a temporary usage thing only # Note database persisted # Context of needing it to be defined so existing instances # Can use it (vs having to get video from db each time, # prior we defined this on first frame. input.root_blob_path_to_frames = video.root_blob_path_to_frames input = self.get_instance_list_from_packet_map( input=input, frame_number=index, global_frame_number=global_frame_number, initial_global_frame=initial_global_frame, from_video_split=self.input.type == 'from_video_split') """ For frame priority, the original genesis was doing the last frame last but, I think it also makese sense to process in order in general. An alternative would be to say put a flag on the last frame but using order feels like a more general solution, assuming no suprises or extra overhead. Storing frames Maybe don't attach video_parent_file because it leads to not bound errors in ORM fairly easily. """ # TODO, consider sending data as a "raw" blob # to cloud storage, then setting "processing deferred" to True here. # Process frames of videos started before new videos item = process_media.PrioritizedItem( priority=100 + index, # Process in frame priority input=input, raw_numpy_image=frame, file_is_numpy_array=True, video_id=video.id, frame_number=index, global_frame_number=global_frame_number, media_type=input.media_type) process_media.add_item_to_queue(item)