# Example 1
def input_from_local(session, log, project_string_id, http_input, file,
                     directory_id):
    """
    Create an Input() record from a locally uploaded file and either
    process it immediately or enqueue it for deferred media processing.

    :param session: active DB session (Input is added and flushed here)
    :param log: log dict passed through unchanged in the return tuple
    :param project_string_id: project identifier resolved via Project.get()
    :param http_input: request payload dict; optional keys
        'instance_list' and 'frame_packet_map'
    :param file: uploaded file object (Flask/werkzeug style, with
        .filename and .stream)
    :param directory_id: target directory for the new Input
    :return: (success: bool, log, input: Input) — input always carries
        status / status_text on failure paths
    """
    # TODO review how we want to handle header options
    # Especially if needs to be outside of function for python requests...
    # immediate_mode = request.headers['immediate_mode']
    # Issues to be careful with ie string treatment of 'True' vs True...
    immediate_mode = True

    input = Input()
    input.directory_id = directory_id

    # .get() so a payload without these optional keys no longer raises KeyError
    instance_list = http_input.get('instance_list')
    if instance_list:
        input.instance_list = {'list': instance_list}

    frame_packet_map = http_input.get('frame_packet_map')
    if frame_packet_map:
        input.frame_packet_map = frame_packet_map

    # only need to make temp dir if file doesn't already exist...

    original_filename = secure_filename(
        file.filename
    )  # http://flask.pocoo.org/docs/0.12/patterns/fileuploads/

    input.extension = os.path.splitext(original_filename)[1].lower()
    input.original_filename = os.path.split(original_filename)[1]

    input.temp_dir = tempfile.mkdtemp()
    # os.path.join instead of manual "/" concatenation. Note that
    # original_filename already includes its extension, so we no longer
    # append input.extension again (previously produced "name.jpg.jpg").
    input.temp_dir_path_and_filename = os.path.join(
        input.temp_dir, original_filename)

    project = Project.get(session, project_string_id)

    input.project = project

    # Reject unsupported file types before writing anything to disk
    input.media_type = Process_Media.determine_media_type(input.extension)
    if not input.media_type:
        input.status = "failed"
        input.status_text = "Invalid file type: " + input.extension
        return False, log, input

    session.add(input)
    session.flush()

    with open(input.temp_dir_path_and_filename, "wb") as f:
        f.write(file.stream.read())

    # For LOCAL not normal upload
    file_size_limit = 9 * 1024 * 1024 * 1024  # 9 GiB

    file_size = os.path.getsize(
        input.temp_dir_path_and_filename)  # gets size in bytes

    if file_size > file_size_limit:
        input.status = "failed"
        input.status_text = "Exceeded max file size"
        return False, log, input

    if immediate_mode == True or immediate_mode is None:
        # Leave this as a direct call for time being, as we pass
        # the input back to thing on front end

        process_media = Process_Media(session=session, input=input)

        result = process_media.main_entry()

        # Always return input along with file?
        # NOTE(review): a result that is neither True nor False deliberately
        # falls through to the queueing path below — preserved as-is.
        if result == True:
            return True, log, input

        if result == False:
            return False, log, input

    # Default
    priority = 100

    item = PrioritizedItem(priority=priority,
                           input_id=input.id,
                           media_type=input.media_type)

    add_item_to_queue(item)

    return True, log, input
# Example 2
def enqueue_packet(project_string_id,
                   session,
                   media_url = None,
                   media_type = None,
                   file_id = None,
                   file_name = None,
                   job_id = None,
                   batch_id = None,
                   directory_id = None,
                   source_directory_id = None,
                   instance_list = None,
                   video_split_duration = None,
                   frame_packet_map = None,
                   remove_link = None,
                   add_link = None,
                   copy_instance_list = None,
                   commit_input = False,
                   task_id = None,
                   video_parent_length = None,
                   type = None,
                   task_action = None,
                   external_map_id = None,
                   original_filename = None,
                   external_map_action = None,
                   enqueue_immediately = False,
                   mode = None,
                   allow_duplicates = False,
                   extract_labels_from_batch = False):
    """
        Creates Input() object and enqueues it for media processing
        Returns Input() object that was created

    :param project_string_id: project the Input belongs to
    :param session: active DB session (Input is added and flushed here)
    :param media_url: remote URL of the media to fetch
    :param media_type: "image" or "video" (passed to the queue item)
    :param instance_list: optional list stored as input.instance_list['list']
    :param frame_packet_map: optional per-frame instance map
    :param commit_input: commit the session before enqueueing when
        processing immediately
    :param enqueue_immediately: force local enqueue even if the
        PROCESS_MEDIA_ENQUEUE_LOCALLY_IMMEDIATELY setting is off
    :param extract_labels_from_batch: with batch_id, pull the instance
        list out of the batch via Upload tooling
    :param type: NOTE(review) currently unused — input.type is always
        "from_url"; kept for interface compatibility
    :return: the created Input() object
    """
    diffgram_input = Input()
    project = Project.get(session, project_string_id)
    diffgram_input.file_id = file_id
    diffgram_input.task_id = task_id
    diffgram_input.batch_id = batch_id
    diffgram_input.video_parent_length = video_parent_length
    diffgram_input.remove_link = remove_link
    diffgram_input.add_link = add_link
    diffgram_input.copy_instance_list = copy_instance_list
    diffgram_input.external_map_id = external_map_id
    diffgram_input.original_filename = original_filename
    diffgram_input.external_map_action = external_map_action
    diffgram_input.task_action = task_action
    diffgram_input.mode = mode
    diffgram_input.project = project
    diffgram_input.media_type = media_type
    diffgram_input.type = "from_url"
    diffgram_input.url = media_url
    diffgram_input.video_split_duration = video_split_duration
    diffgram_input.allow_duplicates = allow_duplicates
    if instance_list:
        diffgram_input.instance_list = {}
        diffgram_input.instance_list['list'] = instance_list

    if frame_packet_map:
        diffgram_input.frame_packet_map = frame_packet_map

    session.add(diffgram_input)
    session.flush()

    if batch_id and extract_labels_from_batch:
        upload_tools = Upload(session = session, project = project, request = None)
        upload_tools.extract_instance_list_from_batch(input = diffgram_input,
                                                      input_batch_id = batch_id,
                                                      file_name = file_name)
    # Expect temp dir to be None here.
    # because each machine should assign it's own temp dir
    # Something else to consider for future here!
    # Once this is part of input, it will be smoothly handled at right time as part of
    # processing queue
    diffgram_input.job_id = job_id

    # Process media handles checking if the directory id is valid
    diffgram_input.directory_id = directory_id
    diffgram_input.source_directory_id = source_directory_id

    # Capture the id before a potential commit below
    diffgram_input_id = diffgram_input.id

    # NOTE(review): removed dead per-media-type queue_limit computation
    # (values were assigned but never read anywhere in this function).

    if settings.PROCESS_MEDIA_ENQUEUE_LOCALLY_IMMEDIATELY is True or enqueue_immediately:

        print('diffgram_input_id', diffgram_input_id)
        if commit_input:
            regular_methods.commit_with_rollback(session = session)
        item = PrioritizedItem(
            priority = 10000,  # individual frames have a priority here.
            input_id = diffgram_input_id,
            media_type = media_type)
        add_item_to_queue(item)
    else:
        diffgram_input.processing_deferred = True  # Default

    return diffgram_input
# Example 3
    def add_frame_to_queue(self,
                           frame,
                           index: int,
                           original_filename: str,
                           project: Project,
                           directory_id,
                           video,
                           length,
                           video_parent_file: File,
                           global_frame_number=None,
                           initial_global_frame=None):
        """
        Build a local-only Input() for a single video frame and enqueue it
        for processing.

        frame is an HxWxN np.array (N=1 for mask clips, N=3 for RGB) per
        https://zulko.github.io/moviepy/ref/VideoClip/VideoClip.html

        Careful: no self. context assumptions beyond self.input / self.project.

        Cautions
            * The job id is deliberately NOT passed along, since only the
              original video should be attached to the job.

        Design note: we reuse the Input class for frames so that concepts,
        format and logging stay consistent with other media (for example
        frame_end_number travels inside it), rather than stuffing
        everything into the PrioritizedItem().

        Jan 20, 2020: deliberately NOT committed / added to the session
        (ie no self.session.add(input)) — it would create unneeded db
        overhead; the object is used locally only, to keep the design
        consistent.
        """

        # Local object only — kept out of the session on purpose (see above).
        # TODO use File.new() for consistency here (ie as we add new things)
        frame_input = Input()

        # Single-frame naming: parent name plus frame index
        frame_input.original_filename = f"{original_filename}_{index}"
        frame_input.extension = ".jpg"
        frame_input.media_type = "frame"

        frame_input.temp_dir = tempfile.mkdtemp()
        frame_input.project = project
        frame_input.directory_id = directory_id
        frame_input.parent_file_id = video_parent_file.id
        frame_input.frame_packet_map = self.input.frame_packet_map

        # Caution: length is an estimate; frame_count is still being
        # accumulated while we roll through, so it can't be used yet.
        frame_input.video_parent_length = length
        frame_input.parent_input_id = self.input.id
        frame_input.project_id = self.project.id

        # Temporary, non-persisted usage: defined here so existing
        # instances can read it without re-fetching the video from the db
        # each time (previously set on the first frame only).
        frame_input.root_blob_path_to_frames = video.root_blob_path_to_frames

        frame_input = self.get_instance_list_from_packet_map(
            input=frame_input,
            frame_number=index,
            global_frame_number=global_frame_number,
            initial_global_frame=initial_global_frame,
            from_video_split=self.input.type == 'from_video_split')

        # Frame priority: originally about doing the last frame last, but
        # in-order processing is the more general solution (vs flagging
        # the final frame), assuming no surprises or extra overhead.
        #
        # Storing frames: video_parent_file is intentionally not attached —
        # doing so leads to not-bound ORM errors fairly easily.
        #
        # TODO consider sending data as a "raw" blob to cloud storage,
        # then setting "processing deferred" to True here.

        # priority offset keeps video frames ahead of newly started videos
        queue_item = process_media.PrioritizedItem(
            priority=100 + index,  # Process in frame priority
            input=frame_input,
            raw_numpy_image=frame,
            file_is_numpy_array=True,
            video_id=video.id,
            frame_number=index,
            global_frame_number=global_frame_number,
            media_type=frame_input.media_type)

        process_media.add_item_to_queue(queue_item)