Example #1
 def wrapper(*args):
     log = regular_log.default()
     try:
         res = f(*args)
         return res
     except Exception as e:
         log['error']['connection_error'] = 'Error connecting to Labelbox. Please check your private API key is correct.'
         log['error']['exception_details'] = str(e)
         return {'log': log}
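
Examples #1 and #2 show only the inner wrapper of what is presumably a decorator that captures `f`. A minimal sketch of the full pattern, with a hypothetical stand-in for `regular_log.default()` (the exact keys it initializes are an assumption inferred from how these examples read and write the log):

import functools

# Hypothetical stand-in for regular_log.default(); the examples use
# log['error'] and log['info'], so a fresh dict of those buckets is assumed.
def default_log():
    return {'error': {}, 'info': {}}

# Assumed outer decorator producing the wrapper shown above; the name
# with_labelbox_error_handler is illustrative, not from the original source.
def with_labelbox_error_handler(f):
    @functools.wraps(f)
    def wrapper(*args):
        log = default_log()
        try:
            return f(*args)
        except Exception as e:
            log['error']['connection_error'] = 'Error connecting to Labelbox. Please check your private API key is correct.'
            log['error']['exception_details'] = str(e)
            return {'log': log}
    return wrapper
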
Example #2
 def wrapper(*args):
     log = regular_log.default()
     try:
         res = f(*args)
         return res
     except Exception as e:
         log['error']['auth_credentials'] = 'Error connecting to Google Cloud Storage. Please check your private key, email and ID are correct.'
         log['error']['exception_details'] = str(e)
         return {'log': log}
Example #3
    def __start_fetch_folder(self, opts):
        spec_list = [{'project_string_id': dict}]
        log = regular_log.default()
        log, input = regular_input.input_check_many(
            untrusted_input=self.config_data, spec_list=spec_list, log=log)
        if len(log["error"].keys()) >= 1:
            return {'log': log}

        t = threading.Thread(target=self.__fetch_folder, args=(opts,))
        t.start()
        return {'result': True}
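
The method validates its input synchronously, then hands the slow fetch to a background thread so the caller gets `{'result': True}` immediately. A minimal sketch of the same fire-and-forget pattern (names are illustrative):

import threading

def start_background_fetch(fetch_fn, opts):
    # daemon=True lets the process exit without waiting for the fetch;
    # whether the original intends that is an assumption.
    t = threading.Thread(target=fetch_fn, args=(opts,), daemon=True)
    t.start()
    return {'result': True}
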
Example #4
    def process_sync_actions(self, session, sync_action):
        """
            Executes the sync action depending on its trigger type.
        :param session:
        :param sync_action:
        :return:
        """
        log = regular_log.default()
        sync_event = sync_action.sync_event
        sync_events_manager = SyncEventManager(session=session,
                                               sync_event=sync_event)
        logger.debug('Processing new sync event.')
        if sync_event.event_trigger_type == 'task_completed':
            completed_task = sync_event.completed_task
            job_observable = task_file_observers.JobObservable(
                session=session,
                log=log,
                job=completed_task.job,
                task=completed_task,
                sync_events_manager=sync_events_manager)
            job_observable.notify_all_observers(defer=False)
        elif sync_event.event_trigger_type == 'file_operation':
            logger.debug('Processing file_operation sync event.')
            destination_directory = sync_event.dataset_destination
            source_directory = None
            file = sync_event.file
            if sync_event.event_effect_type in ['file_copy', 'file_move']:
                logger.debug('Processing file_copy/file_move sync event.')
                if sync_event.event_effect_type == 'file_copy':
                    # we need to provide the source dir for validation of incoming dir.
                    source_directory = sync_event.dataset_source
                    file = sync_event.new_file_copy

                job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
                    session=session,
                    log=log,
                    directory=destination_directory,
                )
                # we need to provide the source dir, so validation of incoming
                # directory does not fail when checking the directory the file is coming from.
                logger.debug('Syncing file on jobs...')
                job_dir_sync_manager.add_file_to_all_jobs(
                    file=file,
                    source_dir=source_directory,
                    create_tasks=True,
                )
            else:
                logger.info(
                    '{} event effect not supported for processing.'.format(
                        sync_event.event_effect_type))
        else:
            logger.info(
                '{} event trigger not supported for processing.'.format(
                    sync_event.event_trigger_type))
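
process_sync_actions dispatches on event_trigger_type with an if/elif chain. A dispatch table is one alternative as trigger types grow; the handler functions below are hypothetical stand-ins, not part of the original class:

import logging
from types import SimpleNamespace

logger = logging.getLogger(__name__)

# Hypothetical handlers standing in for the task_completed / file_operation branches.
def handle_task_completed(sync_event):
    logger.debug('handling task_completed')

def handle_file_operation(sync_event):
    logger.debug('handling file_operation')

TRIGGER_HANDLERS = {
    'task_completed': handle_task_completed,
    'file_operation': handle_file_operation,
}

def process(sync_event):
    handler = TRIGGER_HANDLERS.get(sync_event.event_trigger_type)
    if handler is None:
        logger.info('%s event trigger not supported for processing.',
                    sync_event.event_trigger_type)
        return
    handler(sync_event)

process(SimpleNamespace(event_trigger_type='task_completed'))
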
Example #5
def labelbox_web_hook_manager():
    """
        Webhook for receiving data on Diffgram once finished on labelbox.
        # NOTE: Labelbox does not support text or dropdown classifications in export for videos.
    :return:
    """
    # First check if secret is correct
    payload = request.data
    secret = settings.LABEL_BOX_SECRET
    log = regular_log.default()
    computed_signature = hmac.new(bytearray(secret.encode('utf-8')),
                                  msg=payload,
                                  digestmod=hashlib.sha1).hexdigest()
    if request.headers['X-Hub-Signature'] != 'sha1=' + computed_signature:
        error = 'Error: computed_signature does not match signature provided in the headers'
        logger.error(error)
        return error
    with sessionMaker.session_scope() as session:
        labelbox_event = request.headers['X-Labelbox-Event']
        payload = request.json
        logger.debug('Payload for labelbox webhooks: {}'.format(payload))
        labelbox_project_id = payload['project']['id']
        project_external_mapping = ExternalMap.get(
            session=session,
            external_id=labelbox_project_id,
            type='labelbox',
            diffgram_class_string='task_template')
        if project_external_mapping:
            task_template = Job.get_by_id(session,
                                          project_external_mapping.job_id)
            if task_template:
                connection = task_template.interface_connection
                logger.debug('Connection for labelbox: {}'.format(connection))
                connector_manager = ConnectorManager(connection=connection,
                                                     session=session)
                connector = connector_manager.get_connector_instance()
                connector.connect()
                sync_manager = LabelBoxSyncManager(
                    session=session,
                    task_template=task_template,
                    labelbox_project=None,
                    log=log,
                    labelbox_connector=connector)
                sync_manager.handle_task_creation_hook(payload)
                return jsonify({'message': 'OK.'})
            else:
                log['error']['task_template'] = 'Task template not found.'
                return jsonify(log)
        else:
            log['error']['labelbox_project'] = 'Labelbox external mapping not found.'
            return jsonify(log)
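
One caveat in the signature check above: comparing HMAC digests with `!=` leaks timing information about how many leading characters matched. `hmac.compare_digest` performs a constant-time comparison; a minimal sketch:

import hashlib
import hmac

def signature_is_valid(secret: str, payload: bytes, header_signature: str) -> bool:
    computed = hmac.new(secret.encode('utf-8'), msg=payload,
                        digestmod=hashlib.sha1).hexdigest()
    # compare_digest takes time independent of where the strings first differ
    return hmac.compare_digest('sha1=' + computed, header_signature)
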
Example #6
def check_export_permissions_and_status(export, project_string_id, session):
    project_perms = has_project_permissions_for_export(export,
                                                       project_string_id,
                                                       session)
    if len(project_perms['error'].keys()) > 1:
        return project_perms

    export_completed_result = is_export_completed(export)
    if len(export_completed_result['error'].keys()) > 1:
        return export_completed_result

    return regular_log.default()
Example #7
 def __init__(self, session, project, member, directory=None):
     self.project = project
     self.session = session
     self.member = member
     self.directory = directory
     # Additional security check just for sanity
     Project_permissions.by_project_core(
         project_string_id=self.project.project_string_id,
         Roles=["admin", "Editor", "Viewer", "allow_if_project_is_public"],
         apis_project_list=[],
         apis_user_list=['security_email_verified'])
     self.log = regular_log.default()
     self.parser = Lark(grammar_definition, parser='lalr', transformer=None)
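
`grammar_definition` is not included in the excerpt; the sketch below uses a tiny hypothetical grammar just to show the Lark LALR construction the constructor relies on:

from lark import Lark

# Stand-in grammar; the real grammar_definition is defined elsewhere.
grammar_definition = """
    start: NAME "=" NUMBER
    %import common.CNAME -> NAME
    %import common.NUMBER
    %import common.WS
    %ignore WS
"""

parser = Lark(grammar_definition, parser='lalr', transformer=None)
tree = parser.parse("x = 42")
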
Example #8
    def test_execute_after_launch_strategy(self):
        file = data_mocking.create_file({'project_id': self.project.id}, self.session)
        attach_dir1 = data_mocking.create_directory({
            'project': self.project,
            'user': self.project_data['users'][0],
            'files': [file]
        }, self.session)
        connection = data_mocking.create_connection({
            'name': 'test',
            'integration_name': 'scale_ai',
            'project_id': self.project.id
        }, self.session)

        job = data_mocking.create_job({
            'name': 'my-test-job-{}'.format(1),
            'project': self.project,
            'status': 'active',
            'type': "Normal",
            'attached_directories': [
                attach_dir1
            ],
            'interface_connection_id': connection.id
        }, self.session)

        strategy = ScaleAITaskTemplateAfterLaunchStrategy(
            task_template=job,
            session=self.session,
            log=regular_log.default()
        )
        with patch.object(ScaleAITaskTemplateAfterLaunchStrategy, 'create_scale_ai_project',
                          return_value={'id': '123', 'name': 'scaleaitest'}):
            strategy.execute_after_launch_strategy()
            commit_with_rollback(self.session)
            tasks_count = self.session.query(Task).filter(
                Task.job_id == job.id
            ).count()
            tasks = self.session.query(Task).filter(
                Task.job_id == job.id
            ).all()
            self.assertEqual(tasks_count, 1)

            external_maps = ExternalMap.get(
                session=self.session,
                job_id=job.id,
                diffgram_class_string='task_template',
                connection_id=connection.id,
                type=connection.integration_name
            )

            self.assertNotEqual(external_maps, None)
Example #9
    def __count_objects(self, opts):
        spec_list = [{'bucket_name': str, 'path': str}]
        log = regular_log.default()
        log, input = regular_input.input_check_many(untrusted_input=opts,
                                                    spec_list=spec_list,
                                                    log=log)
        if len(log["error"].keys()) >= 1:
            return {'log': log}

        blobs = self.connection_client.list_blobs(opts['bucket_name'],
                                                  prefix=opts['path'])
        count = 0
        for b in blobs:
            if b.name.endswith('/'):
                continue
            count += 1
        return {'result': count}
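
The counting loop skips zero-byte "folder" placeholder objects (names ending in '/'). Since list_blobs pages results lazily, the same count can be written as a generator expression without loading everything into memory:

# Equivalent to the loop above; assumes `blobs` is the list_blobs iterator.
count = sum(1 for b in blobs if not b.name.endswith('/'))
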
Example #10
def send_task_to_scale_ai():
    """
        Webhook for receiving data on Diffgram once finished on labelbox.
        # NOTE: Labelbox does not supportText or dropdown classifications in export for videos.
    :return:
    """
    # First check if secret is correct
    spec_list = [{'task_id': dict}, {'project_string_id': str}]

    log, input_data, untrusted_input = regular_input.master(
        request=request, spec_list=spec_list)

    if len(log["error"].keys()) >= 1:
        return jsonify(log=log), 400
    with sessionMaker.session_scope() as session:
        task_id = input_data['task_id']
        task = Task.get_by_id(session, task_id=task_id)
        if task:
            task_template = task.job
            connection = task_template.interface_connection
            logger.debug('Connection for ScaleAI: {}'.format(connection))
            connector_manager = ConnectorManager(connection=connection,
                                                 session=session)
            connector = connector_manager.get_connector_instance()
            connector.connect()

            scale_ai_sync_manager = ScaleAISyncManager(
                task_template=task_template,
                scale_ai_connector=connector,
                log=log,
                session=session)

            scale_ai_task, log = scale_ai_sync_manager.send_diffgram_task(task)
            logger.debug('Scale AI create result: {} || {}'.format(
                scale_ai_task, log))
            if not scale_ai_task:
                logger.error('Scale AI task creation failed: {}'.format(log))
                return jsonify(log=log), 400

            return jsonify({
                'message': 'OK.',
                'scale_ai_task_id': scale_ai_task.id
            })
        else:
            log['error']['task_id'] = 'Task not found.'
            return jsonify(log)
Example #11
    def create_tasks_for_sample_task_template(self,
                                              task_template,
                                              attached_dir=None,
                                              files=None):
        if not files:
            files = WorkingDirFileLink.file_list(
                self.session,
                working_dir_id=attached_dir.id,
                root_files_only=True,  # TODO do we need to get child files too?
                limit=None,
                type='image')

        job_sync_manager = JobDirectorySyncManager(session=self.session,
                                                   job=task_template,
                                                   log=regular_log.default(),
                                                   directory=attached_dir)

        job_sync_manager.create_file_links_for_attached_dirs(create_tasks=True)
        return files
Example #12
    def connect(self):
        log = regular_log.default()
        if not self.auth_data.get('project_id'):  # '' or None both mean unset; `is ''` was an identity-comparison bug
            log['error'][
                'client_project'] = "ValueError: Client project not set: pass an explicit project."
            return {'log': log}

        auth = self.generate_auth_data(
            email=self.auth_data['client_email'],
            client_id=self.auth_data['client_id'],
            client_secret=self.auth_data['client_secret'],
            project_id=self.auth_data['project_id'])

        credentials = service_account.Credentials.from_service_account_info(
            auth)
        self.connection_client = storage.Client(credentials=credentials,
                                                project=auth['project_id'])

        return {'result': True}
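
`generate_auth_data` is not shown; whatever it builds must satisfy `service_account.Credentials.from_service_account_info`. A minimal sketch of the same client construction from a key file ('service-account.json' is a placeholder path; real key files come from the Google Cloud console):

from google.cloud import storage
from google.oauth2 import service_account

credentials = service_account.Credentials.from_service_account_file(
    'service-account.json')
client = storage.Client(credentials=credentials,
                        project=credentials.project_id)
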
Example #13
def task_template_launch_core(session, job):
    """

        This function is in charge of attaching the labels to the job, setting status to active
        and then creating the root tasks for each of the files attached to the job.
    """
    if not job:
        return False
    # TODO other pre checks (ie that guide is attached,
    # has a bid, files, etc.

    # check Status is "launchable" ie in draft

    # Update job status
    log = regular_log.default()
    # CAUTION using default directory for project which may not be right
    result = task_template_label_attach(
        session=session,
        task_template=job,
        project_directory=job.project.directory_default,
    )

    # QUESTION Do we only need to create tasks for "normal work things"?
    # ie for exams it gets done as part of the process
    # QUESTION are these only relevant for normal work? not exam?

    if job.type == "Normal":
        task_template_new_normal(session=session, task_template=job)

    if job.type == "Exam":
        task_template_new_exam(session=session, task_template=job)

    # Add job to all attached directories
    job_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=session, job=job, log=log)

    assert job is not None

    session.add(job)

    return job
Example #14
    def test_create_sequence_preview_core(self):
        label = data_mocking.create_label({
            'name': 'apple',
        }, self.session)
        label_file = data_mocking.create_label_file(
            {
                'label': label,
                'project_id': self.project.id
            }, self.session)
        video_file = data_mocking.create_file(
            {
                'project_id': self.project.id,
                'type': 'video'
            }, self.session)
        video_file_bad = data_mocking.create_file(
            {
                'project_id': self.project2.id,
                'type': 'video'
            }, self.session)
        sequence = data_mocking.create_sequence(
            {
                'label_file_id': label_file.id,
                'video_file_id': video_file.id,
                'cache_expiry': time.time() + 500000,
                'number': 1,
            }, self.session)

        sequence2 = data_mocking.create_sequence(
            {
                'label_file_id': label_file.id,
                'video_file_id': video_file_bad.id,
                'cache_expiry': time.time() + 500000,
                'number': 1,
            }, self.session)

        preview_url = 'https://picsum.photos/200/300'
        instance = data_mocking.create_instance(
            {
                'project_id': self.project.id,
                'type': 'box',
                'x_min': 0,
                'x_max': 0,
                'y_min': 0,
                'y_max': 0,
                'file_id': video_file.id,
                'soft_delete': False,
                'sequence_id': sequence.id,
                'preview_image_url': preview_url,
                'preview_image_url_expiry': 900000000,
            }, self.session)
        sequence.instance_preview_cache = {
            'id': instance.id,
            'file_id': sequence.video_file.id,
            'preview_image_url': preview_url,
        }
        self.session.commit()

        result, log = sequence_preview_create.create_sequence_preview_core(
            session=self.session,
            log=regular_log.default(),
            project=self.project,
            sequence_id=sequence.id)

        self.assertEqual(len(log['error'].keys()), 0)
        self.assertEqual(result['instance_preview']['id'], instance.id)
        self.assertEqual(result['instance_preview']['file_id'], video_file.id)
        self.assertEqual(result['instance_preview']['preview_image_url'],
                         preview_url)

        # Error case
        result, log = sequence_preview_create.create_sequence_preview_core(
            session=self.session,
            log=regular_log.default(),
            project=self.project,
            sequence_id=sequence2.id)
        self.assertEqual(len(log['error'].keys()), 1)
        self.assertTrue('project_id' in log['error'])
Example #15
    def __send_export(self, opts):
        spec_list = [{'project_string_id': dict}]
        log = regular_log.default()
        log, input = regular_input.input_check_many(
            untrusted_input=self.config_data, spec_list=spec_list, log=log)
        if len(log["error"].keys()) >= 1:
            return {'log': log}
        spec_list = [
            {
                'path': str
            },
            {
                "format": {
                    'default': 'JSON',
                    'kind': str,
                    'valid_values_list': ['JSON', 'YAML']
                }
            },
            {
                'export_id': str
            },
            {
                'bucket_name': str
            },
        ]
        log = regular_log.default()
        log, input = regular_input.input_check_many(untrusted_input=opts,
                                                    spec_list=spec_list,
                                                    log=log,
                                                    string_len_not_zero=False)
        if len(log["error"].keys()) >= 1:
            return {'log': log}

        if not opts['path'].endswith('/') and opts['path'] != '':
            log['error']['path'] = 'Path on bucket must be a folder, not a filename.'
            return {'log': log}

        with sessionMaker.session_scope() as session:
            project = Project.get_by_string_id(
                session, self.config_data['project_string_id'])
            member = session.query(Member).filter(
                Member.user_id == opts['event_data']['request_user']).first()
            export = session.query(Export).filter(
                Export.id == opts['export_id']).first()
            # Check perms and export status.
            export_check_result = check_export_permissions_and_status(
                export, self.config_data['project_string_id'], session)
            if len(export_check_result['error'].keys()) > 1:
                log = regular_log.default()
                log['error'] = export_check_result['error']
                log['error']['file_name'] = opts['path']
                log['opts'] = opts
                Event.new(
                    session=session,
                    member_id=opts['event_data']['request_user'],
                    kind='google_cloud_new_export_error',
                    description='Google cloud export error for {}'.format(
                        opts['path']),
                    error_log=log,
                    member=member,
                    project_id=project.id,
                    success=False)
                return export_check_result

            bucket = self.connection_client.get_bucket(opts['bucket_name'])
            result = export_view_core(export=export,
                                      format=opts['format'],
                                      return_type='bytes')
            filename = generate_file_name_from_export(export, session)

            if opts['path'] != '':
                blob = bucket.blob('{}{}.{}'.format(opts['path'], filename,
                                                    opts['format'].lower()))
            else:
                blob = bucket.blob('{}.{}'.format(filename,
                                                  opts['format'].lower()))
            blob.upload_from_string(result)
            log = regular_log.default()
            log['opts'] = opts
            Event.new(session=session,
                      member_id=opts['event_data']['request_user'],
                      kind='google_cloud_new_export_success',
                      description='New cloud export for {}'.format(blob.name),
                      error_log=log,
                      member=member,
                      project_id=project.id,
                      success=True)
            return {'result': True}
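
The upload path above reduces to the standard bucket/blob/upload_from_string sequence; a minimal sketch assuming an authenticated client (see Example #12), with illustrative bucket and object names:

bucket = client.get_bucket('my-bucket')
blob = bucket.blob('exports/annotations.json')
blob.upload_from_string('{"files": []}', content_type='application/json')
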
Example #16
    def __init__(self, session, task_template):

        self.session = session
        self.task_template = task_template
        self.log = regular_log.default()
        self.strategy = None
Example #17
    def __fetch_folder(self, opts):
        result = []

        if self.config_data.get('project_string_id') is None:
            return {'result': 'error'}
        paths = opts['path']
        if type(paths) != list:
            paths = [paths]
        with sessionMaker.session_scope() as session:
            project = Project.get_by_string_id(
                session, self.config_data.get('project_string_id'))
            member = session.query(Member).filter(
                Member.user_id == opts['event_data']['request_user']).first()
            for path in paths:
                blobs = self.connection_client.list_blobs(opts['bucket_name'],
                                                          prefix=path)
                for blob in blobs:
                    # Deduct Media Type:
                    if blob.name.endswith('/'):
                        continue

                    blob_expiry = int(time.time() + (60 * 60 * 24 * 30))
                    signed_url = blob.generate_signed_url(
                        expiration=blob_expiry)
                    extension = Path(blob.name).suffix  # blob.name is the raw object name; blob.path is the URL-quoted API path
                    media_type = None
                    if extension in images_allowed_file_names:
                        media_type = 'image'
                    elif extension in videos_allowed_file_names:
                        media_type = 'video'
                    else:
                        logging.warning('File {} must be one of the following types: {} {}'.format(
                            blob.name, str(images_allowed_file_names),
                            str(videos_allowed_file_names)))

                        log = regular_log.default()
                        log['error'][
                            'invalid_type'] = 'File must be one of the following types: {} {}'.format(
                                str(images_allowed_file_names),
                                str(videos_allowed_file_names))
                        log['error']['file_name'] = path
                        log['opts'] = opts
                        Event.new(
                            session=session,
                            member_id=opts['event_data']['request_user'],
                            kind='google_cloud_new_import_warning',
                            description='Skipped import for {}, invalid file type.'.format(blob.name),
                            error_log=log,
                            project_id=project.id,
                            member=member,
                            success=False)
                        continue
                    # TODO: check Input() table for duplicate file?
                    created_input = packet.enqueue_packet(
                        self.config_data['project_string_id'],
                        session=session,
                        media_url=signed_url,
                        media_type=media_type,
                        job_id=opts.get('job_id'),
                        batch_id=opts.get('batch_id'),
                        file_name=path,
                        video_split_duration=opts.get('video_split_duration'),
                        directory_id=opts.get('directory_id'),
                        extract_labels_from_batch=True)
                    log = regular_log.default()
                    log['opts'] = opts
                    Event.new(session=session,
                              member_id=opts['event_data']['request_user'],
                              kind='google_cloud_new_import_success',
                              description='New cloud import for {}'.format(
                                  blob.name),
                              error_log=log,
                              project_id=project.id,
                              member=member,
                              success=True)
                    result.append(created_input)
        return result
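
The signed URL above is built from a unix timestamp 30 days out. generate_signed_url also accepts a timedelta, which reads more clearly; a minimal sketch assuming `blob` as in the loop above:

import datetime

# Equivalent to int(time.time() + 60 * 60 * 24 * 30) in the loop above.
signed_url = blob.generate_signed_url(expiration=datetime.timedelta(days=30))
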
Example #18
 def connect(self):
     log = regular_log.default()
     self.connection_client = labelbox.Client(
         self.auth_data['client_secret'])
     return {'result': True}
Example #19
class Input(Base):
    __tablename__ = 'input'

    id = Column(Integer, primary_key=True)
    created_time = Column(DateTime, default=datetime.datetime.utcnow)

    time_completed = Column(DateTime)
    time_loaded_video = Column(DateTime)

    # We assume this includes upload
    time_video_write_finished = Column(DateTime)

    # Frames can start processing as this happens
    # But it's still an "end point" to measure
    # ie we could look at other options to speed that up.
    time_pushed_all_frames_to_queue = Column(DateTime)

    # Yes some work to update this
    # BUT this is probably a better measure for more closely tracking
    # if the process crashes so worth it.
    time_updated = Column(DateTime, onupdate=datetime.datetime.utcnow)

    time_last_attempted = Column(Integer)

    # TODO list available modes here...
    mode = Column(String)

    url = Column(String())
    media_type = Column(String())  # image, frame, video, csv

    #  Why not name this "source"
    # TODO naming of this attribute could probably be improved.
    type = Column(String())  # ["from_url", "from_video_split"]

    allow_csv = Column(Boolean())

    allow_duplicates = Column(Boolean(), default=False)

    # By default we don't defer processing, but can if needed
    # At the moment this flag is only used "in flight"
    # So eventually, in theory, all the flags will be False.
    # Not sure if that's ok or bad?
    processing_deferred = Column(Boolean(), default=False)

    status = Column(String(), default="init")
    status_text = Column(String())

    offset_in_seconds = Column(Integer)
    percent_complete = Column(Float, default=0.0)

    description = Column(String())
    size = Column(Integer)

    archived = Column(Boolean, default=False)

    raw_data_blob_path = Column(String())
    # video_processed_blob_path = Column(String())

    resumable_url = Column(String())

    # For AWS S3 Uploads
    upload_aws_id = Column(String())
    upload_aws_parts_list = Column(MutableDict.as_mutable(JSONEncodedDict))

    # For Azure Uploads
    upload_azure_block_list = Column(MutableDict.as_mutable(JSONEncodedDict))

    video_split_duration = Column(Integer())
    # For now inferring from video_split_duration
    # if it exists then we assume we want to split it
    video_was_split = Column(Boolean)

    retry_log = Column(MutableDict.as_mutable(JSONEncodedDict))

    # I think it would be good for this to be separate
    # For easy searching
    # Default to 0 so we can do < a value instead of None checks
    retry_count = Column(Integer, default=0)

    # Use get_by_id().
    # See parent_input =  in process_media.py
    parent_input_id = Column(Integer, ForeignKey('input.id'))
    # parent_input = relationship("Input", foreign_keys=[parent_input_id])

    # context of say a video file
    parent_file_id = deferred(Column(Integer, ForeignKey('file.id')))
    parent_file = relationship("File", foreign_keys=[parent_file_id])

    file_id = Column(Integer, ForeignKey('file.id'))
    file = relationship("File", foreign_keys=[file_id])

    newly_copied_file_id = Column(Integer, ForeignKey('file.id'))
    newly_copied_file = relationship("File",
                                     foreign_keys=[newly_copied_file_id
                                                   ])  #TODO: ADD TO PRODUCTION

    add_link = Column(Boolean)
    remove_link = Column(Boolean)
    copy_instance_list = Column(Boolean, default=False)

    sequence_map = Column(MutableDict.as_mutable(JSONEncodedDict))

    file_metadata = Column(MutableDict.as_mutable(JSONB))

    task_id = Column(Integer, ForeignKey('task.id'))
    task = relationship("Task", foreign_keys=[task_id])
    task_action = Column(String())

    external_map_id = Column(Integer, ForeignKey('external_map.id'))
    external_map = relationship("ExternalMap", foreign_keys=[external_map_id])
    external_map_action = Column(String())

    job_id = Column(Integer, ForeignKey('job.id'))
    job = relationship("Job")

    # Also include image or video?

    directory_id = Column(Integer,
                          ForeignKey('working_dir.id'))  # target directory
    directory = relationship("WorkingDir", foreign_keys=[directory_id])

    source_directory_id = Column(
        Integer, ForeignKey('working_dir.id'))  # For internal only
    source_directory = relationship("WorkingDir",
                                    foreign_keys=[source_directory_id])

    invalid_directory_permission = Column(Boolean)

    project_id = Column(Integer, ForeignKey('project.id'))
    project = relationship("Project")

    user_id = Column(Integer, ForeignKey('userbase.id'))
    user = relationship("User")

    batch_id = Column(Integer, ForeignKey('input_batch.id'))
    batch = relationship("InputBatch", foreign_keys=[batch_id])

    temp_dir = Column(String())

    temp_dir_path_and_filename = Column(String())
    dzuuid = Column(String())
    original_filename = Column(String())
    extension = Column(String())

    instance_list = Column(MutableDict.as_mutable(JSONEncodedDict))
    frame_packet_map = Column(MutableDict.as_mutable(JSONEncodedDict))

    update_log = Column(MutableDict.as_mutable(JSONEncodedDict),
                        default=regular_log.default)  # Pass the callable so each row gets a fresh log (New Sept 3, 2020)

    # Context of video
    video_parent_length = Column(
        Integer
    )  # This way don't have to check video each time. To see where it ends

    __table_args__ = (Index('index__processing_deferred__archived',
                            "processing_deferred",
                            "archived",
                            postgresql_where=(archived.is_(True))), )

    def parent_input(self, session):
        if not self.parent_input_id: return None

        return session.query(Input).filter(
            Input.id == self.parent_input_id).first()

    def child_list(self, session):
        return session.query(Input).filter(
            Input.parent_input_id == self.id).all()

    @staticmethod
    def new(project=None,
            project_id=None,
            media_type: str = None,
            type: str = None,
            mode: str = None,
            url: str = None,
            job_id: int = None,
            video_parent_length: int = None,
            source_directory_id: int = None,
            remove_link: bool = None,
            add_link: bool = None,
            copy_instance_list: bool = None,
            directory_id: int = None,
            file_id: int = None,
            parent_file_id: int = None,
            newly_copied_file_id: int = None,
            sequence_map: dict = None,
            processing_deferred: bool = False,
            parent_input_id: int = None,
            batch_id: int = None,
            video_split_duration: int = None,
            file_metadata: dict = None):
        """
        Helps ensure we don't forget anything...

        does not add to session or flush because we may not
        always want to do that.

        Different ways files can come in here...
        """

        # Careful to check parent otherwise tries to recursively split.
        # if there is no parent then it's assumed to be the "original"
        # if a video_split is provided then we use it.
        if parent_input_id is None and video_split_duration is None:
            video_split_duration = 30

        input = Input(project=project,
                      project_id=project_id,
                      file_id=file_id,
                      mode=mode,
                      newly_copied_file_id=newly_copied_file_id,
                      media_type=media_type,
                      type=type,
                      url=url,
                      job_id=job_id,
                      directory_id=directory_id,
                      sequence_map=sequence_map,
                      processing_deferred=processing_deferred,
                      parent_input_id=parent_input_id,
                      video_parent_length=video_parent_length,
                      video_split_duration=video_split_duration,
                      batch_id=batch_id,
                      copy_instance_list=copy_instance_list,
                      file_metadata=file_metadata)
        input.parent_file_id = parent_file_id

        return input

    @staticmethod
    def get_by_id(session, id: int, skip_locked: bool = False):

        query = session.query(Input).filter(Input.id == id)

        if skip_locked:
            query = query.with_for_update(skip_locked=True)

        return query.first()

    def serialize(self):

        directory = None

        if self.directory and not self.invalid_directory_permission:
            directory = self.directory.serialize_simple()

        # Total time
        # TODO maybe look at time last attempted too..
        # ALSO we may want to actually declare a "completion"
        # time, this just assumes it doesn't get updated after
        # completion

        total_time = None
        if self.created_time and self.time_updated:
            total_time = self.time_updated - self.created_time

        return {
            'id': self.id,
            'created_time': self.created_time,
            'time_updated': self.time_updated,
            'total_time': str(total_time),
            'media_type': self.media_type,
            'original_filename': self.original_filename,
            'status': self.status,
            'status_text': self.status_text,
            'directory': directory,
            'percent_complete': self.percent_complete,
            'processing_deferred': self.processing_deferred,
            'time_last_attempted': self.time_last_attempted,
            'retry_log': self.retry_log,
            'retry_count': self.retry_count,
            'video_split_duration': self.video_split_duration,
            'video_was_split': self.video_was_split,
            # For debugging
            'raw_data_blob_path': self.raw_data_blob_path,
            'source': self.type,
            'mode': self.mode,
            'file_id': self.file_id,
            'batch_id': self.batch_id,
            'task_id': self.task_id,  # Include task_id
            'update_log': self.update_log,
            'instance_list': self.instance_list,
            'frame_packet_map': self.frame_packet_map,
            'newly_copied_file_id': self.newly_copied_file_id
        }

    def serialize_with_frame_packet(self):
        result = self.serialize()
        result['frame_packet_map'] = self.frame_packet_map
        result['instance_list'] = self.instance_list
        return result

    @staticmethod
    def directory_not_equal_to_status(session,
                                      directory_id,
                                      status="success",
                                      return_type="count"):
        """
        Returns 0 if there are no files equal to status
        otherwise returns count of files != to status
        """

        file_link_sub_query = WorkingDirFileLink.get_sub_query(
            session, directory_id)

        assert file_link_sub_query is not None

        # TODO should we exclude
        # failed ones optionally?...
        # We could do status not in list [failed_flag, success] etc..

        query = session.query(Input).filter(
            Input.file_id == file_link_sub_query.c.file_id,
            Input.status != status, Input.archived != True)

        if return_type == "count":
            return query.count()

        if return_type == "objects":
            return query.all()
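
JSONEncodedDict and MutableDict.as_mutable appear throughout the model but are defined elsewhere. They most likely follow the standard SQLAlchemy recipe, sketched below, which serializes dicts to JSON text and lets SQLAlchemy detect in-place mutations such as `input.retry_log['key'] = value`:

import json

from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.types import TypeDecorator, VARCHAR

# Standard SQLAlchemy recipe that the project's JSONEncodedDict likely follows.
class JSONEncodedDict(TypeDecorator):
    impl = VARCHAR

    def process_bind_param(self, value, dialect):
        # dict -> JSON text on the way into the database
        return json.dumps(value) if value is not None else None

    def process_result_value(self, value, dialect):
        # JSON text -> dict on the way out
        return json.loads(value) if value is not None else None
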
Example #20
    def test_api_create_sequence_preview(self):
        label = data_mocking.create_label({
            'name': 'apple',
        }, self.session)
        label_file = data_mocking.create_label_file(
            {
                'label': label,
                'project_id': self.project.id
            }, self.session)
        video_file = data_mocking.create_file(
            {
                'project_id': self.project.id,
                'type': 'video'
            }, self.session)
        sequence = data_mocking.create_sequence(
            {
                'label_file_id': label_file.id,
                'video_file_id': video_file.id,
                'cache_expiry': time.time() + 500000,
                'number': 1,
            }, self.session)
        video_file_bad = data_mocking.create_file(
            {
                'project_id': self.project2.id,
                'type': 'video'
            }, self.session)
        preview_url = 'https://picsum.photos/200/300'
        instance = data_mocking.create_instance(
            {
                'project_id': self.project.id,
                'type': 'box',
                'x_min': 0,
                'x_max': 0,
                'y_min': 0,
                'y_max': 0,
                'file_id': video_file.id,
                'soft_delete': False,
                'sequence_id': sequence.id,
                'preview_image_url': preview_url,
                'preview_image_url_expiry': 900000000,
            }, self.session)
        sequence.instance_preview_cache = {
            'id': instance.id,
            'file_id': sequence.video_file.id,
            'preview_image_url': preview_url,
        }
        self.session.commit()

        endpoint = "/api/project/{}/sequence/{}/create-preview".format(
            self.project.project_string_id,
            sequence.id,
        )
        auth_api = common_actions.create_project_auth(project=self.project,
                                                      session=self.session)
        credentials = b64encode(
            "{}:{}".format(auth_api.client_id,
                           auth_api.client_secret).encode()).decode('utf-8')
        response = self.client.post(endpoint,
                                    data=json.dumps({}),
                                    headers={
                                        'directory_id':
                                        str(self.project.directory_default_id),
                                        'Authorization':
                                        'Basic {}'.format(credentials)
                                    })
        data = response.json
        self.assertTrue('result' in data)
        self.assertTrue('log' in data)
        self.assertEqual(len(data['log']['error'].keys()), 0)
        self.assertEqual(data['result']['instance_preview']['id'], instance.id)
        self.assertEqual(data['result']['instance_preview']['file_id'],
                         video_file.id)
        self.assertEqual(
            data['result']['instance_preview']['preview_image_url'],
            preview_url)

        # Error case
        sequence2 = data_mocking.create_sequence(
            {
                'label_file_id': label_file.id,
                'video_file_id': video_file_bad.id,
                'cache_expiry': time.time() + 500000,
                'number': 1,
            }, self.session)
        result, log = sequence_preview_create.create_sequence_preview_core(
            session=self.session,
            log=regular_log.default(),
            project=self.project,
            sequence_id=sequence2.id)
        self.assertEqual(len(log['error'].keys()), 1)
        self.assertTrue('project_id' in log['error'])
Example #21
    def copy_file_from_existing(session,
                                working_dir,
                                existing_file,
                                copy_instance_list: bool = False,
                                log=None,
                                add_link: bool = True,
                                remove_link: bool = True,
                                orginal_directory_id=None,
                                previous_video_parent_id=None,
                                sequence_map=None,
                                deep_copy=False,
                                defer_copy=False,
                                ann_is_complete_reset=False,
                                batch_id=None,
                                flush_session=False,
                                working_dir_id: int = None):
        """
        orginal_directory_id is for Video, to get list of video files
        Should we rename to "source_directory" to keep in line of transfer thing?

        Clarify working_dir is the "target" directory?
        Don't actually need directory if not copying links

        # TODO is "update" really the right name
        if this is generally creating a new file??

        If file is video, we need to
        * Create the new video file
        * Create new files for all of its frames


        ann_is_complete_reset
            We assume "copying" means copying the status too.
            However, for the example of tasks (and perhaps others in future)
            we assume we want to reset this status for the newly created files.

        """
        # Defer image copy is specified in the parameter.
        if log is None:
            # Avoid a shared mutable default argument; create a fresh log per call.
            log = regular_log.default()
        start_time = time.time()

        if working_dir:
            working_dir_id = working_dir.id

        # IMAGE Defer
        if existing_file.type == 'image' and defer_copy and not remove_link:
            regular_methods.transmit_interservice_request_after_commit(
                session=session,
                message='image_copy',
                logger=logger,
                service_target='walrus',
                id=existing_file.id,
                project_string_id=existing_file.project.project_string_id,
                extra_params={
                    'file_id': existing_file.id,
                    'copy_instance_list': copy_instance_list,
                    'destination_working_dir_id': working_dir_id,
                    'source_working_dir_id': orginal_directory_id,
                    'add_link': add_link,
                    'batch_id': batch_id,
                    'remove_link': remove_link,
                })
            log['info']['message'] = 'File copy in progress. Please check progress in the file operations progress section.'
            return

        # VIDEO
        if existing_file.type == "video" and defer_copy is True:
            # Defer the copy to the walrus.
            regular_methods.transmit_interservice_request_after_commit(
                session=session,
                message='video_copy',
                logger=logger,
                service_target='walrus',
                id=existing_file.id,
                project_string_id=existing_file.project.project_string_id,
                extra_params={
                    'file_id': existing_file.id,
                    'copy_instance_list': copy_instance_list,
                    'destination_working_dir_id': working_dir_id,
                    'source_working_dir_id': orginal_directory_id,
                    'add_link': add_link,
                    'batch_id': batch_id,
                    'remove_link': remove_link,
                    'frame_count': existing_file.video.frame_count
                })
            log['info']['message'] = 'File copy in progress. Please check progress in the file operations progress section.'
            return

        file = new_file_database_object_from_existing(session)
        file.type = existing_file.type

        # We need this to do permissions
        # At the moment when a video is done
        # We only move the video into the directory not the images
        # So we need to use the project scope.
        file.project_id = existing_file.project_id

        file.image_id = existing_file.image_id
        file.label_id = existing_file.label_id
        file.video_id = existing_file.video_id
        file.global_frame_number = existing_file.global_frame_number
        file.colour = existing_file.colour
        file_relationship(session, file, existing_file)
        file.state = "changed"
        file.frame_number = existing_file.frame_number  # Ok if None

        if ann_is_complete_reset is False:
            file.ann_is_complete = existing_file.ann_is_complete

        file.original_filename = existing_file.original_filename

        # Want to be able to get video file from anyframe...
        # Careful this is the previous one, if we just copy existing then images will
        # be related to old file
        file.video_parent_file_id = previous_video_parent_id

        session.add(file)

        # Question: why does add_link need to be True here? Or does it?
        # At the moment we don't pass add_link as True when copying it for task
        if add_link is True:
            working_dir_database_models.WorkingDirFileLink.add(
                session, working_dir_id, file)
        # print("Added link")

        if remove_link is True:
            working_dir_database_models.WorkingDirFileLink.remove(
                session, working_dir_id, existing_file.id)

        logger.debug('existing_file.type {}'.format(existing_file.type))
        logger.debug('copy_instance_list {}'.format(copy_instance_list))
        if existing_file.type in ['image', 'frame'] and copy_instance_list is True:

            file.count_instances_changed = existing_file.count_instances_changed
            file.set_cache_key_dirty('instance_list')

            instance_list = Instance.list(
                session=session, file_id=existing_file.id,
                limit=None)  # Excludes removed by default
            logger.debug('instance_list len {}'.format(len(instance_list)))
            for instance in instance_list:

                instance_sequence_id = instance.sequence_id
                if sequence_map is not None:
                    logger.debug('sequence_map {}'.format(sequence_map))
                    instance_sequence_id = sequence_map.get(
                        instance_sequence_id)

                new_instance = Instance(
                    file_id=file.id,  # IMPORTANT and different from pattern
                    sequence_id=instance_sequence_id,  #  Different
                    parent_file_id=file.video_parent_file_id,  # Cache for video parent file ID.
                    project_id=instance.project_id,
                    x_min=instance.x_min,
                    y_min=instance.y_min,
                    x_max=instance.x_max,
                    y_max=instance.y_max,
                    width=instance.width,
                    height=instance.height,
                    label_file_id=instance.label_file_id,
                    hash=instance.hash,
                    type=instance.type,
                    number=instance.number,
                    frame_number=instance.frame_number,
                    global_frame_number=instance.global_frame_number,
                    machine_made=instance.machine_made,
                    fan_made=instance.fan_made,
                    points=instance.points,
                    soft_delete=instance.soft_delete,
                    center_x=instance.center_x,
                    center_y=instance.center_y,
                    angle=instance.angle,
                    p1=instance.p1,
                    p2=instance.p2,
                    cp=instance.cp,
                    interpolated=instance.interpolated,
                    front_face=instance.front_face,
                    rear_face=instance.rear_face,
                    creation_ref_id=instance.creation_ref_id)
                session.add(new_instance)

        end_time = time.time()
        if flush_session:
            session.flush()
        return file
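
The original signature used log=regular_log.default() as a default value (changed to the None sentinel above). Python evaluates default arguments once at definition time, so a mutable default is shared across every call; a minimal demonstration of the pitfall and the fix:

def broken(item, bucket=[]):   # one shared list for all calls
    bucket.append(item)
    return bucket

def fixed(item, bucket=None):  # fresh list per call
    if bucket is None:
        bucket = []
    bucket.append(item)
    return bucket

assert broken(1) == [1] and broken(2) == [1, 2]  # state leaks between calls
assert fixed(1) == [1] and fixed(2) == [2]
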
Example #22
    def test_execute_after_launch_strategy(self):
        file = data_mocking.create_file(
            {
                'project_id': self.project.id,
                'type': 'text'
            }, self.session)
        label = data_mocking.create_label({
            'name': 'mylabel',
        }, self.session)
        label_file = data_mocking.create_label_file(
            {
                'label': label,
                'project_id': self.project.id
            }, self.session)
        attach_dir1 = data_mocking.create_directory(
            {
                'project': self.project,
                'user': self.project_data['users'][0],
                'files': [file]
            }, self.session)

        connection = data_mocking.create_connection(
            {
                'name': 'test',
                'integration_name': 'datasaur',
                'project_id': self.project.id
            }, self.session)

        labeldict = {
            "label_file_list_serialized": [{
                "id": label_file.id,
                "hash": "083e9ebc48d64e9a8874c6b95f490b56b8c4c5b0f4dacd90bd3534085e87d9fa",
                "type": "label",
                "state": "added",
                "created_time": "2020-07-15T18:48:34.477333",
                "time_last_updated": "2020-07-15T18:48:34.705290",
                "ann_is_complete": None,
                "original_filename": None,
                "video_id": None,
                "video_parent_file_id": None,
                "count_instances_changed": None,
                "attribute_group_list": [{
                    "id": 2,
                    "kind": "multiple_select",
                    "is_root": True,
                    "name": "carwheeltag",
                    "prompt": "How is this car wheel",
                    "show_prompt": True,
                    "time_updated": "2020-08-05 19:37:07.703576",
                    "attribute_template_list": [
                        {"id": 4, "name": "Is rounded", "value_type": None,
                         "archived": False, "group_id": 2, "display_order": None},
                        {"id": 5, "name": "is squared", "value_type": None,
                         "archived": False, "group_id": 2, "display_order": None},
                        {"id": 6, "name": "is beautiful", "value_type": None,
                         "archived": False, "group_id": 2, "display_order": None},
                        {"id": 7, "name": "is crazy", "value_type": None,
                         "archived": False, "group_id": 2, "display_order": None},
                    ]
                }, {
                    "id": 3,
                    "kind": "select",
                    "is_root": True,
                    "name": "selectwheel",
                    "prompt": "Please selectt something special about this wheels",
                    "show_prompt": True,
                    "time_updated": "2020-08-12 16:29:54.817801",
                    "attribute_template_list": [
                        {"id": 10, "name": "Silver Wheel", "value_type": None,
                         "archived": False, "group_id": 3, "display_order": None},
                        {"id": 9, "name": "+Gold wheel", "value_type": None,
                         "archived": False, "group_id": 3, "display_order": None},
                    ]
                }, {
                    "id": 4,
                    "kind": "text",
                    "is_root": True,
                    "name": "freewheel",
                    "prompt": "What are your thought on this wheel?",
                    "show_prompt": True,
                    "time_updated": "2020-08-05 20:50:59.195249",
                    "attribute_template_list": []
                }, {
                    "id": 5,
                    "kind": "radio",
                    "is_root": True,
                    "name": "clean",
                    "prompt": "Is this wheel clean?",
                    "show_prompt": True,
                    "time_updated": "2020-08-05 20:53:46.314143",
                    "attribute_template_list": [
                        {"id": 11, "name": "Wheel is dirty", "value_type": None,
                         "archived": False, "group_id": 5, "display_order": None},
                        {"id": 12, "name": "Wheek is clean", "value_type": None,
                         "archived": False, "group_id": 5, "display_order": None},
                    ]
                }, {
                    "id": 6,
                    "kind": "text",
                    "is_root": True,
                    "name": "TEST",
                    "prompt": "TEST28",
                    "show_prompt": True,
                    "time_updated": "2020-08-12 16:30:03.770141",
                    "attribute_template_list": []
                }],
                "colour": {
                    "hex": "#194d33",
                    "hsl": {"h": 150, "s": 0.5, "l": 0.2, "a": 1},
                    "hsv": {"h": 150, "s": 0.66, "v": 0.3, "a": 1},
                    "rgba": {"r": 25, "g": 77, "b": 51, "a": 1},
                    "a": 1
                },
                "label": {
                    "id": 5,
                    "name": "Car wheel",
                    "default_sequences_to_single_frame": False
                }
            }],
            "label_file_colour_map": {}
        }
        job = data_mocking.create_job(
            {
                'name': 'my-test-job-{}'.format(1),
                'project': self.project,
                'status': 'active',
                'type': "Normal",
                'label_dict': labeldict,
                'attached_directories': [attach_dir1],
                'interface_connection_id': connection.id
            }, self.session)

        strategy = DatasaurTaskTemplateAfterLaunchStrategy(
            task_template=job, session=self.session, log=regular_log.default())
        with patch.object(DatasaurTaskTemplateAfterLaunchStrategy,
                          'create_datasaur_labelset',
                          return_value={
                              'result': {
                                  'createLabelSet': {
                                      'id': 'mytestid'
                                  }
                              }
                          }):
            with patch.object(DatasaurTaskTemplateAfterLaunchStrategy,
                              'create_datasaur_project',
                              return_value={'result': {
                                  'id': 'datasaur_test'
                              }}):
                with patch.object(DatasaurTaskTemplateAfterLaunchStrategy,
                                  'get_project_files_list',
                                  return_value={
                                      'result': {
                                          'id':
                                          'datasaur_test',
                                          'documents': [{
                                              'id': str(file.id),
                                              'name': str(file.id)
                                          }]
                                      }
                                  }):
                    strategy.execute_after_launch_strategy()
                    commit_with_rollback(self.session)
                    tasks_count = self.session.query(Task).filter(
                        Task.job_id == job.id).count()
                    tasks = self.session.query(Task).filter(
                        Task.job_id == job.id).all()
                    self.assertEqual(tasks_count, 1)

                    external_map = ExternalMap.get(
                        session=self.session,
                        job_id=job.id,
                        external_id='mytestid',
                        connection_id=connection.id,
                        diffgram_class_string='',
                        type='{}_label_set'.format(
                            connection.integration_name),
                    )

                    self.assertNotEqual(external_map, None)

                    project_map = ExternalMap.get(
                        session=self.session,
                        job_id=job.id,
                        external_id='datasaur_test',
                        connection_id=connection.id,
                        diffgram_class_string='task_template',
                        type='{}_project'.format(connection.integration_name),
                    )
                    self.assertNotEqual(project_map, None)

                    files_maps = ExternalMap.get(
                        session=self.session,
                        job_id=job.id,
                        external_id=str(file.id),
                        file_id=file.id,
                        connection_id=connection.id,
                        diffgram_class_string='file',
                        type='{}_file'.format(connection.integration_name),
                    )
                    self.assertNotEqual(files_maps, None)
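
The triple-nested patch.object blocks above can be flattened with contextlib.ExitStack; behavior is identical, it just reads flatter. A sketch against the same names used in the test:

from contextlib import ExitStack
from unittest.mock import patch

with ExitStack() as stack:
    stack.enter_context(patch.object(
        DatasaurTaskTemplateAfterLaunchStrategy, 'create_datasaur_labelset',
        return_value={'result': {'createLabelSet': {'id': 'mytestid'}}}))
    stack.enter_context(patch.object(
        DatasaurTaskTemplateAfterLaunchStrategy, 'create_datasaur_project',
        return_value={'result': {'id': 'datasaur_test'}}))
    # enter the third patch the same way, then run the strategy
    strategy.execute_after_launch_strategy()
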
Example #23
    def test_validate_file_data_for_input_packet(self):
        log = regular_log.default()
        file1 = data_mocking.create_file({'project_id': self.project.id},
                                         self.session)
        # Case of file ID
        input_data = {'file_id': file1.id}
        result, log, file_id = packet.validate_file_data_for_input_packet(
            session=self.session,
            project_string_id=self.project.project_string_id,
            input=input_data,
            log=log)
        self.assertTrue(result)
        self.assertEqual(len(log['error'].keys()), 0)
        self.assertEqual(file_id, file1.id)

        # Case of Media URL
        input_data = {'media': {'url': 'test_url'}}
        result, log, file_id = packet.validate_file_data_for_input_packet(
            session=self.session,
            project_string_id=self.project.project_string_id,
            input=input_data,
            log=log)
        self.assertFalse(result)
        self.assertEqual(len(log['error'].keys()), 1)
        self.assertEqual(file_id, None)

        input_data['media']['type'] = 'image'
        log = regular_log.default()
        result, log, file_id = packet.validate_file_data_for_input_packet(
            session=self.session,
            project_string_id=self.project.project_string_id,
            input=input_data,
            log=log)

        self.assertTrue(result)
        self.assertEqual(len(log['error'].keys()), 0)
        self.assertEqual(file_id, None)

        # Case of Filename + Directory

        file2 = data_mocking.create_file(
            {
                'project_id': self.project.id,
                'original_filename': 'test1.jpg'
            }, self.session)
        directory = data_mocking.create_directory(
            {
                'project': self.project,
                'user': self.project_data['users'][0],
                'files': [file2]
            }, self.session)
        input_data = {'file_name': 'test1.jpg', 'directory_id': directory.id}
        log = regular_log.default()
        result, log, file_id = packet.validate_file_data_for_input_packet(
            session=self.session,
            project_string_id=self.project.project_string_id,
            input=input_data,
            log=log)

        self.assertTrue(result)
        self.assertEqual(len(log['error'].keys()), 0)
        self.assertEqual(file_id, file2.id)

        input_data = {
            'file_name': 'test1111.jpg',
            'directory_id': directory.id
        }
        log = regular_log.default()
        result, log, file_id = packet.validate_file_data_for_input_packet(
            session=self.session,
            project_string_id=self.project.project_string_id,
            input=input_data,
            log=log)
        self.assertFalse(result)
        self.assertEqual(len(log['error'].keys()), 1)
        self.assertEqual(file_id, None)