Example #1
 def update_instance_list_for_video(self, frames_data, diffgram_task):
     frame_packet_map = {}
     result = False  # stays False if no frame carries label instances
     for frame in frames_data:
         logger.debug('Processing Frame {}'.format(frame['frameNumber']))
         video_data = {
             'current_frame': frame['frameNumber'],
             'video_mode': True,
             'video_file_id': diffgram_task.file.id
         }
         label_instances = frame['objects']
         if len(label_instances) > 0:
             result = self.update_instance_list_for_image_or_frame(
                 label_instances,
                 diffgram_task,
                 video_data=video_data,
                 frame_packet_map=frame_packet_map)
     enqueue_packet(
         project_string_id=self.task_template.project.project_string_id,
         session=self.session,
         media_url=None,
         media_type='video',
         job_id=self.task_template.id,
         file_id=diffgram_task.file.id,
         frame_packet_map=frame_packet_map,
         task_id=diffgram_task.id,
         task_action='complete_task',
         commit_input=True,
         mode="update_with_existing")
     return result
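The pattern above defers the enqueue: instances are accumulated per frame into frame_packet_map, and a single video packet is enqueued once all frames are processed. A minimal sketch of just the accumulation step, with illustrative helper names (not part of the source):

def build_frame_packet_map(frames_data, transform_fn):
    # Illustrative only: collect transformed instances keyed by frame number,
    # mirroring how update_instance_list_for_video fills frame_packet_map
    # before the single enqueue_packet() call.
    frame_packet_map = {}
    for frame in frames_data:
        instances = [transform_fn(obj) for obj in frame['objects']]
        if instances:
            frame_packet_map[frame['frameNumber']] = instances
    return frame_packet_map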
Example #2
 def enqueue_scale_ai_annotations(self, diffgram_task, annotations):
     diffgram_instance_list = self.transform_annotations_to_diffgram_instance_list(annotations)
     enqueue_packet(project_string_id=self.task_template.project.project_string_id,
                    session=self.session,
                    media_url=None,
                    media_type='image',
                    job_id=self.task_template.id,
                    file_id=diffgram_task.file.id,
                    instance_list=diffgram_instance_list,
                    task_id=diffgram_task.id,
                    task_action='complete_task',
                    commit_input=True,
                    mode="update")
     return
Example #3
    def __fetch_object(self, opts):
        bucket = self.connection_client.get_bucket(opts['bucket_name'])
        blob = bucket.blob(opts['path'])
        blob_expiry = int(time.time() + (60 * 60 * 24 * 30))
        signed_url = blob.generate_signed_url(expiration=blob_expiry)
        # Deduce Media Type:
        # TODO Share this with existing process_media determine_media_type()
        extension = Path(opts['path']).suffix
        extension = extension.lower()
        media_type = None
        if extension in images_allowed_file_names:
            media_type = 'image'
        elif extension in videos_allowed_file_names:
            media_type = 'video'
        else:
            # TODO: Decide, do we want to raise an exception? or just do nothing?
            log = regular_log.default()
            log['error']['invalid_type'] = 'File must be of type: {} {}'.format(
                str(images_allowed_file_names), str(videos_allowed_file_names))
            log['error']['file_name'] = opts['path']
            log['opts'] = opts
            with sessionMaker.session_scope() as session:
                Event.new(session=session,
                          member_id=opts['event_data']['request_user'],
                          kind='google_cloud_new_import_error',
                          description='New cloud import for {}'.format(
                              opts['path']),
                          error_log=log)
            raise LookupError('File must be of type: {} {}'.format(
                str(images_allowed_file_names),
                str(videos_allowed_file_names)))
        # metadata = self.connection_client.head_object(Bucket=opts['bucket_name'], Key=opts['path'])
        with sessionMaker.session_scope() as session:

            created_input = packet.enqueue_packet(
                self.config_data['project_string_id'],
                session=session,
                media_url=signed_url,
                media_type=media_type,
                job_id=opts.get('job_id'),
                video_split_duration=opts.get('video_split_duration'),
                directory_id=opts.get('directory_id'))
            log = regular_log.default()
            log['opts'] = opts
            Event.new(session=session,
                      member_id=opts['event_data']['request_user'],
                      kind='google_cloud_new_import_success',
                      description='New cloud import for {}'.format(
                          opts['path']),
                      error_log=log)
        return {'result': created_input}
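The extension check above recurs in the connectors below, and the TODO suggests sharing it with process_media's determine_media_type(). A minimal sketch of what such a shared helper might look like, assuming the same images_allowed_file_names / videos_allowed_file_names lists; the helper name is hypothetical:

from pathlib import Path

def deduce_media_type(path, images_allowed_file_names, videos_allowed_file_names):
    # Hypothetical shared helper: returns 'image', 'video', or None
    # for an unsupported extension, matching the branches above.
    extension = Path(path).suffix.lower()
    if extension in images_allowed_file_names:
        return 'image'
    if extension in videos_allowed_file_names:
        return 'video'
    return None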
Example #4
 def test_packet_endpoint_refactor(self):
     packet_data = {
         'media': {
             'url':
             'https://thumbor.forbes.com/thumbor/250x382/https://blogs-images.forbes.com/dorothypomerantz/files/2011/09/Spongebob-squarepants.jpg?width=960',
             'type': 'image'
         }
     }
     created_input = packet.enqueue_packet(
         self.project_string_id,
         session=self.session,
         media_url=packet_data['media']['url'],
         media_type=packet_data['media']['type'],
         job_id=None,
         directory_id=None)
     self.session.commit()
     self.assertIsInstance(created_input, Input)
Example #5
    def generate_test_data_on_dataset(self, dataset):
        inputs_data = []
        for i in range(0, self.NUM_IMAGES):
            diffgram_input = enqueue_packet(
                project_string_id=dataset.project.project_string_id,
                session=self.session,
                media_url='https://picsum.photos/1000',
                media_type='image',
                directory_id=dataset.id,
                commit_input=True,
                task_id=None,
                type='from_url',
                task_action=None,
                external_map_id=None,
                external_map_action=None,
                enqueue_immediately=True,
                mode=None,
                allow_duplicates=True)

            inputs_data.append(diffgram_input)
        return inputs_data
Example #6
def interservice_receive_api():
    """
    Inter-Service route to notify of new job launch

    For now relies on inter_service_security_token for permissions...

    This is just a starting point for more generic inter service notification
    Pros/Cons to having DB as intermediary point there, fo now
    this is fairly light weight.
    
    Once we have a good pattern here, eg retry/overflow handling,
    can probably remove polling / thread

    """
    spec_list = [
        {
            "inter_service_security_token": {
                'kind': str,
                'required': True,
                'security_token': settings.INTER_SERVICE_SECRET
            }
        },
        {
            "message": {
                'kind': str,
                'required': True
            }
        },
        {
            "id": {  # or "base_class_id"?
                'kind': int,
                'required': False,
                'default': None
            }
        },
        {
            "extra_params": {
                'kind': dict,
                'required': False,
                'default': None
            }
        },
        {
            "base_class_string": {
                'kind': str,
                'required': False,
                'default': None
            }
        },
        {
            "project_string_id": {
                'kind': str,
                'required': False,
                'default': None
            }
        }
        # Serialized object maybe?
    ]

    log, input_from_request, untrusted_input = regular_input.master(
        request=request, spec_list=spec_list)

    if len(log["error"].keys()) >= 1:
        return jsonify(log=log), 400

    logger.info("Received valid inter service request")

    with sessionMaker.session_scope() as session:

        # CAUTION
        # Generally assumes any calls here are non-blocking,
        # so the request can return promptly.

        # eg 1) Condition on message then some_launcher(event_id = input['id'])

        # Or 2) if we want object here for some reason, something like:
        # if input['base_class_string']:
        #    base_object = getattr(sys.modules[__name__], input['base_class_string']).get_by_id(
        #        id = input['id'],
        #        session = session)

        if input_from_request['message'] == 'new_job_launch_queue_item':
            job_launcher_thread = TaskTemplateLauncherThread(run_once=True)
            log['info']['job_launcher_thread'] = True
        if input_from_request['message'] == 'new_sync_action_item':
            sync_action_thread = SyncActionsHandlerThread(run_once=True)
            log['info']['sync_action_thread'] = True
        if input_from_request['message'] == 'video_copy':
            enqueue_packet(
                project_string_id=input_from_request.get('project_string_id'),
                session=session,
                media_url=None,
                media_type='video',
                directory_id=input_from_request['extra_params'].get(
                    'destination_working_dir_id'),
                source_directory_id=input_from_request['extra_params'].get(
                    'source_working_dir_id'),
                remove_link=input_from_request['extra_params'].get(
                    'remove_link'),
                add_link=input_from_request['extra_params'].get('add_link'),
                copy_instance_list=input_from_request['extra_params'].get(
                    'copy_instance_list'),
                job_id=None,
                batch_id=input_from_request['extra_params'].get('batch_id'),
                file_id=input_from_request['id'],
                instance_list=[],
                video_parent_length=input_from_request['extra_params'].get(
                    'frame_count'),
                task_id=None,
                mode='copy_file',
                commit_input=True)
        if input_from_request['message'] == 'image_copy':
            enqueue_packet(
                project_string_id=input_from_request.get('project_string_id'),
                session=session,
                media_url=None,
                media_type='image',
                directory_id=input_from_request['extra_params'].get(
                    'destination_working_dir_id'),
                source_directory_id=input_from_request['extra_params'].get(
                    'source_working_dir_id'),
                remove_link=input_from_request['extra_params'].get(
                    'remove_link'),
                add_link=input_from_request['extra_params'].get('add_link'),
                copy_instance_list=input_from_request['extra_params'].get(
                    'copy_instance_list'),
                job_id=None,
                batch_id=input_from_request['extra_params'].get('batch_id'),
                file_id=input_from_request['id'],
                instance_list=[],
                video_parent_length=None,
                task_id=None,
                mode='copy_file',
                commit_input=True)

        log['success'] = True
        return jsonify(log=log), 200
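A sibling service would notify this route with an HTTP POST whose JSON body carries the fields declared in spec_list. A minimal sketch using the requests library; the URL and all values are hypothetical, since the route path and callers are not shown here:

import requests

# Hypothetical endpoint and values; only the field names come from spec_list.
url = 'http://localhost:8080/api/interservice/receive'
payload = {
    'inter_service_security_token': '<INTER_SERVICE_SECRET>',
    'message': 'image_copy',
    'id': 42,  # file id to copy
    'extra_params': {
        'destination_working_dir_id': 2,
        'source_working_dir_id': 1,
        'copy_instance_list': True,
    },
    'project_string_id': 'my_project',
}
response = requests.post(url, json=payload)
assert response.status_code == 200, response.text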
Example #7
    def update_instance_list_for_image_or_frame(self,
                                                label_instances,
                                                diffgram_task,
                                                video_data=None,
                                                frame_packet_map=None):
        instance_list = []
        count = 1
        for labelbox_instance in label_instances:
            # Check if an instance mapping already exists; if so, provide instance_id to avoid overwriting data.
            instance_map = ExternalMap.get(
                session=self.session,
                external_id=labelbox_instance['featureId'],
                diffgram_class_string='instance',
                type='labelbox_instance',
                connection_id=self.task_template.interface_connection.id)
            if not instance_map:
                instance_map = ExternalMap.new(
                    session=self.session,
                    external_id=None,
                    diffgram_class_string='instance',
                    type='labelbox_instance',
                    connection=self.task_template.interface_connection,
                    add_to_session=True,
                    flush_session=True)
            diffgram_label_file_data = self.task_template.get_label_file_by_name(
                labelbox_instance['title'])
            diffgram_label_instance = self.transform_labelbox_label_to_diffgram_instance(
                labelbox_instance,
                diffgram_label_file_data,
                instance_map=instance_map,
                sequence_num=count if video_data is not None else None)

            if frame_packet_map is not None and video_data is not None:
                if video_data['current_frame'] not in frame_packet_map:
                    frame_packet_map[video_data['current_frame']] = [
                        diffgram_label_instance
                    ]
                else:
                    frame_packet_map[video_data['current_frame']].append(
                        diffgram_label_instance)

            if diffgram_label_instance:
                instance_list.append(diffgram_label_instance)
            count += 1
        if instance_list and video_data is None:
            enqueue_packet(
                project_string_id=self.task_template.project.project_string_id,
                session=self.session,
                media_url=None,
                media_type='image',
                job_id=self.task_template.id,
                file_id=diffgram_task.file.id,
                instance_list=instance_list,
                task_id=diffgram_task.id,
                task_action='complete_task',
                commit_input=True,
                external_map_id=instance_map.id,
                external_map_action='set_instance_id',
                mode="update_with_existing")
            return True
        elif instance_list:
            return True
        else:
            return False
Example #8
    def __fetch_object(self, opts):
        """
        Upload a file to Diffgram from an Azure Blob

        :param opts: Dictionary with parameters for object fetching.
        :return: created Input object if the file was enqueued, else None
        """
        spec_list = [{'bucket_name': str, 'path': str}]
        log = regular_log.default()
        log, input = regular_input.input_check_many(untrusted_input = opts,
                                                    spec_list = spec_list,
                                                    log = log)
        if len(log["error"].keys()) >= 1:
            return {'log': log}
        shared_access_signature = BlobSharedAccessSignature(
            account_name = self.connection_client.account_name,
            account_key = self.connection_client.credential.account_key
        )

        expiration_offset = 40368000
        blob_name = opts['path']
        container = opts['bucket_name']
        added_seconds = datetime.timedelta(0, expiration_offset)
        expiry_time = datetime.datetime.utcnow() + added_seconds
        filename = blob_name.split("/")[-1]
        sas = shared_access_signature.generate_blob(
            container_name = container,
            blob_name = blob_name,
            start = datetime.datetime.utcnow(),
            expiry = expiry_time,
            permission = BlobSasPermissions(read = True),
            content_disposition = 'attachment; filename=' + filename,
        )
        sas_url = 'https://{}.blob.core.windows.net/{}/{}?{}'.format(
            self.connection_client.account_name,
            container,
            blob_name,
            sas
        )

        with sessionMaker.session_scope() as session:

            project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
            member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()
            # Deduce Media Type:
            extension = Path(opts['path']).suffix
            extension = extension.lower()
            media_type = None
            if extension in images_allowed_file_names:
                media_type = 'image'
            elif extension in videos_allowed_file_names:
                media_type = 'video'
            else:
                # TODO: Decide, do we want to raise an exception? or just do nothing?
                log = regular_log.default()
                log['error']['invalid_type'] = 'File must be of type: {} {}'.format(str(images_allowed_file_names),
                                                                                    str(videos_allowed_file_names))
                log['error']['file_name'] = opts['path']
                log['opts'] = opts
                Event.new(
                    session = session,
                    member_id = opts['event_data']['request_user'],
                    kind = 'microsoft_azure_new_import_warning',
                    description = 'Skipped import for {}, invalid file type.'.format(opts['path']),
                    error_log = log,
                    project_id = project.id,
                    member = member,
                    success = False
                )
                return None

            # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path)
            created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                                  session = session,
                                                  media_url = sas_url,
                                                  media_type = media_type,
                                                  job_id = opts.get('job_id'),
                                                  batch_id = opts.get('batch_id'),
                                                  file_name = opts.get('path'),
                                                  video_split_duration = opts.get('video_split_duration'),
                                                  directory_id = opts.get('directory_id'),
                                                  extract_labels_from_batch = True)
            log = regular_log.default()
            log['opts'] = opts
            Event.new(
                session = session,
                member_id = opts['event_data']['request_user'],
                kind = 'microsoft_azure_new_import_success',
                description = 'New cloud import for {}'.format(opts['path']),
                error_log = log,
                project_id = project.id,
                member = member,
                success = True
            )
        return created_input
Example #9
    def fetch_instances_from_file(
            self,
            task_template,
            diffgram_file,
            file_id,
            datasaur_connector):

        file_export_data = self.trigger_export_single_datasaur_file(
            datasaur_connector = datasaur_connector,
            file_id = file_id)

        instance_list = []
        # We fetch the task by file id, since for Datasaur a file and a task are assumed to be the same concept.
        task = self.session.query(Task).filter(
            Task.job_id == task_template.id,
            Task.file_id == diffgram_file.id
        ).first()
        if 'log' in file_export_data and 'error' in file_export_data['log']:
            logger.error('Error fetching export data {}'.format(file_export_data))
            return
        label_items = file_export_data['result']['labelSet']['labelItems']
        label_items_by_id = {}
        for label in label_items:
            external_map_label = ExternalMap.get(
                session=self.session,
                job_id=task_template.id,
                external_id=label['id'],
                connection_id=task_template.interface_connection.id,
                diffgram_class_string='label_file',
                type='datasaur_label'
            )
            if external_map_label:
                label_items_by_id[label['id']] = label
                label_items_by_id[label['id']]['label_file_id'] = external_map_label.file_id
            else:
                logger.error('No label_file found for datasaur ID: {}'.format(label['id']))
                return

        sentences = file_export_data['result']['sentences']
        for sentence in sentences:
            instances = sentence['labels']
            for instance in instances:
                instance_map = ExternalMap.get(
                    session = self.session,
                    external_id = instance['id'],
                    diffgram_class_string = 'instance',
                    type = 'datasaur_instance',
                    return_kind = 'first')
                if not instance_map:
                    logger.debug('Creating Instance Map...')
                    instance_map = ExternalMap.new(
                        session=self.session,
                        job=task_template,
                        external_id=instance['id'],
                        connection=task_template.interface_connection,
                        diffgram_class_string='instance',
                        type='{}_instance'.format(
                            task_template.interface_connection.integration_name),
                        url='',
                        add_to_session=True,
                        flush_session=True)
                else:
                    logger.debug('Instance Map exists, proceeding to update.')
                instance_list.append({
                    'start_sentence': instance['sidS'],
                    'end_sentence': instance['sidE'],
                    'start_token': instance['s'],
                    'end_token': instance['e'],
                    'start_char': instance['charS'],
                    'end_char': instance['charE'],
                    'sentence': sentence['id'],
                    'type': 'text_token',
                    'name': label_items_by_id[instance['l']]['labelName'],
                    'label_file_id': label_items_by_id[instance['l']]['label_file_id']
                })
        logger.debug('Enqueuing new instances...')
        # Create a new packet to ensure this gets committed
        if task and task_template and diffgram_file:
            enqueue_packet(project_string_id=task_template.project.project_string_id,
                           session=self.session,
                           media_url=None,
                           media_type='text',
                           job_id=task_template.id,
                           file_id=diffgram_file.id,
                           instance_list=instance_list,
                           task_id=task.id,
                           task_action='complete_task',
                           commit_input=True,
                           mode="update")
            logger.info('Updated Task {} from datasaur.'.format(task.id))
Example #10
    def __fetch_folder(self, opts):
        result = []

        if self.config_data.get('project_string_id') is None:
            return {'result': 'error'}
        paths = opts['path']
        if type(paths) != list:
            paths = [paths]
        with sessionMaker.session_scope() as session:
            project = Project.get_by_string_id(
                session, self.config_data.get('project_string_id'))
            member = session.query(Member).filter(
                Member.user_id == opts['event_data']['request_user']).first()
            for path in paths:
                blobs = self.connection_client.list_blobs(opts['bucket_name'],
                                                          prefix=path)
                for blob in blobs:
                    # Deduce Media Type:
                    if blob.name.endswith('/'):
                        continue

                    blob_expiry = int(time.time() + (60 * 60 * 24 * 30))
                    signed_url = blob.generate_signed_url(
                        expiration=blob_expiry)
                    extension = Path(blob.name).suffix
                    extension = extension.lower()
                    media_type = None
                    if extension in images_allowed_file_names:
                        media_type = 'image'
                    elif extension in videos_allowed_file_names:
                        media_type = 'video'
                    else:
                        logging.warning('File: {} must be of type: {} {}'.format(
                            blob.name, str(images_allowed_file_names),
                            str(videos_allowed_file_names)))

                        log = regular_log.default()
                        log['error']['invalid_type'] = 'File must be of type: {} {}'.format(
                            str(images_allowed_file_names),
                            str(videos_allowed_file_names))
                        log['error']['file_name'] = path
                        log['opts'] = opts
                        Event.new(
                            session=session,
                            member_id=opts['event_data']['request_user'],
                            kind='google_cloud_new_import_warning',
                            description='Skipped import for {}, invalid file type.'.format(
                                blob.name),
                            error_log=log,
                            project_id=project.id,
                            member=member,
                            success=False)
                        continue
                    # TODO: check Input() table for duplicate file?
                    created_input = packet.enqueue_packet(
                        self.config_data['project_string_id'],
                        session=session,
                        media_url=signed_url,
                        media_type=media_type,
                        job_id=opts.get('job_id'),
                        batch_id=opts.get('batch_id'),
                        file_name=path,
                        video_split_duration=opts.get('video_split_duration'),
                        directory_id=opts.get('directory_id'),
                        extract_labels_from_batch=True)
                    log = regular_log.default()
                    log['opts'] = opts
                    Event.new(session=session,
                              member_id=opts['event_data']['request_user'],
                              kind='google_cloud_new_import_success',
                              description='New cloud import for {}'.format(
                                  blob.name),
                              error_log=log,
                              project_id=project.id,
                              member=member,
                              success=True)
                    result.append(created_input)
        return result
Example #11
    def __fetch_object(self, opts):
        """Upload a file to diffgram from an S3 bucket

        :param s3_file_key: path of file to fetch from
        :return: file obj if file was uploaded, else False
        """
        spec_list = [{'bucket_name': str, 'path': str}]
        log = regular_log.default()
        log, input = regular_input.input_check_many(untrusted_input=opts,
                                                    spec_list=spec_list,
                                                    log=log)
        if len(log["error"].keys()) >= 1:
            return {'log': log}
        # This might be an issue. URLs with no expiration are not currently supported; presigned URLs max out at 1 week.
        signed_url = self.connection_client.generate_presigned_url('get_object',
                                                                   Params={'Bucket': opts['bucket_name'],
                                                                           'Key': opts['path']},
                                                                   ExpiresIn=3600 * 24 * 6)  # 6 days

        with sessionMaker.session_scope() as session:

            project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
            member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()
            # Deduce Media Type:
            extension = Path(opts['path']).suffix
            extension = extension.lower()
            media_type = None
            if extension in images_allowed_file_names:
                media_type = 'image'
            elif extension in videos_allowed_file_names:
                media_type = 'video'
            else:
                # TODO: Decide, do we want to raise an exception? or just do nothing?
                log = regular_log.default()
                log['error']['invalid_type'] = 'File must be of type: {} {}'.format(str(images_allowed_file_names),
                                                                                    str(videos_allowed_file_names))
                log['error']['file_name'] = opts['path']
                log['opts'] = opts
                Event.new(
                    session=session,
                    member_id=opts['event_data']['request_user'],
                    kind='aws_s3_new_import_warning',
                    description='Skipped import for {}, invalid file type.'.format(opts['path']),
                    error_log=log,
                    project_id=project.id,
                    member=member,
                    success=False
                )
                return None
            # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path)
            created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                                  session=session,
                                                  media_url=signed_url,
                                                  media_type=media_type,
                                                  job_id=opts.get('job_id'),
                                                  video_split_duration=opts.get('video_split_duration'),
                                                  directory_id=opts.get('directory_id'))
            log = regular_log.default()
            log['opts'] = opts
            Event.new(
                session=session,
                member_id=opts['event_data']['request_user'],
                kind='aws_s3_new_import_success',
                description='New cloud import for {}'.format(opts['path']),
                error_log=log,
                project_id=project.id,
                member=member,
                success=True
            )
        return created_input
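The connectors above compute signed-URL expiry in three different shapes: GCS takes an absolute Unix timestamp, Azure an absolute datetime, and S3 a relative lifetime in seconds. Side by side, with the same intervals the examples use:

import time
import datetime

# GCS (Examples #3 and #10): absolute Unix timestamp, 30 days out.
gcs_expiry = int(time.time() + (60 * 60 * 24 * 30))

# Azure (Example #8): absolute datetime, 40368000 seconds (~467 days) out.
azure_expiry = datetime.datetime.utcnow() + datetime.timedelta(seconds=40368000)

# S3 (Example #11): relative lifetime in seconds, 6 days here;
# presigned GET URLs are capped at 7 days.
s3_expires_in = 3600 * 24 * 6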