Ejemplo n.º 1
0
    def map_scale_ai_task(self, task, scale_ai_task, type='box'):
        """
            Creates the ExternalMap that links a Diffgram task with a ScaleAI task.

            The ScaleAI identifier is read from ``scale_ai_task.id`` for 'box'
            tasks and from ``scale_ai_task.task_id`` for 'polygon' tasks. The
            resolved identifier is the one stored as ``external_id``.
        :param task: Diffgram task. Its ``job.interface_connection`` provides
            the connection and the integration name used to build the map type.
        :param scale_ai_task: task object returned by the ScaleAI integration.
        :param type: kind of ScaleAI task, 'box' (default) or 'polygon'.
        :return: the newly created ExternalMap.
        :raises Exception: when no identifier can be resolved (unsupported
            ``type`` or the identifier attribute is None).
        """
        if type == 'box':
            task_id = scale_ai_task.id
        elif type == 'polygon':
            task_id = scale_ai_task.task_id
        else:
            task_id = None

        if task_id is None:
            raise Exception('Cannot map ScaleAI task. Id is None')

        connection = task.job.interface_connection
        external_map = ExternalMap.new(
            session=self.session,
            task=task,
            # Use the identifier resolved above rather than re-reading
            # scale_ai_task.id, so both task kinds are mapped consistently.
            external_id=task_id,
            connection=connection,
            diffgram_class_string='task',
            type='{}_task'.format(connection.integration_name),
            url='',
            add_to_session=True,
            flush_session=True)
        # Assigned only after the map exists; the original note says this is
        # kept at the bottom to avoid circular dependencies on job.
        # NOTE(review): this sets the map on the task template, not on `task`
        # - confirm that is intended.
        self.task_template.default_external_map = external_map

        logger.debug('Created ScaleAI Task {}'.format(task_id))
        return external_map
Ejemplo n.º 2
0
 def save_label_instance_ontology_mapping(self, ontology, connection):
     """
         Saves the relationships between the IDs of Diffgram label files and
         the tools of the external ontology, plus one mapping per tool
         classification pointing at the matching Diffgram attribute group.

         Tool and classification names are used as external IDs because the
         feature schema ID is no longer available from the API.
     :param ontology: dict holding the tools under
         ['project']['ontology']['normalized']['tools'].
     :param connection: connection stored on every ExternalMap; its
         ``integration_name`` is used as the map type.
     :return: dict keyed by tool name with the shape
         {'label_id': <label file id>, 'attributes': {}}. The 'attributes'
         entry is always returned empty by this method.
     """
     mapping = {}
     tools = ontology['project']['ontology']['normalized']['tools']
     for tool in tools:
         # Look the label file up once and reuse it for the ID and for the
         # attribute group lookups below.
         diffgram_label_file = self.task_template.get_label_file_by_name(tool['name'])
         diffgram_label_file_id = diffgram_label_file['id']
         # Feature schema ID was removed from API. Using name instead
         feature_schema_id = tool['name']
         mapping[feature_schema_id] = {'label_id': diffgram_label_file_id, 'attributes': {}}
         ExternalMap.new(session=self.session,
                         file_id=diffgram_label_file_id,
                         external_id=feature_schema_id,
                         type=connection.integration_name,
                         diffgram_class_string='label_file',
                         connection=connection,
                         add_to_session=True)
         # A missing or empty 'classifications' entry means nothing to map.
         for classification in tool.get('classifications', None) or []:
             attribute_group = self.task_template.get_attribute_group_by_name(
                 diffgram_label_file, classification['name'])
             # Name is used here as well since the feature schema ID was removed.
             ExternalMap.new(session=self.session,
                             external_id=classification['name'],
                             file_id=diffgram_label_file_id,
                             attribute_template_group_id=attribute_group['id'],
                             type=connection.integration_name,
                             diffgram_class_string='label_file',
                             connection=connection,
                             add_to_session=True)
     return mapping
Ejemplo n.º 3
0
    def create_scale_ai_project_mapping(self, scale_ai_project, connection):
        """
            Creates the ExternalMap that links the task template with a
            ScaleAI project and sets it as the template's default external map.
        :param scale_ai_project: mapping describing the ScaleAI project; its
            'name' entry is used as the external ID and in the dashboard URL.
        :param connection: connection stored on the ExternalMap; its
            ``integration_name`` is used as the map type.
        :return: the newly created ExternalMap.
        """
        project_name = scale_ai_project['name']
        external_map = ExternalMap.new(
            session=self.session,
            job=self.task_template,
            external_id=project_name,
            connection=connection,
            diffgram_class_string='task_template',
            type=connection.integration_name,
            url='https://dashboard.scale.com/test/tasks?project={}'.format(project_name),
            add_to_session=True,
            flush_session=True
        )
        # Assigned only after the map exists; the original note says this is
        # kept at the bottom to avoid circular dependencies on job.
        self.task_template.default_external_map = external_map

        logger.debug('Created ScaleAI Project {}'.format(project_name))
        # Returned so callers can use the map directly, like the other
        # mapping helpers do.
        return external_map
Ejemplo n.º 4
0
    def create_labelbox_project_mapping(self, labelbox_project, connection):
        """
            Registers the link between the task template and a Labelbox
            project and makes it the template's default external map.
        :param labelbox_project: Labelbox project object; its ``uid`` is used
            as the external ID and to build the project overview URL.
        :param connection: connection stored on the ExternalMap; its
            ``integration_name`` is used as the map type.
        :return: the newly created ExternalMap.
        """
        project_uid = labelbox_project.uid
        overview_url = 'https://app.labelbox.com/projects/{}/overview'.format(project_uid)
        map_fields = dict(
            session=self.session,
            job=self.task_template,
            external_id=project_uid,
            connection=connection,
            diffgram_class_string='task_template',
            type=connection.integration_name,
            url=overview_url,
            add_to_session=True,
            flush_session=True,
        )
        project_map = ExternalMap.new(**map_fields)
        # Assigned only after the map exists; the original note says this is
        # kept at the bottom to avoid circular dependencies on job.
        self.task_template.default_external_map = project_map

        logger.debug('Created Labelbox Project {}'.format(project_uid))
        return project_map
    def execute_after_launch_strategy(self):
        """
            Launches the task template on Datasaur and then runs the standard
            Diffgram task template launch.

            Steps, in order:
              1. Build a Datasaur label set from the template's serialized
                 label files. When the label set comes back with an ID, store
                 an ExternalMap for the set and one for each label.
              2. Create a Datasaur project from the text files attached to
                 the template and store an ExternalMap for it.
              3. Store an ExternalMap for every Datasaur document, matched to
                 the Diffgram file whose ID equals the document name.
              4. Delegate to StandardTaskTemplateAfterLaunchStrategy.

            If an error happens after the Datasaur project was created, the
            project is deleted through the connector before the error is
            re-raised.
        :return: None
        :raises Exception: when the template has no attached text files, or
            when the project creation response has no 'result' key. Any other
            error raised along the way is logged and re-raised.
        """
        datasaur_project = None
        connection = self.task_template.interface_connection
        logger.debug('Connection for Datasaur: {}'.format(connection))
        connector_manager = ConnectorManager(connection=connection, session=self.session)
        connector = connector_manager.get_connector_instance()
        connector.connect()
        try:
            # Each label gets a fresh UUID, which is later stored as its
            # external ID.
            label_data = [
                {
                    'uuid': str(uuid.uuid4()),
                    'diffgram_label_file': label_element['id'],
                    'name': '{}'.format(label_element['label']['name']),
                    'color': label_element['colour']['hex'].upper(),
                }
                for label_element in self.task_template.label_dict.get('label_file_list_serialized', [])
            ]

            # First we need to build a label set
            label_set_result = self.create_datasaur_labelset(label_data, connector)
            label_set = label_set_result['result']['createLabelSet']
            logger.debug('Created label_set {}'.format(label_set))
            if label_set.get('id'):
                # The placeholder was missing, so the ID was never logged.
                logger.info('Datasaur Labelset created succesfully ID: {}'.format(label_set['id']))
                ExternalMap.new(
                    session=self.session,
                    job=self.task_template,
                    external_id=label_set['id'],
                    connection=connection,
                    diffgram_class_string='',
                    type='{}_label_set'.format(connection.integration_name),
                    url='',
                    add_to_session=True,
                    flush_session=True
                )
                # Now save mappings for created labels
                for label_element in label_data:
                    ExternalMap.new(
                        session=self.session,
                        job=self.task_template,
                        file_id=label_element['diffgram_label_file'],
                        external_id=label_element['uuid'],
                        connection=connection,
                        diffgram_class_string='label_file',
                        type='{}_label'.format(connection.integration_name),
                        url='',
                        add_to_session=True,
                        flush_session=True
                    )

            # Now we create a project
            files_to_process = self.task_template.get_attached_files(self.session, type='text')
            if not files_to_process:
                raise Exception('Task template has no files in attached folder. Stopping Datasaur launch strategy.')

            # Keyed by the stringified file ID because Datasaur documents are
            # matched back through their 'name' field below.
            files_to_process_by_id = {str(file.id): file for file in files_to_process}
            logger.debug('files_to_process_by_id {}'.format(files_to_process_by_id))
            result = self.create_datasaur_project(connector, label_set, files_to_process)
            logger.debug('Create datasaur Project result: {}'.format(result))
            if 'result' not in result:
                logger.error('Error from connector: Rolling back project creation...')
                raise Exception(result)

            # From here on a failure triggers the project deletion in the
            # except block.
            datasaur_project = result['result']
            ExternalMap.new(
                session=self.session,
                job=self.task_template,
                external_id=datasaur_project['id'],
                connection=connection,
                diffgram_class_string='task_template',
                type='{}_project'.format(connection.integration_name),
                url='https://datasaur.ai/projects/{}/'.format(datasaur_project['id']),
                add_to_session=True,
                flush_session=True,
            )
            logger.debug('Created Datasaur Project.')
            # Save file ID's mappings
            project_files_results = self.get_project_files_list(connector, datasaur_project)
            logger.debug('Datasaur project files: {}'.format(project_files_results))
            project_files = project_files_results['result']['documents']
            for file in project_files:
                # A document name with no matching Diffgram file raises
                # KeyError, which also triggers the rollback below.
                diffgram_file = files_to_process_by_id[file['name']]
                ExternalMap.new(
                    session=self.session,
                    job=self.task_template,
                    external_id=file['id'],
                    file=diffgram_file,
                    connection=connection,
                    diffgram_class_string='file',
                    type='{}_file'.format(connection.integration_name),
                    url='',
                    add_to_session=True,
                    flush_session=True,
                )
            # Now create tasks as usual.
            logger.info(
                'DatasaurTaskTemplateAfterLaunchStrategy for Task Template ID: {} completed successfully.'.format(
                    self.task_template.id))
            logger.debug('Proceding to standard task template launch...')
            standard_strategy = StandardTaskTemplateAfterLaunchStrategy(
                session=self.session,
                task_template=self.task_template,
                log=self.log
            )
            standard_strategy.execute_after_launch_strategy()

        except Exception:
            logger.error('Error during datasaur launch strategy. {}'.format(traceback.format_exc()))
            if datasaur_project:
                logger.error('Rolling back project creation...')
                connector.put_data({
                    'action_type': 'delete_project',
                    'project_id': datasaur_project['id'],
                    'event_data': {},
                })
            # Bare raise keeps the original exception and traceback intact.
            raise
Ejemplo n.º 6
0
    def send_all_files_in_task_template(self):
        """
            Initial sync of every directory attached to the task template.

            For each attached dataset:
              - with no default external map: create a Labelbox dataset,
                store its mapping and push the dataset's files;
              - with a default external map: fetch the Labelbox dataset it
                points to. If it exists it is attached to the project; if not
                it is created again, attached and re-mapped. The files are
                then pushed, forcing re-creation only in the second case.
        :return: None
        """
        attached_datasets = self.task_template.get_attached_dirs(self.session)
        if not attached_datasets:
            return

        for dataset in attached_datasets:
            # The labeling interface is assumed to have been validated
            # already, so a dataset that does not exist yet is simply created.
            logger.debug('Syncing dataset {}-{}  in Labelbox'.format(
                dataset.nickname, dataset.id))

            if not dataset.default_external_map:
                logger.debug('Dataset does not exist... creating.')
                creation = self.labelbox_connector.put_data({
                    'action_type': 'create_dataset',
                    'name': dataset.nickname,
                    'event_data': {},
                    'project': self.labelbox_project,
                })
                labelbox_dataset = creation['result']
                dataset.default_external_map = ExternalMap.new(
                    session=self.session,
                    external_id=labelbox_dataset.uid,
                    dataset=dataset,
                    url='https://app.labelbox.com/dataset/{}'.format(
                        labelbox_dataset.uid),
                    diffgram_class_string="dataset",
                    type="labelbox",
                    add_to_session=True,
                    flush_session=True,
                )
                self.session.add(dataset)
                pending_files = WorkingDirFileLink.file_list(
                    self.session, dataset.id, limit=None)
                self.add_files_to_labelbox_dataset(
                    diffgram_files=pending_files,
                    labelbox_dataset=labelbox_dataset)
                continue

            logger.debug('Dataset already exists... attaching.')
            known_dataset_id = dataset.default_external_map.external_id
            lookup = self.labelbox_connector.fetch_data({
                'action_type': 'get_dataset',
                'event_data': {},
                'dataset_id': known_dataset_id,
            })
            # A mapped dataset that Labelbox no longer has may have been
            # deleted there, so it gets created again.
            recreate = not lookup['exists']
            if recreate:
                logger.debug('Dataset not found, re-creating it...')
                creation = self.labelbox_connector.put_data({
                    'action_type': 'create_dataset',
                    'name': dataset.nickname,
                    'event_data': {},
                    'project': self.labelbox_project,
                })
                labelbox_dataset = creation['result']
            else:
                labelbox_dataset = lookup['result']

            # Both cases attach the dataset to the project.
            self.labelbox_connector.put_data({
                'action_type': 'attach_dataset',
                'dataset': labelbox_dataset,
                'project': self.labelbox_project,
                'event_data': {},
            })

            if recreate:
                dataset.default_external_map = ExternalMap.new(
                    session=self.session,
                    external_id=labelbox_dataset.uid,
                    dataset=dataset,
                    diffgram_class_string="dataset",
                    type="labelbox",
                    add_to_session=True,
                    flush_session=True)
                self.session.add(dataset)

            pending_files = WorkingDirFileLink.file_list(
                self.session, dataset.id, limit=None)
            self.add_files_to_labelbox_dataset(
                diffgram_files=pending_files,
                labelbox_dataset=labelbox_dataset,
                force_create=recreate)
Ejemplo n.º 7
0
    def add_files_to_labelbox_dataset(self,
                                      diffgram_files=None,
                                      labelbox_dataset=None,
                                      force_create=False):
        """
            Adds the files to labelbox.
            Important! If you call this method multiple times, multiple versions of the same file will
            be created at labelbox, so use only on initialization of task templates.

            A file is skipped when it already has a 'labelbox' external map
            with an external ID, that data row still exists in the Labelbox
            dataset and ``force_create`` is False. Only files of type "image"
            or "video" that have the matching media object are uploaded.
            After the upload, each created data row is stored as the
            file's default external map.
        :param diffgram_files: Diffgram files to upload. Defaults to no files.
        :param labelbox_dataset: Labelbox dataset receiving the data rows.
        :param force_create: Ignore existing files and always create (useful for recreating a dataset that was deleted)
        :return: False when ``labelbox_dataset`` is None, otherwise the task
            returned by ``labelbox_dataset.create_data_rows``.
        """
        if labelbox_dataset is None:
            return False
        # None replaces the former mutable default ([]) of this parameter.
        if diffgram_files is None:
            diffgram_files = []

        file_urls = []
        diffgram_files_by_id = {}
        external_ids = []
        file_ids = [x.id for x in diffgram_files]

        datarow_external_maps = ExternalMap.get(session=self.session,
                                                file_id=file_ids,
                                                diffgram_class_string='file',
                                                type='labelbox',
                                                return_kind='all')
        # To avoid querying external map each time on for loop.
        external_map_by_id = {
            ext_map.file_id: ext_map
            for ext_map in datarow_external_maps
        }
        data_row_ids = [
            external_map.external_id for external_map in datarow_external_maps
            if external_map.external_id
        ]
        result_datarows = self.labelbox_connector.fetch_data({
            'action_type': 'get_data_rows',
            'event_data': '',
            'dataset': labelbox_dataset,
            'data_row_ids': data_row_ids
        })
        labelbox_existing_data_rows = result_datarows['result']['datasets'][0][
            'dataRows']
        # Sets keep the membership checks below constant-time.
        existing_data_rows_ids = {x['id'] for x in labelbox_existing_data_rows}
        deleted_data_rows = {
            row_id for row_id in data_row_ids
            if row_id not in existing_data_rows_ids
        }
        for diffgram_file in diffgram_files:
            # If we have a registered ID on labelbox, we skip file creation for this file.
            # We have to re-create it if it was deleted for some reason.
            file_map = external_map_by_id.get(diffgram_file.id)
            if file_map and file_map.external_id and not force_create \
                    and file_map.external_id not in deleted_data_rows:
                logger.debug('File {} exists. Skipping..'.format(
                    diffgram_file.id))
                continue

            if diffgram_file.type == "image":
                logger.debug('Adding image {}  in Labelbox'.format(
                    diffgram_file.id))
                if not diffgram_file.image:
                    continue
                data = diffgram_file.image.serialize_for_source_control(
                    self.session)
                row_url = data['url_signed']
            elif diffgram_file.type == "video":
                if not diffgram_file.video:
                    continue
                logger.debug('Adding video {}  in Labelbox'.format(
                    diffgram_file.id))
                data = diffgram_file.video.serialize_list_view(
                    self.session, self.task_template.project)
                row_url = data['file_signed_url']
            else:
                # Other file types are not uploaded.
                continue

            file_urls.append({
                labelbox.schema.data_row.DataRow.row_data: row_url,
                'external_id': diffgram_file.id
            })
            # Cache in memory the file for updating labelbox ID's later
            diffgram_files_by_id[diffgram_file.id] = diffgram_file
            external_ids.append(diffgram_file.id)

        task = labelbox_dataset.create_data_rows(file_urls)
        # We want to wait since we're already deferring the creation process.
        task.wait_till_done()
        # Now update all Diffgram files with their labelbox data_row ID.
        query = """query($datasetId: ID!, $externalId: [String!]) {
                    datasets(where:{id: $datasetId }){
                      name
                      id
                      dataRows(where:{externalId_in: $externalId}){
                        id,
                        externalId
                      }
                    }
                }
        """
        data = {'datasetId': labelbox_dataset.uid, 'externalId': external_ids}
        result = self.labelbox_connector.put_data({
            'action_type': 'execute',
            'event_data': [],
            'query': query,
            'data': data
        })

        created_datarows = result['result']['datasets'][0]['dataRows']
        for datarow in created_datarows:
            # The external ID comes back from the query and is converted to
            # int to match the Diffgram file ID keys cached above.
            synced_file = diffgram_files_by_id[int(datarow['externalId'])]
            synced_file.default_external_map = ExternalMap.new(
                session=self.session,
                external_id=datarow['id'],
                file=synced_file,
                diffgram_class_string="file",
                type="labelbox",
                add_to_session=True,
                flush_session=True)
            self.session.add(synced_file)

        return task
Ejemplo n.º 8
0
    def handle_task_creation_hook(self, payload):
        """
            Handles a Labelbox hook carrying a label and applies it to the
            matching Diffgram task.

            The Labelbox data row is resolved to a Diffgram file through its
            'labelbox' external map, then to the task of this task template
            for that file. The task gets an external map pointing at the
            Labelbox label, and the label's instances are handed to
            ``update_instance_list_for_video`` (when the label has 'frames')
            or ``update_instance_list_for_image_or_frame`` (otherwise).
        :param payload: hook body. Reads payload['dataRow']['id'],
            payload['id'] and payload['label'], a JSON string holding either
            'frames' or 'objects'.
        :return: a ``(jsonify(...), 400)`` tuple when fetching frames or
            updating instances fails; None on success.
        :raises Exception: when no file mapping or no Diffgram task is found.
        """
        labelbox_data_row_id = payload['dataRow']['id']
        label = json.loads(payload['label'])
        labelbox_label_id = payload['id']
        video_mode = 'frames' in label
        frames_data = None
        label_instances = None
        if video_mode:
            # Fetch video objects
            frames_result = self.labelbox_connector.fetch_data({
                'action_type': 'get_frames',
                'frames_url': label['frames'],
                'event_data': {},
            })
            if result_has_error(frames_result):
                return jsonify(frames_result), 400
            frames_data = frames_result['result']
        else:
            label_instances = label['objects']

        file_external_mapping = ExternalMap.get(
            session=self.session,
            external_id=labelbox_data_row_id,
            diffgram_class_string='file',
            type='labelbox')
        if not file_external_mapping:
            logger.error('file_external_mapping not found')
            raise Exception('file_external_mapping not found')

        diffgram_task = self.session.query(Task).filter(
            Task.job_id == self.task_template.id,
            Task.file_id == file_external_mapping.file_id).first()
        if not diffgram_task:
            logger.error('Diffgram task not found')
            raise Exception('Diffgram task not found')

        # Build external mapping
        diffgram_task.default_external_map = ExternalMap.new(
            session=self.session,
            external_id=labelbox_label_id,
            task=diffgram_task,
            diffgram_class_string="task",
            type="labelbox",
            add_to_session=True,
            flush_session=True)
        self.session.add(diffgram_task)

        # Now process Labels and add them to file.
        if video_mode:
            result = self.update_instance_list_for_video(
                frames_data, diffgram_task)
        else:
            result = self.update_instance_list_for_image_or_frame(
                label_instances, diffgram_task)
        if not result:
            logger.error('Error updating instances')
            return jsonify('Error updating instances'), 400
        logger.info('Updated instances succesfully enqueued.')
Ejemplo n.º 9
0
    def update_instance_list_for_image_or_frame(self,
                                                label_instances,
                                                diffgram_task,
                                                video_data=None,
                                                frame_packet_map=None):
        """
            Converts Labelbox instances into Diffgram instances and, for
            images, enqueues them for processing.

            Every Labelbox instance is matched to an ExternalMap through its
            'featureId' (one is created when missing) and transformed with
            ``transform_labelbox_label_to_diffgram_instance``. When
            ``video_data`` is None and at least one instance was produced,
            the instances are enqueued with ``enqueue_packet``. For video
            frames nothing is enqueued here; the instances are only collected
            into ``frame_packet_map``.
        :param label_instances: iterable of Labelbox instances; each one is
            read through its 'featureId' and 'title' keys.
        :param diffgram_task: Diffgram task whose file receives the instances.
        :param video_data: when given, its 'current_frame' entry is the key
            used in ``frame_packet_map`` and instances get a sequence number.
        :param frame_packet_map: optional dict, updated in place, mapping a
            frame to the list of instances transformed for it.
        :return: True when at least one instance was produced, else False.
        """
        instance_list = []
        instance_map = None
        for sequence_num, labelbox_instance in enumerate(label_instances, start=1):
            # Check if instance mapping already exists, if so provide instance_id to avoid overriding data.
            instance_map = ExternalMap.get(
                session=self.session,
                external_id=labelbox_instance['featureId'],
                diffgram_class_string='instance',
                type='labelbox_instance',
                connection_id=self.task_template.interface_connection.id)
            if not instance_map:
                instance_map = ExternalMap.new(
                    session=self.session,
                    # Store the featureId; with None here the lookup above
                    # could never find this map on a later call.
                    external_id=labelbox_instance['featureId'],
                    diffgram_class_string='instance',
                    type='labelbox_instance',
                    connection=self.task_template.interface_connection,
                    add_to_session=True,
                    flush_session=True)
            diffgram_label_file_data = self.task_template.get_label_file_by_name(
                labelbox_instance['title'])
            diffgram_label_instance = self.transform_labelbox_label_to_diffgram_instance(
                labelbox_instance,
                diffgram_label_file_data,
                instance_map=instance_map,
                sequence_num=sequence_num if video_data is not None else None)

            if frame_packet_map is not None:
                # NOTE(review): the instance is stored here even when the
                # transform returned a falsy value - confirm that is intended.
                frame_packet_map.setdefault(
                    video_data['current_frame'], []).append(diffgram_label_instance)

            if diffgram_label_instance:
                instance_list.append(diffgram_label_instance)

        if not instance_list:
            return False

        if video_data is None:
            # Only the map of the last processed instance is passed along.
            enqueue_packet(
                project_string_id=self.task_template.project.project_string_id,
                session=self.session,
                media_url=None,
                media_type='image',
                job_id=self.task_template.id,
                file_id=diffgram_task.file.id,
                instance_list=instance_list,
                task_id=diffgram_task.id,
                task_action='complete_task',
                commit_input=True,
                external_map_id=instance_map.id,
                external_map_action='set_instance_id',
                mode="update_with_existing")
        return True
Ejemplo n.º 10
0
    def fetch_instances_from_file(
            self,
            task_template,
            diffgram_file,
            file_id,
            datasaur_connector):
        """
            Pulls the labels of one Datasaur file and enqueues them as
            instances of the matching Diffgram task.

            The file is exported through
            ``trigger_export_single_datasaur_file``. Every label item of the
            export must have a 'datasaur_label' ExternalMap; if one is
            missing the method logs an error and stops. Each label found in
            the exported sentences gets an instance ExternalMap (created when
            missing) and is turned into a 'text_token' instance dict. The
            instances are enqueued only when the task, the task template and
            the file are all available.
        :param task_template: task template owning the task and the connection.
        :param diffgram_file: Diffgram file the instances belong to.
        :param file_id: file identifier passed to the export call.
        :param datasaur_connector: connector passed to the export call.
        :return: None
        """
        export = self.trigger_export_single_datasaur_file(
            datasaur_connector=datasaur_connector,
            file_id=file_id)

        # For datasaur a file and a task are assumed to be the same concept,
        # so the task is looked up through the file.
        task_query = self.session.query(Task).filter(
            Task.job_id == task_template.id,
            Task.file_id == diffgram_file.id
        )
        task = task_query.first()

        if 'log' in export and 'error' in export['log']:
            logger.error('Error fetching export data {}'.format(export))

        # Datasaur label ID -> label item, enriched with the Diffgram label
        # file ID taken from its external map.
        labels_by_datasaur_id = {}
        for label_item in export['result']['labelSet']['labelItems']:
            label_map = ExternalMap.get(
                session=self.session,
                job_id=task_template.id,
                external_id=label_item['id'],
                connection_id=task_template.interface_connection.id,
                diffgram_class_string='label_file',
                type='datasaur_label'
            )
            if not label_map:
                logger.error('No label_file found for datasaur ID: {}'.format(label_item['id']))
                return
            labels_by_datasaur_id[label_item['id']] = label_item
            label_item['label_file_id'] = label_map.file_id

        instance_list = []
        for sentence in export['result']['sentences']:
            for datasaur_instance in sentence['labels']:
                instance_map = ExternalMap.get(
                    session=self.session,
                    external_id=datasaur_instance['id'],
                    diffgram_class_string='instance',
                    type='datasaur_instance',
                    return_kind='first')
                if instance_map:
                    logger.debug('Instance Map exists, proceding to update.')
                else:
                    logger.debug('Creating Instance Map...')
                    instance_map = ExternalMap.new(
                        session=self.session,
                        job=task_template,
                        external_id=datasaur_instance['id'],
                        connection=task_template.interface_connection,
                        diffgram_class_string='instance',
                        type='{}_instance'.format(
                            task_template.interface_connection.integration_name),
                        url='',
                        add_to_session=True,
                        flush_session=True)

                token_instance = {
                    'start_sentence': datasaur_instance['sidS'],
                    'end_sentence': datasaur_instance['sidE'],
                    'start_token': datasaur_instance['s'],
                    'end_token': datasaur_instance['e'],
                    'start_char': datasaur_instance['charS'],
                    'end_char': datasaur_instance['charE'],
                    'sentence': sentence['id'],
                    'type': 'text_token',
                }
                # 'l' holds the Datasaur label ID of the instance.
                token_instance['name'] = labels_by_datasaur_id[datasaur_instance['l']]['labelName']
                token_instance['label_file_id'] = labels_by_datasaur_id[datasaur_instance['l']]['label_file_id']
                instance_list.append(token_instance)

        logger.debug('Enqueuing new instances....')
        # Create new packet to ensure to commit this
        if not (task and task_template and diffgram_file):
            return
        enqueue_packet(project_string_id=task_template.project.project_string_id,
                       session=self.session,
                       media_url=None,
                       media_type='text',
                       job_id=task_template.id,
                       file_id=diffgram_file.id,
                       instance_list=instance_list,
                       task_id=task.id,
                       task_action='complete_task',
                       commit_input=True,
                       mode="update")
        logger.info('Updated Task {} from datasaur.'.format(task.id))