def update_instance_list_for_video(self, frames_data, diffgram_task):
    # Initialize so we can still return a value when no frame contains objects.
    result = False
    frame_packet_map = {}
    for frame in frames_data:
        logger.debug('Processing Frame {}'.format(frame['frameNumber']))
        video_data = {
            'current_frame': frame['frameNumber'],
            'video_mode': True,
            'video_file_id': diffgram_task.file.id
        }
        label_instances = frame['objects']
        if len(label_instances) > 0:
            result = self.update_instance_list_for_image_or_frame(
                label_instances,
                diffgram_task,
                video_data=video_data,
                frame_packet_map=frame_packet_map)

    # One packet for the whole video; frame_packet_map has accumulated instances per frame.
    enqueue_packet(
        project_string_id=self.task_template.project.project_string_id,
        session=self.session,
        media_url=None,
        media_type='video',
        job_id=self.task_template.id,
        file_id=diffgram_task.file.id,
        frame_packet_map=frame_packet_map,
        task_id=diffgram_task.id,
        task_action='complete_task',
        commit_input=True,
        mode="update_with_existing")
    return result

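# A minimal sketch of the frames_data shape the method above assumes, based only on
# the keys it reads ('frameNumber' and 'objects'); field names inside each object
# ('featureId', 'title') come from update_instance_list_for_image_or_frame below,
# and the values here are hypothetical.
example_frames_data = [
    {'frameNumber': 1,
     'objects': [  # passed through to update_instance_list_for_image_or_frame()
         {'featureId': 'cklabelboxid01', 'title': 'car'}]},
    {'frameNumber': 2, 'objects': []},  # frames with no objects are skipped
]
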
def enqueue_scale_ai_annotations(self, diffgram_task, annotations):
    diffgram_instance_list = self.transform_annotations_to_diffgram_instance_list(annotations)
    enqueue_packet(project_string_id=self.task_template.project.project_string_id,
                   session=self.session,
                   media_url=None,
                   media_type='image',
                   job_id=self.task_template.id,
                   file_id=diffgram_task.file.id,
                   instance_list=diffgram_instance_list,
                   task_id=diffgram_task.id,
                   task_action='complete_task',
                   commit_input=True,
                   mode="update")
    return

def __fetch_object(self, opts):
    bucket = self.connection_client.get_bucket(opts['bucket_name'])
    blob = bucket.blob(opts['path'])
    blob_expiry = int(time.time() + (60 * 60 * 24 * 30))  # signed URL valid for 30 days
    signed_url = blob.generate_signed_url(expiration=blob_expiry)

    # Determine Media Type:
    # TODO Share this with existing process_media determine_media_type()
    extension = Path(opts['path']).suffix
    extension = extension.lower()
    media_type = None
    if extension in images_allowed_file_names:
        media_type = 'image'
    elif extension in videos_allowed_file_names:
        media_type = 'video'
    else:
        # TODO: Decide, do we want to raise an exception? or just do nothing?
        log = regular_log.default()
        log['error']['invalid_type'] = 'File must be of type: {} {}'.format(
            str(images_allowed_file_names), str(videos_allowed_file_names))
        log['error']['file_name'] = opts['path']
        log['opts'] = opts
        with sessionMaker.session_scope() as session:
            Event.new(session=session,
                      member_id=opts['event_data']['request_user'],
                      kind='google_cloud_new_import_error',
                      description='New cloud import for {}'.format(opts['path']),
                      error_log=log)
        raise LookupError('File must be of type: {} {}'.format(
            str(images_allowed_file_names), str(videos_allowed_file_names)))

    # metadata = self.connection_client.head_object(Bucket=opts['bucket_name'], Key=opts['path'])
    with sessionMaker.session_scope() as session:
        created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                              session=session,
                                              media_url=signed_url,
                                              media_type=media_type,
                                              job_id=opts.get('job_id'),
                                              video_split_duration=opts.get('video_split_duration'),
                                              directory_id=opts.get('directory_id'))
        log = regular_log.default()
        log['opts'] = opts
        Event.new(session=session,
                  member_id=opts['event_data']['request_user'],
                  kind='google_cloud_new_import_success',
                  description='New cloud import for {}'.format(opts['path']),
                  error_log=opts)
    return {'result': created_input}

def test_packet_endpoint_refactor(self):
    packet_data = {
        'media': {
            'url': 'https://thumbor.forbes.com/thumbor/250x382/https://blogs-images.forbes.com/dorothypomerantz/files/2011/09/Spongebob-squarepants.jpg?width=960',
            'type': 'image'
        }
    }
    created_input = packet.enqueue_packet(self.project_string_id,
                                          session=self.session,
                                          media_url=packet_data['media']['url'],
                                          media_type=packet_data['media']['type'],
                                          job_id=None,
                                          directory_id=None)
    self.session.commit()
    self.assertEqual(type(created_input), type(Input()))

def generate_test_data_on_dataset(self, dataset):
    inputs_data = []
    for i in range(0, self.NUM_IMAGES):
        diffgram_input = enqueue_packet(
            project_string_id=dataset.project.project_string_id,
            session=self.session,
            media_url='https://picsum.photos/1000',
            media_type='image',
            directory_id=dataset.id,
            commit_input=True,
            task_id=None,
            type='from_url',
            task_action=None,
            external_map_id=None,
            external_map_action=None,
            enqueue_immediately=True,
            mode=None,
            allow_duplicates=True)
        inputs_data.append(diffgram_input)
    return inputs_data

def interservice_receive_api():
    """
    Inter-service route to notify of a new job launch.

    For now this relies on inter_service_security_token for permissions...
    This is just a starting point for more generic inter-service notification.

    Pros/cons to having the DB as an intermediary point there; for now this is
    fairly lightweight. Once we have a good pattern here, e.g. retry/overflow
    handling, we can probably remove the polling / thread.
    """
    spec_list = [
        {"inter_service_security_token": {
            'kind': str,
            'required': True,
            'security_token': settings.INTER_SERVICE_SECRET
        }},
        {"message": {
            'kind': str,
            'required': True
        }},
        {"id": {  # or "base_class_id"?
            'kind': int,
            'required': False,
            'default': None
        }},
        {"extra_params": {
            'kind': dict,
            'required': False,
            'default': None
        }},
        {"base_class_string": {
            'kind': str,
            'required': False,
            'default': None
        }},
        {"project_string_id": {
            'kind': str,
            'required': False,
            'default': None
        }}
        # Serialized object maybe?
    ]

    log, input_from_request, untrusted_input = regular_input.master(
        request=request,
        spec_list=spec_list)
    if len(log["error"].keys()) >= 1:
        return jsonify(log=log), 400

    logger.info("Received valid inter service request")

    with sessionMaker.session_scope() as session:
        # CAUTIONS
        # Generally assumes any calls here are non-blocking, so the route returns quickly.
        # eg 1) Condition on message, then some_launcher(event_id=input['id'])
        # Or 2) if we want the object here for some reason, something like:
        #    if input['base_class_string']:
        #        base_object = getattr(sys.modules[__name__], input['base_class_string']).get_by_id(
        #            id=input['id'],
        #            session=session)

        if input_from_request['message'] == 'new_job_launch_queue_item':
            job_launcher_thread = TaskTemplateLauncherThread(run_once=True)
            log['info']['job_launcher_thread'] = True

        if input_from_request['message'] == 'new_sync_action_item':
            sync_action_thread = SyncActionsHandlerThread(run_once=True)
            log['info']['sync_action_thread'] = True

        if input_from_request['message'] == 'video_copy':
            enqueue_packet(
                project_string_id=input_from_request.get('project_string_id'),
                session=session,
                media_url=None,
                media_type='video',
                directory_id=input_from_request['extra_params'].get('destination_working_dir_id'),
                source_directory_id=input_from_request['extra_params'].get('source_working_dir_id'),
                remove_link=input_from_request['extra_params'].get('remove_link'),
                add_link=input_from_request['extra_params'].get('add_link'),
                copy_instance_list=input_from_request['extra_params'].get('copy_instance_list'),
                job_id=None,
                batch_id=input_from_request['extra_params'].get('batch_id'),
                file_id=input_from_request['id'],
                instance_list=[],
                video_parent_length=input_from_request['extra_params'].get('frame_count'),
                task_id=None,
                mode='copy_file',
                commit_input=True)

        if input_from_request['message'] == 'image_copy':
            enqueue_packet(
                project_string_id=input_from_request.get('project_string_id'),
                session=session,
                media_url=None,
                media_type='image',
                directory_id=input_from_request['extra_params'].get('destination_working_dir_id'),
                source_directory_id=input_from_request['extra_params'].get('source_working_dir_id'),
                remove_link=input_from_request['extra_params'].get('remove_link'),
                add_link=input_from_request['extra_params'].get('add_link'),
                copy_instance_list=input_from_request['extra_params'].get('copy_instance_list'),
                job_id=None,
                batch_id=input_from_request['extra_params'].get('batch_id'),
                file_id=input_from_request['id'],
                instance_list=[],
                video_parent_length=None,
                task_id=None,
                mode='copy_file',
                commit_input=True)

    log['success'] = True
    return jsonify(log=log), 200

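# A hedged example of the JSON body this route accepts, assembled from the spec_list
# above and the extra_params keys read in the copy branches; the token value and ids
# are placeholders.
example_inter_service_request = {
    'inter_service_security_token': '<settings.INTER_SERVICE_SECRET>',
    'message': 'video_copy',   # or 'image_copy', 'new_job_launch_queue_item', 'new_sync_action_item'
    'id': 123,                 # used as file_id by the copy messages
    'project_string_id': 'my_project',
    'extra_params': {
        'destination_working_dir_id': 10,
        'source_working_dir_id': 9,
        'remove_link': False,
        'add_link': True,
        'copy_instance_list': True,
        'batch_id': None,
        'frame_count': 450     # only read for 'video_copy' (video_parent_length)
    }
}
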
def update_instance_list_for_image_or_frame(self,
                                            label_instances,
                                            diffgram_task,
                                            video_data=None,
                                            frame_packet_map=None):
    instance_list = []
    count = 1
    for labelbox_instance in label_instances:
        # Check if an instance mapping already exists; if so, provide instance_id to avoid overriding data.
        instance_map = ExternalMap.get(
            session=self.session,
            external_id=labelbox_instance['featureId'],
            diffgram_class_string='instance',
            type='labelbox_instance',
            connection_id=self.task_template.interface_connection.id)
        if not instance_map:
            instance_map = ExternalMap.new(
                session=self.session,
                external_id=None,
                diffgram_class_string='instance',
                type='labelbox_instance',
                connection=self.task_template.interface_connection,
                add_to_session=True,
                flush_session=True)
        diffgram_label_file_data = self.task_template.get_label_file_by_name(
            labelbox_instance['title'])
        diffgram_label_instance = self.transform_labelbox_label_to_diffgram_instance(
            labelbox_instance,
            diffgram_label_file_data,
            instance_map=instance_map,
            sequence_num=count if video_data is not None else None)

        if frame_packet_map is not None:
            if video_data['current_frame'] not in frame_packet_map:
                frame_packet_map[video_data['current_frame']] = [diffgram_label_instance]
            else:
                frame_packet_map[video_data['current_frame']].append(diffgram_label_instance)

        if diffgram_label_instance:
            instance_list.append(diffgram_label_instance)
            count += 1

    if instance_list and video_data is None:
        enqueue_packet(
            project_string_id=self.task_template.project.project_string_id,
            session=self.session,
            media_url=None,
            media_type='image',
            job_id=self.task_template.id,
            file_id=diffgram_task.file.id,
            instance_list=instance_list,
            task_id=diffgram_task.id,
            task_action='complete_task',
            commit_input=True,
            external_map_id=instance_map.id,
            external_map_action='set_instance_id',
            mode="update_with_existing")
        return True
    elif instance_list:
        return True
    else:
        return False

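# When called with video_data, the method above does not enqueue directly; it fills
# frame_packet_map, which update_instance_list_for_video() later hands to
# enqueue_packet(). A sketch of the resulting shape: keys are frame numbers that had
# at least one labeled object, values are lists of Diffgram instance dicts. The
# instance contents depend on transform_labelbox_label_to_diffgram_instance and are
# only illustrative here.
example_frame_packet_map = {
    1: [{'type': 'box', 'label_file_id': 55}],  # frame number -> list of instance dicts
    7: [{'type': 'box', 'label_file_id': 56}],
}
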
def __fetch_object(self, opts):
    """
    Upload a file to Diffgram from an Azure blob.

    :param opts: Dictionary with parameters for object fetching.
    :return: created Input if the file was enqueued, else None
    """
    spec_list = [{'bucket_name': str, 'path': str}]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input=opts,
                                                spec_list=spec_list,
                                                log=log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}

    shared_access_signature = BlobSharedAccessSignature(
        account_name=self.connection_client.account_name,
        account_key=self.connection_client.credential.account_key
    )
    expiration_offset = 40368000  # SAS lifetime in seconds
    blob_name = opts['path']
    container = opts['bucket_name']
    added_seconds = datetime.timedelta(0, expiration_offset)
    expiry_time = datetime.datetime.utcnow() + added_seconds
    filename = blob_name.split("/")[-1]
    sas = shared_access_signature.generate_blob(
        container_name=container,
        blob_name=blob_name,
        start=datetime.datetime.utcnow(),
        expiry=expiry_time,
        permission=BlobSasPermissions(read=True),
        content_disposition='attachment; filename=' + filename,
    )
    sas_url = 'https://{}.blob.core.windows.net/{}/{}?{}'.format(
        self.connection_client.account_name,
        container,
        blob_name,
        sas)

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
        member = session.query(Member).filter(
            Member.user_id == opts['event_data']['request_user']).first()

        # Determine Media Type:
        extension = Path(opts['path']).suffix
        extension = extension.lower()
        media_type = None
        if extension in images_allowed_file_names:
            media_type = 'image'
        elif extension in videos_allowed_file_names:
            media_type = 'video'
        else:
            # TODO: Decide, do we want to raise an exception? or just do nothing?
            log = regular_log.default()
            log['error']['invalid_type'] = 'File must be of type: {} {}'.format(
                str(images_allowed_file_names), str(videos_allowed_file_names))
            log['error']['file_name'] = opts['path']
            log['opts'] = opts
            Event.new(
                session=session,
                member_id=opts['event_data']['request_user'],
                kind='microsoft_azure_new_import_warning',
                description='Skipped import for {}, invalid file type.'.format(opts['path']),
                error_log=log,
                project_id=project.id,
                member=member,
                success=False
            )
            return None

        # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path)
        created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                              session=session,
                                              media_url=sas_url,
                                              media_type=media_type,
                                              job_id=opts.get('job_id'),
                                              batch_id=opts.get('batch_id'),
                                              file_name=opts.get('path'),
                                              video_split_duration=opts.get('video_split_duration'),
                                              directory_id=opts.get('directory_id'),
                                              extract_labels_from_batch=True)
        log = regular_log.default()
        log['opts'] = opts
        Event.new(
            session=session,
            member_id=opts['event_data']['request_user'],
            kind='microsoft_azure_new_import_success',
            description='New cloud import for {}'.format(opts['path']),
            error_log=opts,
            project_id=project.id,
            member=member,
            success=True
        )
    return created_input

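# A hedged example of the opts dict the Azure fetch above expects. Only 'bucket_name'
# and 'path' are validated by spec_list; the remaining keys are read optionally in
# the body. All values are placeholders.
example_azure_fetch_opts = {
    'bucket_name': 'my-container',
    'path': 'videos/drive_01.mp4',
    'event_data': {'request_user': 42},  # user id recorded on the Event rows
    'job_id': None,
    'batch_id': None,
    'directory_id': 7,
    'video_split_duration': None,
}
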
def fetch_instances_from_file(self, task_template, diffgram_file, file_id, datasaur_connector):
    file_export_data = self.trigger_export_single_datasaur_file(
        datasaur_connector=datasaur_connector,
        file_id=file_id)
    instance_list = []
    # We get the task based on file id, since the assumption for Datasaur is that file and task are the same concept.
    task = self.session.query(Task).filter(
        Task.job_id == task_template.id,
        Task.file_id == diffgram_file.id
    ).first()
    if 'log' in file_export_data and 'error' in file_export_data['log']:
        logger.error('Error fetching export data {}'.format(file_export_data))

    label_items = file_export_data['result']['labelSet']['labelItems']
    label_items_by_id = {}
    for label in label_items:
        external_map_label = ExternalMap.get(
            session=self.session,
            job_id=task_template.id,
            external_id=label['id'],
            connection_id=task_template.interface_connection.id,
            diffgram_class_string='label_file',
            type='datasaur_label'
        )
        if external_map_label:
            label_items_by_id[label['id']] = label
            label_items_by_id[label['id']]['label_file_id'] = external_map_label.file_id
        else:
            logger.error('No label_file found for datasaur ID: {}'.format(label['id']))
            return

    sentences = file_export_data['result']['sentences']
    for sentence in sentences:
        instances = sentence['labels']
        for instance in instances:
            instance_map = ExternalMap.get(
                session=self.session,
                external_id=instance['id'],
                diffgram_class_string='instance',
                type='datasaur_instance',
                return_kind='first')
            if not instance_map:
                logger.debug('Creating Instance Map...')
                instance_map = ExternalMap.new(
                    session=self.session,
                    job=task_template,
                    external_id=instance['id'],
                    connection=task_template.interface_connection,
                    diffgram_class_string='instance',
                    type='{}_instance'.format(task_template.interface_connection.integration_name),
                    url='',
                    add_to_session=True,
                    flush_session=True)
            else:
                logger.debug('Instance Map exists, proceeding to update.')
            instance_list.append({
                'start_sentence': instance['sidS'],
                'end_sentence': instance['sidE'],
                'start_token': instance['s'],
                'end_token': instance['e'],
                'start_char': instance['charS'],
                'end_char': instance['charE'],
                'sentence': sentence['id'],
                'type': 'text_token',
                'name': label_items_by_id[instance['l']]['labelName'],
                'label_file_id': label_items_by_id[instance['l']]['label_file_id']
            })

    logger.debug('Enqueuing new instances....')
    # Create a new packet to ensure this gets committed.
    if task and task_template and diffgram_file:
        enqueue_packet(project_string_id=task_template.project.project_string_id,
                       session=self.session,
                       media_url=None,
                       media_type='text',
                       job_id=task_template.id,
                       file_id=diffgram_file.id,
                       instance_list=instance_list,
                       task_id=task.id,
                       task_action='complete_task',
                       commit_input=True,
                       mode="update")
        logger.info('Updated Task {} from datasaur.'.format(task.id))

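# A minimal sketch of the Datasaur export payload shape the parser above assumes,
# derived only from the keys it accesses; real exports contain more fields and the
# values here are placeholders.
example_datasaur_export = {
    'result': {
        'labelSet': {
            'labelItems': [{'id': 'lbl-1', 'labelName': 'PERSON'}]
        },
        'sentences': [{
            'id': 0,
            'labels': [{
                'id': 'span-1',            # external instance id
                'l': 'lbl-1',              # references labelItems id
                'sidS': 0, 'sidE': 0,      # start / end sentence
                's': 3, 'e': 4,            # start / end token
                'charS': 10, 'charE': 22   # start / end character
            }]
        }]
    }
}
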
def __fetch_folder(self, opts):
    result = []
    if self.config_data.get('project_string_id') is None:
        return {'result': 'error'}
    paths = opts['path']
    if type(paths) != list:
        paths = [paths]

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
        member = session.query(Member).filter(
            Member.user_id == opts['event_data']['request_user']).first()
        for path in paths:
            blobs = self.connection_client.list_blobs(opts['bucket_name'], prefix=path)
            for blob in blobs:
                # Skip folder placeholder objects
                if blob.name.endswith('/'):
                    continue
                blob_expiry = int(time.time() + (60 * 60 * 24 * 30))  # 30 days
                signed_url = blob.generate_signed_url(expiration=blob_expiry)

                # Determine Media Type:
                extension = Path(blob.path).suffix
                extension = extension.lower()
                media_type = None
                if extension in images_allowed_file_names:
                    media_type = 'image'
                elif extension in videos_allowed_file_names:
                    media_type = 'video'
                else:
                    logging.warning('File: {} must be of type: {} {}'.format(
                        blob.name,
                        str(images_allowed_file_names),
                        str(videos_allowed_file_names)))
                    log = regular_log.default()
                    log['error']['invalid_type'] = 'File must be of type: {} {}'.format(
                        str(images_allowed_file_names), str(videos_allowed_file_names))
                    log['error']['file_name'] = path
                    log['opts'] = opts
                    Event.new(
                        session=session,
                        member_id=opts['event_data']['request_user'],
                        kind='google_cloud_new_import_warning',
                        description='Skipped import for {}, invalid file type.'.format(blob.name),
                        error_log=log,
                        project_id=project.id,
                        member=member,
                        success=False)
                    continue

                # TODO: check Input() table for duplicate file?
                created_input = packet.enqueue_packet(
                    self.config_data['project_string_id'],
                    session=session,
                    media_url=signed_url,
                    media_type=media_type,
                    job_id=opts.get('job_id'),
                    batch_id=opts.get('batch_id'),
                    file_name=path,
                    video_split_duration=opts.get('video_split_duration'),
                    directory_id=opts.get('directory_id'),
                    extract_labels_from_batch=True)
                log = regular_log.default()
                log['opts'] = opts
                Event.new(session=session,
                          member_id=opts['event_data']['request_user'],
                          kind='google_cloud_new_import_success',
                          description='New cloud import for {}'.format(blob.name),
                          error_log=opts,
                          project_id=project.id,
                          member=member,
                          success=True)
                result.append(created_input)
    return result

def __fetch_object(self, opts):
    """
    Upload a file to Diffgram from an S3 bucket.

    :param opts: Dictionary with 'bucket_name' and 'path' (the S3 key of the file to fetch).
    :return: created Input if the file was enqueued, else None
    """
    spec_list = [{'bucket_name': str, 'path': str}]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input=opts,
                                                spec_list=spec_list,
                                                log=log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}

    # This might be an issue. Currently not supporting URLs with no expiration;
    # the maximum presigned URL lifetime is 1 week.
    signed_url = self.connection_client.generate_presigned_url(
        'get_object',
        Params={'Bucket': opts['bucket_name'], 'Key': opts['path']},
        ExpiresIn=3600 * 24 * 6)  # 6 days

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
        member = session.query(Member).filter(
            Member.user_id == opts['event_data']['request_user']).first()

        # Determine Media Type:
        extension = Path(opts['path']).suffix
        extension = extension.lower()
        media_type = None
        if extension in images_allowed_file_names:
            media_type = 'image'
        elif extension in videos_allowed_file_names:
            media_type = 'video'
        else:
            # TODO: Decide, do we want to raise an exception? or just do nothing?
            log = regular_log.default()
            log['error']['invalid_type'] = 'File must be of type: {} {}'.format(
                str(images_allowed_file_names), str(videos_allowed_file_names))
            log['error']['file_name'] = opts['path']
            log['opts'] = opts
            Event.new(
                session=session,
                member_id=opts['event_data']['request_user'],
                kind='aws_s3_new_import_warning',
                description='Skipped import for {}, invalid file type.'.format(opts['path']),
                error_log=log,
                project_id=project.id,
                member=member,
                success=False
            )
            return None

        # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path)
        created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                              session=session,
                                              media_url=signed_url,
                                              media_type=media_type,
                                              job_id=opts.get('job_id'),
                                              video_split_duration=opts.get('video_split_duration'),
                                              directory_id=opts.get('directory_id'))
        log = regular_log.default()
        log['opts'] = opts
        Event.new(
            session=session,
            member_id=opts['event_data']['request_user'],
            kind='aws_s3_new_import_success',
            description='New cloud import for {}'.format(opts['path']),
            error_log=opts,
            project_id=project.id,
            member=member,
            success=True
        )
    return created_input