def wrapper(*args):
    """Invoke the wrapped connector function `f`, converting any raised
    exception into a regular_log error payload instead of propagating it.

    Returns `f`'s result on success, otherwise {'log': log} with
    'connection_error' and 'exception_details' filled in.
    """
    log = regular_log.default()
    try:
        res = f(*args)
        return res
    except Exception as e:
        # Message typo fixed: "check you" -> "check your".
        log['error']['connection_error'] = 'Error connecting to Labelbox. Please check your private API key is correct.'
        log['error']['exception_details'] = str(e)
        return {'log': log}
def wrapper(*args):
    """Invoke the wrapped connector function `f`, converting any raised
    exception into a regular_log error payload instead of propagating it.

    Returns `f`'s result on success, otherwise {'log': log} with
    'auth_credentials' and 'exception_details' filled in.
    """
    log = regular_log.default()
    try:
        res = f(*args)
        return res
    except Exception as e:
        # Message typo fixed: "check you" -> "check your".
        log['error']['auth_credentials'] = 'Error connecting to google cloud storage. Please check your private key, email and id are correct.'
        log['error']['exception_details'] = str(e)
        return {'log': log}
def __start_fetch_folder(self, opts):
    """Validate connector config, then run __fetch_folder on a daemon-less
    background thread so the request returns immediately.

    Returns {'result': True} on launch, or {'log': log} on validation error.
    """
    log = regular_log.default()
    # NOTE(review): spec declares 'project_string_id' as type dict — looks
    # like it should be str; mirrors the pattern used elsewhere, so kept.
    log, input = regular_input.input_check_many(
        untrusted_input=self.config_data,
        spec_list=[{'project_string_id': dict}],
        log=log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}
    fetch_thread = threading.Thread(target=self.__fetch_folder, args=(opts,))
    fetch_thread.start()
    return {'result': True}
def process_sync_actions(self, session, sync_action):
    """
    Executes sync action depending on the type of action
    :param session:
    :param sync_action:
    :return:
    """
    log = regular_log.default()
    sync_event = sync_action.sync_event
    sync_events_manager = SyncEventManager(session=session,
                                           sync_event=sync_event)
    logger.debug('Processing new sync event.')
    trigger = sync_event.event_trigger_type

    if trigger == 'task_completed':
        # Task-completion events fan out to all job observers.
        completed_task = sync_event.completed_task
        observable = task_file_observers.JobObservable(
            session=session,
            log=log,
            job=completed_task.job,
            task=completed_task,
            sync_events_manager=sync_events_manager)
        observable.notify_all_observers(defer=False)
        return

    if trigger != 'file_operation':
        logger.info('{} event trigger not supported for processing.'.format(trigger))
        return

    logger.debug('Processing file_operation sync event.')
    destination_directory = sync_event.dataset_destination
    source_directory = None
    file = sync_event.file
    effect = sync_event.event_effect_type

    if effect not in ['file_copy', 'file_move']:
        logger.info('{} event effect not supported for processing.'.format(effect))
        return

    logger.debug('Processing file_copy sync event.')
    if effect == 'file_copy':
        # we need to provide the source dir for validation of incoming dir.
        source_directory = sync_event.dataset_source
        file = sync_event.new_file_copy

    # we need to provide the source dir, so validation of incoming
    # directory does not fail when checking the directory the file is coming from.
    job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=session,
        log=log,
        directory=destination_directory,
    )
    logger.debug('Syncing file on jobs...')
    job_dir_sync_manager.add_file_to_all_jobs(
        file=file,
        source_dir=source_directory,
        create_tasks=True,
    )
def labelbox_web_hook_manager():
    """
    Webhook for receiving data on Diffgram once finished on labelbox.

    NOTE: Labelbox does not support Text or dropdown classifications in
    export for videos.
    :return: JSON response ('OK' message, or a regular_log with errors),
             or a bare error string when the signature check fails.
    """
    # First check the request signature so we never act on forged payloads.
    payload = request.data
    secret = settings.LABEL_BOX_SECRET
    log = regular_log.default()
    computed_signature = hmac.new(bytearray(secret.encode('utf-8')),
                                  msg=payload,
                                  digestmod=hashlib.sha1).hexdigest()
    # Use .get() so a missing header yields a clean rejection (not a KeyError),
    # and compare_digest so the comparison is constant-time (no timing oracle).
    provided_signature = request.headers.get('X-Hub-Signature', '')
    if not hmac.compare_digest(provided_signature, 'sha1=' + computed_signature):
        error = 'Error: computed_signature does not match signature provided in the headers'
        logger.error(
            'Error: computed_signature does not match signature provided in the headers'
        )
        return error
    with sessionMaker.session_scope() as session:
        labelbox_event = request.headers['X-Labelbox-Event']
        payload = request.json
        logger.debug('Payload for labelbox webhooks: {}'.format(payload))
        labelbox_project_id = payload['project']['id']
        # Map the Labelbox project back to the Diffgram task template.
        project_external_mapping = ExternalMap.get(
            session=session,
            external_id=labelbox_project_id,
            type='labelbox',
            diffgram_class_string='task_template')
        if not project_external_mapping:
            log['error']['labelbox_project'] = 'Labelbox external mapping not found.'
            return jsonify(log)
        task_template = Job.get_by_id(session, project_external_mapping.job_id)
        if not task_template:
            log['error']['task_template'] = 'Task template not found.'
            return jsonify(log)
        connection = task_template.interface_connection
        logger.debug('Connection for labelbox: {}'.format(connection))
        connector_manager = ConnectorManager(connection=connection,
                                             session=session)
        connector = connector_manager.get_connector_instance()
        connector.connect()
        sync_manager = LabelBoxSyncManager(
            session=session,
            task_template=task_template,
            labelbox_project=None,
            log=log,
            labelbox_connector=connector)
        sync_manager.handle_task_creation_hook(payload)
        return jsonify({'message': 'OK.'})
def check_export_permissions_and_status(export, project_string_id, session):
    """Validate that the export is visible to the project and finished.

    Returns the first failing check's log; a fresh default log when clean.
    """
    # NOTE(review): these checks use `> 1` while input validation elsewhere in
    # this file uses `>= 1`; a log holding exactly one error key would pass
    # silently here — confirm whether that is intentional.
    perms_log = has_project_permissions_for_export(export, project_string_id,
                                                   session)
    if len(perms_log['error'].keys()) > 1:
        return perms_log

    completed_log = is_export_completed(export)
    if len(completed_log['error'].keys()) > 1:
        return completed_log

    return regular_log.default()
def __init__(self, session, project, member, directory=None):
    """Capture request context, run a sanity permission check, and build
    the LALR query parser."""
    self.session = session
    self.project = project
    self.member = member
    self.directory = directory
    # Additional security check just for sanity
    Project_permissions.by_project_core(
        project_string_id=project.project_string_id,
        Roles=["admin", "Editor", "Viewer", "allow_if_project_is_public"],
        apis_project_list=[],
        apis_user_list=['security_email_verified'])
    self.log = regular_log.default()
    self.parser = Lark(grammar_definition, parser='lalr', transformer=None)
def test_execute_after_launch_strategy(self):
    """Launching a Scale AI connected task template creates one task and an
    external map (the real Scale AI project creation is patched out)."""
    file = data_mocking.create_file({'project_id': self.project.id},
                                    self.session)
    directory = data_mocking.create_directory({
        'project': self.project,
        'user': self.project_data['users'][0],
        'files': [file],
    }, self.session)
    connection = data_mocking.create_connection({
        'name': 'test',
        'integration_name': 'scale_ai',
        'project_id': self.project.id,
    }, self.session)
    job = data_mocking.create_job({
        'name': 'my-test-job-{}'.format(1),
        'project': self.project,
        'status': 'active',
        'type': "Normal",
        'attached_directories': [directory],
        'interface_connection_id': connection.id,
    }, self.session)
    strategy = ScaleAITaskTemplateAfterLaunchStrategy(
        task_template=job,
        session=self.session,
        log=regular_log.default(),
    )
    fake_scale_ai_project = {'id': '123', 'name': 'scaleaitest'}
    with patch.object(ScaleAITaskTemplateAfterLaunchStrategy,
                      'create_scale_ai_project',
                      return_value=fake_scale_ai_project):
        strategy.execute_after_launch_strategy()
        commit_with_rollback(self.session)
        task_query = self.session.query(Task).filter(Task.job_id == job.id)
        tasks_count = task_query.count()
        tasks = task_query.all()
        self.assertEqual(tasks_count, 1)
        external_maps = ExternalMap.get(
            session=self.session,
            job_id=job.id,
            diffgram_class_string='task_template',
            connection_id=connection.id,
            type=connection.integration_name,
        )
        self.assertNotEqual(external_maps, None)
def __count_objects(self, opts):
    """Count the real (non-folder) objects under opts['path'] in
    opts['bucket_name'].

    Returns {'result': count} or {'log': log} on validation error.
    """
    log = regular_log.default()
    log, input = regular_input.input_check_many(
        untrusted_input=opts,
        spec_list=[{'bucket_name': str, 'path': str}],
        log=log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}
    blobs = self.connection_client.list_blobs(opts['bucket_name'],
                                              prefix=opts['path'])
    # Names ending in '/' are folder placeholders, not objects.
    total = sum(1 for blob in blobs if not blob.name.endswith('/'))
    return {'result': total}
def send_task_to_scale_ai():
    """
    API endpoint: push a single Diffgram task to Scale AI for labeling,
    using the task template's interface connection.

    :return: JSON with the created Scale AI task id, or a regular_log
             with errors (HTTP 400 when the Scale AI creation fails).
    """
    # NOTE(review): spec declares 'task_id' as type dict — looks like it
    # should be int; kept as-is since regular_input semantics can't be
    # confirmed from here.
    spec_list = [{'task_id': dict}, {'project_string_id': str}]
    log, input_data, untrusted_input = regular_input.master(
        request=request, spec_list=spec_list)
    log = regular_log.default()
    with sessionMaker.session_scope() as session:
        task_id = input_data['task_id']
        task = Task.get_by_id(session, task_id=task_id)
        if not task:
            log['error']['task_id'] = 'Task not found.'
            return jsonify(log)
        task_template = task.job
        connection = task_template.interface_connection
        logger.debug('Connection for ScaleAI: {}'.format(connection))
        connector_manager = ConnectorManager(connection=connection,
                                             session=session)
        connector = connector_manager.get_connector_instance()
        connector.connect()
        scale_ai_sync_manager = ScaleAISyncManager(
            task_template=task_template,
            scale_ai_connector=connector,
            log=log,
            session=session)
        scale_ai_task, log = scale_ai_sync_manager.send_diffgram_task(task)
        logger.debug('Scale AI create result: {} || {}'.format(
            scale_ai_task, log))
        if not scale_ai_task:
            # (Removed leftover debug print of the log.)
            return jsonify(log=log), 400
        return jsonify({
            'message': 'OK.',
            'scale_ai_task_id': scale_ai_task.id
        })
def create_tasks_for_sample_task_template(self, task_template,
                                          attached_dir=None, files=None):
    """Create file links (and tasks) for a sample task template's attached
    directory; when no files are given, use the directory's root images.

    Returns the list of files used.
    """
    if not files:
        # TODO do we need to get child files too?
        files = WorkingDirFileLink.file_list(
            self.session,
            working_dir_id=attached_dir.id,
            root_files_only=True,
            limit=None,
            type='image')
    sync_manager = JobDirectorySyncManager(session=self.session,
                                           job=task_template,
                                           log=regular_log.default(),
                                           directory=attached_dir)
    sync_manager.create_file_links_for_attached_dirs(create_tasks=True)
    return files
def connect(self):
    """Build a google-cloud-storage client from the stored service-account
    auth data.

    Returns {'result': True} on success, or {'log': log} when the client
    project id is missing/empty.
    """
    log = regular_log.default()
    # The original used `is ''` / `is None` — identity comparison against a
    # string literal is unreliable (and a SyntaxWarning on modern CPython);
    # use value comparison instead.
    if 'project_id' not in self.auth_data or \
            self.auth_data['project_id'] in ('', None):
        log['error'][
            'client_project'] = "ValueError: Client project not set: pass an explicit project."
        return {'log': log}
    auth = self.generate_auth_data(
        email=self.auth_data['client_email'],
        client_id=self.auth_data['client_id'],
        client_secret=self.auth_data['client_secret'],
        project_id=self.auth_data['project_id'])
    credentials = service_account.Credentials.from_service_account_info(auth)
    self.connection_client = storage.Client(credentials=credentials,
                                            project=auth['project_id'])
    return {'result': True}
def task_template_launch_core(session, job):
    """
    This function is in charge of attaching the labels to the job,
    setting status to active and then creating the root tasks for
    each of the files attached to the job.
    """
    if not job:
        return False

    # TODO other pre checks (ie that guide is attached,
    # has a bid, files, etc.
    # check Status is "launchable" ie in draft
    # Update job status
    log = regular_log.default()

    # CAUTION using default directory for project which may not be right
    result = task_template_label_attach(
        session=session,
        task_template=job,
        project_directory=job.project.directory_default,
    )

    # QUESTION Do we only need to create tasks for "normal work things"?
    # ie for exams it gets done as part of the process
    # QUESTION are these only relevant for normal work? not exam?
    if job.type == "Normal":
        task_template_new_normal(session=session, task_template=job)
    if job.type == "Exam":
        task_template_new_exam(session=session, task_template=job)

    # Add job to all attached directories
    # NOTE(review): the manager is constructed but no sync method is called;
    # presumably the constructor registers the job — confirm.
    job_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=session, job=job, log=log)

    assert job is not None
    session.add(job)
    return job
def test_create_sequence_preview_core(self):
    """create_sequence_preview_core serves the cached instance preview for a
    sequence in the project, and errors for a sequence from another project."""
    label = data_mocking.create_label({'name': 'apple'}, self.session)
    label_file = data_mocking.create_label_file({
        'label': label,
        'project_id': self.project.id,
    }, self.session)
    video_file = data_mocking.create_file({
        'project_id': self.project.id,
        'type': 'video',
    }, self.session)
    video_file_bad = data_mocking.create_file({
        'project_id': self.project2.id,
        'type': 'video',
    }, self.session)
    sequence = data_mocking.create_sequence({
        'label_file_id': label_file.id,
        'video_file_id': video_file.id,
        'cache_expiry': time.time() + 500000,
        'number': 1,
    }, self.session)
    sequence2 = data_mocking.create_sequence({
        'label_file_id': label_file.id,
        'video_file_id': video_file_bad.id,
        'cache_expiry': time.time() + 500000,
        'number': 1,
    }, self.session)
    preview_url = 'https://picsum.photos/200/300'
    instance = data_mocking.create_instance({
        'project_id': self.project.id,
        'type': 'box',
        'x_min': 0,
        'x_max': 0,
        'y_min': 0,
        'y_max': 0,
        'file_id': video_file.id,
        'soft_delete': False,
        'sequence_id': sequence.id,
        'preview_image_url': preview_url,
        'preview_image_url_expiry': 900000000,
    }, self.session)
    sequence.instance_preview_cache = {
        'id': instance.id,
        'file_id': sequence.video_file.id,
        'preview_image_url': preview_url,
    }
    self.session.commit()

    # Happy path: preview is built from the cached instance.
    result, log = sequence_preview_create.create_sequence_preview_core(
        session=self.session,
        log=regular_log.default(),
        project=self.project,
        sequence_id=sequence.id)
    self.assertFalse(len(log['error'].keys()), 0)
    preview = result['instance_preview']
    self.assertEqual(preview['id'], instance.id)
    self.assertEqual(preview['file_id'], video_file.id)
    self.assertEqual(preview['preview_image_url'], preview_url)

    # Error case: sequence's video belongs to a different project.
    result, log = sequence_preview_create.create_sequence_preview_core(
        session=self.session,
        log=regular_log.default(),
        project=self.project,
        sequence_id=sequence2.id)
    self.assertEqual(len(log['error'].keys()), 1)
    self.assertTrue('project_id' in log['error'])
def __send_export(self, opts):
    """Upload a completed Diffgram export to a Google Cloud Storage bucket.

    Validates config and opts, checks export permissions/status, renders the
    export to bytes, uploads it under opts['path'], and records an Event for
    both the error and success outcomes.

    :param opts: dict with 'path' (bucket folder, '' or trailing '/'),
        'format' ('JSON' or 'YAML'), 'export_id', 'bucket_name', and
        'event_data' carrying the requesting user id.
    :return: {'result': True} on success, or {'log': ...} / the failing
        check's log on error.
    """
    # First validate the connector's own config (project_string_id).
    spec_list = [{'project_string_id': dict}]
    log = regular_log.default()
    log, input = regular_input.input_check_many(
        untrusted_input=self.config_data, spec_list=spec_list, log=log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}
    # Then validate the per-call opts.
    spec_list = [
        {
            'path': str
        },
        {
            "format": {
                'default': 'JSON',
                'kind': str,
                'valid_values_list': ['JSON', 'YAML']
            }
        },
        {
            'export_id': str
        },
        {
            'bucket_name': str
        },
    ]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input=opts,
                                                spec_list=spec_list,
                                                log=log,
                                                string_len_not_zero=False)
    if len(log["error"].keys()) >= 1:
        return {'log': log}
    # The destination must be a folder prefix (or bucket root), never a file.
    if not opts['path'].endswith('/') and opts['path'] != '':
        log['error'][
            'path'] = 'Path on bucket must be a folder, not a filename.'
        return {'log': log}
    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(
            session, self.config_data['project_string_id'])
        member = session.query(Member).filter(
            Member.user_id == opts['event_data']['request_user']).first()
        export = session.query(Export).filter(
            Export.id == opts['export_id']).first()
        # Check perms and export status.
        export_check_result = check_export_permissions_and_status(
            export, self.config_data['project_string_id'], session)
        # NOTE(review): `> 1` lets a log with exactly one error key through;
        # validation above uses `>= 1` — confirm which is intended.
        if len(export_check_result['error'].keys()) > 1:
            # Record the failure as an auditable Event before returning.
            log = regular_log.default()
            log['error'] = export_check_result['error']
            log['error']['file_name'] = opts['path']
            log['opts'] = opts
            Event.new(
                session=session,
                member_id=opts['event_data']['request_user'],
                kind='google_cloud_new_export_error',
                description='Google cloud export error for {}'.format(
                    opts['path']),
                error_log=log,
                member=member,
                project_id=project.id,
                success=False)
            return export_check_result
        bucket = self.connection_client.get_bucket(opts['bucket_name'])
        # Render the export payload in-memory and upload as a single blob.
        result = export_view_core(export=export,
                                  format=opts['format'],
                                  return_type='bytes')
        filename = generate_file_name_from_export(export, session)
        if opts['path'] != '':
            blob = bucket.blob('{}{}.{}'.format(opts['path'], filename,
                                                opts['format'].lower()))
        else:
            blob = bucket.blob('{}.{}'.format(filename,
                                              opts['format'].lower()))
        blob.upload_from_string(result)
        log = regular_log.default()
        log['opts'] = opts
        # NOTE(review): success Event passes error_log=opts rather than the
        # log built just above — looks like it may have been meant as `log`.
        Event.new(session=session,
                  member_id=opts['event_data']['request_user'],
                  kind='google_cloud_new_export_success',
                  description='New cloud export for {}'.format(blob.name),
                  error_log=opts,
                  member=member,
                  project_id=project.id,
                  success=True)
        return {'result': True}
def __init__(self, session, task_template):
    """Hold the session/task-template pair; the concrete strategy is
    selected later (starts as None)."""
    self.session = session
    self.task_template = task_template
    self.log = regular_log.default()
    self.strategy = None
def __fetch_folder(self, opts):
    """Import every image/video object under the given bucket path(s) into
    Diffgram, enqueueing one input packet per blob and recording an Event
    per skipped or imported file.

    :param opts: dict with 'path' (str or list of prefixes), 'bucket_name',
        'event_data' (request user id), plus optional enqueue params
        (job_id, batch_id, directory_id, video_split_duration).
    :return: list of created inputs, or {'result': 'error'} when the
        connector has no project_string_id configured.
    """
    result = []
    if self.config_data.get('project_string_id') is None:
        return {'result': 'error'}
    paths = opts['path']
    if type(paths) != list:
        paths = [paths]
    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(
            session, self.config_data.get('project_string_id'))
        member = session.query(Member).filter(
            Member.user_id == opts['event_data']['request_user']).first()
        for path in paths:
            blobs = self.connection_client.list_blobs(opts['bucket_name'],
                                                      prefix=path)
            for blob in blobs:
                # Skip folder placeholder objects.
                if blob.name.endswith('/'):
                    continue
                # Signed URL valid for 30 days so ingestion can download it.
                blob_expiry = int(time.time() + (60 * 60 * 24 * 30))
                signed_url = blob.generate_signed_url(expiration=blob_expiry)
                # Deduct media type from the file extension.
                # NOTE(review): blob.path is the API URL path — blob.name
                # looks like the intended source for the suffix; confirm.
                extension = Path(blob.path).suffix
                media_type = None
                if extension in images_allowed_file_names:
                    media_type = 'image'
                elif extension in videos_allowed_file_names:
                    media_type = 'video'
                else:
                    logging.warn('File: {} must type of: {} {}'.format(
                        blob.name, str(images_allowed_file_names),
                        str(videos_allowed_file_names)))
                    log = regular_log.default()
                    log['error'][
                        'invalid_type'] = 'File must type of: {} {}'.format(
                            str(images_allowed_file_names),
                            str(videos_allowed_file_names))
                    log['error']['file_name'] = path
                    log['opts'] = opts
                    Event.new(
                        session=session,
                        member_id=opts['event_data']['request_user'],
                        kind='google_cloud_new_import_warning',
                        description=
                        'Skipped import for {}, invalid file type.'.format(
                            blob.name),
                        error_log=log,
                        project_id=project.id,
                        member=member,
                        success=False)
                    continue
                # BUG FIX: the original reset `result = []` here inside the
                # loop, so only the last created input survived to the return.
                # TODO: check Input() table for duplicate file?
                created_input = packet.enqueue_packet(
                    self.config_data['project_string_id'],
                    session=session,
                    media_url=signed_url,
                    media_type=media_type,
                    job_id=opts.get('job_id'),
                    batch_id=opts.get('batch_id'),
                    file_name=path,
                    video_split_duration=opts.get('video_split_duration'),
                    directory_id=opts.get('directory_id'),
                    extract_labels_from_batch=True)
                log = regular_log.default()
                log['opts'] = opts
                Event.new(session=session,
                          member_id=opts['event_data']['request_user'],
                          kind='google_cloud_new_import_success',
                          description='New cloud import for {}'.format(
                              blob.name),
                          error_log=opts,
                          project_id=project.id,
                          member=member,
                          success=True)
                result.append(created_input)
    return result
def connect(self):
    """Create the Labelbox API client from the stored client secret."""
    log = regular_log.default()
    client_secret = self.auth_data['client_secret']
    self.connection_client = labelbox.Client(client_secret)
    return {'result': True}
class Input(Base):
    """One media-ingestion record: a URL/file entering Diffgram, its
    processing status, retry bookkeeping, and links to the file/project/
    directory/job it produces or belongs to."""
    __tablename__ = 'input'

    id = Column(Integer, primary_key=True)

    created_time = Column(DateTime, default=datetime.datetime.utcnow)
    time_completed = Column(DateTime)
    time_loaded_video = Column(DateTime)  # We assume this includes upload
    # Frames can start processing as this happens
    # But it's still an "end point" to measure
    # ie we could look at other options to speed that up.
    time_video_write_finished = Column(DateTime)
    time_pushed_all_frames_to_queue = Column(DateTime)
    # Yes some work to update this
    # BUT this is probably a better measure for more closely tracking
    # if the process crashes so worth it.
    time_updated = Column(DateTime, onupdate=datetime.datetime.utcnow)
    time_last_attempted = Column(Integer)

    # TODO list available modes here...
    mode = Column(String)

    url = Column(String())
    media_type = Column(String())  # image, frame, video, csv

    # Why not name this "source"
    # TODO naming of this attribute could probably be improved.
    type = Column(String())  # ["from_url", "from_video_split"]

    allow_csv = Column(Boolean())
    allow_duplicates = Column(Boolean(), default=False)

    # By default we don't defer processing, but can if needed
    # At the moment this flag is only used "in flight"
    # So eventually, in theory, all the flags will be False.
    # Not sure if that's ok or bad?
    processing_deferred = Column(Boolean(), default=False)

    status = Column(String(), default="init")
    status_text = Column(String())
    offset_in_seconds = Column(Integer)
    percent_complete = Column(Float, default=0.0)
    description = Column(String())
    size = Column(Integer)
    archived = Column(Boolean, default=False)

    raw_data_blob_path = Column(String())
    # video_processed_blob_path = Column(String())
    resumable_url = Column(String())

    # For AWS S3 Uploads
    upload_aws_id = Column(String())
    upload_aws_parts_list = Column(MutableDict.as_mutable(JSONEncodedDict))

    # For Azure Uploads
    upload_azure_block_list = Column(MutableDict.as_mutable(JSONEncodedDict))

    video_split_duration = Column(Integer())
    # For now inferring from video_split_duration
    # if it exists then we assume we want to split it
    video_was_split = Column(Boolean)

    retry_log = Column(MutableDict.as_mutable(JSONEncodedDict))
    # I think it would be good for this to be seperate
    # For easy searching
    # Default to 0 so we can do < a value instead of None checks
    retry_count = Column(Integer, default=0)

    # Use get_by_id().
    # See parent_input = in process_media.py
    parent_input_id = Column(Integer, ForeignKey('input.id'))
    # parent_input = relationship("Input", foreign_keys=[parent_input_id])

    # context of say a video file
    parent_file_id = deferred(Column(Integer, ForeignKey('file.id')))
    parent_file = relationship("File", foreign_keys=[parent_file_id])

    file_id = Column(Integer, ForeignKey('file.id'))
    file = relationship("File", foreign_keys=[file_id])

    newly_copied_file_id = Column(Integer, ForeignKey('file.id'))
    newly_copied_file = relationship("File",
                                     foreign_keys=[newly_copied_file_id])

    # TODO: ADD TO PRODUCTION
    add_link = Column(Boolean)
    remove_link = Column(Boolean)
    copy_instance_list = Column(Boolean, default=False)
    sequence_map = Column(MutableDict.as_mutable(JSONEncodedDict))

    file_metadata = Column(MutableDict.as_mutable(JSONB))

    task_id = Column(Integer, ForeignKey('task.id'))
    task = relationship("Task", foreign_keys=[task_id])
    task_action = Column(String())

    external_map_id = Column(Integer, ForeignKey('external_map.id'))
    external_map = relationship("ExternalMap", foreign_keys=[external_map_id])
    external_map_action = Column(String())

    job_id = Column(Integer, ForeignKey('job.id'))
    job = relationship("Job")

    # Also include image or video?
    directory_id = Column(Integer,
                          ForeignKey('working_dir.id'))  # target directory
    directory = relationship("WorkingDir", foreign_keys=[directory_id])

    source_directory_id = Column(
        Integer, ForeignKey('working_dir.id'))  # For internal only
    source_directory = relationship("WorkingDir",
                                    foreign_keys=[source_directory_id])
    invalid_directory_permission = Column(Boolean)

    project_id = Column(Integer, ForeignKey('project.id'))
    project = relationship("Project")

    user_id = Column(Integer, ForeignKey('userbase.id'))
    user = relationship("User")

    batch_id = Column(Integer, ForeignKey('input_batch.id'))
    batch = relationship("InputBatch", foreign_keys=[batch_id])

    temp_dir = Column(String())
    temp_dir_path_and_filename = Column(String())
    dzuuid = Column(String())
    original_filename = Column(String())
    extension = Column(String())
    instance_list = Column(MutableDict.as_mutable(JSONEncodedDict))
    frame_packet_map = Column(MutableDict.as_mutable(JSONEncodedDict))

    # FIX: pass the callable (not its result) so SQLAlchemy evaluates the
    # default per-row at insert time; the original `regular_log.default()`
    # created one dict at import time shared as every row's default.
    update_log = Column(MutableDict.as_mutable(JSONEncodedDict),
                        default=regular_log.default)

    # New Sept 3, 2020
    # Context of video. This way don't have to check video each time
    # to see where it ends.
    video_parent_length = Column(Integer)

    # NOTE(review): partial index covers archived IS TRUE, but
    # directory_not_equal_to_status() filters archived != True — confirm
    # the predicate isn't meant to be is_(False).
    __table_args__ = (Index('index__processing_deferred__archived',
                            "processing_deferred",
                            "archived",
                            postgresql_where=(archived.is_(True))), )

    def parent_input(self, session):
        """Return the parent Input row (e.g. the original video for a
        split segment), or None when this is an original."""
        if not self.parent_input_id:
            return None
        return session.query(Input).filter(
            Input.id == self.parent_input_id).first()

    def child_list(self, session):
        """Return all Inputs whose parent is this row."""
        return session.query(Input).filter(
            Input.parent_input_id == self.id).all()

    @staticmethod
    def new(project=None,
            project_id=None,
            media_type: str = None,
            type: str = None,
            mode: str = None,
            url: str = None,
            job_id: int = None,
            video_parent_length: int = None,
            source_directory_id: int = None,
            remove_link: bool = None,
            add_link: bool = None,
            copy_instance_list: bool = None,
            directory_id: int = None,
            file_id: int = None,
            parent_file_id: int = None,
            newly_copied_file_id: int = None,
            sequence_map: dict = None,
            processing_deferred: bool = False,
            parent_input_id: int = None,
            batch_id: int = None,
            video_split_duration: int = None,
            file_metadata: dict = None):
        """
        Helps insure not forgetting stuff...
        does not add to session or flush because we may not always
        want to do that.
        Different ways files can come in here...

        NOTE(review): source_directory_id / remove_link / add_link are
        accepted but not set on the new row — confirm callers expect that.
        """
        # Careful to check parent otherwise tries to recusrively split.
        # if there is no parent then it's assumed to be the "original"
        # if a video_split is provided then we use it.
        if parent_input_id is None and video_split_duration is None:
            video_split_duration = 30

        input = Input(project=project,
                      project_id=project_id,
                      file_id=file_id,
                      mode=mode,
                      newly_copied_file_id=newly_copied_file_id,
                      media_type=media_type,
                      type=type,
                      url=url,
                      job_id=job_id,
                      directory_id=directory_id,
                      sequence_map=sequence_map,
                      processing_deferred=processing_deferred,
                      parent_input_id=parent_input_id,
                      video_parent_length=video_parent_length,
                      video_split_duration=video_split_duration,
                      batch_id=batch_id,
                      copy_instance_list=copy_instance_list,
                      file_metadata=file_metadata)
        input.parent_file_id = parent_file_id
        return input

    @staticmethod
    def get_by_id(session, id: int, skip_locked: bool = False):
        """Fetch an Input by id; optionally SKIP LOCKED for queue workers."""
        query = session.query(Input).filter(Input.id == id)
        if skip_locked == True:
            query = query.with_for_update(skip_locked=True)
        return query.first()

    def serialize(self):
        """Serialize for API responses; hides the directory when the caller
        lacks directory permission."""
        directory = None
        if self.directory and not self.invalid_directory_permission:
            directory = self.directory.serialize_simple()

        # Total time
        # TODO maybe look at time last attempted too..
        # ALSO we may want to actually declare a "completion"
        # time, this just assumes it doesn't get updated after
        # completion
        total_time = None
        if self.created_time and self.time_updated:
            total_time = self.time_updated - self.created_time

        return {
            'id': self.id,
            'created_time': self.created_time,
            'time_updated': self.time_updated,
            'total_time': str(total_time),
            'media_type': self.media_type,
            'original_filename': self.original_filename,
            'status': self.status,
            'status_text': self.status_text,
            'directory': directory,
            'percent_complete': self.percent_complete,
            'processing_deferred': self.processing_deferred,
            'time_last_attempted': self.time_last_attempted,
            'retry_log': self.retry_log,
            'retry_count': self.retry_count,
            'video_split_duration': self.video_split_duration,
            'video_was_split': self.video_was_split,
            # For debugging
            'raw_data_blob_path': self.raw_data_blob_path,
            'source': self.type,
            'mode': self.mode,
            'file_id': self.file_id,
            'batch_id': self.batch_id,
            'task_id': self.task_id,  # Include task_id
            'update_log': self.update_log,
            'instance_list': self.instance_list,
            'frame_packet_map': self.frame_packet_map,
            'newly_copied_file_id': self.newly_copied_file_id
        }

    def serialize_with_frame_packet(self):
        """serialize() plus the (potentially large) frame packet map and
        instance list."""
        result = self.serialize()
        result['frame_packet_map'] = self.frame_packet_map
        result['instance_list'] = self.instance_list
        return result

    @staticmethod
    def directory_not_equal_to_status(session,
                                      directory_id,
                                      status="success",
                                      return_type="count"):
        """
        Returns 0 if there are no files equal to status
        otherwise returns count of files != to status
        """
        file_link_sub_query = WorkingDirFileLink.get_sub_query(
            session, directory_id)
        assert file_link_sub_query is not None

        # TODO should we exclude
        # failed ones optionally?...
        # We could do status not in list [failed_flag, success] etc..
        query = session.query(Input).filter(
            Input.file_id == file_link_sub_query.c.file_id,
            Input.status != status, Input.archived != True)

        if return_type == "count":
            return query.count()
        if return_type == "objects":
            return query.all()
def test_api_create_sequence_preview(self):
    """The create-preview endpoint returns the cached preview for a valid
    sequence; the core errors for a sequence from another project."""
    label = data_mocking.create_label({'name': 'apple'}, self.session)
    label_file = data_mocking.create_label_file({
        'label': label,
        'project_id': self.project.id,
    }, self.session)
    video_file = data_mocking.create_file({
        'project_id': self.project.id,
        'type': 'video',
    }, self.session)
    sequence = data_mocking.create_sequence({
        'label_file_id': label_file.id,
        'video_file_id': video_file.id,
        'cache_expiry': time.time() + 500000,
        'number': 1,
    }, self.session)
    video_file_bad = data_mocking.create_file({
        'project_id': self.project2.id,
        'type': 'video',
    }, self.session)
    preview_url = 'https://picsum.photos/200/300'
    instance = data_mocking.create_instance({
        'project_id': self.project.id,
        'type': 'box',
        'x_min': 0,
        'x_max': 0,
        'y_min': 0,
        'y_max': 0,
        'file_id': video_file.id,
        'soft_delete': False,
        'sequence_id': sequence.id,
        'preview_image_url': preview_url,
        'preview_image_url_expiry': 900000000,
    }, self.session)
    sequence.instance_preview_cache = {
        'id': instance.id,
        'file_id': sequence.video_file.id,
        'preview_image_url': preview_url,
    }
    self.session.commit()

    endpoint = "/api/project/{}/sequence/{}/create-preview".format(
        self.project.project_string_id,
        sequence.id,
    )
    auth_api = common_actions.create_project_auth(project=self.project,
                                                  session=self.session)
    raw_credentials = "{}:{}".format(auth_api.client_id,
                                     auth_api.client_secret)
    credentials = b64encode(raw_credentials.encode()).decode('utf-8')
    response = self.client.post(
        endpoint,
        data=json.dumps({}),
        headers={
            'directory_id': str(self.project.directory_default_id),
            'Authorization': 'Basic {}'.format(credentials),
        })
    data = response.json
    self.assertTrue('result' in data)
    self.assertTrue('log' in data)
    self.assertFalse(len(data['log']['error'].keys()), 0)
    preview = data['result']['instance_preview']
    self.assertEqual(preview['id'], instance.id)
    self.assertEqual(preview['file_id'], video_file.id)
    self.assertEqual(preview['preview_image_url'], preview_url)

    # Error case: sequence attached to another project's video.
    sequence2 = data_mocking.create_sequence({
        'label_file_id': label_file.id,
        'video_file_id': video_file_bad.id,
        'cache_expiry': time.time() + 500000,
        'number': 1,
    }, self.session)
    result, log = sequence_preview_create.create_sequence_preview_core(
        session=self.session,
        log=regular_log.default(),
        project=self.project,
        sequence_id=sequence2.id)
    self.assertEqual(len(log['error'].keys()), 1)
    self.assertTrue('project_id' in log['error'])
def copy_file_from_existing(session,
                            working_dir,
                            existing_file,
                            copy_instance_list: bool = False,
                            log=None,
                            add_link: bool = True,
                            remove_link: bool = True,
                            orginal_directory_id=None,
                            previous_video_parent_id=None,
                            sequence_map=None,
                            deep_copy=False,
                            defer_copy=False,
                            ann_is_complete_reset=False,
                            batch_id=None,
                            flush_session=False,
                            working_dir_id: int = None):
    """
    Create a new File record copied from existing_file, optionally copying
    its instance list and adding/removing directory links.

    For images (copy, not move) and videos, the copy can be deferred to the
    walrus service via an interservice request; in that case nothing is
    created here and the function returns None after noting progress in log.

    :param session: DB session.
    :param working_dir: target directory object; when given, its id
        overrides working_dir_id.
    :param existing_file: source File to copy.
    :param copy_instance_list: also copy the file's instances
        (images/frames only; removed instances are excluded).
    :param log: regular_log-style dict; a fresh one is created when omitted.
        Fixed: the previous default `regular_log.default()` was evaluated
        once at definition time, so all callers shared (and mutated) one
        dict — the classic mutable-default pitfall.
    :param add_link: attach the new file to working_dir_id.
    :param remove_link: detach existing_file from working_dir_id,
        making this a "move" rather than a "copy".
    :param orginal_directory_id: source directory id (name kept as-is,
        typo and all, for keyword-caller compatibility). For video, used to
        get the list of video files; also forwarded for deferred copies.
    :param previous_video_parent_id: video parent file id to assign on the
        new file (see note below).
    :param sequence_map: optional {old_sequence_id: new_sequence_id}
        mapping applied to copied instances.
    :param deep_copy: currently unused here; kept for caller compatibility.
    :param defer_copy: hand the copy off to the walrus service.
    :param ann_is_complete_reset: when True, do NOT copy the annotation
        completion status — e.g. task files want a fresh status.
    :param batch_id: forwarded for deferred copies.
    :param flush_session: flush the session before returning.
    :param working_dir_id: target directory id when working_dir is not
        passed.
    :return: the new File, or None when the copy was deferred.
    """
    if log is None:
        log = regular_log.default()

    if working_dir:
        working_dir_id = working_dir.id

    # IMAGE defer: only for pure copies (remove_link False); moves are
    # handled inline below.
    if existing_file.type == 'image' and defer_copy and not remove_link:
        regular_methods.transmit_interservice_request_after_commit(
            session=session,
            message='image_copy',
            logger=logger,
            service_target='walrus',
            id=existing_file.id,
            project_string_id=existing_file.project.project_string_id,
            extra_params={
                'file_id': existing_file.id,
                'copy_instance_list': copy_instance_list,
                'destination_working_dir_id': working_dir_id,
                'source_working_dir_id': orginal_directory_id,
                'add_link': add_link,
                'batch_id': batch_id,
                'remove_link': remove_link,
            })
        log['info'][
            'message'] = 'File copy in progress. Please check progress in the file operations progress section.'
        return

    # VIDEO defer: the walrus also copies all of the video's frame files.
    if existing_file.type == "video" and defer_copy is True:
        regular_methods.transmit_interservice_request_after_commit(
            session=session,
            message='video_copy',
            logger=logger,
            service_target='walrus',
            id=existing_file.id,
            project_string_id=existing_file.project.project_string_id,
            extra_params={
                'file_id': existing_file.id,
                'copy_instance_list': copy_instance_list,
                'destination_working_dir_id': working_dir_id,
                'source_working_dir_id': orginal_directory_id,
                'add_link': add_link,
                'batch_id': batch_id,
                'remove_link': remove_link,
                'frame_count': existing_file.video.frame_count
            })
        log['info'][
            'message'] = 'File copy in progress. Please check progress in the file operations progress section.'
        return

    file = new_file_database_object_from_existing(session)
    file.type = existing_file.type

    # We need the project scope to do permissions: when a video is done
    # we only move the video into the directory, not its frame images.
    file.project_id = existing_file.project_id
    file.image_id = existing_file.image_id
    file.label_id = existing_file.label_id
    file.video_id = existing_file.video_id
    file.global_frame_number = existing_file.global_frame_number
    file.colour = existing_file.colour

    file_relationship(session, file, existing_file)

    file.state = "changed"
    file.frame_number = existing_file.frame_number  # Ok if None

    # "Copying" normally includes the completion status; tasks (and perhaps
    # others in future) want the status reset on the new file.
    if ann_is_complete_reset is False:
        file.ann_is_complete = existing_file.ann_is_complete

    file.original_filename = existing_file.original_filename

    # Careful: this is the NEW video parent — if we copied the existing
    # value the frame images would stay related to the old file.
    file.video_parent_file_id = previous_video_parent_id

    session.add(file)

    # NOTE(review): add_link is not passed as True when copying for a task
    # — confirm whether that is intentional.
    if add_link is True:
        working_dir_database_models.WorkingDirFileLink.add(
            session, working_dir_id, file)

    if remove_link is True:
        working_dir_database_models.WorkingDirFileLink.remove(
            session, working_dir_id, existing_file.id)

    logger.debug('existing_file.type {}'.format(existing_file.type))
    logger.debug('copy_instance_list {}'.format(copy_instance_list))

    if existing_file.type in ['image', 'frame'
                              ] and copy_instance_list is True:
        file.count_instances_changed = existing_file.count_instances_changed
        file.set_cache_key_dirty('instance_list')

        instance_list = Instance.list(
            session=session,
            file_id=existing_file.id,
            limit=None)  # Excludes removed by default

        logger.debug('instance_list len {}'.format(len(instance_list)))

        for instance in instance_list:
            instance_sequence_id = instance.sequence_id
            if sequence_map is not None:
                logger.debug('sequence_map {}'.format(sequence_map))
                instance_sequence_id = sequence_map.get(
                    instance_sequence_id)

            new_instance = Instance(
                file_id=file.id,  # IMPORTANT and different from pattern
                sequence_id=instance_sequence_id,  # Different
                parent_file_id=file.
                video_parent_file_id,  # Cache for video parent file ID.
                project_id=instance.project_id,
                x_min=instance.x_min,
                y_min=instance.y_min,
                x_max=instance.x_max,
                y_max=instance.y_max,
                width=instance.width,
                height=instance.height,
                label_file_id=instance.label_file_id,
                hash=instance.hash,
                type=instance.type,
                number=instance.number,
                frame_number=instance.frame_number,
                global_frame_number=instance.global_frame_number,
                machine_made=instance.machine_made,
                fan_made=instance.fan_made,
                points=instance.points,
                soft_delete=instance.soft_delete,
                center_x=instance.center_x,
                center_y=instance.center_y,
                angle=instance.angle,
                p1=instance.p1,
                p2=instance.p2,
                cp=instance.cp,
                interpolated=instance.interpolated,
                front_face=instance.front_face,
                rear_face=instance.rear_face,
                creation_ref_id=instance.creation_ref_id)
            session.add(new_instance)

    if flush_session:
        session.flush()

    return file
def test_execute_after_launch_strategy(self):
    """
    DatasaurTaskTemplateAfterLaunchStrategy.execute_after_launch_strategy()
    with all Datasaur API calls stubbed should create exactly one Task per
    returned document, and record ExternalMap rows for the label set, the
    Datasaur project and each file.
    """
    file = data_mocking.create_file(
        {
            'project_id': self.project.id,
            'type': 'text'
        }, self.session)
    label = data_mocking.create_label({
        'name': 'mylabel',
    }, self.session)
    label_file = data_mocking.create_label_file(
        {
            'label': label,
            'project_id': self.project.id
        }, self.session)
    attach_dir1 = data_mocking.create_directory(
        {
            'project': self.project,
            'user': self.project_data['users'][0],
            'files': [file]
        }, self.session)
    connection = data_mocking.create_connection(
        {
            'name': 'test',
            'integration_name': 'datasaur',
            'project_id': self.project.id
        }, self.session)
    # Fixture label dict as serialized by the frontend.
    labeldict = {
        "label_file_list_serialized": [{
            "id": label_file.id,
            "hash": "083e9ebc48d64e9a8874c6b95f490b56b8c4c5b0f4dacd90bd3534085e87d9fa",
            "type": "label",
            "state": "added",
            "created_time": "2020-07-15T18:48:34.477333",
            "time_last_updated": "2020-07-15T18:48:34.705290",
            "ann_is_complete": None,
            "original_filename": None,
            "video_id": None,
            "video_parent_file_id": None,
            "count_instances_changed": None,
            "attribute_group_list": [{
                "id": 2,
                "kind": "multiple_select",
                "is_root": True,
                "name": "carwheeltag",
                "prompt": "How is this car wheel",
                "show_prompt": True,
                "time_updated": "2020-08-05 19:37:07.703576",
                "attribute_template_list": [{
                    "id": 4,
                    "name": "Is rounded",
                    "value_type": None,
                    "archived": False,
                    "group_id": 2,
                    "display_order": None
                }, {
                    "id": 5,
                    "name": "is squared",
                    "value_type": None,
                    "archived": False,
                    "group_id": 2,
                    "display_order": None
                }, {
                    "id": 6,
                    "name": "is beautiful",
                    "value_type": None,
                    "archived": False,
                    "group_id": 2,
                    "display_order": None
                }, {
                    "id": 7,
                    "name": "is crazy",
                    "value_type": None,
                    "archived": False,
                    "group_id": 2,
                    "display_order": None
                }]
            }, {
                "id": 3,
                "kind": "select",
                "is_root": True,
                "name": "selectwheel",
                "prompt": "Please selectt something special about this wheels",
                "show_prompt": True,
                "time_updated": "2020-08-12 16:29:54.817801",
                "attribute_template_list": [{
                    "id": 10,
                    "name": "Silver Wheel",
                    "value_type": None,
                    "archived": False,
                    "group_id": 3,
                    "display_order": None
                }, {
                    "id": 9,
                    "name": "+Gold wheel",
                    "value_type": None,
                    "archived": False,
                    "group_id": 3,
                    "display_order": None
                }]
            }, {
                "id": 4,
                "kind": "text",
                "is_root": True,
                "name": "freewheel",
                "prompt": "What are your thought on this wheel?",
                "show_prompt": True,
                "time_updated": "2020-08-05 20:50:59.195249",
                "attribute_template_list": []
            }, {
                "id": 5,
                "kind": "radio",
                "is_root": True,
                "name": "clean",
                "prompt": "Is this wheel clean?",
                "show_prompt": True,
                "time_updated": "2020-08-05 20:53:46.314143",
                "attribute_template_list": [{
                    "id": 11,
                    "name": "Wheel is dirty",
                    "value_type": None,
                    "archived": False,
                    "group_id": 5,
                    "display_order": None
                }, {
                    "id": 12,
                    "name": "Wheek is clean",
                    "value_type": None,
                    "archived": False,
                    "group_id": 5,
                    "display_order": None
                }]
            }, {
                "id": 6,
                "kind": "text",
                "is_root": True,
                "name": "TEST",
                "prompt": "TEST28",
                "show_prompt": True,
                "time_updated": "2020-08-12 16:30:03.770141",
                "attribute_template_list": []
            }],
            "colour": {
                "hex": "#194d33",
                "hsl": {
                    "h": 150,
                    "s": 0.5,
                    "l": 0.2,
                    "a": 1
                },
                "hsv": {
                    "h": 150,
                    "s": 0.66,
                    "v": 0.3,
                    "a": 1
                },
                "rgba": {
                    "r": 25,
                    "g": 77,
                    "b": 51,
                    "a": 1
                },
                "a": 1
            },
            "label": {
                "id": 5,
                "name": "Car wheel",
                "default_sequences_to_single_frame": False
            }
        }],
        "label_file_colour_map": {}
    }
    job = data_mocking.create_job(
        {
            'name': 'my-test-job-{}'.format(1),
            'project': self.project,
            'status': 'active',
            'type': "Normal",
            'label_dict': labeldict,
            'attached_directories': [attach_dir1],
            'interface_connection_id': connection.id
        }, self.session)
    strategy = DatasaurTaskTemplateAfterLaunchStrategy(
        task_template=job,
        session=self.session,
        log=regular_log.default())

    # Stub out all three outbound Datasaur API calls; one flat `with`
    # instead of three nested blocks.
    with patch.object(DatasaurTaskTemplateAfterLaunchStrategy,
                      'create_datasaur_labelset',
                      return_value={
                          'result': {
                              'createLabelSet': {
                                  'id': 'mytestid'
                              }
                          }
                      }), \
         patch.object(DatasaurTaskTemplateAfterLaunchStrategy,
                      'create_datasaur_project',
                      return_value={'result': {
                          'id': 'datasaur_test'
                      }}), \
         patch.object(DatasaurTaskTemplateAfterLaunchStrategy,
                      'get_project_files_list',
                      return_value={
                          'result': {
                              'id': 'datasaur_test',
                              'documents': [{
                                  'id': str(file.id),
                                  'name': str(file.id)
                              }]
                          }
                      }):
        strategy.execute_after_launch_strategy()
        commit_with_rollback(self.session)

        # One document stubbed above -> exactly one task expected.
        tasks_count = self.session.query(Task).filter(
            Task.job_id == job.id).count()
        self.assertEqual(tasks_count, 1)

        external_map = ExternalMap.get(
            session=self.session,
            job_id=job.id,
            external_id='mytestid',
            connection_id=connection.id,
            diffgram_class_string='',
            type='{}_label_set'.format(connection.integration_name),
        )
        self.assertNotEqual(external_map, None)

        project_map = ExternalMap.get(
            session=self.session,
            job_id=job.id,
            external_id='datasaur_test',
            connection_id=connection.id,
            diffgram_class_string='task_template',
            type='{}_project'.format(connection.integration_name),
        )
        self.assertNotEqual(project_map, None)

        files_maps = ExternalMap.get(
            session=self.session,
            job_id=job.id,
            external_id=str(file.id),
            file_id=file.id,
            connection_id=connection.id,
            diffgram_class_string='file',
            type='{}_file'.format(connection.integration_name),
        )
        self.assertNotEqual(files_maps, None)
def test_validate_file_data_for_input_packet(self):
    """
    packet.validate_file_data_for_input_packet() should resolve a file by
    file_id, accept a media URL only when a type is provided, resolve
    file_name + directory_id, and log exactly one error otherwise.
    """
    log = regular_log.default()
    file1 = data_mocking.create_file({'project_id': self.project.id},
                                     self.session)

    # Case of file ID
    input_data = {'file_id': file1.id}
    result, log, file_id = packet.validate_file_data_for_input_packet(
        session=self.session,
        project_string_id=self.project.project_string_id,
        input=input_data,
        log=log)
    self.assertTrue(result)
    self.assertEqual(len(log['error'].keys()), 0)
    self.assertEqual(file_id, file1.id)

    # Case of Media URL: missing media 'type' must be rejected.
    input_data = {'media': {'url': 'test_url'}}
    result, log, file_id = packet.validate_file_data_for_input_packet(
        session=self.session,
        project_string_id=self.project.project_string_id,
        input=input_data,
        log=log)
    self.assertFalse(result)
    self.assertEqual(len(log['error'].keys()), 1)
    self.assertEqual(file_id, None)

    # Same URL with a type is valid; no existing file is matched.
    input_data['media']['type'] = 'image'
    log = regular_log.default()
    result, log, file_id = packet.validate_file_data_for_input_packet(
        session=self.session,
        project_string_id=self.project.project_string_id,
        input=input_data,
        log=log)
    self.assertTrue(result)
    self.assertEqual(len(log['error'].keys()), 0)
    self.assertEqual(file_id, None)

    # Case of Filename + Directory
    file2 = data_mocking.create_file(
        {
            'project_id': self.project.id,
            'original_filename': 'test1.jpg'
        }, self.session)
    directory = data_mocking.create_directory(
        {
            'project': self.project,
            'user': self.project_data['users'][0],
            'files': [file2]
        }, self.session)
    input_data = {'file_name': 'test1.jpg', 'directory_id': directory.id}
    log = regular_log.default()
    result, log, file_id = packet.validate_file_data_for_input_packet(
        session=self.session,
        project_string_id=self.project.project_string_id,
        input=input_data,
        log=log)
    self.assertTrue(result)
    self.assertEqual(len(log['error'].keys()), 0)
    self.assertEqual(file_id, file2.id)

    # Unknown filename in the directory -> exactly one error, no file.
    input_data = {
        'file_name': 'test1111.jpg',
        'directory_id': directory.id
    }
    log = regular_log.default()
    result, log, file_id = packet.validate_file_data_for_input_packet(
        session=self.session,
        project_string_id=self.project.project_string_id,
        input=input_data,
        log=log)
    # Removed leftover debug print('log', log).
    self.assertFalse(result)
    self.assertEqual(len(log['error'].keys()), 1)
    self.assertEqual(file_id, None)