def threaded_job_resync(task_template_id, member_id):
    """
    Re-sync a task template (job) with its attached 'sync' directories.

    Walks every directory attached to the job with sync enabled and, for
    each file that has no task in the job yet, creates file links (and
    tasks) via JobDirectorySyncManager, then updates the job's file
    count statistic.

    :param task_template_id: id of the Job (task template) to resync.
    :param member_id: id of the Member performing the resync.
    :return: list of the File objects that were missing and got synced.
    """
    with sessionMaker.session_scope_threaded() as session:
        log = regular_log.default()
        member = Member.get_by_id(session=session, member_id=member_id)
        task_template = Job.get_by_id(session=session, job_id=task_template_id)
        attached_dirs = task_template.get_attached_dirs(
            session=session, sync_types=['sync'])
        task_list = task_template.task_list(session=session)
        # Set for O(1) membership tests (original scanned a list per file).
        existing_file_ids = {t.file_id for t in task_list}
        missing_files = []
        for directory in attached_dirs:
            files = WorkingDirFileLink.file_list(
                session=session,
                limit=None,
                working_dir_id=directory.id)
            for file in files:
                if file.id in existing_file_ids:
                    continue
                logger.info(
                    'Resyncing File {} on Job {} From Dir {}'.format(
                        file.id, task_template_id, directory.id))
                job_sync_dir_manager = job_dir_sync_utils.JobDirectorySyncManager(
                    session=session,
                    job=task_template,
                    log=log)
                job_sync_dir_manager.create_file_links_for_attached_dirs(
                    sync_only=True,
                    create_tasks=True,
                    file_to_link=file,
                    file_to_link_dataset=directory,
                    related_input=None,
                    member=member)
                # NOTE(review): updating the statistic once per missing file is
                # extra work; kept here to preserve existing behavior — confirm
                # whether it can be hoisted after the loops.
                task_template.update_file_count_statistic(session=session)
                missing_files.append(file)
        logger.info(
            'Resyncing on Job {} Success. {} Missing files synced'.format(
                task_template_id, len(missing_files)))
        return missing_files
def process_sync_actions(self, session, sync_action):
    """
    Executes sync action depending on the type of action
    :param session:
    :param sync_action:
    :return:
    """
    event_log = regular_log.default()
    sync_event = sync_action.sync_event
    event_manager = SyncEventManager(session=session, sync_event=sync_event)
    logger.debug('Processing new sync event.')

    trigger = sync_event.event_trigger_type
    if trigger == 'task_completed':
        # A task finished: notify every observer attached to its job.
        task = sync_event.completed_task
        observable = task_file_observers.JobObservable(
            session=session,
            log=event_log,
            job=task.job,
            task=task,
            sync_events_manager=event_manager)
        observable.notify_all_observers(defer=False)
        return

    if trigger != 'file_operation':
        logger.info(
            '{} event trigger not supported for processing.'.format(
                sync_event.event_trigger_type))
        return

    logger.debug('Processing file_operation sync event.')
    destination = sync_event.dataset_destination
    source = None
    target_file = sync_event.file

    effect = sync_event.event_effect_type
    if effect not in ['file_copy', 'file_move']:
        logger.info(
            '{} event effect not supported for processing.'.format(
                sync_event.event_effect_type))
        return

    logger.debug('Processing file_copy sync event.')
    if effect == 'file_copy':
        # we need to provide the source dir for validation of incoming dir.
        source = sync_event.dataset_source
        target_file = sync_event.new_file_copy

    sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=session,
        log=event_log,
        directory=destination,
    )
    # we need to provide the source dir, so validation of incoming
    # directory does not fail when checking the directory the file is coming from.
    logger.debug('Syncing file on jobs...')
    sync_manager.add_file_to_all_jobs(
        file=target_file,
        source_dir=source,
        create_tasks=True,
    )
def test__sync_all_jobs_from_dir(self):
    """A file in a synced directory should get a link and a task in every attached job."""
    project = self.project_data['project']
    test_file = data_mocking.create_file({'project_id': project.id}, self.session)
    jobs = [
        data_mocking.create_job({
            'project': project,
            'status': 'active'
        }, session=self.session)
        for _ in range(2)
    ]
    directory = data_mocking.create_directory(
        {
            'project': project,
            'user': self.project_data['users'][0],
            'files': [test_file],
            'jobs_to_sync': {
                'job_ids': [job.id for job in jobs]
            }
        },
        self.session)
    for job in jobs:
        job.update_attached_directories(self.session, [{
            'directory_id': directory.id,
            'selected': 'sync'
        }])
    log = regular_log.default()
    sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=self.session,
        log=log,
        job=None)
    sync_manager._JobDirectorySyncManager__sync_all_jobs_from_dir(
        test_file, directory, directory, create_tasks=True)
    for job in jobs:
        link_query = self.session.query(WorkingDirFileLink).filter(
            WorkingDirFileLink.file_id == test_file.id,
            WorkingDirFileLink.working_dir_id == job.directory_id)
        self.assertIsNotNone(link_query.first())
        task_query = self.session.query(Task).filter(Task.job_id == job.id)
        self.assertIsNotNone(task_query.first())
def execute_after_launch_strategy(self):
    """
    This strategy will attach files from sync directories and creates tasks
    in Diffgram for each of them.
    :return:
    """
    sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=self.session,
        job=self.task_template,
        log=self.log)
    sync_manager.create_file_links_for_attached_dirs(create_tasks=True)

    # Launch finished: take the job out of the initial file sync queue.
    self.task_template.pending_initial_dir_sync = False
    self.session.add(self.task_template)

    logger.debug(
        'StandardTaskTemplateAfterLaunchStrategy for Task Template ID: {} completed successfully.'.format(
            self.task_template.id))
def test__add_file_into_job(self):
    """__add_file_into_job links the file, and only creates a task once the job is active."""
    project = self.project_data['project']
    test_file = data_mocking.create_file({'project_id': project.id}, self.session)
    job = data_mocking.create_job({'project': project}, session=self.session)
    directory = data_mocking.create_directory(
        {
            'project': project,
            'user': self.project_data['users'][0],
            'files': [test_file]
        },
        self.session)
    log = regular_log.default()
    sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=self.session,
        log=log,
        job=job)
    sync_manager._JobDirectorySyncManager__add_file_into_job(
        test_file, directory, create_tasks=True)
    commit_with_rollback(self.session)
    link_query = self.session.query(WorkingDirFileLink).filter(
        WorkingDirFileLink.file_id == test_file.id,
        WorkingDirFileLink.working_dir_id == job.directory_id)
    self.assertIsNotNone(link_query.first())
    # Job is not active yet, so no task should exist.
    task_query = self.session.query(Task).filter(Task.job_id == job.id)
    self.assertIsNone(task_query.first())
    # If job has correct status task should be created.
    job.status = 'active'
    self.session.add(job)
    commit_with_rollback(self.session)
    sync_manager._JobDirectorySyncManager__add_file_into_job(
        test_file, directory, create_tasks=True)
    task_query = self.session.query(Task).filter(Task.job_id == job.id)
    self.assertIsNotNone(task_query.first())
    commit_with_rollback(self.session)
    # Retest for case of an existing file/task.
    event_manager = SyncEventManager.create_sync_event_and_manager(
        session=self.session, status='started')
    sync_manager._JobDirectorySyncManager__add_file_into_job(
        test_file,
        directory,
        create_tasks=True,
        sync_event_manager=event_manager)
    task_query = self.session.query(Task).filter(Task.job_id == job.id)
    self.assertIsNotNone(task_query.first())
def task_template_launch_core(session, job):
    """
    This function is in charge of attaching the labels to the job,
    setting status to active and then creating the root tasks for
    each of the files attached to the job.

    :param session: database session
    :param job: Job (task template) to launch; may be None
    :return: the Job on success, False when job is None
    """
    if not job:
        return False

    # TODO other pre checks (ie that guide is attached,
    # has a bid, files, etc.
    # check Status is "launchable" ie in draft

    # Update job status
    log = regular_log.default()

    # CAUTION using default directory for project which may not be right
    result = task_template_label_attach(
        session=session,
        task_template=job,
        project_directory=job.project.directory_default,
    )
    # NOTE(review): `result` is never inspected — a failed label attach is
    # silently ignored here; confirm that is intentional.

    # QUESTION Do we only need to create tasks for "normal work things"?
    # ie for exams it gets done as part of the process
    # QUESTION are these only relevant for normal work? not exam?
    if job.type == "Normal":
        task_template_new_normal(session=session, task_template=job)
    if job.type == "Exam":
        task_template_new_exam(session=session, task_template=job)

    # Add job to all attached directories
    # NOTE(review): this manager is constructed but no method is called on
    # it — either dead code or the constructor itself has side effects;
    # verify before removing.
    job_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=session,
        job=job,
        log=log)

    assert job is not None
    session.add(job)
    return job
def test_create_task_from_file(self):
    """create_task_from_file should produce a task for a file in a synced active job."""
    project = self.project_data['project']
    test_file = data_mocking.create_file({'project_id': project.id}, self.session)
    job = data_mocking.create_job({
        'project': project,
        'status': 'active'
    }, session=self.session)
    directory = data_mocking.create_directory(
        {
            'project': project,
            'user': self.project_data['users'][0],
            'files': [test_file],
            'jobs_to_sync': {
                'job_ids': [job.id]
            }
        },
        self.session)
    log = regular_log.default()
    attached_dirs = [{
        'directory_id': directory.id,
        'nickname': directory.nickname,
        'selected': 'sync'
    }]
    job.update_attached_directories(self.session, attached_dirs, delete_existing=True)
    self.session.add(job)
    commit_with_rollback(self.session)
    sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=self.session,
        log=log,
        job=job,
    )
    sync_manager.create_task_from_file(test_file)
    commit_with_rollback(self.session)
    self.session.flush()
    created_tasks = self.session.query(Task).filter(Task.job_id == job.id)
    self.assertIsNotNone(created_tasks.first())
def test_threaded_job_resync(self):
    """threaded_job_resync should return the files that had no task yet."""
    job = data_mocking.create_job(
        {
            'name': 'my-test-job',
            'status': 'active',
            'project': self.project
        },
        self.session)
    auth_api = common_actions.create_project_auth(
        project=job.project, session=self.session)
    synced_file = data_mocking.create_file(
        {'project_id': self.project.id}, self.session)
    unsynced_files = [
        data_mocking.create_file({'project_id': self.project.id}, self.session)
        for _ in range(2)
    ]
    directory = data_mocking.create_directory(
        {
            'project': self.project,
            'user': self.project_data['users'][0],
            'files': [synced_file] + unsynced_files
        },
        self.session)
    job.update_attached_directories(self.session, [{
        'directory_id': directory.id,
        'selected': 'sync'
    }])
    log = regular_log.default()
    sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=self.session,
        log=log,
        job=job)
    # Pre-sync only the first file so the other two count as missing.
    sync_manager._JobDirectorySyncManager__add_file_into_job(
        synced_file, directory, create_tasks=True)
    self.session.commit()
    result = threaded_job_resync(
        task_template_id=job.id,
        member_id=auth_api.member_id)
    self.assertEqual(len(result), 2)
def test_remove_job_from_all_dirs(self):
    """Hard-removing a job should delete all of its directory attachments."""
    project = self.project_data['project']
    test_file = data_mocking.create_file({'project_id': project.id}, self.session)
    job = data_mocking.create_job({
        'project': project,
        'status': 'active'
    }, session=self.session)
    directory = data_mocking.create_directory(
        {
            'project': project,
            'user': self.project_data['users'][0],
            'files': [test_file],
            'jobs_to_sync': {
                'job_ids': [job.id]
            }
        },
        self.session)
    log = regular_log.default()
    attached_dirs = [{
        'directory_id': directory.id,
        'nickname': directory.nickname,
        'selected': 'sync'
    }]
    job.update_attached_directories(self.session, attached_dirs, delete_existing=True)
    self.session.add(job)
    commit_with_rollback(self.session)
    sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=self.session,
        log=log,
        job=job,
        directory=directory)
    sync_manager.remove_job_from_all_dirs(soft_delete=False)
    commit_with_rollback(self.session)
    self.session.flush()
    remaining_attachments = self.session.query(JobWorkingDir).filter(
        JobWorkingDir.working_dir_id == directory.id).all()
    self.assertEqual(len(remaining_attachments), 0)
def job_cancel_core(session, user, log, mode, job_id):
    """
    Cancel, archive, or hard-delete a job and its tasks.

    QUESTIONs
        option to "hide" job as well?
        What about super admin option to actually delete
        (ie for database clean up...)

    Arguments
        session, db object
        user, class User object
        log, diffgram regular log dict
        mode, one of "cancel", "archive", "delete" (see branches below)
        job_id, id of the Job to operate on

    Returns
        (True, log) on success, (False, log) on validation failure
    """
    job = Job.get_by_id(session=session, job_id=job_id)
    if user is None or job is None:
        log['error']['user_job'] = "No user or job"
        return False, log

    # JOB LIMITs
    result, log = job_cancel_limits(session, log, user, job, mode)
    if result is False:
        return result, log

    # TASK specific limits
    # Difference that a job may have tasks that
    # Aren't cancelable
    status_list = None
    if mode in ["cancel"]:
        status_list = ["created", "available", "active"]

    if mode in ["delete"]:
        # Don't allow even a super admin to delete completed
        # from this method?
        # QUESTION
        # For that matter should a "completed" job even be allowed to be deleted?
        status_list = ["draft", "created", "available", "active"]

    # TODO disallow deleting jobs that have
    # any completed tasks / transactions

    if status_list:
        # Just a question, is there really any point of doing this
        # If the the job was cancelled?
        # like maybe for deleting but status I don't know
        task_list = job.task_list(session=session, status_list=status_list)

        for task in task_list:
            if mode == "cancel":
                session.add(task)
                task.status = "cancelled"
            if mode == "delete":
                session.delete(task)

    if mode == "archive":
        # We may want to rename "hidden" to archived?
        session.add(job)
        job.status = 'archived'
        job.hidden = True
        job.member_updated = user.member
        # Assume we want to remove sync dirs on archive, we might remove if that is not the case.
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            job=job,
            session=session,
            log=log)
        job_dir_sync_manager.remove_job_from_all_dirs()

    if mode == "cancel":
        session.add(job)
        job.status = "cancelled"
        job.member_updated = user.member

    if mode == "delete":
        """
        Question, is there a better way to do this
        with CASCADE / sql rules?
        It feels a bit funny to do it this way
        BUT also want to be careful since so much reuse!!!
        ie wouldn't want to delete a guide that was attached
        to a job on cascade
        """
        # What about a job's directory,
        # TODO what about deleting associated credential links / other tables?

        user_to_job = User_To_Job.get_single_by_ids(session=session,
                                                    user_id=user.id,
                                                    job_id=job.id)

        # NOTE(review): this second task_list call has no status filter, so
        # it also covers tasks the filtered loop above did not delete.
        task_list = job.task_list(session)
        for task in task_list:
            if task.file.type == "video":
                # Is this the right way to delete stuff here?
                video_frame_query = WorkingDirFileLink.image_file_list_from_video(
                    session=session,
                    video_parent_file_id=task.file.id,
                    return_mode="query")
                # Not working yet!
                video_frame_query.delete()
            session.delete(task)
            session.delete(task.file)

        # TODO still getting an integrity error
        # Must be some file that exists related to this job?
        # Or some other file that got updated incorrectly?
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            job=job,
            session=session,
            log=log)
        job_dir_sync_manager.remove_job_from_all_dirs(soft_delete=False)
        session.delete(job)
        session.delete(user_to_job)

    return True, log
def test_create_file_links_for_attached_dirs(self):
    """Syncing attached dirs should link every file into the job and create one task per file."""
    project = self.project_data['project']
    files = [
        data_mocking.create_file({'project_id': project.id}, self.session)
        for _ in range(2)
    ]
    job = data_mocking.create_job({
        'project': project,
        'status': 'active'
    }, session=self.session)
    directories = []
    for f in files:
        directories.append(
            data_mocking.create_directory(
                {
                    'project': project,
                    'user': self.project_data['users'][0],
                    'files': [f],
                    'jobs_to_sync': {
                        'job_ids': [job.id]
                    }
                },
                self.session))
    log = regular_log.default()
    dir_list = [{
        'directory_id': d.id,
        'nickname': d.nickname,
        'selected': 'sync'
    } for d in directories]
    job.update_attached_directories(self.session, dir_list, delete_existing=True)
    self.session.add(job)
    for d in directories:
        self.session.add(d)
    commit_with_rollback(self.session)
    sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=self.session,
        log=log,
        job=job,
    )
    sync_manager.create_file_links_for_attached_dirs(create_tasks=True)
    commit_with_rollback(self.session)
    self.session.flush()
    for f in files:
        link_query = self.session.query(WorkingDirFileLink).filter(
            WorkingDirFileLink.file_id == f.id,
            WorkingDirFileLink.working_dir_id == job.directory_id)
        self.assertIsNotNone(link_query.first())
    all_tasks = self.session.query(Task).filter(Task.job_id == job.id)
    self.assertEqual(len(all_tasks.all()), 2)
    for f in files:
        per_file_task = self.session.query(Task).filter(
            Task.job_id == job.id,
            Task.file_id == f.id)
        self.assertIsNotNone(per_file_task.first())
def update_directory_core(session, project, nickname, mode, directory_id, log):
    """
    Rename or archive a project directory.

    :param session: database session
    :param project: Project the directory belongs to
    :param nickname: new nickname (used only for mode "RENAME")
    :param mode: "RENAME" or "ARCHIVE"
    :param directory_id: id of the WorkingDir to update
    :param log: diffgram regular log dict; errors/info recorded here
    :return: log

    TODO
     thoughts on options to "promote" a directory to default
     or "jump to" a directory for a user based on prior
     one they looked at?
     (This second idea would perhaps be better in
     a different area of code note sure)
    """
    directory = WorkingDir.get(session=session,
                               directory_id=directory_id,
                               project_id=project.id)
    if directory is None:
        # NOTE(review): this assigns a string to log['error'] while other
        # code paths index into it as a dict (see "limit" below) — confirm
        # which shape callers expect.
        log['error'] = "No directory found"
        return log

    session.add(directory)
    link = Project_Directory_List.link(session=session,
                                       working_dir_id=directory.id,
                                       project_id=project.id)
    session.add(link)

    if mode == "RENAME":
        if not nickname:
            log['error'] = "No nickname provided"
            return log
        # Keep the directory and its project-list link in agreement.
        directory.nickname = nickname
        link.nickname = nickname
        log['info'] = "Updated Nickname."
        project.set_cache_key_dirty(cache_key="directory_list")
        return log

    if mode == "ARCHIVE":
        if directory.id == project.directory_default_id:
            """
            We may swap default directory to a different one.
            Context that prior we just rejected request
            But in a larger project, especially created from SDK,
            the default dir just sits there and it make it look funny
            (especailly since we don't have say counts per dir
            or that other type of stuff yet.)
            """
            project_directory_list = Project_Directory_List.get_by_project(
                session=session,
                project_id=project.id,
                kind="objects",
                exclude_archived=True,
                directory_ids_to_ignore_list=[directory.id])
            if len(project_directory_list) >= 1:
                """
                Realize that labels rely on project default directory
                so dn't allow this to change yet
                But can still hide directory if other stuff is not there...
                Not 100% clear what the side effects of not having
                a defualt dir are will have to search it.
                more to think about to do this well
                ie perhaps labels should be in their own directory by default?
                """
                pass
                # project.directory_default_id = project_directory_list[0].working_dir_id
                # session.add(project)
            else:
                # Archiving the only remaining (default) directory is refused.
                log['error']["limit"] = "Can't archive default directory."
                return log

        directory.archived = True
        link.archived = True
        # Detach from any jobs that were syncing this directory.
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            job=None,
            session=session,
            log=log,
            directory=directory)
        job_dir_sync_manager.remove_directory_from_all_attached_jobs()
        # Regenerate project dir cache.
        project.set_cache_key_dirty(cache_key="directory_list")
        return log
def perform_sync_events_after_file_transfer(session,
                                            source_directory,
                                            destination_directory,
                                            log,
                                            log_sync_events,
                                            transfer_action,
                                            file,
                                            member,
                                            new_file,
                                            defer_sync,
                                            sync_event_manager=None):
    """
    This function is executed after a move/copy of a file. It logs the sync
    event and calls all the task templates that are observing the destination
    directory of the copy/move for creating tasks.

    :param session:
    :param source_directory: directory the file came from (may be None)
    :param destination_directory: directory the file moved/copied into
    :param log: diffgram regular log dict
    :param log_sync_events: when True, record a SyncEvent for this transfer
    :param transfer_action: e.g. 'move' or 'copy' — used to build the
        event_effect_type ('file_move' / 'file_copy')
    :param file: the original File being transferred
    :param member: Member who triggered the transfer
    :param new_file: the copy created by a 'copy' transfer (if any)
    :param defer_sync: when True, enqueue the sync instead of running it now
    :param sync_event_manager: existing manager; created here when None and
        log_sync_events is set
    :return:
    """
    if sync_event_manager is None and log_sync_events:
        sync_event_manager = SyncEventManager.create_sync_event_and_manager(
            session=session,
            dataset_source=source_directory,
            dataset_destination=destination_directory,
            # NOTE(review): assumes destination_directory is not None —
            # .nickname is accessed unconditionally; confirm callers guarantee it.
            description='File {} from dataset {} to dataset {}.'.format(
                transfer_action,
                source_directory.nickname if source_directory else '--',
                destination_directory.nickname,
            ),
            file=file,
            new_file_copy=new_file,
            job=None,
            input_id=file.input_id,
            project=file.project,
            created_task=None,
            completed_task=None,
            transfer_action=transfer_action,
            event_effect_type='file_{}'.format(transfer_action),
            event_trigger_type='file_operation',
            status='completed',
            member_created=member)
        # Placed inside the if: sync_event_manager may still be None when
        # log_sync_events is False, so this must not run unconditionally.
        logger.debug('Created sync_event {}'.format(
            sync_event_manager.sync_event.id))

    # TODO: UPDATE JOBS WHERE DIRECTORY SHOULD BE SYNCED
    # Note that at this point we pass the source directory even though new file link has been created.
    # This is because the session has not been committed and new file link still won't be found in query.
    if not defer_sync:
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=session,
            log=log,
            directory=destination_directory,
        )
        # Note we add the source directory here, because file link has not been committed. So the file link
        # on destination directory still does not exist at this point. That's why we need to provide the source
        # dir, so validation of incoming directory does not fail when checking the directory the file is coming from.
        job_dir_sync_manager.add_file_to_all_jobs(
            file=file,
            source_dir=source_directory,
            create_tasks=True,
        )
    else:
        if log_sync_events and sync_event_manager.sync_event.event_trigger_type == 'file_operation':
            SyncActionsQueue.enqueue(session, sync_event_manager.sync_event)