Example #1
def threaded_job_resync(task_template_id, member_id):
    with sessionMaker.session_scope_threaded() as session:
        log = regular_log.default()
        member = Member.get_by_id(session=session, member_id=member_id)
        task_template = Job.get_by_id(session=session, job_id=task_template_id)
        attached_dirs = task_template.get_attached_dirs(session=session,
                                                        sync_types=['sync'])
        task_list = task_template.task_list(session=session)
        file_ids = [t.file_id for t in task_list]
        missing_files = []
        for directory in attached_dirs:
            files = WorkingDirFileLink.file_list(session=session,
                                                 limit=None,
                                                 working_dir_id=directory.id)
            for file in files:
                if file.id not in file_ids:
                    logger.info(
                        'Resyncing File {} on Job {} From Dir {}'.format(
                            file.id, task_template_id, directory.id))
                    job_sync_dir_manager = job_dir_sync_utils.JobDirectorySyncManager(
                        session=session, job=task_template, log=log)

                    job_sync_dir_manager.create_file_links_for_attached_dirs(
                        sync_only=True,
                        create_tasks=True,
                        file_to_link=file,
                        file_to_link_dataset=directory,
                        related_input=None,
                        member=member)
                    task_template.update_file_count_statistic(session=session)
                    missing_files.append(file)

    logger.info('Resync on Job {} succeeded: {} missing files synced.'.format(
        task_template_id, len(missing_files)))
    return missing_files
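Usage note (a minimal sketch, not from the source): since the function opens its own session scope, it can be dispatched on a worker thread with the standard library; the IDs below are placeholders.

import threading

resync_thread = threading.Thread(
    target=threaded_job_resync,
    kwargs={'task_template_id': 1, 'member_id': 1})  # placeholder IDs
resync_thread.start()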
Example #2
    def process_sync_actions(self, session, sync_action):
        """
            Executes sync action depending on the type of action
        :param session:
        :param sync_action:
        :return:
        """
        log = regular_log.default()
        sync_event = sync_action.sync_event
        sync_events_manager = SyncEventManager(session=session,
                                               sync_event=sync_event)
        logger.debug('Processing new sync event.')
        if sync_event.event_trigger_type == 'task_completed':
            completed_task = sync_event.completed_task
            job_observable = task_file_observers.JobObservable(
                session=session,
                log=log,
                job=completed_task.job,
                task=completed_task,
                sync_events_manager=sync_events_manager)
            job_observable.notify_all_observers(defer=False)
        elif sync_event.event_trigger_type == 'file_operation':
            logger.debug('Processing file_operation sync event.')
            destination_directory = sync_event.dataset_destination
            source_directory = None
            file = sync_event.file
            if sync_event.event_effect_type in ['file_copy', 'file_move']:
                logger.debug('Processing file_copy/file_move sync event.')
                if sync_event.event_effect_type == 'file_copy':
                    # we need to provide the source dir for validation of incoming dir.
                    source_directory = sync_event.dataset_source
                    file = sync_event.new_file_copy

                job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
                    session=session,
                    log=log,
                    directory=destination_directory,
                )
                # we need to provide the source dir, so validation of incoming
                # directory does not fail when checking the directory the file is coming from.
                logger.debug('Syncing file on jobs...')
                job_dir_sync_manager.add_file_to_all_jobs(
                    file=file,
                    source_dir=source_directory,
                    create_tasks=True,
                )
            else:
                logger.info(
                    '{} event effect not supported for processing.'.format(
                        sync_event.event_effect_type))
        else:
            logger.info(
                '{} event trigger not supported for processing.'.format(
                    sync_event.event_trigger_type))
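Usage note (a hypothetical sketch): process_sync_actions is an instance method, so a queue consumer would construct the owning processor and feed it pending actions. The processor variable, the SyncAction model fields, and the 'queued' status filter below are illustrative assumptions, not confirmed API.

# Hypothetical driver loop; the processor object and the SyncAction
# query are stand-ins for illustration only.
with sessionMaker.session_scope_threaded() as session:
    pending = session.query(SyncAction).filter(
        SyncAction.status == 'queued')  # assumed status column/value
    for sync_action in pending:
        processor.process_sync_actions(session=session, sync_action=sync_action)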
Example #3
    def test__sync_all_jobs_from_dir(self):
        project = self.project_data['project']
        file = data_mocking.create_file({'project_id': project.id},
                                        self.session)
        job1 = data_mocking.create_job({
            'project': project,
            'status': 'active'
        },
                                       session=self.session)
        job2 = data_mocking.create_job({
            'project': project,
            'status': 'active'
        },
                                       session=self.session)
        directory = data_mocking.create_directory(
            {
                'project': project,
                'user': self.project_data['users'][0],
                'files': [file],
                'jobs_to_sync': {
                    'job_ids': [job1.id, job2.id]
                }
            }, self.session)
        for job in [job1, job2]:
            job.update_attached_directories(self.session,
                                            [{
                                                'directory_id': directory.id,
                                                'selected': 'sync'
                                            }])
        log = regular_log.default()
        sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=self.session, log=log, job=None)
        sync_manager._JobDirectorySyncManager__sync_all_jobs_from_dir(
            file, directory, directory, create_tasks=True)

        dir_link = self.session.query(WorkingDirFileLink).filter(
            WorkingDirFileLink.file_id == file.id,
            WorkingDirFileLink.working_dir_id == job1.directory_id)

        dir_link2 = self.session.query(WorkingDirFileLink).filter(
            WorkingDirFileLink.file_id == file.id,
            WorkingDirFileLink.working_dir_id == job2.directory_id)
        self.assertTrue(dir_link.first() is not None)
        self.assertTrue(dir_link2.first() is not None)
        task1 = self.session.query(Task).filter(Task.job_id == job1.id)
        task2 = self.session.query(Task).filter(Task.job_id == job2.id)
        self.assertTrue(task1.first() is not None)
        self.assertTrue(task2.first() is not None)
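Aside: the double-underscore prefix on __sync_all_jobs_from_dir triggers Python name mangling, which is why the test reaches it through the _JobDirectorySyncManager-prefixed attribute. A minimal self-contained illustration:

class Manager:
    def __helper(self):  # name-mangled to _Manager__helper
        return 'called'

m = Manager()
# m.__helper() would raise AttributeError from outside the class;
# the mangled attribute name is how tests can reach it:
assert m._Manager__helper() == 'called'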
Example #4
    def execute_after_launch_strategy(self):
        """
            This strategy will attach files from sync directories and creates tasks in
            Diffgram for each of them.
        :return:
        """
        job_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=self.session,
            job=self.task_template,
            log=self.log
        )

        job_sync_manager.create_file_links_for_attached_dirs(create_tasks=True)
        # This removes the job from initial file sync queue.
        self.task_template.pending_initial_dir_sync = False
        self.session.add(self.task_template)
        logger.debug('StandardTaskTemplateAfterLaunchStrategy for Task Template ID: {} completed successfully.'.format(
            self.task_template.id))
Example #5
    def test__add_file_into_job(self):
        project = self.project_data['project']
        file = data_mocking.create_file({'project_id': project.id},
                                        self.session)
        job = data_mocking.create_job({'project': project},
                                      session=self.session)
        directory = data_mocking.create_directory(
            {
                'project': project,
                'user': self.project_data['users'][0],
                'files': [file]
            }, self.session)
        log = regular_log.default()
        sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=self.session, log=log, job=job)
        sync_manager._JobDirectorySyncManager__add_file_into_job(
            file, directory, create_tasks=True)
        commit_with_rollback(self.session)

        dir_link = self.session.query(WorkingDirFileLink).filter(
            WorkingDirFileLink.file_id == file.id,
            WorkingDirFileLink.working_dir_id == job.directory_id)
        self.assertTrue(dir_link.first() is not None)
        task = self.session.query(Task).filter(Task.job_id == job.id)
        self.assertTrue(task.first() is None)

        # If the job has the correct status, a task should be created.
        job.status = 'active'
        self.session.add(job)
        commit_with_rollback(self.session)

        sync_manager._JobDirectorySyncManager__add_file_into_job(
            file, directory, create_tasks=True)
        task = self.session.query(Task).filter(Task.job_id == job.id)
        self.assertTrue(task.first() is not None)
        commit_with_rollback(self.session)
        # Retest for case of an existing file/task.
        mngr = SyncEventManager.create_sync_event_and_manager(
            session=self.session, status='started')
        sync_manager._JobDirectorySyncManager__add_file_into_job(
            file, directory, create_tasks=True, sync_event_manager=mngr)
        task = self.session.query(Task).filter(Task.job_id == job.id)
        self.assertTrue(task.first() is not None)
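Aside: commit_with_rollback is used throughout these tests but not shown here. A minimal sketch of what such a helper typically does (the real Diffgram helper may differ):

def commit_with_rollback(session):
    # Commit the pending transaction; on failure, roll back so the
    # session stays usable, then re-raise for the caller/test to see.
    try:
        session.commit()
    except Exception:
        session.rollback()
        raise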
Example #6
def task_template_launch_core(session, job):
    """

        This function is in charge of attaching the labels to the job, setting status to active
        and then creating the root tasks for each of the files attached to the job.
    """
    if not job:
        return False
    # TODO other pre-checks (ie that guide is attached,
    # has a bid, files, etc.)

    # check Status is "launchable" ie in draft

    # Update job status
    log = regular_log.default()
    # CAUTION using default directory for project which may not be right
    result = task_template_label_attach(
        session=session,
        task_template=job,
        project_directory=job.project.directory_default,
    )

    # QUESTION Do we only need to create tasks for "normal work things"?
    # ie for exams it gets done as part of the process
    # QUESTION are these only relevant for normal work? not exam?

    if job.type == "Normal":
        task_template_new_normal(session=session, task_template=job)

    if job.type == "Exam":
        task_template_new_exam(session=session, task_template=job)

    # Add job to all attached directories
    job_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
        session=session, job=job, log=log)

    assert job is not None

    session.add(job)

    return job
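Usage note (a minimal sketch, assuming the session helpers from Example #1 are importable; the job ID is a placeholder):

with sessionMaker.session_scope_threaded() as session:
    job = Job.get_by_id(session=session, job_id=1)  # placeholder ID
    launched_job = task_template_launch_core(session=session, job=job)
    if launched_job is False:
        logger.error('Launch failed: job not found')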
Example #7
    def test_create_task_from_file(self):
        project = self.project_data['project']
        file = data_mocking.create_file({'project_id': project.id},
                                        self.session)
        job = data_mocking.create_job({
            'project': project,
            'status': 'active'
        },
                                      session=self.session)
        directory = data_mocking.create_directory(
            {
                'project': project,
                'user': self.project_data['users'][0],
                'files': [file],
                'jobs_to_sync': {
                    'job_ids': [job.id]
                }
            }, self.session)
        log = regular_log.default()
        dir_list = [{
            'directory_id': directory.id,
            'nickname': directory.nickname,
            'selected': 'sync'
        }]
        job.update_attached_directories(self.session,
                                        dir_list,
                                        delete_existing=True)
        self.session.add(job)
        commit_with_rollback(self.session)
        sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=self.session,
            log=log,
            job=job,
        )
        sync_manager.create_task_from_file(file)
        commit_with_rollback(self.session)
        self.session.flush()
        task = self.session.query(Task).filter(Task.job_id == job.id)
        self.assertTrue(task.first() is not None)
Example #8
    def test_threaded_job_resync(self):
        job = data_mocking.create_job(
            {
                'name': 'my-test-job',
                'status': 'active',
                'project': self.project
            }, self.session)
        auth_api = common_actions.create_project_auth(project=job.project,
                                                      session=self.session)
        file = data_mocking.create_file({'project_id': self.project.id},
                                        self.session)
        file_missing1 = data_mocking.create_file(
            {'project_id': self.project.id}, self.session)
        file_missing2 = data_mocking.create_file(
            {'project_id': self.project.id}, self.session)
        directory = data_mocking.create_directory(
            {
                'project': self.project,
                'user': self.project_data['users'][0],
                'files': [file, file_missing1, file_missing2]
            }, self.session)
        job.update_attached_directories(self.session,
                                        [{
                                            'directory_id': directory.id,
                                            'selected': 'sync'
                                        }])

        log = regular_log.default()
        sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=self.session, log=log, job=job)
        sync_manager._JobDirectorySyncManager__add_file_into_job(
            file, directory, create_tasks=True)
        self.session.commit()

        result = threaded_job_resync(task_template_id=job.id,
                                     member_id=auth_api.member_id)

        self.assertEqual(len(result), 2)
Example #9
    def test_remove_job_from_all_dirs(self):
        project = self.project_data['project']
        file = data_mocking.create_file({'project_id': project.id},
                                        self.session)
        job = data_mocking.create_job({
            'project': project,
            'status': 'active'
        },
                                      session=self.session)
        directory = data_mocking.create_directory(
            {
                'project': project,
                'user': self.project_data['users'][0],
                'files': [file],
                'jobs_to_sync': {
                    'job_ids': [job.id]
                }
            }, self.session)
        log = regular_log.default()
        dir_list = [{
            'directory_id': directory.id,
            'nickname': directory.nickname,
            'selected': 'sync'
        }]
        job.update_attached_directories(self.session,
                                        dir_list,
                                        delete_existing=True)
        self.session.add(job)
        commit_with_rollback(self.session)
        sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=self.session, log=log, job=job, directory=directory)
        sync_manager.remove_job_from_all_dirs(soft_delete=False)
        commit_with_rollback(self.session)
        self.session.flush()
        directory_attachments = self.session.query(JobWorkingDir).filter(
            JobWorkingDir.working_dir_id == directory.id).all()
        self.assertEqual(len(directory_attachments), 0)
Example #10
def job_cancel_core(session, user, log, mode, job_id):
    """

	QUESTIONs 
		option to "hide" job as well?
	
		What about super admin option to actually delete
		(ie for database clean up...)

	Arguments
		session, db ojbect
		user, class User object
		job, class Job object
		log, diffgram regular log dict

	Returns

	"""
    job = Job.get_by_id(session=session, job_id=job_id)

    if user is None or job is None:
        log['error']['user_job'] = "No user or job"
        return False, log

    # JOB LIMITs
    result, log = job_cancel_limits(session, log, user, job, mode)

    if result is False:
        return result, log

    # TASK specific limits
    # Difference: a job may have tasks that
    # aren't cancelable
    status_list = None

    if mode in ["cancel"]:
        status_list = ["created", "available", "active"]

    if mode in ["delete"]:
        # Don't allow even a super admin to delete completed
        # from this method?
        # QUESTION
        # For that matter should a "completed" job even be allowed to be deleted?
        status_list = ["draft", "created", "available", "active"]

    # TODO disallow deleting jobs that have
    # any completed tasks / transactions

    if status_list:

        # Just a question: is there really any point in doing this
        # if the job was cancelled?
        # Maybe for deleting; for status changes, I don't know.
        task_list = job.task_list(session=session, status_list=status_list)

        for task in task_list:

            if mode == "cancel":
                session.add(task)
                task.status = "cancelled"

            if mode == "delete":
                session.delete(task)

    if mode == "archive":
        # We may want to rename "hidden" to archived?
        session.add(job)
        job.status = 'archived'
        job.hidden = True
        job.member_updated = user.member

        # Assume we want to remove sync dirs on archive, we might remove if that is not the case.
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            job=job, session=session, log=log)
        job_dir_sync_manager.remove_job_from_all_dirs()

    if mode == "cancel":
        session.add(job)
        job.status = "cancelled"
        job.member_updated = user.member

    if mode == "delete":
        """
		Question, is there a better way to do this with
			CASCADE / sql rules?
			It feels a bit funny to do it this way
			BUT also want to be careful since so much reuse!!!
			ie wouldn't want to delete a guide that was 
			attached to a job on cascade
		"""

        # What about a job's directory,
        # TODO what about deleting associated credential links / other tables?

        user_to_job = User_To_Job.get_single_by_ids(session=session,
                                                    user_id=user.id,
                                                    job_id=job.id)

        task_list = job.task_list(session)

        for task in task_list:

            if task.file.type == "video":
                # Is this the right way to delete stuff here?
                video_frame_query = WorkingDirFileLink.image_file_list_from_video(
                    session=session,
                    video_parent_file_id=task.file.id,
                    return_mode="query")
                # Not working yet!
                video_frame_query.delete()

            session.delete(task)
            session.delete(task.file)

        # TODO still getting an integrity error
        # Must be some file that exists related to this job?
        # Or some other file that got updated incorrectly?
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            job=job, session=session, log=log)
        job_dir_sync_manager.remove_job_from_all_dirs(soft_delete=False)
        session.delete(job)
        session.delete(user_to_job)

    return True, log
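Usage note (a minimal sketch; session and user are assumed to be in scope and the job ID is a placeholder):

log = regular_log.default()
result, log = job_cancel_core(
    session=session,
    user=user,
    log=log,
    mode='cancel',  # one of 'cancel', 'archive', 'delete'
    job_id=1)       # placeholder ID
if result is False:
    logger.error('Cancel failed: {}'.format(log['error']))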
Example #11
    def test_create_file_links_for_attached_dirs(self):
        project = self.project_data['project']
        file1 = data_mocking.create_file({'project_id': project.id},
                                         self.session)
        file2 = data_mocking.create_file({'project_id': project.id},
                                         self.session)
        job = data_mocking.create_job({
            'project': project,
            'status': 'active'
        },
                                      session=self.session)
        directory1 = data_mocking.create_directory(
            {
                'project': project,
                'user': self.project_data['users'][0],
                'files': [file1],
                'jobs_to_sync': {
                    'job_ids': [job.id]
                }
            }, self.session)

        directory2 = data_mocking.create_directory(
            {
                'project': project,
                'user': self.project_data['users'][0],
                'files': [file2],
                'jobs_to_sync': {
                    'job_ids': [job.id]
                }
            }, self.session)
        log = regular_log.default()
        dir_list = [{
            'directory_id': directory1.id,
            'nickname': directory1.nickname,
            'selected': 'sync'
        }, {
            'directory_id': directory2.id,
            'nickname': directory2.nickname,
            'selected': 'sync'
        }]
        job.update_attached_directories(self.session,
                                        dir_list,
                                        delete_existing=True)
        self.session.add(job)
        self.session.add(directory1)
        self.session.add(directory2)
        commit_with_rollback(self.session)
        sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=self.session,
            log=log,
            job=job,
        )
        sync_manager.create_file_links_for_attached_dirs(create_tasks=True)
        commit_with_rollback(self.session)
        self.session.flush()
        dir_link1 = self.session.query(WorkingDirFileLink).filter(
            WorkingDirFileLink.file_id == file1.id,
            WorkingDirFileLink.working_dir_id == job.directory_id)
        dir_link2 = self.session.query(WorkingDirFileLink).filter(
            WorkingDirFileLink.file_id == file2.id,
            WorkingDirFileLink.working_dir_id == job.directory_id)
        self.assertTrue(dir_link1.first() is not None)
        self.assertTrue(dir_link2.first() is not None)
        task = self.session.query(Task).filter(Task.job_id == job.id)
        task1 = self.session.query(Task).filter(Task.job_id == job.id,
                                                Task.file_id == file1.id)
        task2 = self.session.query(Task).filter(Task.job_id == job.id,
                                                Task.file_id == file2.id)
        self.assertEqual(len(task.all()), 2)
        self.assertTrue(task1.first() is not None)
        self.assertTrue(task2.first() is not None)
Example #12
def update_directory_core(session, project, nickname, mode, directory_id, log):
    """
    TODO thoughts on options to "promote" a directory to default
    or "jump to" a directory for a user based on prior one they looked at?
    (This second idea would perhaps be better in a different area of code
    note sure)
    """

    directory = WorkingDir.get(session=session,
                               directory_id=directory_id,
                               project_id=project.id)

    if directory is None:
        log['error'] = "No directory found"
        return log

    session.add(directory)

    link = Project_Directory_List.link(session=session,
                                       working_dir_id=directory.id,
                                       project_id=project.id)

    session.add(link)

    if mode == "RENAME":

        if not nickname:
            log['error'] = "No nickname provided"
            return log

        directory.nickname = nickname
        link.nickname = nickname
        log['info'] = "Updated Nickname."
        project.set_cache_key_dirty(cache_key="directory_list")

        return log

    if mode == "ARCHIVE":

        if directory.id == project.directory_default_id:
            """
            We may swap default directory to a different one.
            Context that prior we just rejected request
            But in a larger project, especially created from 
            SDK, the default dir just sits there and it make it look funny
            (especailly since we don't have say counts per dir or
             that other type of stuff yet.)
            """

            project_directory_list = Project_Directory_List.get_by_project(
                session=session,
                project_id=project.id,
                kind="objects",
                exclude_archived=True,
                directory_ids_to_ignore_list=[directory.id])
            if len(project_directory_list) >= 1:
                """
                Realize that labels rely on project default directory
                so dn't allow this to change yet
                But can still hide directory if other stuff is not there...
                Not 100% clear what the side effects of not having a defualt dir
                are will have to search it. 
                more to think about to do this well
                ie perhaps labels should be in their own directory by default?
                """
                pass
            # project.directory_default_id = project_directory_list[0].working_dir_id
            # session.add(project)
            else:
                log['error']["limit"] = "Can't archive default directory."
                return log

        directory.archived = True
        link.archived = True
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            job=None, session=session, log=log, directory=directory)

        job_dir_sync_manager.remove_directory_from_all_attached_jobs()
        # Regenerate project dir cache.
        project.set_cache_key_dirty(cache_key="directory_list")

        return log
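Usage note (a minimal sketch; project and session are assumed to be in scope, the directory ID is a placeholder):

log = regular_log.default()
log = update_directory_core(
    session=session,
    project=project,
    nickname='renamed-dataset',  # only used for mode 'RENAME'
    mode='RENAME',               # or 'ARCHIVE'
    directory_id=1,              # placeholder ID
    log=log)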
Example #13
def perform_sync_events_after_file_transfer(session,
                                            source_directory,
                                            destination_directory,
                                            log,
                                            log_sync_events,
                                            transfer_action,
                                            file,
                                            member,
                                            new_file,
                                            defer_sync,
                                            sync_event_manager=None):
    """
    This function is executed after a move/copy of a file. It logs the sync event and calls all the
    task templates that are observing the destination directory of the copy/move for creating tasks.
    :param session:
    :param source_directory:
    :param destination_directory:
    :param log_sync_events:
    :param transfer_action:
    :param file:
    :param member:
    :param new_file:
    :param defer_sync:
    :param sync_event_manager:
    :return:
    """
    if sync_event_manager is None and log_sync_events:
        sync_event_manager = SyncEventManager.create_sync_event_and_manager(
            session=session,
            dataset_source=source_directory,
            dataset_destination=destination_directory,
            description='File {} from dataset {} to dataset {}.'.format(
                transfer_action,
                source_directory.nickname if source_directory else '--',
                destination_directory.nickname,
            ),
            file=file,
            new_file_copy=new_file,
            job=None,
            input_id=file.input_id,
            project=file.project,
            created_task=None,
            completed_task=None,
            transfer_action=transfer_action,
            event_effect_type='file_{}'.format(transfer_action),
            event_trigger_type='file_operation',
            status='completed',
            member_created=member)
        logger.debug('Created sync_event {}'.format(
            sync_event_manager.sync_event.id))
    # TODO: UPDATE JOBS WHERE DIRECTORY SHOULD BE SYNCED
    # Note that at this point we pass the source directory even though the new file link has been created.
    # This is because the session has not been committed, so the new file link still won't be found in a query.
    if not defer_sync:
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            session=session,
            log=log,
            directory=destination_directory,
        )
        # Note we add the source directory here, because file link has not been committed. So the file link
        # on destination directory still does not exist at this point. That's why we need to provide the source
        # dir, so validation of incoming directory does not fail when checking the directory the file is coming from.
        job_dir_sync_manager.add_file_to_all_jobs(
            file=file,
            source_dir=source_directory,
            create_tasks=True,
        )
    else:
        if log_sync_events and sync_event_manager.sync_event.event_trigger_type == 'file_operation':
            SyncActionsQueue.enqueue(session, sync_event_manager.sync_event)
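Usage note (a minimal sketch; the directory, file, and member objects are placeholders assumed to be in scope):

perform_sync_events_after_file_transfer(
    session=session,
    source_directory=source_dir,        # placeholder WorkingDir objects
    destination_directory=destination_dir,
    log=regular_log.default(),
    log_sync_events=True,
    transfer_action='copy',  # produces event_effect_type 'file_copy'
    file=file,
    member=member,
    new_file=new_file_copy,
    defer_sync=True)         # enqueue for async processing instead of syncing inline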