Example #1
def create_directory(dir_data, session):
    working_dir = WorkingDir()
    working_dir.user_id = dir_data['user'].id
    working_dir.project_id = dir_data['project'].id
    if dir_data.get('jobs_to_sync'):
        working_dir.jobs_to_sync = dir_data.get('jobs_to_sync')
    session.add(working_dir)
    regular_methods.commit_with_rollback(session)
    if dir_data.get('files'):
        file_list = dir_data.get('files')
        for file in file_list:
            WorkingDirFileLink.add(session, working_dir.id, file)
    regular_methods.commit_with_rollback(session)
    return working_dir
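
A minimal usage sketch for create_directory (hypothetical; assumes a SQLAlchemy session plus already-loaded User and Project ORM objects named user and project):

dir_data = {
    'user': user,          # assumed existing User object
    'project': project,    # assumed existing Project object
    'files': [],           # optional: File objects to link into the new directory
}
working_dir = create_directory(dir_data, session)
print(working_dir.id)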
Example #2
    def directory_not_equal_to_status(session,
                                      directory_id,
                                      status="success",
                                      return_type="count"):
        """
        Returns 0 if there are no files with a status other than `status`,
        otherwise returns the count of files whose status != `status`
        """

        file_link_sub_query = WorkingDirFileLink.get_sub_query(
            session, directory_id)

        assert file_link_sub_query is not None

        # TODO should we exclude
        # failed ones optionally?...
        # We could do status not in list [failed_flag, success] etc..

        query = session.query(Input).filter(
            Input.file_id == file_link_sub_query.c.file_id,
            Input.status != status, Input.archived != True)

        if return_type == "count":
            return query.count()

        if return_type == "objects":
            return query.all()
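
A usage sketch (assumption: the method is exposed as Input.directory_not_equal_to_status, as in Example #10 below), e.g. to gate further processing until every file in a directory has finished; directory_id is a hypothetical existing directory id:

pending_count = Input.directory_not_equal_to_status(
    session=session, directory_id=directory_id)
if pending_count > 0:
    # Some files have a status other than "success"; try again later.
    pass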
Example #3
def create_label_file(label_file_data, session):
    label_file = File()
    label_file.label = label_file_data.get('label')
    label_file.label_id = label_file_data.get('label').id
    label_file.project_id = label_file_data['project_id']
    label_file.state = label_file_data.get('state', 'added')
    label_file.type = 'label'
    session.add(label_file)
    regular_methods.commit_with_rollback(session)
    project = Project.get_by_id(session, label_file.project_id)
    if project:
        WorkingDirFileLink.add(session, project.directory_default_id,
                               label_file)
        project.refresh_label_dict(session)
    session.add(label_file)
    regular_methods.commit_with_rollback(session)
    return label_file
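
A usage sketch for create_label_file (hypothetical; label is assumed to be an existing Label ORM object and project an existing Project):

label_file_data = {
    'label': label,            # assumed existing Label object
    'project_id': project.id,
    'state': 'added',          # optional, defaults to 'added'
}
label_file = create_label_file(label_file_data, session)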
Example #4
    def refresh_label_dict(self, session):

        file_list = WorkingDirFileLink.file_list(
            session=session,
            working_dir_id=self.directory_default_id,
            limit=10000000,
            type="label",
            exclude_removed=False)  # eg for permissions
        if not self.label_dict:
            self.label_dict = {}
        self.label_dict['label_file_id_list'] = [file.id for file in file_list]
Example #5
def build_name_to_file_id_dict(session, directory_id):

    directory_id = int(directory_id)

    sub_query = WorkingDirFileLink.get_sub_query(session=session,
                                                 working_dir_id=directory_id,
                                                 type="label")

    # Could also try and merge on labels or just filter through here

    file_list = session.query(File).filter(File.id == sub_query.c.file_id,
                                           File.state != "removed").all()

    out = {}

    for file in file_list:
        out[file.label.name] = file.id

    return out, True
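
Callers unpack the (dict, bool) return value; a minimal sketch, assuming directory_id points at a directory containing label files:

name_to_file_id, success = build_name_to_file_id_dict(session, directory_id)
cat_file_id = name_to_file_id.get("cat")  # None if there is no label named "cat"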
Example #6
    def update_file_count_statistic(self, session):
        """
        In theory we could count each file as it gets added,
        but that seems prone to off-by-one errors in a distributed
        systems context.
        Instead we just query the count and update it here.

        Slight problem is that this statistic could be off if
        the file doesn't get removed properly from the job...

        """

        self.file_count_statistic = WorkingDirFileLink.file_list(
            session=session,
            working_dir_id=self.directory_id,
            counts_only=True,
            limit=None)

        session.add(self)
Example #7
    def regenerate_preview_file_list(self):
        """
        Returns
            Empty array if no files
            Otherwise array of preview files
            Treating this like a "cache", so callers can query it to
            get fresh previews.

        Thought process is that
            a) We may want to use files for other things
            b) Not clear if we want to use preview image or not
            c) It's more work to try to parse this into URLs only upfront,
              and the benefit isn't clear since we aren't storing that much data,
              and we may *want* to, say, get a preview of instances or
              something else too...

        Assumes:
            using self.session
            default directory
        """

        preview_file_list = []

        if self.session is None:
            return preview_file_list

        file_list = WorkingDirFileLink.file_list(
            session=self.session,
            working_dir_id=self.directory_default_id,
            limit=3,
            root_files_only=True  # Excludes labels  at time of writing
        )

        if not file_list:
            return preview_file_list

        for file in file_list:
            preview_file_list.append(file.serialize_with_type(self.session))

        return preview_file_list
Example #8
def new_external_export(session,
                        project,
                        export_id,
                        version=None,
                        working_dir=None,
                        use_request_context=True):
    """
    Create a new export data file

    This is run on first load

    session, session object
    project, project object


    Designed for external consumption

    returns {"success" : True} if successful

    Security model
     this is an internal function

     export web DOES the validation
     Job_permissions.check_job_after_project_already_valid()

    """

    logger.info("[Export processor] Started")
    result = False
    file_list = []  # guard: avoids an unbound variable below if export.source has no matching branch
    start_time = time.time()

    export = session.query(Export).filter(Export.id == export_id).first()

    member = None
    if use_request_context:
        user = User.get(session)

        export.user = user

        if user:
            member = user.member
        else:
            client_id = request.authorization.get('username', None)
            auth = Auth_api.get(session, client_id)
            member = auth.member

    session.add(export)

    if export.source == "task":

        if export.task and export.task.file:
            # Caution export.task not task

            file_list = [export.task.file]

    # While job could be None and still get files
    # if we do have a job id we may want to get
    # files not replaced in the directory yet.
    if export.source == "job":

        file_list = WorkingDirFileLink.file_list(
            session=session,
            limit=None,
            root_files_only=True,
            job_id=export.job_id,
            ann_is_complete=export.ann_is_complete)

    if export.source == "directory":
        # Question, why are we declaring this here?
        # Doesn't really make sense as export already has
        # it when created?
        export.working_dir_id = working_dir.id

        file_list = WorkingDirFileLink.file_list(
            session=session,
            working_dir_id=working_dir.id,
            limit=None,
            root_files_only=True,
            ann_is_complete=export.ann_is_complete)

    result, annotations = annotation_export_core(session=session,
                                                 project=project,
                                                 export=export,
                                                 file_list=file_list)

    if result is False or result is None:
        return False

    filename = generate_file_name_from_export(export, session)

    if export.kind == "Annotations":

        export.yaml_blob_name = settings.EXPORT_DIR + \
                                str(export.id) + filename + '.yaml'

        export.json_blob_name = settings.EXPORT_DIR + \
                                str(export.id) + filename + '.json'

        try:
            yaml_data = yaml.dump(annotations, default_flow_style=False)
            data_tools.upload_from_string(export.yaml_blob_name,
                                          yaml_data,
                                          content_type='text/yaml',
                                          bucket_type='ml')
        except Exception as exception:
            trace_data = traceback.format_exc()
            logger.error("[Export, YAML] {}".format(str(exception)))
            logger.error(trace_data)

        json_data = json.dumps(annotations)
        data_tools.upload_from_string(export.json_blob_name,
                                      json_data,
                                      content_type='text/json',
                                      bucket_type='ml')

    end_time = time.time()
    logger.info("[Export processor] ran in {}".format(end_time - start_time))

    Event.new(kind="export_generation",
              session=session,
              member=member,
              success=result,
              project_id=project.id,
              run_time=end_time - start_time)

    return True, annotations
Example #9
def annotation_export_core(session, project, export, file_list):
    """
    Generic method to export a file list
    """

    images_dir = settings.PROJECT_IMAGES_BASE_DIR + \
                 str(project.id) + "/"

    export.file_list_length = len(file_list)

    errors_result = check_for_errors(export=export, session=session)
    if errors_result is False:
        return False, None

    # If we build annotations directly then we could return them
    # If tf records then not
    # But some clean up stuff (ie marking complete) that's joint
    # Also not clear where we would be using a return dict of annotations here
    # Ohhh it returns annotations since we upload in YAML or JSON format for that
    # Maybe that should just be part of that process (instead of returning with a
    # separate flag?)

    annotations = None

    # This is here as it's shared with masks and
    # not masks, but needs to run before masks if masks

    # So we can have mask values increase in series
    # instead of using ids for example

    # Careful, want to use project default directory for labels for now

    label_file_list = WorkingDirFileLink.file_list(
        session=session,
        working_dir_id=export.project.directory_default_id,
        limit=None,
        type="label")

    if export.kind == "TF Records":
        label_dict = data_tools.label_dict_builder(file_list=label_file_list)

    export_label_map = {}
    for label_file in label_file_list:
        export_label_map[label_file.id] = label_file.label.name

    # TODO masks if not part of TF records is not really handled great right now

    # TODO pass export object to track it?
    """

        Would be good to allow masks for regular records / JSON
        too, but not supported yet, so for now 
        we do the tf records check too.
    """

    if export.masks is True and export.kind == "TF Records":
        # Assumes deep lab style for now?
        semantic_prep = Semantic_segmentation_data_prep()

        semantic_prep.generate_mask_core(session=session,
                                         project=project,
                                         file_list=file_list,
                                         type="joint",
                                         label_dict=label_dict)

    if export.kind == "TF Records":

        export.tf_records_blob_name = settings.EXPORT_DIR + \
                                      str(export.id)

        # Still need to check masks again here
        # To determine what building method we are using?
        if export.masks is True:
            result = data_tools.tf_records_new(
                session=session,
                file_list=file_list,
                project_id=export.project_id,
                method="semantic_segmentation",
                output_blob_dir=export.tf_records_blob_name)

            export.tf_records_blob_name += "/train-0.record"

        if export.masks is False:
            result = data_tools.tf_records_new(
                session=session,
                project_id=export.project_id,
                file_list=file_list,
                method="object_detection",
                label_dict=label_dict,
                output_blob_dir=export.tf_records_blob_name)

            export.tf_records_blob_name += "/tfrecords_0.record"

    if export.kind == "Annotations":
        annotations = {}

        annotations['readme'] = export.serialize_readme()

        annotations['label_map'] = export_label_map
        annotations['label_colour_map'] = build_label_colour_map(
            session, export_label_map)

        # TODO maybe, would like "annotations"
        # To be one layer "deeper" in terms of nesting.
        annotations['export_info'] = export.serialize_for_inside_export_itself(
        )

        # Other / shared stuff
        annotations[
            "attribute_groups_reference"] = build_attribute_groups_reference(
                session=session, project=project)

        # TODO
        # so I guess the "new" yaml one can do it "on demand"
        # if you substitute version for working directory?
        for index, file in enumerate(file_list):

            # Image URL?
            packet = build_packet(
                file=file,
                session=session,
                file_comparison_mode=export.file_comparison_mode)

            # What about by filename?
            # Original filename is not guaranteed to be unique
            # Careful! if this is not unique it will overwrite
            # on export and be difficult to debug
            # as it looks like it's working (ie file count is there)
            # but the first file is "null"...
            # Prior we used hash here, but in the context of a task
            # we may not re-hash the file (something to look at in future, maybe
            # we do want to hash it...)
            annotations[file.id] = packet

            export.percent_complete = (index / export.file_list_length) * 100

            if index % 10 == 0:
                # TODO would need to commit the session for this to be useful right?
                logger.info("Percent done {}".format(export.percent_complete))
                try_to_commit(session=session)  # push update

    export.status = "complete"
    export.percent_complete = 100

    return True, annotations
Example #10
def task_template_launch_limits(session, task_template, log):
    """

    """

    # Different permissions depending on conditions ie share type
    # For now don't require billing to be enabled for non market jobs
    #  sending to Market clearly needs billing enabled
    # Future may want to still restrict jobs to paid accounts
    # For now in context of wanting trainer orgs to try it this seems reasonable
    # Potentially a lot to think about here...

    project = task_template.project

    if task_template.share_type == "Market":
        if project.api_billing_enabled is not True:
            log['error'][
                'billing'] = "Please enable billing or select Project / Org for share type. "

    # TODO  Limit count of active jobs? ie default to 3 active jobs?
    # Limit on number of files? ie default to 500 files max per job?

    # Basic info
    # For now this is checked by new job creation
    # so low priority to double check here
    if task_template.status not in ['draft']:
        log['error']['job_status'] = "Job already launched."

    # Files
    task_template.update_file_count_statistic(session=session)
    attached_dir_list = session.query(JobWorkingDir).filter(
        JobWorkingDir.job_id == task_template.id).all()
    if task_template.file_count_statistic == 0 and len(attached_dir_list) == 0:
        log['error'][
            'attached_dir_list'] = "Must attach at least 1 file or directory"

    if task_template.file_count:
        if task_template.file_count_statistic != task_template.file_count:
            log['error']['file_count'] = str(task_template.file_count_statistic) + " processed files " + \
                                         "does not match set file_count: " + str(task_template.file_count)

    # note we are querying the input table here
    # suspect this is better than getting all the files
    # and doing a query for each to input
    # ie for getting bulk file status?

    # For retrying we may want to not include "removed" files
    # But a challenge here is that we are querying input not other thing
    # Also not sure if this really handles "failed" ones well...

    result = Input.directory_not_equal_to_status(
        session=session, directory_id=task_template.directory_id)

    # TODO may be some cases that this is overbearing / needs to be handled better
    # ie could call directory_not_equal_to_status with return type
    # of "objects" or something...

    print(result)

    if result > 0:
        log['error']['file_status'] = "Files processing. " + \
                                      "Try again in 30-60 minutes."

    # Credentials
    # ie Warn if missing ...
    # ie log['warn']['credentials'] = "No credentials required"

    # TODO if job type is exam check if grants at least one credential?

    # Guides

    if task_template.share_type in ["market"]:
        if task_template.guide_default_id is None:
            log['error']['guide_default'] = "Missing default guide"

    if task_template.type == "Normal":

        if task_template.guide_review_id is None:
            # Default review guide to being same as default guide
            # until we can handle this in better way
            task_template.guide_review = task_template.guide_default
            session.add(task_template)

        # Don't log error for now, see above default
        # log['error']['guide_review'] = "Missing review guide"

        # Bid(S)

    # Label check
    label_count = WorkingDirFileLink.file_list(
        session=session,
        working_dir_id=task_template.project.directory_default_id,
        type="label",
        counts_only=True,
    )
    if label_count == 0:
        log['error']['count'] = "Project must have at least 1 label"

    return log
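
A usage sketch for the launch-limit check above (hypothetical; the nested log dict with an 'error' key is assumed from how the function writes into it):

log = {'error': {}, 'info': {}, 'warn': {}}
log = task_template_launch_limits(session, task_template, log)
if log['error']:
    # At least one launch condition failed; surface log['error'] to the caller
    pass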
Example #11
def task_template_label_attach(
    session,
    task_template,
    project_directory=None,
):
    """
    Get label files from project and attach
    to job

    want full project_directory object for label_file_colour_map

    A key part of the rationale here is that a
    job may have labels that are distinct from the project.

    Main point of having this here is flexibility: if we change the
    way we represent jobs, we don't have to change the "upfront"
    logic in terms of attached ids.
    ie decouple which ids are attached to a job from whatever muck we
    need to do at launch time / "run" time.

    """

    if task_template.label_mode == "closed_all_available":

        label_file_list_serialized = []

        # Provided
        label_file_list = task_template.label_dict.get('label_file_list')

        if label_file_list:
            file_list = File.get_by_id_list(session, label_file_list)

        else:
            # Temporary fall back for migration
            print("label file list did not exist, using fall back")
            file_list = WorkingDirFileLink.file_list(
                session=session,
                working_dir_id=project_directory.id,
                limit=25,
                type="label")

            # Store for future reference here
            task_template.label_dict['label_file_list'] = [
                file.id for file in file_list
            ]

        # Work
        for file in file_list:
            file_serialized = file.serialize_with_label_and_colour(
                session=session)

            # Make sure time stamps are wrapped in str() to avoid nested json / dict issues

            label_file_list_serialized.append(file_serialized)

        # For debugging issues with serialization here.
        # print(label_file_list_serialized)

        task_template.label_dict[
            'label_file_list_serialized'] = label_file_list_serialized

        # Now in context of users being able to choose labels,
        # We rebuild this on launching

        task_template.label_dict['label_file_colour_map'] = rebuild_label_map(
            file_list)

    return True
Example #12
    def new(session,
            working_dir_id=None,
            project_id=None,
            file_type=None,
            image_id=None,
            text_file_id=None,
            video_id=None,
            frame_number=None,
            label_id=None,
            colour=None,
            original_filename=None,
            video_parent_file=None,
            input_id=None,
            parent_id=None,
            task=None,
            file_metadata=None):
        """
        "file_added" case

        Given a new image create a new file to track this image
        This assumes a new image is completely new

        We are always creating a new file at init so there will be A
        file, question is if there is a previous file too

        It was confusing to have two different ways to assign the project here,
        so that was removed in favour of just having one.

        Careful with object.id, since if the object can be None it
        won't work as expected then...


        video_parent_file_id issue
            video_parent_file (not id) FAILS because it
            does NOT exist; we have it as a function
            due to a workaround for an issue with SQLAlchemy,
            so we MUST store the actual id

        """
        from shared.database.source_control.working_dir import WorkingDirFileLink

        video_parent_file_id = None
        if video_parent_file:
            video_parent_file_id = video_parent_file.id

        file = File(original_filename=original_filename,
                    image_id=image_id,
                    state="added",
                    type=file_type,
                    project_id=project_id,
                    label_id=label_id,
                    text_file_id=text_file_id,
                    video_id=video_id,
                    video_parent_file_id=video_parent_file_id,
                    frame_number=frame_number,
                    colour=colour,
                    input_id=input_id,
                    parent_id=parent_id,
                    task=task,
                    file_metadata=file_metadata)

        File.new_file_new_frame(file, video_parent_file)

        session.add(file)
        session.flush()

        # Question do we still need to be running this here?
        file.hash_update()

        # Video frames don't need a working dir?
        # Or should we still put them in anyway...
        # in context of video frames
        # we don't want them to be in a working directory directly
        # so we can smoothly move files
        if working_dir_id:
            WorkingDirFileLink.add(session, working_dir_id, file)

        return file
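
A minimal sketch of creating a plain image file with this method (hypothetical values; assumes it is exposed as File.new and that working_dir, project and image are existing ORM objects):

file = File.new(
    session=session,
    working_dir_id=working_dir.id,   # optional; e.g. video frames may omit it
    project_id=project.id,
    file_type="image",
    image_id=image.id,
    original_filename="example.jpg")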
Example #13
    def create_file_links_for_attached_dirs(self,
                                            sync_only=False,
                                            create_tasks=False,
                                            file_to_link=None,
                                            file_to_link_dataset=None,
                                            related_input=None,
                                            member=None):
        """
            Called once before launch. This function checks all directories
            in the JobWorkingDir table
            and creates the file links for all the related files. It
            creates links for both "sync" and "select" type dirs. "select" type dirs
            are just linked once (ie new files added to the dir won't be picked up);
            "sync" type dirs will update links on process_media when a new file is attached to the dir
            or when a file is copied or moved to the sync directory.
        :param sync_only: if True, only process sync-type attached directories
        :param create_tasks: whether to create tasks for the linked files
        :param file_to_link: optional single file to link (used with file_to_link_dataset)
        :return:

        """

        # Now create a file link for all the files on all the directories on the job and attach them.
        if sync_only:
            directory_list = self.job.get_attached_dirs(session=self.session)

        else:
            directory_list = self.job.get_attached_dirs(
                session=self.session, sync_types=['sync', 'select_once'])
        if len(directory_list) == 0:
            self.log['info'][
                'attached_directories_list'] = 'No directories attached.'
            return directory_list
        if file_to_link is None or file_to_link_dataset is None:
            # Case where we do not provide a single file for sync (i.e no file_to_link or file_to_link_dataset)
            for directory in directory_list:
                if self.job.instance_type in ['text_tokens']:
                    files = WorkingDirFileLink.file_list(
                        self.session,
                        working_dir_id=directory.id,
                        root_files_only=True,  # TODO do we need to get child files too?
                        limit=None,
                        type='text')
                else:
                    files = WorkingDirFileLink.file_list(
                        self.session,
                        working_dir_id=directory.id,
                        root_files_only=True,  # TODO do we need to get child files too?
                        limit=None,
                    )
                for file in files:
                    logger.debug(
                        'Single file sync event with file: {} and folder {}'.
                        format(file, directory))
                    sync_event_manager = SyncEventManager.create_sync_event_and_manager(
                        session=self.session,
                        dataset_source_id=directory.id,
                        dataset_destination=None,
                        description=
                        'Sync file {} from dataset {} to job {} and create task'
                        .format(file.original_filename, directory.nickname,
                                self.job.name),
                        file=file,
                        job=self.job,
                        input=related_input,
                        project=self.job.project,
                        event_effect_type='create_task',
                        event_trigger_type='file_added',
                        status='init',
                        member_created=member)
                    logger.debug('Created sync_event {}'.format(
                        sync_event_manager.sync_event.id))
                    result, log = self.__add_file_into_job(
                        file,
                        directory,
                        create_tasks=create_tasks,
                        sync_event_manager=sync_event_manager)
                    if result is not True:
                        log['error'][
                            'sync_file_dirs'] = 'Error syncing dirs for file id: {}'.format(
                                file.id)
                    if len(log['error'].keys()) > 1:
                        return False, log
        else:
            logger.debug(
                'Single file sync event with file: {} and folder {}'.format(
                    file_to_link.id, file_to_link_dataset.id))
            sync_event_manager = SyncEventManager.create_sync_event_and_manager(
                session=self.session,
                dataset_source_id=file_to_link_dataset.id,
                dataset_destination=None,
                description=
                'Sync file {} from dataset {} to job {} and create task'.
                format(file_to_link.original_filename,
                       file_to_link_dataset.nickname, self.job.name),
                file=file_to_link,
                job=self.job,
                input=related_input,
                project=self.job.project,
                event_effect_type='create_task',
                event_trigger_type='file_added',
                status='init',
                member_created=member)
            logger.debug('Created sync_event {}'.format(
                sync_event_manager.sync_event.id))
            result, log = self.__add_file_into_job(
                file_to_link,
                file_to_link_dataset,
                create_tasks=create_tasks,
                sync_event_manager=sync_event_manager,
            )
            if result is not True:
                log['error'][
                    'sync_file_dirs'] = 'Error syncing dirs for file id: {}'.format(
                        file_to_link.id)
            if len(log['error'].keys()) > 1:
                return False, log
        self.job.update_file_count_statistic(session=self.session)
        return True, self.log
Example #14
    def __add_file_into_job(self,
                            file: File,
                            incoming_directory: WorkingDir,
                            job: Job = None,
                            create_tasks: bool = False,
                            sync_event_manager=None):
        """
            Given a file, add the link to the job directory and create a task if create_tasks=True.
        :param file:
        :param incoming_directory:
        :param job:
        :param create_tasks:
        :param sync_event_manager:
        :return:
        """

        job_obj = self.job
        if job is not None:
            job_obj = job

        result, log = WorkingDirFileLink.file_link_update(
            session=self.session,
            add_or_remove='add',
            incoming_directory=incoming_directory,
            directory=job_obj.directory,
            file_id=file.id,
            job=job_obj,
            log=self.log)
        logger.debug('File {} added to job {}'.format(file.id, job_obj.id))

        if create_tasks is False:
            log['info']['create_tasks flag'] = "create_tasks is False"
            return True, log

        valid_status_to_create_tasks = ['active', 'in_review', 'complete']
        if job_obj.status not in valid_status_to_create_tasks:
            log['info']['job status'] = "not in " + str(
                valid_status_to_create_tasks)
            logger.debug(
                'Job status not active, skipping. Statuses must be one of {}'.
                format(str(valid_status_to_create_tasks)))
            return True, log

        logger.debug('Creating task...')
        potential_existing_task = self.__check_if_task_exists(job=job_obj,
                                                              file=file)
        if potential_existing_task is None:
            task = self.create_task_from_file(
                file, job=job_obj, incoming_directory=incoming_directory)
            task.is_root = True
            logger.debug('New task created. {}'.format(task.id))
        else:
            task = potential_existing_task

        if sync_event_manager:
            sync_event_manager.add_create_task(task)
            sync_event_manager.set_status('completed')
        if result is not True:
            log['error'][
                'create_file_links'] = 'Error creating links for file id: {}'.format(
                    file.id)
            return False, log
        if len(log['error'].keys()) > 1:
            return False, log

        return True, log
def file_transfer_core(
    session,
    source_directory,
    destination_directory,
    transfer_action: str,
    file,
    log: dict,
    member=None,
    copy_instances: bool = False,
    sync_event_manager=None,
    log_sync_events=True,
    defer_sync=False,
    defer_copy=True,
    batch_id=None,
    update_project_for_copy=False,
):
    """

    source_directory and destination_directory are trusted, assumed to be valid here

    copy_instances, bool

    """

    if transfer_action == "copy":
        new_file = File.copy_file_from_existing(
            session=session,
            working_dir=destination_directory,
            orginal_directory_id=source_directory.id
            if source_directory else None,
            existing_file=file,
            copy_instance_list=copy_instances,
            log=log,
            add_link=True,
            remove_link=False,
            flush_session=True,
            defer_copy=defer_copy,
            batch_id=batch_id)
        if defer_copy:
            return log

        perform_sync_events_after_file_transfer(
            session=session,
            source_directory=source_directory,
            destination_directory=destination_directory,
            log=log,
            log_sync_events=log_sync_events,
            transfer_action=transfer_action,
            file=file,
            member=member,
            new_file=new_file,
            defer_sync=defer_sync,
            sync_event_manager=None)

        if not log['info'].get('new_file', []):
            if new_file:
                log['info']['new_file'] = [
                    new_file.serialize_with_type(session)
                ]
        else:
            if new_file:
                log['info']['new_file'].append(
                    new_file.serialize_with_type(session))
        if not log['info'].get('message'):
            log['info']['message'] = 'File Copy Success.'
        return log

    if transfer_action == "move":
        # Get existing link
        link = WorkingDirFileLink.file_link(session=session,
                                            working_dir_id=source_directory.id,
                                            file_id=file.id)
        if link is None:
            log["error"][
                'file_link'] = 'File link of file: {} and workingdir: {}. Does not exists'.format(
                    source_directory.id, file.id)
            return log

        # TODO consider how this affects what gets committed
        # Is it safe to just "update" it this way?
        # Should this be a built-in method of WorkingDirFileLink?
        new_link = WorkingDirFileLink.file_link(
            session=session,
            working_dir_id=destination_directory.id,
            file_id=file.id)
        if new_link is not None:
            log["error"][
                'file_link'] = 'File link of file: {} and Destination workingdir: {}. Already Exists'.format(
                    source_directory.id, file.id)
            return log
        link.working_dir_id = destination_directory.id
        session.add(link)

        perform_sync_events_after_file_transfer(
            session=session,
            source_directory=source_directory,
            destination_directory=destination_directory,
            log=log,
            log_sync_events=log_sync_events,
            transfer_action=transfer_action,
            file=file,
            member=member,
            new_file=None,
            defer_sync=defer_sync,
            sync_event_manager=sync_event_manager)
        return log

    if transfer_action == "mirror":

        existing_link = WorkingDirFileLink.file_link(
            session=session,
            working_dir_id=destination_directory.id,
            file_id=file.id)

        if existing_link is not None:
            log["error"][str(file.id)] = "File already in dataset id: " + \
                                         str(destination_directory.id)
            return log

        link = WorkingDirFileLink.add(session=session,
                                      working_dir_id=destination_directory.id,
                                      file=file)
        log["info"][str(file.id)] = True

        return log
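
A usage sketch of file_transfer_core for a copy between two datasets (hypothetical; source_dir, destination_dir and file are assumed existing objects, and the nested log dict follows the 'error' / 'info' structure used above):

log = {'error': {}, 'info': {}}
log = file_transfer_core(
    session=session,
    source_directory=source_dir,
    destination_directory=destination_dir,
    transfer_action="copy",
    file=file,
    log=log,
    copy_instances=True,
    defer_copy=False)  # run the copy inline instead of deferring it
if log['error']:
    pass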