def _patch_file(self, download=True):
    """
    Downloads a file from FlatIron, modifies it locally, patches it, then downloads it again to check the hash
    """
    dataset_id = '04abb580-e14b-4716-9ff2-f7b95740b99f'
    dataset = self.one.alyx.rest('datasets', 'read', id=dataset_id)
    # download
    local_file_path = self.one.load(dataset['session'],
                                    dataset_types=dataset['dataset_type'],
                                    download_only=True,
                                    clobber=True)[0]
    # change it
    np.save(local_file_path, ~np.load(local_file_path))
    new_check_sum = hashfile.md5(local_file_path)
    # try once with dry
    self.patcher.patch_dataset(local_file_path,
                               dset_id=dataset['url'][-36:],
                               dry=True)
    self.patcher.patch_dataset(local_file_path,
                               dset_id=dataset['url'][-36:],
                               dry=False)
    # the dataset hash should have been updated
    dataset = self.one.alyx.rest('datasets', 'read', id=dataset_id)
    self.assertEqual(uuid.UUID(dataset['hash']), uuid.UUID(new_check_sum))
    self.assertEqual(dataset['version'], version.ibllib())
    if download:
        # download again and check the hash
        local_file_path.unlink()
        local_file_path = self.one.load(dataset['session'],
                                        dataset_types=dataset['dataset_type'],
                                        download_only=True,
                                        clobber=True)[0]
        self.assertEqual(hashfile.md5(local_file_path), new_check_sum)
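The test above fingerprints the file before and after patching with `hashfile.md5`. A minimal, self-contained sketch of an equivalent checksum helper, assuming a plain hashlib-based implementation (ibllib's own helper may differ in details):

import hashlib
from pathlib import Path


def md5_checksum(file_path, chunk_size=2 ** 20):
    """Return the md5 hex digest of a file, read in chunks to bound memory use."""
    h = hashlib.md5()
    with open(Path(file_path), 'rb') as fid:
        for chunk in iter(lambda: fid.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()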
Example #2
 def register_dataset(self, file_list, created_by='root', server_repository=None, dry=False):
     """
     Registers a set of files belonging to a session only on the server
     :param file_list: (list of pathlib.Path or pathlib.Path) files to register
     :param created_by:
     :param server_repository:
     :param dry:
     :return:
     """
     if not isinstance(file_list, list):
         file_list = [Path(file_list)]
     assert len(set([alf.io.get_session_path(f) for f in file_list])) == 1
     assert all([Path(f).exists() for f in file_list])
     session_path = alf.io.get_session_path(file_list[0])
     # first register the file
     r = {'created_by': created_by,
          'path': str(session_path.relative_to((session_path.parents[2]))),
          'filenames': [str(p.relative_to(session_path)) for p in file_list],
          'name': server_repository,
          'server_only': True,
          'hashes': [md5(p) for p in file_list],
          'filesizes': [p.stat().st_size for p in file_list],
          'versions': [version.ibllib() for _ in file_list]}
     if not dry:
         return self.one.alyx.rest('register-file', 'create', data=r)
     else:
         print(r)
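A minimal usage sketch for the method above, assuming a patcher-like object named `patcher` that exposes it; the session folder, file names and repository name are illustrative only, and the files must already exist locally:

from pathlib import Path

# hypothetical local session laid out as subject/yyyy-mm-dd/number
session_path = Path('/mnt/s0/Data/Subjects/clns0730/2018-04-01/002')
files = [session_path.joinpath('alf', 'spikes.times.npy'),
         session_path.joinpath('alf', 'spikes.amps.npy')]
# dry=True only prints the payload that would be posted to the register-file endpoint
patcher.register_dataset(files, created_by='root', server_repository='flatiron_mainenlab', dry=True)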
Example #3
 def test_registration_session(self):
     behavior_path = self.session_path.joinpath('raw_behavior_data')
     behavior_path.mkdir()
     settings_file = behavior_path.joinpath('_iblrig_taskSettings.raw.json')
     with open(settings_file, 'w') as fid:
         json.dump(MOCK_SESSION_SETTINGS, fid)
     rc = registration.RegistrationClient(one=one)
     rc.register_session(self.session_path)
     eid = one.search(subjects=SUBJECT, date_range=['2018-04-01', '2018-04-01'])[0]
     datasets = one.alyx.rest('datasets', 'list', session=eid)
     for ds in datasets:
         self.assertTrue(ds['hash'] is not None)
         self.assertTrue(ds['file_size'] is not None)
         self.assertTrue(ds['version'] == version.ibllib())
     # checks the procedure of the session
     ses_info = one.alyx.rest('sessions', 'read', id=eid)
     self.assertTrue(ses_info['procedures'] == ['Ephys recording with acute probe(s)'])
     one.alyx.rest('sessions', 'delete', id=eid)
     # re-register the session as behaviour this time
     MOCK_SESSION_SETTINGS['PYBPOD_PROTOCOL'] = '_iblrig_tasks_trainingChoiceWorld6.3.1'
     with open(settings_file, 'w') as fid:
         json.dump(MOCK_SESSION_SETTINGS, fid)
     rc.register_session(self.session_path)
     eid = one.search(subjects=SUBJECT, date_range=['2018-04-01', '2018-04-01'])[0]
     ses_info = one.alyx.rest('sessions', 'read', id=eid)
     self.assertTrue(ses_info['procedures'] == ['Behavior training/tasks'])
 def test_single_registration(self):
     dataset = one.alyx.rest('register-file', 'create', data=r)
     ds = one.alyx.rest('datasets', 'read', id=dataset[0]['id'])
     self.assertEqual(ds['hash'], md5_0)
     self.assertEqual(ds['file_size'], 1234)
     self.assertEqual(ds['version'], version.ibllib())
     self.assertEqual(len(dataset[0]['file_records']), 2)
     one.alyx.rest('datasets', 'delete', id=dataset[0]['id'])
Example #5
def register_dataset(file_list, one=None, created_by=None, repository=None, server_only=False,
                     versions=None, dry=False, max_md5_size=None):
    """
    Registers a set of files belonging to a session only on the server
    :param file_list: (list of pathlib.Path or pathlib.Path)
    :param one: optional (oneibl.ONE), current one object, will create an instance if not provided
    :param created_by: (string) name of user in Alyx (defaults to 'root')
    :param repository: optional: (string) name of the repository in Alyx
    :param server_only: optional: (bool) if True only creates on the Flatiron (defaults to False)
    :param versions: optional (list of strings): versions tags (defaults to ibllib version)
    :param dry: (bool) False by default
    :param max_md5_size: (int) maximum file size in bytes for which to compute the md5 sum
    (always computed if None, the default)
    :return:
    """
    if created_by is None:
        created_by = one._par.ALYX_LOGIN
    if file_list is None or file_list == '' or file_list == []:
        return
    elif not isinstance(file_list, list):
        file_list = [Path(file_list)]
    assert len(set([alf.io.get_session_path(f) for f in file_list])) == 1
    assert all([Path(f).exists() for f in file_list])
    if versions is None:
        versions = version.ibllib()
    if isinstance(versions, str):
        versions = [versions for _ in file_list]
    assert isinstance(versions, list) and len(versions) == len(file_list)

    # computing the md5 can be very long, so this is an option to skip if the file is bigger
    # than a certain threshold
    if max_md5_size:
        hashes = [hashfile.md5(p) if p.stat().st_size < max_md5_size else None
                  for p in file_list]
    else:
        hashes = [hashfile.md5(p) for p in file_list]

    session_path = alf.io.get_session_path(file_list[0])
    # first register the file
    r = {'created_by': created_by,
         'path': session_path.relative_to((session_path.parents[2])).as_posix(),
         'filenames': [p.relative_to(session_path).as_posix() for p in file_list],
         'name': repository,
         'server_only': server_only,
         'hashes': hashes,
         'filesizes': [p.stat().st_size for p in file_list],
         'versions': versions}
    if not dry:
        if one is None:
            one = ONE()
        response = one.alyx.rest('register-file', 'create', data=r)
        for p in file_list:
            _logger.info(f"ALYX REGISTERED DATA: {p}")
        return response
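A minimal usage sketch for the module-level function above, assuming configured Alyx credentials; the import paths, session folder and threshold are assumptions:

from pathlib import Path
from oneibl.one import ONE  # assumption: oneibl-era import; newer releases expose one.api.ONE
from ibllib.oneibl.registration import register_dataset  # assumed module path

one = ONE()
session_path = Path('/datadisk/Subjects/clns0730/2018-04-01/002')  # hypothetical session folder
files = list(session_path.joinpath('alf').glob('spikes.*.npy'))
# skip the md5 computation for files larger than 1 GB; tag everything with the current ibllib version
register_dataset(files, one=one, created_by='root', max_md5_size=1024 ** 3, dry=False)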
Example #6
 def register_images(self, widths=None, function=None, extra_dict=None):
     report_tag = '## report ##'
     snapshot = Snapshot(one=self.one, object_id=self.object_id, content_type=self.content_type)
     jsons = []
     texts = []
     for f in self.outputs:
         json_dict = dict(tag=report_tag, version=version.ibllib(),
                          function=(function or str(self.__class__).split("'")[1]), name=f.stem)
         if extra_dict is not None:
             assert isinstance(extra_dict, dict)
             json_dict.update(extra_dict)
         jsons.append(json_dict)
         texts.append(f"{f.stem}")
     return snapshot.register_images(self.outputs, jsons=jsons, texts=texts, widths=widths)
Example #7
 def patch_dataset(self, path, dset_id=None, dry=False):
     """
     Uploads a dataset from an arbitrary location to FlatIron.
     :param path:
     :param dset_id:
     :param dry:
     :return:
     """
     status = self._patch_dataset(path, dset_id=dset_id, dry=dry)
     if not dry and status == 0:
         self.one.alyx.rest('datasets', 'partial_update', id=dset_id,
                            data={'hash': md5(path),
                                  'file_size': path.stat().st_size,
                                  'version': version.ibllib()}
                            )
 def test_registration_session(self):
     settings = {
         'SESSION_DATE': '2018-04-01',
         'SESSION_DATETIME': '2018-04-01T12:48:26.795526',
         'PYBPOD_CREATOR': ['test_user', 'f092c2d5-c98a-45a1-be7c-df05f129a93c', 'local'],
         'SESSION_NUMBER': '002',
         'SUBJECT_NAME': 'clns0730',
         'PYBPOD_BOARD': '_iblrig_mainenlab_behavior_1',
         'PYBPOD_PROTOCOL': '_iblrig_tasks_ephysChoiceWorld',
         'IBLRIG_VERSION_TAG': '5.4.1',
         'SUBJECT_WEIGHT': 22,
     }
     with tempfile.TemporaryDirectory() as td:
         # creates the local session
         session_path = Path(td).joinpath('clns0730', '2018-04-01', '002')
         alf_path = session_path.joinpath('alf')
         alf_path.mkdir(parents=True)
         alf_path.joinpath('spikes.times.npy').touch()
         alf_path.joinpath('spikes.amps.npy').touch()
         behavior_path = session_path.joinpath('raw_behavior_data')
         behavior_path.mkdir()
         settings_file = behavior_path.joinpath(
             '_iblrig_taskSettings.raw.json')
         with open(settings_file, 'w') as fid:
             json.dump(settings, fid)
         rc = registration.RegistrationClient(one=one)
         rc.register_session(session_path)
         eid = one.search(subjects='clns0730',
                          date_range=['2018-04-01', '2018-04-01'])[0]
         datasets = one.alyx.get(
             '/datasets?subject=clns0730&date=2018-04-01')
         for ds in datasets:
             self.assertTrue(ds['hash'] is not None)
             self.assertTrue(ds['file_size'] is not None)
             self.assertTrue(ds['version'] == version.ibllib())
         one.alyx.rest('sessions', 'delete', id=eid)
Example #9
one = one.ONE(base_url='https://test.alyx.internationalbrainlab.org', username='******',
              password='******')
SUBJECT = 'clns0730'
USER = '******'

# one = one.ONE(base_url='http://localhost:8000')
# SUBJECT = 'CSP013'
# USER = '******'

md5_0 = 'add2ab27dbf8428f8140-0870d5080c7f'
r = {'created_by': 'olivier',
     'path': f'{SUBJECT}/2018-08-24/002',
     'filenames': ["raw_behavior_data/_iblrig_encoderTrialInfo.raw.ssv"],
     'hashes': [md5_0],
     'filesizes': [1234],
     'versions': [version.ibllib()]}

MOCK_SESSION_SETTINGS = {
    'SESSION_DATE': '2018-04-01',
    'SESSION_DATETIME': '2018-04-01T12:48:26.795526',
    'PYBPOD_CREATOR': [USER,
                       'f092c2d5-c98a-45a1-be7c-df05f129a93c',
                       'local'],
    'SESSION_NUMBER': '002',
    'SUBJECT_NAME': SUBJECT,
    'PYBPOD_BOARD': '_iblrig_mainenlab_behavior_1',
    'PYBPOD_PROTOCOL': '_iblrig_tasks_ephysChoiceWorld',
    'IBLRIG_VERSION_TAG': '5.4.1',
    'SUBJECT_WEIGHT': 22,
}
Example #10
class Task(abc.ABC):
    log = ""
    cpu = 1
    gpu = 0
    io_charge = 5  # integer percentage
    priority = 30  # integer percentage, 100 means highest priority
    ram = 4  # RAM needed to run (GB)
    one = None  # one instance (optional)
    level = 0
    outputs = None
    time_elapsed_secs = None
    time_out_secs = None
    version = version.ibllib()

    def __init__(self, session_path, parents=None, taskid=None, one=None):
        self.taskid = taskid
        self.one = one
        self.session_path = session_path
        self.register_kwargs = {}
        if parents:
            self.parents = parents
        else:
            self.parents = []

    @property
    def name(self):
        return self.__class__.__name__

    def run(self, **kwargs):
        """
        --- do not overload, see _run() below---
        wraps the _run() method with
        -   error management
        -   logging to variable
        """
        # if the taskid or one properties are not available, run locally without Alyx
        use_alyx = self.one is not None and self.taskid is not None
        if use_alyx:
            self.one.alyx.rest('tasks',
                               'partial_update',
                               id=self.taskid,
                               data={'status': 'Started'})
        # setup
        self.setUp()
        # Setup the console handler with a StringIO object
        log_capture_string = io.StringIO()
        ch = logging.StreamHandler(log_capture_string)
        str_format = '%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s'
        ch.setFormatter(logging.Formatter(str_format))
        _logger.addHandler(ch)
        _logger.info(f"Starting job {self.__class__}")
        # run
        start_time = time.time()
        self.status = 0
        try:
            self.outputs = self._run(**kwargs)
            _logger.info(f"Job {self.__class__} complete")
        except BaseException:
            _logger.error(traceback.format_exc())
            _logger.info(f"Job {self.__class__} errored")
            self.status = -1
        self.time_elapsed_secs = time.time() - start_time
        # log the outputs-+
        if isinstance(self.outputs, list):
            nout = len(self.outputs)
        elif self.outputs is None:
            nout = 0
        else:
            nout = 1
        _logger.info(f"N outputs: {nout}")
        _logger.info(f"--- {self.time_elapsed_secs} seconds run-time ---")
        # after the run, capture the log output
        self.log = log_capture_string.getvalue()
        log_capture_string.close()
        _logger.removeHandler(ch)
        # tear down
        self.tearDown()
        return self.status

    def register_datasets(self, one=None, **kwargs):
        """
        Register output datasets from the task to Alyx
        :param one:
        :param kwargs: directly passed to the register_dataset function
        :return:
        """
        assert one
        if self.outputs:
            if isinstance(self.outputs, list):
                versions = [self.version for _ in self.outputs]
            else:
                versions = [self.version]
            return register_dataset(self.outputs,
                                    one=one,
                                    versions=versions,
                                    **kwargs)

    def rerun(self):
        self.run(overwrite=True)

    @abc.abstractmethod
    def _run(self, overwrite=False):
        """
        This is the method to implement
        :param overwrite: (bool) if True, recompute the outputs even if they already exist
        :return: out_files: files to be registered. Could be a list of files (pathlib.Path),
        a single file (pathlib.Path), an empty list [] or None.
        Within the pipeline, there is a distinction between a job that returns an empty list
        and a job that returns None: if the function returns None, the job will be labelled
        with an "Empty" status in the database; an empty list means the job ran as expected
        but does not register any dataset.
        """

    def setUp(self):
        """
        Function to optionally overload to check inputs.
        :return:
        """

    def tearDown(self):
        """
Example #11
desired_statuses = {
    'Task00': 'Complete',
    'Task01_void': 'Empty',
    'Task02_error': 'Errored',
    'Task10': 'Complete',
    'Task11': 'Held',
    'TaskIncomplete': 'Incomplete',
    'TaskGpuLock': 'Waiting'
}

desired_datasets = [
    'spikes.times.npy', 'spikes.amps.npy', 'spikes.clusters.npy'
]
desired_versions = {
    'spikes.times.npy': 'custom_job00',
    'spikes.amps.npy': version.ibllib(),
    'spikes.clusters.npy': version.ibllib()
}
desired_logs = 'Running on machine: testmachine'
desired_logs_rerun = {
    'Task00': 1,
    'Task01_void': 2,
    'Task02_error': 1,
    'Task10': 1,
    'Task11': 1,
    'TaskIncomplete': 1,
    'TaskGpuLock': 2
}


#  job to output a single file (pathlib.Path)
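The fixture code is cut off here. Under the status and version tables above, a task such as Task00 would look roughly like the following sketch; the class body, module path and file name are assumptions inferred from the desired_statuses/desired_versions dictionaries:

from pathlib import Path
from ibllib.pipes.tasks import Task  # assumption: the Task base class shown in the other examples


class Task00(Task):
    # hypothetical fixture task: returns a single pathlib.Path, so it should finish as 'Complete'
    version = 'custom_job00'  # would account for desired_versions['spikes.times.npy'] above

    def _run(self, overwrite=False):
        out = Path(self.session_path).joinpath('alf', 'spikes.times.npy')
        out.parent.mkdir(parents=True, exist_ok=True)
        out.touch()
        return out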
Example #12
    def register_session(self, ses_path, file_list=True):
        """
        Register session in Alyx

        :param ses_path: path to the session
        :param file_list: bool. Set to False will only create the session and skip registration
        :return: Status string on error
        """
        if isinstance(ses_path, str):
            ses_path = Path(ses_path)
        # read meta data from the rig for the session from the task settings file
        settings_json_file = list(
            ses_path.glob(
                '**/raw_behavior_data/_iblrig_taskSettings.raw*.json'))
        if not settings_json_file:
            settings_json_file = list(
                ses_path.glob('**/_iblrig_taskSettings.raw*.json'))
            if not settings_json_file:
                _logger.error('could not find _iblrig_taskSettings.raw.json. Abort.')
                raise ValueError(
                    f'_iblrig_taskSettings.raw.json not found in {ses_path} Abort.'
                )
            _logger.warning(f'Settings found in a strange place: {settings_json_file}')
        else:
            settings_json_file = settings_json_file[0]
        md = _read_settings_json_compatibility_enforced(settings_json_file)
        # query alyx endpoints for subject, error if not found
        try:
            subject = self.one.alyx.rest('subjects?nickname=' +
                                         md['SUBJECT_NAME'],
                                         'list',
                                         no_cache=True)[0]
        except IndexError:
            _logger.error(
                f"Subject: {md['SUBJECT_NAME']} doesn't exist in Alyx. ABORT.")
            raise alferr.AlyxSubjectNotFound(md['SUBJECT_NAME'])

        # look for a session from the same subject, same number on the same day
        session_id, session = self.one.search(subject=subject['nickname'],
                                              date_range=md['SESSION_DATE'],
                                              number=md['SESSION_NUMBER'],
                                              details=True,
                                              query_type='remote')
        try:
            user = self.one.alyx.rest('users',
                                      'read',
                                      id=md["PYBPOD_CREATOR"][0],
                                      no_cache=True)
        except Exception as e:
            _logger.error(
                f"User: {md['PYBPOD_CREATOR'][0]} doesn't exist in Alyx. ABORT"
            )
            raise e

        username = user['username'] if user else subject['responsible_user']

        # load the trials data to get information about session duration and performance
        ses_data = raw.load_data(ses_path)
        start_time, end_time = _get_session_times(ses_path, md, ses_data)
        n_trials, n_correct_trials = _get_session_performance(md, ses_data)

        # this is the generic relative path: subject/yyyy-mm-dd/NNN
        gen_rel_path = Path(subject['nickname'], md['SESSION_DATE'],
                            '{0:03d}'.format(int(md['SESSION_NUMBER'])))

        # if nothing found create a new session in Alyx
        task_protocol = md['PYBPOD_PROTOCOL'] + md['IBLRIG_VERSION_TAG']
        alyx_procedure = _alyx_procedure_from_task(task_protocol)
        if not session:
            ses_ = {
                'subject': subject['nickname'],
                'users': [username],
                'location': md['PYBPOD_BOARD'],
                'procedures':
                [] if alyx_procedure is None else [alyx_procedure],
                'lab': subject['lab'],
                # 'project': project['name'],
                'type': 'Experiment',
                'task_protocol': task_protocol,
                'number': md['SESSION_NUMBER'],
                'start_time': ibllib.time.date2isostr(start_time),
                'end_time':
                ibllib.time.date2isostr(end_time) if end_time else None,
                'n_correct_trials': n_correct_trials,
                'n_trials': n_trials,
                'json': md,
            }
            session = self.one.alyx.rest('sessions', 'create', data=ses_)
            if md['SUBJECT_WEIGHT']:
                wei_ = {
                    'subject': subject['nickname'],
                    'date_time': ibllib.time.date2isostr(start_time),
                    'weight': md['SUBJECT_WEIGHT'],
                    'user': username
                }
                self.one.alyx.rest('weighings', 'create', data=wei_)
        else:  # TODO: if session exists and no json partial_upgrade it
            session = self.one.alyx.rest('sessions',
                                         'read',
                                         id=session_id[0],
                                         no_cache=True)

        _logger.info(session['url'] + ' ')
        # create associated water administration if not found
        if not session['wateradmin_session_related'] and ses_data:
            wa_ = {
                'subject': subject['nickname'],
                'date_time': ibllib.time.date2isostr(end_time),
                'water_administered': ses_data[-1]['water_delivered'] / 1000,
                'water_type': md.get('REWARD_TYPE') or 'Water',
                'user': username,
                'session': session['url'][-36:],
                'adlib': False
            }
            self.one.alyx.rest('water-administrations', 'create', data=wa_)
        # at this point the session has been created. If create only, exit
        if not file_list:
            return session
        # register all files matching the Alyx patterns; warn the user when a file cannot be registered
        rename_files_compatibility(ses_path, md['IBLRIG_VERSION_TAG'])
        F = []  # list of relative file paths to register
        md5s = []
        file_sizes = []
        for fn in _glob_session(ses_path):
            if fn.suffix in EXCLUDED_EXTENSIONS:
                _logger.debug('Excluded: ' + str(fn))
                continue
            if not _check_filename_for_registration(
                    fn, self.registration_patterns):
                _logger.warning('No matching dataset type for: ' + str(fn))
                continue
            if fn.suffix not in self.file_extensions:
                _logger.warning(
                    'No matching dataformat (ie. file extension) for: ' +
                    str(fn))
                continue
            if not _register_bool(fn.name, file_list):
                _logger.debug('Not in filelist: ' + str(fn))
                continue
            try:
                assert (str(gen_rel_path) in str(fn))
            except AssertionError as e:
                strerr = 'ALF folder mismatch: data is in wrong subject/date/number folder. \n'
                strerr += ' Expected ' + str(
                    gen_rel_path) + ' actual was ' + str(fn)
                _logger.error(strerr)
                raise e
            # extract the relative path of the file
            rel_path = Path(str(fn)[str(fn).find(str(gen_rel_path)):])
            F.append(str(rel_path.relative_to(gen_rel_path).as_posix()))
            file_sizes.append(fn.stat().st_size)
            md5s.append(
                hashfile.md5(fn) if fn.stat().st_size < 1024**3 else None)
            _logger.info('Registering ' + str(fn))

        r_ = {
            'created_by': username,
            'path': str(gen_rel_path.as_posix()),
            'filenames': F,
            'hashes': md5s,
            'filesizes': file_sizes,
            'versions': [version.ibllib() for _ in F]
        }
        self.one.alyx.post('/register-file', data=r_)
        return session
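A minimal usage sketch for register_session, assuming a configured ONE instance and a local session laid out as subject/yyyy-mm-dd/NNN with a raw_behavior_data/_iblrig_taskSettings.raw.json file, as in the tests above; import paths and the session folder are assumptions:

from pathlib import Path
from oneibl.one import ONE  # assumption: oneibl-era import
from ibllib.oneibl import registration  # assumed module path, as used in the tests above

one = ONE()
rc = registration.RegistrationClient(one=one)
session_path = Path('/datadisk/Subjects/clns0730/2018-04-01/002')  # hypothetical local session
# creates (or retrieves) the Alyx session, then registers every matching dataset file
session = rc.register_session(session_path)
# rc.register_session(session_path, file_list=False) would create the session record only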
Example #13
class Task(abc.ABC):
    log = ""  # place holder to keep the log of the task for registratoin
    cpu = 1  # CPU resource
    gpu = 0  # GPU resources: as of now, either 0 or 1
    io_charge = 5  # integer percentage
    priority = 30  # integer percentage, 100 means highest priority
    ram = 4  # RAM needed to run (GB)
    one = None  # one instance (optional)
    level = 0  # level in the pipeline hierarchy: level 0 means there is no parent task
    outputs = None  # place holder for a list of Path containing output files
    time_elapsed_secs = None
    time_out_secs = 3600 * 2  # time-out after which a task is considered dead
    version = version.ibllib()
    signature = {
        'input_files': [],
        'output_files': []
    }  # list of tuples (filename, collection, required_flag)
    force = False  # whether to attempt to re-download missing input files when running on a local server

    def __init__(self,
                 session_path,
                 parents=None,
                 taskid=None,
                 one=None,
                 machine=None,
                 clobber=True,
                 location='server'):
        """
        Base task class
        :param session_path: session path
        :param parents: parents
        :param taskid: alyx task id
        :param one: one instance
        :param machine:
        :param clobber: whether or not to overwrite log on rerun
        :param location: where the task is run. Options are 'server' (lab local servers),
        'remote' (remote compute node, data required for the task downloaded via ONE),
        'AWS' (remote compute node, data downloaded via AWS), or 'SDSC' (SDSC FlatIron compute node)
        # TODO 'Globus' (remote compute node, data downloaded via Globus)
        """
        self.taskid = taskid
        self.one = one
        self.session_path = session_path
        self.register_kwargs = {}
        if parents:
            self.parents = parents
        else:
            self.parents = []
        self.machine = machine
        self.clobber = clobber
        self.location = location
        self.plot_tasks = []  # plotting tasks used to create plot outputs during the task

    @property
    def name(self):
        return self.__class__.__name__

    def run(self, **kwargs):
        """
        --- do not overload, see _run() below---
        wraps the _run() method with
        -   error management
        -   logging to variable
        -   writing a lock file if the GPU is used
        -   labels the status property of the object. The status value is labeled as:
             0: Complete
            -1: Errored
            -2: Didn't run as a lock was encountered
            -3: Incomplete
        """
        # if the taskid or one properties are not available, run locally without Alyx
        use_alyx = self.one is not None and self.taskid is not None
        if use_alyx:
            # check that alyx user is logged in
            if not self.one.alyx.is_logged_in:
                self.one.alyx.authenticate()
            tdict = self.one.alyx.rest('tasks',
                                       'partial_update',
                                       id=self.taskid,
                                       data={'status': 'Started'})
            self.log = (
                '' if not tdict['log'] else tdict['log'] +
                '\n\n=============================RERUN=============================\n'
            )

        # Setup the console handler with a StringIO object
        logger_level = _logger.level
        log_capture_string = io.StringIO()
        ch = logging.StreamHandler(log_capture_string)
        str_format = '%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s'
        ch.setFormatter(logging.Formatter(str_format))
        _logger.addHandler(ch)
        _logger.setLevel(logging.INFO)
        _logger.info(f"Starting job {self.__class__}")
        if self.machine:
            _logger.info(f"Running on machine: {self.machine}")
        _logger.info(f"running ibllib version {version.ibllib()}")
        # setup
        start_time = time.time()
        try:
            setup = self.setUp(**kwargs)
            _logger.info(f"Setup value is: {setup}")
            self.status = 0
            if not setup:
                # case where outputs are present but input files to rerun the task are not available locally
                # label task as complete
                _, self.outputs = self.assert_expected_outputs()
            else:
                # run task
                if self.gpu >= 1:
                    if not self._creates_lock():
                        self.status = -2
                        _logger.info(
                            f"Job {self.__class__} exited as a lock was found")
                        new_log = log_capture_string.getvalue()
                        self.log = new_log if self.clobber else self.log + new_log
                        log_capture_string.close()
                        _logger.removeHandler(ch)
                        return self.status
                self.outputs = self._run(**kwargs)
                _logger.info(f"Job {self.__class__} complete")
        except Exception:
            _logger.error(traceback.format_exc())
            _logger.info(f"Job {self.__class__} errored")
            self.status = -1

        self.time_elapsed_secs = time.time() - start_time
        # log the outputs
        if isinstance(self.outputs, list):
            nout = len(self.outputs)
        elif self.outputs is None:
            nout = 0
        else:
            nout = 1

        _logger.info(f"N outputs: {nout}")
        _logger.info(f"--- {self.time_elapsed_secs} seconds run-time ---")
        # after the run, capture the log output and append it to any existing log if not overwriting
        new_log = log_capture_string.getvalue()
        self.log = new_log if self.clobber else self.log + new_log
        log_capture_string.close()
        _logger.removeHandler(ch)
        _logger.setLevel(logger_level)
        # tear down
        self.tearDown()
        return self.status

    def register_datasets(self, one=None, **kwargs):
        """
        Register output datasets from the task to Alyx
        :param one:
        :param kwargs: directly passed to the register_dataset function
        :return:
        """
        _ = self.register_images()

        return self.data_handler.uploadData(self.outputs, self.version,
                                            **kwargs)

    def register_images(self, **kwargs):
        """
        Registers images to the Alyx database
        :return:
        """
        if self.one and len(self.plot_tasks) > 0:
            for plot_task in self.plot_tasks:
                try:
                    _ = plot_task.register_images(widths=['orig'])
                except Exception:
                    _logger.error(traceback.format_exc())
                    continue

    def rerun(self):
        self.run(overwrite=True)

    def get_signatures(self, **kwargs):
        """
        This is the default but should be overridden for each task
        :return:
        """
        self.input_files = self.signature['input_files']
        self.output_files = self.signature['output_files']

    @abc.abstractmethod
    def _run(self, overwrite=False):
        """
        This is the method to implement
        :param overwrite: (bool) if True, recompute the outputs even if they already exist
        :return: out_files: files to be registered. Could be a list of files (pathlib.Path),
        a single file (pathlib.Path), an empty list [] or None.
        Within the pipeline, there is a distinction between a job that returns an empty list
        and a job that returns None: if the function returns None, the job will be labelled
        with an "Empty" status in the database; an empty list means the job ran as expected
        but does not register any dataset.
        """

    def setUp(self, **kwargs):
        """
        Setup method to get the data handler and ensure all data is available locally to run task
        :param kwargs:
        :return:
        """
        if self.location == 'server':
            self.get_signatures(**kwargs)

            input_status, _ = self.assert_expected_inputs(raise_error=False)
            output_status, _ = self.assert_expected(self.output_files,
                                                    silent=True)

            if input_status:
                self.data_handler = self.get_data_handler()
                _logger.info('All input files found: running task')
                return True

            if not self.force:
                self.data_handler = self.get_data_handler()
                _logger.warning(
                    'Not all input files found locally: will still attempt to rerun task'
                )
                # TODO in the future once we are sure that input output task signatures work properly should return False
                # _logger.info('All output files found but input files required not available locally: task not rerun')
                return True
            else:
                # Attempts to download missing data using globus
                _logger.info(
                    'Not all input files found locally: attempting to re-download required files'
                )
                self.data_handler = self.get_data_handler(
                    location='serverglobus')
                self.data_handler.setUp()
                # Double check we now have the required files to run the task
                # TODO in future should raise error if even after downloading don't have the correct files
                self.assert_expected_inputs(raise_error=False)
                return True
        else:
            self.data_handler = self.get_data_handler()
            self.data_handler.setUp()
            self.get_signatures(**kwargs)
            self.assert_expected_inputs()
            return True

    def tearDown(self):
        """
        Function run after the task finishes (at the end of run())
        Does not run if a lock is encountered by the task (status -2)
        """
        if self.gpu >= 1:
            if self._lock_file_path().exists():
                self._lock_file_path().unlink()

    def cleanUp(self):
        """
        Function to optionally overload to clean up
        :return:
        """
        self.data_handler.cleanUp()

    def assert_expected_outputs(self, raise_error=True):
        """
        After a run, asserts that all signature files are present at least once in the output files
        Mainly useful for integration tests
        :return:
        """
        assert self.status == 0
        _logger.info('Checking output files')
        everything_is_fine, files = self.assert_expected(self.output_files)

        if not everything_is_fine:
            for out in self.outputs:
                _logger.error(f"{out}")
            if raise_error:
                raise FileNotFoundError(
                    "Missing outputs after task completion")

        return everything_is_fine, files

    def assert_expected_inputs(self, raise_error=True):
        """
        Before running a task, check that all the files necessary to run it have been
        downloaded or are already on the local file system
        :return:
        """
        _logger.info('Checking input files')
        everything_is_fine, files = self.assert_expected(self.input_files)

        if not everything_is_fine and raise_error:
            raise FileNotFoundError("Missing inputs to run task")

        return everything_is_fine, files

    def assert_expected(self, expected_files, silent=False):
        everything_is_fine = True
        files = []
        for expected_file in expected_files:
            actual_files = list(
                Path(self.session_path).rglob(
                    str(Path(expected_file[1]).joinpath(expected_file[0]))))
            if len(actual_files) == 0 and expected_file[2]:
                everything_is_fine = False
                if not silent:
                    _logger.error(
                        f"Signature file expected {expected_file} not found")
            else:
                if len(actual_files) != 0:
                    files.append(actual_files[0])

        return everything_is_fine, files

    def get_data_handler(self, location=None):
        """
        Gets the relevant data handler based on location argument
        :return:
        """
        location = location or self.location
        if location == 'local':
            return data_handlers.LocalDataHandler(self.session_path,
                                                  self.signature,
                                                  one=self.one)
        self.one = self.one or ONE()
        if location == 'server':
            dhandler = data_handlers.ServerDataHandler(self.session_path,
                                                       self.signature,
                                                       one=self.one)
        elif location == 'serverglobus':
            dhandler = data_handlers.ServerGlobusDataHandler(self.session_path,
                                                             self.signature,
                                                             one=self.one)
        elif location == 'remote':
            dhandler = data_handlers.RemoteHttpDataHandler(self.session_path,
                                                           self.signature,
                                                           one=self.one)
        elif location == 'AWS':
            dhandler = data_handlers.RemoteAwsDataHandler(self,
                                                          self.session_path,
                                                          self.signature,
                                                          one=self.one)
        elif location == 'SDSC':
            dhandler = data_handlers.SDSCDataHandler(self,
                                                     self.session_path,
                                                     self.signature,
                                                     one=self.one)
        return dhandler

    @staticmethod
    def make_lock_file(taskname="", time_out_secs=7200):
        """Creates a GPU lock file with a timeout of"""
        d = {
            'start': time.time(),
            'name': taskname,
            'time_out_secs': time_out_secs
        }
        with open(Task._lock_file_path(), 'w+') as fid:
            json.dump(d, fid)
        return d

    @staticmethod
    def _lock_file_path():
        """the lock file is in ~/.one/gpu.lock"""
        folder = Path.home().joinpath('.one')
        folder.mkdir(exist_ok=True)
        return folder.joinpath('gpu.lock')

    def _make_lock_file(self):
        """creates a lock file with the current time"""
        return Task.make_lock_file(self.name, self.time_out_secs)

    def is_locked(self):
        """Checks if there is a lock file for this given task"""
        lock_file = self._lock_file_path()
        if not lock_file.exists():
            return False

        with open(lock_file) as fid:
            d = json.load(fid)
        now = time.time()
        if (now - d['start']) > d['time_out_secs']:
            lock_file.unlink()
            return False
        else:
            return True

    def _creates_lock(self):
        if self.is_locked():
            return False
        else:
            self._make_lock_file()
            return True
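To use the base class above, a pipeline typically subclasses it and implements _run. A minimal, self-contained sketch under assumed names; the task, its signature files and the commented usage are illustrative only:

from pathlib import Path


class QcReportTask(Task):  # hypothetical subclass of the Task base class above
    cpu = 2
    priority = 60
    signature = {
        'input_files': [('spikes.times.npy', 'alf', True)],   # (filename, collection, required)
        'output_files': [('_qc_report.json', 'alf', True)],
    }

    def _run(self, overwrite=False):
        out_file = Path(self.session_path).joinpath('alf', '_qc_report.json')
        if out_file.exists() and not overwrite:
            return out_file
        out_file.write_text('{"passed": true}')
        return out_file


# task = QcReportTask(session_path, one=one, location='server')
# status = task.run()  # 0: Complete, -1: Errored, -2: lock encountered, -3: Incomplete
# task.register_datasets(one=one)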