Example 1
    def insert_new_params(
        cls,
        processing_method: str,
        paramset_id: int,
        paramset_desc: str,
        params: dict,
        processing_method_desc: str = "",
    ):
        ProcessingMethod.insert1({"processing_method": processing_method},
                                 skip_duplicates=True)
        param_dict = {
            "processing_method": processing_method,
            "paramset_id": paramset_id,
            "paramset_desc": paramset_desc,
            "params": params,
            "param_set_hash": dict_to_uuid(params),
        }
        q_param = cls & {"param_set_hash": param_dict["param_set_hash"]}

        if q_param:  # If the specified param-set already exists
            pname = q_param.fetch1("paramset_id")
            if pname == paramset_id:  # the existing set has the same id: job done
                return
            else:  # same params under a different id: likely a human error
                raise dj.DataJointError(
                    "The specified param-set already exists - name: {}".format(
                        pname))
        else:
            cls.insert1(param_dict)
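
Examples 1, 13, and 14 hash the params dict with a dict_to_uuid helper before checking for duplicates; the helper itself is not shown here. A minimal sketch of what such a function could look like, assuming an MD5 digest over the sorted key/value pairs:

import hashlib
import uuid


def dict_to_uuid(key):
    """Deterministically hash a dictionary's keys and values into a UUID (illustrative sketch)."""
    hashed = hashlib.md5()
    for k, v in sorted(key.items()):
        hashed.update(str(k).encode())
        hashed.update(str(v).encode())
    return uuid.UUID(hex=hashed.hexdigest())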
Example 2
    def local_endpoint(self):
        custom = dj.config.get('custom', {})
        if 'globus.local_endpoint' in custom:
            return (custom['globus.local_endpoint'],
                    custom['globus.local_endpoint_subdir'],
                    custom['globus.local_endpoint_local_path'])
        else:
            raise dj.DataJointError("globus.local_endpoint not configured")
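
For the accessor above to return successfully, dj.config['custom'] must contain the three globus.local_endpoint* keys it reads. A hypothetical configuration block (the endpoint UUID and paths are illustrative, site-specific values):

import datajoint as dj

# Hypothetical values; replace with the local Globus endpoint UUID and paths.
dj.config['custom'] = {
    'globus.local_endpoint': '01234567-89ab-cdef-0123-456789abcdef',
    'globus.local_endpoint_subdir': 'publication',
    'globus.local_endpoint_local_path': '/data/publication',
}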
Example 3
    def format_value(self, value):
        """format value
        """

        if self.foreign_is_manuallookup:
            if value['existing_entries'] == '<new>':
                value.pop('existing_entries')
                try:
                    self.foreign_table.insert1(
                        self.foreign_table_format_value(value))
                except dj.DataJointError as e:
                    raise dj.DataJointError(
                        "An error occured while inserting into parent table"
                        f" {self.foreign_table.full_table_name}: {e}")
                if self.aliased is None:
                    return {name: value[name] for name in self.names}
                else:
                    return {
                        name: value[self.aliased[name]]
                        for name in self.names
                    }
            elif self.singular:
                return {self.name: value['existing_entries']}
            else:
                values = value['existing_entries'].strip('()').split(', ')
                # strip any quotes
                values = [ele.strip().strip('"').strip("'") for ele in values]
                return dict(zip(self.names, values))

        if self.is_blob:
            value = self._process_blob_value(value)

        # return value
        return {self.name: value}
Example 4
    def format_value(self, value):
        """format value
        """

        if self.foreign_is_manuallookup:
            if value['existing_entries'] == '<new>':
                value.pop('existing_entries')
                try:
                    self.foreign_table.insert1(
                        self.foreign_table_format_value(value)
                    )
                except dj.DataJointError as e:
                    raise dj.DataJointError(
                        "An error occured while inserting into parent table"
                        f" {self.foreign_table.full_table_name}: {e}"
                    )
                if self.aliased is None:
                    value = value[self.name]
                else:
                    value = value[self.aliased[self.name]]
            else:
                value = value['existing_entries']

        if self.attr.is_blob:
            value = self._process_blob_value(value)

        return value
Example 5
    def retrieve1(cls, key):
        '''
        retrieve related files for a given key
        '''
        self = cls()

        # >>> list(key.keys())
        # ['subject_id', 'session', 'trial', 'electrode_group', 'globus_alias']

        log.debug(key)
        lep, lep_sub, lep_dir = GlobusStorageLocation().local_endpoint
        log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))

        # get session related information needed for filenames/records
        sinfo = ((lab.WaterRestriction * lab.Subject.proj() *
                  experiment.Session() * experiment.SessionTrial)
                 & key).fetch1()

        h2o = sinfo['water_restriction_number']
        sdate = sinfo['session_date']
        eg = key['electrode_group']
        trial = key['trial']

        # build file locations:
        # fpat: base file pattern for this sessions files
        # gbase: globus-url base path for this sessions files

        fpat = '{}_{}_{}_g0_t{}'.format(h2o, sdate, eg, trial)
        gbase = '/'.join((h2o, str(sdate), str(eg), fpat))

        repname, rep, rep_sub = (GlobusStorageLocation() & key).fetch()[0]

        gsm = self.get_gsm()
        gsm.activate_endpoint(lep)  # XXX: cache this / prevent duplicate RPC?
        gsm.activate_endpoint(rep)  # XXX: cache this / prevent duplicate RPC?

        sfxmap = {
            '.imec.ap.bin': ArchivedRawEphysTrial.ArchivedApChannel,
            '.imec.ap.meta': ArchivedRawEphysTrial.ArchivedApMeta,
            '.imec.lf.bin': ArchivedRawEphysTrial.ArchivedLfChannel,
            '.imec.lf.meta': ArchivedRawEphysTrial.ArchivedLfMeta
        }

        for sfx, cls in sfxmap.items():
            if cls & key:
                log.debug('record found for {} & {}'.format(cls.__name__, key))
                gname = '{}{}'.format(gbase, sfx)

                srcp = '{}:/{}/{}'.format(rep, rep_sub, gname)
                dstp = '{}:/{}/{}'.format(lep, lep_sub, gname)

                log.info('transferring {} to {}'.format(srcp, dstp))

                # XXX: check if exists 1st? (manually or via API copy-checksum)
                if not gsm.cp(srcp, dstp):
                    emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                    log.error(emsg)
                    raise dj.DataJointError(emsg)
Example 6
    def put(self, obj):
        if obj is None:
            return

        if isinstance(obj, str):
            obj = obj.strip().lower()
        else:
            raise dj.DataJointError(
                f"lookup name '{obj}' must be of type "
                f"'str' and not '{type(obj)}'."
            )

        if not obj.isidentifier():
            raise dj.DataJointError(
                f"lookup name '{obj}' is not an identifier; "
                "it containes characters besides alphanumeric and/or "
                "an underscore."
            )

        return obj
Example 7
def _insert_new_params(tbl_class, param_set_name: str, params: dict):
    param_dict = {'param_set_name': param_set_name,
                  'params': params,
                  'param_set_hash': UUID(dict_to_hash(params))}
    q_param = tbl_class & {'param_set_hash': param_dict['param_set_hash']}

    if q_param:  # If the specified param-set already exists
        pname = q_param.fetch1('param_set_name')
        if pname == param_set_name:  # the existing set has the same name: job done
            return
        else:  # same params under a different name: likely a human error
            raise dj.DataJointError('The specified param-set already exists - name: {}'.format(pname))
    else:
        tbl_class.insert1(param_dict)
Example 8
    def register(self, **kwargs):
        for key, rel in kwargs.items():
            if key in self.store:
                return

            if isinstance(rel, (dj.Computed, dj.Imported, dj.Part)):
                raise dj.DataJointError(
                    "Data should not be entered directly in Computed, Imported, or Subordinate tables."
                )

            class ReturnValue(wtf.Form):
                _rel = rel

                @classmethod
                def append_field(cls, name, field):
                    setattr(cls, name, field)
                    return cls

                def insert(self2, replace=False):
                    rel = self2._rel
                    dat = {}
                    for k, v in self2._fields.items():
                        if v.data is not None and k != 'REFERRER':  # skip unset fields and the hidden REFERRER field
                            if isinstance(v.data, (datetime.datetime, datetime.date)):
                                dat[k] = str(v.data)
                            else:
                                dat[k] = v.data
                    rel.insert1(dat, replace=replace)

            ReturnValue.required = OrderedDict()
            for name, attr in rel.heading.attributes.items():
                ReturnValue.append_field(name, field_factory(attr))
                #setattr(ReturnValue, name, field_factory(attr))
                ReturnValue.required[name] = (
                    not attr.nullable and attr.default is None)
            ReturnValue.append_field(
                'REFERRER',
                wtf.StringField(label='REFERRER', widget=HiddenInput()))
            self.store[key] = ReturnValue
Example 9
    def retrieve1(self, key):
        '''
        retrieve related files for a given key
        '''
        log.debug(key)

        # get remote file information
        linfo = (self * GlobusStorageLocation & key).fetch1()

        rep = linfo['globus_endpoint']
        rep_sub = linfo['globus_path']
        vfile = linfo['video_file_name']

        # get session related information needed for filenames/records
        sinfo = ((lab.WaterRestriction * lab.Subject.proj() * (
            (tracking.TrackingDevice * tracking.Tracking.proj()) & key) *
                  experiment.Session * experiment.SessionTrial)
                 & key).fetch1()

        h2o = sinfo['water_restriction_number']
        sdate_iso = sinfo['session_date'].isoformat()  # YYYY-MM-DD

        # get local endpoint information
        globus_alias = 'raw-video'
        le = GlobusStorageLocation.local_endpoint(globus_alias)
        lep, lep_sub = le['endpoint'], le['endpoint_subdir']

        # build source/destination paths & initiate transfer
        gfile = '{}/{}/{}/{}'.format(h2o, sdate_iso, 'video', vfile)

        srcp = '{}:{}/{}'.format(rep, rep_sub, gfile)  # source path
        dstp = '{}:{}/{}'.format(lep, lep_sub, gfile)  # dest path

        gsm = self.get_gsm()
        gsm.activate_endpoint(lep)  # XXX: cache this / prevent duplicate RPC?
        gsm.activate_endpoint(rep)  # XXX: cache this / prevent duplicate RPC?

        log.info('transferring {} to {}'.format(srcp, dstp))
        if not gsm.cp(srcp, dstp):
            emsg = "couldn't transfer {} to {}".format(srcp, dstp)
            log.error(emsg)
            raise dj.DataJointError(emsg)
Example 10
    def local_endpoint(cls, globus_alias=None):
        '''
        return local endpoint for globus_alias from dj.config
        expects:
          globus.local_endpoints: {
            globus_alias: {
              'endpoint': uuid,  # UUID of local endpoint
              'endpoint_subdir': str,  # unix-style path within endpoint
              'endpoint_path': str  # corresponding local path
            }
          }
        '''
        le = dj.config.get('custom', {}).get('globus.local_endpoints', None)

        if le is None or globus_alias not in le:
            raise dj.DataJointError(
                "globus_local_endpoints for {} not configured".format(
                    globus_alias))

        return le[globus_alias]
Example 11
    def local_endpoint(cls, globus_alias=None):
        '''
        return local endpoint for globus_alias from dj.config
        expects:
          globus.local_endpoints: {
            globus_alias: {
              'endpoint': uuid,  # UUID of local endpoint
              'endpoint_subdir': str,  # unix-style path within endpoint
              'endpoint_path': str  # corresponding local path
            }
          }
        '''
        custom = dj.config.get('custom', None)
        if custom and 'globus.local_endpoints' in custom:
            try:
                return custom['globus.local_endpoints'][globus_alias]
            except KeyError:
                pass

        raise dj.DataJointError(
            "globus_local_endpoints for {} not configured".format(
                globus_alias))
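
Both variants of local_endpoint above look up a nested globus.local_endpoints mapping keyed by alias. A hypothetical dj.config entry matching the structure described in the docstrings (endpoint UUID and paths are illustrative):

import datajoint as dj

# Hypothetical values; replace with the local Globus endpoint UUID and paths.
dj.config['custom'] = {
    'globus.local_endpoints': {
        'raw-ephys': {
            'endpoint': 'aaaabbbb-cccc-dddd-eeee-ffff00001111',
            'endpoint_subdir': 'map/raw-ephys',
            'endpoint_path': '/data/map/raw-ephys',
        },
    },
}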
Example 12
    def insert_new_params(cls, processing_method: str, paramset_idx: int,
                          paramset_desc: str, params: dict):
        param_dict = {
            'processing_method': processing_method,
            'paramset_idx': paramset_idx,
            'paramset_desc': paramset_desc,
            'params': params,
            'param_set_hash': UUID(dict_to_hash(params))
        }
        q_param = cls & {'param_set_hash': param_dict['param_set_hash']}

        if q_param:  # If the specified param-set already exists
            pname = q_param.fetch1('paramset_idx')
            if pname == paramset_idx:  # the existing set has the same paramset_idx: job done
                return
            else:  # same params under a different paramset_idx: likely a human error
                raise dj.DataJointError(
                    'The specified param-set already exists - name: {}'.format(
                        pname))
        else:
            cls.insert1(param_dict)
Example 13
    def insert_new_params(cls, preprocess_method: str, paramset_idx: int,
                          paramset_desc: str, params: dict):
        param_dict = {
            "preprocess_method": preprocess_method,
            "paramset_idx": paramset_idx,
            "paramset_desc": paramset_desc,
            "params": params,
            "param_set_hash": dict_to_uuid(params),
        }
        q_param = cls & {"param_set_hash": param_dict["param_set_hash"]}

        if q_param:  # If the specified param-set already exists
            pname = q_param.fetch1("paramset_idx")
            if pname == paramset_idx:  # the existing set has the same paramset_idx: job done
                return
            else:  # same params under a different paramset_idx: likely a human error
                raise dj.DataJointError(
                    "The specified param-set already exists - name: {}".format(
                        pname))
        else:
            cls.insert1(param_dict)
Example 14
    def insert_new_params(cls, processing_method: str, paramset_idx: int,
                          paramset_desc: str, params: dict):
        param_dict = {
            'clustering_method': processing_method,
            'paramset_idx': paramset_idx,
            'paramset_desc': paramset_desc,
            'params': params,
            'param_set_hash': dict_to_uuid(params)
        }
        param_query = cls & {'param_set_hash': param_dict['param_set_hash']}

        if param_query:  # If the specified param-set already exists
            existing_paramset_idx = param_query.fetch1('paramset_idx')
            if existing_paramset_idx == paramset_idx:  # If the existing set has the same paramset_idx: job done
                return
            else:  # same params under a different paramset_idx: likely a human error
                raise dj.DataJointError(
                    'The specified param-set'
                    ' already exists - paramset_idx: {}'.format(
                        existing_paramset_idx))
        else:
            cls.insert1(param_dict)
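
A hypothetical call to the classmethod in Example 14, assuming it is bound to a ClusteringParamSet-style lookup table (the table name and parameter values are illustrative):

# Hypothetical usage; ClusteringParamSet and the parameter values are illustrative.
ClusteringParamSet.insert_new_params(
    processing_method='kilosort2',
    paramset_idx=0,
    paramset_desc='default kilosort2 clustering parameters',
    params={'fs': 30000, 'detect_threshold': 6},
)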
Example 15
        def commit(skey, sfiles):
            log.info('commit. skey: {}'.format(skey))

            if not sfiles:
                log.info('commit skipping {}. no files in set'.format(skey))
                return

            # log.debug('sfiles: {}'.format(sfiles))

            h2o, sdate, ftypes = set(), set(), set()

            dftmap = {}  # device:file:trial via load_campath mapping files
            dvfmap = defaultdict(
                lambda: defaultdict(list))  # device:video:file
            dtfmap = defaultdict(
                lambda: defaultdict(list))  # device:trial:file

            for s in sfiles:

                if s['file_type'] == 'tracking-video-trial':
                    dvfmap[s['position']][s['video']].append(s)
                    h2o.add(s['water_restriction_number'])
                    sdate.add(s['session_date'])
                    ftypes.add(s['file_type'])

                if s['file_type'] == 'tracking-video-map':
                    # xfer & load camera:trial map ex: dl55_20190108_side.txtb
                    fsp = s['file_subpath']
                    lsp = '/tmp/' + s['file_subpath'].split('/')[-1]

                    srcp = '{}:{}/{}'.format(rep, rep_sub, fsp)
                    dstp = '{}:{}/{}'.format(lep, lep_sub, lsp)

                    log.info('transferring {} to {}'.format(srcp, dstp))

                    if not gsm.cp(srcp, dstp):  # XXX: check if exists 1st?
                        emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                        log.error(emsg)
                        raise dj.DataJointError(emsg)

                    lfname = lep_dir + lsp  # local filesystem copy location

                    dftmap[s['position']] = TrackingIngest.load_campath(lfname)

            if len(h2o) != 1 or len(sdate) != 1:
                log.info('skipping. bad h2o {} or session date {}'.format(
                    h2o, sdate))
                return

            h2o, sdate = next(iter(h2o)), next(iter(sdate))

            for d in dvfmap:
                if d in dftmap:  # remap video no -> trial
                    dtfmap[d] = {
                        dftmap[d][v]: dict(dvfmap[d][v], trial=dftmap[d][v])
                        for v in dvfmap[d]
                    }
                else:  # assign video no -> trial
                    dtfmap[d] = {
                        k: dict(v, trial=v['video'])
                        for k, v in dvfmap[d].items()
                    }

            # DataSet
            ds_type = 'tracking-video'
            ds_name = '{}_{}_{}'.format(h2o, sdate, ds_type)
            ds_key = {'dataset_name': ds_name, 'globus_alias': globus_alias}

            if (DataSet & ds_key):
                log.info(
                    'DataSet: {} already exists. Skipping.'.format(ds_key))
                return

            DataSet.insert1({**ds_key, 'dataset_type': ds_type},
                            allow_direct_insert=True)

            # ArchivedSession
            as_key = {
                k: v
                for k, v in smap[skey].items()
                if k in ArchivedSession.primary_key
            }

            ArchivedSession.insert1({**as_key, 'globus_alias': globus_alias},
                                    allow_direct_insert=True,
                                    skip_duplicates=True)

            for d in dtfmap:

                # ArchivedTrackingVideo
                atv_key = {**as_key, **ds_key, 'tracking_device': tpos_dev[d]}

                ArchivedTrackingVideo.insert1(atv_key,
                                              allow_direct_insert=True)

                for t in dtfmap[d]:
                    for f in dtfmap[d][t]:

                        DataSet.PhysicalFile.insert1(
                            {**ds_key, **f},
                            allow_direct_insert=True,
                            ignore_extra_fields=True)

                        ArchivedTrackingVideo.TrialVideo.insert1(
                            {**atv_key, **ds_key, 'trial': t,
                             'file_subpath': f['file_subpath']},
                            allow_direct_insert=True)
Example 16
    def _insert(self,
                formatted_dict,
                _id=None,
                primary_dict=None,
                check_reserved=True,
                override_update_truth=False,
                **kwargs):
        """insert helper function
        """

        insert_dict = {}

        for key, value in formatted_dict.items():
            if key in self.fields:
                insert_dict.update(self.fields[key].format_value(value))

        if _id is None or kwargs.get('replace', False):
            truth = True
        elif len(self.table & _id) == 0:
            if override_update_truth:
                truth = True
            else:
                raise dj.DataJointError(
                    f'Entry {_id} does not exist; cannot update.')
        else:
            truth = False

        if primary_dict is not None:
            insert_dict = {**primary_dict, **insert_dict}

        primary_dict = {
            key: value
            for key, value in insert_dict.items()
            if key in self.table.primary_key
        }

        jobs = config['schemata'][self.table.database].schema.jobs

        if check_reserved:
            reserved = (jobs
                        & {
                            'table_name': self.table.table_name,
                            'key_hash': key_hash(primary_dict)
                        })
            if reserved:
                raise dj.DataJointError(
                    f"Entry {primary_dict} has been reserved for table "
                    f"{self.table.full_table_name}; "
                    "change your primary key values.")

        if truth:
            try:
                self.table.insert1(insert_dict, **kwargs)
            except dj.DataJointError as e:
                raise dj.DataJointError(
                    "An error occured while inserting into table "
                    f"{self.table.full_table_name}: {e}")
        else:  # editing entries safely
            # DO NOT remove primary keys with new update1 method
            insert_dict = {
                key: value
                for key, value in insert_dict.items() if (
                    # key not in self.table.primary_key
                    # skip updating non-specified files
                    # TODO fix for uploading files
                    not (value is None and
                         (self.fields[key].attr.is_blob
                          or self.fields[key].attr.is_attachment)))
            }
            if insert_dict:
                try:
                    self.table.update1(insert_dict)
                except dj.DataJointError as e:
                    raise dj.DataJointError(
                        "An error occured while updating table "
                        f"{self.table.full_table_name}: {e}")

        return primary_dict
Example 17
    def make(self, key):
        """
        discover files in local endpoint and transfer/register
        """
        log.info('ArchivedVideoFile.make(): {}'.format(key))

        # {'tracking_device': 'Camera 0', 'subject_id': 432572, 'session': 1}

        globus_alias = 'raw-video'
        le = GlobusStorageLocation.local_endpoint(globus_alias)
        lep, lep_sub, lep_dir = (le['endpoint'], le['endpoint_subdir'],
                                 le['endpoint_path'])

        re = (GlobusStorageLocation & {'globus_alias': globus_alias}).fetch1()
        rep, rep_sub = re['globus_endpoint'], re['globus_path']

        log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))
        log.info('remote_endpoint: {}:{}'.format(rep, rep_sub))

        h2o = (lab.WaterRestriction & key).fetch1('water_restriction_number')

        session = (experiment.Session & key).fetch1()
        sdate = session['session_date']
        sdate_sml = "{}{:02d}{:02d}".format(sdate.year, sdate.month, sdate.day)

        dev = (tracking.TrackingDevice & key).fetch1()

        trls = (experiment.SessionTrial & key).fetch(order_by='trial',
                                                     as_dict=True)

        tracking_ingest = self.get_ingest()

        tdev = dev['tracking_device']
        tpos = dev['tracking_position']

        camtrial = '{}_{}_{}.txt'.format(h2o, sdate_sml, tpos)
        vbase = pathlib.Path(lep_dir, h2o, sdate_sml, 'video')
        campath = vbase / camtrial

        if not campath.exists():  # XXX: uses 1st found
            log.warning('trial map {} n/a! skipping.'.format(campath))
            return

        log.info('loading trial map: {}'.format(campath))
        vmap = {
            v: k
            for k, v in tracking_ingest.TrackingIngest.load_campath(
                campath).items()
        }
        log.debug('loaded video map: {}'.format(vmap))

        # add ArchivedSession

        as_key = {
            k: v
            for k, v in key.items() if k in experiment.Session.primary_key
        }
        as_rec = {**as_key, 'globus_alias': globus_alias}

        ArchivedSession.insert1(as_rec,
                                allow_direct_insert=True,
                                skip_duplicates=True)

        # add DataSet

        ds_type = 'tracking-video'
        ds_name = '{}_{}_{}_{}'.format(h2o, sdate.isoformat(), ds_type, tpos)
        ds_key = {'globus_alias': globus_alias, 'dataset_name': ds_name}
        ds_rec = {**ds_key, 'dataset_type': ds_type}

        DataSet.insert1(ds_rec, allow_direct_insert=True)

        # add ArchivedVideoTracking

        vt_key = {**as_key, 'tracking_device': tdev}
        vt_rec = {
            **vt_key, 'globus_alias': globus_alias,
            'dataset_name': ds_name
        }

        self.insert1(vt_rec)

        filetype = 'tracking-video-trial'

        for t in trls:
            trial = t['trial']
            log.info('.. tracking trial {} ({})'.format(trial, t))

            if trial not in vmap:
                log.warning('trial {} not in video map. skipping!'.format(trial))
                continue

            vmatch = '{}_{}_{}-*'.format(h2o, tpos, vmap[trial])
            log.debug('vbase: {}, vmatch: {}'.format(vbase, vmatch))
            vglob = list(vbase.glob(vmatch))

            if len(vglob) != 1:
                emsg = 'incorrect videos found in {}: {}'.format(vbase, vglob)
                log.warning(emsg)
                raise dj.DataJointError(emsg)

            vfile = vglob[0].name
            gfile = '{}/{}/{}/{}'.format(h2o, sdate_sml, 'video',
                                         vfile)  # subpath

            srcp = '{}:{}/{}'.format(lep, lep_sub, gfile)  # source path
            dstp = '{}:{}/{}'.format(rep, rep_sub, gfile)  # dest path

            gsm = self.get_gsm()
            gsm.activate_endpoint(lep)  # XXX: cache / prevent duplicate RPC?
            gsm.activate_endpoint(rep)  # XXX: cache / prevent duplicate RPC?

            log.info('transferring {} to {}'.format(srcp, dstp))

            if not gsm.cp(srcp, dstp):
                emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                log.error(emsg)
                raise dj.DataJointError(emsg)

            pf_key = {**ds_key, 'file_subpath': vfile}
            pf_rec = {**pf_key, 'file_type': filetype}

            DataSet.PhysicalFile.insert1({**pf_rec}, allow_direct_insert=True)

            trk_key = {
                k: v
                for k, v in {
                    **key, 'trial': trial
                }.items() if k in experiment.SessionTrial.primary_key
            }

            tv_rec = {**vt_key, **trk_key, **pf_key}
            self.TrialVideo.insert1({**tv_rec})
Example 18
    def make(self, key):
        """
        discover files in local endpoint and transfer/register
        """

        log.debug(key)
        globus_alias = 'raw-ephys'
        le = GlobusStorageLocation.local_endpoint(globus_alias)
        lep, lep_sub, lep_dir = (le['endpoint'], le['endpoint_subdir'],
                                 le['endpoint_path'])

        re, rep, rep_sub = (GlobusStorageLocation()
                            & {
                                'globus_alias': globus_alias
                            }).fetch1().values()

        log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))

        # Get session related information needed for filenames/records

        sinfo = (
            lab.WaterRestriction * lab.Subject.proj() * experiment.Session()
            & key).fetch1()

        tinfo = ((lab.WaterRestriction * lab.Subject.proj() *
                  experiment.Session() * experiment.SessionTrial)
                 & key).fetch()

        h2o = sinfo['water_restriction_number']
        sdate = sinfo['session_date']

        subdir = pathlib.Path(h2o, str(sdate).replace('-', ''))  # + probeno
        lep_subdir = pathlib.Path(lep_dir, subdir)

        probechoice = [str(i) for i in range(1, 10)]  # XXX: hardcoded

        file_globs = {
            i['file_glob']: i['file_type']
            for i in FileType & "file_type like 'ephys%%'"
        }

        # Process each probe folder

        for lep_probedir in lep_subdir.glob('*'):
            lep_probe = str(lep_probedir.relative_to(lep_subdir))
            if lep_probe not in probechoice:
                log.info('skipping lep_probedir: {} - unexpected name'.format(
                    lep_probedir))
                continue

            lep_matchfiles = {}
            lep_probefiles = lep_probedir.glob('*.*')

            for pf in lep_probefiles:
                pfbase = pf.relative_to(lep_probedir)
                pfmatch = {k: pfbase.match(k) for k in file_globs}
                if any(pfmatch.values()):
                    log.debug('found valid file: {}'.format(pf))
                    lep_matchfiles[pf] = tuple(k for k in pfmatch
                                               if pfmatch[k])
                else:
                    log.debug('skipping non-match file: {}'.format(pf))
                    continue

            # Build/Validate file records

            if not all([len(lep_matchfiles[i]) == 1 for i in lep_matchfiles]):
                # TODO: handle trial + concatenated match case...
                log.warning(
                    'files matched multiple types: {}'.format(lep_matchfiles))
                continue

            type_to_file = {
                file_globs[lep_matchfiles[mf][0]]: mf
                for mf in lep_matchfiles
            }

            ds_key, ds_name, ds_files, ds_trials = None, None, [], []

            if all(['trial' in t for t in type_to_file]):
                dataset_type = 'ephys-raw-trialized'

                ds_name = '{}_{}_{}'.format(h2o, sdate.isoformat(),
                                            dataset_type)

                ds_key = {
                    'dataset_name': ds_name,
                    'globus_storage_location': globus_alias
                }

                for t in type_to_file:
                    fsp = type_to_file[t].relative_to(lep_dir)
                    dsf = {**ds_key, 'file_subpath': str(fsp)}

                    # e.g : 'tw34_g0_t0.imec.ap.meta' -> *_t(trial).*
                    trial = int(fsp.name.split('_t')[1].split('.')[0])

                    if trial not in tinfo['trial']:
                        log.warning(
                            'unknown trial file: {}. skipping'.format(dsf))
                        continue

                    ds_trials.append({**dsf, 'trial': trial})
                    ds_files.append({**dsf, 'file_type': t})

            elif all(['concat' in t for t in type_to_file]):
                dataset_type = 'ephys-raw-continuous'

                ds_name = '{}_{}_{}'.format(h2o, sdate.isoformat(),
                                            dataset_type)

                ds_key = {
                    'dataset_name': ds_name,
                    'globus_storage_location': globus_alias
                }

                for t in type_to_file:
                    fsp = type_to_file[t].relative_to(lep_dir)
                    ds_files.append({
                        **ds_key, 'file_subpath': str(fsp),
                        'file_type': t
                    })

            else:
                log.warning("couldn't determine dataset type for {}".format(
                    lep_probedir))
                continue

            # Transfer Files

            gsm = self.get_gsm()
            gsm.activate_endpoint(lep)  # XXX: cache / prevent duplicate RPC?
            gsm.activate_endpoint(rep)  # XXX: cache / prevent duplicate RPC?

            DataSet.insert1({**ds_key, 'dataset_type': dataset_type},
                            allow_direct_insert=True)

            for f in ds_files:
                fsp = f['file_subpath']
                srcp = '{}:{}/{}'.format(lep, lep_sub, fsp)
                dstp = '{}:{}/{}'.format(rep, rep_sub, fsp)

                log.info('transferring {} to {}'.format(srcp, dstp))

                # XXX: check if exists 1st?
                if not gsm.cp(srcp, dstp):
                    emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                    log.error(emsg)
                    raise dj.DataJointError(emsg)

                DataSet.PhysicalFile.insert1({**ds_key, **f},
                                             allow_direct_insert=True)

            # Add Records
            ArchivedSession.insert1(
                {**key, 'globus_storage_location': globus_alias},
                skip_duplicates=True,
                allow_direct_insert=True)

            ArchivedRawEphys.insert1(
                {**key, **ds_key, 'probe_folder': int(lep_probe)},
                allow_direct_insert=True)

            if dataset_type == 'ephys-raw-trialized':
                ArchivedRawEphys.ArchivedTrials.insert(
                    [{**key, **t} for t in ds_trials],
                    allow_direct_insert=True)
Example 19
    def make(self, key):
        '''
        determine available files from local endpoint and publish
        (create database records and transfer to globus)
        '''

        # >>> list(key.keys())
        # ['subject_id', 'session', 'trial', 'electrode_group', 'globus_alias']

        log.debug(key)
        globus_alias = 'raw-ephys'
        le = GlobusStorageLocation.local_endpoint(globus_alias)
        lep, lep_sub, lep_dir = (le['endpoint'], le['endpoint_subdir'],
                                 le['endpoint_path'])

        log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))

        # get session related information needed for filenames/records
        sinfo = ((lab.WaterRestriction * lab.Subject.proj() *
                  experiment.Session() * experiment.SessionTrial)
                 & key).fetch1()

        h2o = sinfo['water_restriction_number']
        sdate = sinfo['session_date']
        eg = key['electrode_group']
        trial = key['trial']

        # build file locations:
        # subdir - common subdirectory for globus/native filesystem
        # fpat: base file pattern for this sessions files
        # fbase: filesystem base path for this sessions files
        # gbase: globus-url base path for this sessions files

        subdir = os.path.join(h2o, str(sdate), str(eg))
        fpat = '{}_{}_{}_g0_t{}'.format(h2o, sdate, eg, trial)
        fbase = os.path.join(lep_dir, subdir, fpat)
        gbase = '/'.join((h2o, str(sdate), str(eg), fpat))

        # check for existence of actual files & use to build xfer list
        log.debug('checking {}'.format(fbase))

        ffound = []
        ftypes = RawEphysFileTypes.contents
        for ft in ftypes:
            fname = '{}{}'.format(fbase, ft[1])
            gname = '{}{}'.format(gbase, ft[1])
            if not os.path.exists(fname):
                log.debug('... {}: not found'.format(fname))
                continue

            log.debug('... {}: found'.format(fname))
            ffound.append((
                ft,
                gname,
            ))

        # if files are found, transfer and create publication schema records

        if not len(ffound):
            log.info('no files found for key')
            return

        log.info('found files for key: {}'.format([f[1] for f in ffound]))

        repname, rep, rep_sub = (GlobusStorageLocation()
                                 & {
                                     'globus_alias': globus_alias
                                 }).fetch(limit=1)[0]

        gsm = self.get_gsm()
        gsm.activate_endpoint(lep)  # XXX: cache this / prevent duplicate RPC?
        gsm.activate_endpoint(rep)  # XXX: cache this / prevent duplicate RPC?

        if not self & key:
            log.info('ArchivedRawEphysTrial.insert1()')
            self.insert1({**key, 'globus_alias': globus_alias})

        ftmap = {
            'ap-30kHz': ArchivedRawEphysTrial.ArchivedApChannel,
            'ap-30kHz-meta': ArchivedRawEphysTrial.ArchivedApMeta,
            'lf-2.5kHz': ArchivedRawEphysTrial.ArchivedLfChannel,
            'lf-2.5kHz-meta': ArchivedRawEphysTrial.ArchivedLfMeta
        }

        for ft, gname in ffound:  # XXX: transfer/insert could be batched
            ft_class = ftmap[ft[0]]
            if not ft_class & key:
                srcp = '{}:/{}/{}'.format(lep, lep_sub, gname)
                dstp = '{}:/{}/{}'.format(rep, rep_sub, gname)

                log.info('transferring {} to {}'.format(srcp, dstp))

                # XXX: check if exists 1st? (manually or via API copy-checksum)
                if not gsm.cp(srcp, dstp):
                    emsg = "couldn't transfer {} to {}".format(srcp, dstp)
                    log.error(emsg)
                    raise dj.DataJointError(emsg)

                log.info('ArchivedRawEphysTrial.{}.insert1()'.format(
                    ft_class.__name__))

                ft_class.insert1(key)
Example 20
    def make(self, key):
        '''
        determine available files from local endpoint and publish
        (create database records and transfer to globus)
        '''
        log.info('ArchivedVideoFile.make(): {}'.format(key))

        globus_alias = 'raw-video'
        le = GlobusStorageLocation.local_endpoint(globus_alias)
        lep, lep_sub, lep_dir = (le['endpoint'], le['endpoint_subdir'],
                                 le['endpoint_path'])

        log.info('local_endpoint: {}:{} -> {}'.format(lep, lep_sub, lep_dir))

        h2o = (lab.WaterRestriction & key).fetch1('water_restriction_number')

        trial = key['trial']
        session = (experiment.Session & key).fetch1()
        sdate = session['session_date']
        sdate_iso = sdate.isoformat()  # YYYY-MM-DD
        sdate_sml = "{}{:02d}{:02d}".format(sdate.year, sdate.month, sdate.day)

        trk = (tracking.TrackingDevice *
               (tracking.Tracking & key).proj()).fetch1()

        tdev = trk['tracking_device']  # NOQA: notused
        tpos = trk['tracking_position']

        camtrial = '{}_{}_{}.txt'.format(h2o, sdate_sml, tpos)

        tracking_ingest = self.get_ingest()
        tpaths = tracking_ingest.TrackingDataPath.fetch(as_dict=True)

        campath = None
        tbase, vbase = None, None  # tracking, video session base paths
        for p in tpaths:

            tdat = p['tracking_data_path']

            tbase = pathlib.Path(tdat, h2o, sdate_iso, 'tracking')
            vbase = pathlib.Path(tdat, h2o, sdate_iso, 'video')

            campath = tbase / camtrial

            log.debug('trying camera position trial map: {}'.format(campath))

            if campath.exists():  # XXX: uses 1st found
                break

            log.debug('tracking path {} n/a - skipping'.format(tbase))
            campath = None

        if not campath:
            log.warning('tracking data not found for {} '.format(tpos))
            return

        tmap = tracking_ingest.TrackingIngest.load_campath(campath)

        if trial not in tmap:
            log.warning('nonexistent trial {}.. skipping'.format(trial))
            return

        repname, rep, rep_sub = (GlobusStorageLocation
                                 & {
                                     'globus_alias': globus_alias
                                 }).fetch(limit=1)[0]

        vmatch = '{}_{}_{}-*'.format(h2o, tpos, tmap[trial])
        vglob = list(vbase.glob(vmatch))

        if len(vglob) != 1:  # XXX: error instead of warning?
            log.warning('expected exactly one video, found: {}'.format(vglob))
            return

        vfile = vglob[0].name
        gfile = '{}/{}/{}/{}'.format(h2o, sdate_iso, 'video', vfile)  # subpath
        srcp = '{}:{}/{}'.format(lep, lep_sub, gfile)  # source path
        dstp = '{}:{}/{}'.format(rep, rep_sub, gfile)  # dest path

        gsm = self.get_gsm()
        gsm.activate_endpoint(lep)  # XXX: cache this / prevent duplicate RPC?
        gsm.activate_endpoint(rep)  # XXX: cache this / prevent duplicate RPC?

        log.info('transferring {} to {}'.format(srcp, dstp))
        if not gsm.cp(srcp, dstp):
            emsg = "couldn't transfer {} to {}".format(srcp, dstp)
            log.error(emsg)
            raise dj.DataJointError(emsg)

        self.insert1({
            **key, 'globus_alias': globus_alias,
            'video_file_name': vfile
        })