コード例 #1
0
    def _find(self, key, write, fuzzy_for, fuzzy_for_options):
        """Locate the storage directory for ``key`` below ``self.path``.

        An exact match is the directory ``<self.path>/<str(key)>``. If it
        is absent and fuzzy matching was requested, every sub-directory
        named ``<run_id>_<data_type>_<suffix>`` is inspected and the first
        one whose stored lineage satisfies ``self._matches`` is returned.

        :param key: data key; ``str(key)`` names the exact-match directory
            and ``run_id``, ``data_type`` and ``lineage`` drive the fuzzy
            search.
        :param write: if true, the caller intends to write to the location.
        :param fuzzy_for: forwarded to ``self._matches``.
        :param fuzzy_for_options: forwarded to ``self._matches``.
        :return: ``self.backend_key(path)`` for the selected directory.
        :raises strax.DataExistsError: when writing, the directory already
            exists and ``self._can_overwrite(key)`` is false.
        :raises strax.DataNotAvailable: when reading and no directory
            matches.
        """
        # Check exact match / write case
        dirname = osp.join(self.path, str(key))
        bk = self.backend_key(dirname)
        if osp.exists(dirname):
            if write and not self._can_overwrite(key):
                raise strax.DataExistsError(at=bk)
            return bk
        if write:
            return bk

        if not fuzzy_for and not fuzzy_for_options:
            raise strax.DataNotAvailable

        # Check metadata of all potentially matching data dirs for match...
        wanted_prefix = f'{key.run_id}_{key.data_type}'
        for candidate in os.listdir(self.path):
            fn = osp.join(self.path, candidate)
            if not osp.isdir(fn):
                continue
            # Split off only the trailing component. Unpacking
            # ``candidate.split('_')`` into three names would raise
            # ValueError for any directory that does not contain exactly
            # two underscores (e.g. a data type such as ``raw_records``
            # or an unrelated folder living in the same path).
            prefix, _, _ = candidate.rpartition('_')
            if prefix != wanted_prefix:
                continue
            # TODO: check for broken data
            metadata = self.backends[0].get_metadata(fn)
            if self._matches(metadata['lineage'], key.lineage, fuzzy_for,
                             fuzzy_for_options):
                # Return the joined path, consistent with the exact-match
                # branch above and with the path the metadata was read from.
                return self.backend_key(fn)

        raise strax.DataNotAvailable
コード例 #2
0
    def _find(self, key, write, allow_incomplete, fuzzy_for,
              fuzzy_for_options):
        """Resolve ``key`` to a backend key by probing its metadata.

        The first backend is asked for the metadata of ``key``; a
        ``ClientError`` whose error code is ``'NoSuchKey'`` is taken to
        mean that nothing is stored for the key yet.

        :raises NotImplementedError: if any fuzzy option is given.
        :raises strax.DataNotAvailable: when reading and the metadata
            lookup reports ``'NoSuchKey'``.
        :raises strax.DataExistsError: when writing, the metadata lookup
            succeeded and ``self._can_overwrite(key)`` is false.
        """
        if fuzzy_for or fuzzy_for_options:
            raise NotImplementedError("Can't do fuzzy with S3")

        bk = self.backend_key(str(key))

        try:
            self.backends[0].get_metadata(key)
        except ClientError as ex:
            # Any error other than a missing key is not ours to handle.
            if ex.response['Error']['Code'] != 'NoSuchKey':
                raise ex
            # Nothing stored yet: fine for a writer, fatal for a reader.
            if not write:
                raise strax.DataNotAvailable
            return bk

        # Metadata was found, so data already exists for this key.
        if not write or self._can_overwrite(key):
            return bk
        raise strax.DataExistsError(at=bk)
コード例 #3
0
    def _find(self, key, write,
              allow_incomplete, fuzzy_for, fuzzy_for_options):
        """Locate the directory holding data for ``key`` under ``self.path``.

        Lookup order when reading:

        1. with ``allow_incomplete``, the ``<dirname>_temp`` directory if
           it exists;
        2. the exact directory ``<self.path>/<str(key)>`` if it exists and
           ``self._folder_matches`` accepts it;
        3. the first entry of ``self._subfolders()`` accepted by
           ``self._folder_matches`` with the given fuzzy options.

        :param key: data key; ``str(key)`` names the exact directory.
        :param write: if true, return the exact location for writing.
        :param allow_incomplete: also accept a ``_temp`` directory.
        :param fuzzy_for: forwarded to ``self._folder_matches``.
        :param fuzzy_for_options: forwarded to ``self._folder_matches``.
        :return: ``self.backend_key(path)`` for the selected directory.
        :raises strax.DataExistsError: when writing, the directory exists
            and ``self._can_overwrite(key)`` is false.
        :raises NotImplementedError: if ``allow_incomplete`` is combined
            with any fuzzy option.
        :raises strax.DataNotAvailable: when reading and nothing matches.
        """
        dirname = osp.join(self.path, str(key))
        exists = os.path.exists(dirname)
        bk = self.backend_key(dirname)

        if write:
            if exists and not self._can_overwrite(key):
                raise strax.DataExistsError(at=dirname)
            return bk

        if allow_incomplete:
            # Check for incomplete data (only exact matching for now)
            if fuzzy_for or fuzzy_for_options:
                raise NotImplementedError(
                    "Mixing of fuzzy matching and allow_incomplete "
                    "not supported by DataDirectory.")
            tempdirname = dirname + '_temp'
            # Keep the temp key in its own variable: overwriting ``bk``
            # here would make the exact-match branch below return the key
            # of a ``_temp`` directory that does not exist.
            temp_bk = self.backend_key(tempdirname)
            if osp.exists(tempdirname):
                return temp_bk

        # Check exact match
        if exists and self._folder_matches(dirname, key, None, None):
            return bk

        # Check metadata of all potentially matching data dirs for match...
        for fn in self._subfolders():
            if self._folder_matches(fn, key,
                                    fuzzy_for, fuzzy_for_options):
                return self.backend_key(fn)

        raise strax.DataNotAvailable
コード例 #4
0
    def _find(self, key, write, fuzzy_for, fuzzy_for_options):
        """Resolve ``key`` to a backend key by listing objects in the bucket.

        Data is considered present when the ``list_objects`` response for
        the prefix ``str(key)`` contains a ``'Contents'`` entry.

        :raises NotImplementedError: if any fuzzy option is given.
        :raises strax.DataExistsError: when writing, objects are present
            and ``self._can_overwrite(key)`` is false.
        :raises strax.DataNotAvailable: when reading and no objects are
            present.
        """
        if fuzzy_for or fuzzy_for_options:
            raise NotImplementedError("Can't do fuzzy with S3")

        key_str = str(key)
        bk = self.backend_key(key_str)

        # Ask the store whether anything lives under this key.
        listing = self.s3.list_objects(Bucket=BUCKET_NAME,
                                       Prefix=key_str)
        has_objects = 'Contents' in listing

        if write:
            # Writers only fail when existing data may not be replaced.
            if has_objects and not self._can_overwrite(key):
                raise strax.DataExistsError(at=bk)
            return bk

        # Readers need at least one object to be present.
        if not has_objects:
            raise strax.DataNotAvailable
        return bk
コード例 #5
0
    def _find(self, key: strax.DataKey, write, allow_incomplete, fuzzy_for,
              fuzzy_for_options):
        """Look up ``key`` in the run collection, registering it on write.

        The run document is selected by ``name`` or ``number`` depending
        on ``self.runid_field`` and combined with ``self._data_query(key)``.
        If no document matches and ``write`` is true, a new data entry is
        pushed onto the run document and its output location is returned.

        :param key: data key to look up.
        :param write: if true, the caller intends to write the data.
        :param allow_incomplete: not used by this implementation.
        :param fuzzy_for: must be empty.
        :param fuzzy_for_options: must be empty.
        :return: ``(protocol, location)`` of the first matching data
            entry, or ``(strax.FileSytemBackend.__name__, output_path)``
            for newly registered data.
        :raises NotImplementedError: if any fuzzy option is given.
        :raises strax.DataNotAvailable: if no data entry matches and either
            ``write`` is false or ``self.new_data_path`` is ``None``.
        :raises ValueError: when writing and no run document matches.
        :raises strax.DataExistsError: when writing, a data entry matches
            and ``self._can_overwrite(key)`` is false.
        """
        if fuzzy_for or fuzzy_for_options:
            raise NotImplementedError("Can't do fuzzy with RunDB yet.")

        # Check if the run exists
        if self.runid_field == 'name':
            run_query = {'name': key.run_id}
        else:
            run_query = {'number': int(key.run_id)}
        dq = self._data_query(key)
        doc = self.collection.find_one({**run_query, **dq}, projection=dq)
        if doc is None:
            # Data was not found
            if not write:
                raise strax.DataNotAvailable

            # This check must come before the path is built:
            # os.path.join(None, ...) raises TypeError, which previously
            # made the ``is not None`` test unreachable for a None path.
            # NOTE(review): treating "no new_data_path" as "cannot store
            # here" is an assumption -- confirm with callers.
            if self.new_data_path is None:
                raise strax.DataNotAvailable

            output_path = os.path.join(self.new_data_path, str(key))

            doc = self.collection.find_one(run_query, projection={'_id'})
            if not doc:
                raise ValueError(
                    f"Attempt to register new data for non-existing run {key.run_id}"
                )  # noqa
            self.collection.find_one_and_update(
                {'_id': doc['_id']},
                {
                    '$push': {
                        'data': {
                            'location': output_path,
                            'host': self.hostname,
                            'type': key.data_type,
                            'protocol': strax.FileSytemBackend.__name__,
                            # TODO: duplication with metadata stuff elsewhere?
                            'meta': {
                                'lineage': key.lineage
                            }
                        }
                    }
                })

            return (strax.FileSytemBackend.__name__, output_path)

        # Use the first entry of the returned ``data`` array.
        datum = doc['data'][0]

        if write and not self._can_overwrite(key):
            raise strax.DataExistsError(at=datum['location'])

        return datum['protocol'], datum['location']
コード例 #6
0
    def _find(self, key, write,
              allow_incomplete, fuzzy_for, fuzzy_for_options):
        """Locate the directory holding data for ``key`` under ``self.path``.

        When reading, candidates are tried in this order: a ``_temp``
        directory (only with ``allow_incomplete`` and when the exact
        directory is absent), the exact directory, and finally, for fuzzy
        requests only, every entry of ``self._subfolders()``.

        :return: ``self.backend_key(path)`` for the selected directory.
        :raises strax.DataExistsError: when writing, the directory exists
            and ``self._can_overwrite(key)`` is false.
        :raises NotImplementedError: if the incomplete-data lookup is
            reached while a fuzzy option is set.
        :raises strax.DataNotAvailable: when reading and nothing matches.
        """
        self.raise_if_non_compatible_run_id(key.run_id)
        dirname = osp.join(self.path, str(key))
        exists = os.path.exists(dirname)
        bk = self.backend_key(dirname)

        if write:
            # A writer always gets the exact location unless existing
            # data is protected against overwriting.
            if not exists or self._can_overwrite(key):
                return bk
            raise strax.DataExistsError(at=dirname)

        is_fuzzy = bool(fuzzy_for or fuzzy_for_options)

        if allow_incomplete and not exists:
            # Incomplete data is only looked up by exact name for now.
            if is_fuzzy:
                raise NotImplementedError(
                    "Mixing of fuzzy matching and allow_incomplete "
                    "not supported by DataDirectory.")
            temp_dirname = dirname + '_temp'
            temp_key = self.backend_key(temp_dirname)
            if osp.exists(temp_dirname):
                return temp_key

        # Exact match on the directory name.
        if exists and self._folder_matches(dirname, key, None, None):
            return bk

        # Scanning all sub-folders only makes sense for fuzzy searches:
        # a non-fuzzy request would already have matched exactly above,
        # and the scan is slow when repeated for every unavailable key.
        if not is_fuzzy:
            raise strax.DataNotAvailable

        for fn in self._subfolders():
            if self._folder_matches(fn, key, fuzzy_for, fuzzy_for_options):
                return self.backend_key(fn)

        raise strax.DataNotAvailable
コード例 #7
0
    def _find(self, key: strax.DataKey, write, allow_incomplete, fuzzy_for,
              fuzzy_for_options):
        """Look up ``key`` in the run collection, preferring rucio entries.

        If ``self.rucio_path`` is set, a data entry with protocol
        ``'rucio'`` and the DID from ``self.key_to_rucio_did(key)`` is
        searched first. Otherwise, or if none is found, the lookup falls
        back to ``self._data_query(key)``. If nothing matches and ``write``
        is true, a new data entry is pushed onto the run document.

        :param key: data key to look up.
        :param write: if true, the caller intends to write the data.
        :param allow_incomplete: not used by this implementation.
        :param fuzzy_for: must be empty.
        :param fuzzy_for_options: must be empty.
        :return: ``(protocol, '<run_id>-<data_type>-<lineage_hash>')`` for
            a rucio entry, ``(protocol, location)`` for another existing
            entry, or ``(strax.FileSytemBackend.__name__, output_path)``
            for newly registered data.
        :raises NotImplementedError: if any fuzzy option is given.
        :raises strax.DataNotAvailable: if no data entry matches and either
            ``write`` is false or ``self.new_data_path`` is ``None``.
        :raises ValueError: when writing and no run document matches.
        :raises strax.DataExistsError: when writing, a non-rucio entry
            matches and ``self._can_overwrite(key)`` is false.
        """
        if fuzzy_for or fuzzy_for_options:
            raise NotImplementedError("Can't do fuzzy with RunDB yet.")

        # Check if the run exists
        if self.runid_field == 'name':
            run_query = {'name': str(key.run_id)}
        else:
            run_query = {'number': int(key.run_id)}

        # Check that we are in rucio backend
        if self.rucio_path is not None:
            rucio_key = self.key_to_rucio_did(key)
            dq = {
                'data': {
                    '$elemMatch': {
                        # TODO can we query smart on the lineage_hash?
                        'type': key.data_type,
                        'did': rucio_key,
                        'protocol': 'rucio'
                    }
                }
            }
            doc = self.collection.find_one({**run_query, **dq}, projection=dq)
            if doc is not None:
                datum = doc['data'][0]
                # Sanity check that the returned entry is the requested DID.
                assert datum.get(
                    'did', ''
                ) == rucio_key, f'Expected {rucio_key} got data on {datum["location"]}'
                backend_name, backend_key = datum[
                    'protocol'], f'{key.run_id}-{key.data_type}-{key.lineage_hash}'
                return backend_name, backend_key

        dq = self._data_query(key)
        doc = self.collection.find_one({**run_query, **dq}, projection=dq)

        if doc is None:
            # Data was not found
            if not write:
                raise strax.DataNotAvailable

            # This check must come before the path is built:
            # os.path.join(None, ...) raises TypeError, which previously
            # made the ``is not None`` test unreachable for a None path.
            # NOTE(review): treating "no new_data_path" as "cannot store
            # here" is an assumption -- confirm with callers.
            if self.new_data_path is None:
                raise strax.DataNotAvailable

            output_path = os.path.join(self.new_data_path, str(key))

            doc = self.collection.find_one(run_query, projection={'_id'})
            if not doc:
                raise ValueError(
                    f"Attempt to register new data for non-existing run {key.run_id}"
                )  # noqa
            self.collection.find_one_and_update(
                {'_id': doc['_id']},
                {
                    '$push': {
                        'data': {
                            'location': output_path,
                            'host': self.hostname,
                            'type': key.data_type,
                            'protocol': strax.FileSytemBackend.__name__,
                            # TODO: duplication with metadata stuff elsewhere?
                            'meta': {
                                'lineage': key.lineage
                            }
                        }
                    }
                })

            return (strax.FileSytemBackend.__name__, output_path)

        # Use the first entry of the returned ``data`` array.
        datum = doc['data'][0]

        if write and not self._can_overwrite(key):
            raise strax.DataExistsError(at=datum['location'])

        return datum['protocol'], datum['location']
コード例 #8
0
ファイル: rundb.py プロジェクト: XENONnT/straxen
    def _find(self, key: strax.DataKey, write, allow_incomplete, fuzzy_for,
              fuzzy_for_options):
        """Look up ``key`` in the run collection, preferring rucio entries.

        Run ids starting with ``'_'`` are rejected. Fuzzy options only
        trigger a warning; matching is always exact. If ``self.rucio_path``
        is set, an entry whose DID equals ``key_to_rucio_did(key)`` and
        which satisfies ``self.available_query[-1]`` is searched first.
        Otherwise, or if none is found, the lookup falls back to
        ``self._data_query(key)``. If nothing matches and ``write`` is
        true, a new data entry is pushed onto the run document.

        :param key: data key to look up.
        :param write: if true, the caller intends to write the data.
        :param allow_incomplete: not used by this implementation.
        :param fuzzy_for: ignored apart from the warning.
        :param fuzzy_for_options: ignored apart from the warning.
        :return: ``('RucioLocalBackend', did)`` for a rucio entry,
            ``(protocol, location)`` for another existing entry, or
            ``(strax.FileSytemBackend.__name__, output_path)`` for newly
            registered data.
        :raises strax.DataNotAvailable: for run ids starting with ``'_'``;
            if no data entry matches and either ``write`` is false or
            ``self.new_data_path`` is ``None``; or if the matching entry's
            host is ``'rucio-catalogue'``.
        :raises RuntimeError: if the rucio query returns an entry whose
            ``did`` differs from the requested one.
        :raises ValueError: when writing and no run document matches.
        :raises strax.DataExistsError: when writing, a non-rucio entry
            matches and ``self._can_overwrite(key)`` is false.
        """
        if key.run_id.startswith('_'):
            # Superruns are currently not supported.
            raise strax.DataNotAvailable

        if fuzzy_for or fuzzy_for_options:
            warnings.warn(
                "Can't do fuzzy with RunDB yet. Only returning exact matches")

        # Check if the run exists
        if self.runid_field == 'name':
            run_query = {'name': str(key.run_id)}
        else:
            run_query = {'number': int(key.run_id)}

        # Check that we are in rucio backend
        if self.rucio_path is not None:
            rucio_key = key_to_rucio_did(key)
            rucio_available_query = self.available_query[-1]
            dq = {
                'data': {
                    '$elemMatch': {
                        'type': key.data_type,
                        'did': rucio_key,
                        **rucio_available_query,
                    },
                }
            }
            doc = self.collection.find_one({**run_query, **dq},
                                           projection=dq)
            if doc is not None:
                datum = doc['data'][0]
                if datum.get('did', '') != rucio_key:
                    # Build the message only on failure so that an entry
                    # without a 'location' field cannot raise KeyError on
                    # the success path.
                    raise RuntimeError(
                        f'Expected {rucio_key} got data on '
                        f'{datum.get("location")}')
                # The DID was computed above; reuse it as the backend key.
                return 'RucioLocalBackend', rucio_key

        dq = self._data_query(key)
        doc = self.collection.find_one({**run_query, **dq}, projection=dq)

        if doc is None:
            # Data was not found
            if not write:
                raise strax.DataNotAvailable

            # This check must come before the path is built:
            # os.path.join(None, ...) raises TypeError, which previously
            # made the ``is not None`` test unreachable for a None path.
            # NOTE(review): treating "no new_data_path" as "cannot store
            # here" is an assumption -- confirm with callers.
            if self.new_data_path is None:
                raise strax.DataNotAvailable

            output_path = os.path.join(self.new_data_path, str(key))

            doc = self.collection.find_one(run_query, projection={'_id'})
            if not doc:
                raise ValueError(f"Attempt to register new data for"
                                 f" non-existing run {key.run_id}")
            self.collection.find_one_and_update({'_id': doc['_id']}, {
                '$push': {
                    'data': {
                        'location': output_path,
                        'host': self.hostname,
                        'type': key.data_type,
                        'protocol': strax.FileSytemBackend.__name__,
                        'meta': {
                            'lineage': key.lineage
                        }
                    }
                }
            })

            return (strax.FileSytemBackend.__name__, output_path)

        # Use the first entry of the returned ``data`` array.
        datum = doc['data'][0]

        if datum['host'] == 'rucio-catalogue':
            raise strax.DataNotAvailable

        if write and not self._can_overwrite(key):
            raise strax.DataExistsError(at=datum['location'])

        return datum['protocol'], datum['location']