Code example #1
File: datasets.py  Project: ankap2/sparc-curation
    def counts(self):
        if not hasattr(self, '_counts'):
            size = 0
            dirs = 0
            files = 0
            need_meta = []
            if not self.is_dir():
                gen = self,  # one-element tuple: treat this non-directory path as the only entry

            else:
                gen = self.rchildren

            for c in gen:
                if c.is_dir():
                    dirs += 1
                else:
                    files += 1  # testing for broken symlinks is hard
                    try:
                        maybe_size = c.cache.meta.size
                    except AttributeError as e:
                        log.error(f'no cache or no meta for {c}\n{e}')
                        continue

                    if maybe_size is None:
                        need_meta.append(c)
                    else:
                        size += maybe_size

            if need_meta and self._refresh_on_missing:
                nl = '\n'
                log.info(
                    f'refreshing {len(need_meta)} files with missing metadata in {self}'
                    f'\n{nl.join(_.as_posix() for _ in need_meta)}')
                new_caches = Async(rate=self.rate)(deferred(c.cache.refresh)()
                                                   for c in need_meta)
                for c in new_caches:  # FIXME first time around meta doesn't get updated ??
                    if c is None:
                        continue  # file was deleted (logged previously)

                    if c.meta is None:
                        log.critical(f'missing metadata! {c}')
                        continue

                    size += c.meta.size

            self._counts = dict(size=FileSize(size), dirs=dirs, files=files)

        return self._counts
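
The accessor above memoizes its result on the instance via hasattr. A minimal, self-contained sketch of that lazy-caching pattern against a plain filesystem tree (the class and all names below are illustrative assumptions, not the project's API):

from pathlib import Path

class CountsDemo:
    """Illustrates the hasattr-based lazy caching used by counts above."""

    def __init__(self, root):
        self.root = Path(root)

    @property
    def counts(self):
        if not hasattr(self, '_counts'):  # compute once, then reuse
            size = dirs = files = 0
            for p in self.root.rglob('*'):
                if p.is_dir():
                    dirs += 1
                else:
                    files += 1
                    try:
                        size += p.stat().st_size
                    except OSError:  # e.g. a broken symlink
                        pass
            self._counts = dict(size=size, dirs=dirs, files=files)

        return self._counts

# usage: CountsDemo('.').counts -> {'size': ..., 'dirs': ..., 'files': ...}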
Code example #2
File: datasets.py  Project: tmsincomb/sparc-curation
    def _abstracted_paths(self, name_prefix, glob_type=None):
        """ A bottom up search for the closest file in the parent directory.
            For datasets, if the bids root and path do not match, use the bids root.
            In the future this needs to be normalized because the extra code required
            for dealing with the intervening node is quite annoying to maintain.
        """
        if glob_type is None:
            glob_type = self.default_glob

        path = self
        if (self.cache and
            self.cache.is_dataset and
            self.bids_root is not None and
            self.bids_root != self):
            path = self.bids_root

        first = name_prefix[0]
        cased_np = '[' + first.upper() + first + ']' + name_prefix[1:]  # FIXME warn and normalize
        glob = getattr(path, glob_type)
        gen = glob(cased_np + '*.*')

        try:
            path = next(gen)
            for path in chain((path,), gen):
                if path.is_broken_symlink():
                    log.info(f'fetching unretrieved metadata path {path.as_posix()!r}'
                             '\nFIXME batch these using async in cli export ...')
                    path.cache.fetch(size_limit_mb=path.cache.meta.size.mb + 1)

                if path.suffix in path.stem:  # e.g. 'samples.xlsx.xlsx': stem 'samples.xlsx' contains suffix '.xlsx'
                    msg = f'path has duplicate suffix {path.as_posix()!r}'
                    self.addError(msg)
                    logd.error(msg)

                if path.name[0].isupper():
                    msg = f'path has bad casing {path.as_posix()!r}'
                    self.addError(msg)
                    logd.error(msg)

                yield path

        except StopIteration:
            if (self.cache.parent.meta is not None and
                self.parent.cache != self.cache.anchor and
                self.parent != self):
                yield from getattr(self.parent, name_prefix + '_paths')  # fall back to the parent's <name_prefix>_paths generator
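
The cased_np expression above builds a glob character class so the search matches the prefix with either an upper- or lower-case first letter. A standalone sketch of that trick (the function name and example file names are made up for illustration):

from pathlib import Path

def cased_glob_pattern(name_prefix):
    """Build a pattern matching the prefix regardless of first-letter case,
    mirroring the character-class construction used above."""
    first = name_prefix[0]
    return '[' + first.upper() + first + ']' + name_prefix[1:] + '*.*'

# cased_glob_pattern('subjects') -> '[Ss]ubjects*.*'
# matches e.g. list(Path('.').glob(cased_glob_pattern('subjects')))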
Code example #3
    def _get_protocol_json(self, uri):
        #juri = uri + '.json'
        logd.info(
            uri.identifier if isinstance(uri, idlib.Stream) else uri)  # FIXME
        pi = idlib.get_right_id(uri)
        if 'protocols.io' in pi:
            pioid = pi.slug  # FIXME normalize before we ever get here ...
            log.info(pioid)
        else:
            msg = f'protocol uri is not from protocols.io {pi} {self.id}'
            logd.error(msg)
            self.addError(msg)
            return

        #uri_path = uri.rsplit('/', 1)[-1]
        apiuri = 'https://www.protocols.io/api/v3/protocols/' + pioid
        #'https://www.protocols.io/api/v3/groups/sparc/protocols'
        #apiuri = 'https://www.protocols.io/api/v3/filemanager/folders?top'
        #print(apiuri, header)
        log.debug('going to network for protocols')
        resp = requests.get(apiuri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
            except BaseException as e:  # debug breakpoint removed; just log and re-raise
                log.exception(e)
                raise
            return j
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
                logd.error(msg)
                self.addError(msg)
                # can't return here because of the cache
            except BaseException as e:
                log.exception(e)

            logd.error(f'protocol no access {uri} {self.id!r}')
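
On failure the code above tries to surface the API's own status_code and error_message fields before logging a generic no-access message. A condensed, self-contained sketch of that request/error-handling shape (the function name and print-based reporting are assumptions for illustration, not the project's code):

import requests

def get_json_or_report(apiuri, headers=None):
    """Return parsed JSON on success; otherwise report the API's own
    error fields when the error body is JSON, then return None."""
    resp = requests.get(apiuri, headers=headers)
    if resp.ok:
        return resp.json()
    try:
        j = resp.json()
        print(f"API error {resp.status_code}: "
              f"{j.get('status_code')} {j.get('error_message')}")
    except ValueError:  # error body was not JSON
        print(f"API error {resp.status_code}: non-JSON body")
    return None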