Example #1
    def dataset(self):
        if self.is_dataset():
            return self

        elif self.parent and self.parent != self:  # Path('.') issue
            log.debug(self.parent)
            return self.parent.dataset
Example #2
    def _protocol_uris_resolved(self):
        # FIXME quite slow ...
        for start_uri in self.protocol_uris:
            log.debug(start_uri)
            try:
                if not hasattr(start_uri, 'dereference'):
                    start_uri = idlib.StreamUri(start_uri)

                end_uri = start_uri.dereference()
                yield end_uri
                sc = end_uri.progenitor.status_code
                if sc > 400:
                    msg = f'error accessing {end_uri} {sc}'
                    if self.addError(msg, blame='submission'):
                        logd.error(msg)

            except idlib.exceptions.ResolutionError as e:
                pass  # FIXME I think we already log this error?
            except self._MissingSchema as e:
                if self.addError(e, blame='submission'):
                    logd.error(e)
            except OntId.BadCurieError as e:
                if self.addError(e, blame='submission'):
                    logd.error(e)
            except BaseException as e:
                #breakpoint()
                log.exception(e)
                log.critical('see exception above')
Example #3
    def update_cache(self):
        log.debug(f'maybe updating cache for {self.name}')
        file_is_different = self.cache._meta_updater(self.meta)
        # update the cache first
        # then move to the new name if relevant
        # prevents moving partial metadata onto existing files
        parent_changed = (hasattr(self._bfobject, 'parent')
                          and self._bfobject.parent != self.cache.parent.id)
        if self.cache.name != self.name or parent_changed:  # this is locally correct
            # the issue is that move is now smarter
            # and will detect if a parent path has changed
            try:
                self.cache.move(remote=self)
            except exc.WhyDidntThisGetMovedBeforeError as e:
                # AAAAAAAAAAAAAAAAAAAAAAAAAAAAA
                # deal with the sadness that is non-unique filenames
                # I am 99.999999999999999% certain that users do not
                # expect this behavior ...
                log.error(e)
                if self.bfobject.package.name != self.bfobject.name:
                    argh = self.bfobject.name
                    self.bfobject.name = self.bfobject.package.name
                    try:
                        log.critical(
                            f'Non unique filename :( '
                            f'{self.cache.name} -> {argh} -> {self.bfobject.name}'
                        )
                        self.cache.move(remote=self)
                    finally:
                        self.bfobject.name = argh
                else:
                    raise e

        return file_is_different
Example #4
    def get(self, uri):
        #juri = uri + '.json'
        logd.info(uri)
        log.debug('going to network for protocols')
        resp = requests.get(uri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
            except BaseException as e:
                log.exception(e)
                breakpoint()
                raise e
            return j
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
                logd.error(msg)
                self.addError(msg)
                # can't return here because of the cache
            except BaseException as e:
                log.exception(e)

            logd.error(f'protocol no access {uri} {self.id!r}')
Example #5
    def triples_gen(self, subject):
        if not (isinstance(subject, rdflib.URIRef)
                or isinstance(subject, rdflib.BNode)):
            if isinstance(subject, idlib.Stream):
                subject = subject.asType(rdflib.URIRef)
            else:
                subject = rdflib.URIRef(subject)

        #maybe_not_normalized = self.message_passing_key in self._source  # TODO maybe not here?
        for field, value in self._source.items():
            #normalized = not (maybe_not_normalized and field in self._source)  # TODO

            #log.debug(f'{field}: {value}')
            if type(field) is object:
                continue  # the magic helper key for Pipeline
            convert = getattr(self, field, None)
            extra = getattr(self.extra, field, None)
            if convert is not None:
                if isinstance(value, tuple) or isinstance(value, list):
                    values = value
                else:
                    values = value,

                for v in values:
                    log.debug(f'{field} {v} {convert}')
                    try:
                        p, o = convert(v)
                    except exc.NoTripleError as e:
                        continue

                    log.debug(o)
                    if isinstance(o, Expr) or isinstance(o, Quantity):
                        s = rdflib.BNode()
                        yield subject, p, s
                        qt = sparc.Measurement
                        if isinstance(o, Range):
                            yield from o.asRdf(s, quantity_rdftype=qt)
                        elif isinstance(o, Quantity):
                            yield from o.asRdf(s, rdftype=qt)
                        else:
                            log.warning(f'unhandled Expr type {o}')
                            yield from o.asRdf(s)
                    else:
                        yield subject, p, o

                    if extra is not None:
                        yield from extra(v)

            elif field in self.known_skipped:
                pass

            else:
                msg = f'Unhandled {self.__class__.__name__} field: {field}'
                if msg not in self._already_warned:
                    self._already_warned.add(msg)
                    log.warning(msg)
                    self.addError(msg,
                                  pipeline_stage=self.__class__.__name__ +
                                  '.export-error')
Example #6
 def _protocol_uris_resolved(self):
     # FIXME quite slow ...
     for start_uri in self.protocol_uris:
         log.debug(start_uri)
         end_uri = None
         for end_uri in resolution_chain(start_uri):
             pass  # walk to the end of the resolution chain

         if end_uri is not None:
             yield end_uri
Example #7
    def _derive(data, derives, source_key_optional=True, allow_empty=False):
        # OLD
        """ derives is a list with the following structure
            [[[source-path, ...], derive-function, [target-path, ...]], ...]

        """
        # TODO this is an implementation of copy that has semantics for handling lists
        for source_path, function, target_paths in derives:
            source_prefixes = source_path[:-1]
            source_key = source_path[-1]
            source = data
            failed = False
            for i, node_key in enumerate(source_prefixes):
                log.debug(lj(source))
                if node_key in source:
                    source = source[node_key]
                else:
                    msg = f'did not find {node_key} in {source.keys()}'
                    if not i:
                        log.error(msg)
                        failed = True
                        break
                    raise exc.NoSourcePathError(msg)
                if isinstance(source, list) or isinstance(source, tuple):
                    new_source_path = source_prefixes[i + 1:] + [source_key]
                    new_target_paths = [tp[i + 1:] for tp in target_paths]
                    new_derives = [(new_source_path, function, new_target_paths)]
                    for sub_source in source:
                        _DictTransformer.derive(sub_source, new_derives,
                                                source_key_optional=source_key_optional)

                    return  # no more to do here

            if failed:
                continue  # sometimes things are missing we continue to others

            if source_key not in source:
                msg = f'did not find {source_key} in {source.keys()}'
                if source_key_optional:
                    return logd.info(msg)
                else:
                    raise exc.NoSourcePathError(msg)

            source_value = source[source_key]

            new_values = function(source_value)
            if len(new_values) != len(target_paths):
                log.debug(f'{source_path} {target_paths}')
                raise TypeError(f'wrong number of values returned for {function}\n'
                                f'was {len(new_values)} expected {len(target_paths)}')
            #temp = b'__temporary'
            #data[temp] = {}  # bytes ensure no collisions
            for target_path, value in zip(target_paths, new_values):
                if (not allow_empty and
                    (value is None or
                     hasattr(value, '__iter__') and not len(value))):
                    raise ValueError(f'value to add to {target_path} may not be empty!')
                adops.add(data, target_path, value, fail_on_exists=True)
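A minimal sketch of the derives structure this consumes; the keys, the lambda, and the direct _DictTransformer._derive call below are illustrative assumptions, not code from this codebase:

    data = {'meta': {'award_number': 'OT2-OD-000001'}}   # hypothetical input
    derives = [
        (['meta', 'award_number'],               # source path: prefixes + final key
         lambda v: (v.split('-')[-1],),          # derive function: one value per target path
         [['meta', 'award_suffix']]),            # target paths that receive the new values
    ]
    _DictTransformer._derive(data, derives)
    # expected afterwards: data['meta']['award_suffix'] == '000001'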
Example #8
        def protocol_url_or_doi(self, value):
            #_, s = self.c.protocol_url_or_doi(value)
            #yield s, rdf.type, owl.NamedIndividual
            #yield s, rdf.type, sparc.Protocol
            log.debug(value)
            if not isinstance(value, idlib.Pio):
                if isinstance(value, idlib.Doi):
                    try:
                        t = None
                        for t in value.triples_gen:
                            yield t
                    except idlib.exc.RemoteError as e:
                        if t is None:
                            # we already logged this error during id dereferencing
                            return

                    ds, _, _ = t
                    try:
                        pioid = value.dereference(asType=idlib.Pio)
                        s = self.c.l(pioid)
                        yield ds, TEMP.dereferencesTo, s
                        yield s, TEMP.hasDoi, ds
                    except idlib.exc.MalformedIdentifierError as e:
                        log.warning(e)
                        return
                else:
                    try:
                        pioid = idlib.Pio(
                            value
                        )  # FIXME :/ should be handled in Pio directly probably?
                    except idlib.exc.MalformedIdentifierError as e:
                        logd.warning(e)
                        return
            else:
                pioid = value

            try:
                pioid_int = pioid.uri_api_int
                s = self.c.l(pioid_int)
                yield from pioid_int.triples_gen
                # FIXME needs to be a pipeline so that we can export errors
                try:
                    data = pioid.data()
                except (OntId.BadCurieError,
                        idlib.exc.MalformedIdentifierError) as e:
                    loge.error(e)  # FIXME export errors ...
                    data = None
            except idlib.exc.RemoteError as e:  # FIXME sandbox violation
                loge.exception(e)
                s = self.c.l(pioid)
                data = None

            yield s, rdf.type, sparc.Protocol

            if data:
                yield s, rdfs.label, rdflib.Literal(pioid.label)
                nsteps = len(data['steps'])
                yield s, TEMP.protocolHasNumberOfSteps, rdflib.Literal(nsteps)
Example #9
 def get_file_by_url(cls, url):
     """ NOTE THAT THE FIRST YIELD IS HEADERS """
     resp = requests.get(url, stream=True)
     headers = resp.headers
     yield headers
     log.debug(f'reading from {url}')
     for chunk in resp.iter_content(chunk_size=4096):  # FIXME align chunksizes between local and remote
         if chunk:
             yield chunk
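A brief usage sketch consistent with the docstring note that the first yield is the headers; SomeRemote and the URL are placeholders:

    gen = SomeRemote.get_file_by_url('https://example.org/some/file')
    headers = next(gen)                # first yield is the response headers
    with open('local.bin', 'wb') as f:
        for chunk in gen:              # remaining yields are content chunks
            f.write(chunk)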
Example #10
    def pull(
        self,
        *args,
        paths=None,
        time_now=None,
        debug=False,
        n_jobs=12,
        cache_anchor=None,
        log_name=None,
        log_level='INFO',
        # pass in Parallel in at run time if needed
        Parallel=None,
        delayed=None,
        _in_parallel=False,
        exclude_uploaded=True,
    ):
        # TODO usage errors

        if time_now is None:
            time_now = GetTimeNow()
            log.debug('No time provided to pull so using '
                      f'{time_now.START_TIMESTAMP}')

        if _in_parallel:
            _log = logging.getLogger(log_name)
            _log.setLevel(log_level)
            rc = self._remote_class
            if not hasattr(rc, '_cache_anchor'):
                rc.anchorTo(cache_anchor)

        else:
            _log = log

        cache = self.cache

        if cache.is_organization():
            if debug or Parallel is None or n_jobs <= 1:
                for child in self.children:
                    if paths is None or child in paths:
                        child.pull()
            else:
                Parallel(n_jobs=n_jobs)(delayed(child.pull)(
                    _in_parallel=True,
                    time_now=time_now,
                    cache_anchor=cache.anchor,
                    log_name=_log.name,
                    log_level=log_level,
                    exclude_uploaded=exclude_uploaded,
                ) for child in self.children
                                        if paths is None or child in paths)

        elif cache.is_dataset():
            self._pull_dataset(
                time_now, exclude_uploaded)  # XXX actual pull happens in here

        else:
            raise NotImplementedError(self)
Example #11
    def update_cache(self):
        log.debug(f'updating cache for {self.name}')
        if self.cache.name != self.name:  # this is locally correct
            # the issue is that move is now smarter
            # and will detect if a parent path has changed
            self.cache.move(remote=self)

        file_is_different = self.cache._meta_updater(self.meta)
        return file_is_different
Example #12
 def fromId(cls, identifier, cache_class, local_class):
     # FIXME decouple class construction for identifier binding
     # _api is not required at all and can be bound explicitly later
     api = cls._api_class(identifier)
     self = RemoteFactory.__new__(cls, local_class, cache_class, _api=api)
     self._errors = []
     self.root = self._api.root
     log.debug(
         'When initializing a remote using fromId be sure to set the cache anchor '
         'before doing anything else, otherwise you will have a baaad time')
     return self
Example #13
 def allOf(obj):
     for o in obj['allOf']:
         if '$ref' in o:
             ref = o['$ref']
             if ref in types:
                 yield types[ref]
             else:
                 jpath = ref_to_list(ref)
                 no = adops.get(schema, jpath)
                 yield top(jpath[-1], no)
         else:
             log.debug(f'{obj}')
Example #14
 def etag(self):
     """ NOTE returns checksum, count since it is an etag"""
     # FIXME rename to etag in the event that we get proper checksumming ??
     if hasattr(self.bfobject, 'checksum'):
         checksum = self.bfobject.checksum
         if checksum and '-' in checksum:
             log.debug(checksum)
             if isinstance(checksum, str):
                 checksum, strcount = checksum.rsplit('-', 1)
                 count = int(strcount)
                 #if checksum[-2] == '-':  # these are 34 long, i assume the -1 is a check byte?
                 #return bytes.fromhex(checksum[:-2])
                 return bytes.fromhex(checksum), count
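For reference, a hedged illustration of the checksum shape this expects, an S3-style multipart value of the form 'hexdigest-partcount' (the sample value is made up):

    # self.bfobject.checksum == 'd41d8cd98f00b204e9800998ecf8427e-3'
    # rsplit('-', 1) separates the hex digest from the part count, so etag()
    # returns (bytes.fromhex('d41d8cd98f00b204e9800998ecf8427e'), 3)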
Example #15
    def subpipeline(data,
                    runtime_context,
                    subpipelines,
                    update=True,
                    source_key_optional=True,
                    lifters=None):
        """
            [[[[get-path, add-path], ...], pipeline-class, target-path], ...]

            NOTE: this function is a generator, you have to express it!
        """
        class DataWrapper:
            def __init__(self, data):
                self.data = data

        prepared = []
        for get_adds, pipeline_class, target_path in subpipelines:
            selected_data = {}
            ok = True
            for get_path, add_path in get_adds:
                try:
                    value = adops.get(data, get_path)
                    if add_path is not None:
                        adops.add(selected_data, add_path, value)
                    else:
                        selected_data = value
                except exc.NoSourcePathError as e:
                    if source_key_optional:
                        yield get_path, e, pipeline_class
                        ok = False
                        break  # breaks the inner loop
                    else:
                        raise e

            if not ok:
                continue

            log.debug(lj(selected_data))
            prepared.append(
                (target_path, pipeline_class, DataWrapper(selected_data),
                 lifters, runtime_context))

        function = adops.update if update else adops.add
        for target_path, pc, *args in prepared:
            p = pc(*args)
            if target_path is not None:
                function(data, target_path, p.data)
            else:
                p.data  # trigger the pipeline since it is stateful

            yield p
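A hedged sketch of the subpipelines spec shape described in the docstring; SomePipeline, the paths, data, lifters, and runtime_context are placeholders rather than real sparcur objects:

    subpipelines = [
        ([(['meta', 'contributors'], ['contributors'])],   # (get-path, add-path) pairs
         SomePipeline,                                      # pipeline class run on the selected data
         ['processed', 'contributors']),                    # target path for the pipeline's .data
    ]
    # subpipeline is a generator, so it must be exhausted; error tuples of
    # (get_path, exception, pipeline_class) are yielded alongside pipeline instances.
    results = list(subpipeline(data, runtime_context, subpipelines, lifters=lifters))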
Example #16
    def _rchildren(self, create_cache=True):
        if isinstance(self.bfobject, File):
            return
        elif isinstance(self.bfobject, DataPackage):
            return  # should we return files inside packages? are they 1:1?
        elif any(
                isinstance(self.bfobject, t)
                for t in (Organization, Collection)):
            for child in self.children:
                yield child
                yield from child.rchildren
        elif isinstance(self.bfobject, Dataset):
            for bfobject in self.bfobject.packages:
                child = self.__class__(bfobject)
                if child.is_dir() or child.is_file():
                    if child.is_file():
                        cid = child.id
                        existing = [
                            c for c in self.cache.local.children
                            if (c.is_file() and c.cache
                                or c.is_broken_symlink()) and c.cache.id == cid
                        ]
                        if existing:
                            unmatched = [
                                e for e in existing if child.name != e.name
                            ]
                            if unmatched:
                                log.debug(
                                    f'skipping {child.name} because a file with that '
                                    f'id already exists {unmatched}')
                                continue

                    if create_cache:
                        # FIXME I don't think existing detection is working
                        # correctly here so this gets triggered incorrectly?
                        self.cache / child  # construction will cause registration without needing to assign
                        assert child.cache is not None

                    yield child
                else:
                    # probably a package that has files
                    log.debug(
                        f'skipping {child} because it is neither a directory nor a file'
                    )
        else:
            raise exc.UnhandledTypeError  # TODO
Example #17
def check_for_updates(project_id):
    datasets = datasets_remote_from_project_id(project_id)
    #datasets = sorted(datasets, key=lambda r:r.id)[:3]
    for dataset in datasets:
        dataset_id = dataset.id
        sid = 'state-' + dataset_id
        uid = 'updated-' + dataset_id
        fid = 'failed-' + dataset_id
        qid = 'queued-' + dataset_id
        
        _updated = conn.get(uid)
        updated = _updated.decode() if _updated is not None else _updated

        _qupdated = conn.get(qid)
        qupdated = _qupdated.decode() if _qupdated is not None else _qupdated

        _failed = conn.get(fid)
        failed = _failed.decode() if _failed is not None else _failed

        _state = conn.get(sid)
        state = int(_state) if _state is not None else _state

        rq = state == _qed_run
        running = state == _run or rq
        queued = state == _qed or rq
            
        #log.debug(f'STATUS :id {dataset_id} :u {updated} :f {failed} :q {queued} :r {running}')
        # All the logic for whether to run a particular dataset
        # timestamp_updated or timestamp_updated_contents whichever is greater
        # NOTE we populate updated values into redis at startup from
        # the latest export of each individual dataset
        # TODO also need to check sparcur code changes to see if we need to rerun
        if (not (updated or failed) or
            failed and dataset.updated > failed or
            not failed and updated and dataset.updated > updated):
            log.debug((f'MAYBE ENQUEUE :id {dataset_id} du: '
                      f'{dataset.updated} u: {updated} f: {failed}'))
            if queued:
                pass
            elif running and updated and qupdated and updated > qupdated:
                conn.incr(sid)
            else:
                conn.incr(sid)
                conn.set(qid, dataset.updated)
                export_single_dataset.delay(dataset_id, dataset.updated)
Example #18
    def _get_protocol_json(self, uri):
        #juri = uri + '.json'
        logd.info(uri)
        pi = get_right_id(uri)
        if 'protocols.io' in pi:
            pioid = pi.slug  # FIXME normalize before we ever get here ...
            log.info(pioid)
        else:
            msg = f'protocol uri is not from protocols.io {pi} {self.id}'
            logd.error(msg)
            self.addError(msg)
            return

        #uri_path = uri.rsplit('/', 1)[-1]
        apiuri = 'https://protocols.io/api/v3/protocols/' + pioid
        #'https://www.protocols.io/api/v3/groups/sparc/protocols'
        #apiuri = 'https://www.protocols.io/api/v3/filemanager/folders?top'
        #print(apiuri, header)
        log.debug('going to network for protocols')
        resp = requests.get(apiuri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
            except BaseException as e:
                log.exception(e)
                breakpoint()
                raise e
            return j
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = f'protocol issue {uri} {resp.status_code} {sc} {em} {self.id!r}'
                logd.error(msg)
                self.addError(msg)
                # can't return here because of the cache
            except BaseException as e:
                log.exception(e)

            logd.error(f'protocol no access {uri} {self.id!r}')
Example #19
    def _render(self, e, stage, blame, path):
        o = {'pipeline_stage': stage,
             'blame': blame,}  # FIXME

        if path is not None:
            o['file_path'] = path

        if isinstance(e, str):
            o['message'] = e
            o['type'] = None  # FIXME probably want our own?

        elif isinstance(e, BaseException):
            o['message'] = str(e)
            o['type'] = str(type(e))

        else:
            raise TypeError(repr(e))

        log.debug(o)
        return o
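For reference, the shape _render produces for a string input (the argument values are illustrative):

    # self._render('missing column', stage='SomeStage', blame='submission', path=None)
    # -> {'pipeline_stage': 'SomeStage', 'blame': 'submission',
    #     'message': 'missing column', 'type': None}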
Example #20
    def data(self):
        """ get the 'cached' data which isn't really cached at the moment
            once we implement an index for local files then we can hit that
            first from here """
        # we don't keep two copies of the local data
        # unless we are doing a git-like thing
        if self.is_dir():
            raise TypeError('can\'t retrieve data for a directory')

        meta = self.meta
        if meta.file_id is None:
            raise NotImplementedError('can\'t fetch data without a file id')

        #cands = list(self.local_object_cache_dir.glob(self.cache_key))
        # FIXME this does not play well with old_id ...
        # can probably get away with just globing for the old_id in
        # most cases
        # TODO where to store the chain of prior versions? i.e. do
        # we just keep the xattrs in the object cache? how about file moves?
        # sigh git ...
        if self.local_object_cache_path.exists():
            gen = chain((f'from local cache {self.local_object_cache_path}', ),
                        self.local_object_cache_path.data)
        else:
            gen = self._remote_class.get_file_by_id(meta.id, meta.file_id)

        try:
            self.data_headers = next(gen)
        except exc.NoRemoteFileWithThatIdError as e:
            log.error(f'{self} {e}')
            raise FileNotFoundError(
                f'{self}'
            ) from e  # have to raise so that we don't overwrite the file

        log.debug(self.data_headers)
        if self.local_object_cache_path.exists():
            yield from gen
        else:
            yield from self.local_object_cache_path._data_setter(gen)
            self.local_object_cache_path.cache_init(
                self.meta)  # FIXME might self.meta be stale here?!
Example #21
    def update_from_ir(self, ir):
        oi = OntTerm.query._instrumented
        if oi is not OntTerm:
            OntTerm.query._instrumented = OntTerm

        def cformat(cell):
            if isinstance(cell, OntTerm):
                cell = cell.asCell()

            return cell

        try:
            dataset_blobs = ir['datasets']
            self._wat = self.values[8]
            for blob in dataset_blobs:
                meta = blob['meta']
                #species = adops.get(blob, ['subjects', int, 'species'], on_failure='')  # TODO not implemented
                if 'subjects' in blob:
                    species = '\n'.join(
                        sorted(
                            set([
                                cformat(s['species']) for s in blob['subjects']
                                if 'species' in s
                            ])))
                else:
                    species = ''

                self._update_dataset_metadata(
                    id=blob['id'],
                    name=adops.get(blob, ['meta', 'folder_name'],
                                   on_failure=''),
                    award=adops.get(blob, ['meta', 'award_number'],
                                    on_failure=''),
                    species=species,
                )
        finally:
            # FIXME this is so dumb :/
            OntTerm.query._instrumented = oi
        log.debug(self.uncommitted())
        self.commit()
Example #22
    def data(self):
        """ get the 'cached' data which isn't really cached at the moment
            once we implement an index for local files then we can hit that
            first from here """
        # we don't keep two copies of the local data
        # unless we are doing a git-like thing
        if self.is_dir():
            raise TypeError('can\'t retrieve data for a directory')

        meta = self.meta
        if meta.file_id is None:
            raise NotImplementedError('can\'t fetch data without a file id')

        #cands = list(self.local_object_cache_dir.glob(self.cache_key))
        # FIXME this does not play well with old_id ...
        # can probably get away with just globing for the old_id in
        # most cases
        # TODO where to store the chain of prior versions? i.e. do
        # we just keep the xattrs in the object cache? how about file moves?
        # sigh git ...
        if self.local_object_cache_path.exists():
            locsize = self.local_object_cache_path.size
            if locsize != meta.size:
                raise NotImplementedError(
                    'TODO yield from local then fetch the rest starting at offset'
                )

            gen = chain((f'from local cache {self.local_object_cache_path}', ),
                        self.local_object_cache_path.data)
        else:
            if not hasattr(self._remote_class, '_api'):
                # NOTE we do not want to dereference self.remote
                # in this situation because we just want the file
                # not the FS metadata, so we have to ensure that _api
                # is bound
                self._remote_class.anchorToCache(self.anchor)

            gen = self._remote_class.get_file_by_id(meta.id, meta.file_id)

        try:
            self.data_headers = next(gen)
        except exc.NoRemoteFileWithThatIdError as e:
            log.error(f'{self} {e}')
            raise exc.CacheNotFoundError(
                f'{self}'
            ) from e  # have to raise so that we don't overwrite the file

        log.debug(self.data_headers)
        if self.local_object_cache_path.exists():
            yield from gen
        else:
            # FIXME we MUST write the metadata first so that we know the expected size
            # so that in the event that the generator is only partially run out we know
            # that we can pick up where we left off with the fetch, this also explains
            # why all the cases where the cached data size did not match were missing
            # xattrs entirely

            self.local_object_cache_path.touch()
            self.local_object_cache_path.cache_init(meta)

            yield from self.local_object_cache_path._data_setter(gen)

            ls = self.local_object_cache_path.size
            if ls != meta.size:
                self.local_object_cache_path.unlink()
                msg = f'{ls} != {meta.size} for {self}'
                raise ValueError(msg)  # FIXME TODO
Example #23
    def subpipeline(cls, data, runtime_context, subpipelines, update=True,
                    source_key_optional=True, lifters=None):
        """
            [[[[get-path, add-path], ...], pipeline-class, target-path], ...]

            NOTE: this function is a generator, you have to express it!
        """

        class DataWrapper:
            def __init__(self, data):
                self.data = data

        prepared = []
        for get_adds, pipeline_class, target_path in subpipelines:
            selected_data = {}
            ok = True
            for get_path, add_path in get_adds:
                try:
                    value = adops.get(data, get_path)
                    if add_path is not None:
                        adops.add(selected_data, add_path, value)
                    else:
                        selected_data = value
                except exc.NoSourcePathError as e:
                    if source_key_optional:
                        yield get_path, e, pipeline_class
                        ok = False
                        break  # breaks the inner loop
                    else:
                        raise e

            if not ok:
                continue

            log.debug(lj(selected_data))
            prepared.append((target_path, pipeline_class, DataWrapper(selected_data),
                             lifters, runtime_context))

        function = adops.update if update else adops.add
        for target_path, pc, *args in prepared:
            p = pc(*args)
            if target_path is not None:
                try:
                    function(data, target_path, p.data)
                except BaseException as e:
                    import inspect
                    if not isinstance(pc, type):
                        # pc is an instance rather than a class; recover the class
                        pi, pc = pc, pc.__class__

                    try:
                        __file = inspect.getsourcefile(pc)
                        __line = ' line ' + str(inspect.getsourcelines(pc)[-1])
                    except TypeError as e2:
                        __file = f'<Thing that is not defined in a file: {pc}>'
                        __line = ''

                    if hasattr(p, 'path'):
                        __path = f'"{p.path}"'
                    else:
                        __path = 'unknown input'

                    raise exc.SubPipelineError(
                        f'Error while processing {p}.data for\n{__path}\n'
                        f'{__file}{__line}') from e

            else:
                p.data  # trigger the pipeline since it is stateful

            yield p