Ejemplo n.º 1
0
class EcospoldV1Archive(NsUuidArchive):
    """
    Create an Ecospold Archive object from a path.  By default, assumes the path points to a literal
    .7z file, of the type that one can download from the ecoinvent website.  Creates an accessor for
    files in that archive and allows a user to load the processes, flows and unit-based quantities
    described by those files.
    """

    nsmap = 'http://www.EcoInvent.org/EcoSpold01'  # only valid for v1 ecospold files
    # first group captured by the `tail` pattern (defined elsewhere in the module) on the namespace URI
    spold_version = tail.search(nsmap).groups()[0]

    def __init__(self, ref, prefix=None, **kwargs):
        """
        Initialise the parent archive, then open the file accessor and an empty quantity cache.

        :param ref: archive reference; handed to the parent class, after which self.ref is used
          to build the Archive accessor
        :param prefix: optional path prefix inside the archive under which the datasets are found
        :param kwargs: passed through unchanged to the parent constructor
        :return:
        """
        super(EcospoldV1Archive, self).__init__(ref, **kwargs)
        # unit string -> quantity entity; filled lazily by _create_quantity
        self._q_dict = {}
        self.internal_prefix = prefix
        self._archive = Archive(self.ref)

    def _build_prefix(self):
        path = ''
        if self.internal_prefix is not None:
            path = os.path.join(self.internal_prefix, path)
        return path

    def list_datasets(self):
        assert self._archive.remote is False, "Cannot list objects for remote archives"
        return self._archive.listfiles(in_prefix=self._build_prefix())

    def _fetch_filename(self, filename):
        return self._archive.readfile(filename)

    def _get_objectified_entity(self, filename):
        """
        Read a file from the archive and parse it into an objectified XML tree.

        :param filename: name of the file inside the archive
        :return: the objectified root element
        :raises EcospoldVersionError: if the document's default namespace is not self.nsmap
        """
        root = objectify.fromstring(self._archive.readfile(filename))
        if root.nsmap[None] == self.nsmap:
            return root
        # the last two characters of the namespace URI are the version digits
        raise EcospoldVersionError('This class is for EcoSpold v%s only!' % self.nsmap[-2:])

    def _create_quantity(self, unitstring):
        """
        In ecospold v1, quantities are only units, defined by string
        :param unitstring:
        :return:
        """
        if unitstring in self._q_dict:
            q = self._q_dict[unitstring]
        else:
            ref_unit, _ = self._create_unit(unitstring)
            uid = self._key_to_id(unitstring)

            q = LcQuantity(uid, Name='EcoSpold Quantity %s' % unitstring,
                           ReferenceUnit=ref_unit, Comment=self.spold_version)
            q.set_external_ref(unitstring)
            self.add(q)
            self._q_dict[unitstring] = q

        return q

    def _create_flow(self, exch):
        """
        Return the flow for an exchange element, creating and registering it if necessary.

        An ecospold01 exchange is really just a long attribute list, plus an inputGroup or outputGroup
        (ignored here).  The flow is keyed on the exchange's 'number' attribute.  When the exchange is
        given in a unit other than the flow's own unit and the flow has no characterization for it
        yet, one is added: the factor comes from conversion_dict or, failing that, is requested
        interactively via input().
        :param exch: exchange element
        :return: the existing or newly created flow entity
        """
        number = int(exch.get('number'))
        uid = self._key_to_id(number)
        flow = self[uid]
        if flow is None:
            # generate flow
            name = exch.get("name")
            ref_quantity = self._create_quantity(exch.get("unit"))
            comment = not_none(exch.get("generalComment"))
            cas = not_none(exch.get("CASNumber"))
            compartment = [exch.get('category'), exch.get('subCategory')]

            flow = LcFlow(uid, Name=name, CasNumber=cas, Comment=comment, Compartment=compartment)
            flow.add_characterization(ref_quantity, reference=True)
            flow.set_external_ref(number)
            self.add(flow)
        else:
            assert flow.entity_type == 'flow', "Expected flow, found %s" % flow.entity_type

        if exch.get("unit") == flow.unit():
            return flow

        local_q = self._create_quantity(exch.get("unit"))
        if flow.has_characterization(local_q):
            return flow

        unit_pair = (flow.unit(), local_q.unit())
        if unit_pair in conversion_dict:
            val = conversion_dict[unit_pair]
        else:
            print('Flow %s needs characterization for unit %s' % (flow, local_q))
            val = parse_math(input('Enter conversion factor 1 %s = x %s' % (flow.unit(), local_q)))
        flow.add_characterization(local_q, value=val)
        return flow

    def _create_process(self, filename):
        """
        Extract a process dataset from an XML file and register it in the archive.

        Every exchange in the file's flowData is turned into a flow first, so flows and quantities
        get registered even when a process with the same name already exists (in which case the
        existing process is returned untouched).
        :param filename: name of the dataset file inside the archive
        :return: the existing or newly created process entity
        :raises DirectionlessExchangeError: if an exchange has neither outputGroup nor inputGroup
        :raises TypeError: if an exchange lacks a 'meanValue' attribute (float() is applied to the
          raw attribute value)
        """
        o = self._get_objectified_entity(filename)

        rf = None  # reference flow
        flowlist = []

        for exch in o.dataset.flowData.getchildren():
            f = self._create_flow(exch)
            if hasattr(exch, 'outputGroup'):
                d = 'Output'
                if exch.outputGroup == 0:
                    # an exchange in output group 0 is taken as the reference flow
                    assert rf is None, "Multiple reference flows found!"
                    rf = f
            elif hasattr(exch, 'inputGroup'):
                d = 'Input'
            else:
                raise DirectionlessExchangeError
            local_q = self._create_quantity(exch.get("unit"))
            v = float(exch.get('meanValue'))  # fails here, rather than yielding None, if missing
            if local_q is not f.reference_entity:
                # express the amount in the flow's reference quantity
                v = v / f.cf(local_q)
            flowlist.append((f, d, v))

        p_meta = o.dataset.metaInformation.processInformation
        ref_fn = p_meta.referenceFunction
        n = ref_fn.get('name')

        u = self._key_to_id(n)

        p = self[u]
        if p is not None:
            assert p.entity_type == 'process', "Expected process, found %s" % p.entity_type
            return p

        # create new process
        g = p_meta.geography.get('location')
        stt = {'begin': str(find_tag(p_meta, 'startDate')[0]), 'end': str(find_tag(p_meta, 'endDate')[0])}

        c = ref_fn.get('generalComment')

        cls = [ref_fn.get('category'), ref_fn.get('subCategory')]
        p = LcProcess(u, Name=n, Comment=c, SpatialScope=g, TemporalScope=stt,
                      Classifications=cls)
        p.set_external_ref(n)

        if rf is not None:
            p.add_reference(rf, 'Output')
        for flow, f_dir, val in flowlist:
            self._print('Exch %s [%s] (%g)' % (flow, f_dir, val))
            p.add_exchange(flow, f_dir, reference=None, value=val, add_dups=True)

        self.add(p)

        return p

    def _fetch(self, uid, **kwargs):
        """
        Nothing to do here-- if it's not found, it needs to be loaded
        :param uid:
        :param kwargs:
        :return:
        """
        print('No way to fetch by UUID. Loading all processes...')
        self.load_all()

    def _load_all(self):
        """
        No need to "fetch" with ecospold v1, since UUIDs are not known in advance.
        Instead, just load all the processes at once.
        :return:
        """
        for k in self.list_datasets():
            self._create_process(k)
        self.check_counter('quantity')
        self.check_counter('flow')
        self.check_counter('process')

    def serialize(self, **kwargs):
        """
        Serialize via the parent class, adding a 'prefix' entry when an internal prefix is set.

        :param kwargs: passed through to the parent's serialize()
        :return: the parent's serialization, possibly extended with 'prefix'
        """
        j = super(EcospoldV1Archive, self).serialize(**kwargs)
        prefix = self.internal_prefix
        if prefix is None:
            return j
        j['prefix'] = prefix
        return j
Ejemplo n.º 2
0
class EcospoldV2Archive(LcArchive):
    """
    Class for loading metadata from ecospold v2 files.  ecoinvent supplies a large amount of supplementary
    information in files that are *outside* the ecospold archives; that information is IGNORED here
    (it may be loaded separately, but it is not handled by this class).
    """

    nsmap = 'http://www.EcoInvent.org/EcoSpold02'  # only valid for v2 ecospold files
    # first group captured by the `tail` pattern (defined elsewhere in the module) on the namespace URI
    spold_version = tail.search(nsmap).groups()[0]

    def __init__(self, ref, prefix=None, **kwargs):
        """
        Initialise the parent archive, record the internal prefix and open the file accessor.

        :param ref: archive reference; handed to the parent class, after which self.ref is used
          to build the Archive accessor
        :param prefix: optional path prefix inside the archive; when given it is also stored in
          the serialization dict under 'prefix'
        :param kwargs: passed through unchanged to the parent constructor
        :return:
        """
        super(EcospoldV2Archive, self).__init__(ref, **kwargs)
        self.internal_prefix = prefix
        if prefix is not None:
            self._serialize_dict['prefix'] = prefix

        self._archive = Archive(self.ref)

    def fg_proxy(self, proxy):
        """
        Load every dataset whose listing matches `proxy`, then look `proxy` up in the archive.

        :param proxy: used both as the listing filter and as the lookup key
        :return: self[proxy] after loading
        """
        matching = self.list_datasets(proxy)
        for ds in matching:
            self.retrieve_or_fetch_entity(ds)
        return self[proxy]

    def bg_proxy(self, proxy):
        return self.fg_proxy(proxy)

    # no need for _key_to_id - keys in ecospold are uuids
    def _prefix(self, filename):
        if self.internal_prefix is not None:
            try:
                filename = os.path.join(self.internal_prefix, filename)
            except TypeError:  # None filename
                filename = self.internal_prefix
        return filename

    def _de_prefix(self, string):
        if self.internal_prefix is None:
            return string
        else:
            return re.sub('^' + os.path.join(self.internal_prefix, ''), '', string)

    def _fetch_filename(self, filename):
        return self._archive.readfile(self._prefix(filename))

    def list_datasets(self, startswith=None):
        assert self._archive.remote is False, "Cannot list objects for remote archives"
        return [self._de_prefix(x) for x in self._archive.listfiles(in_prefix=self._prefix(startswith))]

    def _get_objectified_entity(self, filename):
        """
        Read a file from the archive and parse it into an objectified XML tree.

        :param filename: archive-relative name (the internal prefix is added when reading)
        :return: the objectified root element, or None if reading/parsing raised ValueError
        :raises EcospoldV2Error: if the document's default namespace is not self.nsmap
        """
        try:
            root = objectify.fromstring(self._fetch_filename(filename))
        except ValueError:
            print('failed on :%s:' % filename)
            return None
        if root.nsmap[None] == self.nsmap:
            return root
        # the last two characters of the namespace URI are the version digits
        raise EcospoldV2Error('This class is for EcoSpold v%s only!' % self.nsmap[-2:])

    def _get_objectified_entity_with_lt_gt(self, filename):
        """
        Like _get_objectified_entity, but first escapes space-delimited ' < ' and ' > ' in the
        decoded file contents so that they no longer read as markup.

        :param filename: archive-relative name
        :return: the objectified root element, or None if a ValueError was raised
        :raises TypeError: re-raised after reporting the file name
        :raises EcospoldV2Error: if the document's default namespace is not self.nsmap
        """
        try:
            raw = self._fetch_filename(filename)
            text = raw.decode()
            text = text.replace(' > ', ' &gt; ')
            text = text.replace(' < ', ' &lt; ')
            root = objectify.fromstring(text)
        except ValueError:
            print('failed on :%s:' % filename)
            return None
        except TypeError:
            print('failed on :%s:' % filename)
            raise
        if root.nsmap[None] == self.nsmap:
            return root
        raise EcospoldV2Error('This class is for EcoSpold v%s only!' % self.nsmap[-2:])

    def _create_quantity(self, exchange):
        """
        Return the quantity for an exchange's unit, creating and registering it on first use.

        In ecospold v2, quantities are still only units, defined by string.  The exchange's
        'unitId' attribute serves as the quantity's id; the unitName text supplies the unit string.
        :param exchange: exchange element carrying unitName and a 'unitId' attribute
        :return: the existing or newly created quantity entity
        """
        unitstring = exchange.unitName.text
        unit_uuid = exchange.attrib['unitId']

        quantity = self[unit_uuid]
        if quantity is not None:
            return quantity

        ref_unit, _ = self._create_unit(unitstring)
        quantity = LcQuantity(unit_uuid, Name='EcoSpold Quantity %s' % unitstring,
                              ReferenceUnit=ref_unit, Comment=self.spold_version)
        self.add(quantity)
        return quantity

    @staticmethod
    def _cls_to_text(i):
        """
        Render a classification element as 'system: value'.

        :param i: candidate classification element
        :return: the joined text, or '' when i is not an ObjectifiedElement
        """
        if not isinstance(i, objectify.ObjectifiedElement):
            return ''
        system = i.classificationSystem.text
        value = i.classificationValue.text
        return ': '.join([system, value])

    @staticmethod
    def _cat_to_text(i):
        """
        Render a compartment element as a [compartment, subcompartment] list.

        :param i: candidate compartment element
        :return: the two-item list, or [] when i is not an ObjectifiedElement
        """
        if not isinstance(i, objectify.ObjectifiedElement):
            return []
        compartment = i.compartment.text
        subcompartment = i.subcompartment.text
        return [compartment, subcompartment]

    def _create_flow(self, exchange):
        """
        Return the flow entity for an exchange, creating and adding it to the db if necessary.

        Intermediate exchanges are identified by 'intermediateExchangeId' and categorised by their
        classification; elementary exchanges by 'elementaryExchangeId' and their compartment.
        :param exchange: exchange element
        :return: the existing or newly created flow entity
        :raises AttributeError: if the element's tag names neither exchange type
        """
        tag = exchange.tag
        if 'intermediate' in tag:
            uid = exchange.attrib['intermediateExchangeId']
            cat = [self._cls_to_text(exchange.classification)]
        elif 'elementary' in tag:
            uid = exchange.attrib['elementaryExchangeId']
            cat = self._cat_to_text(exchange.compartment)
        else:
            raise AttributeError('No exchange type found for id %s' % exchange.attrib['id'])

        existing = self[uid]
        if existing is not None:
            return existing

        cas = exchange.attrib['casNumber'] if 'casNumber' in exchange.attrib else ''

        quantity = self._create_quantity(exchange)

        name = exchange.name.text
        comment = 'EcoSpold02 Flow'

        flow = LcFlow(uid, Name=name, CasNumber=cas, Comment=comment, Compartment=cat)
        flow.add_characterization(quantity=quantity, reference=True)

        self.add(flow)

        return flow

    def _create_process_entity(self, o):
        """
        Return the process entity described by an objectified dataset, creating it if necessary.

        Only metadata from the activityDescription is read (name, comment, geography, time period,
        classifications); exchanges are not touched here.
        :param o: objectified dataset root
        :return: the existing or newly created process entity
        """
        ad = find_tag(o, 'activityDescription')[0]

        uid = ad.activity.get('id')

        existing = self[uid]
        if existing is not None:
            return existing

        name = find_tag(ad, 'activityName')[0].text

        comment = 'no comment.'  # used whenever the generalComment text cannot be read
        try:
            comment = find_tag(ad, 'generalComment')[0]['text'].text
        except TypeError:
            pass
        except AttributeError:
            print('activity ID %s: no comment' % uid)

        geography = find_tag(ad, 'geography')[0].shortname.text

        period = find_tag(ad, 'timePeriod')[0]
        stt = {'begin': period.get('startDate'), 'end': period.get('endDate')}
        cls = [self._cls_to_text(i) for i in find_tag(ad, 'classification')]

        process = LcProcess(uid, Name=name, Comment=comment, SpatialScope=geography, TemporalScope=stt,
                            Classifications=cls)

        self.add(process)
        return process

    def _grab_reference_flow(self, o, rf):
        """
        Find the intermediate exchange whose id equals rf and return its flow entity.

        :param o: objectified dataset root
        :param rf: intermediateExchangeId of the reference exchange
        :return: the flow created (or retrieved) for the first matching exchange
        :raises KeyError: if no intermediate exchange carries that id
        """
        for child in find_tag(o, 'flowData')[0].getchildren():
            if 'intermediate' not in child.tag:
                continue
            if child.attrib['intermediateExchangeId'] == rf:
                return self._create_flow(child)

        raise KeyError('Noted reference exchange %s not found!' % rf)

    def _collect_exchanges(self, o):
        """
        Turn the flowData children of a dataset into a list of EcospoldExchange records.

        Children whose tag contains 'parameter' or 'impactIndicator' are skipped.
        :param o: objectified dataset root
        :return: list of EcospoldExchange(flow, direction, amount, activityLinkId)
        :raises DirectionlessExchangeError: if an exchange has neither outputGroup nor inputGroup
        """
        skipped_tags = ('parameter', 'impactIndicator')
        collected = []

        for exch in find_tag(o, 'flowData')[0].getchildren():
            if any(marker in exch.tag for marker in skipped_tags):
                continue

            flow = self._create_flow(exch)
            if hasattr(exch, 'outputGroup'):
                direction = 'Output'
            elif hasattr(exch, 'inputGroup'):
                direction = 'Input'
            else:
                raise DirectionlessExchangeError
            amount = float(exch.get('amount'))  # float() is applied to the raw attribute value
            link = exch.get('activityLinkId')  # passed on as-is as the fourth record field
            collected.append(EcospoldExchange(flow, direction, amount, link))
        return collected

    def _collect_impact_scores(self, o, process, flow):
        """
        Collect the impactIndicator entries of a dataset (the old "1115").

        :param o: objectified dataset root
        :param process: process entity, used only to construct a unit output exchange
        :param flow: flow entity, used only to construct a unit output exchange
        :return: list of EcospoldLciaResult(method, category, indicator name, amount)
        """
        # NOTE(review): this exchange is constructed but not referenced afterwards
        exch = ExchangeValue(process, flow, 'Output', value=1.0)

        return [
            EcospoldLciaResult(cf.impactMethodName.text, cf.impactCategoryName.text, cf.name.text,
                               float(cf.get('amount')))
            for cf in find_tag(o, 'flowData')[0].getchildren()
            if 'impactIndicator' in cf.tag
        ]

    def objectify(self, filename):
        try:
            o = self._get_objectified_entity(filename)
        except XMLSyntaxError:
            print('  !!XMLSyntaxError-- trying to escape < and > signs')
            try:
                o = self._get_objectified_entity_with_lt_gt(filename)
            except XMLSyntaxError:
                print('  !!Failed loading %s' % filename)
                raise
        return o

    def _create_process(self, filename, exchanges=True):
        """
        Extract dataset object from XML file.

        The reference flow id is derived from the file name via spold_reference_flow(); the
        matching flow is added to the process as an 'Output' reference.
        :param filename: archive-relative dataset name
        :param exchanges: when true, also add every exchange with a non-zero amount
        :return: the process entity
        """
        o = self.objectify(filename)

        process = self._create_process_entity(o)
        ref_flow = self._grab_reference_flow(o, spold_reference_flow(filename))
        rx = process.add_reference(ref_flow, 'Output')
        self._print('Identified reference exchange\n %s' % rx)
        if not exchanges:
            return process

        for exch in self._collect_exchanges(o):
            if exch.value == 0:
                continue
            self._print('Exch %s [%s] (%g)' % (exch.flow, exch.direction, exch.value))
            process.add_exchange(exch.flow, exch.direction, reference=rx, value=exch.value,
                                 termination=exch.termination)

        return process

    '''
    def _fetch(self, uid, ref_flow=None):
        """
        ecospoldV2 files are named by activityId_referenceFlow - if none is supplied, take the first one found
        matching activityId
        :param uid:
        :param ref_flow:
        :return:
        """
        if ref_flow is not None:
            uid = '_'.join([uid, ref_flow])
        files = self.list_datasets(uid)
        if len(files) == 0:
            return None
        return self._create_process(files[0])
    '''

    def retrieve_or_fetch_entity(self, filename, **kwargs):
        entity = self._get_entity(filename)  # this checks upstream if it exists
        if entity is not None:
            if spold_reference_flow(filename) in [x.flow.get_uuid() for x in entity.reference_entity]:
                return entity
        return self._create_process(filename, **kwargs)

    def retrieve_lcia_scores(self, filename, quantities=None):
        """
        Read the impactIndicator entries of an Ecospold02 file and wrap them as LCIA results.

        Each indicator is matched to a quantity through the string 'method, category, indicator'
        compared against the quantity's Name.  Only quantities that have a 'Method' key take part;
        indicators without a matching quantity are ignored.
        :param filename: archive-relative dataset name
        :param quantities: list of quantity entities to look for (defaults to self.quantities())
        :return: an LciaResults object holding one LciaResult per matched quantity uuid
        :raises KeyError: if two candidate quantities share the same Name
        """
        import time

        if quantities is None:
            quantities = self.quantities()

        start_time = time.time()
        print('Loading LCIA results from %s' % filename)
        o = self.objectify(filename)

        self._print('%30.30s -- %5f' % ('Objectified', time.time() - start_time))
        process = self._create_process_entity(o)
        ref_flow = self._grab_reference_flow(o, spold_reference_flow(filename))

        # unit output of the reference flow, attached to every score below
        exch = ExchangeValue(process, ref_flow, 'Output', value=1.0)

        tags = {}
        for candidate in quantities:
            if 'Method' not in candidate.keys():
                continue
            if candidate['Name'] in tags:
                raise KeyError('Name collision %s' % candidate['Name'])
            tags[candidate['Name']] = candidate

        results = LciaResults(process)

        for char in find_tag(o, 'flowData')[0].getchildren():
            if 'impactIndicator' not in char.tag:
                continue
            method = char.impactMethodName.text
            category = char.impactCategoryName.text
            indicator = char.name.text
            value = float(char.get('amount'))
            my_tag = ', '.join([method, category, indicator])
            if my_tag not in tags:
                continue
            quantity = tags[my_tag]
            result = LciaResult(quantity)
            cf = Characterization(ref_flow, quantity, value=value, location=process['SpatialScope'])
            result.add_score(process.get_uuid(), exch, cf, process['SpatialScope'])
            results[quantity.get_uuid()] = result

        self._print('%30.30s -- %5f' % ('Impact scores collected', time.time() - start_time))

        return results

    def _load_all(self, exchanges=True):
        now = time()
        count = 0
        for k in self.list_datasets():
            self.retrieve_or_fetch_entity(k, exchanges=exchanges)
            count += 1
            if count % 100 == 0:
                print(' Loaded %d processes (t=%.2f s)' % (count, time()-now))

        print(' Loaded %d processes (t=%.2f s)' % (count, time() - now))
        self.check_counter()
Ejemplo n.º 3
0
class IlcdArchive(LcArchive):
    """
    This class handles de-referencing for ILCD archives: it maps a data type and UUID (or a search
    term) to a file inside the archive and turns the parsed XML into quantity, flow and process
    entities.
    """

    def __init__(self, ref, prefix=None, **kwargs):
        """
        Initialise the parent archive and open the file accessor, with two fallbacks.

        If the accessor for self.ref does not report OK, the local ELCD fallback is tried, and if
        that is not OK either, the remote ELCD fallback.  Path handling uses posixpath for
        compressed or remote archives and os.path otherwise.
        :param ref: root of the archive
        :param prefix: difference between the internal path (ref) and the ILCD base; when given it
          is also stored in the serialization dict under 'prefix'
        :param kwargs: passed through unchanged to the parent constructor
        :return:
        """
        super(IlcdArchive, self).__init__(ref, **kwargs)
        self.internal_prefix = prefix
        if prefix is not None:
            self._serialize_dict['prefix'] = prefix

        archive = Archive(self.ref)
        self._archive = archive

        if not archive.OK:
            print('Trying local ELCD reference')
            archive = Archive(elcd3_local_fallback)
            self._archive = archive
        if not archive.OK:
            print('Falling back to ELCD Remote Reference')
            archive = Archive(elcd3_remote_fallback, query_string='format=xml')
            self._archive = archive

        self._pathtype = posixpath if (archive.compressed or archive.remote) else os.path

    def _build_prefix(self):
        if self._archive.remote:
            path = ''
        else:
            path = 'ILCD'
        if self.internal_prefix is not None:
            path = self._pathtype.join(self.internal_prefix, path)
        return path

    def _de_prefix(self, file):
        return re.sub('^' + self._pathtype.join(self._build_prefix(), ''), '', file)

    def _path_from_parts(self, dtype, uid, version=None):
        """
        Build the archive path of a dataset from its data type, UUID and optional version.

        :param dtype: required; must pass _check_dtype and be a key of typeDirs
        :param uid: required
        :param version: optional [None]; appended to the uid after an underscore
        :return: a single (prefixed) path ending in '.xml'
        """
        assert _check_dtype(dtype)
        base = self._pathtype.join(self._build_prefix(), typeDirs[dtype], uid)
        suffix = '.xml' if version is None else '_' + version + '.xml'
        return base + suffix

    def _path_from_search(self, search_result):
        return self._pathtype.join(self._build_prefix(), search_result)

    def search_by_id(self, uid, dtype=None):
        return [i for i in self.list_objects(dtype=dtype) if re.search(uid, i, flags=re.IGNORECASE)]

    def list_objects(self, dtype=None):
        assert self._archive.remote is False, "Cannot list objects for remote archives"
        in_prefix = self._build_prefix()
        if dtype is not None:
            assert _check_dtype(dtype)
            in_prefix = self._pathtype.join(in_prefix, typeDirs[dtype])
        return [self._de_prefix(f) for f in self._archive.listfiles(in_prefix=in_prefix)]

    def _fetch_filename(self, filename):
        return self._archive.readfile(filename)

    def _check_or_retrieve_child(self, uid, uri):
        child = self._get_entity(uid)
        if child is None:
            dtype = _extract_dtype(uri, self._pathtype)
            child = self.retrieve_or_fetch_entity(uid, dtype=dtype)
        return child

    def _get_objectified_entity(self, filename):
        """
        Read a file from the archive and parse it into an objectified XML tree.

        :param filename: full archive path
        :return: the objectified root element
        """
        raw = self._fetch_filename(filename)
        return objectify.fromstring(raw)

    def _search_for_term(self, term, dtype=None):
        search_results = self.search_by_id(term, dtype=dtype)
        if len(search_results) > 0:
            self._print('Found Results:')
            [print(i) for i in search_results]
            if len(search_results) > 1:
                print('Please refine search')
                return None
            result = self._path_from_search(search_results[0])
            dtype = _extract_dtype(result, self._pathtype)
            if dtype is None:
                raise ValueError('Search result with no matching dtype')
            return self.objectify(result, dtype=dtype)
        print('No results.')
        return None

    def objectify(self, term, dtype=None, version=None):
        if dtype is None:
            return self._search_for_term(term)

        try:
            uid = _extract_uuid(term)
        except AttributeError:
            # can't find UUID: search is required
            return self._search_for_term(term, dtype=dtype)

        # if we get here, uid is valid and dtype is valid
        entity = self._get_entity(uid)
        if entity is not None:
            return entity

        try:
            # if we are a search result, this will succeed
            o = self._get_objectified_entity(self._path_from_search(term))
        except (KeyError, FileNotFoundError):
            # we are not a search result-- let's build the entity path
            o = self._get_objectified_entity(self._path_from_parts(dtype, uid, version=version))

        return o

    def _create_unit(self, unit_ref):
        """
        Load a unit group file and return its reference unit plus a conversion table.

        UnitGroups aren't stored as full-fledged entities- they are stored as dicts inside quantities.
        :param unit_ref: reference from which the data type and UUID of the unit group are extracted
        :return: (reference LcUnit, dict mapping each unit name to 1.0 / its meanValue)
        """
        dtype = _extract_dtype(unit_ref, self._pathtype)
        uid = _extract_uuid(unit_ref)
        o = self._get_objectified_entity(self._path_from_parts(dtype, uid))

        ns = find_ns(o.nsmap, 'UnitGroup')

        group_uuid = str(find_common(o, 'UUID')[0])
        # index of the reference unit among the children of 'units'
        ref_index = int(find_tag(o, 'referenceToReferenceUnit', ns=ns)[0])
        ref_name = str(o['units'].getchildren()[ref_index]['name'])
        ref_unit = LcUnit(ref_name, unit_uuid=group_uuid)
        ref_unit.set_external_ref('%s/%s' % (typeDirs['UnitGroup'], group_uuid))

        unitconv = {str(unit['name']): 1.0 / float(unit['meanValue'])
                    for unit in o['units'].getchildren()}
        return ref_unit, unitconv

    def _create_quantity(self, o):
        """
        Create a quantity entity from an objectified FlowProperty and add it to the archive.

        The reference unit and unit conversion table come from the unit group the flow property
        refers to.
        :param o: objectified FlowProperty
        :return: the newly created quantity entity
        """
        ns = find_ns(o.nsmap, 'FlowProperty')

        uuid = str(find_common(o, 'UUID')[0])
        name = str(find_common(o, 'name')[0])
        comment = str(find_common(o, 'generalComment')[0])

        ug, _ = get_reference_unit_group(o, ns=ns)  # the uri part of the reference is not needed

        # need the path without extension- _create_unit extracts dtype and uuid from it
        ug_path = self._pathtype.join('unitgroups', ug)

        refunit, unitconv = self._create_unit(ug_path)

        quantity = LcQuantity(uuid, Name=name, ReferenceUnit=refunit, UnitConversion=unitconv,
                              Comment=comment)
        quantity.set_external_ref('%s/%s' % (typeDirs['FlowProperty'], uuid))

        self.add(quantity)

        return quantity

    @staticmethod
    def _create_dummy_flow_from_exch(uid, exch):
        """
        Build a placeholder flow for an exchange whose real flow could not be loaded.

        :param uid: id to give the placeholder flow
        :param exch: exchange element; its shortDescription becomes the flow name
        :return: an LcFlow in the 'dummy flows' compartment (not added to any archive here)
        """
        name = str(find_common(exch, 'shortDescription')[0])
        print('Creating DUMMY flow (%s) with name %s' % (uid, name))
        dummy = LcFlow(uid, Name=name, Comment='Dummy flow (HTTP or XML error)', Compartment=['dummy flows'])
        return dummy

    def _create_flow(self, o):
        """
        Create a flow entity from an objectified flow dataset and add it to the archive.

        Each entry under flowProperties becomes a characterization of the flow; the one whose
        dataSetInternalID equals the dataset's reference flow property id is marked as reference.
        Flow properties that cannot be retrieved (HTTPError, XMLSyntaxError, KeyError) are skipped,
        and duplicate characterizations are reported and ignored.
        :param o: objectified flow
        :return: an LcFlow
        :raises KeyError: re-raised from self.add() after reporting the duplicate entity
        """
        ns = find_ns(o.nsmap, 'Flow')
        name = grab_flow_name(o, ns=ns)

        uuid = str(find_common(o, 'UUID')[0])
        comment = str(find_common(o, 'generalComment')[0])
        cas = str(find_tag(o, 'CASNumber', ns=ns)[0])

        categories = find_common(o, 'category')
        if categories == ['']:
            # no categories present: fall back to the 'class' entries
            categories = find_common(o, 'class')
        categories = [str(i) for i in categories]

        flow = LcFlow(uuid, Name=name, CasNumber=cas, Comment=comment, Compartment=categories)
        flow.set_external_ref('%s/%s' % (typeDirs['Flow'], uuid))

        ref_to_ref = get_reference_flow_property_id(o, ns=ns)
        for fp in o['flowProperties'].getchildren():
            is_ref = int(fp.attrib['dataSetInternalID']) == ref_to_ref
            val = float(find_tag(fp, 'meanValue', ns=ns)[0])

            ref = find_tag(fp, 'referenceToFlowPropertyDataSet', ns=ns)[0]
            rfp_uuid = ref.attrib['refObjectId']
            rfp_uri = ref.attrib['uri']

            try:
                quantity = self._check_or_retrieve_child(rfp_uuid, rfp_uri)
            except (HTTPError, XMLSyntaxError, KeyError):
                continue

            try:
                flow.add_characterization(quantity, reference=is_ref, value=val)
            except DuplicateCharacterizationError:
                print('Duplicate Characterization in entity %s\n %s = %g' % (uuid, quantity, val))
                # let it go

        try:
            self.add(flow)
        except KeyError:
            print('Found duplicate entity %s' % uuid)
            raise
        return flow

    def _create_process_entity(self, o, ns):
        """
        Create a process entity from the metadata of an objectified process and add it to the
        archive.  Exchanges are not handled here.

        The name is the comma-joined list of the non-empty name components (base name, treatment
        standards/routes, mix and location types, functional unit flow properties).
        :param o: objectified process
        :param ns: namespace used for the process-specific tags
        :return: the newly created process entity
        """
        uuid = str(find_common(o, 'UUID')[0])

        name_tags = ('baseName',
                     'treatmentStandardsRoutes',
                     'mixAndLocationTypes',
                     'functionalUnitFlowProperties')
        name_parts = [str(find_tag(o, k, ns=ns)[0]) for k in name_tags]
        name = ', '.join(part for part in name_parts if len(part))

        geography = find_tag(o, 'locationOfOperationSupplyOrProduction', ns=ns)[0].attrib['location']

        stt = {'begin': str(find_common(o, 'referenceYear')[0]), 'end': str(find_common(o, 'dataSetValidUntil')[0])}

        comment = str(find_common(o, 'generalComment')[0])

        cls = [str(i) for i in find_common(o, 'class')]

        process = LcProcess(uuid, Name=name, Comment=comment, SpatialScope=geography, TemporalScope=stt,
                            Classifications=cls)
        self.add(process)

        process.set_external_ref('%s/%s' % (typeDirs['Process'], uuid))

        return process

    def _create_process(self, o):
        """
        Create a process entity, with all its exchanges, from an objectified process dataset.

        All child flows are loaded before the process entity itself is created.  A flow that cannot
        be loaded (HTTPError, XMLSyntaxError, KeyError) is replaced by a dummy flow.  The exchange
        whose flow uuid and direction match the dataset's reference flow is also added as reference.
        :param o: objectified process
        :return: the newly created process entity
        """
        ns = find_ns(o.nsmap, 'Process')

        try:
            rf, rf_uri, rf_dir = get_reference_flow(o, ns=ns)
        except XMLSyntaxError:
            # no usable reference flow: nothing below will match
            rf = None
            rf_dir = None

        exch_list = []

        for exch in o['exchanges'].getchildren():
            # load all child flows
            f_id, f_uri, f_dir = get_flow_ref(exch, ns=ns)
            try:
                flow = self._check_or_retrieve_child(f_id, f_uri)
            except (HTTPError, XMLSyntaxError, KeyError):
                process_uuid = str(find_common(o, 'UUID')[0])
                print('In UUID %s:' % process_uuid)
                flow = self._create_dummy_flow_from_exch(f_id, exch)
                self.add(flow)
            value = get_exch_value(exch, ns=ns)
            exch_list.append((flow, f_dir, value))

        process = self._create_process_entity(o, ns)

        for flow, direction, value in exch_list:
            if rf == flow.get_uuid() and rf_dir == direction:
                process.add_reference(flow, direction)
            # add_dups: poor quality control on ELCD
            process.add_exchange(flow, direction, reference=None, value=value,
                                 add_dups=True)

        return process

    def _fetch(self, term, dtype=None, version=None):
        """
        fetch an object from the archive by reference.

        term is either: a uid and a dtype (and optional version) OR a filename
        dtype MUST be specified as kwarg for remote archives; otherwise will search
        :param term:
        :return:
        """
        if dtype is None:
            dtype = _extract_dtype(term, self._pathtype)

        o = self.objectify(term, dtype=dtype, version=version)
        if o is None:
            return None

        if dtype is None:
            dtype = _dtype_from_nsmap(o.nsmap)

        if dtype == 'Flow':
            try:
                return self._create_flow(o)
            except KeyError:
                print('KeyError on term %s dtype %s version %s'% (term, dtype, version))
        elif dtype == 'Process':
            return self._create_process(o)
        elif dtype == 'FlowProperty':
            return self._create_quantity(o)
        else:
            return o

    def _load_all(self):
        for i in self.list_objects('Process'):
            self.retrieve_or_fetch_entity(i)
        self.check_counter('quantity')
        self.check_counter('flow')
        self.check_counter('process')