Example #1
    def __init__(self, ref, prefix=None, **kwargs):
        """
        Just instantiates the parent class.
        :param ref: root of the archive
        :param prefix: difference between the internal path (ref) and the ILCD base
          (note: for local archives, this defaults to 'ILCD'; for remote archives it
           defaults to empty)
        :param quiet: forwarded to ArchiveInterface
        :return:
        """
        super(IlcdArchive, self).__init__(ref, **kwargs)
        self.internal_prefix = prefix
        if prefix is not None:
            self._serialize_dict['prefix'] = prefix

        self._archive = Archive(self.ref)

        if not self._archive.OK:
            print('Trying local ELCD reference')
            self._archive = Archive(elcd3_local_fallback)
        if not self._archive.OK:
            print('Falling back to ELCD Remote Reference')
            self._archive = Archive(elcd3_remote_fallback, query_string='format=xml')

        if self._archive.compressed or self._archive.remote:
            self._pathtype = posixpath
        else:
            self._pathtype = os.path
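
A minimal instantiation sketch for the constructor above, assuming a hypothetical local archive path; prefix and quiet are the documented keyword arguments, and the ELCD fallbacks are only consulted when the given ref cannot be opened:

# Hedged usage sketch -- the path below is a hypothetical placeholder.
ilcd = IlcdArchive('/data/ELCD3.2.zip', quiet=True)
# If the archive at ref cannot be opened, the constructor falls back first to
# elcd3_local_fallback and then to the remote ELCD reference (query_string='format=xml').
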
Example #2
    def __init__(self, ref, prefix=None, **kwargs):
        """
        Just instantiates the parent class.
        :param ref: just a reference
        :param prefix: difference between the internal path (ref) and the ILCD base
        :return:
        """
        super(EcospoldV1Archive, self).__init__(ref, **kwargs)
        self.internal_prefix = prefix
        self._q_dict = dict()
        self._archive = Archive(self.ref)
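
All of these archive classes wrap the same Archive accessor in their constructors. A small sketch of that shared pattern, assuming a hypothetical .7z path and using only the calls and flags exercised elsewhere in these examples (listfiles, readfile, OK, remote):

# Hedged sketch of the accessor the archive classes delegate to (path is hypothetical).
acc = Archive('/data/ecoinvent_v1_processes.7z')
if acc.OK and not acc.remote:
    names = acc.listfiles(in_prefix='')   # the same call list_datasets() makes internally
    raw_xml = acc.readfile(names[0])      # raw content later fed to lxml.objectify
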
Example #3
class EcospoldV2Archive(LcArchive):
    """
    Class for loading metadata from ecospold v2 files. Note that ecoinvent supplies a great deal of supplementary
    information in files that are *outside* the ecospold archives; that information is IGNORED here, or loaded
    separately, but either way it is not handled by this class.
    """

    nsmap = 'http://www.EcoInvent.org/EcoSpold02'  # only valid for v2 ecospold files
    spold_version = tail.search(nsmap).groups()[0]

    def __init__(self, ref, prefix=None, **kwargs):
        """
        Just instantiates the parent class.
        :param ref: just a reference
        :param prefix: difference between the internal path (ref) and the ILCD base
        :return:
        """
        super(EcospoldV2Archive, self).__init__(ref, **kwargs)
        self.internal_prefix = prefix
        if self.internal_prefix is not None:
            self._serialize_dict['prefix'] = self.internal_prefix

        self._archive = Archive(self.ref)

    def fg_proxy(self, proxy):
        for ds in self.list_datasets(proxy):
            self.retrieve_or_fetch_entity(ds)
        return self[proxy]

    def bg_proxy(self, proxy):
        return self.fg_proxy(proxy)

    # no need for _key_to_id - keys in ecospold are uuids
    def _prefix(self, filename):
        if self.internal_prefix is not None:
            try:
                filename = os.path.join(self.internal_prefix, filename)
            except TypeError:  # None filename
                filename = self.internal_prefix
        return filename

    def _de_prefix(self, string):
        if self.internal_prefix is None:
            return string
        else:
            return re.sub('^' + os.path.join(self.internal_prefix, ''), '', string)

    def _fetch_filename(self, filename):
        return self._archive.readfile(self._prefix(filename))

    def list_datasets(self, startswith=None):
        assert self._archive.remote is False, "Cannot list objects for remote archives"
        return [self._de_prefix(x) for x in self._archive.listfiles(in_prefix=self._prefix(startswith))]

    def _get_objectified_entity(self, filename):
        try:
            o = objectify.fromstring(self._fetch_filename(filename))
        except ValueError:
            print('failed on :%s:' % filename)
            return None
        if o.nsmap[None] != self.nsmap:
            raise EcospoldV2Error('This class is for EcoSpold v%s only!' % self.nsmap[-2:])
        return o

    def _get_objectified_entity_with_lt_gt(self, filename):
        try:
            f = self._fetch_filename(filename)
            f = re.sub(' < ', ' &lt; ', re.sub(' > ', ' &gt; ', f.decode()))
            o = objectify.fromstring(f)
        except ValueError:
            print('failed on :%s:' % filename)
            return None
        except TypeError:
            print('failed on :%s:' % filename)
            raise
        if o.nsmap[None] != self.nsmap:
            raise EcospoldV2Error('This class is for EcoSpold v%s only!' % self.nsmap[-2:])
        return o

    def _create_quantity(self, exchange):
        """
        In ecospold v2, quantities are still only units, defined by string.  They do get their own UUIDs, but only
        as 'properties' of the flows; flows themselves are measured only by unit.
        This code is copied from the ecospold1 implementation.
        :param exchange:
        :return:
        """
        unitstring = exchange.unitName.text
        unit_uuid = exchange.attrib['unitId']
        try_q = self[unit_uuid]
        if try_q is None:
            ref_unit, _ = self._create_unit(unitstring)

            q = LcQuantity(unit_uuid, Name='EcoSpold Quantity %s' % unitstring, ReferenceUnit=ref_unit,
                           Comment=self.spold_version)
            self.add(q)
        else:
            q = try_q

        return q

    @staticmethod
    def _cls_to_text(i):
        if isinstance(i, objectify.ObjectifiedElement):
            return ': '.join([i.classificationSystem.text, i.classificationValue.text])
        else:
            return ''

    @staticmethod
    def _cat_to_text(i):
        if isinstance(i, objectify.ObjectifiedElement):
            return [i.compartment.text, i.subcompartment.text]
        else:
            return []

    def _create_flow(self, exchange):
        """
        makes a flow entity and adds to the db
        :param exchange:
        :return:
        """
        if 'intermediate' in exchange.tag:
            uid = exchange.attrib['intermediateExchangeId']
            cat = [self._cls_to_text(exchange.classification)]
        elif 'elementary' in exchange.tag:
            uid = exchange.attrib['elementaryExchangeId']
            cat = self._cat_to_text(exchange.compartment)
        else:
            raise AttributeError('No exchange type found for id %s' % exchange.attrib['id'])

        if self[uid] is not None:
            return self[uid]

        if 'casNumber' in exchange.attrib:
            cas = exchange.attrib['casNumber']
        else:
            cas = ''

        q = self._create_quantity(exchange)

        n = exchange.name.text
        c = 'EcoSpold02 Flow'

        f = LcFlow(uid, Name=n, CasNumber=cas, Comment=c, Compartment=cat)
        f.add_characterization(quantity=q, reference=True)

        self.add(f)

        return f

    def _create_process_entity(self, o):
        ad = find_tag(o, 'activityDescription')[0]

        u = ad.activity.get('id')

        if self[u] is not None:
            return self[u]

        n = find_tag(ad, 'activityName')[0].text
        try:
            c = find_tag(ad, 'generalComment')[0]['text'].text
        except TypeError:
            c = 'no comment.'
        except AttributeError:
            print('activity ID %s: no comment' % u)
            c = 'no comment.'
        g = find_tag(ad, 'geography')[0].shortname.text

        tp = find_tag(ad, 'timePeriod')[0]
        stt = {'begin': tp.get('startDate'), 'end': tp.get('endDate')}
        cls = [self._cls_to_text(i) for i in find_tag(ad, 'classification')]

        p = LcProcess(u, Name=n, Comment=c, SpatialScope=g, TemporalScope=stt,
                      Classifications=cls)

        self.add(p)
        return p

    def _grab_reference_flow(self, o, rf):
        """
        Create a reference exchange from the flowdata
        :param o:
        :param rf:
        :return:
        """
        for x in find_tag(o, 'flowData')[0].getchildren():
            if 'intermediate' in x.tag:
                if x.attrib['intermediateExchangeId'] == rf:
                    return self._create_flow(x)

        raise KeyError('Noted reference exchange %s not found!' % rf)

    def _collect_exchanges(self, o):
        """

        :param o:
        :return:
        """
        flowlist = []

        for exch in find_tag(o, 'flowData')[0].getchildren():
            if 'parameter' in exch.tag:
                continue
            if 'impactIndicator' in exch.tag:
                continue

            f = self._create_flow(exch)
            if hasattr(exch, 'outputGroup'):
                d = 'Output'
            elif hasattr(exch, 'inputGroup'):
                d = 'Input'
            else:
                raise DirectionlessExchangeError
            v = float(exch.get('amount'))  # or None if not found
            t = exch.get('activityLinkId')  # or None if not found
            flowlist.append(EcospoldExchange(f, d, v, t))
        return flowlist

    def _collect_impact_scores(self, o, process, flow):
        """
        the old "1115"
        :param o:
        :return:
        """

        scores = []
        exch = ExchangeValue(process, flow, 'Output', value=1.0)

        for cf in find_tag(o, 'flowData')[0].getchildren():
            if 'impactIndicator' in cf.tag:
                m = cf.impactMethodName.text
                c = cf.impactCategoryName.text
                i = cf.name.text
                v = float(cf.get('amount'))
                scores.append(EcospoldLciaResult(m, c, i, v))

        return scores

    def objectify(self, filename):
        try:
            o = self._get_objectified_entity(filename)
        except XMLSyntaxError:
            print('  !!XMLSyntaxError-- trying to escape < and > signs')
            try:
                o = self._get_objectified_entity_with_lt_gt(filename)
            except XMLSyntaxError:
                print('  !!Failed loading %s' % filename)
                raise
        return o

    def _create_process(self, filename, exchanges=True):
        """
        Extract dataset object from XML file
        :param filename:
        :return:
        """
        o = self.objectify(filename)

        p = self._create_process_entity(o)
        rf = self._grab_reference_flow(o, spold_reference_flow(filename))
        rx = p.add_reference(rf, 'Output')
        self._print('Identified reference exchange\n %s' % rx)
        if exchanges:
            for exch in self._collect_exchanges(o):
                if exch.value != 0:
                    self._print('Exch %s [%s] (%g)' % (exch.flow, exch.direction, exch.value))
                    p.add_exchange(exch.flow, exch.direction, reference=rx, value=exch.value,
                                   termination=exch.termination)

        return p

    '''
    def _fetch(self, uid, ref_flow=None):
        """
        ecospoldV2 files are named by activityId_referenceFlow - if none is supplied, take the first one found
        matching activityId
        :param uid:
        :param ref_flow:
        :return:
        """
        if ref_flow is not None:
            uid = '_'.join([uid, ref_flow])
        files = self.list_datasets(uid)
        if len(files) == 0:
            return None
        return self._create_process(files[0])
    '''

    def retrieve_or_fetch_entity(self, filename, **kwargs):
        entity = self._get_entity(filename)  # this checks upstream if it exists
        if entity is not None:
            if spold_reference_flow(filename) in [x.flow.get_uuid() for x in entity.reference_entity]:
                return entity
        return self._create_process(filename, **kwargs)

    def retrieve_lcia_scores(self, filename, quantities=None):
        """
        This function retrieves LCIA scores from an Ecospold02 file and stores them as characterizations in
        an LcFlow entity corresponding to the *first* (and presumably, only) reference intermediate flow

        Only stores cfs for quantities that exist locally.
        :param filename:
        :param quantities: list of quantity entities to look for (defaults to self.quantities())
        :return: a dict of quantity uuid to score
        """
        if quantities is None:
            quantities = self.quantities()

        import time
        start_time = time.time()
        print('Loading LCIA results from %s' % filename)
        o = self.objectify(filename)

        self._print('%30.30s -- %5f' % ('Objectified', time.time() - start_time))
        p = self._create_process_entity(o)
        rf = self._grab_reference_flow(o, spold_reference_flow(filename))

        exch = ExchangeValue(p, rf, 'Output', value=1.0)

        tags = dict()
        for q in quantities:
            if 'Method' in q.keys():
                if q['Name'] in tags:
                    raise KeyError('Name collision %s' % q['Name'])
                tags[q['Name']] = q

        results = LciaResults(p)

        for char in find_tag(o, 'flowData')[0].getchildren():
            if 'impactIndicator' in char.tag:
                m = char.impactMethodName.text
                c = char.impactCategoryName.text
                i = char.name.text
                v = float(char.get('amount'))
                my_tag = ', '.join([m, c, i])
                if my_tag in tags:
                    q = tags[my_tag]
                    result = LciaResult(q)
                    cf = Characterization(rf, q, value=v, location=p['SpatialScope'])
                    result.add_score(p.get_uuid(), exch, cf, p['SpatialScope'])
                    results[q.get_uuid()] = result

        self._print('%30.30s -- %5f' % ('Impact scores collected', time.time() - start_time))

        return results

    def _load_all(self, exchanges=True):
        now = time()
        count = 0
        for k in self.list_datasets():
            self.retrieve_or_fetch_entity(k, exchanges=exchanges)
            count += 1
            if count % 100 == 0:
                print(' Loaded %d processes (t=%.2f s)' % (count, time()-now))

        print(' Loaded %d processes (t=%.2f s)' % (count, time() - now))
        self.check_counter()
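
A short usage sketch for the class above, with a hypothetical archive path, prefix, and dataset choice; list_datasets() only works for non-remote archives, and retrieve_lcia_scores() returns results keyed by quantity UUID, as the method body shows:

# Hedged usage sketch -- path, prefix, and the chosen dataset are hypothetical.
ev2 = EcospoldV2Archive('/data/ecoinvent_3_cutoff.7z', prefix='datasets')
ds = ev2.list_datasets()[0]            # filenames are '<activityId>_<referenceFlowId>' spold files
p = ev2.retrieve_or_fetch_entity(ds)   # builds the LcProcess and its exchanges
res = ev2.retrieve_lcia_scores(ds)     # LciaResults indexed by quantity uuid
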
Example #4
class EcospoldV1Archive(NsUuidArchive):
    """
    Create an Ecospold Archive object from a path.  By default, assumes the path points to a literal
    .7z file, of the type that one can download from the ecoinvent website.  Creates an accessor for
    files in that archive and allows a user to
    """

    nsmap = 'http://www.EcoInvent.org/EcoSpold01'  # only valid for v1 ecospold files
    spold_version = tail.search(nsmap).groups()[0]

    def __init__(self, ref, prefix=None, **kwargs):
        """
        Just instantiates the parent class.
        :param ref: just a reference
        :param prefix: difference between the internal path (ref) and the ILCD base
        :return:
        """
        super(EcospoldV1Archive, self).__init__(ref, **kwargs)
        self.internal_prefix = prefix
        self._q_dict = dict()
        self._archive = Archive(self.ref)

    def _build_prefix(self):
        path = ''
        if self.internal_prefix is not None:
            path = os.path.join(self.internal_prefix, path)
        return path

    def list_datasets(self):
        assert self._archive.remote is False, "Cannot list objects for remote archives"
        return self._archive.listfiles(in_prefix=self._build_prefix())

    def _fetch_filename(self, filename):
        return self._archive.readfile(filename)

    def _get_objectified_entity(self, filename):
        o = objectify.fromstring(self._archive.readfile(filename))
        if o.nsmap[None] != self.nsmap:
            raise EcospoldVersionError('This class is for EcoSpold v%s only!' % self.nsmap[-2:])
        return o

    def _create_quantity(self, unitstring):
        """
        In ecospold v1, quantities are only units, defined by string
        :param unitstring:
        :return:
        """
        if unitstring in self._q_dict:
            q = self._q_dict[unitstring]
        else:
            ref_unit, _ = self._create_unit(unitstring)
            uid = self._key_to_id(unitstring)

            q = LcQuantity(uid, Name='EcoSpold Quantity %s' % unitstring,
                           ReferenceUnit=ref_unit, Comment=self.spold_version)
            q.set_external_ref(unitstring)
            self.add(q)
            self._q_dict[unitstring] = q

        return q

    def _create_flow(self, exch):
        """
        An ecospold01 exchange is really just a long attribute list, plus an inputGroup or outputGroup (ignored here)
        :param exch:
        :return:
        """
        number = int(exch.get('number'))
        uid = self._key_to_id(number)
        try_f = self[uid]
        if try_f is not None:
            f = try_f
            assert f.entity_type == 'flow', "Expected flow, found %s" % f.entity_type

        else:
            # generate flow
            n = exch.get("name")
            q = self._create_quantity(exch.get("unit"))
            c = not_none(exch.get("generalComment"))
            cas = not_none(exch.get("CASNumber"))
            cat = [exch.get('category'), exch.get('subCategory')]

            f = LcFlow(uid, Name=n, CasNumber=cas, Comment=c, Compartment=cat)
            f.add_characterization(q, reference=True)
            f.set_external_ref(number)
            self.add(f)

        if exch.get("unit") != f.unit():
            local_q = self._create_quantity(exch.get("unit"))
            if not f.has_characterization(local_q):
                if (f.unit(), local_q.unit()) not in conversion_dict:
                    print('Flow %s needs characterization for unit %s' % (f, local_q))
                    val = parse_math(input('Enter conversion factor 1 %s = x %s' % (f.unit(), local_q.unit())))
                else:
                    val = conversion_dict[(f.unit(), local_q.unit())]
                f.add_characterization(local_q, value=val)
        return f

    def _create_process(self, filename):
        """
        Extract dataset object from XML file
        :param filename:
        :return:
        """
        o = self._get_objectified_entity(filename)

        rf = None  # reference flow
        flowlist = []

        for exch in o.dataset.flowData.getchildren():
            f = self._create_flow(exch)
            if hasattr(exch, 'outputGroup'):
                d = 'Output'
                if exch.outputGroup == 0:
                    assert rf is None, "Multiple reference flows found!"
                    rf = f
            elif hasattr(exch, 'inputGroup'):
                d = 'Input'
            else:
                raise DirectionlessExchangeError
            local_q = self._create_quantity(exch.get("unit"))
            v = float(exch.get('meanValue'))  # returns none if missing
            if local_q is not f.reference_entity:
                v = v / f.cf(local_q)
            flowlist.append((f, d, v))

        p_meta = o.dataset.metaInformation.processInformation
        n = p_meta.referenceFunction.get('name')

        u = self._key_to_id(n)

        try_p = self[u]
        if try_p is not None:
            p = try_p
            assert p.entity_type == 'process', "Expected process, found %s" % p.entity_type

        else:
            # create new process
            g = p_meta.geography.get('location')
            stt = {'begin': str(find_tag(p_meta, 'startDate')[0]), 'end': str(find_tag(p_meta, 'endDate')[0])}

            c = p_meta.referenceFunction.get('generalComment')

            cls = [p_meta.referenceFunction.get('category'), p_meta.referenceFunction.get('subCategory')]
            p = LcProcess(u, Name=n, Comment=c, SpatialScope=g, TemporalScope=stt,
                          Classifications=cls)
            p.set_external_ref(n)

            if rf is None:
                rx = None
            else:
                rx = p.add_reference(rf, 'Output')
            for flow, f_dir, val in flowlist:
                self._print('Exch %s [%s] (%g)' % (flow, f_dir, val))
                p.add_exchange(flow, f_dir, reference=None, value=val, add_dups=True)

            self.add(p)

        return p

    def _fetch(self, uid, **kwargs):
        """
        Nothing to do here-- if it's not found, it needs to be loaded
        :param uid:
        :param kwargs:
        :return:
        """
        print('No way to fetch by UUID. Loading all processes...')
        self.load_all()

    def _load_all(self):
        """
        No need to "fetch" with ecospold v1, since UUIDs are not known in advance.
        Instead, just load all the processes at once.
        :return:
        """
        for k in self.list_datasets():
            self._create_process(k)
        self.check_counter('quantity')
        self.check_counter('flow')
        self.check_counter('process')

    def serialize(self, **kwargs):
        j = super(EcospoldV1Archive, self).serialize(**kwargs)
        if self.internal_prefix is not None:
            j['prefix'] = self.internal_prefix
        return j
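
Because EcoSpold v1 UUIDs are generated from names rather than known in advance, the usual pattern is to load the whole archive at once, as _fetch and _load_all above suggest. A sketch, assuming a hypothetical archive path and assuming that the public load_all() (called in _fetch above) dispatches to _load_all():

# Hedged usage sketch -- the path is a hypothetical placeholder.
es1 = EcospoldV1Archive('/data/ecoinvent_2.2_processes.7z')
es1.load_all()                    # presumably ends up in _load_all() above
print(len(es1.list_datasets()))   # only valid for non-remote archives
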
Example #5
class IlcdArchive(LcArchive):
    """
    This class handles de-referencing for ILCD archives
    """

    def __init__(self, ref, prefix=None, **kwargs):
        """
        Just instantiates the parent class.
        :param ref: root of the archive
        :param prefix: difference between the internal path (ref) and the ILCD base
          (note: for local archives, this defaults to 'ILCD'; for remote archives it
           defaults to empty)
        :param quiet: forwarded to ArchiveInterface
        :return:
        """
        super(IlcdArchive, self).__init__(ref, **kwargs)
        self.internal_prefix = prefix
        if prefix is not None:
            self._serialize_dict['prefix'] = prefix

        self._archive = Archive(self.ref)

        if not self._archive.OK:
            print('Trying local ELCD reference')
            self._archive = Archive(elcd3_local_fallback)
        if not self._archive.OK:
            print('Falling back to ELCD Remote Reference')
            self._archive = Archive(elcd3_remote_fallback, query_string='format=xml')

        if self._archive.compressed or self._archive.remote:
            self._pathtype = posixpath
        else:
            self._pathtype = os.path

    def _build_prefix(self):
        if self._archive.remote:
            path = ''
        else:
            path = 'ILCD'
        if self.internal_prefix is not None:
            path = self._pathtype.join(self.internal_prefix, path)
        return path

    def _de_prefix(self, file):
        return re.sub('^' + self._pathtype.join(self._build_prefix(), ''), '', file)

    def _path_from_parts(self, dtype, uid, version=None):
        """
        aka 'path from parts'
        :param dtype: required
        :param uid: required
        :param version: optional [None]
        :return: a single (prefixed) path
        """
        assert _check_dtype(dtype)
        postpath = self._pathtype.join(self._build_prefix(), typeDirs[dtype], uid)
        if version is not None:
            postpath += '_' + version
        return postpath + '.xml'

    def _path_from_search(self, search_result):
        return self._pathtype.join(self._build_prefix(), search_result)

    def search_by_id(self, uid, dtype=None):
        return [i for i in self.list_objects(dtype=dtype) if re.search(uid, i, flags=re.IGNORECASE)]

    def list_objects(self, dtype=None):
        assert self._archive.remote is False, "Cannot list objects for remote archives"
        in_prefix = self._build_prefix()
        if dtype is not None:
            assert _check_dtype(dtype)
            in_prefix = self._pathtype.join(in_prefix, typeDirs[dtype])
        return [self._de_prefix(f) for f in self._archive.listfiles(in_prefix=in_prefix)]

    def _fetch_filename(self, filename):
        return self._archive.readfile(filename)

    def _check_or_retrieve_child(self, uid, uri):
        child = self._get_entity(uid)
        if child is None:
            dtype = _extract_dtype(uri, self._pathtype)
            child = self.retrieve_or_fetch_entity(uid, dtype=dtype)
        return child

    def _get_objectified_entity(self, filename):
        return objectify.fromstring(self._fetch_filename(filename))

    def _search_for_term(self, term, dtype=None):
        search_results = self.search_by_id(term, dtype=dtype)
        if len(search_results) > 0:
            self._print('Found Results:')
            [print(i) for i in search_results]
            if len(search_results) > 1:
                print('Please refine search')
                return None
            result = self._path_from_search(search_results[0])
            dtype = _extract_dtype(result, self._pathtype)
            if dtype is None:
                raise ValueError('Search result with no matching dtype')
            return self.objectify(result, dtype=dtype)
        print('No results.')
        return None

    def objectify(self, term, dtype=None, version=None):
        if dtype is None:
            return self._search_for_term(term)

        try:
            uid = _extract_uuid(term)
        except AttributeError:
            # can't find UUID: search is required
            return self._search_for_term(term, dtype=dtype)

        # if we get here, uid is valid and dtype is valid
        entity = self._get_entity(uid)
        if entity is not None:
            return entity

        try:
            # if we are a search result, this will succeed
            o = self._get_objectified_entity(self._path_from_search(term))
        except (KeyError, FileNotFoundError):
            # we are not a search result-- let's build the entity path
            o = self._get_objectified_entity(self._path_from_parts(dtype, uid, version=version))

        return o

    def _create_unit(self, unit_ref):
        """
        UnitGroups aren't stored as full-fledged entities- they are stored as dicts inside quantities.
        :param unit_ref:
        :return:
        """
        dtype = _extract_dtype(unit_ref, self._pathtype)
        uid = _extract_uuid(unit_ref)
        filename = self._path_from_parts(dtype, uid)
        o = self._get_objectified_entity(filename)

        ns = find_ns(o.nsmap, 'UnitGroup')

        u = str(find_common(o, 'UUID')[0])
        reference_unit = int(find_tag(o, 'referenceToReferenceUnit', ns=ns)[0])
        unitstring = str(o['units'].getchildren()[reference_unit]['name'])
        ref_unit = LcUnit(unitstring, unit_uuid=u)
        ref_unit.set_external_ref('%s/%s' % (typeDirs['UnitGroup'], u))

        unitconv = dict()
        for i in o['units'].getchildren():
            unitconv[str(i['name'])] = 1.0 / float(i['meanValue'])
        return ref_unit, unitconv

    def _create_quantity(self, o):
        """

        :param o: objectified FlowProperty
        :return:
        """
        ns = find_ns(o.nsmap, 'FlowProperty')

        u = str(find_common(o, 'UUID')[0])
        n = str(find_common(o, 'name')[0])

        c = str(find_common(o, 'generalComment')[0])

        ug, ug_uri = get_reference_unit_group(o, ns=ns)

        ug_path = self._pathtype.join('unitgroups', ug)  # need the path without extension- I know- it's all sloppy

        refunit, unitconv = self._create_unit(ug_path)

        q = LcQuantity(u, Name=n, ReferenceUnit=refunit, UnitConversion=unitconv, Comment=c)
        q.set_external_ref('%s/%s' % (typeDirs['FlowProperty'], u))

        self.add(q)

        return q

    @staticmethod
    def _create_dummy_flow_from_exch(uid, exch):
        n = str(find_common(exch, 'shortDescription')[0])
        print('Creating DUMMY flow (%s) with name %s' % (uid, n))
        return LcFlow(uid, Name=n, Comment='Dummy flow (HTTP or XML error)', Compartment=['dummy flows'])

    def _create_flow(self, o):
        """

        :param o: objectified flow
        :return: an LcFlow
        """
        ns = find_ns(o.nsmap, 'Flow')
        n = grab_flow_name(o, ns=ns)

        u = str(find_common(o, 'UUID')[0])

        c = str(find_common(o, 'generalComment')[0])

        cas = str(find_tag(o, 'CASNumber', ns=ns)[0])

        cat = find_common(o, 'category')
        if cat == ['']:
            cat = find_common(o, 'class')
        cat = [str(i) for i in cat]

        f = LcFlow(u, Name=n, CasNumber=cas, Comment=c, Compartment=cat)
        f.set_external_ref('%s/%s' % (typeDirs['Flow'], u))

        ref_to_ref = get_reference_flow_property_id(o, ns=ns)
        for fp in o['flowProperties'].getchildren():
            if int(fp.attrib['dataSetInternalID']) == ref_to_ref:
                is_ref = True
            else:
                is_ref = False
            val = float(find_tag(fp, 'meanValue', ns=ns)[0])

            ref = find_tag(fp, 'referenceToFlowPropertyDataSet', ns=ns)[0]
            rfp_uuid = ref.attrib['refObjectId']
            rfp_uri = ref.attrib['uri']

            try:
                q = self._check_or_retrieve_child(rfp_uuid, rfp_uri)
            except (HTTPError, XMLSyntaxError, KeyError):
                continue

            try:
                f.add_characterization(q, reference=is_ref, value=val)
            except DuplicateCharacterizationError:
                print('Duplicate Characterization in entity %s\n %s = %g' % (u, q, val))
                # let it go

        try:
            self.add(f)
        except KeyError:
            print('Found duplicate entity %s' % u)
            raise
        return f

    def _create_process_entity(self, o, ns):
        u = str(find_common(o, 'UUID')[0])
        n = ', '.join(chain(filter(len, [str(find_tag(o, k, ns=ns)[0])
                                         for k in ('baseName',
                                                   'treatmentStandardsRoutes',
                                                   'mixAndLocationTypes',
                                                   'functionalUnitFlowProperties')])))

        g = find_tag(o, 'locationOfOperationSupplyOrProduction', ns=ns)[0].attrib['location']

        stt = {'begin': str(find_common(o, 'referenceYear')[0]), 'end': str(find_common(o, 'dataSetValidUntil')[0])}

        c = str(find_common(o, 'generalComment')[0])

        cls = [str(i) for i in find_common(o, 'class')]

        p = LcProcess(u, Name=n, Comment=c, SpatialScope=g, TemporalScope=stt,
                      Classifications=cls)
        self.add(p)

        p.set_external_ref('%s/%s' % (typeDirs['Process'], u))

        return p

    def _create_process(self, o):
        """

        :param o: objectified process
        :return:
        """
        ns = find_ns(o.nsmap, 'Process')

        try:
            rf, rf_uri, rf_dir = get_reference_flow(o, ns=ns)
        except XMLSyntaxError:
            rf = None
            rf_dir = None

        exch_list = []

        for exch in o['exchanges'].getchildren():
            # load all child flows
            f_id, f_uri, f_dir = get_flow_ref(exch, ns=ns)
            try:
                f = self._check_or_retrieve_child(f_id, f_uri)
            except (HTTPError, XMLSyntaxError, KeyError):
                u = str(find_common(o, 'UUID')[0])
                print('In UUID %s:' % u)
                f = self._create_dummy_flow_from_exch(f_id, exch)
                self.add(f)
            v = get_exch_value(exch, ns=ns)
            exch_list.append((f, f_dir, v))

        p = self._create_process_entity(o, ns)

        for flow, f_dir, val in exch_list:
            if rf == flow.get_uuid() and rf_dir == f_dir:
                p.add_reference(flow, f_dir)
            p.add_exchange(flow, f_dir, reference=None, value=val,
                           add_dups=True)  # add_dups: poor quality control on ELCD

        return p

    def _fetch(self, term, dtype=None, version=None):
        """
        fetch an object from the archive by reference.

        term is either: a uid and a dtype (and optional version) OR a filename
        dtype MUST be specified as kwarg for remote archives; otherwise will search
        :param term:
        :return:
        """
        if dtype is None:
            dtype = _extract_dtype(term, self._pathtype)

        o = self.objectify(term, dtype=dtype, version=version)
        if o is None:
            return None

        if dtype is None:
            dtype = _dtype_from_nsmap(o.nsmap)

        if dtype == 'Flow':
            try:
                return self._create_flow(o)
            except KeyError:
                print('KeyError on term %s dtype %s version %s' % (term, dtype, version))
        elif dtype == 'Process':
            return self._create_process(o)
        elif dtype == 'FlowProperty':
            return self._create_quantity(o)
        else:
            return o

    def _load_all(self):
        for i in self.list_objects('Process'):
            self.retrieve_or_fetch_entity(i)
        self.check_counter('quantity')
        self.check_counter('flow')
        self.check_counter('process')
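
Finally, a de-referencing sketch for the ILCD archive above, mirroring the pattern _load_all uses; the path and search term are hypothetical:

# Hedged usage sketch -- path and search term are hypothetical placeholders.
ilcd = IlcdArchive('/data/ELCD3.2.zip')
for path in ilcd.list_objects('Process')[:5]:     # de-prefixed paths under the Process type dir
    ilcd.retrieve_or_fetch_entity(path)           # same call _load_all() makes per dataset
hits = ilcd.search_by_id('electricity', dtype='Process')   # case-insensitive regex over listed paths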