def synonyms_from_ecospold_exchange(exch):
    """
    Ecospold exchanges: synonyms are Name, CAS Number, and ', '-separated contents of synonym tags.
    Care must be taken not to split on ',' as some chemical names include commas.

    When exactly one synonym tag is found, its text is split on ', ' unless it contains the
    literal 'etc.', in which case the whole stripped text is kept as a single synonym. When
    several tags are found, each tag's text is offered as-is, so embedded ', ' is preserved.

    :param exch: exchange element; must support exch['name'] and exch.get('casNumber')
    :return: 2-tuple (name, syns): the exchange name as a string and the set of synonyms
      (which always includes the name)
    """
    name = str(exch['name'])
    syns = {name}

    cas = exch.get('casNumber')
    if cas is not None:
        syns.add(cas)

    synonym_tag = find_tag(exch, 'synonym')
    if len(synonym_tag) == 1:
        # single entry: parse the comma-separated list
        text = str(synonym_tag[0])
        if re.search(r'etc\.', text):
            # a list ending in 'etc.' is kept whole rather than split
            syns.add(text.strip())
        else:
            for x in text.split(', '):
                _add_syn_if(x, syns)
    else:
        # multiple entries- allow embedded comma-space
        for syn in synonym_tag:
            _add_syn_if(str(syn), syns)

    return name, syns
def synonyms_from_ilcd_flow(flow):
    """
    Collect the synonyms of an ILCD flow: its name, its UUID, its CAS number (when non-empty)
    and every non-empty ';'-separated entry of its 'synonyms' elements.

    Entries equal to 'wood' (case-insensitive, after stripping) are skipped.

    :param flow: ILCD flow element exposing an nsmap
    :return: 3-tuple (name, syns, uid): the flow name, the set of synonyms, and the stripped UUID
    """
    ns = find_ns(flow.nsmap, 'Flow')

    name = grab_flow_name(flow, ns=ns)
    syns = {name}

    uid = str(find_common(flow, 'UUID')[0]).strip()
    syns.add(uid)

    cas = str(find_tag(flow, 'CASNumber', ns=ns)[0]).strip()
    if cas != '':
        syns.add(cas)

    for block in find_common(flow, 'synonyms'):
        for piece in str(block).split(';'):
            term = piece.strip()
            if term == '' or term.lower() == 'wood':
                continue
            syns.add(term)

    return name, syns, uid
def get_ecospold_exchanges(archive=ECOSPOLD, prefix='datasets', file=ES_FILE):
    """
    Objectify one file of an EcospoldV2 archive and return its 'elementaryExchange' tags.

    :param archive: archive reference handed to EcospoldV2Archive
    :param prefix: prefix handed to EcospoldV2Archive
    :param file: name of the file to objectify
    :return: result of find_tag for 'elementaryExchange' on the objectified file
    """
    objectified = EcospoldV2Archive(archive, prefix=prefix).objectify(file)
    return find_tag(objectified, 'elementaryExchange')
def _create_process(self, filename):
    """
    Extract dataset object from XML file and return the matching process entity.

    Every child of the dataset's flowData is turned into a (flow, direction, value) triple.
    An exchange with outputGroup == 0 is taken as the reference flow. The process key is
    derived from the reference function name; if an entity is already stored under that key
    it is returned as-is, otherwise a new LcProcess is created, populated with the collected
    exchanges and added to the archive.

    :param filename: file to load via self._get_objectified_entity
    :return: the existing or newly created process
    :raises DirectionlessExchangeError: if an exchange has neither outputGroup nor inputGroup
    :raises AssertionError: if more than one reference flow is found, or if the entity already
      stored under the process key is not a process
    """
    o = self._get_objectified_entity(filename)

    rf = None  # reference flow
    flowlist = []

    for exch in o.dataset.flowData.getchildren():
        f = self._create_flow(exch)
        if hasattr(exch, 'outputGroup'):
            d = 'Output'
            if exch.outputGroup == 0:
                # explicit raise (not `assert`) so the check survives `python -O`
                if rf is not None:
                    raise AssertionError("Multiple reference flows found!")
                rf = f
        elif hasattr(exch, 'inputGroup'):
            d = 'Input'
        else:
            raise DirectionlessExchangeError

        local_q = self._create_quantity(exch.get("unit"))
        # exch.get returns None when 'meanValue' is missing; float() then raises TypeError
        v = float(exch.get('meanValue'))
        if local_q is not f.reference_entity:
            # exchange is reported in a different quantity than the flow's reference: convert
            v = v / f.cf(local_q)
        flowlist.append((f, d, v))

    p_meta = o.dataset.metaInformation.processInformation
    n = p_meta.referenceFunction.get('name')

    u = self._key_to_id(n)

    try_p = self[u]
    if try_p is not None:
        p = try_p
        # explicit raise (not `assert`) so the check survives `python -O`
        if p.entity_type != 'process':
            raise AssertionError("Expected process, found %s" % p.entity_type)
    else:
        # create new process
        g = p_meta.geography.get('location')
        stt = {'begin': str(find_tag(p_meta, 'startDate')[0]),
               'end': str(find_tag(p_meta, 'endDate')[0])}
        c = p_meta.referenceFunction.get('generalComment')
        cls = [p_meta.referenceFunction.get('category'),
               p_meta.referenceFunction.get('subCategory')]

        p = LcProcess(u, Name=n, Comment=c, SpatialScope=g, TemporalScope=stt,
                      Classifications=cls)
        p.set_external_ref(n)

        if rf is not None:
            p.add_reference(rf, 'Output')

        for flow, f_dir, val in flowlist:
            self._print('Exch %s [%s] (%g)' % (flow, f_dir, val))
            p.add_exchange(flow, f_dir, reference=None, value=val, add_dups=True)

        self.add(p)

    return p