Example #1
    def extract(self):
        def extract_flow_data(o):
            ds = {
                "categories": (
                    o.compartment.compartment.text,
                    o.compartment.subcompartment.text,
                ),
                "code": o.get("id"),
                "CAS number": o.get("casNumber"),
                "name": o.name.text,
                "database": self.db_name,
                "exchanges": [],
                "unit": o.unitName.text,
            }
            ds["type"] = EMISSIONS_CATEGORIES.get(ds["categories"][0],
                                                  ds["categories"][0])
            return ds

        lci_dirpath = os.path.join(os.path.dirname(__file__), "..", "data",
                                   "lci")

        fp = os.path.join(lci_dirpath, "ecoinvent elementary flows 3.7.xml")
        root = objectify.parse(open(fp, encoding="utf-8")).getroot()
        flow_data = recursive_str_to_unicode(
            [extract_flow_data(ds) for ds in root.iterchildren()])

        previous = os.path.join(lci_dirpath, "previous elementary flows.json")
        return flow_data + json.load(open(previous))
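The extractor walks every child element of the XML root and maps attributes and sub-elements onto plain dicts. Below is a minimal, self-contained sketch of the same lxml.objectify access pattern; the tiny XML snippet and its field values are invented for illustration.

from lxml import objectify

# Hypothetical miniature flow list, mirroring the fields read by extract_flow_data above.
xml = b"""<flowData>
  <elementaryExchange id="abc-123" casNumber="124-38-9">
    <name>Carbon dioxide</name>
    <unitName>kg</unitName>
    <compartment>
      <compartment>air</compartment>
      <subcompartment>unspecified</subcompartment>
    </compartment>
  </elementaryExchange>
</flowData>"""

root = objectify.fromstring(xml)
for o in root.iterchildren():
    # Same attribute / child-element access pattern as in the extractor.
    print(o.get("id"), o.name.text, o.unitName.text,
          (o.compartment.compartment.text, o.compartment.subcompartment.text))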
Example #2
    def extract(cls, path, db_name, use_mp=True):
        data = []
        if os.path.isdir(path):
            filelist = [
                os.path.join(path, filename)
                for filename in os.listdir(path)
                if filename[-4:].lower() == ".xml"
                # Skip SimaPro-specific flow list
                and filename != 'ElementaryFlows.xml'
            ]
        else:
            filelist = [path]

        if not filelist:
            raise OSError("Provided path doesn't appear to have any XML files")

        if sys.version_info < (3, 0):
            use_mp = False

        if use_mp:
            with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
                print("Extracting XML data from {} datasets".format(len(filelist)))
                results = [
                    pool.apply_async(
                        Ecospold1DataExtractor.process_file,
                        args=(x, db_name)
                    ) for x in filelist
                ]
                data = [
                    x
                    for p in results
                    for x in p.get()
                    if x
                ]

        else:
            pbar = pyprind.ProgBar(len(filelist), title="Extracting ecospold1 files:", monitor=True)
            data = []

            for index, filepath in enumerate(filelist):
                for x in cls.process_file(filepath, db_name):
                    if x:
                        data.append(x)

                pbar.update(item_id=os.path.basename(filepath)[:15])

            print(pbar)

        if sys.version_info < (3, 0):
            print("Converting to unicode")
            return recursive_str_to_unicode(data)
        else:
            return data
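The multiprocessing branch fans the files out with pool.apply_async and then flattens the per-file result lists while dropping falsy entries. A stripped-down sketch of that pattern, with a dummy worker standing in for Ecospold1DataExtractor.process_file:

import multiprocessing

def process_file(filepath, db_name):
    # Dummy worker standing in for the real per-file extractor:
    # returns a list of dataset dicts for one file.
    return [{"file": filepath, "database": db_name}]

if __name__ == "__main__":
    filelist = ["a.xml", "b.xml", "c.xml"]
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        results = [
            pool.apply_async(process_file, args=(fp, "example db"))
            for fp in filelist
        ]
        # Each .get() yields one file's list; flatten and drop falsy entries.
        data = [x for p in results for x in p.get() if x]
    print(len(data), "datasets extracted")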
Example #3
    def delete_activity(self, activity):
        """A method to delete a flow from database.
        
        Parameters
        ----------

        activity: str

            The flow to be deleted.
        """
        data = self.db.load()
        del data[activity]
        from bw2data.utils import recursive_str_to_unicode
        self.db.write(recursive_str_to_unicode(data))
        self.db.process()
        print ("deleted activity flow: %s" % (str(activity)))
Example #4
    def extract(cls, dirpath, db_name, use_mp=True):
        assert os.path.exists(dirpath)
        if os.path.isdir(dirpath):
            filelist = [
                filename
                for filename in os.listdir(dirpath)
                if os.path.isfile(os.path.join(dirpath, filename))
                and filename.split(".")[-1].lower() == "spold"
            ]
        elif os.path.isfile(dirpath):
            filelist = [dirpath]
        else:
            raise OSError("Can't understand path {}".format(dirpath))

        if sys.version_info < (3, 0):
            use_mp = False

        if use_mp:
            with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
                print("Extracting XML data from {} datasets".format(len(filelist)))
                results = [
                    pool.apply_async(
                        Ecospold2DataExtractor.extract_activity,
                        args=(dirpath, x, db_name),
                    )
                    for x in filelist
                ]
                data = [p.get() for p in results]
        else:
            pbar = pyprind.ProgBar(
                len(filelist), title="Extracting ecospold2 files:", monitor=True
            )

            data = []
            for index, filename in enumerate(filelist):
                data.append(cls.extract_activity(dirpath, filename, db_name))
                pbar.update(item_id=filename[:15])

            print(pbar)

        if sys.version_info < (3, 0):
            print("Converting to unicode")
            return recursive_str_to_unicode(data)
        else:
            return data
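Assuming the usual bw2io layout (the import path and the directory below are assumptions), this classmethod would be called with a directory of .spold files and a target database name:

from bw2io.extractors.ecospold2 import Ecospold2DataExtractor  # assumed module path

# Hypothetical directory of *.spold files for an ecoinvent-style release.
data = Ecospold2DataExtractor.extract(
    "/path/to/datasets",
    "ecoinvent 3.7 cutoff",
    use_mp=True,
)
print("Extracted {} activities".format(len(data)))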
Example #5
    def add_missing_cfs(self):
        new_flows = []

        for method in self.data:
            for cf in method["exchanges"]:
                if "input" not in cf:
                    cf["code"] = str(uuid.uuid4())
                    new_flows.append(cf)

        new_flows = recursive_str_to_unicode(
            dict([self._format_flow(cf) for cf in new_flows]))

        if new_flows:
            biosphere = Database(self.biosphere_name)
            biosphere_data = biosphere.load()
            biosphere_data.update(new_flows)
            biosphere.write(biosphere_data)

            print(u"Added {} new biosphere flows".format(len(new_flows)))
Example #6
def _to_unicode(data):
    if sys.version_info < (3, 0):
        return recursive_str_to_unicode(data)
    else:
        return data
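Usage is a straight pass-through on Python 3 and a deep str-to-unicode conversion on Python 2, for example:

converted = _to_unicode({"name": "Carbon dioxide", "unit": "kg"})
# On Python 3 the dict is returned unchanged; on Python 2 every str becomes unicode.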
Example #7
    def save_as_bw2_dataset(self,
                            db_name="MP default",
                            unit=None,
                            location=None,
                            categories=[],
                            save_aggregated_inventory=False):
        """Save simplified process to a database.

        Creates database if necessary; otherwise *adds* to existing database. Uses the ``unit`` and ``location`` of ``self.scaling_activities[0]``, if not otherwise provided. Assumes that one unit of the scaling activity is being produced.

        Args:
            * *db_name* (str): Name of Database
            * *unit* (str, optional): Unit of the simplified process
            * *location* (str, optional): Location of the simplified process
            * *categories* (list, optional): Category/ies of the scaling activity
            * *save_aggregated_inventory* (bool, optional): By default (False), exchanges are saved as the scaling activities minus the cuts; if True, the aggregated inventory of all processes linked within the meta-process is saved instead

        """
        db = Database(db_name)
        if db_name not in databases:
            db.register()
            data = {}
        else:
            data = db.load()
        # GATHER DATASET INFORMATION
        self.key = (unicode(db_name), unicode(uuid.uuid4().urn[9:]))
        activity = self.scaling_activities[0]
        metadata = Database(activity[0]).load()[activity]
        # unit: if all scaling activities have the same unit, then set a unit, otherwise 'several'
        if len(self.scaling_activities) > 1:
            units_set = set([
                Database(sa[0]).load()[sa].get(u'unit', '')
                for sa in self.scaling_activities
            ])
            if len(units_set) > 1:
                unit = 'several'  # mixed units across scaling activities
            else:
                unit = units_set.pop()
        # EXCHANGES
        exchanges = []
        if not save_aggregated_inventory:  # save inventory as scaling activities - cuts
            # scaling activities
            for sa in self.scaling_activities:
                exchanges.append({
                    "amount": self.demand[self.mapping[sa]],
                    "input": sa,
                    "type": "biosphere" if sa[0] in (u"biosphere", u"biosphere3") else "technosphere",
                })
            # cuts
            for cut in self.cuts:
                exchanges.append({
                    "amount": -cut[3],
                    "input": cut[0],
                    "type": "biosphere" if cut[0][0] in (u"biosphere", u"biosphere3") else "technosphere",
                })
        else:  # save aggregated inventory of all processes in chain
            exchanges = [{
                "amount": exc[2],
                "input": exc[0],
                "type": "biosphere" if exc[0][0] in (u"biosphere", u"biosphere3") else "technosphere",
            } for exc in self.external_scaled_edges]
        # Production amount
        exchanges.append({
            # Output value unless several outputs, then 1.0
            "amount": self.outputs[0][2] if len(self.outputs) == 1 else 1.0,
            "input": self.key,
            "type": "production",
        })
        # WRITE DATASET INFORMATION
        data[self.key] = {
            "name": self.name,
            "unit": unit or metadata.get(u'unit', ''),
            "location": location or metadata.get(u'location', ''),
            "categories": categories,
            "type": "process",
            "exchanges": exchanges,
        }

        # TODO: Include uncertainty from original databases. Can't just scale
        # uncertainty parameters. Maybe solution is to use "dummy" processes
        # like we want to do to separate inputs of same flow in any case.
        # data = db.relabel_data(data, db_name)
        db.write(recursive_str_to_unicode(data))
        db.process()
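A hedged usage sketch: mp stands for an instance of the class defining save_as_bw2_dataset above, and every argument value below is illustrative only.

# `mp` is a hypothetical instance of the class shown in this example.
mp.save_as_bw2_dataset(
    db_name="MP default",
    unit=None,                          # fall back to the scaling activity's unit
    location="GLO",                     # hypothetical location
    categories=["example"],
    save_aggregated_inventory=False,    # store scaling activities minus cuts
)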