def backup_project_directory(project):
    """Backup project data directory to a ``.tar.gz`` (compressed tar archive).

    ``project`` is the name of a project.

    Backup archive is saved to the user's home directory.

    Restoration is done using ``restore_project_directory``.

    Raises ``ValueError`` if ``project`` is not a registered project.

    Returns the filepath of the backup archive."""
    if project not in projects:
        raise ValueError("Project {} does not exist".format(project))

    timestamp = datetime.datetime.now().strftime("%d-%B-%Y-%I-%M%p")
    fp = os.path.join(
        os.path.expanduser("~"),
        "brightway2-project-{}-backup.{}.tar.gz".format(project, timestamp),
    )
    dir_path = os.path.join(projects._base_data_dir, safe_filename(project))

    # Record the original (unsanitized) project name inside the data
    # directory so restoration can recreate the project under its real name.
    with open(os.path.join(dir_path, ".project-name.json"), "w") as f:
        json.dump({"name": project}, f)

    print("Creating project backup archive - this could take a few minutes...")
    with tarfile.open(fp, "w:gz") as tar:
        tar.add(dir_path, arcname=safe_filename(project))

    # Bug fix: the docstring always promised the archive path, but the
    # function previously fell off the end and returned None.
    return fp
def delete_project(self, name=None, delete_dir=False):
    """Delete project ``name``, or the current project.

    ``name`` is the project to delete. If ``name`` is not provided,
    delete the current project.

    By default, the underlying project directory is not deleted; only the
    project name is removed from the list of active projects. If
    ``delete_dir`` is ``True``, then also delete the project directory.

    If deleting the current project, this function sets the current
    directory to ``default`` if it exists, or to a random project.

    Returns the current project."""
    doomed = name or self.current

    # Guard clauses: the project must exist, and we refuse to delete the
    # last remaining project.
    if doomed not in self:
        raise ValueError("{} is not a project".format(doomed))
    if len(self) == 1:
        raise ValueError("Can't delete only remaining project")

    ProjectDataset.delete().where(ProjectDataset.name == doomed).execute()

    if delete_dir:
        doomed_dir = self._base_data_dir / safe_filename(doomed)
        assert doomed_dir.is_dir(), "Can't find project directory"
        shutil.rmtree(doomed_dir)

    # If we just deleted the active project, switch to a sane fallback.
    if name is None or name == self.current:
        fallback = "default" if "default" in self else next(iter(self)).name
        self.set_current(fallback)
    return self.current
def copy_project(self, new_name, switch=True):
    """Copy current project to a new project named ``new_name``. If ``switch``, switch to new project."""
    # Refuse to clobber an existing project registration or directory.
    if new_name in self:
        raise ValueError("Project {} already exists".format(new_name))
    new_dir = self._base_data_dir / safe_filename(new_name, full=self.dataset.full_hash)
    if new_dir.exists():
        raise ValueError("Project directory already exists")

    current_data = ProjectDataset.get(ProjectDataset.name == self.current).data
    ProjectDataset.create(
        data=current_data,
        name=new_name,
        full_hash=self.dataset.full_hash,
    )

    # Copy the project directory, but never the SQLite write-lock file.
    shutil.copytree(self.dir, new_dir, ignore=lambda src, names: ["write-lock"])
    create_dir(self._base_logs_dir / safe_filename(new_name))

    if switch:
        self.set_current(new_name)
def purge_deleted_directories(self):
    """Delete project directories for projects which are no longer registered.

    Returns number of directories deleted."""
    # Safe filenames of every registered project — these are kept.
    keep = {safe_filename(obj.name) for obj in self}

    deleted = 0
    for entry in os.listdir(self._base_data_dir):
        candidate = self._base_data_dir / entry
        if candidate.is_dir() and entry not in keep:
            shutil.rmtree(candidate)
            deleted += 1
    return deleted
def write_lci_excel(database_name, objs=None, sections=None):
    """Export database `database_name` to an Excel spreadsheet.

    Not all data can be exported. The following constraints apply:

    * Nested data, e.g. `{'foo': {'bar': 'baz'}}` are excluded.
      Spreadsheets are not a great format for nested data. However,
      *tuples* are exported, and the characters `::` are used to join
      elements of the tuple.
    * The only well-supported data types are strings, numbers, and
      booleans.

    Returns the filepath of the exported file.
    """
    safe_name = safe_filename(database_name, False)
    filepath = os.path.join(projects.output_dir, "lci-" + safe_name + ".xlsx")

    workbook = xlsxwriter.Workbook(filepath)
    bold = workbook.add_format({"bold": True})
    bold.set_font_size(12)
    # Section-header labels; rows starting with one of these are bolded.
    highlighted = {
        "Activity",
        "Database",
        "Exchanges",
        "Parameters",
        "Database parameters",
        "Project parameters",
    }
    # NOTE: ``frmt`` deliberately ignores its argument ``x`` and closes over
    # the loop variable ``row`` below — the whole row is bolded when its
    # FIRST cell is a section header, regardless of which cell is written.
    frmt = lambda x: bold if row[0] in highlighted else None

    sheet = workbook.add_worksheet(create_valid_worksheet_name(database_name))

    data = CSVFormatter(database_name, objs).get_formatted_data(sections)

    for row_index, row in enumerate(data):
        for col_index, value in enumerate(row):
            if value is None:
                # Skip empty cells entirely.
                continue
            elif isinstance(value, numbers.Number):
                sheet.write_number(row_index, col_index, value, frmt(value))
            else:
                try:
                    sheet.write_string(row_index, col_index, value, frmt(value))
                except TypeError:
                    # Non-string, non-numeric values are silently dropped.
                    pass

    workbook.close()

    return filepath
def write_lcia_matching(db, name):
    """Write matched and unmatched CFs to Excel file"""
    safe_name = safe_filename(name, False)
    filepath = os.path.join(projects.output_dir, "lcia-matching-" + safe_name + ".xlsx")

    workbook = xlsxwriter.Workbook(filepath)
    bold = workbook.add_format({'bold': True})
    bold.set_font_size(12)

    sheet = workbook.add_worksheet('matching')
    # Column widths: name, amount, unit, categories.
    for col_range, width in (('A:A', 60), ('B:B', 12), ('C:C', 12), ('D:D', 40)):
        sheet.set_column(col_range, width)

    def emit_headers(target, at_row):
        for col_idx, label in enumerate(('Name', 'Amount', 'Unit', 'Categories', 'Matched')):
            target.write_string(at_row, col_idx, label, bold)

    def emit_cf(target, at_row, cf):
        target.write_string(at_row, 0, cf.get('name', '(unknown)'))
        target.write_number(at_row, 1, cf.get('amount', -1))
        target.write_string(at_row, 2, cf.get('unit', '(unknown)'))
        target.write_string(at_row, 3, u":".join(cf.get('categories', ['(unknown)'])))
        # A CF counts as matched once it carries an ``input`` key.
        target.write_boolean(at_row, 4, 'input' in cf)

    row = 0
    for ds in db:
        # Method name elements spread across the first row, in bold.
        for col_idx, elem in enumerate(ds['name']):
            sheet.write_string(row, col_idx, elem, bold)
        emit_headers(sheet, row + 1)
        row += 2
        for cf in sorted(ds.get('exchanges', []), key=lambda x: x.get('name')):
            emit_cf(sheet, row, cf)
            row += 1
        # Blank separator row between methods.
        row += 1

    workbook.close()

    return filepath
def write_lcia_matching(db, name):
    """Write matched and unmatched CFs to Excel file.

    NOTE(review): this is an exact functional duplicate of an earlier
    ``write_lcia_matching`` definition in this file; being defined later,
    this version shadows the earlier one at import time.

    Returns the filepath of the created workbook."""
    def write_headers(sheet, row):
        columns = ("Name", "Amount", "Unit", "Categories", "Matched")
        for index, col in enumerate(columns):
            sheet.write_string(row, index, col, bold)

    def write_row(sheet, row, data):
        sheet.write_string(row, 0, data.get("name", "(unknown)"))
        sheet.write_number(row, 1, data.get("amount", -1))
        sheet.write_string(row, 2, data.get("unit", "(unknown)"))
        sheet.write_string(row, 3, u":".join(data.get("categories", ["(unknown)"])))
        # A CF counts as matched once it carries an ``input`` key.
        sheet.write_boolean(row, 4, "input" in data)

    safe_name = safe_filename(name, False)
    filepath = os.path.join(projects.output_dir, "lcia-matching-" + safe_name + ".xlsx")

    workbook = xlsxwriter.Workbook(filepath)
    bold = workbook.add_format({"bold": True})
    bold.set_font_size(12)

    sheet = workbook.add_worksheet("matching")
    sheet.set_column("A:A", 60)
    sheet.set_column("B:B", 12)
    sheet.set_column("C:C", 12)
    sheet.set_column("D:D", 40)

    row = 0
    for ds in db:
        # Method name elements spread across the first row, in bold.
        for index, elem in enumerate(ds["name"]):
            sheet.write_string(row, index, elem, bold)
        write_headers(sheet, row + 1)
        row += 2
        for cf in sorted(ds.get("exchanges", []), key=lambda x: x.get("name")):
            write_row(sheet, row, cf)
            row += 1
        # Blank separator row between methods.
        row += 1

    workbook.close()

    return filepath
def write_lci_csv(database_name, objs=None, sections=None):
    """Export database `database_name` to a CSV file.

    Not all data can be exported. The following constraints apply:

    * Nested data, e.g. `{'foo': {'bar': 'baz'}}` are excluded. CSV is
      not a great format for nested data. However, *tuples* are exported,
      and the characters `::` are used to join elements of the tuple.
    * The only well-supported data types are strings, numbers, and
      booleans.

    Returns the filepath of the exported file.
    """
    data = CSVFormatter(database_name, objs).get_formatted_data(sections)

    safe_name = safe_filename(database_name, False)
    filepath = os.path.join(projects.output_dir, "lci-" + safe_name + ".csv")

    # ``newline=''`` lets the csv module control line endings. Bug fix:
    # an explicit UTF-8 encoding prevents UnicodeEncodeError on platforms
    # whose default locale encoding cannot represent the database's text.
    with open(filepath, "w", newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerows(data)

    return filepath
def abbreviate(names, length=8):
    """Build a short, mostly-unique identifier string from ``names``.

    Each element of ``names`` (a tuple or list of strings) is first cleaned
    with :func:`.filesystem.safe_filename`. The cleaned elements are joined
    with spaces and split into words. The identifier is the first word,
    lower-cased, followed by an abbreviation of every remaining word: the
    word's lower-cased first character, or the whole word when it starts
    with a digit. Finally a period and the MD5 hash of the dash-joined
    original ``names`` is appended, because first characters alone do not
    guarantee unique strings.

    ``('ReCiPe Endpoint (E,A)', 'human health', 'ionising radiation')``
    becomes something like
    ``'recipee(hhir.70eeef20a20deb6347ad428e3f6c5f3c'``.

    ``length`` is accepted for backwards compatibility but is not used.
    """
    cleaned = [safe_filename(part, False) for part in names]
    words = " ".join(cleaned).split(" ")
    # Digit-leading words are kept whole; others contribute one character.
    shorten = lambda word: word if word[0] in string.digits else word[0].lower()
    stem = words[0].lower() + "".join(shorten(word) for word in words[1:])
    digest = hashlib.md5(("-".join(names)).encode("utf-8")).hexdigest()
    return stem + "." + digest
def export_objs(cls, objs, filename, folder="export", backwards_compatible=False):
    """Export a list of objects. Can have heterogeneous types.

    Args:
        * *objs* (list): List of objects to export.
        * *filename* (str): Name of file to create.
        * *folder* (str, optional): Folder to create file in. Default is
          ``export``.
        * *backwards_compatible* (bool, optional): Create package
          compatible with bw2data version 1.

    Returns:
        Filepath of created file.
    """
    target = os.path.join(
        projects.request_directory(folder),
        safe_filename(filename) + u".bw2package",
    )
    prepared = [cls._prepare_obj(obj, backwards_compatible) for obj in objs]
    cls._write_file(target, prepared)
    return target
def lci_matrices_to_excel(database_name, include_descendants=True):
    """Export the technosphere and biosphere matrices of ``database_name``,
    together with row/column labels and metadata sheets, to an Excel
    workbook in ``projects.output_dir``.

    If ``include_descendants`` is falsy, restrict the activity columns to
    keys belonging to ``database_name`` itself.

    Returns the filepath of the created workbook."""
    from bw2calc import LCA

    print("Starting Excel export. This can be slow for large matrices!")

    safe_name = safe_filename(database_name, False)
    filepath = os.path.join(projects.output_dir, safe_name + ".xlsx")

    # Build inventory matrices from an arbitrary functional unit in this
    # database; only the matrix structure and labels are used.
    lca = LCA({Database(database_name).random(): 1})
    lca.load_lci_data()
    lca.fix_dictionaries()

    if not include_descendants:
        lca.activity_dict = {
            key: value
            for key, value in lca.activity_dict.items()
            if key[0] == database_name
        }

    # Drop biosphere flows with zero references
    # TODO: This will ignore (-1 + 1 = 0) references
    lca.biosphere_dict = {
        key: value
        for key, value in lca.biosphere_dict.items()
        if lca.biosphere_matrix[lca.biosphere_dict[key], :].sum() != 0
    }

    workbook = xlsxwriter.Workbook(filepath)
    bold = workbook.add_format({'bold': True})

    print("Sorting objects")

    # (label, key) pairs sorted by label, for stable sheet ordering.
    sorted_activity_keys = sorted([
        (Database.get(key).get("name") or u"Unknown", key)
        for key in lca.activity_dict
    ])
    sorted_product_keys = sorted([
        (Database.get(key).get("name") or u"Unknown", key)
        for key in lca.product_dict
    ])
    sorted_bio_keys = sorted([
        (Database.get(key).get("name") or u"Unknown", key)
        for key in lca.biosphere_dict
    ])

    tm_sheet = workbook.add_worksheet('technosphere')
    tm_sheet.set_column('A:A', 50)

    # NOTE(review): this load result is immediately shadowed by the loop
    # variable ``data`` below and appears unused — confirm before removing.
    data = Database(database_name).load()

    # Labels
    for index, data in enumerate(sorted_activity_keys):
        tm_sheet.write_string(0, index + 1, data[0])
    for index, data in enumerate(sorted_product_keys):
        tm_sheet.write_string(index + 1, 0, data[0])

    print("Entering technosphere matrix data")

    coo = lca.technosphere_matrix.tocoo()

    # Translate row index to sorted product index
    act_dict = {obj[1]: idx for idx, obj in enumerate(sorted_activity_keys)}
    pro_dict = {obj[1]: idx for idx, obj in enumerate(sorted_product_keys)}
    bio_dict = {obj[1]: idx for idx, obj in enumerate(sorted_bio_keys)}

    # Map matrix row/column indices to sorted-sheet positions.
    pro_lookup = {v: pro_dict[k] for k, v in lca.product_dict.items()}
    bio_lookup = {v: bio_dict[k] for k, v in lca.biosphere_dict.items()}
    act_lookup = {v: act_dict[k] for k, v in lca.activity_dict.items()}

    # Matrix values
    for row, col, value in zip(coo.row, coo.col, coo.data):
        tm_sheet.write_number(pro_lookup[row] + 1, act_lookup[col] + 1, value)

    bm_sheet = workbook.add_worksheet('biosphere')
    bm_sheet.set_column('A:A', 50)

    # NOTE(review): shadowed and apparently unused, as above.
    data = Database(database_name).load()

    # Labels
    for index, data in enumerate(sorted_activity_keys):
        bm_sheet.write_string(0, index + 1, data[0])
    for index, data in enumerate(sorted_bio_keys):
        bm_sheet.write_string(index + 1, 0, data[0])

    print("Entering biosphere matrix data")

    coo = lca.biosphere_matrix.tocoo()

    # Matrix values
    for row, col, value in zip(coo.row, coo.col, coo.data):
        bm_sheet.write_number(bio_lookup[row] + 1, act_lookup[col] + 1, value)

    COLUMNS = (u"Index", u"Name", u"Reference product", u"Unit", u"Categories", u"Location")

    tech_sheet = workbook.add_worksheet('technosphere-labels')
    tech_sheet.set_column('B:B', 60)
    tech_sheet.set_column('C:C', 30)
    tech_sheet.set_column('D:D', 15)
    tech_sheet.set_column('E:E', 30)

    print("Writing metadata")

    # Header
    for index, col in enumerate(COLUMNS):
        tech_sheet.write_string(0, index, col, bold)

    tech_sheet.write_comment(
        'C1',
        "Only for ecoinvent 3, where names =/= products.",
    )

    for index, data in enumerate(sorted_activity_keys):
        obj = Database.get(data[1])

        tech_sheet.write_number(index + 1, 0, index + 1)
        tech_sheet.write_string(index + 1, 1, obj.get(u'name') or u'Unknown')
        tech_sheet.write_string(index + 1, 2, obj.get(u'reference product') or u'')
        tech_sheet.write_string(index + 1, 3, obj.get(u'unit') or u'Unknown')
        tech_sheet.write_string(index + 1, 4, u" - ".join(obj.get(u'categories') or []))
        tech_sheet.write_string(index + 1, 5, obj.get(u'location') or u'Unknown')

    COLUMNS = (
        u"Index",
        u"Name",
        u"Unit",
        u"Categories",
    )

    bio_sheet = workbook.add_worksheet('biosphere-labels')
    bio_sheet.set_column('B:B', 60)
    bio_sheet.set_column('C:C', 15)
    bio_sheet.set_column('D:D', 30)

    # Header
    for index, col in enumerate(COLUMNS):
        bio_sheet.write_string(0, index, col, bold)

    for index, data in enumerate(sorted_bio_keys):
        obj = Database.get(data[1])

        bio_sheet.write_number(index + 1, 0, index + 1)
        bio_sheet.write_string(index + 1, 1, obj.get(u'name') or u'Unknown')
        bio_sheet.write_string(index + 1, 2, obj.get(u'unit') or u'Unknown')
        bio_sheet.write_string(index + 1, 3, u" - ".join(obj.get(u'categories') or []))

    workbook.close()

    return filepath
def write_lci_matching(db, database_name, only_unlinked=False, only_activity_names=False):
    """Write matched and unmatched exchanges to Excel file.

    ``db`` is a list of dataset dicts; ``database_name`` names the output
    file. ``only_unlinked`` writes only the unique unlinked exchanges;
    ``only_activity_names`` writes only the activity rows. The two flags
    are mutually exclusive.

    NOTE(review): a second, functionally identical definition of
    ``write_lci_matching`` appears later in this file and shadows this
    one at import time.

    Returns the filepath of the created workbook."""
    def write_headers(sheet, row):
        columns = ('Name', 'Reference Product', 'Amount', 'Database', 'Unit', 'Categories', 'Location', 'Type', 'Matched')
        for index, col in enumerate(columns):
            sheet.write_string(row, index, col, bold)

    def write_row(sheet, row, data, exc=True):
        # Highlight exchanges that are still unmatched (no ``input`` key).
        style = highlighted if ('input' not in data and exc) else None
        if exc:
            sheet.write_string(row, 0, data.get('name', '(unknown)'), style)
            sheet.write_string(row, 1, data.get('reference product', '(unknown)'), style)
            try:
                sheet.write_number(row, 2, float(data.get('amount')), style)
            except ValueError:
                sheet.write_string(row, 2, 'Unknown', style)
        else:
            # Activity (dataset) row: only the bolded name.
            sheet.write_string(row, 0, data.get('name', '(unknown)'), bold)
        sheet.write_string(row, 3, data.get('input', [''])[0], style)
        sheet.write_string(row, 4, data.get('unit', '(unknown)'), style)
        sheet.write_string(row, 5, u":".join(data.get('categories', ['(unknown)'])), style)
        sheet.write_string(row, 6, data.get('location', '(unknown)'), style)
        if exc:
            sheet.write_string(row, 7, data.get('type', '(unknown)'), style)
            sheet.write_boolean(row, 8, 'input' in data, style)

    if only_unlinked and only_activity_names:
        raise ValueError(
            "Must choose only one of ``only_unlinked`` and ``only_activity_names``"
        )

    safe_name = safe_filename(database_name, False)
    suffix = "-unlinked" if only_unlinked else "-names" if only_activity_names else ""
    filepath = os.path.join(projects.output_dir, "db-matching-" + safe_name + suffix + ".xlsx")

    workbook = xlsxwriter.Workbook(filepath)
    bold = workbook.add_format({'bold': True})
    highlighted = workbook.add_format({'bg_color': '#FFB5B5'})
    bold.set_font_size(12)

    sheet = workbook.add_worksheet('matching')
    sheet.set_column('A:A', 60)
    sheet.set_column('B:B', 12)
    sheet.set_column('C:C', 12)
    sheet.set_column('D:D', 20)
    sheet.set_column('E:E', 40)
    sheet.set_column('F:F', 12)
    sheet.set_column('G:G', 12)

    row = 0

    if only_unlinked:
        # Deduplicate unlinked exchanges by activity_hash, grouped by type.
        unique_unlinked = collections.defaultdict(set)
        hash_dict = {}
        for ds in db:
            for exc in (e for e in ds.get('exchanges', []) if not e.get('input')):
                ah = activity_hash(exc)
                unique_unlinked[exc.get('type')].add(ah)
                hash_dict[ah] = exc

        for key in sorted(unique_unlinked.keys()):
            sheet.write_string(row, 0, key, bold)
            write_headers(sheet, row + 1)
            row += 2

            exchanges = [hash_dict[ah] for ah in unique_unlinked[key]]
            exchanges.sort(
                key=lambda x: (x['name'], list(x.get('categories', []))))
            for exc in exchanges:
                write_row(sheet, row, exc)
                row += 1
            row += 1
    else:
        for ds in db:
            if not ds.get('exchanges'):
                continue
            write_row(sheet, row, ds, False)
            if only_activity_names:
                row += 1
                continue
            write_headers(sheet, row + 1)
            row += 2
            for exc in sorted(ds.get('exchanges', []), key=lambda x: x.get('name')):
                write_row(sheet, row, exc)
                row += 1
            # Blank separator row between datasets.
            row += 1

    workbook.close()

    return filepath
def lci_matrices_to_matlab(database_name):
    """Export the technosphere and biosphere matrices of ``database_name``
    to a Matlab ``.mat`` file, together with an Excel workbook of row and
    column labels, in ``projects.output_dir``.

    Returns the output directory path."""
    from bw2calc import LCA

    lca = LCA({Database(database_name).random(): 1})
    lca.lci()
    lca.fix_dictionaries()
    ra, rp, rb = lca.reverse_dict()

    safe_name = safe_filename(database_name, False)
    # Bug fix: ``dirpath`` was used below (and returned) without ever
    # being defined, raising NameError. All output goes to the project
    # output directory.
    dirpath = projects.output_dir

    scipy.io.savemat(
        os.path.join(dirpath, safe_name + ".mat"),
        {
            "technosphere": lca.technosphere_matrix,
            "biosphere": lca.biosphere_matrix,
        },
    )

    workbook = xlsxwriter.Workbook(os.path.join(dirpath, safe_name + ".xlsx"))
    bold = workbook.add_format({"bold": True})

    COLUMNS = ("Index", "Name", "Reference product", "Unit", "Categories", "Location")

    tech_sheet = workbook.add_worksheet("technosphere")
    tech_sheet.set_column("B:B", 60)
    tech_sheet.set_column("C:C", 30)
    tech_sheet.set_column("D:D", 15)
    tech_sheet.set_column("E:E", 30)

    # Header
    for index, col in enumerate(COLUMNS):
        tech_sheet.write_string(0, index, col, bold)

    tech_sheet.write_comment(
        "C1",
        "Only for ecoinvent 3, where names =/= products.",
    )

    data = Database(database_name).load()
    for index, key in sorted(ra.items()):
        tech_sheet.write_number(index + 1, 0, index + 1)
        tech_sheet.write_string(index + 1, 1, data[key].get("name") or "Unknown")
        tech_sheet.write_string(index + 1, 2, data[key].get("reference product") or "")
        tech_sheet.write_string(index + 1, 3, data[key].get("unit") or "Unknown")
        tech_sheet.write_string(index + 1, 4, " - ".join(data[key].get("categories") or []))
        tech_sheet.write_string(index + 1, 5, data[key].get("location") or "Unknown")

    COLUMNS = (
        "Index",
        "Name",
        "Unit",
        "Categories",
    )

    # Biosphere flows can come from several databases; cache each load.
    biosphere_dicts = {}
    bio_sheet = workbook.add_worksheet("biosphere")
    bio_sheet.set_column("B:B", 60)
    bio_sheet.set_column("C:C", 15)
    bio_sheet.set_column("D:D", 30)

    # Header
    for index, col in enumerate(COLUMNS):
        bio_sheet.write_string(0, index, col, bold)

    for index, key in sorted(rb.items()):
        if key[0] not in biosphere_dicts:
            biosphere_dicts[key[0]] = Database(key[0]).load()
        obj = biosphere_dicts[key[0]][key]

        bio_sheet.write_number(index + 1, 0, index + 1)
        bio_sheet.write_string(index + 1, 1, obj.get("name", "Unknown"))
        bio_sheet.write_string(index + 1, 2, obj.get("unit", "Unknown"))
        bio_sheet.write_string(index + 1, 3, " - ".join(obj.get("categories", [])))

    workbook.close()

    return dirpath
def write_lci_matching(db, database_name, only_unlinked=False, only_activity_names=False):
    """Write matched and unmatched exchanges to Excel file.

    NOTE(review): this is a functional duplicate of an earlier
    ``write_lci_matching`` definition in this file; being defined later,
    this version shadows the earlier one at import time.

    Returns the filepath of the created workbook."""
    def write_headers(sheet, row):
        columns = (
            "Name",
            "Reference Product",
            "Amount",
            "Database",
            "Unit",
            "Categories",
            "Location",
            "Type",
            "Matched",
        )
        for index, col in enumerate(columns):
            sheet.write_string(row, index, col, bold)

    def write_row(sheet, row, data, exc=True):
        # Highlight exchanges that are still unmatched (no ``input`` key).
        style = highlighted if ("input" not in data and exc) else None
        if exc:
            sheet.write_string(row, 0, data.get("name", "(unknown)"), style)
            sheet.write_string(row, 1, data.get("reference product", "(unknown)"), style)
            try:
                sheet.write_number(row, 2, float(data.get("amount")), style)
            except ValueError:
                sheet.write_string(row, 2, "Unknown", style)
        else:
            # Activity (dataset) row: only the bolded name.
            sheet.write_string(row, 0, data.get("name", "(unknown)"), bold)
        sheet.write_string(row, 3, data.get("input", [""])[0], style)
        sheet.write_string(row, 4, data.get("unit", "(unknown)"), style)
        sheet.write_string(row, 5, u":".join(data.get("categories", ["(unknown)"])), style)
        sheet.write_string(row, 6, data.get("location", "(unknown)"), style)
        if exc:
            sheet.write_string(row, 7, data.get("type", "(unknown)"), style)
            sheet.write_boolean(row, 8, "input" in data, style)

    if only_unlinked and only_activity_names:
        raise ValueError(
            "Must choose only one of ``only_unlinked`` and ``only_activity_names``"
        )

    safe_name = safe_filename(database_name, False)
    suffix = "-unlinked" if only_unlinked else "-names" if only_activity_names else ""
    filepath = os.path.join(projects.output_dir, "db-matching-" + safe_name + suffix + ".xlsx")

    workbook = xlsxwriter.Workbook(filepath)
    bold = workbook.add_format({"bold": True})
    highlighted = workbook.add_format({"bg_color": "#FFB5B5"})
    bold.set_font_size(12)

    sheet = workbook.add_worksheet("matching")
    sheet.set_column("A:A", 60)
    sheet.set_column("B:B", 12)
    sheet.set_column("C:C", 12)
    sheet.set_column("D:D", 20)
    sheet.set_column("E:E", 40)
    sheet.set_column("F:F", 12)
    sheet.set_column("G:G", 12)

    row = 0

    if only_unlinked:
        # Deduplicate unlinked exchanges by activity_hash, grouped by type.
        unique_unlinked = collections.defaultdict(set)
        hash_dict = {}
        for ds in db:
            for exc in (e for e in ds.get("exchanges", []) if not e.get("input")):
                ah = activity_hash(exc)
                unique_unlinked[exc.get("type")].add(ah)
                hash_dict[ah] = exc

        for key in sorted(unique_unlinked.keys()):
            sheet.write_string(row, 0, key, bold)
            write_headers(sheet, row + 1)
            row += 2

            exchanges = [hash_dict[ah] for ah in unique_unlinked[key]]
            exchanges.sort(
                key=lambda x: (x["name"], list(x.get("categories", []))))
            for exc in exchanges:
                write_row(sheet, row, exc)
                row += 1
            row += 1
    else:
        for ds in db:
            if not ds.get("exchanges"):
                continue
            write_row(sheet, row, ds, False)
            if only_activity_names:
                row += 1
                continue
            write_headers(sheet, row + 1)
            row += 2
            for exc in sorted(ds.get("exchanges", []), key=lambda x: x.get("name")):
                write_row(sheet, row, exc)
                row += 1
            # Blank separator row between datasets.
            row += 1

    workbook.close()

    return filepath
def logs_dir(self):
    """Filesystem path of the log directory for the current project."""
    folder_name = safe_filename(self.current, full=self.dataset.full_hash)
    return Path(self._base_logs_dir) / folder_name
def filename(self):
    """Remove filesystem-unsafe characters and perform unicode
    normalization on ``self.name`` using
    :func:`.filesystem.safe_filename`."""
    # NOTE(review): uses safe_filename's default form, without the
    # ``full=...full_hash`` argument used elsewhere in this file —
    # confirm this asymmetry is intentional.
    return safe_filename(self.name)