def test_copy(activity):
    """Copying an activity duplicates both its dataset row and its exchange."""
    # Baseline: exactly one activity and one exchange exist.
    assert ActivityDataset.select().count() == 1
    assert ExchangeDataset.select().count() == 1

    duplicate = activity.copy("baz")

    # The copy gets a fresh code and the requested name; location is inherited.
    assert duplicate['code'] != activity['code']
    assert duplicate['name'] == 'baz'
    assert duplicate['location'] == 'bar'

    # Both the activity and its exchange were duplicated.
    assert ActivityDataset.select().count() == 2
    assert ExchangeDataset.select().count() == 2

    # Original and copy each resolve to exactly one dataset row, and each
    # is the input of exactly one exchange.
    for act in (duplicate, activity):
        assert ActivityDataset.select().where(
            ActivityDataset.code == act['code'],
            ActivityDataset.database == act['database'],
        ).count() == 1
        assert ExchangeDataset.select().where(
            ExchangeDataset.input_code == act['code'],
            ExchangeDataset.input_database == act['database'],
        ).count() == 1
# Example #2
def handle_code_weirdness(codes: set, dbs: set, struct_db: str) -> dict:
    """Sometimes, we might be working with weird data, where the codes of
    activities no longer match, while the rest of the data absolutely does.

    So, here we takes these codes and original databases and yoink the
    other important data (location, name, product, etc.), using that
    to find a match in the superstructure database.

    The return value is a dictionary where the invalid keys are linked
    to the valid ones.
    """
    # Map (name, product, location) -> code for the original databases.
    base_query = (AD.select(AD.name, AD.product, AD.location, AD.code)
                  .where((AD.code.in_(codes)) & (AD.database.in_(dbs)))
                  .tuples())
    combo_dict = {row[:-1]: row[-1] for row in base_query.iterator()}

    # Use the collected fields to locate the equivalent activities in the
    # superstructure database.
    names, products, locations = zip(*combo_dict.keys())
    struct_query = (AD.select(AD.name, AD.product, AD.location, AD.code)
                    .where((AD.name.in_(set(names)))
                           & (AD.product.in_(set(products)))
                           & (AD.location.in_(set(locations)))
                           & (AD.database == struct_db))
                    .tuples())
    match_dict = {row[:-1]: row[-1] for row in struct_query.iterator()}

    # NOTE(review): assumes every superstructure match shares its field
    # combination with a row from the original query; otherwise the
    # combo_dict lookup raises KeyError — confirm upstream guarantees this.
    return {(struct_db, combo_dict[combo]): (struct_db, code)
            for combo, code in match_dict.items()}
def test_copy_with_kwargs(activity):
    """Keyword arguments passed to ``copy`` override/extend the copied data."""
    # Baseline: one activity, one exchange.
    assert ActivityDataset.select().count() == 1
    assert ExchangeDataset.select().count() == 1

    duplicate = activity.copy("baz", location="here", widget="squirt gun")

    # Fresh code, requested name, and the kwargs applied verbatim.
    assert duplicate['code'] != activity['code']
    assert duplicate['name'] == 'baz'
    assert duplicate['location'] == 'here'
    assert duplicate['widget'] == 'squirt gun'

    # Activity and exchange counts both doubled.
    assert ActivityDataset.select().count() == 2
    assert ExchangeDataset.select().count() == 2
# Example #4
def find_missing_activities(existing_codes: set, delta: str) -> Tuple[set, list]:
    """Return the codes in database ``delta`` absent from ``existing_codes``,
    together with the corresponding ``Activity`` objects.
    """
    code_query = (AD.select(AD.code)
                  .where(AD.database == delta)
                  .distinct()
                  .tuples())
    delta_codes = {row[0] for row in code_query.iterator()}
    diff = delta_codes.difference(existing_codes)

    # Second query: fetch full rows for just the missing codes.
    row_query = AD.select().where((AD.database == delta) & (AD.code.in_(diff)))
    diff_list = [Activity(row) for row in row_query.iterator()]
    return diff, diff_list
# Example #5
def extract_brightway2_databases(database_names):
    """Extract a Brightway2 SQLiteBackend database to the Wurst internal format.

    ``database_names`` is a list of database names. You should already be in the correct project.

    Returns a list of dataset documents."""
    assert isinstance(database_names, (list, tuple)), \
        "Must pass list of database names"

    databases = [DatabaseChooser(name) for name in database_names]
    assert all(isinstance(db, SQLiteBackend) for db in databases), \
        "Wrong type of database object (must be SQLiteBackend)"

    # Build lazy querysets for activities and exchanges so rows are streamed
    # rather than copied into memory up front.
    activity_qs = ActivityDataset.select().where(
        ActivityDataset.database << database_names)
    exchange_qs = ExchangeDataset.select().where(
        ExchangeDataset.output_database << database_names)

    # Retrieve all activity data.
    print("Getting activity data")
    activities = [extract_activity(row) for row in tqdm(activity_qs)]
    # Attach each exchange to its consuming activity.
    print("Adding exchange data to activities")
    add_exchanges_to_consumers(activities, exchange_qs)
    # Fill in input details for exchanges inside and outside our databases.
    print("Filling out exchange data")
    add_input_info_for_indigenous_exchanges(activities, database_names)
    add_input_info_for_external_exchanges(activities, database_names)
    return activities
# Example #6
def select_superstructure_codes(struct: str) -> set:
    """Return the set of all distinct activity codes in database ``struct``."""
    rows = (AD.select(AD.code)
            .where(AD.database == struct)
            .distinct()
            .tuples())
    return {row[0] for row in rows.iterator()}
# Example #7
 def _load_groups_sqlite(self):
     """Return dictionary of ``{(name, product): [(location, code)]`` from SQLite3 database"""
     # AD is the ActivityDataset db table (Model in Peewee) imported from
     # bw2data.backends.peewee.
     grouped = defaultdict(list)
     rows = AD.select(AD.name, AD.product, AD.location, AD.code).where(
         AD.database == self.db.name).dicts()
     for row in rows:
         grouped[(row['name'], row['product'])].append(
             (row['location'], row['code']))
     return grouped
# Example #8
def convert_key_to_fields(df: pd.DataFrame) -> pd.DataFrame:
    """Converts the process fields to its actual key by matching the database.
    """
    # Column 5 holds (database, code) keys.
    keys = set(df.iloc[:, 5])
    dbs, codes = zip(*keys)
    query = (AD.select()
             .where((AD.database.in_(set(dbs))) & (AD.code.in_(set(codes))))
             .namedtuples())
    # NOTE(review): 'constuct' is a typo carried by the shared helper's name.
    key_data = dict(constuct_ad_data(row) for row in query.iterator())
    rows = [key_data[key] for key in df.iloc[:, 5]]
    return pd.DataFrame(rows, columns=df.columns[0:5])
# Example #9
def data_from_index(index: tuple) -> dict:
    """Take the given 'Index' tuple and build a complete SUPERSTRUCTURE row
    from it.

    ``index`` is a ``(from_key, to_key)`` pair of ``(database, code)`` keys;
    an optional ``flow_type`` attribute on it is passed through.
    """
    from_key, to_key = index[0], index[1]
    from_key, from_data = constuct_ad_data(ActivityDataset.get(database=from_key[0], code=from_key[1]))
    to_key, to_data = constuct_ad_data(ActivityDataset.get(database=to_key[0], code=to_key[1]))
    # Both *_data tuples use the same field order, mirrored by the "from"
    # entries below: (name, product, location, categories, database).
    return {
        "from activity name": from_data[0],
        "from reference product": from_data[1],
        "from location": from_data[2],
        "from categories": from_data[3],
        "from database": from_data[4],
        "from key": from_key,
        "to activity name": to_data[0],
        "to reference product": to_data[1],
        # Bug fix: the "to" side previously reused to_data[1] for the
        # location, shifting categories/database down one slot and never
        # reading to_data[4]. The indices now mirror the "from" side.
        "to location": to_data[2],
        "to categories": to_data[3],
        "to database": to_data[4],
        "to key": to_key,
        "flow type": getattr(index, "flow_type", None),
    }
def get_relevant_flows(df: pd.DataFrame, part: str = "from") -> dict:
    """Build a dict of the biosphere flows referenced by the 'from' or 'to'
    columns of ``df``.

    Only rows whose third selected column equals the configured biosphere
    database are considered. Returns an empty dict when the chunk holds no
    biosphere exchanges; otherwise the mapping produced by
    ``process_ad_flow`` over every matching ActivityDataset row.
    (Docstring fixed: it was copy-pasted from ``all_flows_found``.)
    """
    select = FROM_BIOS if part == "from" else TO_BIOS
    sub = df.loc[:, select]
    sub = sub[sub.iloc[:, 2] ==
              bw.config.biosphere]  # Use only biosphere exchanges
    if sub.empty:
        return {}

    # Categories are not part of the query filter, only names and databases;
    # the unused unpack target is conventionally underscored.
    names, _categories, dbs = sub.iloc[:, 0:3].apply(set, axis=0)
    query = (ActivityDataset.select(
        ActivityDataset.name, ActivityDataset.data, ActivityDataset.database,
        ActivityDataset.code).where((ActivityDataset.name.in_(names)) & (
            ActivityDataset.database.in_(dbs))).namedtuples())
    flows = dict(process_ad_flow(x) for x in query.iterator())
    return flows
# Example #11
def check_for_invalid_codes(df: pd.DataFrame, struct_db: str) -> set:
    """Check if the given superstructure contains keys for the superstructure
    database that do not exist.

    Return a set of codes where the keys are invalid.
    """
    # Collect every code referenced via the superstructure database in
    # either key column.
    from_codes = (key[1] for key in df["from key"] if key[0] == struct_db)
    to_codes = (key[1] for key in df["to key"] if key[0] == struct_db)
    codes = set(from_codes).union(to_codes)

    query = (AD.select(AD.code)
             .where((AD.code.in_(codes)) & (AD.database == struct_db))
             .tuples())
    if len(codes) == query.count():
        # Every referenced code exists in the superstructure database.
        return set()
    return codes.difference(row[0] for row in query.iterator())
def all_flows_found(df: pd.DataFrame, part: str = "from") -> bool:
    """Return True when every biosphere (name, categories, database)
    combination in the given 'from'/'to' chunk exists in the database."""
    columns = FROM_BIOS if part == "from" else TO_BIOS
    chunk = df.loc[:, columns]
    # Use only biosphere exchanges.
    chunk = chunk[chunk.iloc[:, 2] == bw.config.biosphere]
    if chunk.empty:
        return True

    names, categories, dbs = chunk.iloc[:, 0:3].apply(set, axis=0)
    query = (ActivityDataset
             .select(ActivityDataset.name, ActivityDataset.data,
                     ActivityDataset.database)
             .where((ActivityDataset.name.in_(names))
                    & (ActivityDataset.database.in_(dbs)))
             .tuples())
    # Rows without a stored 'categories' field cannot match and are skipped.
    matches = {(name, data["categories"], db)
               for name, data, db in query.iterator()
               if "categories" in data}
    combos = chunk.iloc[:, 0:3].apply(tuple, axis=1)
    return combos.isin(matches).all()
def get_relevant_activities(df: pd.DataFrame, part: str = "from") -> dict:
    """Build a dictionary of (name, product, location) -> (database, key) pairs."""
    columns = FROM_ACT if part == "from" else TO_ACT
    chunk = df.loc[:, columns]
    # Exclude biosphere exchanges.
    chunk = chunk[chunk.iloc[:, 3] != bw.config.biosphere]
    if chunk.empty:
        return {}

    names, products, locations, dbs = chunk.iloc[:, 0:4].apply(set, axis=0)
    query = (ActivityDataset
             .select(ActivityDataset.name, ActivityDataset.product,
                     ActivityDataset.location, ActivityDataset.database,
                     ActivityDataset.code)
             .where((ActivityDataset.name.in_(names))
                    & (ActivityDataset.product.in_(products))
                    & (ActivityDataset.location.in_(locations))
                    & (ActivityDataset.database.in_(dbs)))
             .namedtuples())
    return dict(process_ad_namedtuple(row) for row in query.iterator())
def all_activities_found(df: pd.DataFrame, part: str = "from") -> bool:
    """Return True when every non-biosphere (name, product, location)
    combination in the given 'from'/'to' chunk exists in the database."""
    columns = FROM_ACT if part == "from" else TO_ACT
    chunk = df.loc[:, columns]
    # Exclude biosphere exchanges.
    chunk = chunk[chunk.iloc[:, 3] != bw.config.biosphere]
    if chunk.empty:
        return True

    names, products, locations, dbs = chunk.iloc[:, 0:4].apply(set, axis=0)
    query = (ActivityDataset
             .select(ActivityDataset.name, ActivityDataset.product,
                     ActivityDataset.location)
             .where((ActivityDataset.name.in_(names))
                    & (ActivityDataset.product.in_(products))
                    & (ActivityDataset.location.in_(locations))
                    & (ActivityDataset.database.in_(dbs)))
             .tuples())
    matches = set(query.iterator())
    combos = chunk.iloc[:, 0:3].apply(tuple, axis=1)
    return combos.isin(matches).all()
# Example #15
def add_input_info_for_external_exchanges(activities, names):
    """Add details on exchange inputs from other databases"""
    local_dbs = set(names)
    # Cache ActivityDataset rows by (database, code) so each external input
    # is fetched at most once.
    cache = {}

    for ds in tqdm(activities):
        for exc in ds['exchanges']:
            if 'input' not in exc:
                continue
            key = exc['input']
            if key[0] in local_dbs:
                # Input lives in one of our own databases; handled elsewhere.
                continue
            if key not in cache:
                cache[key] = ActivityDataset.get(
                    ActivityDataset.database == key[0],
                    ActivityDataset.code == key[1],
                )
            obj = cache[key]
            exc['activity'] = obj.name
            exc['product'] = obj.product
            exc['unit'] = obj.data['unit']
            exc['location'] = obj.location
            if exc['type'] == 'biosphere':
                exc['categories'] = obj.data['categories']
# Example #16
def add_input_info_for_external_exchanges(activities, names):
    """Add details on exchange inputs from other databases"""
    local_dbs = set(names)
    # Cache ActivityDataset rows by (database, code) so each external input
    # is fetched at most once.
    cache = {}

    for ds in tqdm(activities):
        for exc in ds["exchanges"]:
            if "input" not in exc:
                continue
            key = exc["input"]
            if key[0] in local_dbs:
                # Input lives in one of our own databases; handled elsewhere.
                continue
            if key not in cache:
                cache[key] = ActivityDataset.get(
                    ActivityDataset.database == key[0],
                    ActivityDataset.code == key[1],
                )
            obj = cache[key]
            exc["name"] = obj.name
            exc["product"] = obj.product
            exc["unit"] = obj.data.get("unit")
            exc["location"] = obj.location
            exc["database"] = obj.database
            if exc["type"] == "biosphere":
                exc["categories"] = obj.data.get("categories")
def test_delete(activity):
    """Deleting an activity removes its dataset row and its exchange."""
    # Baseline: one activity, one exchange.
    assert ActivityDataset.select().count() == 1
    assert ExchangeDataset.select().count() == 1

    activity.delete()

    # Both tables are now empty.
    assert ActivityDataset.select().count() == 0
    assert ExchangeDataset.select().count() == 0
 def output_document_id(self) -> int:
     """Return the primary-key id of the ActivityDataset row matching this
     exchange's output (database, code) pair."""
     row = ActivityDataset.get(
         ActivityDataset.database == self.output.database,
         ActivityDataset.code == self.output.code)
     return row.id