Ejemplo n.º 1
0
def handle_code_weirdness(codes: set, dbs: set, struct_db: str) -> dict:
    """Link keys with unmatched codes to their superstructure counterparts.

    Sometimes we work with weird data, where the codes of activities no
    longer match while the rest of the data absolutely does. The given codes
    are looked up in the original databases to read the other important data
    (name, product, location), which is then used to find a match in the
    superstructure database.

    Args:
        codes: Activity codes to look up in ``dbs``.
        dbs: Names of the databases in which ``codes`` are looked up.
        struct_db: Name of the superstructure database to match against.

    Returns:
        A dictionary where the invalid keys ``(struct_db, original code)``
        are linked to the valid ones ``(struct_db, matched code)``. Codes
        without a match are left out, so the result is empty when nothing
        can be matched.
    """
    if not codes or not dbs:
        # Nothing to look up, so there is nothing to link.
        return {}

    query = (AD.select(AD.name, AD.product, AD.location,
                       AD.code).where((AD.code.in_(codes))
                                      & (AD.database.in_(dbs))).tuples())
    # (name, product, location) -> original code
    combo_dict = {row[:-1]: row[-1] for row in query.iterator()}
    if not combo_dict:
        # Unpacking an empty zip() below would raise a ValueError.
        return {}

    names, products, locations = zip(*combo_dict)
    query = (AD.select(AD.name, AD.product, AD.location,
                       AD.code).where((AD.name.in_(set(names)))
                                      & (AD.product.in_(set(products)))
                                      & (AD.location.in_(set(locations)))
                                      & (AD.database == struct_db)).tuples())
    # (name, product, location) -> code in the superstructure database
    match_dict = {row[:-1]: row[-1] for row in query.iterator()}

    # The three IN filters are independent of each other, so the query can
    # return combinations that were never requested; only exact
    # (name, product, location) matches are linked.
    return {(struct_db, combo_dict[combo]): (struct_db, code)
            for combo, code in match_dict.items()
            if combo in combo_dict}
Ejemplo n.º 2
0
def test_copy(activity):
    """Copying an activity adds one activity row and one exchange row.

    After the copy, both the original and the copy must be stored exactly
    once, and each must be the input of exactly one exchange.
    """

    def activity_rows(act):
        # Number of ActivityDataset rows stored under the key of ``act``.
        return ActivityDataset.select().where(
            ActivityDataset.code == act['code'],
            ActivityDataset.database == act['database'],
        ).count()

    def exchanges_with_input(act):
        # Number of ExchangeDataset rows that use ``act`` as their input.
        return ExchangeDataset.select().where(
            ExchangeDataset.input_code == act['code'],
            ExchangeDataset.input_database == act['database'],
        ).count()

    assert ExchangeDataset.select().count() == 1
    assert ActivityDataset.select().count() == 1

    duplicate = activity.copy("baz")

    assert duplicate['code'] != activity['code']
    assert duplicate['name'] == 'baz'
    assert duplicate['location'] == 'bar'
    assert ExchangeDataset.select().count() == 2
    assert ActivityDataset.select().count() == 2
    assert activity_rows(duplicate) == 1
    assert activity_rows(activity) == 1
    assert exchanges_with_input(duplicate) == 1
    assert exchanges_with_input(activity) == 1
Ejemplo n.º 3
0
def test_copy_with_kwargs(activity):
    """Keyword arguments given to ``copy`` end up as fields on the copy."""
    tables = (ExchangeDataset, ActivityDataset)
    for table in tables:
        assert table.select().count() == 1

    duplicate = activity.copy("baz", location="here", widget="squirt gun")

    assert duplicate['code'] != activity['code']
    expected_fields = (
        ('name', 'baz'),
        ('location', 'here'),
        ('widget', 'squirt gun'),
    )
    for field, value in expected_fields:
        assert duplicate[field] == value

    # The copy added one exchange row and one activity row.
    for table in tables:
        assert table.select().count() == 2
Ejemplo n.º 4
0
def find_missing_activities(existing_codes: set, delta: str) -> Tuple[set, list]:
    """Find activities of database ``delta`` whose code is not in ``existing_codes``.

    Returns:
        A tuple ``(codes, activities)``: the set of codes stored in ``delta``
        that are absent from ``existing_codes``, and a list of ``Activity``
        objects built from the matching rows.
    """
    code_rows = (AD.select(AD.code)
                 .where(AD.database == delta)
                 .distinct()
                 .tuples())
    delta_codes = {row[0] for row in code_rows.iterator()}
    missing = delta_codes.difference(existing_codes)

    # Second pass: fetch the full rows for the missing codes and wrap each
    # of them in an Activity.
    missing_rows = AD.select().where(
        (AD.database == delta) & (AD.code.in_(missing)))
    missing_activities = [Activity(row) for row in missing_rows.iterator()]
    return missing, missing_activities
Ejemplo n.º 5
0
def select_superstructure_codes(struct: str) -> set:
    """Return the set of distinct activity codes stored in database ``struct``."""
    rows = AD.select(AD.code).where(AD.database == struct).distinct().tuples()
    # Every row is a 1-tuple holding just the code.
    return {row[0] for row in rows.iterator()}
Ejemplo n.º 6
0
def extract_brightway2_databases(database_names):
    """Extract Brightway2 SQLiteBackend databases to the Wurst internal format.

    ``database_names`` is a list (or tuple) of database names. You should
    already be in the correct project.

    Raises ``AssertionError`` when ``database_names`` is not a list or tuple,
    or when one of the named databases is not a ``SQLiteBackend``.

    Returns a list of dataset documents."""
    assert isinstance(database_names, (list, tuple)), \
        "Must pass list of database names"

    backends = [DatabaseChooser(db_name) for db_name in database_names]
    assert all(isinstance(backend, SQLiteBackend) for backend in backends), \
        "Wrong type of database object (must be SQLiteBackend)"

    # Both selections are only built here; the rows are read when the
    # queries are iterated further down.
    activity_rows = ActivityDataset.select().where(
        ActivityDataset.database << database_names)
    exchange_rows = ExchangeDataset.select().where(
        ExchangeDataset.output_database << database_names)

    # Step 1: one document per activity row.
    print("Getting activity data")
    datasets = [extract_activity(row) for row in tqdm(activity_rows)]

    # Step 2: add each exchange to the activity list of exchanges.
    print("Adding exchange data to activities")
    add_exchanges_to_consumers(datasets, exchange_rows)

    # Step 3: add details on the exchange inputs, both for inputs from the
    # extracted databases and for inputs from other databases.
    print("Filling out exchange data")
    add_input_info_for_indigenous_exchanges(datasets, database_names)
    add_input_info_for_external_exchanges(datasets, database_names)
    return datasets
Ejemplo n.º 7
0
 def _load_groups_sqlite(self):
     """Group the activities of ``self.db`` by name and product.

     Returns a dictionary of ``{(name, product): [(location, code)]}`` read
     from the SQLite3 database.
     """
     # AD is the ActivityDataset db table (Model in Peewee) imported from bw2data.backends.peewee
     selection = AD.select(AD.name, AD.product, AD.location, AD.code)
     rows = list(selection.where(AD.database == self.db.name).dicts())

     groups = defaultdict(list)
     for row in rows:
         group_key = (row['name'], row['product'])
         groups[group_key].append((row['location'], row['code']))
     return groups
Ejemplo n.º 8
0
def convert_key_to_fields(df: pd.DataFrame) -> pd.DataFrame:
    """Convert the key column of ``df`` into the matching process fields.

    The sixth column of ``df`` is read as ``(database, code)`` keys. The
    matching activity rows are fetched from the database and converted with
    ``constuct_ad_data``, which is expected to yield ``(key, fields)`` pairs.

    Args:
        df: Frame whose sixth column holds the keys and whose first five
            column labels name the fields of the result.

    Returns:
        A new DataFrame with one row per row of ``df`` (in the same order,
        with a fresh index) and the first five column labels of ``df``. An
        empty ``df`` gives an empty frame with those five columns.

    Raises:
        KeyError: If a key of ``df`` has no matching activity.
    """
    field_columns = df.columns[0:5]
    keys = set(df.iloc[:, 5])
    if not keys:
        # Unpacking an empty zip() below would raise a ValueError.
        return pd.DataFrame([], columns=field_columns)

    dbs, codes = zip(*keys)
    # The two IN filters are independent, so extra rows may come back; they
    # are harmless because rows are only ever looked up by exact key below.
    query = (AD.select().where((AD.database.in_(set(dbs)))
                               & (AD.code.in_(set(codes)))).namedtuples())
    key_data = dict(constuct_ad_data(row) for row in query.iterator())
    return pd.DataFrame([key_data[key] for key in df.iloc[:, 5]],
                        columns=field_columns)
Ejemplo n.º 9
0
def get_relevant_flows(df: pd.DataFrame, part: str = "from") -> dict:
    """Collect the biosphere flows referenced by one side of ``df``.

    ``part`` selects the ``FROM_BIOS`` columns when it equals ``"from"`` and
    the ``TO_BIOS`` columns otherwise. Only rows whose third selected column
    equals ``bw.config.biosphere`` are considered.

    Returns a dictionary built from the pairs that ``process_ad_flow`` yields
    for every candidate row, or an empty dictionary when there are no
    biosphere rows.
    """
    columns = FROM_BIOS if part == "from" else TO_BIOS
    bios = df.loc[:, columns]
    # Use only biosphere exchanges
    bios = bios[bios.iloc[:, 2] == bw.config.biosphere]
    if bios.empty:
        return {}

    # The categories (second column) are not used to narrow down the query.
    names, _categories, dbs = bios.iloc[:, 0:3].apply(set, axis=0)
    candidates = (
        ActivityDataset
        .select(ActivityDataset.name, ActivityDataset.data,
                ActivityDataset.database, ActivityDataset.code)
        .where((ActivityDataset.name.in_(names))
               & (ActivityDataset.database.in_(dbs)))
        .namedtuples()
    )
    return dict(process_ad_flow(row) for row in candidates.iterator())
Ejemplo n.º 10
0
def check_for_invalid_codes(df: pd.DataFrame, struct_db: str) -> set:
    """Find superstructure codes used in ``df`` that are not in the database.

    The ``"from key"`` and ``"to key"`` columns are scanned for keys whose
    database is ``struct_db``; the codes of those keys are then compared with
    the codes stored for ``struct_db``.

    Return a set of codes where the keys are invalid (empty when the number
    of stored rows equals the number of codes).
    """
    codes = {key[1] for key in df["from key"] if key[0] == struct_db}
    codes.update(key[1] for key in df["to key"] if key[0] == struct_db)

    stored = (AD.select(AD.code)
              .where((AD.code.in_(codes)) & (AD.database == struct_db))
              .tuples())
    if len(codes) == stored.count():
        return set()

    # This means not all of the codes exist in the superstructure.
    return codes.difference(row[0] for row in stored.iterator())
Ejemplo n.º 11
0
def all_flows_found(df: pd.DataFrame, part: str = "from") -> bool:
    """Check that every biosphere flow on one side of ``df`` exists.

    ``part`` selects the ``FROM_BIOS`` columns when it equals ``"from"`` and
    the ``TO_BIOS`` columns otherwise. Only rows whose third selected column
    equals ``bw.config.biosphere`` are checked; each row is compared as a
    ``(name, categories, database)`` combination.

    Returns a true value when all combinations are found, which is also the
    case when there are no biosphere rows at all.
    """
    columns = FROM_BIOS if part == "from" else TO_BIOS
    bios = df.loc[:, columns]
    # Use only biosphere exchanges
    bios = bios[bios.iloc[:, 2] == bw.config.biosphere]
    if bios.empty:
        return True

    # The categories are compared afterwards; they are read from the ``data``
    # field of each row rather than filtered on in the query.
    names, _categories, dbs = bios.iloc[:, 0:3].apply(set, axis=0)
    candidates = (
        ActivityDataset
        .select(ActivityDataset.name, ActivityDataset.data,
                ActivityDataset.database)
        .where((ActivityDataset.name.in_(names))
               & (ActivityDataset.database.in_(dbs)))
        .tuples()
    )
    known = {
        (name, data["categories"], database)
        for name, data, database in candidates.iterator()
        if "categories" in data
    }
    wanted = bios.iloc[:, 0:3].apply(tuple, axis=1)
    return wanted.isin(known).all()
Ejemplo n.º 12
0
def get_relevant_activities(df: pd.DataFrame, part: str = "from") -> dict:
    """Collect the non-biosphere activities referenced by one side of ``df``.

    ``part`` selects the ``FROM_ACT`` columns when it equals ``"from"`` and
    the ``TO_ACT`` columns otherwise. Rows whose fourth selected column
    equals ``bw.config.biosphere`` are left out.

    Returns a dictionary built from the pairs that ``process_ad_namedtuple``
    yields for every candidate row (described by the original author as
    ``(name, product, location) -> (database, key)``), or an empty dictionary
    when no rows remain.
    """
    columns = FROM_ACT if part == "from" else TO_ACT
    acts = df.loc[:, columns]
    # Exclude biosphere exchanges
    acts = acts[acts.iloc[:, 3] != bw.config.biosphere]
    if acts.empty:
        return {}

    names, products, locations, dbs = acts.iloc[:, 0:4].apply(set, axis=0)
    matches_request = (
        (ActivityDataset.name.in_(names))
        & (ActivityDataset.product.in_(products))
        & (ActivityDataset.location.in_(locations))
        & (ActivityDataset.database.in_(dbs))
    )
    candidates = (
        ActivityDataset
        .select(ActivityDataset.name, ActivityDataset.product,
                ActivityDataset.location, ActivityDataset.database,
                ActivityDataset.code)
        .where(matches_request)
        .namedtuples()
    )
    return dict(process_ad_namedtuple(row) for row in candidates.iterator())
Ejemplo n.º 13
0
def all_activities_found(df: pd.DataFrame, part: str = "from") -> bool:
    """Check that every non-biosphere activity on one side of ``df`` exists.

    ``part`` selects the ``FROM_ACT`` columns when it equals ``"from"`` and
    the ``TO_ACT`` columns otherwise. Rows whose fourth selected column
    equals ``bw.config.biosphere`` are left out; each remaining row is
    compared as a ``(name, product, location)`` combination.

    Returns a true value when all combinations are found, which is also the
    case when no rows remain.
    """
    columns = FROM_ACT if part == "from" else TO_ACT
    acts = df.loc[:, columns]
    # Exclude biosphere exchanges
    acts = acts[acts.iloc[:, 3] != bw.config.biosphere]
    if acts.empty:
        return True

    names, products, locations, dbs = acts.iloc[:, 0:4].apply(set, axis=0)
    matches_request = (
        (ActivityDataset.name.in_(names))
        & (ActivityDataset.product.in_(products))
        & (ActivityDataset.location.in_(locations))
        & (ActivityDataset.database.in_(dbs))
    )
    candidates = (
        ActivityDataset
        .select(ActivityDataset.name, ActivityDataset.product,
                ActivityDataset.location)
        .where(matches_request)
        .tuples()
    )
    known = set(candidates.iterator())
    wanted = acts.iloc[:, 0:3].apply(tuple, axis=1)
    return wanted.isin(known).all()
Ejemplo n.º 14
0
def test_delete(activity):
    """Deleting the activity empties both the exchange and activity tables."""
    tables = (ExchangeDataset, ActivityDataset)
    for table in tables:
        assert table.select().count() == 1

    activity.delete()

    for table in tables:
        assert table.select().count() == 0