import os

import pandas as pd

# Note: names such as log, paths, outputpath, biboutputpath, sourceconfigpath,
# sector_level_key, fbs_activity_fields, fbs_fill_na_dict,
# flow_by_sector_fields, and the various helper functions used below are
# provided by imports elsewhere in the flowsa package and its dependencies.


def sector_disaggregation_generalized(fbs, group_cols):
    """
    Disaggregate sectors when there is only one NAICS code at the next,
    less aggregated level. Works for NAICS 4 and lower.
    :param fbs: a FBS df
    :param group_cols: list of columns identifying unique rows
    :return: a FBS df with missing NAICS5 and NAICS6 appended
    """
    # load the NAICS 2 to NAICS 6 crosswalk
    cw_load = load_sector_length_crosswalk_w_nonnaics()

    # find the shortest sector length in the df, then loop from
    # that length to 6 digits
    length = min(fbs['Sector'].apply(lambda x: len(x)).unique())
    # append missing naics levels to the df
    for i in range(length, 6):
        sector_merge = 'NAICS_' + str(i)
        sector_add = 'NAICS_' + str(i + 1)

        # subset the crosswalk to the two sector lengths of interest
        cw = cw_load[[sector_merge, sector_add]]
        # only keep the rows where there is only one value in sector_add
        # for a value in sector_merge
        cw = cw.drop_duplicates(subset=[sector_merge],
                                keep=False).reset_index(drop=True)
        sector_list = cw[sector_merge].values.tolist()

        # subset df to sectors with length = i and length = i + 1
        df_subset = fbs[fbs['Sector'].apply(lambda x: i + 1 >= len(x) >= i)]
        # create a temporary sector column truncated to length i
        df_subset = df_subset.assign(
            Sector_tmp=df_subset['Sector'].apply(lambda x: x[0:i]))
        # subset the df to the rows where the tmp sector column is
        # in the naics list
        df_subset = df_subset.loc[df_subset['Sector_tmp'].isin(sector_list)]
        # drop all rows with duplicate temp values, as a less aggregated
        # naics exists (rebuild the grouping columns each loop so
        # 'Sector_tmp' is only appended once)
        group_cols_tmp = [e for e in group_cols
                          if e not in ('Sector', 'Sector_tmp')]
        group_cols_tmp.append('Sector_tmp')
        df_subset2 = df_subset.drop_duplicates(
            subset=group_cols_tmp, keep=False).reset_index(drop=True)

        # merge the naics crosswalk
        new_naics = pd.merge(df_subset2, cw[[sector_merge, sector_add]],
                             how='left', left_on=['Sector_tmp'],
                             right_on=[sector_merge])
        # add column counting the number of child naics associated
        # with a parent
        new_naics = new_naics.assign(
            sector_count=new_naics.groupby(['Location', 'Sector_tmp'])
            ['Sector_tmp'].transform('count'))
        # only keep the rows where the count is 1
        new_naics2 = new_naics[new_naics['sector_count'] == 1].drop(
            columns=['sector_count'])
        # issue warning if rows with more than one child naics are
        # dropped - these will need a method of estimation
        missing_naics = new_naics[new_naics['sector_count'] > 1]
        if len(missing_naics) > 0:
            missing_naics = missing_naics[['Location',
                                           'Sector']].values.tolist()
            log.warning('There is data at sector length ' + str(i) +
                        ' that is lost at sector length ' + str(i + 1) +
                        ' for ' + str(missing_naics))
        new_naics2 = new_naics2.rename(columns={sector_add: "ST"})
        new_naics2 = new_naics2.drop(columns=[sector_merge])
        # drop old sector columns and rename the new sector column
        new_naics2 = new_naics2.drop(columns=["Sector", "Sector_tmp"])
        new_naics2 = new_naics2.rename(columns={"ST": "Sector"})

        # append the new naics rows to the df
        if len(new_naics2) > 0:
            fbs = pd.concat([fbs, new_naics2], sort=True)

    return fbs
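
# Usage sketch (hypothetical data): a minimal FBS with a single 4-digit
# sector. Whether NAICS5/NAICS6 rows are appended depends on the loaded
# crosswalk, i.e. on whether '3121' has exactly one child at each level.
def _example_sector_disaggregation():
    fbs = pd.DataFrame({'Sector': ['3121'],
                        'Location': ['00000'],
                        'FlowAmount': [10.0]})
    return sector_disaggregation_generalized(
        fbs, group_cols=['Sector', 'Location'])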
def return_fba_method_meta(sourcename, **kwargs):
    """
    Return meta for a FlowByActivity method
    :param sourcename: string, the FlowByActivity sourcename
    :param kwargs: optional - "year", included when creating an FBA metafile
    :return: meta object
    """
    from flowsa.bibliography import load_source_dict

    # load info from either a FBA method yaml or the literature yaml
    fba = load_source_dict(sourcename)
    # initiate empty dictionary
    fba_dict = {}

    # add year if creating an FBA metafile
    if 'year' in kwargs:
        fba_dict['data_year'] = kwargs['year']

    try:
        # loop through the FBA yaml and add info
        for k, v in fba.items():
            # include bib_id because this info is pulled when
            # generating a method bib
            if k in ('author', 'source_name', 'source_url',
                     'original_data_download_date', 'date_accessed',
                     'bib_id'):
                fba_dict[k] = str(v)
    except AttributeError:
        log.warning('No metadata found for %s', sourcename)
        fba_dict['meta_data'] = f'No metadata found for {sourcename}'

    return fba_dict
def map_elementary_flows(fba, from_fba_source, keep_unmapped_rows=False):
    """
    Applies mapping from fedelemflowlist to convert flows to
    fedelemflowlist flows
    :param fba: df flow-by-activity or flow-by-sector with 'Flowable',
        'Context', and 'Unit' fields
    :param from_fba_source: str Source name of fba list to look for mappings
    :param keep_unmapped_rows: False to drop unmapped rows, True to retain
    :return: df with flows mapped to the federal elementary flow list
    """
    from fedelemflowlist import get_flowmapping

    # rename columns to match FBS formatting
    fba = fba.rename(columns={"FlowName": 'Flowable',
                              "Compartment": "Context"})

    flowmapping = get_flowmapping(from_fba_source)
    mapping_fields = ["SourceListName", "SourceFlowName",
                      "SourceFlowContext", "SourceUnit",
                      "ConversionFactor", "TargetFlowName",
                      "TargetFlowContext", "TargetUnit"]
    if flowmapping.empty:
        log.warning("No mapping file in fedelemflowlist found for " +
                    from_fba_source)
        # return the original df but with columns renamed so work
        # can continue on the FBS
        fba_mapped_df = fba.copy()
    else:
        flowmapping = flowmapping[mapping_fields]

        # define merge type based on keeping or dropping unmapped data
        if keep_unmapped_rows is False:
            merge_type = 'inner'
        else:
            merge_type = 'left'

        # merge fba with flows
        fba_mapped_df = pd.merge(
            fba, flowmapping,
            left_on=["Flowable", "Context"],
            right_on=["SourceFlowName", "SourceFlowContext"],
            how=merge_type)
        # for mapped rows, replace flow info with the target values and
        # convert the flow amount with the mapping's conversion factor
        mapped = fba_mapped_df["TargetFlowName"].notnull()
        fba_mapped_df.loc[mapped, "Flowable"] = \
            fba_mapped_df["TargetFlowName"]
        fba_mapped_df.loc[mapped, "Context"] = \
            fba_mapped_df["TargetFlowContext"]
        fba_mapped_df.loc[mapped, "Unit"] = fba_mapped_df["TargetUnit"]
        fba_mapped_df.loc[mapped, "FlowAmount"] = \
            fba_mapped_df["FlowAmount"] * fba_mapped_df["ConversionFactor"]

        # drop the mapping columns
        fba_mapped_df = fba_mapped_df.drop(columns=mapping_fields)

    return fba_mapped_df
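
# Usage sketch (hypothetical rows): map water withdrawal flows from the
# USGS_NWIS_WU source to the federal elementary flow list, retaining any
# rows that fail to map so they can be inspected.
def _example_map_elementary_flows():
    fba = pd.DataFrame({'FlowName': ['Water, fresh'],
                        'Compartment': ['ground'],
                        'Unit': ['kg'],
                        'FlowAmount': [100.0]})
    return map_elementary_flows(fba, 'USGS_NWIS_WU',
                                keep_unmapped_rows=True)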
def check_for_nonetypes_in_sector_col(df):
    """
    Check for NoneType values in the 'Sector' column
    :param df: df with columns where datatype = object
    :return: df, unchanged; logs a warning if there are NoneTypes
    """
    # if there are any null values, log a warning message
    if df['Sector'].isnull().any():
        log.warning("There are NoneType values in the 'Sector' column")
    return df
def map_flows(fba, from_fba_source, flow_type='ELEMENTARY_FLOW',
              ignore_source_name=False, **kwargs):
    """
    Applies mapping via esupy from fedelemflowlist or the material flow
    list to convert flows to a standardized list of flows
    :param fba: df flow-by-activity or flow-by-sector
    :param from_fba_source: str Source name of fba list to look for mappings
    :param flow_type: str, either 'ELEMENTARY_FLOW', 'TECHNOSPHERE_FLOW',
        or 'WASTE_FLOW'
    :param ignore_source_name: bool, passed to apply_flow_mapping
    :param kwargs: optional - keep_unmapped_rows: False if want unmapped
        rows dropped, True if want to retain; keep_fba_columns: boolean,
        True or False, indicate if want to maintain 'FlowName' and
        'Compartment' columns in returned df
    :return: df, with flows mapped using the federal elementary flow list
        or the material flow list
    """
    # prior to mapping elementary flows, ensure all data are in
    # an annual format
    fba = convert_units_to_annual(fba)

    keep_unmapped_rows = False

    # if need to maintain FBA columns, create copies of columns
    if kwargs != {}:
        if ('keep_fba_columns' in kwargs) and \
                (kwargs['keep_fba_columns'] is True):
            fba['Flowable'] = fba['FlowName']
            fba['Context'] = fba['Compartment']
        # if keep unmapped rows identified in kwargs, then use
        if 'keep_unmapped_rows' in kwargs:
            keep_unmapped_rows = kwargs['keep_unmapped_rows']
    # else, rename the columns
    else:
        fba = fba.rename(columns={'FlowName': 'Flowable',
                                  'Compartment': 'Context'})

    mapped_df = apply_flow_mapping(fba, from_fba_source,
                                   flow_type=flow_type,
                                   keep_unmapped_rows=keep_unmapped_rows,
                                   ignore_source_name=ignore_source_name)

    if mapped_df is None or len(mapped_df) == 0:
        # return the original df but with columns renamed so work
        # can continue on the FBS
        log.warning("Error in flow mapping")
        mapped_df = fba.copy()
        mapped_df['FlowUUID'] = None

    return mapped_df
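
# Usage sketch (hypothetical source name and df): map flows via esupy
# while preserving the original 'FlowName' and 'Compartment' columns
# and retaining unmapped rows.
def _example_map_flows(fba):
    return map_flows(fba, 'USGS_NWIS_WU',
                     flow_type='ELEMENTARY_FLOW',
                     keep_fba_columns=True,
                     keep_unmapped_rows=True)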
def check_if_location_systems_match(df1, df2):
    """
    Check if two dataframes share the same location system
    :param df1: fba or fbs df
    :param df2: fba or fbs df
    :return: None; logs the comparison result
    """
    # compare the unique location systems; Series.all() only tests that
    # the values are truthy and does not compare the systems themselves
    if set(df1["LocationSystem"].unique()) == \
            set(df2["LocationSystem"].unique()):
        log.info("LocationSystems match")
    else:
        log.warning("LocationSystems do not match, "
                    "might lose county level data")
def getMetadata(source, year):
    """
    Use the esupy package functions to return the metadata for a FBA
    used to generate a FBS
    :param source: string, FBA source name
    :param year: string, year of FBA data
    :return: meta object, previously generated FBA meta
    """
    from flowsa.flowbyactivity import set_fba_name

    name = set_fba_name(source, year)
    # paths is defined in common.py
    meta = read_source_metadata(paths, set_fb_meta(name, 'FlowByActivity'))
    if meta is None:
        log.warning('No metadata found for %s', source)
        meta = {'source_meta': f'No metadata found for {source} {year}'}
    return meta
def check_if_data_exists_for_same_geoscales(fba_wsec_walloc, source,
                                            activity):
    """
    Determine if data exists at the same scales for datasource and
    allocation source
    :param fba_wsec_walloc: fba df with aggregated sectors and merged
        allocation table
    :param source: str, source name
    :param activity: list, activities of interest
    :return: None; logs whether allocation ratios are missing
    """
    # todo: modify so only returns warning if no value for entire
    #  location, not just no value for one of the possible sectors
    from flowsa.mapping import get_activitytosector_mapping

    # create list of highest sector level for which there should be data
    mapping = get_activitytosector_mapping(source)
    # filter by activity of interest
    mapping = mapping.loc[mapping['Activity'].isin(activity)]
    # add sectors to list
    sectors_list = pd.unique(mapping['Sector']).tolist()

    # subset fba w sectors and with merged allocation table so only
    # have rows with aggregated sector list
    df_subset = fba_wsec_walloc.loc[
        (fba_wsec_walloc[fbs_activity_fields[0]].isin(sectors_list)) |
        (fba_wsec_walloc[fbs_activity_fields[1]].isin(sectors_list)
         )].reset_index(drop=True)
    # only interested in total flows
    # df_subset = df_subset.loc[
    #     df_subset['FlowName'] == 'total'].reset_index(drop=True)
    # df_subset = df_subset.loc[
    #     df_subset['Compartment'] == 'total'].reset_index(drop=True)

    # create subset of fba where the allocation data is missing
    missing_alloc = df_subset.loc[
        df_subset['FlowAmountRatio'].isna()].reset_index(drop=True)
    # drop any rows where source flow value = 0
    missing_alloc = missing_alloc.loc[
        missing_alloc['FlowAmount'] != 0].reset_index(drop=True)
    # create list of locations with missing allocation data
    states_missing_data = pd.unique(missing_alloc['Location']).tolist()
    if len(missing_alloc) == 0:
        log.info("All aggregated sector flows have allocation "
                 "flow ratio data")
    else:
        log.warning("Missing allocation flow ratio data for " +
                    ', '.join(states_missing_data))

    return None
def assign_fips_location_system(df, year_of_data):
    """
    Add location system based on year of data. County level FIPS
    change over the years.
    :param df: df with FIPS location system
    :param year_of_data: str, year of data pulled
    :return: df, with 'LocationSystem' column values
    """
    if year_of_data >= '2015':
        df.loc[:, 'LocationSystem'] = 'FIPS_2015'
    elif '2013' <= year_of_data < '2015':
        df.loc[:, 'LocationSystem'] = 'FIPS_2013'
    elif '2010' <= year_of_data < '2013':
        df.loc[:, 'LocationSystem'] = 'FIPS_2010'
    elif year_of_data < '2010':
        log.warning(
            "Missing FIPS codes from crosswalk for %s. "
            "Assigning to FIPS_2010", year_of_data)
        df.loc[:, 'LocationSystem'] = 'FIPS_2010'

    return df
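
# Usage sketch: a 2014 dataset falls in the ['2013', '2015') window,
# so it is assigned the FIPS_2013 location system.
def _example_assign_fips_location_system():
    df = pd.DataFrame({'Location': ['01001']})
    return assign_fips_location_system(df, '2014')  # FIPS_2013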
def check_for_missing_sector_data(df, target_sector_level):
    """
    Modeled after datachecks.py check_if_losing_sector_data
    Allocates flow amount equally across child NAICS when parent NAICS
    is not target_level
    :param df: df with a 'SectorProducedBy' column
    :param target_sector_level: str, ex. 'NAICS_4'
    :return: df with flow amounts allocated to the target sector level
    """
    from flowsa.flowbyfunctions import \
        replace_NoneType_with_empty_cells, replace_strings_with_NoneType

    # temporarily replace null values with empty cells
    df = replace_NoneType_with_empty_cells(df)

    activity_field = "SectorProducedBy"
    rows_lost = pd.DataFrame()
    cw_load = load_sector_length_crosswalk_w_nonnaics()
    for i in range(3, sector_level_key[target_sector_level]):
        # create df of i length
        df_subset = df.loc[df[activity_field].apply(lambda x: len(x) == i)]

        # import cw and subset to current sector length and
        # target sector length
        nlength = list(sector_level_key.keys())[list(
            sector_level_key.values()).index(i)]
        cw = cw_load[[nlength, target_sector_level]].drop_duplicates()
        # add column with counts
        cw['sector_count'] = cw.groupby(nlength)[nlength].transform('count')

        # merge df & replace sector produced columns
        df_x = pd.merge(df_subset, cw, how='left',
                        left_on=[activity_field], right_on=[nlength])
        df_x[activity_field] = df_x[target_sector_level]
        df_x = df_x.drop(columns=[nlength, target_sector_level])

        # calculate new flow amounts, based on sector count, allocating
        # equally to the new sector length codes
        df_x['FlowAmount'] = df_x['FlowAmount'] / df_x['sector_count']
        df_x = df_x.drop(columns=['sector_count'])
        # replace null values with empty cells
        df_x = replace_NoneType_with_empty_cells(df_x)

        # append to df
        sector_list = df_subset[activity_field].drop_duplicates()
        if len(df_x) != 0:
            log.warning('Data found at %s digit NAICS to be allocated: %s',
                        str(i), ' '.join(map(str, sector_list)))
            rows_lost = pd.concat([rows_lost, df_x],
                                  ignore_index=True, sort=True)

    if len(rows_lost) == 0:
        log.info('No data loss from NAICS in dataframe')
    else:
        log.info('Allocating FlowAmounts equally to each ' +
                 target_sector_level)

    # add rows of missing data to the fbs sector subset
    df_allocated = pd.concat([df, rows_lost], ignore_index=True, sort=True)
    df_allocated = df_allocated.loc[df_allocated[activity_field].apply(
        lambda x: len(x) == sector_level_key[target_sector_level])]
    df_allocated.reset_index(drop=True, inplace=True)

    # replace empty cells with NoneType (if dtype is object)
    df_allocated = replace_strings_with_NoneType(df_allocated)

    return df_allocated
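
# Worked sketch of the equal-allocation arithmetic used above
# (hypothetical codes): a parent sector with FlowAmount 12 and three
# target-length children in the crosswalk yields 12 / 3 = 4 per child,
# because sector_count is 3 on each of the three merged rows.
def _example_equal_allocation():
    df_x = pd.DataFrame(
        {'SectorProducedBy': ['111110', '111120', '111130'],
         'FlowAmount': [12.0, 12.0, 12.0],
         'sector_count': [3, 3, 3]})
    df_x['FlowAmount'] = df_x['FlowAmount'] / df_x['sector_count']
    return df_x.drop(columns=['sector_count'])  # 4.0 per child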
def check_if_losing_sector_data(df, df_subset, target_sector_level):
    """
    Determine rows of data that will be lost if df is subset to the
    target sector level. In some instances, not all data exists at the
    target sector level, so flow amounts are allocated equally across
    the target-length sectors associated with the less aggregated codes.
    :param df: df with sector columns
    :param df_subset: df subset to the target sector level
    :param target_sector_level: str, ex. 'NAICS_4'
    :return: df_subset with the previously dropped rows allocated to
        the target sector level
    """
    df = df.fillna(fbs_fill_na_dict)
    # exclude nonsectors
    df = df.replace({'nan': '', 'None': ''})

    rows_lost = pd.DataFrame()
    for i in range(2, sector_level_key[target_sector_level]):
        # create df of i length
        df_x1 = df.loc[
            (df[fbs_activity_fields[0]].apply(lambda x: len(x) == i)) &
            (df[fbs_activity_fields[1]] == '')]
        df_x2 = df.loc[(df[fbs_activity_fields[0]] == '') & (
            df[fbs_activity_fields[1]].apply(lambda x: len(x) == i))]
        df_x3 = df.loc[
            (df[fbs_activity_fields[0]].apply(lambda x: len(x) == i)) &
            (df[fbs_activity_fields[1]].apply(lambda x: len(x) == i))]
        df_x = pd.concat([df_x1, df_x2, df_x3],
                         ignore_index=True, sort=False)

        # create df of i + 1 length
        df_y1 = df.loc[
            df[fbs_activity_fields[0]].apply(lambda x: len(x) == i + 1) |
            df[fbs_activity_fields[1]].apply(lambda x: len(x) == i + 1)]
        df_y2 = df.loc[
            df[fbs_activity_fields[0]].apply(lambda x: len(x) == i + 1) &
            df[fbs_activity_fields[1]].apply(lambda x: len(x) == i + 1)]
        df_y = pd.concat([df_y1, df_y2], ignore_index=True, sort=False)

        # create temp sector columns in df y, that are i digits in length
        df_y.loc[:, 'spb_tmp'] = df_y[fbs_activity_fields[0]].apply(
            lambda x: x[0:i])
        df_y.loc[:, 'scb_tmp'] = df_y[fbs_activity_fields[1]].apply(
            lambda x: x[0:i])
        # don't modify household sector lengths
        df_y = df_y.replace({'F0': 'F010', 'F01': 'F010'})

        # merge the two dfs
        df_m = pd.merge(df_x,
                        df_y[['Class', 'Context', 'FlowType', 'Flowable',
                              'Location', 'LocationSystem', 'Unit', 'Year',
                              'spb_tmp', 'scb_tmp']],
                        how='left',
                        left_on=['Class', 'Context', 'FlowType', 'Flowable',
                                 'Location', 'LocationSystem', 'Unit',
                                 'Year', 'SectorProducedBy',
                                 'SectorConsumedBy'],
                        right_on=['Class', 'Context', 'FlowType',
                                  'Flowable', 'Location', 'LocationSystem',
                                  'Unit', 'Year', 'spb_tmp', 'scb_tmp'])

        # extract the rows that are not disaggregated to more
        # specific naics
        rl = df_m[(df_m['scb_tmp'].isnull()) & (df_m['spb_tmp'].isnull())]
        # clean df
        rl = clean_df(rl, flow_by_sector_fields, fbs_fill_na_dict)
        rl_list = rl[['SectorProducedBy', 'SectorConsumedBy'
                      ]].drop_duplicates().values.tolist()

        # match sectors with target sector length sectors
        # import cw and subset to current sector length and
        # target sector length
        cw_load = load_sector_length_crosswalk_w_nonnaics()
        nlength = list(sector_level_key.keys())[list(
            sector_level_key.values()).index(i)]
        cw = cw_load[[nlength, target_sector_level]].drop_duplicates()
        # add column with counts
        cw['sector_count'] = cw.groupby(nlength)[nlength].transform('count')

        # merge df & conditionally replace sector produced/consumed columns
        rl_m = pd.merge(rl, cw, how='left',
                        left_on=[fbs_activity_fields[0]],
                        right_on=[nlength])
        rl_m.loc[rl_m[fbs_activity_fields[0]] != '',
                 fbs_activity_fields[0]] = rl_m[target_sector_level]
        rl_m = rl_m.drop(columns=[nlength, target_sector_level])

        rl_m2 = pd.merge(rl_m, cw, how='left',
                         left_on=[fbs_activity_fields[1]],
                         right_on=[nlength])
        rl_m2.loc[rl_m2[fbs_activity_fields[1]] != '',
                  fbs_activity_fields[1]] = rl_m2[target_sector_level]
        rl_m2 = rl_m2.drop(columns=[nlength, target_sector_level])

        # create one sector count column
        rl_m2['sector_count_x'] = rl_m2['sector_count_x'].fillna(
            rl_m2['sector_count_y'])
        rl_m3 = rl_m2.rename(columns={'sector_count_x': 'sector_count'})
        rl_m3 = rl_m3.drop(columns=['sector_count_y'])

        # calculate new flow amounts, based on sector count, allocating
        # equally to the new sector length codes
        rl_m3['FlowAmount'] = rl_m3['FlowAmount'] / rl_m3['sector_count']
        rl_m3 = rl_m3.drop(columns=['sector_count'])

        # append to df
        if len(rl) != 0:
            log.warning('Data found at %s digit NAICS not represented in '
                        'current data subset: %s',
                        str(i), ' '.join(map(str, rl_list)))
            rows_lost = pd.concat([rows_lost, rl_m3],
                                  ignore_index=True, sort=True)

    if len(rows_lost) == 0:
        log.info('No data loss from subsetting the dataframe by '
                 'specified sector length')
    else:
        log.info('Allocating FlowAmounts equally to each ' +
                 target_sector_level +
                 ' associated with the sectors previously being dropped')

    # add rows of missing data to the fbs sector subset
    df_w_lost_data = pd.concat([df_subset, rows_lost],
                               ignore_index=True, sort=True)
    df_w_lost_data = df_w_lost_data.replace({'': None})

    return df_w_lost_data
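
# Usage sketch (hypothetical dfs): after subsetting an FBS to 6-digit
# sectors, recover the flows that only existed at shorter NAICS lengths.
def _example_check_if_losing_sector_data(fbs, fbs_naics6):
    # fbs: full FBS df; fbs_naics6: the same df subset to NAICS_6
    return check_if_losing_sector_data(fbs, fbs_naics6, 'NAICS_6')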
def check_for_negative_flowamounts(df):
    """
    Check for negative FlowAmounts in a dataframe
    :param df: df with a 'FlowAmount' column
    :return: df, unchanged; logs a warning if negative values exist
    """
    if (df['FlowAmount'].values < 0).any():
        log.warning('There are negative FlowAmounts')
    return df
def main(**kwargs):
    """
    Generate FBA parquet(s)
    :param kwargs: 'source' and 'year'
    :return: parquet saved to local directory
    """
    # assign arguments
    if len(kwargs) == 0:
        kwargs = parse_args()

    source = kwargs['source']
    year = kwargs['year']

    # assign yaml parameters (common.py fxn), drop any extensions to
    # FBA filename if run into error
    try:
        config = load_yaml_dict(source, flowbytype='FBA')
    except UnboundLocalError:
        log.info(f'Could not find Flow-By-Activity config file '
                 f'for {source}')
        source = get_flowsa_base_name(sourceconfigpath, source, "yaml")
        log.info(f'Generating FBA for {source}')
        config = load_yaml_dict(source, flowbytype='FBA')

    log.info("Creating dataframe list")
    # year input can either be sequential years (e.g. 2007-2009)
    # or a single year
    if '-' in str(year):
        years = str(year).split('-')
        year_iter = list(range(int(years[0]), int(years[1]) + 1))
    else:
        # else only a single year defined, create an array of one
        year_iter = [year]

    # check that year(s) are listed in the method yaml,
    # return warning if not
    years_list = list(set(list(map(int, year_iter))
                          ).difference(config['years']))
    if len(years_list) != 0:
        log.warning(f'Years not listed in FBA method yaml: {years_list}, '
                    f'data might not exist')

    for p_year in year_iter:
        year = str(p_year)
        # replace parts of urls with specific instructions from source.py
        urls = assemble_urls_for_query(source=source, year=year,
                                       config=config)
        # create a list with data from all source urls
        df_list = call_urls(url_list=urls, source=source, year=year,
                            config=config)
        # concat the dataframes and parse data with specific
        # instructions from source.py
        log.info("Concat dataframe list and parse data")
        dfs = parse_data(df_list=df_list, source=source, year=year,
                         config=config)
        if isinstance(dfs, list):
            for frame in dfs:
                if len(frame.index) > 0:
                    try:
                        source_name = frame['SourceName'].iloc[0]
                    except KeyError:
                        source_name = source
                    process_data_frame(df=frame, source=source_name,
                                       year=year, config=config)
        else:
            process_data_frame(df=dfs, source=source, year=year,
                               config=config)
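
# Usage sketch: generate FBA parquet(s) for a single source and year
# (the kwargs mirror the command-line arguments parsed by parse_args()).
# The year must appear in the source's FBA method yaml.
def _example_generate_fba():
    main(source='USGS_NWIS_WU', year='2015')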
def generate_fbs_bibliography(methodname):
    """
    Generate bibliography for a FlowBySector
    :param methodname: string, methodname to create a bibliography
    :return: a .bib file saved in local directory
    """
    from flowsa.metadata import getMetadata

    # create list of sources in method
    sources = generate_list_of_sources_in_fbs_method(methodname)

    # loop through list of sources, load source method yaml,
    # and create bib entry
    bib_list = []
    source_set = set()
    for source in sources:
        # drop list duplicates and any where year is None (because
        # allocation is a function, not a datasource)
        if source[1] != 'None':
            try:
                config = load_values_from_literature_citations_config()[
                    source[0]]
            except KeyError:
                try:
                    config = getMetadata(source[0], source[1])
                except (KeyError, AttributeError):
                    log.info('Could not find metadata for %s', source[0])
                    continue
            if config is not None:
                # ensure data sources are not duplicated when
                # different source names
                try:
                    if (config['source_name'], config['author'], source[1],
                            config['source_url']) not in source_set:
                        source_set.add((config['source_name'],
                                        config['author'], source[1],
                                        config['source_url']))
                        # if there is a date downloaded, use in
                        # citation over date generated
                        if 'original_data_download_date' in config:
                            bib_date = config[
                                'original_data_download_date']
                        elif 'date_accessed' in config:
                            bib_date = config['date_accessed']
                        else:
                            bib_date = config['date_created']
                        db = BibDatabase()
                        db.entries = [{
                            'title': config['source_name'] + ' ' +
                                     str(source[1]),
                            'author': config['author'],
                            'year': str(source[1]),
                            'url': config['source_url'],
                            'urldate': bib_date,
                            'ID': config['bib_id'] + '_' + str(source[1]),
                            'ENTRYTYPE': 'misc'
                        }]
                        # append each entry to a list of
                        # BibDatabase entries
                        bib_list.append(db)
                except KeyError:
                    log.warning('Missing information needed to create '
                                'bib for %s, %s', source[0], source[1])
                    continue

    # write out bibliography
    writer = BibTexWriter()
    # create directory if missing
    os.makedirs(outputpath + '/Bibliography', exist_ok=True)
    with open(f'{biboutputpath}{methodname}.bib', 'w') as bibfile:
        # loop through all entries in bib_list
        for b in bib_list:
            bibfile.write(writer.write(b))
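
# Usage sketch (hypothetical method name): write a .bib file containing
# an entry for every data source cited by an FBS method.
def _example_generate_fbs_bibliography():
    generate_fbs_bibliography('Water_national_2015_m1')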