Example #1
0
def test_init_df_with_na_unit(test_pd_df, tmpdir):
    # missing values in the unit column are replaced by an empty string
    test_pd_df.loc[1, "unit"] = np.nan
    df = IamDataFrame(test_pd_df)
    assert df.unit == ["", "EJ/yr"]

    # writing to file and importing as pandas returns `nan`, not empty string
    file = tmpdir / "na_unit.csv"
    df.to_csv(file)
    df_csv = pd.read_csv(file)
    assert np.isnan(df_csv.loc[1, "Unit"])
    IamDataFrame(file)  # reading from file as IamDataFrame works

    file = tmpdir / "na_unit.xlsx"
    df.to_excel(file)
    df_excel = pd.read_excel(file, engine="openpyxl")
    assert np.isnan(df_excel.loc[1, "Unit"])
    IamDataFrame(file)  # reading from file as IamDataFrame works
Example #2
0
def test_init_df_with_na_unit(test_pd_df, tmpdir):
    # missing values in the unit column are replaced by an empty string
    test_pd_df.loc[1, 'unit'] = np.nan
    df = IamDataFrame(test_pd_df)
    assert df.unit == ['', 'EJ/yr']

    # writing to file and importing as pandas returns `nan`, not empty string
    file = tmpdir / 'na_unit.csv'
    df.to_csv(file)
    df_csv = pd.read_csv(file)
    assert np.isnan(df_csv.loc[1, 'Unit'])
    IamDataFrame(file)  # reading from file as IamDataFrame works

    file = tmpdir / 'na_unit.xlsx'
    df.to_excel(file)
    df_excel = pd.read_excel(file)
    assert np.isnan(df_excel.loc[1, 'Unit'])
    IamDataFrame(file)  # reading from file as IamDataFrame works
Example #3
0
def make_excel(ifile, ofile, scenario):

    if ifile is None:
        raise "You did not specify the input file, remember to use '-i' option"
        print(
            "Use as :\n	python DB_to_Excel.py -i <input_file> (Optional -o <output_excel_file_name_only>)\n	Use -h for help."
        )
        sys.exit(2)
    else:
        file_type = re.search(
            r"(\w+)\.(\w+)\b",
            ifile)  # Extract the input filename and extension
    if not file_type:
        print("The file type %s is not recognized. Use a db file." % ifile)
        sys.exit(2)
    if ofile is None:
        ofile = file_type.group(1)
        print("Look for output in %s_*.xls" % ofile)

    con = sqlite3.connect(ifile)
    cur = con.cursor(
    )  # a database cursor is a control structure that enables traversal over the records in a database
    con.text_factory = str  #this ensures data is explored with the correct UTF-8 encoding
    scenario = scenario.pop()
    writer = pd.ExcelWriter(
        ofile + '.xlsx',
        engine='xlsxwriter',
        engine_kwargs={'options': {
            'strings_to_formulas': False
        }})

    workbook = writer.book

    header_format = workbook.add_format({
        'bold': True,
        'text_wrap': True,
        'align': 'left',
    })

    query = "SELECT DISTINCT Efficiency.regions, Efficiency.tech, technologies.sector FROM Efficiency \
	INNER JOIN technologies ON Efficiency.tech=technologies.tech"

    all_techs = pd.read_sql_query(query, con)

    query = "SELECT regions, tech, sector, t_periods, capacity FROM Output_CapacityByPeriodAndTech WHERE scenario='" + scenario + "'"
    df_capacity = pd.read_sql_query(query, con)
    for sector in sorted(df_capacity['sector'].unique()):
        df_capacity_sector = df_capacity[df_capacity['sector'] == sector]
        df_capacity_sector = df_capacity_sector.drop(
            columns=['sector']).pivot_table(values='capacity',
                                            index=['regions', 'tech'],
                                            columns='t_periods')
        df_capacity_sector.reset_index(inplace=True)
        sector_techs = all_techs[all_techs['sector'] == sector]
        df_capacity_sector = pd.merge(sector_techs[['regions', 'tech']],
                                      df_capacity_sector,
                                      on=['regions', 'tech'],
                                      how='left')
        df_capacity_sector.rename(columns={
            'regions': 'Region',
            'tech': 'Technology'
        },
                                  inplace=True)
        df_capacity_sector.to_excel(writer,
                                    sheet_name='Capacity_' + sector,
                                    index=False,
                                    encoding='utf-8',
                                    startrow=1,
                                    header=False)
        worksheet = writer.sheets['Capacity_' + sector]
        worksheet.set_column('A:A', 10)
        worksheet.set_column('B:B', 10)
        for col, val in enumerate(df_capacity_sector.columns.values):
            worksheet.write(0, col, val, header_format)

    query = "SELECT regions, tech, sector, t_periods, sum(vflow_out) as vflow_out FROM Output_VFlow_Out WHERE scenario='" + scenario + "' GROUP BY \
	regions, tech, sector, t_periods"

    df_activity = pd.read_sql_query(query, con)
    for sector in sorted(df_activity['sector'].unique()):
        df_activity_sector = df_activity[df_activity['sector'] == sector]
        df_activity_sector = df_activity_sector.drop(
            columns=['sector']).pivot_table(values='vflow_out',
                                            index=['regions', 'tech'],
                                            columns='t_periods')
        df_activity_sector.reset_index(inplace=True)
        sector_techs = all_techs[all_techs['sector'] == sector]
        df_activity_sector = pd.merge(sector_techs[['regions', 'tech']],
                                      df_activity_sector,
                                      on=['regions', 'tech'],
                                      how='left')
        df_activity_sector.rename(columns={
            'regions': 'Region',
            'tech': 'Technology'
        },
                                  inplace=True)
        df_activity_sector.to_excel(writer,
                                    sheet_name='Activity_' + sector,
                                    index=False,
                                    encoding='utf-8',
                                    startrow=1,
                                    header=False)
        worksheet = writer.sheets['Activity_' + sector]
        worksheet.set_column('A:A', 10)
        worksheet.set_column('B:B', 10)
        for col, val in enumerate(df_activity_sector.columns.values):
            worksheet.write(0, col, val, header_format)

    query = "SELECT DISTINCT EmissionActivity.regions, EmissionActivity.tech, EmissionActivity.emis_comm as emissions_comm, technologies.sector FROM EmissionActivity \
	INNER JOIN technologies ON EmissionActivity.tech=technologies.tech"

    all_emis_techs = pd.read_sql_query(query, con)

    query = "SELECT regions, tech, sector, t_periods, emissions_comm, sum(emissions) as emissions FROM Output_Emissions WHERE scenario='" + scenario + "' GROUP BY \
	regions, tech, sector, t_periods, emissions_comm"

    df_emissions_raw = pd.read_sql_query(query, con)
    df_emissions = df_emissions_raw.pivot_table(
        values='emissions',
        index=['regions', 'tech', 'sector', 'emissions_comm'],
        columns='t_periods')
    df_emissions.reset_index(inplace=True)
    df_emissions = pd.merge(all_emis_techs,
                            df_emissions,
                            on=['regions', 'tech', 'sector', 'emissions_comm'],
                            how='left')
    df_emissions.rename(columns={
        'regions': 'Region',
        'tech': 'Technology',
        'emissions_comm': 'Emission Commodity',
        'sector': 'Sector'
    },
                        inplace=True)
    df_emissions.to_excel(writer,
                          sheet_name='Emissions',
                          index=False,
                          encoding='utf-8',
                          startrow=1,
                          header=False)
    worksheet = writer.sheets['Emissions']
    worksheet.set_column('A:A', 10)
    worksheet.set_column('B:B', 10)
    worksheet.set_column('C:C', 10)
    worksheet.set_column('D:D', 20)
    for col, val in enumerate(df_emissions.columns.values):
        worksheet.write(0, col, val, header_format)

    query = "SELECT regions, tech, sector, output_name,  vintage, output_cost FROM Output_Costs WHERE output_name LIKE '%V_Discounted%' AND scenario='" + scenario + "'"
    df_costs = pd.read_sql_query(query, con)
    df_costs.columns = [
        'Region', 'Technology', 'Sector', 'Output Name', 'Vintage', 'Cost'
    ]
    df_costs.to_excel(writer,
                      sheet_name='Costs',
                      index=False,
                      encoding='utf-8',
                      startrow=1,
                      header=False)
    worksheet = writer.sheets['Costs']
    worksheet.set_column('A:A', 10)
    worksheet.set_column('B:B', 10)
    worksheet.set_column('C:C', 10)
    worksheet.set_column('D:D', 30)
    for col, val in enumerate(df_costs.columns.values):
        worksheet.write(0, col, val, header_format)

    writer.save()

    #prepare results for IamDataFrame
    df_emissions_raw['scenario'] = scenario
    df_emissions_raw['unit'] = '?'
    df_emissions_raw['variable'] = 'Emissions|' + df_emissions_raw[
        'emissions_comm'] + '|' + df_emissions_raw['tech']
    df_emissions_raw.rename(columns={
        't_periods': 'year',
        'emissions': 'value',
        'regions': 'region'
    },
                            inplace=True)

    df_capacity['scenario'] = scenario
    df_capacity['unit'] = '?'
    df_capacity['variable'] = 'Capacity|' + df_capacity[
        'sector'] + '|' + df_capacity['tech']
    df_capacity.rename(columns={
        't_periods': 'year',
        'capacity': 'value',
        'regions': 'region'
    },
                       inplace=True)

    df_activity['scenario'] = scenario
    df_activity['unit'] = '?'
    df_activity['variable'] = 'Activity|' + df_activity[
        'sector'] + '|' + df_activity['tech']
    df_activity.rename(columns={
        't_periods': 'year',
        'vflow_out': 'value',
        'regions': 'region'
    },
                       inplace=True)

    # cast results to IamDataFrame and write to xlsx
    columns = ['scenario', 'region', 'variable', 'year', 'value', 'unit']
    _results = pd.concat([
        df_emissions_raw[columns], df_activity[columns], df_capacity[columns]
    ])
    df = IamDataFrame(_results, model='Temoa')

    emiss = df_emissions_raw['emissions_comm'].unique()
    sector = df_capacity['sector'].unique()

    # adding aggregates of emissions for each species
    df.aggregate([f'Emissions|{q}' for q in emiss], append=True)

    # adding aggregates of activity/capacity for each sector
    prod = itertools.product(['Activity', 'Capacity'], sector)
    df.aggregate([f'{t}|{s}' for t, s in prod], append=True)

    # write IamDataFrame to xlsx
    df.to_excel(ofile + '_pyam.xlsx')

    cur.close()
    con.close()