예제 #1
0
파일: eia_annual.py 프로젝트: sonya/eea
def create_consolidated_tables():
    allsources = eia.fossilfuels + eia.elec_sources + \
        eia.nuclear + eia.renewables
    allsectors = ["TC", "AC", "CC", "IC", "RC"] + eia.elec_sectors

    for year in config.STUDY_YEARS:
        strings = {
            "renewables": sqlhelper.set_repr(eia.renewables),
            "elec_sources": sqlhelper.set_repr(eia.elec_sources),
            "elec_sectors": sqlhelper.set_repr(eia.elec_sectors),
            "allsources": sqlhelper.set_repr(allsources),
            "allsectors": sqlhelper.set_repr(allsectors),
            "from_table": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "tablename": "%s.seds_short_%d" % (config.EIA_SCHEMA, year),
            }

        db.execute("DROP TABLE IF EXISTS %(tablename)s CASCADE" % strings)
        db.execute("""
        SELECT source, sector,
               case when sum(use_btu) = 0 then 0
                    else sum(ex) / sum(use_btu) end as price,
               sum(use_btu) as use_btu,
               sum(ex) as ex
          INTO %(tablename)s
          FROM (SELECT case when source in %(renewables)s then 'RE'
                            when source in %(elec_sources)s then 'ES'
                            else source end as source,
                       case when sector in %(elec_sectors)s then 'EI'
                            else sector end as sector,
                       price, use_btu, ex
                  FROM %(from_table)s
                 WHERE source in %(allsources)s
                   AND sector in %(allsectors)s) e
         GROUP by source, sector order by source, sector""" % strings)
예제 #2
0
파일: pcebridge.py 프로젝트: sonya/eea
def create_views():
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA

    subqueries = []
    for year in config.STUDY_YEARS:
        subqueries.append(
            """SELECT %d AS year, nipa_group, description
                 FROM %s.nipa_codes_%d""" % (year, config.IO_SCHEMA, year))

    db.execute("CREATE OR REPLACE VIEW %s AS %s"
               % (test_view, "\n UNION\n".join(subqueries)))

    for year in config.STUDY_YEARS:
        strings = {
            "view_name": "%s.nipa_pce_%d" % (config.IO_SCHEMA, year),
            "bridge_table": "%s.pcebridge_%d" % (config.IO_SCHEMA, year),
            "code_table": "%s.nipa_codes_%d" % (config.IO_SCHEMA, year),
            "groups_table": "%s.pce_codes" % config.NIPA_SCHEMA,
            "pa_naics": eia.source_naics_map['PA'][year],
            "pa_trans_naics": eia.source_naics_map['PA-trans'][year],
            "pa_nontrans_naics": eia.source_naics_map['PA-nontrans'][year],
            "tourism_adjustment": bea.tourism_adjustment_codes[year],
            }
    
        # make sure gasoline/motor fuel purchases are allocated
        # among the split commodities we made for petroleum
        #
        # fortunately the group description for gasoline in all years
        # all start with "Gasoline" or "GASOLINE"
        sql = """
CREATE OR REPLACE VIEW %(view_name)s AS
    SELECT commodity, pcegroup,
           cast(sum(prod_val) as float) as value
      FROM (SELECT CASE
                   WHEN UPPER(codes.description) LIKE 'GASOLINE%%'
                    AND commodity = '%(pa_naics)s' THEN '%(pa_trans_naics)s'
                   WHEN commodity = '%(pa_naics)s' THEN '%(pa_nontrans_naics)s'
                   ELSE bridge.commodity END AS commodity,
                   bridge.prod_val,
                   groups.description as pcegroup
              FROM %(bridge_table)s bridge,
                   %(code_table)s codes,
                   %(groups_table)s groups
             WHERE bridge.pce_code = codes.pce_code
               AND bridge.commodity <> '%(tourism_adjustment)s'
               AND codes.nipa_group = groups.code) a
     GROUP BY commodity, pcegroup""" % strings

        db.execute(sql)
예제 #3
0
파일: pcebridge.py 프로젝트: sonya/eea
def create_views():
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA

    subqueries = []
    for year in config.STUDY_YEARS:
        subqueries.append("""SELECT %d AS year, nipa_group, description
                 FROM %s.nipa_codes_%d""" % (year, config.IO_SCHEMA, year))

    db.execute("CREATE OR REPLACE VIEW %s AS %s" %
               (test_view, "\n UNION\n".join(subqueries)))

    for year in config.STUDY_YEARS:
        strings = {
            "view_name": "%s.nipa_pce_%d" % (config.IO_SCHEMA, year),
            "bridge_table": "%s.pcebridge_%d" % (config.IO_SCHEMA, year),
            "code_table": "%s.nipa_codes_%d" % (config.IO_SCHEMA, year),
            "groups_table": "%s.pce_codes" % config.NIPA_SCHEMA,
            "pa_naics": eia.source_naics_map['PA'][year],
            "pa_trans_naics": eia.source_naics_map['PA-trans'][year],
            "pa_nontrans_naics": eia.source_naics_map['PA-nontrans'][year],
            "tourism_adjustment": bea.tourism_adjustment_codes[year],
        }

        # make sure gasoline/motor fuel purchases are allocated
        # among the split commodities we made for petroleum
        #
        # fortunately the group description for gasoline in all years
        # all start with "Gasoline" or "GASOLINE"
        sql = """
CREATE OR REPLACE VIEW %(view_name)s AS
    SELECT commodity, pcegroup,
           cast(sum(prod_val) as float) as value
      FROM (SELECT CASE
                   WHEN UPPER(codes.description) LIKE 'GASOLINE%%'
                    AND commodity = '%(pa_naics)s' THEN '%(pa_trans_naics)s'
                   WHEN commodity = '%(pa_naics)s' THEN '%(pa_nontrans_naics)s'
                   ELSE bridge.commodity END AS commodity,
                   bridge.prod_val,
                   groups.description as pcegroup
              FROM %(bridge_table)s bridge,
                   %(code_table)s codes,
                   %(groups_table)s groups
             WHERE bridge.pce_code = codes.pce_code
               AND bridge.commodity <> '%(tourism_adjustment)s'
               AND codes.nipa_group = groups.code) a
     GROUP BY commodity, pcegroup""" % strings

        db.execute(sql)
예제 #4
0
파일: parser.py 프로젝트: sonya/eea
def create_views():
    for year in config.STUDY_YEARS:
        strings = {
            "year": year,
            "schema": config.SCHEMA,
            }
        sql = """CREATE OR REPLACE VIEW %(schema)s.ixi_view_%(year)d AS
            SELECT codes.to_code as from_sector,
                   io.to_sector, sum(io.value) as value
              FROM uk.ixi_%(year)d io,
                   (SELECT DISTINCT from_code, to_code
                      FROM %(schema)s.code_map
                     WHERE from_code is not null
                       AND to_code is not null) codes
             WHERE io.from_sector = codes.from_code
             GROUP BY codes.to_code, io.to_sector""" % strings
        db.execute(sql)
예제 #5
0
파일: parser.py 프로젝트: sonya/eea
def create_views():
    for year in config.STUDY_YEARS:
        strings = {
            "year": year,
            "schema": config.SCHEMA,
        }
        sql = """CREATE OR REPLACE VIEW %(schema)s.ixi_view_%(year)d AS
            SELECT codes.to_code as from_sector,
                   io.to_sector, sum(io.value) as value
              FROM uk.ixi_%(year)d io,
                   (SELECT DISTINCT from_code, to_code
                      FROM %(schema)s.code_map
                     WHERE from_code is not null
                       AND to_code is not null) codes
             WHERE io.from_sector = codes.from_code
             GROUP BY codes.to_code, io.to_sector""" % strings
        db.execute(sql)
예제 #6
0
def create_views():
    years = []
    files = fileutils.getcachecontents("io-annual")
    for filename in files:
        year = is_make(filename)
        if year:
            years.append(year)

    for year in years:
        strings = {
            "make_table": "%s.annual_make_%s" % (config.IO_SCHEMA, year),
            "use_table": "%s.annual_use_%s" % (config.IO_SCHEMA, year),
            "cxc_table": "%s.annual_cxc_%s" % (config.IO_SCHEMA, year),
            }

        db.execute("DROP TABLE %(cxc_table)s" % strings)

        db.execute("""SELECT from_sector, to_sector, SUM(value) AS value
          INTO %(cxc_table)s
          FROM (SELECT use.commodity AS from_sector,
                       indshare.commodity AS to_sector,
                       use.value * indshare.output_share AS value
                  FROM (SELECT make.industry,
                               make.commodity,
                               make.value / indtotal.value AS output_share
                          FROM %(make_table)s make,
                               (SELECT industry, SUM(value) AS value
                                  FROM %(make_table)s
                                 GROUP BY industry) indtotal
                         WHERE make.industry = indtotal.industry) indshare,
                       %(use_table)s use
                 WHERE indshare.industry = use.industry
              UNION
                SELECT use.commodity AS from_sector,
                       use.industry AS to_sector,
                       use.value AS value
                  FROM %(use_table)s use
                 WHERE industry NOT IN (SELECT industry
                                          FROM %(make_table)s make
                                         WHERE commodity = 'TIO')
               ) allocations
         GROUP BY from_sector, to_sector""" % strings)
예제 #7
0
파일: trade_results.py 프로젝트: sonya/eea
def trade_sector_stats(countries, is_export):
    minyear = min(config.STUDY_YEARS)
    maxyear = max(config.STUDY_YEARS)

    strings = {
        "minyear": minyear,
        "maxyear": maxyear,
        "schema": config.WIOD_SCHEMA,
        "is_export": is_export,
        "countries": sqlhelper.set_repr(countries),
        "blacklist": sqlhelper.set_repr(config.bad_data_blacklist),
        }

    if is_export:
        strings["view"] = "export_view"
        strings["is_export_str"] = "true"
    else:
        strings["view"] = "import_view"
        strings["is_export_str"] = "false"

    db.execute("""CREATE OR REPLACE VIEW %(view)s AS
    SELECT year, industry, sum(value) as value
      FROM trade_results
     WHERE is_export is %(is_export_str)s
       AND country IN %(countries)s
       AND country NOT IN %(blacklist)s
     GROUP BY year, industry""" % strings)

    stmt = db.prepare("""
    SELECT a.year, a.industry, a.value, a.value / b.value * 100
      FROM %(view)s a,
           (SELECT year, sum(value) as value
              FROM %(view)s
             GROUP BY year) b
     WHERE a.year in (%(minyear)d, %(maxyear)d)
       AND a.year = b.year""" % strings)

    print()
    print(countries)
    print()
    print_result(stmt(), minyear, maxyear)
예제 #8
0
파일: trade_results.py 프로젝트: sonya/eea
def trade_sector_stats(countries, is_export):
    minyear = min(config.STUDY_YEARS)
    maxyear = max(config.STUDY_YEARS)

    strings = {
        "minyear": minyear,
        "maxyear": maxyear,
        "schema": config.WIOD_SCHEMA,
        "is_export": is_export,
        "countries": sqlhelper.set_repr(countries),
        "blacklist": sqlhelper.set_repr(config.bad_data_blacklist),
    }

    if is_export:
        strings["view"] = "export_view"
        strings["is_export_str"] = "true"
    else:
        strings["view"] = "import_view"
        strings["is_export_str"] = "false"

    db.execute("""CREATE OR REPLACE VIEW %(view)s AS
    SELECT year, industry, sum(value) as value
      FROM trade_results
     WHERE is_export is %(is_export_str)s
       AND country IN %(countries)s
       AND country NOT IN %(blacklist)s
     GROUP BY year, industry""" % strings)

    stmt = db.prepare("""
    SELECT a.year, a.industry, a.value, a.value / b.value * 100
      FROM %(view)s a,
           (SELECT year, sum(value) as value
              FROM %(view)s
             GROUP BY year) b
     WHERE a.year in (%(minyear)d, %(maxyear)d)
       AND a.year = b.year""" % strings)

    print()
    print(countries)
    print()
    print_result(stmt(), minyear, maxyear)
예제 #9
0
def create_hybrid_tables():
    for year in config.STUDY_YEARS:
        strings = {
            # table names
            "eia_table": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "io_table": "%s.transact_view_%d" % (config.IO_SCHEMA, year),
            "map_table": "%s.eia_code_map_%d" % (config.IO_SCHEMA, year),
            "hybrid_table": "hybrid_transactions_%d" % year,

            # selected sector codes
            "pa_trans_code": eia.source_naics_map['PA-trans'][year],
            "pa_nontrans_code": eia.source_naics_map['PA-nontrans'][year],
        }

        runsql("DROP TABLE IF EXISTS %s CASCADE" % strings["hybrid_table"])
        runsql("""
            CREATE TABLE %(hybrid_table)s (
                from_sector varchar(6),
                to_sector varchar(6),
                expenditure float
            )""" % strings)

        for source in eia.sources:
            strings["shares_view"] = "%s_ex_proportions_%s" % (source.lower(),
                                                               year)
            strings["source"] = source
            strings["source_naics"] = eia.source_naics_map[source][year]

            # seds prices are c.i.f., but when allocating btu among user
            # industries it is unfair to assign based on c.i.f. since what
            # they end up using is f.o.b.
            subquery = """
                SELECT codes.eia_sector, sum(io.fob) as fob
                  FROM %(io_table)s io, %(map_table)s codes
                 WHERE codes.eia_source = '%(source)s'
                   AND io.from_sector = '%(source_naics)s'
                   AND io.to_sector = codes.io_sector
                 GROUP BY codes.eia_sector""" % strings

            strings["subquery"] = subquery

            # the price each industry ends up actually paying for energy
            # should be the f.o.b. price which is (fob / cif) of the seds price
            runsql("""
            CREATE OR REPLACE VIEW %(shares_view)s AS
                SELECT io.to_sector, codes.eia_sector,
                       cast(io.fob as float) / cast(io.cif as float) as fob_share,
                       cast(io.fob as float) / cast(totals.fob as float) as ex_share
                  FROM %(io_table)s io, %(map_table)s codes,
                       (%(subquery)s) totals
                 WHERE codes.eia_source = '%(source)s'
                   AND io.from_sector = '%(source_naics)s'
                   AND io.to_sector = codes.io_sector
                   AND totals.eia_sector = codes.eia_sector""" % strings)

            # split petroleum
            if source == 'PA':
                strings["aviation_pa"] = sqlhelper.set_repr(
                    eia.aviation_petroleum)
                strings["other_pa"] = sqlhelper.set_repr(eia.other_petroleum)
                strings["all_pa"] = sqlhelper.set_repr(eia.aviation_petroleum +
                                                       eia.other_petroleum)

                strings["pa_nontrans_view"] = "pa_nontrans_%d" % year
                strings["pa_trans_view"] = "pa_trans_%d" % year
                strings[
                    "pa_trans_shares_view"] = "pa_trans_proportions_%d" % year
                strings["aviation_code"] = eia.air_transportation_codes[year]

                # non transportation petroleum use
                runsql("""
                CREATE OR REPLACE VIEW %(pa_nontrans_view)s AS
                    SELECT shares.to_sector, shares.eia_sector,
                           sum(shares.ex_share * eia.use_btu) as btu,
                           sum(shares.ex_share * eia.use_btu
                               * eia.price * shares.fob_share) as ex
                      FROM %(shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source in %(all_pa)s
                       AND shares.eia_sector = eia.sector
                       -- these two below are double counted
                       AND eia.source || eia.sector not in ('DFEI', 'PCIC')
                       AND eia.sector <> 'AC'
                     GROUP BY shares.to_sector, shares.eia_sector""" % strings)

                # petroleum use for transportation other than air
                runsql("""
                CREATE OR REPLACE VIEW %(pa_trans_view)s AS
                    SELECT io.to_sector, io.fob,
                           io.fob - nontrans.ex as remaining
                      FROM %(io_table)s io, %(pa_nontrans_view)s nontrans
                     WHERE io.from_sector = '%(source_naics)s'
                       AND io.to_sector = nontrans.to_sector
                       -- remaining is negative for IC and EI
                       AND nontrans.eia_sector in ('CC', 'RC')
                   UNION
                    SELECT io.to_sector, io.fob,
                           cast(io.fob as float) as remaining
                      FROM %(io_table)s io, %(map_table)s codes
                     WHERE io.from_sector = '%(source_naics)s'
                       AND io.to_sector = codes.io_sector
                       AND codes.eia_source = 'PA'
                       AND codes.eia_sector = 'AC'
                       AND io.to_sector <> '%(aviation_code)s' """ % strings)

                # proportions for petroleum allocated to transportation
                runsql("""
                CREATE OR REPLACE VIEW %(pa_trans_shares_view)s AS
                    SELECT use.to_sector, use.remaining / total.total as ex_share
                      FROM %(pa_trans_view)s use,
                           (SELECT sum(remaining) as total
                              FROM %(pa_trans_view)s) total""" % strings)

                # allocate all of JF and AV to air transportation
                runsql("""
                INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_trans_code)s', io.to_sector, sum(eia.use_btu)
                      FROM %(io_table)s io,
                           %(eia_table)s eia
                     WHERE eia.source in %(aviation_pa)s
                       and eia.sector = 'AC'
                       and io.from_sector = '%(source_naics)s'
                       and io.to_sector = '%(aviation_code)s'
                     GROUP BY io.to_sector """ % strings)

                # allocate all other transportation
                runsql("""
                INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_trans_code)s',
                           shares.to_sector, sum(shares.ex_share * eia.use_btu)
                      FROM %(pa_trans_shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source in %(other_pa)s
                       AND eia.sector = 'AC'
                     GROUP BY shares.to_sector""" % strings)

                # allocate non-transportation petroleum use
                runsql("""
                INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_nontrans_code)s', to_sector, btu
                      FROM %(pa_nontrans_view)s""" % strings)
                #WHERE eia_sector in ('IC', 'EI')"""

                # dependencies in reverse order
                runsql("DROP VIEW %s" % strings["pa_trans_shares_view"])
                runsql("DROP VIEW %s" % strings["pa_trans_view"])
                runsql("DROP VIEW %s" % strings["pa_nontrans_view"])
                runsql("DROP VIEW %s" % strings["shares_view"])

            else:
                runsql("""
                INSERT INTO %(hybrid_table)s
                    SELECT '%(source_naics)s',
                           shares.to_sector, shares.ex_share * eia.use_btu
                      FROM %(shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source = '%(source)s'
                       AND shares.eia_sector = eia.sector""" % strings)

                runsql("DROP VIEW %s" % strings["shares_view"])

        # insert remainder of standard io table
        energy_sectors = []
        for source in eia.sources:
            energy_sectors.append(eia.source_naics_map[source][year])

        strings["sectors"] = ", ".join(["'%s'" % s for s in energy_sectors])

        db.execute("""
            INSERT INTO %(hybrid_table)s
            SELECT from_sector, to_sector, fob
              FROM %(io_table)s
             WHERE from_sector not in (%(sectors)s)""" % strings)

        # split petroleum column proportional to trans and nontrans uses
        stmt = db.prepare("""
            SELECT trans.use_btu / total.use_btu as trans_share
              FROM (SELECT use_btu FROM %(eia_table)s
                     WHERE source = 'PA' AND sector = 'AC') trans,
                   (SELECT use_btu FROM %(eia_table)s
                     WHERE source = 'PA' AND sector = 'TC') total""" % strings)

        result = stmt()
        if len(result) and len(result[0]):
            strings["pa_naics"] = eia.source_naics_map['PA'][year]
            strings["trans_share"] = result[0][0]
            strings["nontrans_share"] = 1 - result[0][0]

            # transportation petroleum use column
            runsql("""
                INSERT INTO %(hybrid_table)s
                SELECT from_sector, '%(pa_trans_code)s',
                       %(trans_share).4f * expenditure
                  FROM %(hybrid_table)s
                 WHERE to_sector = '%(pa_naics)s' """ % strings)

            # non-transportation petroleum use column
            runsql("""
                UPDATE %(hybrid_table)s
                   SET expenditure = %(nontrans_share).4f * expenditure,
                       to_sector = '%(pa_nontrans_code)s'
                 WHERE to_sector = '%(pa_naics)s' """ % strings)
예제 #10
0
def runsql(sql):
    print(sql)
    db.execute(sql)
예제 #11
0
파일: parser.py 프로젝트: sonya/eea
def create_views():
    va_sectors = set(config.va_sectors.values())
    fd_sectors = set(config.fd_sectors.values())

    for year in config.STUDY_YEARS:
        strings = {
            "test_schema": common.config.TEST_SCHEMA,
            "schema": config.WIOD_SCHEMA,
            "extra_schema": "wiod_plus",
            "year": year,
            "fd_sectors": sqlhelper.set_repr(fd_sectors),
            "va_sectors": sqlhelper.set_repr(va_sectors),
            "margins": sqlhelper.set_repr(config.margin_sectors)
        }

        ### indbyind tables ignoring imports
        db.execute("""CREATE OR REPLACE VIEW %(schema)s.indbyind_%(year)d AS
               SELECT country, from_ind, to_ind, value
                 FROM %(schema)s.niot_%(year)d
                WHERE NOT is_import
             UNION
               SELECT country, from_ind, 'IMP', sum(value)
                 FROM %(schema)s.niot_%(year)d
                WHERE is_import
                GROUP BY country, from_ind""" % strings)

        #continue

        # co2 intensity views
        # put in test since we're just checking results
        sql = """CREATE OR REPLACE VIEW %(test_schema)s.co2_intensity_%(year)d AS
            SELECT a.country, CAST(a.gdp as int) gdp,
                   CAST(b.emissions as int) emissions,
                   b.emissions / a.gdp AS intensity
              FROM (SELECT country, sum(value) AS gdp
                      FROM %(schema)s.indbyind_%(year)d
                     WHERE from_ind not in %(va_sectors)s
                       AND to_ind in %(fd_sectors)s
                     GROUP BY country) a,
                   (SELECT country, value AS emissions
                      FROM %(schema)s.env_%(year)d where industry = 'total'
                       AND measurement = 'CO2') b
             WHERE a.country = b.country
             ORDER BY country""" % strings
        db.execute(sql)

        # commodity output proportions tables for all countries
        sql = """CREATE OR REPLACE VIEW %(schema)s.comshare_%(year)d AS
            SELECT make.country, make.commodity, make.industry,
                   make.value / totals.value AS use_share
              FROM (SELECT country, commodity, industry, value
                      FROM wiod.int_make_%(year)d
                     WHERE commodity not in %(va_sectors)s
                       AND industry not in %(margins)s) make,
                   (SELECT country, commodity, sum(value) as value
                      FROM wiod.int_make_%(year)d
                     WHERE commodity not in %(va_sectors)s
                       AND industry not in %(margins)s
                     GROUP BY country, commodity) totals
             WHERE make.country = totals.country
               AND make.commodity = totals.commodity""" % strings
        db.execute(sql)

        for country in config.countries:
            strings["country"] = country.lower()
            table = "%(extra_schema)s.%(country)s_io_import_%(year)d" % strings
            strings["io_import_table"] = table

            sql = "DROP TABLE IF EXISTS %(io_import_table)s" % strings
            db.execute(sql)

            sql = """SELECT comshare.country,
                            comshare.industry AS from_sector,
                            use.industry AS to_sector,
                            sum(use.value * comshare.use_share) AS value
                       INTO %(io_import_table)s
                       FROM %(schema)s.comshare_%(year)d comshare,
                            (SELECT from_country, industry, commodity, value
                               FROM %(schema)s.int_use_%(year)d
                              WHERE to_country = $1
                                AND from_country <> $1) use
                      WHERE comshare.country = use.from_country
                        AND comshare.commodity = use.commodity
                      GROUP BY comshare.country, comshare.industry,
                            use.industry""" % strings

            print(sql)
            stmt = db.prepare(sql)
            stmt(country)
예제 #12
0
파일: pcebridge.py 프로젝트: sonya/eea
def parse_nipa_data():
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA
    db.execute("DROP VIEW IF EXISTS %s" % test_view)

    # get table for pce category harmonization
    trailing_pat = re.compile('(.+) \(.*\d.*\)$')
    
    nipa_code_map = {}
    filename = fileutils.getdatapath("nipa_code_map.csv", "usa")
    fh = open(filename)
    csvf = csv.reader(fh)
    for row in csvf:
        if len(row) == 2:
            harmonized = row[0]
            trailing = trailing_pat.match(harmonized)
            if trailing:
                harmonized = trailing.group(1)
            nipa_code_map[row[1]] = harmonized
    fh.close()
    
    # get nipa series codes from underlying detail tables
    tracker = TableStateTracker()
    tracker.create_table("%s.pce_codes" % config.NIPA_SCHEMA,
                         ["code", "parent", "description"],
                         ["char(7)", "char(7)", "text"],
                         True)

    number_pat = re.compile('^\d+$')
    trailing_pat = re.compile('(.+) \(.*\d.*\)$')
    
    filename = fileutils.getcache("Section2All_underlying.csv", "bea", "nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False
    
    code_stack = [None]
    indent_stack = [-1]
    
    # the code mapping has been done such that each item is at least at
    # three levels of disaggregation below the top, i.e. there is always
    # an ancestor at the second level. we only want to keep track of the
    # ancestor at the third level (root is zero)
    # the first level below root has goods and services
    # the second level has durable goods, nondurable goods, and services.
    reverse_code_dict = {}
    second_level_nodes = []
    
    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 2.4.5U"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 2.4.5U"):
                    # we only need to go through one instance of this table
                    break
                else:
                    if number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()
    
                        # these are duplicate codes
                        if title.startswith("Market-based PCE"):
                            continue
    
                        code = row[2]
                        current_indent = len(row[1]) - len(title)
    
                        while current_indent <= indent_stack[-1]:
                            indent_stack.pop()
                            code_stack.pop()
    
                        indent_stack.append(current_indent)
                        code_stack.append(code)
    
                        if len(code_stack) > 1:
                            parent = code_stack[-2]
                        else:
                            parent = None
    
                        title = title.strip()
                        trailing = trailing_pat.match(title)
                        if trailing:
                            title = trailing.group(1)
                        
                        if len(code_stack) > 4:
                            reverse_code_dict[title] = code_stack[3]
                        else:
                            reverse_code_dict[title] = code
    
                        tracker.insert_row((code, parent, title))
    
    tracker.flush()
    fh.close()
    
    # table for price deflators
    
    tracker.create_table("%s.implicit_price_deflators" % config.NIPA_SCHEMA,
                         ["year", "gdp", "pce"],
                         ["int", "float", "float"])
    
    filename = fileutils.getcache("Section1all_csv.csv", "bea/nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False
    
    data = {} # we need to parse two rows before we can populate
    years = {}
    
    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 1.1.9"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 1.1.9"):
                    # this is seasonally adjusted version of the same table
                    break
                else:
                    if row[0] == "Line":
                        for i in range(len(row)):
                            if number_pat.match(row[i]):
                                year = int(row[i])
                                years[year] = i
                                data[year] = {}
    
                    elif number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()
                        if title == "Gross domestic product":
                            column = "gdp"
                        elif title == "Personal consumption expenditures":
                            column = "pce"
                        else:
                            continue
    
                        for (year, colindex) in years.items():
                            data[year][column] = float(row[colindex])
    
    for (year, results) in data.items():
        tracker.insert_row([year, results["gdp"], results["pce"]])
    
    tracker.flush()
    fh.close()
    
    # parse pce bridge
    
    class IONIPAStateTracker(TableStateTracker):
    
        def flush(self):
            TableStateTracker.flush(self)
            if self.fh is not None and not self.fh.closed:
                self.fh.close()
    
        def __init__(self):
            TableStateTracker.__init__(self)
            self.fh = None
            self.code_dict = None
    
            self.value_columns = [
                "prod_val",
                "rail_margin",
                "truck_margin",
                "water_margin",
                "air_margin",
                "pipe_margin",
                "gaspipe_margin",
                "wholesale_margin",
                "retail_margin",
                "purchase_val"
                ]
    
            self.old_style_field_map = {
                "Producers' Value": "prod_val",
                "MfgExciseTax": "prod_val",
                "RailMargin": "rail_margin",
                "TruckMargin": "truck_margin",
                "WaterMargin": "water_margin",
                "AirMargin": "air_margin",
                "PipeMargin": "pipe_margin",
                "WholesaleMargin": "wholesale_margin",
                "WholesaleTax": "wholesale_margin",
                "RetailMargin": "retail_margin",
                "RetailSalesTax": "retail_margin",
                "OtherRetailTax": "retail_margin",
                "Purchasers' Value": "purchase_val",
                }
    
        def set_filename(self, filename):
            path = fileutils.getcache(filename, str(self.year))
            self.filename = path
    
        def set_year(self, year):
            self.flush()
            self.year = year
            tablename = "%s.pcebridge_%d" % (config.IO_SCHEMA, year)
            fields = ["pce_code", "commodity"] + self.value_columns
            types = ["varchar(6)", "varchar(6)"] + \
                ["bigint"]*len(self.value_columns)
            self.create_table(tablename, fields, types)
    
        def setup_for_codes(self):
            self.code_dict = {}
    
        def flush_codes(self):
            if self.code_dict is not None:
                tablename = "%s.nipa_codes_%d" % (config.IO_SCHEMA, self.year)
                self.create_table(tablename,
                                  ["pce_code", "nipa_group", "description"],
                                  ["varchar(6)", "char(7)", "text"])
                for (code, raw_desc) in self.code_dict.items():
    
                    desc = raw_desc
                    if desc.endswith('(s.)') or desc.endswith('(d.)'):
                        desc = desc[:-4].strip()
                    elif desc.endswith('(n.d.)'):
                        desc = desc[:-6].strip()
    
                    if desc in nipa_code_map:
                        desc = nipa_code_map[desc]
    
                    if desc in reverse_code_dict:
                        nipa_code = reverse_code_dict[desc]
                    else:
                        nipa_code = None
                    #self.current_stmt(code, nipa_code, raw_desc)
                    self.table.insert([code, nipa_code, raw_desc])
    
                self.code_dict = None
                self.flush()
    
        def insert_code_row(self, code, desc):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(code) is float:
                code = int(code)
    
            self.code_dict[str(code)] = desc.strip()
    
        def insert_row(self, pce_code, commod, dollar_values, factor=1):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(pce_code) is float:
                pce_code = int(pce_code)
    
            values = [str(pce_code).strip(), commod.strip()]
            for column in self.value_columns:
                if column in dollar_values:
                    if factor == 1:
                        values.append(dollar_values[column])
                    else:
                        values.append(int(float(dollar_values[column]) * factor))
                else:
                    values.append(None)
            #self.current_stmt(*values)
            self.table.insert(values)
    
        def parse_old_style_xls(self, year):
            self.set_year(year)
            self.set_filename("%d_PCE_Commodity.xls" % self.year)
            wb = xlrd.open_workbook(self.filename)
    
            # parse pce bridge data
            sheet = wb.sheet_by_name("%d PCE Workfile - Commodity" % self.year)
            field_indexes = {}
            pce_code_idx = 0
            commod_idx = 2
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    if "PCE Category" in row:
                        pce_code_idx = row.index("PCE Category")
                        if "Commodity" in row:
                            commod_idx = row.index("Commodity")
                        for i in range(len(row)):
                            xls_col = row[i]
                            if xls_col in self.old_style_field_map:
                                colname = self.old_style_field_map[xls_col]
                                if colname not in field_indexes:
                                    field_indexes[colname] = []
                                field_indexes[colname].append(i)
                    elif len(field_indexes):
                        pce_code = row[pce_code_idx]
                        commod = str(int(row[commod_idx])).rjust(6, "0")
                        values = {}
                        for (field, columns) in field_indexes.items():
                            # doclumentation says units are in 100,000 dollars
                            # but the orders of magnitude don't match up with
                            # later years if we use 100
                            components = [int(float(row[column] * 1000))
                                          for column in columns]
                            value = 0
                            for component in components:
                                value += component
                            values[field] = value
                        self.insert_row(pce_code, commod, values)
    
            # parse codes from neighboring worksheet
            self.setup_for_codes()
            sheet = wb.sheet_by_name("%d PCE Category Descriptions" % self.year)
            code_idx = None
            desc_idx = None
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    codetab = "PCE Category Code"
                    codetab2 = "%s - %d" % (codetab, self.year)
                    if codetab in row or codetab2 in row:
                        if codetab in row:
                            code_idx = row.index(codetab)
                        else:
                            code_idx = row.index(codetab2)
                        desctab = "PCE Category Description - %d" % self.year
                        if desctab in row:
                            desc_idx = row.index(desctab)
                        else:
                            desctab = "PCE Category Description"
                            if desctab in row:
                                desc_idx = row.index(desctab)
                    elif code_idx is not None and desc_idx is not None:
                        code = row[code_idx]
                        desc = str(row[desc_idx])
                        self.insert_code_row(code, desc)
            self.flush_codes()
    
        def get_file_handle(self, filetype, options={}):
            if filetype == "txt":
                self.fh = open(self.filename)
                return self.fh
            elif filetype == "csv":
                self.fh = open(self.filename)
                if "delim" in options:
                    csvf = csv.reader(self.fh, delimiter=options["delim"])
                else:
                    csvf = csv.reader(self.fh)
                return csvf
            elif filetype == "xls":
                wb = xlrd.open_workbook(self.filename)
                return wb
    
        def parse_text(self, rowcallback):
            path = fileutils.getcache(filename, str(self.year))
            f = open(path)
            for line in f:
                rowcallback(line, this)
            f.close()
    
    tracker = IONIPAStateTracker()
    tracker.parse_old_style_xls(1967)
    tracker.parse_old_style_xls(1972)
    tracker.parse_old_style_xls(1977)
    tracker.parse_old_style_xls(1982)
    
    tracker.set_year(1987)
    tracker.set_filename("tbld-87.dat")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 103:
            continue
        commod = line[0:6]
        pce_code = line[14:18]
        values = {
            "prod_val": line[21:30],
            "rail_margin": line[30:39],
            "truck_margin": line[39:48],
            "water_margin": line[48:57],
            "air_margin": line[57:66],
            "pipe_margin": line[66:75],
            "wholesale_margin": line[75:84],
            "retail_margin": line[84:93],
            "purchase_val": line[93:102],
            }
        tracker.insert_row(pce_code, commod, values, 1000)
    
    tracker.setup_for_codes()
    tracker.set_filename("io-nipa.doc")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 27:
            continue
        code = line[0:4].strip()
        desc = line[26:].strip()
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()
    
    tracker.set_year(1992)
    tracker.set_filename("TabD.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        values = {
            "prod_val": row[4],
            "rail_margin": row[5],
            "truck_margin": row[6],
            "water_margin": row[7],
            "air_margin": row[8],
            "pipe_margin": row[9],
            "gaspipe_margin": row[10],
            "wholesale_margin": row[11],
            "retail_margin": row[12],
            "purchase_val": row[13],
            }
        tracker.insert_row(row[2], row[0], values, 1000)
    
    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        code = row[0]
        desc = row[4]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()
    
    tracker.set_year(1997)
    tracker.set_filename("AppendixC_Detail.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        values = {
            "prod_val": row[3],
            "rail_margin": row[4],
            "truck_margin": row[5],
            "water_margin": row[6],
            "air_margin": row[7],
            "pipe_margin": row[8],
            "gaspipe_margin": row[9],
            "wholesale_margin": row[10],
            "retail_margin": row[11],
            "purchase_val": row[12],
            }
        tracker.insert_row(row[1], row[0], values, 1000)
    
    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA_PCE.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        code = row[1]
        desc = row[2]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()
    
    tracker.set_year(2002)
    tracker.setup_for_codes() # do this simultaneously since it's all one file
    tracker.set_filename("2002_PCE_Bridge.xls")
    wb = tracker.get_file_handle("xls")
    naics_pat = re.compile('[A-Z0-9]{6}')
    sheet = wb.sheet_by_name("PCE_Bridge_Detail")
    pce_codes = []
    for rowindex in range(sheet.nrows):
        row = sheet.row_values(rowindex)
        if len(row) == 13 and naics_pat.match(row[1]):
            pce_desc = row[0]
            # we don't need the distinction between households and
            # nonprofit institutions service households
            parts = pce_desc.split('-')
            if len(parts) > 1:
                lastpart = parts[-1].strip()
                if lastpart == 'HH' or lastpart == 'NPISH':
                    pce_desc = '-'.join(parts[:-1])
            pce_desc = pce_desc.strip()
    
            if pce_desc in pce_codes:
                pce_code = pce_codes.index(pce_desc)
            else:
                pce_code = len(pce_codes)
                pce_codes.append(pce_desc)
                tracker.insert_code_row(str(pce_code), pce_desc)
            
            values = {
                "prod_val": row[3],
                "rail_margin": row[4],
                "truck_margin": row[5],
                "water_margin": row[6],
                "air_margin": row[7],
                "pipe_margin": row[8],
                "gaspipe_margin": row[9],
                "wholesale_margin": row[10],
                "retail_margin": row[11],
                "purchase_val": row[12],
                }
            tracker.insert_row(str(pce_code), row[1], values, 1000)
    
    tracker.flush_codes()
예제 #13
0
def dopostparse():
    for year in config.STUDY_YEARS:
        tablename = config.IO_SCHEMA + ".eia_code_map_" + str(year)
        codetable = config.IO_SCHEMA + ".codes_" + str(year)

        db.execute("DROP TABLE IF EXISTS %s CASCADE" % tablename)
        db.execute("""
    CREATE TABLE %s (
        io_sector char(6),
        eia_source char(2),
        eia_sector char(2)
    );""" % tablename)

        if year <= 1992:
            conditions = eia.sector_naics_map_old
        else:
            conditions = eia.sector_naics_map

        for source in eia.sources:
            # AC prices for natural gas are zero for years prior to 1990
            # but except for petroleum, only the use_btu column is looked at
            # so this mapping still works for NG/AC before 1990.
            cases = [
                "WHEN %s THEN '%s'" %
                (conditions[sector],
                 eia.valid_sectors_by_source[source][sector])
                for sector in ("ind", "res", "elec", "com", "trans")
            ]

            db.execute("""
    INSERT INTO %s
    SELECT code, '%s', CASE
           %s
           ELSE 'IC' END
      FROM %s;
    """ % (tablename, source, "\n       ".join(cases), codetable))

        # create views
        checktotal_parts = []
        checksector_parts = []

        strings = {
            "iotable": "%s.transact_view_%d" % (config.IO_SCHEMA, year),
            "eiatable": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "maptable": "%s.eia_code_map_%d" % (config.IO_SCHEMA, year),
        }

        for (eia_source, io_sectors) in eia.source_naics_map.items():
            if eia_source not in eia.sources:
                # make sure we don't create tables for modified sources
                continue

            strings["io_view"] = "io_%s_%d" % (eia_source.lower(), year)
            strings["from_sector"] = io_sectors[year]
            strings["eia_source"] = eia_source

            runsql("""
    CREATE OR REPLACE VIEW %(io_view)s AS
      SELECT io.to_sector, energy.sector, io.fob, io.cif,
             energy.price * (cast(io.fob as float) / cast(io.cif as float)) as price
        FROM %(iotable)s io,
             %(maptable)s codes,
             %(eiatable)s energy
       WHERE io.from_sector = '%(from_sector)s'
         AND energy.source = '%(eia_source)s'
         AND codes.eia_source = '%(eia_source)s'
         AND io.to_sector = codes.io_sector
         AND energy.sector = codes.eia_sector;
    """ % strings)

            checktotal_parts.append("""
      SELECT eia.source,
             CAST(eia.ex as int) AS eia_ex,
             SUM(io.cif) AS io_ex,
             CAST(eia.use_btu as int) AS eia_btu,
             CAST(SUM(io.fob / io.price) as int) AS io_btu
        FROM %(eiatable)s eia, %(io_view)s io
       WHERE eia.source = '%(eia_source)s'
         AND eia.sector = 'TC'
       GROUP BY eia.source, eia.ex, eia.use_btu
    """ % strings)

            checksector_parts.append("""
      SELECT eia.source, eia.sector,
             CAST(eia.ex as int) AS eia_ex,
             SUM(io.cif) AS io_ex,
             CAST(eia.use_btu as int) AS eia_btu,
             CAST(SUM(io.fob / io.price) as int) AS io_btu
        FROM %(eiatable)s eia, %(io_view)s io
       WHERE eia.source = '%(eia_source)s'
         AND eia.sector = io.sector
         AND eia.sector <> 'TC'
       GROUP BY eia.source, eia.sector, eia.price, eia.ex, eia.use_btu
    """ % strings)

        checktotal_view = "%s.eia_io_totals_%d" % (common.config.TEST_SCHEMA,
                                                   year)
        checksector_view = "%s.eia_io_sectors_%d" % (common.config.TEST_SCHEMA,
                                                     year)

        runsql("CREATE OR REPLACE VIEW %s AS %s" % \
            (checktotal_view, "UNION".join(checktotal_parts)))
        runsql("CREATE OR REPLACE VIEW %s AS %s" % \
            (checksector_view, "UNION".join(checksector_parts)))
예제 #14
0
파일: parser.py 프로젝트: sonya/eea
def create_views():
    va_sectors = set(config.va_sectors.values())
    fd_sectors = set(config.fd_sectors.values())

    for year in config.STUDY_YEARS:
        strings = {
            "test_schema": common.config.TEST_SCHEMA,
            "schema": config.WIOD_SCHEMA,
            "extra_schema": "wiod_plus",
            "year": year,
            "fd_sectors": sqlhelper.set_repr(fd_sectors),
            "va_sectors": sqlhelper.set_repr(va_sectors),
            "margins": sqlhelper.set_repr(config.margin_sectors)
            }

        ### indbyind tables ignoring imports
        db.execute(
            """CREATE OR REPLACE VIEW %(schema)s.indbyind_%(year)d AS
               SELECT country, from_ind, to_ind, value
                 FROM %(schema)s.niot_%(year)d
                WHERE NOT is_import
             UNION
               SELECT country, from_ind, 'IMP', sum(value)
                 FROM %(schema)s.niot_%(year)d
                WHERE is_import
                GROUP BY country, from_ind""" % strings)

        #continue

        # co2 intensity views
        # put in test since we're just checking results
        sql = """CREATE OR REPLACE VIEW %(test_schema)s.co2_intensity_%(year)d AS
            SELECT a.country, CAST(a.gdp as int) gdp,
                   CAST(b.emissions as int) emissions,
                   b.emissions / a.gdp AS intensity
              FROM (SELECT country, sum(value) AS gdp
                      FROM %(schema)s.indbyind_%(year)d
                     WHERE from_ind not in %(va_sectors)s
                       AND to_ind in %(fd_sectors)s
                     GROUP BY country) a,
                   (SELECT country, value AS emissions
                      FROM %(schema)s.env_%(year)d where industry = 'total'
                       AND measurement = 'CO2') b
             WHERE a.country = b.country
             ORDER BY country""" % strings
        db.execute(sql)

        # commodity output proportions tables for all countries
        sql = """CREATE OR REPLACE VIEW %(schema)s.comshare_%(year)d AS
            SELECT make.country, make.commodity, make.industry,
                   make.value / totals.value AS use_share
              FROM (SELECT country, commodity, industry, value
                      FROM wiod.int_make_%(year)d
                     WHERE commodity not in %(va_sectors)s
                       AND industry not in %(margins)s) make,
                   (SELECT country, commodity, sum(value) as value
                      FROM wiod.int_make_%(year)d
                     WHERE commodity not in %(va_sectors)s
                       AND industry not in %(margins)s
                     GROUP BY country, commodity) totals
             WHERE make.country = totals.country
               AND make.commodity = totals.commodity""" % strings
        db.execute(sql)

        for country in config.countries:
            strings["country"] = country.lower()
            table = "%(extra_schema)s.%(country)s_io_import_%(year)d" % strings
            strings["io_import_table"] = table

            sql = "DROP TABLE IF EXISTS %(io_import_table)s" % strings
            db.execute(sql)

            sql = """SELECT comshare.country,
                            comshare.industry AS from_sector,
                            use.industry AS to_sector,
                            sum(use.value * comshare.use_share) AS value
                       INTO %(io_import_table)s
                       FROM %(schema)s.comshare_%(year)d comshare,
                            (SELECT from_country, industry, commodity, value
                               FROM %(schema)s.int_use_%(year)d
                              WHERE to_country = $1
                                AND from_country <> $1) use
                      WHERE comshare.country = use.from_country
                        AND comshare.commodity = use.commodity
                      GROUP BY comshare.country, comshare.industry,
                            use.industry""" % strings
    
            print(sql)
            stmt = db.prepare(sql)
            stmt(country)
예제 #15
0
파일: pcebridge.py 프로젝트: sonya/eea
def parse_nipa_data():
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA
    db.execute("DROP VIEW IF EXISTS %s" % test_view)

    # get table for pce category harmonization
    trailing_pat = re.compile('(.+) \(.*\d.*\)$')

    nipa_code_map = {}
    filename = fileutils.getdatapath("nipa_code_map.csv", "usa")
    fh = open(filename)
    csvf = csv.reader(fh)
    for row in csvf:
        if len(row) == 2:
            harmonized = row[0]
            trailing = trailing_pat.match(harmonized)
            if trailing:
                harmonized = trailing.group(1)
            nipa_code_map[row[1]] = harmonized
    fh.close()

    # get nipa series codes from underlying detail tables
    tracker = TableStateTracker()
    tracker.create_table("%s.pce_codes" % config.NIPA_SCHEMA,
                         ["code", "parent", "description"],
                         ["char(7)", "char(7)", "text"], True)

    number_pat = re.compile('^\d+$')
    trailing_pat = re.compile('(.+) \(.*\d.*\)$')

    filename = fileutils.getcache("Section2All_underlying.csv", "bea", "nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False

    code_stack = [None]
    indent_stack = [-1]

    # the code mapping has been done such that each item is at least at
    # three levels of disaggregation below the top, i.e. there is always
    # an ancestor at the second level. we only want to keep track of the
    # ancestor at the third level (root is zero)
    # the first level below root has goods and services
    # the second level has durable goods, nondurable goods, and services.
    reverse_code_dict = {}
    second_level_nodes = []

    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 2.4.5U"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 2.4.5U"):
                    # we only need to go through one instance of this table
                    break
                else:
                    if number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()

                        # these are duplicate codes
                        if title.startswith("Market-based PCE"):
                            continue

                        code = row[2]
                        current_indent = len(row[1]) - len(title)

                        while current_indent <= indent_stack[-1]:
                            indent_stack.pop()
                            code_stack.pop()

                        indent_stack.append(current_indent)
                        code_stack.append(code)

                        if len(code_stack) > 1:
                            parent = code_stack[-2]
                        else:
                            parent = None

                        title = title.strip()
                        trailing = trailing_pat.match(title)
                        if trailing:
                            title = trailing.group(1)

                        if len(code_stack) > 4:
                            reverse_code_dict[title] = code_stack[3]
                        else:
                            reverse_code_dict[title] = code

                        tracker.insert_row((code, parent, title))

    tracker.flush()
    fh.close()

    # table for price deflators

    tracker.create_table("%s.implicit_price_deflators" % config.NIPA_SCHEMA,
                         ["year", "gdp", "pce"], ["int", "float", "float"])

    filename = fileutils.getcache("Section1all_csv.csv", "bea/nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False

    data = {}  # we need to parse two rows before we can populate
    years = {}

    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 1.1.9"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 1.1.9"):
                    # this is seasonally adjusted version of the same table
                    break
                else:
                    if row[0] == "Line":
                        for i in range(len(row)):
                            if number_pat.match(row[i]):
                                year = int(row[i])
                                years[year] = i
                                data[year] = {}

                    elif number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()
                        if title == "Gross domestic product":
                            column = "gdp"
                        elif title == "Personal consumption expenditures":
                            column = "pce"
                        else:
                            continue

                        for (year, colindex) in years.items():
                            data[year][column] = float(row[colindex])

    for (year, results) in data.items():
        tracker.insert_row([year, results["gdp"], results["pce"]])

    tracker.flush()
    fh.close()

    # parse pce bridge

    class IONIPAStateTracker(TableStateTracker):
        def flush(self):
            TableStateTracker.flush(self)
            if self.fh is not None and not self.fh.closed:
                self.fh.close()

        def __init__(self):
            TableStateTracker.__init__(self)
            self.fh = None
            self.code_dict = None

            self.value_columns = [
                "prod_val", "rail_margin", "truck_margin", "water_margin",
                "air_margin", "pipe_margin", "gaspipe_margin",
                "wholesale_margin", "retail_margin", "purchase_val"
            ]

            self.old_style_field_map = {
                "Producers' Value": "prod_val",
                "MfgExciseTax": "prod_val",
                "RailMargin": "rail_margin",
                "TruckMargin": "truck_margin",
                "WaterMargin": "water_margin",
                "AirMargin": "air_margin",
                "PipeMargin": "pipe_margin",
                "WholesaleMargin": "wholesale_margin",
                "WholesaleTax": "wholesale_margin",
                "RetailMargin": "retail_margin",
                "RetailSalesTax": "retail_margin",
                "OtherRetailTax": "retail_margin",
                "Purchasers' Value": "purchase_val",
            }

        def set_filename(self, filename):
            path = fileutils.getcache(filename, str(self.year))
            self.filename = path

        def set_year(self, year):
            self.flush()
            self.year = year
            tablename = "%s.pcebridge_%d" % (config.IO_SCHEMA, year)
            fields = ["pce_code", "commodity"] + self.value_columns
            types = ["varchar(6)", "varchar(6)"] + \
                ["bigint"]*len(self.value_columns)
            self.create_table(tablename, fields, types)

        def setup_for_codes(self):
            self.code_dict = {}

        def flush_codes(self):
            if self.code_dict is not None:
                tablename = "%s.nipa_codes_%d" % (config.IO_SCHEMA, self.year)
                self.create_table(tablename,
                                  ["pce_code", "nipa_group", "description"],
                                  ["varchar(6)", "char(7)", "text"])
                for (code, raw_desc) in self.code_dict.items():

                    desc = raw_desc
                    if desc.endswith('(s.)') or desc.endswith('(d.)'):
                        desc = desc[:-4].strip()
                    elif desc.endswith('(n.d.)'):
                        desc = desc[:-6].strip()

                    if desc in nipa_code_map:
                        desc = nipa_code_map[desc]

                    if desc in reverse_code_dict:
                        nipa_code = reverse_code_dict[desc]
                    else:
                        nipa_code = None
                    #self.current_stmt(code, nipa_code, raw_desc)
                    self.table.insert([code, nipa_code, raw_desc])

                self.code_dict = None
                self.flush()

        def insert_code_row(self, code, desc):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(code) is float:
                code = int(code)

            self.code_dict[str(code)] = desc.strip()

        def insert_row(self, pce_code, commod, dollar_values, factor=1):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(pce_code) is float:
                pce_code = int(pce_code)

            values = [str(pce_code).strip(), commod.strip()]
            for column in self.value_columns:
                if column in dollar_values:
                    if factor == 1:
                        values.append(dollar_values[column])
                    else:
                        values.append(
                            int(float(dollar_values[column]) * factor))
                else:
                    values.append(None)
            #self.current_stmt(*values)
            self.table.insert(values)

        def parse_old_style_xls(self, year):
            self.set_year(year)
            self.set_filename("%d_PCE_Commodity.xls" % self.year)
            wb = xlrd.open_workbook(self.filename)

            # parse pce bridge data
            sheet = wb.sheet_by_name("%d PCE Workfile - Commodity" % self.year)
            field_indexes = {}
            pce_code_idx = 0
            commod_idx = 2
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    if "PCE Category" in row:
                        pce_code_idx = row.index("PCE Category")
                        if "Commodity" in row:
                            commod_idx = row.index("Commodity")
                        for i in range(len(row)):
                            xls_col = row[i]
                            if xls_col in self.old_style_field_map:
                                colname = self.old_style_field_map[xls_col]
                                if colname not in field_indexes:
                                    field_indexes[colname] = []
                                field_indexes[colname].append(i)
                    elif len(field_indexes):
                        pce_code = row[pce_code_idx]
                        commod = str(int(row[commod_idx])).rjust(6, "0")
                        values = {}
                        for (field, columns) in field_indexes.items():
                            # doclumentation says units are in 100,000 dollars
                            # but the orders of magnitude don't match up with
                            # later years if we use 100
                            components = [
                                int(float(row[column] * 1000))
                                for column in columns
                            ]
                            value = 0
                            for component in components:
                                value += component
                            values[field] = value
                        self.insert_row(pce_code, commod, values)

            # parse codes from neighboring worksheet
            self.setup_for_codes()
            sheet = wb.sheet_by_name("%d PCE Category Descriptions" %
                                     self.year)
            code_idx = None
            desc_idx = None
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    codetab = "PCE Category Code"
                    codetab2 = "%s - %d" % (codetab, self.year)
                    if codetab in row or codetab2 in row:
                        if codetab in row:
                            code_idx = row.index(codetab)
                        else:
                            code_idx = row.index(codetab2)
                        desctab = "PCE Category Description - %d" % self.year
                        if desctab in row:
                            desc_idx = row.index(desctab)
                        else:
                            desctab = "PCE Category Description"
                            if desctab in row:
                                desc_idx = row.index(desctab)
                    elif code_idx is not None and desc_idx is not None:
                        code = row[code_idx]
                        desc = str(row[desc_idx])
                        self.insert_code_row(code, desc)
            self.flush_codes()

        def get_file_handle(self, filetype, options={}):
            if filetype == "txt":
                self.fh = open(self.filename)
                return self.fh
            elif filetype == "csv":
                self.fh = open(self.filename)
                if "delim" in options:
                    csvf = csv.reader(self.fh, delimiter=options["delim"])
                else:
                    csvf = csv.reader(self.fh)
                return csvf
            elif filetype == "xls":
                wb = xlrd.open_workbook(self.filename)
                return wb

        def parse_text(self, rowcallback):
            path = fileutils.getcache(filename, str(self.year))
            f = open(path)
            for line in f:
                rowcallback(line, this)
            f.close()

    tracker = IONIPAStateTracker()
    tracker.parse_old_style_xls(1967)
    tracker.parse_old_style_xls(1972)
    tracker.parse_old_style_xls(1977)
    tracker.parse_old_style_xls(1982)

    tracker.set_year(1987)
    tracker.set_filename("tbld-87.dat")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 103:
            continue
        commod = line[0:6]
        pce_code = line[14:18]
        values = {
            "prod_val": line[21:30],
            "rail_margin": line[30:39],
            "truck_margin": line[39:48],
            "water_margin": line[48:57],
            "air_margin": line[57:66],
            "pipe_margin": line[66:75],
            "wholesale_margin": line[75:84],
            "retail_margin": line[84:93],
            "purchase_val": line[93:102],
        }
        tracker.insert_row(pce_code, commod, values, 1000)

    tracker.setup_for_codes()
    tracker.set_filename("io-nipa.doc")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 27:
            continue
        code = line[0:4].strip()
        desc = line[26:].strip()
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    tracker.set_year(1992)
    tracker.set_filename("TabD.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        values = {
            "prod_val": row[4],
            "rail_margin": row[5],
            "truck_margin": row[6],
            "water_margin": row[7],
            "air_margin": row[8],
            "pipe_margin": row[9],
            "gaspipe_margin": row[10],
            "wholesale_margin": row[11],
            "retail_margin": row[12],
            "purchase_val": row[13],
        }
        tracker.insert_row(row[2], row[0], values, 1000)

    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        code = row[0]
        desc = row[4]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    tracker.set_year(1997)
    tracker.set_filename("AppendixC_Detail.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        values = {
            "prod_val": row[3],
            "rail_margin": row[4],
            "truck_margin": row[5],
            "water_margin": row[6],
            "air_margin": row[7],
            "pipe_margin": row[8],
            "gaspipe_margin": row[9],
            "wholesale_margin": row[10],
            "retail_margin": row[11],
            "purchase_val": row[12],
        }
        tracker.insert_row(row[1], row[0], values, 1000)

    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA_PCE.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        code = row[1]
        desc = row[2]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    tracker.set_year(2002)
    tracker.setup_for_codes()  # do this simultaneously since it's all one file
    tracker.set_filename("2002_PCE_Bridge.xls")
    wb = tracker.get_file_handle("xls")
    naics_pat = re.compile('[A-Z0-9]{6}')
    sheet = wb.sheet_by_name("PCE_Bridge_Detail")
    pce_codes = []
    for rowindex in range(sheet.nrows):
        row = sheet.row_values(rowindex)
        if len(row) == 13 and naics_pat.match(row[1]):
            pce_desc = row[0]
            # we don't need the distinction between households and
            # nonprofit institutions service households
            parts = pce_desc.split('-')
            if len(parts) > 1:
                lastpart = parts[-1].strip()
                if lastpart == 'HH' or lastpart == 'NPISH':
                    pce_desc = '-'.join(parts[:-1])
            pce_desc = pce_desc.strip()

            if pce_desc in pce_codes:
                pce_code = pce_codes.index(pce_desc)
            else:
                pce_code = len(pce_codes)
                pce_codes.append(pce_desc)
                tracker.insert_code_row(str(pce_code), pce_desc)

            values = {
                "prod_val": row[3],
                "rail_margin": row[4],
                "truck_margin": row[5],
                "water_margin": row[6],
                "air_margin": row[7],
                "pipe_margin": row[8],
                "gaspipe_margin": row[9],
                "wholesale_margin": row[10],
                "retail_margin": row[11],
                "purchase_val": row[12],
            }
            tracker.insert_row(str(pce_code), row[1], values, 1000)

    tracker.flush_codes()
예제 #16
0
파일: eiasupp.py 프로젝트: sonya/eea
def dopostparse():
    for year in config.STUDY_YEARS:
        tablename = config.IO_SCHEMA + ".eia_code_map_" + str(year)
        codetable = config.IO_SCHEMA + ".codes_" + str(year)
    
        db.execute("DROP TABLE IF EXISTS %s CASCADE" % tablename)
        db.execute("""
    CREATE TABLE %s (
        io_sector char(6),
        eia_source char(2),
        eia_sector char(2)
    );""" % tablename)
    
        if year <= 1992:
            conditions = eia.sector_naics_map_old
        else:
            conditions = eia.sector_naics_map
    
        for source in eia.sources:
            # AC prices for natural gas are zero for years prior to 1990
            # but except for petroleum, only the use_btu column is looked at
            # so this mapping still works for NG/AC before 1990.
            cases = ["WHEN %s THEN '%s'"
                         % (conditions[sector],
                            eia.valid_sectors_by_source[source][sector])
                         for sector in ("ind", "res", "elec", "com", "trans")]
    
            db.execute("""
    INSERT INTO %s
    SELECT code, '%s', CASE
           %s
           ELSE 'IC' END
      FROM %s;
    """ % (tablename, source, "\n       ".join(cases), codetable))
    
        # create views
        checktotal_parts = []
        checksector_parts = []
    
        strings = {
            "iotable": "%s.transact_view_%d" % (config.IO_SCHEMA, year),
            "eiatable": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "maptable": "%s.eia_code_map_%d" % (config.IO_SCHEMA, year),
            }
    
        for (eia_source, io_sectors) in eia.source_naics_map.items():
            if eia_source not in eia.sources:
                # make sure we don't create tables for modified sources
                continue

            strings["io_view"] = "io_%s_%d" % (eia_source.lower(), year)
            strings["from_sector"] = io_sectors[year]
            strings["eia_source"] = eia_source

            runsql("""
    CREATE OR REPLACE VIEW %(io_view)s AS
      SELECT io.to_sector, energy.sector, io.fob, io.cif,
             energy.price * (cast(io.fob as float) / cast(io.cif as float)) as price
        FROM %(iotable)s io,
             %(maptable)s codes,
             %(eiatable)s energy
       WHERE io.from_sector = '%(from_sector)s'
         AND energy.source = '%(eia_source)s'
         AND codes.eia_source = '%(eia_source)s'
         AND io.to_sector = codes.io_sector
         AND energy.sector = codes.eia_sector;
    """ % strings)

            checktotal_parts.append("""
      SELECT eia.source,
             CAST(eia.ex as int) AS eia_ex,
             SUM(io.cif) AS io_ex,
             CAST(eia.use_btu as int) AS eia_btu,
             CAST(SUM(io.fob / io.price) as int) AS io_btu
        FROM %(eiatable)s eia, %(io_view)s io
       WHERE eia.source = '%(eia_source)s'
         AND eia.sector = 'TC'
       GROUP BY eia.source, eia.ex, eia.use_btu
    """ % strings)
    
            checksector_parts.append("""
      SELECT eia.source, eia.sector,
             CAST(eia.ex as int) AS eia_ex,
             SUM(io.cif) AS io_ex,
             CAST(eia.use_btu as int) AS eia_btu,
             CAST(SUM(io.fob / io.price) as int) AS io_btu
        FROM %(eiatable)s eia, %(io_view)s io
       WHERE eia.source = '%(eia_source)s'
         AND eia.sector = io.sector
         AND eia.sector <> 'TC'
       GROUP BY eia.source, eia.sector, eia.price, eia.ex, eia.use_btu
    """ % strings)
    
        checktotal_view = "%s.eia_io_totals_%d" % (common.config.TEST_SCHEMA, year)
        checksector_view = "%s.eia_io_sectors_%d" % (common.config.TEST_SCHEMA, year)
    
        runsql("CREATE OR REPLACE VIEW %s AS %s" % \
            (checktotal_view, "UNION".join(checktotal_parts)))
        runsql("CREATE OR REPLACE VIEW %s AS %s" % \
            (checksector_view, "UNION".join(checksector_parts)))
예제 #17
0
def runsql(sql):
    print(sql)
    db.execute(sql)
예제 #18
0
파일: parser.py 프로젝트: sonya/eea
def parse_io():
    io_files = {
        1996: "410281134571.xls",
        1999: "4102715414971.xls",
        2001: "4122111363671.xls",
        2004: "611239581071.xls",
        2006: "9121414285971.xls",
        2007: "1139203871.xls",
        2008: "1139204871.xls",
        2009: "11229101502.xls",
        2010: "1122910141371.xls",
    }

    for (year, filename) in io_files.items():
        tablename = "%s.io_%d" % (config.SCHEMA, year)

        # millions are in NTD
        table = SQLTable(tablename, ["from_sector", "to_sector", "millions"],
                         ["varchar(255)", "varchar(255)", "float"])
        table.create()
        table.truncate()

        path = fileutils.getcache(filename, "tw/%d" % year)
        wb = xlrd.open_workbook(path)
        sheet = wb.sheets()[0]
        to_codes = sheet.row_values(0)
        to_names = sheet.row_values(1)
        for rowindex in range(2, sheet.nrows):
            row = sheet.row_values(rowindex)
            from_code = row[0].strip()
            from_name = row[1].strip()
            for i in range(2, len(to_names)):
                to_name = to_names[i].strip()
                value = row[i]
                table.insert([from_name, to_name, value])

        if year == 2010:
            strings = {
                "viewname": "%s.io_view_%d" % (config.SCHEMA, year),
                "tablename": tablename,
                "maptable": "%s.sector_map_%d" % (config.SCHEMA, year),
                "to_blacklist": sqlhelper.set_repr(config.to_blacklists[year]),
                "from_blacklist":
                sqlhelper.set_repr(config.from_blacklists[year]),
            }

            sql = """CREATE OR REPLACE VIEW %(viewname)s AS
                SELECT from_map.io_sector AS from_sector,
                       to_map.io_sector as to_sector,
                       sum(millions) as millions
                  FROM %(tablename)s io,
                       (SELECT DISTINCT io_sector, io_commod
                          FROM %(maptable)s) from_map,
                       (SELECT DISTINCT io_sector, io_ind
                          FROM %(maptable)s) to_map
                 WHERE io.to_sector NOT IN %(to_blacklist)s
                   AND io.from_sector NOT IN %(from_blacklist)s
                   AND from_map.io_commod = io.from_sector
                   AND to_map.io_ind = io.to_sector
                 GROUP BY from_map.io_sector, to_map.io_sector""" % strings

            print(sql)
            db.execute(sql)
예제 #19
0
def create_hybrid_tables():
    for year in config.STUDY_YEARS:
        strings = {
            # table names
            "eia_table": "%s.seds_us_%d" % (config.EIA_SCHEMA, year),
            "io_table": "%s.transact_view_%d" % (config.IO_SCHEMA, year),
            "map_table": "%s.eia_code_map_%d" % (config.IO_SCHEMA, year),
            "hybrid_table": "hybrid_transactions_%d" % year,
            # selected sector codes
            "pa_trans_code": eia.source_naics_map["PA-trans"][year],
            "pa_nontrans_code": eia.source_naics_map["PA-nontrans"][year],
        }

        runsql("DROP TABLE IF EXISTS %s CASCADE" % strings["hybrid_table"])
        runsql(
            """
            CREATE TABLE %(hybrid_table)s (
                from_sector varchar(6),
                to_sector varchar(6),
                expenditure float
            )"""
            % strings
        )

        for source in eia.sources:
            strings["shares_view"] = "%s_ex_proportions_%s" % (source.lower(), year)
            strings["source"] = source
            strings["source_naics"] = eia.source_naics_map[source][year]

            # seds prices are c.i.f., but when allocating btu among user
            # industries it is unfair to assign based on c.i.f. since what
            # they end up using is f.o.b.
            subquery = (
                """
                SELECT codes.eia_sector, sum(io.fob) as fob
                  FROM %(io_table)s io, %(map_table)s codes
                 WHERE codes.eia_source = '%(source)s'
                   AND io.from_sector = '%(source_naics)s'
                   AND io.to_sector = codes.io_sector
                 GROUP BY codes.eia_sector"""
                % strings
            )

            strings["subquery"] = subquery

            # the price each industry ends up actually paying for energy
            # should be the f.o.b. price which is (fob / cif) of the seds price
            runsql(
                """
            CREATE OR REPLACE VIEW %(shares_view)s AS
                SELECT io.to_sector, codes.eia_sector,
                       cast(io.fob as float) / cast(io.cif as float) as fob_share,
                       cast(io.fob as float) / cast(totals.fob as float) as ex_share
                  FROM %(io_table)s io, %(map_table)s codes,
                       (%(subquery)s) totals
                 WHERE codes.eia_source = '%(source)s'
                   AND io.from_sector = '%(source_naics)s'
                   AND io.to_sector = codes.io_sector
                   AND totals.eia_sector = codes.eia_sector"""
                % strings
            )

            # split petroleum
            if source == "PA":
                strings["aviation_pa"] = sqlhelper.set_repr(eia.aviation_petroleum)
                strings["other_pa"] = sqlhelper.set_repr(eia.other_petroleum)
                strings["all_pa"] = sqlhelper.set_repr(eia.aviation_petroleum + eia.other_petroleum)

                strings["pa_nontrans_view"] = "pa_nontrans_%d" % year
                strings["pa_trans_view"] = "pa_trans_%d" % year
                strings["pa_trans_shares_view"] = "pa_trans_proportions_%d" % year
                strings["aviation_code"] = eia.air_transportation_codes[year]

                # non transportation petroleum use
                runsql(
                    """
                CREATE OR REPLACE VIEW %(pa_nontrans_view)s AS
                    SELECT shares.to_sector, shares.eia_sector,
                           sum(shares.ex_share * eia.use_btu) as btu,
                           sum(shares.ex_share * eia.use_btu
                               * eia.price * shares.fob_share) as ex
                      FROM %(shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source in %(all_pa)s
                       AND shares.eia_sector = eia.sector
                       -- these two below are double counted
                       AND eia.source || eia.sector not in ('DFEI', 'PCIC')
                       AND eia.sector <> 'AC'
                     GROUP BY shares.to_sector, shares.eia_sector"""
                    % strings
                )

                # petroleum use for transportation other than air
                runsql(
                    """
                CREATE OR REPLACE VIEW %(pa_trans_view)s AS
                    SELECT io.to_sector, io.fob,
                           io.fob - nontrans.ex as remaining
                      FROM %(io_table)s io, %(pa_nontrans_view)s nontrans
                     WHERE io.from_sector = '%(source_naics)s'
                       AND io.to_sector = nontrans.to_sector
                       -- remaining is negative for IC and EI
                       AND nontrans.eia_sector in ('CC', 'RC')
                   UNION
                    SELECT io.to_sector, io.fob,
                           cast(io.fob as float) as remaining
                      FROM %(io_table)s io, %(map_table)s codes
                     WHERE io.from_sector = '%(source_naics)s'
                       AND io.to_sector = codes.io_sector
                       AND codes.eia_source = 'PA'
                       AND codes.eia_sector = 'AC'
                       AND io.to_sector <> '%(aviation_code)s' """
                    % strings
                )

                # proportions for petroleum allocated to transportation
                runsql(
                    """
                CREATE OR REPLACE VIEW %(pa_trans_shares_view)s AS
                    SELECT use.to_sector, use.remaining / total.total as ex_share
                      FROM %(pa_trans_view)s use,
                           (SELECT sum(remaining) as total
                              FROM %(pa_trans_view)s) total"""
                    % strings
                )

                # allocate all of JF and AV to air transportation
                runsql(
                    """
                INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_trans_code)s', io.to_sector, sum(eia.use_btu)
                      FROM %(io_table)s io,
                           %(eia_table)s eia
                     WHERE eia.source in %(aviation_pa)s
                       and eia.sector = 'AC'
                       and io.from_sector = '%(source_naics)s'
                       and io.to_sector = '%(aviation_code)s'
                     GROUP BY io.to_sector """
                    % strings
                )

                # allocate all other transportation
                runsql(
                    """
                INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_trans_code)s',
                           shares.to_sector, sum(shares.ex_share * eia.use_btu)
                      FROM %(pa_trans_shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source in %(other_pa)s
                       AND eia.sector = 'AC'
                     GROUP BY shares.to_sector"""
                    % strings
                )

                # allocate non-transportation petroleum use
                runsql(
                    """
                INSERT INTO %(hybrid_table)s
                    SELECT '%(pa_nontrans_code)s', to_sector, btu
                      FROM %(pa_nontrans_view)s"""
                    % strings
                )
                # WHERE eia_sector in ('IC', 'EI')"""

                # dependencies in reverse order
                runsql("DROP VIEW %s" % strings["pa_trans_shares_view"])
                runsql("DROP VIEW %s" % strings["pa_trans_view"])
                runsql("DROP VIEW %s" % strings["pa_nontrans_view"])
                runsql("DROP VIEW %s" % strings["shares_view"])

            else:
                runsql(
                    """
                INSERT INTO %(hybrid_table)s
                    SELECT '%(source_naics)s',
                           shares.to_sector, shares.ex_share * eia.use_btu
                      FROM %(shares_view)s shares, %(eia_table)s eia
                     WHERE eia.source = '%(source)s'
                       AND shares.eia_sector = eia.sector"""
                    % strings
                )

                runsql("DROP VIEW %s" % strings["shares_view"])

        # insert remainder of standard io table
        energy_sectors = []
        for source in eia.sources:
            energy_sectors.append(eia.source_naics_map[source][year])

        strings["sectors"] = ", ".join(["'%s'" % s for s in energy_sectors])

        db.execute(
            """
            INSERT INTO %(hybrid_table)s
            SELECT from_sector, to_sector, fob
              FROM %(io_table)s
             WHERE from_sector not in (%(sectors)s)"""
            % strings
        )

        # split petroleum column proportional to trans and nontrans uses
        stmt = db.prepare(
            """
            SELECT trans.use_btu / total.use_btu as trans_share
              FROM (SELECT use_btu FROM %(eia_table)s
                     WHERE source = 'PA' AND sector = 'AC') trans,
                   (SELECT use_btu FROM %(eia_table)s
                     WHERE source = 'PA' AND sector = 'TC') total"""
            % strings
        )

        result = stmt()
        if len(result) and len(result[0]):
            strings["pa_naics"] = eia.source_naics_map["PA"][year]
            strings["trans_share"] = result[0][0]
            strings["nontrans_share"] = 1 - result[0][0]

            # transportation petroleum use column
            runsql(
                """
                INSERT INTO %(hybrid_table)s
                SELECT from_sector, '%(pa_trans_code)s',
                       %(trans_share).4f * expenditure
                  FROM %(hybrid_table)s
                 WHERE to_sector = '%(pa_naics)s' """
                % strings
            )

            # non-transportation petroleum use column
            runsql(
                """
                UPDATE %(hybrid_table)s
                   SET expenditure = %(nontrans_share).4f * expenditure,
                       to_sector = '%(pa_nontrans_code)s'
                 WHERE to_sector = '%(pa_naics)s' """
                % strings
            )
예제 #20
0
 def runsql(self, sql):
     if common.config.DEBUG_MODE:
         print(sql)
     db.execute(sql)
예제 #21
0
파일: parser.py 프로젝트: sonya/eea
def parse_io():
    io_files = {
        1996: "410281134571.xls",
        1999: "4102715414971.xls",
        2001: "4122111363671.xls",
        2004: "611239581071.xls",
        2006: "9121414285971.xls",
        2007: "1139203871.xls",
        2008: "1139204871.xls",
        2009: "11229101502.xls",
        2010: "1122910141371.xls",
        }

    for (year, filename) in io_files.items():
        tablename = "%s.io_%d" % (config.SCHEMA, year)
    
        # millions are in NTD
        table = SQLTable(tablename,
                         ["from_sector", "to_sector", "millions"],
                         ["varchar(255)", "varchar(255)", "float"])
        table.create()
        table.truncate()
    
        path = fileutils.getcache(filename, "tw/%d" % year)
        wb = xlrd.open_workbook(path)
        sheet = wb.sheets()[0]
        to_codes = sheet.row_values(0)
        to_names = sheet.row_values(1)
        for rowindex in range(2, sheet.nrows):
            row = sheet.row_values(rowindex)
            from_code = row[0].strip()
            from_name = row[1].strip()
            for i in range(2, len(to_names)):
                to_name = to_names[i].strip()
                value = row[i]
                table.insert([from_name, to_name, value])

        if year == 2010:
            strings = {
                "viewname": "%s.io_view_%d" % (config.SCHEMA, year),
                "tablename": tablename,
                "maptable": "%s.sector_map_%d" % (config.SCHEMA, year),
                "to_blacklist": sqlhelper.set_repr(config.to_blacklists[year]),
                "from_blacklist":
                    sqlhelper.set_repr(config.from_blacklists[year]),
                }

            sql = """CREATE OR REPLACE VIEW %(viewname)s AS
                SELECT from_map.io_sector AS from_sector,
                       to_map.io_sector as to_sector,
                       sum(millions) as millions
                  FROM %(tablename)s io,
                       (SELECT DISTINCT io_sector, io_commod
                          FROM %(maptable)s) from_map,
                       (SELECT DISTINCT io_sector, io_ind
                          FROM %(maptable)s) to_map
                 WHERE io.to_sector NOT IN %(to_blacklist)s
                   AND io.from_sector NOT IN %(from_blacklist)s
                   AND from_map.io_commod = io.from_sector
                   AND to_map.io_ind = io.to_sector
                 GROUP BY from_map.io_sector, to_map.io_sector""" % strings

            print(sql)
            db.execute(sql)