Example #1
        def __init__(self):
            TableStateTracker.__init__(self)
            self.fh = None
            self.code_dict = None

            self.value_columns = [
                "prod_val", "rail_margin", "truck_margin", "water_margin",
                "air_margin", "pipe_margin", "gaspipe_margin",
                "wholesale_margin", "retail_margin", "purchase_val"
            ]

            self.old_style_field_map = {
                "Producers' Value": "prod_val",
                "MfgExciseTax": "prod_val",
                "RailMargin": "rail_margin",
                "TruckMargin": "truck_margin",
                "WaterMargin": "water_margin",
                "AirMargin": "air_margin",
                "PipeMargin": "pipe_margin",
                "WholesaleMargin": "wholesale_margin",
                "WholesaleTax": "wholesale_margin",
                "RetailMargin": "retail_margin",
                "RetailSalesTax": "retail_margin",
                "OtherRetailTax": "retail_margin",
                "Purchasers' Value": "purchase_val",
            }
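
A minimal sketch (not taken from the original source) of how this field map is
used further down the page: every spreadsheet header that appears as a key in
old_style_field_map contributes its column index to the same target field, and
the values in those columns are later summed into a single database column.
The header row below is illustrative only.

    # subset of the map from the __init__ above; all three headers fold into
    # the single retail_margin column
    old_style_field_map = {
        "RetailMargin": "retail_margin",
        "RetailSalesTax": "retail_margin",
        "OtherRetailTax": "retail_margin",
    }

    # hypothetical header row from an old-style PCE workfile sheet
    header = ["PCE Category", "Line", "Commodity",
              "RetailMargin", "RetailSalesTax", "OtherRetailTax"]

    field_indexes = {}
    for i, name in enumerate(header):
        if name in old_style_field_map:
            field_indexes.setdefault(old_style_field_map[name], []).append(i)

    print(field_indexes)  # {'retail_margin': [3, 4, 5]}
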
Example #2
def parse_nipa_data():
    test_view = "%s.nipa_groups" % common.config.TEST_SCHEMA
    db.execute("DROP VIEW IF EXISTS %s" % test_view)

    # get table for pce category harmonization
    trailing_pat = re.compile(r'(.+) \(.*\d.*\)$')

    nipa_code_map = {}
    filename = fileutils.getdatapath("nipa_code_map.csv", "usa")
    fh = open(filename)
    csvf = csv.reader(fh)
    for row in csvf:
        if len(row) == 2:
            harmonized = row[0]
            trailing = trailing_pat.match(harmonized)
            if trailing:
                harmonized = trailing.group(1)
            nipa_code_map[row[1]] = harmonized
    fh.close()

    # get nipa series codes from underlying detail tables
    tracker = TableStateTracker()
    tracker.create_table("%s.pce_codes" % config.NIPA_SCHEMA,
                         ["code", "parent", "description"],
                         ["char(7)", "char(7)", "text"], True)

    number_pat = re.compile(r'^\d+$')
    trailing_pat = re.compile(r'(.+) \(.*\d.*\)$')

    filename = fileutils.getcache("Section2All_underlying.csv", "bea", "nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False

    code_stack = [None]
    indent_stack = [-1]
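    # the two stacks track the path from the root down to the current row:
    # whenever a row's indentation is not deeper than the previous one, the
    # stacks unwind (see the while loop below) until the correct ancestor is
    # back on top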

    # the code mapping has been done so that each item is at least three
    # levels of disaggregation below the top, i.e. there is always an
    # ancestor at the second level. we only want to keep track of the
    # ancestor at the third level (root is level zero).
    # the first level below the root has goods and services; the second
    # level has durable goods, nondurable goods, and services.
    reverse_code_dict = {}
    second_level_nodes = []

    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 2.4.5U"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 2.4.5U"):
                    # we only need to go through one instance of this table
                    break
                else:
                    if number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()

                        # these are duplicate codes
                        if title.startswith("Market-based PCE"):
                            continue

                        code = row[2]
                        current_indent = len(row[1]) - len(title)

                        while current_indent <= indent_stack[-1]:
                            indent_stack.pop()
                            code_stack.pop()

                        indent_stack.append(current_indent)
                        code_stack.append(code)

                        if len(code_stack) > 1:
                            parent = code_stack[-2]
                        else:
                            parent = None

                        title = title.strip()
                        trailing = trailing_pat.match(title)
                        if trailing:
                            title = trailing.group(1)

                        if len(code_stack) > 4:
                            reverse_code_dict[title] = code_stack[3]
                        else:
                            reverse_code_dict[title] = code

                        tracker.insert_row((code, parent, title))

    tracker.flush()
    fh.close()

    # table for price deflators

    tracker.create_table("%s.implicit_price_deflators" % config.NIPA_SCHEMA,
                         ["year", "gdp", "pce"], ["int", "float", "float"])

    filename = fileutils.getcache("Section1all_csv.csv", "bea/nipa")
    fh = open(filename)
    csvf = csv.reader(fh)
    is_in_table = False

    data = {}  # we need both the gdp and pce rows before populating
    years = {}

    for row in csvf:
        if len(row):
            if not is_in_table:
                if row[0].startswith("Table 1.1.9"):
                    is_in_table = True
            else:
                if row[0].startswith("Table 1.1.9"):
                    # this is the seasonally adjusted version of the same table
                    break
                else:
                    if row[0] == "Line":
                        for i in range(len(row)):
                            if number_pat.match(row[i]):
                                year = int(row[i])
                                years[year] = i
                                data[year] = {}

                    elif number_pat.match(row[0]) and len(row) > 2:
                        title = row[1].lstrip()
                        if title == "Gross domestic product":
                            column = "gdp"
                        elif title == "Personal consumption expenditures":
                            column = "pce"
                        else:
                            continue

                        for (year, colindex) in years.items():
                            data[year][column] = float(row[colindex])

    for (year, results) in data.items():
        tracker.insert_row([year, results["gdp"], results["pce"]])

    tracker.flush()
    fh.close()

    # parse pce bridge

    class IONIPAStateTracker(TableStateTracker):
        def flush(self):
            TableStateTracker.flush(self)
            if self.fh is not None and not self.fh.closed:
                self.fh.close()

        def __init__(self):
            TableStateTracker.__init__(self)
            self.fh = None
            self.code_dict = None

            self.value_columns = [
                "prod_val", "rail_margin", "truck_margin", "water_margin",
                "air_margin", "pipe_margin", "gaspipe_margin",
                "wholesale_margin", "retail_margin", "purchase_val"
            ]

            self.old_style_field_map = {
                "Producers' Value": "prod_val",
                "MfgExciseTax": "prod_val",
                "RailMargin": "rail_margin",
                "TruckMargin": "truck_margin",
                "WaterMargin": "water_margin",
                "AirMargin": "air_margin",
                "PipeMargin": "pipe_margin",
                "WholesaleMargin": "wholesale_margin",
                "WholesaleTax": "wholesale_margin",
                "RetailMargin": "retail_margin",
                "RetailSalesTax": "retail_margin",
                "OtherRetailTax": "retail_margin",
                "Purchasers' Value": "purchase_val",
            }

        def set_filename(self, filename):
            path = fileutils.getcache(filename, str(self.year))
            self.filename = path

        def set_year(self, year):
            self.flush()
            self.year = year
            tablename = "%s.pcebridge_%d" % (config.IO_SCHEMA, year)
            fields = ["pce_code", "commodity"] + self.value_columns
            types = ["varchar(6)", "varchar(6)"] + \
                ["bigint"]*len(self.value_columns)
            self.create_table(tablename, fields, types)

        def setup_for_codes(self):
            self.code_dict = {}

        def flush_codes(self):
            if self.code_dict is not None:
                tablename = "%s.nipa_codes_%d" % (config.IO_SCHEMA, self.year)
                self.create_table(tablename,
                                  ["pce_code", "nipa_group", "description"],
                                  ["varchar(6)", "char(7)", "text"])
                for (code, raw_desc) in self.code_dict.items():

                    desc = raw_desc
                    if desc.endswith('(s.)') or desc.endswith('(d.)'):
                        desc = desc[:-4].strip()
                    elif desc.endswith('(n.d.)'):
                        desc = desc[:-6].strip()

                    if desc in nipa_code_map:
                        desc = nipa_code_map[desc]

                    if desc in reverse_code_dict:
                        nipa_code = reverse_code_dict[desc]
                    else:
                        nipa_code = None
                    #self.current_stmt(code, nipa_code, raw_desc)
                    self.table.insert([code, nipa_code, raw_desc])

                self.code_dict = None
                self.flush()

        def insert_code_row(self, code, desc):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
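            # (e.g. xlrd returns a cell containing the code 1201 as 1201.0)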
            if type(code) is float:
                code = int(code)

            self.code_dict[str(code)] = desc.strip()

        def insert_row(self, pce_code, commod, dollar_values, factor=1):
            # workaround for the way excel interprets numbers as floats
            # when we know the codes should be strings
            if type(pce_code) is float:
                pce_code = int(pce_code)

            values = [str(pce_code).strip(), commod.strip()]
            for column in self.value_columns:
                if column in dollar_values:
                    if factor == 1:
                        values.append(dollar_values[column])
                    else:
                        values.append(
                            int(float(dollar_values[column]) * factor))
                else:
                    values.append(None)
            #self.current_stmt(*values)
            self.table.insert(values)

        def parse_old_style_xls(self, year):
            self.set_year(year)
            self.set_filename("%d_PCE_Commodity.xls" % self.year)
            wb = xlrd.open_workbook(self.filename)

            # parse pce bridge data
            sheet = wb.sheet_by_name("%d PCE Workfile - Commodity" % self.year)
            field_indexes = {}
            pce_code_idx = 0
            commod_idx = 2
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    if "PCE Category" in row:
                        pce_code_idx = row.index("PCE Category")
                        if "Commodity" in row:
                            commod_idx = row.index("Commodity")
                        for i in range(len(row)):
                            xls_col = row[i]
                            if xls_col in self.old_style_field_map:
                                colname = self.old_style_field_map[xls_col]
                                if colname not in field_indexes:
                                    field_indexes[colname] = []
                                field_indexes[colname].append(i)
                    elif len(field_indexes):
                        pce_code = row[pce_code_idx]
                        commod = str(int(row[commod_idx])).rjust(6, "0")
                        values = {}
                        for (field, columns) in field_indexes.items():
                            # documentation says units are in 100,000 dollars
                            # but the orders of magnitude don't match up with
                            # later years if we use 100
                            components = [
                                int(float(row[column] * 1000))
                                for column in columns
                            ]
                            value = 0
                            for component in components:
                                value += component
                            values[field] = value
                        self.insert_row(pce_code, commod, values)

            # parse codes from neighboring worksheet
            self.setup_for_codes()
            sheet = wb.sheet_by_name("%d PCE Category Descriptions" %
                                     self.year)
            code_idx = None
            desc_idx = None
            for rowindex in range(sheet.nrows):
                row = sheet.row_values(rowindex)
                if len(row) > 1:
                    codetab = "PCE Category Code"
                    codetab2 = "%s - %d" % (codetab, self.year)
                    if codetab in row or codetab2 in row:
                        if codetab in row:
                            code_idx = row.index(codetab)
                        else:
                            code_idx = row.index(codetab2)
                        desctab = "PCE Category Description - %d" % self.year
                        if desctab in row:
                            desc_idx = row.index(desctab)
                        else:
                            desctab = "PCE Category Description"
                            if desctab in row:
                                desc_idx = row.index(desctab)
                    elif code_idx is not None and desc_idx is not None:
                        code = row[code_idx]
                        desc = str(row[desc_idx])
                        self.insert_code_row(code, desc)
            self.flush_codes()

        def get_file_handle(self, filetype, options=None):
            # avoid a shared mutable default argument
            if options is None:
                options = {}
            if filetype == "txt":
                self.fh = open(self.filename)
                return self.fh
            elif filetype == "csv":
                self.fh = open(self.filename)
                if "delim" in options:
                    csvf = csv.reader(self.fh, delimiter=options["delim"])
                else:
                    csvf = csv.reader(self.fh)
                return csvf
            elif filetype == "xls":
                wb = xlrd.open_workbook(self.filename)
                return wb

        def parse_text(self, rowcallback):
            # self.filename already holds the full cached path set by set_filename()
            f = open(self.filename)
            for line in f:
                rowcallback(line, self)
            f.close()

    tracker = IONIPAStateTracker()
    tracker.parse_old_style_xls(1967)
    tracker.parse_old_style_xls(1972)
    tracker.parse_old_style_xls(1977)
    tracker.parse_old_style_xls(1982)

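    # 1987 bridge table: fixed-width text file, parsed by character position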
    tracker.set_year(1987)
    tracker.set_filename("tbld-87.dat")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 103:
            continue
        commod = line[0:6]
        pce_code = line[14:18]
        values = {
            "prod_val": line[21:30],
            "rail_margin": line[30:39],
            "truck_margin": line[39:48],
            "water_margin": line[48:57],
            "air_margin": line[57:66],
            "pipe_margin": line[66:75],
            "wholesale_margin": line[75:84],
            "retail_margin": line[84:93],
            "purchase_val": line[93:102],
        }
        tracker.insert_row(pce_code, commod, values, 1000)

    tracker.setup_for_codes()
    tracker.set_filename("io-nipa.doc")
    fh = tracker.get_file_handle("txt")
    for line in fh:
        if len(line) < 27:
            continue
        code = line[0:4].strip()
        desc = line[26:].strip()
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    tracker.set_year(1992)
    tracker.set_filename("TabD.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        values = {
            "prod_val": row[4],
            "rail_margin": row[5],
            "truck_margin": row[6],
            "water_margin": row[7],
            "air_margin": row[8],
            "pipe_margin": row[9],
            "gaspipe_margin": row[10],
            "wholesale_margin": row[11],
            "retail_margin": row[12],
            "purchase_val": row[13],
        }
        tracker.insert_row(row[2], row[0], values, 1000)

    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})
    for row in fh:
        code = row[0]
        desc = row[4]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    tracker.set_year(1997)
    tracker.set_filename("AppendixC_Detail.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        values = {
            "prod_val": row[3],
            "rail_margin": row[4],
            "truck_margin": row[5],
            "water_margin": row[6],
            "air_margin": row[7],
            "pipe_margin": row[8],
            "gaspipe_margin": row[9],
            "wholesale_margin": row[10],
            "retail_margin": row[11],
            "purchase_val": row[12],
        }
        tracker.insert_row(row[1], row[0], values, 1000)

    tracker.setup_for_codes()
    tracker.set_filename("IO-NIPA_PCE.txt")
    fh = tracker.get_file_handle("csv", {"delim": ","})
    for row in fh:
        code = row[1]
        desc = row[2]
        tracker.insert_code_row(code, desc)
    tracker.flush_codes()

    tracker.set_year(2002)
    tracker.setup_for_codes()  # do this simultaneously since it's all one file
    tracker.set_filename("2002_PCE_Bridge.xls")
    wb = tracker.get_file_handle("xls")
    naics_pat = re.compile('[A-Z0-9]{6}')
    sheet = wb.sheet_by_name("PCE_Bridge_Detail")
    pce_codes = []
    for rowindex in range(sheet.nrows):
        row = sheet.row_values(rowindex)
        if len(row) == 13 and naics_pat.match(row[1]):
            pce_desc = row[0]
            # we don't need the distinction between households and
            # nonprofit institutions serving households (NPISH)
            parts = pce_desc.split('-')
            if len(parts) > 1:
                lastpart = parts[-1].strip()
                if lastpart == 'HH' or lastpart == 'NPISH':
                    pce_desc = '-'.join(parts[:-1])
            pce_desc = pce_desc.strip()

            if pce_desc in pce_codes:
                pce_code = pce_codes.index(pce_desc)
            else:
                pce_code = len(pce_codes)
                pce_codes.append(pce_desc)
                tracker.insert_code_row(str(pce_code), pce_desc)

            values = {
                "prod_val": row[3],
                "rail_margin": row[4],
                "truck_margin": row[5],
                "water_margin": row[6],
                "air_margin": row[7],
                "pipe_margin": row[8],
                "gaspipe_margin": row[9],
                "wholesale_margin": row[10],
                "retail_margin": row[11],
                "purchase_val": row[12],
            }
            tracker.insert_row(str(pce_code), row[1], values, 1000)

    tracker.flush_codes()
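
The snippets on this page rely on a TableStateTracker base class that is not
shown here. The stub below is a reconstruction inferred only from the calls
made above, intended as a reading aid rather than the project's actual
implementation; names and signatures beyond those calls are assumptions.

    class TableStateTracker:
        """Inferred stub: buffers rows for one database table at a time."""

        def __init__(self):
            # the subclass above relies on a `table` attribute exposing insert()
            self.table = None

        def create_table(self, name, columns, types, *args):
            # would create the target table and point self.table at it; the
            # meaning of the optional fourth positional argument used above
            # is not documented on this page
            raise NotImplementedError

        def insert_row(self, values):
            # would buffer one row (a list or tuple) for the current table
            raise NotImplementedError

        def flush(self):
            # would write any buffered rows out to the database
            raise NotImplementedError
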
Example #3
 def flush(self):
     TableStateTracker.flush(self)
     if self.fh is not None and not self.fh.closed:
         self.fh.close()
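
For context (all of this is visible in Example #2 above, not new behaviour):
get_file_handle() stores the open handle in self.fh, so this override ensures
the previous input file is closed whenever the tracker moves on to the next
table, for example:

    tracker.set_year(1992)            # set_year() calls flush(), closing any prior self.fh
    tracker.set_filename("TabD.txt")
    fh = tracker.get_file_handle("csv", {"delim": "\t"})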