Beispiel #1
0
def doparse():
    """Load the make and use tables for 1972 through 2002 into the tracker.

    Each benchmark year ships its data in a different layout, so every
    year gets its own small parser below:

    * 1972, 1977 -- handed to ``tracker.create_simple_make_use``.
    * 1982, 1987 -- fixed-width text, sliced by column position.
    * 1992       -- tab-delimited text.
    * 1997       -- CSV.
    * 2002       -- mixed tab/space text, located via its header row.

    The trailing numeric argument given to the ``insert_*`` calls (100 or
    1000) is passed straight through to the tracker; it presumably scales
    the raw figures to a common dollar unit -- confirm against
    ``IOTableStateTracker``.

    All buffered rows are written out with ``tracker.flush()`` at the end.
    """
    tracker = IOTableStateTracker()

    # NOTE(review): the pre-1972 transaction tables are left disabled; the
    # reason is not visible from this function.
    #tracker.create_simple_transaction_table(
    #    "1947", "1947/1947 Transactions 85-level Data.txt")
    #tracker.create_simple_transaction_table(
    #    "1958", "1958/1958 Transactions 85-level Data.txt")
    #tracker.create_simple_transaction_table(
    #    "1963", "1963/1963 Transactions 367-level Data.txt")
    #tracker.create_simple_transaction_table(
    #    "1967", "1967/1967 Transactions 484-level Data.txt", 1000)

    tracker.create_simple_make_use(
        "1972", "1972/1972 Transactions 496-level Data.txt", 1000)
    tracker.create_simple_make_use(
        "1977", "1977/1977 Transactions 537-level Data.txt", 1000)

    # 1982: a single fixed-width file carries both the make and the use
    # value for each industry pair, followed by eight 10-character margins.
    tracker.create_make_table("1982")
    tracker.create_use_table("1982", True)
    with open(fileutils.getcache("82-6DT.DAT", "1982"), "r") as f:
        for line in f:
            if len(line) >= 112: # right-aligned
                input_ind = line[0:6]
                output_ind = line[6:12]
                use_dollars = line[12:22]
                make_dollars = line[22:32]
                tracker.insert_make(input_ind, output_ind, make_dollars, 100)
                tracker.insert_use(input_ind, output_ind, use_dollars,
                                   {"margins": line[32:42],
                                    "rail_margin": line[42:52],
                                    "truck_margin": line[52:62],
                                    "water_margin": line[62:72],
                                    "air_margin": line[72:82],
                                    "pipe_margin": line[82:92],
                                    "wholesale_margin": line[92:102],
                                    "retail_margin": line[102:112]},
                                    100) # this year dollars are in 100,000s

    # 1987: separate fixed-width files for make and use; short lines are
    # skipped because they cannot hold every column.
    tracker.create_make_table("1987")
    with open(fileutils.getcache("TBL1-87.DAT", "1987"), "r") as f:
        for line in f:
            if len(line) >= 24: # right-aligned
                tracker.insert_make(
                    line[0:6], line[7:13], line[15:24], 1000)

    tracker.create_use_table("1987", True)
    with open(fileutils.getcache("TBL2-87.DAT", "1987"), "r") as f:
        for line in f:
            if len(line) >= 96: # right-aligned
                input_ind = line[0:6]
                output_ind = line[7:13]
                use_dollars = line[15:24].strip()
                tracker.insert_use(
                    input_ind, output_ind, use_dollars,
                    {"margins": line[24:33],
                     "rail_margin": line[33:42],
                     "truck_margin": line[42:51],
                     "water_margin": line[51:60],
                     "air_margin": line[60:69],
                     "pipe_margin": line[69:78],
                     "wholesale_margin": line[78:87],
                     "retail_margin": line[87:96]},
                    1000)

    # the documentation for 1992 appears very incorrect unless there
    # is some way for tabs to be 7 characters for two fields and 9
    # characters for the rest of the fields. we will just assume the
    # file is an ordinary tab-delimited file.

    tracker.create_make_table("1992")
    with open(fileutils.getcache("IOMAKE.TXT", "1992"), "r") as f:
        for line in f:
            row = line.split("\t")
            if len(row) == 4:
                tracker.insert_make(row[0], row[1], row[3], 1000)

    tracker.create_use_table("1992", True)
    with open(fileutils.getcache("IOUSE.TXT", "1992"), "r") as f:
        for line in f:
            row = line.split("\t")
            if len(row) == 13:
                tracker.insert_use(
                    row[0], row[1], row[3],
                    {"margins": row[4],
                     "rail_margin": row[5],
                     "truck_margin": row[6],
                     "water_margin": row[7],
                     "air_margin": row[8],
                     "pipe_margin": row[9],
                     "gaspipe_margin": row[10],
                     "wholesale_margin": row[11],
                     # the last tab-delimited field, not the 13th
                     # character of the raw line
                     "retail_margin": row[12]},
                    1000)

    # 1997: comma-separated files; rows with an unexpected column count
    # (headers, blanks) are skipped.
    tracker.create_make_table("1997")
    with open(fileutils.getcache("NAICSMakeDetail.txt", "1997")) as f:
        csvf = csv.reader(f)
        for row in csvf:
            if len(row) == 4:
                tracker.insert_make(row[0], row[1], row[3], 1000)

    tracker.create_use_table("1997", True)
    with open(fileutils.getcache("NAICSUseDetail.txt", "1997")) as f:
        csvf = csv.reader(f)
        for row in csvf:
            if len(row) == 15:
                tracker.insert_use(
                    row[0], row[1], row[4],
                    {"margins": row[5],
                     "rail_margin": row[6],
                     "truck_margin": row[7],
                     "water_margin": row[8],
                     "air_margin": row[9],
                     "pipe_margin": row[10],
                     "gaspipe_margin": row[11],
                     "wholesale_margin": row[12],
                     "retail_margin": row[13]},
                    1000)

    # contrary to the format documentation, revised 2002 tables are
    # delimited with mixed tabs and spaces. they appear fixed width with
    # 8-char tabs. field names fortunately do not contain whitespace.
    # a data line starts with a six-character code followed by whitespace.
    valid_line = re.compile(r"[A-Z0-9]{6}\s")

    tracker.create_make_table("2002")
    with open(fileutils.getcache("REV_NAICSMakeDetail 4-24-08.txt", "2002")) as f:
        # the first line is the header; it fixes the column positions
        fields = dbsetup.get_header_locations(
                     dbsetup.replace_tabs(f.readline().strip()))
        for line in f:
            if valid_line.match(line):
                row = dbsetup.get_values_for_fields(dbsetup.replace_tabs(line), fields)
                tracker.insert_make(
                    row["Industry"], row["Commodity"], row["ProVal"], 1000)

    tracker.create_use_table("2002", True)
    with open(fileutils.getcache("REV_NAICSUseDetail 4-24-08.txt", "2002")) as f:
        # cheat here because it's not worth the trouble to deal with
        # lack of whitespace between two fields (GasPipeVal and WhsVal)
        line = f.readline().strip().replace("GasPipeVal", "GasPipe   ")
        fields = dbsetup.get_header_locations(dbsetup.replace_tabs(line))
        for line in f:
            if valid_line.match(line):
                row = dbsetup.get_values_for_fields(
                    dbsetup.replace_tabs(line), fields)
                tracker.insert_use(
                    row["Commodity"], row["Industry"], row["ProVal"],
                    {"margins": row["StripMar"],
                     "rail_margin": row["RailVal"],
                     "truck_margin": row["TruckVal"],
                     "water_margin": row["WaterVal"],
                     "air_margin": row["AirVal"],
                     "pipe_margin": row["PipeVal"],
                     "gaspipe_margin": row["GasPipe"],
                     "wholesale_margin": row["WhsVal"],
                     "retail_margin": row["RetVal"]},
                    1000)

    tracker.flush()
Beispiel #2
0
def doparse():
    """Populate the sector code/description tables for 1972 through 2002.

    1972 and 1977 are read from ``io_sectors_<year>.csv`` (located with
    ``fileutils.getdatapath``) and inserted directly into a freshly
    truncated ``<IO_SCHEMA>.codes_<year>`` table.  Every later year is
    written through a single ``dbsetup.IOCodeTableWriter``: the writer is
    pointed at that year's source file with ``set_year``, the file is
    parsed according to its own layout, and each (code, description) pair
    is handed to ``writer.writerow``.  ``writer.flush()`` is called once
    all years are done.
    """
    for year in (1972, 1977):
        table = SQLTable("%s.codes_%d" % (config.IO_SCHEMA, year),
                         ["code", "description"],
                         ["char(6)", "text"]).create()
        table.truncate()
        filepath = fileutils.getdatapath("io_sectors_%d.csv" % year, "usa")
        with open(filepath, "r") as fh:
            for row in csv.reader(fh):
                # skip blank rows and rows whose code column is empty
                if row and row[0]:
                    table.insert([row[0], row[1]])

        if year == 1972:
            # this is stated in the rtf file for both 1972 and 1977
            # but this code never appears in 1977, the documentation
            # was probably not properly updated
            table.insert(["870000", "total value added"])

    writer = dbsetup.IOCodeTableWriter()

    # 1982: six-character code, description from column 8 onward.
    # NOTE(review): desc keeps the line's trailing newline -- presumably
    # the writer tolerates or trims it; confirm.
    writer.set_year(1982, "Io-code.doc")
    with open(writer.get_filename()) as f:
        for line in f:
            if len(line) > 8:
                writer.writerow(line[:6], line[8:])

    # 1987: lines look like "<1-2 digits>.<4 digits>  <description>"; the
    # leading part is zero-padded to two digits to form a six-digit code.
    writer.set_year(1987, "SIC-IO.DOC")
    with open(writer.get_filename()) as f:
        pattern = re.compile(r'\s*(\d{1,2})\.(\d{4})\s+([^0-9\*]+)')
        for line in f:
            match = pattern.match(line)
            if match:
                code = match.group(1).rjust(2, '0') + match.group(2)
                desc = match.group(3).strip('(. \r\n')
                writer.writerow(code, desc)

    # 1992: six-character code, description from column 7 onward
    # (trailing newline retained, as for 1982).
    writer.set_year(1992, "io-code.txt")
    with open(writer.get_filename()) as f:
        for line in f:
            if len(line) > 7:
                writer.writerow(line[:6], line[7:])

    # 1997: two-column CSV of code, description.
    writer.set_year(1997, "IO-CodeDetail.txt")
    with open(writer.get_filename()) as f:
        for row in csv.reader(f):
            if len(row) == 2:
                writer.writerow(row[0], row[1])

    # 2002: there is no standalone code list here, so codes are harvested
    # from the commodity and industry columns of the use table.
    writer.set_year(2002, "REV_NAICSUseDetail 4-24-08.txt")
    with open(writer.get_filename()) as f:
        # a data line starts with a six-character code then whitespace
        valid_line = re.compile(r"[A-Z0-9]{6}\s")
        # pad the header so GasPipeVal and WhsVal, which run together in
        # the file, are seen as two separate field names
        line = f.readline().strip().replace("GasPipeVal", "GasPipe   ")
        fields = dbsetup.get_header_locations(dbsetup.replace_tabs(line))
        codemap = {}
        for line in f:
            if valid_line.match(line):
                row = dbsetup.get_values_for_fields(
                    dbsetup.replace_tabs(line), fields)
                # keyed by code, so repeated codes collapse to one entry
                codemap[row["Commodity"]] = row["CommodityDescription"]
                codemap[row["Industry"]] = row["IndustryDescription"]

        for (code, desc) in codemap.items():
            writer.writerow(code, desc)

    writer.flush()
Beispiel #3
0
def doparse():
    """Build the per-year sector code tables (1972, 1977, 1982-2002).

    The two earliest years come from project CSV files and are inserted
    straight into ``<IO_SCHEMA>.codes_<year>`` via ``SQLTable``.  The
    remaining years are routed through ``dbsetup.IOCodeTableWriter``;
    each has its own file layout, parsed inline below, and yields
    (code, description) pairs for ``writer.writerow``.  The writer is
    flushed once at the end.
    """
    # Patterns are written as raw strings so the regex escapes (\s, \d,
    # \., \*) reach the regex engine untouched instead of being treated
    # as (invalid) string-literal escapes.
    sic_entry = re.compile(r'\s*(\d{1,2})\.(\d{4})\s+([^0-9\*]+)')
    naics_data_line = re.compile(r"[A-Z0-9]{6}\s")

    for year in (1972, 1977):
        table = SQLTable("%s.codes_%d" % (config.IO_SCHEMA, year),
                         ["code", "description"],
                         ["char(6)", "text"]).create()
        table.truncate()
        filepath = fileutils.getdatapath("io_sectors_%d.csv" % year, "usa")
        with open(filepath, "r") as fh:
            csvf = csv.reader(fh)
            for row in csvf:
                # ignore empty rows and rows with a blank code
                if not row or not row[0]:
                    continue
                table.insert([row[0], row[1]])

        if year == 1972:
            # this is stated in the rtf file for both 1972 and 1977
            # but this code never appears in 1977, the documentation
            # was probably not properly updated
            table.insert(["870000", "total value added"])

    writer = dbsetup.IOCodeTableWriter()

    # 1982 -- code in the first six columns, description from column 8.
    # NOTE(review): the description still carries the trailing newline;
    # presumably handled downstream -- confirm.
    writer.set_year(1982, "Io-code.doc")
    with open(writer.get_filename()) as f:
        for line in f:
            if len(line) > 8:
                code = line[:6]
                desc = line[8:]
                writer.writerow(code, desc)

    # 1987 -- "<major>.<minor> <description>" entries; the major part is
    # left-padded with zeros to two digits and joined to the minor part.
    writer.set_year(1987, "SIC-IO.DOC")
    with open(writer.get_filename()) as f:
        for line in f:
            match = sic_entry.match(line)
            if match:
                major, minor, text = match.groups()
                code = major.rjust(2, '0') + minor
                desc = text.strip('(. \r\n')
                writer.writerow(code, desc)

    # 1992 -- code in the first six columns, description from column 7
    # (newline retained, same as 1982).
    writer.set_year(1992, "io-code.txt")
    with open(writer.get_filename()) as f:
        for line in f:
            if len(line) > 7:
                code = line[:6]
                desc = line[7:]
                writer.writerow(code, desc)

    # 1997 -- CSV with exactly two columns per useful row.
    writer.set_year(1997, "IO-CodeDetail.txt")
    with open(writer.get_filename()) as f:
        csvf = csv.reader(f)
        for row in csvf:
            if len(row) == 2:
                writer.writerow(row[0], row[1])

    # 2002 -- codes and descriptions are collected from the use table's
    # commodity and industry columns.
    writer.set_year(2002, "REV_NAICSUseDetail 4-24-08.txt")
    with open(writer.get_filename()) as f:
        # separate the GasPipeVal and WhsVal header names, which have no
        # whitespace between them in the file
        line = f.readline().strip().replace("GasPipeVal", "GasPipe   ")
        fields = dbsetup.get_header_locations(dbsetup.replace_tabs(line))
        codemap = {}
        for line in f:
            if naics_data_line.match(line):
                row = dbsetup.get_values_for_fields(dbsetup.replace_tabs(line),
                                                    fields)
                # dict keys de-duplicate codes seen on many lines
                codemap[row["Commodity"]] = row["CommodityDescription"]
                codemap[row["Industry"]] = row["IndustryDescription"]

        for (code, desc) in codemap.items():
            writer.writerow(code, desc)

    writer.flush()