def init_depr_rates(
    asset_tree=None,
    get_econ=False,
    get_tax_est=False,
    get_tax_150=False,
    get_tax_200=False,
    get_tax_sl=False,
    get_tax_ads=False,
    soi_from_out=False,
    output_data=False,
):
    """ Build the economic and tax depreciation rate trees.

    The fixed asset, inventory and land data are read for ``asset_tree`` and
    the three resulting trees are handed to the ``calc_rates`` routines.

    :param asset_tree: The NAICS tree passed to the fixed asset, inventory
           and land readers. Default is a newly generated tree (created per
           call, so separate calls never share one tree object).
    :param get_econ: Not referenced in the body of this function.
    :param get_tax_est: Not referenced in the body of this function.
    :param get_tax_150: Not referenced in the body of this function.
    :param get_tax_200: Not referenced in the body of this function.
    :param get_tax_sl: Not referenced in the body of this function.
    :param get_tax_ads: Not referenced in the body of this function.
    :param soi_from_out: Not referenced in the body of this function.
    :param output_data: Not referenced in the body of this function.
    :returns: A dictionary holding the economic depreciation tree under the
           key "Econ" and the tax depreciation tree under the key "Tax".
    """
    # A default written as ``naics.generate_tree()`` in the signature would be
    # built once at definition time and shared by every call.
    if asset_tree is None:
        asset_tree = naics.generate_tree()
    # Calculating the fixed asset data:
    fixed_asset_tree = read_bea.read_bea(asset_tree)
    # Calculating the inventory data:
    inv_tree = read_inv.read_inventories(asset_tree)
    # Calculating the land data:
    land_tree = read_land.read_land(asset_tree)
    # Calculating the depreciation rates:
    econ_depr_tree = calc_rates.calc_depr_rates(
        fixed_asset_tree, inv_tree, land_tree
    )
    tax_depr_tree = calc_rates.calc_tax_depr_rates(
        fixed_asset_tree, inv_tree, land_tree
    )
    return {"Econ": econ_depr_tree, "Tax": tax_depr_tree}
Example #2
0
def load_partner(soi_tree=None,
                 from_out=False,
                 output_data=False,
                 out_path=None):
    """ Loading the partnership tax soi data into a NAICS Tree.

    :param soi_tree: The NAICS tree to put all of the data in. Default is a
           newly generated tree (created per call, never shared).
    :param from_out: Forwarded to the income, asset and type loaders. If the
           partnership soi data is already in an output file, then it can be
           read in directly from the output.
    :param output_data: Print the partnership dataframes to csv files in the
           output folder.
    :param out_path: The output path used when printing the dataframes.
           Defaults to the module-level output directory.
    :returns: The NAICS tree with the partnership dataframes loaded.
    """
    # Build the default tree here rather than in the signature, where it
    # would be created once and mutated by every call:
    if soi_tree is None:
        soi_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _OUT_DIR
    # Load the soi income data into the NAICS tree:
    soi_tree = prt.load_income(data_tree=soi_tree, from_out=from_out)
    # Load the soi asset data into the NAICS tree:
    soi_tree = prt.load_asset(data_tree=soi_tree, from_out=from_out)
    # Load the soi partnership types data into the NAICS tree:
    soi_tree = prt.load_type(data_tree=soi_tree, from_out=from_out)
    # Output the data to csv files in the output folder:
    if output_data:
        naics.print_tree_dfs(tree=soi_tree,
                             out_path=out_path,
                             data_types=[_INC_DF_NM, _AST_DF_NM, _TYP_DF_NM])
    return soi_tree
def load_soi_farm_prop(data_tree=None,
                       blue_tree=None,
                       blueprint=None,
                       from_out=False,
                       out_path=_FARM_PROP_OUT_PATH):
    """ This function loads the soi farm proprietorship data:

    :param data_tree: The NAICS tree to read the data into. Default is a
           newly generated tree (created per call, never shared).
    :param blue_tree: A NAICS tree with the "blueprint" dataframe; passed
           through to ``naics.pop_forward``.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the farm dataframe forward.
           When omitted, the total corporate dataframe is used if the first
           industry of ``data_tree`` has one.
    :param from_out: Whether to read in the data from output.
    :param out_path: The path of the output file read when ``from_out`` is
           set.
    :returns: The NAICS tree with the farm proprietorship dataframe added.
    """
    # Build the default tree per call instead of once in the signature:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        return naics.load_tree_dfs(input_path=out_path, tree=data_tree)
    # Load Farm Proprietorship data:
    farm_data = pd.read_csv(_FARM_IN_PATH)
    new_farm_cols = ["Land", "FA"]
    # Give every industry an empty farm dataframe:
    data_tree.append_all(df_nm=_FARM_DF_NM, df_cols=new_farm_cols)
    # Scalars taken from the first row of the farm input file:
    land_mult = ((farm_data["R_sp"][0] + farm_data["Q_sp"][0]) *
                 (float(farm_data["A_sp"][0]) / farm_data["A_p"][0]))
    total = farm_data["R_p"][0] + farm_data["Q_p"][0]
    cur_codes = [111, 112]
    proportions = naics.get_proportions(cur_codes, data_tree, _AST_PRT_DF_NM,
                                        [_LAND_COL_NM, _DEPR_COL_NM])
    # Look each industry up once; both passes below use the same nodes.
    cur_inds = [naics.find_naics(data_tree, ind_code)
                for ind_code in cur_codes]
    # Combined partnership land and depreciable assets of the industries:
    total_pa = 0
    for cur_ind in cur_inds:
        cur_df = cur_ind.data.dfs[_AST_PRT_DF_NM]
        total_pa += (cur_df[_LAND_COL_NM][0] + cur_df[_DEPR_COL_NM][0])
    # Fill in the farm land and fixed asset values of each industry:
    for i, cur_ind in enumerate(cur_inds):
        farm_df = cur_ind.data.dfs[_FARM_DF_NM]
        land = (land_mult *
                cur_ind.data.dfs[_AST_PRT_DF_NM][_LAND_COL_NM][0] /
                total_pa)
        # Single-step .loc writes: chained ``df[col][0] = value`` may write
        # to a temporary copy and leave the dataframe unchanged.
        farm_df.loc[0, "Land"] = land
        farm_df.loc[0, "FA"] = (proportions.iloc[1, i] * total) - land
    # Default:
    if blueprint is None and _TOT_CORP_DF_NM in data_tree.enum_inds[
            0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    naics.pop_back(tree=data_tree, df_list=[_FARM_DF_NM])
    naics.pop_forward(tree=data_tree,
                      df_list=[_FARM_DF_NM],
                      blueprint=blueprint,
                      blue_tree=blue_tree)
    return data_tree
Example #4
0
def load_proprietorship(soi_tree=None,
                       from_out=False, get_all=False,
                       get_nonfarm=False, get_farm=False,
                       output_data=False, out_path=None):
    """ Loading the proprietorship tax soi data into a NAICS Tree.

    :param soi_tree: The NAICS tree to put all of the data in. Default is a
           newly generated tree (created per call, never shared).
    :param from_out: Forwarded to the nonfarm and farm loaders. If the
           proprietorship soi data is already in an output file, then it can
           be read in directly from the output.
    :param get_all: Load both the nonfarm and the farm data, regardless of
           ``get_nonfarm`` and ``get_farm``.
    :param get_nonfarm: Load the nonfarm proprietorship data.
    :param get_farm: Load the farm proprietorship data.
    :param output_data: Print the proprietorship dataframes to csv files in
           the output folder.
    :param out_path: The output path used when printing the dataframes.
           Defaults to the module-level output directory.
    :returns: The NAICS tree with the requested dataframes loaded.
    """
    # Build the default tree per call instead of once in the signature:
    if soi_tree is None:
        soi_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _OUT_DIR
    # get_all switches on every loader (it was previously ignored):
    if get_all:
        get_nonfarm = True
        get_farm = True
    # Load the soi nonfarm data into the NAICS tree:
    if get_nonfarm:
        soi_tree = prop.load_soi_nonfarm_prop(
            data_tree=soi_tree, from_out=from_out
        )
    # Load the farm data into to the NAICS tree:
    if get_farm:
        soi_tree = prop.load_soi_farm_prop(
            data_tree=soi_tree, from_out=from_out
        )
    # Output the data to csv files in the output folder:
    if output_data:
        naics.print_tree_dfs(tree=soi_tree, out_path=out_path,
                             data_types=[_NFARM_DF_NM, _FARM_DF_NM])
    return soi_tree
Example #5
0
def read_land(asset_tree):
    """ Read the land input file into a new NAICS tree.

    :param asset_tree: A NAICS tree whose industries carry a "LAND"
           dataframe. Its first industry is used to check that there is any
           corporate or non-corporate land at all, and the tree serves as the
           blueprint when the land data is populated forward.
    :returns: A newly generated NAICS tree with a "Land" dataframe (columns
           "All", "Corp" and "Non-Corp") on every industry. If the corporate
           and non-corporate land of the first industry of ``asset_tree`` sum
           to zero, the tree is returned with the dataframes as created by
           ``append_all``.
    """
    land_data = pd.read_csv(_LAND_IN_PATH)
    land_data = _LAND_IN_FILE_FCTR * land_data
    # Initializing NAICS tree for the land data:
    df_cols = ["All", "Corp", "Non-Corp"]
    land_tree = naics.generate_tree()
    land_tree.append_all(df_nm="Land", df_cols=df_cols)
    # Calculate the proportion that belong in corporate and non-corporate
    # tax categories:
    base_land_df = asset_tree.enum_inds[0].data.dfs["LAND"]
    corp_sum = 0.0
    for sector in _CORP_NMS:
        corp_sum += base_land_df[sector][0]
    non_corp_sum = 0.0
    for sector in _NCORP_NMS:
        non_corp_sum += base_land_df[sector][0]
    if corp_sum + non_corp_sum == 0:
        return land_tree
    # Initialize the total industry category--corresponding to NAICS code
    # of "1":
    land_df = land_tree.enum_inds[0].data.dfs["Land"]
    corp_land = land_data["Corporate"][0]
    non_corp_land = land_data["Non-Corporate"][0]
    # Single-step .loc writes: chained ``df[col][0] = value`` may write to a
    # temporary copy and leave the dataframe unchanged.
    land_df.loc[0, "Corp"] = corp_land
    land_df.loc[0, "Non-Corp"] = non_corp_land
    land_df.loc[0, "All"] = corp_land + non_corp_land
    # Use the asset_tree to populate the rest:
    naics.pop_back(land_tree, ["Land"])
    naics.pop_forward(land_tree, ["Land"], "LAND", asset_tree)
    return land_tree
def calibrate_debt(debt_tree=None, soi_tree=None, from_out=False, soi_from_out=False):
    """ This function is incomplete. This is supposed to do the debt
    calibrations.

    :param debt_tree: The NAICS tree to append the calibrated debt
           parameters to. Default is a newly generated tree (created per
           call, never shared).
    :param soi_tree: A tree with all of the relevant soi data. When omitted,
           the corporate soi data is pulled with ``pull_soi_data``.
    :param from_out: Not referenced in the body of this function.
    :param soi_from_out: Forwarded as ``from_out`` to ``pull_soi_data`` when
           ``soi_tree`` is not supplied.
    :returns: The debt tree with the "liabilities" and "equity" dataframes
           loaded and the liabilities populated forward.
    """
    if soi_tree is None:
        soi_tree = pull_soi_data(get_corp=True, from_out=soi_from_out)
    # Build the default tree per call instead of once in the signature:
    if debt_tree is None:
        debt_tree = naics.generate_tree()
    # Locate the debt parameter directory and make its module importable;
    # the path is only added once so repeated calls do not grow sys.path.
    debt_dir = os.path.abspath(os.path.join(_PARAM_DIR, "debt"))
    debt_data_dir = os.path.join(debt_dir, "data")
    if debt_dir not in sys.path:
        sys.path.append(debt_dir)
    # NOTE(review): ``debt`` is not referenced below; the import is kept in
    # case the module is needed for its import-time effects -- confirm.
    import debt_calibration as debt

    # Input files (the csv contents are read by naics.load_tree_dfs; the
    # previous extra pd.read_csv calls stored results that were never used):
    lblty_file = os.path.join(debt_data_dir, "liabilities.csv")
    print(lblty_file)
    eqty_file = os.path.join(debt_data_dir, "equity.csv")
    debt_tree = naics.load_tree_dfs(input_file=lblty_file, dfs_name="liabilities", tree=debt_tree)
    debt_tree = naics.load_tree_dfs(input_file=eqty_file, dfs_name="equity", tree=debt_tree)
    # Populate the liabilities forward using the soi tree as the blueprint:
    naics.pop_forward(
        tree=debt_tree, df_list=["liabilities"], blue_tree=soi_tree, blueprint="tot_corps", sub_print=["Interest Paid"]
    )
    return debt_tree
Example #7
0
def load_partner(soi_tree=None,
                 from_out=False, output_data=False,
                 out_path=None):
    """ Loading the partnership tax soi data into a NAICS Tree.

    :param soi_tree: The NAICS tree to put all of the data in. When omitted
           a new tree is generated for this call.
    :param from_out: If the partnership soi data is already in an output
           file, then it can be read in directly from the output. The flag is
           handed to each of the three partnership loaders.
    :param output_data: Print the partnership dataframes to csv files in the
           output folder.
    :param out_path: Where the dataframes are printed when ``output_data``
           is set. Falls back to the module-level output directory.
    :returns: The NAICS tree holding the income, asset and partner type
           dataframes.
    """
    # A tree built in the signature would be created once at definition time
    # and then shared (and mutated) by every call that omits soi_tree.
    if soi_tree is None:
        soi_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _OUT_DIR
    # Load the soi income data into the NAICS tree:
    soi_tree = prt.load_income(data_tree=soi_tree, from_out=from_out)
    # Load the soi asset data into the NAICS tree:
    soi_tree = prt.load_asset(data_tree=soi_tree, from_out=from_out)
    # Load the soi partnership types data into the NAICS tree:
    soi_tree = prt.load_type(data_tree=soi_tree, from_out=from_out)
    # Output the data to csv files in the output folder:
    if output_data:
        naics.print_tree_dfs(tree=soi_tree, out_path=out_path,
                             data_types=[_INC_DF_NM, _AST_DF_NM, _TYP_DF_NM])
    return soi_tree
def calibrate_debt(debt_tree=None, soi_tree=None, from_out=False,
                   soi_from_out=False):
    """ Load the liabilities and equity data into a NAICS debt tree.

    :param debt_tree: The NAICS tree to load the debt dataframes into.
           Default is a newly generated tree.
    :param soi_tree: A tree with the soi data used as the blueprint. When
           omitted, the corporate soi data is pulled with ``pull_soi_data``.
    :param from_out: Not referenced in the body of this function.
    :param soi_from_out: Forwarded as ``from_out`` to ``pull_soi_data`` when
           ``soi_tree`` is not supplied.
    :returns: The debt tree with the "liabilities" and "equity" dataframes
           loaded and the liabilities populated forward.
    """
    if soi_tree is None:
        soi_tree = pull_soi_data(get_corp=True, from_out=soi_from_out)
    if debt_tree is None:
        debt_tree = naics.generate_tree()
    # os.path.join picks the separator of the running platform; the previous
    # hard-coded "\\" only produced valid paths on Windows.
    debt_dir = os.path.abspath(os.path.join(_PARAM_DIR, "debt"))
    debt_data_dir = os.path.join(debt_dir, "data")
    # Only add the directory once so repeated calls do not grow sys.path:
    if debt_dir not in sys.path:
        sys.path.append(debt_dir)
    # NOTE(review): ``debt`` is not referenced below; the import is kept in
    # case the module is needed for its import-time effects -- confirm.
    import debt_calibration as debt
    # Input files (the csv contents are read by naics.load_tree_dfs; the
    # previous extra pd.read_csv calls stored results that were never used):
    lblty_file = os.path.join(debt_data_dir, "liabilities.csv")
    print(lblty_file)
    eqty_file = os.path.join(debt_data_dir, "equity.csv")
    debt_tree = naics.load_tree_dfs(input_file=lblty_file, dfs_name="liabilities", tree=debt_tree)
    debt_tree = naics.load_tree_dfs(input_file=eqty_file, dfs_name="equity", tree=debt_tree)
    # Populate the liabilities forward using the soi tree as the blueprint:
    naics.pop_forward(tree=debt_tree, df_list=["liabilities"],
                      blue_tree=soi_tree, blueprint="tot_corps",
                      sub_print=["Interest Paid"])
    return debt_tree
def pull_soi_data(soi_tree=None, from_out=False,
                  get_all=False, get_corp=False,
                  get_tot=False, get_s=False,
                  get_c=False, get_prt=False,
                  get_prop=False, get_farm_prop=False,
                  output_data=False, out_path=None):
    """ Load the requested soi data sets into a single NAICS tree.

    :param soi_tree: The NAICS tree to put all of the data in. Default is a
           newly generated tree (created per call, never shared).
    :param from_out: Forwarded to every loader.
    :param get_all: Turn on every ``get_*`` flag below.
    :param get_corp: Forwarded to ``load_corporate`` as its ``get_all``.
    :param get_tot: Forwarded to ``load_corporate``.
    :param get_s: Forwarded to ``load_corporate``.
    :param get_c: Forwarded to ``load_corporate``.
    :param get_prt: Load the partnership data.
    :param get_prop: Forwarded to ``load_proprietorship`` as ``get_nonfarm``.
    :param get_farm_prop: Forwarded to ``load_proprietorship`` as
           ``get_farm``.
    :param output_data: Forwarded to every loader.
    :param out_path: Forwarded to every loader.
    :returns: The NAICS tree after all requested loaders have run.
    """
    # Build the default tree per call instead of once in the signature:
    if soi_tree is None:
        soi_tree = naics.generate_tree()
    # If get_all, set all booleans to true:
    if get_all:
        get_corp = True
        get_tot = True
        get_s = True
        get_c = True
        get_prt = True
        get_prop = True
        # This used to assign an unused local named ``get_farm``, so get_all
        # never enabled the farm proprietorship data.
        get_farm_prop = True
    # Import the soi_processing custom module (the directory is only added
    # once so repeated calls do not grow sys.path):
    soi_dir = os.path.join(_DATA_DIR, "soi")
    if soi_dir not in sys.path:
        sys.path.append(soi_dir)
    import soi_processing as soi
    # Loading the soi corporate data into the NAICS tree:
    soi_tree = soi.load_corporate(soi_tree=soi_tree, from_out=from_out,
                                  get_all=get_corp, get_tot=get_tot,
                                  get_s=get_s, get_c=get_c,
                                  output_data=output_data, out_path=out_path)
    # Loading the soi partnership data into the NAICS tree:
    if get_prt:
        soi_tree = soi.load_partner(soi_tree=soi_tree, from_out=from_out,
                                    output_data=output_data, out_path=out_path)
    # Loading the soi proprietorship data into the NAICS tree:
    soi_tree = soi.load_proprietorship(
        soi_tree=soi_tree, from_out=from_out,
        get_nonfarm=get_prop, get_farm=get_farm_prop,
        output_data=output_data, out_path=out_path
    )
    return soi_tree
Example #10
0
def load_proprietorship(soi_tree=None,
                        from_out=False,
                        get_all=False,
                        get_nonfarm=False,
                        get_farm=False,
                        output_data=False,
                        out_path=None):
    """ Loading the proprietorship tax soi data into a NAICS Tree.

    :param soi_tree: The NAICS tree to put all of the data in. When omitted
           a new tree is generated for this call.
    :param from_out: If the proprietorship soi data is already in an output
           file, then it can be read in directly from the output. The flag is
           handed to the nonfarm and farm loaders.
    :param get_all: Shorthand for requesting both the nonfarm and the farm
           data.
    :param get_nonfarm: Load the nonfarm proprietorship data.
    :param get_farm: Load the farm proprietorship data.
    :param output_data: Print the proprietorship dataframes to csv files in
           the output folder.
    :param out_path: Where the dataframes are printed when ``output_data``
           is set. Falls back to the module-level output directory.
    :returns: The NAICS tree with the requested dataframes loaded.
    """
    # A tree built in the signature would be created once at definition time
    # and then shared (and mutated) by every call that omits soi_tree.
    if soi_tree is None:
        soi_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _OUT_DIR
    # Honor get_all, which was accepted but never acted upon before:
    if get_all:
        get_nonfarm = True
        get_farm = True
    # Load the soi nonfarm data into the NAICS tree:
    if get_nonfarm:
        soi_tree = prop.load_soi_nonfarm_prop(data_tree=soi_tree,
                                              from_out=from_out)
    # Load the farm data into to the NAICS tree:
    if get_farm:
        soi_tree = prop.load_soi_farm_prop(data_tree=soi_tree,
                                           from_out=from_out)
    # Output the data to csv files in the output folder:
    if output_data:
        naics.print_tree_dfs(tree=soi_tree,
                             out_path=out_path,
                             data_types=[_NFARM_DF_NM, _FARM_DF_NM])
    return soi_tree
Example #11
0
def load_corporate(soi_tree=None,
                   from_out=False,
                   get_all=False,
                   get_tot=False,
                   get_s=False,
                   get_c=False,
                   output_data=False,
                   out_path=None):
    """ Loading the corporate tax soi data into a NAICS Tree.

    :param soi_tree: The NAICS tree to put all of the data in. Default is a
           newly generated tree (created per call, never shared).
    :param from_out: If the corporate soi data is already in an output folder,
           then it can be read in directly from the output.
    :param get_all: Get corporate soi data for all kinds of corporations.
    :param get_tot: Get the aggregate soi data for corporations.
    :param get_s: Get the soi data for s corporations.
    :param get_c: Interpolate the soi data for c corporations. Ignored unless
           both the aggregate and the s corporation data are requested.
    :param output_data: Print the corporate dataframes to csv files in the
           output folder.
    :param out_path: The output path used when printing the dataframes.
           Defaults to the module-level output directory.
    :returns: The NAICS tree with the requested corporate dataframes loaded.

    .. note: Because there is only data on the aggregate and s corporations,
       the c corporations data can only be interpolated if the other two have
       been calculated.
    """
    # Build the default tree per call instead of once in the signature:
    if soi_tree is None:
        soi_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _OUT_DIR
    # Initializing booleans based on the initial input booleans:
    if get_all:
        get_tot = True
        get_s = True
        get_c = True
    # The c corporation data needs both of the other data sets:
    if not (get_tot and get_s):
        get_c = False
    # Load the total corporate soi data into the NAICS tree:
    if get_tot:
        soi_tree = corp.load_soi_tot_corp(data_tree=soi_tree,
                                          from_out=from_out)
        if output_data:
            naics.print_tree_dfs(tree=soi_tree,
                                 out_path=out_path,
                                 data_types=[_TOT_CORP_DF_NM])
    # Load the S-corporate soi data into the NAICS tree:
    if get_s:
        soi_tree = corp.load_soi_s_corp(data_tree=soi_tree, from_out=from_out)
        if output_data:
            naics.print_tree_dfs(tree=soi_tree,
                                 out_path=out_path,
                                 data_types=[_S_CORP_DF_NM])
    # Calculate the C-corporate soi data for the NAICS tree:
    if get_c:
        soi_tree = corp.calc_c_corp(data_tree=soi_tree, from_out=from_out)
        if output_data:
            naics.print_tree_dfs(tree=soi_tree,
                                 out_path=out_path,
                                 data_types=[_C_CORP_DF_NM])
    return soi_tree
Example #12
0
def read_inventories(asset_tree):
    """ Read the inventory workbook into a new NAICS tree.

    :param asset_tree: A NAICS tree whose industries carry an "INV"
           dataframe. It is used to apportion each crosswalk entry over
           industries and columns, and as the blueprint when the inventory
           data is populated forward.
    :returns: A newly generated NAICS tree with an "Inventories" dataframe
           (columns "All", "Corp" and "Non-Corp") on every industry.
    """
    # Opening the excel file with the inventory data:
    inv_book = xlrd.open_workbook(_INV_IN_PATH)
    sht0 = inv_book.sheet_by_index(0)
    num_rows = sht0.nrows
    num_cols = sht0.ncols
    # Find the starting index in worksheet.
    cur_index = naics.search_ws(sht0, 1, 25, True, [0, 0], True)
    check_index = naics.search_ws(sht0, "line", 20)
    # Both searches are expected to land in the same column; report a
    # mismatch but carry on, as before.
    if cur_index[1] != check_index[1]:
        print("ERROR: inventory worksheet start column (%s) does not match "
              "the 'line' header column (%s)"
              % (cur_index[1], check_index[1]))
    # Reading in the crosswalk:
    inv_cross = pd.read_csv(_INV_IN_CROSS_PATH)
    num_cross = inv_cross.shape[0]
    # Creating a tree for the inventory data:
    data_cols = ["All", "Corp", "Non-Corp"]
    inv_tree = naics.generate_tree()
    inv_tree.append_all(df_nm="Inventories", df_cols=data_cols)
    # One value per crosswalk entry; entries without a numeric value stay 0:
    inv_data = np.zeros(num_cross)
    # Walk down the worksheet, matching rows to crosswalk entries in order:
    cross_index = 0
    for i in range(cur_index[0], num_rows):
        if cross_index >= num_cross:
            break
        cur_list = str(sht0.cell_value(i, cur_index[1])).strip()
        cur_name = str(sht0.cell_value(i, cur_index[1] + 1)).strip()
        checks = ((cur_list == str(inv_cross["List"][cross_index])) and
                  (cur_name == str(inv_cross["Industry"][cross_index])))
        if not checks:
            continue
        # The crosswalk entry is consumed even if its value is unreadable.
        matched_index = cross_index
        cross_index += 1
        try:
            # The value is taken from the last column of the worksheet.
            cur_value = float(sht0.cell_value(i, num_cols - 1))
        except ValueError:
            continue
        # Data is in billions:
        inv_data[matched_index] = _INV_IN_FILE_FCTR * cur_value
    # Distribute every crosswalk value over its industries and columns:
    for i in range(num_cross):
        cur_codes = inv_cross["NAICS"][i].strip().split(".")
        proportions = naics.get_proportions(cur_codes, asset_tree, "INV")
        for j in range(proportions.shape[1]):
            # Row 0 is used as the industry position in enum_inds and row 1
            # as the share of the value given to that industry.
            ind_index = int(proportions.iloc[0, j])
            cur_ind = inv_tree.enum_inds[ind_index]
            prev_df = asset_tree.enum_inds[ind_index].data.dfs["INV"]
            row_total = sum(prev_df.iloc[0, :])
            if row_total != 0:
                cur_dfs = ((prev_df / row_total) *
                           (inv_data[i] * proportions.iloc[1, j]))
                inv_df = cur_ind.data.dfs["Inventories"]
                inv_df["All"] += sum(cur_dfs.iloc[0, :])
                for k in _CORP_NMS:
                    inv_df["Corp"] += cur_dfs[k][0]
                for k in _NCORP_NMS:
                    inv_df["Non-Corp"] += cur_dfs[k][0]
    # Populate the remaining levels of the tree:
    naics.pop_back(inv_tree, ["Inventories"])
    naics.pop_forward(inv_tree, ["Inventories"], "INV", asset_tree)
    return inv_tree
Example #13
0
def calc_tax_depr_rates(asset_tree, inv_tree, land_tree):
    """ Calculate an asset-weighted tax depreciation rate per industry.

    :param asset_tree: A NAICS tree whose industries carry one fixed asset
           dataframe for each of "All", "Corp" and "Non-Corp".
    :param inv_tree: A NAICS tree whose industries carry an "Inventories"
           dataframe with the same three columns.
    :param land_tree: A NAICS tree whose industries carry a "Land" dataframe
           with the same three columns.
    :returns: A newly generated NAICS tree with a "Tax" dataframe (columns
           "All", "Corp" and "Non-Corp") holding the rate of each industry.
           The rate is 0 when the industry has no capital stock or no fixed
           assets.
    """
    tax_data = pd.read_csv(_TAX_DEPR_IN_PATH).fillna(0)
    tax_assets = tax_data["Asset Type"]
    # Real Interest Rate:
    r = .05
    # Declining balance factor of every depreciation method:
    tax_mthds = {"GDS 200%": 2.0, "GDS 150%": 1.5, "GDS SL": 1.0, "ADS SL": 1.0}
    # Rate of every asset type, in the row order of the input file. These
    # used to be written to positional column 0 of a dataframe whose columns
    # came from an unordered set, so they could land in the "Asset" column.
    tax_rates = np.zeros(len(tax_assets))

    # Compute the tax depreciation rates:
    for i in range(len(tax_assets)):
        tax_method = tax_data['Method'][i]
        tax_system = tax_data['System'][i]
        # The life is read from the column named after the asset's system.
        tax_life = tax_data[tax_system][i]
        tax_b = tax_mthds[tax_method]
        tax_beta = tax_b / tax_life
        if tax_method == 'GDS 200%' or tax_method == 'GDS 150%':
            tax_star = tax_life * (1 - (1 / tax_b))
            tax_z = (((tax_beta / (tax_beta + r)) *
                      (1 - np.exp(-1 * (tax_beta + r) * tax_star)))
                     + ((np.exp(-1 * tax_beta * tax_star) /
                         ((tax_life - tax_star) * r)) *
                        (np.exp(-1 * r * tax_star) -
                         np.exp(-1 * r * tax_life))))
            tax_rates[i] = r / ((1 / tax_z) - 1)
        else:
            tax_z = ((1 - np.exp(-1 * r * tax_life)) / (r * tax_life))
            # NOTE(review): this branch converts tax_z to a rate with a
            # different expression than the branch above -- confirm intended.
            tax_rates[i] = (tax_z * r) / (1 + r - tax_z)
    #
    types = ["All", "Corp", "Non-Corp"]
    # Initialize tax depreciation rates tree:
    depr_tree = naics.generate_tree()
    depr_tree.append_all(df_nm="Tax", df_cols=types)
    # The number of asset columns is taken from the first industry:
    num_assets = len(asset_tree.enum_inds[0].data.dfs['Corp'].columns)
    for i in types:
        # Iterates over every industry in the tree
        for j in range(len(depr_tree.enum_inds)):
            # grabs the assets for the industry
            asset_df = asset_tree.enum_inds[j].data.dfs[i]
            # calculates the sum of all the depreciation in the industry,
            # multiplying the amount of each asset by its corresponding
            # depreciation rate (asset column k is paired with input row k)
            asset_depreciation = 0
            for k in range(num_assets):
                asset_depreciation += asset_df.iloc[0, k] * tax_rates[k]
            # calculates the total capital stock in the industry
            tot_assets = sum(asset_df.iloc[0, :])
            tot_inv = inv_tree.enum_inds[j].data.dfs["Inventories"][i][0]
            tot_land = land_tree.enum_inds[j].data.dfs["Land"][i][0]
            total_capital_stock = tot_assets + tot_inv + tot_land
            # calculates the weighted average depreciation rate for assets
            # only (can be changed to include inventories and land). The
            # divisor is tot_assets, so it has to be non-zero as well.
            if total_capital_stock != 0 and tot_assets != 0:
                rate = asset_depreciation / tot_assets
            else:
                rate = 0
            # Single positional write; chained ``df[col].iloc[0] = value``
            # may write to a temporary copy instead of the dataframe.
            tax_df = depr_tree.enum_inds[j].data.dfs['Tax']
            tax_df.iloc[0, tax_df.columns.get_loc(i)] = rate

    return depr_tree
Example #14
0
def load_type(data_tree=None,
              blue_tree=None, blueprint=None,
              from_out=False, out_path=None):
    """ This function loads the soi partnership income by partner type data.

    :param data_tree: The NAICS tree to read the data into. Default is a
           newly generated tree (created per call, never shared).
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
           When omitted, the partner income dataframe is used if the first
           industry has one, otherwise the total corporate dataframe if
           present.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe; passed
           through to ``naics.pop_forward``.
    :param from_out: Whether to read in the data from output.
    :param out_path: The path read when ``from_out`` is set. Defaults to the
           module-level partner type output path.
    :returns: The NAICS tree with the partner type dataframe added.
    """
    # Build the default tree per call instead of once in the signature:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _TYP_OUT_PATH
    # If from_out, load the data tree from output:
    if from_out:
        return naics.load_tree_dfs(input_path=out_path, tree=data_tree)
    # Opening data on income by partner type:
    wb = xlrd.open_workbook(_TYP_IN_PATH)
    ws = wb.sheet_by_index(0)
    num_rows = ws.nrows
    # Initializing dataframe to hold pertinent type income data (the column
    # names are materialized as a list; a dict view is not a sequence):
    typ_df = pd.DataFrame(np.zeros((ws.ncols - 1, len(_TYP_IN_ROW_NMS))),
                          columns=list(_TYP_DF_DICT.values()))
    # Extracting the data. For each input row:
    for in_row_nm in _TYP_IN_ROW_NMS:
        df_col_key = _TYP_IN_ROWS_DF_DICT[in_row_nm]
        df_col_nm = _TYP_DF_DICT[df_col_key]
        row_nm_lower = in_row_nm.lower()
        # The first worksheet row whose label contains the name is used:
        for ws_row_index in range(num_rows):
            ws_row_nm = str(ws.cell_value(ws_row_index, 0)).lower()
            if row_nm_lower in ws_row_nm:
                typ_df[df_col_nm] = ws.row_values(ws_row_index, 1)
                break
    # Scaling the data to the correct units:
    typ_df = typ_df * _TYP_FILE_FCTR
    # Reading in the crosswalks between the columns and the NAICS codes:
    typ_cross = pd.read_csv(_TYP_IN_CROSS_PATH)
    #
    data_tree = naics.load_data_with_cross(
        data_tree=data_tree, data_df=typ_df,
        cross_df=typ_cross, df_nm=_TYP_DF_NM
    )
    # Default blueprint is partner income, and, if not, then tot_corps:
    if blueprint is None:
        root_df_nms = data_tree.enum_inds[0].data.dfs.keys()
        if _INC_DF_NM in root_df_nms:
            blueprint = _INC_DF_NM
        elif _TOT_CORP_DF_NM in root_df_nms:
            blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_TYP_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_TYP_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree
Example #15
0
def calc_assets(soi_tree, asset_tree=None):
    """ Calculating a breakdown of the various sector type's assets
    into fixed assets, inventories, and land.

    :param soi_tree: A NAICS tree containing all the pertinent soi data.
    :param asset_tree: The NAICS tree to put all of the data in. Default is
           a newly generated tree (created per call; this function appends
           dataframes to the tree, so a shared default would be mutated by
           every call).
    :returns: The asset tree with the "FA", "INV" and "LAND" dataframes
           filled in for every industry.
    """
    if asset_tree is None:
        asset_tree = naics.generate_tree()

    def _set_cell(df, col, value):
        # Write into row 0 of an existing column in a single step; chained
        # ``df[col][0] = value`` may write to a temporary copy instead.
        df.iloc[0, df.columns.get_loc(col)] = value

    # Initializing dataframes for all NAICS industries:
    asset_tree.append_all(df_nm="FA", df_cols=_ALL_SECTORS)
    asset_tree.append_all(df_nm="INV", df_cols=_ALL_SECTORS)
    asset_tree.append_all(df_nm="LAND", df_cols=_ALL_SECTORS)
    # Calculate fixed assets, inventories, and land for each industry/sector
    for i in range(len(asset_tree.enum_inds)):
        cur_dfs = soi_tree.enum_inds[i].data.dfs
        out_dfs = asset_tree.enum_inds[i].data.dfs
        fa_df = out_dfs["FA"]
        inv_df = out_dfs["INV"]
        land_df = out_dfs["LAND"]
        # Total of all the partner data for the current industry:
        partner_sum = sum(cur_dfs[_TYP_DF_NM].iloc[0, :])
        # C-Corporations, then S-Corporations: values are copied as-is.
        for sector_key, corp_df_nm in (("C_CORP", _C_CORP_DF_NM),
                                       ("S_CORP", _S_CORP_DF_NM)):
            sector = _ALL_SECTORS_DICT[sector_key]
            cur_df = cur_dfs[corp_df_nm]
            _set_cell(fa_df, sector, cur_df["depreciable_assets"][0])
            _set_cell(inv_df, sector, cur_df["inventories"][0])
            _set_cell(land_df, sector, cur_df["land"][0])
        # Partnership sectors: the partnership assets are split by each
        # partner type's share of the partner total.
        prt_ast_df = cur_dfs[_AST_DF_NM]
        for sector in cst.DFLT_PRT_TYP_DF_COL_NMS_DICT.values():
            if partner_sum != 0:
                ratio = abs(float(
                    cur_dfs[_TYP_DF_NM][sector][0])) / partner_sum
            else:
                # NOTE(review): shape[0] is the row count of the partner
                # type dataframe -- confirm whether the number of partner
                # types (shape[1]) was intended for an even split.
                ratio = abs(1.0 / float(cur_dfs[_TYP_DF_NM].shape[0]))
            _set_cell(fa_df, sector,
                      abs(ratio * prt_ast_df["depreciable_assets_net"][0]))
            _set_cell(inv_df, sector,
                      abs(ratio * prt_ast_df["inventories_net"][0]))
            _set_cell(land_df, sector,
                      abs(ratio * prt_ast_df["land_net"][0]))
        # Sole Proprietorships: the partnership assets are scaled by the
        # ratio of nonfarm depreciation deductions to partner depreciation,
        # and the farm values are added on top.
        sector = _ALL_SECTORS_DICT["SOLE_PROP"]
        if cur_dfs[_INC_DF_NM]["depreciation"][0] != 0:
            ratio = abs(
                float(cur_dfs[_NFARM_DF_NM]["depreciation_deductions"][0]) /
                cur_dfs[_INC_DF_NM]["depreciation"][0])
        else:
            ratio = 0.0
        farm_df = cur_dfs[_FARM_DF_NM]
        _set_cell(fa_df, sector,
                  abs((ratio * prt_ast_df["depreciable_assets_net"][0]) +
                      farm_df["FA"][0]))
        _set_cell(inv_df, sector,
                  abs(ratio * prt_ast_df["inventories_net"][0]))
        # Farm land belongs with land; it used to be added to inventories.
        _set_cell(land_df, sector,
                  abs((ratio * prt_ast_df["land_net"][0]) +
                      farm_df["Land"][0]))
    return asset_tree
Example #16
0
def read_inventories(asset_tree):
    """ Read the inventory workbook into a new NAICS tree.

    :param asset_tree: A NAICS tree whose industries carry an "INV"
           dataframe. It determines how each crosswalk value is shared out
           over industries and columns, and it is the blueprint used when
           the inventory data is populated forward.
    :returns: A newly generated NAICS tree in which every industry has an
           "Inventories" dataframe with the columns "All", "Corp" and
           "Non-Corp".
    """
    # Opening the excel file with the inventory data:
    inv_book = xlrd.open_workbook(_INV_IN_PATH)
    sht0 = inv_book.sheet_by_index(0)
    num_rows = sht0.nrows
    num_cols = sht0.ncols
    # Find the starting index in worksheet.
    cur_index = naics.search_ws(sht0, 1, 25, True, [0, 0], True)
    check_index = naics.search_ws(sht0, "line", 20)
    # The two searches should agree on the column; a mismatch is reported
    # but, as before, does not stop the function.
    if cur_index[1] != check_index[1]:
        print("ERROR: inventory worksheet start column (%s) does not match "
              "the 'line' header column (%s)"
              % (cur_index[1], check_index[1]))
    # Reading in the crosswalk:
    inv_cross = pd.read_csv(_INV_IN_CROSS_PATH)
    num_cross = inv_cross.shape[0]
    # Creating a tree for the inventory data:
    data_cols = ["All", "Corp", "Non-Corp"]
    inv_tree = naics.generate_tree()
    inv_tree.append_all(df_nm="Inventories", df_cols=data_cols)
    # Value of each crosswalk entry; 0 where no numeric value was found:
    inv_data = np.zeros(num_cross)
    # Match worksheet rows against the crosswalk entries, in order:
    cross_index = 0
    for i in range(cur_index[0], num_rows):
        if cross_index >= num_cross:
            break
        cur_list = str(sht0.cell_value(i, cur_index[1])).strip()
        cur_name = str(sht0.cell_value(i, cur_index[1] + 1)).strip()
        checks = ((cur_list == str(inv_cross["List"][cross_index])) and
                  (cur_name == str(inv_cross["Industry"][cross_index])))
        if not checks:
            continue
        # Move on to the next crosswalk entry whether or not the value of
        # this one can be parsed.
        matched_index = cross_index
        cross_index += 1
        try:
            # The value sits in the last column of the worksheet.
            cur_value = float(sht0.cell_value(i, num_cols - 1))
        except ValueError:
            continue
        # Data is in billions:
        inv_data[matched_index] = _INV_IN_FILE_FCTR * cur_value
    # Share every crosswalk value out over its industries and columns:
    for i in range(num_cross):
        cur_codes = inv_cross["NAICS"][i].strip().split(".")
        proportions = naics.get_proportions(cur_codes, asset_tree, "INV")
        for j in range(proportions.shape[1]):
            # Row 0 is used as the position in enum_inds, row 1 as the
            # share of the value that goes to that industry.
            ind_index = int(proportions.iloc[0, j])
            cur_ind = inv_tree.enum_inds[ind_index]
            prev_df = asset_tree.enum_inds[ind_index].data.dfs["INV"]
            row_total = sum(prev_df.iloc[0, :])
            if row_total != 0:
                cur_dfs = ((prev_df / row_total) *
                           (inv_data[i] * proportions.iloc[1, j]))
                inv_df = cur_ind.data.dfs["Inventories"]
                inv_df["All"] += sum(cur_dfs.iloc[0, :])
                for k in _CORP_NMS:
                    inv_df["Corp"] += cur_dfs[k][0]
                for k in _NCORP_NMS:
                    inv_df["Non-Corp"] += cur_dfs[k][0]
    # Populate the remaining levels of the tree:
    naics.pop_back(inv_tree, ["Inventories"])
    naics.pop_forward(inv_tree, ["Inventories"], "INV", asset_tree)
    return inv_tree
def load_soi_farm_prop(data_tree=None,
                       blue_tree=None, blueprint=None,
                       from_out=False, out_path=_FARM_PROP_OUT_PATH):
    """ This function loads the soi farm proprietorship data.
    
    :param data_tree: The NAICS tree to read the data into. Unless from_out
           is set, every industry must already hold a "PA_assets" dataframe,
           which is used to split the farm totals between the NAICS codes
           111 and 112. If None, a new tree is generated for this call.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. It is
           passed through to naics.pop_forward.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the farm dataframes forward.
           If None and the tree has a total-corporation dataframe, that
           dataframe is used.
    :param from_out: Whether to read in the data from output.
    :param out_path: The path of the output file to read from when
           from_out is set.
    :returns: The NAICS tree with the farm proprietorship dataframes.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # (and mutated) across every call that relies on the default:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        return naics.load_tree_dfs(input_path=out_path, tree=data_tree)
    # Load Farm Proprietorship data:
    farm_data = pd.read_csv(_FARM_IN_PATH)
    new_farm_cols = ["Land", "FA"]
    # Give every industry an all-zero farm dataframe:
    for ind in data_tree.enum_inds:
        ind.append_dfs((_FARM_DF_NM,
                        pd.DataFrame(np.zeros((1, len(new_farm_cols))),
                                     columns=new_farm_cols)))
    # Aggregates taken from the first row of the input file.
    # NOTE(review): the meaning of the R_sp/Q_sp/A_sp/A_p columns and of
    # columns 0 and 2 is not visible here -- confirm against the input file.
    land_mult = ((farm_data["R_sp"][0] + farm_data["Q_sp"][0]) *
                 (float(farm_data["A_sp"][0]) / farm_data["A_p"][0]))
    total = farm_data.iloc[0, 0] + farm_data.iloc[0, 2]
    # The farm totals are distributed over these NAICS codes:
    cur_codes = [111, 112]
    proportions = naics.get_proportions(cur_codes, data_tree, "PA_assets",
                                 ["Land (Net)", "Depreciable assets (Net)"])
    farm_inds = [naics.find_naics(data_tree, code) for code in cur_codes]
    # Combined partnership land and depreciable assets of the farm codes:
    total_pa = 0
    for cur_ind in farm_inds:
        cur_df = cur_ind.data.dfs["PA_assets"]
        total_pa += (cur_df["Land (Net)"][0] +
                     cur_df["Depreciable assets (Net)"][0])
    # Land is scaled by the industry's partnership land share; fixed assets
    # are the industry's share of the total less its land:
    for i, cur_ind in enumerate(farm_inds):
        farm_df = cur_ind.data.dfs[_FARM_DF_NM]
        land = (land_mult *
                cur_ind.data.dfs["PA_assets"]["Land (Net)"][0] /
                total_pa)
        # Single-step .loc writes avoid assigning through a chained lookup:
        farm_df.loc[0, "Land"] = land
        farm_df.loc[0, "FA"] = (proportions.iloc[1, i] * total) - land
    # Default blueprint is the total-corporation dataframe, when present:
    if (blueprint is None and
            _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys()):
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_FARM_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_FARM_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree
Example #18
0
def load_pa_05_data(data_tree = None, blue_tree = None, blueprint = None):
    """ This function loads the "pa05" soi partnership data (one column per
    type of partner) into the "PA_types" dataframes of a NAICS tree.
    
    :param data_tree: The NAICS tree to read the data into. If None, a new
           tree is generated.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. It is
           passed through to naics.pop_forward.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the dataframes forward. If
           None, "PA_inc_loss" is used when the tree has it, otherwise
           "tot_corps" when the tree has that.
    :returns: The NAICS tree with the "PA_types" dataframes.
    :raises IOError: If the pa05 workbook or its crosswalk is not found in
            the partnership data directory.
    """
    # Defining constant factor (e.g. data is in thousands):
    pa_05_fctr = 10 ** 3
    # Defining constant list of types of partners:
    cols_05 = ["Corporate general partners", 
               "Corporate limited partners",
               "Individual general partners",
               "Individual limited partners",
               "Partnership general partners",
               "Partnership limited partners",
               "Tax-exempt organization general partners",
               "Tax-exempt organization limited partners",
               "Nominee and other general partners", 
               "Nominee and other limited partners"]
    if data_tree is None:
        data_tree = naics.generate_tree()
    # Locating the input files by name inside the partnership directory:
    pa_05_file = None
    pa_05_cross_file = None
    for file_nm in os.listdir(prt_dir):
        if "pa05.xls" in file_nm:
            # os.path.join keeps the path valid on every platform:
            pa_05_file = os.path.abspath(os.path.join(prt_dir, file_nm))
        elif "pa05_Crosswalk.csv" in file_nm:
            pa_05_cross_file = os.path.abspath(
                                    os.path.join(prt_dir, file_nm))
    # Fail with a clear message instead of an unbound-variable error:
    if pa_05_file is None or pa_05_cross_file is None:
        raise IOError("Could not find the pa05 workbook and its crosswalk "
                      "in " + str(prt_dir))
    #
    book_05 = xlrd.open_workbook(pa_05_file)
    sheet_05 = book_05.sheet_by_index(0)
    cur_rows = sheet_05.nrows
    # Extracting the relevant data: for each partner type, take the first
    # row whose first cell contains its name (case-insensitive).
    # NOTE(review): a type with no matching row stays None -- confirm the
    # workbook always contains every type.
    data_05 = [None]*len(cols_05)
    for i, col_nm in enumerate(cols_05):
        for row in range(0, cur_rows):
            if col_nm.lower() in str(sheet_05.cell_value(row, 0)).lower():
                data_05[i] = sheet_05.row_values(row, 1)
                break
    # Reformatting the data (one column per partner type):
    data_05 = pd.DataFrame(data_05).T
    # Data is in thousands of dollars:
    data_05 = data_05 * pa_05_fctr
    # Reading in the crosswalks between the columns and the NAICS codes:
    pa05cross = pd.read_csv(pa_05_cross_file)
    #
    data_tree = naics.load_data_with_cross(
                    data_tree = data_tree, data_df = data_05,
                    cross_df = pa05cross, data_cols = cols_05,
                    df_name = "PA_types"
                    )
    # Defaults:
    if blueprint is None:
        known_dfs = data_tree.enum_inds[0].data.dfs.keys()
        if "PA_inc_loss" in known_dfs:
            blueprint = "PA_inc_loss"
        elif "tot_corps" in known_dfs:
            blueprint = "tot_corps"
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=["PA_types"])
    naics.pop_forward(tree=data_tree, df_list=["PA_types"],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree
Example #19
0
def load_income(data_tree=None,
                blue_tree=None, blueprint=None,
                from_out=False, out_path=None):
    """ This function loads the soi partnership income data.
    
    :param data_tree: The NAICS tree to read the data into. If None, a new
           tree is generated for this call.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
           If None and the tree has a total-corporation dataframe, that
           dataframe is used.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. It is
           passed through to naics.pop_forward.
    :param from_out: Whether to read in the data from output.
    :param out_path: The output path to read from when from_out is set.
           Defaults to _INC_OUT_PATH.
    :returns: The NAICS tree with the income dataframes.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # (and mutated) across every call that relies on the default:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _INC_OUT_PATH
    # If from_out, load the data tree from output:
    if from_out:
        return naics.load_tree_dfs(input_path=out_path, tree=data_tree)
    # Opening data on net income/loss:
    wb = xlrd.open_workbook(_INC_IN_PATH)
    ws = wb.sheet_by_index(0)
    start_col = naics.search_ws(ws, _INC_STRT_COL_NM, 20)[1]
    # Initializing dataframe to hold pertinent income/loss data:
    data_df = pd.DataFrame(np.zeros((ws.ncols-start_col, 3)),
                           columns=_INC_PRT_DF_COL_NMS)
    # Extracting the data from the worksheet:
    for row in range(0, ws.nrows):
        # Going through each row of excel file, looking for input rows:
        row_nm = str(ws.cell_value(row, 0)).lower()
        if _INC_NET_INC_ROW_NM in row_nm:
            # Net income and net loss are read from the two rows below:
            data_df[_INC_NET_INC_COL_NM] = ws.row_values(row+1, start_col)
            data_df[_INC_NET_LOSS_COL_NM] = ws.row_values(row+2, start_col)
            # NOTE(review): the scan stops here, so a depreciation row is
            # only picked up if it comes before this row -- confirm against
            # the input workbook.
            break
        if _INC_DEPR_ROW_NM in row_nm:
            data_df[_INC_DEPR_COL_NM] = ws.row_values(row, start_col)
    # Scaling the data to the correct units:
    data_df = data_df * _INC_FILE_FCTR
    # Reading in the crosswalks between the columns and the NAICS codes:
    pa01cross = pd.read_csv(_INC_IN_CROSS_PATH)
    # Processing the inc/loss data into the NAICS tree:
    data_tree = naics.load_data_with_cross(
                    data_tree=data_tree, data_df=data_df,
                    cross_df=pa01cross, df_nm=_INC_DF_NM
                    )
    # Default blueprint is tot_corps:
    has_tot_df = _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys()
    if blueprint is None and has_tot_df:
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_INC_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_INC_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    
    return data_tree
Example #20
0
def load_corporate(soi_tree=None,
                   from_out=False, get_all=False,
                   get_tot=False, get_s=False, get_c=False,
                   output_data=False, out_path=None):
    """ Loading the corporate tax soi data into a NAICS Tree.
    
    :param soi_tree: The NAICS tree to put all of the data in. If None, a
           new tree is generated for this call.
    :param from_out: If the corporate soi data is already in an output folder,
           then it can be read in directly from the output.
    :param get_all: Get corporate soi data for all kinds of corporations.
    :param get_tot: Get the aggregate soi data for corporations.
    :param get_s: Get the soi data for s corporations.
    :param get_c: Interpolate the soi data for c corporations.
    :param output_data: Print the corporate dataframes to csv files in the
           output folder.
    :param out_path: The output path used when printing the dataframes.
           Defaults to _OUT_DIR.
    :returns: The NAICS tree with the requested corporate dataframes.
    
    .. note:: Because there is only data on the aggregate and s corporations,
       the c corporations data can only be interpolated if the other two have
       been calculated; get_c is ignored unless get_tot and get_s are set.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # (and mutated) across every call that relies on the default:
    if soi_tree is None:
        soi_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _OUT_DIR
    # Initializing booleans based of initial input booleans:
    if get_all:
        get_tot = True
        get_s = True
        get_c = True
    if not (get_tot and get_s):
        get_c = False
    # Load the total corporate soi data into the NAICS tree:
    if get_tot:
        soi_tree = corp.load_soi_tot_corp(data_tree=soi_tree,
                                          from_out=from_out)
        if output_data:
            naics.print_tree_dfs(tree=soi_tree, out_path=out_path,
                                 data_types=[_TOT_CORP_DF_NM])
    # Load the S-corporate soi data into the NAICS tree:
    if get_s:
        soi_tree = corp.load_soi_s_corp(data_tree=soi_tree,
                                        from_out=from_out)
        if output_data:
            naics.print_tree_dfs(tree=soi_tree, out_path=out_path,
                                 data_types=[_S_CORP_DF_NM])
    # Calculate the C-corporate soi data for the NAICS tree:
    if get_c:
        soi_tree = corp.calc_c_corp(data_tree=soi_tree,
                                    from_out=from_out)
        if output_data:
            naics.print_tree_dfs(tree=soi_tree, out_path=out_path,
                                 data_types=[_C_CORP_DF_NM])
    return soi_tree
Example #21
0
def load_proprietorship(soi_tree=None,
                        from_out=False, get_all=False,
                        get_nonfarm=False, get_farm=False,
                        output_data=False, out_path=_OUT_DIR):
    """ Loading the proprietorship soi data into a NAICS Tree.
    
    :param soi_tree: The NAICS tree to put all of the data in. If None, a
           new tree is generated for this call.
    :param from_out: Currently not forwarded to the loaders (see the notes
           in the body).
    :param get_all: Get both the nonfarm and the farm proprietorship data.
    :param get_nonfarm: Get the nonfarm proprietorship data.
    :param get_farm: Get the farm proprietorship data.
    :param output_data: Print the dataframes to csv files in the output
           folder.
    :param out_path: The output path used when printing the dataframes.
    :returns: The NAICS tree with the requested proprietorship data.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # (and mutated) across every call that relies on the default:
    if soi_tree is None:
        soi_tree = naics.generate_tree()
    # get_all switches both individual flags on:
    if get_all:
        get_nonfarm = True
        get_farm = True
    # Get the nonfarm proprietorship data:
    if get_nonfarm:
        # NOTE(review): the loader is called without arguments, so the
        # incoming soi_tree is replaced rather than extended and from_out is
        # ignored. Its signature is not visible here -- confirm whether it
        # accepts data_tree/from_out and forward them if so.
        soi_tree = prop.load_soi_nonfarm_prop()
        if output_data:
            # NOTE(review): an empty data_types list is passed; presumably
            # the nonfarm dataframe name belongs here -- confirm.
            naics.print_tree_dfs(tree=soi_tree, out_path=out_path,
                                 data_types=[])
    # Get the farm proprietorship data:
    if get_farm:
        soi_tree = prop.load_soi_farm_prop(data_tree=soi_tree)
    # Hand the tree back; callers assign the result of this function:
    return soi_tree
def calc_soi_assets(soi_tree, asset_tree=None):
    """ Calculating a breakdown of the various sector type's assets
    into fixed assets, inventories, and land. 
    
    :param soi_tree: A NAICS tree containing all the pertinent soi data.
    :param asset_tree: The NAICS tree to put all of the data in. If None, a
           new tree is generated for this call.
    :returns: Whatever soi_processing.calc_assets returns.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # across every call that relies on the default:
    if asset_tree is None:
        asset_tree = naics.generate_tree()
    # Import the soi_processing custom module. The directory is only added
    # once, so repeated calls do not keep growing sys.path:
    soi_dir = os.path.join(_DATA_DIR, "soi")
    if soi_dir not in sys.path:
        sys.path.append(soi_dir)
    import soi_processing as soi
    # Use soi processing helper function to do all the work:
    return soi.calc_assets(asset_tree=asset_tree, soi_tree=soi_tree)
def pull_soi_data(soi_tree=naics.generate_tree(),
                  from_out=False,
                  get_all=False,
                  get_corp=False,
                  get_tot=False,
                  get_s=False,
                  get_c=False,
                  get_prt=False,
                  get_prop=False,
                  get_farm_prop=False,
                  output_data=False,
                  out_path=None):
    """ Load the requested kinds of soi data into a NAICS tree.
    
    :param soi_tree: The NAICS tree to put all of the data in.
    :param from_out: Forwarded to every loader.
    :param get_all: Turn on every get_* flag below.
    :param get_corp: Forwarded to the corporate loader as its get_all flag.
    :param get_tot: Forwarded to the corporate loader.
    :param get_s: Forwarded to the corporate loader.
    :param get_c: Forwarded to the corporate loader.
    :param get_prt: Load the partnership data.
    :param get_prop: Forwarded to the proprietorship loader as get_nonfarm.
    :param get_farm_prop: Forwarded to the proprietorship loader as get_farm.
    :param output_data: Forwarded to every loader.
    :param out_path: Forwarded to every loader.
    :returns: The tree handed back by the last loader that ran, or soi_tree
              itself when no loader ran.
    """
    # get_all overrides all of the individual switches:
    if get_all:
        get_corp = get_tot = get_s = get_c = True
        get_prt = get_prop = get_farm_prop = True
    # Make the soi_processing custom module importable, then import it:
    sys.path.append(os.path.join(_DATA_DIR, "soi"))
    import soi_processing as soi
    # Keyword arguments common to all three loaders:
    shared_args = dict(from_out=from_out,
                       output_data=output_data,
                       out_path=out_path)
    # Corporate data:
    if get_corp or get_tot or get_s or get_c:
        soi_tree = soi.load_corporate(soi_tree=soi_tree,
                                      get_all=get_corp,
                                      get_tot=get_tot,
                                      get_s=get_s,
                                      get_c=get_c,
                                      **shared_args)
    # Partnership data:
    if get_prt:
        soi_tree = soi.load_partner(soi_tree=soi_tree, **shared_args)
    # Proprietorship data:
    if get_prop or get_farm_prop:
        soi_tree = soi.load_proprietorship(soi_tree=soi_tree,
                                           get_nonfarm=get_prop,
                                           get_farm=get_farm_prop,
                                           **shared_args)
    return soi_tree
def calc_soi_assets(soi_tree, asset_tree=naics.generate_tree()):
    """ Break the assets of the various sector types down into fixed
    assets, inventories, and land.
    
    All of the work is delegated to soi_processing.calc_assets.
    
    :param soi_tree: A NAICS tree containing all the pertinent soi data.
    :param asset_tree: The NAICS tree to put all of the data in.
    :returns: Whatever soi_processing.calc_assets returns.
    """
    # The custom module lives in the "soi" folder of the data directory;
    # put that folder on the module search path before importing it:
    sys.path.append(os.path.join(_DATA_DIR, "soi"))
    import soi_processing as soi

    # Hand both trees to the helper by keyword:
    tree_args = {"asset_tree": asset_tree, "soi_tree": soi_tree}
    return soi.calc_assets(**tree_args)
Example #25
0
def load_soi_partner_data(data_tree = None, get_pa01 = True, get_pa03 = True,
                          get_pa05 = True):
    """ Load the selected soi partnership tables into a NAICS tree.
    
    :param data_tree: The NAICS tree to read the data into. If None, a new
           tree is generated.
    :param get_pa01: Whether to run load_pa_01_data on the tree.
    :param get_pa03: Whether to run load_pa_03_data on the tree.
    :param get_pa05: Whether to run load_pa_05_data on the tree.
    :returns: The tree returned by the last loader that ran, or data_tree
              itself when every flag is off.
    """
    # Identity check: "== None" would defer to the tree's own __eq__.
    if data_tree is None:
        data_tree = naics.generate_tree()
    # Each loader takes the tree and hands the (updated) tree back:
    if get_pa01:
        data_tree = load_pa_01_data(data_tree)
    if get_pa03:
        data_tree = load_pa_03_data(data_tree)
    if get_pa05:
        data_tree = load_pa_05_data(data_tree)
    #
    return data_tree
Example #26
0
def get_incs():
    """ Build a NAICS tree holding the income dataframes and print it.
    
    The tree is filled by the read_inc loaders from data_folder, populated
    backward and forward for the three read_inc dataframe keys, and printed
    to output_folder.
    
    :returns: The populated NAICS tree.
    """
    inc_tree = naics.generate_tree()
    # Read the input data into the tree, then derive the business income:
    read_inc.load_nipa_inc_ind(data_folder, inc_tree)
    read_inc.load_nipa_int_ind(data_folder, inc_tree)
    read_inc.calc_bus_inc(inc_tree)
    # Keys of the dataframes to spread over the tree:
    inc_df_nms = [read_inc.BUS_INC, read_inc.INT_INC, read_inc.FIN_INC]
    # Populate backward first, then forward:
    for populate in (naics.pop_back, naics.pop_forward):
        populate(inc_tree, inc_df_nms)
    # Print the dataframes and hand the tree back:
    naics.print_tree_dfs(inc_tree, output_folder)
    return inc_tree
Example #27
0
def get_incs():
    """ Load the income data into a new NAICS tree and print the result.
    
    Reads the inputs found in data_folder with the read_inc helpers,
    populates the three read_inc income dataframes over the whole tree and
    prints the tree's dataframes to output_folder.
    
    :returns: The NAICS tree that was built.
    """
    # Start from an empty tree:
    tree = naics.generate_tree()
    # Fill it from the input folder:
    read_inc.load_nipa_inc_ind(data_folder, tree)
    read_inc.load_nipa_int_ind(data_folder, tree)
    read_inc.calc_bus_inc(tree)
    # The dataframes that get populated over the tree:
    bus_nm, int_nm, fin_nm = (read_inc.BUS_INC,
                              read_inc.INT_INC,
                              read_inc.FIN_INC)
    populated = [bus_nm, int_nm, fin_nm]
    naics.pop_back(tree, populated)
    naics.pop_forward(tree, populated)
    # Print everything out:
    naics.print_tree_dfs(tree, output_folder)
    return tree
Example #28
0
def calc_depr_rates(asset_tree, inv_tree, land_tree):
    """ Calculate an asset-weighted economic depreciation rate for every
    industry and store it in a new NAICS tree.
    
    :param asset_tree: A NAICS tree whose industries hold "All", "Corp" and
           "Non-Corp" dataframes with one column per asset type.
    :param inv_tree: A NAICS tree with "Inventories" dataframes.
    :param land_tree: A NAICS tree with "Land" dataframes.
    :returns: A new NAICS tree with an "Economic" dataframe (columns "All",
              "Corp", "Non-Corp") per industry.
    """
    # Opening file containing depreciation rates by asset type:
    depr_econ = pd.read_csv(_ECON_DEPR_IN_PATH)
    depr_econ = depr_econ.fillna(1)
    econ_assets = depr_econ["Asset"]
    econ_rates = depr_econ["Economic Depreciation Rate"]
    # Row of each (stripped) asset name in the rates file. Built once so the
    # matching below is a dictionary lookup; for a repeated name the last
    # row wins:
    rate_rows = {}
    for k in range(0, len(econ_assets)):
        rate_rows[str(econ_assets[k]).strip()] = k
    #
    types = ["All", "Corp", "Non-Corp"]
    # Initialize tree for depreciation rates:
    depr_tree = naics.generate_tree()
    depr_tree.append_all(df_nm="Economic", df_cols=types)
    #
    for i in types:
        asset_list = asset_tree.enum_inds[0].data.dfs[i].columns
        asset_list = asset_list.values.tolist()
        # For each asset column, its row in the rates file (-1: no match):
        match = [rate_rows.get(str(asset_nm).strip(), -1)
                 for asset_nm in asset_list]
        for j in range(0, len(depr_tree.enum_inds)):
            cur_sum = 0
            asset_df = asset_tree.enum_inds[j].data.dfs[i]
            depr_df = depr_tree.enum_inds[j].data.dfs["Economic"]
            for k in range(0, len(asset_list)):
                if match[k] == -1:
                    # Report the position of the unmatched asset column:
                    print(k)
                    continue
                cur_sum += (asset_df.iloc[0, k] * econ_rates[match[k]])
            # Weighted average rate; zero when the industry has no assets:
            asset_sum = sum(asset_df.iloc[0, :])
            if asset_sum != 0:
                depr_df[i][0] = cur_sum / asset_sum
            else:
                depr_df[i][0] = 0
        # Inventories and land have an approximately zero depreciation rate,
        # so the rate is scaled by the fixed-asset share of the total.
        # NOTE(review): the share is computed from the "All" columns for
        # every type i -- confirm this is intended for "Corp"/"Non-Corp".
        for j in range(0, len(depr_tree.enum_inds)):
            tot_assets = sum(asset_tree.enum_inds[j].data.dfs["All"].iloc[0, :])
            tot_inv = inv_tree.enum_inds[j].data.dfs["Inventories"]["All"][0]
            tot_land = land_tree.enum_inds[j].data.dfs["Land"]["All"][0]
            if tot_assets + tot_inv + tot_land == 0:
                continue
            ratio = tot_assets / (tot_assets + tot_inv + tot_land)
            #
            cur_df = depr_tree.enum_inds[j].data.dfs["Economic"]
            cur_df[i][0] = ratio * cur_df[i][0]
    return depr_tree
Example #29
0
def load_soi_farm_prop(data_tree = None, blue_tree = None, blueprint = None):
    """ This function loads the soi farm proprietorship data into the
    "farm_prop" dataframes of a NAICS tree.
    
    :param data_tree: The NAICS tree to read the data into. Every industry
           must already hold a "PA_assets" dataframe, which is used to split
           the farm totals between the NAICS codes 111 and 112. If None, a
           new tree is generated.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. It is
           passed through to naics.pop_forward.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the dataframes forward. If
           None and the tree has a "tot_corps" dataframe, that is used.
    :returns: The NAICS tree with the "farm_prop" dataframes.
    """
    #
    if data_tree is None:
        data_tree = naics.generate_tree()
    # Load Farm Proprietorship data (os.path.join keeps the path valid on
    # every platform):
    farm_data = pd.read_csv(
                    os.path.abspath(os.path.join(prop_dir, "Farm_Data.csv")))
    new_farm_cols = ["Land", "FA"]
    # Give every industry an all-zero farm dataframe:
    for ind in data_tree.enum_inds:
        ind.append_dfs(("farm_prop",
                        pd.DataFrame(np.zeros((1, len(new_farm_cols))),
                                     columns=new_farm_cols)))
    # Aggregates taken from the first row of the input file.
    # NOTE(review): the meaning of the R_sp/Q_sp/A_sp/A_p columns and of
    # columns 0 and 2 is not visible here -- confirm against the input file.
    land_mult = ((farm_data["R_sp"][0] + farm_data["Q_sp"][0]) *
                 (float(farm_data["A_sp"][0]) / farm_data["A_p"][0]))
    total = farm_data.iloc[0, 0] + farm_data.iloc[0, 2]
    # The farm totals are distributed over these NAICS codes:
    cur_codes = [111, 112]
    proportions = naics.get_proportions(cur_codes, data_tree, "PA_assets",
                                 ["Land (Net)", "Depreciable assets (Net)"])
    farm_inds = [naics.find_naics(data_tree, code) for code in cur_codes]
    # Combined partnership land and depreciable assets of the farm codes:
    total_pa = 0
    for cur_ind in farm_inds:
        cur_df = cur_ind.data.dfs["PA_assets"]
        total_pa += (cur_df["Land (Net)"][0] +
                     cur_df["Depreciable assets (Net)"][0])
    # Land is scaled by the industry's partnership land share; fixed assets
    # are the industry's share of the total less its land:
    for i, cur_ind in enumerate(farm_inds):
        farm_df = cur_ind.data.dfs["farm_prop"]
        land = (land_mult *
                cur_ind.data.dfs["PA_assets"]["Land (Net)"][0] /
                total_pa)
        # Single-step .loc writes avoid assigning through a chained lookup:
        farm_df.loc[0, "Land"] = land
        farm_df.loc[0, "FA"] = (proportions.iloc[1, i] * total) - land
    # Default:
    if (blueprint is None and
            "tot_corps" in data_tree.enum_inds[0].data.dfs.keys()):
        blueprint = "tot_corps"
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=["farm_prop"])
    naics.pop_forward(tree=data_tree, df_list=["farm_prop"],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree
Example #30
0
def calc_c_corp(
        data_tree=None, from_out=False,
        out_path=_C_CORP_OUT_PATH):
    """ This function calculates the soi c-corporation data based of the
    s and the aggregate corporation data.
    
    :param data_tree: The tree to read the data into. Unless from_out is
           set, its industries are expected to hold the total-corporation
           and the s-corporation dataframes. If None, a new tree is
           generated for this call.
    :param from_out: Whether to read in the data from output.
    :param out_path: The path of the output file to read from when
           from_out is set.
    :returns: The tree with a c-corporation dataframe for every industry
              that has both input dataframes.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # (and mutated) across every call that relies on the default:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        return naics.load_tree_dfs(input_path=out_path, tree=data_tree)
    # For each industry, subtract the s-corporation data from the total to
    # get the c-corporation data:
    for ind in data_tree.enum_inds:
        cur_tot = None
        cur_s = None
        try:
            # Industry's total-corporation data:
            cur_tot = ind.data.dfs[_TOT_DF_NM]
        except KeyError:
            print("Total-Corp data not initialized when interpolating C-Corp.")
        try:
            # Industry's S-corporation data:
            cur_s = ind.data.dfs[_S_DF_NM]
        except KeyError:
            print("S-Corp data not initialized when interpolating C-Corp.")
        # Without both inputs there is nothing to subtract. Skip the
        # industry instead of reusing the previous industry's dataframes:
        if cur_tot is None or cur_s is None:
            continue
        data_cols = cur_tot.columns.values.tolist()
        # Append C-corporation dataframe:
        ind.append_dfs((_C_DF_NM,
                        pd.DataFrame(np.zeros((1, len(data_cols))),
                                     columns=data_cols)))
        # C-corporation data:
        ind.data.dfs[_C_DF_NM] = cur_tot - cur_s
    return data_tree
def calibrate_debt(debt_tree=None,
                   soi_tree=None,
                   from_out=False,
                   soi_from_out=False):
    """ This function is incomplete. This is supposed to do the debt
    calibrations.
    
    :param debt_tree: The NAICS tree to append the calibrated debt
           parameters to. Default is a newly generated tree.
    :param soi_tree: A tree with all of the relevant soi data. If None, the
           corporate soi data is pulled with pull_soi_data.
    :param from_out: Currently unused.
    :param soi_from_out: Forwarded to pull_soi_data as from_out when the soi
           tree has to be pulled.
    :returns: The debt tree with the "liabilities" and "equity" dataframes.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # (and mutated) across every call that relies on the default:
    if debt_tree is None:
        debt_tree = naics.generate_tree()
    if soi_tree is None:
        soi_tree = pull_soi_data(get_corp=True, from_out=soi_from_out)
    # Locations of the debt module and of its input data:
    debt_dir = os.path.abspath(os.path.join(_PARAM_DIR, "debt"))
    debt_data_dir = os.path.abspath(os.path.join(debt_dir, "data"))
    if debt_dir not in sys.path:
        sys.path.append(debt_dir)
    # NOTE(review): the module is imported but not used below; the import is
    # kept in case it is relied on for its side effects -- confirm.
    import debt_calibration as debt
    #
    lblty_file = os.path.abspath(
                    os.path.join(debt_data_dir, "liabilities.csv"))
    print(lblty_file)
    eqty_file = os.path.abspath(os.path.join(debt_data_dir, "equity.csv"))
    # Loading the liabilities and the equity data into the tree.
    # NOTE(review): other calls in this file pass input_path= to
    # naics.load_tree_dfs -- confirm it also accepts input_file=/dfs_name=.
    debt_tree = naics.load_tree_dfs(input_file=lblty_file,
                                    dfs_name="liabilities",
                                    tree=debt_tree)
    debt_tree = naics.load_tree_dfs(input_file=eqty_file,
                                    dfs_name="equity",
                                    tree=debt_tree)
    # Populate the liabilities forward, using the interest paid of the
    # total-corporation soi data as the blueprint:
    naics.pop_forward(tree=debt_tree,
                      df_list=["liabilities"],
                      blue_tree=soi_tree,
                      blueprint="tot_corps",
                      sub_print=["Interest Paid"])
    #
    return debt_tree
def init_depr_rates(asset_tree=naics.generate_tree(), get_econ=False, 
                    get_tax_est=False, get_tax_150=False,
                    get_tax_200=False, get_tax_sl=False,
                    get_tax_ads=False, soi_from_out=False,
                    output_data=False):
    """ Build the economic and the tax depreciation-rate trees.
    
    The fixed asset, inventory and land trees are read from asset_tree and
    handed to the calc_rates helpers. None of the get_* flags, soi_from_out
    or output_data is used by this function at the moment.
    
    :param asset_tree: The NAICS tree passed to the read_bea, read_inv and
           read_land readers.
    :returns: A dictionary with the economic rates tree under "Econ" and
              the tax rates tree under "Tax".
    """
    # Inputs of the rate calculations, in the order the helpers take them
    # (fixed assets, inventories, land):
    fa_tree = read_bea.read_bea(asset_tree)
    inventories = read_inv.read_inventories(asset_tree)
    land = read_land.read_land(asset_tree)
    rate_inputs = (fa_tree, inventories, land)
    # Economic rates first, then tax rates:
    rates = {}
    rates["Econ"] = calc_rates.calc_depr_rates(*rate_inputs)
    rates["Tax"] = calc_rates.calc_tax_depr_rates(*rate_inputs)
    # Disabled in the original: naics.pop_rates(tax_depr_tree)
    return rates
Example #33
0
def calc_c_corp(data_tree=None, from_out=False,
                out_path=_C_CORP_OUT_PATH):
    """ This function calculates the soi c-corporation data based of the
    s and the aggregate corporation data.
    
    :param data_tree: The tree to read the data into. Unless from_out is
           set, its industries are expected to hold the total-corporation
           and the s-corporation dataframes. If None, a new tree is
           generated for this call.
    :param from_out: Whether to read in the data from output.
    :param out_path: The path of the output file to read from when
           from_out is set.
    :returns: The tree with a c-corporation dataframe for every industry
              that has both input dataframes.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # (and mutated) across every call that relies on the default:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        return naics.load_tree_dfs(input_path=out_path, tree=data_tree)
    # For each industry, subtract the s-corporation data from the total to
    # get the c-corporation data:
    for ind in data_tree.enum_inds:
        ind_dfs = ind.data.dfs
        cur_tot = None
        cur_s = None
        try:
            # Industry's total-corporation data:
            cur_tot = ind_dfs[_TOT_DF_NM]
        except KeyError:
            print("Total-Corp data not initialized when interpolating C-Corp.")
        try:
            # Industry's S-corporation data:
            cur_s = ind_dfs[_S_DF_NM]
        except KeyError:
            print("S-Corp data not initialized when interpolating C-Corp.")
        # Without both inputs there is nothing to subtract. Skip the
        # industry instead of reusing the previous industry's dataframes:
        if cur_tot is None or cur_s is None:
            continue
        data_cols = cur_tot.columns.values.tolist()
        # Append C-corporation dataframe:
        ind.append_dfs((_C_DF_NM, pd.DataFrame(np.zeros((1, len(data_cols))),
                                               columns=data_cols)))
        # C-corporation data:
        ind.data.dfs[_C_DF_NM] = cur_tot - cur_s
    return data_tree
Example #34
0
def calc_depr_rates(asset_tree, inv_tree, land_tree):
    """ Calculate an asset-weighted economic depreciation rate for every
    industry and store it in a new NAICS tree.
    
    :param asset_tree: A NAICS tree whose industries hold "All", "Corp" and
           "Non-Corp" dataframes with one column per asset type.
    :param inv_tree: A NAICS tree with inventory data. Not used while the
           rate covers fixed assets only.
    :param land_tree: A NAICS tree with land data. Not used while the rate
           covers fixed assets only.
    :returns: A new NAICS tree with an "Economic" dataframe (columns "All",
              "Corp", "Non-Corp") per industry.
    """
    # Opening file containing depreciation rates by asset type:
    depr_econ = pd.read_csv(_ECON_DEPR_IN_PATH)
    depr_econ = depr_econ.fillna(0)
    econ_rates = depr_econ["Economic Depreciation Rate"]
    #
    types = ["All", "Corp", "Non-Corp"]
    # Initialize tree for depreciation rates:
    depr_tree = naics.generate_tree()
    depr_tree.append_all(df_nm="Economic", df_cols=types)
    # Makes a list of all the assets:
    asset_list = asset_tree.enum_inds[0].data.dfs['Corp'].columns
    asset_list = asset_list.values.tolist()
    # Runs three times, once for all the assets, once for the corporate
    # assets, and once for non-corporate assets:
    for i in types:
        # Iterates over every industry in the tree:
        for j in range(0, len(depr_tree.enum_inds)):
            asset_depreciation = 0
            # Grabs the assets for the industry:
            asset_df = asset_tree.enum_inds[j].data.dfs[i]
            # Sums the depreciation in the industry, multiplying the amount
            # of each asset by its corresponding depreciation rate.
            # NOTE(review): rates are paired with asset columns by position;
            # this assumes the rows of the rates file follow the column
            # order of the asset dataframes -- confirm.
            for k in range(0, len(asset_list)):
                asset_depreciation += (asset_df.iloc[0, k] * econ_rates[k])
            # Total fixed assets in the industry:
            tot_assets = sum(asset_df.iloc[0, :])
            rate_col = depr_tree.enum_inds[j].data.dfs['Economic'][i]
            # Weighted average depreciation rate for assets only (can be
            # changed to include inventories and land). The guard is on the
            # divisor itself, so an industry with inventories or land but no
            # fixed assets gets a zero rate:
            if tot_assets != 0:
                rate_col.iloc[0] = asset_depreciation / tot_assets
            else:
                rate_col.iloc[0] = 0
     
    return depr_tree
Example #35
0
def load_asset(data_tree=None,
               blue_tree=None, blueprint=None,
               from_out=False, out_path=None):
    """ This function loads the soi partnership asset data.
    
    :param data_tree: The NAICS tree to read the data into. If None, a new
           tree is generated for this call.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
           If None and the tree has a total-corporation dataframe, that
           dataframe is used.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. It is
           passed through to naics.pop_forward.
    :param from_out: Whether to read in the data from output.
    :param out_path: The output path to read from when from_out is set.
           Defaults to _AST_OUT_PATH.
    :returns: The NAICS tree with the asset dataframes.
    """
    # A fresh tree per call; a tree built in the signature would be shared
    # (and mutated) across every call that relies on the default:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # Initializing the output path:
    if out_path is None:
        out_path = _AST_OUT_PATH
    # If from_out, load the data tree from output:
    if from_out:
        return naics.load_tree_dfs(input_path=out_path, tree=data_tree)
    # Opening data on depreciable fixed assets, inventories, and land:
    wb = xlrd.open_workbook(_AST_IN_PATH)
    ws = wb.sheet_by_index(0)
    num_rows = ws.nrows
    # Columns of the asset dataframe (materialised as a list):
    df_cols = list(_AST_DF_DICT.values())
    # Initializing dataframe to hold pertinent asset data:
    ast_df = pd.DataFrame(np.zeros((ws.ncols-1, len(df_cols))),
                          columns=df_cols)
    # Extracting the data (note that the rows with total data appear first).
    # For each input row:
    for in_row_nm in _AST_IN_ROW_NMS:
        # Dataframe column that receives the first (total) matching row:
        df_net_col_key = _AST_IN_ROWS_DF_NET_DICT[in_row_nm]
        df_net_col_nm = _AST_DF_DICT[df_net_col_key]
        # Dataframe column that receives the second matching row (assets of
        # partnerships with net income):
        df_inc_col_key = _AST_IN_ROWS_DF_INC_DICT[in_row_nm]
        df_inc_col_nm = _AST_DF_DICT[df_inc_col_key]
        # Row names are matched case-insensitively:
        row_key = in_row_nm.lower()
        # Finding the first input row with in_row_nm:
        for in_row1 in range(0, num_rows):
            if row_key not in str(ws.cell_value(in_row1, 0)).lower():
                continue
            # Total asset data:
            ast_df[df_net_col_nm] = ws.row_values(in_row1, 1)
            # Finding the second input row with in_row_nm:
            for in_row2 in range(in_row1+1, num_rows):
                if row_key in str(ws.cell_value(in_row2, 0)).lower():
                    # Asset data for companies with net income:
                    ast_df[df_inc_col_nm] = ws.row_values(in_row2, 1)
                    break
            # Only the first pair of matching rows is used:
            break
    # Scaling the data to the correct units:
    ast_df = ast_df * _AST_FILE_FCTR
    # Reading in the crosswalks between the columns and the NAICS codes:
    ast_cross = pd.read_csv(_AST_IN_CROSS_PATH)
    # Processing the asset data into the NAICS tree:
    data_tree = naics.load_data_with_cross(
                    data_tree=data_tree, data_df=ast_df,
                    cross_df=ast_cross, df_nm=_AST_DF_NM
                    )
    # Default blueprint is tot_corps:
    has_tot_df = _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys()
    if blueprint is None and has_tot_df:
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_AST_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_AST_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree
Example #36
0
def get_assets(asset_tree=None, soi_tree=None):
    """ Builds a NAICS tree of fixed assets, inventories and land by sector.
    
    Every industry of the returned tree gets three one-row dataframes, "FA",
    "INV" and "LAND", with one column per sector in all_sectors. The values
    are read from the "c_corps", "s_corps", "PA_types", "PA_assets",
    "PA_inc_loss", "soi_prop" and "farm_prop" dataframes of the industry at
    the same position in soi_tree.
    
    :param asset_tree: Ignored. A new tree is always generated; the parameter
           is only kept so that existing calls keep working.
    :param soi_tree: The NAICS tree holding the soi dataframes. Required.
    :returns: The newly generated tree with the asset dataframes.
    :raises ValueError: If soi_tree is not given.
    """
    if soi_tree is None:
        raise ValueError("get_assets requires an soi_tree with the soi data.")
    all_sectors = ["C Corporations", 
                   "S Corporations",
                   "Corporate general partners", 
                   "Corporate limited partners",
                   "Individual general partners",
                   "Individual limited partners",
                   "Partnership general partners",
                   "Partnership limited partners",
                   "Tax-exempt organization general partners",
                   "Tax-exempt organization limited partners",
                   "Nominee and other general partners", 
                   "Nominee and other limited partners", 
                   "Sole Proprietors"]
    # Corporate sectors and the soi dataframe each one is read from:
    corp_df_nms = {"C Corporations": "c_corps", "S Corporations": "s_corps"}
    # The partner types are the columns of the first industry's "PA_types":
    pa_types = soi_tree.enum_inds[0].data.dfs["PA_types"].columns
    pa_types = pa_types.values.tolist()
    # The assets are always put in a newly generated tree:
    asset_tree = naics.generate_tree()
    # Initializing zero-filled asset dataframes for every industry:
    for ind in asset_tree.enum_inds:
        for df_nm in ("FA", "INV", "LAND"):
            ind.append_dfs((df_nm,
                            pd.DataFrame(np.zeros((1, len(all_sectors))),
                                         columns=all_sectors)))
    # Filling in the assets, matching the industries of the trees by position:
    for index, out_ind in enumerate(asset_tree.enum_inds):
        cur_dfs = soi_tree.enum_inds[index].data.dfs
        out_dfs = out_ind.data.dfs
        partner_sum = sum(cur_dfs["PA_types"].iloc[0,:])
        for sector in all_sectors:
            if sector in corp_df_nms:
                # Corporate assets are copied over unchanged:
                cur_df = cur_dfs[corp_df_nms[sector]]
                fa_amt = cur_df["Depreciable Assets"][0]
                inv_amt = cur_df["Inventories"][0]
                land_amt = cur_df["Land"][0]
            elif sector in pa_types:
                # Partnership assets are split by the partner type's share:
                if partner_sum != 0:
                    ratio = abs(cur_dfs["PA_types"][sector][0])/partner_sum
                else:
                    # NOTE(review): shape[0] is the number of rows, not the
                    # number of partner types; an even split would
                    # presumably use shape[1] -- confirm before changing.
                    ratio = abs(1.0/cur_dfs["PA_types"].shape[0])
                cur_df = cur_dfs["PA_assets"]
                fa_amt = abs(ratio*cur_df["Depreciable assets (Net)"][0])
                inv_amt = abs(ratio*cur_df["Inventories (Net)"][0])
                land_amt = abs(ratio*cur_df["Land (Net)"][0])
            elif sector == "Sole Proprietors":
                # Partnership assets are scaled by the ratio of proprietor
                # to partnership depreciation; farm data is added on top:
                if cur_dfs["PA_inc_loss"]["Depreciation"][0] != 0:
                    ratio = abs(cur_dfs["soi_prop"]["Depr Deductions"][0]/
                                cur_dfs["PA_inc_loss"]["Depreciation"][0])
                else:
                    ratio = 0.0
                cur_df = cur_dfs["PA_assets"]
                fa_amt = abs((ratio*cur_df["Depreciable assets (Net)"][0])+
                             cur_dfs["farm_prop"]["FA"][0])
                # NOTE(review): the farm "Land" column is added to the
                # inventories and not to the land below; this looks like a
                # slip -- confirm before changing.
                inv_amt = abs((ratio*cur_df["Inventories (Net)"][0])+
                              cur_dfs["farm_prop"]["Land"][0])
                land_amt = abs(ratio*cur_df["Land (Net)"][0])
            else:
                # No data source for this sector; the zeros are kept.
                continue
            out_dfs["FA"][sector][0] = fa_amt
            out_dfs["INV"][sector][0] = inv_amt
            out_dfs["LAND"][sector][0] = land_amt
    return asset_tree
Example #37
0
def load_soi_tot_corp(data_tree=None,
                      cols_dict=_DFLT_TOT_CORP_COLS_DICT,
                      blueprint=None,
                      blue_tree=None,
                      from_out=False,
                      output_path=_TOT_CORP_OUT_PATH):
    """ This function pulls the soi total corporation data.
    
    :param data_tree: The NAICS tree to read the data into. If not given, a
           new tree is generated on every call.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file. An empty string marks a column
           with no input data; it is filled with zeros.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    :returns: The data tree, or None if the input file could not be opened.
    """
    # A default tree built in the signature would be shared between calls:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        return naics.load_tree_dfs(input_path=output_path, tree=data_tree)
    # Pertinent information:
    num_inds = len(data_tree.enum_inds)  # Number of industries in NAICS tree.
    data_cols = list(cols_dict.keys())  # Dataframe column names.
    # Opening the soi total corporate data file:
    try:
        tot_corp_data = pd.read_csv(_TOT_CORP_IN_PATH).fillna(0)
    except IOError:
        print("IOError: Tot-Corp soi data file not found.")
        return None
    # Initializing dataframes for all NAICS industries:
    data_tree.append_all(df_nm=_TOT_DF_NM, df_cols=data_cols)
    # Reading the total corporation data into the NAICS tree. The search
    # position carries over between codes, so each search resumes right
    # after the previous match:
    enum_index = 0
    for code_num in np.unique(tot_corp_data[_NAICS_COL_NM]):
        # Find the industry with a code that matches "code_num":
        tot_df = None
        for _ in range(0, num_inds):
            enum_index = (enum_index + 1) % num_inds
            cur_ind = data_tree.enum_inds[enum_index]
            code_df = cur_ind.data.dfs[cst.CODE_DF_NM]
            if any(code_df.iloc[row, 0] == code_num
                   for row in range(0, code_df.shape[0])):
                # Industry with the matching code has been found:
                tot_df = cur_ind.data.dfs[_TOT_DF_NM]
                break
        # If no match was found, then ignore data.
        if tot_df is None:
            continue
        # Indicators for if rows in tot_corp_data match current industry code:
        indicators = (tot_corp_data[_NAICS_COL_NM] == code_num)
        # Calculating the data:
        for col_nm in cols_dict:
            # Some of the data may not be reported:
            if cols_dict[col_nm] == "":
                tot_df[col_nm] = 0
            else:
                # Note: double counting the data in the original dataset.
                col_total = sum(
                    indicators * tot_corp_data[cols_dict[col_nm]]) / 2.0
                # Scale the scalar before storing it, rather than assigning
                # a whole scaled column into a single cell:
                tot_df[col_nm][0] = col_total * _TOT_CORP_IN_FILE_FCTR
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_TOT_DF_NM])
    naics.pop_forward(tree=data_tree,
                      df_list=[_TOT_DF_NM],
                      blueprint=blueprint,
                      blue_tree=blue_tree)
    return data_tree
def load_asset(data_tree=None,
             blue_tree=None, blueprint=None,
             from_out=False):
    """ This function loads the soi partnership asset data.
    
    :param data_tree: The NAICS tree to read the data into. If not given, a
           new tree is generated on every call.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
           If not given and the tree already has the total corporation
           dataframe, that dataframe is used.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :returns: The data tree with the asset dataframe in every industry.
    """
    # A default tree built in the signature would be shared between calls:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_file=_AST_OUT_PATH,
                                        tree=data_tree)
        return data_tree
    # Opening data on depreciable fixed assets, inventories, and land:
    wb = xlrd.open_workbook(_AST_IN_PATH)
    ws = wb.sheet_by_index(0)
    num_rows = ws.nrows
    # Columns of the asset dataframe:
    df_cols = list(_AST_DF_DICT.values())
    # Initializing dataframe to hold pertinent asset data; one row for every
    # worksheet column after the first (label) column:
    ast_df = pd.DataFrame(np.zeros((ws.ncols-1,len(df_cols))), columns=df_cols)
    # Extracting the data (note that the rows with total data appear first).
    # For each input row:
    for in_row_nm in _AST_IN_ROW_NMS:
        # Dataframe column for the first (total) occurrence of the row:
        df_net_col_nm = _AST_DF_DICT[_AST_IN_ROWS_DF_NET_DICT[in_row_nm]]
        # Dataframe column for the second (net income) occurrence of the row:
        df_inc_col_nm = _AST_DF_DICT[_AST_IN_ROWS_DF_INC_DICT[in_row_nm]]
        row_label = in_row_nm.lower()
        # Finding the first input row whose label contains row_label:
        for in_row1 in xrange(0, num_rows):
            if row_label not in str(ws.cell_value(in_row1,0)).lower():
                continue
            # Total asset data:
            ast_df[df_net_col_nm] = ws.row_values(in_row1, 1)
            # Finding the second input row whose label contains row_label:
            for in_row2 in xrange(in_row1+1, num_rows):
                if row_label in str(ws.cell_value(in_row2,0)).lower():
                    # Asset data for companies with net income:
                    ast_df[df_inc_col_nm] = ws.row_values(in_row2,1)
                    break
            break
    # Scaling the data to the correct units:
    ast_df = ast_df * _AST_FILE_FCTR
    # Reading in the crosswalks between the columns and the NAICS codes:
    ast_cross = pd.read_csv(_AST_IN_CROSS_PATH)
    # Processing the asset data into the NAICS tree:
    data_tree = naics.load_data_with_cross(
                    data_tree=data_tree, data_df=ast_df,
                    cross_df=ast_cross, df_nm=_AST_DF_NM
                    )
    # Default blueprint is tot_corps:
    has_tot_df = _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys()
    if blueprint is None and has_tot_df:
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_AST_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_AST_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree
def load_soi_nonfarm_prop(data_tree=None, 
                          blue_tree=None, blueprint=None, 
                          from_out=False, out_path=_NFARM_PROP_OUT_PATH):
    """ This function loads the soi nonfarm proprietorship data:
    
    :param data_tree: The NAICS tree to read the data into. If not given, a
           new tree is generated on every call.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
           If not given and the tree already has the total corporation
           dataframe, that dataframe is used.
    :param from_out: Whether to read in the data from output.
    :param out_path: The path of the output file to read when from_out is set.
    :returns: The data tree with the nonfarm dataframe in every industry.
    """
    # A default tree built in the signature would be shared between calls:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    # Opening nonfarm proprietor data:
    wb = xlrd.open_workbook(_DDCT_IN_PATH)
    ws = wb.sheet_by_index(0)
    cross = pd.read_csv(_DDCT_IN_CROSS_PATH)
    # Finding the relevant positions in worksheet:
    pos1 = naics.search_ws(ws, _SECTOR_COL, 20, True, [0,0], True)
    pos2 = naics.search_ws(ws, _DDCT_COL1, 20)
    pos3 = naics.search_ws(ws,_DDCT_COL2, 20,
                           True, np.array(pos2) + np.array([0,1]))
    # Initializing the nonfarm dataframe for all NAICS industries:
    data_tree.append_all(df_nm=_NFARM_DF_NM, df_cols=[_NFARM_DF_COL_NM])
    # Both search positions carry over between worksheet rows, so each search
    # resumes right after the previous one stopped:
    num_cross = cross.shape[0]
    num_inds = len(data_tree.enum_inds)
    cross_index = num_cross-1
    enum_index = num_inds-1
    for i in xrange(pos1[0],ws.nrows):
        cur_cell = str(ws.cell_value(i,pos1[1])).lower().strip()
        # Share of the current worksheet row that has been allocated so far:
        tot_proportions = 0
        for _ in xrange(0, num_cross):
            cross_index = (cross_index+1) % num_cross
            cur_ind_name = str(cross.iloc[cross_index,0]).lower().strip()
            if(cur_cell == cur_ind_name):
                # Crosswalk entries without NAICS codes are skipped:
                if pd.isnull(cross.iloc[cross_index,1]):
                    continue
                ind_codes = str(cross.iloc[cross_index,1]).split(".")
                for _ in xrange(0, num_inds):
                    enum_index = (enum_index+1) % num_inds
                    cur_data = data_tree.enum_inds[enum_index].data
                    cur_codes = cur_data.dfs[_CODE_DF_NM]
                    cur_proportions = naics.compare_codes(ind_codes, cur_codes.iloc[:,0])
                    if cur_proportions == 0:
                        continue
                    tot_proportions += cur_proportions
                    # Adding this industry's share of the two worksheet
                    # columns found above, scaled to the correct units:
                    cur_dfs = cur_data.dfs[_NFARM_DF_NM][_NFARM_DF_COL_NM]
                    cur_dfs[0] += (_DDCT_FILE_FCTR * cur_proportions 
                                        * (ws.cell_value(i,pos2[1]) 
                                        + ws.cell_value(i,pos3[1])))
            # Stop searching once the whole row has been allocated:
            if(tot_proportions == 1):
                break
    # Default blueprint is tot_corps:
    if blueprint is None and _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_NFARM_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_NFARM_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree
Example #40
0
def load_soi_tot_corp(data_tree=None,
                      cols_dict=_DFLT_TOT_CORP_COLS_DICT, 
                      blueprint=None, blue_tree=None,
                      from_out=False, output_path=_TOT_CORP_OUT_PATH):
    """ This function pulls the soi total corporation data.
    
    :param data_tree: The NAICS tree to read the data into. If not given, a
           new tree is generated on every call.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file. An empty string marks a column
           with no input data; it is filled with zeros.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    :returns: The data tree, or None if the input file could not be opened.
    """
    # Building the default here keeps separate calls from sharing one tree:
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=output_path, tree=data_tree)
        return data_tree
    # Pertinent information:
    num_inds = len(data_tree.enum_inds) # Number of industries in NAICS tree.
    data_cols = list(cols_dict.keys()) # Dataframe column names.
    # Opening the soi total corporate data file:
    try:
        tot_corp_data = pd.read_csv(_TOT_CORP_IN_PATH).fillna(0)
    except IOError:
        print("IOError: Tot-Corp soi data file not found.")
        return None
    # Initializing dataframes for all NAICS industries:
    data_tree.append_all(df_nm=_TOT_DF_NM, df_cols=data_cols)
    # Reading the total corporation data into the NAICS tree. enum_index is
    # not reset between codes, so every search continues from the last match:
    enum_index = 0
    for code_num in np.unique(tot_corp_data[_NAICS_COL_NM]):
        # Find the industry with a code that matches "code_num":
        ind_found = False
        for _ in range(0, num_inds):
            enum_index = (enum_index + 1) % num_inds
            cur_ind = data_tree.enum_inds[enum_index]
            ind_codes = cur_ind.data.dfs[cst.CODE_DF_NM]
            for row in range(0, ind_codes.shape[0]):
                if(ind_codes.iloc[row,0] == code_num):
                    # Industry with the matching code has been found:
                    ind_found = True
                    break
            # If the matching industry has been found stop searching for it:
            if ind_found:
                break
        # If no match was found, then ignore data.
        if not ind_found:
            continue
        # The dataframe of the matching industry that receives the data:
        out_df = cur_ind.data.dfs[_TOT_DF_NM]
        # Indicators for if rows in tot_corp_data match current industry code:
        indicators = (tot_corp_data[_NAICS_COL_NM] == code_num)
        # Calculating the data:
        for df_col in cols_dict:
            in_col = cols_dict[df_col]
            # Some of the data may not be reported:
            if in_col == "":
                out_df[df_col] = 0
                continue
            # Note: double counting the data in the original dataset.
            halved_sum = sum(indicators * tot_corp_data[in_col])/2.0
            # The value is scaled as a scalar; assigning the scaled column
            # into one cell only works through implicit coercion:
            out_df[df_col][0] = halved_sum * _TOT_CORP_IN_FILE_FCTR
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_TOT_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_TOT_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree
def calibrate_depr_rates(data_tree=None,
                         get_all=False,
                         get_econ=False,
                         get_tax=False,
                         get_tax_est=False,
                         get_tax_150=False,
                         get_tax_200=False,
                         get_tax_sl=False,
                         get_tax_ads=False,
                         soi_from_out=False,
                         output_data=False):
    """ This calibrates a tree with all the depreciation rate parameters.
    
    :param data_tree: Currently unused; kept so that existing calls which
           pass it keep working.
    :param get_all: Whether to get all the depreciation parameters or not.
    :param get_econ: Whether to get all the economic depreciation rates.
    :param get_tax: Whether to get all of the tax data.
    :param get_tax_est: Whether to get all of the estimated tax data. This is
           the most accurate estimate for each industry's depreciation
           rate. It uses IRS tax documents to decide which assets fall
           under which tax depreciation methods.
    :param get_tax_150: Get the depreciation rates under the assumption that
           assets are depreciated under the GDS 150% declining balance method.
    :param get_tax_200: Get the depreciation rates under the assumption that
           assets are depreciated under the GDS 200% declining balance method.
    :param get_tax_sl: Get the depreciation rates under the assumption that
           assets are depreciated under the straight line method.
    :param get_tax_ads: Get the depreciation rates under the assumption that
           assets are depreciated under the ADS method.
    :param soi_from_out: Passed on as "from_out" when pulling the soi tax
           data. When it is False, the pulled soi data is also output.
    :param output_data: Whether to output the depreciation rates.
    :returns: Whatever init_depr_rates of the depreciation module returns.
    """
    # The depreciation rate directory:
    depr_dir = os.path.abspath(_PARAM_DIR + "//depreciation")
    # Importing the module for gathering and processing the depreciation
    # rate data. The directory is only added to the search path once, so
    # repeated calls do not keep growing sys.path:
    if depr_dir not in sys.path:
        sys.path.append(depr_dir)
    import depreciation_calibration as depr
    # If get_all, set all booleans to true:
    if get_all:
        get_econ = True
        get_tax = True
    # If get_tax, set all tax booleans to true:
    if get_tax:
        get_tax_150 = True
        get_tax_200 = True
        get_tax_sl = True
        get_tax_ads = True
        get_tax_est = True
    # Initialize NAICS tree with all the soi tax data:
    soi_tree = pull_soi_data(get_all=True,
                             from_out=soi_from_out,
                             output_data=(not soi_from_out))
    # Initialize NAICS tree with all assets--fixed assets, inventories, 
    # and land--by sector:
    asset_tree = calc_soi_assets(soi_tree=soi_tree)
    # Use the asset_tree to initialize all the depreciation rates:
    depr_tree = depr.init_depr_rates(asset_tree=asset_tree,
                                     get_econ=get_econ,
                                     get_tax_est=get_tax_est,
                                     get_tax_150=get_tax_150,
                                     get_tax_200=get_tax_200,
                                     get_tax_sl=get_tax_sl,
                                     get_tax_ads=get_tax_ads,
                                     output_data=output_data)
    return depr_tree
Example #42
0
def read_bea(asset_tree):
    """ Reads BEA's fixed asset data by industry into a new NAICS tree.
    
    Every industry of the returned tree gets three one-row dataframes, "All",
    "Corp" and "Non-Corp", with one column per asset in the BEA asset list.
    
    :param asset_tree: A NAICS tree whose industries hold an "FA" dataframe
           with a column for every name in _CORP_NMS and _NCORP_NMS. It is
           used to split the BEA data between industries and sectors, and as
           the blueprint for populating the new tree forward.
    :returns: The new fixed asset tree, or None if the industry list could
           not be located in the "readme" sheet.
    """
    # Opening BEA's excel file on depreciable assets by industry:
    bea_book = xlrd.open_workbook(_BEA_ASSET_PATH)
    sht_names = bea_book.sheet_names()
    num_shts = bea_book.nsheets
    # Opening "readme" sheet:
    try:
        bea_readme = bea_book.sheet_by_name("readme")
    except xlrd.XLRDError:
        bea_readme = bea_book.sheet_by_index(0)
    # Finding relevant positions in the readme sheet:
    sht_pos = naics.search_ws(bea_readme, "Industry Title", 25, False)
    if (sht_pos == [-1, -1]):
        sht_pos = naics.search_ws(bea_readme, "bea code", 25, False, [0, 0],
                                  True)
        # The failure check has to come before the column is shifted;
        # afterwards the position can no longer equal [-1, -1]:
        if (sht_pos == [-1, -1]):
            print("Error in reading BEA fixed asset \"readme\" sheet.")
            return None
        # The industry titles are one column to the left of the BEA codes:
        sht_pos[1] = sht_pos[1] - 1
    # Making a list of BEA codes based on the names of the worksheets
    # (every sheet after the first):
    bea_codes1 = np.zeros(num_shts - 1, dtype=object)
    for index in xrange(1, num_shts):
        bea_codes1[index - 1] = str(sht_names[index])
    # Reading in a list of the assets in the BEA file:
    list_file = os.path.join(_BEA_DIR, "detailnonres_list.csv")
    asset_list = pd.read_csv(list_file)
    for i in xrange(0, asset_list.shape[0]):
        asset_list.iloc[i, 0] = asset_list.iloc[i, 0].replace("\xa0", " ")
        asset_list.iloc[i, 0] = asset_list.iloc[i, 0].strip()
    # Reading in the corresponding naics codes:
    naics_file = os.path.join(_BEA_DIR, "detailnonres_naics.csv")
    naics_cross = pd.read_csv(naics_file).replace("\xa0", " ")
    naics_inds = naics_cross["Industry"]
    for i in xrange(0, naics_cross.shape[0]):
        naics_inds[i] = naics_inds[i].replace("\xa0", " ").strip()
    # Creating a chart cross-referencing industry names, BEA and NAICS codes.
    # NOTE(review): the chart has one row less than bea_codes1, so the last
    # worksheet is never used -- presumably on purpose; confirm.
    chart_cols = ["Industry", "BEA Code", "NAICS Code"]
    bea_chart = pd.DataFrame(np.zeros(shape=(num_shts - 2, 3), dtype=object),
                             columns=chart_cols)
    bea_inds = bea_chart["Industry"]
    bea_naics = bea_chart["NAICS Code"]
    cur_col = sht_pos[1]
    num_naics = naics_cross.shape[0]
    # Filling chart with naics codes that are in both lists and the crosswalk.
    # The crosswalk position carries over between industries:
    naics_counter = 0
    for i in range(0, num_shts - 2):
        for cur_row in range(sht_pos[0] + 1, bea_readme.nrows):
            bea_code = unicode(bea_readme.cell_value(cur_row, cur_col +
                                                     1)).encode('utf8')
            if (str(bea_codes1[i]) == bea_code):
                bea_ind = unicode(bea_readme.cell_value(
                    cur_row, cur_col)).encode('utf8')
                bea_ind = bea_ind.replace('\xa0', ' ').strip()
                bea_inds[i] = bea_ind
                bea_chart["BEA Code"][i] = bea_code
                for k in xrange(0, num_naics):
                    naics_counter = (naics_counter + 1) % num_naics
                    if (naics_inds[naics_counter] == bea_chart["Industry"][i]):
                        bea_naics[i] = naics_cross["NAICS"][naics_counter]
                        break
                break
            # If they match except one has ".0" at the end:
            elif (str(bea_codes1[i]) == str(
                    bea_readme.cell_value(cur_row, cur_col + 1))[:-2]):
                bea_ind = unicode(bea_readme.cell_value(
                    cur_row, cur_col)).encode('utf8')
                bea_ind = bea_ind.replace('\xa0', ' ').strip()
                bea_chart["Industry"][i] = bea_ind
                cur_code = str(bea_readme.cell_value(cur_row,
                                                     cur_col + 1))[:-2]
                bea_chart["BEA Code"][i] = cur_code
                for k in xrange(0, num_naics):
                    naics_counter = (naics_counter + 1) % num_naics
                    if (naics_inds[naics_counter] == bea_inds[i]):
                        bea_naics[i] = naics_cross["NAICS"][naics_counter]
                        break
                break
    # Initializing the table of assets (one row per asset, one column per
    # BEA code):
    bea_table = pd.DataFrame(np.zeros(
        (asset_list.shape[0], bea_chart.shape[0])),
                             columns=bea_chart["BEA Code"])
    # For each industry, reading the last worksheet column of every asset
    # that is in the asset list:
    for i in bea_chart["BEA Code"]:
        cur_sht = bea_book.sheet_by_name(i)
        sht_pos = naics.search_ws(cur_sht, "asset codes", 25, False)
        for j in xrange(0, len(asset_list)):
            cur_asset = asset_list.iloc[j, 0]
            for k in xrange(sht_pos[0] + 2, cur_sht.nrows):
                cur_cell = unicode(cur_sht.cell_value(k, sht_pos[1] +
                                                      1)).encode('utf8')
                cur_cell = cur_cell.replace("\xa0", " ").strip()
                if (cur_asset == cur_cell):
                    bea_table[i][j] = float(
                        cur_sht.cell_value(k, cur_sht.ncols - 1))
    # The dollar amounts are in millions:
    # NOTE(review): convert_objects is deprecated in newer pandas releases;
    # confirm the pandas version before upgrading.
    bea_table = bea_table.convert_objects(convert_numeric=True).fillna(0)
    bea_table = bea_table * _BEA_IN_FILE_FCTR
    # Initialize tree for assets data:
    fixed_asset_tree = naics.generate_tree()
    for i in xrange(0, len(fixed_asset_tree.enum_inds)):
        for df_nm in ("All", "Corp", "Non-Corp"):
            fixed_asset_tree.enum_inds[i].data.append(
                (df_nm,
                 pd.DataFrame(np.zeros((1, asset_list.shape[0])),
                              columns=asset_list.iloc[:, 0])))
    # Fill in data from BEA's fixed asset table. The industry search position
    # carries over between BEA industries and codes.
    # NOTE(review): the two trees are indexed with the same position, which
    # assumes they list the same industries in the same order -- confirm.
    enum_index = len(asset_tree.enum_inds) - 1
    for i in xrange(0, bea_table.shape[1]):
        cur_codes = str(bea_chart["NAICS Code"][i]).split(".")
        tot_share = 0
        all_proportions = naics.get_proportions(cur_codes, asset_tree,
                                                "FA").iloc[1, :]
        corp_proportions = naics.get_proportions(cur_codes, asset_tree, "FA",
                                                 _CORP_NMS).iloc[1, :]
        non_corp_proportions = naics.get_proportions(cur_codes, asset_tree,
                                                     "FA",
                                                     _NCORP_NMS).iloc[1, :]
        for code_index in xrange(0, len(cur_codes)):
            for j in xrange(0, len(fixed_asset_tree.enum_inds)):
                enum_index = (enum_index + 1) % len(fixed_asset_tree.enum_inds)
                out_dfs = asset_tree.enum_inds[enum_index].data.dfs
                # Industries without any fixed assets are skipped:
                fa_total = sum(out_dfs["FA"].iloc[0, :])
                if (fa_total == 0):
                    continue
                # Shares of the industry's fixed assets held by the
                # corporate and the non-corporate sectors:
                all_ratio = 1.0
                corp_ratio = 0.0
                non_corp_ratio = 0.0
                for category in _CORP_NMS:
                    corp_ratio += (out_dfs["FA"][category][0] / fa_total)
                for category in _NCORP_NMS:
                    non_corp_ratio += (out_dfs["FA"][category][0] / fa_total)
                cur_data = fixed_asset_tree.enum_inds[enum_index].data
                ind_codes = cur_data.dfs["Codes:"].iloc[:, 0]
                share = naics.compare_codes(cur_codes, ind_codes)
                tot_share += share
                if (share == 0):
                    continue
                num_assets = fixed_asset_tree.enum_inds[0].data.dfs[
                    "All"].shape[1]
                for k in xrange(0, num_assets):
                    cur_data.dfs["All"].iloc[0, k] = (
                        bea_table.iloc[k, i] * all_ratio *
                        all_proportions[code_index])
                    cur_data.dfs["Corp"].iloc[0, k] = (
                        bea_table.iloc[k, i] * corp_ratio *
                        corp_proportions[code_index])
                    cur_data.dfs["Non-Corp"].iloc[0, k] = (
                        bea_table.iloc[k, i] * non_corp_ratio *
                        non_corp_proportions[code_index])
                # Only the first matching industry is filled for each code:
                break
            # Stop once the whole BEA industry has been allocated:
            if (tot_share == 1):
                break
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(fixed_asset_tree, ["All", "Corp", "Non-Corp"])
    naics.pop_forward(tree=fixed_asset_tree,
                      df_list=["All"],
                      blueprint="FA",
                      blue_tree=asset_tree)
    naics.pop_forward(tree=fixed_asset_tree,
                      df_list=["Corp"],
                      blueprint="FA",
                      blue_tree=asset_tree,
                      sub_print=_CORP_NMS)
    naics.pop_forward(tree=fixed_asset_tree,
                      df_list=["Non-Corp"],
                      blueprint="FA",
                      blue_tree=asset_tree,
                      sub_print=_NCORP_NMS)
    return fixed_asset_tree
def init_depr_rates(data_tree=None, get_econ=False,
                    get_tax_est=False, get_tax_150=False,
                    get_tax_200=False, get_tax_sl=False,
                    get_tax_ads=False, soi_from_out=False,
                    output_data=False):
    """ Builds the economic and tax depreciation rate trees.

    The SOI corporate, partnership and proprietorship data are loaded, a
    summary tree of fixed assets, inventories and land is built from
    data_tree, the fixed assets are broken down by type of asset, and the
    economic and tax depreciation rates are calculated from the results.

    :param data_tree: The NAICS tree handed to calc_assets.summary_tree.
           When omitted, a new tree is generated on every call (rather than
           one tree being created at definition time and shared by all
           calls).
    :param get_econ: Accepted but not read by this function.
    :param get_tax_est: Accepted but not read by this function.
    :param get_tax_150: Accepted but not read by this function.
    :param get_tax_200: Accepted but not read by this function.
    :param get_tax_sl: Accepted but not read by this function.
    :param get_tax_ads: Accepted but not read by this function.
    :param soi_from_out: Forwarded to the SOI loaders as from_out. The
           loaders are asked to output their data only when this is False.
    :param output_data: Accepted but not read by this function.
    :returns: A dictionary holding the economic depreciation tree under
              "Econ" and the tax depreciation tree under "Tax".
    """
    if data_tree is None:
        data_tree = naics.generate_tree()
    # Reading in the SOI Tax Stats-Corporation data:
    soi_tree = naics.generate_tree()
    soi_tree = read_soi.load_corporate(soi_tree=soi_tree,
                                       from_out=soi_from_out,
                                       output_data=(not soi_from_out))
    # Reading in the SOI Tax Stats-Partnership data:
    soi_tree = read_soi.load_partner(soi_tree=soi_tree,
                                     from_out=soi_from_out,
                                     output_data=(not soi_from_out))
    # Reading in the SOI Tax Stats-Proprietorship data:
    soi_tree = read_soi.load_soi_proprietorship(soi_tree=soi_tree,
                                                from_out=soi_from_out,
                                                output_data=(not soi_from_out))
    # NOTE(review): soi_tree is not read anywhere below; the summary tree is
    # built from data_tree. Confirm whether soi_tree was meant to be used.
    #
    # Many industries are not listed in the SOI datasets. The data for these
    # missing industries are interpolated. The interpolation steps below are
    # currently disabled:
    # Get a list of the names of all the pd dfs besides the list of codes:
    #cur_names = soi_tree.enum_inds[0].data.dfs.keys()
    #cur_names.remove(_CODE_DF_NM)
    # Populate missing industry data backwards through the tree:
    #naics.pop_back(data_tree, cur_names)
    # Populate the missing total corporate data forwards through the tree:
    #naics.pop_forward(data_tree, ["tot_corps"])
    # Populate other missing data using tot_corps as a "blueprint":
    #cur_names = ["c_corps", "s_corps", "PA_inc_loss", "PA_assets", "soi_prop"]
    #naics.pop_forward(data_tree, cur_names, "tot_corps")
    # Calculate c_corps data:
    #read_soi.calc_c_corp(data_tree)
    #naics.pop_back(data_tree,["c_corps"])
    #naics.pop_forward(data_tree, ["c_corps"], "tot_corps")
    # Populate pa05 using pa01:
    #naics.pop_forward(data_tree, ["PA_types"], "PA_inc_loss")
    #
    #naics.pop_back(data_tree, ["farm_prop"])
    #naics.pop_forward(data_tree, ["farm_prop"], "tot_corps")

    # Create an output tree containing only the final data on FA, INV, and
    # LAND:
    output_tree = calc_assets.summary_tree(data_tree, _DATA_DIR)
    # Create a tree with all the FA's broken down by type of asset:
    asset_tree = read_bea.read_bea(output_tree, _DATA_DIR)
    naics.pop_back(asset_tree, ["All", "Corp", "Non-Corp"])
    # Entity types passed as the sub-print for the "Corp" dataframe:
    corp_types = ["C Corporations",
                  "Corporate general partners",
                  "Corporate limited partners"]
    # Entity types passed as the sub-print for the "Non-Corp" dataframe:
    non_corp_types = ["S Corporations",
                      "Individual general partners",
                      "Individual limited partners",
                      "Partnership general partners",
                      "Partnership limited partners",
                      "Tax-exempt organization general partners",
                      "Tax-exempt organization limited partners",
                      "Nominee and other general partners",
                      "Nominee and other limited partners",
                      "Sole Proprietors"]
    # Populate the asset data forwards, using the "FA" dataframe of the
    # output tree as the blueprint:
    naics.pop_forward(asset_tree, ["All"], "FA", output_tree)
    naics.pop_forward(asset_tree, ["Corp"], "FA", output_tree, corp_types)
    naics.pop_forward(asset_tree, ["Non-Corp"], "FA", output_tree,
                      non_corp_types)
    # Inventories:
    inv_tree = read_inv.read_inventories(output_tree, _DATA_DIR)
    naics.pop_back(inv_tree, ["Inventories"])
    naics.pop_forward(inv_tree, ["Inventories"])
    # Land:
    land_tree = read_land.read_land(output_tree, _DATA_DIR)
    naics.pop_back(land_tree, ["Land"])
    naics.pop_forward(land_tree, ["Land"], "LAND", output_tree)
    # Calculating the depreciation rates:
    econ_depr_tree = calc_rates.calc_depr_rates(asset_tree, inv_tree,
                                                land_tree, _DATA_DIR)
    tax_depr_tree = calc_rates.calc_tax_depr_rates(asset_tree, inv_tree,
                                                   land_tree, _DATA_DIR)
    naics.pop_rates(tax_depr_tree)
    return {"Econ": econ_depr_tree, "Tax": tax_depr_tree}
def calibrate_depr_rates(
    data_tree=naics.generate_tree(),
    get_all=False,
    get_econ=False,
    get_tax=False,
    get_tax_est=False,
    get_tax_150=False,
    get_tax_200=False,
    get_tax_sl=False,
    get_tax_ads=False,
    soi_from_out=False,
    output_data=False,
):
    """ Calibrates a tree with the depreciation rate parameters.

    The SOI tax data are pulled, turned into a tree of assets, and that
    asset tree is handed to the depreciation calibration module.

    :param data_tree: A NAICS tree. It is accepted for the caller's
           convenience but is not read anywhere in this function.
    :param get_all: Switches on both get_econ and get_tax.
    :param get_econ: Whether to get the economic depreciation rates.
    :param get_tax: Switches on every one of the get_tax_* flags below.
    :param get_tax_est: Whether to get the estimated tax depreciation
           rates, i.e. the ones that use IRS tax documents to decide which
           assets fall under which tax depreciation method.
    :param get_tax_150: Get the rates assuming the GDS 150% declining
           balance method.
    :param get_tax_200: Get the rates assuming the GDS 200% declining
           balance method.
    :param get_tax_sl: Get the rates assuming the straight line method.
    :param get_tax_ads: Get the rates assuming the ADS method.
    :param soi_from_out: Forwarded to pull_soi_data as from_out. The soi
           data are output only when this is False.
    :param output_data: Forwarded to the depreciation module's
           init_depr_rates.
    :returns: Whatever the depreciation module's init_depr_rates returns.
    """
    # Put the depreciation rate directory on the module search path so the
    # module that gathers and processes the depreciation data can be
    # imported:
    depr_path = os.path.abspath(_PARAM_DIR + "//depreciation")
    sys.path.append(depr_path)
    import depreciation_calibration as depr

    # get_all implies both the economic and the tax parameters:
    if get_all:
        get_econ = get_tax = True
    # get_tax implies every individual tax method:
    if get_tax:
        get_tax_150 = get_tax_200 = get_tax_sl = True
        get_tax_ads = get_tax_est = True

    # NAICS tree with all the soi tax data:
    write_soi = not soi_from_out
    soi_tree = pull_soi_data(get_all=True,
                             from_out=soi_from_out,
                             output_data=write_soi)
    # NAICS tree with all assets--fixed assets, inventories, and land--by
    # sector:
    asset_tree = calc_soi_assets(soi_tree=soi_tree)
    # The asset tree is what the depreciation rates are initialized from:
    rate_flags = {
        "get_econ": get_econ,
        "get_tax_est": get_tax_est,
        "get_tax_150": get_tax_150,
        "get_tax_200": get_tax_200,
        "get_tax_sl": get_tax_sl,
        "get_tax_ads": get_tax_ads,
    }
    return depr.init_depr_rates(asset_tree=asset_tree,
                                output_data=output_data,
                                **rate_flags)
def load_soi_nonfarm_prop(data_tree=None,
                          blue_tree=None,
                          blueprint=None,
                          from_out=False,
                          out_path=_NFARM_PROP_OUT_PATH):
    """ This function loads the soi nonfarm proprietorship data:

    :param data_tree: The NAICS tree to read the data into. When omitted, a
           new tree is generated on every call (the tree is modified in
           place, so it must not be shared between calls).
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. It is
           passed on to naics.pop_forward unchanged.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the nonfarm dataframe
           forward. If None and the first industry of data_tree has a
           _TOT_CORP_DF_NM dataframe, that dataframe is used.
    :param from_out: Whether to read in the data from output.
    :param out_path: The path of the output file that is read when
           from_out is set.
    :returns: The tree the data were loaded into.
    """
    if data_tree is None:
        data_tree = naics.generate_tree()
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    # Opening nonfarm proprietor data:
    wb = xlrd.open_workbook(_DDCT_IN_PATH)
    ws = wb.sheet_by_index(0)
    cross = pd.read_csv(_DDCT_IN_CROSS_PATH)
    # Finding the relevant positions in worksheet. pos1 locates the sector
    # column and pos2/pos3 the two deduction columns; the search for pos3
    # is given the position one column to the right of pos2
    # (presumably its starting point -- confirm against naics.search_ws).
    pos1 = naics.search_ws(ws, _SECTOR_COL, 20, True, [0, 0], True)
    pos2 = naics.search_ws(ws, _DDCT_COL1, 20)
    pos3 = naics.search_ws(ws, _DDCT_COL2, 20, True,
                           np.array(pos2) + np.array([0, 1]))
    # Add the nonfarm dataframe to every industry in the tree:
    data_tree.append_all(df_nm=_NFARM_DF_NM, df_cols=[_NFARM_DF_COL_NM])
    # Both indices are advanced before they are used, so starting at the
    # last position makes the first lookup land on position 0. They are not
    # reset between worksheet rows: each scan resumes where the previous
    # one stopped.
    cross_index = cross.shape[0] - 1
    enum_index = len(data_tree.enum_inds) - 1
    for i in xrange(pos1[0], ws.nrows):
        cur_cell = str(ws.cell_value(i, pos1[1])).lower().strip()
        # Share of this worksheet row that has been assigned so far:
        tot_proportions = 0
        for j in xrange(0, cross.shape[0]):
            cross_index = (cross_index + 1) % cross.shape[0]
            cur_ind_name = str(cross.iloc[cross_index, 0]).lower().strip()
            if (cur_cell == cur_ind_name):
                # Crosswalk rows without codes cannot be assigned:
                if pd.isnull(cross.iloc[cross_index, 1]):
                    continue
                # The codes of a crosswalk entry are separated by ".":
                ind_codes = str(cross.iloc[cross_index, 1]).split(".")
                for k in xrange(0, len(data_tree.enum_inds)):
                    enum_index = (enum_index + 1) % len(data_tree.enum_inds)
                    cur_data = data_tree.enum_inds[enum_index].data
                    cur_codes = cur_data.dfs[_CODE_DF_NM]
                    cur_proportions = naics.compare_codes(
                        ind_codes, cur_codes.iloc[:, 0])
                    if cur_proportions == 0:
                        continue
                    tot_proportions += cur_proportions
                    # Add this industry's share of the two deduction
                    # columns, scaled by the file's factor:
                    cur_dfs = cur_data.dfs[_NFARM_DF_NM][_NFARM_DF_COL_NM]
                    cur_dfs[0] += (_DDCT_FILE_FCTR * cur_proportions *
                                   (ws.cell_value(i, pos2[1]) +
                                    ws.cell_value(i, pos3[1])))
            # Stop scanning the crosswalk once the whole row is assigned:
            if (tot_proportions == 1):
                break
    # Default:
    if blueprint is None and _TOT_CORP_DF_NM in data_tree.enum_inds[
            0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    naics.pop_back(tree=data_tree, df_list=[_NFARM_DF_NM])
    naics.pop_forward(tree=data_tree,
                      df_list=[_NFARM_DF_NM],
                      blueprint=blueprint,
                      blue_tree=blue_tree)
    #
    return data_tree
Example #46
0
def calc_tax_depr_rates(asset_tree, inv_tree, land_tree):
    """ Calculates the tax depreciation rates of every industry.

    A rate is computed per asset type for each tax method, and then
    averaged per industry, weighting by the industry's holdings of each
    asset and scaling by the share of fixed assets in the industry's total
    of fixed assets, inventories and land.

    :param asset_tree: The NAICS tree with the "All", "Corp" and "Non-Corp"
           fixed asset dataframes (one column per type of asset).
    :param inv_tree: The NAICS tree with the "Inventories" dataframe.
    :param land_tree: The NAICS tree with the "Land" dataframe.
    :returns: A new NAICS tree with one dataframe per tax method plus a
              "Recommended" dataframe, each with the columns "All", "Corp"
              and "Non-Corp".
    """
    # Real Interest Rate:
    r = .05  # NEEDS TO BE AUTOMATED!!!!
    # Reading in the tax data and cleaning up the asset names in place:
    tax_data = pd.read_csv(_TAX_DEPR_IN_PATH).fillna(0)
    tax_assets = tax_data["Asset Type"]
    num_tax_assets = len(tax_assets)
    for row in xrange(num_tax_assets):
        tax_assets[row] = str(tax_assets[row]).replace("\xa0", " ").strip()
    # The tax methods of each system, with their declining balance factors:
    tax_gds_mthds = {"GDS 200%": 2.0, "GDS 150%": 1.5, "GDS SL": 1.0}
    tax_ads_mthds = {"ADS SL": 1.0}
    tax_systems = {"GDS": tax_gds_mthds, "ADS": tax_ads_mthds}
    tax_cols = list(tax_gds_mthds.keys()) + list(tax_ads_mthds.keys())
    # One row per tax asset, one column per tax method:
    tax_rates = pd.DataFrame(np.zeros((num_tax_assets, len(tax_cols))),
                             columns=tax_cols)
    tax_rates["Asset"] = tax_assets
    # Compute the tax rates:
    for system, methods in tax_systems.items():
        tax_yrs = tax_data[system]
        for method, tax_b in methods.items():
            tax_beta = tax_b / tax_yrs
            tax_star = tax_yrs * (1 - (1 / tax_b))
            first_term = ((tax_beta / (tax_beta + r)) *
                          (1 - np.exp(-1 * (tax_beta + r) * tax_star)))
            second_term = ((np.exp(-1 * tax_beta * tax_star) /
                            ((tax_yrs - tax_star) * r)) *
                           (np.exp(-1 * r * tax_star) -
                            np.exp(-1 * r * tax_yrs)))
            tax_z = first_term + second_term
            tax_rates[method] = r / ((1 / tax_z) - 1)
    tax_rates = tax_rates.fillna(0)
    #
    types = ["All", "Corp", "Non-Corp"]
    # Initialize tax depreciation rates tree:
    depr_tree = naics.generate_tree()
    for system in tax_systems:
        for method in tax_systems[system]:
            depr_tree.append_all(df_nm=method, df_cols=types)
    depr_tree.append_all(df_nm="Recommended", df_cols=types)
    # The cleaned tax asset names do not change from here on:
    tax_names = [str(tax_assets[row]).strip()
                 for row in xrange(num_tax_assets)]
    for own_type in types:
        first_df = asset_tree.enum_inds[0].data.dfs[own_type]
        asset_list = first_df.columns.values.tolist()
        num_cols = len(asset_list)
        # match[col] is the tax_rates row of the asset in that column, or
        # -1 when there is none. The last matching row wins.
        match = np.array([-1] * num_cols)
        for col, asset_name in enumerate(asset_list):
            own_name = str(asset_name).strip()
            for row, tax_name in enumerate(tax_names):
                if own_name == tax_name:
                    match[col] = row
        for ind_index, cur_ind in enumerate(depr_tree.enum_inds):
            asset_df = asset_tree.enum_inds[ind_index].data.dfs[own_type]
            #
            tot_assets = sum(asset_df.iloc[0, :])
            inv_df = inv_tree.enum_inds[ind_index].data.dfs["Inventories"]
            tot_inv = inv_df[own_type][0]
            land_df = land_tree.enum_inds[ind_index].data.dfs["Land"]
            tot_land = land_df[own_type][0]
            # Industries without any assets keep their initial values:
            if (tot_assets + tot_inv + tot_land == 0):
                continue
            ratio = tot_assets / (tot_assets + tot_inv + tot_land)
            # The rates under each of the tax methods:
            for method in tax_cols:
                cur_tax = cur_ind.data.dfs[method][own_type]
                cur_sum = 0.0
                for col in xrange(num_cols):
                    if match[col] != -1:
                        cur_sum += (asset_df.iloc[0, col] *
                                    tax_rates[method][match[col]])
                cur_tax[0] = ratio * (cur_sum / tot_assets)
            # The rates using the method listed for each asset in the
            # "Method" column of the tax data:
            cur_tax = cur_ind.data.dfs["Recommended"][own_type]
            cur_sum = 0
            for col in xrange(num_cols):
                if match[col] != -1:
                    own_method = tax_data["Method"][match[col]]
                    cur_rate = tax_rates[own_method][match[col]]
                    cur_sum += asset_df.iloc[0, col] * cur_rate
            cur_tax[0] = ratio * (cur_sum / tot_assets)
    return depr_tree
Example #47
0
def load_soi_tot_corp(data_tree=None, cols_dict=None,
                      blue_tree=None, blueprint=None):
    """This function pulls SOI total corporate data.

    The file in corp_dir whose name ends in "sb1.csv" (after a four
    character prefix) is read, its columns are summed per industry code,
    and the sums are stored in a "tot_corps" dataframe on every matching
    industry of the tree.

    :param data_tree: The NAICS tree to read the data into. A new tree is
           generated when it is omitted.
    :param cols_dict: A dictionary mapping the column names of the
           "tot_corps" dataframe to the column names in the input file.
           A default set of columns is used when it is omitted.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. It is
           passed on to naics.pop_forward unchanged.
    :param blueprint: The key of the dataframe to use as a "blueprint"
           when populating "tot_corps" forward. It is passed on to
           naics.pop_forward unchanged.
    :returns: The tree the data were loaded into, or None if the input
              file could not be read.
    """
    if data_tree is None:
        data_tree = naics.generate_tree()
    # The aggregate 1120 filings data for all corporations:
    tot_corp_file = ""
    for file_name in os.listdir(corp_dir):
        if file_name[4:] == "sb1.csv":
            # os.path.join uses the separator of the running platform; a
            # hard-coded backslash only works on Windows.
            tot_corp_file = os.path.abspath(os.path.join(corp_dir, file_name))
            break
    try:
        tot_corp_data = pd.read_csv(tot_corp_file).fillna(0)
    except IOError:
        print("IOError: Could not find tot-corp soi data file.")
        return None
    # Listing the relevant columns that are being extracted from the dataset:
    if cols_dict is None:
        # Default:
        cols_dict = dict([("Depreciable Assets","DPRCBL_ASSTS"),
                      ("Accumulated Depreciation", "ACCUM_DPR"),
                      ("Land", "LAND"),
                      ("Inventories", "INVNTRY"),
                      ("Interest Paid", "INTRST_PD"),
                      ("Capital Stock", "CAP_STCK"),
                      ("Additional paid-in Capital", "PD_CAP_SRPLS"),
                      ("Earnings (rtnd appr)", "RTND_ERNGS_APPR"),
                      ("Earnings (rtnd unappr.)", "COMP_RTND_ERNGS_UNAPPR"),
                      ("Cost of Treasury Stock", "CST_TRSRY_STCK")])
    data_cols = list(cols_dict.keys())
    # Initializing data on all corporations:
    for ind in data_tree.enum_inds:
        ind.append_dfs(("tot_corps", pd.DataFrame(np.zeros((1,len(data_cols))),
                                                  columns = data_cols)))
    # Loading total-corporation data:
    for code_num in np.unique(tot_corp_data["INDY_CD"]):
        # Find the first industry with a code that matches "code_num":
        for ind in data_tree.enum_inds:
            ind_codes = ind.data.dfs["Codes:"]
            if any(ind_codes.iloc[j,0] == code_num
                   for j in range(0, ind_codes.shape[0])):
                cur_dfs = ind.data.dfs["tot_corps"]
                break
        else:
            # If no match was found, then ignore data.
            continue
        # Indicators for if rows in tot_corp_data match current industry code:
        indicators = (tot_corp_data["INDY_CD"] == code_num)
        # Filling in every column in the dataframe:
        for col_nm in cols_dict:
            cur_dfs[col_nm][0] = sum(indicators *
                                     tot_corp_data[cols_dict[col_nm]])
    # Populate the industries without data backwards and forwards:
    naics.pop_back(tree=data_tree, df_list=["tot_corps"])
    naics.pop_forward(tree=data_tree, df_list=["tot_corps"],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree
Example #48
0
def load_soi_prop_data(data_tree=None, blue_tree=None, blueprint=None):
    """This function loads the soi nonfarm proprietorship data.

    The depreciation deductions in the "sp01br" worksheet of prop_dir are
    assigned to the industries of the tree, using the crosswalk file in
    the same folder to map worksheet rows to industry codes.

    :param data_tree: The NAICS tree to read the data into. A new tree is
           generated when it is omitted.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. It is
           passed on to naics.pop_forward unchanged.
    :param blueprint: The key of the dataframe to use as a "blueprint"
           when populating "soi_prop" forward. If None and the first
           industry of data_tree has a "tot_corps" dataframe, "tot_corps"
           is used.
    :returns: The tree the data were loaded into.
    """
    # The factor the worksheet values are multiplied by:
    prop_fctr = 10**3
    #
    if data_tree is None:
        data_tree = naics.generate_tree()
    # Finding the "\**sp01br" file in the proprietorships folder. The paths
    # are built with os.path.join so that the separator of the running
    # platform is used; a hard-coded backslash only works on Windows.
    # NOTE: if either file is missing, its name is never bound and the
    # corresponding open/read call below raises a NameError.
    for file_name in os.listdir(prop_dir):
        if(file_name[2:] == "sp01br.xls"):
            sp01br_file = os.path.abspath(os.path.join(prop_dir, file_name))
        if(file_name[2:] == "sp01br_Crosswalk.csv"):
            sp01br_cross_file = os.path.abspath(
                os.path.join(prop_dir, file_name))
    # Opening nonfarm proprietor data:
    cur_wb = xlrd.open_workbook(sp01br_file)
    cur_ws = cur_wb.sheet_by_index(0)
    cur_cross = pd.read_csv(sp01br_cross_file)
    # Finding the relevant positions in worksheet. pos1 locates the sector
    # column and pos2/pos3 the two deduction columns; the search for pos3
    # is given the position one column to the right of pos2
    # (presumably its starting point -- confirm against naics.search_ws).
    pos1 = naics.search_ws(cur_ws,"Industrial sector",20, True, [0,0], True)
    pos2 = naics.search_ws(cur_ws,"Depreciation\ndeduction",20)
    pos3 = naics.search_ws(cur_ws,"Depreciation\ndeduction",20,
                         True, np.array(pos2) + np.array([0,1]))
    # Add the proprietorship dataframe to every industry in the tree:
    for ind in data_tree.enum_inds:
        ind.append_dfs(("soi_prop", pd.DataFrame(np.zeros((1,1)),
                                    columns = ["Depr Deductions"])))
    # Both indices are advanced before they are used, so starting at the
    # last position makes the first lookup land on position 0. They are not
    # reset between worksheet rows: each scan resumes where the previous
    # one stopped.
    cross_index = cur_cross.shape[0]-1
    enum_index = len(data_tree.enum_inds)-1
    for i in xrange(pos1[0],cur_ws.nrows):
        cur_cell = str(cur_ws.cell_value(i,pos1[1])).lower().strip()
        # Share of this worksheet row that has been assigned so far:
        tot_proportions = 0
        for j in xrange(0, cur_cross.shape[0]):
            cross_index = (cross_index+1) % cur_cross.shape[0]
            cur_ind_name = str(cur_cross.iloc[cross_index,0]).lower().strip()
            if(cur_cell == cur_ind_name):
                # Crosswalk rows without codes cannot be assigned:
                if pd.isnull(cur_cross.iloc[cross_index,1]):
                    continue
                # The codes of a crosswalk entry are separated by ".":
                ind_codes = str(cur_cross.iloc[cross_index,1]).split(".")
                for k in xrange(0, len(data_tree.enum_inds)):
                    enum_index = (enum_index+1) % len(data_tree.enum_inds)
                    cur_data = data_tree.enum_inds[enum_index].data
                    cur_codes = cur_data.dfs["Codes:"]
                    cur_proportions = naics.compare_codes(
                        ind_codes, cur_codes.iloc[:,0])
                    if cur_proportions == 0:
                        continue
                    tot_proportions += cur_proportions
                    # Add this industry's share of the two deduction
                    # columns, scaled by prop_fctr:
                    cur_dfs = cur_data.dfs["soi_prop"]["Depr Deductions"]
                    cur_dfs[0] += (prop_fctr * cur_proportions
                                        * (cur_ws.cell_value(i,pos2[1])
                                        + cur_ws.cell_value(i,pos3[1])))
            # Stop scanning the crosswalk once the whole row is assigned:
            if(tot_proportions == 1):
                break
    # Default:
    if blueprint is None and "tot_corps" in data_tree.enum_inds[0].data.dfs.keys():
        blueprint = "tot_corps"
    naics.pop_back(tree=data_tree, df_list=["soi_prop"])
    naics.pop_forward(tree=data_tree, df_list=["soi_prop"],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree