Python load_tree_dfs 예제들, naics_processing.load_tree_dfs Python 예제들

예제 #1

0

파일 보기

파일: parameter_calibrations.py 프로젝트: evan-magnusson/dynamic

def calibrate_debt(debt_tree=None, soi_tree=None, from_out=False,
                   soi_from_out=False):
    if soi_tree == None:
        soi_tree = pull_soi_data(get_corp=True, from_out=soi_from_out)
    if debt_tree == None:
        debt_tree = naics.generate_tree()
    #
    debt_dir = os.path.abspath(_PARAM_DIR + "\\debt")
    debt_data_dir = os.path.abspath(debt_dir + "\\data")
    sys.path.append(debt_dir)
    import debt_calibration as debt
    #
    lblty_file = os.path.abspath(debt_data_dir + "\\liabilities.csv")
    print lblty_file
    lblty_df = pd.read_csv(lblty_file)
    eqty_file = os.path.abspath(debt_data_dir + "\\equity.csv")
    eqty_df = pd.read_csv(eqty_file)
    debt_tree = naics.load_tree_dfs(input_file=lblty_file, dfs_name="liabilities", tree=debt_tree)
    debt_tree = naics.load_tree_dfs(input_file=eqty_file, dfs_name="equity", tree=debt_tree)
    #
    naics.pop_forward(tree=debt_tree, df_list=["liabilities"],
                      blue_tree=soi_tree, blueprint="tot_corps",
                      sub_print = ["Interest Paid"])
    #
    return debt_tree

예제 #2

0

파일 보기

파일: parameter_calibrations.py 프로젝트: jdebacker/dynamic

def calibrate_debt(debt_tree=naics.generate_tree(), soi_tree=None, from_out=False, soi_from_out=False):
    """ This function is incomplete. This is supposed to do the debt
    calibrations.
    
    :param debt_tree: The NAICS tree to append the calibrated debt
           parameters to. Default is a newly generated tree.
    :param soi_tree: A tree with all of the relevant soi data.
    :
    """
    if soi_tree == None:
        soi_tree = pull_soi_data(get_corp=True, from_out=soi_from_out)
    #
    debt_dir = os.path.abspath(_PARAM_DIR + "//debt")
    debt_data_dir = os.path.abspath(debt_dir + "//data")
    sys.path.append(debt_dir)
    import debt_calibration as debt

    #
    lblty_file = os.path.abspath(debt_data_dir + "//liabilities.csv")
    print lblty_file
    lblty_df = pd.read_csv(lblty_file)
    eqty_file = os.path.abspath(debt_data_dir + "//equity.csv")
    eqty_df = pd.read_csv(eqty_file)
    debt_tree = naics.load_tree_dfs(input_file=lblty_file, dfs_name="liabilities", tree=debt_tree)
    debt_tree = naics.load_tree_dfs(input_file=eqty_file, dfs_name="equity", tree=debt_tree)
    #
    naics.pop_forward(
        tree=debt_tree, df_list=["liabilities"], blue_tree=soi_tree, blueprint="tot_corps", sub_print=["Interest Paid"]
    )
    #
    return debt_tree

예제 #3

0

파일 보기

파일: pull_soi_proprietorship.py 프로젝트: salimfurth/OG-USA

def load_soi_farm_prop(data_tree=naics.generate_tree(),
                       blue_tree=None,
                       blueprint=None,
                       from_out=False,
                       out_path=_FARM_PROP_OUT_PATH):
    """ This function loads the soi nonfarm proprietorship data:
    
    :param data_tree: The NAICS tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    # Load Farm Proprietorship data:
    farm_data = pd.read_csv(_FARM_IN_PATH)
    new_farm_cols = ["Land", "FA"]
    #
    data_tree.append_all(df_nm=_FARM_DF_NM, df_cols=new_farm_cols)
    #
    land_mult = ((farm_data["R_sp"][0] + farm_data["Q_sp"][0]) *
                 (float(farm_data["A_sp"][0]) / farm_data["A_p"][0]))
    total = farm_data["R_p"][0] + farm_data["Q_p"][0]
    total_pa = 0
    cur_codes = [111, 112]
    proportions = np.zeros(len(cur_codes))
    proportions = naics.get_proportions(cur_codes, data_tree, _AST_PRT_DF_NM,
                                        [_LAND_COL_NM, _DEPR_COL_NM])
    #
    for ind_code in cur_codes:
        cur_ind = naics.find_naics(data_tree, ind_code)
        cur_df = cur_ind.data.dfs[_AST_PRT_DF_NM]
        total_pa += (cur_df[_LAND_COL_NM][0] + cur_df[_DEPR_COL_NM][0])
    #
    for i in xrange(0, len(cur_codes)):
        cur_ind = naics.find_naics(data_tree, cur_codes[i])
        cur_ind.data.dfs[_FARM_DF_NM]["Land"][0] = (
            land_mult * cur_ind.data.dfs[_AST_PRT_DF_NM][_LAND_COL_NM][0] /
            total_pa)
        cur_ind.data.dfs[_FARM_DF_NM]["FA"][0] = (
            (proportions.iloc[1, i] * total) -
            cur_ind.data.dfs[_FARM_DF_NM]["Land"][0])
    # Default:
    if blueprint == None and _TOT_CORP_DF_NM in data_tree.enum_inds[
            0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    naics.pop_back(tree=data_tree, df_list=[_FARM_DF_NM])
    naics.pop_forward(tree=data_tree,
                      df_list=[_FARM_DF_NM],
                      blueprint=blueprint,
                      blue_tree=blue_tree)
    #
    return data_tree

예제 #4

0

파일 보기

def load_type(data_tree=naics.generate_tree(),
               blue_tree = None, blueprint = None,
               from_out=False, out_path=None):
    """ This function loads the soi partnership asset data.
    
    :param data_tree: The NAICS tree to read the data into.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    """
    # Initializing the output path:
    if out_path == None:
        out_path = _TYP_OUT_PATH
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path,
                                        tree=data_tree)
        return data_tree
    # Opening data on income by partner type:
    wb = xlrd.open_workbook(_TYP_IN_PATH)
    ws = wb.sheet_by_index(0)
    num_rows = ws.nrows
    # Initializing dataframe to hold pertinent type income data:
    typ_df = pd.DataFrame(np.zeros((ws.ncols-1, len(_TYP_IN_ROW_NMS))),
                          columns=_TYP_DF_DICT.values())
    # Extracting the data. For each input row:
    for in_row_nm in _TYP_IN_ROW_NMS:
        
        df_col_key = _TYP_IN_ROWS_DF_DICT[in_row_nm]
        df_col_nm = _TYP_DF_DICT[df_col_key]
        in_row_nm = in_row_nm.lower()
        for ws_row_index in xrange(0, num_rows):
            ws_row_nm = str(ws.cell_value(ws_row_index,0)).lower()
            if(in_row_nm in ws_row_nm):
                typ_df[df_col_nm] = ws.row_values(ws_row_index,1)
                break
    # Scaling the data to the correct units:
    typ_df = typ_df * _TYP_FILE_FCTR
    # Reading in the crosswalks between the columns and the NAICS codes:
    typ_cross = pd.read_csv(_TYP_IN_CROSS_PATH)
    #
    data_tree = naics.load_data_with_cross(
                    data_tree=data_tree, data_df=typ_df,
                    cross_df=typ_cross, df_nm=_TYP_DF_NM
                    )
    # Default blueprint is partner income, and, if not, then tot_corps:
    has_inc_df = _INC_DF_NM in data_tree.enum_inds[0].data.dfs.keys()
    has_tot_df = _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys()
    if blueprint == None and has_inc_df:
        blueprint = _INC_DF_NM
    elif blueprint == None and has_tot_df:
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_TYP_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_TYP_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree

예제 #5

0

파일 보기

파일: pull_soi_proprietorship.py 프로젝트: evan-magnusson/dynamic

def load_soi_farm_prop(data_tree = naics.generate_tree(),
                       blue_tree = None, blueprint = None,
                       from_out=False, out_path=_FARM_PROP_OUT_PATH):
    """ This function loads the soi nonfarm proprietorship data:
    
    :param data_tree: The NAICS tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    # Load Farm Proprietorship data:
    farm_data = pd.read_csv(_FARM_IN_PATH)
    new_farm_cols = ["Land", "FA"]
    #
    for i in data_tree.enum_inds:
        i.append_dfs((_FARM_DF_NM,
                      pd.DataFrame(np.zeros((1,len(new_farm_cols))), 
                                   columns=new_farm_cols)))
    #
    land_mult = ((farm_data["R_sp"][0] + farm_data["Q_sp"][0]) * 
                        (float(farm_data["A_sp"][0])/farm_data["A_p"][0]))
    total = farm_data.iloc[0,0] + farm_data.iloc[0,2]
    total_pa = 0
    cur_codes = [111,112]
    proportions = np.zeros(len(cur_codes))
    proportions = naics.get_proportions(cur_codes, data_tree, "PA_assets", 
                                 ["Land (Net)","Depreciable assets (Net)"])
    #
    for i in xrange(0, len(cur_codes)):
        cur_ind = naics.find_naics(data_tree, cur_codes[i])
        cur_df = cur_ind.data.dfs["PA_assets"]
        total_pa += (cur_df["Land (Net)"][0] + 
                                cur_df["Depreciable assets (Net)"][0])
    #
    for i in xrange(0,len(cur_codes)):
        cur_ind = naics.find_naics(data_tree, cur_codes[i])
        cur_ind.data.dfs[_FARM_DF_NM]["Land"][0] = (land_mult * 
                            cur_ind.data.dfs["PA_assets"]["Land (Net)"][0]/
                            total_pa)
        cur_ind.data.dfs[_FARM_DF_NM]["FA"][0] = ((proportions.iloc[1,i]*total)
                                    - cur_ind.data.dfs[_FARM_DF_NM]["Land"][0])
    # Default:            
    if blueprint == None and _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    naics.pop_back(tree=data_tree, df_list=[_FARM_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_FARM_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree

예제 #6

0

파일 보기

def load_income(data_tree=naics.generate_tree(),
                blue_tree=None, blueprint=None,
                from_out=False, out_path=None):
    """ This function loads the soi partnership income data.
    
    :param data_tree: The NAICS tree to read the data into.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    """
    # Initializing the output path:
    if out_path == None:
        out_path = _INC_OUT_PATH
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, 
                                        tree=data_tree)
        return data_tree
    # Opening data on net income/loss:
    wb = xlrd.open_workbook(_INC_IN_PATH)
    ws = wb.sheet_by_index(0)
    start_col = naics.search_ws(ws, _INC_STRT_COL_NM, 20)[1]
    # Initializing dataframe to hold pertinent income/loss data:
    data_df = pd.DataFrame(np.zeros((ws.ncols-start_col,3)), 
                           columns = _INC_PRT_DF_COL_NMS)
    # Extracting the data from the worksheet:
    for row in xrange(0, ws.nrows):
        # Going through each row of excel file, looking for input rows:
        if(_INC_NET_INC_ROW_NM in str(ws.cell_value(row,0)).lower()):
            data_df[_INC_NET_INC_COL_NM] = ws.row_values(row+1, start_col)
            data_df[_INC_NET_LOSS_COL_NM] = ws.row_values(row+2, start_col)
            break
        if(_INC_DEPR_ROW_NM in str(ws.cell_value(row,0)).lower()):
            data_df[_INC_DEPR_COL_NM] = ws.row_values(row, start_col)
    # Scaling the data to the correct units:
    data_df = data_df * _INC_FILE_FCTR
    # Reading in the crosswalks between the columns and the NAICS codes:
    pa01cross = pd.read_csv(_INC_IN_CROSS_PATH)
    # Processing the inc/loss data into the NAICS tree:
    data_tree = naics.load_data_with_cross(
                    data_tree=data_tree, data_df=data_df,
                    cross_df=pa01cross, df_nm=_INC_DF_NM
                    )
    # Default blueprint is tot_corps:
    has_tot_df = _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys()
    if blueprint == None and has_tot_df:
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_INC_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_INC_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    
    return data_tree

예제 #7

0

파일 보기

파일: parameter_calibrations.py 프로젝트: salimfurth/OG-USA

def calibrate_debt(debt_tree=naics.generate_tree(),
                   soi_tree=None,
                   from_out=False,
                   soi_from_out=False):
    """ This function is incomplete. This is supposed to do the debt
    calibrations.
    
    :param debt_tree: The NAICS tree to append the calibrated debt
           parameters to. Default is a newly generated tree.
    :param soi_tree: A tree with all of the relevant soi data.
    :
    """
    if soi_tree == None:
        soi_tree = pull_soi_data(get_corp=True, from_out=soi_from_out)
    #
    debt_dir = os.path.abspath(_PARAM_DIR + "//debt")
    debt_data_dir = os.path.abspath(debt_dir + "//data")
    sys.path.append(debt_dir)
    import debt_calibration as debt
    #
    lblty_file = os.path.abspath(debt_data_dir + "//liabilities.csv")
    print lblty_file
    lblty_df = pd.read_csv(lblty_file)
    eqty_file = os.path.abspath(debt_data_dir + "//equity.csv")
    eqty_df = pd.read_csv(eqty_file)
    debt_tree = naics.load_tree_dfs(input_file=lblty_file,
                                    dfs_name="liabilities",
                                    tree=debt_tree)
    debt_tree = naics.load_tree_dfs(input_file=eqty_file,
                                    dfs_name="equity",
                                    tree=debt_tree)
    #
    naics.pop_forward(tree=debt_tree,
                      df_list=["liabilities"],
                      blue_tree=soi_tree,
                      blueprint="tot_corps",
                      sub_print=["Interest Paid"])
    #
    return debt_tree

예제 #8

0

파일 보기

파일: pull_soi_corp.py 프로젝트: salimfurth/OG-USA

def calc_c_corp(
        data_tree=naics.generate_tree(), from_out=False,
        out_path=_C_CORP_OUT_PATH):
    """ This function calculates the soi c-corporation data based of the
    s and the aggregate corporation data.
    
    :param data_tree: The tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    ''' For each industry, subtract the s-corporation data from the total to
    get the c-corporation data.'''
    for ind in data_tree.enum_inds:
        try:
            # Industry's total-corporation data:
            cur_tot = ind.data.dfs[_TOT_DF_NM]
        except KeyError:
            print "Total-Corp data not initialized when interpolating C-Corp."
        try:
            # Industry's S-corporation data:
            cur_s = ind.data.dfs[_S_DF_NM]
        except KeyError:
            print "S-Corp data not initialized when interpolating C-Corp."
        data_cols = cur_tot.columns.values.tolist()
        # Append C-corporation dataframe:
        ind.append_dfs((_C_DF_NM,
                        pd.DataFrame(np.zeros((1, len(data_cols))),
                                     columns=data_cols)))
        # C-corporation data:
        ind.data.dfs[_C_DF_NM] = cur_tot - cur_s
    return data_tree

예제 #9

0

파일 보기

파일: pull_soi_corp.py 프로젝트: chrisrytting/OG-USA

def calc_c_corp(data_tree=naics.generate_tree(), from_out=False,
                out_path=_C_CORP_OUT_PATH):
    """ This function calculates the soi c-corporation data based of the
    s and the aggregate corporation data.
    
    :param data_tree: The tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    ''' For each industry, subtract the s-corporation data from the total to
    get the c-corporation data.'''
    for ind in data_tree.enum_inds:
        try:
            # Industry's total-corporation data:
            cur_tot = ind.data.dfs[_TOT_DF_NM]
        except KeyError:
            print "Total-Corp data not initialized when interpolating C-Corp."
        try:
            # Industry's S-corporation data:
            cur_s = ind.data.dfs[_S_DF_NM]
        except KeyError:
            print "S-Corp data not initialized when interpolating C-Corp."
        data_cols = cur_tot.columns.values.tolist()
        # Append C-corporation dataframe:
        ind.append_dfs((_C_DF_NM, pd.DataFrame(np.zeros((1,len(data_cols))),
                                                columns = data_cols)))
        # C-corporation data:
        ind.data.dfs[_C_DF_NM] = cur_tot - cur_s
    return data_tree

예제 #10

0

파일 보기

파일: pull_soi_partner.py 프로젝트: evan-magnusson/dynamic

def load_asset(data_tree=naics.generate_tree(),
             blue_tree=None, blueprint=None,
             from_out=False):
    """ This function loads the soi partnership asset data.
    
    :param data_tree: The NAICS tree to read the data into.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_file=_AST_OUT_PATH,
                                        tree=data_tree)
        return data_tree
    # Opening data on depreciable fixed assets, inventories, and land:
    wb = xlrd.open_workbook(_AST_IN_PATH)
    ws = wb.sheet_by_index(0)
    num_rows = ws.nrows
    # Columns of the asset dataframe:
    df_cols = _AST_DF_DICT.values()
    # Initializing dataframe to hold pertinent asset data:
    ast_df = pd.DataFrame(np.zeros((ws.ncols-1,len(df_cols))), columns=df_cols)
    ''' Extracting the data (note that the rows with total data appear first).
    For each input row:'''
    for in_row_nm in _AST_IN_ROW_NMS:
        # Key corresponding to total asset column:
        df_net_col_key = _AST_IN_ROWS_DF_NET_DICT[in_row_nm]
        # Asset dataframes net income column name:
        df_net_col_nm = _AST_DF_DICT[df_net_col_key]
        # Key corresponding to assets of net income partnerships column:
        df_inc_col_key = _AST_IN_ROWS_DF_INC_DICT[in_row_nm]
        # Asset dataframes total income column name:
        df_inc_col_nm = _AST_DF_DICT[df_inc_col_key]
        in_row_nm = in_row_nm.lower()
        # Finding the first input row with in_row_nm:
        for in_row1 in xrange(0, num_rows):
            in_net_row_nm = str(ws.cell_value(in_row1,0)).lower()
            if(in_row_nm in in_net_row_nm):
                # Total asset data:
                ast_df[df_net_col_nm] = ws.row_values(in_row1, 1)
                # Finding the second input row with in_row_nm:
                for in_row2 in xrange(in_row1+1, num_rows):
                    in_inc_row_nm = str(ws.cell_value(in_row2,0)).lower()
                    if(in_row_nm in in_inc_row_nm):
                        # Asset data for companies with net income:
                        ast_df[df_inc_col_nm] = ws.row_values(in_row2,1)
                        break
                break
    # Scaling the data to the correct units:
    ast_df = ast_df * _AST_FILE_FCTR
    # Reading in the crosswalks between the columns and the NAICS codes:
    ast_cross = pd.read_csv(_AST_IN_CROSS_PATH)
    # Processing the asset data into the NAICS tree:
    data_tree = naics.load_data_with_cross(
                    data_tree=data_tree, data_df=ast_df,
                    cross_df=ast_cross, df_nm=_AST_DF_NM
                    )
    # Default blueprint is tot_corps:
    has_tot_df = _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys()
    if blueprint == None and has_tot_df:
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_AST_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_AST_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree

예제 #11

0

파일 보기

파일: pull_soi_proprietorship.py 프로젝트: chrisrytting/OG-USA

def load_soi_nonfarm_prop(data_tree=naics.generate_tree(), 
                          blue_tree=None, blueprint=None, 
                          from_out=False, out_path=_NFARM_PROP_OUT_PATH):
    """ This function loads the soi nonfarm proprietorship data:
    
    :param data_tree: The NAICS tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    # Opening nonfarm proprietor data:
    wb = xlrd.open_workbook(_DDCT_IN_PATH)
    ws = wb.sheet_by_index(0)
    cross = pd.read_csv(_DDCT_IN_CROSS_PATH)
    # Finding the relevant positions in worksheet:
    pos1 = naics.search_ws(ws, _SECTOR_COL, 20, True, [0,0], True)
    pos2 = naics.search_ws(ws, _DDCT_COL1, 20)
    pos3 = naics.search_ws(ws,_DDCT_COL2, 20,
                           True, np.array(pos2) + np.array([0,1]))
    #
    data_tree.append_all(df_nm=_NFARM_DF_NM, df_cols=[_NFARM_DF_COL_NM])
    #
    cross_index = cross.shape[0]-1
    enum_index = len(data_tree.enum_inds)-1
    for i in xrange(pos1[0],ws.nrows):
        cur_cell = str(ws.cell_value(i,pos1[1])).lower().strip()
        #
        tot_proportions = 0
        for j in xrange(0, cross.shape[0]):
            cross_index = (cross_index+1) % cross.shape[0]
            cur_ind_name = str(cross.iloc[cross_index,0]).lower().strip()
            if(cur_cell == cur_ind_name):
                if pd.isnull(cross.iloc[cross_index,1]):
                    continue
                ind_codes = str(cross.iloc[cross_index,1]).split(".")
                for k in xrange(0, len(data_tree.enum_inds)):
                    enum_index = (enum_index+1) % len(data_tree.enum_inds)
                    cur_data = data_tree.enum_inds[enum_index].data
                    cur_codes = cur_data.dfs[_CODE_DF_NM]
                    cur_proportions = naics.compare_codes(ind_codes, cur_codes.iloc[:,0])
                    if cur_proportions == 0:
                        continue
                    tot_proportions += cur_proportions
                    cur_dfs = cur_data.dfs[_NFARM_DF_NM][_NFARM_DF_COL_NM]
                    cur_dfs[0] += (_DDCT_FILE_FCTR * cur_proportions 
                                        * (ws.cell_value(i,pos2[1]) 
                                        + ws.cell_value(i,pos3[1])))
            if(tot_proportions == 1):
                break
    # Default:
    if blueprint == None and _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    naics.pop_back(tree=data_tree, df_list=[_NFARM_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_NFARM_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    #
    return data_tree

예제 #12

0

파일 보기

파일: pull_soi_corp.py 프로젝트: chrisrytting/OG-USA

def load_soi_tot_corp(data_tree=naics.generate_tree(),
                      cols_dict=_DFLT_TOT_CORP_COLS_DICT, 
                      blueprint=None, blue_tree=None,
                      from_out=False, output_path=_TOT_CORP_OUT_PATH):
    """ This function pulls the soi total corporation data.
    
    :param data_tree: The NAICS tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=output_path, tree=data_tree)
        return data_tree
    # Pertinent information:
    num_inds = len(data_tree.enum_inds) # Number of industries in NAICS tree.
    data_cols = cols_dict.keys() # Dataframe column names.
    # Opening the soi total corporate data file:
    try:
        tot_corp_data = pd.read_csv(_TOT_CORP_IN_PATH).fillna(0)
    except IOError:
        print "IOError: Tot-Corp soi data file not found."
        return None
    # Initializing dataframes for all NAICS industries:
    data_tree.append_all(df_nm=_TOT_DF_NM, df_cols=data_cols)
    # Reading the total corporation data into the NAICS tree:
    enum_index = 0
    for code_num in np.unique(tot_corp_data[_NAICS_COL_NM]):
        # Find the industry with a code that matches "code_num":
        ind_found = False
        for i in range(0, num_inds):
            enum_index = (enum_index + 1) % num_inds
            cur_ind = data_tree.enum_inds[enum_index]
            cur_dfs = cur_ind.data.dfs[cst.CODE_DF_NM]
            for j in range(0, cur_dfs.shape[0]):
                if(cur_dfs.iloc[j,0] == code_num):
                    # Industry with the matching code has been found:
                    ind_found = True
                    cur_dfs = cur_ind.data.dfs[_TOT_DF_NM]
                    break
            # If the matching industry has been found stop searching for it:
            if ind_found:
                break
        # If no match was found, then ignore data.
        if not ind_found:
            continue
        # Indicators for if rows in tot_corp_data match current industry code:
        indicators = (tot_corp_data[_NAICS_COL_NM] == code_num)
        # Calculating the data:
        for j in cols_dict:
            # Some of the data may not be reported:
            if cols_dict[j] == "":
                cur_dfs[j] = 0
            else:
                # Note: double counting the data in the original dataset.
                cur_dfs[j][0] = sum(indicators * tot_corp_data[cols_dict[j]])/2.0
                cur_dfs[j][0] = cur_dfs[j] * _TOT_CORP_IN_FILE_FCTR
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_TOT_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_TOT_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree

예제 #13

0

파일 보기

def load_asset(data_tree=naics.generate_tree(),
             blue_tree=None, blueprint=None,
             from_out=False, out_path=None):
    """ This function loads the soi partnership asset data.
    
    :param data_tree: The NAICS tree to read the data into.
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    """
    # Initializing the output path:
    if out_path == None:
        out_path = _AST_OUT_PATH
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path,
                                        tree=data_tree)
        return data_tree
    # Opening data on depreciable fixed assets, inventories, and land:
    wb = xlrd.open_workbook(_AST_IN_PATH)
    ws = wb.sheet_by_index(0)
    num_rows = ws.nrows
    # Columns of the asset dataframe:
    df_cols = _AST_DF_DICT.values()
    # Initializing dataframe to hold pertinent asset data:
    ast_df = pd.DataFrame(np.zeros((ws.ncols-1,len(df_cols))), columns=df_cols)
    ''' Extracting the data (note that the rows with total data appear first).
    For each input row:'''
    for in_row_nm in _AST_IN_ROW_NMS:
        # Key corresponding to total asset column:
        df_net_col_key = _AST_IN_ROWS_DF_NET_DICT[in_row_nm]
        # Asset dataframes net income column name:
        df_net_col_nm = _AST_DF_DICT[df_net_col_key]
        # Key corresponding to assets of net income partnerships column:
        df_inc_col_key = _AST_IN_ROWS_DF_INC_DICT[in_row_nm]
        # Asset dataframes total income column name:
        df_inc_col_nm = _AST_DF_DICT[df_inc_col_key]
        in_row_nm = in_row_nm.lower()
        # Finding the first input row with in_row_nm:
        for in_row1 in xrange(0, num_rows):
            in_net_row_nm = str(ws.cell_value(in_row1,0)).lower()
            if(in_row_nm in in_net_row_nm):
                # Total asset data:
                ast_df[df_net_col_nm] = ws.row_values(in_row1, 1)
                # Finding the second input row with in_row_nm:
                for in_row2 in xrange(in_row1+1, num_rows):
                    in_inc_row_nm = str(ws.cell_value(in_row2,0)).lower()
                    if(in_row_nm in in_inc_row_nm):
                        # Asset data for companies with net income:
                        ast_df[df_inc_col_nm] = ws.row_values(in_row2,1)
                        break
                break
    # Scaling the data to the correct units:
    ast_df = ast_df * _AST_FILE_FCTR
    # Reading in the crosswalks between the columns and the NAICS codes:
    ast_cross = pd.read_csv(_AST_IN_CROSS_PATH)
    # Processing the asset data into the NAICS tree:
    data_tree = naics.load_data_with_cross(
                    data_tree=data_tree, data_df=ast_df,
                    cross_df=ast_cross, df_nm=_AST_DF_NM
                    )
    # Default blueprint is tot_corps:
    has_tot_df = _TOT_CORP_DF_NM in data_tree.enum_inds[0].data.dfs.keys()
    if blueprint == None and has_tot_df:
        blueprint = _TOT_CORP_DF_NM
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_AST_DF_NM])
    naics.pop_forward(tree=data_tree, df_list=[_AST_DF_NM],
                      blueprint=blueprint, blue_tree=blue_tree)
    return data_tree

예제 #14

0

파일 보기

파일: pull_soi_proprietorship.py 프로젝트: salimfurth/OG-USA

def load_soi_nonfarm_prop(data_tree=naics.generate_tree(),
                          blue_tree=None,
                          blueprint=None,
                          from_out=False,
                          out_path=_NFARM_PROP_OUT_PATH):
    """ This function loads the soi nonfarm proprietorship data:
    
    :param data_tree: The NAICS tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=out_path, tree=data_tree)
        return data_tree
    # Opening nonfarm proprietor data:
    wb = xlrd.open_workbook(_DDCT_IN_PATH)
    ws = wb.sheet_by_index(0)
    cross = pd.read_csv(_DDCT_IN_CROSS_PATH)
    # Finding the relevant positions in worksheet:
    pos1 = naics.search_ws(ws, _SECTOR_COL, 20, True, [0, 0], True)
    pos2 = naics.search_ws(ws, _DDCT_COL1, 20)
    pos3 = naics.search_ws(ws, _DDCT_COL2, 20, True,
                           np.array(pos2) + np.array([0, 1]))
    #
    data_tree.append_all(df_nm=_NFARM_DF_NM, df_cols=[_NFARM_DF_COL_NM])
    #
    cross_index = cross.shape[0] - 1
    enum_index = len(data_tree.enum_inds) - 1
    for i in xrange(pos1[0], ws.nrows):
        cur_cell = str(ws.cell_value(i, pos1[1])).lower().strip()
        #
        tot_proportions = 0
        for j in xrange(0, cross.shape[0]):
            cross_index = (cross_index + 1) % cross.shape[0]
            cur_ind_name = str(cross.iloc[cross_index, 0]).lower().strip()
            if (cur_cell == cur_ind_name):
                if pd.isnull(cross.iloc[cross_index, 1]):
                    continue
                ind_codes = str(cross.iloc[cross_index, 1]).split(".")
                for k in xrange(0, len(data_tree.enum_inds)):
                    enum_index = (enum_index + 1) % len(data_tree.enum_inds)
                    cur_data = data_tree.enum_inds[enum_index].data
                    cur_codes = cur_data.dfs[_CODE_DF_NM]
                    cur_proportions = naics.compare_codes(
                        ind_codes, cur_codes.iloc[:, 0])
                    if cur_proportions == 0:
                        continue
                    tot_proportions += cur_proportions
                    cur_dfs = cur_data.dfs[_NFARM_DF_NM][_NFARM_DF_COL_NM]
                    cur_dfs[0] += (_DDCT_FILE_FCTR * cur_proportions *
                                   (ws.cell_value(i, pos2[1]) +
                                    ws.cell_value(i, pos3[1])))
            if (tot_proportions == 1):
                break
    # Default:
    if blueprint == None and _TOT_CORP_DF_NM in data_tree.enum_inds[
            0].data.dfs.keys():
        blueprint = _TOT_CORP_DF_NM
    naics.pop_back(tree=data_tree, df_list=[_NFARM_DF_NM])
    naics.pop_forward(tree=data_tree,
                      df_list=[_NFARM_DF_NM],
                      blueprint=blueprint,
                      blue_tree=blue_tree)
    #
    return data_tree

예제 #15

0

파일 보기

파일: pull_soi_corp.py 프로젝트: salimfurth/OG-USA

def load_soi_tot_corp(data_tree=naics.generate_tree(),
                      cols_dict=_DFLT_TOT_CORP_COLS_DICT,
                      blueprint=None,
                      blue_tree=None,
                      from_out=False,
                      output_path=_TOT_CORP_OUT_PATH):
    """ This function pulls the soi total corporation data.
    
    :param data_tree: The NAICS tree to read the data into.
    :param cols_dict: A dictionary mapping dataframe columns to the name of
           the column names in the input file
    :param blueprint: The key corresponding to a dataframe in a tree to be
           used as a "blueprint" for populating the df_list dataframes forward.
    :param blue_tree: A NAICS tree with the "blueprint" dataframe. The default
           is the original NAICS tree.
    :param from_out: Whether to read in the data from output.
    :param output_path: The path of the output file.
    """
    # If from_out, load the data tree from output:
    if from_out:
        data_tree = naics.load_tree_dfs(input_path=output_path, tree=data_tree)
        return data_tree
    # Pertinent information:
    num_inds = len(data_tree.enum_inds)  # Number of industries in NAICS tree.
    data_cols = cols_dict.keys()  # Dataframe column names.
    # Opening the soi total corporate data file:
    try:
        tot_corp_data = pd.read_csv(_TOT_CORP_IN_PATH).fillna(0)
    except IOError:
        print "IOError: Tot-Corp soi data file not found."
        return None
    # Initializing dataframes for all NAICS industries:
    data_tree.append_all(df_nm=_TOT_DF_NM, df_cols=data_cols)
    # Reading the total corporation data into the NAICS tree:
    enum_index = 0
    for code_num in np.unique(tot_corp_data[_NAICS_COL_NM]):
        # Find the industry with a code that matches "code_num":
        ind_found = False
        for i in range(0, num_inds):
            enum_index = (enum_index + 1) % num_inds
            cur_ind = data_tree.enum_inds[enum_index]
            cur_dfs = cur_ind.data.dfs[cst.CODE_DF_NM]
            for j in range(0, cur_dfs.shape[0]):
                if (cur_dfs.iloc[j, 0] == code_num):
                    # Industry with the matching code has been found:
                    ind_found = True
                    cur_dfs = cur_ind.data.dfs[_TOT_DF_NM]
                    break
            # If the matching industry has been found stop searching for it:
            if ind_found:
                break
        # If no match was found, then ignore data.
        if not ind_found:
            continue
        # Indicators for if rows in tot_corp_data match current industry code:
        indicators = (tot_corp_data[_NAICS_COL_NM] == code_num)
        # Calculating the data:
        for j in cols_dict:
            # Some of the data may not be reported:
            if cols_dict[j] == "":
                cur_dfs[j] = 0
            else:
                # Note: double counting the data in the original dataset.
                cur_dfs[j][0] = sum(
                    indicators * tot_corp_data[cols_dict[j]]) / 2.0
                cur_dfs[j][0] = cur_dfs[j] * _TOT_CORP_IN_FILE_FCTR
    # Populate all levels of specificity in the NAICS tree:
    naics.pop_back(tree=data_tree, df_list=[_TOT_DF_NM])
    naics.pop_forward(tree=data_tree,
                      df_list=[_TOT_DF_NM],
                      blueprint=blueprint,
                      blue_tree=blue_tree)
    return data_tree