Example #1
0
    def _do_growth(self, table, table_name, pk, var_map):
        growth_setts = self._get_config([GLOBAL, GROWTH], optional=True, default=None)
        if not growth_setts:
            print "NO growth in settings. Skiping growth calculations..."
            return table
        
        time_col = growth_setts[TIME_COLUMN]
        deltas = growth_setts[DELTA]
        growth_cols = growth_setts[COLUMNS]

        start = self._get_config([GLOBAL, SOURCE_VARS, time_col, START])
        gstart = self._get_config([GLOBAL, GROWTH, time_col, START], optional=True)
        if gstart:
            start = gstart

        print "GrowthCOL=", growth_cols, "time_col=", time_col, "start=", start
        for d in deltas:
            var = int(var_map[time_col])
            if var >= start + d:
                print "Do one year growth calculation...", var, start, d
                growth_path = self.get_filepath(table_name, {time_col: unicode(var - d) })
                file_prev = get_file(growth_path)
                tbl_prev = pd.read_csv(file_prev, sep="\t", encoding="utf-8-sig", converters=self.coerce)
                table = growth.do_growth(table, tbl_prev, pk, growth_cols, years_ago=d, delta_col=time_col)

        return table
Example #2
0
    def _to_df(self, input_file, use_cache=True, var_map={}, save_to_cache=True):
        hdf_df, target = self._check_hdf_cache(input_file, var_map)
        if hdf_df is not False and use_cache:
            print "Reading from HDF file..."
            return hdf_df

        print "looking here", input_file
        input_file = self._check_file(input_file, var_map)
        print "Trying to open", input_file

        archive_files = self._get_config([GLOBAL, ARCHIVE_FILES], optional=True)

        if archive_files:
            archive = raw_file_handle(input_file)
            df = self._multi_files_to_df(archive, archive_files, var_map)
        else:
            input_file = get_file(input_file)
            df = self._file_to_df(input_file)

        if use_cache and save_to_cache:
            print "Saving dataframe in HDF file..."
            df.to_hdf(target, HDF_CACHE, append=False)

        return df