def _do_growth(self, table, table_name, pk, var_map): growth_setts = self._get_config([GLOBAL, GROWTH], optional=True, default=None) if not growth_setts: print "NO growth in settings. Skiping growth calculations..." return table time_col = growth_setts[TIME_COLUMN] deltas = growth_setts[DELTA] growth_cols = growth_setts[COLUMNS] start = self._get_config([GLOBAL, SOURCE_VARS, time_col, START]) gstart = self._get_config([GLOBAL, GROWTH, time_col, START], optional=True) if gstart: start = gstart print "GrowthCOL=", growth_cols, "time_col=", time_col, "start=", start for d in deltas: var = int(var_map[time_col]) if var >= start + d: print "Do one year growth calculation...", var, start, d growth_path = self.get_filepath(table_name, {time_col: unicode(var - d) }) file_prev = get_file(growth_path) tbl_prev = pd.read_csv(file_prev, sep="\t", encoding="utf-8-sig", converters=self.coerce) table = growth.do_growth(table, tbl_prev, pk, growth_cols, years_ago=d, delta_col=time_col) return table
def _to_df(self, input_file, use_cache=True, var_map={}, save_to_cache=True): hdf_df, target = self._check_hdf_cache(input_file, var_map) if hdf_df is not False and use_cache: print "Reading from HDF file..." return hdf_df print "looking here", input_file input_file = self._check_file(input_file, var_map) print "Trying to open", input_file archive_files = self._get_config([GLOBAL, ARCHIVE_FILES], optional=True) if archive_files: archive = raw_file_handle(input_file) df = self._multi_files_to_df(archive, archive_files, var_map) else: input_file = get_file(input_file) df = self._file_to_df(input_file) if use_cache and save_to_cache: print "Saving dataframe in HDF file..." df.to_hdf(target, HDF_CACHE, append=False) return df