from datetime import date
import pandas as pd
import os

from xls_read import read_sheet, read_by_definition, yearmon
from regions import Regions

# Module-level shortcuts taken from Regions once, when this module is imported.
filter_region_name = Regions.filter_region_name
reference_region_names = Regions.names()
rf_name = Regions.rf_name()
district_names = Regions.district_names()
summable_regions = Regions.summable_regions()


def get_dataframe(datapoints_stream):
    """Return dataframe corresponding to datapoints stream.

    Each item of *datapoints_stream* is read positionally: ``x[0]`` is stored
    as the value, ``x[1]`` as the region and ``x[2]`` as the date. The result
    has dates as the index (converted to a ``DatetimeIndex``) and one column
    per entry of ``reference_region_names``, in that order.
    """
    list_of_dicts = [{'val':x[0], 'region':x[1], 'dates':x[2]} for x in datapoints_stream]
    df = pd.DataFrame(list_of_dicts)
    # Long -> wide, then select by the reference list to fix the column set
    # and column order.
    df = df.pivot(columns='region', values='val', index='dates')[reference_region_names]
    df.index = pd.DatetimeIndex(df.index)
    return df


# NOTE(review): the visible source is cut off inside this function, right after
# the bare `except:`. The code below is reproduced as far as it is visible; the
# handler body and the return value cannot be seen from here.
def get_dataframe_by_definition(def_dict):
    """Return dataframe corresponding to definition dict."""
    file_path = os.path.join(def_dict['folder'], def_dict['filename'])
    # 'anchor' is optional: it is forwarded to read_sheet only when present.
    if 'anchor' in def_dict.keys():
        gen = read_sheet(file_path, def_dict['sheet'], def_dict['anchor'])
    else:
        gen = read_sheet(file_path, def_dict['sheet'])
    try:
        df = get_dataframe(gen)[Regions.names()]
    except:
from datetime import date
import pandas as pd
import os

from xls_read import read_sheet, read_by_definition, yearmon
from regions import Regions

# Region helpers and name lists, resolved from Regions at import time.
filter_region_name = Regions.filter_region_name
reference_region_names = Regions.names()
rf_name = Regions.rf_name()
district_names = Regions.district_names()
summable_regions = Regions.summable_regions()


def get_dataframe(datapoints_stream):
    """Build a dates-by-regions dataframe from a stream of datapoints.

    Every datapoint is read positionally: item 0 is the value, item 1 the
    region and item 2 the date. Columns are limited to, and ordered as,
    ``reference_region_names``; the index is converted to a ``DatetimeIndex``.
    """
    records = []
    for point in datapoints_stream:
        records.append({'val': point[0], 'region': point[1], 'dates': point[2]})

    long_frame = pd.DataFrame(records)
    # One row per date, one column per region.
    wide_frame = long_frame.pivot(index='dates', columns='region', values='val')
    result = wide_frame[reference_region_names]
    result.index = pd.DatetimeIndex(result.index)
    return result


# NOTE(review): the visible source ends after the first statement of this
# function; it is reproduced unchanged and the remainder is not visible here.
def get_dataframe_by_definition(def_dict):
    """Return dataframe corresponding to definition dict."""
    file_path = os.path.join(def_dict['folder'], def_dict['filename'])
# Load the dataframes to export (import_xl_data is defined outside this view).
dfs = import_xl_data()
# Original remark, kept as written: "this import is faster, but series is
# alphabetic". Disabled alternative loader:
# dfs = import_csv_data()

# output 1: write every dataframe to one workbook, one dataframe per sheet
if '1' in jobs:
    to_xl_book(dfs, tag='by_sheet')

# output 2: stack all dataframes and write them to a single sheet
if '2' in jobs:
    r = pd.concat(dfs)
    to_xl_sheet(r, tag='one_page', sheet='regions')

# output 3: Russia-only file (1 sheet), one column per dataframe
rf = Regions.rf_name()
if '3' in jobs:
    # Series.rename with a scalar sets the series name, so each column is
    # labelled with the first 'varname' entry of its dataframe
    # (original note: must have pandas 17 or higher for 'rename').
    rf_columns = []
    for frame in dfs:
        rf_columns.append(frame[rf].rename(frame['varname'][0]))
    df_rf = pd.concat(rf_columns, axis=1)
    to_xl_sheet(df_rf, tag='rf', sheet='rf')

# output 4: federal districts file (one sheet per dataframe)
if '4' in jobs:
    cols = ['varname', Regions.rf_name()] + Regions.district_names()
    dfs2 = [frame.reindex(columns=cols) for frame in dfs]
    to_xl_book(dfs2, tag='districts')

# output 5: regions-only file (one sheet per dataframe)
# NOTE(review): the visible source ends after `dfs3` is built; whatever
# consumes it lies outside this view.
if '5' in jobs:
    cols = ['varname', Regions.rf_name()] + Regions.summable_regions()
    dfs3 = [frame.reindex(columns=cols) for frame in dfs]
# Load the dataframes to export (import_xl_data is defined outside this view).
dfs = import_xl_data()
# Original remark, kept as written: "this import is faster, but series is
# alphabetic". Disabled alternative loader:
# dfs = import_csv_data()

# output 1: write every dataframe to one workbook, one dataframe per sheet
if '1' in jobs:
    to_xl_book(dfs, tag='by_sheet')

# output 2: stack all dataframes and write them to a single sheet
if '2' in jobs:
    r = pd.concat(dfs)
    to_xl_sheet(r, tag='one_page', sheet='regions')

# output 3: Russia-only file (1 sheet), one column per dataframe
rf = Regions.rf_name()
if '3' in jobs:
    # Series.rename with a scalar sets the series name, so each column is
    # labelled with the first 'varname' entry of its dataframe
    # (original note: must have pandas 17 or higher for 'rename').
    rf_columns = []
    for frame in dfs:
        rf_columns.append(frame[rf].rename(frame['varname'][0]))
    df_rf = pd.concat(rf_columns, axis=1)
    to_xl_sheet(df_rf, tag='rf', sheet='rf')

# output 4: federal districts file (one sheet per dataframe)
if '4' in jobs:
    cols = ['varname', Regions.rf_name()] + Regions.district_names()
    dfs2 = [frame.reindex(columns=cols) for frame in dfs]
    to_xl_book(dfs2, tag='districts')

# output 5: regions-only file (one sheet per dataframe)
# NOTE(review): the visible source ends after `dfs3` is built; whatever
# consumes it lies outside this view.
if '5' in jobs:
    cols = ['varname', Regions.rf_name()] + Regions.summable_regions()
    dfs3 = [frame.reindex(columns=cols) for frame in dfs]