from regions import Regions z = Regions.summable_regions() print(z) for r in Regions.district_names(): w = Regions.region_by_district(r) print('\n', r) print(w) # passes now # assert set(Regions.region_by_district("Уральский федеральный округ")) == set (['Курганская область', 'Свердловская область', 'Ханты-Мансийский авт. округ - Югра', 'Ямало-Ненецкий авт. округ', 'Тюменская область без авт. округов', 'Челябинская область']) # must pass: assert set(Regions.region_by_district("Уральский федеральный округ")) == set([ 'Курганская область', 'Свердловская область', #'Ханты-Мансийский авт. округ - Югра', #'Ямало-Ненецкий авт. округ', #'Тюменская область без авт. округов', 'Тюменская область', 'Челябинская область' ]) print(Regions.region_by_district("Северо-Западный федеральный округ")) # Regions.region_by_district("Северо-Западный федеральный округ") must return: [ 'Республика Карелия', 'Республика Коми', #'Ненецкий авт. округ', #'Архангельская область без авт. округа',
to_xl_book(dfs, tag = 'by_sheet') # output 2: concat all dataframes to one xls sheet if '2' in jobs: r = pd.concat(dfs) to_xl_sheet(r, tag = 'one_page', sheet = "regions") # output 3: make Russia file (1 sheet) rf = Regions.rf_name() if '3' in jobs: # note: must have pandas 17 or higher for 'rename' df_rf = pd.concat([d[rf].rename(d['varname'][0]) for d in dfs], axis = 1) to_xl_sheet(df_rf, tag = 'rf', sheet = 'rf') # output 4: make fed districts file (num_var sheets) if '4' in jobs: cols = ['varname'] + [Regions.rf_name()] + Regions.district_names() dfs2 = [d.reindex(columns=cols) for d in dfs] to_xl_book(dfs2, tag = 'districts') # output 5: make regions only file (num_var sheets) if '5' in jobs: cols = ['varname'] + [Regions.rf_name()] + Regions.summable_regions() dfs3 = [d.reindex(columns=cols) for d in dfs] to_xl_book(dfs3, tag = 'summable') # output 6: write regions only file (num_var sheets) if '6' in jobs: to_xl_sheet(df = get_varname_df(), tag = 'varnames', sheet = 'varnames')
from datetime import date import pandas as pd import os from xls_read import read_sheet, read_by_definition, yearmon from regions import Regions filter_region_name = Regions.filter_region_name reference_region_names = Regions.names() rf_name = Regions.rf_name() district_names = Regions.district_names() summable_regions = Regions.summable_regions() def get_dataframe(datapoints_stream): """Return dataframe corresponding to datapoints stream.""" list_of_dicts = [{'val':x[0], 'region':x[1], 'dates':x[2]} for x in datapoints_stream] df = pd.DataFrame(list_of_dicts) df = df.pivot(columns='region', values='val', index='dates')[reference_region_names] df.index = pd.DatetimeIndex(df.index) return df def get_dataframe_by_definition(def_dict): """Return dataframe corresponding to definition dict.""" file_path = os.path.join(def_dict['folder'], def_dict['filename']) if 'anchor' in def_dict.keys(): gen = read_sheet(file_path, def_dict['sheet'], def_dict['anchor']) else: gen = read_sheet(file_path, def_dict['sheet']) try: df = get_dataframe(gen)[Regions.names()] except:
# Produce the Excel outputs selected in `jobs`.  Every output is guarded by
# a membership test for its job number given as a string ('1' .. '6').

# output 1: pass all dataframes to to_xl_book (tag 'by_sheet')
if '1' in jobs:
    to_xl_book(dfs, tag='by_sheet')

# output 2: concat all dataframes to one xls sheet
if '2' in jobs:
    r = pd.concat(dfs)
    to_xl_sheet(r, tag='one_page', sheet="regions")

# output 3: make Russia file (1 sheet)
# NOTE: rf is assigned whether or not job 3 is requested.
rf = Regions.rf_name()
if '3' in jobs:
    # note: must have pandas 17 or higher for 'rename'
    # From each dataframe take the column named by Regions.rf_name() and
    # rename it to the first value of that dataframe's 'varname' column.
    # .iloc[0] makes the positional lookup explicit: plain [0] on a Series
    # with a non-integer index depends on a positional fallback that pandas
    # has deprecated.
    df_rf = pd.concat(
        [d[rf].rename(d['varname'].iloc[0]) for d in dfs],
        axis=1,
    )
    to_xl_sheet(df_rf, tag='rf', sheet='rf')

# output 4: make fed districts file (num_var sheets)
if '4' in jobs:
    cols = ['varname'] + [Regions.rf_name()] + Regions.district_names()
    # reindex keeps only `cols`, in this order.
    dfs2 = [d.reindex(columns=cols) for d in dfs]
    to_xl_book(dfs2, tag='districts')

# output 5: make regions only file (num_var sheets)
if '5' in jobs:
    cols = ['varname'] + [Regions.rf_name()] + Regions.summable_regions()
    dfs3 = [d.reindex(columns=cols) for d in dfs]
    to_xl_book(dfs3, tag='summable')

# output 6: write variable names file (1 sheet, 'varnames')
if '6' in jobs:
    to_xl_sheet(df=get_varname_df(), tag='varnames', sheet='varnames')