def MEASURE_CODE(code, mlist):
    """Translate a measure code through ``measure_file['code2']``.

    Args:
        code: Measure code to translate. It is normalised with ``str()``
            before being looked up, so a non-string code is handled the
            same way by the membership test and by the lookup.
        mlist: Mapping whose keys are logged for diagnosis when the code
            is unknown.

    Returns:
        The entry stored under the code in ``measure_file['code2']``, or
        ``''`` when the code is empty or stringifies to ``'nan'``.
        Unknown codes are reported through ``ERROR``.
    """
    code_key = str(code)
    code_map = measure_file['code2']
    if code_key in code_map:
        # Look up with the very key that was just tested; indexing with the
        # raw ``code`` could miss (or resolve differently) when it is not
        # already a string.
        return code_map[code_key]
    if code == '' or code_key == 'nan':
        return ''
    logging.info(mlist.keys())
    # Use the string form so that reporting a non-string code does not itself
    # fail with a TypeError.
    ERROR('Measures未知代碼: ' + code_key)
def START_DATE(freq):
    """Return the first period label to process for frequency ``freq``.

    When the module-level ``find_unknown`` flag does not equal ``False``
    the function returns ``None``. Otherwise ``'A'`` yields
    ``dealing_start_year`` itself, ``'Q'`` yields ``'<year>-Q1'`` and
    ``'M'`` yields ``'<year>-01'``. Any other frequency is reported
    through ``ERROR``.
    """
    # The comparison is kept explicit on purpose: only a flag that compares
    # equal to False enables the start-date lookup.
    if find_unknown != False:  # noqa: E712
        return None
    if freq == 'A':
        return dealing_start_year
    for freq_code, suffix in (('Q', '-Q1'), ('M', '-01')):
        if freq == freq_code:
            return str(dealing_start_year) + suffix
    ERROR('頻率錯誤: ' + freq)
    return None
def SUBJECT_CODE(code, slist):
    """Translate a subject code through ``subject_file['code2']``.

    Args:
        code: Subject code to translate.
        slist: Mapping whose keys are logged for diagnosis when the code
            is unknown.

    Returns:
        The entry stored under ``code``. Unknown codes are reported
        through ``ERROR``.
    """
    code_map = subject_file['code2']
    if code not in code_map:
        logging.info(slist.keys())
        ERROR('Subjects未知代碼: ' + code)
        return None
    return code_map[code]
def COUNTRY_NAME(location):
    """Return the ``country['Country_Name']`` entry for ``location``.

    A location that is not present is reported through ``ERROR``.
    """
    names = country['Country_Name']
    if location not in names:
        ERROR('找不到國家: ' + location)
        return None
    return names[location]
def COUNTRY_CODE(location):
    """Return the ``country['Country_Code']`` entry for ``location``.

    A location that is not present is reported through ``ERROR``.
    """
    codes = country['Country_Code']
    if location not in codes:
        ERROR('國家代碼錯誤: ' + location)
        return None
    return codes[location]
def MEI_DATA(i, name, MEI_t, code_num, table_num, KEY_DATA, DATA_BASE,
             db_table_t, DB_name, snl, freqlist, frequency):
    """Copy column ``i`` of ``MEI_t`` into the working table and record its key row.

    The column's values are written into a new column of ``db_table_t``
    (named ``DB_CODE`` + zero-padded ``code_num``), and one descriptive row
    is appended to ``KEY_DATA``.

    Args:
        i: Position of the column of ``MEI_t`` to process.
        name: Series name stored in the key row and used in error messages.
        MEI_t: Source frame. Each column label is indexed at positions 0-5:
            [0] is passed to ``COUNTRY_NAME``, [1] is the key into
            ``subjects_list``, [2] the key into ``measures_list``, [3] the
            unit, [4] the power code and [5] the value stored as ``form_c``.
        code_num: Running column number inside the current table.
        table_num: Running table number.
        KEY_DATA: List that receives the new key row (mutated in place).
        DATA_BASE: Mapping of table name to finished table (mutated in
            place when a table is closed).
        db_table_t: Working table indexed by ``freqlist``.
        DB_name: List of finished table names (mutated in place).
        snl: Running serial number stored in the key row.
        freqlist: Period labels used as the index of every table.
        frequency: Frequency code; ``'A'`` makes ``start`` an int, any
            other value makes it a string.

    Returns:
        Tuple ``(code_num, table_num, DATA_BASE, db_table_t, DB_name, snl)``
        with the counters advanced and the (possibly new) working table.

    Notes:
        ``ERROR`` is defined elsewhere in the file. NOTE(review): it
        presumably aborts processing; if it returned, ``start``/``last``
        could be unbound or ``book`` could be ``None`` further down.
    """
    NonValue = ['nan', '']

    # Once the working table holds 200 series, store it under its name and
    # start a fresh, empty table with column numbering restarted at 1.
    if code_num >= 200:
        db_table = DB_TABLE + frequency + '_' + str(table_num).rjust(4, '0')
        DATA_BASE[db_table] = db_table_t
        DB_name.append(db_table)
        table_num += 1
        code_num = 1
        db_table_t = pd.DataFrame(index=freqlist, columns=[])

    column_key = MEI_t.columns[i]
    value = MEI_t[column_key]
    db_table = DB_TABLE + frequency + '_' + str(table_num).rjust(4, '0')
    db_code = DB_CODE + str(code_num).rjust(3, '0')

    # Add the new column pre-filled with empty strings; concat is used
    # instead of repeated column insertion into the frame.
    blank_column = pd.DataFrame([''] * len(freqlist), index=freqlist,
                                columns=[db_code])
    db_table_t = pd.concat([db_table_t, blank_column], axis=1)

    start_found = False
    found = False
    for k, raw_index in enumerate(value.index):
        # Period labels that are plain years become ints, everything else is
        # kept as text. Only conversion failures are caught.
        try:
            freq_index = int(raw_index)
        except (ValueError, TypeError, OverflowError):
            freq_index = str(raw_index)

        if freq_index not in db_table_t.index:
            continue
        # Explicit comparisons are kept so only real True/False-equal flag
        # values select a branch; the year filter applies only when
        # find_unknown equals False.
        in_scope = ((find_unknown == False  # noqa: E712
                     and int(str(freq_index)[:4]) >= dealing_start_year)
                    or find_unknown == True)  # noqa: E712
        if not in_scope:
            continue

        cell = value.iloc[k]
        if str(cell).strip() in NonValue:
            # Single .loc write: chained ``frame[col][row] = ...`` assignment
            # is not guaranteed to reach the frame.
            db_table_t.loc[freq_index, db_code] = ''
            continue

        try:
            db_table_t.loc[freq_index, db_code] = float(cell)
        except ValueError:
            ERROR('Nontype Value detected: ' + str(cell))
        found = True
        if not start_found:
            # The first period carrying a value is the series start.
            start = int(freq_index) if frequency == 'A' else str(freq_index)
            start_found = True

    if not start_found and found:
        ERROR('start not found: ' + str(name))

    # The last period whose stored value is not a placeholder is the end.
    stored = db_table_t[db_code]
    try:
        last = stored.loc[~stored.isin(NonValue)].index[-1]
    except IndexError:
        if found:
            ERROR('last not found: ' + str(name))
    if not found:
        start = 'Nan'
        last = 'Nan'

    Subject = subjects_list[column_key[1]]
    Measure = measures_list[str(column_key[2])]
    PowerCode = column_key[4]

    # A unit label containing 'Unnamed' or starting with 'nan' counts as
    # missing.
    raw_unit = column_key[3]
    unit_text = str(raw_unit)
    if 'Unnamed' in unit_text or unit_text.startswith('nan'):
        Unit = 'Percentage' if dataset == 'MEI_BTS_COS' else ''
    else:
        Unit = raw_unit

    unit = str(PowerCode) + ' of ' + str(Unit)
    if Measure == '':
        desc_e = str(Subject) + ', ' + unit
    else:
        desc_e = str(Subject) + ', ' + str(Measure) + ', ' + unit

    # form_e: text before ' >' in the subject when present, otherwise a
    # dataset-specific grouping lookup, otherwise 'Others'.
    subject_text = str(Subject)
    arrow_pos = subject_text.find('>')
    if arrow_pos > 0:
        form_e = subject_text[:arrow_pos - 1]
    elif dataset == 'MEI_CLI':
        form_e = next((str(form) for form in form_e_dict1
                       if column_key[1] in form_e_dict1[form]), 'Others')
    elif dataset == 'MEI_BTS_COS':
        form_e = next((str(form) for form in form_e_dict2
                       if column_key[1] in form_e_dict2[form]), 'Others')
    else:
        form_e = 'Others'

    if str(column_key[2]) == 'nan':
        desc_c = str(column_key[1])
    else:
        desc_c = str(column_key[1]) + str(column_key[2])

    name_ord = column_key[0]
    book = COUNTRY_NAME(column_key[0])
    desc_e = desc_e + ' - ' + book

    # form_c comes from label position 5; when it is present it is also
    # appended to 'of Index' in the description, unless the description
    # already contains '='.
    raw_reference = column_key[5]
    reference_text = str(raw_reference)
    if reference_text.isnumeric():
        form_c = int(raw_reference)
        annotate_index = True
    elif ('Unnamed' in reference_text or reference_text.startswith('nan')
          or reference_text.strip() == ''):
        form_c = ''
        annotate_index = False
    else:
        form_c = raw_reference
        annotate_index = True
    if annotate_index and desc_e.find('=') < 0:
        desc_e = desc_e.replace('of Index', 'of Index(' + str(form_c) + ')')

    key_tmp = [
        databank, name, db_table, db_code, desc_e, desc_c, frequency, start,
        last, unit, name_ord, snl, book, form_e, form_c
    ]
    KEY_DATA.append(key_tmp)
    snl += 1
    code_num += 1
    return code_num, table_num, DATA_BASE, db_table_t, DB_name, snl
break sys.stdout.write("\n\n") if merging: logging.info('Process: File Merging\n') elif updating: logging.info('Process: File Updating\n') logging.info('Reading main key: ' + NAME + 'key' + main_suf + '.xlsx, Time: ' + str(int(time.time() - tStart)) + ' s' + '\n') main_file = readExcelFile(out_path + NAME + 'key' + main_suf + '.xlsx', header_=0, index_col_=0, sheet_name_=NAME + 'key', acceptNoFile=False) if main_file.empty: ERROR('Empty updated_file') logging.info('Reading main database: ' + NAME + 'database' + main_suf + '.xlsx, Time: ' + str(int(time.time() - tStart)) + ' s' + '\n') main_database = readExcelFile(out_path + NAME + 'database' + main_suf + '.xlsx', header_=0, index_col_=0, acceptNoFile=False) if merge_file_loaded: merge_file = df_key merge_database = DATA_BASE_dict else: logging.info('Reading original key: ' + NAME + 'key' + merge_suf + '.xlsx, Time: ' + str(int(time.time() - tStart)) + ' s' + '\n')