def main_qualify():
    out_path = load_config()[0]['QUALI_OUT']
    web_path = load_config()[0]['WEB_OUT']
    files = []
    path_to_qualify = None
    f3 = {p.resolve() for p in Path(load_config()[0]['QUALI_IN']).rglob("**/*")
          if p.suffix in [EXTENSTION]}
    for f4 in f3:
        # Only meteorological files
        if '_MD_' in str(f4):
            files.append(Path(f4))
    process_files(files, path_to_qualify, out_path, web_path)

def read_dict(type_of_data):
    # Read the station dictionary spreadsheet
    dicionario = pd.read_excel(open(load_config()[0]['DICTIONARY'], 'rb'),
                               sheet_name='Tabela-estacao')
    colunas_dicionario = pd.read_excel(open(load_config()[0]['DICTIONARY'], 'rb'),
                                       sheet_name='Cabeçalhos SONDA', header=None)
    if type_of_data == '_MD_':
        # Rows 29 and 30 of the headers sheet hold the two header levels
        header_1 = colunas_dicionario.iloc[29:30]
        header_1 = header_1.iloc[0].dropna().values[1:]
        header_2 = colunas_dicionario.iloc[30:31]
        header_2 = header_2.iloc[0].dropna().values[1:]
    else:
        header_1 = None
        header_2 = None
        print('Implementation in progress')
        exit()
    return dicionario, header_1, header_2

def menu_qualify():
    top_header('Qualify SONDA Data->')
    path_to_qualify = load_stations_03(load_config()[0]['QUALI_IN'])
    out_path = load_config()[0]['QUALI_OUT']
    web_path = load_config()[0]['WEB_OUT']
    if type(path_to_qualify) == str:
        files = {p.resolve() for p in Path(path_to_qualify).rglob("**/*")
                 if p.suffix in [EXTENSTION]}
    else:
        files = []
        for f2 in path_to_qualify:
            f3 = {p.resolve() for p in Path(f2).rglob("**/*")
                  if p.suffix in [EXTENSTION]}
            for f4 in f3:
                files.append(Path(f4))
    process_files(files, path_to_qualify, out_path, web_path)

def open_file(select_file, station, year, file):
    ## INPUT HEADERS
    met_header = load_config()[0]['MET_INPUT']
    solar_header = load_config()[0]['SOLAR_INPUT']
    ## OUTPUT HEADERS
    met_out_header = load_config()[0]['MET_HEADER']
    sol_out_header = load_config()[0]['SOLAR_HEADER']
    top_header('Main > Preprocessing > Translate Historical > ' + str(station) +
               ' > ' + str(year) + ' > ' + str(file))
    print('\t\tPlease select one file to translate: ')
    if 'MD' in file:
        header_in = met_header
        header_out = met_out_header
    if 'SD' in file:
        header_in = solar_header
        header_out = sol_out_header
    if 'TD' in file:
        header_in = None
        header_out = None
    # print(header_in[1:])
    # print(header_out)
    df = pd.read_csv(select_file, sep=",")
    ## SELECT ONLY INPUT COLUMNS
    df = df[header_in[1:]]
    ## IGNORE MULTIINDEX ROW IN HISTORICAL DATA
    df = df.iloc[1:]
    print(df)
    print('here')  # leftover debug output
    print(load_config()[0]['FORMATED_OUT'] + str(station) + '/' + str(year))

def download_stations():
    top_header('Main Menu > Preprocessing Mode > Download Data')
    stations, ftp_con = connection()
    ### List available stations
    count = -1
    for f in stations:
        count = count + 1
        print("\t\t [%s]" % count + f)
    while True:
        try:
            ans_file = int(input("\t\t Select Station: "))
        except:
            print("\t\t Wrong selection")
            continue
        if ans_file > count:
            print("\t\t Wrong selection.")
            continue
        files_dir = load_config()[0]['FTP_DIR'] + stations[ans_file] + '/data/'
        download_files(files_dir, ftp_con, stations[ans_file])
        break

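# The same prompt-and-validate loop appears in download_stations above and in the
# menus below (historic_generate, translate_historical). A small helper could factor
# it out; select_from_list is a hypothetical name, not part of the current code.
def select_from_list(options, prompt="\t\t Select option: "):
    """Print numbered options and return the chosen index (hypothetical helper)."""
    for idx, opt in enumerate(options):
        print("\t\t [%s] " % idx + str(opt))
    while True:
        try:
            ans = int(input(prompt))
        except ValueError:
            print("\t\t Wrong selection")
            continue
        if not 0 <= ans <= len(options) - 1:
            print("\t\t Wrong selection.")
            continue
        return ans
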
def process_files(files, path_to_qualify, out_path, web_path):
    debug_dir = load_config()[0]['DEBUG_DIR']
    debug_di = str(debug_dir) + 'qualify_erros.txt'
    logging.basicConfig(filename=debug_di, filemode='a',
                        format='%(asctime)s - %(message)s',
                        datefmt='%d-%b-%y %H:%M:%S')
    # logger.setLevel(logging.INFO)
    # file = '/home/hneto/SONDA/SDT/output/dados_formatados/SBR/Meteorologicos/2016/SBR_2016_06_MD_formatado.csv'
    # file = '/home/hneto/SONDA/SDT/output/dados_formatados/SLZ/Meteorologicos/2017/SLZ_2017_01_MD_formatado.csv'
    count_files = 0
    size_files = len(files)
    for file in files:
        try:
            ### READ DICTIONARY
            print('Processing-> ', file.stem[:-6] + ' -> ' + str(count_files) + '/' + str(size_files))
            print('Reading dictionary....!')
            ## DETECT TYPE OF DATA / HEADER TYPE
            if '_MD_' in str(file):
                diction, header1, header2 = read_dict('_MD_')
            if '_SD_' in str(file):
                diction, header1, header2 = read_dict('_SD_')
            print('Loading file ->', file.stem)
            df = loadFile(file)
            print('Processing level 01!!')
            dqc, df = level_01(df, str(file), diction)
            print('Processing level 02!!')
            dqc = level_02(df, dqc, str(file))
            print('Processing level 03!!')
            dqc = level_03(df, dqc, str(file))
            print('Qualify done!...')
            print('Generating percentual file!')
            ### SELECT COLUMNS (drop std columns)
            cols = []
            for c in dqc.columns:
                if 'std' not in c[0]:
                    cols.append(c)
            dqc = dqc[cols]
            percent_columns = dqc.iloc[:, 5:].columns
            percent_cols = [cc[0] for cc in percent_columns]
            percent_cols.insert(0, "Dados")
            percentual_df = pd.DataFrame(columns=percent_cols)
            percentual_df['Dados'] = ['Suspeitos nível 1', 'Suspeitos nível 2',
                                      'Suspeitos nível 3', 'Suspeitos nível 4',
                                      'Válidos', 'Ausentes']
            percentual_df = percentual_df.fillna(0)
            ## VALID flags
            VALID_STRING = "9999|0999|0099|0009"
            ## MISSING / NO-SENSOR flags
            NODATA = "3333"
            NOSENSOR = "-5555"
            ## SUSPECT LEVELS
            SUSPECT_LVL1 = "5552|0552|0052|0002"
            SUSPECT_LVL2 = "5529|0529|0029"
            SUSPECT_LVL3 = "5299|0299"
            SUSPECT_LVL4 = "2999"
            for pc in percent_columns:
                nosensor = df[pc[0]].astype(str).str.count(NOSENSOR).sum()
                if nosensor > 0:
                    percentual_df[pc[0]] = ['N/S', 'N/S', 'N/S', 'N/S', 'N/S', 'N/S']
                else:
                    valids = dqc[pc].str.count(VALID_STRING).sum()
                    susplvl1 = dqc[pc].str.count(SUSPECT_LVL1).sum()
                    susplvl2 = dqc[pc].str.count(SUSPECT_LVL2).sum()
                    susplvl3 = dqc[pc].str.count(SUSPECT_LVL3).sum()
                    susplvl4 = dqc[pc].str.count(SUSPECT_LVL4).sum()
                    nodata = df[pc[0]].astype(str).str.count(NODATA).sum()
                    total_ = valids + susplvl1 + susplvl2 + susplvl3 + susplvl4 + nodata
                    ## ADD TO PERCENTUAL FRAME
                    percentual_df[pc[0]] = [susplvl1 / total_, susplvl2 / total_,
                                            susplvl3 / total_, susplvl4 / total_,
                                            valids / total_, nodata / total_]
            ## WEB FILE
            web_df = pd.DataFrame()
            station = df.acronym.unique()[0]
            stationID = diction.loc[diction['Sigla'] == station]
            siglaNAME = stationID['Sigla'].values[0]
            nomeNAME = stationID['Nome'].values[0]
            redeNAME = stationID['Rede'].values[0]
            latNAME = stationID['Latitude'].values[0]
            lonNAME = stationID['Longitude'].values[0]
            altNAME = stationID['Altitude'].values[0]
            first_row_header = [siglaNAME, nomeNAME, 'lat:' + str(latNAME),
                                'lon:' + str(lonNAME), 'alt:' + str(altNAME) + 'm',
                                redeNAME + ' Network', 'http://sonda.ccst.inpe.br',
                                '*****@*****.**']
            ### ADD std columns to the dqc frame
            dqc['ws10_std', 'dqc_v1'] = '0000'
            dqc['wd10_std', 'dqc_v1'] = '0000'
            dqc_mult_column = []
            dqc_cols = [c[0] for c in dqc.columns.values]
            for cc in range(len(dqc_cols)):
                if cc > 4:
                    dqc_cols[cc] = dqc_cols[cc] + '_dqc'
                    dqc_mult_column.append((dqc_cols[cc], 'dqc_v1'))
                else:
                    dqc_mult_column.append((dqc_cols[cc], ''))
            mux = pd.MultiIndex.from_tuples(dqc_mult_column)
            dqc.columns = dqc_cols
            dqc_for_concat = dqc[dqc_cols[5:]]
            dqc.columns = mux
            ### WEB FILE
            web_df = pd.concat([df, dqc_for_concat], axis=1)
            web_df = web_df[header1.tolist()]
            ## MOUNT NEW MULTIINDEX
            header2 = header2.tolist()
            # Pad with five blanks for the leading id/timestamp columns
            header2.insert(0, '')
            header2.insert(0, '')
            header2.insert(0, '')
            header2.insert(0, '')
            header2.insert(0, '')
            new_mux = []
            for cc in range(len(web_df.columns)):
                if cc < len(first_row_header):
                    new_mux.append((first_row_header[cc], web_df.columns[cc], header2[cc]))
                else:
                    new_mux.append(('', web_df.columns[cc], header2[cc]))
            mux = pd.MultiIndex.from_tuples(new_mux)
            ## FINALIZE WEB DF
            web_df.columns = mux
            ### CONVERT VALUES TO STRING
            all_columns = list(web_df)  # list of all column headers
            web_df[all_columns] = web_df[all_columns].astype(str)
            ### Replace 3333 with N/A before saving the file
            ### Replace -5555 with N/S before saving the file (line 170 of dqc.py)
            web_df = web_df.replace('3333.0', 'N/A')
            web_df = web_df.replace('-5555', 'N/S')
            print(web_df)
            # input()
            print('Saving file...')
            ### SAVE FILES
            if type(path_to_qualify) == str:
                mount_out = Path(path_to_qualify).parts[2:]
            else:
                mount_out = Path(file).parts[-4:][:-1]
            mount_out = '/'.join([str(elem) for elem in mount_out])
            ### CREATE PATH IF IT DOES NOT EXIST
            Path(out_path + mount_out + '/').mkdir(parents=True, exist_ok=True)
            output_file = out_path + mount_out + '/' + file.stem[:-9] + 'DQC' + file.suffix
            ## SAVE DQC FILE
            dqc.to_csv(output_file, index=False)
            ### WEB FILES
            web_csv = web_path + mount_out
            Path(web_path + mount_out + '/').mkdir(parents=True, exist_ok=True)
            output_web = web_path + mount_out + '/' + file.stem[:-10] + file.suffix
            percentual_out = web_path + mount_out + '/' + file.stem[:-9] + 'percentuais' + file.suffix
            ### SAVING
            web_df.to_csv(output_web, index=False)
            percentual_df.to_csv(percentual_out, index=False)
            print('Files have been saved into->', output_file, '\nWEB->', output_web)
            count_files += 1
        except Exception as err:
            print('Error qualifying file: ' + str(file))
            logging.warning('Error qualifying file: ' + str(file) + ' -> ' + str(err))
            count_files += 1

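# The percentual computation above groups four-digit DQC flag codes into categories
# via the regex alternations VALID_STRING, SUSPECT_LVL1..4, and NODATA. A minimal
# sketch of the same counting applied to one qualified column; flag_shares is a
# hypothetical helper, not part of the current code.
import pandas as pd

FLAG_PATTERNS = {
    'valid': "9999|0999|0099|0009",
    'suspect_lvl1': "5552|0552|0052|0002",
    'suspect_lvl2': "5529|0529|0029",
    'suspect_lvl3': "5299|0299",
    'suspect_lvl4': "2999",
}

def flag_shares(dqc_series, raw_series):
    """Fraction of records per DQC category for one variable (hypothetical helper).

    dqc_series holds the four-digit flag strings, raw_series the raw values
    (where 3333 marks missing data), mirroring the loop in process_files above.
    """
    counts = {name: dqc_series.str.count(pat).sum()
              for name, pat in FLAG_PATTERNS.items()}
    counts['missing'] = raw_series.astype(str).str.count("3333").sum()
    total = sum(counts.values())
    return {name: c / total for name, c in counts.items()} if total else counts
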
def historic_generate():
    operation_dir = load_config()[0]['OPERATIONAL_IN']
    ## SET DEBUG DIR
    logging.basicConfig(filename=load_config()[0]['DEBUG_DIR'] + 'historical_debug.txt',
                        filemode='a',
                        format='\nProcess Date %(asctime)s \n %(message)s\n',
                        datefmt='%d-%b-%y %H:%M:%S',
                        level=os.environ.get("LOGLEVEL", "INFO"))
    top_header('Main > Preprocessing > Generate Historical')
    print('\t\tPlease select one station to generate historical data: ')
    operational_stations = [
        fn for fn in listdir(operation_dir) if not fn.startswith('.')
    ]
    if len(operational_stations) == 0:
        print('There is no data to be formatted')
        input('Press Enter to return')
        # pre_processing_menu()
        # return None
    ## SELECT STATION
    count = -1
    for f in operational_stations:
        count = count + 1
        print("\t\t [%s]" % count + f)
    while True:
        try:
            ans_file = int(input("\t\t Select Station: "))
        except:
            print("\t\t Wrong selection")
            continue
        if ans_file > count:
            print("\t\t Wrong selection.")
            continue
        selected_st = operation_dir + operational_stations[ans_file] + '/'
        break
    ## SELECT TYPE OF DATA
    top_header('Main > Preprocessing > Generate Historical > ' +
               str(operational_stations[ans_file]).upper())
    print('\t\tPlease select the type of data to generate historical data: ')
    dataTypes = ['MD', 'SD', 'TD', '50', '25', '10']
    countT = -1
    for f in dataTypes:
        countT = countT + 1
        print("\t\t [%s]" % countT + f)
    while True:
        try:
            ans_type = int(input("\t\t Select Type: "))
        except:
            print("\t\t Wrong selection")
            continue
        if ans_type > countT:
            print("\t\t Wrong selection.")
            continue
        selected_file = operational_stations[ans_file].upper() + '_' + str(
            dataTypes[ans_type]) + '.DAT'
        break
    ### DATA TYPES
    if operational_stations[ans_file] == 'sms':
        ## OPEN DATA
        df = pd.read_csv(selected_st + selected_file, sep=",", header=None,
                         skiprows=4, skipinitialspace=False)
        df1 = df.copy()
        head0 = pd.read_csv(selected_st + selected_file, sep=",", header=None, nrows=1)
        head1 = pd.read_csv(selected_st + selected_file, sep=",", header=None, skiprows=1, nrows=1)
        head2 = pd.read_csv(selected_st + selected_file, sep=",", header=None, skiprows=3, nrows=1)
        head0 = head0.iloc[0].values
        head1 = head1.iloc[0].values
        head2 = head2.iloc[0].values
        df1[0] = pd.to_datetime(df1[0], format='%Y-%m-%d %H:%M:%S')
        ## SELECT TIMESTAMP TO PROCESS
        top_header('Main > Preprocessing > Generate Historical > ' +
                   str(operational_stations[ans_file]).upper() + ' > ' +
                   str(dataTypes[ans_type]))
        print('\t\tPlease select the year to generate historical data: ')
        ## AVAILABLE YEARS
        years = df1[0].dt.year.unique()
        countY = -1
        for f in years:
            countY = countY + 1
            print("\t\t [%s]" % countY + str(f))
        while True:
            try:
                ans_year = int(input("\t\t Select Year: "))
            except:
                print("\t\t Wrong selection")
                continue
            if ans_year > countY:
                print("\t\t Wrong selection.")
                continue
            selected_year = years[ans_year]
            break
        df1 = df1.set_index(0)
        months = df1.loc[str(selected_year)]
        months = months.reset_index()
        months = months[0].dt.strftime('%m').unique()
        top_header('Main > Preprocessing > Generate Historical > ' +
                   str(operational_stations[ans_file]).upper() + ' > ' +
                   str(dataTypes[ans_type]) + ' > ' + str(selected_year))
        print('\t\tPlease select the month to generate historical data: ')
        countM = -1
        for f in months:
            countM = countM + 1
            print("\t\t [%s]" % countM + str(f))
        while True:
            try:
                ans_month = int(input("\t\t Select Month: "))
            except:
                print("\t\t Wrong selection")
                continue
            if ans_month > countM:
                print("\t\t Wrong selection.")
                continue
            selected_month = months[ans_month]
            break
        top_header('Main > Preprocessing > Generate Historical > ' +
                   str(operational_stations[ans_file]).upper() + ' > ' +
                   str(dataTypes[ans_type]) + ' > ' + str(selected_year) + ' > ' +
                   str(selected_month))
        # SELECTED PERIOD TO GENERATE
        df1 = df1.loc[str(selected_year) + '-' + str(selected_month)]
        ## TIME INTERVAL VERIFICATION
        df1 = df1.sort_index(ascending=True)
        # GET TIME STRINGS
        max_time = df1.index.max()
        min_time = df1.index.min()
        ## FIND INDEX IN ORIGINAL DATAFRAME
        idx_min = df.loc[df[0] == str(min_time)].index.values[0]
        idx_max = df.loc[df[0] == str(max_time)].index.values[0]
        ## LOC BETWEEN IDX
        locked_df_chk = df.loc[idx_min:idx_max]
        ## FINAL DF TO COMPARE
        final_df = locked_df_chk.copy()
        ## MOUNT INDEX OF DATES ACCORDING TO TYPE OF DATA
        if dataTypes[ans_type] == 'MD' or dataTypes[ans_type] == '10' or \
                dataTypes[ans_type] == '25' or dataTypes[ans_type] == '50':
            freqc = '10min'
        if dataTypes[ans_type] == 'SD' or dataTypes[ans_type] == 'TD':
            freqc = '1min'
        ## CONVERT COLUMN TO DATETIME
        locked_df_chk[0] = pd.to_datetime(locked_df_chk[0], format='%Y-%m-%d %H:%M:%S')
        ## MULTIINDEX
        mux = []
        for i in range(len(head1)):
            mux.append([str(head1[i]).lower(), str(head2[i]).lower()])
        mux = pd.MultiIndex.from_tuples(mux)
        ## GET ID OF STATION
        id_st = locked_df_chk[2].values[0]
        ## DETECT NON-EXISTENT SENSOR
        non_sens_col = []
        object_type_c = locked_df_chk.select_dtypes(include=['object'])
        for c in object_type_c:
            detc_mean = object_type_c[c].astype(float).mean()
            if str(detc_mean) == 'nan':
                non_sens_col.append(c)
        ## DISAPPROVE BY TIME INTERVAL
        ## DETECT TYPE OF FILE
        if dataTypes[ans_type] == 'MD' or dataTypes[ans_type] == '10' or \
                dataTypes[ans_type] == '25' or dataTypes[ans_type] == '50':
            t_delta = pd.Timedelta(minutes=10)
        if dataTypes[ans_type] == 'SD' or dataTypes[ans_type] == 'TD':
            t_delta = pd.Timedelta(minutes=1)
        ## Generate all month timestamps to compare
        year_month = locked_df_chk[0].dt.strftime('%Y-%m').values[0]
        # print(pd.Timestamp(year_month) + pd.offsets.MonthEnd(1) + pd.Timedelta(hours=24) - t_delta)
        month_generated = pd.date_range(
            start=pd.Timestamp(year_month),
            end=pd.Timestamp(year_month) + pd.offsets.MonthEnd(1) +
            pd.Timedelta(hours=24) - t_delta,  # <-- e.g. 2018-08-31 with MonthEnd
            freq=freqc)
        ## CHECK DUPLICATES IN TIMESTAMP COLUMN
        times_dup = locked_df_chk[locked_df_chk.duplicated([0], keep=False)]
        group_tdup = times_dup.groupby(0)
        ## RESOLVE CLASHING TIMESTAMPS
        idx_first = []
        for g, gdftum in group_tdup:
            # print(gdftum)
            idx_groups = gdftum.index
            for idxgg in range(len(idx_groups)):
                pass_idx = idx_groups[idxgg] - 1
                if dataTypes[ans_type] == 'MD' or dataTypes[ans_type] == '10' or \
                        dataTypes[ans_type] == '25' or dataTypes[ans_type] == '50':
                    t_delta = pd.Timedelta(minutes=10)
                    locked_df_chk.loc[idx_groups[idxgg], 0] = \
                        locked_df_chk.loc[pass_idx][0] + t_delta
                if dataTypes[ans_type] == 'SD' or dataTypes[ans_type] == 'TD':
                    t_delta = pd.Timedelta(minutes=1)
                    locked_df_chk.loc[idx_groups[idxgg], 0] = \
                        locked_df_chk.loc[pass_idx][0] + t_delta
        ## DROP REMAINING TIMESTAMP DUPLICATES
        locked_df_chk = locked_df_chk.drop_duplicates(subset=0)
        ## SET TIMESTAMP COLUMN AS INDEX
        locked_df_chk = locked_df_chk.set_index(0)
        ## CHECK DUPLICATES IN ALL COLUMNS
        locked_df_chk = locked_df_chk.drop_duplicates(keep='first')
        # FILL MISSING TIMESTAMPS
        locked_df_chk = locked_df_chk.reindex(month_generated, fill_value=0)
        ## DISAPPROVE BY TIME INTERVAL
        check_t_interval = locked_df_chk
        check_t_interval = check_t_interval[min_time:max_time]
        ## TIME DELTA LIMIT
        lim_delta = pd.Timedelta(minutes=50)
        totalDelta = t_delta
        for i, row in check_t_interval.iterrows():
            if np.all(row[6:-1].values == 0):
                totalDelta = totalDelta + t_delta
            else:
                last_ro = i
                totalDelta = pd.Timedelta(minutes=0)
            if totalDelta >= lim_delta:
                print('Failed to generate file: sequence of failures longer than ',
                      lim_delta, '\n')
                print('')
                fail = check_t_interval[last_ro:i + t_delta]
                # fail[6:-1] = 3333
                fail = fail.reset_index()
                fail.columns = mux
                print(fail)
                return None
        ## ADD NON-EXISTENT SENSOR VALUES
        locked_df_chk[non_sens_col] = 5555
        idx_values = locked_df_chk.loc[locked_df_chk[2] == 0].index
        ## ADD YEAR
        locked_df_chk.loc[idx_values, 3] = \
            locked_df_chk.loc[idx_values].index.strftime('%Y').values
        ## ADD JULIAN DAY
        locked_df_chk.loc[idx_values, 4] = \
            locked_df_chk.loc[idx_values].index.strftime('%j').values
        ## ADD MINUTE OF DAY
        for hm in idx_values:
            s1 = hm.strftime('%d/%m/%Y 00:00')
            s2 = hm.strftime('%d/%m/%Y %H:%M')
            s1 = datetime.strptime(s1, '%d/%m/%Y %H:%M')
            s2 = datetime.strptime(s2, '%d/%m/%Y %H:%M')
            differenc = s2 - s1
            minutes = divmod(differenc.seconds, 60)
            locked_df_chk.loc[hm, 5] = minutes[0]
        locked_df_chk[2] = id_st
        ## DATETIME COLUMNS + NON_SENSOR
        dt_non_se = []
        dt_non_se.append(2)
        dt_non_se.append(3)
        dt_non_se.append(4)
        dt_non_se = dt_non_se + non_sens_col
        ### FILL REMAINING COLUMNS OF MISSING ROWS WITH 3333
        diff_columns = np.setdiff1d(locked_df_chk.columns.values, dt_non_se)
        locked_df_chk.loc[idx_values, diff_columns] = 3333
        # print(locked_df_chk['2020-09-09 13:30':'2020-09-09 17:20'].head(50))
        ## SAVE
        if len(locked_df_chk) > 0:
            ## SAVE PROCESS
            output_dir = load_config()[0]['HISTORICAL_OUT'] + str(
                operational_stations[ans_file]).upper() + '/' + str(selected_year) + '/'
            output_file_name = str(operational_stations[ans_file]).upper() + '_' + \
                str(selected_year) + '_' + str(locked_df_chk.index[0].strftime('%j')) + \
                '_a_' + str(locked_df_chk.index[-1].strftime('%j')) + '_' + \
                str(dataTypes[ans_type]) + '.dat'
            ## COUNT VALUES
            lost_counter = 0
            nosen_counter = 0
            for i, row in locked_df_chk.iterrows():
                ## COUNTER FOR MISSING (3333) ROWS
                if 3333 in row.values[:]:
                    lost_counter += 1
                ## COUNTER FOR NO-SENSOR (5555) ROWS
                if 5555 in row.values[:]:
                    nosen_counter += 1
            ## RESET INDEX
            locked_df_chk = locked_df_chk.reset_index()
            # ADD MULTIINDEX
            locked_df_chk.columns = mux
            # CREATE DIR
            pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
            ## CREATE VERSION DIRS
            pathlib.Path(output_dir + '/versions').mkdir(parents=True, exist_ok=True)
            file__ = output_dir + output_file_name
            ## CHECK IF FILE EXISTS
            if os.path.isfile(file__):
                warningmsg = ('\nSTATION-> ' + str(output_file_name[:-4]) +
                              ' \nLOST 3333 ROWS:-> ' + str(lost_counter) +
                              '\nNO SENSOR 5555 ROWS:\n' + str(nosen_counter) + '\n')
                logging.warning(warningmsg)
                # print(warningmsg)
                # print(locked_df_chk.head())
                ## CHECK LAST VERSION
                if len(os.listdir(output_dir + '/versions')) == 0:
                    shutil.move(file__,
                                output_dir + '/versions/' + output_file_name + '.v01')
                    ## CREATE FILE
                    locked_df_chk.to_csv(file__, index=False)
                    warningmsg = ('\nSTATION-> ' + str(output_file_name[:-4]) +
                                  ' File version(0) \nLOST 3333 ROWS:-> ' + str(lost_counter) +
                                  '\nNO SENSOR 5555 ROWS:\n' + str(nosen_counter) + '\n')
                    logging.warning(warningmsg)
                else:
                    versions = [
                        fn for fn in listdir(output_dir + '/versions/')
                        if not fn.startswith('.')
                    ]
                    shutil.move(file__,
                                output_dir + '/versions/' + output_file_name + '.v0' +
                                str((int(versions[-1][-2:]) + 1)))
                    warningmsg = ('\nSTATION-> ' + str(output_file_name[:-4]) +
                                  ' File version(' + str(int(versions[-1][-2:]) + 1) +
                                  ') \nLOST 3333 ROWS:-> ' + str(lost_counter) +
                                  '\nNO SENSOR 5555 ROWS:\n' + str(nosen_counter) + '\n')
                    logging.warning(warningmsg)
                    print(warningmsg)
                    ## CREATE FILE
                    locked_df_chk.to_csv(file__, index=False)
                    print(locked_df_chk)
            else:
                warningmsg = ('\nSTATION-> ' + str(output_file_name[:-4]) +
                              ' File version(0) \nLOST 3333 ROWS:-> ' + str(lost_counter) +
                              '\nNO SENSOR 5555 ROWS:\n' + str(nosen_counter) + '\n')
                logging.warning(warningmsg)
                ## CREATE FILE
                print(warningmsg)
                print(locked_df_chk)
                locked_df_chk.to_csv(file__, index=False)
            ## UPLOAD RESULTS
            ver_file_names = [
                fn for fn in listdir(output_dir + str('/versions'))
                if not fn.startswith('.')
            ]
            if len(ver_file_names) > 0:
                last_file_version = sorted(ver_file_names)[-1]
            else:
                last_file_version = None
            file_to_upload = file__
            print('\t\tUpload files to FTP: ')
            choice = input("""
                    (Y) - Yes
                    (N) - No

                    Please enter your choice: """)
            if choice == "Y" or choice == "y":
                connection(file_to_upload, operational_stations[ans_file],
                           selected_year, output_file_name, last_file_version,
                           operational_stations[ans_file])
            elif choice == "N" or choice == "n":
                sys.exit  # note: not called (no parentheses), so this branch is a no-op
            elif choice == "Q" or choice == "q":
                sys.exit  # note: not called (no parentheses), so this branch is a no-op
            else:
                print("You must only select one option")
                print("Please try again")
                mainMenu()

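# The versioning block above archives the previous output as '<name>.v01', '.v02', ...
# before writing the new file. A hedged sketch of deriving the next archive name from
# the files already in versions/; next_version_name is a hypothetical helper that
# assumes the two-digit suffix convention used above.
import os

def next_version_name(versions_dir, base_name):
    """Return the next '<base_name>.vNN' archive name (hypothetical helper)."""
    existing = sorted(fn for fn in os.listdir(versions_dir)
                      if fn.startswith(base_name) and not fn.startswith('.'))
    if not existing:
        return base_name + '.v01'
    last = int(existing[-1][-2:])  # last two characters are the version number
    return base_name + '.v%02d' % (last + 1)
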
# -*- coding: utf-8 -*-
# Module-level imports used by the historic_generate routine above
from modules.top_header import top_header
from modules.load_config import load_config
from datetime import datetime
import calendar
from dependecies import *

config_file = load_config()

def translate_historical():
    operation_dir = load_config()[0]['HISTORICAL_OUT']
    top_header('Main > Preprocessing > Translate Historical')
    print('\t\tPlease select one station to translate historical data: ')
    historical_pats = [
        fn for fn in listdir(operation_dir) if not fn.startswith('.')
    ]
    if len(historical_pats) == 0:
        print('There is no data to be formatted')
        input('Press Enter to return')
    ## SELECT STATION
    count = -1
    for f in historical_pats:
        count = count + 1
        print("\t\t [%s]" % count + f)
    while True:
        try:
            ans_file = int(input("\t\t Select Station: "))
        except:
            print("\t\t Wrong selection")
            continue
        if ans_file > count:
            print("\t\t Wrong selection.")
            continue
        selected_st = operation_dir + historical_pats[ans_file] + '/'
        top_header('Main > Preprocessing > Translate Historical > ' +
                   str(historical_pats[ans_file]))
        print('\t\tPlease select one year: ')
        select_year = [
            fn for fn in listdir(selected_st) if not fn.startswith('.')
        ]
        ## SELECT YEAR
        count = -1
        for f in select_year:
            count = count + 1
            print("\t\t [%s]" % count + f)
        while True:
            try:
                ans_year = int(input("\t\t Select Year: "))
            except:
                print("\t\t Wrong selection")
                continue
            if ans_year > count:
                print("\t\t Wrong selection.")
                continue
            selected_year = selected_st + select_year[ans_year] + '/'
            top_header('Main > Preprocessing > Translate Historical > ' +
                       str(historical_pats[ans_file]) + ' > ' + select_year[ans_year])
            print('\t\tPlease select one file to translate: ')
            select_files = [
                fn for fn in listdir(selected_year)
                if not fn.startswith('.') and '.dat' in fn
            ]
            ## SELECT FILE
            count = -1
            for f in select_files:
                count = count + 1
                print("\t\t [%s]" % count + f)
            while True:
                try:
                    ans_file_ = int(input("\t\t Select File: "))
                except:
                    print("\t\t Wrong selection")
                    continue
                if ans_file_ > count:
                    print("\t\t Wrong selection.")
                    continue
                selected_file = selected_year + select_files[ans_file_]
                open_file(selected_file, historical_pats[ans_file],
                          select_year[ans_year], select_files[ans_file_])
                break
            break
        break

# -*- coding: utf-8 -*-
from modules.top_header import top_header
from modules.load_config import load_config
from pathlib import Path
import numpy as np
import pandas as pd
import sys

config = load_config()
path_ = config[0]['AUTO_DATA']


def load_data_type():
    top_header('Main Menu > Automatic Detection')
    print('\t\tPlease select an option: ')
    print('\t\tPath dir: ', path_)
    choice = input("""
                    1: Detect Solarimetric Data
                    2: Detect Meteorological Data
                    3: Detect Anemometric Data
                    4: Detect Sky Camera Data
                    5: Set Data PATH
                    Q: Quit

                    Please enter your choice: """)
    if choice == "Detect Solarimetric Data" or choice == "1":
        detec_solar()
    elif choice == "Detect Meteorological Data" or choice == "2":
        detect_met()
    elif choice == "Detect Anemometric Data" or choice == "3":
        detect_ane()

def process_meteo(meteo, file):
    config = load_config()
    ## Build timestamp from year, Julian day, and minute of day
    meteo['timestamp'] = pd.to_datetime(
        meteo.year, format='%Y') + pd.to_timedelta(
            meteo.day - 1, unit='d') + pd.to_timedelta(meteo['min'], unit='m')
    meteo = meteo.set_index('timestamp')
    ## Copy ws10_avg so ws10_std can be computed on resample
    meteo['ws10_std'] = meteo['ws10_avg']
    ## Rename columns
    meteo.rename(columns={
        'day': 'jday',
        'temp_sfc': 'tp_sfc',
        'prec': 'rain'
    }, inplace=True)
    # Conversions (aggregation rule per column)
    conversion = {
        'id': 'first',
        'year': 'first',
        'jday': 'first',
        'min': 'first',
        'tp_sfc': 'first',
        'humid': 'first',
        'press': 'first',
        'rain': 'sum',
        'ws10_avg': 'mean',
        'ws10_std': 'std',
        'wd10_avg': lambda x: arctan(x.values),
        'wd10_std': lambda x: yamartino(x.values)
    }
    # Mask so invalid values are not resampled
    # (note: the comparison with np.nan is always True, so NaNs are not masked here)
    Maska = meteo[(meteo != 3333.0) & (meteo != -5555) & (meteo != np.nan)]
    # Apply resample-based conversion
    Maska = Maska.resample('10min').agg(conversion)
    # frame.resample('1H').agg({'radiation': np.sum, 'tamb': np.mean})
    ## Unmask values
    Unmask = meteo[(meteo == 3333.0)].resample('10min').first()
    Unmask2 = meteo[(meteo == -5555)].resample('10min').first()
    ## Combine values
    meteorological = Unmask.combine_first(Maska)
    meteorological = Unmask2.combine_first(meteorological)
    ## Reset index
    meteorological = meteorological.reset_index()
    # Reorder columns
    meteorological = meteorological.reindex(columns=[
        'id', 'timestamp', 'year', 'jday', 'min', 'tp_sfc', 'humid', 'press',
        'rain', 'ws10_avg', 'ws10_std', 'wd10_avg', 'wd10_std'
    ])
    ## Change column types
    meteorological[['id', 'year', 'jday', 'min']] = \
        meteorological[['id', 'year', 'jday', 'min']].astype(int)
    ## Header check
    year_ = file.parent.name
    ## Extract month string
    month = (meteorological['timestamp'][0].strftime('%m'))
    stat_ = file.parent.parent.name
    output = config[0]['FORMATED_OUT'] + stat_ + '/Meteorologicos/' + year_ + \
        '/' + stat_ + '_' + year_ + '_' + month + '_MD_formatado.csv'
    ### Create output dir if it does not exist
    if not os.path.exists(os.path.dirname(output)):
        try:
            os.makedirs(os.path.dirname(output))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    ## Update global MET_HEADER
    global MET_HEADER
    # For each key in MET_UPDATE, check
    met1 = []
    met2 = []
    ## Replace ID by station name
    meteorological['id'] = stat_
    # Create MultiIndex based on columns from header_log
    mux = pd.MultiIndex.from_tuples(MET_HEADER)
    # Set MultiIndex on dataframe
    meteorological.columns = mux
    # Aux output
    out_met = []
    if MET_UPDATE != None:
        for k in MET_UPDATE:
            ## Check if the update belongs to this station
            if (k[0][0] == stat_):
                if len(meteorological.loc[
                        meteorological['timestamp'] >= k[1]]) > 0:
                    for kk in k:
                        # Update global variable
                        for idx, item in enumerate(MET_HEADER):
                            if kk[0] in item[0]:
                                MET_HEADER[idx] = kk
                    # Split files at the update timestamp
                    met1 = meteorological.loc[
                        meteorological['timestamp'] >= k[1]]
                    # Create MultiIndex based on columns from header_log
                    mux1 = pd.MultiIndex.from_tuples(MET_HEADER)
                    # Set MultiIndex on dataframe
                    met1.columns = mux1
                    ## Second file
                    met2 = meteorological.loc[
                        meteorological['timestamp'] < k[1]]
                    mux2 = pd.MultiIndex.from_tuples(aux)
                    met2.columns = mux2
                    if len(met1) > len(met2):
                        # Rename
                        met2.columns = mux1
                        # Concat
                        out_met = [met1, met2]
                        out_met = pd.concat(out_met)
                        # Sort
                        out_met = out_met.sort_values(by=['timestamp'])
                    else:
                        # Rename
                        met2.columns = mux2
                        # Concat
                        out_met = [met2, met1]
                        out_met = pd.concat(out_met)
                        # Sort
                        out_met = out_met.sort_values(by=['timestamp'])
    # If equal
    if (meteorological.equals(out_met)):
        print('Processing File -> ', file)
        print('\nSplit weather data!: ')
        ## Drop second level of MultiIndex
        meteorological.columns = meteorological.columns.droplevel(1)
        print(meteorological)
        meteorological.to_csv(output, index=False)
    # If different
    elif (len(out_met)) > 0:
        ## Save files
        print('\nSplit weather data!: ', output)
        ## Drop second level of MultiIndex
        out_met.columns = out_met.columns.droplevel(1)
        print(out_met, '\n')
        out_met.to_csv(output, index=False)
    else:
        print('Processing File -> ', file)
        print('\nSplit weather data!: ', output)
        ## Drop second level of MultiIndex
        meteorological.columns = meteorological.columns.droplevel(1)
        print(meteorological)
        meteorological.to_csv(output, index=False)

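# process_meteo aggregates wd10_avg with arctan() and wd10_std with yamartino(),
# both imported from dependecies. The illustrative definitions below show what
# such aggregators are commonly taken to compute (vector-mean direction and the
# Yamartino wind-direction standard deviation, in degrees); they are sketches,
# not the project's own implementations, hence the _example_ prefix.
import numpy as np

def _example_arctan(values):
    """Vector-mean wind direction in degrees (illustrative; assumes input in degrees)."""
    rad = np.deg2rad(np.asarray(values, dtype=float))
    mean_dir = np.rad2deg(np.arctan2(np.sin(rad).mean(), np.cos(rad).mean()))
    return mean_dir % 360.0

def _example_yamartino(values):
    """Yamartino estimate of the wind-direction standard deviation in degrees (illustrative)."""
    rad = np.deg2rad(np.asarray(values, dtype=float))
    sa, ca = np.sin(rad).mean(), np.cos(rad).mean()
    eps = np.sqrt(max(0.0, 1.0 - (sa ** 2 + ca ** 2)))
    return np.rad2deg(np.arcsin(eps) * (1.0 + (2.0 / np.sqrt(3.0) - 1.0) * eps ** 3))
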
def process_solar(solar, file):
    config = load_config()
    ## Build timestamp from year, Julian day, and minute of day
    solar['timestamp'] = pd.to_datetime(
        solar.year, format='%Y') + pd.to_timedelta(
            solar.day - 1, unit='d') + pd.to_timedelta(solar['min'], unit='m')
    # Move timestamp to the second position
    cols = list(solar)
    cols.insert(1, cols.pop(cols.index('timestamp')))
    solar = solar.loc[:, cols]
    ## Header check
    year_ = file.parent.name
    month = (solar['timestamp'][0].strftime('%m'))
    stat_ = file.parent.parent.name
    output = config[0]['FORMATED_OUT'] + stat_ + '/Solarimetricos/' + year_ + \
        '/' + stat_ + '_' + year_ + '_' + month + '_SD_formatado.csv'
    ### Create output dir if it does not exist
    if not os.path.exists(os.path.dirname(output)):
        try:
            os.makedirs(os.path.dirname(output))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    ## Update global SOLAR_HEADER
    global SOL_UPDATE
    # For each key in SOL_UPDATE, check
    sol1 = []
    sol2 = []
    ## Replace ID by station name
    solar['id'] = stat_
    # Create MultiIndex based on columns from header_log
    mux = pd.MultiIndex.from_tuples(SOLAR_HEADER)
    # Set MultiIndex on dataframe
    solar.columns = mux
    # Aux output
    out_sol = []
    if SOL_UPDATE != None:
        for k in SOL_UPDATE:
            ## Check if the update belongs to this station
            if (k[0][0] == stat_):
                if len(solar.loc[solar['timestamp'] >= k[1]]) > 0:
                    for kk in k:
                        # Update global variable
                        for idx, item in enumerate(SOLAR_HEADER):
                            if kk[0] in item[0]:
                                SOLAR_HEADER[idx] = kk
                    # Split files at the update timestamp
                    sol1 = solar.loc[solar['timestamp'] >= k[1]]
                    # Create MultiIndex based on columns from header_log
                    mux1 = pd.MultiIndex.from_tuples(SOLAR_HEADER)
                    # Set MultiIndex on dataframe
                    sol1.columns = mux1
                    ## Second file
                    sol2 = solar.loc[solar['timestamp'] < k[1]]
                    mux2 = pd.MultiIndex.from_tuples(aux2)
                    sol2.columns = mux2
                    if len(sol1) > len(sol2):
                        # Rename
                        sol2.columns = mux1
                        # Concat
                        out_sol = [sol1, sol2]
                        out_sol = pd.concat(out_sol)
                        # Sort
                        out_sol = out_sol.sort_values(by=['timestamp'])
                    else:
                        # Rename
                        sol2.columns = mux2
                        # Concat
                        out_sol = [sol2, sol1]
                        out_sol = pd.concat(out_sol)
                        # Sort
                        out_sol = out_sol.sort_values(by=['timestamp'])
    # If equal
    if (solar.equals(out_sol)):
        print('Processing File -> ', file)
        print('\nSplit weather data!: ')
        # Drop second level of MultiIndex
        solar.columns = solar.columns.droplevel(1)
        print(solar)
        solar.to_csv(output, index=False)
    # If different
    elif (len(out_sol)) > 0:
        ## Save files
        print('\nSplit weather data!: ', output)
        # Drop second level of MultiIndex
        out_sol.columns = out_sol.columns.droplevel(1)
        print(out_sol)
        out_sol.to_csv(output, index=False)
    else:
        print('Processing File -> ', file)
        print('\nSplit weather data!: ', output)
        # Drop second level of MultiIndex
        solar.columns = solar.columns.droplevel(1)
        print(solar)
        solar.to_csv(output, index=False)

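# Both process_meteo and process_solar rebuild the timestamp from the year,
# Julian day ('day'), and minute-of-day ('min') columns. A self-contained example
# of that conversion with made-up values; the function name is illustrative only.
import pandas as pd

def _example_build_timestamp():
    sample = pd.DataFrame({'year': [2020, 2020], 'day': [1, 60], 'min': [0, 630]})
    sample['timestamp'] = (pd.to_datetime(sample.year.astype(str), format='%Y')
                           + pd.to_timedelta(sample.day - 1, unit='d')
                           + pd.to_timedelta(sample['min'], unit='m'))
    # Expected: 2020-01-01 00:00:00 and 2020-02-29 10:30:00 (2020 is a leap year)
    return sample['timestamp'].tolist()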