Example #1
def main_qualify():
	config = load_config()[0]
	out_path = config['QUALI_OUT']
	web_path = config['WEB_OUT']
	path_to_qualify = None
	# Collect every file with the expected extension under the input directory
	all_files = {p.resolve() for p in Path(config['QUALI_IN']).rglob('*') if p.suffix in [EXTENSTION]}
	# Keep only meteorological files
	files = [f for f in all_files if '_MD_' in str(f)]

	process_files(files, path_to_qualify, out_path, web_path)
Example #2
def read_dict(type_of_data):
    # Read the station dictionary and the SONDA header sheets
    dict_path = load_config()[0]['DICTIONARY']
    dicionario = pd.read_excel(dict_path, sheet_name='Tabela-estacao')
    colunas_dicionario = pd.read_excel(dict_path, sheet_name='Cabeçalhos SONDA', header=None)

    if type_of_data == '_MD_':
        # Rows 29 and 30 hold the two header levels for meteorological data
        header_1 = colunas_dicionario.iloc[29].dropna().values[1:]
        header_2 = colunas_dicionario.iloc[30].dropna().values[1:]
    else:
        header_1 = None
        header_2 = None
        print('Implementation in progress')
        exit()
    return dicionario, header_1, header_2
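
A minimal usage sketch (my own illustration, not part of the original module), assuming the config entry and Excel sheets above are in place; it shows how the two returned header rows could be paired into a two-level column index, similar to what process_files does in Example #6:

# Hypothetical illustration of read_dict's return values
diction, header1, header2 = read_dict('_MD_')
mux = pd.MultiIndex.from_tuples(list(zip(header1.tolist(), header2.tolist())))
print(mux[:5])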
Example #3
def menu_qualify():
	top_header('Qualify SONDA Data->')
	config = load_config()[0]
	path_to_qualify = load_stations_03(config['QUALI_IN'])
	out_path = config['QUALI_OUT']
	web_path = config['WEB_OUT']

	if isinstance(path_to_qualify, str):
		files = {p.resolve() for p in Path(path_to_qualify).rglob('*') if p.suffix in [EXTENSTION]}
	else:
		# Multiple station directories: collect matching files from each
		files = []
		for station_dir in path_to_qualify:
			files.extend(p.resolve() for p in Path(station_dir).rglob('*') if p.suffix in [EXTENSTION])

	process_files(files, path_to_qualify, out_path, web_path)
Example #4
def open_file(select_file, station, year, file):

    config = load_config()[0]
    ## INPUT HEADERS
    met_header = config['MET_INPUT']
    solar_header = config['SOLAR_INPUT']
    ## OUTPUT HEADERS
    met_out_header = config['MET_HEADER']
    sol_out_header = config['SOLAR_HEADER']

    top_header('Main > Preprocessing > Translate Historical > ' +
               str(station) + ' > ' + str(year) + ' > ' + str(file))

    if 'MD' in file:
        header_in = met_header
        header_out = met_out_header
    elif 'SD' in file:
        header_in = solar_header
        header_out = sol_out_header
    else:
        ## 'TD' and any other type are not supported yet
        header_in = None
        header_out = None

    if header_in is None:
        print('Translation for this data type is not implemented yet')
        return

    df = pd.read_csv(select_file, sep=",")

    ## SELECT ONLY INPUT COLUMNS (first entry is the station id)
    df = df[header_in[1:]]
    ## SKIP THE SECOND ROW OF THE HISTORICAL MULTI-INDEX HEADER
    df = df.iloc[1:]

    print(df)
    print(config['FORMATED_OUT'] + str(station) + '/' + str(year))
Example #5
def download_stations():
    top_header('Main Menu > Preprocessing Mode > Download Data')
    stations, ftp_con = connection()
    ### Station menu
    for count, f in enumerate(stations):
        print("\t\t [%s] %s" % (count, f))

    while True:
        try:
            ans_file = int(input("\t\t Select Station: "))
        except ValueError:
            print("\t\t Wrong selection")
            continue
        if ans_file < 0 or ans_file >= len(stations):
            print("\t\t Wrong selection.")
            continue
        files_dir = load_config()[0]['FTP_DIR'] + stations[ans_file] + '/data/'
        download_files(files_dir, ftp_con, stations[ans_file])
        break
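
The numbered-menu loop above reappears in several places (see Examples #7 and #9). A reusable sketch of the same pattern, standard library only; the helper name is my own, not the project's:

def select_from_list(options, prompt="\t\t Select option: "):
    # Print an indexed menu and loop until a valid index is entered
    for i, opt in enumerate(options):
        print("\t\t [%s] %s" % (i, opt))
    while True:
        try:
            ans = int(input(prompt))
        except ValueError:
            print("\t\t Wrong selection")
            continue
        if ans < 0 or ans >= len(options):
            print("\t\t Wrong selection.")
            continue
        return ans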
Example #6
def process_files(files,path_to_qualify,out_path,web_path):
	debug_dir = load_config()[0]['DEBUG_DIR']
	debug_file = str(debug_dir) + 'qualify_erros.txt'
	logging.basicConfig(filename=debug_file, filemode='a', format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')
	count_files = 0
	size_files = len(files)
	for file in files:
		try:
			print('Processing-> ',file.stem[:-6]+ '  -> '+str(count_files)+'/'+str(size_files))
			print('Reading dictionary....!')

			## DETECT TYPE OF DATA AND MATCHING HEADER
			if '_MD_' in str(file):
				diction,header1,header2 = read_dict('_MD_')
			elif '_SD_' in str(file):
				diction,header1,header2 = read_dict('_SD_')
			else:
				print('Unknown data type, skipping file: '+str(file))
				count_files += 1
				continue

			print('Loading file ->',file.stem)
			df = loadFile(file)
			print('Processing level 01!!')
			dqc,df = level_01(df,str(file),diction)
			print('Processing level 02!!')
			dqc = level_02(df,dqc,str(file))
			print('Processing level 03!!')
			dqc = level_03(df,dqc,str(file))
			print('Qualify done!...')
			print('Generating percentual file!')

			### SELECT COLUMNS (drop std columns)
			cols = [c for c in dqc.columns if 'std' not in c[0]]
			dqc = dqc[cols]
			percent_columns = dqc.iloc[:,5:].columns

			percent_cols = [cc[0] for cc in percent_columns]
			percent_cols.insert(0, "Dados")
			percentual_df = pd.DataFrame(columns=percent_cols)
			percentual_df['Dados'] = ['Suspeitos nível 1','Suspeitos nível 2','Suspeitos nível 3','Suspeitos nível 4','Válidos','Ausentes']
			percentual_df = percentual_df.fillna(0)

			## VALID value flags
			VALID_STRING = "9999|0999|0099|0009"
			## MISSING DATA
			NODATA = "3333"
			NOSENSOR = "-5555"
			## SUSPECT LEVELS
			SUSPECT_LVL1 = "5552|0552|0052|0002"
			SUSPECT_LVL2 = "5529|0529|0029"
			SUSPECT_LVL3 = "5299|0299"
			SUSPECT_LVL4 = "2999"
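			## Note (my reading of the scheme, not stated in the source): each value
			## carries a 4-digit DQC code, one digit per validation level, and the
			## alternations above enumerate the accepted digit patterns. Series.str.count
			## treats these strings as regex patterns, which is why the per-column
			## totals below accumulate with a plain .sum().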

			for pc in percent_columns:
				nosensor = df[pc[0]].astype(str).str.count(NOSENSOR).sum()
				if nosensor > 0:
					percentual_df[pc[0]] = ['N/S','N/S','N/S','N/S','N/S','N/S']
				else:
					valids = dqc[pc].str.count(VALID_STRING).sum()
					susplvl1 = dqc[pc].str.count(SUSPECT_LVL1).sum()
					susplvl2 = dqc[pc].str.count(SUSPECT_LVL2).sum()
					susplvl3 = dqc[pc].str.count(SUSPECT_LVL3).sum()
					susplvl4 = dqc[pc].str.count(SUSPECT_LVL4).sum()
					nodata = df[pc[0]].astype(str).str.count(NODATA).sum()
					total_ = valids + susplvl1 + susplvl2 + susplvl3 + susplvl4 + nodata
					## ADD IN PERCENTUAL FRAME
					percentual_df[pc[0]] = [susplvl1/total_,
											susplvl2/total_,
											susplvl3/total_,
											susplvl4/total_,
											valids/total_,
											nodata/total_]
			## WEB FILE
			web_df = pd.DataFrame()
			station = df.acronym.unique()[0]
			stationID = diction.loc[diction['Sigla'] == station]
			siglaNAME = stationID['Sigla'].values[0]
			nomeNAME = stationID['Nome'].values[0]
			redeNAME = stationID['Rede'].values[0]
			latNAME = stationID['Latitude'].values[0]
			lonNAME = stationID['Longitude'].values[0]
			altNAME = stationID['Altitude'].values[0]

			first_row_header = [siglaNAME,nomeNAME,'lat:'+str(latNAME),'lon:'+str(lonNAME),'alt:'+str(altNAME)+'m',redeNAME+' Network','http://sonda.ccst.inpe.br','*****@*****.**']

			### APPEND EXTRA DQC COLUMNS
			dqc['ws10_std','dqc_v1'] = '0000'
			dqc['wd10_std','dqc_v1'] = '0000'

			## Flatten column names, tagging data columns with a '_dqc' suffix
			dqc_cols = [c[0] for c in dqc.columns.values]
			dqc_mult_column = []
			for cc in range(len(dqc_cols)):
				if cc > 4:
					dqc_cols[cc] = dqc_cols[cc]+'_dqc'
					dqc_mult_column.append((dqc_cols[cc],'dqc_v1'))
				else:
					dqc_mult_column.append((dqc_cols[cc],''))
			mux = pd.MultiIndex.from_tuples(dqc_mult_column)

			dqc.columns = dqc_cols
			dqc_for_concat = dqc[dqc_cols[5:]]
			dqc.columns = mux

			### WEB FILE: concatenate data and DQC flag columns
			web_df = pd.concat([df,dqc_for_concat],axis=1)
			web_df = web_df[header1.tolist()]

			## MOUNT NEW MULTIINDEX (pad header2 with 5 blanks for the leading metadata columns)
			header2 = [''] * 5 + header2.tolist()

			new_mux = []
			for cc in range(len(web_df.columns)):
				if cc < len(first_row_header):
					new_mux.append((first_row_header[cc],web_df.columns[cc],header2[cc]))
				else:
					new_mux.append(('',web_df.columns[cc],header2[cc]))

			mux = pd.MultiIndex.from_tuples(new_mux)
			# ## FINALIZE WEB DF
			web_df.columns = mux

			### CONVERT VALUES TO STRING
			all_columns = list(web_df) # list of all column headers
			web_df[all_columns] = web_df[all_columns].astype(str)

			### Replace 3333 with N/A and -5555 with N/S before saving (see line 170 of dqc.py)
			web_df = web_df.replace('3333.0', 'N/A')
			web_df = web_df.replace('-5555', 'N/S')

			print(web_df)


			print('Saving file...')
			### SAVE FILES
			if isinstance(path_to_qualify, str):
				mount_out = Path(path_to_qualify).parts[2:]
			else:
				mount_out = Path(file).parts[-4:][:-1]
			mount_out = '/'.join(str(elem) for elem in mount_out)
			### CREATE PATH IF IT DOES NOT EXIST
			Path(out_path+mount_out+'/').mkdir(parents=True, exist_ok=True)
			output_file = out_path+mount_out+'/'+file.stem[:-9]+'DQC'+file.suffix
			## SAVE DQC FILE
			dqc.to_csv(output_file,index=False)

			### WEB FILES
			Path(web_path+mount_out+'/').mkdir(parents=True, exist_ok=True)
			output_web = web_path+mount_out+'/'+file.stem[:-10]+file.suffix
			percentual_out = web_path+mount_out+'/'+file.stem[:-9]+'percentuais'+file.suffix
			### SAVING
			web_df.to_csv(output_web,index=False)
			percentual_df.to_csv(percentual_out,index=False)
			print('Files have been saved into->',output_file,'\nWEB->',output_web)
			count_files += 1
		except Exception:
			print('Error qualifying file: '+str(file))
			logging.exception('Error qualifying file: '+str(file))
			count_files += 1
Example #7
def historic_generate():

    operation_dir = load_config()[0]['OPERATIONAL_IN']

    ## SET DEBUG DIR
    logging.basicConfig(filename=load_config()[0]['DEBUG_DIR'] +
                        'historical_debug.txt',
                        filemode='a',
                        format='\nProcess Date %(asctime)s \n %(message)s\n',
                        datefmt='%d-%b-%y %H:%M:%S',
                        level=os.environ.get("LOGLEVEL", "INFO"))

    top_header('Main > Preprocessing > Generate Historical')
    print('\t\tPlease select one station to generate historical data: ')

    operational_stations = [
        fn for fn in listdir(operation_dir) if not fn.startswith('.')
    ]
    if len(operational_stations) == 0:
        print('There is no data to be formatted')
        input('Press Enter to return')
        return None

    for count, f in enumerate(operational_stations):
        print("\t\t [%s] %s" % (count, f))

    while True:
        try:
            ans_file = int(input("\t\t Select Station: "))
        except ValueError:
            print("\t\t Wrong selection")
            continue
        if ans_file < 0 or ans_file > count:
            print("\t\t Wrong selection.")
            continue

        selected_st = operation_dir + operational_stations[ans_file] + '/'
        break

    ## SELECT TYPE OF DATA
    top_header('Main > Preprocessing > Generate Historical > ' +
               str(operational_stations[ans_file]).upper())
    print('\t\tPlease select type of data to generate historical data: ')

    dataTypes = ['MD', 'SD', 'TD', '50', '25', '10']

    for countT, f in enumerate(dataTypes):
        print("\t\t [%s] %s" % (countT, f))

    while True:
        try:
            ans_type = int(input("\t\t Select Type: "))
        except ValueError:
            print("\t\t Wrong selection")
            continue
        if ans_type < 0 or ans_type > countT:
            print("\t\t Wrong selection.")
            continue

        selected_file = operational_stations[ans_file].upper() + '_' + str(
            dataTypes[ans_type]) + '.DAT'
        break

    ### DATA TYPES
    if operational_stations[ans_file] == 'sms':

        ## OPEN DATA
        df = pd.read_csv(selected_st + selected_file,
                         sep=",",
                         header=None,
                         skiprows=4,
                         skipinitialspace=False)
        df1 = df.copy()
        head0 = pd.read_csv(selected_st + selected_file,
                            sep=",",
                            header=None,
                            nrows=1)
        head1 = pd.read_csv(selected_st + selected_file,
                            sep=",",
                            header=None,
                            skiprows=1,
                            nrows=1)
        head2 = pd.read_csv(selected_st + selected_file,
                            sep=",",
                            header=None,
                            skiprows=3,
                            nrows=1)

        head0 = head0.iloc[0].values
        head1 = head1.iloc[0].values
        head2 = head2.iloc[0].values

        df1[0] = pd.to_datetime(df1[0], format='%Y-%m-%d %H:%M:%S')

        ## SELECT TIMESTAMP TO PROCESS
        top_header('Main > Preprocessing > Generate Historical > ' +
                   str(operational_stations[ans_file]).upper() + ' > ' +
                   str(dataTypes[ans_type]))
        print('\t\tPlease select the year to generate historical data: ')

        ## AVAILABLE YEARS
        years = df1[0].dt.year.unique()
        for countY, f in enumerate(years):
            print("\t\t [%s] %s" % (countY, f))
        while True:
            try:
                ans_year = int(input("\t\t Select Year: "))
            except ValueError:
                print("\t\t Wrong selection")
                continue
            if ans_year < 0 or ans_year > countY:
                print("\t\t Wrong selection.")
                continue

            selected_year = years[ans_year]
            break

        df1 = df1.set_index(0)

        months = df1.loc[str(selected_year)]
        months = months.reset_index()
        months = months[0].dt.strftime('%m').unique()

        top_header('Main > Preprocessing > Generate Historical > ' +
                   str(operational_stations[ans_file]).upper() + ' > ' +
                   str(dataTypes[ans_type]) + ' > ' + str(selected_year))
        print('\t\tPlease select the month to generate historical data: ')

        for countM, f in enumerate(months):
            print("\t\t [%s] %s" % (countM, f))
        while True:
            try:
                ans_month = int(input("\t\t Select Month: "))
            except ValueError:
                print("\t\t Wrong selection")
                continue
            if ans_month < 0 or ans_month > countM:
                print("\t\t Wrong selection.")
                continue

            selected_month = months[ans_month]
            break

        top_header('Main > Preprocessing > Generate Historical > ' +
                   str(operational_stations[ans_file]).upper() + ' > ' +
                   str(dataTypes[ans_type]) + ' > ' + str(selected_year) +
                   ' > ' + str(selected_month))

        #SELECTED TO GENERATE
        df1 = df1.loc[str(selected_year) + '-' + str(selected_month)]

        ## TIME INTERVAL VERIFICATION
        df1 = df1.sort_index(ascending=True)

        # GET TIMES STRING
        max_time = df1.index.max()
        min_time = df1.index.min()

        ## FIND INDEX INTO ORIGINAL DATAFRAME
        idx_min = df.loc[df[0] == str(min_time)].index.values[0]
        idx_max = df.loc[df[0] == str(max_time)].index.values[0]

        ## LOC BETWEEN IDX
        locked_df_chk = df.loc[idx_min:idx_max]

        ## FINAL DF TO COMPARE
        final_df = locked_df_chk.copy()

        ## MOUNT DATE INDEX ACCORDING TO TYPE OF DATA
        if dataTypes[ans_type] in ('MD', '10', '25', '50'):
            freqc = '10min'
        if dataTypes[ans_type] in ('SD', 'TD'):
            freqc = '1min'
        ## PASS COLUMN TO DATETIME
        locked_df_chk[0] = pd.to_datetime(locked_df_chk[0],
                                          format='%Y-%m-%d %H:%M:%S')

        ## MULTIINDEX
        mux = []
        for i in range(len(head1)):
            mux.append([str(head1[i]).lower(), str(head2[i]).lower()])
        mux = pd.MultiIndex.from_tuples(mux)

        ## GET ID OF STATION
        id_st = locked_df_chk[2].values[0]

        ## DETECT non-existent SENSOR
        non_sens_col = []
        object_type_c = locked_df_chk.select_dtypes(include=['object'])
        for c in object_type_c:
            detc_mean = object_type_c[c].astype(float).mean()
            if str(detc_mean) == 'nan':
                non_sens_col.append(c)

        ## REJECT BASED ON TIME INTERVAL
        ## DETECT TYPE OF FILE
        if dataTypes[ans_type] in ('MD', '10', '25', '50'):
            t_delta = pd.Timedelta(minutes=10)
        if dataTypes[ans_type] in ('SD', 'TD'):
            t_delta = pd.Timedelta(minutes=1)

        ## Generate every expected timestamp of the month to compare against
        year_month = locked_df_chk[0].dt.strftime('%Y-%m').values[0]
        month_generated = pd.date_range(
            start=pd.Timestamp(year_month),
            end=pd.Timestamp(year_month) + pd.offsets.MonthEnd(1) +
            pd.Timedelta(hours=24) - t_delta,  # last slot of the month's final day
            freq=freqc)
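        ## Note (my reading, not from the original source): reindexing against
        ## this complete month index (below, with fill_value=0) surfaces every
        ## missing timestamp; those placeholder rows are later rewritten with
        ## the 3333 "lost data" sentinel.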

        ## CHECK FOR DUPLICATES IN THE TIMESTAMP COLUMN
        times_dup = locked_df_chk[locked_df_chk.duplicated([0], keep=False)]
        group_tdup = times_dup.groupby(0)

        ## RESOLVE CLASHING TIMESTAMPS: shift each duplicate one step past the previous row
        for g, gdftum in group_tdup:
            idx_groups = gdftum.index
            for idxgg in range(len(idx_groups)):
                pass_idx = idx_groups[idxgg] - 1
                locked_df_chk.loc[idx_groups[idxgg],
                                  0] = locked_df_chk.loc[pass_idx][0] + t_delta

        ## DROP ROWS STILL DUPLICATED ON TIMESTAMP
        locked_df_chk = locked_df_chk.drop_duplicates(subset=0)

        ## SET THE TIMESTAMP COLUMN AS INDEX
        locked_df_chk = locked_df_chk.set_index(0)

        ## CHECK FOR ROWS DUPLICATED ACROSS ALL COLUMNS
        locked_df_chk = locked_df_chk.drop_duplicates(keep='first')

        # FILL
        locked_df_chk = locked_df_chk.reindex(month_generated, fill_value=0)

        ## REJECT BASED ON TIME-INTERVAL GAPS
        check_t_interval = locked_df_chk
        check_t_interval = check_t_interval[min_time:max_time]
        ## MAXIMUM TOLERATED GAP
        lim_delta = pd.Timedelta(minutes=50)

        totalDelta = t_delta
        last_ro = min_time
        for i, row in check_t_interval.iterrows():
            if np.all(row[6:-1].values == 0):
                totalDelta = totalDelta + t_delta
            else:
                last_ro = i
                totalDelta = pd.Timedelta(minutes=0)
            if totalDelta >= lim_delta:
                print('Failed to generate file: a sequence of consecutive '
                      'failures exceeded', lim_delta, '\n')
                fail = check_t_interval[last_ro:i + t_delta]
                fail = fail.reset_index()
                fail.columns = mux
                print(fail)
                return None

        ## ADD NON-EXISTENT VALUES
        locked_df_chk[non_sens_col] = 5555
        idx_values = locked_df_chk.loc[locked_df_chk[2] == 0].index

        ## ADD YEAR
        locked_df_chk.loc[idx_values,
                          3] = locked_df_chk.loc[idx_values].index.strftime(
                              '%Y').values
        ## ADD JULIAN DAY
        locked_df_chk.loc[idx_values,
                          4] = locked_df_chk.loc[idx_values].index.strftime(
                              '%j').values
        ## ADD MINUTE OF DAY
        for hm in idx_values:
            midnight = datetime.strptime(hm.strftime('%d/%m/%Y 00:00'), '%d/%m/%Y %H:%M')
            current = datetime.strptime(hm.strftime('%d/%m/%Y %H:%M'), '%d/%m/%Y %H:%M')
            difference = current - midnight
            minutes = divmod(difference.seconds, 60)
            locked_df_chk.loc[hm, 5] = minutes[0]

        locked_df_chk[2] = id_st

        ## DATETIME COLUMNS + NON-SENSOR COLUMNS
        dt_non_se = [2, 3, 4] + non_sens_col

        ### FILL THE GENERATED ROWS WITH THE 3333 "lost data" SENTINEL
        diff_columns = np.setdiff1d(locked_df_chk.columns.values, dt_non_se)
        locked_df_chk.loc[idx_values, diff_columns] = 3333

        ## SAVE
        if len(locked_df_chk) > 0:
            ## SAVE PROCESS
            output_dir = load_config()[0]['HISTORICAL_OUT'] + str(
                operational_stations[ans_file]).upper() + '/' + str(
                    selected_year) + '/'
            output_file_name = str(operational_stations[ans_file]).upper(
            ) + '_' + str(selected_year) + '_' + str(
                locked_df_chk.index[0].strftime('%j')) + '_a_' + str(
                    locked_df_chk.index[-1].strftime('%j')) + '_' + str(
                        dataTypes[ans_type]) + '.dat'

            ## COUNT SENTINEL ROWS
            lost_counter = 0
            nosen_counter = 0
            for i, row in locked_df_chk.iterrows():
                ## ROWS CONTAINING LOST DATA (3333)
                if 3333 in row.values[:]:
                    lost_counter += 1
                ## ROWS CONTAINING A MISSING SENSOR (5555)
                if 5555 in row.values[:]:
                    nosen_counter += 1

            ## RESET INDEX
            locked_df_chk = locked_df_chk.reset_index()

            # ADD MULTIINDEX
            locked_df_chk.columns = mux

            # CREATE DIR
            pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
            ## CREATE VERSION DIRS
            pathlib.Path(output_dir + '/versions').mkdir(parents=True,
                                                         exist_ok=True)

            file__ = output_dir + output_file_name
            ## CHECK IF THE FILE ALREADY EXISTS
            if os.path.isfile(file__):
                warningmsg = ('\nSTATION-> ' + str(output_file_name[:-4]) +
                              '  \nLOST 3333 ROWS:-> ' + str(lost_counter) +
                              '\nNO SENSOR 5555 ROWS:\n' + str(nosen_counter) +
                              '\n')
                logging.warning(warningmsg)

                ## CHECK LAST VERSION
                if len(os.listdir(output_dir + '/versions')) == 0:
                    shutil.move(
                        file__,
                        output_dir + '/versions/' + output_file_name + '.v01')
                    ## CREATE FILE
                    locked_df_chk.to_csv(file__, index=False)

                    warningmsg = ('\nSTATION-> ' + str(output_file_name[:-4]) +
                                  ' File version(0)  \nLOST 3333 ROWS:-> ' +
                                  str(lost_counter) +
                                  '\nNO SENSOR 5555 ROWS:\n' +
                                  str(nosen_counter) + '\n')
                    logging.warning(warningmsg)

                else:
                    versions = [
                        fn for fn in listdir(output_dir + '/versions/')
                        if not fn.startswith('.')
                    ]
                    shutil.move(
                        file__, output_dir + '/versions/' + output_file_name +
                        '.v0' + str(int(versions[-1][-2:]) + 1))

                    warningmsg = ('\nSTATION-> ' + str(output_file_name[:-4]) +
                                  ' File version(' +
                                  str(int(versions[-1][-2:]) + 1) +
                                  ')  \nLOST 3333 ROWS:-> ' +
                                  str(lost_counter) +
                                  '\nNO SENSOR 5555 ROWS:\n' +
                                  str(nosen_counter) + '\n')
                    logging.warning(warningmsg)
                    print(warningmsg)
                    ## CREATE FILE
                    locked_df_chk.to_csv(file__, index=False)
                    print(locked_df_chk)

            else:
                warningmsg = ('\nSTATION-> ' + str(output_file_name[:-4]) +
                              ' File version(0)   \nLOST 3333 ROWS:-> ' +
                              str(lost_counter) + '\nNO SENSOR 5555 ROWS:\n' +
                              str(nosen_counter) + '\n')
                logging.warning(warningmsg)
                ## CREATE FILE
                print(warningmsg)
                print(locked_df_chk)
                locked_df_chk.to_csv(file__, index=False)

            ## UPLOAD RESULTS
            ver_file_names = [
                fn for fn in listdir(output_dir + str('/versions'))
                if not fn.startswith('.')
            ]
            if len(ver_file_names) > 0:
                last_file_version = sorted(ver_file_names)[-1]
            else:
                last_file_version = None
            file_to_upload = file__

            print('\t\tUpload files to FTP: ')
            choice = input("""
                          (Y) - Yes
                          (N) - No
                          Please enter your choice: """)

            if choice == "Y" or choice == "y":
                connection(file_to_upload, operational_stations[ans_file],
                           selected_year, output_file_name, last_file_version,
                           operational_stations[ans_file])
            elif choice == "N" or choice == "n":
                sys.exit
            elif choice == "Q" or choice == "q":
                sys.exit
            else:
                print("You must only select one option")
                print("Please try again")
                mainMenu()
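
The save branch above implements a simple versioning scheme: when a historical file already exists, the previous copy is moved into versions/ with a .v0N suffix before the new file is written. A compact sketch of the same idea (helper name and layout are my own, not the project's):

import os
import shutil
from os import listdir

def rotate_version(output_dir, file_name):
    # Move an existing file into versions/, numbering it after the last version
    target = os.path.join(output_dir, file_name)
    if not os.path.isfile(target):
        return None
    vdir = os.path.join(output_dir, 'versions')
    os.makedirs(vdir, exist_ok=True)
    versions = sorted(fn for fn in listdir(vdir) if not fn.startswith('.'))
    next_n = int(versions[-1][-2:]) + 1 if versions else 1
    moved = os.path.join(vdir, '%s.v%02d' % (file_name, next_n))
    shutil.move(target, moved)
    return moved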
Example #8
# -*- coding: utf-8 -*-
from modules.top_header import top_header
from modules.load_config import load_config
from datetime import datetime
import calendar
from dependecies import *

config_file = load_config()


def historic_generate():

    operation_dir = load_config()[0]['OPERATIONAL_IN']

    ## SET DEBUG DIR
    logging.basicConfig(filename=load_config()[0]['DEBUG_DIR'] +
                        'historical_debug.txt',
                        filemode='a',
                        format='\nProcess Date %(asctime)s \n %(message)s\n',
                        datefmt='%d-%b-%y %H:%M:%S',
                        level=os.environ.get("LOGLEVEL", "INFO"))

    top_header('Main > Preprocessing > Generate Historical')
    print('\t\tPlease select one station to generate historical data: ')

    operational_stations = [
        fn for fn in listdir(operation_dir) if not fn.startswith('.')
    ]
    if len(operational_stations) == 0:
        print('There is no data to be formatted')
        input('Press Enter to return')
Example #9
def translate_historical():

    operation_dir = load_config()[0]['HISTORICAL_OUT']

    top_header('Main > Preprocessing > Translate Historical')
    print('\t\tPlease select one station to translate historical data: ')

    historical_pats = [
        fn for fn in listdir(operation_dir) if not fn.startswith('.')
    ]

    if len(historical_pats) == 0:
        print('There is no data to be formatted')
        input('Press Enter to return')
        return None

    ## SELECT STATION
    for count, f in enumerate(historical_pats):
        print("\t\t [%s] %s" % (count, f))

    while True:
        try:
            ans_file = int(input("\t\t Select Station: "))
        except ValueError:
            print("\t\t Wrong selection")
            continue
        if ans_file < 0 or ans_file > count:
            print("\t\t Wrong selection.")
            continue

        selected_st = operation_dir + historical_pats[ans_file] + '/'
        top_header('Main > Preprocessing > Translate Historical > ' +
                   str(historical_pats[ans_file]))
        print('\t\tPlease select one year: ')

        select_year = [
            fn for fn in listdir(selected_st) if not fn.startswith('.')
        ]

        ## SELECT YEAR
        for count, f in enumerate(select_year):
            print("\t\t [%s] %s" % (count, f))

        while True:
            try:
                ans_year = int(input("\t\t Select Year: "))
            except ValueError:
                print("\t\t Wrong selection")
                continue
            if ans_year < 0 or ans_year > count:
                print("\t\t Wrong selection.")
                continue

            selected_year = selected_st + select_year[ans_year] + '/'
            top_header('Main > Preprocessing > Translate Historical > ' +
                       str(historical_pats[ans_file]) + ' > ' +
                       select_year[ans_year])
            print('\t\tPlease select one file to translate: ')

            select_files = [
                fn for fn in listdir(selected_year)
                if not fn.startswith('.') and '.dat' in fn
            ]

            ## SELECT FILE
            for count, f in enumerate(select_files):
                print("\t\t [%s] %s" % (count, f))

            while True:
                try:
                    ans_file_ = int(input("\t\t Select File: "))
                except ValueError:
                    print("\t\t Wrong selection")
                    continue
                if ans_file_ < 0 or ans_file_ > count:
                    print("\t\t Wrong selection.")
                    continue

                selected_file = selected_year + select_files[ans_file_]
                open_file(selected_file, historical_pats[ans_file],
                          select_year[ans_year], select_files[ans_file_])
                break
            break
        break
Example #10
# -*- coding: utf-8 -*-
from modules.top_header import top_header
from modules.load_config import load_config
from pathlib import Path
import numpy as np
import pandas as pd
import sys

config = load_config()
path_ = config[0]['AUTO_DATA']


def load_data_type():
    top_header('Main Menu > Automatic Detection')
    print('\t\tPlease select an option: ')
    print('\t\tPath dir: ', path_)
    choice = input("""
                  1: Detect Solarimetric Data
                  2: Detect Meteorological Data
                  3: Detect Anemometric Data
                  4: Detect Sky Camera Data
                  5: Set Data PATH
                  Q: Quit
                  Please enter your choice: """)

    if choice == "Detect Solarimetric Data" or choice == "1":
        detec_solar()
    elif choice == "Detect Meteorological Data" or choice == "2":
        detect_met()
    elif choice == "Detect Anemometric Data" or choice == "3":
        detect_ane()
Example #11
def process_meteo(meteo, file):
    config = load_config()
    meteo['timestamp'] = pd.to_datetime(
        meteo.year, format='%Y') + pd.to_timedelta(
            meteo.day - 1, unit='d') + pd.to_timedelta(meteo['min'], unit='m')
    meteo = meteo.set_index('timestamp')

    ## Duplicate ws10_avg so the resample can derive ws10_std from it
    meteo['ws10_std'] = meteo['ws10_avg']

    ## Rename columns
    meteo.rename(columns={
        'day': 'jday',
        'temp_sfc': 'tp_sfc',
        'prec': 'rain'
    },
                 inplace=True)

    # Conversions
    conversion = {
        'id': 'first',
        'year': 'first',
        'jday': 'first',
        'min': 'first',
        'tp_sfc': 'first',
        'humid': 'first',
        'press': 'first',
        'rain': 'sum',
        'ws10_avg': 'mean',
        'ws10_std': 'std',
        'wd10_avg': lambda x: arctan(x.values),
        'wd10_std': lambda x: yamartino(x.values)
    }

    # Mask sentinel values so they are not resampled
    # (note: a `!= np.nan` comparison is always True, so NaNs simply pass
    # through to the aggregation functions, which skip them by default)
    Maska = meteo[(meteo != 3333.0) & (meteo != -5555)]

    # Apply resample-based conversion
    Maska = Maska.resample('10min').agg(conversion)

    ## Unmask values
    Unmask = meteo[(meteo == 3333.0)].resample('10min').first()
    Unmask2 = meteo[(meteo == -5555)].resample('10min').first()

    ## Combine values
    meteorological = Unmask.combine_first(Maska)
    meteorological = Unmask2.combine_first(meteorological)
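    ## Note (my reading of the pattern above, not from the original source):
    ## each Unmask frame is NaN except where its sentinel occurred, and
    ## .first() keeps the sentinel per 10min bucket, so combine_first
    ## re-injects 3333/-5555 over the aggregated values instead of letting
    ## them be averaged away.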

    ## Reset index
    meteorological = meteorological.reset_index()
    # Reorder the columns
    meteorological = meteorological.reindex(columns=[
        'id', 'timestamp', 'year', 'jday', 'min', 'tp_sfc', 'humid', 'press',
        'rain', 'ws10_avg', 'ws10_std', 'wd10_avg', 'wd10_std'
    ])

    ## Change type of columns
    meteorological[['id', 'year', 'jday',
                    'min']] = meteorological[['id', 'year', 'jday',
                                              'min']].astype(int)

    ## Header check
    year_ = file.parent.name
    ## Extract month string
    month = (meteorological['timestamp'][0].strftime('%m'))
    stat_ = file.parent.parent.name
    output = config[0][
        'FORMATED_OUT'] + stat_ + '/Meteorologicos/' + year_ + '/' + stat_ + '_' + year_ + '_' + month + '_MD_formatado.csv'

    ### Create dir of output if not exist
    if not os.path.exists(os.path.dirname(output)):
        try:
            os.makedirs(os.path.dirname(output))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    ## Update global MET_HEADER
    global MET_HEADER
    # Buffers for the split frames checked against MET_UPDATE
    met1 = []
    met2 = []

    ## Replace the numeric id with the station name
    meteorological['id'] = stat_

    # Create a MultiIndex from the header definition
    mux = pd.MultiIndex.from_tuples(MET_HEADER)
    # Attach the MultiIndex to the dataframe
    meteorological.columns = mux

    #Aux out
    out_met = []

    if MET_UPDATE is not None:
        for k in MET_UPDATE:
            ## Check whether the update applies to this station
            if (k[0][0] == stat_):
                if len(meteorological.loc[
                        meteorological['timestamp'] >= k[1]]) > 0:
                    # Snapshot the pre-update header for the older rows
                    # (the original referenced an undefined `aux`; this is
                    # the most plausible intent)
                    aux = list(MET_HEADER)
                    for kk in k:
                        # Update global variable
                        for idx, item in enumerate(MET_HEADER):
                            if kk[0] in item[0]:
                                MET_HEADER[idx] = kk

                    # Split the frame at the update timestamp
                    met1 = meteorological.loc[
                        meteorological['timestamp'] >= k[1]]
                    # Build a MultiIndex from the updated header
                    mux1 = pd.MultiIndex.from_tuples(MET_HEADER)
                    met1.columns = mux1

                    ## Older rows keep the previous header
                    met2 = meteorological.loc[
                        meteorological['timestamp'] < k[1]]
                    mux2 = pd.MultiIndex.from_tuples(aux)
                    met2.columns = mux2

                    if len(met1) > len(met2):
                        # Rename
                        met2.columns = mux1
                        # Concat
                        out_met = [met1, met2]
                        out_met = pd.concat(out_met)
                        # Sort
                        out_met = out_met.sort_values(by=['timestamp'])
                    else:
                        #Rename
                        met2.columns = mux2
                        # Concat
                        out_met = [met2, met1]
                        out_met = pd.concat(out_met)
                        # Sort
                        out_met = out_met.sort_values(by=['timestamp'])

    # If the split produced an identical frame
    if (meteorological.equals(out_met)):
        print('Processing File -> ', file)
        print('\nSplit weather data!: ')
        ## Drop second level of the MultiIndex
        meteorological.columns = meteorological.columns.droplevel(1)
        print(meteorological)
        meteorological.to_csv(output, index=False)

    # If the split produced a different frame
    elif (len(out_met)) > 0:
        ## Save files
        print('\nSplit weather data!: ', output)
        ## Drop second level of the MultiIndex
        out_met.columns = out_met.columns.droplevel(1)
        print(out_met, '\n')

        out_met.to_csv(output, index=False)
    else:
        print('Processing File -> ', file)
        print('\nSplit weather data!: ', output)
        ## Drop second level of the MultiIndex
        meteorological.columns = meteorological.columns.droplevel(1)
        print(meteorological)

        meteorological.to_csv(output, index=False)
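
The conversion table above delegates wind-direction averaging to arctan and yamartino, which are defined elsewhere in the project. A minimal sketch of what such functions conventionally compute (vector averaging and the Yamartino one-pass standard-deviation estimate); this is an assumption about their behavior, not the project's actual code:

import numpy as np

def arctan(values):
    # Vector-average wind directions (degrees): mean of the unit vectors
    rad = np.radians(values.astype(float))
    sa, ca = np.mean(np.sin(rad)), np.mean(np.cos(rad))
    return np.degrees(np.arctan2(sa, ca)) % 360

def yamartino(values):
    # Yamartino (1984) single-pass estimate of wind-direction std deviation
    rad = np.radians(values.astype(float))
    sa, ca = np.mean(np.sin(rad)), np.mean(np.cos(rad))
    eps = np.sqrt(max(0.0, 1.0 - (sa * sa + ca * ca)))
    return np.degrees(np.arcsin(eps) * (1 + (2 / np.sqrt(3) - 1) * eps ** 3))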
Example #12
def process_solar(solar, file):

    config = load_config()
    ## Create Timestamp
    solar['timestamp'] = pd.to_datetime(
        solar.year, format='%Y') + pd.to_timedelta(
            solar.day - 1, unit='d') + pd.to_timedelta(solar['min'], unit='m')

    # Change position of timestamp
    cols = list(solar)
    cols.insert(1, cols.pop(cols.index('timestamp')))
    solar = solar.loc[:, cols]

    ## Header check
    year_ = file.parent.name
    month = (solar['timestamp'][0].strftime('%m'))
    stat_ = file.parent.parent.name
    output = config[0][
        'FORMATED_OUT'] + stat_ + '/Solarimetricos/' + year_ + '/' + stat_ + '_' + year_ + '_' + month + '_SD_formatado.csv'

    ### Create dir of output if not exist
    if not os.path.exists(os.path.dirname(output)):
        try:
            os.makedirs(os.path.dirname(output))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    ## Update global SOLAR_HEADER
    global SOLAR_HEADER
    # Buffers for the split frames checked against SOL_UPDATE
    sol1 = []
    sol2 = []

    ## Replace the numeric id with the station name
    solar['id'] = stat_

    # Create a MultiIndex from the header definition
    mux = pd.MultiIndex.from_tuples(SOLAR_HEADER)
    # Attach the MultiIndex to the dataframe
    solar.columns = mux

    # Aux
    out_sol = []

    if SOL_UPDATE is not None:
        for k in SOL_UPDATE:
            ## Check whether the update applies to this station
            if (k[0][0] == stat_):
                if len(solar.loc[solar['timestamp'] >= k[1]]) > 0:
                    # Snapshot the pre-update header for the older rows
                    # (the original referenced an undefined `aux2`; this is
                    # the most plausible intent)
                    aux2 = list(SOLAR_HEADER)
                    for kk in k:
                        # Update global variable
                        for idx, item in enumerate(SOLAR_HEADER):
                            if kk[0] in item[0]:
                                SOLAR_HEADER[idx] = kk

                    # Split the frame at the update timestamp
                    sol1 = solar.loc[solar['timestamp'] >= k[1]]
                    # Build a MultiIndex from the updated header
                    mux1 = pd.MultiIndex.from_tuples(SOLAR_HEADER)
                    sol1.columns = mux1

                    ## Older rows keep the previous header
                    sol2 = solar.loc[solar['timestamp'] < k[1]]
                    mux2 = pd.MultiIndex.from_tuples(aux2)
                    sol2.columns = mux2

                    if len(sol1) > len(sol2):
                        # Rename
                        sol2.columns = mux1
                        # Concat
                        out_sol = [sol1, sol2]
                        out_sol = pd.concat(out_sol)
                        # Sort
                        out_sol = out_sol.sort_values(by=['timestamp'])
                    else:
                        #Rename
                        sol2.columns = mux2
                        # Concat
                        out_sol = [sol2, sol1]
                        out_sol = pd.concat(out_sol)
                        # Sort
                        out_sol = out_sol.sort_values(by=['timestamp'])

    # If the split produced an identical frame
    if (solar.equals(out_sol)):
        print('Processing File -> ', file)
        print('\nSplit weather data!: ')
        # Drop second level of the MultiIndex
        solar.columns = solar.columns.droplevel(1)
        print(solar)
        solar.to_csv(output, index=False)
    # If the split produced a different frame
    elif (len(out_sol)) > 0:
        ## Save files
        print('\nSplit weather data!: ', output)
        # Drop second level of the MultiIndex
        out_sol.columns = out_sol.columns.droplevel(1)
        print(out_sol)
        out_sol.to_csv(output, index=False)
    else:
        print('Processing File -> ', file)
        print('\nSplit weather data!: ', output)
        # Drop second level of the MultiIndex
        solar.columns = solar.columns.droplevel(1)
        print(solar)
        solar.to_csv(output, index=False)