def forecast_month(month, year, dirData, dirTotalCsv, dirTrain, estacion, contaminant):
    """
    Forecast every record of one calendar month for a station and store
    each forecast value.

    The function reads the records between '<year>-<month>-01 00:00:00' and
    '<year>-<month>-<last day> 23:00:00', normalizes every row, runs the
    prediction and saves one value per input date through guardarPrediccion.

    :param month: month to forecast (1-12)
    :type month: int
    :param year: year the month belongs to
    :type year: int
    :param dirData: prefix of the "<station>_<contaminant>.csv" files
    :type dirData: String
    :param dirTotalCsv: address handed to unionMeteorologia
    :type dirTotalCsv: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param estacion: name of the station
    :type estacion: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    lastDay = calendar.monthrange(year, month)[1]
    monthPrefix = str(year) + '-' + numString(month) + '-'
    fechaInicio = monthPrefix + '01 00:00:00'
    fechaFinal = monthPrefix + numString(lastDay) + ' 23:00:00'

    data = fd.readData(fechaInicio, fechaFinal, [estacion], contaminant)
    data = separateDate(data)
    data = unionMeteorologia(data, dirTotalCsv)
    data = data.fillna(value=-1)

    # Keep the dates before filterData runs; they are paired with the
    # predictions by position when the results are saved below.
    frame_dates = data['fecha'].values
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)

    arrayPred = []
    for label in data.index.values:
        # DataFrame.ix was removed in pandas 1.0; the labels come from the
        # frame's own index, so .loc selects the same rows.
        row = data.loc[label].values
        # Skip the first column (presumably the date) -- TODO confirm.
        valNorm = pre.normalize(row[1:], estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))

    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    nameCont = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, nameCont + '_')

    for pos, fecha in enumerate(frame_dates):
        fecha_string = df.to_datetime(str(fecha)).strftime('%Y-%m-%d %H:%M:%S')
        # The meaning of the trailing 4 is defined by guardarPrediccion.
        guardarPrediccion(estacion, fecha_string, [real[pos]], contaminant, 4)
def saveData2(listEstations, startDate, nameContaminant, endDate, dirr, dirTotalCsv, contaminant):
    """
    Build and save the two CSV files of every station.

    For each station the records are read with fd.readData, the
    "<nameContaminant><station>_delta" column is split off into its own
    frame, and both frames are written under ``dirr`` as
    "<station>_<contaminant>.csv" and "<station>_<contaminant>_pred.csv".

    :param listEstations: list with stations
    :type listEstations: String list
    :param startDate: start dates, indexed in parallel with listEstations
    :type startDate: list
    :param nameContaminant: name of the pollutant in the database
    :type nameContaminant: String
    :param endDate: end date shared by all stations
    :type endDate: date
    :param dirr: prefix of the output files
    :type dirr: String
    :param dirTotalCsv: address handed to unionData
    :type dirTotalCsv: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    for pos, station in enumerate(listEstations):
        print(station)
        firstDate = startDate[pos]
        print(firstDate)

        deltaColumn = nameContaminant + station + '_delta'
        dataFile = station + '_' + contaminant + '.csv'
        predFile = station + '_' + contaminant + '_pred.csv'

        rawData = fd.readData(firstDate, endDate, [station], contaminant)
        targets = fd.buildClass2(rawData, [station], contaminant, 24, firstDate, endDate)

        # Drop the columns that are entirely empty, mark remaining gaps
        # with -1 and renumber the rows before joining on the date.
        cleaned = rawData.dropna(axis=1, how='all').fillna(value=-1)
        cleaned = cleaned.reset_index().drop(labels='index', axis=1)
        merged = cleaned.merge(targets, how='left', on='fecha')

        # "build" carries the date plus the delta column only.
        build = df.DataFrame(merged['fecha'], columns=['fecha'])
        build[deltaColumn] = df.DataFrame(merged[deltaColumn], columns=[deltaColumn])
        build = build.reset_index().drop(labels='index', axis=1)

        # "data" carries everything except the delta column.
        data = merged.drop(labels=deltaColumn, axis=1)
        data = data.reset_index().drop(labels='index', axis=1)
        data = unionData(separateDate(data), dirTotalCsv)
        maxAndMinValues(data, station, contaminant, dirr)

        data = data.drop_duplicates(keep='first')
        build = build.drop_duplicates(keep='first')
        # NOTE(review): filterData receives a DataFrame as its second
        # argument here -- confirm the helper supports that.
        build = filterData(data, build)

        # "<dirr><station>_<contaminant>.csv"
        data.to_csv(dirr + dataFile, encoding='utf-8', index=False)
        # "<dirr><station>_<contaminant>_pred.csv"
        build.to_csv(dirr + predFile, encoding='utf-8', index=False)
def baseContaminantes(fecha, estacion, contaminant):
    """
    Read the pollutant records of one station for a single hour.

    The timestamp "<year>-<month>-<day> <hour>:00:00" is used as both the
    start and the end of the range requested from fd.readData.

    :param fecha: moment to query (year, month, day and hour are used)
    :type fecha: date
    :param estacion: name of the station from which the information is extracted
    :type estacion: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :return: the value returned by fd.readData for that hour
    """
    dia = '-'.join([str(fecha.year), numString(fecha.month), numString(fecha.day)])
    hora = numString(fecha.hour) + ':00:00'
    fechaActual = dia + ' ' + hora
    return fd.readData(fechaActual, fechaActual, [estacion], contaminant)
def training(fechaAyer, estacion, dirTrain, dirData, dirCsv, dirFestivos, variables, contaminant):
    """
    Train the neural network of a station with the records of one moment.

    Nothing is trained when the database returns no rows for that moment;
    a message is printed instead.

    :param fechaAyer: date of the previous day
    :type fechaAyer: date
    :param estacion: name of the station
    :type estacion: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirCsv: address of processed meteorology archives
    :type dirCsv: String
    :param dirFestivos: address of the file with the holidays
    :type dirFestivos: String
    :param variables: meteorological variables
    :type variables: string list
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    print(estacion)

    anio = str(fechaAyer.year)
    mes = numString(fechaAyer.month)
    dia = numString(fechaAyer.day)

    # Same day in the formats used by the callees below.
    fechaBuild = anio + '/' + mes + '/' + dia
    fecha = fechaBuild + ' ' + numString(fechaAyer.hour) + ':00:00'
    fechaMet = anio + "-" + mes + "-" + dia

    data = fd.readData(fecha, fecha, [estacion], contaminant)
    build = fd.buildClass2(data, [estacion], contaminant, 24, fechaBuild, fechaBuild)

    if data.empty:
        print("No se puede hacer el entrenamiento")
        return

    dataMet = unionMeteorologia(fechaMet, fechaAyer, dirCsv, variables).drop('fecha', axis=1)
    data = unionData(separateDate(data), fechaAyer, dirFestivos)
    data = df.concat([data, dataMet], axis=1)
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)

    # Preprocessing: the first two items of the result feed the training.
    prepared = an(data, build, contaminant)
    tr.training(prepared[0], prepared[1], estacion, dirTrain, contaminant, dirData)
def tiempo():
    """
    Time data loading, nn() training and nng() training over growing date
    ranges for the station at position 20, then plot the three series.

    The range always starts at startDate[20]; its end advances in steps of
    8760 * 2 hours until it passes endDate. Both losses of every step are
    appended to the module-level loss_vec. The figure is written to
    'tiempo.png' and shown.
    """
    load_times = []
    cpu_times = []
    gpu_times = []

    first = datetime.strptime(startDate[20], '%Y/%m/%d')
    last = datetime.strptime(endDate, '%Y/%m/%d')
    step = timedelta(hours=8760 * 2)
    station = est[20]

    current = first + step
    while current <= last:
        current_str = current.strftime('%Y/%m/%d')

        # Data loading and preprocessing.
        load_begin = time()
        data = FormatData.readData(first, current, [station], contaminant)
        build = FormatData.buildClass2(data, [station], contaminant, 24,
                                       startDate[20], current_str)
        xy_values = an(data, build, contaminant)
        load_end = time()

        # nn() run, plotted as 'time CPU'.
        cpu_begin = time()
        loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000, station, contaminant)
        loss_vec.append(loss)
        cpu_end = time()

        # nng() run, plotted as 'time GPU'.
        gpu_begin = time()
        loss = nng(xy_values[0], xy_values[1], xy_values[2], 1000)
        loss_vec.append(loss)
        gpu_end = time()

        load_times.append(load_end - load_begin)
        cpu_times.append(cpu_end - cpu_begin)
        gpu_times.append(gpu_end - gpu_begin)

        current += step

    plt.plot(load_times, 'g-', label='time Data base')
    plt.plot(cpu_times, 'k-', label='time CPU')
    plt.plot(gpu_times, 'r-', label='time GPU')
    plt.title('GPU vs CPU')
    plt.xlabel('Years')
    plt.ylabel('Time')
    plt.legend(loc='best')
    plt.savefig('tiempo.png', dpi=600)
    plt.show()
def estationsGpu():
    """
    Run nng() while adding one station at a time and plot the losses.

    Every pass adds the next station of the module-level ``est`` to the
    list handed to FormatData.readData and appends the resulting loss to
    the module-level loss_vec. The plot is written to 'estacionesGpu.png'
    and shown.
    """
    first_date = startDate[0]
    selected = []

    for station in est:
        selected.append(station)
        print(selected)
        data = FormatData.readData(first_date, endDate, selected, contaminant)
        # The class frame is always built for the first station only.
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24, first_date, endDate)
        xy_values = an(data, build, contaminant)
        loss = nng(xy_values[0], xy_values[1], xy_values[2], 1000)
        loss_vec.append(loss)

    print(loss_vec)
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("estacionesGpu.png", dpi=600)
    plt.show()
def iterationGpu():
    """
    Run nng() for the station at position 10 with 200, 400, ..., 3000 as
    its last argument and plot the losses.

    The data is loaded once; every loss is appended to the module-level
    loss_vec. The plot is written to 'iteraciones.png' and shown.
    """
    first_date = startDate[0]
    station = est[10]

    data = FormatData.readData(first_date, endDate, [station], contaminant)
    build = FormatData.buildClass2(data, [station], contaminant, 24, first_date, endDate)
    xy_values = an(data, build, contaminant)

    for iterations in range(200, 3001, 200):
        loss = nng(xy_values[0], xy_values[1], xy_values[2], iterations)
        loss_vec.append(loss)
        # Report the value the next pass will use.
        print(iterations + 200)

    print(loss_vec)
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de iteraciones de entrenamiento')
    plt.xlabel('Numero de iteraciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("iteraciones.png", dpi=600)
    plt.show()
def testData2():
    """
    Compare the time needed to load data through FormatData with the time
    needed to read the per-station CSV files, for 1 to 22 stations.

    Every pass adds one more station of the module-level ``est`` and
    measures both loading paths. The plot is written to
    'Graficas/tiempoDataBase2.png' and shown.
    """
    db_times = []
    csv_times = []
    stations = []

    for pos in range(22):
        stations.append(est[pos])
        print(stations)

        # Loading through FormatData.
        db_begin = time()
        data = FormatData.readData(startDate[pos], endDate, stations, contaminant)
        build = FormatData.buildClass2(data, stations, contaminant, 24, startDate[pos], endDate)
        db_end = time()

        # Loading the same stations from the CSV files.
        csv_begin = time()
        for station in stations:
            base = 'data/' + station + '_' + contaminant
            data = df.read_csv(base + '.csv')
            build = df.read_csv(base + '_pred.csv')
        csv_end = time()

        db_times.append(db_end - db_begin)
        csv_times.append(csv_end - csv_begin)

    plt.figure(figsize=(12.2, 6.4))
    plt.plot(csv_times, 'g-', label='time File')
    plt.plot(db_times, 'r-', label='time DataBase')
    plt.title('DataBase vs File')
    plt.xlabel('stations')
    plt.ylabel('Time (second)')
    plt.legend(loc='best')
    ticks = np.arange(len(est))
    plt.xticks(ticks, est, fontsize=7, rotation='vertical')
    plt.savefig('Graficas/tiempoDataBase2.png', dpi=600)
    plt.show()
def estations():
    """
    Run nn() while adding one station at a time and plot the losses.

    Every pass adds the next station of the module-level ``est`` to the
    list handed to FormatData.readData and appends the resulting loss to
    the module-level loss_vec. The plot, with one x tick per station, is
    written to 'estaciones.png' and shown.
    """
    start = startDate[0]
    estation = []
    for x in est:
        estation.append(x)
        print(estation)
        data = FormatData.readData(start, endDate, estation, contaminant)
        # The class frame and the nn() run always use the first station.
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24, start, endDate)
        xy_values = an(data, build, contaminant)
        temp_loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000, est[0], contaminant)
        loss_vec.append(temp_loss)

    print(loss_vec)
    plt.figure(figsize=(12.2, 6.4))
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    # NumPy has no np.range; np.arange yields the tick positions 0..len(est)-1.
    location = np.arange(len(est))
    plt.xticks(location, est, rotation='vertical')
    plt.savefig("estaciones.png", dpi=600)
    plt.show()
def update4hours(estacion, contaminant, fecha, dirData, dirTrain, dirCsv,dirFestivos, variables, fechaString):
    """
    Bring the forecast of a station up to date with the current date.

    The date of the last stored forecast is compared with ``fecha``. When
    the forecast is behind, the missing range is filled either with
    useClimatology or with pronostico_normal, depending on whether the
    database has records for that range and on how large the delay is.

    :param estacion: name of the weather station
    :type estacion: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param fecha: current day
    :type fecha: datetime
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param dirCsv: Address of processed meteorology archives
    :type dirCsv : String
    :param dirFestivos: address of the file with the holidays
    :type dirFestivos: String
    :param variables: meteorological variables
    :type variables: list(Strings)
    :param fechaString: forwarded unchanged to unionTotalMeteorologia
    :return: 1 when useClimatology or pronostico_normal was called,
             0 when nothing was forecast
    :type return: int
    """
    nameC = findT(contaminant)
    dataForecast = ultimate_data(estacion,nameC, 2,1)
    if dataForecast.empty:
        # No forecast has been stored for this station yet.
        print('No se ha hecho pronostico para la estacion:'+ estacion)
        return 0
    else:
        fechaUltima = dataForecast['fecha'][0]
        # Station-specific shift of the last forecast date.
        # NOTE(review): the reason for these hour offsets is not visible
        # in this function -- confirm with the data owners.
        if estacion == 'SFE':
            fechaUltima = fechaUltima -timedelta(hours=6)
        elif estacion == 'TAH':
            fechaUltima = fechaUltima - timedelta(hours=15)
        elif estacion == 'UAX':
            fechaUltima = fechaUltima - timedelta(hours=13)
        elif estacion == 'NEZ':
            fechaUltima = fechaUltima - timedelta(hours=11)
        # Step one day back from the (possibly shifted) forecast date.
        # NOTE(review): presumably because forecasts are stored 24 hours
        # ahead of the data they were built from -- TODO confirm.
        fechaUltima = fechaUltima - timedelta(days = 1)
        print('Fecha Actual: ' + str(fecha))
        print('Fecha Ultimo Registro: ' + str(fechaUltima))
        if fechaUltima == fecha:
            print('Pronostico actualizado')
            return 0
        elif fechaUltima < fecha:
            print('Pronostico retrasado')
            # The missing range starts one hour after the last record and
            # ends at the current date.
            fechaTemp = fechaUltima + timedelta(hours=1)
            fechaInicio = str(fechaTemp.year) + '-' + numString(fechaTemp.month) + '-' + numString(fechaTemp.day)+' '+numString(fechaTemp.hour)+':00:00'
            fechaFin = str(fecha.year) + '-' + numString(fecha.month) + '-' + numString(fecha.day)+' '+numString(fecha.hour)+':00:00'
            data = fd.readData(fechaInicio,fechaFin,[estacion],contaminant)
            data = data.drop_duplicates(keep='first')
            dataMet = unionTotalMeteorologia(fechaString,dirCsv,variables,fechaInicio,fechaFin)
            print('Numero de horas retrasado: ' + str(fecha-fechaUltima))
            if data.empty and (fecha-fechaUltima) > timedelta(hours=3):
                # No records and more than 3 hours behind: fill the whole
                # range with climatology.
                print('Pronostico con climatologia')
                useClimatology(contaminant,estacion,fechaTemp,fecha,dataMet,dirData,dirTrain, dirFestivos)
                return 1
            elif (fecha-fechaUltima) < timedelta(hours=3):
                # Less than 3 hours behind: nothing is forecast, whether
                # or not records exist.
                print('Climatologia cada 4 horas')
                return 0
            elif not(data.empty):
                primer_fecha = data['fecha'][0]
                if primer_fecha > fechaTemp:
                    # Records start later than the missing range: cover
                    # the hours before the first record with climatology.
                    fechaFinClim = primer_fecha - timedelta(hours=1)
                    useClimatology(contaminant,estacion,fechaTemp,fechaFinClim,dataMet,dirData,dirTrain,dirFestivos)
                    #pronostico_normal(data,dirFestivos,dataMet,estacion,contaminant,dirData,dirTrain)
                    return 1
                else:
                    pronostico_normal(data,dirFestivos,dataMet,estacion,contaminant,dirData,dirTrain)
                    print('Pronostico normal C')
                    return 1
            elif data.empty:
                # Only reachable with no records and a delay of exactly
                # 3 hours (neither > 3h nor < 3h).
                # NOTE(review): confirm this boundary gap is intended.
                print('No hay datos para la prediccion')
                return 0
            else:
                # NOTE(review): appears unreachable -- the two branches
                # above already cover both values of data.empty.
                print('Pronostico normal')
                pronostico_normal(data,dirFestivos,dataMet,estacion,contaminant,dirData,dirTrain)
                return 1
        else:
            # fechaUltima is later than fecha.
            print('Pronostico actualizado')
            return 0