def saveData2(listEstations, startDate, nameContaminant, endDate, dirr, dirTotalCsv, contaminant):
    """ Function for the save data in the type file .csv

    For every station, reads the raw measurements and the prediction target
    from the database, merges them on the 'fecha' (date) column, splits the
    result into a feature file and a prediction file, and writes both as CSV.

    :param listEstations: list with stations
    :type listEstations: String list
    :param startDate: start date (one entry per station)
    :type startDate: date
    :param nameContaminant: name of the pollutant in the database
    :type nameContaminant: String
    :param endDate: end date
    :type endDate: date
    :param dirr: direction of save data
    :type dirr: String
    :param dirTotalCsv: address of the cvs files
    :type dirTotalCsv: String
    :param contaminant: name pollutant
    """
    #createFile()
    est = listEstations
    tam = len(est) - 1
    i = 0
    while i <= tam:  # 21 stations expected; iterate over every station index
        print(est[i])
        print(startDate[i])
        # Column / file names derived from the station and pollutant.
        nameDelta = nameContaminant + est[i] + '_delta'
        nameD = est[i] + '_' + contaminant + '.csv'
        nameB = est[i] + '_' + contaminant + '_pred.csv'
        # Raw readings and the 24-hour-ahead target class for this station.
        tempData = fd.readData(startDate[i], endDate, [est[i]], contaminant)
        tempBuild = fd.buildClass2(tempData, [est[i]], contaminant, 24,
                                   startDate[i], endDate)
        # Drop columns that are entirely NaN, then mark remaining gaps with -1.
        temAllData = tempData.dropna(axis=1, how='all')
        # allD = temAllData.dropna(axis=0,how='any')
        allD = temAllData.fillna(value=-1)
        allD = allD.reset_index()
        allD = allD.drop(labels='index', axis=1)
        # Attach the target column by date; left join keeps every reading row.
        allData = allD.merge(tempBuild, how='left', on='fecha')
        # 'build' holds date + target; 'data' holds the features (target removed).
        build = df.DataFrame(allData['fecha'], columns=['fecha'])
        val = df.DataFrame(allData[nameDelta], columns=[nameDelta])
        build[nameDelta] = val
        data = allData.drop(labels=nameDelta, axis=1)
        data = data.reset_index()
        build = build.reset_index()
        build = build.drop(labels='index', axis=1)
        data = data.drop(labels='index', axis=1)
        # Expand the date column and join extra CSV data (external helpers —
        # presumably calendar fields and meteorology; confirm in their modules).
        dataTemp = separateDate(data)
        dataTemp2 = unionData(dataTemp, dirTotalCsv)
        maxAndMinValues(dataTemp2, est[i], contaminant, dirr)
        data = dataTemp2
        # Remove duplicate rows, then keep only the 'build' rows matching 'data'.
        data = data.drop_duplicates(keep='first')
        build = build.drop_duplicates(keep='first')
        build = filterData(data, build)
        data.to_csv(dirr + nameD, encoding='utf-8',
                    index=False)  # save the data in file "data/[station_contaminant].csv"
        build.to_csv(dirr + nameB, encoding='utf-8',
                     index=False)  # save the data in file "data/[station_contaminant_pred].csv"
        i += 1
def training(fechaAyer, estacion, dirTrain, dirData, dirCsv, dirFestivos, variables, contaminant):
    """ function to train the neural network with the information of 24 hours before

    Builds the feature row for the given past date, joins meteorology and
    holiday data, filters it to the columns of the station's training CSV,
    and hands the (x, y) pair to the trainer. Skips training when no data
    is available for the date.

    :param fechaAyer: date of the previous day
    :type fechaAyer: date
    :param estacion: name the station
    :type estacion: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param dirFestivos: address of the file with the holidays
    :type dirFestivos: String
    :param dirCsv: Address of processed meteorology archives
    :type dirCsv : String
    :param variables: meteorological variables
    :type variables: string list
    :param contaminant: name of the pollutant
    """
    print(estacion)
    # Three date formats for the different consumers:
    # 'YYYY/MM/DD HH:00:00' for the DB query, 'YYYY-MM-DD' for meteorology
    # files, 'YYYY/MM/DD' for the class builder.
    fecha = str(fechaAyer.year) + '/' + numString(
        fechaAyer.month) + '/' + numString(fechaAyer.day) + ' ' + numString(
        fechaAyer.hour) + ':00:00'
    fechaMet = str(fechaAyer.year) + "-" + numString(
        fechaAyer.month) + "-" + numString(fechaAyer.day)
    fechaBuild = str(fechaAyer.year) + "/" + numString(
        fechaAyer.month) + "/" + numString(fechaAyer.day)
    data = fd.readData(fecha, fecha, [estacion], contaminant)
    build = fd.buildClass2(data, [estacion], contaminant, 24, fechaBuild,
                           fechaBuild)
    if data.empty:
        # No readings for that date: training is not possible.
        print("No se puede hacer el entrenamiento")
    else:
        # Meteorology columns for the same day (the duplicate date column
        # is dropped before concatenation).
        dataMet = unionMeteorologia(fechaMet, fechaAyer, dirCsv, variables)
        dataMet = dataMet.drop('fecha', axis=1)
        data = separateDate(data)
        data = unionData(data, fechaAyer, dirFestivos)
        data = df.concat([data, dataMet], axis=1)
        # Keep only the columns used by this station's stored training set.
        data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
        data = data.fillna(value=-1)
        xy_values = an(data, build, contaminant)  # preprocessing
        tr.training(xy_values[0], xy_values[1], estacion, dirTrain,
                    contaminant, dirData)
def tiempo():
    """Benchmark data loading plus CPU vs GPU training time.

    Walks the date range of station index 20 in two-year (8760*2 hour)
    steps; at each step it times the database read/preprocessing, a CPU
    training run (nn) and a GPU training run (nng), then plots the three
    timing curves and saves the figure as 'tiempo.png'.

    Relies on module-level globals: startDate, endDate, est, contaminant,
    loss_vec, FormatData, an, nn, nng, plt.
    """
    time_cpu = []
    time_gpu = []
    time_base = []
    start = datetime.strptime(startDate[20], '%Y/%m/%d')
    end = datetime.strptime(endDate, '%Y/%m/%d')
    dy = 8760 * 2  # step size in hours: two years of hourly data
    estation = est[20]
    date = start + timedelta(hours=dy)
    while date <= end:
        sDate = date.strftime('%Y/%m/%d')
        # --- time the database read + preprocessing ---
        initData = time()
        data = FormatData.readData(start, date, [estation], contaminant)
        build = FormatData.buildClass2(data, [estation], contaminant, 24,
                                       startDate[20], sDate)
        xy_values = an(data, build, contaminant)
        finData = time()
        # --- time CPU training (1000 iterations) ---
        initCpu = time()
        temp_loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000,
                       estation, contaminant)
        loss_vec.append(temp_loss)
        finCpu = time()
        # --- time GPU training (1000 iterations) ---
        initGpu = time()
        temp_loss = nng(xy_values[0], xy_values[1], xy_values[2], 1000)
        loss_vec.append(temp_loss)
        finGpu = time()
        totalCpu = finCpu - initCpu
        totalGpu = finGpu - initGpu
        totalBase = finData - initData
        time_base.append(totalBase)
        time_cpu.append(totalCpu)
        time_gpu.append(totalGpu)
        date = date + timedelta(hours=dy)
    # Plot the three timing series and persist the chart.
    plt.plot(time_base, 'g-', label='time Data base')
    plt.plot(time_cpu, 'k-', label='time CPU')
    plt.plot(time_gpu, 'r-', label='time GPU')
    plt.title('GPU vs CPU')
    plt.xlabel('Years')
    plt.ylabel('Time')
    plt.legend(loc='best')
    plt.savefig('tiempo.png', dpi=600)
    plt.show()
def estationsGpu():
    """Plot GPU training loss as the number of stations grows.

    Adds one station at a time to the working set, retrains on GPU (nng)
    with the cumulative station list, collects each loss into loss_vec,
    and saves the loss curve as 'estacionesGpu.png'.

    Relies on module-level globals: startDate, endDate, est, contaminant,
    loss_vec, FormatData, an, nng, plt.

    NOTE(review): loop extent reconstructed from the plot semantics
    ("error while increasing the number of stations") — the read/train
    steps are assumed to run once per added station; confirm against the
    original source.
    """
    start = startDate[0]
    estation = []
    for x in est:
        estation += [x]  # cumulative station list grows by one each pass
        print(estation)
        data = FormatData.readData(start, endDate, estation, contaminant)
        # Target is always built against the first station only.
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24,
                                       start, endDate)
        xy_values = an(data, build, contaminant)
        temp_loss = nng(xy_values[0], xy_values[1], xy_values[2], 1000)
        loss_vec.append(temp_loss)
    print(loss_vec)
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("estacionesGpu.png", dpi=600)
    plt.show()
def iterationGpu():
    """Plot GPU training loss as the iteration count grows.

    Trains repeatedly on station index 10 with 200, 400, ..., 3000 GPU
    training iterations, collects each loss into loss_vec, and saves the
    loss curve as 'iteraciones.png'.

    Relies on module-level globals: startDate, endDate, est, contaminant,
    loss_vec, FormatData, an, nng, plt.
    """
    i = 200  # starting iteration count; stepped by 200 up to 3000
    start = startDate[0]
    estation = est[10]
    data = FormatData.readData(start, endDate, [estation], contaminant)
    build = FormatData.buildClass2(data, [est[10]], contaminant, 24,
                                   start, endDate)
    xy_values = an(data, build, contaminant)
    while i <= 3000:
        temp_loss = nng(xy_values[0], xy_values[1], xy_values[2], i)
        loss_vec.append(temp_loss)
        i = i + 200
        print(i)
    print(loss_vec)
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de iteraciones de entrenamiento')
    plt.xlabel('Numero de iteraciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("iteraciones.png", dpi=600)
    plt.show()
def testData2():
    """Compare read times: database vs pre-saved CSV files.

    Grows the station set one at a time (indices 0..21); for each size it
    times a full database read/build against reading the equivalent
    per-station CSV files, then plots both series with station names on
    the x-axis and saves 'Graficas/tiempoDataBase2.png'.

    Relies on module-level globals: startDate, endDate, est, contaminant,
    FormatData, plt, np.

    NOTE(review): the file-read timer stop is placed after the inner for
    loop (mirroring the database timer); confirm against the original
    source.
    """
    i = 0
    dataBase_time = []
    file_time = []
    s = []
    while i <= 21:
        s.append(est[i])  # cumulative station list
        print(s)
        # --- time the database path for the current station set ---
        init_dataBase = time()
        data = FormatData.readData(startDate[i], endDate, s, contaminant)
        build = FormatData.buildClass2(data, s, contaminant, 24,
                                       startDate[i], endDate)
        #xy_values = an(data,build, contaminant);
        fin_dataBase = time()
        # --- time the CSV-file path for the same station set ---
        init_fileTime = time()
        for x in s:
            station = x
            name = station + '_' + contaminant
            data = df.read_csv('data/' + name + '.csv')
            build = df.read_csv('data/' + name + '_pred.csv')
            #xy_values = an(data,build, contaminant);
        fin_fileTime = time()
        total_dataBase = fin_dataBase - init_dataBase
        total_file = fin_fileTime - init_fileTime
        dataBase_time.append(total_dataBase)
        file_time.append(total_file)
        i += 1
    plt.figure(figsize=(12.2, 6.4))
    plt.plot(file_time, 'g-', label='time File')
    plt.plot(dataBase_time, 'r-', label='time DataBase')
    plt.title('DataBase vs File')
    plt.xlabel('stations')
    plt.ylabel('Time (second)')
    plt.legend(loc='best')
    location = np.arange(len(est))
    plt.xticks(location, est, fontsize=7, rotation='vertical')
    plt.savefig('Graficas/tiempoDataBase2.png', dpi=600)
    plt.show()
def estations():
    """Plot CPU training loss as the number of stations grows.

    Adds one station at a time to the working set, retrains on CPU (nn)
    with the cumulative station list, collects each loss into loss_vec,
    and saves the loss curve as 'estaciones.png' with station names on
    the x-axis.

    Relies on module-level globals: startDate, endDate, est, contaminant,
    loss_vec, FormatData, an, nn, plt, np.

    NOTE(review): loop extent reconstructed from the plot semantics
    ("error while increasing the number of stations") — the read/train
    steps are assumed to run once per added station; confirm against the
    original source.
    """
    start = startDate[0]
    estation = []
    for x in est:
        estation += [x]  # cumulative station list grows by one each pass
        print(estation)
        data = FormatData.readData(start, endDate, estation, contaminant)
        # Target is always built against the first station only.
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24,
                                       start, endDate)
        xy_values = an(data, build, contaminant)
        temp_loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000,
                       est[0], contaminant)
        loss_vec.append(temp_loss)
    print(loss_vec)
    plt.figure(figsize=(12.2, 6.4))
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    # BUG FIX: np.range does not exist (AttributeError at runtime);
    # np.arange is the correct call, matching its use in testData2().
    location = np.arange(len(est))
    plt.xticks(location, est, rotation='vertical')
    plt.savefig("estaciones.png", dpi=600)
    plt.show()