def testData():
    """Benchmark data loading: database reads versus pre-exported CSV files.

    For each of the 22 stations, times the FormatData database read plus
    preprocessing, then times the same preprocessing fed from CSV files
    under ``data/``, and plots both series to ``tiempoDataBase.png``.

    NOTE(review): relies on module-level ``est``, ``startDate``,
    ``endDate``, ``contaminant``, ``FormatData``, ``df`` (pandas),
    ``an``, ``time``, ``plt`` and ``np``.
    """
    dataBase_time = []
    file_time = []
    for idx in range(22):
        station = est[idx]
        print(station)
        # --- database-backed path ---
        t_db_start = time()
        data = FormatData.readData(startDate[idx], endDate, [est[idx]], contaminant)
        build = FormatData.buildClass2(data, [est[idx]], contaminant, 24,
                                       startDate[idx], endDate)
        xy_values = an(data, build, contaminant)
        t_db_end = time()
        # --- CSV-file-backed path ---
        t_file_start = time()
        name = station + '_' + contaminant
        data = df.read_csv('data/' + name + '.csv')
        build = df.read_csv('data/' + name + '_pred.csv')
        xy_values = an(data, build, contaminant)
        t_file_end = time()
        dataBase_time.append(t_db_end - t_db_start)
        file_time.append(t_file_end - t_file_start)
    plt.figure(figsize=(12.2, 6.4))
    plt.plot(file_time, 'k-', label='time File')
    plt.plot(dataBase_time, 'r-', label='time DataBase')
    plt.title('DataBase vs File')
    plt.xlabel('stations')
    plt.ylabel('Time (second)')
    plt.legend(loc='best')
    location = np.arange(len(est))
    plt.xticks(location, est, fontsize=7, rotation='vertical')
    plt.savefig('tiempoDataBase.png', dpi=600)
    plt.show()
def trainNeuralNetworksKeras(est, dirr, dirTrain, fechaFinal, contaminant, iteraciones):
    """Train the neural network of every station in ``est`` from CSV data.

    For each station whose data file exists under ``dirr + 'B' + contaminant``,
    loads the data and prediction CSVs, keeps only rows dated before
    ``fechaFinal``, fills missing values with -1, preprocesses with ``an``
    and trains via ``nngk``, saving the model files into ``dirTrain``.

    :param est: names of the stations to train
    :type est: list of String
    :param dirr: direction of training data
    :type dirr: String
    :param dirTrain: direction of the neural network training files
    :type dirTrain: String
    :param fechaFinal: final date of training
    :type fechaFinal: date
    :param contaminant: name of the pollutants
    :type contaminant: String
    :param iteraciones: number of training iterations passed to ``nngk``
    :type iteraciones: int
    """
    # Create the output directory once; it is loop-invariant
    # (the original re-checked it on every iteration).
    if not os.path.exists(dirTrain):
        os.makedirs(dirTrain)
    tamLen = []  # row count per trained station (kept for parity with original)
    # BUG FIX: the original while-loop duplicated the manual `i += 1`
    # increment in both the if and else branches; a for-loop removes the
    # counter entirely and cannot skip or repeat a station.
    for station in est:
        print(station)
        name = station + '_' + contaminant  # name the file with the data
        newD = dirr + 'B' + contaminant + '/' + name
        print(newD + '.csv')
        print(os.path.exists(newD + '.csv'))
        if not os.path.exists(newD + '.csv'):
            continue  # no data for this station; skip it
        data = df.read_csv(newD + '.csv')  # we load the data in the Variable data
        build = df.read_csv(newD + '_pred.csv')  # we load the data in the Variable build
        # Keep only the training window strictly before fechaFinal.
        data = data[data['fecha'] < fechaFinal]
        build = build[build['fecha'] < fechaFinal]
        tamLen.append(len(data.index))
        # Missing measurements are encoded as -1 for the network.
        data = data.fillna(value=-1)
        build = build.fillna(value=-1)
        xy_values = an(data, build, contaminant)  # preprocessing
        # The neural network is trained.
        nngk(xy_values[0], xy_values[1], xy_values[2], iteraciones, station,
             contaminant, dirTrain)
def training(fechaAyer, estacion, dirTrain, dirData, dirCsv, dirFestivos,
             variables, contaminant):
    """Train the neural network with the information of 24 hours before.

    :param fechaAyer: date of the previous day
    :type fechaAyer: date
    :param estacion: name the station
    :type estacion: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirCsv: address of processed meteorology archives
    :type dirCsv: String
    :param dirFestivos: address of the file with the holidays
    :type dirFestivos: String
    :param variables: meteorological variables
    :type variables: string list
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    print(estacion)
    # Build the three date strings once from shared zero-padded components.
    anio = str(fechaAyer.year)
    mes = numString(fechaAyer.month)
    dia = numString(fechaAyer.day)
    hora = numString(fechaAyer.hour)
    fecha = anio + '/' + mes + '/' + dia + ' ' + hora + ':00:00'
    fechaMet = anio + '-' + mes + '-' + dia
    fechaBuild = anio + '/' + mes + '/' + dia
    data = fd.readData(fecha, fecha, [estacion], contaminant)
    build = fd.buildClass2(data, [estacion], contaminant, 24, fechaBuild, fechaBuild)
    if data.empty:
        # Nothing to train on for this station/date.
        print("No se puede hacer el entrenamiento")
        return
    # Join pollutant data with meteorology, calendar and holiday features.
    dataMet = unionMeteorologia(fechaMet, fechaAyer, dirCsv, variables)
    dataMet = dataMet.drop('fecha', axis=1)
    data = separateDate(data)
    data = unionData(data, fechaAyer, dirFestivos)
    data = df.concat([data, dataMet], axis=1)
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)
    xy_values = an(data, build, contaminant)  # preprocessing
    tr.training(xy_values[0], xy_values[1], estacion, dirTrain, contaminant, dirData)
def tiempo():
    """Benchmark CPU vs GPU training time as the data window grows.

    Starting from station ``est[20]``'s start date, widens the read window
    in two-year steps; for each window, times the database read/build, a
    CPU training run (``nn``) and a GPU training run (``nng``), then plots
    the three series to ``tiempo.png``.

    NOTE(review): appends losses to the module-level ``loss_vec``.
    """
    time_cpu = []
    time_gpu = []
    time_base = []
    start = datetime.strptime(startDate[20], '%Y/%m/%d')
    end = datetime.strptime(endDate, '%Y/%m/%d')
    step = timedelta(hours=8760 * 2)  # two years, expressed in hours
    estation = est[20]
    date = start + step
    while date <= end:
        sDate = date.strftime('%Y/%m/%d')
        # Database read + preprocessing for the current window.
        t_data = time()
        data = FormatData.readData(start, date, [estation], contaminant)
        build = FormatData.buildClass2(data, [estation], contaminant, 24,
                                       startDate[20], sDate)
        xy_values = an(data, build, contaminant)
        elapsed_base = time() - t_data
        # CPU training run.
        t_cpu = time()
        temp_loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000,
                       estation, contaminant)
        loss_vec.append(temp_loss)
        elapsed_cpu = time() - t_cpu
        # GPU training run.
        t_gpu = time()
        temp_loss = nng(xy_values[0], xy_values[1], xy_values[2], 1000)
        loss_vec.append(temp_loss)
        elapsed_gpu = time() - t_gpu
        time_base.append(elapsed_base)
        time_cpu.append(elapsed_cpu)
        time_gpu.append(elapsed_gpu)
        date = date + step
    plt.plot(time_base, 'g-', label='time Data base')
    plt.plot(time_cpu, 'k-', label='time CPU')
    plt.plot(time_gpu, 'r-', label='time GPU')
    plt.title('GPU vs CPU')
    plt.xlabel('Years')
    plt.ylabel('Time')
    plt.legend(loc='best')
    plt.savefig('tiempo.png', dpi=600)
    plt.show()
def estationsGpu():
    """Plot GPU training loss as stations are added one at a time.

    Trains ``nng`` on a growing prefix of ``est`` (1 station, then 2, ...)
    and saves the loss curve to ``estacionesGpu.png``.

    NOTE(review): appends to the module-level ``loss_vec``.
    """
    start = startDate[0]
    for count in range(1, len(est) + 1):
        estation = est[:count]  # growing prefix of stations
        print(estation)
        data = FormatData.readData(start, endDate, estation, contaminant)
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24,
                                       start, endDate)
        xy_values = an(data, build, contaminant)
        temp_loss = nng(xy_values[0], xy_values[1], xy_values[2], 1000)
        loss_vec.append(temp_loss)
    print(loss_vec)
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("estacionesGpu.png", dpi=600)
    plt.show()
def iterationGpu():
    """Plot GPU training loss against the number of training iterations.

    Trains ``nng`` on station ``est[10]`` with iteration counts
    200, 400, ..., 3000 and saves the loss curve to ``iteraciones.png``.

    NOTE(review): appends to the module-level ``loss_vec``.
    """
    start = startDate[0]
    estation = est[10]
    data = FormatData.readData(start, endDate, [estation], contaminant)
    build = FormatData.buildClass2(data, [est[10]], contaminant, 24,
                                   start, endDate)
    xy_values = an(data, build, contaminant)
    for its in range(200, 3001, 200):
        temp_loss = nng(xy_values[0], xy_values[1], xy_values[2], its)
        loss_vec.append(temp_loss)
        # The original printed the counter *after* advancing it,
        # so the value shown is the next iteration count.
        print(its + 200)
    print(loss_vec)
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de iteraciones de entrenamiento')
    plt.xlabel('Numero de iteraciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("iteraciones.png", dpi=600)
    plt.show()
def estations():
    """Plot CPU training loss as stations are added one at a time.

    Trains ``nn`` on a growing list of stations (1 station, then 2, ...)
    and saves the loss curve to ``estaciones.png`` with one x-tick per
    station name.

    NOTE(review): appends to the module-level ``loss_vec``.
    """
    start = startDate[0]
    estation = []
    for x in est:
        estation += [x]  # grow the station list by one each pass
        print(estation)
        data = FormatData.readData(start, endDate, estation, contaminant)
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24,
                                       start, endDate)
        xy_values = an(data, build, contaminant)
        temp_loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000,
                       est[0], contaminant)
        loss_vec.append(temp_loss)
    print(loss_vec)
    plt.figure(figsize=(12.2, 6.4))
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    # BUG FIX: the original called np.range(), which does not exist and
    # raises AttributeError; np.arange() is correct (and matches testData()).
    location = np.arange(len(est))
    plt.xticks(location, est, rotation='vertical')
    plt.savefig("estaciones.png", dpi=600)
    plt.show()