def pronostico_normal(data, dirFestivos, dataMet, estacion, contaminant, dirData, dirTrain):
    """Run the standard (non-climatology) forecast for one station/pollutant.

    Builds the feature matrix from the query data plus meteorological data,
    normalizes each row, runs the neural-network prediction, de-normalizes the
    result and stores one forecast record per hour.

    :param data: raw query data for the station
    :type data: DataFrame
    :param dirFestivos: directory with the holidays file used by totalUnionData
    :type dirFestivos: String
    :param dataMet: meteorological data joined column-wise with the query data
    :type dataMet: DataFrame
    :param estacion: name of the weather station
    :type estacion: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param dirData: directory of the files with training information
    :type dirData: String
    :param dirTrain: directory of the neural-network training files
    :type dirTrain: String
    :return: 1 on completion
    """
    data = data.reset_index(drop=True)
    data = separateDate(data)
    data = totalUnionData(data, dirFestivos)
    data = df.concat([data, dataMet], axis=1, join='inner')
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    # Missing values are encoded as -1 for the network.
    data = data.fillna(value=-1)
    index = data.index.values
    arrayPred = []
    for x in index:
        # .iloc replaces the removed pandas .ix indexer; positions match labels
        # because the index was reset with drop=True above.
        pred = data.iloc[x].values
        # Skip the first two columns (date-related), keep only feature values.
        valPred = pred[2:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    columnContaminant = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, columnContaminant + '_')
    for xs in range(len(real)):
        # NOTE(review): data['fecha'] appears to select duplicated 'fecha'
        # columns (from the concat above), so .iloc[xs] yields a Series and
        # element [1] is presumably dataMet's timestamp — confirm against data.
        fechaPronostico = data['fecha'].iloc[xs].values
        fechaPronostico = datetime.strptime(fechaPronostico[1], '%Y-%m-%d %H:%M:%S')
        # Forecast is stored against the previous day.
        fechaPronostico = fechaPronostico - timedelta(days=1)
        pronostico = real[xs]
        guardarPrediccion(estacion, fechaPronostico, [pronostico], contaminant, 3)
    return 1
def useClimatology(contaminant, estacion, fechaInicio, fechaFinal, dataMet, dirData, dirTrain, dirFestivos):
    """Make the forecast using climatologies as a fallback data source.

    :param contaminant: name of the pollutant
    :type contaminant: String
    :param estacion: name of the weather station
    :type estacion: String
    :param fechaInicio: start of the date range with which the query values are extracted
    :type fechaInicio: datetime
    :param fechaFinal: end of the date range with which the query values are extracted
    :type fechaFinal: datetime
    :param dataMet: dataframe with the climatological information
    :type dataMet: DataFrame
    :param dirData: directory of the files with training information
    :type dirData: String
    :param dirTrain: directory of the neural-network training files
    :type dirTrain: String
    :param dirFestivos: directory with the holidays file used by totalUnionData
    :type dirFestivos: String
    """
    data = fd.get_climatology(fechaInicio, fechaFinal, estacion)
    data = makeDates(fechaInicio, fechaFinal, data)
    data = data.reset_index(drop=True)
    data = separateDate(data)
    data = totalUnionData(data, dirFestivos)
    data = df.concat([data, dataMet], axis=1, join='inner')
    # Fill before and after filtering so columns added by the join and columns
    # kept by filterData are both free of NaN (-1 is the sentinel the net expects).
    data = data.fillna(value=-1)
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)
    index = data.index.values
    arrayPred = []
    for x in index:
        # .iloc replaces the removed pandas .ix indexer; positions match labels
        # because the index was reset with drop=True above.
        pred = data.iloc[x].values
        # Skip the first two columns (date-related), keep only feature values.
        valPred = pred[2:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    columnContaminant = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, columnContaminant + '_')
    # One prediction per hour starting at fechaInicio; each is stored against
    # the previous day.
    fechaPronostico = fechaInicio
    for xs in real:
        print(fechaPronostico)
        fechaUpdate = fechaPronostico
        fechaUpdate = fechaUpdate - timedelta(days=1)
        guardarPrediccion(estacion, fechaUpdate, [xs], contaminant, 5)
        fechaPronostico = fechaPronostico + timedelta(hours=1)
    print('Climatologia:' + estacion)
def forecastDateKeras(station, dirData, dirrDataC, dirTrain, contaminant, columnContaminant, fechaInicio, fechaFin, dirTotalCsv):
    """Make a Keras-based forecast for a station over a date range.

    When the pollutant query returns no rows, falls back to backup data and
    runs a single Keras prediction; otherwise builds the full feature matrix
    and stores the de-normalized predictions.

    :param station: name of the station
    :type station: String
    :param dirData: directory of the files with training information
    :type dirData: String
    :param dirrDataC: directory of complementary data files
    :type dirrDataC: String
    :param dirTrain: directory of the neural-network training files
    :type dirTrain: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param columnContaminant: name of the pollutant column in the DataFrame
    :type columnContaminant: String
    :param fechaInicio: start date of the forecast
    :type fechaInicio: date
    :param fechaFin: end date of the forecast
    :type fechaFin: date
    :param dirTotalCsv: directory used by unionData to merge extra csv data
    :type dirTotalCsv: String
    """
    sta = station
    name = sta + '_' + contaminant
    tempData = baseContaminantes(fechaInicio, fechaFin, station, contaminant)
    if tempData.empty:
        # No query data: fall back to the most recent backup row.
        dataBackup = back(dirData, contaminant)
        data = dataBackup
        data = data.fillna(value=-1)
        data = filterData(data, dirData + name + ".csv")
        data = data.fillna(value=-1)
        # .iloc replaces the removed pandas .ix indexer.
        temp = data.iloc[0].values
        temp = temp[1:]
        dataPred = pre.normalize(temp, sta, contaminant, dirData)
        dataPred = convert(dataPred)
        # NOTE(review): this prediction is computed but not saved — confirm
        # whether the fallback branch should also persist its result.
        prediccion = preK.prediction(sta, contaminant, [dataPred], dirTrain, dirData)
    else:
        data = tempData.dropna(axis=1, how='all')
        data = data.fillna(value=-1)
        data = data.reset_index(drop=True)
        data = separateDate(data)
        data = unionData(data, dirTotalCsv)
        data = data.drop_duplicates(keep='first')
        data = filterData(data, dirData + name + '.csv')
        data = data.fillna(value=-1)
        dataTemp = data['fecha']
        index = data.index.values
        arrayPred = []
        for x in index:
            # .iloc is safe here: the index was reset with drop=True above.
            pred = data.iloc[x].values
            valPred = pred[1:]
            valNorm = pre.normalize(valPred, sta, contaminant, dirData)
            arrayPred.append(convert(valNorm))
        result = pre.prediction(sta, contaminant, arrayPred, dirTrain, dirData)
        real = desNorm(result, sta, contaminant, dirData, columnContaminant)
        dataPrediccion = real
        savePrediccion1(station, dataPrediccion, contaminant, dataTemp)
def prediccion(estacion, data, dirData, dirTrain, contaminant):
    """Send one row of data to the neural network and return the de-normalized prediction.

    :param estacion: name of the station
    :type estacion: String
    :param data: information for the prediction (first row is used)
    :type data: DataFrame
    :param dirData: directory of the files with training information
    :type dirData: String
    :param dirTrain: directory of the neural-network training files
    :type dirTrain: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :return: prediction values
    :rtype: float32
    """
    # .iloc replaces the removed pandas .ix indexer; only the first row is predicted.
    temp = data.iloc[0].values
    # Drop the leading (date) column, keep feature values only.
    temp = temp[1:]
    dataPred = pre.normalize(temp, estacion, contaminant, dirData)
    dataPred = convert(dataPred)
    prediccion = pre.prediction(estacion, contaminant, [dataPred], dirTrain, dirData)
    print(prediccion)
    columnContaminant = findTable2(contaminant)
    prediccion1 = pre.desNorm(prediccion, estacion, contaminant, dirData, columnContaminant + '_')
    return prediccion1
def _pronostico_correlacion(data, dataMet, estacion, contaminant, dirData, dirTrain, dirFestivos, estacion_corr):
    """Run the forecast pipeline on correlated-station data and store the results.

    Renames the correlated station's columns to the target station's name,
    builds the feature matrix, predicts, de-normalizes and saves one record
    per hour (dated one day back).
    """
    data = data.drop_duplicates(keep='first')
    data = data.reset_index(drop=True)
    # Make the correlated station's columns pose as the target station's.
    index_values = data.columns.values[1:]
    for xs in index_values:
        data.rename(columns={xs: xs.replace(estacion_corr.lower(), estacion.lower())}, inplace=True)
    data = separateDate(data)
    data = totalUnionData(data, dirFestivos)
    data = df.concat([data, dataMet], axis=1, join='inner')
    print(data)
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)
    index = data.index.values
    arrayPred = []
    for x in index:
        # .iloc replaces the removed pandas .ix indexer; positions match labels
        # because the index was reset with drop=True above.
        pred = data.iloc[x].values
        valPred = pred[2:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    columnContaminant = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, columnContaminant + '_')
    for xs in range(len(real)):
        # NOTE(review): data['fecha'] appears to select duplicated 'fecha'
        # columns (from the concat), so element [1] is presumably dataMet's
        # timestamp — confirm against the data.
        fechaPronostico = data['fecha'].iloc[xs].values
        fechaPronostico = datetime.strptime(fechaPronostico[1], '%Y-%m-%d %H:%M:%S')
        fechaPronostico1 = fechaPronostico - timedelta(days=1)
        pronostico = real[xs]
        guardarPrediccion(estacion, fechaPronostico1, [pronostico], contaminant, 5)


def dataCorrelacion(contaminant, estacion, fechaInicio, fechaFin, dataMet, dirData, dirTrain, dirFestivos):
    """Forecast using the most correlated station's data as a proxy.

    Looks up the two stations most correlated with `estacion`; uses the first
    one that has data in the range, falling back to climatology when neither
    has data.

    :param contaminant: name of the pollutant
    :type contaminant: String
    :param estacion: name of the target weather station
    :type estacion: String
    :param fechaInicio: start of the date range
    :type fechaInicio: datetime
    :param fechaFin: end of the date range
    :type fechaFin: datetime
    :param dataMet: dataframe with the meteorological information
    :type dataMet: DataFrame
    :param dirData: directory of the files with training information
    :type dirData: String
    :param dirTrain: directory of the neural-network training files
    :type dirTrain: String
    :param dirFestivos: directory with the holidays file
    :type dirFestivos: String
    """
    data_Corr = df.read_csv('/media/storageBK/AirQualityForecast/Scripts/ContaminationForecast/Data/Correlacion_table.csv', index_col=0)
    corr_est = data_Corr[estacion].sort_values(ascending=False)
    # index[0] is the station itself; index[1] is the most correlated other station.
    estacion_corr = corr_est.index[1]
    print('Estacion usada para la correlacion: ' + estacion_corr)
    data = fd.readData_corr(fechaInicio, fechaFin, [estacion_corr], contaminant)
    if data.empty:
        print('Estacion: ' + estacion_corr + ' no tiene datos')
        # First choice has no data: try the second most correlated station.
        estacion_corr = corr_est.index[2]
        data = fd.readData_corr(fechaInicio, fechaFin, [estacion_corr], contaminant)
        print('Estacion usada para la correlacion: ' + estacion_corr)
        if data.empty:
            print('Estacion: ' + estacion_corr + ' no tiene datos')
            # Neither correlated station has data: fall back to climatology.
            useClimatology(contaminant, estacion, fechaInicio, fechaFin, dataMet, dirData, dirTrain, dirFestivos)
        else:
            _pronostico_correlacion(data, dataMet, estacion, contaminant, dirData, dirTrain, dirFestivos, estacion_corr)
    else:
        _pronostico_correlacion(data, dataMet, estacion, contaminant, dirData, dirTrain, dirFestivos, estacion_corr)
def trialk(station, dirData, dirrDataC, dirGraficas, dirTrain, contaminant, columnContaminant, fechaInicio, fechaFin):
    """Make the forecast of a whole period and graph observed vs predicted.

    :param station: name of the station
    :type station: String
    :param dirData: directory of the files with training information
    :type dirData: String
    :param dirrDataC: directory with the csv data used for the comparison
    :type dirrDataC: String
    :param dirGraficas: directory where the graphics are saved
    :type dirGraficas: String
    :param dirTrain: directory of the neural-network training files
    :type dirTrain: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :param columnContaminant: name of the pollutant in the DataFrame
    :type columnContaminant: String
    :param fechaInicio: start date of the forecast
    :type fechaInicio: date
    :param fechaFin: end date of the forecast
    :type fechaFin: date
    """
    sta = station
    name = sta + '_' + contaminant
    # Load the feature data and restrict it to the requested date range.
    temp = df.read_csv(dirrDataC + name + '.csv')
    temp = temp.fillna(value=-1.0)
    data = temp[(temp['fecha'] <= fechaFin) & (temp['fecha'] >= fechaInicio)]
    data = data.reset_index(drop=True)
    data = filterData(data, dirData + name + '.csv')
    data = data.fillna(value=-1.0)
    # Load the observed values used as the comparison baseline.
    tempBuild = df.read_csv(dirrDataC + name + '_pred.csv')
    tempBuild = tempBuild.fillna(value=-1.0)
    build = tempBuild[(tempBuild['fecha'] <= fechaFin) & (tempBuild['fecha'] >= fechaInicio)]
    build = build.reset_index(drop=True)
    build = build.fillna(value=-1.0)
    l = xlabel(data)
    labels = l[0]
    location = l[1]
    print(labels)
    # Both branches currently use the same plot label; the station check is
    # kept for the diagnostic print only.
    if (station == 'SAG') | (station == 'UIZ'):
        nombre = 'anio'
    else:
        print('no mes')
        nombre = 'anio'
    arrayPred = []
    nameColumn = columnContaminant + '_' + sta + '_delta'
    inf = build[nameColumn].values
    index = data.index.values
    for x in index:
        # .iloc replaces the removed pandas .ix indexer; positions match labels
        # because the index was reset with drop=True above.
        pred = data.iloc[x].values
        valPred = pred[1:]
        valNorm = pre.normalize(valPred, sta, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = preK.prediction(sta, contaminant, arrayPred, dirTrain, dirData)
    real = desNorm(result, sta, contaminant, dirData, columnContaminant)
    # Plot observed values against the network's 24h forecast.
    plt.figure(figsize=(22.2, 11.4))
    plt.plot(inf, color='tomato', linestyle="solid", marker='o', label='Valor observado.')
    plt.plot(real, color='darkgreen', linestyle='solid', marker='o', label='Pronóstico 24h NN.')
    plt.title(nombreEst(station) + ' (' + station + ') comparación de ' + contaminant + ' observado vs red neuronal' + ' para la primer semana de ' + nombre + ' 2016', fontsize=25, y=1.1)
    plt.xlabel('Fecha', fontsize=18)
    plt.ylabel('Partes por millon (PPM)', fontsize=22)
    plt.legend(loc='best')
    plt.grid(True, axis='both', alpha=0.3, linestyle="--", which="both")
    plt.xticks(location, labels, fontsize=16, rotation=80)
    # Light horizontal bands to ease reading of value ranges.
    plt.axhspan(20, 40, color='lightgray', alpha=0.3)
    plt.axhspan(60, 80, color='lightgray', alpha=0.3)
    plt.axhspan(100, 120, color='lightgray', alpha=0.3)
    plt.gca().spines['bottom'].set_color('dimgray')
    plt.gca().spines['left'].set_visible(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.tight_layout()
    plt.savefig(dirGraficas + station + '_' + nombre + '.png')
    plt.show()
    plt.clf()
    plt.close()