def predict_check():
    """Smoke-test the training pipeline.

    Loads the bundled data, normalizes the train/val/test splits, trains the
    linear model, checks that exactly 3 coefficients were learned, and runs a
    prediction on the test split.  Prints pass/fail to stdout; never raises.
    """
    try:
        local_dir = os.path.dirname(__file__)
        data_path = os.path.join(local_dir, 'data')
        # read in data from files
        data = read_in_data(data_path)
        data_train, data_val, data_test = split_data(data)
        data_train, data_val, data_test = [
            normalize(data_set) for data_set in (data_train, data_val, data_test)
        ]
        model = train(data_train[0], data_train[1])
        coeff = model.coef_
        assert coeff.shape == (3, ), "The shape of coefficient matrix should be (3, )"
        predict(data_test, model)
    except Exception:
        print('Training test failed')
        print('Exception trace back:')
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print() (which would append a spurious "None").
        traceback.print_exc()
    else:
        print('Training test passed.')
        print("Linear regression coefficient: ")
        print(coeff)
def pronostico_normal(data, dirFestivos, dataMet, estacion, contaminant, dirData, dirTrain):
    """Run the standard forecast for one station/pollutant and persist it.

    :param data: raw pollutant observations for the station
    :param dirFestivos: directory with the holiday-calendar files
    :param dataMet: meteorological DataFrame joined column-wise to the data
    :param estacion: station name
    :param contaminant: pollutant name
    :param dirData: directory with the training-information CSVs
    :param dirTrain: directory with the neural-network training files
    :return: 1 on completion
    """
    data = data.reset_index(drop=True)
    data = separateDate(data)
    data = totalUnionData(data, dirFestivos)
    data = df.concat([data, dataMet], axis=1, join='inner')
    #data = data.merge(dataMet, how='left', on='fecha')
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)
    arrayPred = []
    # Normalize every row (skipping the two leading date columns) into the
    # shape the network expects.
    for x in data.index.values:
        # .ix was removed in pandas 1.0; .loc keeps the label-based lookup.
        pred = data.loc[x].values
        valPred = pred[2:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    columnContaminant = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, columnContaminant + '_')
    for xs in range(len(real)):
        # NOTE(review): data['fecha'] appears to select duplicate 'fecha'
        # columns (hence .values and the [1] pick) — confirm against schema.
        fechaPronostico = data['fecha'].iloc[xs].values
        fechaPronostico = datetime.strptime(fechaPronostico[1], '%Y-%m-%d %H:%M:%S')
        fechaPronostico = fechaPronostico - timedelta(days=1)
        pronostico = real[xs]
        guardarPrediccion(estacion, fechaPronostico, [pronostico], contaminant, 2)
    return 1
def normalization_check():
    """Smoke-test the normalization step.

    Verifies that each split is normalized and that every split keeps its
    3 feature columns.  Prints pass/fail to stdout; never raises.
    """
    try:
        # construct data dir
        local_dir = os.path.dirname(__file__)
        data_path = os.path.join(local_dir, 'data')
        # read in data from files
        data = read_in_data(data_path)
        data_train, data_val, data_test = split_data(data)
        data_train, data_val, data_test = [
            normalize(data_set) for data_set in (data_train, data_val, data_test)
        ]
        assert is_normalized(data_train[0]), "data_train is not normalized"
        assert is_normalized(data_val[0]), "data_val is not normalized"
        assert is_normalized(data_test[0]), "data_test is not normalized"
        # The original repeated the data_train check three times (copy-paste
        # bug); each split should be checked once.
        assert data_train[0].shape[1] == 3, "the column of data_train should be 3"
        assert data_val[0].shape[1] == 3, "the column of data_val should be 3"
        assert data_test[0].shape[1] == 3, "the column of data_test should be 3"
    except Exception:
        print('Test failed')
        print('Exception trace back:')
        # print_exc() writes the traceback itself; wrapping it in print()
        # would only print its None return value.
        traceback.print_exc()
    else:
        print('Normalization test passed.')
def forecast_month(month, year, dirData, dirTotalCsv, dirTrain, estacion, contaminant):
    """Forecast a whole calendar month for one station/pollutant and persist it.

    :param month: month number (1-12)
    :param year: four-digit year
    :param dirData: directory with the training-information CSVs
    :param dirTotalCsv: directory with the meteorology CSVs
    :param dirTrain: directory with the neural-network training files
    :param estacion: station name
    :param contaminant: pollutant name
    """
    # Build the full-month date range, honoring the month's actual length.
    lastDay = calendar.monthrange(year, month)[1]
    fechaInicio = str(year) + '-' + numString(month) + '-01 00:00:00'
    fechaFinal = str(year) + '-' + numString(month) + '-' + numString(lastDay) + ' 23:00:00'
    data = fd.readData(fechaInicio, fechaFinal, [estacion], contaminant)
    data = separateDate(data)
    data = unionMeteorologia(data, dirTotalCsv)
    data = data.fillna(value=-1)
    # Keep the timestamps before filterData drops/reorders columns.
    frame_dates = data['fecha'].values
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)
    arrayPred = []
    for x in data.index.values:
        # .ix was removed in pandas 1.0; .loc keeps the label-based lookup.
        pred = data.loc[x].values
        valPred = pred[1:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    nameCont = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, nameCont + '_')
    for xs in range(len(frame_dates)):
        fecha = frame_dates[xs]
        ts = df.to_datetime(str(fecha))
        fecha_string = ts.strftime('%Y-%m-%d %H:%M:%S')
        pronostico = real[xs]
        guardarPrediccion(estacion, fecha_string, [pronostico], contaminant, 4)
def prediccion(estacion, data, dirData, dirTrain, contaminant):
    """Send one row of data to the neural network and return the forecast.

    :param estacion: name of the station
    :type estacion: String
    :param data: information for the prediction (first row is used)
    :type data: DataFrame
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param contaminant: name of the pollutant
    :type contaminant: String
    :return: de-normalized prediction values
    :type return: float32
    """
    # .ix was removed in pandas 1.0; .iloc[0] is the positional first row.
    temp = data.iloc[0].values
    temp = temp[1:]
    dataPred = pre.normalize(temp, estacion, contaminant, dirData)
    dataPred = convert(dataPred)
    prediccion = pre.prediction(estacion, contaminant, [dataPred], dirTrain, dirData)
    print(prediccion)
    columnContaminant = findTable2(contaminant)
    prediccion1 = pre.desNorm(prediccion, estacion, contaminant, dirData, columnContaminant + '_')
    return prediccion1
def dataCorrelacion(contaminant, estacion, fechaInicio, fechaFin, dataMet, dirData, dirTrain, dirFestivos):
    """Forecast using the most-correlated sibling station's data.

    Looks up the station with the highest correlation to ``estacion`` in the
    precomputed correlation table, renames its columns to the target station,
    and runs the forecast.  Falls back to climatology when the correlated
    station has no data in the range.

    :param contaminant: pollutant name
    :param estacion: target station name
    :param fechaInicio: start of the query range
    :param fechaFin: end of the query range
    :param dataMet: meteorological DataFrame joined column-wise to the data
    :param dirData: directory with the training-information CSVs
    :param dirTrain: directory with the neural-network training files
    :param dirFestivos: directory with the holiday-calendar files
    """
    print('COrrelacion')
    data_Corr = df.read_csv(
        '/media/storageBK/AirQualityForecast/Scripts/ContaminationForecast/Data/Correlacion_table.csv',
        index_col=0)
    # index[0] is the station itself (correlation 1.0); index[1] is the most
    # correlated *other* station.
    corr_est = data_Corr[estacion].sort_values(ascending=False)
    estacion_corr = corr_est.index[1]
    data = fd.readData_corr(fechaInicio, fechaFin, [estacion_corr], contaminant)
    if data.empty:
        # BUG FIX: the original passed the undefined name `fechaUltima`,
        # raising NameError on this branch; the query range start is intended.
        useClimatology(contaminant, estacion, fechaInicio, fechaFin, dataMet,
                       dirData, dirTrain, dirFestivos)
    else:
        data = data.drop_duplicates(keep='first')
        data = data.reset_index(drop=True)
        # Rename the correlated station's columns to the target station so the
        # downstream filter/normalize steps find the expected names.
        index_values = data.columns.values[1:]
        for xs in index_values:
            data.rename(columns={
                xs: xs.replace(estacion_corr.lower(), estacion.lower())
            }, inplace=True)
        data = separateDate(data)
        data = totalUnionData(data, dirFestivos)
        data = df.concat([data, dataMet], axis=1, join='inner')
        print(data)
        #data = data.merge(dataMet, how='left', on='fecha')
        data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
        data = data.fillna(value=-1)
        arrayPred = []
        for x in data.index.values:
            # .ix was removed in pandas 1.0; .loc keeps the label-based lookup.
            pred = data.loc[x].values
            valPred = pred[2:]
            print(valPred)
            valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
            arrayPred.append(convert(valNorm))
        result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
        columnContaminant = findTable2(contaminant)
        real = pre.desNorm(result, estacion, contaminant, dirData, columnContaminant + '_')
        for xs in range(len(real)):
            fechaPronostico = data['fecha'].iloc[xs].values
            fechaPronostico = datetime.strptime(fechaPronostico[1], '%Y-%m-%d %H:%M:%S')
            pronostico = real[xs]
            guardarPrediccionRep(estacion, fechaPronostico, [pronostico], contaminant, 5)
def useClimatology(contaminant, estacion, fechaInicio, fechaFinal, dataMet, dirData, dirTrain, dirFestivos):
    """Make the forecast using climatologies.

    :param contaminant: name of the pollutant
    :type contaminant: String
    :param estacion: name of the weather station
    :type estacion: String
    :param fechaInicio: start of the range from which query values are extracted
    :type fechaInicio: datetime
    :param fechaFinal: end of the range from which query values are extracted
    :type fechaFinal: datetime
    :param dataMet: dataframe with the climatological information
    :type dataMet: DataFrame
    :param dirData: directory with the training-information CSVs
    :param dirTrain: directory with the neural-network training files
    :param dirFestivos: directory with the holiday-calendar files
    """
    data = fd.get_climatology(fechaInicio, fechaFinal, estacion)
    print(data)
    data = makeDates(fechaInicio, fechaFinal, data)
    print(data)
    data = data.reset_index(drop=True)
    print(data)
    data = separateDate(data)
    data = totalUnionData(data, dirFestivos)
    data = df.concat([data, dataMet], axis=1, join='inner')
    #data = data.merge(dataMet, how='left', on='fecha')
    data = data.fillna(value=-1)
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)
    arrayPred = []
    for x in data.index.values:
        # .ix was removed in pandas 1.0; .loc keeps the label-based lookup.
        pred = data.loc[x].values
        valPred = pred[2:]
        valNorm = pre.normalize(valPred, estacion, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(estacion, contaminant, arrayPred, dirTrain, dirData)
    columnContaminant = findTable2(contaminant)
    real = pre.desNorm(result, estacion, contaminant, dirData, columnContaminant + '_')
    # Walk hour by hour from fechaInicio; each saved value is stamped one day
    # earlier than its climatology slot.
    fechaPronostico = fechaInicio
    for xs in real:
        print(fechaPronostico)
        fechaUpdate = fechaPronostico - timedelta(days=1)
        guardarPrediccion(estacion, fechaUpdate, [xs], contaminant, 1)
        fechaPronostico = fechaPronostico + timedelta(hours=1)
    print('Climatologia:' + estacion)
def forecastDate2(station, dirData, dirrDataC, dirTrain, contaminant, columnContaminant, fechaInicio, fechaFin, dirTotalCsv):
    """Forecast a date range for one station/pollutant and save the results.

    Falls back to the backup data when no fresh pollutant data exists for the
    range.

    :param station: station name
    :param dirData: directory with the training-information CSVs
    :param dirrDataC: directory with the consolidated data CSVs (unused here)
    :param dirTrain: directory with the neural-network training files
    :param contaminant: pollutant name
    :param columnContaminant: name of the pollutant column in the DataFrame
    :param fechaInicio: start date of the forecast
    :param fechaFin: end date of the forecast
    :param dirTotalCsv: directory with the meteorology CSVs
    """
    sta = station
    name = sta + '_' + contaminant
    tempData = baseContaminantes(fechaInicio, fechaFin, station, contaminant)
    if tempData.empty:
        # No fresh data: predict from the backup's first row.
        dataBackup = back(dirData, contaminant)
        data = dataBackup
        data = data.fillna(value=-1)
        data = filterData(data, dirData + name + ".csv")
        data = data.fillna(value=-1)
        # .ix was removed in pandas 1.0; .iloc[0] is the positional first row.
        temp = data.iloc[0].values
        temp = temp[1:]
        dataPred = pre.normalize(temp, sta, contaminant, dirData)
        dataPred = convert(dataPred)
        # NOTE(review): this branch computes a prediction but never saves or
        # returns it — preserved as-is; confirm whether a save step is missing.
        prediccion = pre.prediction(sta, contaminant, [dataPred], dirTrain, dirData)
    else:
        data = tempData.dropna(axis=1, how='all')
        data = data.fillna(value=-1)
        data = data.reset_index(drop=True)
        data = separateDate(data)
        data = unionData(data, dirTotalCsv)
        data = data.drop_duplicates(keep='first')
        data = filterData(data, dirData + name + '.csv')
        data = data.fillna(value=-1)
        dataTemp = data['fecha']
        arrayPred = []
        for x in data.index.values:
            # .ix was removed in pandas 1.0; .loc keeps the label-based lookup.
            pred = data.loc[x].values
            valPred = pred[1:]
            valNorm = pre.normalize(valPred, sta, contaminant, dirData)
            arrayPred.append(convert(valNorm))
        result = pre.prediction(sta, contaminant, arrayPred, dirTrain, dirData)
        real = desNorm(result, sta, contaminant, dirData, columnContaminant)
        dataPrediccion = real
        savePrediccion(station, dataPrediccion, contaminant, dataTemp)
def forecastDate(station, dirData, dirrDataC, dirTrain, contaminant, columnContaminant, fechaInicio, fechaFin):
    """Make the forecast of a whole date range from the consolidated CSVs.

    :param station: name the station
    :type station: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirrDataC: directory with the consolidated data CSVs
    :type dirrDataC: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param contaminant: pollutant name
    :type contaminant: String
    :param columnContaminant: name of the pollutant in the DataFrame
    :type columnContaminant: String
    :param fechaInicio: start date of the forecast
    :type fechaInicio: date
    :param fechaFin: end date of the forecast
    :type fechaFin: date
    """
    sta = station
    name = sta + '_' + contaminant
    # Load the consolidated data and keep only the requested date window.
    temp = df.read_csv(dirrDataC + name + '.csv')
    temp = temp.fillna(value=-1.0)
    data = temp[(temp['fecha'] <= fechaFin) & (temp['fecha'] >= fechaInicio)]
    data = data.reset_index(drop=True)
    data = filterData(data, dirData + name + '.csv')
    data = data.fillna(value=-1.0)
    dataTemp = data['fecha'].values
    print(dataTemp)
    arrayPred = []
    for x in data.index.values:
        # .ix was removed in pandas 1.0; .loc keeps the label-based lookup.
        pred = data.loc[x].values
        valPred = pred[1:]
        valNorm = pre.normalize(valPred, sta, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    # Training files are stored under a per-contaminant subdirectory here.
    result = pre.prediction(sta, contaminant, arrayPred, dirTrain + contaminant + '/', dirData)
    real = desNorm(result, sta, contaminant, dirData, columnContaminant)
    dataPrediccion = real
    savePrediccion(station, dataPrediccion, contaminant, dataTemp)
def trial(station, dirData, dirrDataC, dirGraficas, dirTrain, contaminant, columnContaminant, fechaInicio, fechaFin):
    """Make the forecast of a whole year and graph observed vs. predicted.

    :param station: name the station
    :type station: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirrDataC: directory with the consolidated data CSVs
    :type dirrDataC: String
    :param dirGraficas: address where the graphics are saved
    :type dirGraficas: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param contaminant: pollutant name
    :type contaminant: String
    :param columnContaminant: name of the pollutant in the DataFrame
    :type columnContaminant: String
    :param fechaInicio: start date of the forecast
    :type fechaInicio: date
    :param fechaFin: end date of the forecast
    :type fechaFin: date
    """
    sta = station
    name = sta + '_' + contaminant
    # Load the input features and keep only the requested date window.
    temp = df.read_csv(dirrDataC + name + '.csv')
    temp = temp.fillna(value=-1.0)
    data = temp[(temp['fecha'] <= fechaFin) & (temp['fecha'] >= fechaInicio)]
    data = data.reset_index(drop=True)
    data = filterData(data, dirData + name + '.csv')
    data = data.fillna(value=-1.0)
    # Load the observed values used as the reference series in the plot.
    tempBuild = df.read_csv(dirrDataC + name + '_pred.csv')
    tempBuild = tempBuild.fillna(value=-1.0)
    build = tempBuild[(tempBuild['fecha'] <= fechaFin) & (tempBuild['fecha'] >= fechaInicio)]
    build = build.reset_index(drop=True)
    build = build.fillna(value=-1.0)
    l = xlabel(data)
    labels = l[0]
    location = l[1]
    print(labels)
    # Both branches currently use the same plot label; the month-specific
    # labeling is commented out in the original.
    if (station == 'SAG') | (station == 'UIZ'):
        nombre = 'anio'
    else:
        print('no mes')
        nombre = 'anio'
    nameColumn = columnContaminant + '_' + sta + '_delta'
    inf = build[nameColumn].values
    arrayPred = []
    for x in data.index.values:
        # .ix was removed in pandas 1.0; .loc keeps the label-based lookup.
        pred = data.loc[x].values
        valPred = pred[1:]
        valNorm = pre.normalize(valPred, sta, contaminant, dirData)
        arrayPred.append(convert(valNorm))
    result = pre.prediction(sta, contaminant, arrayPred, dirTrain, dirData)
    real = desNorm(result, sta, contaminant, dirData, columnContaminant)
    # Plot observed vs. predicted series.
    plt.figure(figsize=(22.2, 11.4))
    plt.plot(inf, color='tomato', linestyle="solid", marker='o', label='Valor observado.')
    plt.plot(real, color='darkgreen', linestyle='solid', marker='o', label='Pronóstico 24h NN.')
    plt.title(nombreEst(station) + ' (' + station + ') comparación de ' + contaminant
              + ' observado vs red neuronal' + ' para la primer semana de ' + nombre + ' 2016',
              fontsize=25, y=1.1)
    plt.xlabel('Fecha', fontsize=18)
    plt.ylabel('Partes por millon (PPM)', fontsize=22)
    plt.legend(loc='best')
    plt.grid(True, axis='both', alpha=0.3, linestyle="--", which="both")
    plt.xticks(location, labels, fontsize=16, rotation=80)
    # Shaded horizontal bands to ease reading of value ranges.
    plt.axhspan(20, 40, color='lightgray', alpha=0.3)
    plt.axhspan(60, 80, color='lightgray', alpha=0.3)
    plt.axhspan(100, 120, color='lightgray', alpha=0.3)
    plt.gca().spines['bottom'].set_color('dimgray')
    plt.gca().spines['left'].set_visible(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
    plt.tight_layout()
    plt.savefig(dirGraficas + station + '_' + nombre + '.png')
    plt.show()
    plt.clf()
    plt.close()