def saveData2(listEstations, startDate, nameContaminant, endDate, dirr,
              dirTotalCsv, contaminant):
    """
    Write, for every station, one .csv with its processed data and one .csv
    with its '<nameContaminant><station>_delta' column.

    :param listEstations: stations to process
    :type listEstations: String list
    :param startDate: start dates, indexed in parallel with listEstations
    :type startDate: list
    :param nameContaminant: prefix of the '<prefix><station>_delta' column
    :type nameContaminant: String
    :param endDate: end date, shared by every station
    :param dirr: prefix prepended to the names of the output files
    :type dirr: String
    :param dirTotalCsv: value forwarded to unionData
    :type dirTotalCsv: String
    :param contaminant: pollutant name used for the queries and file names
    :type contaminant: String
    """
    for pos, station in enumerate(listEstations):
        print(station)
        firstDay = startDate[pos]
        print(firstDay)

        deltaColumn = nameContaminant + station + '_delta'
        dataFile = station + '_' + contaminant + '.csv'
        predFile = station + '_' + contaminant + '_pred.csv'

        rawData = fd.readData(firstDay, endDate, [station], contaminant)
        targets = fd.buildClass2(rawData, [station], contaminant, 24,
                                 firstDay, endDate)

        # Columns without a single value are removed; the remaining gaps
        # are filled with -1 before joining with the targets on 'fecha'.
        cleaned = rawData.dropna(axis=1, how='all').fillna(value=-1)
        cleaned = cleaned.reset_index().drop(labels='index', axis=1)
        merged = cleaned.merge(targets, how='left', on='fecha')

        # 'build' keeps only the date and the delta column ...
        build = df.DataFrame(merged['fecha'], columns=['fecha'])
        build[deltaColumn] = df.DataFrame(merged[deltaColumn],
                                          columns=[deltaColumn])
        build = build.reset_index().drop(labels='index', axis=1)

        # ... while 'data' keeps every other column.
        data = merged.drop(labels=deltaColumn, axis=1)
        data = data.reset_index().drop(labels='index', axis=1)

        data = unionData(separateDate(data), dirTotalCsv)
        maxAndMinValues(data, station, contaminant, dirr)

        data = data.drop_duplicates(keep='first')
        build = filterData(data, build.drop_duplicates(keep='first'))

        # saved as "<dirr><station>_<contaminant>.csv"
        data.to_csv(dirr + dataFile, encoding='utf-8', index=False)
        # saved as "<dirr><station>_<contaminant>_pred.csv"
        build.to_csv(dirr + predFile, encoding='utf-8', index=False)
# Example #2
# 0
def training(fechaAyer, estacion, dirTrain, dirData, dirCsv, dirFestivos,
             variables, contaminant):
    """
    Train the neural network of one station with the data read for
    fechaAyer; prints a message and does nothing else when no data is found.

    :param fechaAyer: date of the previous day (year, month, day and hour
        are read from it)
    :type fechaAyer: date
    :param estacion: name of the station
    :type estacion: String
    :param dirTrain: address of the training files of the neural network
    :type dirTrain: String
    :param dirData: address of the files with training information
    :type dirData: String
    :param dirCsv: address of the processed meteorology archives
    :type dirCsv: String
    :param dirFestivos: address of the file with the holidays
    :type dirFestivos: String
    :param variables: meteorological variables
    :type variables: string list
    :param contaminant: name of the pollutant
    :type contaminant: String
    """
    print(estacion)

    year = str(fechaAyer.year)
    month = numString(fechaAyer.month)
    day = numString(fechaAyer.day)
    hour = numString(fechaAyer.hour)

    fechaBuild = '/'.join((year, month, day))       # YYYY/MM/DD
    fecha = fechaBuild + ' ' + hour + ':00:00'      # YYYY/MM/DD HH:00:00
    fechaMet = '-'.join((year, month, day))         # YYYY-MM-DD

    data = fd.readData(fecha, fecha, [estacion], contaminant)
    build = fd.buildClass2(data, [estacion], contaminant, 24, fechaBuild,
                           fechaBuild)

    if data.empty:
        print("No se puede hacer el entrenamiento")
        return

    dataMet = unionMeteorologia(fechaMet, fechaAyer, dirCsv, variables)
    dataMet = dataMet.drop('fecha', axis=1)

    data = unionData(separateDate(data), fechaAyer, dirFestivos)
    data = df.concat([data, dataMet], axis=1)
    data = filterData(data, dirData + estacion + "_" + contaminant + ".csv")
    data = data.fillna(value=-1)

    prepared = an(data, build, contaminant)  # preprocessing
    tr.training(prepared[0], prepared[1], estacion, dirTrain, contaminant,
                dirData)
def tiempo():
    """
    Measure, over growing date ranges, the time spent loading/preparing the
    data, running ``nn`` and running ``nng``, then plot the three series.

    The range always begins at ``startDate[20]`` and its end advances in
    steps of 8760 * 2 hours until it passes ``endDate``. Station ``est[20]``
    is used. Every loss returned by ``nn`` and ``nng`` is appended to the
    module-level ``loss_vec``. The figure is saved as 'tiempo.png' and shown.
    """
    base_times = []
    cpu_times = []
    gpu_times = []

    first = datetime.strptime(startDate[20], '%Y/%m/%d')
    last = datetime.strptime(endDate, '%Y/%m/%d')
    step = timedelta(hours=8760 * 2)
    station = est[20]

    current = first + step
    while current <= last:
        current_text = current.strftime('%Y/%m/%d')

        # --- data loading and preprocessing ---
        # NOTE(review): readData receives datetime objects here, whereas the
        # other callers in this file pass date strings -- confirm it accepts
        # both.
        data_begin = time()
        data = FormatData.readData(first, current, [station], contaminant)
        build = FormatData.buildClass2(data, [station], contaminant, 24,
                                       startDate[20], current_text)
        xy_values = an(data, build, contaminant)
        data_end = time()

        # --- training timed as "CPU" ---
        cpu_begin = time()
        loss_vec.append(
            nn(xy_values[0], xy_values[1], xy_values[2], 1000, station,
               contaminant))
        cpu_end = time()

        # --- training timed as "GPU" ---
        gpu_begin = time()
        loss_vec.append(nng(xy_values[0], xy_values[1], xy_values[2], 1000))
        gpu_end = time()

        base_times.append(data_end - data_begin)
        cpu_times.append(cpu_end - cpu_begin)
        gpu_times.append(gpu_end - gpu_begin)

        current = current + step

    plt.plot(base_times, 'g-', label='time Data base')
    plt.plot(cpu_times, 'k-', label='time CPU')
    plt.plot(gpu_times, 'r-', label='time GPU')
    plt.title('GPU vs CPU')
    plt.xlabel('Years')
    plt.ylabel('Time')
    plt.legend(loc='best')
    plt.savefig('tiempo.png', dpi=600)
    plt.show()
def estationsGpu():
    """
    Run ``nng`` once per station in ``est``, each time reading the data of
    one more station, and plot the collected losses.

    The targets are always built for ``est[0]``. Each loss is appended to
    the module-level ``loss_vec``, which is printed and plotted at the end.
    The figure is saved as 'estacionesGpu.png' and shown.
    """
    first_day = startDate[0]
    selected = []
    for name in est:
        selected.append(name)
        print(selected)
        data = FormatData.readData(first_day, endDate, selected, contaminant)
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24,
                                       first_day, endDate)
        xy_values = an(data, build, contaminant)
        loss_vec.append(nng(xy_values[0], xy_values[1], xy_values[2], 1000))

    print(loss_vec)
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("estacionesGpu.png", dpi=600)
    plt.show()
def iterationGpu():
    """
    Run ``nng`` on station ``est[10]`` with 200, 400, ..., 3000 as its last
    argument and plot the collected losses.

    The data is read and preprocessed once, before the loop. Each loss is
    appended to the module-level ``loss_vec``, which is printed and plotted
    at the end. The figure is saved as 'iteraciones.png' and shown.
    """
    first_day = startDate[0]
    station = est[10]
    data = FormatData.readData(first_day, endDate, [station], contaminant)
    build = FormatData.buildClass2(data, [station], contaminant, 24,
                                   first_day, endDate)
    xy_values = an(data, build, contaminant)

    for iterations in range(200, 3001, 200):
        loss_vec.append(
            nng(xy_values[0], xy_values[1], xy_values[2], iterations))
        # The value printed is the count of the *next* round.
        print(iterations + 200)

    print(loss_vec)
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de iteraciones de entrenamiento')
    plt.xlabel('Numero de iteraciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    plt.savefig("iteraciones.png", dpi=600)
    plt.show()
def testData2():
    """
    Compare the time needed to get the data through ``FormatData`` with the
    time needed to read the equivalent .csv files, for 1 to 22 stations.

    On every round one more station (``est[0]`` .. ``est[21]``) is added to
    the set. The .csv files are read from 'data/<station>_<contaminant>.csv'
    and 'data/<station>_<contaminant>_pred.csv'. The figure is saved as
    'Graficas/tiempoDataBase2.png' and shown.
    """
    db_times = []
    csv_times = []
    stations = []

    for pos in range(22):
        stations.append(est[pos])
        print(stations)

        # --- through FormatData ---
        db_begin = time()
        data = FormatData.readData(startDate[pos], endDate, stations,
                                   contaminant)
        FormatData.buildClass2(data, stations, contaminant, 24,
                               startDate[pos], endDate)
        db_end = time()

        # --- through the .csv files ---
        csv_begin = time()
        for station in stations:
            name = station + '_' + contaminant
            df.read_csv('data/' + name + '.csv')
            df.read_csv('data/' + name + '_pred.csv')
        csv_end = time()

        db_times.append(db_end - db_begin)
        csv_times.append(csv_end - csv_begin)

    plt.figure(figsize=(12.2, 6.4))
    plt.plot(csv_times, 'g-', label='time File')
    plt.plot(db_times, 'r-', label='time DataBase')
    plt.title('DataBase vs File')
    plt.xlabel('stations')
    plt.ylabel('Time (second)')
    plt.legend(loc='best')
    # One tick per station, labelled with the station name.
    location = np.arange(len(est))
    plt.xticks(location, est, fontsize=7, rotation='vertical')
    plt.savefig('Graficas/tiempoDataBase2.png', dpi=600)
    plt.show()
def estations():
    """
    Run ``nn`` once per station in ``est``, each time reading the data of
    one more station, and plot the collected losses.

    The targets are always built for ``est[0]``. Each loss is appended to
    the module-level ``loss_vec``, which is printed and plotted at the end.
    The figure is saved as 'estaciones.png' and shown.
    """
    start = startDate[0]
    estation = []
    for x in est:
        estation.append(x)
        print(estation)
        data = FormatData.readData(start, endDate, estation, contaminant)
        build = FormatData.buildClass2(data, [est[0]], contaminant, 24, start,
                                       endDate)
        xy_values = an(data, build, contaminant)
        temp_loss = nn(xy_values[0], xy_values[1], xy_values[2], 1000, est[0],
                       contaminant)
        loss_vec.append(temp_loss)
    print(loss_vec)
    plt.figure(figsize=(12.2, 6.4))
    plt.plot(loss_vec, 'k-', label='Loss')
    plt.title('Error aumentando el numero de estaciones')
    plt.xlabel('Numero de estaciones')
    plt.ylabel('Loss')
    plt.legend(loc='best')
    # One tick per station. numpy has no ``range``; ``arange`` is the
    # function that yields the positions 0 .. len(est) - 1.
    location = np.arange(len(est))
    plt.xticks(location, est, rotation='vertical')
    plt.savefig("estaciones.png", dpi=600)
    plt.show()