Exemple #1
0
                trainOutputs.append([ftrrs[lenInput]])

            for i in range(len(fter)):
                fters = fter[i].split('\t')
                for j in range(len(fters)):
                    fters[j] = float(fters[j])
                testInputs.append(fters[0:lenInput])
                testValOutputs.append(fters[lenInput])

            # scaling trainOutputs and testValOutputs (value' <- value * scalingRate - 1.0)
            for i in range(len(trainOutputs)):
                trainOutputs[i][0] = trainOutputs[i][0] * scalingRate - 1.0

                # consider math range error for sigmoid
                if scalingRate < inf:
                    trainOutputs[i][0] = h.sigmoid(trainOutputs[i][0])
                else:
                    trainOutputs[i][0] = 1.0

            for i in range(len(testValOutputs)):
                testValOutputs[i] = testValOutputs[i] * scalingRate - 1.0

            # print training data
            print('\n <<< training data >>>\n')
            print('TRAIN length: ' + str(len(trainInputs)))
            for i in range(min(100, len(trainInputs))):
                print(str(i) + ' : ' + str(trainInputs[i]))
                print(' -> ' + str(trainOutputs[i]))
            if len(trainInputs) > 100: print('...')

            # print test data
Exemple #2
0
def deepLearning(inputFileName, outputFileName, testFileName,
                 testOutputFileName, testOutputReal, test_report, validRate,
                 valid_report, modelConfig, deviceName, epoch, printed,
                 modelName):
    """Train, test, or validate the neural network described by `modelConfig`.

    With `validRate == 0` the model named `modelName` is loaded (or trained
    when loading/predicting fails), the test input is predicted, and the
    inverse-sigmoid of the prediction is written to `testOutputFileName`.
    With any other `validRate`, part of the training data is held out, a model
    named `modelName + 'Valid'` is loaded or trained on the rest, and a
    validation report is written to `valid_report`.

    Args:
        inputFileName: training input file read with helper.getDataFromFile,
            or None to skip reading training input.
        outputFileName: training output file read with
            helper.getDataFromFile, or None to skip reading training output.
        testFileName: a list used directly as test input, a file name read
            with helper.getDataFromFile, or None.
        testOutputFileName: file the test predictions are appended to; the
            function returns early if this file can already be opened.
        testOutputReal: forwarded to writeTestResult; when None no test
            report is written.
        test_report: forwarded to writeTestResult.
        validRate: 0 for training/testing, otherwise the fraction of training
            rows used for validation.
        valid_report: file the validation report is written to.
        modelConfig: file whose lines are passed to helper.getNN,
            helper.getOptimizer and helper.getLoss.
        deviceName: forwarded to deepLearning_GPU.deepLearning.
        epoch: forwarded to deepLearning_GPU.deepLearning.
        printed: when not 0, the loaded input/output/test data are printed.
        modelName: model name passed to deepLearning_GPU.

    Returns:
        None in training/testing mode. In validation mode the tuple
        (MAE, MSE, accuracy, column-wise average of the predictions,
        column-wise average of the real validation outputs).
    """

    # You can do only 'training' or 'testing' by setting some arguments as None.
    # inputFileName == None and outputFileName == None -> testing only
    # testFileName == None                             -> training only
    # for validation, you can set testFileName == None <- validation uses training data only

    ##############################
    ##                          ##
    ##       0. READ DATA       ##
    ##                          ##
    ##############################

    # read files
    print('[00] reading train input / train output / test input files...')

    trainI = None
    trainO = None
    testI = None

    # input train data
    if inputFileName is not None:
        trainI = helper.getDataFromFile(inputFileName)

    # output train data (sigmoid is applied further below)
    if outputFileName is not None:
        trainO = helper.getDataFromFile(outputFileName)

    # test input data: use testFileName itself when it is already a list,
    # otherwise read the file of that name
    if isinstance(testFileName, list):
        testI = testFileName
    elif testFileName is not None:
        testI = helper.getDataFromFile(testFileName)

    # read configuration file (to get normalization info)
    print('[01] reading configuration files...')
    with open('config.txt', 'r') as f:
        fl = [line.split('\n')[0] for line in f.readlines()]

    normalizeName = None
    validInterval = 1
    testSizeOnce = 0  # max test data size at once (for both testing and validation)

    # extract configuration ("<key> <value>" per line)
    for line in fl:
        configSplit = line.split(' ')

        # a key without a value carries no setting; skip it instead of
        # failing with IndexError
        if len(configSplit) < 2:
            continue

        # normalize info file name
        if configSplit[0] == 'normalizeName':
            normalizeName = configSplit[1]
            if normalizeName == 'None':
                normalizeName = None

        # validation interval
        elif configSplit[0] == 'validInterval':
            validInterval = int(configSplit[1])

        # test input size at once
        elif configSplit[0] == 'testSize':
            testSizeOnce = int(configSplit[1])

    # compute average / stddev of the train output and write normalization info
    # (`is not None` is used because comparing an array with `!= None` is
    # evaluated element-wise and cannot be used as a condition)
    if normalizeName is not None and trainO is not None:
        print('[02] calculating and writing average and stddev...')

        trainOutputAvg = np.mean(trainO, axis=0)  # average of train output value
        trainOutputStddev = np.std(trainO, axis=0)  # stddev of train output value

        # normalize training output data and write avg and stddev
        writeNormalizeInfo(trainO, normalizeName)
    else:
        print('[03] Reading average and stddev failed.')
        trainOutputAvg = None
        trainOutputStddev = None

    # apply sigmoid to train output data (in place)
    if trainO is not None:
        print('[04] applying sigmoid to train output data...')
        for i in range(len(trainO)):
            for j in range(len(trainO[0])):
                trainO[i][j] = helper.sigmoid(trainO[i][j])

    # print input, output, and test data
    if printed != 0:
        if trainI is not None:
            print('\n ---- original input data (' + str(len(trainI)) +
                  ') ----\n')
            for row in trainI:
                print(helper.roundedArray(row, 6))

        if trainO is not None:
            print('\n ---- original output data (' + str(len(trainO)) +
                  ') ----\n')
            for row in trainO:
                print(helper.roundedArray(row, 6))

        if testI is not None:
            print('\n ---- original test data (' + str(len(testI)) +
                  ') ----\n')
            for row in testI:
                print(helper.roundedArray(row, 6))

    ##############################
    ##                          ##
    ##   1. READ MODEL CONFIG   ##
    ##                          ##
    ##############################

    # model design using model configuration file
    # activation function of final layer is always 'sigmoid'
    print('[10] reading model configuration...')
    with open(modelConfig, 'r') as f:
        modelInfo = f.readlines()

    ##############################
    ##                          ##
    ##   2A. TRAINING / TEST    ##
    ##                          ##
    ##############################

    # if the model already exists, input the test input to the NN and get the result
    # if the model does not exist, newly train NN using training input and output data and then do testing procedure
    if validRate == 0:

        # NN and optimizer
        print('[11] obtaining neural network and optimizer info...')

        # optimizer and loss depend only on the model configuration, so they
        # are always available (loading an existing model in testing-only
        # mode needs them); the network itself needs training data
        op = helper.getOptimizer(modelInfo)  # optimizer
        loss = helper.getLoss(modelInfo)  # loss
        NN = None
        if trainI is not None and trainO is not None:
            NN = helper.getNN(modelInfo, trainI, trainO)  # Neural Network

        try:  # try reading the existing model
            print('[20] reading model [ ' + modelName + ' ]...')
            newModel = deepLearning_GPU.deepLearningModel(
                modelName, op, loss, True)
            testO = getTestResult(newModel, testI, testSizeOnce)

        except Exception:  # do learning if the model cannot be read / used
            # without training data there is nothing to fall back on
            if NN is None:
                raise

            print('[21] learning...')

            # False and True are dataPrint (whether to print the training data)
            # and modelPrint (whether to print the model summary), respectively
            print(trainO[0])
            # NOTE(review): training uses a fixed 'mean_squared_error' while the
            # model is reloaded with `loss` from the model config -- confirm
            # whether `loss` should be passed here as in the validation branch.
            deepLearning_GPU.deepLearning(NN, op, 'mean_squared_error', trainI,
                                          trainO, modelName, epoch, False,
                                          True, deviceName)

            print('[22] reading learned model [ ' + modelName + ' ]...')
            newModel = deepLearning_GPU.deepLearningModel(
                modelName, op, loss, True)

            # get test output if testI is not None
            if testI is None:
                print('test input file name (testInput) is None.')
                return
            testO = getTestResult(newModel, testI, testSizeOnce)

        # test
        print('[23] testing...')

        # inverse sigmoid
        for i in range(len(testO)):  # for each output data
            for j in range(len(testO[0])):  # for each value of output data
                testO[i][j] = helper.invSigmoid(testO[i][j])

        # refuse to append to a test output file that already exists
        try:
            with open(testOutputFileName, 'r'):
                pass
        except OSError:
            pass  # the file cannot be opened, so it is created below
        else:
            print(' **** Delete test output file (' + testOutputFileName +
                  ') first. ****')
            return

        # write to file
        print('[24] writing test result to file [ ' + testOutputFileName +
              ' ]...')

        outputCols = len(testO[0]) if len(testO) > 0 else 0
        with open(testOutputFileName, 'a') as f:
            for i in range(len(testO)):  # for each output data
                if i % 1000 == 0: print(str(i) + ' / ' + str(len(testO)))

                # every value is followed by a tab, every row by a newline
                f.write(''.join(
                    str(testO[i][j]) + '\t' for j in range(outputCols)) + '\n')

        ##############################
        ##                          ##
        ##  2A+. WRITE TEST REPORT  ##
        ##                          ##
        ##############################

        # compare prediction output data with real output data and write report
        if testOutputReal is not None:
            try:
                writeTestResult(test_report, testOutputFileName,
                                testOutputReal, normalizeName, trainOutputAvg,
                                trainOutputStddev)
            except Exception as reportError:
                # the report is best-effort, but do not hide why it failed
                print(' **** Writing test report failed: ' +
                      str(reportError) + ' ****')

    ##############################
    ##                          ##
    ##      2B. VALIDATION      ##
    ##                          ##
    ##############################

    # validation (if validation rate > 0)
    else:

        ##############################
        ##                          ##
        ##   2B-0. DATA TO VALID    ##
        ##                          ##
        ##############################

        # make index-list of validation data
        print('[28] deciding data to validate...')
        inputSize = len(trainI)
        targetValidSize = int(inputSize * validRate)

        # validArray[i] == 1 marks row i as validation data; validSize counts
        # the marked rows so the list is not re-summed on every iteration
        validArray = [0] * inputSize
        validSize = 0
        while validSize < targetValidSize:

            # start index for validation (aligned to validInterval)
            validStartIndex = (random.randint(0, inputSize - 1) //
                               validInterval) * validInterval

            # set data[validStartIndex : validStartIndex + validInterval] as
            # validation data, clipped so the last interval cannot run past
            # the end of the data
            validEndIndex = min(validStartIndex + validInterval, inputSize)
            for i in range(validStartIndex, validEndIndex):
                if validArray[i] == 0:
                    validArray[i] = 1
                    validSize += 1

        # whole intervals are selected, so validSize may exceed the target;
        # the sizes below are the actual ones
        trainSize = inputSize - validSize

        # make train and validation data
        # _TrainO, _ValidO : sigmoid((originalOutput - meanOriginalOutput)/stdOriginalOutput)
        _TrainI = []  # training input
        _TrainO = []  # training output
        _ValidI = []  # valid input
        _ValidO = []  # valid output

        for i in range(inputSize):
            if validArray[i] == 0:  # training data
                _TrainI.append(trainI[i])
                _TrainO.append(trainO[i])
            else:  # validation data
                _ValidI.append(trainI[i])
                _ValidO.append(trainO[i])

        ##############################
        ##                          ##
        ## 2B-1. TRAIN (MAKE MODEL) ##
        ##                          ##
        ##############################

        # model name for validation
        newModelName = modelName + 'Valid'
        print('[29] training [ ' + newModelName + ' ]...')

        # NN and optimizer
        NN = helper.getNN(modelInfo, _TrainI, _TrainO)  # Neural Network
        op = helper.getOptimizer(modelInfo)  # optimizer
        loss = helper.getLoss(modelInfo)  # loss

        # output for validation
        try:  # try reading the validation model
            validModel = deepLearning_GPU.deepLearningModel(
                newModelName, op, loss, True)
            _predValidO = getTestResult(validModel, _ValidI, testSizeOnce)
        except Exception:  # do learning if the validation model cannot be used
            deepLearning_GPU.deepLearning(NN, op, loss, _TrainI, _TrainO,
                                          newModelName, epoch, False, True,
                                          deviceName)
            validModel = deepLearning_GPU.deepLearningModel(
                newModelName, op, loss, True)
            _predValidO = getTestResult(validModel, _ValidI, testSizeOnce)

        ##############################
        ##                          ##
        ##     2B-2. VALIDATION     ##
        ##                          ##
        ##############################
        print('[30] validating and writing result [ ' + valid_report + ' ]...')

        MAE = 0  # mean absolute error
        MSE = 0  # mean square error
        accuracy = 0  # accuracy

        # inverse sigmoid for PREDICTED validation output
        for i in range(len(_predValidO)):  # for each output data
            for j in range(len(
                    _predValidO[0])):  # for each value of output data
                _predValidO[i][j] = helper.invSigmoid(_predValidO[i][j])

        # inverse sigmoid for REAL validation output
        for i in range(len(_ValidO)):  # for each output data
            for j in range(len(_ValidO[0])):  # for each value of output data
                _ValidO[i][j] = helper.invSigmoid(_ValidO[i][j])

        # denormalize if normalized info is available (denormalize whole trainO)
        denormalize(normalizeName, len(_predValidO), len(_predValidO[0]),
                    _predValidO, trainOutputAvg, trainOutputStddev)
        denormalize(normalizeName, len(_ValidO), len(_ValidO[0]), _ValidO,
                    trainOutputAvg, trainOutputStddev)

        # compute error
        validCount = 0
        resultLines = []
        outputCols = len(_ValidO[0])

        # print arrays in full on a single line while the report is built,
        # then put the caller's numpy print options back
        savedPrintOptions = np.get_printoptions()
        np.set_printoptions(edgeitems=10000, linewidth=1000000)
        try:
            for i in range(inputSize):
                if i % 1000 == 0: print(str(i) + ' / ' + str(inputSize))

                # validation for data whose value of valid array is 1
                if validArray[i] != 1:
                    continue

                # accumulate absolute and squared error over every output column
                for j in range(outputCols):
                    error = (_ValidO[validCount][j] -
                             _predValidO[validCount][j])
                    MAE += abs(error)
                    MSE += pow(error, 2)

                # compute accuracy
                if helper.argmax(_ValidO[validCount]) == helper.argmax(
                        _predValidO[validCount]):
                    accuracy += 1

                # collect the per-row report line
                resultLines.append(
                    '[' + str(i) + '] pred = ' +
                    str(np.round(_predValidO[validCount], 6)) + ', real = ' +
                    str(np.round(_ValidO[validCount], 6)) + '\n')

                validCount += 1

            # get the average of MAE, MSE and accuracy
            MAE /= (validSize * outputCols)
            MSE /= (validSize * outputCols)
            accuracy /= validSize

            # print evaluation result
            resultSummary = '----------------\n'
            resultSummary += 'input size : ' + str(inputSize) + '\n'
            resultSummary += 'train size : ' + str(trainSize) + '\n'
            resultSummary += 'valid size : ' + str(validSize) + '\n'
            resultSummary += 'MAE        : ' + str(round(MAE, 6)) + '\n'
            resultSummary += 'MSE        : ' + str(round(MSE, 6)) + '\n'
            resultSummary += 'accuracy   : ' + str(round(accuracy, 6)) + '\n'
            resultSummary += 'pred avg   : ' + str(
                np.average(_predValidO, axis=0)) + '\n'
            resultSummary += 'real avg   : ' + str(
                np.average(_ValidO, axis=0)) + '\n'
            print(resultSummary)
        finally:
            # recover edgeitems and linewidth
            np.set_printoptions(edgeitems=savedPrintOptions['edgeitems'],
                                linewidth=savedPrintOptions['linewidth'])

        # write result file
        with open(valid_report, 'w') as fvalid:
            fvalid.write(''.join(resultLines) + resultSummary)

        # return final result
        return (MAE, MSE, accuracy, np.average(_predValidO, axis=0),
                np.average(_ValidO, axis=0))
Exemple #3
0
def deepLearning(inputFileName, outputFileName, testFileName,
                 testOutputFileName, imgHeight, deviceName, epoch, printed,
                 modelName):
    """Train (if needed) and test a network, writing predictions to a file.

    The model named `modelName` is loaded through deepLearning_GPU; when
    loading or predicting fails, a new model is trained from the training
    data and then loaded. The inverse-sigmoid of the last element of the
    model output is printed and written to '<stem>_prediction.txt', where
    <stem> is the part of the base name of `testOutputFileName` before its
    first dot (the directory part is kept).

    Args:
        inputFileName: training input file, read with
            helper.getDataFromFile(inputFileName, imgHeight).
        outputFileName: training output file, read with
            helper.getDataFromFile(outputFileName, None).
        testFileName: test input file, read with
            helper.getDataFromFile(testFileName, imgHeight).
        testOutputFileName: name from which the prediction file name is
            derived.
        imgHeight: forwarded to helper.getDataFromFile for the input files.
        deviceName: forwarded to deepLearning_GPU.deepLearning.
        epoch: forwarded to deepLearning_GPU.deepLearning.
        printed: when not 0, the loaded input/output/test data are printed.
        modelName: model name passed to deepLearning_GPU.

    Returns:
        None.
    """
    # local import: only needed to build the prediction file path
    import os

    # read files
    trainI = helper.getDataFromFile(inputFileName,
                                    imgHeight)  # input train data
    trainO = helper.getDataFromFile(
        outputFileName, None)  # output train data (sigmoid applied below)
    testI = helper.getDataFromFile(
        testFileName, imgHeight)  # test input data

    # apply sigmoid to train output data (in place)
    for i in range(len(trainO)):
        for j in range(len(trainO[0])):
            trainO[i][j] = helper.sigmoid(trainO[i][j])

    # flatten trainI: (N, size, size) -> (N, size*size)
    for i in range(len(trainI)):
        trainI[i] = helper.flatten(trainI[i])

    print('')
    print(' ---- number of rows ----')
    print('input  size: ' + str(len(trainI)))
    print('output size: ' + str(len(trainO)))
    print('test   size: ' + str(len(testI)))
    print('')

    # print input, output, and test data
    if printed != 0:
        print('\n ---- original input data ----\n')
        for row in trainI:
            print(helper.roundedArray(row, 6))

        print('\n ---- original output data ----\n')
        for row in trainO:
            print(helper.roundedArray(row, 6))

        print('\n ---- original test data ----\n')
        for row in testI:
            print(helper.roundedArray(row, 6))

    # model design using deepLearning_model.txt
    # activation function of final layer is always 'sigmoid'
    with open('deepLearning_model.txt', 'r') as f:
        modelInfo = f.readlines()

    # NN and optimizer
    NN = helper.getNN(modelInfo, trainI, trainO)  # Neural Network
    op = helper.getOptimizer(modelInfo)  # optimizer

    try:  # try reading the existing model
        newModel = deepLearning_GPU.deepLearningModel(modelName, True)
        testOutput = deepLearning_GPU.modelOutput(newModel, testI)
    except Exception:  # do learning if the model cannot be read / used
        print('\n <<<< LEARNING >>>>\n')

        # False and True are dataPrint (whether to print the training data)
        # and modelPrint (whether to print the model summary), respectively
        print(trainO[0])
        deepLearning_GPU.deepLearning(NN, op, 'mean_squared_error', trainI,
                                      trainO, modelName, epoch, False, True,
                                      deviceName)

        newModel = deepLearning_GPU.deepLearningModel(modelName, True)
        testOutput = deepLearning_GPU.modelOutput(newModel, testI)

    # test
    print('\n <<<< TEST >>>>\n')

    # estimate: the last element of the model output is used as prediction
    outputLayer = testOutput[len(testOutput) - 1]

    # inverse sigmoid
    for i in range(len(outputLayer)):  # for each output data
        for j in range(len(outputLayer[0])):  # for each value of output data
            outputLayer[i][j] = helper.invSigmoid(outputLayer[i][j])

    # build the text to write: every value is followed by a tab, every row by
    # a newline
    print('\n<<<< output layer >>>>')
    resultRows = []
    for i in range(len(outputLayer)):  # for each output data
        resultRows.append(''.join(
            str(outputLayer[i][j]) + '\t'
            for j in range(len(outputLayer[0]))) + '\n')
    result = ''.join(resultRows)
    print(result)

    # cut the extension from the base name only, so that dots in the
    # directory part (e.g. './out/test.txt') do not truncate the path
    outputDir, outputBase = os.path.split(testOutputFileName)
    predictionFileName = os.path.join(
        outputDir, outputBase.split('.')[0] + '_prediction.txt')

    with open(predictionFileName, 'w') as f:
        f.write(result)
Exemple #4
0
def deepLearning(inputFileName, outputFileName, testFileName,
                 testOutputFileName, valid, deviceName, epoch, printed,
                 modelName, normalizeTarget):
    """Test or validate a network, optionally denormalizing its output.

    With `valid == 0` the model named `modelName` is loaded (or trained when
    loading/predicting fails), the test input is predicted and the
    inverse-sigmoid (and, if requested, denormalized) prediction is written
    to '<stem>_prediction.txt', where <stem> is the part of the base name of
    `testOutputFileName` before its first dot. Otherwise a fraction `valid`
    of the training rows is held out at random, a model named
    `modelName + 'Valid'` is loaded or trained on the rest, and the per-row
    errors plus a summary are printed and written to 'data_valid_result.txt'.

    Args:
        inputFileName: training input file (helper.getDataFromFile).
        outputFileName: training output file (helper.getDataFromFile).
        testFileName: test input file (helper.getDataFromFile).
        testOutputFileName: name from which the prediction file name is
            derived.
        valid: 0 for testing, otherwise the fraction of training rows used
            for validation.
        deviceName: forwarded to deepLearning_GPU.deepLearning.
        epoch: forwarded to deepLearning_GPU.deepLearning.
        printed: when not 0, the loaded input/output/test data are printed.
        modelName: model name passed to deepLearning_GPU.
        normalizeTarget: when equal to True, mean and stddev are read from
            the first line of 'data_normalizeInfo.txt' and used to
            denormalize outputs (value * stddev + mean).

    Returns:
        In testing mode, a list with the first value of every predicted
        output row. In validation mode, None.
    """
    # local import: only needed to build the prediction file path
    import os

    # read files
    # trainO : (originalOutput - meanOriginalOutput)/stdOriginalOutput
    trainI = helper.getDataFromFile(inputFileName, None)  # input train data
    trainO = helper.getDataFromFile(
        outputFileName, None)  # output train data (sigmoid applied below)
    testI = helper.getDataFromFile(
        testFileName, None)  # test input data

    # apply sigmoid to train output data (in place)
    # trainO :   sigmoid(normalize(originalOutput))
    #          = sigmoid((originalOutput - meanOriginalOutput)/stdOriginalOutput)
    for i in range(len(trainO)):
        for j in range(len(trainO[0])):
            trainO[i][j] = helper.sigmoid(trainO[i][j])

    print('')
    print(' ---- number of rows ----')
    print('input  size: ' + str(len(trainI)))
    print('output size: ' + str(len(trainO)))
    print('test   size: ' + str(len(testI)))
    print('')

    # print input, output, and test data
    if printed != 0:
        print('\n ---- original input data ----\n')
        for row in trainI:
            print(helper.roundedArray(row, 6))

        print('\n ---- original output data ----\n')
        for row in trainO:
            print(helper.roundedArray(row, 6))

        print('\n ---- original test data ----\n')
        for row in testI:
            print(helper.roundedArray(row, 6))

    # model design using deepLearning_model.txt
    # activation function of final layer is always 'sigmoid'
    with open('deepLearning_model.txt', 'r') as f:
        modelInfo = f.readlines()

    # read normalization info ("<mean> <stddev>" on the first line)
    # the comparison with True is kept so that only True (or 1) enables it
    doDenormalize = (normalizeTarget == True)
    if doDenormalize:
        with open('data_normalizeInfo.txt', 'r') as fnorm:
            fnormInfo = fnorm.readlines()
        fnormValues = fnormInfo[0].split(' ')
        fnormMean = float(fnormValues[0])  # mean of training data
        fnormStd = float(fnormValues[1])  # stddev of training data

    #### TEST when the value of valid is 0 ####
    if valid == 0:

        # NN and optimizer
        NN = helper.getNN(modelInfo, trainI, trainO)  # Neural Network
        op = helper.getOptimizer(modelInfo)  # optimizer

        try:  # try reading the existing model
            newModel = deepLearning_GPU.deepLearningModel(modelName, True)
            testOutput = deepLearning_GPU.modelOutput(newModel, testI)
        except Exception:  # do learning if the model cannot be read / used
            print('\n <<<< LEARNING >>>>\n')

            # False and True are dataPrint (whether to print the training
            # data) and modelPrint (whether to print the model summary),
            # respectively
            deepLearning_GPU.deepLearning(NN, op, 'mean_squared_error', trainI,
                                          trainO, modelName, epoch, False,
                                          True, deviceName)

            newModel = deepLearning_GPU.deepLearningModel(modelName, True)
            testOutput = deepLearning_GPU.modelOutput(newModel, testI)

        # test
        print('\n <<<< TEST >>>>\n')

        # estimate: the last element of the model output is used as prediction
        outputLayer = testOutput[len(testOutput) - 1]

        # inverse sigmoid
        # output:   denormalize(invSigmoid(sigmoid(normalize(originalOutput))))
        #         = denormalize((originalOutput - meanOriginalOutput)/stdOriginalOutput)
        #         = originalOutput
        for i in range(len(outputLayer)):  # for each output data
            for j in range(len(
                    outputLayer[0])):  # for each value of output data
                outputLayer[i][j] = helper.invSigmoid(outputLayer[i][j])
                if doDenormalize:
                    outputLayer[i][
                        j] = outputLayer[i][j] * fnormStd + fnormMean

        # build the text to write: every value is followed by a tab, every
        # row by a newline
        print('\n<<<< output layer >>>>')
        resultRows = []
        for i in range(len(outputLayer)):  # for each output data
            resultRows.append(''.join(
                str(outputLayer[i][j]) + '\t'
                for j in range(len(outputLayer[0]))) + '\n')

        # cut the extension from the base name only, so that dots in the
        # directory part (e.g. './out/test.txt') do not truncate the path
        outputDir, outputBase = os.path.split(testOutputFileName)
        predictionFileName = os.path.join(
            outputDir, outputBase.split('.')[0] + '_prediction.txt')

        with open(predictionFileName, 'w') as f:
            f.write(''.join(resultRows))

        # return final result: first value of every output row
        finalResult = []
        for i in range(len(outputLayer)):  # for each output data
            finalResult.append(outputLayer[i][0])

        return finalResult

    #### VALIDATION when the value of valid is >0 ####
    else:

        # make index-list of validation data
        inputSize = len(trainI)
        validSize = int(inputSize * valid)
        trainSize = inputSize - validSize

        # validArray[i] == 1 marks row i as validation data; the number of
        # marked rows is counted instead of re-summing the list every time
        validArray = [0] * inputSize
        selectedCount = 0
        while selectedCount < validSize:
            pickedIndex = random.randint(0, inputSize - 1)
            if validArray[pickedIndex] == 0:
                validArray[pickedIndex] = 1
                selectedCount += 1

        # make train and validation data
        # _TrainO, _ValidO : sigmoid((originalOutput - meanOriginalOutput)/stdOriginalOutput)
        _TrainI = []  # training input
        _TrainO = []  # training output
        _ValidI = []  # valid input
        _ValidO = []  # valid output

        for i in range(inputSize):
            if validArray[i] == 0:  # training data
                _TrainI.append(trainI[i])
                _TrainO.append(trainO[i])
            else:  # validation data
                _ValidI.append(trainI[i])
                _ValidO.append(trainO[i])

        # model name for validation
        newModelName = modelName + 'Valid'

        # NN and optimizer
        NN = helper.getNN(modelInfo, _TrainI, _TrainO)  # Neural Network
        op = helper.getOptimizer(modelInfo)  # optimizer

        # output for validation
        try:  # try reading the validation model
            validModel = deepLearning_GPU.deepLearningModel(newModelName, True)
            predictedValidO = deepLearning_GPU.modelOutput(validModel, _ValidI)
        except Exception:  # do learning if the validation model cannot be used
            print('\n <<<< LEARNING >>>>\n')

            # False and True are dataPrint (whether to print the training
            # data) and modelPrint (whether to print the model summary),
            # respectively
            # _TrainO : sigmoid((originalOutput - meanOriginalOutput)/stdOriginalOutput)
            deepLearning_GPU.deepLearning(NN, op, 'mean_squared_error',
                                          _TrainI, _TrainO, newModelName,
                                          epoch, False, True, deviceName)

            validModel = deepLearning_GPU.deepLearningModel(newModelName, True)
            predictedValidO = deepLearning_GPU.modelOutput(validModel, _ValidI)

        # evaluation
        print('\n <<<< VALID >>>>\n')

        MAE = 0  # mean absolute error
        MSE = 0  # mean square error
        accuracy = 0  # accuracy

        # predicted validation output
        outputLayer = predictedValidO[len(predictedValidO) - 1]

        # inverse sigmoid
        # output :   invSigmoid(sigmoid(normalize(originalOutput)))
        #          = (originalOutput - meanOriginalOutput)/stdOriginalOutput
        for i in range(len(outputLayer)):  # for each output data
            for j in range(len(
                    outputLayer[0])):  # for each value of output data
                outputLayer[i][j] = helper.invSigmoid(outputLayer[i][j])

        # bring prediction and real output back to the original scale
        # output  : denormalize((originalOutput - meanOriginalOutput)/stdOriginalOutput)
        #           = originalOutput
        # _ValidO : denormalize(invSigmoid(sigmoid((originalOutput - meanOriginalOutput)/stdOriginalOutput)))
        #           = denormalize((originalOutput - meanOriginalOutput)/stdOriginalOutput)
        #           = originalOutput
        for i in range(len(outputLayer)):  # for each output data
            for j in range(len(
                    outputLayer[0])):  # for each value of output data
                _ValidO[i][j] = helper.invSigmoid(_ValidO[i][j])
                if doDenormalize:
                    _ValidO[i][j] = _ValidO[i][j] * fnormStd + fnormMean
                    outputLayer[i][
                        j] = outputLayer[i][j] * fnormStd + fnormMean

        # compute error (only the first output value of each row is compared)
        validCount = 0
        resultLines = []
        for i in range(inputSize):
            if validArray[i] != 1:
                continue

            # compute errors and accuracy
            realValue = _ValidO[validCount][0]
            predValue = outputLayer[validCount][0]
            thisAE = abs(realValue - predValue)
            thisSE = pow(realValue - predValue, 2)
            MAE += thisAE
            MSE += thisSE
            if thisSE <= 0.5: accuracy += 1

            # print and collect result
            newResultToWrite = ('[' + str(i) + '] pred = ' +
                                str(int(predValue)) + ', real = ' +
                                str(int(realValue)) + ', AE = ' +
                                str(int(thisAE)) + ', SE = ' +
                                str(int(thisSE)))
            resultLines.append(newResultToWrite + '\n')
            print(newResultToWrite)

            validCount += 1

        MAE /= validSize
        MSE /= validSize
        accuracy /= validSize

        # print evaluation result
        resultSummary = ''
        resultSummary += 'input size : ' + str(inputSize) + '\n'
        resultSummary += 'train size : ' + str(trainSize) + '\n'
        resultSummary += 'valid size : ' + str(validSize) + '\n'
        resultSummary += 'MAE        : ' + str(round(MAE, 6)) + '\n'
        resultSummary += 'MSE        : ' + str(round(MSE, 6)) + '\n'
        resultSummary += 'accuracy   : ' + str(round(accuracy, 6)) + '\n'
        resultSummary += 'pred avg   : ' + str(np.average(outputLayer,
                                                          axis=0)) + '\n'
        resultSummary += 'real avg   : ' + str(np.average(_ValidO,
                                                          axis=0)) + '\n'
        print(resultSummary)

        # write result file
        with open('data_valid_result.txt', 'w') as fvalid:
            fvalid.write(''.join(resultLines) + resultSummary)
def _mapFilePath(size, numWD, mapNo):
    """Return the path of the text file that stores map number ``mapNo``.

    The map number is zero-padded to four digits (e.g. ``DL_WPCN_0007.txt``).
    """
    return ('originalMaps_' + str(size) + '_' + str(numWD) + '/DL_WPCN_' +
            str(mapNo).zfill(4) + '.txt')


def _optiInfoPath(size, numWD):
    """Return the path of the cached throughput file (reads global ``problemNo``)."""
    return ('optiInfoForMap/optiInfoForMap_' + str(problemNo) + '_forPaper_' +
            str(size) + '_' + str(numWD) + '.txt')


def _parseThroughputMaps(optiLines):
    """Parse the lines of an optiInfoForMap file into a list of 2-D float maps.

    A line starting with '*' is the header of a new map; every other line of
    at least 3 characters is one row of space-separated throughput values.
    Shorter lines (blank / newline only) are ignored.
    """
    maps = []
    rows = []
    for line in optiLines:
        if line.startswith('*'):
            if rows:
                maps.append(rows)
            rows = []
        elif len(line) >= 3:
            rows.append([float(value) for value in line.split(' ')])
    # the last map has no following '*' header, so flush it explicitly
    if rows:
        maps.append(rows)
    return maps


def _subCellOffset(before, centre, after):
    """Return a correction in [-1, 1] for a peak coordinate from its neighbours.

    The three values are shifted so that the smallest becomes 0 and the offset
    is the weighted mean of the positions -1, 0, +1. When all three values are
    equal the weights sum to 0 and no correction (0.0) is returned instead of
    dividing by zero.
    """
    lowest = min(before, centre, after)
    before -= lowest
    centre -= lowest
    after -= lowest

    total = before + centre + after
    if total == 0:
        return 0.0
    return (before * (-1) + after * 1) / total


def run(size, numTrain, numTest, numWD, deviceName, doTrainAndTest):
    """Prepare WPCN maps, train a CNN on throughput maps and evaluate it.

    Steps:
      1. Generate (``readOrWrite == 1``) or read the ``numTrain + numTest``
         map files under ``originalMaps_<size>_<numWD>/``.
      2. Load the per-map throughput grids from the optiInfoForMap cache file,
         computing and appending any maps that are not stored yet.
      3. Build the training inputs (flattened maps) and outputs (throughput
         grids scaled to [-1, 1], then passed through sigmoid).
      4-7. Ask for epoch and dropout lists on stdin, then train, test and
         report for every (epoch, dropout) pair. Results are written to
         ``DL_WPCN_forPaper_<problemNo>_<size>_<numWD>.txt`` and
         ``..._detail.txt``.

    Reads the module-level names ``readOrWrite`` and ``problemNo``.

    Args:
        size: side length of the square map.
        numTrain: number of maps used for training.
        numTest: number of maps used for testing.
        numWD: number of wireless devices; passed to the map generator and
            used in file names.
        deviceName: forwarded to ``deepLearning_GPU.deepLearning``.
        doTrainAndTest: when ``False`` the function returns after step 3.

    Raises:
        ValueError: if the optiInfoForMap cache file exists but cannot be
            parsed consistently.
    """

    inputs = []  # input values for deep learning
    outputs = []  # output values for deep learning

    # input information
    originalScreen = []  # screen of each map
    wdList = [
    ]  # wireless device positions per map, [[wd0Y, wd0X], ..., [wd(n-1)Y, wd(n-1)X]]

    # output information
    outputScreen = [
    ]  # throughput at each HAP coordinate per map, [[[thrput(0,0), thrput(0,1), ...], ...], ...]

    # evaluation results
    RL = []

    numMaps = numTrain + numTest

    # 1. create maps for training and test
    # map cells: .(space), H(HAP), W(wireless device)
    if readOrWrite == 1:  # write map files, then train/test with those maps
        print('generating maps...')
        for i in range(numMaps):
            initScreen_ = WPCN_helper_REAL.initScreen(size, False, numWD)

            originalScreen.append(initScreen_[0])
            wdList.append(initScreen_[2])

            # write the map file
            with open(_mapFilePath(size, numWD, i), 'w') as f:
                for j in range(size):
                    for k in range(size):
                        if originalScreen[i][j][k] == -1:
                            f.write('W')  # wireless device
                        elif originalScreen[i][j][k] == 0:
                            f.write('.')  # space
                        elif originalScreen[i][j][k] == 1:
                            f.write('H')  # HAP (not exist in the original map)
                    f.write('\n')

    else:  # read existing map files and train/test with them
        print('reading maps...')
        for i in range(numMaps):
            with open(_mapFilePath(size, numWD, i), 'r') as f:
                map_ = f.readlines()  # rows of the map
            map_ = [row.replace('\n', '') for row in map_]  # strip newlines

            # rebuild originalScreen and wdList from the characters
            wdListTemp = []
            thisScreen = [[0] * size
                          for j in range(size)]  # screen to add to originalScreen
            for j in range(size):
                for k in range(size):
                    if map_[j][k] == 'W':
                        thisScreen[j][k] = -1  # wireless device
                        wdListTemp.append([j, k])
                    elif map_[j][k] == '.':
                        thisScreen[j][k] = 0  # space
                    elif map_[j][k] == 'H':
                        thisScreen[j][
                            k] = 1  # HAP (not exist in the original map)

            originalScreen.append(thisScreen)
            wdList.append(wdListTemp)

    # 2. for every HAP coordinate (y, x) of every map, find the best HAPtime
    # and build a throughput map (cached in the optiInfoForMap file).
    # y and x coordinates are sampled with step 1.

    # file format:
    # * (map no.) (HAPtime) (best y) (best x) (max thrput)
    # (thrput at y=0,x=0) (thrput at y=0,x=1) ... (thrput at y=0,x=n)
    # (thrput at y=1,x=0) (thrput at y=1,x=1) ... (thrput at y=1,x=n)
    # ...                 ...                     ...
    # (thrput at y=n,x=0) (thrput at y=n,x=1) ... (thrput at y=n,x=n)
    optiInfoPath = _optiInfoPath(size, numWD)

    try:  # read the cache file if it exists
        with open(optiInfoPath, 'r') as f:
            optiInformation = f.readlines()
    except OSError as e:  # no usable cache file: everything is computed below
        print(e)
        print("can't read optiInfoForMap_" + str(problemNo) + '_forPaper_' +
              str(size) + '_' + str(numWD) + '.txt')
        optiInformation = []

    # each stored map takes (1 + size) lines: one header plus size rows
    storedMaps = len(optiInformation) // (1 + size)
    cacheIncomplete = storedMaps < numMaps

    if cacheIncomplete:
        print('optiInfoForMap has ' + str(storedMaps) + ' of ' +
              str(numMaps) + ' maps; computing the rest...')
        # drop a partially written trailing record so it is not saved again
        optiInformation = optiInformation[:storedMaps * (1 + size)]

    # add the maps that are already stored to the output array
    outputScreen.extend(_parseThroughputMaps(optiInformation))

    if cacheIncomplete and len(outputScreen) != storedMaps:
        # maps computed below are appended by index, so the stored part must
        # contain exactly storedMaps maps to stay aligned with originalScreen
        raise ValueError('malformed optiInfoForMap file: ' + optiInfoPath)

    # previously stored content is kept and new maps are appended to it
    toSave = ''.join(optiInformation)
    if toSave and not toSave.endswith('\n'):
        toSave += '\n'

    for i in range(storedMaps, numMaps):
        print('finding max throughput for map ' + str(i) + '...')

        # max throughput (over HAPtime) at each HAP coordinate of this map
        temp = [[0] * size for _ in range(size)]

        optiY_ = 0  # HAP y coordinate with the maximum throughput
        optiX_ = 0  # HAP x coordinate with the maximum throughput
        maxThroughput_ = 0.0  # maximum throughput (sum or common)
        maxHAPtime_ = 0.0  # HAP time allocation that yields the maximum

        for y in range(size):
            for x in range(size):
                (throughput, HAPtime) = WPCN_helper_REAL.getThroughput(
                    wdList[i], [y, x], size, problemNo)
                temp[y][x] = throughput

                # update when a new best throughput is found
                if throughput > maxThroughput_:
                    optiY_ = y
                    optiX_ = x
                    maxThroughput_ = throughput
                    maxHAPtime_ = HAPtime

        # append this map to toSave (header line, then one line per row)
        toSave += '* ' + str(i) + ' ' + str(maxHAPtime_) + ' ' + str(
            optiY_) + ' ' + str(optiX_) + ' ' + str(maxThroughput_) + '\n'
        for j in range(size):
            toSave += ' '.join(str(temp[j][k]) for k in range(size)) + '\n'

        print('max throughput: ' + str(maxThroughput_) + ' axis: ' +
              str(optiY_) + ' ' + str(optiX_) + ' HAPtime: ' +
              str(maxHAPtime_))

        # add to the output array
        if i < numTrain: outputScreen.append(temp)

        # rewrite the file after every map so finished work survives a crash
        with open(optiInfoPath, 'w') as optiInfo:
            optiInfo.write(toSave)

    # 3. put the training maps into the input and output arrays
    print('make input and output data...')

    for i in range(numTrain):
        originalScreen_ = deepLearning_GPU_helper.arrayCopy(
            originalScreen[i])  # copy array from originalScreen
        originalScreen_ = deepLearning_GPU_helper.arrayCopyFlatten(
            originalScreen_, 0, size, 0, size, None)  # flatten
        inputs.append(originalScreen_)

        # find max value in outputScreen[i]
        maxVal = 0.0
        for j in range(size):
            for k in range(size):
                if outputScreen[i][j][k] > maxVal:
                    maxVal = outputScreen[i][j][k]

        for j in range(size):
            for k in range(size):
                scaled = outputScreen[i][j][
                    k] / maxVal  # uniform [0, maxVal] -> uniform [0, 1]
                scaled = scaled * 2.0 - 1.0  # uniform [0, 1] -> uniform [-1, 1]

                # inverse sigmoid is applied at test time, so apply sigmoid
                # to every element of the training target first
                outputScreen[i][j][k] = deepLearning_GPU_helper.sigmoid(scaled)

        outputs.append([outputScreen[i]])

    # stop here without training and testing
    if doTrainAndTest == False: return

    # 4~7. train and test with the prepared data for every requested setting
    epoch = input('epoch').split(
        ' '
    )  # input format: epoch0 epoch1 epoch2 ... epochN (ex: 5 15 30 100 200 300 500 1000)
    dropout = input('dropout').split(
        ' '
    )  # input format: dropout0 dropout1 dropout2 ... dropoutN (ex: 0 0.05 0.1 0.15 0.2)

    lc = 'mean_squared_error'  # loss calculator

    for epochText in epoch:  # for each epoch setting
        for dropoutText in dropout:  # experiment with each dropout setting

            epoc = int(epochText)
            drop = float(dropoutText)

            # detailed per-map result of this experiment
            resultForThisMap = ''
            WPCNdeepNN_modelName = 'WPCNdeepNN_' + str(size) + '_' + str(
                numWD)

            try:
                # 4. model definition
                NN = [
                    tf.keras.layers.Reshape(
                        (size, size, 1), input_shape=(size * size, )),
                    keras.layers.Conv2D(32,
                                        kernel_size=(3, 3),
                                        input_shape=(size, size, 1),
                                        activation='relu'),
                    keras.layers.MaxPooling2D(pool_size=2),
                    keras.layers.Dropout(drop),
                    keras.layers.Conv2D(32, (3, 3), activation='relu'),
                    keras.layers.Flatten(),
                    keras.layers.Dropout(drop),
                    keras.layers.Dense(40, activation='relu'),
                    keras.layers.Dense(size * size, activation='sigmoid'),
                    keras.layers.Reshape((1, size, size),
                                         input_shape=(size * size, ))
                ]

                # 5. optimizer
                if problemNo == 0:
                    op = tf.keras.optimizers.Adam(0.001)  # for Sum-
                else:
                    op = tf.keras.optimizers.Adam(0.0001)  # for Common-

                print('[00] training...')
                deepLearning_GPU.deepLearning(NN, op, lc, inputs, outputs,
                                              WPCNdeepNN_modelName, epoc,
                                              False, True, deviceName)

                # 6. get the model output for the test data and compare it
                # with the correct answer
                print('[10] loading model...')
                newModel = deepLearning_GPU.deepLearningModel(
                    WPCNdeepNN_modelName, True)

                print('[20] init throughput values...')
                sumTestThroughput = 0.0  # sum of test throughput
                sumCorrectMaxThroughput = 0.0  # sum of the correct max throughput (found the same way as for the training data)

                with open(optiInfoPath, 'r') as optiInfo:
                    optiInformation = optiInfo.readlines()

                print('[30] testing...')

                for i in range(numTest):
                    if i == 0: print('[31] initializing...')
                    testScreen = originalScreen[numTrain + i]  # screen to test
                    testOutput = deepLearning_GPU.modelOutput(
                        newModel,
                        [np.array(testScreen).reshape(size * size)
                         ])  # test result
                    testOutputLayer = testOutput[
                        len(testOutput) - 1]  # values of the output layer

                    # 6-0. read the test result
                    if i == 0: print('[32] getting optiScreen...')
                    optiScreen = [[0] * size for j in range(size)
                                  ]  # output screen (= map) of the test result
                    for j in range(size):
                        for k in range(size):
                            optiScreen[j][
                                k] = deepLearning_GPU_helper.invSigmoid(
                                    testOutputLayer[0][0][j][k])

                    if i == 0:
                        print('[33] finding optimal X and Y values...')
                    # find the best HAP coordinate in the output map
                    optiY_test = 0  # best HAP y coordinate in the output map
                    optiX_test = 0  # best HAP x coordinate in the output map
                    optiThroughput = 0  # predicted value at that coordinate
                    for j in range(size):
                        for k in range(size):
                            if optiScreen[j][k] > optiThroughput:
                                optiY_test = j
                                optiX_test = k
                                optiThroughput = optiScreen[j][k]

                    # refine the coordinate with the neighbouring values
                    if i == 0:
                        print('[34] modifying X and Y values...')
                    optiY = optiY_test
                    optiX = optiX_test

                    # refine y using the values at (y-1), y, (y+1)
                    if optiY > 0 and optiY < size - 1:
                        optiY_test += _subCellOffset(
                            optiScreen[optiY - 1][optiX],
                            optiScreen[optiY][optiX],
                            optiScreen[optiY + 1][optiX])

                    # refine x using the values at (x-1), x, (x+1)
                    if optiX > 0 and optiX < size - 1:
                        optiX_test += _subCellOffset(
                            optiScreen[optiY][optiX - 1],
                            optiScreen[optiY][optiX],
                            optiScreen[optiY][optiX + 1])

                    # clamp the HAP coordinate into the map
                    if i == 0:
                        print(
                            '[35] setting X and Y values as inside the range...'
                        )
                    if optiY_test > size - 1: optiY_test = size - 1
                    elif optiY_test < 0: optiY_test = 0

                    if optiX_test > size - 1: optiX_test = size - 1
                    elif optiX_test < 0: optiX_test = 0

                    # throughput actually achieved at the predicted position
                    if i == 0: print('[36] getting test results...')
                    (throughput, HAPtime) = WPCN_helper_REAL.getThroughput(
                        wdList[numTrain + i], [optiY_test, optiX_test], size,
                        problemNo)
                    sumTestThroughput += throughput

                    # 6-1. print some of testScreen and optiScreen
                    if i == 0: print('[37] printing...')
                    if i < 10:  # only for the first 10 maps
                        # print testScreen
                        print(' --- test input screen for map ' + str(i) +
                              ' ---')
                        toPrint = ''
                        for j in range(size):
                            toPrint += '['
                            for k in range(size):
                                if testScreen[j][k] == -1:
                                    toPrint += 'W  '
                                else:
                                    toPrint += '.  '
                            toPrint += ']\n'
                        print(toPrint)

                        # print optiScreen, each cell padded to 7 characters
                        print(' --- test output screen for map ' + str(i) +
                              ' ---')
                        toPrint = ''
                        for j in range(size):
                            toPrint += '['
                            for k in range(size):
                                cell = str(round(optiScreen[j][k], 3))
                                toPrint += cell + ' ' * (7 - len(cell))
                            toPrint += ']\n'
                        print(toPrint)

                    # 6-2. compare with the correct answer: the max throughput
                    # stored in the header line of this map
                    if i == 0:
                        print('[38] comparing with correct answer...')
                    correctMaxThroughput = float(
                        optiInformation[(numTrain + i) *
                                        (size + 1)].split(' ')[5])
                    sumCorrectMaxThroughput += correctMaxThroughput

                    resultForThisMap += (
                        'test throughput for map ' + str(i) + ' [Y=' +
                        str(round(optiY_test, 3)) + ' X=' +
                        str(round(optiX_test, 3)) + ' HT=' + str(HAPtime) +
                        '] : ' + str(round(throughput, 6)) + ' / ' +
                        str(round(correctMaxThroughput, 6)) + ' (' + str(
                            round(100.0 * throughput / correctMaxThroughput,
                                  6)) + ' %)\n')
                    if i < 10: print('')
                    else: print(i)

                # 7. evaluation
                print('[40] evaluating...')
                percentage = round(
                    100.0 * sumTestThroughput / sumCorrectMaxThroughput, 6)
                print('size:' + str(size) + ' train:' + str(numTrain) +
                      ' test:' + str(numTest) + ' problem(0=Sum,1=Common):' +
                      str(problemNo) + ' epoch:' + str(epoc) + ' dropout:' +
                      str(round(drop, 3)))
                print('total test throughput: ' +
                      str(round(sumTestThroughput, 6)))
                print('total max  throughput: ' +
                      str(round(sumCorrectMaxThroughput, 6)))
                print('percentage           : ' + str(percentage) + ' %')

                RL.append([
                    size, numTrain, numTest, problemNo, epoc, drop, percentage
                ])

            # A failed experiment is reported and skipped so the remaining
            # settings still run. NOTE: despite the message, it is not retried.
            except Exception as ex:
                print('\n################################')
                print('error: ' + str(ex))
                print('retry training and testing')
                print('################################\n')

            # print the evaluation results collected so far
            print('\n <<< ESTIMATION RESULT >>>')
            print(
                'No.\tsize\ttrain\ttest\tproblem\tepoch\tdropout\tpercentage')

            saveResult = '\n <<< ESTIMATION RESULT >>>\nNo.\tsize\ttrain\ttest\tproblem\tepoch\tdropout\tpercentage\n'

            for i in range(len(RL)):
                toPrint = (str(i) + '\t' + str(RL[i][0]) + '\t' +
                           str(RL[i][1]) + '\t' + str(RL[i][2]) + '\t' +
                           str(RL[i][3]) + '\t' + str(RL[i][4]) + '\t' +
                           str(round(RL[i][5], 3)) + '\t' +
                           (' ' if round(RL[i][6], 4) < 10.0 else '') +
                           str(round(RL[i][6], 4)))
                print(toPrint)
                saveResult += (toPrint + '\n')
            print('\n')

            # save the evaluation results
            with open(
                    'DL_WPCN_forPaper_' + str(problemNo) + '_' + str(size) +
                    '_' + str(numWD) + '.txt', 'w') as fSave:
                fSave.write(saveResult)

            with open(
                    'DL_WPCN_forPaper_' + str(problemNo) + '_' + str(size) +
                    '_' + str(numWD) + '_detail.txt', 'w') as fSave_:
                fSave_.write(resultForThisMap)
Exemple #6
0
            fr = f.readlines()
            f.close()

            trainInputs = []  # input data for training
            trainOutputs = []  # output data for training
            testInputs = []  # input data for test
            testValOutputs = []  # output data for test (for validation)
            for i in range(len(fr)):
                frs = fr[i].split('\t')
                for j in range(len(frs)):
                    frs[j] = float(frs[j])

                # 80% of data for training and 20% for test
                if random.random() < 0.8:
                    trainInputs.append(frs[1:6])
                    trainOutputs.append([h.sigmoid(frs[6])])
                else:
                    testInputs.append(frs[1:6])
                    testValOutputs.append(frs[6])

            # print training data
            print('\n <<< training data >>>\n')
            print('TRAIN length: ' + str(len(trainInputs)))
            for i in range(min(100, len(trainInputs))):
                print(
                    str(i) + ' : ' + str(trainInputs[i]) + ' -> ' +
                    str(trainOutputs[i]))
            if len(trainInputs) > 100: print('...')

            # print test data
            print('\n <<< test data >>>\n')
                for k in range(len(memset[memsetIndex])):
                    if outputs[i][outputCols[j]] == memset[memsetIndex][k]:
                        trainO_temp.append(1)
                    else:
                        trainO_temp.append(0)

                if i == 0:
                    onehotList.append([j,
                                       memset[memsetIndex]])  # save onehotList

                memsetIndex += 1

        # apply sigmoid to each value of trainO
        for k in range(len(trainO_temp)):
            trainO_temp[k] = GPUhelper.sigmoid(trainO_temp[k])
        trainO.append(trainO_temp)

    memset = []  # initialize memset

    for i in range(len(tests)):
        if i % 50 == 0: print('test row ' + str(i))

        # append to testI (test input)
        testI_temp = []
        memsetIndex = 0  # index for memset

        for j in range(len(testCols)):

            # just append this value if numeric
            if testCols_type[j] == 0 or testCols_type[j] == 3 or testCols_type[