Exemple #1
0
def main():
    """Create an initial population and score it with the MLR model.

    Steps, in order: open the output file, build the MLR model, load and
    rescale the train/validation/test data, create a population with
    ``DifferentialEvolution.Create_A_Population`` and hand it to
    ``FromFinessFileMLR.validate_model``.

    Author's notes carried over from the original code: the number of
    descriptors should be 396 and the population 50 or more; the final-model
    requirements were listed as R2 >= 0.6 on training and R2 >= 0.5 on both
    validation and test.  Nothing in this function reads those thresholds
    (or the old ``unfit = 1000`` sentinel), so they are recorded here
    instead of being kept as unused locals.
    """
    fileW = FromFinessFileMLR.createAnOutputFile()
    model = mlr.MLR()

    numOfPop = 50
    numOfFea = 396

    TrainX, TrainY, ValidateX, ValidateY, TestX, TestY = \
        FromDataFileMLR.getAllOfTheData()
    TrainX, ValidateX, TestX = FromDataFileMLR.rescaleTheData(
        TrainX, ValidateX, TestX)

    # Create a population from the pool of features.
    population = DifferentialEvolution.Create_A_Population(numOfPop, numOfFea)

    # validate_model is unpacked into two values here; neither is used
    # afterwards, but the unpacking still enforces the two-value return shape.
    fittingStatus, fitness = FromFinessFileMLR.validate_model(
        model, fileW, population,
        TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
Exemple #2
0
def main():
    """Run the DE_BPSO pipeline from data loading through evolution.

    Reseeds ``np.random`` without a fixed seed, opens the output file,
    builds the MLR model, loads and rescales the data splits, creates the
    initial velocity and population, scores the population, derives the
    local/global bests and finally calls ``evolve_population``.
    """
    np.random.seed()  # no explicit seed is supplied

    out_file = createAnOutputFile()
    regressor = mlr.MLR()

    # Load the six data splits, then rescale only the X matrices.
    loaded = FromDataFileMLR.getAllOfTheData()
    train_x, train_y, valid_x, valid_y, test_x, test_y = loaded
    train_x, valid_x, test_x = FromDataFileMLR.rescaleTheData(
        train_x, valid_x, test_x)
    splits = (train_x, train_y, valid_x, valid_y, test_x, test_y)

    # DE_BPSO algorithm
    start_velocity = create_initial_velocity()
    swarm = create_initial_population(start_velocity, Lambda=0.01)
    # The first element of the returned pair is not used here.
    _, scores = FromFinessFileMLR.validate_model(
        regressor, out_file, swarm, *splits)

    best_rows, best_scores = create_initial_local_best_matrix(swarm, scores)
    create_initial_global_best_row(best_rows, best_scores)

    evolve_population(swarm, scores, start_velocity, best_rows, best_scores,
                      regressor, out_file, *splits)
Exemple #3
0
def append_to_file(new_vector, fileW):
    """Pass ``new_vector`` to ``validate_model_and_append`` with fresh data.

    A new ``DE_BPSO_MODEL`` is created and the data splits are reloaded and
    rescaled on every call; everything is then forwarded, together with
    ``fileW``, to ``FromFinessFileMLR.validate_model_and_append``.
    Nothing is returned.
    """
    regressor = DE_BPSO_model.DE_BPSO_MODEL()

    loaded = FromDataFileMLR.getAllOfTheData()
    train_x, train_y, valid_x, valid_y, test_x, test_y = loaded
    train_x, valid_x, test_x = FromDataFileMLR.rescaleTheData(
        train_x, valid_x, test_x)

    FromFinessFileMLR.validate_model_and_append(
        regressor, fileW, new_vector,
        train_x, train_y, valid_x, valid_y, test_x, test_y)
Exemple #4
0
def cal_fitness_DE(new_vector):
    """Return the fitness that ``validate_single_model`` gives ``new_vector``.

    A new ``DE_BPSO_model.MLR`` instance is created and the data splits are
    reloaded and rescaled on every call before the vector is evaluated.
    """
    regressor = DE_BPSO_model.MLR()

    loaded = FromDataFileMLR.getAllOfTheData()
    train_x, train_y, valid_x, valid_y, test_x, test_y = loaded
    train_x, valid_x, test_x = FromDataFileMLR.rescaleTheData(
        train_x, valid_x, test_x)

    return FromFinessFileMLR.validate_single_model(
        regressor, new_vector,
        train_x, train_y, valid_x, valid_y, test_x, test_y)
Exemple #5
0
def main():
    """Configure and run the binary particle swarm optimisation (BPSO).

    Builds the model, data and fitness helpers and the optimiser object,
    loads and rescales the data splits, then drives the optimiser through
    population creation, evaluation, velocity/best initialisation and
    evolution.
    """
    # BPSO parameters
    swarm_size = 50
    feature_count = 385
    generations = 1000

    # Collaborating objects
    regressor = mlr.MLR()
    data_source = FromDataFileMLR.FromDataFileMLR()
    fitness_source = FromFinessFileMLR.FromFinessFileMR(data_source)
    optimizer = BinaryParticleSwarmOptimization(
        regressor, fitness_source, swarm_size, feature_count, generations)

    # Load the six data splits, then rescale only the X matrices.
    loaded = data_source.getAllOfTheData()
    train_x, train_y, valid_x, valid_y, test_x, test_y = loaded
    train_x, valid_x, test_x = data_source.rescaleTheData(
        train_x, valid_x, test_x)
    splits = (train_x, train_y, valid_x, valid_y, test_x, test_y)

    # BPSO algorithm
    optimizer.create_initial_population()
    optimizer.evaluate_population(*splits)
    optimizer.create_initial_velocity()
    local_best, local_best_fitness = \
        optimizer.create_initial_local_best_matrix()
    optimizer.create_initial_global_best_row()
    optimizer.evolve_population(local_best, local_best_fitness, *splits)
Exemple #6
0
 def __init__(self, numOfPop, numOfFea):
     """Set up helper objects, search constants and swarm-state arrays.

     ``numOfPop`` and ``numOfFea`` size the arrays: per-feature for the
     global best row, per-member for the local-best fitnesses, and
     ``(numOfPop, numOfFea)`` for the velocity and local-best matrices.
     """
     # NOTE(review): ndarray is presumably numpy.ndarray, which allocates
     # without initialising the contents - confirm before reading these
     # arrays prior to filling them.
     swarm_shape = (numOfPop, numOfFea)

     # Helper objects
     self.filedata = FromDataFileMLR.DataFromFile()
     self.fitnessdata = FromFinessFileMLR.FitnessResults()

     # Search constants
     self.NofIterations = 2000
     self.alpha = 0.5

     # Global best
     self.GlobalBestRow = ndarray(numOfFea)
     self.GlobalBestFitness = 10000

     # Per-member state
     self.VelocityM = ndarray(swarm_shape)
     self.LocalBestM = ndarray(swarm_shape)
     self.LocalBestM_Fit = ndarray(numOfPop)
Exemple #7
0
 def __init__(self, numOfPop, numOfFea):
     """Create the data/analysis helpers and allocate the swarm state.

     Attributes set here:
       filedata          - acquires and formats data from the Train,
                           Validation and Test .csv files
       analyzer          - performs data analysis on those three sets
       NumIterations     - 1000
       alpha             - starting alpha value (0.5)
       GlobalBestRow     - best-fitting population yet found
       GlobalBestFitness - fitness of GlobalBestRow, initialised very high
       VelocityM         - velocity matrix, (numOfPop, numOfFea)
       LocalBestM        - local best matrix, (numOfPop, numOfFea)
       LocalBestM_Fit    - local best fitnesses, one per member
     """
     matrix_shape = (numOfPop, numOfFea)

     self.filedata = FromDataFileMLR.DataFromFile()
     self.analyzer = FromFinessFileMLR.FitnessResults()

     self.NumIterations = 1000
     self.alpha = 0.5

     self.GlobalBestRow = ndarray(numOfFea)
     self.GlobalBestFitness = 10000

     self.VelocityM = ndarray(matrix_shape)
     self.LocalBestM = ndarray(matrix_shape)
     self.LocalBestM_Fit = ndarray(numOfPop)
Exemple #8
0
def main():
    """Run the function-based BPSO pipeline end to end.

    Reseeds ``np.random`` without a fixed seed, opens the output file,
    builds the MLR model, loads and rescales the data splits, then creates
    and evaluates the initial population, initialises velocity and bests,
    and calls ``evolve_population``.
    """
    np.random.seed()  # no explicit seed is supplied

    out_file = createAnOutputFile()
    regressor = mlr.MLR()

    # Load the six data splits, then rescale only the X matrices.
    loaded = FromDataFileMLR.getAllOfTheData()
    train_x, train_y, valid_x, valid_y, test_x, test_y = loaded
    train_x, valid_x, test_x = FromDataFileMLR.rescaleTheData(
        train_x, valid_x, test_x)
    splits = (train_x, train_y, valid_x, valid_y, test_x, test_y)

    # BPSO algorithm
    swarm = create_initial_population()
    scores = evaluate_population(regressor, out_file, swarm, *splits)
    velocity = create_initial_velocity()
    best_rows, best_scores = create_initial_local_best_matrix(swarm, scores)
    create_initial_global_best_row(best_rows, best_scores)
    evolve_population(swarm, scores, velocity, best_rows, best_scores,
                      regressor, out_file, *splits)
Exemple #9
0
def main():
    """Set up the model, data and analyzer, then start the iteration loop.

    Steps, in order: build the MLR model (class located in the ``mlr``
    file), the data reader, the fitness evaluator and the ``Fitness``
    analyzer; open the output file; load and rescale the train/validation/
    test data; create and score the initial population; initialise the
    velocity, local-best and global-best state on the analyzer; and call
    ``analyzer.PerformOneMillionIteration``.

    Author's notes carried over from the original code: the number of
    descriptors should be 385 and the population 50 or more; the search was
    meant to stop if 1000 iterations bring no enhancement; and the
    final-model requirements were R2 >= 0.6 on training and R2 >= 0.5 on
    both validation and test.  None of those values is read anywhere in
    this function, so they are documented here rather than kept as unused
    locals.
    """
    numOfPop = 50
    numOfFea = 385

    model = mlr.MLR()
    filedata = FromDataFileMLR.DataFromFile()
    fitnessdata = FromFinessFileMLR.FitnessResults()
    analyzer = Fitness(numOfPop, numOfFea)

    # Output file object used by the validation and iteration steps.
    fileW = analyzer.createAnOutputFile()

    # Place training, validation and test data into their matrices, then
    # rescale only the X matrices.
    TrainX, TrainY, ValidateX, ValidateY, TestX, TestY = \
        filedata.getAllOfTheData()
    TrainX, ValidateX, TestX = filedata.rescaleTheData(
        TrainX, ValidateX, TestX)

    population = analyzer.createInitialPopulation(numOfPop, numOfFea)
    # Only the fitness values are used below; the status is discarded.
    _status, fitness = fitnessdata.validate_model(
        model, fileW, population,
        TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)

    analyzer.CreateInitialVelocity(numOfPop, numOfFea)
    # The local-best matrix starts out as the initial population.
    copyto(analyzer.LocalBestM, population)
    copyto(analyzer.LocalBestM_Fit, fitness)
    analyzer.FindGlobalBestRow()

    analyzer.PerformOneMillionIteration(
        numOfPop, numOfFea, population, fitness, model, fileW,
        TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
Exemple #10
0
def main():
    """Configure and run the differential evolution (DE) search.

    Builds the model, data and fitness helpers and the
    ``DifferentialEvolution`` object, loads and rescales the data splits,
    then creates, evaluates and evolves the population.
    """
    # DE parameters
    population_size = 50
    feature_count = 385
    generations = 100

    # Collaborating objects
    regressor = mlr.MLR()
    data_source = FromDataFileMLR.FromDataFileMLR()
    fitness_source = FromFinessFileMLR.FromFinessFileMR(data_source)
    evolver = DifferentialEvolution(
        regressor, fitness_source, population_size, feature_count,
        generations)

    # Load the six data splits, then rescale only the X matrices.
    loaded = data_source.getAllOfTheData()
    train_x, train_y, valid_x, valid_y, test_x, test_y = loaded
    train_x, valid_x, test_x = data_source.rescaleTheData(
        train_x, valid_x, test_x)
    splits = (train_x, train_y, valid_x, valid_y, test_x, test_y)

    # Differential evolution algorithm
    evolver.create_initial_population()
    evolver.evaluate_population(*splits)
    evolver.evolve_population(*splits)
Exemple #11
0
def main():
    """Configure and run the genetic algorithm (GA) search.

    Builds the model, data and fitness helpers and the ``GeneticAlgorithm``
    object, loads and rescales the data splits, then creates, evaluates and
    evolves the population.
    """
    # GA parameters
    population_size = 50
    feature_count = 385
    generations = 1000

    # Collaborating objects
    regressor = mlr.MLR()
    data_source = FromDataFileMLR.FromDataFileMLR()
    fitness_source = FromFinessFileMLR.FromFinessFileMR(data_source)
    algorithm = GeneticAlgorithm(
        regressor, fitness_source, population_size, feature_count,
        generations)

    # Load the six data splits, then rescale only the X matrices.
    loaded = data_source.getAllOfTheData()
    train_x, train_y, valid_x, valid_y, test_x, test_y = loaded
    train_x, valid_x, test_x = data_source.rescaleTheData(
        train_x, valid_x, test_x)
    splits = (train_x, train_y, valid_x, valid_y, test_x, test_y)

    # Genetic algorithm
    algorithm.create_initial_population()
    algorithm.evaluate_population(*splits)
    algorithm.evolve_population(*splits)
Exemple #12
0
# Script-level BPSO setup: records a start time, sets the run parameters,
# loads and rescales the data, scores an initial population and initialises
# the velocity, local-best and global-best bookkeeping.
# NOTE(review): `model` and `fileW` are used below but are not assigned in
# this snippet - they must be defined earlier in the module.
start = time.time()
# The number of descriptors should be 396 and the population 50 or more.
"""Number of population"""
numOfPop = 50
"""Number of total features"""
numOfFea = 396

# Final model requirements (not referenced again in the visible lines).

R2req_train = .6
R2req_validate = .5
R2req_test = .5
# alpha and beta are not referenced again in the visible lines.
alpha = 0.5
beta = 0.004

# Load the six data splits, then rescale only the X matrices.
TrainX, TrainY, ValidateX, ValidateY, TestX, TestY = FromDataFileMLR.getAllOfTheData(
)
TrainX, ValidateX, TestX = FromDataFileMLR.rescaleTheData(
    TrainX, ValidateX, TestX)

population = BPSO.Create_A_Population(numOfPop, numOfFea)
""" Get fitness"""
# NOTE(review): the whole return value of validate_model is bound to
# `fitness` here, whereas other call sites unpack two values from it
# (status, fitness) - confirm which shape this version returns before
# relying on argmin(fitness) below.
fitness = FromFinessFileMLR.validate_model(model, fileW, population, \
                                                TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
"""Initialize velocity"""
initial_velocity = BPSO.create_initial_velocity(numOfPop, numOfFea)

#print str(shape(initial_velocity))
"""Initialize Local Best Matrix (Same as Initial Population)"""
# Plain assignment: local_best_matrix refers to the same object as
# population, it is not a copy.
local_best_matrix = population
"""Create Global best row"""
# Index of the smallest fitness value.
global_best_row_index = argmin(fitness)
Exemple #13
0
 def __init__(self):
     """Create the data-file and fitness-file helpers used by this object."""
     data_helper = FromDataFileMLR.DataFile()
     self.DF = data_helper

     fitness_helper = FromFinessFileMLR.FitnessFile()
     self.FF = fitness_helper
Exemple #14
0
 def __init__(self):
     """Attach a ``DataFromFile`` reader and a ``FitnessResults`` evaluator."""
     reader = FromDataFileMLR.DataFromFile()
     self.filedata = reader

     evaluator = FromFinessFileMLR.FitnessResults()
     self.fitnessdata = evaluator
Exemple #15
0
 def __init__(self):
     """Attach a ``DataMLR`` helper and a ``FitnessMLR`` helper."""
     data_helper = FromDataFileMLR.DataMLR()
     self.DataFile = data_helper

     fitness_helper = FromFinessFileMLR.FitnessMLR()
     self.FitnessFile = fitness_helper
 def __init__(self):
     """Attach a ``DataFromFile`` reader as ``self.filedata``."""
     reader = FromDataFileMLR.DataFromFile()
     self.filedata = reader
Exemple #17
0
def validate_model_and_append(model, fileW, vector, TrainX, TrainY, ValidateX,
                              ValidateY, TestX, TestY):
    """Fit ``model`` on the columns selected by ``vector`` and write its stats.

    The selected columns come from ``OnlySelectTheOnesColumns(vector)``.
    The model is fitted on those columns of ``TrainX``; predictions are
    produced for cross-validation, validation and test; a fitness value is
    computed from the combined training and validation targets; and the
    rounded statistics are handed to ``write`` together with ``fileW``.

    Args:
        model: object providing ``fit(X, y)`` and ``predict(X)``.
        fileW: output handle forwarded unchanged to ``write``.
        vector: feature-selection vector for one candidate model.
        TrainX, TrainY, ValidateX, ValidateY, TestX, TestY: data splits.

    Returns:
        ``(status, fitness)``.  ``status`` is 1000 and ``fitness`` is 0 when
        ``model.fit`` raises; otherwise ``status`` is 1 and ``fitness`` is
        the value from ``calc_fitness``.  The success path previously fell
        off the end and returned ``None``; it now matches the failure path
        and the population-level ``validate_model``.
    """
    fitness = 0
    c = 2  # forwarded unchanged to calc_fitness
    # The predictivity filter further down is switched off by default.
    predictive = False
    unfit = 1000
    itFits = 1

    # Result containers, keyed below by the digest of the selected columns.
    trackDesc, trackFitness, trackModel, trackR2, trackQ2, \
        trackR2PredValidation, trackR2PredTest = InitializeTracks()

    yTrain, yHatTrain, yHatCV, yValidation, \
        yHatValidation, yTest, yHatTest = initializeYDimension()

    # Keep only the columns whose entry in the vector is one.
    xi = OnlySelectTheOnesColumns(vector)
    # SHA-1 digest of the selected columns, used as a lookup key (this is a
    # hash for identification, not encryption).
    idx = hashlib.sha1(array(xi)).digest()

    X_train_masked = TrainX.T[xi].T
    X_validation_masked = ValidateX.T[xi].T
    X_test_masked = TestX.T[xi].T

    try:
        model_desc = model.fit(X_train_masked, TrainY)
    except Exception:
        # A model that cannot be fitted is reported as unfit.  Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        return unfit, fitness

    round2 = FromDataFileMLR.getTwoDecPoint

    # Predicted values
    Yhat_cv = cv_predict(model, X_train_masked, TrainY)  # cross validation
    Yhat_validation = model.predict(X_validation_masked)
    Yhat_test = model.predict(X_test_masked)

    # R2 statistics (prediction for validation and test set)
    q2_loo = round2(r2(TrainY, Yhat_cv))
    r2pred_validation = round2(r2Pred(TrainY, ValidateY, Yhat_validation))
    r2pred_test = round2(r2Pred(TrainY, TestY, Yhat_test))

    Y_fitness = append(TrainY, ValidateY)
    Yhat_fitness = append(Yhat_cv, Yhat_validation)

    fitness = calc_fitness(xi, Y_fitness, Yhat_fitness, c)

    if predictive and ((q2_loo < 0.5) or (r2pred_validation < 0.5) or
                       (r2pred_test < 0.5)):
        # Not worth recording: report the fitness and skip the write.  The
        # original printed here but then carried on recording anyway.
        print("ending the program because of predictive is: ", predictive)
        return itFits, fitness

    # Predicted Y_hat for the training set
    Yhat_train = model.predict(X_train_masked)
    r2_train = r2(TrainY, Yhat_train)

    # Store stats
    trackDesc[idx] = str(xi)
    trackFitness[idx] = round2(fitness)
    trackModel[idx] = model_desc
    trackR2[idx] = round2(r2_train)
    trackQ2[idx] = round2(q2_loo)
    trackR2PredValidation[idx] = round2(r2pred_validation)
    trackR2PredTest[idx] = round2(r2pred_test)

    # Observed values are stored as-is; predictions are rounded per element.
    yTrain[idx] = TrainY.tolist()
    yHatTrain[idx] = [round2(value) for value in Yhat_train.tolist()]
    yHatCV[idx] = [round2(value) for value in Yhat_cv.tolist()]
    yValidation[idx] = ValidateY.tolist()
    yHatValidation[idx] = [round2(value) for value in Yhat_validation.tolist()]
    yTest[idx] = TestY.tolist()
    yHatTest[idx] = [round2(value) for value in Yhat_test.tolist()]

    write(model, fileW, trackDesc, trackFitness, trackModel, trackR2,
          trackQ2, trackR2PredValidation, trackR2PredTest)

    return itFits, fitness
    def validate_model(self, model, fileW, population, TrainX, TrainY,
                       ValidateX, ValidateY, TestX, TestY):
        """Fit and score ``model`` for every row of ``population``.

        For each row the selected columns are taken from
        ``self.OnlySelectTheOnesColumns``; the model is fitted on those
        columns of ``TrainX``; predictions are produced for
        cross-validation, validation and test; and a fitness value is stored
        in the returned array.  Rounded statistics for every recorded row
        are passed to ``self.write`` once the loop has finished.

        Args:
            model: object providing ``fit(X, y)`` and ``predict(X)``.
            fileW: output handle forwarded unchanged to ``self.write``.
            population: array whose first dimension indexes the candidates.
            TrainX, TrainY, ValidateX, ValidateY, TestX, TestY: data splits.

        Returns:
            ``(status, fitness)``.  ``fitness`` has one slot per population
            row.  ``status`` is 1 after a full pass.  If ``model.fit``
            raises for any row, evaluation stops immediately and
            ``(1000, fitness)`` is returned with the remaining slots still
            zero and nothing written.
        """
        numOfPop = population.shape[0]
        fitness = zeros(numOfPop)
        c = 2  # forwarded unchanged to calc_fitness
        # The predictivity filter inside the loop is switched off by default.
        predictive = False

        # Result containers, keyed below by the digest of the selected columns.
        trackDesc, trackFitness, trackModel, trackR2, trackQ2, \
            trackR2PredValidation, trackR2PredTest = self.InitializeTracks()

        yTrain, yHatTrain, yHatCV, yValidation, \
            yHatValidation, yTest, yHatTest = self.initializeYDimension()

        unfit = 1000
        itFits = 1
        DataFile = FromDataFileMLR.DataMLR()
        round2 = DataFile.getTwoDecPoint

        for i in range(numOfPop):
            xi = self.OnlySelectTheOnesColumns(population[i])

            # SHA-1 digest of the selected columns, used as a lookup key.
            idx = hashlib.sha1(array(xi)).digest()

            X_train_masked = TrainX.T[xi].T
            X_validation_masked = ValidateX.T[xi].T
            X_test_masked = TestX.T[xi].T

            try:
                model_desc = model.fit(X_train_masked, TrainY)
            except Exception:
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                return unfit, fitness

            # Predicted values
            Yhat_cv = self.cv_predict(model, X_train_masked,
                                      TrainY)  # cross validation
            Yhat_validation = model.predict(X_validation_masked)
            Yhat_test = model.predict(X_test_masked)

            # R2 statistics (prediction for validation and test set)
            q2_loo = round2(self.r2(TrainY, Yhat_cv))
            r2pred_validation = round2(
                self.r2Pred(TrainY, ValidateY, Yhat_validation))
            r2pred_test = round2(self.r2Pred(TrainY, TestY, Yhat_test))

            Y_fitness = append(TrainY, ValidateY)
            Yhat_fitness = append(Yhat_cv, Yhat_validation)

            fitness[i] = self.calc_fitness(xi, Y_fitness, Yhat_fitness, c)

            if predictive and ((q2_loo < 0.5) or (r2pred_validation < 0.5) or
                               (r2pred_test < 0.5)):
                # Not worth recording: keep the fitness, skip the stats.
                print("ending the program because of predictive is: ",
                      predictive)
                continue

            # Predicted Y_hat for the training set
            Yhat_train = model.predict(X_train_masked)
            r2_train = self.r2(TrainY, Yhat_train)

            # Store stats
            trackDesc[idx] = str(xi)
            trackFitness[idx] = round2(fitness[i])
            trackModel[idx] = model_desc
            trackR2[idx] = round2(r2_train)
            trackQ2[idx] = round2(q2_loo)
            trackR2PredValidation[idx] = round2(r2pred_validation)
            trackR2PredTest[idx] = round2(r2pred_test)

            # Predictions are rounded per element.  Comprehensions replace
            # the old index loops, which reused `i` and so clobbered the
            # population index for the rest of the iteration.
            yTrain[idx] = TrainY.tolist()
            yHatTrain[idx] = [round2(value) for value in Yhat_train.tolist()]
            yHatCV[idx] = [round2(value) for value in Yhat_cv.tolist()]
            yValidation[idx] = ValidateY.tolist()
            yHatValidation[idx] = [
                round2(value) for value in Yhat_validation.tolist()]
            yTest[idx] = TestY.tolist()
            yHatTest[idx] = [round2(value) for value in Yhat_test.tolist()]

        self.write(model, fileW, trackDesc, trackFitness, trackModel, trackR2,
                   trackQ2, trackR2PredValidation, trackR2PredTest)

        return itFits, fitness