def main():
    """Build a DE population over the full feature pool and score it with MLR.

    FIX: the original assigned ``unfit = 1000`` twice; the duplicate dead
    store has been removed.
    """
    fileW = FromFinessFileMLR.createAnOutputFile()
    model = mlr.MLR()

    # Number of descriptors should be 396 and number of population should be
    # 50 or more.
    numOfPop = 50
    numOfFea = 396

    # Sentinel fitness used until a model has actually been fitted.
    unfit = 1000

    # Final model requirements (thresholds documented here; not enforced in
    # this function -- presumably checked downstream. TODO confirm.)
    R2req_train = .6
    R2req_validate = .5
    R2req_test = .5

    # Load the train/validation/test splits and rescale the X matrices.
    TrainX, TrainY, ValidateX, ValidateY, TestX, TestY = FromDataFileMLR.getAllOfTheData()
    TrainX, ValidateX, TestX = FromDataFileMLR.rescaleTheData(TrainX, ValidateX, TestX)

    fittingStatus = unfit

    # Create a population based on the number of features selected, drawn
    # from the pool of features.
    population = DifferentialEvolution.Create_A_Population(numOfPop, numOfFea)
    fittingStatus, fitness = FromFinessFileMLR.validate_model(
        model, fileW, population,
        TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
def main():
    """Run the DE-BPSO feature-selection workflow end to end."""
    np.random.seed()

    # Output sink and regression model.
    fileW = createAnOutputFile()
    model = mlr.MLR()

    # Fetch the three data splits, then rescale the X matrices.
    TrainX, TrainY, ValidateX, ValidateY, TestX, TestY = FromDataFileMLR.getAllOfTheData()
    TrainX, ValidateX, TestX = FromDataFileMLR.rescaleTheData(TrainX, ValidateX, TestX)

    # DE-BPSO: velocity -> population -> fitness -> local/global bests -> evolve.
    velocity = create_initial_velocity()
    population = create_initial_population(velocity, Lambda=0.01)
    _status, fitness = FromFinessFileMLR.validate_model(
        model, fileW, population,
        TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
    local_best_matrix, local_fitness = create_initial_local_best_matrix(population, fitness)
    create_initial_global_best_row(local_best_matrix, local_fitness)
    evolve_population(population, fitness, velocity, local_best_matrix, local_fitness,
                      model, fileW, TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
def append_to_file(new_vector, fileW):
    """Validate one candidate feature vector and append its stats to fileW."""
    regression = DE_BPSO_model.DE_BPSO_MODEL()

    # Load and rescale all three data splits.
    trainX, trainY, validX, validY, testX, testY = FromDataFileMLR.getAllOfTheData()
    trainX, validX, testX = FromDataFileMLR.rescaleTheData(trainX, validX, testX)

    FromFinessFileMLR.validate_model_and_append(
        regression, fileW, new_vector,
        trainX, trainY, validX, validY, testX, testY)
def cal_fitness_DE(new_vector):
    """Score a single DE candidate vector and return its fitness."""
    regression = DE_BPSO_model.MLR()

    # Load and rescale the data splits used for validation.
    trainX, trainY, validX, validY, testX, testY = FromDataFileMLR.getAllOfTheData()
    trainX, validX, testX = FromDataFileMLR.rescaleTheData(trainX, validX, testX)

    return FromFinessFileMLR.validate_single_model(
        regression, new_vector,
        trainX, trainY, validX, validY, testX, testY)
def main():
    """Drive a full binary-PSO feature-selection run."""
    # BPSO parameters.
    population_size = 50
    feature_count = 385
    generation_count = 1000

    # Collaborating objects.
    regression = mlr.MLR()
    data_source = FromDataFileMLR.FromDataFileMLR()
    fitness_source = FromFinessFileMLR.FromFinessFileMR(data_source)
    swarm = BinaryParticleSwarmOptimization(regression, fitness_source,
                                            population_size, feature_count,
                                            generation_count)

    # Read the data splits from disk and rescale the feature matrices.
    trainX, trainY, validateX, validateY, testX, testY = data_source.getAllOfTheData()
    trainX, validateX, testX = data_source.rescaleTheData(trainX, validateX, testX)

    # BPSO algorithm: seed the swarm, score it, then iterate.
    swarm.create_initial_population()
    swarm.evaluate_population(trainX, trainY, validateX, validateY, testX, testY)
    swarm.create_initial_velocity()
    best_matrix, best_fitness = swarm.create_initial_local_best_matrix()
    swarm.create_initial_global_best_row()
    swarm.evolve_population(best_matrix, best_fitness, trainX, trainY,
                            validateX, validateY, testX, testY)
def __init__(self, numOfPop, numOfFea):
    """Initialize PSO state for numOfPop particles over numOfFea features."""
    # Search hyper-parameters.
    self.NofIterations = 2000
    self.alpha = 0.5

    # Helpers for raw-file access and fitness bookkeeping.
    self.filedata = FromDataFileMLR.DataFromFile()
    self.fitnessdata = FromFinessFileMLR.FitnessResults()

    # Per-particle state: velocities, local-best rows, local-best fitnesses.
    # NOTE: ndarray(...) allocates without initializing values.
    self.VelocityM = ndarray((numOfPop, numOfFea))
    self.LocalBestM = ndarray((numOfPop, numOfFea))
    self.LocalBestM_Fit = ndarray(numOfPop)

    # Global best row and its fitness (starts very high, i.e. "worst").
    self.GlobalBestRow = ndarray(numOfFea)
    self.GlobalBestFitness = 10000
def __init__(self, numOfPop, numOfFea):
    """Set up data access, analysis helpers, and the PSO matrices."""
    # Acquires and formats data from the Train/Validation/Test .csv files.
    self.filedata = FromDataFileMLR.DataFromFile()
    # Performs data analysis on training, validation, and test data.
    self.analyzer = FromFinessFileMLR.FitnessResults()

    # Search hyper-parameters.
    self.NumIterations = 1000
    self.alpha = 0.5  # starting alpha value

    # Per-particle state (ndarray(...) allocates without initializing).
    self.VelocityM = ndarray((numOfPop, numOfFea))   # velocity matrix
    self.LocalBestM = ndarray((numOfPop, numOfFea))  # local best matrix
    self.LocalBestM_Fit = ndarray(numOfPop)          # local best fitnesses

    # Best-fitting row found so far and its fitness (initialized very high).
    self.GlobalBestRow = ndarray(numOfFea)
    self.GlobalBestFitness = 10000
def main():
    """Run the BPSO feature-selection pipeline from scratch."""
    np.random.seed()

    # Output sink and regression model.
    fileW = createAnOutputFile()
    model = mlr.MLR()

    # Load the three data splits, then rescale the X matrices.
    TrainX, TrainY, ValidateX, ValidateY, TestX, TestY = FromDataFileMLR.getAllOfTheData()
    TrainX, ValidateX, TestX = FromDataFileMLR.rescaleTheData(TrainX, ValidateX, TestX)

    # BPSO: population -> fitness -> velocity -> local/global bests -> evolve.
    population = create_initial_population()
    fitness = evaluate_population(model, fileW, population,
                                  TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
    velocity = create_initial_velocity()
    local_best, local_fit = create_initial_local_best_matrix(population, fitness)
    create_initial_global_best_row(local_best, local_fit)
    evolve_population(population, fitness, velocity, local_best, local_fit,
                      model, fileW, TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
def main():
    """Drive the PSO feature-selection run using the Fitness analyzer.

    FIX: removed the dead store ``fittingStatus = unfit`` (the value was
    immediately overwritten by ``validate_model``) and the then-unused
    ``unfit`` local.
    """
    # Number of descriptors should be 385 and population size 50 or more.
    numOfPop = 50
    numOfFea = 385

    # Multiple Linear Regression model; the class is located in the mlr file.
    model = mlr.MLR()
    filedata = FromDataFileMLR.DataFromFile()
    fitnessdata = FromFinessFileMLR.FitnessResults()
    analyzer = Fitness(numOfPop, numOfFea)

    # Create an output file object for recording results.
    fileW = analyzer.createAnOutputFile()

    # Final model requirements: minimum R^2 of training 0.6, validation 0.5,
    # test 0.5. Declared here for reference; enforcement appears to happen in
    # the fitness/validation code (TODO confirm).
    R2req_train = .6
    R2req_validate = .5
    R2req_test = .5

    # getAllOfTheData (in FromDataFileMLR) places the training, validation,
    # and test data into the associated matrices; X matrices are rescaled.
    TrainX, TrainY, ValidateX, ValidateY, TestX, TestY = filedata.getAllOfTheData()
    TrainX, ValidateX, TestX = filedata.rescaleTheData(TrainX, ValidateX, TestX)

    # Seed the swarm and score the initial population.
    population = analyzer.createInitialPopulation(numOfPop, numOfFea)
    fittingStatus, fitness = fitnessdata.validate_model(
        model, fileW, population,
        TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)

    analyzer.CreateInitialVelocity(numOfPop, numOfFea)
    # Initialize the local-best matrix as the initial population (and its
    # fitnesses as the initial fitnesses).
    copyto(analyzer.LocalBestM, population)
    copyto(analyzer.LocalBestM_Fit, fitness)
    analyzer.FindGlobalBestRow()
    analyzer.PerformOneMillionIteration(
        numOfPop, numOfFea, population, fitness, model, fileW,
        TrainX, TrainY, ValidateX, ValidateY, TestX, TestY)
def main():
    """Run differential-evolution feature selection with an MLR model."""
    # DE parameters.
    pop_size, feat_count, gen_count = 50, 385, 100

    # Collaborating objects.
    regression = mlr.MLR()
    data_file = FromDataFileMLR.FromDataFileMLR()
    fitness_file = FromFinessFileMLR.FromFinessFileMR(data_file)
    evolver = DifferentialEvolution(regression, fitness_file,
                                    pop_size, feat_count, gen_count)

    # Data splits, rescaled.
    trainX, trainY, validateX, validateY, testX, testY = data_file.getAllOfTheData()
    trainX, validateX, testX = data_file.rescaleTheData(trainX, validateX, testX)

    # Differential evolution: initialize, score, evolve.
    evolver.create_initial_population()
    evolver.evaluate_population(trainX, trainY, validateX, validateY, testX, testY)
    evolver.evolve_population(trainX, trainY, validateX, validateY, testX, testY)
def main():
    """Run genetic-algorithm feature selection with an MLR model."""
    # GA parameters.
    pop_size, feat_count, gen_count = 50, 385, 1000

    # Collaborating objects.
    regression = mlr.MLR()
    data_file = FromDataFileMLR.FromDataFileMLR()
    fitness_file = FromFinessFileMLR.FromFinessFileMR(data_file)
    ga = GeneticAlgorithm(regression, fitness_file, pop_size, feat_count, gen_count)

    # Data splits, rescaled.
    trainX, trainY, validateX, validateY, testX, testY = data_file.getAllOfTheData()
    trainX, validateX, testX = data_file.rescaleTheData(trainX, validateX, testX)

    # Genetic algorithm: initialize, score, evolve.
    ga.create_initial_population()
    ga.evaluate_population(trainX, trainY, validateX, validateY, testX, testY)
    ga.evolve_population(trainX, trainY, validateX, validateY, testX, testY)
start = time.time() #Number of descriptor should be 396 and number of population should be 50 or more """Number of population""" numOfPop = 50 """Number of total features""" numOfFea = 396 # Final model requirements R2req_train = .6 R2req_validate = .5 R2req_test = .5 alpha = 0.5 beta = 0.004 TrainX, TrainY, ValidateX, ValidateY, TestX, TestY = FromDataFileMLR.getAllOfTheData( ) TrainX, ValidateX, TestX = FromDataFileMLR.rescaleTheData( TrainX, ValidateX, TestX) population = BPSO.Create_A_Population(numOfPop, numOfFea) """ Get fitness""" fitness = FromFinessFileMLR.validate_model(model, fileW, population, \ TrainX, TrainY, ValidateX, ValidateY, TestX, TestY) """Initialize velocity""" initial_velocity = BPSO.create_initial_velocity(numOfPop, numOfFea) #print str(shape(initial_velocity)) """Initialize Local Best Matrix (Same as Initial Population)""" local_best_matrix = population """Create Global best row""" global_best_row_index = argmin(fitness)
def __init__(self):
    """Bind the data-file and fitness-file helper objects."""
    self.DF = FromDataFileMLR.DataFile()      # data-file access helper
    self.FF = FromFinessFileMLR.FitnessFile() # fitness-file helper
def __init__(self):
    """Bind the data-file and fitness-results helper objects."""
    self.filedata = FromDataFileMLR.DataFromFile()       # data-file access helper
    self.fitnessdata = FromFinessFileMLR.FitnessResults() # fitness-results helper
def __init__(self):
    """Bind the MLR data-file and fitness-file helper objects."""
    self.DataFile = FromDataFileMLR.DataMLR()        # data-file access helper
    self.FitnessFile = FromFinessFileMLR.FitnessMLR() # fitness-file helper
def __init__(self):
    """Bind the data-file helper object."""
    self.filedata = FromDataFileMLR.DataFromFile()  # data-file access helper
def validate_model_and_append(model, fileW, vector, TrainX, TrainY, ValidateX,
                              ValidateY, TestX, TestY):
    """Fit `model` on the features selected by `vector`, compute fitness and
    R^2 statistics, and append the tracked results to `fileW`.

    Returns (unfit, fitness) when the model cannot be fitted, `fitness` when
    the predictive filter rejects the candidate, otherwise None after writing.

    FIXES vs. original:
    - Python 2 ``print`` statement converted to a ``print(...)`` call
      (syntax error under Python 3; also matches the sibling method's style).
    - Added the ``return fitness`` the original comment promised ("just
      return the fitness"); the branch was previously falling through.
      Note the branch is unreachable as written because ``predictive`` is 0.
    - Removed unused locals ``itFits`` and ``idxLength``.
    """
    fitness = 0
    c = 2  # coefficient passed through to calc_fitness

    # Booleans encoded as ints (legacy style kept for compatibility).
    false = 0
    true = 1
    predictive = false

    # Tracking dicts keyed by a hash of the selected-feature list.
    trackDesc, trackFitness, trackModel, trackR2, trackQ2, \
        trackR2PredValidation, trackR2PredTest = InitializeTracks()
    yTrain, yHatTrain, yHatCV, yValidation, \
        yHatValidation, yTest, yHatTest = initializeYDimension()
    unfit = 1000

    # Keep only the columns whose entry in `vector` is one.
    xi = OnlySelectTheOnesColumns(vector)

    # SHA-1 of the selected-column array: fast dictionary key.
    idx = hashlib.sha1(array(xi)).digest()

    # Mask each X matrix down to the selected columns.
    X_train_masked = TrainX.T[xi].T
    X_validation_masked = ValidateX.T[xi].T
    X_test_masked = TestX.T[xi].T

    try:
        model_desc = model.fit(X_train_masked, TrainY)
    except:
        # Model could not be fitted on this feature subset.
        return unfit, fitness

    # Predicted values: cross-validation on train, plus validation and test.
    Yhat_cv = cv_predict(model, X_train_masked, TrainY)
    Yhat_validation = model.predict(X_validation_masked)
    Yhat_test = model.predict(X_test_masked)

    # R^2 statistics (prediction for validation and test sets), rounded.
    q2_loo = FromDataFileMLR.getTwoDecPoint(r2(TrainY, Yhat_cv))
    r2pred_validation = FromDataFileMLR.getTwoDecPoint(
        r2Pred(TrainY, ValidateY, Yhat_validation))
    r2pred_test = FromDataFileMLR.getTwoDecPoint(r2Pred(TrainY, TestY, Yhat_test))

    # Fitness is computed over train + validation observations.
    Y_fitness = append(TrainY, ValidateY)
    Yhat_fitness = append(Yhat_cv, Yhat_validation)
    fitness = calc_fitness(xi, Y_fitness, Yhat_fitness, c)

    if predictive and ((q2_loo < 0.5) or (r2pred_validation < 0.5)
                       or (r2pred_test < 0.5)):
        # Not worth recording: just return the fitness.
        print("ending the program because of predictive is: ", predictive)
        return fitness

    # Compute predicted Y_hat for the training set.
    Yhat_train = model.predict(X_train_masked)
    r2_train = r2(TrainY, Yhat_train)

    # Store stats keyed by the feature-subset hash.
    trackDesc[idx] = str(xi)
    trackFitness[idx] = FromDataFileMLR.getTwoDecPoint(fitness)
    trackModel[idx] = model_desc
    trackR2[idx] = FromDataFileMLR.getTwoDecPoint(r2_train)
    trackQ2[idx] = FromDataFileMLR.getTwoDecPoint(q2_loo)
    trackR2PredValidation[idx] = FromDataFileMLR.getTwoDecPoint(r2pred_validation)
    trackR2PredTest[idx] = FromDataFileMLR.getTwoDecPoint(r2pred_test)

    # Observed and (two-decimal-rounded) predicted Y values per split.
    yTrain[idx] = TrainY.tolist()
    yHatTrain[idx] = [FromDataFileMLR.getTwoDecPoint(v) for v in Yhat_train.tolist()]
    yHatCV[idx] = [FromDataFileMLR.getTwoDecPoint(v) for v in Yhat_cv.tolist()]
    yValidation[idx] = ValidateY.tolist()
    yHatValidation[idx] = [FromDataFileMLR.getTwoDecPoint(v)
                           for v in Yhat_validation.tolist()]
    yTest[idx] = TestY.tolist()
    yHatTest[idx] = [FromDataFileMLR.getTwoDecPoint(v) for v in Yhat_test.tolist()]

    write(model, fileW, trackDesc, trackFitness, trackModel, trackR2,
          trackQ2, trackR2PredValidation, trackR2PredTest)
def validate_model(self, model, fileW, population, TrainX, TrainY, ValidateX,
                   ValidateY, TestX, TestY):
    """Fit `model` on each row of `population`, compute fitness and R^2
    statistics per row, record them, and write the results to `fileW`.

    Returns (itFits, fitness): itFits is 1 on normal completion; on a fit
    failure returns (unfit=1000, fitness) immediately.

    FIXES vs. original:
    - The inner rounding loops reused ``i``, shadowing the population-loop
      index; renamed to ``j`` (harmless today because ``i`` is rebound each
      outer iteration, but a latent trap for maintainers).
    - Removed unused local ``idxLength``.
    """
    numOfPop = population.shape[0]
    fitness = zeros(numOfPop)
    c = 2  # coefficient passed through to calc_fitness

    # Booleans encoded as ints (legacy style kept for compatibility).
    false = 0
    true = 1
    predictive = false

    # Tracking dicts keyed by a hash of each row's selected-feature list.
    trackDesc, trackFitness, trackModel, trackR2, trackQ2, \
        trackR2PredValidation, trackR2PredTest = self.InitializeTracks()
    yTrain, yHatTrain, yHatCV, yValidation, \
        yHatValidation, yTest, yHatTest = self.initializeYDimension()
    unfit = 1000
    itFits = 1
    DataFile = FromDataFileMLR.DataMLR()  # rounding helper, hoisted out of the loop

    for i in range(numOfPop):
        # Columns selected by this population row.
        xi = self.OnlySelectTheOnesColumns(population[i])
        idx = hashlib.sha1(array(xi)).digest()

        # Mask each X matrix down to the selected columns.
        X_train_masked = TrainX.T[xi].T
        X_validation_masked = ValidateX.T[xi].T
        X_test_masked = TestX.T[xi].T

        try:
            model_desc = model.fit(X_train_masked, TrainY)
        except:
            # Fit failure aborts the whole population evaluation.
            return unfit, fitness

        # Predicted values: cross-validation on train, validation, test.
        Yhat_cv = self.cv_predict(model, X_train_masked, TrainY)
        Yhat_validation = model.predict(X_validation_masked)
        Yhat_test = model.predict(X_test_masked)

        # R^2 statistics (prediction for validation and test sets), rounded.
        q2_loo = DataFile.getTwoDecPoint(self.r2(TrainY, Yhat_cv))
        r2pred_validation = DataFile.getTwoDecPoint(
            self.r2Pred(TrainY, ValidateY, Yhat_validation))
        r2pred_test = DataFile.getTwoDecPoint(self.r2Pred(TrainY, TestY, Yhat_test))

        # Fitness computed over train + validation observations.
        Y_fitness = append(TrainY, ValidateY)
        Yhat_fitness = append(Yhat_cv, Yhat_validation)
        fitness[i] = self.calc_fitness(xi, Y_fitness, Yhat_fitness, c)

        if predictive and ((q2_loo < 0.5) or (r2pred_validation < 0.5)
                           or (r2pred_test < 0.5)):
            # Not worth recording: keep the fitness and move on.
            print("ending the program because of predictive is: ", predictive)
            continue

        # Compute predicted Y_hat for the training set.
        Yhat_train = model.predict(X_train_masked)
        r2_train = self.r2(TrainY, Yhat_train)

        # Store stats keyed by the feature-subset hash.
        trackDesc[idx] = str(xi)
        trackFitness[idx] = DataFile.getTwoDecPoint(fitness[i])
        trackModel[idx] = model_desc
        trackR2[idx] = DataFile.getTwoDecPoint(r2_train)
        trackQ2[idx] = DataFile.getTwoDecPoint(q2_loo)
        trackR2PredValidation[idx] = DataFile.getTwoDecPoint(r2pred_validation)
        trackR2PredTest[idx] = DataFile.getTwoDecPoint(r2pred_test)

        # Observed and (two-decimal-rounded) predicted Y values per split.
        yTrain[idx] = TrainY.tolist()
        yHatTrain[idx] = Yhat_train.tolist()
        for j in range(len(yHatTrain[idx])):
            yHatTrain[idx][j] = DataFile.getTwoDecPoint(yHatTrain[idx][j])
        yHatCV[idx] = Yhat_cv.tolist()
        for j in range(len(yHatCV[idx])):
            yHatCV[idx][j] = DataFile.getTwoDecPoint(yHatCV[idx][j])
        yValidation[idx] = ValidateY.tolist()
        yHatValidation[idx] = Yhat_validation.tolist()
        for j in range(len(yHatValidation[idx])):
            yHatValidation[idx][j] = DataFile.getTwoDecPoint(yHatValidation[idx][j])
        yTest[idx] = TestY.tolist()
        yHatTest[idx] = Yhat_test.tolist()
        for j in range(len(yHatTest[idx])):
            yHatTest[idx][j] = DataFile.getTwoDecPoint(yHatTest[idx][j])

    self.write(model, fileW, trackDesc, trackFitness, trackModel, trackR2,
               trackQ2, trackR2PredValidation, trackR2PredTest)
    return itFits, fitness