def cross(Father, Mother, training_inputs, training_groundtruth, test_inputs, test_groundtruth ): ''' This cross function is used for the genetic algorithm optimization. the inputs are two individuals, and will return a "child" with a better training performance. This function will first generate two children. And for each child's one single feature, it will either be from the 'mother' or the 'father' And then a quick training and testing process will be used to evalue the AUROC score. And the 'child' with a better performance will be returned. ''' Child_1 = [] Child_2 = [] for i in range(6): coin = rand(-1,1) if coin>= 0: Child_1.append(Father[i]) Child_2.append(Mother[i]) else: Child_1.append(Mother[i]) Child_2.append(Father[i]) Child_1 = mutate(Child_1) Child_2 = mutate(Child_2) # build the network, make weights, and train it NN_1 = NeuralNetwork(input_layer=68, hidden_layer= Child_1[0], output_layer=1, lr = Child_1[1], lr_decay= Child_1[2], iteration= Child_1[5], batch_size= Child_1[4], mf= Child_1[3]) NN_2 = NeuralNetwork(input_layer=68, hidden_layer= Child_2[0], output_layer=1, lr = Child_2[1], lr_decay= Child_2[2], iteration= Child_2[5], batch_size= Child_2[4], mf= Child_2[3]) NN_1.make_weights() NN_2.make_weights() NN_1.train(training_inputs, training_groundtruth) NN_2.train(training_inputs, training_groundtruth) Score_1 = AUROC_cruve(NN_1, test_inputs, test_groundtruth, Fig=False) Score_2 = AUROC_cruve(NN_2, test_inputs, test_groundtruth, Fig=False) if Score_1 > Score_2: return Child_1, Score_1 else: return Child_2, Score_2
def genetic_algorithm(training_inputs, training_groundtruth, test_inputs, test_groundtruth, num_population,times, invasion, hidden_nodes, lr, lr_decay, mf, batch_size, epoch): ''' In this function, I will perform the genetic_algorithm to find out the best combination of hyperparameters for the training. [training_inputs, training_groundtruth, test_inputs, test_groundtruth] is a training, testing set which are obtained for evalute the training performance. num_population: the number of random candidates with the random number of feathers. times: total times of "cross" for the parents to exchange the features which uis aiming at getting 'progeny" with the better performance. invasion: number of new "invasion" individuals, besides the progeny got from the 'cross', for each time I also introduce some new individuals with different features, which is aimming at making the whole population get more information for optimization. [hidden_nodes, lr, lr_decay, mf, batch_size, epoch] are the six features i planned to use as the hyperparameter features that needs to be optimized They are all put in as the two-element list, which represents the range. In the function, I will use rand funvtion to generate a random number between the range. ''' # generate the parents population print('generating '+ str(num_population)+' individuals') # makeing sure the input are correct assert hidden_nodes[0] < hidden_nodes[1], 'something went wrong!' assert lr[0] < lr[1], 'something went wrong!' assert lr_decay[0] < lr_decay[1], 'something went wrong!' assert mf[0] < mf[1], 'something went wrong!' assert batch_size[0] < batch_size[1], 'something went wrong!' assert epoch[0] < epoch[1], 'something went wrong!' # generating a bunch of individuals based on the range that provided individuals_genom = [] individuals_phyno = [] for i in range(num_population): # randonly generate the feature _hidden_nodes = int(rand(hidden_nodes[0],hidden_nodes[1])) _lr = rand(lr[0], lr[1]) _lr_decay = rand(lr_decay[0], lr_decay[1]) _mf = rand(mf[0],mf[1]) _batch_size = int(rand(batch_size[0],batch_size[1] )) _epoch = int(rand(epoch[0], epoch[1])) # build an individual and put it into the whole set individuals_genom.append( [_hidden_nodes, _lr, _lr_decay, _mf, _batch_size, _epoch]) NN = NeuralNetwork(input_layer=68, hidden_layer= _hidden_nodes, output_layer=1, lr = _lr, lr_decay= _lr_decay, iteration= _epoch, batch_size= _batch_size, mf= _mf) NN.make_weights() NN.train(training_inputs, training_groundtruth) # also store the individual's performance in a list vector individuals_phyno.append(AUROC_cruve(NN, test_inputs, test_groundtruth, Fig=False)) # take the best performance people and keep it for the next generation my_phyno = max(individuals_phyno) idx = individuals_phyno.index(my_phyno) my_genome = individuals_genom[idx] n = invasion # begin the cross, do N times of cross for t in range(times): print('For the time '+str(t)+' the best candidates and the best result is') print(my_genome) print(my_phyno) if t >=1: if len(individuals_genom) % 2 == 0: # add the new invasion people, # make sure that the number is even add = n else: add = n+1 for i in range(add): _hidden_nodes = int(rand(hidden_nodes[0],hidden_nodes[1])) _lr = rand(lr[0] , lr[1]) _lr_decay = rand(lr_decay[0], lr_decay[1]) _mf = rand(mf[0],mf[1]) _batch_size = int(rand(batch_size[0],batch_size[1] )) _epoch = int(rand(epoch[0], epoch[1])) individuals_genom.append( [_hidden_nodes, _lr, _lr_decay, _mf, _batch_size, _epoch]) NN = NeuralNetwork(input_layer=68, hidden_layer= _hidden_nodes, output_layer=1, lr = _lr, lr_decay= _lr_decay, iteration= _epoch, batch_size= _batch_size, mf= _mf) NN.make_weights() NN.train(training_inputs, training_groundtruth) individuals_phyno.append(AUROC_cruve(NN, test_inputs, test_groundtruth, Fig=False)) # next_generation_genom = [] next_generation_phyno = [] next_generation_genom.append(my_genome) next_generation_phyno.append(my_phyno) assert len(individuals_genom) %2 == 0, 'something wrong!' for i in range(int(len(individuals_genom)/2)): child_genom, child_phyno = cross(individuals_genom[i*2],individuals_genom[ i*2 +1], training_inputs, training_groundtruth, test_inputs, test_groundtruth) # get the child, put the child into the next generation next_generation_genom.append(child_genom) next_generation_phyno.append(child_phyno) # next is now the parents individuals_genom = next_generation_genom individuals_phyno = next_generation_phyno # shift it randomly index = list(range(len(individuals_genom))) random.shuffle(index) individuals_phyno = [individuals_phyno[x] for x in index] individuals_genom = [individuals_genom[x] for x in index] # still get the best performance my_phyno = max(individuals_phyno) idx = individuals_phyno.index(my_phyno) my_genome = individuals_genom[idx] print('The whole crossing process is done') my_phyno = max(individuals_phyno) idx = individuals_phyno.index(my_phyno) my_genome = individuals_genom[idx] print('the best candidates and the best result is') print(my_genome) print(my_phyno)