class GA(object): def __init__(self, n_population, pc, pm, bankruptcy_data, non_bankruptcy_data, clusters_data, cluster_centers, threshold_list, population=None): self.threshold_list = threshold_list self.bankruptcy_data = bankruptcy_data self.non_bankruptcy_data = non_bankruptcy_data self.neural_network = NeuralNetwork(n_inputs=6, n_outputs=2, n_neurons_to_hl=6, n_hidden_layers=1) self.n_population = n_population self.p_crossover = pc # percent of crossover self.p_mutation = pm # percent of mutation self.population = population or self._makepopulation() self.saved_cluster_data = clusters_data self.cluster_centers = cluster_centers self.predict_bankruptcy = [] self.predict_non_bankruptcy = [] self.fitness_list = [] # list of chromosome and fitness self.currentUnderSampling = None self.predict_chromosome = None self.fitness() def init_neural_network(self, chromosome): # remove threshold from chromosome list primary_weights = chromosome[5:] matrix_list = [] for i in range(0, int(len(primary_weights) / 6) - 1): matrix_list.append(primary_weights[i * 6:(i + 1) * 6]) weights_matrix = array(matrix_list) layers = self.neural_network.layers i = 0 for neuron in layers[0].neurons: neuron.set_weights(weights_matrix[:, i]) i += 1 layers[1].neurons[0].set_weights(primary_weights[-6:]) def performance_measure(self): tp = 0 fp = 0 fn = 0 tn = 0 for item in self.bankruptcy_data: if self.predict(item) > 0.5: fp += 1 else: tp += 1 for item in self.non_bankruptcy_data: if self.predict(item) > 0.5: tn += 1 else: fn += 1 sensitivity = tp / (tp + fn) specificity = tn / (fp + tn) print("TP is : %s" % (str(tp))) print("FP is : %s" % (str(fp))) print("FN is : %s" % (str(fn))) print("TN is : %s" % (str(tn))) print("G-MEAN : %s" % (str(math.sqrt(sensitivity * specificity)))) print("Hit-ratio : %s" % (str((tp + tn) / (tp + fn + fp + tn)))) def predict(self, data): self.init_neural_network(self.predict_chromosome) return self.neural_network.update(data)[0] def _makepopulation(self): pop_list = [] for i in range(0, self.n_population): weights = [random.uniform(-5, 5) for _ in range(0, 36)] out_weights = [random.uniform(-5, 5) for _ in range(0, 12)] # make threshold list threshold1 = [ random.uniform(threshold[0], threshold[1]) for threshold in self.threshold_list ] chromosome = threshold1 + weights + out_weights pop_list.append(chromosome) return pop_list ''' b : the number of bankruptcy firms BAi : the classification accuracy of ith instances of bankruptcy firms n : the number of non-bankruptcy firms NAj : the classification accuracy of jth instances of non-bankruptcy firms POi : the predicated output of ith instances of bankruptcy firms AOi : the actual output of ith instances of non-bankruptcy firms POj : the predicated output of jth instances of non-bankruptcy firms AOj : the actual output of jth instances of non-bankruptcy firms ''' def ba_i(self, poi): if poi < 0.5: return 1 return 0 def na_j(self, poj): if poj > 0.5: return 1 return 0 def cbeus( self, thresholds ): # the rule structure for the cluster-based underSampling base on GA i = 0 undersampling_clusters = [] for cluster in self.saved_cluster_data: for instance in cluster: if euclidean_distances( [instance], [self.cluster_centers[i]]) < thresholds[i]: undersampling_clusters.append(instance) i += 1 return undersampling_clusters def fitness(self): fitness_sum = 0 trials = 3 for index in range(0, trials): for item in self.population: print("underSampling : Cut off % s" % str(item[:5])) self.currentUnderSampling = self.cbeus(item[:5]) self.init_neural_network(item) self.predict_non_bankruptcy = [] self.predict_bankruptcy = [] for instance in self.currentUnderSampling: self.predict_non_bankruptcy.append( self.neural_network.update(instance)[0]) for instance in self.bankruptcy_data: self.predict_bankruptcy.append( self.neural_network.update(instance)[0]) fitness_value = self.g_mean(len(self.currentUnderSampling)) self.fitness_list.append([item, fitness_value]) fitness_sum += fitness_value self.fitness_list.sort(key=lambda x: x[1]) self._select_parents(fitness_sum) self.fitness_list.sort(key=lambda x: x[1]) self.population = [] for item in self.fitness_list: self.population.append(item[0]) if len(self.population) == 5: break if index == trials - 1: os.system('cls' if os.name == 'nt' else 'clear') print("The Optimization Weights For Predict Is: %s " % str(self.population[0][5:])) self.predict_chromosome = self.population[0][5:] self.performance_measure() def g_mean(self, n): b = len(self.bankruptcy_data) sum_bankruptcy = 0 sum_non_bankruptcy = 0 for item in self.predict_non_bankruptcy: sum_non_bankruptcy += self.ba_i(item) for item in self.predict_bankruptcy: sum_bankruptcy += self.na_j(item) return math.sqrt( (1 / b) * sum_bankruptcy * (1 / n) * sum_non_bankruptcy) def cxOnePoint(self, ind1, ind2): """Executes a one point crossover on the input :term:`sequence` individuals. The two individuals are modified in place. The resulting individuals will respectively have the length of the other. :param ind1: The first individual participating in the crossover. :param ind2: The second individual participating in the crossover. :returns: A tuple of two individuals. This function uses the :func:`~random.randint` function from the python base :mod:`random` module. """ size = min(len(ind1), len(ind2)) cxpoint = random.randint(1, size - 1) ind1[cxpoint:], ind2[cxpoint:] = ind2[cxpoint:], ind1[cxpoint:] return ind1, ind2 def swapMutation(self, ind1): size = len(ind1) swpoint1 = random.randint(1, size - 1) swpoint2 = random.randint(1, size - 1) ind1[swpoint1], ind1[swpoint2] = ind1[swpoint2], ind1[swpoint1] return ind1 def _select_parents(self, fitness_sum): """ Roulette wheel selection Selects parents from the given population Args : population (list) : Current population from which parents will be selected fitness_sum (number) : Summation of all fitness value Returns : parents (IndividualGA, IndividualGA) : selected parents """ probability = [] for item in self.fitness_list: probability.append(item[1] / fitness_sum) item.append(item[1] / fitness_sum) ncrossover = math.ceil(self.n_population * self.p_crossover / 2) # number of crossover offspring nmutation = math.ceil(self.n_population * self.p_mutation) # number of mutation offspring selection_probability = set() while len(selection_probability) < ncrossover: selection_probability.add(random.uniform(0, 1)) probability = np.cumsum(probability).tolist() def roulette(prob): for i in range(0, len(probability)): if prob < probability[i]: return self.fitness_list[i][0] crossover_list = [] for item in list(selection_probability): crossover_list.append(roulette(item)) if len(crossover_list) == 2: inde1, inde2 = self.cxOnePoint(crossover_list[0][:], crossover_list[1][:]) # init the neural network with the individual 1 self.currentUnderSampling = self.cbeus(inde1[:5]) self.init_neural_network(inde1) self.predict_non_bankruptcy = [] self.predict_bankruptcy = [] for instance in self.currentUnderSampling: self.predict_non_bankruptcy.append( self.neural_network.update(instance)[0]) for instance in self.bankruptcy_data: self.predict_bankruptcy.append( self.neural_network.update(instance)[1]) fitness_value = self.g_mean(len(self.currentUnderSampling)) self.fitness_list.append([inde1, fitness_value]) # init the neural network with the individual 2 self.currentUnderSampling = self.cbeus(inde2[:5]) self.init_neural_network(inde2) self.predict_non_bankruptcy = [] self.predict_bankruptcy = [] for instance in self.currentUnderSampling: self.predict_non_bankruptcy.append( self.neural_network.update(instance)[0]) for instance in self.bankruptcy_data: self.predict_bankruptcy.append( self.neural_network.update(instance)[1]) fitness_value = self.g_mean(len(self.currentUnderSampling)) self.fitness_list.append([inde2, fitness_value]) crossover_list = [] # create individual with mutation selection_probability = set() while len(selection_probability) < nmutation: selection_probability.add(random.uniform(0, 1)) for item in list(selection_probability): inde3 = self.swapMutation(roulette(item)) self.currentUnderSampling = self.cbeus(inde3[:5]) self.init_neural_network(inde3) self.predict_non_bankruptcy = [] self.predict_bankruptcy = [] for instance in self.currentUnderSampling: self.predict_non_bankruptcy.append( self.neural_network.update(instance)[0]) for instance in self.bankruptcy_data: self.predict_bankruptcy.append( self.neural_network.update(instance)[1]) fitness_value = self.g_mean(len(self.currentUnderSampling)) self.fitness_list.append([inde3, fitness_value])
class GWO(object): def __init__(self, n_population, bankruptcy_data, non_bankruptcy_data, clusters_data, cluster_centers, threshold_list, population=None): self.threshold_list = threshold_list self.bankruptcy_data = bankruptcy_data self.non_bankruptcy_data = non_bankruptcy_data self.neural_network = NeuralNetwork(n_inputs=6, n_outputs=2, n_neurons_to_hl=6, n_hidden_layers=1) self.n_population = n_population self.population = population or self._makepopulation() self.saved_cluster_data = clusters_data self.cluster_centers = cluster_centers self.predict_bankruptcy = [] self.predict_non_bankruptcy = [] self.fitness_list = [] # list of chromosome and fitness self.currentUnderSampling = None self.predict_position = None self.search_main() def init_neural_network(self, chromosome): # remove threshold from chromosome list primary_weights = chromosome[5:] matrix_list = [] for i in range(0, int(len(primary_weights) / 6) - 1): matrix_list.append(primary_weights[i * 6:(i + 1) * 6]) weights_matrix = array(matrix_list) layers = self.neural_network.layers i = 0 for neuron in layers[0].neurons: neuron.set_weights(weights_matrix[:, i]) i += 1 layers[1].neurons[0].set_weights(primary_weights[-6:]) def performance_measure(self): tp = 0 fp = 0 fn = 0 tn = 0 for item in self.bankruptcy_data: if self.predict(item) > 0.5: fp += 1 else: tp += 1 for item in self.non_bankruptcy_data: if self.predict(item) > 0.5: tn += 1 else: fn += 1 sensitivity = tp / (tp + fn) specificity = tn / (fp + tn) print("TP is : %s" % (str(tp))) print("FP is : %s" % (str(fp))) print("FN is : %s" % (str(fn))) print("TN is : %s" % (str(tn))) print("G-MEAN : %s" % (str(math.sqrt(sensitivity * specificity)))) print("Hit-ratio : %s" % (str((tp + tn) / (tp + fn + fp + tn)))) def predict(self, data): self.init_neural_network(self.predict_position) return self.neural_network.update(data)[0] def _makepopulation(self): pop_list = [] for i in range(0, self.n_population): weights = [random.uniform(-5, 5) for _ in range(0, 36)] out_weights = [random.uniform(-5, 5) for _ in range(0, 12)] # make threshold list threshold1 = [ random.uniform(threshold[0], threshold[1]) for threshold in self.threshold_list ] position = threshold1 + weights + out_weights pop_list.append(position) return pop_list def ba_i(self, poi): if poi < 0.5: return 1 return 0 def na_j(self, poj): if poj > 0.5: return 1 return 0 def cbeus( self, thresholds ): # the rule structure for the cluster-based underSampling base on GA i = 0 undersampling_clusters = [] for cluster in self.saved_cluster_data: for instance in cluster: if euclidean_distances( [instance], [self.cluster_centers[i]]) < thresholds[i]: undersampling_clusters.append(instance) i += 1 return undersampling_clusters def search_main(self): Max_iter = 3 for index in range(0, Max_iter): for position in self.population: print("underSampling : Cut off % s" % str(position[:5])) self.currentUnderSampling = self.cbeus(position[:5]) self.init_neural_network(position) self.predict_non_bankruptcy = [] self.predict_bankruptcy = [] for instance in self.currentUnderSampling: self.predict_non_bankruptcy.append( self.neural_network.update(instance)[0]) for instance in self.bankruptcy_data: self.predict_bankruptcy.append( self.neural_network.update(instance)[0]) fitness_value = self.g_mean(len(self.currentUnderSampling)) self.fitness_list.append([position, fitness_value]) self.fitness_list.sort(key=lambda x: x[1]) # Update Alpha, Beta, and Delta Alpha_pos = self.fitness_list[0][0] # Update alpha Beta_pos = self.fitness_list[1][0] # Update beta Delta_pos = self.fitness_list[2][0] # Update delta a = 2 - index * ( (2) / Max_iter) # a decreases linearly from 2 to 0 for position in self.population: for j in range(0, len(position)): r1 = random.random() # r1 is a random number in [0,1] r2 = random.random() # r2 is a random number in [0,1] A1 = 2 * a * r1 - a # Equation (3.3) C1 = 2 * r2 # Equation (3.4) D_alpha = abs(C1 * Alpha_pos[j] - position[j]) # Equation (3.5)-part 1 X1 = Alpha_pos[j] - A1 * D_alpha # Equation (3.6)-part 1 r1 = random.random() r2 = random.random() A2 = 2 * a * r1 - a # Equation (3.3) C2 = 2 * r2 # Equation (3.4) D_beta = abs(C2 * Beta_pos[j] - position[j]) # Equation (3.5)-part 2 X2 = Beta_pos[j] - A2 * D_beta # Equation (3.6)-part 2 r1 = random.random() r2 = random.random() A3 = 2 * a * r1 - a # Equation (3.3) C3 = 2 * r2 # Equation (3.4) D_delta = abs(C3 * Delta_pos[j] - position[j]) # Equation (3.5)-part 3 X3 = Delta_pos[j] - A3 * D_delta # Equation (3.5)-part 3 position[j] = (X1 + X2 + X3) / 3 # Equation (3.7) if index == Max_iter - 1: os.system('cls' if os.name == 'nt' else 'clear') self.fitness_list.sort(key=lambda x: x[1]) print("The Optimization Weights For Predict Is: %s " % str(self.fitness_list[0][0][5:])) self.predict_position = self.fitness_list[0][0][5:] self.performance_measure() def g_mean(self, n): b = len(self.bankruptcy_data) sum_bankruptcy = 0 sum_non_bankruptcy = 0 for item in self.predict_non_bankruptcy: sum_non_bankruptcy += self.ba_i(item) for item in self.predict_bankruptcy: sum_bankruptcy += self.na_j(item) return math.sqrt( (1 / b) * sum_bankruptcy * (1 / n) * sum_non_bankruptcy)