def test_check_datatype():
    """Exercise ``_check_datatype`` on EEG, MEG and iEEG recordings.

    Three synthetic three-channel recordings are built (channel types
    ``eeg``, ``mag`` and ``seeg``) and the checker is expected to
    raise ``ValueError`` for unsupported datatypes, pass silently for the
    datatype matching the recording, and raise ``ValueError`` for a
    datatype that is absent from the recording.
    """
    sfreq = 1024.
    n_points = int(1e6)
    rng = RandomState(99)

    # Build one recording per datatype; the random draws happen in the
    # order eeg -> meg -> ieeg.
    recordings = {}
    for datatype, ch_type in (('eeg', 'eeg'), ('meg', 'mag'), ('ieeg', 'seeg')):
        info = mne.create_info(['ch1', 'ch2', 'ch3'], sfreq, [ch_type] * 3)
        samples = rng.random((3, n_points)) * 1e-6
        recordings[datatype] = mne.io.RawArray(samples, info)

    # Unsupported datatypes are rejected outright.
    for datatype in (None, 'anat'):
        expected = f'The specified datatype {datatype} is currently not'
        with pytest.raises(ValueError, match=expected):
            _check_datatype(recordings['eeg'], datatype)

    # A recording passes the check for its own datatype.
    for datatype in ('eeg', 'meg', 'ieeg'):
        _check_datatype(recordings[datatype], datatype)

    # Asking for a datatype the recording does not contain must fail.
    mismatches = [
        ('ieeg', 'eeg'),
        ('meg', 'eeg'),
        ('ieeg', 'meg'),
        ('eeg', 'meg'),
        ('meg', 'ieeg'),
        ('eeg', 'ieeg'),
    ]
    for source, datatype in mismatches:
        expected = f'The specified datatype {datatype} was not found'
        with pytest.raises(ValueError, match=expected):
            _check_datatype(recordings[source], datatype)
def get_special_selector(feat_selector, feat_selector_params, random_state,
                         num_feat_keys):
    """Instantiate a feature selector with a random initial mask.

    Parameters
    ----------
    feat_selector : callable
        Called as ``feat_selector(mask=...)`` to build the selector.
    feat_selector_params : dict
        Parameter dictionary; it is updated in place when it holds a
        ``'selector__mask'`` entry with one of the two recognised values.
    random_state : None, int or RandomState-like
        ``None`` seeds a new ``RandomState`` from ``np.random.randint(1000)``,
        an int is used as the seed, anything else is used as given.
    num_feat_keys : int
        Length of the random mask.

    Returns
    -------
    tuple
        ``(selector, feat_selector_params)``.
    """
    # Resolve the source of randomness
    if random_state is None:
        rng = RandomState(np.random.randint(1000))
    elif isinstance(random_state, int):
        rng = RandomState(random_state)
    else:
        rng = random_state

    # Build the selector around a mask of random values
    selector = feat_selector(mask=rng.random(num_feat_keys))

    mask_key = 'selector__mask'
    if mask_key not in feat_selector_params:
        return selector, feat_selector_params

    requested = feat_selector_params[mask_key]
    if requested == 'sets as hyperparameters':
        # Expose the mask as a searchable array bounded to [0, 1]
        searchable = Array(init=[.5] * num_feat_keys)
        mutated = searchable.set_mutation(sigma=1 / 6)
        mutated.set_bounds(lower=0, upper=1)
        feat_selector_params[mask_key] = searchable
    elif requested == 'sets as random features':
        # Keep the random mask set above; drop the placeholder entry
        del feat_selector_params[mask_key]

    return selector, feat_selector_params
def test_unrelated_columns(N=60, random_seed=12345):
    """
    Test to see if 'unrelated' columns jam up the analysis.

    See the GitHub issue at
    https://github.com/ACCLAB/DABEST-python/issues/44.
    NOTE(review): the earlier wording cited "Issue 43" while the link points
    to issue 44 -- confirm which one is meant.

    Added in v0.2.5.

    Parameters
    ----------
    N : int
        Number of rows in the generated data frame.
    random_seed : int
        Seed for the PCG64 bit generator. The reference values asserted
        below were only established for the defaults (N=60, seed 12345).
    """
    # Honour the `random_seed` argument (it used to be ignored in favour of
    # a hard-coded 12345, which is still the default).
    rng = RandomState(PCG64(random_seed))

    # The draws happen in key order: groups, color, value.
    df = pd.DataFrame({
        'groups': rng.choice(['Group 1', 'Group 2', 'Group 3'], size=(N, )),
        'color': rng.choice(['green', 'red', 'purple'], size=(N, )),
        'value': rng.random(size=(N, ))
    })

    # An all-NaN column that takes no part in the comparison.
    df['unrelated'] = np.nan

    test = load(data=df, x='groups', y='value', idx=['Group 1', 'Group 2'])

    md = test.mean_diff.results

    assert md.difference[0] == pytest.approx(-0.0322, abs=1e-4)
    assert md.bca_low[0] == pytest.approx(-0.2279, abs=1e-4)
    assert md.bca_high[0] == pytest.approx(0.1613, abs=1e-4)
def _adjust_and_check_fit( cls, num_objects: int, target_bounding_boxes: np.ndarray, positions: np.ndarray, placement_area: PlacementArea, random_state: RandomState, ) -> Tuple[bool, Optional[np.ndarray]]: """ This method will check if the current `target_bounding_boxes` and `positions` can fit in the table and if so, will return a new array of positions randomly sampled inside of the `placement_area` """ width, height, _ = placement_area.size half_sizes = target_bounding_boxes[:, 1, :] max_x, max_y, _ = np.max(positions + half_sizes, axis=0) min_x, min_y, _ = np.min(positions - half_sizes, axis=0) size_x, size_y = max_x - min_x, max_y - min_y if size_x < width and size_y < height: # Sample a random offset of the "remaning area" delta_x = -min_x + random_state.random() * (width - size_x) delta_y = -min_y + random_state.random() * (height - size_y) return ( True, positions + np.tile(np.array([delta_x, delta_y, 0]), (num_objects, 1)), ) return False, None
def multiplier_proposal_vector(q, d=1.05, f=1, rs=0):
    """Propose new values by scaling entries of ``q`` with random multipliers.

    Parameters
    ----------
    q : numpy.ndarray
        Current values; its shape is used for all draws.
    d : float
        Tuning constant; each multiplier is ``exp(2 * log(d) * (u - 0.5))``
        with ``u`` uniform on [0, 1).
    f : float
        Success probability of the per-entry Bernoulli draw; entries whose
        draw is 0 keep a multiplier of exactly 1.
    rs : RandomState-like or falsy
        Random source. A falsy value creates a fresh MT19937-backed
        ``RandomState`` seeded with ``random.randint(1000, 9999)``.

    Returns
    -------
    tuple
        ``(q * multipliers, 0, sum(log(multipliers)))``.
    """
    if not rs:
        seed = random.randint(1000, 9999)
        rs = RandomState(MT19937(SeedSequence(seed)))

    shape = q.shape
    update_flags = rs.binomial(1, f, shape)
    uniforms = rs.random(shape)

    log_width = 2 * np.log(d)
    multipliers = np.exp(log_width * (uniforms - .5))
    # Entries that were not selected stay unchanged
    multipliers[update_flags == 0] = 1.

    log_multiplier_sum = np.sum(np.log(multipliers))
    return q * multipliers, 0, log_multiplier_sum
class TestManoModel(unittest.TestCase):
    """Smoke tests for ``ManoModel``."""

    def setUp(self):
        # Fixed seed so the random inputs are reproducible
        self.random = RandomState(7)

    def test_constructor(self):
        """Both hand variants construct and expose their model data."""
        for left_hand in (False, True):
            with self.subTest(f"ManoModel(left_hand={left_hand})"):
                model = ManoModel(left_hand)
                self.assertEqual(model.is_left_hand, left_hand)

                data_attributes = ('faces', 'weights', 'kintree_table',
                                   'shapedirs', 'posedirs')
                for attribute in data_attributes:
                    self.assertIsNotNone(getattr(model, attribute))

                self.assertIsNotNone(model.origins())
                self.assertIsNotNone(model.vertices())

                # One link name per origin, and five tip links
                self.assertEqual(len(model.link_names), len(model.origins()))
                self.assertEqual(len(model.tip_links), 5)

    def test_origins(self):
        """Test the MANO joints transformation."""
        # NOTE(review): despite the test name this calls ``vertices`` rather
        # than ``origins`` -- confirm which call is intended.
        model = ManoModel()
        translation = self.random.random((3, ))
        result = model.vertices(trans=translation)
        self.assertIsNotNone(result)

    def test_vertices(self):
        """Test the MANO vertices transformation."""
        model = ManoModel()
        shape_coefficients = self.random.random(10) * 0.1
        translation = self.random.random(3)
        result = model.vertices(betas=shape_coefficients, trans=translation)
        self.assertIsNotNone(result)
def test_monitor():
    """Feed random actions and rewards to a ``Monitor`` and check its tallies.

    After 100 reports the per-step optimal-action flags must equal the
    element-wise agreement of agent and optimal actions, and the per-step
    values of ``t_average_cumulative_reward`` must equal the running sum of
    the rewards.
    """
    n_steps = 100
    monitor = Monitor()

    rng = RandomState(12345)
    actions = rng.randint(0, 2, n_steps)
    optimal_actions = rng.randint(0, 2, n_steps)
    rewards = rng.random(n_steps)

    steps = zip(actions, optimal_actions, rewards)
    for step, (action, optimal, reward) in enumerate(steps):
        monitor.report(
            t=step,
            agent_action=action,
            optimal_action=optimal,
            action_reward=reward
        )

    # Per-step flag: 1 when the agent picked the optimal action
    hit_counts = monitor.t_count_optimal_action
    observed_hits = [hit_counts[t] for t in sorted(hit_counts)]
    expected_hits = [
        int(action == optimal)
        for action, optimal in zip(actions, optimal_actions)
    ]
    assert np.array_equal(observed_hits, expected_hits)

    # Per-step reward tracker values against the running reward sum
    trackers = monitor.t_average_cumulative_reward
    observed_rewards = [trackers[t].get_value() for t in sorted(trackers)]
    assert np.array_equal(observed_rewards, np.cumsum(rewards))
class RandomParcels(BaseEstimator):
    """Grow a random parcellation by expanding labelled regions.

    Parameters
    ----------
    geo : sequence of array-like
        For every location, the indices of its neighbouring locations.
    n_parcels : int
        Number of parcels (labels ``1 .. n_parcels``) to grow.
    medial_wall_inds : iterable, optional
        Locations placed in the ``done`` set from the start, so they are
        never used as a start location or as a growth origin. Takes
        precedence over ``medial_wall_mask``.
    medial_wall_mask : array-like, optional
        Mask whose ``True`` entries are treated like ``medial_wall_inds``.
    random_state : None, int or RandomState-like, default=1
        ``None`` creates an unseeded ``RandomState``, an int is used as the
        seed, anything else is used as the random source directly.
    """

    def __init__(self, geo, n_parcels, medial_wall_inds=None,
                 medial_wall_mask=None, random_state=1):

        # Set passed params
        self.geo = geo
        self.n_parcels = n_parcels
        self.medial_wall_inds = medial_wall_inds
        self.medial_wall_mask = medial_wall_mask
        self.random_state = random_state

        # The parcellation is generated lazily on first request
        self.mask = None

    def get_parc(self, copy=True):
        """Return the label mask, generating it first if needed."""
        if self.mask is None:
            self._generate_parc_from_params()

        if copy:
            return self.mask.copy()
        return self.mask

    def _generate_parc_from_params(self):
        """Process the constructor arguments and run a full generation."""

        # Proc by input args
        self._proc_geo()
        self._proc_medial_wall()
        self._proc_random_state()

        # Set up mask, done and flags
        self.sz = len(self._geo)
        self.reset()

        # Init
        self.init_parcels()

        # Then generate
        self.generate_parcels()

    def _proc_geo(self):
        # Arrays allow fancy-indexing the neighbours by mask value
        self._geo = [np.array(g) for g in self.geo]

    def _proc_medial_wall(self):
        """Build ``m_wall``, the set of locations excluded from growth."""
        if self.medial_wall_inds is not None:
            self.m_wall = set(list(self.medial_wall_inds))
        elif self.medial_wall_mask is not None:
            # Element-wise comparison on purpose (array input)
            self.m_wall = set(list(
                np.where(self.medial_wall_mask == True)[0]))  # noqa: E712
        else:
            self.m_wall = set()

    def _proc_random_state(self):
        """Resolve ``random_state`` into the ``r_state`` random source."""
        if self.random_state is None:
            self.r_state = RandomState()
        elif isinstance(self.random_state, int):
            self.r_state = RandomState(seed=self.random_state)
        else:
            self.r_state = self.random_state

    def reset(self):
        '''Just reset the mask, and set w/ done info'''
        self.mask = np.zeros(self.sz, dtype='int16')
        self.done = self.m_wall.copy()
        self.ready, self.generated = False, False

    def init_parcels(self):
        """Draw the start location and selection weight of every parcel."""

        # Generate the starting locs
        valid = np.setdiff1d(np.arange(self.sz), np.array(list(self.done)))
        self.start_locs = self.r_state.choice(valid, size=self.n_parcels,
                                              replace=False)

        # Set random probs. that each loc is chosen
        self.probs = self.r_state.random(size=self.n_parcels)

    def setup(self):
        '''This should be called before generating parcel,
        so after a mutation has been made, setup needs to be called.
        It also does not hurt to call setup an extra time,
        as nothing random is set.'''

        # Generate corresponding labels w/ each loc
        self.labels = np.arange(1, self.n_parcels + 1, dtype='int16')

        # Mask where if == 1, then that parcel is done
        self.finished = np.zeros(self.n_parcels, dtype='bool_')

        # Drop the first points
        self.mask[self.start_locs] = self.labels

        # Set ready flag to True
        self.ready = True

    def get_probs(self):
        """Return the parcel selection weights normalised to sum to 1."""
        return self.probs / np.sum(self.probs)

    def choice(self):
        '''Select a valid label based on probs.'''
        msk = self.finished == 0
        probs = self.probs[msk] / np.sum(self.probs[msk])
        label = self.r_state.choice(self.labels[msk], p=probs)

        return label

    def get_valid_neighbors(self, loc):
        """Return the neighbours of ``loc`` whose mask value is still 0."""
        ns = self._geo[loc]
        valid_ns = ns[self.mask[ns] == 0]

        return valid_ns

    def generate_parcels(self):
        """Add spots until every parcel is marked finished."""
        if self.ready is False:
            self.setup()

        # Keep looping until every spot is filled
        while (self.finished == 0).any():
            self.add_spot()

        # Set generated flag when done
        self.generated = True

    def add_spot(self):
        """Pick a parcel and try to grow it by one location."""

        # Select which parcel to add to
        label = self.choice()

        # Determine valid starting locations anywhere in existing parcel
        current = np.where(self.mask == label)[0]
        valid = set(current) - self.done

        self.proc_spot(valid, label)

    def proc_spot(self, valid, label):
        """Grow parcel ``label`` by one location, or mark it finished.

        Candidate origins are drawn from ``valid`` one at a time. An origin
        with no open neighbour is added to ``done`` and removed from
        ``valid`` (which is modified in place) before the next draw. The
        first origin that has an open neighbour hands the label to one of
        them, chosen at random. When ``valid`` runs out, the parcel is
        flagged as finished.

        This is a loop rather than a recursive call so that a parcel with
        many exhausted locations cannot exceed Python's recursion limit;
        the sequence of random draws is the same as in the recursive form.
        """
        while len(valid) > 0:

            # Select randomly from the valid starting locs
            loc = self.r_state.choice(tuple(valid))

            # Select a valid + open neighbor
            valid_ns = self.get_valid_neighbors(loc)

            if len(valid_ns) > 0:

                # Select a valid choice, and add it w/ the right label
                choice = self.r_state.choice(valid_ns)
                self.mask[choice] = label

                # If this was the only choice, mark start loc as done
                if len(valid_ns) == 1:
                    self.done.add(loc)
                return

            # No open neighbours: retire this origin and try another
            self.done.add(loc)
            valid.remove(loc)

        # If no valid choices, then set this parcel to finished
        self.finished[label - 1] = 1
class HierarchicalLDA(object):
    """Gibbs sampler for hierarchical LDA over a nested-CRP tree of topics.

    Parameters
    ----------
    corpus : sequence of sequences
        One sequence of word ids per document.
    vocab : sequence
        Vocabulary; its length is the number of word types.
    alpha : float
        Smoothing on doc-topic distributions.
    gamma : float
        "Imaginary" customers at the next, as yet unused table.
    eta : float
        Smoothing on topic-word distributions.
    seed : int
        Seed of the sampler's ``RandomState``.
    verbose : bool
        Stored on the instance; not otherwise used in this class.
    num_levels : int
        Depth of every path through the tree.
    """

    def __init__(self, corpus, vocab,
                 alpha=10.0, gamma=1.0, eta=0.1,
                 seed=42, verbose=True, num_levels=3):

        # Node bookkeeping is class-level, so restart it for a new model
        NCRPNode.total_nodes = 0
        NCRPNode.last_node_id = 0

        self.corpus = corpus
        self.vocab = vocab
        self.alpha = alpha  # smoothing on doc-topic distributions
        self.gamma = gamma  # "imaginary" customers at the next, as yet unused table
        self.eta = eta      # smoothing on topic-word distributions

        self.seed = seed
        self.random_state = RandomState(seed)
        self.verbose = verbose

        self.num_levels = num_levels
        self.num_documents = len(corpus)
        self.num_types = len(vocab)
        self.eta_sum = eta * self.num_types

        # initialise a single path
        # (builtin `object`/`int` dtypes: the `np.object`/`np.int` aliases
        # no longer exist in current NumPy)
        path = np.zeros(self.num_levels, dtype=object)

        # initialize and fill the topic pointer arrays for
        # every document. Set everything to the single path that
        # we added earlier.
        self.root_node = NCRPNode(self.num_levels, self.vocab)
        # currently selected path (ie leaf node) through the NCRP tree
        self.document_leaves = {}
        # indexed < doc, token >
        self.levels = np.zeros(self.num_documents, dtype=object)
        for d in range(len(self.corpus)):

            # populate nodes into the path of this document
            doc = self.corpus[d]
            doc_len = len(doc)
            path[0] = self.root_node
            self.root_node.customers += 1  # always add to the root node first
            for level in range(1, self.num_levels):
                # at each level, a node is selected by its parent node
                # based on the CRP prior
                parent_node = path[level-1]
                level_node = parent_node.select(self.gamma)
                level_node.customers += 1
                path[level] = level_node

            # set the leaf node for this document
            leaf_node = path[self.num_levels-1]
            self.document_leaves[d] = leaf_node

            # randomly assign each word in the document to a level (node)
            # along the path
            self.levels[d] = np.zeros(doc_len, dtype=int)
            for n in range(doc_len):
                w = doc[n]
                random_level = self.random_state.randint(self.num_levels)
                random_node = path[random_level]
                random_node.word_counts[w] += 1
                random_node.total_words += 1
                self.levels[d][n] = random_level

    def estimate(self, num_samples, display_topics=50, n_words=5,
                 with_weights=True):
        """Run ``num_samples`` sweeps of path and level sampling.

        Every sweep resamples the path of each document and then the level
        of each word. The tree is printed whenever ``s + 1`` is a multiple
        of ``display_topics`` (never on the very first sweep).
        """
        print('HierarchicalLDA sampling\n')
        for s in range(num_samples):

            sys.stdout.write('.')

            for d in range(len(self.corpus)):
                self.sample_path(d)

            for d in range(len(self.corpus)):
                self.sample_topics(d)

            if (s > 0) and ((s+1) % display_topics == 0):
                print(f" {s+1}")
                self.print_nodes(n_words, with_weights)

    def sample_path(self, d):
        """Resample the tree path of document ``d``."""

        # define a path starting from the leaf node of this doc
        path = np.zeros(self.num_levels, dtype=object)
        node = self.document_leaves[d]
        for level in range(self.num_levels-1, -1, -1):  # e.g. [3, 2, 1, 0] for num_levels = 4
            path[level] = node
            node = node.parent

        # remove this document from the path, deleting empty nodes if necessary
        self.document_leaves[d].drop_path()

        ############################################################
        # calculates the prior p(c_d | c_{-d}) in eq. (4)
        ############################################################

        node_weights = {}
        self.calculate_ncrp_prior(node_weights, self.root_node, 0.0)

        ############################################################
        # calculates the likelihood p(w_d | c, w_{-d}, z) in eq. (4)
        ############################################################

        level_word_counts = {}
        for level in range(self.num_levels):
            level_word_counts[level] = {}
        doc_levels = self.levels[d]
        doc = self.corpus[d]

        # remove doc from path
        for n in range(len(doc)):  # for each word in the doc

            # count the word at each level
            level = doc_levels[n]
            w = doc[n]
            if w not in level_word_counts[level]:
                level_word_counts[level][w] = 1
            else:
                level_word_counts[level][w] += 1

            # remove word count from the node at that level
            level_node = path[level]
            level_node.word_counts[w] -= 1
            level_node.total_words -= 1
            assert level_node.word_counts[w] >= 0
            assert level_node.total_words >= 0

        self.calculate_doc_likelihood(node_weights, level_word_counts)

        ############################################################
        # pick a new path
        ############################################################

        nodes = np.array(list(node_weights.keys()))
        weights = np.array([node_weights[node] for node in nodes])
        # normalise so the largest weight is 1
        weights = np.exp(weights - np.max(weights))
        weights = weights / np.sum(weights)

        choice = self.random_state.multinomial(1, weights).argmax()
        node = nodes[choice]

        # if we picked an internal node, we need to add a new path to the leaf
        if not node.is_leaf():
            node = node.get_new_leaf()

        # add the doc back to the path
        node.add_path()                     # add a customer to the path
        self.document_leaves[d] = node      # store the leaf node for this doc

        # add the words
        for level in range(self.num_levels-1, -1, -1):  # e.g. [3, 2, 1, 0] for num_levels = 4
            word_counts = level_word_counts[level]
            for w in word_counts:
                node.word_counts[w] += word_counts[w]
                node.total_words += word_counts[w]
            node = node.parent

    def calculate_ncrp_prior(self, node_weights, node, weight):
        ''' Calculates the prior on the path according to the nested CRP '''

        for child in node.children:
            child_weight = log(float(child.customers)
                               / (node.customers + self.gamma))
            self.calculate_ncrp_prior(node_weights, child,
                                      weight + child_weight)

        node_weights[node] = weight + log(
            self.gamma / (node.customers + self.gamma))

    def calculate_doc_likelihood(self, node_weights, level_word_counts):
        """Add the document's word likelihood to every entry of ``node_weights``."""

        # calculate the weight for a new path at a given level
        new_topic_weights = np.zeros(self.num_levels)
        for level in range(1, self.num_levels):  # skip the root

            word_counts = level_word_counts[level]
            total_tokens = 0

            for w in word_counts:
                count = word_counts[w]
                # closed form of the per-token log-ratio sum; explained in
                # calculate_word_likelihood_at_level
                up_part = self.eta
                down_part = self.eta_sum + total_tokens
                new_topic_weights[level] += (
                    math.lgamma(up_part+count) - math.lgamma(up_part)
                    - (math.lgamma(down_part+count) - math.lgamma(down_part)))
                total_tokens += count

        self.calculate_word_likelihood(node_weights, self.root_node, 0.0,
                                       level_word_counts, new_topic_weights, 0)

    def calculate_word_likelihood_at_level(self, node_word_count,
                                           node_total_words,
                                           level_word_count_at_level,
                                           new_topic_weights):
        """Return the log weight of one level's words under one node.

        For each word the sum over ``i`` in ``range(count)`` of
        ``log((up_part + i) / (down_part + i))`` is evaluated in closed
        form as
        ``lgamma(up_part + count) - lgamma(up_part)
        - (lgamma(down_part + count) - lgamma(down_part))``,
        which is faster than the explicit loop. ``up_part`` and
        ``down_part`` are floats, so the gamma function cannot be replaced
        by a factorial. ``new_topic_weights`` is accepted but not used.
        """
        node_weight = 0.0
        word_counts = level_word_count_at_level
        total_words = 0

        for w in word_counts:
            count = word_counts[w]
            up_part = self.eta + node_word_count[w]
            down_part = self.eta_sum + node_total_words + total_words
            node_weight += (
                math.lgamma(up_part+count) - math.lgamma(up_part)
                - (math.lgamma(down_part+count) - math.lgamma(down_part)))
            total_words += count

        return node_weight

    def calculate_word_likelihood(self, node_weights, node, weight,
                                  level_word_counts, new_topic_weights, level):
        """Recursively add word-likelihood weights for ``node`` and its subtree.

        NOTE(review): ``weight`` is forwarded to the children but is never
        added to ``node_weights`` itself -- confirm this is intended.
        """

        # first calculate the likelihood of the words at this level, given this topic
        node_weight = self.calculate_word_likelihood_at_level(
            node.word_counts, node.total_words,
            level_word_counts[level], new_topic_weights)

        # propagate that weight to the child nodes
        for child in node.children:
            self.calculate_word_likelihood(node_weights, child,
                                           weight + node_weight,
                                           level_word_counts,
                                           new_topic_weights, level+1)

        # finally if this is an internal node, add the weight of a new path
        level += 1
        while level < self.num_levels:
            node_weight += new_topic_weights[level]
            level += 1

        node_weights[node] += node_weight

    def get_weighted_random(self, weights):
        """Draw an index with probability proportional to ``weights``."""
        total = weights.sum()
        n = self.random_state.random()
        n *= total
        for i, w in enumerate(weights):
            if n <= w:
                return i
            else:
                n -= w
        return len(weights)-1  # exceptional case

    def sample_topics(self, d):
        """Resample the level of every word of document ``d`` along its path."""

        doc = self.corpus[d]

        # initialise level counts
        doc_levels = self.levels[d]
        level_counts = np.zeros(self.num_levels, dtype=int)
        for c in doc_levels:
            level_counts[c] += 1

        # get the leaf node and populate the path
        path = np.zeros(self.num_levels, dtype=object)
        node = self.document_leaves[d]
        for level in range(self.num_levels-1, -1, -1):  # e.g. [3, 2, 1, 0] for num_levels = 4
            path[level] = node
            node = node.parent

        # sample a new level for each word
        level_weights = np.zeros(self.num_levels)
        for n in range(len(doc)):

            w = doc[n]
            word_level = doc_levels[n]

            # remove from model
            level_counts[word_level] -= 1
            node = path[word_level]
            node.word_counts[w] -= 1
            node.total_words -= 1

            # pick new level
            for level in range(self.num_levels):
                level_weights[level] = (self.alpha + level_counts[level]) * \
                    (self.eta + path[level].word_counts[w]) / \
                    (self.eta_sum + path[level].total_words)
            level_weights = level_weights / np.sum(level_weights)
            level = self.get_weighted_random(level_weights)

            # put the word back into the model
            doc_levels[n] = level
            level_counts[level] += 1
            node = path[level]
            node.word_counts[w] += 1
            node.total_words += 1

    def print_nodes(self, n_words, with_weights):
        """Print the whole tree starting at the root."""
        self.print_node(self.root_node, 0, n_words, with_weights)

    def print_node(self, node, indent, n_words, with_weights):
        """Print ``node`` and, one level deeper, each of its children."""
        out = ' ' * indent
        out += 'topic=%d level=%d (documents=%d): ' % (
            node.node_id, node.level, node.customers)
        out += node.get_top_words(n_words, with_weights)
        print(out)
        for child in node.children:
            self.print_node(child, indent+1, n_words, with_weights)
def _create_new_domino_position_and_rotation(
    self, random_state: RandomState
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
    """
    Try to lay the dominos out equally spaced along a circle arc.

    Up to `MAX_RETRY` random arcs are proposed; the first one accepted by
    `_adjust_and_check_fit` is returned.

    :return: Tuple[proposed_positions, proposed_angles]
        proposed_positions: np.ndarray of shape (num_objects, 3) with proposed target positions, or
            None if placement fails
        proposed_angles: np.ndarray of shape (num_objects,) with proposed target angles, or None
            if placement fails
    """
    num_objects = self.mujoco_simulation.num_objects

    for _attempt in range(MAX_RETRY):
        # Global rotation applied to the whole arc of dominos
        arc_offset = random_state.random() * np.pi

        # Rotation step from one domino to the next, between -pi/8 and pi/8
        step = random_state.random() * (np.pi / 4.0) - (np.pi / 8.0)

        # Rotation of each individual domino
        angles = np.arange(num_objects) * step + (arc_offset + step / 2)

        # The target quats must be set before `get_target_bounding_boxes`
        # is queried, so that its computation is correct
        self._set_target_quat(num_objects, angles)

        # Direction of travel between consecutive dominos
        headings = np.arange(1, 1 + num_objects) * step + arc_offset

        spacing = (
            self.mujoco_simulation.simulation_params.object_size
            * self.mujoco_simulation.simulation_params.domino_distance_mul
        )
        chain_x = np.cumsum(np.cos(headings)) * spacing
        chain_y = np.cumsum(np.sin(headings)) * spacing

        layout = np.zeros((num_objects, 3))

        # Copy the z axis values; entries are [obj_pos, obj_size]
        boxes = self.mujoco_simulation.get_target_bounding_boxes()
        layout[:, 2] = boxes[:, 1, 2]

        # The first target stays at (0, 0); every later one is offset by
        # the accumulated chain displacement
        for idx in range(1, num_objects):
            layout[idx] += np.array([chain_x[idx - 1], chain_y[idx - 1], 0])

        fits, layout = self._adjust_and_check_fit(
            num_objects,
            boxes,
            layout,
            self.mujoco_simulation.get_placement_area(),
            random_state,
        )
        if fits and layout is not None:
            return layout, angles

    # Mark failure to fit goal positions
    return None, None
class Evolution: # pylint: disable=too-many-instance-attributes # noinspection PyUnresolvedReferences """ Class that executes genetic search. :param num_populations: (int) number of populations (default 1) :param population_size: (int) size of the population :param genotype_size: (int) size of the genotype vector :param evaluation_function: (func) function to evaluate genotype performance. It should take as inputthe entire population genotype (matrix) and return an array with the performances :param fitness_normalization_mode: (str) method to normalize fitness values (fitness-proportionate, rank-based or sigma scaling) :param selection_mode: (str) method to select parents for reproduction (RWS or SUS) :param reproduce_from_elite: (bool) whether the reproduction comes from elite or remaining agents in the population :param reproduction_mode: (str) method to reproduce genetic algorithm or hill climbing :param mutation_variance: (float) variance of gaussian mutation rate :param folder_path: (string) path of the folder where to save the checkpoints :param search_constraint: (list of bool) flag whether to clip a specific site in a genotype (default to all True) :param reevaluate: (bool) whether to re-evaluate the individual if it's retained in the new generation (used only in hill-climbing) :param max_generation: (int) maximum generations to evolve (not used if termination_function is provided) :param termination_function: (func) function to check if search should terminate (it accept the Evolution instance, default to None) :param elitist_fraction: (float) proportion of new population that will be made of best unmodified parents (only relevant for genetic algorithm) :param mating_fraction: (float) proportion of population that will be made of children (in Beer this is equal to 1. 
- elitist_fraction) (only relevant for genetic algorithm) :param crossover_probability: (float) probability that crossover will occur (only relevant for genetic algorithm) :param crossover_mode: (str) the way to perform crossover (UNIFORM, 1-POINT, 2-POINT, ...) (only relevant for genetic algorithm) :param crossover_points: (list of int) a list that specifies the indices of where to cut during crossover (only relevant for genetic algorithm) :param checkpoint_interval: (int) every how many generations should the population be saved and results logged :param max_expected_offspring: (float) number of offspring to be allocated to the best individual, best between 1 and 2 """ population_size: int genotype_size: int evaluation_function: Callable num_populations: int = 1 performance_objective: Union[str,float] = 'MAX' # 'MIN', 'ABS_MAX', float value fitness_normalization_mode: str = 'FPS' # 'NONE', 'FPS', 'RANK', 'SIGMA' selection_mode: str = 'RWS' # 'UNIFORM', 'RWS', 'SUS' reproduce_from_elite: bool = False reproduction_mode: str = 'GENETIC_ALGORITHM' # 'HILL_CLIMBING', 'GENETIC_ALGORITHM' mutation_variance: float = DEFAULT_MUTATION_VARIANCE max_generation: int = 100 termination_function: Callable = None checkpoint_interval: int = DEFAULT_CHECKPOINT_INTERVAL crossover_probability: float = DEFAULT_CROSSOVER_PROB crossover_points: List[int] = None folder_path: str = None elitist_fraction: float = None mating_fraction: float = None n_elite: int = None n_mating: int = None n_fillup: int = None crossover_mode: str = 'UNIFORM' search_constraint: np.ndarray = None # this will be converted to all True by default in __post_init__ reevaluate: bool = True # only used in hill-climbing max_expected_offspring: float = DEFAULT_MAX_EXPECTED_OFFSPRING random_seed: int = 0 random_state: RandomState = None pop_eval_random_seed: int = None # initialized at every generation # other field (no need to define them outside) generation: int = 0 # the current generation number population: 
np.ndarray = None # the list of population genotypes (sorted by performance) population_unsorted: np.ndarray = None # the list of population genotypes (before sorting) # (will be initialized in __post_init__) performances: np.ndarray = None # performances of the genotypes fitnesses: np.ndarray = None # fitnesses of the genotypes population_sorted_indexes: np.ndarray = None # keep track of indexes in sorted population # population_sorted_indexes[0] is the index of the agent with best performance # in the unsorted population # collect average, best and worst performances across generations avg_performances: List[List[float]] = field(default_factory=list) best_performances: List[List[float]] = field(default_factory=list) worst_performances: List[List[float]] = field(default_factory=list) timeit: bool = False def __post_init__(self): assert self.num_populations > 0, "Number of populations should be greater than zero" assert self.population_size % 4 == 0, "Population size must be divisible by 4" # otherwise n_elite + n_mating may be greater than population_size self.sqrt_mutation_variance = np.sqrt(self.mutation_variance) if self.random_state is None: self.random_state = RandomState(self.random_seed) self.loaded_from_file = all( x is not None for x in [self.population, self.performances, self.fitnesses] ) # create initial population if not provided if self.population is None: # create a set of random genotypes self.population = self.random_state.uniform( MIN_SEARCH_VALUE, MAX_SEARCH_VALUE, [self.num_populations, self.population_size, self.genotype_size] ) if self.search_constraint is None: self.search_constraint = np.array([True] * self.genotype_size) self.file_num_zfill = int(np.ceil(np.log10(self.max_generation + 1))) \ if self.max_generation \ else 1 if self.max_generation == 0 \ else FILE_NUM_ZFILL_DEFAULT # conver performance_objective to float if it is a string with a number f = utils.get_float(self.performance_objective) if f is not None: 
self.performance_objective = f self.timing = Timing(self.timeit) self.validate_params() self.init_reproduction_parameters() @staticmethod def get_random_genotype(rando_state, gen_size): return rando_state.uniform(MIN_SEARCH_VALUE, MAX_SEARCH_VALUE, gen_size) def init_reproduction_parameters(self): # self.n_mating: number of new agents return by select_mating_pool() if self.reproduction_mode == 'GENETIC_ALGORITHM': # self.n_elite: number of best agents to preserve (only used in genetic algorithm) # self.n_fillup: agents to be randomly generated self.n_elite = int( np.floor(self.population_size * self.elitist_fraction + 0.5) # at least one ) # children from elite group self.n_mating = int(np.floor( self.population_size * self.mating_fraction + 0.5 # at least one )) # children from mating population self.n_fillup = self.population_size - (self.n_elite + self.n_mating) # children from random fillup assert all(x >= 0 for x in [self.n_elite, self.n_mating, self.n_fillup]) assert self.n_elite + self.n_mating + self.n_fillup == self.population_size else: # 'HILL_CLIMBING' self.n_mating = self.population_size def validate_params(self): # termination condition assert self.max_generation is None or self.termination_function is None, \ "Either max_generation or termination_function must be defined" # folder path if self.folder_path: assert os.path.isdir(self.folder_path), "folder_path '{}' is not a valid directory".format(self.folder_path) # search_constraint assert len(self.search_constraint) == self.genotype_size, \ "The length of search_constraint should be equal to genotype_size" # performance_objective accepted_values = ['MAX', 'MIN', 'ABS_MAX'] assert type(self.performance_objective) in [float,int] or \ self.performance_objective in accepted_values, \ 'performance_objective should be either {}'.format(', '.join(accepted_values)) # fitness_normalization_mode accepted_values = ['NONE', 'FPS', 'RANK', 'SIGMA'] assert self.fitness_normalization_mode in accepted_values, \ 
'fitness_normalization_mode should be either {}'.format(', '.join(accepted_values)) assert self.fitness_normalization_mode!='NONE' or self.selection_mode == 'UNIFORM', \ "if fitness_normalization_mode is 'NONE' (copy of PERFORMANCE), selection_mode must be UNIFORM (not normalized)" # selection_mode accepted_values = ['UNIFORM', 'RWS', 'SUS'] assert self.selection_mode in accepted_values, \ 'selection_mode should be either {}'.format(', '.join(accepted_values)) # reproduce_from_elite assert not self.reproduce_from_elite or self.selection_mode == 'UNIFORM', \ 'if reproducing from elite, selection mode must be uniform' # reproduction_mode accepted_values = ['HILL_CLIMBING', 'GENETIC_ALGORITHM'] assert self.reproduction_mode in accepted_values, \ 'reproduction_mode should be either {}'.format(', '.join(accepted_values)) # GENETIC_ALGORITHM if self.reproduction_mode == 'GENETIC_ALGORITHM': assert 0 <= self.elitist_fraction <= 1, \ 'In GENETIC_ALGORITHM: 0 <= elitist_fraction <=1' assert 0 <= self.mating_fraction <= 1, \ 'In GENETIC_ALGORITHM: 0 <= mating_fraction <=1' assert 0 <= self.crossover_probability <= 1, \ 'In GENETIC_ALGORITHM: 0 <= crossover_probability <=1' assert re.match('UNIFORM|\d+-POINT', self.crossover_mode), \ 'In GENETIC_ALGORITHM: crossover_mode should be UNIFORM or x-POINT' # crossover assert self.crossover_mode != None, "crossover_mode cannot be None" if self.crossover_mode == 'UNIFORM': # crossover is computed on the entire genotype # with prob 0.5 of flipping each genotype site assert self.crossover_points is None, \ "In uniform crossover_mode you shouldn't specify the crossover_points" elif self.crossover_mode.endswith('-POINT'): # A. if crossover_points is None the points are randomly generated # crossover_points must be a list of max x-1 integers in the interval [1,G-1] # where x is the integer > 0 specified in the parameter crossover_mode ('x-POINT') # and G is the size of the genotype # e.g. 
if parent1=[0,0,0] and parent2=[1,1,1] (G=3), # crossover_points must contain a single integer which can be # 1: child1=[0,1,1] child2=[1,0,0] # 2: child1=[0,0,1] child2=[1,1,0] # B. if crossover_points is not None -> num_points <= len(self.crossover_points) # if num_points < len(self.crossover_points) # only num_points will be randomly selected from the self.crossover_points num_points = self.crossover_mode[:-6] assert utils.is_int(num_points), \ "Param crossover_mode should be 'UNIFORM' or 'x-POINT' (with x being an integer > 0)" num_points = int(num_points) assert 0 < num_points < self.genotype_size, \ "Param crossover_mode should be 'x-POINT', with x being an integer such that 0 < x < G " \ "and where G is the size of the genotype" assert num_points <= self.genotype_size - 1, \ "Too high value for {} in param crossover_mode. Max should be G-1 " \ "(where G is the size of the genotype)".format( self.crossover_mode) if self.crossover_points is not None: assert len(set(self.crossover_points)) == len(self.crossover_points), \ "Duplicated values in crossover_points" self.crossover_points = sorted(set(self.crossover_points)) assert num_points <= len(self.crossover_points), \ "crossover_mode={} and crossover_points={} but {} must be <= {}=len(crossover_points)".format( self.crossover_mode, self.crossover_points, num_points, len(self.crossover_points)) assert all(1 < x < self.genotype_size for x in self.crossover_points), \ "Some of the values in crossover_points are not in the interval [1,G-1] " \ "where G is the size of the genotype" else: assert False, \ "Param crossover_mode should be 'UNIFORM' or 'x-POINT' (with x being an integer > 0)" def set_folder_name(self, text): self.folder_path = text def run(self): """ Execute a full search run until some condition is reached. 
:return: the last population in the search """ if self.loaded_from_file: # comple cycle from previous run (after saving) self.save_to_file() self.reproduce() self.generation += 1 t = self.timing.init_tictoc() while self.generation <= self.max_generation: # evaluate all genotypes on the task self.pop_eval_random_seed = utils.random_int(self.random_state) # suffle populations before running evaluation function for pop in self.population: self.random_state.shuffle(pop) # run evaluation function self.performances = self.evaluation_function( self.population, self.pop_eval_random_seed ) if type(self.performances) is list: self.performances = np.array(self.performances) if self.num_populations==1 and self.performances.ndim != 2: # eval function returned a simple array of perfomances # because there is only one population self.performances = np.expand_dims(self.performances,0) # add an additional index (population) expected_perf_shape = self.population.shape[:-1] assert self.performances.shape == expected_perf_shape, \ "Evaluation function didn't return performances with shape {}".format(expected_perf_shape) assert (self.performances >=0).all(), \ "Performance must be non-negative" self.timing.add_time('EVO1-RUN_eval_function', t) # sorting population and performances on performances self.sort_population_on_performance() self.timing.add_time('EVO1-RUN_sort_population', t) # update average/best/worst population performance avg = np.mean(self.performances, axis=1).tolist() best = self.performances[:,0].tolist() worst = self.performances[:,-1].tolist() variance = np.var(self.performances, axis=1).tolist() self.avg_performances.append(avg) self.best_performances.append(best) self.worst_performances.append(worst) self.timing.add_time('EVO1-RUN_stats', t) print_stats = lambda a : '|'.join(['{:.5f}'.format(x) for x in a]) # print short statistics print("Generation {}: Best: {}, Worst: {}, Average: {}, Variance: {}".format( str(self.generation).rjust(self.file_num_zfill), 
print_stats(best), print_stats(worst), print_stats(avg), print_stats(variance))) self.timing.add_time('EVO1-RUN_print_stats', t) # check if to terminate if self.generation == self.max_generation or \ (self.termination_function and self.termination_function(self)): self.save_to_file() # Stop search due to termination condition break # save the intermediate evolution state if self.checkpoint_interval and self.generation % self.checkpoint_interval == 0: # save current generation self.save_to_file() self.timing.add_time('EVO1-RUN_savefile', t) # Compute fitnesses (based on performances) - used in reproduce self.update_fitnesses() self.timing.add_time('EVO1-RUN_update_fitness', t) # run reproduce (update fitnesses and run genetic or hill-climbing) self.reproduce() self.timing.add_time('EVO1-RUN_reproduce', t) # update generation self.generation += 1 def sort_population_on_performance(self): # performances must be non-negative (>=0) if type(self.performance_objective) is str: if self.performance_objective == 'MAX': performances_objectified = self.performances elif self.performance_objective == 'MIN': performances_objectified = - self.performances else: assert self.performance_objective == 'ABS_MAX' performances_objectified = np.abs(self.performances) else: # minimizing the distance between performance and perf objective # when self.performance_objective==0 this would be identical to 'ABS_MIN' performances_objectified = - np.abs(self.performances - self.performance_objective) # sort genotypes, performances by performance_objectified from hight to low self.population_sorted_indexes = np.argsort(-performances_objectified, axis=-1) self.performances = np.take_along_axis(self.performances, self.population_sorted_indexes, axis=-1) self.population_unsorted = self.population # keep track of the original population to ensure reproducibility sorted_indexes_exp = np.expand_dims(self.population_sorted_indexes, -1) # add one dimension at the end to sort population self.population = 
np.take_along_axis(self.population_unsorted, sorted_indexes_exp, axis=1) # OLD METHOD WITHOUT NUMPY: # sort genotypes and performances by performance from best to worst # self.population, self.performances = \ # zip(*sorted(zip(self.population, self.performances), # key=lambda pair: pair[1], reverse=True)) # self.population = np.array(self.population) # self.performances = np.array(self.performances) def reproduce(self): """Run reproduce via HILL_CLIMBING or GENETIC_ALGORITHM""" if self.reproduction_mode == 'GENETIC_ALGORITHM': self.reproduce_genetic_algorithm() else: self.reproduce_hill_climbing() def reproduce_genetic_algorithm(self): """ Reproduce a single generation in the following way: 1) Copy the proportion equal to elitist_fraction of the current population to the new population (these are best_genotypes) 2) Select part of the population for crossover using some selection method (set in config) 3) Shuffle the selected population in preparation for cross-over 4) Create crossover_fraction children of selected population with probability of crossover equal to prob_crossover. Crossover takes place at genome module boundaries (single neurons). 
5) Apply mutation to the children with mutation equal to mutation_var 6) Fill the rest of the population with randomly created genotypes self.population and self.performances are sorted based on performances """ t = self.timing.init_tictoc() new_population = np.zeros( [self.num_populations, self.population_size, self.genotype_size] ) # 1) Elitist selection # same elite size in all populations self.elite_population = self.population[:, :self.n_elite] new_population[:, :self.n_elite] = self.elite_population self.timing.add_time('EVO2-GA_1_elitist_selection', t) # 2) Select mating population from the remaining population mating_pool = self.select_mating_pool() self.timing.add_time('EVO2-GA_2_mating_pool', t) # 3) Shuffle mating pool for pop_mating_pool in mating_pool: self.random_state.shuffle(pop_mating_pool) self.timing.add_time('EVO2-GA_3_shuffle', t) # 4) Create children with crossover or apply mutation mating_finish = self.n_elite + self.n_mating newpop_counter = None # track where we are in the new population for p in range(self.num_populations): mating_counter = 0 newpop_counter = self.n_elite # track where we are in the new population while newpop_counter < mating_finish: not_last = mating_finish - newpop_counter > 1 parent1 = mating_pool[p][mating_counter] if not_last and self.random_state.random() < self.crossover_probability: parent2 = mating_pool[p][mating_counter + 1] child1, child2 = self.crossover(parent1, parent2) # if the child is the same as the first parent after crossover, mutate it (as in Beer) if np.array_equal(child1, parent1): child1 = self.mutate(parent1) new_population[p][newpop_counter] = child1 new_population[p][newpop_counter + 1] = child2 newpop_counter += 2 mating_counter += 2 else: # if no crossover, mutate just one genotype child1 = self.mutate(parent1) new_population[p][newpop_counter] = child1 newpop_counter += 1 mating_counter += 1 self.timing.add_time('EVO2-GA_4_children', t) # 5) Fill up with random new genotypes new_population[:, 
newpop_counter:] = self.random_state.uniform( MIN_SEARCH_VALUE, MAX_SEARCH_VALUE, size=[self.num_populations, self.n_fillup, self.genotype_size] ) self.timing.add_time('EVO2-GA_5_fillup', t) # 6) redefined population based on the newly computed population self.population = new_population self.timing.add_time('EVO2-GA_6_convert_pop', t) def reproduce_hill_climbing(self): t = self.timing.init_tictoc() # 1) Select the parents using sampling (replacing the entire population, no elite here) parent_population = self.select_mating_pool() self.timing.add_time('EVO2-HC_1_mating pool', t) # 2) Reevaluate if self.reevaluate: parent_performance = np.array(self.evaluation_function(parent_population, self.pop_eval_random_seed)) else: assert False, \ "reevaluate params has to be True. " \ "For reevaluate to be False we need to also return performances in function select_mating_pool" self.timing.add_time('EVO2-HC_2_reevaluate', t) # 3) Produce the new population by mutating each parent and rewrite it on the current population self.population = np.array([self.mutate(gen) for gen in parent_population]) self.timing.add_time('EVO2-HC_3_mutate', t) # 4) Calculate new performances self.performance = np.array(self.evaluation_function(self.population, self.pop_eval_random_seed)) self.timing.add_time('EVO2-HC_4_compute_perf', t) # 5) Check if performace worsened and in this case retrieve agent from parent population lower_performance = self.performance < parent_performance # bool array for i in range(self.population_size): if lower_performance[i]: self.population[i] = parent_population[i] self.performance[i] = parent_performance[i] self.timing.add_time('EVO2-HC_5_compare_and_select', t) def update_fitnesses(self): """ Update genotype fitness to relative values, retain sorting from best to worst. 
""" if self.fitness_normalization_mode == 'NONE': # do not use fitness in selection self.fitnesses = None elif self.fitness_normalization_mode == 'FPS': # (fitness-proportionate) self.fitnesses = np.zeros(self.performances.shape) # same shape as performances for p in range(self.num_populations): avg_perf = self.avg_performances[-1][p] m = utils.linear_scaling( self.worst_performances[-1][p], self.best_performances[-1][p], avg_perf, self.max_expected_offspring ) scaled_performances = m * (self.performances[p] - avg_perf) + avg_perf total_performance = np.sum(scaled_performances) self.fitnesses[p] = scaled_performances / total_performance elif self.fitness_normalization_mode == 'RANK': # (rank-based) # Baker's linear ranking method: f(pos) = 2-SP+2*(SP-1)*(pos-1)/(n-1) # the highest ranked individual receives max_exp_offspring (typically 1.1), # the lowest receives 2 - max_exp_offspring # normalized to sum to 1 self.fitnesses = np.zeros(self.performances.shape) # same shape as performances for p in range(self.num_populations): self.fitnesses[p] = np.array( [ ( self.max_expected_offspring + (2 - 2 * self.max_expected_offspring) * i / (self.population_size - 1) ) / self.population_size for i in range(self.population_size) ] ) elif self.fitness_normalization_mode == 'SIGMA': # (sigma-scaling) # for every individual 1 + (I(f) - P(avg_f))/2*P(std) is calculated # if value is below zero, a small positive constant is given so the individual has some probability # of being chosen. 
The numbers are then normalized self.fitnesses = np.zeros(self.performances.shape) # same shape as performances for p in range(self.num_populations): pop_perf = self.performances[p] avg = np.mean(pop_perf) std = max(0.0001, np.std(pop_perf)) exp_values = list((1 + ((f - avg) / (2 * std))) for f in pop_perf) for i, v in enumerate(exp_values): if v <= 0: exp_values[i] = 1 / self.population_size s = sum(exp_values) self.fitnesses[p] = np.array(list(e / s for e in exp_values)) def select_mating_pool(self): """ Select a mating pool population. :return: selected parents for reproduction """ if self.selection_mode == 'UNIFORM': # create mating_pool from source_population uniformally # (from beginning to end and if needed restart from beginning) source_population = \ self.elite_population if self.reproduce_from_elite \ else self.population num_source_pop = source_population.shape[1] # number of elements in source pop assert num_source_pop>0, \ "Error, can't create a mating pool from empty source population" cycle_source_pop_indexes = np.resize( # this return a column vector np.resize( # [0,1,...,n, 0, 1, ..., n] np.arange(num_source_pop), # where n is num_source_pop and the size [self.n_mating,1] # and n_mating the actual size of the list ), [self.num_populations, self.n_mating, 1] # this duplicates the indexes for all populations ) # to obtain same 3 dimensions of source_population # rotate thtough the source_population(s) mating_pool = np.take_along_axis(source_population, cycle_source_pop_indexes, 1) else: min_fitness = np.min(self.fitnesses, axis=-1) assert (min_fitness > - ROUNDING_TOLERANCE).all(), \ "Found neg fitness: {}".format(min_fitness) if (self.fitnesses < 0).any(): # setting small neg values due to rounding errors to zeros self.fitnesses[self.fitnesses<0] = 0 cum_probs = np.cumsum(self.fitnesses, axis=-1) cum_probs_error = np.abs(cum_probs[:,-1] - 1.0) assert (cum_probs_error >=0).all() and (cum_probs_error < CUM_PROB_TOLERANCE).all(), \ "Too big 
cum_probs_error: {}".format(cum_probs_error) mating_pool = np.zeros([self.num_populations, self.n_mating, self.genotype_size]) if self.selection_mode == "RWS": # roulette wheel selection for pop in range(self.num_populations): mating_pool_indexes = self.random_state.choice( self.population_size, size=(self.n_mating,1), replace=True, p=self.fitnesses[pop] ) mating_pool[pop] = np.take_along_axis( self.population[pop], mating_pool_indexes, axis=0 ) elif self.selection_mode == "SUS": # TODO: find a way to implement this via numpy # stochastic universal sampling selection p_dist = 1 / self.n_mating # distance between the pointers for pop in range(self.num_populations): start = self.random_state.uniform(0, p_dist) pointers = [start + i * p_dist for i in range(self.n_mating)] cp = cum_probs[pop] # cumulative prob of current population m_idx = 0 # index in the mating pool to be filled for poi in pointers: for (i, genotype) in enumerate(self.population[pop]): if poi <= cp[i]: mating_pool[pop][m_idx] = genotype m_idx += 1 break else: assert False assert len(mating_pool[0]) == self.n_mating return mating_pool def crossover(self, parent1, parent2): """ Given two genotypes, create two new genotypes by exchanging their genetic material. 
:param parent1: first parent genotype :param parent2: second parent genotype :return: two new genotypes # TODO: implement class testing functions """ genotype_size = len(parent1) if self.crossover_mode == 'UNIFORM': if self.crossover_points is None: # by default do crossover on the entire genotype flips = self.random_state.choice(a=[0, 1], size=genotype_size) else: # TODO: this will never occur because we check crossover_points above but # consider implementing in the future a case of uniform crossover in certain # portions of the genotype assert False inv_flips = 1 - flips child1 = flips * parent1 + inv_flips * parent2 child2 = flips * parent2 + inv_flips * parent1 else: # x-POINT num_points = int(self.crossover_mode[:-6]) if self.crossover_points is None: possible_points = list(range(1, genotype_size)) # [1,...,G-1] chosen_crossover_points = sorted(self.random_state.choice(possible_points, num_points, replace=False)) elif num_points < len(self.crossover_points): chosen_crossover_points = sorted( self.random_state.choice(self.crossover_points, num_points, replace=False)) else: chosen_crossover_points = sorted(self.crossover_points) assert num_points == len(chosen_crossover_points) gt = [parent1, parent2] boundaries = [0] + chosen_crossover_points + [genotype_size] segment_ranges = [(boundaries[i], boundaries[i + 1]) for i in range(len(boundaries) - 1)] segments1 = [gt[i % 2][s[0]:s[1]] for i, s in enumerate(segment_ranges)] segments2 = [gt[1 - i % 2][s[0]:s[1]] for i, s in enumerate(segment_ranges)] child1 = np.hstack(segments1) child2 = np.hstack(segments2) return child1, child2 def mutate(self, genotype): magnitude = self.random_state.normal(0, self.sqrt_mutation_variance) unit_vector = utils.make_rand_vector(len(genotype), self.random_state) mutant = np.where( self.search_constraint, np.clip( genotype + magnitude * unit_vector, MIN_SEARCH_VALUE, MAX_SEARCH_VALUE ), genotype + magnitude * unit_vector ) return mutant def save_to_file(self): if self.folder_path is 
None: return # population is saved after sorting based on fitness file_path = os.path.join( self.folder_path, 'evo_{}.json'.format(str(self.generation).zfill(self.file_num_zfill)) ) # print("Saving rand state: {}".format(state_of_rand_state)) obj_dict = asdict(self) del obj_dict['evaluation_function'] del obj_dict['termination_function'] obj_dict['random_state'] = json_numpy.dumps(self.random_state.get_state()) with open(file_path, 'w') as f_out: json.dump(obj_dict, f_out, cls=json_numpy.NumpyListJsonEncoder, indent=3) @staticmethod def load_from_file(file_path, evaluation_function: Callable = None, termination_function: Callable = None, **kwargs): with open(file_path) as f_in: obj_dict = json.load(f_in) for k in ['population', 'population_unsorted', 'performances', 'fitnesses']: # assert type(obj_dict[k]) == np.ndarray obj_dict[k] = np.array(obj_dict[k]) random_state = RandomState(None) random_state_state = json_numpy.loads(obj_dict['random_state']) # print("Loading rand state: {}".format(random_state_state)) random_state.set_state(random_state_state) obj_dict['random_state'] = random_state obj_dict['evaluation_function'] = evaluation_function obj_dict['termination_function'] = termination_function if kwargs: obj_dict.update(kwargs) evo = Evolution(**obj_dict) return evo