Example #1
import mne
import pytest
from numpy.random import RandomState
# Import path for the private helper assumed (mne-bids)
from mne_bids.utils import _check_datatype


def test_check_datatype():
    """Test checking if datatype exists in raw data."""
    sfreq, n_points = 1024., int(1e6)
    rng = RandomState(99)
    info_eeg = mne.create_info(['ch1', 'ch2', 'ch3'], sfreq, ['eeg'] * 3)
    raw_eeg = mne.io.RawArray(rng.random((3, n_points)) * 1e-6, info_eeg)
    info_meg = mne.create_info(['ch1', 'ch2', 'ch3'], sfreq, ['mag'] * 3)
    raw_meg = mne.io.RawArray(rng.random((3, n_points)) * 1e-6, info_meg)
    info_ieeg = mne.create_info(['ch1', 'ch2', 'ch3'], sfreq, ['seeg'] * 3)
    raw_ieeg = mne.io.RawArray(rng.random((3, n_points)) * 1e-6, info_ieeg)
    # check behavior for unsupported data types
    for datatype in (None, 'anat'):
        with pytest.raises(ValueError, match=f'The specified datatype '
                                             f'{datatype} is currently not'):
            _check_datatype(raw_eeg, datatype)
    # check behavior for matching data type
    for raw, datatype in [(raw_eeg, 'eeg'), (raw_meg, 'meg'),
                          (raw_ieeg, 'ieeg')]:
        _check_datatype(raw, datatype)
    # check for missing data type
    for raw, datatype in [(raw_ieeg, 'eeg'), (raw_meg, 'eeg'),
                          (raw_ieeg, 'meg'), (raw_eeg, 'meg'),
                          (raw_meg, 'ieeg'), (raw_eeg, 'ieeg')]:
        with pytest.raises(ValueError, match=f'The specified datatype '
                                             f'{datatype} was not found'):
            _check_datatype(raw, datatype)
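
The private helper exercised above isn't shown; here is a minimal sketch of a `_check_datatype` consistent with the expected error messages (hypothetical, not the actual mne-bids implementation):

def _check_datatype(raw, datatype):
    # Hypothetical helper: map each supported datatype to a channel type
    # that must be present in the raw data.
    supported = {'eeg': 'eeg', 'meg': 'mag', 'ieeg': 'seeg'}
    if datatype not in supported:
        raise ValueError(f'The specified datatype {datatype} is currently not '
                         f'supported.')
    if supported[datatype] not in raw.get_channel_types():
        raise ValueError(f'The specified datatype {datatype} was not found '
                         f'in the raw data.')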
Example #2
import numpy as np
from numpy.random import RandomState
# `Array` is assumed to be nevergrad's Array parameter
from nevergrad.parametrization.parameter import Array


def get_special_selector(feat_selector, feat_selector_params, random_state,
                         num_feat_keys):

    # Init feat selector with mask of random feats
    if random_state is None:
        r_state = RandomState(np.random.randint(1000))
    elif isinstance(random_state, int):
        r_state = RandomState(random_state)
    else:
        r_state = random_state

    init_mask = r_state.random(num_feat_keys)
    feat_selector = feat_selector(mask=init_mask)

    # Figure out param passed
    if 'selector__mask' in feat_selector_params:
        p_name = 'selector__mask'

        # If set to searchable, set to searchable...
        if feat_selector_params[p_name] == 'sets as hyperparameters':

            feat_array = Array(init=[.5 for i in range(num_feat_keys)])
            feat_array.set_mutation(sigma=1 / 6).set_bounds(lower=0, upper=1)
            feat_selector_params[p_name] = feat_array

        elif feat_selector_params[p_name] == 'sets as random features':
            del feat_selector_params[p_name]

    return feat_selector, feat_selector_params
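
A hypothetical usage sketch, assuming a scikit-learn-style selector class whose constructor accepts a `mask` array:

import numpy as np

class ThresholdMaskSelector:
    """Toy selector: keeps columns whose mask value exceeds 0.5 (hypothetical)."""
    def __init__(self, mask):
        self.mask = mask

    def transform(self, X):
        return X[:, np.asarray(self.mask) > .5]

selector, params = get_special_selector(
    feat_selector=ThresholdMaskSelector,
    feat_selector_params={'selector__mask': 'sets as random features'},
    random_state=42,
    num_feat_keys=10)
print(selector.transform(np.ones((4, 10))).shape)  # (4, k), k = surviving columns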
Example #3
import numpy as np
import pandas as pd
import pytest
from numpy.random import RandomState, PCG64
from dabest import load


def test_unrelated_columns(N=60, random_seed=12345):
    """
    Test to see if 'unrelated' columns jam up the analysis.
    See Github Issue 43.
    https://github.com/ACCLAB/DABEST-python/issues/44.
    
    Added in v0.2.5.
    """

    # rng = RandomState(MT19937(random_seed))
    rng = RandomState(PCG64(12345))
    # rng = np.random.default_rng(seed=random_seed)

    df = pd.DataFrame({
        'groups':
        rng.choice(['Group 1', 'Group 2', 'Group 3'], size=(N, )),
        'color':
        rng.choice(['green', 'red', 'purple'], size=(N, )),
        'value':
        rng.random(size=(N, ))
    })

    df['unrelated'] = np.nan

    test = load(data=df, x='groups', y='value', idx=['Group 1', 'Group 2'])

    md = test.mean_diff.results

    assert md.difference[0] == pytest.approx(-0.0322, abs=1e-4)
    assert md.bca_low[0] == pytest.approx(-0.2279, abs=1e-4)
    assert md.bca_high[0] == pytest.approx(0.1613, abs=1e-4)
Example #4
    @classmethod
    def _adjust_and_check_fit(
        cls,
        num_objects: int,
        target_bounding_boxes: np.ndarray,
        positions: np.ndarray,
        placement_area: PlacementArea,
        random_state: RandomState,
    ) -> Tuple[bool, Optional[np.ndarray]]:
        """
        This method will check if the current `target_bounding_boxes` and `positions` can fit in the table
        and if so, will return a new array of positions randomly sampled inside of the `placement_area`
        """
        width, height, _ = placement_area.size

        half_sizes = target_bounding_boxes[:, 1, :]

        max_x, max_y, _ = np.max(positions + half_sizes, axis=0)
        min_x, min_y, _ = np.min(positions - half_sizes, axis=0)

        size_x, size_y = max_x - min_x, max_y - min_y

        if size_x < width and size_y < height:
            # Sample a random offset of the "remaining area"
            delta_x = -min_x + random_state.random() * (width - size_x)
            delta_y = -min_y + random_state.random() * (height - size_y)

            return (
                True,
                positions + np.tile(np.array([delta_x, delta_y, 0]),
                                    (num_objects, 1)),
            )
        return False, None
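
The fit-and-offset geometry above can be exercised in isolation; a minimal sketch with made-up boxes (two unit cubes on a 5 x 5 placement area):

import numpy as np
from numpy.random import RandomState

rs = RandomState(0)
half_sizes = np.array([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]])  # (num_objects, 3)
positions = np.array([[0.0, 0.0, 0.5], [1.5, 0.0, 0.5]])   # (num_objects, 3)
width, height = 5.0, 5.0

max_x, max_y, _ = np.max(positions + half_sizes, axis=0)
min_x, min_y, _ = np.min(positions - half_sizes, axis=0)
size_x, size_y = max_x - min_x, max_y - min_y               # 2.5 x 1.0 footprint

assert size_x < width and size_y < height
delta_x = -min_x + rs.random() * (width - size_x)
delta_y = -min_y + rs.random() * (height - size_y)
shifted = positions + np.array([delta_x, delta_y, 0.0])
# every shifted x/y extent now lies inside [0, width] x [0, height]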
Example #5
import random

import numpy as np
from numpy.random import MT19937, RandomState, SeedSequence


def multiplier_proposal_vector(q, d=1.05, f=1, rs=0):
    # Multiplier proposal: each selected element of q is scaled by
    # m = exp(l * (u - 0.5)) with u ~ U(0, 1), so m lies in [1/d, d].
    if not rs:
        rseed = random.randint(1000, 9999)
        rs = RandomState(MT19937(SeedSequence(rseed)))
    S = q.shape
    ff = rs.binomial(1, f, S)  # which elements to perturb (each with prob. f)
    u = rs.random(S)
    l = 2 * np.log(d)
    m = np.exp(l * (u - .5))
    m[ff == 0] = 1.  # unselected elements keep multiplier 1
    new_q = q * m
    U = np.sum(np.log(m))  # log of the proposal's Jacobian (Hastings term)
    return new_q, 0, U
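
A quick usage sketch (values chosen arbitrarily; passing a seeded RandomState keeps the draw reproducible):

import numpy as np
from numpy.random import MT19937, RandomState, SeedSequence

q = np.array([1.0, 2.0, 3.0])
rs = RandomState(MT19937(SeedSequence(1234)))
new_q, _, log_jacobian = multiplier_proposal_vector(q, d=1.05, f=1, rs=rs)
# Each element of new_q equals q[i] * m[i] with m[i] in [1/d, d];
# log_jacobian = sum(log m) enters the Metropolis-Hastings acceptance ratio.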
Example #6
import unittest

from numpy.random import RandomState
# Import path for ManoModel assumed (e.g. the mano_pybullet package)
from mano_pybullet.mano_model import ManoModel


class TestManoModel(unittest.TestCase):
    """Test kinematics module."""
    def setUp(self):
        self.random = RandomState(7)

    def test_constructor(self):
        for left_hand in False, True:
            with self.subTest(f"ManoModel(left_hand={left_hand})"):
                mano_model = ManoModel(left_hand)
                self.assertEqual(mano_model.is_left_hand, left_hand)
                self.assertIsNotNone(mano_model.faces)
                self.assertIsNotNone(mano_model.weights)
                self.assertIsNotNone(mano_model.kintree_table)
                self.assertIsNotNone(mano_model.shapedirs)
                self.assertIsNotNone(mano_model.posedirs)
                self.assertIsNotNone(mano_model.origins())
                self.assertIsNotNone(mano_model.vertices())
                self.assertEqual(len(mano_model.link_names),
                                 len(mano_model.origins()))
                self.assertEqual(len(mano_model.tip_links), 5)

    def test_origins(self):
        """Test the MANO joints transformation."""
        mano_model = ManoModel()
        origins = mano_model.vertices(
            # pose=self.random.uniform((16, 3)),
            trans=self.random.random((3, )))
        self.assertIsNotNone(origins)

    def test_vertices(self):
        """Test the MANO vertices transformation."""
        mano_model = ManoModel()
        vertices = mano_model.vertices(
            betas=self.random.random(10) * 0.1,
            # pose=self.random.random((16, 3)),
            trans=self.random.random(3))
        self.assertIsNotNone(vertices)
Example #7
import numpy as np
from numpy.random import RandomState
# `Monitor` comes from the library under test; a hypothetical sketch follows this example.


def test_monitor():

    T = 100

    monitor = Monitor()

    rng = RandomState(12345)

    actions = rng.randint(0, 2, T)
    optimal_actions = rng.randint(0, 2, T)
    rewards = rng.random(T)

    for t in range(T):

        monitor.report(
            t=t,
            agent_action=actions[t],
            optimal_action=optimal_actions[t],
            action_reward=rewards[t]
        )

    assert np.array_equal(
        [
            monitor.t_count_optimal_action[t]
            for t in sorted(monitor.t_count_optimal_action)
        ],
        [
            1 if action == optimal else 0
            for action, optimal in zip(actions, optimal_actions)
        ]
    )

    assert np.array_equal(
        [
            monitor.t_average_cumulative_reward[t].get_value()
            for t in sorted(monitor.t_average_cumulative_reward)
        ],
        np.cumsum(rewards)
    )
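
`Monitor` isn't shown above; here is a minimal sketch that would satisfy this test (hypothetical; the real class presumably tracks more statistics):

import numpy as np

class IncrementalAverager:
    """Running mean, updated one value at a time (hypothetical)."""
    def __init__(self):
        self.n, self.mean = 0, 0.0

    def update(self, value):
        self.n += 1
        self.mean += (value - self.mean) / self.n

    def get_value(self):
        return self.mean

class Monitor:
    def __init__(self):
        self.cumulative_reward = 0.0
        self.t_count_optimal_action = {}
        self.t_average_cumulative_reward = {}

    def report(self, t, agent_action, optimal_action, action_reward):
        self.t_count_optimal_action[t] = (
            self.t_count_optimal_action.get(t, 0)
            + int(agent_action == optimal_action))
        self.cumulative_reward += action_reward
        averager = self.t_average_cumulative_reward.setdefault(
            t, IncrementalAverager())
        averager.update(self.cumulative_reward)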
Example #8
import numpy as np
from numpy.random import RandomState
from sklearn.base import BaseEstimator


class RandomParcels(BaseEstimator):
    def __init__(self,
                 geo,
                 n_parcels,
                 medial_wall_inds=None,
                 medial_wall_mask=None,
                 random_state=1):

        # Set passed params
        self.geo = geo
        self.n_parcels = n_parcels
        self.medial_wall_inds = medial_wall_inds
        self.medial_wall_mask = medial_wall_mask
        self.random_state = random_state
        self.mask = None

    def get_parc(self, copy=True):

        if self.mask is None:
            self._generate_parc_from_params()

        if copy:
            return self.mask.copy()
        else:
            return self.mask

    def _generate_parc_from_params(self):

        # Proc by input args
        self._proc_geo()
        self._proc_medial_wall()
        self._proc_random_state()

        # Set up mask, done and flags
        self.sz = len(self._geo)
        self.reset()

        # Init
        self.init_parcels()

        # Then generate
        self.generate_parcels()

    def _proc_geo(self):
        self._geo = [np.array(g) for g in self.geo]

    def _proc_medial_wall(self):

        # Proc medial wall inds
        if self.medial_wall_inds is not None:
            self.m_wall = set(list(self.medial_wall_inds))
        elif self.medial_wall_mask is not None:
            self.m_wall = set(list(np.where(self.medial_wall_mask)[0]))
        else:
            self.m_wall = set()

    def _proc_random_state(self):

        if self.random_state is None:
            self.r_state = RandomState()
        elif isinstance(self.random_state, int):
            self.r_state = RandomState(seed=self.random_state)
        else:
            self.r_state = self.random_state

    def reset(self):
        '''Reset the mask and re-initialize the done set.'''

        self.mask = np.zeros(self.sz, dtype='int16')
        self.done = self.m_wall.copy()
        self.ready, self.generated = False, False

    def init_parcels(self):

        # Generate the starting locs
        valid = np.setdiff1d(np.arange(self.sz), np.array(list(self.done)))
        self.start_locs = self.r_state.choice(valid,
                                              size=self.n_parcels,
                                              replace=False)

        # Set random probs. that each loc is chosen
        self.probs = self.r_state.random(size=self.n_parcels)

    def setup(self):
        '''Call before generating parcels (e.g. after a mutation has been
        made). Calling setup an extra time is harmless, as nothing
        random is set here.'''

        # Generate corresponding labels w/ each loc
        self.labels = np.arange(1, self.n_parcels + 1, dtype='int16')

        # Mask where if == 1, then that parcel is done
        self.finished = np.zeros(self.n_parcels, dtype='bool_')

        # Drop the first points
        self.mask[self.start_locs] = self.labels

        # Set ready flag to True
        self.ready = True

    def get_probs(self):

        return self.probs / np.sum(self.probs)

    def choice(self):
        '''Select a valid label based on probs.'''

        msk = self.finished == 0
        probs = self.probs[msk] / np.sum(self.probs[msk])
        label = self.r_state.choice(self.labels[msk], p=probs)

        return label

    def get_valid_neighbors(self, loc):

        ns = self._geo[loc]
        valid_ns = ns[self.mask[ns] == 0]

        return valid_ns

    def generate_parcels(self):

        if self.ready is False:
            self.setup()

        # Keep looping until every spot is filled
        while (self.finished == 0).any():
            self.add_spot()

        # Set generated flag when done
        self.generated = True

    def add_spot(self):

        # Select which parcel to add to
        label = self.choice()

        # Determine valid starting locations anywhere in the existing parcel
        current = np.where(self.mask == label)[0]
        valid = set(current) - self.done

        self.proc_spot(valid, label)

    def proc_spot(self, valid, label):

        # If no valid choices, then set this parcel to finished
        if len(valid) == 0:
            self.finished[label - 1] = 1
            return

        # Select randomly from the valid starting locs
        loc = self.r_state.choice(tuple(valid))

        # Select a valid + open neighbor
        valid_ns = self.get_valid_neighbors(loc)

        if len(valid_ns) > 0:

            # Select a valid choice, and add it w/ the right label
            choice = self.r_state.choice(valid_ns)
            self.mask[choice] = label

            # If this was the only choice, mark start loc as done
            if len(valid_ns) == 1:
                self.done.add(loc)

        # If there are no valid choices, mark as done
        else:
            self.done.add(loc)

            valid.remove(loc)
            self.proc_spot(valid, label)
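
A usage sketch on a toy 1-D chain geometry (each vertex neighbors its immediate left and right; the three seeded parcels grow until they fill the chain):

import numpy as np

n_vertices = 20
geo = [np.array([j for j in (i - 1, i + 1) if 0 <= j < n_vertices])
       for i in range(n_vertices)]

parc = RandomParcels(geo, n_parcels=3, random_state=7)
labels = parc.get_parc()
print(labels)  # length-20 array with contiguous runs of labels 1, 2 and 3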
Example #9
import math
import sys
from math import log

import numpy as np
from numpy.random import RandomState
# NCRPNode (the nested-CRP tree node class) is defined alongside this class; import omitted.


class HierarchicalLDA(object):

    def __init__(self, corpus, vocab,
                 alpha=10.0, gamma=1.0, eta=0.1,
                 seed=42, verbose=True, num_levels=3):

        NCRPNode.total_nodes = 0
        NCRPNode.last_node_id = 0

        self.corpus = corpus
        self.vocab = vocab
        self.alpha = alpha  # smoothing on doc-topic distributions
        self.gamma = gamma  # "imaginary" customers at the next, as yet unused table
        self.eta = eta      # smoothing on topic-word distributions

        self.seed = seed
        self.random_state = RandomState(seed)
        self.verbose = verbose

        self.num_levels = num_levels
        self.num_documents = len(corpus)
        self.num_types = len(vocab)
        self.eta_sum = eta * self.num_types

        # if self.verbose:
        #     for d in range(len(self.corpus)):
        #         doc = self.corpus[d]
        #         words = ' '.join([self.vocab[n] for n in doc])
        #         print 'doc_%d = %s' % (d, words)

        # initialise a single path
        path = np.zeros(self.num_levels, dtype=object)  # np.object is removed in modern NumPy

        # initialize and fill the topic pointer arrays for
        # every document. Set everything to the single path that
        # we added earlier.
        self.root_node = NCRPNode(self.num_levels, self.vocab)
        self.document_leaves = {}                                   # currently selected path (ie leaf node) through the NCRP tree
        self.levels = np.zeros(self.num_documents, dtype=object) # indexed <doc, token>
        for d in range(len(self.corpus)):

            # populate nodes into the path of this document
            doc = self.corpus[d]
            doc_len = len(doc)
            path[0] = self.root_node
            self.root_node.customers += 1 # always add to the root node first
            for level in range(1, self.num_levels):
                # at each level, a node is selected by its parent node based on the CRP prior
                parent_node = path[level-1]
                level_node = parent_node.select(self.gamma)
                level_node.customers += 1
                path[level] = level_node

            # set the leaf node for this document
            leaf_node = path[self.num_levels-1]
            self.document_leaves[d] = leaf_node

            # randomly assign each word in the document to a level (node) along the path
            self.levels[d] = np.zeros(doc_len, dtype=int)
            for n in range(doc_len):
                w = doc[n]
                random_level = self.random_state.randint(self.num_levels)
                random_node = path[random_level]
                random_node.word_counts[w] += 1
                random_node.total_words += 1
                self.levels[d][n] = random_level

    def estimate(self, num_samples, display_topics=50, n_words=5, with_weights=True):

        print('HierarchicalLDA sampling\n')
        for s in range(num_samples):

            sys.stdout.write('.')

            for d in range(len(self.corpus)):
                self.sample_path(d)

            for d in range(len(self.corpus)):
                self.sample_topics(d)

            if (s > 0) and ((s+1) % display_topics == 0):
                print(f" {s+1}")
                self.print_nodes(n_words, with_weights)

    def sample_path(self, d):

        # define a path starting from the leaf node of this doc
        path = np.zeros(self.num_levels, dtype=object)
        node = self.document_leaves[d]
        for level in range(self.num_levels-1, -1, -1): # e.g. [3, 2, 1, 0] for num_levels = 4
            path[level] = node
            node = node.parent

        # remove this document from the path, deleting empty nodes if necessary
        self.document_leaves[d].drop_path()

        ############################################################
        # calculates the prior p(c_d | c_{-d}) in eq. (4)
        ############################################################

        node_weights = {}
        self.calculate_ncrp_prior(node_weights, self.root_node, 0.0)

        ############################################################
        # calculates the likelihood p(w_d | c, w_{-d}, z) in eq. (4)
        ############################################################

        level_word_counts = {}
        for level in range(self.num_levels):
            level_word_counts[level] = {}
        doc_levels = self.levels[d]
        doc = self.corpus[d]

        # remove doc from path
        for n in range(len(doc)): # for each word in the doc

            # count the word at each level
            level = doc_levels[n]
            w = doc[n]
            if w not in level_word_counts[level]:
                level_word_counts[level][w] = 1
            else:
                level_word_counts[level][w] += 1

            # remove word count from the node at that level
            level_node = path[level]
            level_node.word_counts[w] -= 1
            level_node.total_words -= 1
            assert level_node.word_counts[w] >= 0
            assert level_node.total_words >= 0

        self.calculate_doc_likelihood(node_weights, level_word_counts)

        ############################################################
        # pick a new path
        ############################################################

        nodes = np.array(list(node_weights.keys()))
        weights = np.array([node_weights[node] for node in nodes])
        weights = np.exp(weights - np.max(weights)) # normalise so the largest weight is 1
        weights = weights / np.sum(weights)

        choice = self.random_state.multinomial(1, weights).argmax()
        node = nodes[choice]

        # if we picked an internal node, we need to add a new path to the leaf
        if not node.is_leaf():
            node = node.get_new_leaf()

        # add the doc back to the path
        node.add_path()                     # add a customer to the path
        self.document_leaves[d] = node      # store the leaf node for this doc

        # add the words
        for level in range(self.num_levels-1, -1, -1): # e.g. [3, 2, 1, 0] for num_levels = 4
            word_counts = level_word_counts[level]
            for w in word_counts:
                node.word_counts[w] += word_counts[w]
                node.total_words += word_counts[w]
            node = node.parent

    def calculate_ncrp_prior(self, node_weights, node, weight):
        ''' Calculates the prior on the path according to the nested CRP '''

        for child in node.children:
            child_weight = log( float(child.customers) / (node.customers + self.gamma) )
            self.calculate_ncrp_prior(node_weights, child, weight + child_weight)

        node_weights[node] = weight + log( self.gamma / (node.customers + self.gamma))
    
    def calculate_doc_likelihood(self, node_weights, level_word_counts):

        # calculate the weight for a new path at a given level
        new_topic_weights = np.zeros(self.num_levels)
        for level in range(1, self.num_levels):  # skip the root

            word_counts = level_word_counts[level]
            total_tokens = 0

            for w in word_counts:
                count = word_counts[w]
                # for i in range(count):  # why ?????????
                #     new_topic_weights[level] += log((self.eta + i) / (self.eta_sum + total_tokens))
                #     total_tokens += 1
                up_part = self.eta
                down_part = self.eta_sum + total_tokens
                new_topic_weights[level] += math.lgamma(up_part+count) - math.lgamma(up_part) - (math.lgamma(down_part+count) - math.lgamma(down_part)) # explained in calculate_word_likelihood_at_level
                total_tokens += count

        self.calculate_word_likelihood(node_weights, self.root_node, 0.0, level_word_counts, new_topic_weights, 0)

    def calculate_word_likelihood_at_level(self, node_word_count, node_total_words, level_word_count_at_level, new_topic_weights):
        node_weight = 0.0
        word_counts = level_word_count_at_level
        total_words = 0
        for w in word_counts:
            count = word_counts[w]
            # for i in range(count): # why ?????????
            #     node_weight += log( (self.eta + node_word_count[w] + i) /
            #                         (self.eta_sum + node_total_words  + total_words) )
            #     total_words += 1
            #
            # commented old calc method as the new one is faster, but less obvious:
            # sum (i=0,n) (log( (up_part + i) / (down_part + i) ) ) = 
            # = log (product (i=0,n) ((up_part + i) / (down_part + i)) = 
            # = log (product (i=0,n) (up_part + i)) - log (product (i=0,n) (down_part + i))
            # product of arithmetic progression is d^n * (Gamma(n+(a1/d))/Gamma(a1/d))
            # here d = 1, n = count, a1 = up_part or down_part
            # log(Gamma(n+a1)/Gamma(a1)) = log(Gamma(n+a1)) - log(Gamma(a1))
            # so log (product (i=0,n) (up_part + i)) = log(Gamma(count+up_part)) - log(Gamma(up_part))
            # and log (product (i=0,n) (down_part + i)) = log(Gamma(count+down_part)) - log(Gamma(down_part))
            #
            # as up_part and down_part are floats, we cannot replace log of gamma with log of factorial :(
            up_part = self.eta + node_word_count[w]
            down_part = self.eta_sum + node_total_words + total_words
            node_weight += math.lgamma(up_part+count) - math.lgamma(up_part) - (math.lgamma(down_part+count) - math.lgamma(down_part))
            total_words += count
        return node_weight

    def calculate_word_likelihood(self, node_weights, node, weight, level_word_counts, new_topic_weights, level):
        # first calculate the likelihood of the words at this level, given this topic
        node_weight = self.calculate_word_likelihood_at_level(node.word_counts, node.total_words, level_word_counts[level], new_topic_weights)
        # propagate that weight to the child nodes
        for child in node.children:
            self.calculate_word_likelihood(node_weights, child, weight + node_weight,
                                           level_word_counts, new_topic_weights, level+1)
        # finally if this is an internal node, add the weight of a new path
        level += 1
        while level < self.num_levels:
            node_weight += new_topic_weights[level]
            level += 1
        node_weights[node] += node_weight


    def get_weighted_random(self, weights):
        total = weights.sum()
        n = self.random_state.random()
        n  *= total
        for i, w in enumerate(weights):
            if n <= w:
                return i
            else:
                n -= w
        return len(weights)-1 # exceptional case


    def sample_topics(self, d):

        doc = self.corpus[d]

        # initialise level counts
        doc_levels = self.levels[d]
        level_counts = np.zeros(self.num_levels, dtype=int)
        for c in doc_levels:
            level_counts[c] += 1

        # get the leaf node and populate the path
        path = np.zeros(self.num_levels, dtype=object)
        node = self.document_leaves[d]
        for level in range(self.num_levels-1, -1, -1): # e.g. [3, 2, 1, 0] for num_levels = 4
            path[level] = node
            node = node.parent

        # sample a new level for each word
        level_weights = np.zeros(self.num_levels)
        for n in range(len(doc)):

            w = doc[n]
            word_level = doc_levels[n]

            # remove from model
            level_counts[word_level] -= 1
            node = path[word_level]
            node.word_counts[w] -= 1
            node.total_words -= 1

            # pick new level
            for level in range(self.num_levels):
                level_weights[level] = (self.alpha + level_counts[level]) *                     \
                    (self.eta + path[level].word_counts[w]) /                                   \
                    (self.eta_sum + path[level].total_words)

            level_weights = level_weights / np.sum(level_weights)
            # level = self.random_state.multinomial(1, level_weights).argmax()
            level = self.get_weighted_random(level_weights)

            # put the word back into the model
            doc_levels[n] = level
            level_counts[level] += 1
            node = path[level]
            node.word_counts[w] += 1
            node.total_words += 1

    def print_nodes(self, n_words, with_weights):
        self.print_node(self.root_node, 0, n_words, with_weights)

    def print_node(self, node, indent, n_words, with_weights):
        out = '    ' * indent
        out += 'topic=%d level=%d (documents=%d): ' % (node.node_id, node.level, node.customers)
        out += node.get_top_words(n_words, with_weights)
        print(out)
        for child in node.children:
            self.print_node(child, indent+1, n_words, with_weights)
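
The lgamma shortcut used in `calculate_word_likelihood_at_level` can be checked numerically against the commented-out loop; a small self-contained sketch (made-up values for eta and the counts):

import math

eta, eta_sum = 0.1, 5.0            # eta_sum = eta * num_types (num_types = 50 here)
node_word_count_w, node_total_words = 3, 40
count, total_words = 5, 7

up_part = eta + node_word_count_w
down_part = eta_sum + node_total_words + total_words

slow = sum(math.log((up_part + i) / (down_part + i)) for i in range(count))
fast = (math.lgamma(up_part + count) - math.lgamma(up_part)
        - (math.lgamma(down_part + count) - math.lgamma(down_part)))
assert math.isclose(slow, fast)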
Example #10
    def _create_new_domino_position_and_rotation(
        self, random_state: RandomState
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
        """
        This method will attempt at creating a new setup of dominos.
        The setup is to have the dominos be equally spaced across a circle arc

        :return: Tuple[proposed_positions, proposed_angles]
            proposed_positions: np.ndarray of shape (num_objects, 3) with proposed target positions, or None if placement fails
            proposed_angles: np.ndarray of shape (num_objects,) with proposed target angles, or None if placement fails
        """
        num_objects = self.mujoco_simulation.num_objects

        for _ in range(MAX_RETRY):
            # Offset that the whole domino chain is rotated by; this rotates the whole arc of dominoes globally
            proposed_offset = random_state.random() * np.pi
            # Angle between the rotation of one domino and the next. Random angle offset between -pi/8 and pi/8
            proposed_delta = random_state.random() * (np.pi / 4.0) - (np.pi /
                                                                      8.0)

            # Angles each domino will be rotated respectively
            proposed_angles = np.array(range(num_objects)) * proposed_delta + (
                proposed_offset + proposed_delta / 2)
            # Set target quat so that the computation for `get_target_bounding_boxes` is correct
            self._set_target_quat(num_objects, proposed_angles)

            angles_between_objects = (
                np.array(range(1, 1 + num_objects)) * proposed_delta +
                proposed_offset)

            object_distance = (
                self.mujoco_simulation.simulation_params.object_size *
                self.mujoco_simulation.simulation_params.domino_distance_mul)

            x = np.cumsum(np.cos(angles_between_objects)) * object_distance
            y = np.cumsum(np.sin(angles_between_objects)) * object_distance

            # Proposed positions
            proposed_positions = np.zeros((num_objects, 3))

            # Copy the z axis values:
            target_bounding_boxes = (
                self.mujoco_simulation.get_target_bounding_boxes()
            )  # List[obj_pos, obj_size]
            proposed_positions[:, 2] = target_bounding_boxes[:, 1, 2]

            for target_idx in range(num_objects):
                # First target will be at (0, 0) and the remaining targets will be offset from that
                if target_idx > 0:
                    proposed_positions[target_idx] += np.array(
                        [x[target_idx - 1], y[target_idx - 1], 0])

            is_valid, proposed_positions = self._adjust_and_check_fit(
                num_objects,
                target_bounding_boxes,
                proposed_positions,
                self.mujoco_simulation.get_placement_area(),
                random_state,
            )
            if is_valid and proposed_positions is not None:
                return proposed_positions, proposed_angles

        # Mark failure to fit goal positions
        return None, None
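
The arc construction can be previewed standalone; a sketch with made-up parameters (`num_objects=4`, unit spacing, no simulation state):

import numpy as np
from numpy.random import RandomState

rs = RandomState(3)
num_objects, object_distance = 4, 1.0
offset = rs.random() * np.pi
delta = rs.random() * (np.pi / 4.0) - (np.pi / 8.0)

angles = np.arange(num_objects) * delta + (offset + delta / 2)
between = np.arange(1, 1 + num_objects) * delta + offset
x = np.cumsum(np.cos(between)) * object_distance
y = np.cumsum(np.sin(between)) * object_distance
# Domino 0 sits at (0, 0); domino k (k > 0) sits at (x[k-1], y[k-1]),
# rotated by angles[k]; consecutive dominoes are object_distance apart.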
Example #11
# Module-level imports and constants (np, os, re, dataclasses, typing,
# RandomState, utils, Timing, DEFAULT_* values) are assumed from the
# surrounding package; the @dataclass decorator is implied by the
# field(...) defaults and __post_init__ below.
@dataclass
class Evolution:
    # pylint: disable=too-many-instance-attributes
    # noinspection PyUnresolvedReferences
    """
    Class that executes genetic search.
    :param num_populations: (int) number of populations (default 1)
    :param population_size: (int) size of the population
    :param genotype_size: (int) size of the genotype vector
    :param evaluation_function: (func) function to evaluate genotype performance.
        It should take as input the entire population genotype (matrix) and return
        an array with the performances
    :param fitness_normalization_mode: (str) method to normalize fitness values
        (fitness-proportionate, rank-based or sigma scaling)
    :param selection_mode: (str) method to select parents for reproduction (RWS or SUS)
    :param reproduce_from_elite: (bool) whether the reproduction comes from elite 
        or remaining agents in the population
    :param reproduction_mode: (str) method to reproduce genetic algorithm or hill climbing
    :param mutation_variance: (float) variance of gaussian mutation rate
    :param folder_path: (string) path of the folder where to save the checkpoints
    :param search_constraint: (list of bool) flag whether to clip a specific site in
        a genotype (default to all True)
    :param reevaluate: (bool) whether to re-evaluate the individual if it's retained
        in the new generation (used only in hill-climbing)
    :param max_generation: (int) maximum generations to evolve (not used if
        termination_function is provided)
    :param termination_function: (func) function to check if search should terminate
        (it accept the Evolution instance, default to None)
    :param elitist_fraction: (float) proportion of new population that will be made of
        best unmodified parents (only relevant for genetic algorithm)
    :param mating_fraction: (float) proportion of population that will be made of children
        (in Beer this is equal to 1. - elitist_fraction) (only relevant for genetic algorithm)
    :param crossover_probability: (float) probability that crossover will occur
        (only relevant for genetic algorithm)
    :param crossover_mode: (str) the way to perform crossover (UNIFORM, 1-POINT, 2-POINT, ...)
        (only relevant for genetic algorithm)
    :param crossover_points: (list of int) a list that specifies the indices of where
        to cut during crossover (only relevant for genetic algorithm)
    :param checkpoint_interval: (int) every how many generations should the population
        be saved and results logged
    :param max_expected_offspring: (float) number of offspring to be allocated to the
        best individual (between 1 and 2)
    """
    
    population_size: int
    genotype_size: int
    evaluation_function: Callable
    num_populations: int = 1
    performance_objective: Union[str,float] = 'MAX' # 'MIN', 'ABS_MAX', float value
    fitness_normalization_mode: str = 'FPS' # 'NONE', 'FPS', 'RANK', 'SIGMA'
    selection_mode: str = 'RWS' # 'UNIFORM', 'RWS', 'SUS'
    reproduce_from_elite: bool = False
    reproduction_mode: str = 'GENETIC_ALGORITHM' # 'HILL_CLIMBING', 'GENETIC_ALGORITHM'
    mutation_variance: float = DEFAULT_MUTATION_VARIANCE
    max_generation: int = 100
    termination_function: Callable = None
    checkpoint_interval: int = DEFAULT_CHECKPOINT_INTERVAL
    crossover_probability: float = DEFAULT_CROSSOVER_PROB
    crossover_points: List[int] = None
    folder_path: str = None
    elitist_fraction: float = None
    mating_fraction: float = None
    n_elite: int = None
    n_mating: int = None
    n_fillup: int = None
    crossover_mode: str = 'UNIFORM'
    search_constraint: np.ndarray = None  # this will be converted to all True by default in __post_init__
    reevaluate: bool = True # only used in hill-climbing
    max_expected_offspring: float = DEFAULT_MAX_EXPECTED_OFFSPRING

    random_seed: int = 0
    random_state: RandomState = None
    pop_eval_random_seed: int = None  # initialized at every generation

    # other field (no need to define them outside)
    generation: int = 0  # the current generation number
    population: np.ndarray = None  # the list of population genotypes (sorted by performance)
    population_unsorted: np.ndarray = None  # the list of population genotypes (before sorting)
    # (will be initialized in __post_init__)
    performances: np.ndarray = None  # performances of the genotypes
    fitnesses: np.ndarray = None  # fitnesses of the genotypes

    population_sorted_indexes: np.ndarray = None  
    # keep track of indexes in sorted population
    # population_sorted_indexes[0] is the index of the agent with best performance
    # in the unsorted population

    # collect average, best and worst performances across generations
    avg_performances: List[List[float]] = field(default_factory=list)
    best_performances: List[List[float]] = field(default_factory=list)
    worst_performances: List[List[float]] = field(default_factory=list)

    timeit: bool = False

    def __post_init__(self):

        assert self.num_populations > 0, "Number of populations should be greater than zero"

        assert self.population_size % 4 == 0, "Population size must be divisible by 4"
        # otherwise n_elite + n_mating may be greater than population_size    

        self.sqrt_mutation_variance = np.sqrt(self.mutation_variance)

        if self.random_state is None:
            self.random_state = RandomState(self.random_seed)

        self.loaded_from_file = all(
            x is not None for x in 
            [self.population, self.performances, self.fitnesses]
        )

        # create initial population if not provided
        if self.population is None:
            # create a set of random genotypes
            self.population = self.random_state.uniform(
                MIN_SEARCH_VALUE, MAX_SEARCH_VALUE,
                [self.num_populations, self.population_size, self.genotype_size]
            )

        if self.search_constraint is None:
            self.search_constraint = np.array([True] * self.genotype_size)

        self.file_num_zfill = int(np.ceil(np.log10(self.max_generation + 1))) \
            if self.max_generation \
            else 1 if self.max_generation == 0 \
            else FILE_NUM_ZFILL_DEFAULT

        # convert performance_objective to float if it is a string with a number
        f = utils.get_float(self.performance_objective)
        if f is not None:
            self.performance_objective = f

        self.timing = Timing(self.timeit)

        self.validate_params()
        self.init_reproduction_parameters()

    @staticmethod
    def get_random_genotype(rando_state, gen_size):
        return rando_state.uniform(MIN_SEARCH_VALUE, MAX_SEARCH_VALUE, gen_size)

    def init_reproduction_parameters(self):
        # self.n_mating: number of new agents return by select_mating_pool()
        if self.reproduction_mode == 'GENETIC_ALGORITHM':
            # self.n_elite: number of best agents to preserve (only used in genetic algorithm)
            # self.n_fillup: agents to be randomly generated
            self.n_elite = int(
                np.floor(self.population_size * self.elitist_fraction + 0.5) # at least one
            )  # children from elite group
            self.n_mating = int(np.floor(
                self.population_size * self.mating_fraction + 0.5 # at least one
            ))  # children from mating population
            self.n_fillup = self.population_size - (self.n_elite + self.n_mating)  # children from random fillup
            assert all(x >= 0 for x in [self.n_elite, self.n_mating, self.n_fillup])
            assert self.n_elite + self.n_mating + self.n_fillup == self.population_size
        else:  # 'HILL_CLIMBING'
            self.n_mating = self.population_size

    def validate_params(self):

        # termination condition
        assert self.max_generation is None or self.termination_function is None, \
            "max_generation and termination_function cannot both be set"

        # folder path
        if self.folder_path:
            assert os.path.isdir(self.folder_path), "folder_path '{}' is not a valid directory".format(self.folder_path)

        # search_constraint
        assert len(self.search_constraint) == self.genotype_size, \
            "The length of search_constraint should be equal to genotype_size"

        # performance_objective         
        accepted_values = ['MAX', 'MIN', 'ABS_MAX']
        assert type(self.performance_objective) in [float,int] or \
            self.performance_objective in accepted_values, \
            'performance_objective should be either {}'.format(', '.join(accepted_values))

        # fitness_normalization_mode         
        accepted_values = ['NONE', 'FPS', 'RANK', 'SIGMA']
        assert self.fitness_normalization_mode in accepted_values, \
            'fitness_normalization_mode should be either {}'.format(', '.join(accepted_values))
        assert self.fitness_normalization_mode!='NONE' or self.selection_mode == 'UNIFORM', \
            "if fitness_normalization_mode is 'NONE' (copy of PERFORMANCE), selection_mode must be UNIFORM (not normalized)" 

        # selection_mode
        accepted_values = ['UNIFORM', 'RWS', 'SUS']
        assert self.selection_mode in accepted_values, \
            'selection_mode should be either {}'.format(', '.join(accepted_values))

        # reproduce_from_elite
        assert not self.reproduce_from_elite or self.selection_mode == 'UNIFORM', \
            'if reproducing from elite, selection mode must be uniform'

        # reproduction_mode
        accepted_values = ['HILL_CLIMBING', 'GENETIC_ALGORITHM']
        assert self.reproduction_mode in accepted_values, \
            'reproduction_mode should be either {}'.format(', '.join(accepted_values))

        # GENETIC_ALGORITHM
        if self.reproduction_mode == 'GENETIC_ALGORITHM':
            assert 0 <= self.elitist_fraction <= 1, \
                'In GENETIC_ALGORITHM: 0 <= elitist_fraction <=1'
            assert 0 <= self.mating_fraction <= 1, \
                'In GENETIC_ALGORITHM: 0 <= mating_fraction <=1'
            assert 0 <= self.crossover_probability <= 1, \
                'In GENETIC_ALGORITHM: 0 <= crossover_probability <=1'
            assert re.match(r'UNIFORM|\d+-POINT', self.crossover_mode), \
                'In GENETIC_ALGORITHM: crossover_mode should be UNIFORM or x-POINT'

        # crossover
        assert self.crossover_mode is not None, "crossover_mode cannot be None"
        if self.crossover_mode == 'UNIFORM':
            # crossover is computed on the entire genotype
            # with prob 0.5 of flipping each genotype site
            assert self.crossover_points is None, \
                "In uniform crossover_mode you shouldn't specify the crossover_points"
        elif self.crossover_mode.endswith('-POINT'):
            # A. if crossover_points is None the points are randomly generated
            # crossover_points must be a list of max x-1 integers in the interval [1,G-1]
            # where x is the integer > 0 specified in the parameter crossover_mode ('x-POINT')
            # and G is the size of the genotype
            # e.g. if parent1=[0,0,0] and parent2=[1,1,1] (G=3),
            # crossover_points must contain a single integer which can be
            # 1: child1=[0,1,1] child2=[1,0,0]
            # 2: child1=[0,0,1] child2=[1,1,0]
            # B. if crossover_points is not None -> num_points <= len(self.crossover_points)
            # if num_points < len(self.crossover_points)
            # only num_points will be randomly selected from the self.crossover_points
            num_points = self.crossover_mode[:-6]
            assert utils.is_int(num_points), \
                "Param crossover_mode should be 'UNIFORM' or 'x-POINT' (with x being an integer > 0)"
            num_points = int(num_points)
            assert 0 < num_points < self.genotype_size, \
                "Param crossover_mode should be 'x-POINT', with x being an integer such that 0 < x < G " \
                "and where G is the size of the genotype"
            assert num_points <= self.genotype_size - 1, \
                "Too high value for {} in param crossover_mode. Max should be G-1 " \
                "(where G is the size of the genotype)".format(
                    self.crossover_mode)
            if self.crossover_points is not None:
                assert len(set(self.crossover_points)) == len(self.crossover_points), \
                    "Duplicated values in crossover_points"
                self.crossover_points = sorted(set(self.crossover_points))
                assert num_points <= len(self.crossover_points), \
                    "crossover_mode={} and crossover_points={} but {} must be <= {}=len(crossover_points)".format(
                        self.crossover_mode, self.crossover_points, num_points, len(self.crossover_points))
                assert all(1 < x < self.genotype_size for x in self.crossover_points), \
                    "Some of the values in crossover_points are not in the interval [1,G-1] " \
                    "where G is the size of the genotype"
        else:
            assert False, \
                "Param crossover_mode should be 'UNIFORM' or 'x-POINT' (with x being an integer > 0)"

    def set_folder_name(self, text):
        self.folder_path = text


    def run(self):
        """
        Execute a full search run until some condition is reached.
        :return: the last population in the search
        """

        if self.loaded_from_file:
            # complete the cycle from the previous run (after saving)
            self.save_to_file()
            self.reproduce()
            self.generation += 1

        t = self.timing.init_tictoc()

        while self.generation <= self.max_generation:
            # evaluate all genotypes on the task
            self.pop_eval_random_seed = utils.random_int(self.random_state)            

            # shuffle populations before running evaluation function
            for pop in self.population:
                self.random_state.shuffle(pop)

            # run evaluation function
            self.performances = self.evaluation_function(
                self.population, self.pop_eval_random_seed
            )

            if type(self.performances) is list:
                self.performances = np.array(self.performances)
            
            if self.num_populations==1 and self.performances.ndim != 2:
                # eval function returned a simple array of performances
                # because there is only one population
                self.performances = np.expand_dims(self.performances,0) # add an additional index (population)

            expected_perf_shape = self.population.shape[:-1]
            assert self.performances.shape == expected_perf_shape, \
                "Evaluation function didn't return performances with shape {}".format(expected_perf_shape)
            
            assert (self.performances >=0).all(), \
                "Performance must be non-negative"

            self.timing.add_time('EVO1-RUN_eval_function', t)

            # sorting population and performances on performances
            self.sort_population_on_performance()
            self.timing.add_time('EVO1-RUN_sort_population', t)

            # update average/best/worst population performance
            avg = np.mean(self.performances, axis=1).tolist()
            best = self.performances[:,0].tolist()
            worst = self.performances[:,-1].tolist()
            variance = np.var(self.performances, axis=1).tolist()
            self.avg_performances.append(avg)
            self.best_performances.append(best)
            self.worst_performances.append(worst)
            self.timing.add_time('EVO1-RUN_stats', t)

            print_stats = lambda a : '|'.join(['{:.5f}'.format(x) for x in a])

            # print short statistics
            print("Generation {}: Best: {}, Worst: {}, Average: {}, Variance: {}".format(
                str(self.generation).rjust(self.file_num_zfill), print_stats(best), 
                print_stats(worst), print_stats(avg), print_stats(variance)))
            self.timing.add_time('EVO1-RUN_print_stats', t)

            # check if to terminate
            if self.generation == self.max_generation or \
                    (self.termination_function and self.termination_function(self)):
                self.save_to_file()
                # Stop search due to termination condition
                break

            # save the intermediate evolution state
            if self.checkpoint_interval and self.generation % self.checkpoint_interval == 0:
                # save current generation
                self.save_to_file()
            self.timing.add_time('EVO1-RUN_savefile', t)

            # Compute fitnesses (based on performances) - used in reproduce
            self.update_fitnesses()
            self.timing.add_time('EVO1-RUN_update_fitness', t)

            # run reproduce (update fitnesses and run genetic or hill-climbing)
            self.reproduce()             
            self.timing.add_time('EVO1-RUN_reproduce', t)

            # update generation
            self.generation += 1

    def sort_population_on_performance(self):     
        # performances must be non-negative (>=0)           
        if type(self.performance_objective) is str:
            if self.performance_objective == 'MAX':            
                performances_objectified = self.performances
            elif self.performance_objective == 'MIN':
                performances_objectified = - self.performances
            else:
                assert self.performance_objective == 'ABS_MAX'
                performances_objectified = np.abs(self.performances)
        else:
            # minimizing the distance between performance and perf objective
            # when self.performance_objective==0 this would be identical to 'ABS_MIN'
            performances_objectified = - np.abs(self.performances - self.performance_objective)

        # sort genotypes and performances by performance_objectified from high to low
        self.population_sorted_indexes = np.argsort(-performances_objectified, axis=-1)            
        self.performances = np.take_along_axis(self.performances, self.population_sorted_indexes, axis=-1)        
        self.population_unsorted = self.population # keep track of the original population to ensure reproducibility
        sorted_indexes_exp = np.expand_dims(self.population_sorted_indexes, -1) # add one dimension at the end to sort population
        self.population = np.take_along_axis(self.population_unsorted, sorted_indexes_exp, axis=1)

        # OLD METHOD WITHOUT NUMPY:
        # sort genotypes and performances by performance from best to worst
        # self.population, self.performances = \
        #     zip(*sorted(zip(self.population, self.performances), 
        #     key=lambda pair: pair[1], reverse=True))
        # self.population = np.array(self.population)
        # self.performances = np.array(self.performances)


    def reproduce(self):
        """Run reproduce via HILL_CLIMBING or GENETIC_ALGORITHM"""
        if self.reproduction_mode == 'GENETIC_ALGORITHM':
            self.reproduce_genetic_algorithm()
        else:
            self.reproduce_hill_climbing()

    def reproduce_genetic_algorithm(self):
        """
        Reproduce a single generation in the following way:
        1) Copy the proportion equal to elitist_fraction of the current population to the new population
           (these are best_genotypes)
        2) Select part of the population for crossover using some selection method (set in config)
        3) Shuffle the selected population in preparation for cross-over
        4) Create crossover_fraction children of selected population with probability of crossover equal
           to prob_crossover.
        Crossover takes place at genome module boundaries (single neurons).
        5) Apply mutation to the children with mutation equal to mutation_var
        6) Fill the rest of the population with randomly created genotypes

        self.population and self.performances are sorted based on performances
        """

        t = self.timing.init_tictoc()

        new_population = np.zeros(                
            [self.num_populations, self.population_size, self.genotype_size]
        )

        # 1) Elitist selection        
        # same elite size in all populations
        self.elite_population = self.population[:, :self.n_elite] 
        new_population[:, :self.n_elite] = self.elite_population
        self.timing.add_time('EVO2-GA_1_elitist_selection', t)

        # 2) Select mating population from the remaining population        
        mating_pool = self.select_mating_pool()
        self.timing.add_time('EVO2-GA_2_mating_pool', t)

        # 3) Shuffle mating pool
        for pop_mating_pool in mating_pool:            
            self.random_state.shuffle(pop_mating_pool)
        self.timing.add_time('EVO2-GA_3_shuffle', t)

        # 4) Create children with crossover or apply mutation
        mating_finish = self.n_elite + self.n_mating
        newpop_counter = None  # track where we are in the new population
        
        for p in range(self.num_populations):            
            
            mating_counter = 0
            newpop_counter = self.n_elite # track where we are in the new population
            
            while newpop_counter < mating_finish:
                not_last = mating_finish - newpop_counter > 1
                parent1 = mating_pool[p][mating_counter]

                if not_last and self.random_state.random() < self.crossover_probability:
                    parent2 = mating_pool[p][mating_counter + 1]
                    child1, child2 = self.crossover(parent1, parent2)
                    # if the child is the same as the first parent after crossover, mutate it (as in Beer)
                    if np.array_equal(child1, parent1):
                        child1 = self.mutate(parent1)
                    new_population[p][newpop_counter] = child1
                    new_population[p][newpop_counter + 1] = child2
                    newpop_counter += 2
                    mating_counter += 2
                else:
                    # if no crossover, mutate just one genotype
                    child1 = self.mutate(parent1)
                    new_population[p][newpop_counter] = child1
                    newpop_counter += 1
                    mating_counter += 1
            
        self.timing.add_time('EVO2-GA_4_children', t)

        # 5) Fill up with random new genotypes
        new_population[:, newpop_counter:] = self.random_state.uniform(
            MIN_SEARCH_VALUE, MAX_SEARCH_VALUE,
            size=[self.num_populations, self.n_fillup, self.genotype_size]
        )
        self.timing.add_time('EVO2-GA_5_fillup', t)

        # 6) redefined population based on the newly computed population
        self.population = new_population
        self.timing.add_time('EVO2-GA_6_convert_pop', t)

    def reproduce_hill_climbing(self):

        t = self.timing.init_tictoc()

        # 1) Select the parents using sampling (replacing the entire population, no elite here)
        parent_population = self.select_mating_pool()
        self.timing.add_time('EVO2-HC_1_mating pool', t)

        # 2) Reevaluate
        if self.reevaluate:
            parent_performance = np.array(self.evaluation_function(parent_population, self.pop_eval_random_seed))
        else:
            assert False, \
                "reevaluate params has to be True. " \
                "For reevaluate to be False we need to also return performances in function select_mating_pool"
        self.timing.add_time('EVO2-HC_2_reevaluate', t)

        # 3) Produce the new population by mutating each parent and rewrite it on the current population
        self.population = np.array([self.mutate(gen) for gen in parent_population])
        self.timing.add_time('EVO2-HC_3_mutate', t)

        # 4) Calculate new performances
        self.performances = np.array(self.evaluation_function(self.population, self.pop_eval_random_seed))
        self.timing.add_time('EVO2-HC_4_compute_perf', t)

        # 5) Check if performance worsened and, in that case, retrieve the agent from the parent population
        lower_performance = self.performances < parent_performance  # bool array
        for i in range(self.population_size):
            if lower_performance[i]:
                self.population[i] = parent_population[i]
                self.performances[i] = parent_performance[i]
        self.timing.add_time('EVO2-HC_5_compare_and_select', t)

    def update_fitnesses(self):
        """
        Update genotype fitness to relative values, retain sorting from best to worst.
        """        
        if self.fitness_normalization_mode == 'NONE':
            # do not use fitness in selection
            self.fitnesses = None

        elif self.fitness_normalization_mode == 'FPS':  # (fitness-proportionate)
            self.fitnesses = np.zeros(self.performances.shape) # same shape as performances
            for p in range(self.num_populations):
                avg_perf = self.avg_performances[-1][p]
                m = utils.linear_scaling(
                    self.worst_performances[-1][p],
                    self.best_performances[-1][p],
                    avg_perf,
                    self.max_expected_offspring
                )
                scaled_performances = m * (self.performances[p] - avg_perf) + avg_perf
                total_performance = np.sum(scaled_performances)
                self.fitnesses[p] = scaled_performances / total_performance

        elif self.fitness_normalization_mode == 'RANK':  # (rank-based)
            # Baker's linear ranking method: f(pos) = 2-SP+2*(SP-1)*(pos-1)/(n-1)
            # the highest ranked individual receives max_exp_offspring (typically 1.1),
            # the lowest receives 2 - max_exp_offspring
            # normalized to sum to 1
            self.fitnesses = np.zeros(self.performances.shape) # same shape as performances
            for p in range(self.num_populations):
                self.fitnesses[p] = np.array(
                    [
                        (
                            self.max_expected_offspring + (2 - 2 * self.max_expected_offspring) * i /
                            (self.population_size - 1)
                        ) / self.population_size 
                        for i in range(self.population_size)
                    ]
                )

        elif self.fitness_normalization_mode == 'SIGMA':  # (sigma-scaling)
            # for every individual 1 + (I(f) - P(avg_f))/2*P(std) is calculated
            # if value is below zero, a small positive constant is given so the individual has some probability
            # of being chosen. The numbers are then normalized
            self.fitnesses = np.zeros(self.performances.shape) # same shape as performances
            for p in range(self.num_populations):
                pop_perf = self.performances[p]
                avg = np.mean(pop_perf)
                std = max(0.0001, np.std(pop_perf))
                exp_values = list((1 + ((f - avg) / (2 * std))) for f in pop_perf)
                
                for i, v in enumerate(exp_values):
                    if v <= 0:
                        exp_values[i] = 1 / self.population_size
                s = sum(exp_values)
                self.fitnesses[p] = np.array(list(e / s for e in exp_values))

    def select_mating_pool(self):
        """
        Select a mating pool population.
        :return: selected parents for reproduction
        """

        if self.selection_mode == 'UNIFORM':
            # create mating_pool from source_population uniformly
            # (from beginning to end and if needed restart from beginning)

            source_population = \
                self.elite_population if self.reproduce_from_elite \
                else self.population

            num_source_pop = source_population.shape[1] # number of elements in source pop

            assert num_source_pop>0, \
                "Error, can't create a mating pool from empty source population"
            
            cycle_source_pop_indexes = np.resize(        # build a column vector of indexes
                np.resize(                               # [0, 1, ..., n-1, 0, 1, ...] of length
                    np.arange(num_source_pop),           # n_mating, where n is num_source_pop,
                    [self.n_mating, 1]                   # then replicate it across all populations to
                ),                                       # match the 3 dimensions of source_population
                [self.num_populations, self.n_mating, 1]
            )
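            # e.g. (illustrative) num_source_pop = 3 and n_mating = 5 give the
            # index column [0, 1, 2, 0, 1] for every population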

            # cycle through the source population(s)
            mating_pool = np.take_along_axis(source_population, cycle_source_pop_indexes, 1)
        else:
            min_fitness = np.min(self.fitnesses, axis=-1)
            assert (min_fitness > -ROUNDING_TOLERANCE).all(), \
                "Found negative fitness: {}".format(min_fitness)
            if (self.fitnesses < 0).any():
                # set small negative values due to rounding errors to zero
                self.fitnesses[self.fitnesses < 0] = 0
            cum_probs = np.cumsum(self.fitnesses, axis=-1)
            cum_probs_error = np.abs(cum_probs[:, -1] - 1.0)
            assert (cum_probs_error < CUM_PROB_TOLERANCE).all(), \
                "cum_probs_error too large: {}".format(cum_probs_error)
            mating_pool = np.zeros([self.num_populations, self.n_mating, self.genotype_size])
            if self.selection_mode == "RWS":
                # roulette wheel selection
                for pop in range(self.num_populations):                    
                    mating_pool_indexes = self.random_state.choice(
                        self.population_size, 
                        size=(self.n_mating,1), 
                        replace=True, 
                        p=self.fitnesses[pop]
                    )
                    mating_pool[pop] = np.take_along_axis(
                        self.population[pop],
                        mating_pool_indexes,
                        axis=0
                    )
            elif self.selection_mode == "SUS":
                # TODO: find a way to implement this via numpy
                # stochastic universal sampling selection                
                p_dist = 1 / self.n_mating  # distance between the pointers
                for pop in range(self.num_populations):                    
                    start = self.random_state.uniform(0, p_dist)
                    pointers = [start + i * p_dist for i in range(self.n_mating)]
                    cp = cum_probs[pop] # cumulative prob of current population
                    m_idx = 0 # index in the mating pool to be filled
                    for poi in pointers:
                        for (i, genotype) in enumerate(self.population[pop]):
                            if poi <= cp[i]:
                                mating_pool[pop][m_idx] = genotype
                                m_idx += 1
                                break
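                # e.g. (illustrative) n_mating = 4 gives p_dist = 0.25 and pointers
                # such as [0.1, 0.35, 0.6, 0.85]; each pointer selects the first
                # genotype whose cumulative probability reaches it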
            else:
                raise ValueError("Unknown selection_mode: {}".format(self.selection_mode))

        assert len(mating_pool[0]) == self.n_mating
        return mating_pool

    def crossover(self, parent1, parent2):
        """
        Given two genotypes, create two new genotypes by exchanging their genetic material.
        :param parent1: first parent genotype
        :param parent2: second parent genotype
        :return: two new genotypes
        # TODO: implement class testing functions
        """

        genotype_size = len(parent1)
        if self.crossover_mode == 'UNIFORM':
            if self.crossover_points is None:
                # by default do crossover on the entire genotype
                flips = self.random_state.choice(a=[0, 1], size=genotype_size)
            else:
                # TODO: this will never occur because we check crossover_points above but
                # consider implementing in the future a case of uniform crossover in certain
                # portions of the genotype
                assert False
            inv_flips = 1 - flips
            child1 = flips * parent1 + inv_flips * parent2
            child2 = flips * parent2 + inv_flips * parent1
        else:
            # x-POINT crossover, e.g. '2-POINT' -> num_points = 2
            num_points = int(self.crossover_mode[:-6])  # strip the '-POINT' suffix
            if self.crossover_points is None:
                possible_points = list(range(1, genotype_size))  # [1,...,G-1]
                chosen_crossover_points = sorted(self.random_state.choice(possible_points, num_points, replace=False))
            elif num_points < len(self.crossover_points):
                chosen_crossover_points = sorted(
                    self.random_state.choice(self.crossover_points, num_points, replace=False))
            else:
                chosen_crossover_points = sorted(self.crossover_points)
                assert num_points == len(chosen_crossover_points)
            gt = [parent1, parent2]
            boundaries = [0] + chosen_crossover_points + [genotype_size]
            segment_ranges = [(boundaries[i], boundaries[i + 1]) for i in range(len(boundaries) - 1)]
            # alternate the parent each segment is copied from: child1 starts
            # from parent1, child2 from parent2
            segments1 = [gt[i % 2][s[0]:s[1]] for i, s in enumerate(segment_ranges)]
            segments2 = [gt[1 - i % 2][s[0]:s[1]] for i, s in enumerate(segment_ranges)]
            child1 = np.hstack(segments1)
            child2 = np.hstack(segments2)
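            # e.g. (illustrative) genotype_size = 5 and chosen_crossover_points
            # = [2, 4] give child1 = [p1, p1, p2, p2, p1] and
            # child2 = [p2, p2, p1, p1, p2], where pK marks the source parent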
        return child1, child2

    def mutate(self, genotype):
        """
        Mutate a genotype by adding a random-direction vector with
        Gaussian-distributed magnitude; where search_constraint applies,
        the result is clipped to [MIN_SEARCH_VALUE, MAX_SEARCH_VALUE].
        :param genotype: genotype to mutate
        :return: mutated genotype
        """
        magnitude = self.random_state.normal(0, self.sqrt_mutation_variance)
        unit_vector = utils.make_rand_vector(len(genotype), self.random_state)
        mutant = np.where(
            self.search_constraint,
            np.clip(
                genotype + magnitude * unit_vector,
                MIN_SEARCH_VALUE,
                MAX_SEARCH_VALUE
            ),
            genotype + magnitude * unit_vector
        )
        return mutant

    def save_to_file(self):
        if self.folder_path is None:
            return
        # the population is saved after sorting based on fitness
        file_path = os.path.join(
            self.folder_path,
            'evo_{}.json'.format(str(self.generation).zfill(self.file_num_zfill))
        )

        obj_dict = asdict(self)
        # callables cannot be serialized to JSON; they are re-supplied on load
        del obj_dict['evaluation_function']
        del obj_dict['termination_function']
        # store the RNG state so a resumed run is deterministic
        obj_dict['random_state'] = json_numpy.dumps(self.random_state.get_state())

        with open(file_path, 'w') as f_out:
            json.dump(obj_dict, f_out, cls=json_numpy.NumpyListJsonEncoder, indent=3)

    @staticmethod
    def load_from_file(file_path, evaluation_function: Callable = None,
                       termination_function: Callable = None,
                       **kwargs):

        with open(file_path) as f_in:
            obj_dict = json.load(f_in)

        # restore numpy arrays that were serialized as plain lists
        for k in ['population', 'population_unsorted', 'performances', 'fitnesses']:
            obj_dict[k] = np.array(obj_dict[k])

        # restore the RNG exactly where it left off
        random_state = RandomState(None)
        random_state_state = json_numpy.loads(obj_dict['random_state'])
        random_state.set_state(random_state_state)
        obj_dict['random_state'] = random_state

        obj_dict['evaluation_function'] = evaluation_function
        obj_dict['termination_function'] = termination_function

        if kwargs:
            obj_dict.update(kwargs)

        evo = Evolution(**obj_dict)

        return evo
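
# A minimal vectorized sketch of stochastic universal sampling, addressing the
# "implement this via numpy" TODO in select_mating_pool above. It is an
# illustration only, not part of the original class; the helper name
# `sus_indexes` and its signature are assumptions.
def sus_indexes(fitnesses, n_mating, random_state):
    """Return n_mating indexes chosen by stochastic universal sampling.

    fitnesses: 1-D array of non-negative selection probabilities summing to 1
    (as produced by update_fitnesses for a single population).
    """
    cum_probs = np.cumsum(fitnesses)
    p_dist = 1.0 / n_mating                         # distance between the pointers
    start = random_state.uniform(0, p_dist)         # single random offset
    pointers = start + p_dist * np.arange(n_mating)
    # for each pointer, find the first index whose cumulative probability
    # reaches it; this replaces the nested pointer/genotype loops above
    return np.searchsorted(cum_probs, pointers)

# Usage (illustrative):
#   rs = RandomState(42)
#   idx = sus_indexes(np.array([0.1, 0.2, 0.3, 0.4]), 4, rs)
#   mating_pool[pop] = population[pop][idx]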