Example #1
0
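The test below assumes the project's SearchSpace and Trainer classes can be imported; the module paths in this sketch are assumptions, not part of the original snippet.

# Hypothetical imports -- the actual module paths are not shown in the snippet.
from search_space import SearchSpace
from trainer import Trainer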
def test_trainer():
    search_space = SearchSpace(model_output_shape=2)
    tokens = search_space.generate_token()
    # controller = Controller(tokens=tokens)
    trainer = Trainer()

    # samples = controller.generate_sequence()
    samples = [[65, 146, 143, 201, 281, 382]]  # hard-coded stand-in for controller output
    architectures = search_space.create_models(samples=samples,
                                               model_input_shape=(128, 128, 3))
    epoch_performance = trainer.train_models(samples=samples,
                                             architectures=architectures)
    assert len(epoch_performance) != 0
Example #2
0
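The Controller below depends on several imports and a project-level config module that are not included in the snippet. The sketch below lists what it needs; the project module paths and the illustrative config values are assumptions, while the config key names are taken directly from the attribute lookups in __init__.

import itertools
from typing import List, Optional, Tuple

import numpy as np
from tensorflow import keras  # assumption: tf.keras rather than standalone Keras

import config  # hypothetical project module holding the dictionaries below
from search_space import SearchSpace, SearchSpaceMn  # hypothetical module path

# Illustrative shape of the config module (values are placeholders):
# config.controller = {"max_no_of_layers": 10, "agent_lr": 1e-3, "min_reward": 0.0,
#                      "min_plays": 10, "max_plays": 100, "alpha": 0.01,
#                      "gamma": 0.99, "valid_sequence_timeout": 100}
# config.emnas = {"model_input_shape": (128, 128, 3), "model_output_shape": 2}
# config.search_space = {"mode": "MobileNets"}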
class Controller(object):
    def __init__(self, tokens):
        self.max_no_of_layers = config.controller["max_no_of_layers"]
        self.agent_lr = config.controller["agent_lr"]
        self.min_reward = config.controller["min_reward"]
        self.min_plays = config.controller["min_plays"]
        self.max_plays = config.controller["max_plays"]
        self.alpha = config.controller["alpha"]
        self.gamma = config.controller["gamma"]
        self.model_input_shape = config.emnas["model_input_shape"]
        self.valid_sequence_timeout = config.controller[
            "valid_sequence_timeout"]
        self.tokens = tokens
        self.len_search_space = len(tokens) + 1
        self.end_token = list(tokens.keys())[-1]
        self.model = self.rl_agent()
        self.states = []
        self.gradients = []
        self.rewards = []
        self.probs = []
        if config.search_space["mode"] == "MobileNets":
            self.search_space = SearchSpaceMn(
                config.emnas["model_output_shape"])
        else:
            self.search_space = SearchSpace(config.emnas["model_output_shape"])

    def rl_agent(self):
        model_output_shape = (self.max_no_of_layers - 1, self.len_search_space)
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(512,
                               input_shape=(self.max_no_of_layers - 1, ),
                               activation="relu"))
        model.add(keras.layers.Dense(256, activation="relu"))
        model.add(keras.layers.Dense(128, activation="relu"))
        model.add(keras.layers.Dense(64, activation="relu"))
        model.add(keras.layers.Dense(32, activation="relu"))
        model.add(keras.layers.Dense(16, activation="relu"))
        model.add(keras.layers.Dense(16, activation="relu"))
        model.add(keras.layers.Dense(32, activation="relu"))
        model.add(keras.layers.Dense(64, activation="relu"))
        model.add(keras.layers.Dense(128, activation="relu"))
        model.add(keras.layers.Dense(256, activation="relu"))
        model.add(keras.layers.Dense(512, activation="relu"))
        model.add(
            keras.layers.Dense(model_output_shape[0] * model_output_shape[1],
                               activation="softmax"))
        model.add(keras.layers.Reshape(model_output_shape))

        model.compile(loss="categorical_crossentropy",
                      optimizer=keras.optimizers.Adam(learning_rate=self.agent_lr))
        return model
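    # Note: the final Dense layer applies softmax over the flattened
    # (max_no_of_layers - 1) * len_search_space vector, so after the Reshape the
    # rows are not independent distributions; get_all_action / get_valid_action
    # therefore re-normalise each row before sampling from it.
    # Shape example (hypothetical numbers): with max_no_of_layers = 10 and a
    # token table of 400 entries, predict() on a (1, 9) state returns a
    # (1, 9, 401) array of per-layer token probabilities.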

    def get_all_action(self, state: np.ndarray) -> Tuple[List, np.ndarray, bool]:
        true_sequence = False
        actions = []
        distributions = self.model.predict(state)
        for distribution in distributions[0]:
            # Re-normalise the row into a valid probability distribution, then
            # sample a token index from it.
            distribution /= np.sum(distribution)
            action = np.random.choice(self.len_search_space, 1,
                                      p=distribution)[0]
            # Index 0 is reserved for padding, not a real token, so remap it.
            action = 1 if action == 0 else action
            actions.append(int(action))
            if action == self.end_token:
                break

        sequence = actions if self.end_token in actions else actions + [self.end_token]
        valid_sequence = self.search_space.check_sequence(sequence)
        if valid_sequence:
            valid_model = self.search_space.create_models(
                samples=[sequence], model_input_shape=self.model_input_shape)
            true_sequence = valid_model[0] is not None and valid_sequence is True

        if len(actions) < self.max_no_of_layers - 1:
            for _ in range((self.max_no_of_layers - 1) - len(actions)):
                actions.append(0)

        return actions, distributions, true_sequence

    def get_valid_action(
        self, state: np.ndarray
    ) -> Tuple[Optional[List], Optional[np.ndarray], Optional[int]]:
        true_sequence = False
        counter = 0
        while not true_sequence:
            counter += 1
            actions = []
            distributions = self.model.predict(state)
            for distribution in distributions[0]:
                distribution /= np.sum(distribution)
                action = np.random.choice(self.len_search_space,
                                          1,
                                          p=distribution)[0]
                action = 1 if action == 0 else action
                actions.append(int(action))
                if action == self.end_token:
                    break

            sequence = actions if self.end_token in actions else actions + [self.end_token]
            valid_sequence = self.search_space.check_sequence(sequence)
            if valid_sequence:
                valid_model = self.search_space.create_models(
                    samples=[sequence],
                    model_input_shape=self.model_input_shape)
                true_sequence = valid_model[0] is not None and valid_sequence is True
            if counter > self.valid_sequence_timeout:
                return None, None, None  # timeout

        if len(actions) < self.max_no_of_layers - 1:
            for _ in range((self.max_no_of_layers - 1) - len(actions)):
                actions.append(0)

        return actions, distributions, counter - 1

    def remember(self, state, actions, prob, reward):
        model_output_shape = (self.max_no_of_layers - 1, self.len_search_space)
        encoded_action = np.zeros(model_output_shape, np.float32)
        for i, action in enumerate(actions):
            encoded_action[i][action] = 1

        # Policy-gradient term: one-hot encoded actions minus the predicted
        # probabilities for this state.
        self.gradients.append(encoded_action - prob)
        self.states.append(state)
        self.rewards.append(reward)
        self.probs.append(prob)

    def clear_memory(self):
        self.states.clear()
        self.gradients.clear()
        self.rewards.clear()
        self.probs.clear()

    def get_discounted_rewards(self, rewards_in):
        discounted_rewards = []
        cumulative_total_return = 0

        for reward in rewards_in[::-1]:
            cumulative_total_return = (cumulative_total_return *
                                       self.gamma) + reward
            discounted_rewards.insert(0, cumulative_total_return)

        mean_rewards = np.mean(discounted_rewards)
        std_rewards = np.std(discounted_rewards)
        norm_discounted_rewards = (discounted_rewards -
                                   mean_rewards) / (std_rewards + 1e-7)

        return norm_discounted_rewards
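    # Worked example (hypothetical numbers): with gamma = 0.9 and rewards
    # [1.0, 2.0, 3.0], the discounted returns are
    #   [1.0 + 0.9 * (2.0 + 0.9 * 3.0), 2.0 + 0.9 * 3.0, 3.0] = [5.23, 4.70, 3.00]
    # which are then standardised to roughly [0.97, 0.41, -1.38] before scaling
    # the stored gradients in update_policy.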

    def update_policy(self):
        states_ = np.vstack(self.states)

        gradients_ = np.vstack(self.gradients)
        rewards_ = np.vstack(self.rewards)
        discounted_rewards = self.get_discounted_rewards(rewards_)
        # Reshape to (batch, 1, 1) so each reward broadcasts over the whole
        # (layer, token) gradient of its play.
        discounted_rewards = discounted_rewards.reshape(-1, 1, 1)
        gradients_ *= discounted_rewards
        # Keras policy-gradient trick: reward-scaled gradients, shifted back by
        # the original probabilities, become the cross-entropy training targets.
        gradients_ = self.alpha * gradients_ + np.vstack(self.probs)

        history = self.model.train_on_batch(states_, gradients_)
        self.clear_memory()
        return history

    def generate_sequence_naive(self, mode: str):
        token_keys = list(self.tokens.keys())
        if mode == "b":  # Brute-force
            space = itertools.permutations(token_keys,
                                           self.max_no_of_layers - 1)
            return space
        if mode == "r":  # Random
            sequence = []
            sequence_length = np.random.randint(3, self.max_no_of_layers)
            for i in range(sequence_length):
                token = np.random.choice(token_keys)
                sequence.append(token)
            return sequence
        if mode == "r_var_len":
            sequence = []
            length = np.random.randint(12 - 1, self.max_no_of_layers, 1)[0]
            for i in range(length):
                token = np.random.choice(token_keys)
                sequence.append(token)
            sequence.append(token_keys[-1])
            return sequence
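A minimal sketch of how the Controller could drive a REINFORCE-style search loop, reusing the Trainer from Example #1. The state encoding (the previous, zero-padded action sequence), updating the policy every min_plays plays, and the way a scalar reward is pulled out of Trainer.train_models are assumptions made for illustration; only the Controller and SearchSpace calls come from the code above.

def search_loop_sketch():
    search_space = SearchSpace(model_output_shape=2)
    tokens = search_space.generate_token()
    controller = Controller(tokens=tokens)
    trainer = Trainer()

    # Start from an "empty" state of the shape the agent expects.
    state = np.zeros((1, controller.max_no_of_layers - 1), dtype=np.float32)
    for play in range(controller.max_plays):
        actions, distributions, _ = controller.get_valid_action(state)
        if actions is None:  # timed out without finding a valid sequence
            break
        architectures = controller.search_space.create_models(
            samples=[actions], model_input_shape=controller.model_input_shape)
        performance = trainer.train_models(samples=[actions],
                                           architectures=architectures)
        reward = performance[-1]  # assumption: last entry reflects final accuracy
        controller.remember(state, actions, distributions, reward)
        if (play + 1) % controller.min_plays == 0:
            controller.update_policy()
        # Feed the latest (padded) action sequence back in as the next state.
        state = np.asarray(actions, dtype=np.float32).reshape(1, -1)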