Ejemplo n.º 1
0
class NNAgentSigmoid(Agent):
    """Template for neural network based agents.

    Wraps a sigmoid-output neural network that maps an (observation, action)
    bit-string pair to a predicted reward bit string, and selects actions by
    exhaustive evaluation with an epsilon-greedy exploration step.
    """

    # Shared location for persisted agent data.
    data_dir = Utility.get_data_path()
    # NOTE(review): not referenced in this block — presumably read by
    # NeuralNetworkSigmoid or a subclass; confirm before removing.
    learning_rate = 0.01

    def __init__(self, environment: Environment, seed: int, hidden_size: list[int], activation_name: str = "relu"):
        """Load appropriate parameters depending on environment and learning time.

        :param environment: task environment supplying observation/action/reward bit lengths
        :param seed: RNG seed forwarded to the Agent base class
        :param hidden_size: hidden-layer sizes, one entry per hidden layer
        :param activation_name: hidden-layer activation (default "relu")
        """
        super().__init__(environment, seed)
        # Network input is the concatenated observation+action bit string;
        # the output layer has one unit per reward bit.
        input_size = [self.environment.observation_length + self.environment.action_length]
        output_size = [self.environment.reward_length]
        self.nn = NeuralNetworkSigmoid(activation_name, input_size + hidden_size + output_size)
        # Activations of the forward pass that produced the last chosen action,
        # kept so train() can backpropagate against the received reward.
        self.activations = ([], [])

    def calculate_action(self, observation: str) \
            -> str:
        """Feed percept into nn and calculate best activations action. Returns action."""
        action = ""
        # Sentinel below any attainable reward so the first evaluated action wins.
        # NOTE(review): assumes decoded rewards are always > -2 — confirm against
        # Utility.get_reward_from_bitstring.
        reward = -2
        # calculate expected reward by trying every action
        number_of_actions = pow(2, self.environment.action_length)
        # Epsilon-greedy exploration: with probability 1/10 act uniformly at random.
        if self.seeded_rand_range(0, 10) == 0:
            action_idx = self.seeded_rand_range(0, number_of_actions)
            # Encode the action index as a fixed-width bit string.
            action_string = format(action_idx, 'b').zfill(self.environment.action_length)
            nn_input = NNUtility.bitstr_to_narray(observation + action_string)
            nn_output = self.nn.forward(nn_input)
            action = action_string
            self.activations = nn_output
        else:
            # Greedy step: evaluate the network's predicted reward for every action.
            for action_idx in range(number_of_actions):
                action_string = format(action_idx, 'b').zfill(self.environment.action_length)
                nn_input = NNUtility.bitstr_to_narray(observation + action_string)
                nn_output = self.nn.forward(nn_input)
                # The last layer's activations decode to a reward bit string.
                reward_string = NNUtility.narray_to_bitstr(nn_output[1][-1])
                action_reward = Utility.get_reward_from_bitstring(reward_string)
                if action_reward == reward:
                    # Tie with the current best: switch randomly so lower action
                    # indices are not systematically preferred.
                    i = self.seeded_rand_range()
                    if i == 1:
                        action = action_string
                        self.activations = nn_output
                else:
                    if action_reward > reward:
                        # Strictly better predicted reward: adopt this action.
                        action = action_string
                        reward = action_reward
                        self.activations = nn_output
        return action

    def train(self, reward: str):
        """Train agent on received reward.

        Backpropagates the stored forward-pass activations of the chosen
        action against the actually received reward bit string.
        """
        self.nn.backward(self.activations, NNUtility.bitstr_to_narray(reward))
Ejemplo n.º 2
0
import pickle
import matplotlib.pyplot as plt
from python.src import Utility
from matplotlib import rc

# LaTeX serif fonts for publication-quality labels; grid drawn behind the bars.
rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': 12})
rc('text', usetex=True)
plt.rcParams['axes.axisbelow'] = True

data_dir_path = Utility.get_data_path()
plots_path = Utility.get_plots_path()

# Load precomputed APIQ statistics: {agent_name: {"mean": ..., "error": ...}}.
# Fix: open the pickle via a context manager so the file handle is closed
# (the original left apiq_dict_path.open("rb") dangling).
# NOTE(review): pickle.load is only safe on trusted, locally generated files.
apiq_dict_path = data_dir_path.joinpath("apiq_dict.apiq")
with apiq_dict_path.open("rb") as apiq_file:
    apiq_dict = pickle.load(apiq_file)

# Horizontal bar plot of APIQ values, one bar per agent, listed top to bottom.
f = plt.figure(figsize=(6, 4), dpi=400)
agents = list(apiq_dict.keys())
apiq_values = [v["mean"] for v in apiq_dict.values()]
apiq_errors = [v["error"] for v in apiq_dict.values()]
# Reverse the y positions so the first agent appears at the top of the chart.
ypos = [len(agents) - 1 - y for y in range(len(agents))]
# Three color groups: first four agents, the fifth, and the remainder.
plt.barh(ypos[:4], apiq_values[:4], color="#076678")
plt.barh(ypos[4:5], apiq_values[4:5], color="#689d6a")
plt.barh(ypos[5:], apiq_values[5:], color="#8f3f71")
# Overlay horizontal error bars (no marker, black caps) on the bar ends.
plt.errorbar(apiq_values,
             ypos,
             xerr=apiq_errors,
             fmt=',',
             ecolor='black',
             capsize=4)
plt.yticks(
    ypos,