Beispiel #1
0
    def __init__(self,
                 epsilon=0,
                 gamma=0.97,
                 memory_capacity=10000,
                 batch_size=4,
                 learning_rate=0.001,
                 Q_1=None,
                 num_inputs=None,
                 num_actions=None,
                 params_path=None,
                 memory_path=None):
        """Set up agent state, replay memory and the action-value network.

        Args:
            epsilon: Stored as ``self.epsilon`` (exploration/exploitation
                setting; how it is consumed is not visible in this method).
            gamma: Discount factor, stored as ``self.gamma``.
            memory_capacity: Stored as ``self.memory_capacity``.
            batch_size: Stored as ``self.batch_size``.
            learning_rate: Stored as ``self.learning_rate``.
            Q_1: Optional pre-built model. When given it is used as-is and
                ``self.num_inputs`` / ``self.num_actions`` are overwritten
                with the ``input_shape`` of its first layer and the
                ``output_shape`` of its last layer.
            num_inputs: Unit count of the first ``Dense`` layer when the
                network is built here (``Q_1`` is None).
            num_actions: Unit count of the final ``Dense`` layer when the
                network is built here; also selects that build path.
            params_path: If truthy, passed to ``self.Q_1.load_params``.
            memory_path: If truthy and the path exists, the ``'exp_bank'``
                entry of that file replaces ``self.exp_bank.memory``.

        Raises:
            ValueError: If both ``Q_1`` and ``num_actions`` are None.
        """
        # Per-step bookkeeping, filled in later by the rest of the class.
        self.state = None
        self.is_terminal = False
        self.action = None
        self.reward = 0

        self.episode = 0

        # DQL
        self.epsilon = epsilon  # Exploitation or exploration
        self.gamma = gamma  # Discount factor
        self.memory_capacity = memory_capacity
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.num_inputs = num_inputs
        self.num_actions = num_actions

        # Replay memory.  reset_memory() is called only after the
        # hyper-parameters above are set, in case it reads them.
        self.exp_bank = self.reset_memory()
        if memory_path and os.path.exists(memory_path):
            # NOTE(security): allow_pickle=True unpickles arbitrary objects;
            # only point memory_path at files from a trusted source.
            # The context manager closes the archive's file handle, which
            # the returned NpzFile would otherwise keep open.
            with np.load(memory_path, allow_pickle=True) as memory:
                self.exp_bank.memory = memory['exp_bank'].tolist()
            print("loaded memory_bank")

        # Action-value function
        if Q_1 is not None:
            self.Q_1 = Q_1
            # These are whatever shape objects the layers report, not the
            # plain values passed via num_inputs / num_actions.
            self.num_inputs = self.Q_1.layers[0].input_shape
            self.num_actions = self.Q_1.layers[-1].output_shape
        elif num_actions is not None:
            # NOTE(review): num_inputs is not validated here; presumably it
            # must also be set for the first Dense layer -- confirm.
            self.Q_1 = models.Sequential()
            self.Q_1.add(
                layers.Dense(self.num_inputs, activation=activations.relu))
            self.Q_1.add(layers.Dense(10, activation=activations.sigmoid))
            self.Q_1.add(layers.Dense(20, activation=activations.sigmoid))
            self.Q_1.add(
                layers.Dense(self.num_actions, activation=activations.softmax))
            self.Q_1.compile()
        else:
            raise ValueError(
                "Either Q_1 or num_actions must be provided to build the "
                "action-value function")

        if params_path:
            self.Q_1.load_params(params_path)

        # NOTE(review): Q_2 is the same object as Q_1 (an alias, not a copy),
        # so any update to Q_1 is immediately visible through Q_2 -- confirm
        # this is intended.
        self.Q_2 = self.Q_1
    fig.suptitle('Algorithm accuracy for each epoch', fontsize=16)
    # for i in range(len(acc)):
    #     ax.annotate('{} = {}'.format(i + 1, np.round(acc[i] * 100, 1)), (i, acc[i] * 100 + 0.4), ha='center', va='center')
    k = acc.shape[0] - 1
    ax.annotate('Epoch {} = {}%'.format(k + 1, np.round(acc[k] * 100, 1)),
                (k, acc[k] * 100),
                ha='center',
                va='center')
    plt.draw()
    plt.pause(.001)


# Announce the load, then read the dataset from the bundled archive.
print("Loading data...")
dataset = load_mnist_data(file_path="400_imgs.npz")

# Build the classifier one layer at a time, in forward order.
M = models.Sequential()

# Input stage: reshape each (1, 784) input into (1, 28, 28).
M.add(layers.Reshape((1, 28, 28), input_shape=(1, 784)))

# Convolution stage: 3x3 kernel with "same" padding, followed by 2x2
# max pooling (the leading 15 is presumably the filter count).
M.add(
    layers.Conv2D(
        15,
        kernel_size=(3, 3),
        padding="same",
        activation=activations.leaky_relu,
        input_shape=(1, 28, 28),
    )
)
M.add(layers.MaxPooling2D((2, 2)))

# Dense head: flatten, a 100-unit leaky-ReLU layer, then a 10-unit
# softmax output layer.
M.add(layers.Flatten())
M.add(layers.Dense(100, activation=activations.leaky_relu))
M.add(layers.Dense(10, activation=activations.softmax))

# Finalise the model and call its description() method.
M.compile()
M.description()