import random
from collections import deque

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


class NNQ:
    def __init__(self, input_space, action_space):

        # Hyperparameters
        self.GAMMA = 0.95
        self.LEARNING_RATE = 0.002
        self.MEMORY_SIZE = 1000000
        self.BATCH_SIZE = 30
        self.EXPLORATION_MAX = 1.0
        self.EXPLORATION_MIN = 0.01
        self.EXPLORATION_DECAY = 0.997
        self.exploration_rate = self.EXPLORATION_MAX
        self.reward = 0

        self.actions = action_space
        # Experience replay buffer
        self.memory = deque(maxlen=self.MEMORY_SIZE)

        # Build the Q-network. Q-values are unbounded, so the output layer
        # is linear; softmax would squash them into a probability simplex.
        self.model = Sequential()
        self.model.add(
            Dense(64, input_shape=(input_space, ), activation="relu"))
        self.model.add(Dense(64, activation="relu"))
        self.model.add(Dense(self.actions, activation="linear"))
        self.model.compile(loss="mse",
                           optimizer=Adam(learning_rate=self.LEARNING_RATE))

    def act(self, state):
        # Epsilon-greedy: explore with probability exploration_rate,
        # otherwise exploit the current Q-estimates
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.actions)

        q_values = self.model.predict(state)

        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        # Store the transition in the replay buffer
        self.memory.append((state, action, reward, next_state, done))

    def experience_replay(self):
        # Train once enough transitions are stored to sample a full batch;
        # waiting for the whole MEMORY_SIZE buffer would mean never training
        if len(self.memory) < self.BATCH_SIZE:
            return

        batch = random.sample(self.memory, self.BATCH_SIZE)
        for state, action, reward, next_state, terminal in batch:
            # Q-learning target: r + gamma * max_a' Q(s', a'), or just r
            # when the episode terminated at this transition
            q_update = reward
            if not terminal:
                q_update = (
                    reward +
                    self.GAMMA * np.amax(self.model.predict(next_state)[0]))
            q_values = self.model.predict(state)
            q_values[0][action] = q_update
            self.model.fit(state, q_values, verbose=0)

        if self.exploration_rate > self.EXPLORATION_MIN:
            self.exploration_rate *= self.EXPLORATION_DECAY
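
A minimal sketch of a training loop that could drive this agent, assuming the classic (pre-0.26) gym API and CartPole-v1 as the environment; neither appears in the original example, so treat the names and episode budget as placeholders.

import gym

env = gym.make("CartPole-v1")  # assumed environment
agent = NNQ(env.observation_space.shape[0], env.action_space.n)

for episode in range(100):  # assumed episode budget
    state = env.reset().reshape(1, -1)  # add the batch dimension Keras expects
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)  # classic 4-tuple API
        next_state = next_state.reshape(1, -1)
        agent.remember(state, action, reward, next_state, done)
        agent.experience_replay()
        state = next_state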
Example 2
import random
from collections import deque

import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


class Model:
    __batch_size = 100

    def __init__(self):
        self.model = Sequential([
            Dense(40, input_shape=(4, ), activation="relu"),
            Dense(40, activation="relu"),
            Dense(40, activation="relu"),
            Dense(4, activation="tanh")
        ])
        # TODO: xavier initialization?
        self.model.compile(loss=keras.losses.mean_squared_error,
                           optimizer=keras.optimizers.Adam(learning_rate=0.001))

        self.memory = deque(maxlen=10000)

    def experience_replay(self):
        if len(self.memory) < self.__batch_size:
            return

        batch = random.sample(self.memory, self.__batch_size)
        states = np.vstack([state for state, _, _, _, _ in batch])
        next_states = np.vstack(
            [next_state for _, _, _, next_state, _ in batch])

        # Batch the forward passes: one predict call over all sampled states
        # and one over all next states, instead of one call per transition
        predicted_states = self.model.predict(states)
        predicted_next_states = self.model.predict(next_states)
        max_next_state_values = np.max(predicted_next_states, axis=1)

        for index, (_, action, reward, _, terminal) in enumerate(batch):
            q_update = reward

            if not terminal:
                discount_factor = 0.95
                q_update += discount_factor * max_next_state_values[index]

            # Blend the old estimate with the new target (soft update with
            # step size alpha; distinct from the optimizer's learning rate)
            alpha = 0.95
            predicted_states[index][action] = (
                (1 - alpha) * predicted_states[index][action] +
                alpha * q_update)
        self.model.fit(states, predicted_states, verbose=0)
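
The excerpt never shows how self.memory is filled; given the five-tuples unpacked in experience_replay, a plausible companion method would look like the sketch below (the name remember is an assumption, not part of the original class).

    def remember(self, state, action, reward, next_state, terminal):
        # Assumed helper: store one transition as the 5-tuple that
        # experience_replay() samples and unpacks
        self.memory.append((state, action, reward, next_state, terminal))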
Example 3
from math import ceil, log10

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed

# alphabet, n_numbers, largest, n_samples, generate_data() and invert()
# are assumed to be defined earlier in the original script
n_chars = len(alphabet)
# input length: the digits of each operand plus the separators between them
n_in_seq_length = n_numbers * ceil(log10(largest + 1)) + n_numbers - 1
# output length: the digits needed for the largest possible sum
n_out_seq_length = ceil(log10(n_numbers * (largest + 1)))
n_batch = 100
n_epoch = 500
# Encoder-decoder: the first LSTM encodes the input into a single vector,
# RepeatVector feeds that vector to the decoder once per output time step,
# and the TimeDistributed softmax emits one character per step
model = Sequential([
    LSTM(100, input_shape=(n_in_seq_length, n_chars)),
    RepeatVector(n_out_seq_length),
    LSTM(50, return_sequences=True),
    TimeDistributed(Dense(n_chars, activation='softmax'))
])

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# train on freshly generated data each epoch instead of a fixed dataset
for i in range(n_epoch):
    x, y = generate_data(n_samples, largest, alphabet)
    model.fit(x, y, epochs=1, batch_size=n_batch)

model.save('training/keras_classifier.h5')

# evaluate on some new patterns
x, y = generate_data(n_samples, largest, alphabet)
result = model.predict(x, batch_size=n_batch, verbose=0)
# calculate error
expected = [invert(pattern, alphabet) for pattern in y]
predicted = [invert(pattern, alphabet) for pattern in result]
# show some examples
for i in range(20):
    print('Expected=%s, Predicted=%s' % (expected[i], predicted[i]))
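
Since the model is saved to training/keras_classifier.h5 above, it can later be restored with Keras's standard loader; a minimal sketch:

from tensorflow.keras.models import load_model

model = load_model('training/keras_classifier.h5')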
Example 4
    #Plot model loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title("Model Loss")
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

    # Evaluate model on test data; with metrics=['accuracy'] at compile
    # time, evaluate() returns [loss, accuracy], not a single value
    loss, accuracy = model.evaluate(x_test, y_test)
    print(f"Test accuracy: {accuracy}")

    #Generate predictions for 10 samples
    print("Predictions for 10 samples")
    predictions = model.predict(x_test[:10])
    # predict_classes is gone in modern tf.keras; take the argmax instead
    y_new = np.argmax(predictions, axis=1)
    y_pred = model.predict(x_test)
    print("Shape of predictions")
    print(y_pred.shape)

    #Generate confusion matrix

    Y_pred = np.argmax(y_pred, axis=1)
    Y_test = np.argmax(y_test, axis=1)

    matrix = skl.confusion_matrix(Y_test, Y_pred)

    sns.heatmap(matrix.T,
                square=True,