def mutate(population):
    """Mutate each individual's genome in place and return the population.

    Mutation is self-adaptive: the per-gene mutation probability and the
    mutation scale are stored inside the genome itself (indices -2 and -3),
    so they evolve along with the rest of the genes.  NOTE(review): those
    hyper-parameter slots are themselves eligible for mutation — presumably
    intentional (classic self-adaptive ES); confirm.
    """
    for individual in population:
        genome = individual['genome']
        # Per-individual mutation hyper-parameters read from the genome tail.
        mut_p = genome[-2]
        mut_std = genome[-3]
        # Pre-draw a candidate perturbation for every gene position.
        rand_vals = Initializers.uniform(genome.shape, mut_std)
        for idx in range(genome.shape[0]):
            # Each gene is perturbed independently with probability mut_p.
            if random.random() < mut_p:
                genome[idx] += rand_vals[idx]
    return population
# NOTE(review): this script block arrived with all indentation stripped; the
# nesting below is reconstructed from the logical reading (checkpoint + test
# evaluation after the training loops) — confirm against the original layout.

model.set_tree(tree)
model.set_loss(Losses.MeanSquared())
learning_rate = 0.00002
model.set_optimizer(Optimizers.RMSprop(learning_rate=learning_rate))
# model.compile(X_reward, y_reward)
# model.train(X_reward, y_reward, 24)

reinforce_index = X_obs.shape[0]
X_batches, y_batches, num_batches = ok.make_batches([X_train], y_train, batch_size=10000)

for i in range(8):
    print('\n---Iteration {}---'.format(i + 1))
    for X_batch, y_batch in zip(X_batches, y_batches):
        # Re-fit the reward model on everything accumulated so far.
        model.train(X_reward, y_reward, 24)
        accuracy, preds = model.get_dream_accuracy([X_batch[0], None], y_batch)
        preds = preds[0]
        # Small Gaussian noise on the predictions — presumably exploration
        # noise for the reward signal; verify intent.
        preds += Initializers.normal(preds.shape, 0.01)
        print('Accuracy: {}%'.format(accuracy))
        preds_reward = reward(
            preds.reshape(preds.shape[0], preds.shape[1]).astype('float32'),
            y_batch.reshape(preds.shape[0], preds.shape[1]).astype('float32'))
        print('Avg Reward: {}'.format(np.mean(preds_reward)))
        # Grow the reward dataset with this batch's inputs, noisy
        # predictions, and their computed rewards.
        X_reward[0] = np.append(X_reward[0], X_batch[0], axis=0)
        X_reward[1] = np.append(X_reward[1], preds, axis=0)
        y_reward = np.append(y_reward, preds_reward.reshape(preds_reward.shape[0], 1), axis=0)

# Checkpoint the flattened parameter vector.  Fix: use a context manager so
# the file handle is closed even if pickle.dump raises (the original used
# bare open()/close(), which leaks the handle on error).
params = model.get_params_as_vec()
with open('reward_model_params_vec.pk', 'wb') as param_file:
    pickle.dump(params, param_file, protocol=pickle.HIGHEST_PROTOCOL)

test_accuracy, test_preds = model.get_dream_accuracy([X_test, None], y_test)
print('Test Accuracy: {}%'.format(test_accuracy))