def __init__(self, state_num, action_num, experience_replay=True):
    """Build the model and its SGD trainer, then initialise bookkeeping state.

    Args:
        state_num: Stored as ``self.state_num`` and forwarded to
            ``get_model`` as its first argument.
        action_num: Stored as ``self.action_num`` and forwarded to
            ``get_model`` as its second argument.
        experience_replay: Flag stored as ``self.experience_replay``;
            defaults to ``True``. How it is consumed is not visible here.
    """
    # Remember the constructor arguments.
    self.state_num, self.action_num = state_num, action_num
    self.experience_replay = experience_replay
    # The pool starts empty; it is filled elsewhere in the class.
    self.experience_pool = []

    # Build the network first, since the trainer is constructed around it.
    network = get_model(state_num, action_num)
    self.model = network

    # Trainer settings: module-level learning rate, L2 weight term set to 0.
    trainer_settings = TrainerConfig()
    trainer_settings.learning_rate = LEARNING_RATE
    trainer_settings.weight_l2 = 0

    sgd_trainer = SGDTrainer(network, trainer_settings)
    # Both trainer lists are explicitly blanked right after construction.
    # NOTE(review): presumably so the trainer tracks no parameters of its
    # own -- confirm against SGDTrainer's implementation.
    sgd_trainer.training_names = []
    sgd_trainer.training_variables = []
    self.trainer = sgd_trainer

    # Lock object for callers that need mutual exclusion (usage not shown here).
    self.thread_lock = threading.Lock()
    # Start from the module-level EPSILON with a zeroed tick counter.
    self.epsilon = EPSILON
    self.tick = 0
from deepy.layers.recurrent import RecurrentLayer, RecurrentNetwork from deepy.conf import NetworkConfig, TrainerConfig from deepy.utils.functions import FLOATX from deepy import SGDTrainer logging.basicConfig(level=logging.INFO) if __name__ == '__main__': net_conf = NetworkConfig(input_size=6) net_conf.layers = [RecurrentLayer(size=10, activation='sigmoid', bptt=True)] trainer_conf = TrainerConfig() trainer_conf.learning_rate = 0.03 trainer_conf.weight_l2 = 0.0001 trainer_conf.hidden_l2 = 0.0001 trainer_conf.monitor_frequency = trainer_conf.validation_frequency = trainer_conf.test_frequency = 1 network = RecurrentNetwork(net_conf) trainer = SGDTrainer(network) data = np.array([[1,0,0,0,0,0], [0,1,0,0,0,0], [0,0,1,0,0,0], [0,0,0,1,0,0], [0,0,0,0,1,0], [0,0,0,0,0,1], [0,1,0,0,0,0], [0,0,1,0,0,0], [0,0,0,1,0,0],