    def __init__(self, environment, configuration, name='', debug=False):
        num_actions = environment.action_space.n
        num_states = get_num_states(environment)
        # Q-table: one row per state, one column per action, initialized to zero
        self.Q = np.zeros((num_states, num_actions))

        # initialize the superclass last, because it starts the training
        super().__init__(environment=environment, debug=debug, name=name, configuration=configuration)
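The helper get_num_states is used by every agent below but is not part of this listing; a minimal sketch, assuming a classic Gym environment with a discrete observation space (the body is an assumption, only the name comes from the code above):

def get_num_states(environment):
    # assumes a discrete observation space such as FrozenLake's; Box spaces would need their own handling
    return environment.observation_space.n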
    def __init__(self,
                 environment,
                 memory_size,
                 nb_variational_circuits,
                 configuration,
                 name='',
                 debug=False):
        self.D = []  # Memory
        self.memory_size = memory_size
        self.nb_variational_circuits = nb_variational_circuits
        self.debug = debug

        num_actions = environment.action_space.n
        num_states = get_num_states(environment)
        # need enough qubits to cover both the action count and the state encoding (log2 of the state count)
        self.nb_qbits = int(np.max([num_actions, np.log2(num_states)]))
        # three rotation angles per qubit and per variational circuit, initialized uniformly in [0, 2*pi)
        self.theta = np.array([
            2 * np.pi * np.random.random(3)
            for j in range(self.nb_qbits * self.nb_variational_circuits)
        ]).flatten()

        self.init_memory(environment)

        # initialize the superclass last, because it starts the training
        super().__init__(environment=environment,
                         debug=debug,
                         name=name,
                         configuration=configuration)
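A quick numeric check of the sizing above, assuming FrozenLake-like dimensions (16 states, 4 actions) and 2 variational circuits (both numbers are assumptions for illustration):

import numpy as np

# nb_qbits = max(num_actions, log2(num_states)) = max(4, 4) = 4
nb_qbits = int(np.max([4, np.log2(16)]))
# 3 rotation angles per qubit and per circuit: 4 qubits * 2 circuits * 3 angles = 24 parameters
theta = np.array([2 * np.pi * np.random.random(3) for j in range(nb_qbits * 2)]).flatten()
assert theta.shape == (24,)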
Example #3
    def __init__(self, environment, configuration, name, debug=False):
        self.environment = environment
        self.debug = debug
        self.name = name
        self.configuration = configuration

        # cache the sizes of the discrete action and state spaces
        self.num_actions = environment.action_space.n
        self.num_states = get_num_states(environment)
    def __init__(self, environment, configuration, name='', debug=False):
        self.D = []  # Memory
        self.num_states = get_num_states(environment)
        self.model = configuration.model
        self.model.compile(loss='mean_squared_error', optimizer='sgd')
        # clone_model copies the architecture only; the clone starts with its own freshly initialized weights
        self.target_model = clone_model(self.model)
        self.memory_size = configuration.memory_size

        self.train_counter = 0

        self.init_memory(environment)

        # initialize the superclass last, because it starts the training
        super().__init__(environment=environment, debug=debug, name=name, configuration=configuration)
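init_memory is called by several of these constructors but not shown in this listing; a minimal sketch, assuming it pre-fills the replay memory D with transitions collected by a random policy under the classic Gym step API (the body is an assumption, only the name and the use of self.D and self.memory_size come from the code above):

    def init_memory(self, environment):
        # fill the replay memory with transitions from random actions (assumed behaviour)
        state = environment.reset()
        while len(self.D) < self.memory_size:
            action = environment.action_space.sample()
            next_state, reward, done, _ = environment.step(action)
            self.D.append((state, action, reward, next_state, done))
            state = environment.reset() if done else next_state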
Example #5
    def __init__(self, environment, configuration, name='', debug=False):
        # init super
        Agent.__init__(self,
                       environment=environment,
                       debug=debug,
                       name=name,
                       configuration=configuration)

        self.D = []  # Memory
        self.num_states = get_num_states(environment)
        self.model = configuration.model
        self.target_model = configuration.clone_model(self.model)
        self.memory_size = configuration.memory_size

        self.train_counter = 0
        self.episode = 0

        self.init_memory(environment)
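The listing does not show how target_model is kept in sync with model; a minimal sketch of a hard target update every configuration.target_replacement training steps (the method name and the exact trigger are assumptions; set_weights/get_weights are standard Keras calls):

    def maybe_update_target(self):
        # hard update: copy the online network's weights into the target network
        if self.train_counter % self.configuration.target_replacement == 0:
            self.target_model.set_weights(self.model.get_weights())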
Example #6
    def __init__(self, environment, configuration, name='', verbosity_level=10, debug=False):
        self.D = []  # Memory
        self.memory_size = configuration.memory_size
        self.nb_variational_circuits = configuration.nb_variational_circuits
        self.debug = debug

        self.train_counter = 0
        self.episode = 0

        num_actions = environment.action_space.n
        num_states = get_num_states(environment)
        self.nb_circuit_params = 1
        # need enough qubits to cover both the action count and the state encoding (log2 of the state count)
        self.nb_qbits = int(np.max([num_actions, np.log2(num_states)]))
        # one rotation parameter per qubit and per variational circuit
        self.theta = np.array([
            np.random.random(self.nb_circuit_params)
            for j in range(self.nb_qbits * self.nb_variational_circuits)
        ]).flatten()

        # target-network parameters start as a plain copy of the online parameters
        self.target_theta = self.theta.copy()
        self.init_memory(environment)
        self.loss_mem = []

        # initialize the superclass last, because it starts the training
        super().__init__(environment=environment, debug=debug, name=name, configuration=configuration)
# target network replacement interval
target_replacement = 10

# alpha, gamma, epsilon
training_params_Q = [.6, .8, .9]

# Configuration for the tabular Q-learning trainer
confQ = Configuration(nb_iterations=nb_iterations, training_params=training_params_Q, cooling_scheme=cooling_schemes,
                        batch_size=1, average=int(batch_size/100), test_steps=1, verbosity_level=1e20)


# alpha, gamma, epsilon
training_params_DQN = [.6, .8, .9]  # DQN optimum: .1, .8, .9; overfitting after 590

# NN model for DQN
num_states = get_num_states(env)
model = Sequential()
# input is the one-hot encoded state (see the embedding below), hence num_states (here 16) input units
model.add(Dense(8, input_shape=(num_states,), activation='tanh'))
model.add(Dense(env.action_space.n, activation='linear'))
# alpha (the first training parameter) is used as the learning rate
model.compile(loss=Huber(), optimizer=SGD(training_params_DQN[0]))

confDQN = Configuration(nb_iterations=nb_iterations, training_params=training_params_DQN[1:],
                        cooling_scheme=cooling_schemes[1:], batch_size=batch_size,
                        memory_size=memory_size, average=int(nb_iterations/10), model=model,
                        target_replacement=target_replacement, test_steps=1, verbosity_level=10)

confDQN.clone_model = clone_model
# embedding: one-hot encode the discrete state index into a vector of length num_states
confDQN.embedding = lambda state: to_categorical(state, num_classes=num_states).reshape(num_states)
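As a quick usage check (illustrative only), the embedding turns a discrete state index into a one-hot vector of length num_states:

# with num_states = 16, state 3 becomes a length-16 vector with a single 1.0 at index 3
one_hot = confDQN.embedding(3)
print(one_hot.shape)  # (16,)
print(one_hot[3])     # 1.0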

training_params_VQDQN = [.22, .8, .9]  # DQN optimum: .1, .8, .9; overfitting after 590
confVQD = Configuration(nb_iterations=nb_iterations, training_params=training_params_VQDQN, cooling_scheme=cooling_schemes,