Example #1
    def __init__(self, weights_path, train_conv_layers):
        self.__angle_values = [-1, -0.5, 0, 0.5, 1]

        self.__nb_actions = 5
        self.__gamma = 0.99

        #Define the model
        activation = 'relu'
        pic_input = Input(shape=(59,255,3))
        
        img_stack = Conv2D(16, (3, 3), name='convolution0', padding='same', activation=activation, trainable=train_conv_layers)(pic_input)
        img_stack = MaxPooling2D(pool_size=(2,2))(img_stack)
        img_stack = Conv2D(32, (3, 3), activation=activation, padding='same', name='convolution1', trainable=train_conv_layers)(img_stack)
        img_stack = MaxPooling2D(pool_size=(2, 2))(img_stack)
        img_stack = Conv2D(32, (3, 3), activation=activation, padding='same', name='convolution2', trainable=train_conv_layers)(img_stack)
        img_stack = MaxPooling2D(pool_size=(2, 2))(img_stack)
        img_stack = Flatten()(img_stack)
        img_stack = Dropout(0.2)(img_stack)

        img_stack = Dense(128, name='rl_dense', kernel_initializer=random_normal(stddev=0.01))(img_stack)
        img_stack = Dropout(0.2)(img_stack)
        output = Dense(self.__nb_actions, name='rl_output', kernel_initializer=random_normal(stddev=0.01))(img_stack)

        opt = Adam()
        self.__action_model = Model(inputs=[pic_input], outputs=output)

        self.__action_model.compile(optimizer=opt, loss='mean_squared_error')
        self.__action_model.summary()
        
        # If we are using pretrained weights for the conv layers, load them and verify the first layer.
        if (weights_path is not None and len(weights_path) > 0):
            print('Loading weights from {0}...'.format(weights_path))
            print('Current working dir is {0}'.format(os.getcwd()))
            self.__action_model.load_weights(weights_path, by_name=True)
            
            print('First layer: ')
            w = np.array(self.__action_model.get_weights()[0])
            print(w)
        else:
            print('Not loading weights')

        # Set up the target model. 
        # This is a trick that will allow the model to converge more rapidly.
        self.__action_context = tf.get_default_graph()
        self.__target_model = clone_model(self.__action_model)

        self.__target_context = tf.get_default_graph()
        self.__model_lock = threading.Lock()
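Example #1 clones the action model into a target model, but the snippet ends before showing how the two networks interact. Below is a minimal sketch of the usual DQN pattern; the helper names (`sync_target_model`, `compute_q_targets`) and the shapes of `rewards`/`dones` are illustrative assumptions, not part of the original code.

```python
import numpy as np

def sync_target_model(action_model, target_model):
    # Periodically copy the online network's weights into the frozen target network.
    target_model.set_weights(action_model.get_weights())

def compute_q_targets(target_model, rewards, next_states, dones, gamma=0.99):
    # Bellman targets: r + gamma * max_a' Q_target(s', a'), zeroed on terminal steps.
    next_q = target_model.predict(next_states)
    return rewards + gamma * np.max(next_q, axis=1) * (1.0 - dones)
```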
Example #2
def nngame_train( # pylint: disable=too-many-arguments,too-many-locals
        game, epochs=100, layer_sizes=(32, 32), dropout=0.2, verbosity=0,
        optimizer='sgd', loss='mean_squared_error'):
    """Train a neural network regression model

    This mostly exists as a proof of concept, individual testing should be done
    to make sure it is working sufficiently. This API will likely change to
    support more general architectures and training.
    """
    utils.check(layer_sizes, 'must have at least one layer')
    utils.check(0 <= dropout < 1, 'dropout must be a valid probability')
    # Delayed import so that tensorflow is only loaded when this function is used
    from keras import models, layers

    model = models.Sequential()
    lay_iter = iter(layer_sizes)
    model.add(layers.Dense(
        next(lay_iter), input_shape=[game.num_strats], activation='relu'))
    for units in lay_iter:
        model.add(layers.Dense(units, activation='relu'))
        if dropout:
            model.add(layers.Dropout(dropout))
    model.add(layers.Dense(1, activation='sigmoid'))

    regs = []
    offsets = np.empty(game.num_strats)
    scales = np.empty(game.num_strats)
    for i, profs, pays in _dev_profpay(game):
        # XXX Payoff normalization specific to sigmoid. If we accept alternate
        # models, we need a way to compute how to potentially normalize
        # payoffs.
        min_pay = pays.min()
        offsets[i] = min_pay
        max_pay = pays.max()
        scale = 1 if np.isclose(max_pay, min_pay) else max_pay - min_pay
        scales[i] = scale
        reg = models.clone_model(model)
        reg.compile(optimizer=optimizer, loss=loss)
        reg.fit(profs, (pays - min_pay) / scale, epochs=epochs,
                verbose=verbosity)
        regs.append(reg)

    return _DevRegressionGame(
        game, tuple(regs), offsets, scales, game.min_strat_payoffs(),
        game.max_strat_payoffs(), np.ones(game.num_strats, bool))
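The per-player loop above squashes payoffs into [0, 1] so the sigmoid output layer can fit them, storing `offsets` and `scales` to undo the transform later. A small sketch of that arithmetic, with assumed helper names:

```python
import numpy as np

def normalize_payoffs(pays):
    # Map payoffs into [0, 1]; guard against a zero range.
    offset = pays.min()
    scale = 1.0 if np.isclose(pays.max(), offset) else pays.max() - offset
    return (pays - offset) / scale, offset, scale

def denormalize_predictions(preds, offset, scale):
    # Inverse transform applied to the network's sigmoid outputs.
    return preds * scale + offset
```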
Example #3
    def train(self, env):
        memory = deque(maxlen=self.replay_memory)
        episode_number = 0
        epsilon = self.init_epsilon
        epsilon_decay = (self.init_epsilon -
                         self.final_epsilon) / self.epsilon_step_num
        global_step = 0

        if self.resume:
            model = load_model(self.restore_file_path)
            epsilon = self.final_epsilon
        else:
            model = self.def_model()

        model_target = clone_model(model)
        model_target.set_weights(model.get_weights())

        while episode_number < self.num_episode:

            done = False
            dead = False
            step, score, start_life = 0, 0, 5
            loss = 0.0
            observe = env.reset()

            for _ in range(random.randint(1, self.no_op_steps)):
                observe, score, done, info = env.step(1)

            if self.debug:
                print("Time Score: {0}".format(score))

            state = self.pre_processing(observe)
            history = np.stack((state, state, state, state), axis=2)
            history = np.reshape([history], (1, 84, 84, 4))

            while not done:
                if self.render:
                    env.render()
                    time.sleep(0.01)

                action = self.get_action(history, epsilon, global_step,
                                         model_target)

                if epsilon > self.final_epsilon and global_step > self.observe_step_num:
                    epsilon -= epsilon_decay

                observe, reward, done, info = env.step(action)
                next_state = self.pre_processing(observe)
                next_state = np.reshape([next_state], (1, 84, 84, 1))
                next_history = np.append(next_state,
                                         history[:, :, :, :3],
                                         axis=3)

                if start_life > info['ale.lives']:
                    dead = True
                    start_life = info['ale.lives']

                self.store_memory(memory, history, action, reward,
                                  next_history, dead)

                if global_step > self.observe_step_num:
                    loss = loss + self.train_memory_batch(memory, model)
                    if global_step % self.refresh_target_model_num == 0:
                        model_target.set_weights(model.get_weights())

                score += reward

                if dead:
                    dead = False
                else:
                    history = next_history

                global_step += 1
                step += 1

                if done:
                    if global_step <= self.observe_step_num:
                        state = "observe"
                    elif self.observe_step_num < global_step <= self.observe_step_num + self.epsilon_step_num:
                        state = "explore"
                    else:
                        state = "train"
                    print('state: {0}, episode: {1}, score: {2}'.format(
                        state, episode_number, score))

                    if episode_number % 100 == 0 or (episode_number +
                                                     1) == self.num_episode:
                        now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
                        file_name = "{0}_model_{1}.h5".format(self.game, now)
                        model_path = os.path.join(self.train_dir, file_name)
                        model.save(model_path)

                    episode_number += 1
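The loop above delegates the gradient step to `self.train_memory_batch(memory, model)`, which is not part of this listing. As a rough illustration only, a generic DQN batch update for transitions stored as `(history, action, reward, next_history, dead)` might look like the sketch below; the signature, batch size, and use of the target network are assumptions, not the author's method.

```python
import random
import numpy as np

def train_memory_batch_sketch(memory, model, model_target, batch_size=32, gamma=0.99):
    batch = random.sample(memory, batch_size)
    histories = np.vstack([b[0] for b in batch]).astype(np.float32)
    actions = np.array([b[1] for b in batch])
    rewards = np.array([b[2] for b in batch])
    next_histories = np.vstack([b[3] for b in batch]).astype(np.float32)
    dead = np.array([b[4] for b in batch], dtype=np.float32)

    # Start from the current predictions and overwrite only the taken actions.
    targets = model.predict(histories)
    next_q = model_target.predict(next_histories)
    targets[np.arange(batch_size), actions] = rewards + gamma * np.max(next_q, axis=1) * (1.0 - dead)
    return model.train_on_batch(histories, targets)
```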
Example #4
    def clone_model(self):
        """Return a copy of a keras model."""
        print("cloning model")
        temp_model = clone_model(self.model)
        temp_model.set_weights(self.model.get_weights())
        return temp_model
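The two-line pattern in Example #4 recurs throughout this page: `clone_model` copies only the architecture (the clone gets freshly initialized weights and no compile state), so the weights are copied explicitly. A hedged helper showing the full copy, including the recompile step needed if the clone will be trained (the optimizer and loss here are placeholders):

```python
from keras.models import clone_model

def full_copy(model, optimizer='adam', loss='mse'):
    copy = clone_model(model)                      # same architecture, new weights
    copy.set_weights(model.get_weights())          # now numerically identical
    copy.compile(optimizer=optimizer, loss=loss)   # compile state is not cloned
    return copy
```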
Example #5
def agent_init():

    a_globs.cur_epsilon = a_globs.EPSILON
    print("Epsilon at run start: {}".format(a_globs.cur_epsilon))

    if a_globs.AGENT == a_globs.REWARD:
        num_outputs = 1
        cur_activation = 'sigmoid'
        loss = {
            'main_output': 'mean_squared_error',
            'aux_output': 'mean_squared_error'
        }
        a_globs.non_zero_reward_buffer = []
        a_globs.zero_reward_buffer = []
        a_globs.buffer_container = [
            a_globs.non_zero_reward_buffer, a_globs.zero_reward_buffer
        ]

    elif a_globs.AGENT == a_globs.NOISE:
        num_outputs = a_globs.NUM_NOISE_NODES
        cur_activation = 'linear'
        loss = {
            'main_output': 'mean_squared_error',
            'aux_output': 'mean_squared_error'
        }

        a_globs.generic_buffer = []
        a_globs.buffer_container = [a_globs.generic_buffer]

    elif a_globs.AGENT == a_globs.STATE:
        num_outputs = a_globs.FEATURE_VECTOR_SIZE
        cur_activation = 'softmax'
        if a_globs.IS_1_HOT:
            loss = {
                'main_output': 'mean_squared_error',
                'aux_output': 'categorical_crossentropy'
            }
        else:
            loss = {
                'main_output': 'mean_squared_error',
                'aux_output': 'mean_squared_error'
            }

        a_globs.deterministic_state_buffer = []
        a_globs.stochastic_state_buffer = []
        if a_globs.IS_STOCHASTIC:
            a_globs.buffer_container = [
                a_globs.deterministic_state_buffer,
                a_globs.stochastic_state_buffer
            ]
        else:
            a_globs.buffer_container = [a_globs.deterministic_state_buffer]

    elif a_globs.AGENT == a_globs.REDUNDANT:
        num_outputs = a_globs.NUM_ACTIONS * a_globs.NUM_REDUNDANT_TASKS
        cur_activation = 'linear'
        loss = {
            'main_output': 'mean_squared_error',
            'aux_output': 'mean_squared_error'
        }

        a_globs.generic_buffer = []
        a_globs.buffer_container = [a_globs.generic_buffer]

    #Specify the model
    init_weights = he_normal()
    main_input = Input(shape=(a_globs.FEATURE_VECTOR_SIZE, ))
    shared_1 = Dense(a_globs.NUM_NERONS_LAYER_1,
                     activation='relu',
                     kernel_initializer=init_weights,
                     name='shared_1')(main_input)
    main_task_full_layer = Dense(a_globs.NUM_NERONS_LAYER_2,
                                 activation='relu',
                                 kernel_initializer=init_weights,
                                 name='main_task_full_layer')(shared_1)
    aux_task_full_layer = Dense(a_globs.NUM_NERONS_LAYER_2,
                                activation='relu',
                                kernel_initializer=init_weights)(shared_1)

    main_output = Dense(a_globs.NUM_ACTIONS,
                        activation='linear',
                        kernel_initializer=init_weights,
                        name='main_output')(main_task_full_layer)
    aux_output = Dense(num_outputs,
                       activation=cur_activation,
                       kernel_initializer=init_weights,
                       name='aux_output')(aux_task_full_layer)

    #Initialize the model
    loss_weights = {'main_output': 1.0, 'aux_output': a_globs.LAMBDA}
    a_globs.model = Model(inputs=main_input, outputs=[main_output, aux_output])
    a_globs.model.compile(optimizer=Adam(lr=a_globs.ALPHA),
                          loss=loss,
                          loss_weights=loss_weights)
    summarize_model(a_globs.model, a_globs.AGENT)

    #Create the target network to use in the update rule
    a_globs.target_network = clone_model(a_globs.model)
    a_globs.target_network.set_weights(a_globs.model.get_weights())
Example #6
    def train_model_aggregate(self):
        # Training and evaluation loop
        for i in range(self.num_grand_epochs):
            print("Grand Epoch:", i + 1, "/", self.num_grand_epochs)

            # Re-define the aggregate model (stored on the master node, and ultimately returned), also re-initialize its weights
            self.aggregate_model = self.get_new_model()

            # Define a plotting object for every numpy array that comprises the weights of our neural network, only if the algorithm is on its last grand epoch
            if i == self.num_grand_epochs - 1:
                self.plots = [
                    pca_weights_plotter()
                    for j in range(len(self.aggregate_model.get_weights()))
                ]

            # Train individual models for specified number of epochs
            start_time = time.time()
            for segnum in list(range(self.num_segments)):
                self.train_segment(segnum, i)
            print("Time:", time.time() - start_time)

            # Average the weights of the trained models on the segments, add these weights to the aggregate model
            avg_weights = []
            for idx in range(len(self.aggregate_model.get_weights())):
                np_arrays = [
                    self.segment_models[segment].get_weights()[idx]
                    for segment in self.segment_models
                ]
                avg_weights.append(sum(np_arrays) / self.num_segments)
            self.aggregate_model.set_weights(avg_weights)

            # Compile aggregate model
            self.aggregate_model.compile(loss='categorical_crossentropy',
                                         optimizer=Adam(),
                                         metrics=['accuracy'])

            # Evaluate aggregate model on the test set
            score = self.aggregate_model.evaluate(self.x_test,
                                                  self.y_test,
                                                  verbose=1)
            print("Aggregate model accuracy on test set:", score[1])

            # Plot the average model's weights and show the plots, only if the algorithm is on its last grand epoch
            if i == self.num_grand_epochs - 1:
                avg_weights = self.aggregate_model.get_weights()
                for j in range(len(avg_weights)):
                    plot = self.plots[j]
                    plot.plot_data(avg_weights[j], "dark orange", 'x')
                    plot.show_plot()

            # Redistribute the aggregate model to each segment for the next grand epoch of training, if not on last grand epoch
            if i != self.num_grand_epochs - 1:
                for segment in self.segment_models:
                    # clone_model copies the architecture only, so copy the weights explicitly
                    self.segment_models[segment] = clone_model(
                        self.aggregate_model)
                    self.segment_models[segment].set_weights(
                        self.aggregate_model.get_weights())

        print('')
        print(
            '-------------------------------------------------------------------------------------------------'
        )

        # Conduct final testing with the weight-average aggregate model approach (non-ensemble)
        train_score_merged = self.aggregate_model.evaluate(self.x_train,
                                                           self.y_train,
                                                           verbose=0)
        test_score_merged = self.aggregate_model.evaluate(self.x_test,
                                                          self.y_test,
                                                          verbose=0)
        print("Training set prediction accuracy with aggregate model:",
              train_score_merged[1])
        print("Test set prediction accuracy with aggregate model:",
              test_score_merged[1])

        print(
            '-------------------------------------------------------------------------------------------------'
        )

        # Conduct final testing with the consensus prediction ensemble approach, [include aggregate model in the ensemble]
        # self.segment_models['agg'] = self.aggregate_model
        start_time = time.time()
        train_score_consensus = self.consensus_predict_ensemble_evaluate(
            self.x_train, self.y_train)
        test_score_consensus = self.consensus_predict_ensemble_evaluate(
            self.x_test, self.y_test)
        print(
            "Training set prediction accuracy with consensus prediction ensembling:",
            train_score_consensus)
        print(
            "Test set prediction accuracy with consensus prediction ensembling:",
            test_score_consensus)
        print("Time:", time.time() - start_time)

        print(
            '-------------------------------------------------------------------------------------------------'
        )

        # Conduct final testing with the neural boosted ensemble approach
        start_time = time.time()
        self.neural_boosted_ensemble_train()
        train_score_neural = self.neural_boosted_ensemble_evaluate(
            self.x_train, self.y_train)
        test_score_neural = self.neural_boosted_ensemble_evaluate(
            self.x_test, self.y_test)
        print(
            "Training set prediction accuracy with neural boosted ensembling:",
            train_score_neural)
        print("Test set prediction accuracy with neural boosted ensembling:",
              test_score_neural)
        print("Time:", time.time() - start_time)

        print(
            '-------------------------------------------------------------------------------------------------'
        )

        # Conduct final testing with the convolutional boosted ensemble approach
        # assert self.num_classes == self.num_segments, "Cannot perform convolutional ensembling at the moment"
        start_time = time.time()
        self.convolutional_boosted_ensemble_train()
        train_score_convolutional = self.convolutional_boosted_ensemble_evaluate(
            self.x_train, self.y_train)
        test_score_convolutional = self.convolutional_boosted_ensemble_evaluate(
            self.x_test, self.y_test)
        print(
            "Training set prediction accuracy with convolutional boosted ensembling:",
            train_score_convolutional)
        print(
            "Test set prediction accuracy with convolutional boosted ensembling:",
            test_score_convolutional)
        print("Time:", time.time() - start_time)

        print(
            '-------------------------------------------------------------------------------------------------'
        )
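The weight-averaging loop in the middle of `train_model_aggregate` can be read as a small federated-averaging step. An equivalent hedged helper (assuming every segment model shares the same architecture):

```python
def average_weights(models):
    # Group the i-th weight arrays of all models together and average them.
    weight_lists = [m.get_weights() for m in models]
    return [sum(arrays) / float(len(models)) for arrays in zip(*weight_lists)]
```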
Example #7
    action_size = len(env.action_space)  #.n
    agent = DQNAgent(state_size, action_size)
    #agent.load("model.h5")
    done = False

    episodelist = list()
    scorelist = list()
    output = list()
    e = 0
    #
    while time.time() < end:
        #for e in range(EPISODES):
        e += 1
        agent.state = env.reset()
        stationary_model = clone_model(agent.model)

        while True:

            agent.action, q_ = agent.act(agent.state, env.actionlimit,
                                         env.epsilonmod[env.turncounter - 1])
            next_state, reward, done = env.step(agent.action)
            agent.memorize(agent.state, agent.action, reward, next_state, done,
                           q_)
            agent.state = next_state

            if done:
                #print("episode: {}/{}, score: {}, e: {:.2}, actions: {}, expmod: {}"
                #      .format(e, EPISODES, env.discountedmined, agent.epsilon, env.actionslist, env.epsilonmod))

                episodelist.append(e)
Example #8
## save base model to restore later

(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# x_train = x_train.astype('float32') / 255
# x_test = x_test.astype('float32') / 255
x_train, x_test = vgg_normalize(x_train, x_test)

(x_train, y_train) = shuffle_and_reduce(reduce_percent, x_train, y_train)
y_test = to_categorical(y_test)
y_train = to_categorical(y_train)

scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

base = clone_model(model)
base.set_weights(model.get_weights())

# gradients_train = get_layer_output_grad(base, x_train, y_train, -6)
# gradients_test = get_layer_output_grad(base, x_test, y_test, -6)
# print(gradients)

base = train_chosen(model, base, x_train, x_test, y_train, y_test, index)

# scores = base.evaluate(x_test, y_test, verbose=1)
# print('Test loss:', scores[0])
# print('Test accuracy:', scores[1])

base.save(output)
Example #9
    def copy_model(self, game):
        model_copy = clone_model(self.model)
        model_copy.build(self.input_layer(game))
        self.compile_model(model_copy, game)
        model_copy.set_weights(self.model.get_weights())
        return model_copy
Example #10
def multi_gpu_model(model, gpus=None, cpu_merge=True, cpu_relocation=False):
    """Replicates a model on different GPUs.

    Specifically, this function implements single-machine
    multi-GPU data parallelism. It works in the following way:

    - Divide the model's input(s) into multiple sub-batches.
    - Apply a model copy on each sub-batch. Every model copy
        is executed on a dedicated GPU.
    - Concatenate the results (on CPU) into one big batch.

    E.g. if your `batch_size` is 64 and you use `gpus=2`,
    then we will divide the input into 2 sub-batches of 32 samples,
    process each sub-batch on one GPU, then return the full
    batch of 64 processed samples.

    This induces quasi-linear speedup on up to 8 GPUs.

    This function is only available with the TensorFlow backend
    for the time being.

    # Arguments
        model: A Keras model instance. To avoid OOM errors,
            this model could have been built on CPU, for instance
            (see usage example below).
        gpus: Integer >= 2 or list of integers, number of GPUs or
            list of GPU IDs on which to create model replicas.
        cpu_merge: A boolean value to identify whether to force
            merging model weights under the scope of the CPU or not.
        cpu_relocation: A boolean value to identify whether to
            create the model's weights under the scope of the CPU.
            If the model is not defined under any preceding device
            scope, you can still rescue it by activating this option.

    # Returns
        A Keras `Model` instance which can be used just like the initial
        `model` argument, but which distributes its workload on multiple GPUs.

    # Example 1 - Training models with weights merge on CPU

    ```python
        import tensorflow as tf
        from keras.applications import Xception
        from keras.utils import multi_gpu_model
        import numpy as np

        num_samples = 1000
        height = 224
        width = 224
        num_classes = 1000

        # Instantiate the base model (or "template" model).
        # We recommend doing this under a CPU device scope,
        # so that the model's weights are hosted on CPU memory.
        # Otherwise they may end up hosted on a GPU, which would
        # complicate weight sharing.
        with tf.device('/cpu:0'):
            model = Xception(weights=None,
                             input_shape=(height, width, 3),
                             classes=num_classes)

        # Replicates the model on 8 GPUs.
        # This assumes that your machine has 8 available GPUs.
        parallel_model = multi_gpu_model(model, gpus=8)
        parallel_model.compile(loss='categorical_crossentropy',
                               optimizer='rmsprop')

        # Generate dummy data.
        x = np.random.random((num_samples, height, width, 3))
        y = np.random.random((num_samples, num_classes))

        # This `fit` call will be distributed on 8 GPUs.
        # Since the batch size is 256, each GPU will process 32 samples.
        parallel_model.fit(x, y, epochs=20, batch_size=256)

        # Save model via the template model (which shares the same weights):
        model.save('my_model.h5')
    ```

    # Example 2 - Training models with weights merge on CPU using cpu_relocation

    ```python
         ..
         # Not needed to change the device scope for model definition:
         model = Xception(weights=None, ..)

         try:
             model = multi_gpu_model(model, cpu_relocation=True)
             print("Training using multiple GPUs..")
         except:
             print("Training using single GPU or CPU..")

         model.compile(..)
         ..
    ```

    # Example 3 - Training models with weights merge on GPU (recommended for NV-link)

    ```python
         ..
         # Not needed to change the device scope for model definition:
         model = Xception(weights=None, ..)

         try:
             model = multi_gpu_model(model, cpu_merge=False)
             print("Training using multiple GPUs..")
         except:
             print("Training using single GPU or CPU..")

         model.compile(..)
         ..
    ```

    # On model saving

    To save the multi-gpu model, use `.save(fname)` or `.save_weights(fname)`
    with the template model (the argument you passed to `multi_gpu_model`),
    rather than the model returned by `multi_gpu_model`.
    """
    if K.backend() != 'tensorflow':
        raise ValueError('`multi_gpu_model` is only available '
                         'with the TensorFlow backend.')

    available_devices = _get_available_devices()
    available_devices = [
        _normalize_device_name(name) for name in available_devices
    ]
    if not gpus:
        # Using all visible GPUs when not specifying `gpus`
        # e.g. CUDA_VISIBLE_DEVICES=0,2 python keras_mgpu.py
        gpus = len([x for x in available_devices if 'gpu' in x])

    if isinstance(gpus, (list, tuple)):
        if len(gpus) <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `len(gpus) >= 2`. '
                             'Received: `gpus=%s`' % gpus)
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        if gpus <= 1:
            raise ValueError('For multi-gpu usage to be effective, '
                             'call `multi_gpu_model` with `gpus >= 2`. '
                             'Received: `gpus=%d`' % gpus)
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    import tensorflow as tf

    target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
    for device in target_devices:
        if device not in available_devices:
            raise ValueError(
                'To call `multi_gpu_model` with `gpus=%s`, '
                'we expect the following devices to be available: %s. '
                'However this machine only has: %s. '
                'Try reducing `gpus`.' %
                (gpus, target_devices, available_devices))

    def get_slice(data, i, parts):
        shape = K.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == parts - 1:
            size = batch_size - step * i
        else:
            size = step
        size = K.concatenate([size, input_shape], axis=0)
        stride = K.concatenate([step, input_shape * 0], axis=0)
        start = stride * i
        return K.slice(data, start, size)

    # Relocate the model definition under CPU device scope if needed
    if cpu_relocation:
        with tf.device('/cpu:0'):
            model = clone_model(model)

    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    # In-place input splitting which is not only
                    # 5% ~ 12% faster but also less GPU memory
                    # duplication.
                    with tf.device(x.device):
                        input_shape = K.int_shape(x)[1:]
                        slice_i = Lambda(get_slice,
                                         output_shape=input_shape,
                                         arguments={
                                             'i': i,
                                             'parts': num_gpus
                                         })(x)
                        inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                outputs = to_list(outputs)

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Deduplicate output names to handle Siamese networks.
    occurrences = {}
    for n in model.output_names:
        if n not in occurrences:
            occurrences[n] = 1
        else:
            occurrences[n] += 1
    conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
    output_names = []
    for n in model.output_names:
        if n in conflict_counter:
            conflict_counter[n] += 1
            n += '_%d' % conflict_counter[n]
        output_names.append(n)

    # Merge outputs under expected scope.
    with tf.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
        merged = []
        for name, outputs in zip(output_names, all_outputs):
            merged.append(concatenate(outputs, axis=0, name=name))
        return Model(model.inputs, merged)
Example #11
    def __init__(self, config=None):

        if config is None:
            config = {}
        self.env = wrap_dqn(gym.make(config.get('game', 'PongNoFrameskip-v4')))
        self.action_size = self.env.action_space.n

        self.to_vis = config.get('visualize', False)
        self.verbose = config.get('verbose', True)
        self.backup = config.get('backup', 25)
        self.episodes = config.get('episodes', 300)

        self.depth = config.get('depth', 4)
        self.state_size = config.get('space', (84, 84))
        self.model = None
        self._target_model = None

        self.prioritized = config.get('prioritized', False)

        if self.prioritized:
            self.memory = PrioritizedMemory(
                max_len=config.get('mem_size', 100000))
        else:
            self.memory = SimpleMemory(max_len=config.get('mem_size', 100000))

        if config.get('duel', False):
            self.model = self._duel_conv()
        else:
            self.model = self._conv()

        self.model.compile(Adam(lr=config.get('lr', 1e-4)), loss=huber_loss)

        if config.get('target', True):
            self._target_model = clone_model(self.model)
            self._target_model.set_weights(self.model.get_weights())
            self._time = 0
            self.update_time = config.get('target_update', 1000)

        self.env._max_episode_steps = None
        self.batch_size = config.get('batch', 32 * 3)
        self.to_observe = config.get('to_observe', 10000)

        self.log_dir = config['log_dir']
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
        plot_model(self.model,
                   to_file=os.path.join(self.log_dir, 'model.png'),
                   show_shapes=True)

        attr = {
            'batch size': self.batch_size,
            'to observe': self.to_observe,
            'depth': self.depth
        }

        self.results = {'info': attr}

        load_prev = config.get('load', False)

        self.gamma = None
        pol = None

        if 'pol' in config:
            if config['pol'] == 'random':
                pol = policy.RandomPolicy()
            elif config['pol'] == 'eps':
                pol = policy.EpsPolicy(config.get('pol_eps', 0.1))

        self.pol = pol

        if load_prev:
            path = sorted([
                int(x) for x in os.listdir(self.log_dir)
                if os.path.isdir(os.path.join(self.log_dir, x))
            ])
            if len(path) != 0:
                load_prev = self.load(os.path.join(self.log_dir,
                                                   str(path[-1])))

        if self.pol is None:
            self.pol = policy.AnnealedPolicy(
                inner_policy=policy.EpsPolicy(1.0,
                                              other_pol=policy.GreedyPolicy()),
                attr='eps',
                value_max=1.0,
                value_min=config.get('ex_min', 0.02),
                value_test=0.5,
                nb_steps=config.get('ex_steps', 100000))
        if self.gamma is None:
            self.gamma = policy.EpsPolicy(float(config.get('gamma',
                                                           0.99))).get_value
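The constructor above sets up `_time` and `update_time` alongside the cloned `_target_model`, but the snippet ends before the update logic. A minimal sketch of how such counters are typically used; the function name is an assumption and it is written as a free function over the agent for illustration:

```python
def maybe_update_target(agent):
    # Call once per environment step; refresh the target network on schedule.
    agent._time += 1
    if agent._target_model is not None and agent._time % agent.update_time == 0:
        agent._target_model.set_weights(agent.model.get_weights())
```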
Example #12
    def _clone(self):
        self.qs1 = []
        for q in self.qs:
            q1 = clone_model(q)
            q1.set_weights(q.get_weights())
            self.qs1.append(q1)
Example #13
def startIteration(model, args):
    log_file_name = datetime.datetime.now().strftime("log_%Y_%m_%d_%H_%M_%S.txt")
    log_file = open(log_file_name, "w")
    backup = sys.stdout
    sys.stdout = Tee(sys.stdout, log_file)

    # store N_t(a)
    Nt = np.zeros(ACTIONS)
    
    # open up a game state to communicate with emulator
    game_state = game.GameState()

    # store the previous observations in replay memory
    D = deque()

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    Nt[0] += 1
    x_t, r_0, terminal, curr_score = game_state.frame_step(do_nothing)

    x_t = skimage.color.rgb2gray(x_t)
    x_t = skimage.transform.resize(x_t,(80,80))
    x_t = skimage.exposure.rescale_intensity(x_t,out_range=(0,255))

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    #In Keras, need to reshape
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  #shape(1,80,80,4) 

    #Create target network
    if args['training_algorithm'] == "doubleDQN":
        target_model = clone_model(model)
        target_model.set_weights(model.get_weights())

    if args['mode'] == 'run':
        OBSERVE = 999999999    #We keep observing, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weights")
        if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
            for i in range(BOOTSTRAP_K):
                if os.path.isfile("model_%d.h5" % (i)):
                    model[i].load_weights("model_%d.h5" % (i))
                    print ("Weight for head %d load successfully", (i))
        else:
            if os.path.isfile("model.h5"):              
                model.load_weights("model.h5")
                print ("Weight load successfully")
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        epsilon = INITIAL_EPSILON

    t = 0
    total_reward = 0
    while (True):
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t1 = 0
        a_t = np.zeros([ACTIONS])
        
        if t % FRAME_PER_ACTION == 0:
            if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                chosen = np.random.randint(BOOTSTRAP_K)
                q = model[chosen].predict(s_t)
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[action_index] = 1
            else:
                #choose an action epsilon greedy
                if random.random() <= epsilon:
                    print("----------Random Action----------")
                    action_index = random.randrange(ACTIONS)
                    a_t[action_index] = 1
                else:
                    q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                    max_Q = np.argmax(q)
                    action_index = max_Q
                    a_t[action_index] = 1
            Nt[action_index] += 1

        #We reduce epsilon gradually once past the observation phase
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        #run the selected action and observed next state and reward
        x_t1_colored, r_t1, terminal, curr_score = game_state.frame_step(a_t)
        terminal_check = terminal

        x_t1 = skimage.color.rgb2gray(x_t1_colored)
        x_t1 = skimage.transform.resize(x_t1,(80,80))
        x_t1 = skimage.exposure.rescale_intensity(x_t1, out_range=(0, 255))

        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) #1x80x80x1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        #calculate bootstrap mask
        #the authors use Bernoulli(0.5), but that essentially means
        #choose with 0.5 probability on each head
        mask = np.random.choice(2, BOOTSTRAP_K, p=[0.5,]*2)

        # store the transition in D
        if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
            D.append((s_t, action_index, r_t1, s_t1, terminal, mask))
        else:
            D.append((s_t, action_index, r_t1, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #only train if done observing
        if t > OBSERVE:
            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))   #shape(32, 80, 80, 4)
            print (inputs.shape)
            targets = np.zeros((inputs.shape[0], ACTIONS))                         #shape(32, 2)

            #Now we do the experience replay
            for i in range(0, len(minibatch)):
                state_t = minibatch[i][0]
                action_t = minibatch[i][1]   #This is action index
                reward_t = minibatch[i][2]
                state_t1 = minibatch[i][3]
                terminal = minibatch[i][4]
                if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                    mask = minibatch[i][5]
                

                inputs[i:i + 1] = state_t  #I saved down s_t
                #Current Q-value predictions for each action
                if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                    targets[i] = model[chosen].predict(state_t)
                else:
                    targets[i] = model.predict(state_t) 


                if terminal:
                    targets[i, action_t] = reward_t
                else:
                    if args['training_algorithm'] == "DQN":
                        Q_sa = model.predict(state_t1)
                        targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)
                    elif args['training_algorithm'] == "doubleDQN":
                        Q_sa = model.predict(state_t1)
                        Q_target = target_model.predict(state_t1)
                        maxQ_ind = np.argmax(Q_sa,axis = 1)
                        targets[i, action_t] = reward_t + GAMMA * Q_target[0][maxQ_ind]
                    elif args['training_algorithm'] == "DQN+UCB":
                        Q_sa = model.predict(state_t1)
                        modified_Q_sa = Q_sa+np.sqrt(2*np.log(t)/(Nt))
                        targets[i, action_t] = reward_t + GAMMA * np.max(modified_Q_sa)
                    elif args['training_algorithm'] == "bootstrappedDQN":
                        Q_sa = model[chosen].predict(state_t1)
                        targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)
                    elif args['training_algorithm'] == "bootstrappedDQN+UCB":
                        Q_sa = model[chosen].predict(state_t1)
                        modified_Q_sa = Q_sa+np.sqrt(2*np.log(t)/(Nt))
                        targets[i, action_t] = reward_t + GAMMA * np.max(modified_Q_sa)

            if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                for idx in range(BOOTSTRAP_K):
                    if mask[idx] == 1:
                        loss += model[idx].train_on_batch(inputs, targets)
            else:
                loss += model.train_on_batch(inputs, targets)

        s_t = s_t1
        t = t + 1

        if args['training_algorithm'] == "doubleDQN" and t % TARGET_UPDATE == 0 :
            print("----------------------------Copy to target model----------------------------")
            target_model.set_weights(model.get_weights())

        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            if args['training_algorithm'] in ("bootstrappedDQN", "bootstrappedDQN+UCB"):
                for i in range(BOOTSTRAP_K):
                    model[i].save_weights("model_%d.h5" % (i), overwrite=True)
                    with open("model_%d.json" % (i), "w") as outfile:
                        json.dump(model[i].to_json(), outfile)
            else:              
                model.save_weights("model.h5", overwrite=True)
                with open("model.json", "w") as outfile:
                    json.dump(model.to_json(), outfile)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        printInfo(t, state, action_index, r_t1, Q_sa, loss)

        score_file = open("scores","a") 
        score_file.write(str(curr_score)+"\n")
        score_file.close()

        if terminal_check:
            print("Total rewards: ", total_reward) 
            out_file = open("total_reward","a") 
            out_file.write(str(total_reward)+"\n")
            out_file.close()
            total_reward = 0
        else:
            total_reward = total_reward + r_t1

    print("Episode finished!")
    print("************************")
Example #14
def reduce_keras_model(model, verbose=False):
    from keras.models import Model
    from keras.models import clone_model

    x = []
    input = []
    skip_layers = []
    keras_sub_version = get_keras_sub_version()
    if verbose:
        print('Keras sub version: {}'.format(keras_sub_version))

    # Find all inputs
    for level_id in range(len(model.layers)):
        layer = model.layers[level_id]
        layer_type = layer.__class__.__name__
        if layer_type == 'InputLayer':
            inp1 = get_copy_of_layer(layer, verbose)
            x.append(inp1)
            input.append(inp1.output)
    tmp_model = Model(inputs=input, outputs=input)

    for level_id in range(len(model.layers)):
        layer = model.layers[level_id]
        layer_type = layer.__class__.__name__

        # Skip input layers
        if layer_type == 'InputLayer':
            continue

        input_layers = get_input_layers_ids(model, layer, verbose)
        output_layers = get_output_layers_ids(model, layer, verbose)
        if verbose:
            print('Go for {}: {} ({}). Input layers: {} Output layers: {}'.format(level_id, layer_type, layer.name, input_layers, output_layers))

        if level_id in skip_layers:
            if verbose:
                print('Skip layer because it was removed during optimization!')
            continue

        # Special cases for reducing
        if len(output_layers) == 1:
            next_layer = model.layers[output_layers[0]]
            next_layer_type = next_layer.__class__.__name__
            if layer_type in ['Conv2D', 'DepthwiseConv2D'] and next_layer_type == 'BatchNormalization':
                tmp_model = optimize_conv2d_batchnorm_block(tmp_model, model, input_layers, layer, next_layer, verbose)
                x = tmp_model.layers[-1].output
                skip_layers.append(output_layers[0])
                continue

            if layer_type in ['SeparableConv2D'] and next_layer_type == 'BatchNormalization':
                tmp_model = optimize_separableconv2d_batchnorm_block(tmp_model, model, input_layers, layer, next_layer, verbose)
                x = tmp_model.layers[-1].output
                skip_layers.append(output_layers[0])
                continue

        if layer_type == 'Model':
            new_layer = clone_model(layer)
            new_layer.set_weights(layer.get_weights())
        else:
            new_layer = get_copy_of_layer(layer, verbose)

        prev_layer = []
        for i in range(len(set(input_layers))):
            search_layer = tmp_model.get_layer(name=model.layers[input_layers[i]].name)
            try:
                tens = search_layer.output
                prev_layer.append(tens)
            except:
                # Ugly need to check for correctness
                for node in search_layer._inbound_nodes:
                    for i in range(len(node.inbound_layers)):
                        outbound_tensor_index = node.tensor_indices[i]
                        prev_layer.append(node.output_tensors[outbound_tensor_index])

        if len(prev_layer) == 1:
            prev_layer = prev_layer[0]

        output_tensor, output_names = get_layers_without_output(tmp_model, verbose)
        if layer_type == 'Model':
            for f in prev_layer:
                x = new_layer(f)
                if f in output_tensor:
                    output_tensor.remove(f)
                output_tensor.append(x)
        else:
            x = new_layer(prev_layer)
            if type(prev_layer) is list:
                for f in prev_layer:
                    if f in output_tensor:
                        output_tensor.remove(f)
            else:
                if prev_layer in output_tensor:
                    output_tensor.remove(prev_layer)
            if type(x) is list:
                output_tensor += x
            else:
                output_tensor.append(x)

        tmp_model = Model(inputs=input, outputs=output_tensor)
        tmp_model.get_layer(name=layer.name).set_weights(layer.get_weights())

    output_tensor, output_names = get_layers_without_output(tmp_model, verbose)
    if verbose:
        print('Output names: {}'.format(output_names))
    model = Model(inputs=input, outputs=output_tensor)
    return model
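A hedged usage sketch for `reduce_keras_model`: it assumes a Keras build where `keras.applications` provides `MobileNet`, whose Conv2D + BatchNormalization blocks are exactly what the optimizer above folds together.

```python
from keras.applications.mobilenet import MobileNet

model = MobileNet(weights=None)
reduced = reduce_keras_model(model, verbose=True)
reduced.summary()  # BatchNormalization layers folded into the preceding convolutions
```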
Example #15
def copy_agent(agent):
    weights = agent.model.get_weights()
    copied_model = clone_model(agent.model)
    copied = DQNAgent(*agent.get_init_info())
    copied.set_model(copied_model, weights)
    return copied
Example #16
def trainModelWithDMC(p_boardSizeX, p_boardSizeY, p_episodes, savePath, p_temperature=15.0, p_gamma=0.9, howFar=2, rewardFunc=game.rewardFunc2, selfPlayFixOpp=False, startingModel=None, opponentConstr=None, opponentStrength=None):

    model = startingModel

    if startingModel is None:
        model = Sequential()
        model.add(Dense(70, kernel_initializer='lecun_uniform', activation='relu', input_shape=(2*(4+(2*howFar+1)*(2*howFar+1)),)))

        model.add(Dense(35, kernel_initializer='lecun_uniform', activation='relu'))

        model.add(Dense(3, kernel_initializer='lecun_uniform', activation='linear'))

        model.compile(loss='mean_squared_error', optimizer=RMSprop())

    episodes = p_episodes
    gamma = p_gamma
    startingTemperature = p_temperature
    temperature = p_temperature
    batchSize = 1
    buffer = 1
    replay = []
    h = 0
    updateStep = 0
    for i in range(episodes):

        rewardList1 = []
        stateList1 = []
        actionList1 = []

        rewardList2 = []
        stateList2 = []
        actionList2 = []

        #init board
        board = game.Board(p_boardSizeX,p_boardSizeY)

        #set opponent
        if(opponentConstr is None):
            if(selfPlayFixOpp == True):
                oppModel = clone_model(model)
                oppModel.set_weights(model.get_weights())
                board.setPlayers(game.AITrainingReduFeatWOPlayer("1",board,model,howFar), game.AITrainingReduFeatWOPlayer("2",board,oppModel,howFar))
            else:
                #default
                board.setPlayers(game.AITrainingReduFeatWOPlayer("1",board,model,howFar), game.AITrainingReduFeatWOPlayer("2",board,model,howFar))
        else:
            try:
                opp = opponentConstr("2",board,opponentStrength)
            except TypeError:
                opp = opponentConstr("2")
            board.setPlayers(game.AITrainingReduFeatWOPlayer("1",board,model,howFar), opp)

        #set start strategy
        board.startGameWithPseudoRandomStartPositions()
        while(board.checkGameStatus() == 0):

            #Boltzman action selection
            board.player1.getDirection()
            Qprobs = game.softmax(board.player1.vals/temperature)
            action_value = np.random.choice(Qprobs[0],p=Qprobs[0])
            action1 = np.argmax(Qprobs[0] == action_value) - 1
            actionList1.append(action1)

            board.player2.getDirection()
            Qprobs = game.softmax(board.player2.vals/temperature)
            action_value = np.random.choice(Qprobs[0],p=Qprobs[0])
            action2 = np.argmax(Qprobs[0] == action_value) - 1
            actionList2.append(action2)

            #Take action, observe new state S'

            state1 = board.to01ReducedFeaturesWithOpponent(board.player1, board.player2, howFar).reshape(1,2*(4+(2*howFar+1)*(2*howFar+1)))
            stateList1.append(state1)
            state2 = board.to01ReducedFeaturesWithOpponent(board.player2, board.player1, howFar).reshape(1,2*(4+(2*howFar+1)*(2*howFar+1)))
            stateList2.append(state2)

            board.movePlayers(action1, action2)

            gameStatus = board.checkGameStatus()

            #use the configured reward function (rewardFunc2 by default)
            reward = rewardFunc(gameStatus)

            reward2 = 1.0*reward

            if(gameStatus == 1) or (gameStatus == 2):
                reward2 = -1.0*reward2

            rewardList1.append(reward)
            rewardList2.append(reward2)

        dAC1 = discountedAccRewards(rewardList1,gamma)
        dAC2 = discountedAccRewards(rewardList2,gamma)

        stateList1 = stateList1 + stateList2
        actionList1 = actionList1 + actionList2
        dAC1 = np.append(dAC1,dAC2)
        for stepNum in range(len(stateList1)):

            if (len(replay) < buffer):
                replay.append((stateList1[stepNum], actionList1[stepNum], dAC1[stepNum]))
            else:
                if (h < (buffer-1)):
                    h += 1
                else:
                    h = 0
                replay[h] = (stateList1[stepNum], actionList1[stepNum], dAC1[stepNum])

                #randomly sample our experience replay memory
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []

                for memory in minibatch:
                    state, action, accReward = memory
                    Qvals = model.predict(state, batch_size=1)
                    y = np.zeros((1,3))
                    y[:] = Qvals[:]
                    y[0][action+1] = accReward #action + 1 because actions are -1,0,1
                    X_train.append(state.reshape(2*(4+(2*howFar+1)*(2*howFar+1)),))
                    y_train.append(y.reshape(3,))

                X_train = np.array(X_train)
                y_train = np.array(y_train)
                print("Game #: %s" % (i,))
                model.fit(X_train, y_train, batch_size=batchSize, epochs=1, verbose=1)
                updateStep += 1
        if i % 10000 == 0:
            model.save(savePath)
        if temperature > 1.0:
            temperature -= (startingTemperature/episodes)
        else:
            temperature = 1.0
    model.save(savePath)
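The Boltzmann selection above samples a probability value from the softmax and then matches it back with `argmax` to recover the index. An equivalent, arguably clearer, hedged sketch samples the index directly (the example then maps index - 1 to its actions -1, 0, 1):

```python
import numpy as np

def boltzmann_action_index(q_values, temperature):
    # q_values: 1-D array of Q estimates; higher temperature => more exploration.
    z = q_values / temperature
    z = z - z.max()                              # numerical stability
    probs = np.exp(z) / np.exp(z).sum()
    return np.random.choice(len(q_values), p=probs)
```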
Example #17
total_profit = 0
agent.inventory = []

# agent.inventory.append(data[0])
# next_state = getState(data, 0 + 1, window_size + 1)
# reward = 0
# agent.memory.append((state, action, reward, next_state, done))

for t in range(l):
    if t == 0:
        action = 1
    else:
        action = agent.act(state)

    if ((t % 20) == 0):
        agent.target_model = clone_model(agent.model)
        agent.target_model.set_weights(agent.model.get_weights())

    # sit
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0

    if action == 1:  # buy
        agent.inventory.append(data[t])
        print("Buy: " + formatPrice(data[t]))

    elif action == 2 and len(agent.inventory) > 0:  # sell
        bought_price = agent.inventory.pop(0)
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print("Sell: " + formatPrice(data[t]) + " | Profit: " +
Example #18
    def clone_model(self):
        model_copy = clone_model(self.model)
        model_copy.set_weights(self.model.get_weights())
        return model_copy
Example #19
    def __init__(self,
                 state_dim,
                 action_size=3,
                 strategy="t-dqn",
                 dueling_type='no',
                 use_PER=True,
                 epsilon_start=1.0,
                 epsilon_end=0.01,
                 epsilon_decay_steps=25000,
                 reset_every=100,
                 pretrained=False,
                 model_name=None):
        self.strategy = strategy

        # agent config
        self.state_dim = state_dim  # normalized
        self.action_size = action_size  # default = 3 [sit, buy, sell]
        self.model_name = model_name
        self.inventory = []

        self.first_iter_trading = False

        self.total_steps = 0
        self.episodes = self.episode_length = 0
        self.steps_per_episode = []
        self.episode_reward = 0
        self.rewards_history = []
        self.losses = []

        self.epsilon = epsilon_start
        self.epsilon_decay_steps = epsilon_decay_steps
        self.epsilon_decay = (epsilon_start -
                              epsilon_end) / epsilon_decay_steps
        self.epsilon_history = []

        # model config
        self.model_name = model_name
        self.gamma = 0.99  # affinity for long term reward
        self.l2_reg = 1e-6
        self.dueling_type = dueling_type

        # self.epsilon = 1.0
        # self.epsilon_min = 0.01
        # self.epsilon_decay = 0.995
        self.learning_rate = 0.0001
        self.loss = huber_loss
        self.custom_objects = {
            "huber_loss": huber_loss
        }  # important for loading the model from memory
        self.optimizer = Adam(lr=self.learning_rate)

        self.with_per = use_PER

        self.pretrained = pretrained
        self.results_dir = 'results'

        if pretrained and self.model_name is not None:
            self.model = self.load()
        else:
            self.model = self._model()

        # strategy config
        if self.strategy in ["t-dqn", "double-dqn"]:
            self.total_steps = 1
            self.reset_every = reset_every

            # target network
            self.target_model = clone_model(self.model)
            self.target_model.set_weights(self.model.get_weights())
        else:
            self.with_per = False

        # Memory Buffer for Experience Replay
        if self.with_per:
            self.buffer = MemoryBuffer(int(100000))
            print("Agent with Prioritized Experience Replay")
        else:
            self.memory = deque(maxlen=100000)
Example #21
    def copy(self, data):
        model = clone_model(data)
        model.set_weights(data.get_weights())
        return model
Example #22
foldA_test_labels, foldB_test_labels = foldB_train_labels, foldA_train_labels

# dimension of data and number of classes
dimension = 4
num_classes = 3
#####################################

model = Sequential()
model.add(Dense(128, activation='relu', input_dim=dimension))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

###########FOLD A###############
from keras.models import clone_model
model_A = clone_model(model)
model_A.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
model_A.fit(foldA_train, foldA_train_labels, epochs=20)

loss, accuracy1 = model_A.evaluate(foldA_test, foldA_test_labels)

###########FOLD B###############
from keras.models import clone_model
model_B = clone_model(model)
model_B.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
model_B.fit(foldB_train, foldB_train_labels, epochs=20)
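The listing is cut off after fitting `model_B`. Since `clone_model` gives each fold the same architecture with independently initialized weights, the two folds train from scratch; a hedged sketch of how the evaluation is typically finished, reusing the variable names from the example:

```python
loss2, accuracy2 = model_B.evaluate(foldB_test, foldB_test_labels)
two_fold_accuracy = (accuracy1 + accuracy2) / 2.0
print('Two-fold cross-validation accuracy: {:.4f}'.format(two_fold_accuracy))
```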
Example #23
online_network = create_dqn_model(input_shape, nb_actions, dense_layers,
                                  dense_units)


def epsilon_greedy(q_values, epsilon, n_outputs):
    if random.random() < epsilon:
        return random.randrange(n_outputs)  # random action
    else:
        return np.argmax(q_values)  # q-optimal action


replay_memory_maxlen = 1000000
replay_memory = deque([], maxlen=replay_memory_maxlen)

target_network = clone_model(online_network)
target_network.set_weights(online_network.get_weights())

name = 'MsPacman_DQN'  # used in naming files (weights, logs, etc)
n_steps = 50000  # total number of training steps (= n_epochs)
warmup = 1000  # start training after warmup iterations
training_interval = 20  # period (in actions) between training steps
save_steps = int(
    n_steps / 10)  # period (in training steps) between storing weights to file
copy_steps = 500  # period (in training steps) between updating target_network weights
gamma = 0.8  # discount rate
skip_start = 90  # skip the start of every game (it's just freezing time before game starts)
batch_size = 128  # size of minibatch that is taken randomly from replay memory every training step
double_dqn = False  # whether to use Double-DQN approach or simple DQN (see above)
# eps-greedy parameters: we slowly decrease epsilon from eps_max to eps_min in eps_decay_steps
eps_max = 1.0