Example #1
def build_network(input_shape, output_shape):
    input_data = Input(shape=input_shape, name="input")
    h = Convolution2D(32, 8, 8, subsample=(4, 4),
                      activation='relu')(input_data)
    h = Convolution2D(64, 4, 4, subsample=(2, 2), activation='relu')(h)
    h = Convolution2D(64, 3, 3, subsample=(2, 2), activation='relu')(h)
    h = Flatten()(h)
    h = Dense(256, activation='relu')(h)

    value = Dense(1, activation='linear')(h)
    policy = Dense(output_shape, activation='softmax')(h)

    value_network = Model(input=input_data, output=value)
    policy_network = Model(input=input_data, output=policy)

    adventage = Input(shape=(1, ))
    train_network = Model(input=[input_data, adventage],
                          output=[value, policy])
    print(train_network.summary())
    plot_model(train_network,
               to_file='model_plot.png',
               show_shapes=True,
               show_layer_names=True)

    return value_network, policy_network, train_network, adventage
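The trunk above is written against the Keras 1 API (Convolution2D with subsample, Model(input=..., output=...)). For reference, a minimal sketch of the same trunk in the Keras 2 functional API, assuming only the standard layer and argument renames, could look like this:

# Keras 2 sketch of the same trunk (illustrative, not part of the original snippet)
from keras.layers import Input, Conv2D, Flatten, Dense
from keras.models import Model

def build_network_k2(input_shape, output_shape):
    input_data = Input(shape=input_shape, name="input")
    h = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')(input_data)
    h = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')(h)
    h = Conv2D(64, (3, 3), strides=(2, 2), activation='relu')(h)
    h = Flatten()(h)
    h = Dense(256, activation='relu')(h)
    value = Dense(1, activation='linear')(h)
    policy = Dense(output_shape, activation='softmax')(h)
    return Model(inputs=input_data, outputs=[value, policy])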
Example #2
    def build_network(self):
        # Construct model
        input_frame = Input(shape=(self.frame_width, self.frame_height,
                                   self.state_length))
        action_one_hot = Input(shape=(self.num_actions, ))
        conv1 = Convolution2D(32, 8, 8, subsample=(4, 4),
                              activation='relu')(input_frame)
        conv2 = Convolution2D(64, 4, 4, subsample=(2, 2),
                              activation='relu')(conv1)
        conv3 = Convolution2D(64, 3, 3, subsample=(1, 1),
                              activation='relu')(conv2)
        flat_feature = Flatten()(conv3)
        hidden_feature = Dense(512)(flat_feature)
        lrelu_feature = LeakyReLU()(hidden_feature)
        q_value_prediction = Dense(self.num_actions)(lrelu_feature)

        if self.dueling:
            # Dueling Network
            # Q = state value + (action value - mean over all action values)
            hidden_feature_2 = Dense(512, activation='relu')(flat_feature)
            state_value_prediction = Dense(1)(hidden_feature_2)
            q_value_prediction = merge(
                [q_value_prediction, state_value_prediction],
                mode=lambda x: x[0] - K.mean(x[0]) + x[1],
                output_shape=(self.num_actions, ))

        #select_q_value_of_action = Multiply()([q_value_prediction,action_one_hot])
        select_q_value_of_action = merge([q_value_prediction, action_one_hot],
                                         mode='mul',
                                         output_shape=(self.num_actions, ))
        target_q_value = Lambda(
            lambda x: K.sum(x, axis=-1, keepdims=True),
            output_shape=lambda_out_shape)(select_q_value_of_action)

        model = Model(input=[input_frame, action_one_hot],
                      output=[q_value_prediction, target_q_value])

        # MSE loss on target_q_value only
        model.compile(loss=['mse', 'mse'],
                      loss_weights=[0.0, 1.0],
                      optimizer=Adam(lr=0.00001))  # alternatively: self.opt
        model.summary()
        plot_model(model,
                   to_file='model_plot.png',
                   show_shapes=True,
                   show_layer_names=True)
        return model
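The two merge(...) calls use the Keras 1 merge function, which no longer exists in Keras 2. A minimal sketch of the same dueling aggregation with Lambda and Multiply layers, assuming num_actions stands for self.num_actions, could be:

# Keras 2 style dueling aggregation (sketch; the mean is taken over the action axis)
from keras import backend as K
from keras.layers import Lambda, Multiply

q_value_prediction = Lambda(
    lambda x: x[0] - K.mean(x[0], axis=-1, keepdims=True) + x[1],
    output_shape=(num_actions, ))([q_value_prediction, state_value_prediction])

select_q_value_of_action = Multiply()([q_value_prediction, action_one_hot])
target_q_value = Lambda(
    lambda x: K.sum(x, axis=-1, keepdims=True))(select_q_value_of_action)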
Example #3
    def __init__(self, action_space, screen=(84, 84)):
        self.screen = screen
        self.input_depth = 1
        self.past_range = 10
        self.replay_size = 32
        self.observation_shape = (self.input_depth * self.past_range,) + self.screen
        self.action_space_n = action_space.n

        _, self.policy, self.load_net, _ = build_network(self.observation_shape, action_space.n)

        self.load_net.compile(optimizer=Adam(lr=0.0001), loss='mse')  # clipnorm=1.
        _, _, self.load_net_guided, _ = build_guided_model(self.observation_shape, action_space.n)
        plot_model(self.load_net, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

        self.load_net_guided.compile(optimizer=Adam(lr=0.0001), loss='mse')  # clipnorm=1.


        self.action_space = action_space
        self.observations = np.zeros((self.input_depth * self.past_range,) + screen)
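self.observations stacks input_depth * past_range frames along the first (channel) axis. A common way to push a new preprocessed frame into such a buffer is a roll-and-overwrite; the helper below is only an illustration, not part of the original agent:

import numpy as np

def push_frame(observations, frame, input_depth=1):
    # shift the stack back by one frame slot, then overwrite the freed last slot
    observations = np.roll(observations, -input_depth, axis=0)
    observations[-input_depth:] = frame
    return observations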
Example #4
model.add(Convolution2D(64, 3, 3, W_regularizer=l2(0.001)))
model.add(ELU())
model.add(Flatten())
# model.add(Dropout(0.5))
model.add(Dense(80, W_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(40, W_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(16, W_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(10, W_regularizer=l2(0.001)))
model.add(Dense(1, W_regularizer=l2(0.001)))
adam = Adam(lr=0.0001)
model.compile(optimizer=adam, loss='mse', metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=2)
# model_history = model.fit_generator(train_generator, samples_per_epoch=nb_samples_per_epoch, nb_epoch=10, validation_data=validation_generator, callbacks=[early_stopping], \
# nb_val_samples=len(validation_samples))

print("Done with training. ")
from keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)

## Save the model and weights
model_json = model.to_json()
with open('model.json', 'w') as json_file:
    json_file.write(model_json)

model.save("model.h5")
print("Model save to disk")
Example #5
    def evaluate(self,
                 env,
                 num_episodes,
                 eval_count,
                 max_episode_length=None,
                 monitor=True):
        """Test your agent with a provided environment.

        Basically run your policy on the environment and collect stats
        like cumulative reward, average episode length, etc.

        You can also call the render function here if you want to
        visually inspect your policy.
        """
        print("Evaluation starts.", monitor)

        is_training = False
        if self.load_network:
            self.q_network.load_weights(self.load_network_path)
            print("Load network from:", self.load_network_path)
            if self.convert:
                print("asd")
                self.q_network.save("model.h5")
                plot_model(self.q_network,
                           to_file='model_plot.png',
                           show_shapes=True,
                           show_layer_names=True)
                return
        if monitor:
            env = wrappers.Monitor(env,
                                   self.output_path_videos,
                                   video_callable=lambda x: True,
                                   resume=True)
        state = env.reset()

        idx_episode = 1
        episode_frames = 0
        episode_reward = np.zeros(num_episodes)
        t = 0

        while idx_episode <= num_episodes:
            t += 1
            action_state = self.history_processor.process_state_for_network(
                self.atari_processor.process_state_for_network(state))
            action = self.select_action(action_state,
                                        is_training,
                                        policy_type='GreedyEpsilonPolicy')
            state, reward, done, info = env.step(action)
            episode_frames += 1
            episode_reward[idx_episode - 1] += reward
            if max_episode_length is not None and episode_frames > max_episode_length:
                done = True
            if done:
                print("Eval: time %d, episode %d, length %d, reward %.0f" %
                      (t, idx_episode, episode_frames,
                       episode_reward[idx_episode - 1]))
                eval_count += 1
                save_scalar(eval_count, 'eval/eval_episode_raw_reward',
                            episode_reward[idx_episode - 1], self.writer)
                save_scalar(eval_count, 'eval/eval_episode_raw_length',
                            episode_frames, self.writer)
                sys.stdout.flush()
                state = env.reset()
                episode_frames = 0
                idx_episode += 1
                self.atari_processor.reset()
                self.history_processor.reset()

        reward_mean = np.mean(episode_reward)
        reward_std = np.std(episode_reward)
        print(
            "Evaluation summury: num_episodes [%d], reward_mean [%.3f], reward_std [%.3f]"
            % (num_episodes, reward_mean, reward_std))
        sys.stdout.flush()

        return reward_mean, reward_std, eval_count
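save_scalar is defined elsewhere in the original project; judging from how it is called above, a hypothetical implementation against a TensorFlow 1.x summary writer might look as follows (an assumption, not the project's actual helper):

import tensorflow as tf

def save_scalar(step, name, value, writer):
    # hypothetical helper: write one scalar to TensorBoard via a tf.summary.FileWriter
    summary = tf.Summary(value=[tf.Summary.Value(tag=name, simple_value=float(value))])
    writer.add_summary(summary, step)
    writer.flush()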
Example #6
    if parsed_args.train:
        print "[*] Training model..."
        mhan.fit(X_ids, Y_ids, XV_ids, YV_ids, labels, wvecs, vocabs)
    if parsed_args.test or parsed_args.store_test:
        lang_idx = parsed_args.languages.index(args['target'])
        dev_path = "%s/%s/" % (args['path'], args['target'])
        source_idx = lang_idx
        if parsed_args.source is not None:
            print "[*] Cross-lingual mode: ON"
            print "[*] Source language: %s" % args['source']
            dev_path = "%s/%s/" % (args['path'], args['source'])
            source_idx = parsed_args.languages.index(args['source'])
        epoch_num, best_weights_file = pick_best(dev_path)
        mhan.model.load_weights(best_weights_file)
        plot_model(mhan.model, to_file="%sarch.png" % dev_path)
        if parsed_args.store_test:
            print "[*] Storing predictions on %s test..." % args["target"]
            reals, epreds, watts, satts = mhan.eval(lang_idx,
                                                    XT_ids[lang_idx],
                                                    YT_ids[lang_idx],
                                                    wvecs[lang_idx],
                                                    labels[lang_idx],
                                                    L=len(
                                                        parsed_args.languages),
                                                    source=parsed_args.source)
            out = export(args["target"], lang_idx, source_idx, epreds, watts,
                         satts, XT_ids, YT_ids, vocabs, labels)
            json.dump(out,
                      open("%s%s" % (dev_path, parsed_args.store_file), 'w'))
        else:
Example #7
 numTrn = trnY.shape[0]
 #
 if not os.path.isfile(pathModel):
     model = buildModel_SimpleCNN3D(inpShape=valX.shape[1:],
                                    numCls=numCls)
     popt = kopt.Adam(lr=0.00001)
     # popt = 'adam'
     model.compile(optimizer=popt,
                   loss='categorical_crossentropy',
                   # loss='binary_crossentropy',
                   metrics=['accuracy'])
 else:
     pathModelBk = '%s-%s.bk' % (pathModel, time.strftime('%Y.%m.%d-%H.%M.%S'))
     shutil.copy(pathModel, pathModelBk)
     model = keras.models.load_model(pathModel)
 plot_model(model, to_file=pathModelPlot, show_shapes=True)
 # plt.imshow(skio.imread(pathModelPlot))
 # plt.show()
 model.summary()
 batchSize = 8
 numEpochs = 300
 numIterPerEpoch = numTrn // (numCls * batchSize)
 # model.fit(trnX, trnY, nb_epoch=10, validation_data=(valX, valY))
 model.fit_generator(
     generator=train_generator(dataX=trnX, dataY=trnY, batchSize=batchSize, isRandomize=True),
     samples_per_epoch=numIterPerEpoch,
     nb_epoch=numEpochs, validation_data=(valX, valY),
     callbacks=[
         kall.ModelCheckpoint(pathModel, verbose=True, save_best_only=True),
         kall.CSVLogger(pathLog, append=True)
     ])
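samples_per_epoch and nb_epoch are Keras 1 argument names; note that Keras 1 counts samples per epoch while Keras 2 counts batches. Under Keras 2 the same call would roughly become the sketch below, assuming numIterPerEpoch is meant as the number of batches per epoch:

# Keras 2 equivalent of the call above (sketch)
model.fit_generator(
    generator=train_generator(dataX=trnX, dataY=trnY, batchSize=batchSize, isRandomize=True),
    steps_per_epoch=numIterPerEpoch,
    epochs=numEpochs,
    validation_data=(valX, valY),
    callbacks=[
        kall.ModelCheckpoint(pathModel, verbose=True, save_best_only=True),
        kall.CSVLogger(pathLog, append=True)
    ])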