def build_network(input_shape, output_shape):
    # Shared convolutional trunk feeding both the value and the policy head.
    input_data = Input(shape=input_shape, name="input")
    h = Convolution2D(32, 8, 8, subsample=(4, 4), activation='relu')(input_data)
    h = Convolution2D(64, 4, 4, subsample=(2, 2), activation='relu')(h)
    h = Convolution2D(64, 3, 3, subsample=(2, 2), activation='relu')(h)
    h = Flatten()(h)
    h = Dense(256, activation='relu')(h)

    # Separate heads: scalar state value and a softmax policy over actions.
    value = Dense(1, activation='linear')(h)
    policy = Dense(output_shape, activation='softmax')(h)

    value_network = Model(input=input_data, output=value)
    policy_network = Model(input=input_data, output=policy)

    # Extra advantage input, returned so the caller can wire it into a custom loss.
    advantage = Input(shape=(1,))
    train_network = Model(input=[input_data, advantage], output=[value, policy])

    train_network.summary()
    plot_model(train_network, to_file='model_plot.png',
               show_shapes=True, show_layer_names=True)
    return value_network, policy_network, train_network, advantage
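
# --- Illustrative sketch (not part of the original source) ---------------------------
# The extra `advantage` Input returned above is the kind of placeholder that is usually
# wired into a custom policy-gradient loss when the combined train_network is compiled.
# The helper below is an assumption about how that could look, not the project's actual loss.
from keras import backend as K

def policy_gradient_loss(advantage):
    def loss(y_true, y_pred):
        # y_true: one-hot encoding of the taken action, y_pred: softmax policy output.
        log_prob = K.log(K.sum(y_true * y_pred, axis=-1) + K.epsilon())
        return -log_prob * K.flatten(advantage)
    return loss

# Hypothetical usage:
# train_network.compile(optimizer=Adam(lr=1e-4),
#                       loss=['mse', policy_gradient_loss(advantage)])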
def build_network(self):
    # Construct model
    input_frame = Input(shape=(self.frame_width, self.frame_height, self.state_length))
    action_one_hot = Input(shape=(self.num_actions,))
    conv1 = Convolution2D(32, 8, 8, subsample=(4, 4), activation='relu')(input_frame)
    conv2 = Convolution2D(64, 4, 4, subsample=(2, 2), activation='relu')(conv1)
    conv3 = Convolution2D(64, 3, 3, subsample=(1, 1), activation='relu')(conv2)
    flat_feature = Flatten()(conv3)
    hidden_feature = Dense(512)(flat_feature)
    lrelu_feature = LeakyReLU()(hidden_feature)
    q_value_prediction = Dense(self.num_actions)(lrelu_feature)

    if self.dueling:
        # Dueling network:
        # Q = value of state + (value of action - mean over all action values)
        hidden_feature_2 = Dense(512, activation='relu')(flat_feature)
        state_value_prediction = Dense(1)(hidden_feature_2)
        q_value_prediction = merge(
            [q_value_prediction, state_value_prediction],
            # mean taken over the action axis, per sample
            mode=lambda x: x[0] - K.mean(x[0], axis=1, keepdims=True) + x[1],
            output_shape=(self.num_actions,))

    # select_q_value_of_action = Multiply()([q_value_prediction, action_one_hot])
    # Mask the predicted Q-values with the one-hot action so only the taken action remains.
    select_q_value_of_action = merge([q_value_prediction, action_one_hot],
                                     mode='mul', output_shape=(self.num_actions,))
    target_q_value = Lambda(
        lambda x: K.sum(x, axis=-1, keepdims=True),
        output_shape=lambda_out_shape)(select_q_value_of_action)

    model = Model(input=[input_frame, action_one_hot],
                  output=[q_value_prediction, target_q_value])
    # MSE loss on target_q_value only
    model.compile(loss=['mse', 'mse'], loss_weights=[0.0, 1.0],
                  optimizer=Adam(lr=0.00001))  # self.opt
    model.summary()
    plot_model(model, to_file='model_plot.png',
               show_shapes=True, show_layer_names=True)
    return model
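
# --- Illustrative sketch (not part of the original source) ---------------------------
# lambda_out_shape is referenced by the Lambda layer above but not defined in this
# snippet. A minimal plausible definition, assuming it only reports the shape produced
# by summing over the last axis with keepdims=True:
def lambda_out_shape(input_shape):
    shape = list(input_shape)
    shape[-1] = 1
    return tuple(shape)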
def __init__(self, action_space, screen=(84, 84)):
    self.screen = screen
    self.input_depth = 1
    self.past_range = 10
    self.replay_size = 32
    self.observation_shape = (self.input_depth * self.past_range,) + self.screen
    self.action_space_n = action_space.n

    _, self.policy, self.load_net, _ = build_network(self.observation_shape, action_space.n)
    self.load_net.compile(optimizer=Adam(lr=0.0001), loss='mse')  # clipnorm=1.

    _, _, self.load_net_guided, _ = build_guided_model(self.observation_shape, action_space.n)
    plot_model(self.load_net, to_file='model_plot.png',
               show_shapes=True, show_layer_names=True)
    self.load_net_guided.compile(optimizer=Adam(lr=0.0001), loss='mse')  # clipnorm=1.

    self.action_space = action_space
    self.observations = np.zeros((self.input_depth * self.past_range,) + screen)
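
# --- Illustrative sketch (not part of the original source) ---------------------------
# One common way an agent with a stacked observation buffer like self.observations
# pushes in a new frame: roll the oldest frame(s) out and write the newest at the end.
# The method name and update policy below are assumptions, not the original class code.
def save_observation(self, observation):
    self.observations = np.roll(self.observations, -self.input_depth, axis=0)
    self.observations[-self.input_depth:, ...] = observation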
model.add(Convolution2D(64, 3, 3, W_regularizer=l2(0.001)))
model.add(ELU())
model.add(Flatten())
# model.add(Dropout(0.5))
model.add(Dense(80, W_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(40, W_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(16, W_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(10, W_regularizer=l2(0.001)))
model.add(Dense(1, W_regularizer=l2(0.001)))

adam = Adam(lr=0.0001)
model.compile(optimizer=adam, loss='mse', metrics=['accuracy'])
model.summary()

early_stopping = EarlyStopping(monitor='val_loss', patience=2)
# model_history = model.fit_generator(train_generator, samples_per_epoch=nb_samples_per_epoch,
#                                     nb_epoch=10, validation_data=validation_generator,
#                                     callbacks=[early_stopping],
#                                     nb_val_samples=len(validation_samples))
print("Done with training.")

from keras.utils.visualize_util import plot_model
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)

## Save the model and weights
model_json = model.to_json()
with open('model.json', 'w') as json_file:
    json_file.write(model_json)
model.save("model.h5")
print("Model saved to disk")
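
# --- Illustrative sketch (not part of the original source) ---------------------------
# Reloading the artifacts saved above with standard Keras calls: model.h5 holds the
# full model (architecture + weights + optimizer state), model.json the architecture only.
from keras.models import load_model, model_from_json

def reload_saved_model():
    full_model = load_model("model.h5")
    with open("model.json") as json_file:
        architecture_only = model_from_json(json_file.read())
    return full_model, architecture_only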
def evaluate(self, env, num_episodes, eval_count, max_episode_length=None, monitor=True):
    """Test your agent with a provided environment.

    Basically run your policy on the environment and collect stats like
    cumulative reward, average episode length, etc. You can also call the
    render function here if you want to visually inspect your policy.
    """
    print("Evaluation starts.", monitor)
    is_training = False
    if self.load_network:
        self.q_network.load_weights(self.load_network_path)
        print("Load network from:", self.load_network_path)

    if self.convert:
        # Export the loaded network and its architecture plot, then exit.
        print("Converting loaded network to model.h5 and model_plot.png.")
        self.q_network.save("model.h5")
        plot_model(self.q_network, to_file='model_plot.png',
                   show_shapes=True, show_layer_names=True)
        return

    if monitor:
        env = wrappers.Monitor(env, self.output_path_videos,
                               video_callable=lambda x: True, resume=True)
    state = env.reset()

    idx_episode = 1
    episode_frames = 0
    episode_reward = np.zeros(num_episodes)
    t = 0

    while idx_episode <= num_episodes:
        t += 1
        action_state = self.history_processor.process_state_for_network(
            self.atari_processor.process_state_for_network(state))
        action = self.select_action(action_state, is_training,
                                    policy_type='GreedyEpsilonPolicy')
        state, reward, done, info = env.step(action)
        episode_frames += 1
        episode_reward[idx_episode - 1] += reward
        if max_episode_length is not None and episode_frames > max_episode_length:
            done = True
        if done:
            print("Eval: time %d, episode %d, length %d, reward %.0f" %
                  (t, idx_episode, episode_frames, episode_reward[idx_episode - 1]))
            eval_count += 1
            save_scalar(eval_count, 'eval/eval_episode_raw_reward',
                        episode_reward[idx_episode - 1], self.writer)
            save_scalar(eval_count, 'eval/eval_episode_raw_length',
                        episode_frames, self.writer)
            sys.stdout.flush()
            state = env.reset()
            episode_frames = 0
            idx_episode += 1
            self.atari_processor.reset()
            self.history_processor.reset()

    reward_mean = np.mean(episode_reward)
    reward_std = np.std(episode_reward)
    print("Evaluation summary: num_episodes [%d], reward_mean [%.3f], reward_std [%.3f]" %
          (num_episodes, reward_mean, reward_std))
    sys.stdout.flush()

    return reward_mean, reward_std, eval_count
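
# --- Illustrative sketch (not part of the original source) ---------------------------
# save_scalar() is called above but not defined in this snippet. A minimal plausible
# implementation, assuming self.writer is a TensorFlow 1.x tf.summary.FileWriter:
import tensorflow as tf

def save_scalar(step, name, value, writer):
    summary = tf.Summary(value=[tf.Summary.Value(tag=name, simple_value=float(value))])
    writer.add_summary(summary, global_step=step)
    writer.flush()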
if parsed_args.train:
    print("[*] Training model...")
    mhan.fit(X_ids, Y_ids, XV_ids, YV_ids, labels, wvecs, vocabs)

if parsed_args.test or parsed_args.store_test:
    lang_idx = parsed_args.languages.index(args['target'])
    dev_path = "%s/%s/" % (args['path'], args['target'])
    source_idx = lang_idx
    if parsed_args.source is not None:
        print("[*] Cross-lingual mode: ON")
        print("[*] Source language: %s" % args['source'])
        dev_path = "%s/%s/" % (args['path'], args['source'])
        source_idx = parsed_args.languages.index(args['source'])
    epoch_num, best_weights_file = pick_best(dev_path)
    mhan.model.load_weights(best_weights_file)
    plot_model(mhan.model, to_file="%sarch.png" % dev_path)
    if parsed_args.store_test:
        print("[*] Storing predictions on %s test..." % args["target"])
        reals, epreds, watts, satts = mhan.eval(
            lang_idx, XT_ids[lang_idx], YT_ids[lang_idx],
            wvecs[lang_idx], labels[lang_idx],
            L=len(parsed_args.languages), source=parsed_args.source)
        out = export(args["target"], lang_idx, source_idx, epreds, watts, satts,
                     XT_ids, YT_ids, vocabs, labels)
        json.dump(out, open("%s%s" % (dev_path, parsed_args.store_file), 'w'))
    else:
numTrn = trnY.shape[0]
#
if not os.path.isfile(pathModel):
    model = buildModel_SimpleCNN3D(inpShape=valX.shape[1:], numCls=numCls)
    popt = kopt.Adam(lr=0.00001)
    # popt = 'adam'
    model.compile(optimizer=popt,
                  loss='categorical_crossentropy',
                  # loss='binary_crossentropy',
                  metrics=['accuracy'])
else:
    pathModelBk = '%s-%s.bk' % (pathModel, time.strftime('%Y.%m.%d-%H.%M.%S'))
    shutil.copy(pathModel, pathModelBk)
    model = keras.models.load_model(pathModel)

plot_model(model, to_file=pathModelPlot, show_shapes=True)
# plt.imshow(skio.imread(pathModelPlot))
# plt.show()
model.summary()

batchSize = 8
numEpochs = 300
numIterPerEpoch = numTrn / (numCls * batchSize)
# model.fit(trnX, trnY, nb_epoch=10, validation_data=(valX, valY))
model.fit_generator(
    generator=train_generator(dataX=trnX, dataY=trnY, batchSize=batchSize, isRandomize=True),
    samples_per_epoch=numIterPerEpoch,
    nb_epoch=numEpochs,
    validation_data=(valX, valY),
    callbacks=[
        kall.ModelCheckpoint(pathModel, verbose=True, save_best_only=True),
        kall.CSVLogger(pathLog, append=True)
    ])
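
# --- Illustrative sketch (not part of the original source) ---------------------------
# train_generator() is passed to fit_generator above but not defined in this snippet.
# A minimal sketch that yields (X, Y) mini-batches indefinitely; the exact sampling
# scheme is an assumption.
import numpy as np

def train_generator(dataX, dataY, batchSize=8, isRandomize=True):
    numSamples = dataX.shape[0]
    idxAll = np.arange(numSamples)
    while True:
        if isRandomize:
            idx = np.random.choice(idxAll, batchSize, replace=False)
        else:
            idx = idxAll[:batchSize]
        yield dataX[idx], dataY[idx]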