def main(unused_argv):
    opt = Options()
    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
    trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                            opt.minibatch_size, opt.valid_size,
                            opt.states_fil, opt.labels_fil)

    # 1. train
    ######################################
    # both train_data and valid_data contain tuples of images and labels
    train_data = trans.get_train()
    valid_data = trans.get_valid()

    samples_train_data = np.float32(train_data[0])
    labels_train_data = np.float32(train_data[1])
    unhotted_labels_train_data = unhot(labels_train_data)

    samples_valid_data = np.float32(valid_data[0])
    labels_valid_data = np.float32(valid_data[1])
    unhotted_labels_valid_data = unhot(labels_valid_data)

    print("Shape of samples_train_data {}".format(samples_train_data.shape))
    print("Shape of labels_train_data {}".format(labels_train_data.shape))
    print("Shape of unhotted_labels_train_data {}".format(unhotted_labels_train_data.shape))

    classifier = cnn.get_estimator()

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": samples_train_data},
        y=unhotted_labels_train_data,
        batch_size=100,
        num_epochs=None,
        shuffle=True)
    classifier.train(input_fn=train_input_fn, steps=1000)

    # Evaluate on the validation split
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": samples_valid_data},
        y=unhotted_labels_valid_data,
        num_epochs=1,
        shuffle=False)
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)
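# NOTE: 'unhot' is not defined in this snippet. A minimal sketch, assuming the
# labels arrive one-hot encoded and the estimator expects integer class ids:
def unhot(one_hot_labels):
    # collapse one-hot rows (e.g. [0, 0, 1, 0, 0] -> 2) to integer class indices
    return np.argmax(one_hot_labels, axis=1).astype(np.int32)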
# Input layer
image_dimension = opt.cub_siz * opt.pob_siz
img_rows = img_cols = image_dimension
input_shape = [img_rows, img_cols, opt.hist_len]

# --------------------------------------------------------
# Model
agent = model.model(input_shape)
if not agent:  # bail out before using the model
    exit(2)
agent.load_weights(model_path)

# --------------------------------------------------------
# setup a large transition table that is filled during training
maxlen = 100000
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, maxlen)

# --------------------------------------------------------
# Display
if opt.disp_on:
    win_all = None
    win_pob = None

# --------------------------------------------------------
# Config
accumulated_reward = 0
accumulated_reward_list = []
n_completed_episodes = 0
nepisodes = 0
epi_step = 0
from utils import Options
from simulator import Simulator
from transitionTable import TransitionTable

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE:
# this script assumes you generated your data with the get_data.py script.
# You are of course allowed to change it and generate data here, but if you
# want this to work out of the box, first run get_data.py
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, opt.valid_size,
                        opt.states_fil, opt.labels_fil)

# 1. train
######################################
# TODO implement your training here!
# you can get the full data from the transition table like this:
#
# both train_data and valid_data contain tuples of images and labels
# train_data = trans.get_train()
# valid_data = trans.get_valid()
#
# alternatively you can get one random mini batch like this
#
# for i in range(number_of_batches):
#     x, y = trans.sample_minibatch()
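# A minimal training-loop sketch following the comment above. It assumes
# trans.sample_minibatch() returns a (states, one-hot labels) pair and that
# some classifier 'model' with a Keras-style train_on_batch exists; both the
# batch budget and 'model' are placeholders, not part of the original code:
#
# number_of_batches = 1000
# for i in range(number_of_batches):
#     x, y = trans.sample_minibatch()
#     loss = model.train_on_batch(x, y)
#     if i % 100 == 0:
#         print("batch {} loss {}".format(i, loss))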
import numpy as np

# custom modules
from utils import Options, rgb2gray
from simulator import Simulator
from transitionTable import TransitionTable

display_on = True
win_all = None
win_pob = None

opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
state_with_history = np.zeros((opt.hist_len, opt.state_siz))

maxlen = 100000
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, maxlen)


def append_to_hist(state, obs):
    """Add an observation to the state history, shifting older frames back."""
    for i in range(state.shape[0] - 1):
        state[i, :] = state[i + 1, :]
    state[-1, :] = obs


def reshapeInputData(input_batch, no_batches):
    # flatten history and image dimensions into one feature vector per sample
    # (the original referenced an undefined 'historyLength'; opt.hist_len is used here)
    return input_batch.reshape((no_batches,
                                opt.hist_len * opt.pob_siz * opt.cub_siz
                                * opt.pob_siz * opt.cub_siz))
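# Usage sketch for append_to_hist with toy sizes (not the exercise's real
# dimensions): the buffer keeps the last hist_len observations, oldest first.
#
# hist = np.zeros((3, 2))               # 3 history slots, 2-dim observations
# for t in range(4):
#     append_to_hist(hist, np.full(2, t))
# # hist is now [[1., 1.], [2., 2.], [3., 3.]]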
def start_eval(self, speicherort, display):
    # 0. initialization; 'speicherort' is the path to the saved meta graph
    self.opt = Options()
    sim = SimulatorDeterministicStart(self.opt.map_ind, self.opt.cub_siz,
                                      self.opt.pob_siz, self.opt.act_num)
    imported_meta = tf.train.import_meta_graph(speicherort)

    win_all = None
    win_pob = None

    with tf.Session() as sess:
        with tf.variable_scope("new_testing_scope", reuse=tf.AUTO_REUSE):
            x = sess.graph.get_tensor_by_name('x:0')
            Q = tf.get_collection("Q")[0]
            imported_meta.restore(sess, tf.train.latest_checkpoint('./weights/'))
            maxlen = 100000

            # initialize the environment
            state = sim.newGame(self.opt.tgt_y, self.opt.tgt_x, 0)
            state_with_history = np.zeros((self.opt.hist_len, self.opt.state_siz))
            self.append_to_hist(state_with_history,
                                rgb2gray(state.pob).reshape(self.opt.state_siz))
            next_state_with_history = np.copy(state_with_history)
            trans = TransitionTable(self.opt.state_siz, self.opt.act_num,
                                    self.opt.hist_len, self.opt.minibatch_size,
                                    maxlen)

            epi_step = 0
            episodes = 0
            solved_episodes = 0
            step_sum = 0

            # evaluate until the simulator runs out of start positions
            while True:
                # goal check
                if state.terminal or epi_step >= self.opt.early_stop:
                    if state.terminal:
                        solved_episodes += 1
                    episodes += 1
                    step_sum += epi_step
                    epi_step = 0

                    # reset the game
                    try:
                        state = sim.newGame(self.opt.tgt_y, self.opt.tgt_x,
                                            episodes)
                    except Exception:
                        # no start positions left: evaluation is done
                        return (step_sum, solved_episodes)

                    # and reset the history
                    state_with_history[:] = 0
                    self.append_to_hist(state_with_history,
                                        rgb2gray(state.pob).reshape(self.opt.state_siz))
                    next_state_with_history = np.copy(state_with_history)

                    if display:
                        if win_all is None:
                            plt.subplot(121)
                            win_all = plt.imshow(state.screen)
                            plt.subplot(122)
                            win_pob = plt.imshow(state.pob)
                        else:
                            win_all.set_data(state.screen)
                            win_pob.set_data(state.pob)
                        plt.pause(self.opt.disp_interval)
                        plt.draw()

                epi_step += 1

                # format state for network input
                input_reshaped = self.reshapeInputData(state_with_history, 1)
                # create a batch of identical inputs (the network expects batches)
                input_batched = np.tile(input_reshaped,
                                        (self.opt.minibatch_size, 1, 1, 1))

                # take one action per step
                qvalues = sess.run(Q, feed_dict={x: input_batched})[0]  # first batch entry
                action = np.argmax(qvalues)
                action_onehot = trans.one_hot_action(action)

                # apply action
                next_state = sim.step(action)

                # append to history
                self.append_to_hist(next_state_with_history,
                                    rgb2gray(next_state.pob).reshape(self.opt.state_siz))
                # add to the transition table
                trans.add(state_with_history.reshape(-1), action_onehot,
                          next_state_with_history.reshape(-1),
                          next_state.reward, next_state.terminal)

                # mark next state as current state
                state_with_history = np.copy(next_state_with_history)
                state = next_state

                if display:
                    if win_all is None:
                        plt.subplot(121)
                        win_all = plt.imshow(state.screen)
                        plt.subplot(122)
                        win_pob = plt.imshow(state.pob)
                    else:
                        win_all.set_data(state.screen)
                        win_pob.set_data(state.pob)
                    plt.pause(self.opt.disp_interval)
                    plt.draw()
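# Hedged usage sketch; the owning class name and checkpoint path are
# placeholders, not the original code:
#
# evaluator = Evaluator()
# total_steps, solved = evaluator.start_eval('./weights/model.ckpt.meta',
#                                            display=False)
# print("solved {} episodes in {} steps".format(solved, total_steps))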
def train_model(opt=Options(), save_mdl_name='my_model.h5', epochs=10):
    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
    trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                            opt.minibatch_size, opt.valid_size,
                            opt.states_fil, opt.labels_fil)

    # 1. train
    [train_states, train_labels] = trans.get_train()
    [valid_states, valid_labels] = trans.get_valid()
    print("train states shape {}".format(train_states.shape))
    print("train labels shape {}".format(train_labels.shape))
    print("valid states shape {}".format(valid_states.shape))
    print("valid labels shape {}".format(valid_labels.shape))

    # reshape the flat states into (image, image, history) stacks
    train_shaped = train_states.reshape(train_states.shape[0],
                                        opt.cub_siz * opt.pob_siz,
                                        opt.cub_siz * opt.pob_siz,
                                        opt.hist_len).astype('float32')
    valid_shaped = valid_states.reshape(valid_states.shape[0],
                                        opt.cub_siz * opt.pob_siz,
                                        opt.cub_siz * opt.pob_siz,
                                        opt.hist_len).astype('float32')

    num_classes = 5
    input_shape = (opt.cub_siz * opt.pob_siz, opt.cub_siz * opt.pob_siz,
                   opt.hist_len)

    class AccuracyHistory(keras.callbacks.Callback):
        """Record training accuracy after every epoch."""
        def on_train_begin(self, logs={}):
            self.acc = []

        def on_epoch_end(self, epoch, logs={}):
            self.acc.append(logs.get('acc'))

    history = AccuracyHistory()

    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), strides=(2, 2),
                     activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(1000, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.SGD(lr=0.001),
                  metrics=['accuracy'])

    model.fit(train_shaped, train_labels,
              batch_size=trans.minibatch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(valid_shaped, valid_labels),
              callbacks=[history])

    # 2. save your trained model
    model.save(save_mdl_name)
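# Usage sketch: train, then reload the saved model with Keras' standard loader.
#
# from keras.models import load_model
# train_model(epochs=10, save_mdl_name='my_model.h5')
# model = load_model('my_model.h5')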
def play(args):
    # 0. initialization
    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
    model = model_create()
    # continue training from a previous model
    # model.load_weights(opt.weights_fil)

    # setup a transition table that is filled during training
    maxlen = opt.early_stop
    trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len, maxlen)

    if args.mode == "train":
        print("Training mode")
        if opt.disp_on:
            win_all = None
            win_pob = None

        epi_step = 0
        nepisodes = 0

        state = sim.newGame(opt.tgt_y, opt.tgt_x)
        state_with_history = np.zeros((opt.hist_len, opt.state_siz))
        append_to_hist(state_with_history,
                       rgb2gray(state.pob).reshape(opt.state_siz))
        next_state_with_history = np.copy(state_with_history)

        loss = 0.
        reward_acc = 0.
        loss_list = []
        reward_acc_list = []
        epi_step_list = []
        reward_acc_new = []
        reward_acc_track = 0

        start = timer()

        # Training
        for step in range(steps):
            if state.terminal or epi_step >= opt.early_stop:
                # train on the finished episode, weighting each sample by the
                # episode's accumulated reward
                state_batch, action_batch = trans.sample_minibatch(epi_step)
                state_batch = state_batch.reshape(epi_step, img_rows, img_cols,
                                                  opt.hist_len)
                reward_sample_weight = np.zeros((epi_step,),
                                                dtype=np.float32) + reward_acc
                loss = model.train_on_batch(state_batch, action_batch,
                                            sample_weight=reward_sample_weight)
                print('Episode %d, step %d, total reward %.5f, loss %.8f'
                      % (nepisodes, epi_step, reward_acc, loss))

                # keep track of these values
                epi_step_list.append(epi_step)
                reward_acc_list.append(reward_acc)
                # loss_list.append(loss)

                epi_step = 0
                nepisodes += 1

                # reset the game
                state = sim.newGame(opt.tgt_y, opt.tgt_x)
                # and reset the history
                state_with_history[:] = 0
                append_to_hist(state_with_history,
                               rgb2gray(state.pob).reshape(opt.state_siz))
                next_state_with_history = np.copy(state_with_history)
                reward_acc = 0
                loss = 0
                trans = TransitionTable(opt.state_siz, opt.act_num,
                                        opt.hist_len, maxlen)

                # save the weights every now and then
                if ((step + 1) % 1000000) == 0:
                    model.save_weights(opt.weights_fil, overwrite=True)
                    print('Saved weights')
                    with open(opt.network_fil, "w") as outfile:
                        json.dump(model.to_json(), outfile)

            epi_step += 1

            # sample an action from the policy network
            action = np.argmax(model.predict(
                state_with_history.reshape(1, img_rows, img_cols, opt.hist_len)))
            # one-hot encoding
            action_onehot = trans.one_hot_action(action)
            # take the next step in the environment according to the selected action
            next_state = sim.step(action)
            # append state to history
            append_to_hist(next_state_with_history,
                           rgb2gray(next_state.pob).reshape(opt.state_siz))
            # add to the transition table
            trans.add(state_with_history.reshape(-1), action_onehot)
            # mark next state as current state
            state_with_history = np.copy(next_state_with_history)
            state = next_state

            reward_acc += state.reward
            reward_acc_track += state.reward
            reward_acc_new.append(reward_acc_track)

            print("Total Steps:", step)
            print('Episode %d, step %d, action %d, reward %.5f'
                  % (nepisodes, epi_step, action, state.reward))

            if opt.disp_on:
                if win_all is None:
                    plt.subplot(121)
                    win_all = plt.imshow(state.screen)
                    plt.subplot(122)
                    win_pob = plt.imshow(state.pob)
                else:
                    win_all.set_data(state.screen)
                    win_pob.set_data(state.pob)
                plt.pause(opt.disp_interval)
                plt.draw()

        end = timer()
        sec = int(end - start)
        hours = sec // 3600
        rem = sec - hours * 3600
        mins = rem // 60
        secs = rem - mins * 60
        print('Training time:', hours, ':', mins, ':', secs)

        with open('episode_steps', 'wb') as f:
            pickle.dump(epi_step_list, f)
            print('saved episode steps')
        with open('accum_reward_episodes', 'wb') as f:
            pickle.dump(reward_acc_list, f)
            print('saved accumulated reward for each episode')
        # with open('loss', 'wb') as f:
        #     pickle.dump(loss_list, f)
        #     print('saved losses')
        with open('accum_reward_steps', 'wb') as f:
            pickle.dump(reward_acc_new, f)
            print('saved accumulated reward for all steps')

        # save the weights
        model.save_weights(opt.weights_fil, overwrite=True)
        print('Saved weights')
        with open(opt.network_fil, "w") as outfile:
            json.dump(model.to_json(), outfile)

    # run
    if args.mode == 'run':
        print("Running mode")
        model.load_weights(opt.weights_fil)
        print('weights loaded to the model')
        opt.disp_on = True

        win_all = None
        win_pob = None

        state = sim.newGame(opt.tgt_y, opt.tgt_x)
        state_with_history = np.zeros((opt.hist_len, opt.state_siz))
        append_to_hist(state_with_history,
                       rgb2gray(state.pob).reshape(opt.state_siz))
        next_state_with_history = np.copy(state_with_history)

        epi_step = 0
        nepisodes = 0
        n_reached = 0.0
        reward_acc_test = 0
        reward_acc_list_test = []

        print('Test Phase')
        for test_step in range(test_steps):
            if state.terminal or epi_step > opt.early_stop:
                if state.terminal:
                    print('Episode:', nepisodes + 1, 'agent reached')
                    n_reached += 1
                else:
                    print('Episode:', nepisodes + 1, 'agent failed')
                epi_step = 0
                nepisodes += 1
                # reset the game
                state = sim.newGame(opt.tgt_y, opt.tgt_x)
                # and reset the history
                state_with_history[:] = 0
                append_to_hist(state_with_history,
                               rgb2gray(state.pob).reshape(opt.state_siz))
                next_state_with_history = np.copy(state_with_history)

            epi_step += 1
            action = np.argmax(model.predict(
                state_with_history.reshape(1, img_rows, img_cols, opt.hist_len)))
            action_onehot = trans.one_hot_action(action)
            # take the next step according to the selected action
            next_state = sim.step(action)
            # append state to history
            append_to_hist(next_state_with_history,
                           rgb2gray(next_state.pob).reshape(opt.state_siz))
            # mark next state as current state
            state_with_history = np.copy(next_state_with_history)
            state = next_state
            reward_acc_test += state.reward

            if opt.disp_on:
                if win_all is None:
                    plt.subplot(121)
                    win_all = plt.imshow(state.screen)
                    plt.subplot(122)
                    win_pob = plt.imshow(state.pob)
                else:
                    win_all.set_data(state.screen)
                    win_pob.set_data(state.pob)
                plt.pause(opt.disp_interval)
                plt.draw()

        print('Agent reached the target', n_reached, 'from', nepisodes,
              'episodes', '(', (n_reached / nepisodes) * 100, '%)')
def test_model(opt=Options(), mdl_load_name='my_model.h5'):
    """Validate the agent against A*.

    Returns [success_rate, astar_diff].
    """
    # 0. initialization
    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
    print(opt.state_siz)
    trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                            opt.minibatch_size, opt.valid_size,
                            opt.states_fil, opt.labels_fil)
    state_length = opt.cub_siz * opt.pob_siz
    state_history = np.zeros((1, state_length, state_length, opt.hist_len))

    # load the trained model
    model = load_model(mdl_load_name)

    # 1. control loop
    if opt.disp_on:
        win_all = None
        win_pob = None
    epi_step = 0          # number of steps in the current episode
    nepisodes = 0         # total number of episodes executed
    nepisodes_solved = 0
    action = 0            # action to take, given by the network

    # start a new game
    state = sim.newGame(opt.tgt_y, opt.tgt_x)
    astar_num_steps = get_astar_steps(copy.deepcopy(sim))
    astar_num_steps_arr = []
    agent_num_steps_arr = []

    for step in range(opt.eval_steps):
        # check if the episode ended
        if state.terminal or epi_step >= opt.early_stop:
            if state.terminal:
                nepisodes_solved += 1
            print("astar_num_steps: {} agent steps: {}".format(astar_num_steps,
                                                               epi_step))
            astar_num_steps_arr.append(astar_num_steps)
            agent_num_steps_arr.append(epi_step)
            nepisodes += 1
            # start a new game
            state = sim.newGame(opt.tgt_y, opt.tgt_x)
            astar_num_steps = get_astar_steps(copy.deepcopy(sim))
            epi_step = 0
        else:
            # let the agent take its action
            gray_state = rgb2gray(state.pob).reshape(1, opt.state_siz)
            trans.add_recent(step, gray_state)
            recent = trans.get_recent().reshape(1, state_length, state_length,
                                                opt.hist_len)
            action = np.argmax(model.predict(recent))
            state = sim.step(action)
            epi_step += 1

        if step % opt.prog_freq == 0:
            print("step {}".format(step))

        if opt.disp_on:
            if win_all is None:
                plt.subplot(121)
                win_all = plt.imshow(state.screen)
                plt.subplot(122)
                win_pob = plt.imshow(state.pob)
            else:
                win_all.set_data(state.screen)
                win_pob.set_data(state.pob)
            plt.pause(opt.disp_interval)
            plt.draw()

    # 2. calculate statistics
    success_rate = float(nepisodes_solved) / float(nepisodes)
    print("success rate of this session: {}".format(success_rate))

    # 3. additional analysis
    agent_num_steps_arr = np.array(agent_num_steps_arr)
    astar_num_steps_arr = np.array(astar_num_steps_arr)
    # set to zero if the start position was already on the goal
    astar_num_steps_arr[astar_num_steps_arr == None] = 0
    # only compute the mean difference to A* for episodes where the goal was found
    print("shape before filtering:", astar_num_steps_arr.shape)
    astar_num_steps_arr = astar_num_steps_arr[agent_num_steps_arr < opt.early_stop]
    print("shape after filtering:", astar_num_steps_arr.shape)
    # filter agent steps with the same mask, after astar_num_steps_arr
    agent_num_steps_arr = agent_num_steps_arr[agent_num_steps_arr < opt.early_stop]
    astar_diff = np.mean(agent_num_steps_arr - astar_num_steps_arr)
    print("avg diff to astar: {}".format(astar_diff))
    return [success_rate, astar_diff]
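# Usage sketch:
#
# success_rate, astar_diff = test_model(mdl_load_name='my_model.h5')
# print("solved {:.1%}, {:+.2f} steps vs. A* on average".format(success_rate,
#                                                               astar_diff))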
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from random import randrange
from transitionTable import TransitionTable

# custom modules
from utils import Options, rgb2gray
from simulator import Simulator
from keras.models import Sequential
from keras.models import model_from_json

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, opt.valid_size,
                        opt.states_fil, opt.labels_fil)

# TODO: load your agent
# Hint: If using the standard tensorflow api it helps to write your own
# model.py file with the network configuration, including a function
# model.load(). You can use saver = tf.train.Saver() and
# saver.restore(sess, filename_ckpt)
json_file = open('model.json', 'r')
model = json_file.read()
json_file.close()
ml = model_from_json(model)
ml.load_weights("model.h5")
print("Model loaded")
agent = None
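# The checkpoint route from the hint above, as a minimal sketch; model.build()
# and the checkpoint filename are assumptions, not the original API:
#
# import tensorflow as tf
# import model                        # your own model.py with the network config
# sess = tf.Session()
# model.build()                       # assumed: recreates the graph
# saver = tf.train.Saver()
# saver.restore(sess, 'agent.ckpt')   # assumed checkpoint filename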
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE:
# In contrast to your last exercise you DO NOT generate data before training.
# Instead the TransitionTable is built up while you are training, to make sure
# that you get some data that corresponds roughly to the current policy
# of your agent
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)

# setup a large transition table that is filled during training
maxlen = 100000
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, maxlen)

if opt.disp_on:
    win_all = None
    win_pob = None

sess = tf.Session()

# placeholders: current state, action taken, best next action, next state,
# reward, and terminal flag
x = tf.placeholder(tf.float32, shape=(None, opt.hist_len * opt.state_siz))
u = tf.placeholder(tf.float32, shape=(opt.minibatch_size, opt.act_num))
ustar = tf.placeholder(tf.float32, shape=(opt.minibatch_size, opt.act_num))
xn = tf.placeholder(tf.float32, shape=(None, opt.hist_len * opt.state_siz))
r = tf.placeholder(tf.float32, shape=(opt.minibatch_size, 1))
term = tf.placeholder(tf.float32, shape=(opt.minibatch_size, 1))

Q_s = cnn.get_network_for_input_raw(x)
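# The placeholders above are typically combined into a Q-learning loss.
# A minimal sketch, assuming 'ustar' holds the one-hot best next action, that
# cnn.get_network_for_input_raw can also build the net for xn (weight sharing
# not shown), and with an illustrative discount factor:

discount = 0.99                            # assumed discount factor
Q_sn = cnn.get_network_for_input_raw(xn)   # assumed: Q-values of the next state

# Bellman target r + discount * max_a' Q(s', a'), cut off at terminal states
target = r + (1.0 - term) * discount * tf.reduce_sum(ustar * Q_sn,
                                                     axis=1, keepdims=True)
# squared TD error on the action actually taken
prediction = tf.reduce_sum(u * Q_s, axis=1, keepdims=True)
loss = tf.reduce_mean(tf.square(tf.stop_gradient(target) - prediction))
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)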
import model

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE:
# this script assumes you generated your data with the get_data.py script.
# You are of course allowed to change it and generate data here, but if you
# want this to work out of the box, first run get_data.py
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# 0. initialization
opt = Options()
sim = Simulator(opt.map_index, opt.cube_size, opt.partial_obs_size,
                opt.act_num)
trans = TransitionTable(opt.state_size, opt.act_num, opt.history_length,
                        opt.minibatch_size, opt.valid_size,
                        opt.states_file, opt.labels_file)
batch_size = 32
n_epochs = 10

if opt.num_models is None:
    default_weights_file = opt.model_folder + "model0_" + opt.weights_file
    default_network_file = opt.model_folder + "model0_" + opt.network_file

# 1. train
######################################
# TODO implement your training here!
# you can get the full data from the transition table like this:
#
# both train_data and valid_data contain tuples of images and labels
# train_data = trans.get_train()
import numpy as np
import matplotlib.pyplot as plt
from random import randrange

# custom modules
from utils import Options, rgb2gray
from simulator import Simulator
from transitionTable import TransitionTable
from keras.models import model_from_json

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
# FIXME Check if needed
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, opt.valid_size,
                        opt.states_fil, opt.labels_fil)

# TODO: load your agent
agent = model_from_json(open(opt.network_fil, 'r').read())
agent.load_weights(opt.weights_fil)
print('Loaded model from disk')

# 1. control loop
if opt.disp_on:
    win_all = None
    win_pob = None
epi_step = 0          # number of steps in the current episode
nepisodes = 0         # total number of episodes executed
nepisodes_solved = 0
action = 0            # action to take, given by the network
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.optimizers import SGD

# custom modules
from utils import Options
from simulator import Simulator
from transitionTable import TransitionTable

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, opt.valid_size,
                        opt.states_fil, opt.labels_fil)

# 1. train
# both train_data and valid_data contain tuples of images and labels;
# labels are one-hot encoded
train_data, train_labels = trans.get_train()
valid_data, valid_labels = trans.get_valid()

# reshape data to expose the history as channels of the screen image
# (25 matches opt.cub_siz * opt.pob_siz and 4 matches opt.hist_len for the
# default options)
train_data = train_data.reshape(train_data.shape[0], 25, 25, 4)
valid_data = valid_data.reshape(valid_data.shape[0], 25, 25, 4)

# activation function
activation = 'relu'
             figurewidth="\\matplotlibTotikzfigurewidth",
             figureheight="\\matplotlibTotikzfigureheight",
             strict=False)

N_EPISODES_TOTAL_TRAIN = 700  # number of total training game episodes
SAVE_AFTER_N_EPISODES = 50
DISP_PROGRESS_AFTER_N_EPISODES = 5  # show a full episode every n episodes if opt.disp_on is true
FULL_RANDOM_EPISODES = 5  # fully random episodes before training starts

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)

# setup a large transition table that is filled during training
maxlen = 100000
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, maxlen)

if opt.disp_on:
    win_all = None
    win_pob = None

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# You should prepare your network training here. I suggest to put this into a
input_shape_dense = int(opt.cub_siz * opt.pob_siz * opt.cub_siz * opt.pob_siz
                        * opt.hist_len)
input_shape_conv = (opt.cub_siz * opt.pob_siz, opt.cub_siz * opt.pob_siz,
                    opt.hist_len)
use_conv = True
agent = QMazeAgent(input_shape_conv, opt.act_num, use_conv=use_conv)
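# Hedged sketch of the exploration side of "prepare your network training":
# an epsilon-greedy action picker. QMazeAgent's act() method is an assumption,
# since only its constructor appears above.
import numpy as np

epsilon = 1.0        # start fully random, then decay toward greedy per episode
epsilon_min = 0.1
epsilon_decay = 0.999

def select_action(agent, state, episode):
    # explore during the warm-up episodes or with probability epsilon
    if episode < FULL_RANDOM_EPISODES or np.random.rand() < epsilon:
        return np.random.randint(opt.act_num)
    return agent.act(state)  # assumed QMazeAgent method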
# custom modules
from utils import Options, rgb2gray
from simulator import Simulator
from transitionTable import TransitionTable
from keras.models import model_from_json

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num,
                testing=True)
# FIXME Check if needed
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, opt.valid_size,
                        opt.states_fil, opt.labels_fil, opt.targets_fil)

# TODO: load your agent
agent = model_from_json(open(opt.network_fil, 'r').read())
agent.load_weights(opt.weights_fil)
print('Loaded model from disk')

# 1. control loop
if opt.disp_on:
    win_all = None
    win_pob = None
epi_step = 0          # number of steps in the current episode
nepisodes = 0         # total number of episodes executed
nepisodes_solved = 0
nepisodes_end_score = 0