Example #1
def main(unused_argv):
	opt = Options()
	sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
	trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
	                             opt.minibatch_size, opt.valid_size,
	                             opt.states_fil, opt.labels_fil)

	# 1. train
	######################################
	# Training: fetch the full data from the transition table.
	# Both train_data and valid_data are tuples of (images, labels).
	train_data = trans.get_train()
	valid_data = trans.get_valid()

	samples_train_data = np.float32(train_data[0])
	labels_train_data = np.float32(train_data[1])
	unhotted_labels_train_data = unhot(labels_train_data)

	samples_valid_data = np.float32(valid_data[0])
	labels_valid_data = np.float32(valid_data[1])
	unhotted_labels_valid_data = unhot(labels_valid_data)

	print("Shape of samples_train_data {}".format(samples_train_data.shape))
	print("Shape of labels_train_data {}".format(labels_train_data.shape))
	print("Shape of unhotted_labels_train_data {}".format(unhotted_labels_train_data.shape))

	classifier = cnn.get_estimator()

	# Train the model
	train_input_fn = tf.estimator.inputs.numpy_input_fn(
	    x={"x": samples_train_data},
	    y=unhotted_labels_train_data,
	    batch_size=100,
	    num_epochs=None,
	    shuffle=True)

	classifier.train(
	    input_fn=train_input_fn,
	    steps=1000
	)

	eval_input_fn = tf.estimator.inputs.numpy_input_fn(
		x={"x": samples_valid_data},
		y=unhotted_labels_valid_data,
		num_epochs=1,
		shuffle=False
	)

	eval_results = classifier.evaluate(input_fn=eval_input_fn)
	print(eval_results)
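
The snippet above relies on an unhot helper that is not shown. Assuming it simply converts one-hot label rows back to integer class indices (the form the estimator's numpy_input_fn expects for y), a minimal sketch could be:

import numpy as np

def unhot(one_hot_labels):
    # Assumed helper: map each one-hot row back to its class index,
    # e.g. [0, 0, 1, 0, 0] -> 2; int32 matches what the input_fn expects.
    return np.argmax(one_hot_labels, axis=1).astype(np.int32)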
Example #2
# Input layer
image_dimension = opt.cub_siz * opt.pob_siz
img_rows = img_cols = image_dimension
input_shape = [img_rows, img_cols, opt.hist_len]

# --------------------------------------------------------
# Model
agent = model.model(input_shape)
if agent is None:
    exit(2)
agent.load_weights(model_path)

# --------------------------------------------------------
# set up a large transition table that is filled during training
maxlen = 100000
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, maxlen)

# --------------------------------------------------------
# Display
if opt.disp_on:
    win_all = None
    win_pob = None

# --------------------------------------------------------
# Config
accumulated_reward = 0
accumulated_reward_list = []
n_completed_episodes = 0
nepisodes = 0
epi_step = 0
Example #3
from utils     import Options
from simulator import Simulator
from transitionTable import TransitionTable

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE:
# this script assumes you generated your data with the get_data.py script.
# You are of course allowed to change it and generate the data here, but if
# you want this to work out of the box, run get_data.py first.
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                             opt.minibatch_size, opt.valid_size,
                             opt.states_fil, opt.labels_fil)

# 1. train
######################################
# TODO implement your training here!
# you can get the full data from the transition table like this:
#
# # both train_data and valid_data contain tuples of images and labels
# train_data = trans.get_train()
# valid_data = trans.get_valid()
#
# alternatively you can get one random mini batch like this
#
# for i in range(number_of_batches):
#     x, y = trans.sample_minibatch()
#
# (a minimal training sketch follows below)
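
Since the training step is left as a TODO here, the sketch below shows one way it could be filled in. It is only an illustration: the small dense Keras classifier, the optimizer, and the epoch count are assumptions and not part of the original exercise; the data access follows the comment above.

# Illustrative training sketch (assumed), using the full-data interface above.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

train_data = trans.get_train()
valid_data = trans.get_valid()
x_train, y_train = np.float32(train_data[0]), np.float32(train_data[1])
x_valid, y_valid = np.float32(valid_data[0]), np.float32(valid_data[1])

model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(x_train.shape[1],)))
model.add(Dense(opt.act_num, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=opt.minibatch_size, epochs=10,
          validation_data=(x_valid, y_valid))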
Example #4
# custom modules
from utils import Options, rgb2gray
from simulator import Simulator
from transitionTable import TransitionTable

display_on = True

win_all = None
win_pob = None

opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
state_with_history = np.zeros((opt.hist_len, opt.state_siz))
maxlen = 100000
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, maxlen)


def append_to_hist(state, obs):
    """
    Add observation to the state.
    """
    for i in range(state.shape[0] - 1):
        state[i, :] = state[i + 1, :]
    state[-1, :] = obs


def reshapeInputData(input_batch, no_batches):
    # flatten each stacked-history observation into one row per batch entry
    return input_batch.reshape((no_batches, opt.hist_len * opt.cub_siz *
                                opt.pob_siz * opt.cub_siz * opt.pob_siz))
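
For illustration, the two helpers above would typically be combined when preparing a network input. A short hedged usage sketch; the network that would consume net_input is not part of this excerpt:

# Hypothetical usage of append_to_hist and reshapeInputData.
state = sim.newGame(opt.tgt_y, opt.tgt_x)
append_to_hist(state_with_history, rgb2gray(state.pob).reshape(opt.state_siz))
# flatten the (hist_len, state_siz) history into a single row for the network
net_input = reshapeInputData(state_with_history, 1)
print(net_input.shape)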
Example #5
from utils import Options
from simulator import Simulator
from transitionTable import TransitionTable

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE:
# this script assumes you generated your data with the get_data.py script.
# You are of course allowed to change it and generate the data here, but if
# you want this to work out of the box, run get_data.py first.
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, opt.valid_size, opt.states_fil,
                        opt.labels_fil)

# 1. train
######################################
# TODO implement your training here!
# you can get the full data from the transition table like this:
#
# # both train_data and valid_data contain tuples of images and labels
# train_data = trans.get_train()
# valid_data = trans.get_valid()
#
# alternatively you can get one random mini batch like this
#
# for i in range(number_of_batches):
#     x, y = trans.sample_minibatch()
#
# (a mini-batch training sketch follows below)
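
As a complement to the full-data route sketched earlier, the mini-batch interface mentioned in the comment can drive training directly. A hedged sketch, assuming model is any compiled Keras classifier that takes flattened states and one-hot action labels and was compiled with metrics=['accuracy']:

# Illustrative mini-batch training loop (assumed, not part of the original file).
number_of_batches = 1000
for i in range(number_of_batches):
    x, y = trans.sample_minibatch()
    loss, acc = model.train_on_batch(x, y)   # x: states, y: one-hot actions
    if i % 100 == 0:
        print("batch {}: loss {:.4f}, acc {:.4f}".format(i, loss, acc))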
Example #6
    def start_eval(self, speicherort, display):
        # speicherort: path to the saved meta-graph file ("Speicherort" = save location)
        # 0. initialization
        self.opt = Options()
        sim = SimulatorDeterministicStart(self.opt.map_ind, self.opt.cub_siz,
                                          self.opt.pob_siz, self.opt.act_num)
        imported_meta = tf.train.import_meta_graph(speicherort)

        win_all = None
        win_pob = None

        def_graph = tf.get_default_graph()

        with tf.Session() as sess:
            with tf.variable_scope("new_testing_scope", reuse=tf.AUTO_REUSE):

                x = sess.graph.get_tensor_by_name('x:0')
                Q = tf.get_collection("Q")[0]

                imported_meta.restore(sess,
                                      tf.train.latest_checkpoint('./weights/'))

                maxlen = 100000

                # initialize the environment
                state = sim.newGame(self.opt.tgt_y, self.opt.tgt_x, 0)
                state_with_history = np.zeros(
                    (self.opt.hist_len, self.opt.state_siz))
                self.append_to_hist(
                    state_with_history,
                    rgb2gray(state.pob).reshape(self.opt.state_siz))
                next_state_with_history = np.copy(state_with_history)
                trans = TransitionTable(self.opt.state_siz, self.opt.act_num,
                                        self.opt.hist_len,
                                        self.opt.minibatch_size, maxlen)
                epi_step = 0

                episodes = 0

                solved_episodes = 0

                step_sum = 0
                # evaluate until the simulator runs out of start configurations
                while True:

                    # goal check
                    if state.terminal or epi_step >= self.opt.early_stop:
                        if state.terminal:
                            solved_episodes += 1
                        episodes += 1
                        step_sum = step_sum + epi_step
                        epi_step = 0

                        # reset the game; when the simulator has no further
                        # start configurations, newGame raises and evaluation ends
                        try:
                            state = sim.newGame(self.opt.tgt_y, self.opt.tgt_x,
                                                episodes)
                        except Exception:
                            return (step_sum, solved_episodes)

                        # and reset the history
                        state_with_history[:] = 0
                        self.append_to_hist(
                            state_with_history,
                            rgb2gray(state.pob).reshape(self.opt.state_siz))
                        next_state_with_history = np.copy(state_with_history)

                        if display:
                            if win_all is None:
                                plt.subplot(121)
                                win_all = plt.imshow(state.screen)
                                plt.subplot(122)
                                win_pob = plt.imshow(state.pob)
                            else:
                                win_all.set_data(state.screen)
                                win_pob.set_data(state.pob)
                            plt.pause(self.opt.disp_interval)
                            plt.draw()

                    epi_step += 1

                    # format state for network input
                    input_reshaped = self.reshapeInputData(
                        state_with_history, 1)
                    # create batch of input state
                    input_batched = np.tile(input_reshaped,
                                            (self.opt.minibatch_size, 1, 1, 1))

                    ### take one action per step
                    qvalues = sess.run(Q, feed_dict={
                        x: input_batched
                    })[0]  # take the first batch entry
                    action = np.argmax(qvalues)
                    action_onehot = trans.one_hot_action(action)
                    # apply action
                    next_state = sim.step(action)
                    # append to history
                    self.append_to_hist(
                        next_state_with_history,
                        rgb2gray(next_state.pob).reshape(self.opt.state_siz))
                    # add to the transition table
                    trans.add(state_with_history.reshape(-1), action_onehot,
                              next_state_with_history.reshape(-1),
                              next_state.reward, next_state.terminal)
                    # mark next state as current state
                    state_with_history = np.copy(next_state_with_history)
                    state = next_state

                    if display:
                        if win_all is None:
                            plt.subplot(121)
                            win_all = plt.imshow(state.screen)
                            plt.subplot(122)
                            win_pob = plt.imshow(state.pob)
                        else:
                            win_all.set_data(state.screen)
                            win_pob.set_data(state.pob)
                        plt.pause(self.opt.disp_interval)
                        plt.draw()
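
start_eval returns the accumulated step count and the number of solved episodes, so a caller can derive simple statistics from it. A hypothetical call; the owning class name and the checkpoint path are assumptions:

# Hypothetical caller for start_eval (class name and path are assumed).
evaluator = Evaluator()
step_sum, solved = evaluator.start_eval('./weights/model.meta', display=False)
print("solved episodes: {}, total steps: {}".format(solved, step_sum))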
Example #7
def train_model(opt=Options(), save_mdl_name='my_model.h5', epochs=10):
    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
    trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                            opt.minibatch_size, opt.valid_size, opt.states_fil,
                            opt.labels_fil)

    # 1. train
    [train_states, train_labels] = trans.get_train()
    [valid_states, valid_labels] = trans.get_valid()
    print("train data shape {}", train_states.shape)
    print("train data shape {}", train_labels.shape)

    print("valid data shape {}", valid_states.shape)
    print("valid data shape {}", valid_labels.shape)

    train_shaped = train_states.reshape(train_states.shape[0],
                                        opt.cub_siz * opt.pob_siz,
                                        opt.cub_siz * opt.pob_siz,
                                        opt.hist_len)
    valid_shaped = valid_states.reshape(valid_states.shape[0],
                                        opt.cub_siz * opt.pob_siz,
                                        opt.cub_siz * opt.pob_siz,
                                        opt.hist_len)

    #train_shaped = tf.reshape(train_states, [-1,25, 25, 4])
    train_shaped = train_shaped.astype('float32')
    valid_shaped = valid_shaped.astype('float32')
    num_classes = 5

    input_shape = (opt.cub_siz * opt.pob_siz, opt.cub_siz * opt.pob_siz,
                   opt.hist_len)

    # print(train_shaped.shape)

    class AccuracyHistory(keras.callbacks.Callback):
        def on_train_begin(self, logs={}):
            self.acc = []

        def on_epoch_end(self, epoch, logs={}):
            self.acc.append(logs.get('acc'))

    history = AccuracyHistory()

    model = Sequential()
    model.add(
        Conv2D(32,
               kernel_size=(3, 3),
               strides=(2, 2),
               activation='relu',
               input_shape=input_shape))
    #model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    #model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(1000, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    #keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    #model.compile(loss=keras.losses.categorical_crossentropy,
    #              optimizer=keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0),
    #              metrics=['accuracy'])
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.SGD(lr=0.001),
                  metrics=['accuracy'])

    model.fit(train_shaped,
              train_labels,
              batch_size=trans.minibatch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(valid_shaped, valid_labels),
              callbacks=[history])

    # 2. save your trained model
    model.save(save_mdl_name)
def play(args):
    # 0. initialization
    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
    model = model_create()

    #continue training from a previous model
    ##model.load_weights(opt.weights_fil)

    # setup a transition table that is filled during training
    maxlen = opt.early_stop
    ##print('weights loaded to the model')
    trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len, maxlen)
    if args.mode == "train":
        print "Training mode"

        if opt.disp_on:
            win_all = None
            win_pob = None

        epi_step = 0
        nepisodes = 0

        state = sim.newGame(opt.tgt_y, opt.tgt_x)
        state_with_history = np.zeros((opt.hist_len, opt.state_siz))
        append_to_hist(state_with_history,
                       rgb2gray(state.pob).reshape(opt.state_siz))
        next_state_with_history = np.copy(state_with_history)
        loss = 0.
        reward_acc = 0.

        loss_list = []
        reward_acc_list = []
        epi_step_list = []
        reward_acc_new = []
        reward_acc_track = 0
        start = timer()
        #Training
        for step in range(steps):

            if state.terminal or epi_step >= opt.early_stop:
                state_batch, action_batch = trans.sample_minibatch(epi_step)
                state_batch = state_batch.reshape(epi_step, img_rows, img_cols,
                                                  opt.hist_len)
                reward_sample_weight = np.zeros(
                    (epi_step, ), dtype=np.float32) + reward_acc
                loss = model.train_on_batch(state_batch,
                                            action_batch,
                                            sample_weight=reward_sample_weight)
                print('Episode %d, step %d, total reward %.5f, loss %.8f' %
                      (nepisodes, epi_step, reward_acc, loss))

                # keep track of these values
                epi_step_list.append(epi_step)
                reward_acc_list.append(reward_acc)
                ##                loss_list.append(loss)

                epi_step = 0
                nepisodes += 1
                # reset the game
                state = sim.newGame(opt.tgt_y, opt.tgt_x)
                # and reset the history
                state_with_history[:] = 0
                append_to_hist(state_with_history,
                               rgb2gray(state.pob).reshape(opt.state_siz))
                next_state_with_history = np.copy(state_with_history)
                reward_acc = 0
                loss = 0
                trans = TransitionTable(opt.state_siz, opt.act_num,
                                        opt.hist_len, maxlen)

            # Save the weights every now and then
            if (step + 1) % 1000000 == 0:
                model.save_weights(opt.weights_fil, overwrite=True)
                print('Saved weights')
                with open(opt.network_fil, "w") as outfile:
                    json.dump(model.to_json(), outfile)

            epi_step += 1
            #sample an action from the policy network
            action = np.argmax(
                model.predict(
                    (state_with_history).reshape(1, img_rows, img_cols,
                                                 opt.hist_len)))

            #one hot encoding
            action_onehot = trans.one_hot_action(action)

            #Take next step in the environment according to the action selected
            next_state = sim.step(action)

            # append state to history
            append_to_hist(next_state_with_history,
                           rgb2gray(next_state.pob).reshape(opt.state_siz))

            #add to the transition table
            trans.add(state_with_history.reshape(-1), action_onehot)
            # mark next state as current state
            state_with_history = np.copy(next_state_with_history)
            state = next_state
            reward_acc += state.reward
            reward_acc_track += state.reward
            reward_acc_new.append(reward_acc_track)
            print "Total Steps:", step
            print('Episode %d, step %d, action %d, reward %.5f' %
                  (nepisodes, epi_step, action, state.reward))

            if opt.disp_on:
                if win_all is None:
                    plt.subplot(121)
                    win_all = plt.imshow(state.screen)
                    plt.subplot(122)
                    win_pob = plt.imshow(state.pob)
                else:
                    win_all.set_data(state.screen)
                    win_pob.set_data(state.pob)
                plt.pause(opt.disp_interval)
                plt.draw()

        end = timer()
        sec = int(end - start)
        hours = sec // 3600
        rem = sec - hours * 3600
        mins = rem // 60
        secs = rem - mins * 60

        print('Training time: {}:{:02d}:{:02d}'.format(hours, mins, secs))

        with open('episode_steps', 'wb') as f:
            pickle.dump(epi_step_list, f)
            print('saved episode steps')

        with open('accum_reward_episodes', 'wb') as f:
            pickle.dump(reward_acc_list, f)
            print('saved accumulated reward for each episode')


##        with open('loss','wb') as f:
##            pickle.dump(loss_list,f)
##            print 'saved losses'

        with open('accum_reward_steps', 'wb') as f:
            pickle.dump(reward_acc_new, f)
            print('saved accumulated reward for all steps')

        #Save the weights
        model.save_weights(opt.weights_fil, overwrite=True)
        print('Saved weights')
        with open(opt.network_fil, "w") as outfile:
            json.dump(model.to_json(), outfile)

    ### run
    if args.mode == 'run':

        print "Running mode"
        model.load_weights(opt.weights_fil)
        print('weights loaded to the model')
        opt.disp_on = True
        win_all = None
        win_pob = None
        state = sim.newGame(opt.tgt_y, opt.tgt_x)
        state_with_history = np.zeros((opt.hist_len, opt.state_siz))
        append_to_hist(state_with_history,
                       rgb2gray(state.pob).reshape(opt.state_siz))
        next_state_with_history = np.copy(state_with_history)
        epi_step = 0
        nepisodes = 0
        n_reached = 0.0
        reward_acc_test = 0
        reward_acc_list_test = []

        print('Test Phase')
        for test_step in range(test_steps):

            if state.terminal or epi_step > opt.early_stop:
                if state.terminal:
                    print('Episode:', nepisodes + 1, 'agent reached the target')
                    n_reached += 1
                else:
                    print('Episode:', nepisodes + 1, 'agent failed')
                epi_step = 0
                nepisodes += 1
                # reset the game
                state = sim.newGame(opt.tgt_y, opt.tgt_x)
                # and reset the history
                state_with_history[:] = 0
                append_to_hist(state_with_history,
                               rgb2gray(state.pob).reshape(opt.state_siz))
                next_state_with_history = np.copy(state_with_history)

            epi_step += 1
            action = np.argmax(
                model.predict(
                    (state_with_history).reshape(1, img_rows, img_cols,
                                                 opt.hist_len)))
            action_onehot = trans.one_hot_action(action)
            #Take next step according to the action selected
            next_state = sim.step(action)
            # append state to history
            append_to_hist(next_state_with_history,
                           rgb2gray(next_state.pob).reshape(opt.state_siz))

            # mark next state as current state
            state_with_history = np.copy(next_state_with_history)
            state = next_state
            reward_acc_test += state.reward

            if opt.disp_on:
                if win_all is None:
                    plt.subplot(121)
                    win_all = plt.imshow(state.screen)
                    plt.subplot(122)
                    win_pob = plt.imshow(state.pob)
                else:
                    win_all.set_data(state.screen)
                    win_pob.set_data(state.pob)
                plt.pause(opt.disp_interval)
                plt.draw()
        print('Agent reached the target in {:.0f} of {} episodes ({:.1f}%)'.format(
            n_reached, nepisodes, (n_reached / nepisodes) * 100))
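
play expects an args object with a mode attribute ('train' or 'run'). A hypothetical entry point wiring that up with argparse follows; the flag name is an assumption, and it presumes the surrounding module defines the globals play relies on (opt, model_create, steps, test_steps, and so on):

# Hypothetical entry point for play() (not part of the original script).
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', choices=['train', 'run'], default='train')
    play(parser.parse_args())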
Example #9
def test_model(opt=Options(), mdl_load_name='my_model.h5'):
    """Validate the trained model against an A* baseline.

    Returns [success_rate, astar_diff].
    """
    # 0. initialization

    sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
    print(opt.state_siz)
    trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                                 opt.minibatch_size, opt.valid_size,
                                 opt.states_fil, opt.labels_fil)
    state_length = opt.cub_siz * opt.pob_siz
    state_history = np.zeros((1, state_length, state_length, opt.hist_len))
    # load the trained model
    model = load_model(mdl_load_name)

    # 1. control loop
    if opt.disp_on:
        win_all = None
        win_pob = None
    epi_step = 0    # #steps in current episode
    nepisodes = 0   # total #episodes executed
    nepisodes_solved = 0
    action = 0     # action to take given by the network

    # start a new game
    state = sim.newGame(opt.tgt_y, opt.tgt_x)
    astar_num_steps = get_astar_steps(copy.deepcopy(sim))

    astar_num_steps_arr = []
    agent_num_steps_arr = []

    for step in range(opt.eval_steps):

        # check if episode ended
        if state.terminal or epi_step >= opt.early_stop:
            if state.terminal:
                nepisodes_solved += 1
            print("astar_num_steps: {} agent steps: {} ".format(astar_num_steps,epi_step))
            astar_num_steps_arr.append(astar_num_steps)
            agent_num_steps_arr.append(epi_step)
            nepisodes += 1
            # start a new game
            state = sim.newGame(opt.tgt_y, opt.tgt_x)
            astar_num_steps = get_astar_steps(copy.deepcopy(sim))

            epi_step = 0
        else:
            # let the agent choose its action based on the recent history
            gray_state = rgb2gray(state.pob)
            gray_state = gray_state.reshape(1, opt.state_siz)
            trans.add_recent(step, gray_state)
            recent = trans.get_recent()
            recent_shaped = recent.reshape(1, state_length, state_length,
                                           opt.hist_len)
            action = np.argmax(model.predict(recent_shaped))
            state = sim.step(action)

            epi_step += 1

        if step % opt.prog_freq == 0:
            print("step {}".format(step))

        if opt.disp_on:
            if win_all is None:
                plt.subplot(121)
                win_all = plt.imshow(state.screen)
                plt.subplot(122)
                win_pob = plt.imshow(state.pob)
            else:
                win_all.set_data(state.screen)
                win_pob.set_data(state.pob)
            plt.pause(opt.disp_interval)
            plt.draw()

    # 2. calculate statistics
    success_rate = float(nepisodes_solved) / float(nepisodes)
    print("this session was: {}".format(success_rate))
    # 3. additional analysis

    agent_num_steps_arr = np.array(agent_num_steps_arr)
    astar_num_steps_arr = np.array(astar_num_steps_arr)
    # set to zero if the start position was already on the goal
    astar_num_steps_arr[astar_num_steps_arr == None] = 0
    # only compute the mean difference to A* for episodes where the goal was found
    print("shape before filtering", astar_num_steps_arr.shape)
    astar_num_steps_arr = astar_num_steps_arr[agent_num_steps_arr < opt.early_stop]
    print("shape after filtering", astar_num_steps_arr.shape)
    # filter the agent array only after the A* array (the mask uses the
    # unfiltered agent step counts)
    agent_num_steps_arr = agent_num_steps_arr[agent_num_steps_arr < opt.early_stop]
    astar_diff = np.mean(agent_num_steps_arr - astar_num_steps_arr)
    print("avg diff to astar: {}".format(astar_diff))
    return [success_rate, astar_diff]
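
Because test_model returns both the success rate and the mean step difference to A*, a call site is straightforward. A hypothetical example; the model file name is only illustrative:

# Hypothetical call site for test_model.
success_rate, astar_diff = test_model(mdl_load_name='my_model.h5')
print("success rate: {:.2f}, mean extra steps vs. A*: {:.2f}".format(
    success_rate, astar_diff))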
Example #10
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from random import randrange
from transitionTable import TransitionTable
# custom modules
from utils     import Options, rgb2gray
from simulator import Simulator
from keras.models import Sequential
from keras.models import model_from_json

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                             opt.minibatch_size, opt.valid_size,
                             opt.states_fil, opt.labels_fil)

# TODO: load your agent
# Hint: if you use the standard TensorFlow API it helps to write your own
# model.py file with the network configuration, including a function model.load().
# You can use saver = tf.train.Saver() and saver.restore(sess, filename_cpkt)
with open('model.json', 'r') as json_file:
    model = json_file.read()
ml = model_from_json(model)
ml.load_weights("model.h5")
print("Model loaded")
print("Model loaded")

agent = None
Example #11

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE:
# In contrast to your last exercise you DO NOT generate data before training;
# instead, the TransitionTable is built up while you are training, to make
# sure that you get data that corresponds roughly to the current policy of
# your agent.
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
# set up a large transition table that is filled during training
maxlen = 100000
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, maxlen)

if opt.disp_on:
    win_all = None
    win_pob = None

sess = tf.Session()

x = tf.placeholder(tf.float32, shape=(None, opt.hist_len * opt.state_siz))
u = tf.placeholder(tf.float32, shape=(opt.minibatch_size, opt.act_num))
ustar = tf.placeholder(tf.float32, shape=(opt.minibatch_size, opt.act_num))
xn = tf.placeholder(tf.float32, shape=(None, opt.hist_len * opt.state_siz))
r = tf.placeholder(tf.float32, shape=(opt.minibatch_size, 1))
term = tf.placeholder(tf.float32, shape=(opt.minibatch_size, 1))

Q_s = cnn.get_network_for_input_raw(x)
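
The placeholders above are the usual ingredients of the Q-learning target r + gamma * max_a' Q(s', a'). A hedged sketch of how they could be combined into a training objective follows; it assumes cnn.get_network_for_input_raw can be applied to xn with shared weights, and that gamma is a discount factor chosen by the user, neither of which is shown in the original excerpt.

# Illustrative TF1-style Q-learning loss (assumed continuation, not original code).
gamma = 0.99
Q_sn = cnn.get_network_for_input_raw(xn)   # assumed: shares weights with Q_s

# Q(s, a) for the action that was actually taken (u is one-hot)
q_taken = tf.reduce_sum(Q_s * u, axis=1)
# bootstrapped target, cut off at terminal states via the term flag
max_next = tf.reduce_max(Q_sn, axis=1)
target = tf.squeeze(r, axis=1) + gamma * (1.0 - tf.squeeze(term, axis=1)) * max_next
loss = tf.reduce_mean(tf.square(q_taken - tf.stop_gradient(target)))
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)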
Example #12
import model

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# NOTE:
# this script assumes you generated your data with the get_data.py script.
# You are of course allowed to change it and generate the data here, but if
# you want this to work out of the box, run get_data.py first.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# 0. initialization
opt = Options()

sim = Simulator(opt.map_index, opt.cube_size, opt.partial_obs_size,
                opt.act_num)
trans = TransitionTable(opt.state_size, opt.act_num, opt.history_length,
                        opt.minibatch_size, opt.valid_size, opt.states_file,
                        opt.labels_file)

batch_size = 32
n_epochs = 10
if opt.num_models is None:
    default_weights_file = opt.model_folder + "model0_" + opt.weights_file
    default_network_file = opt.model_folder + "model0_" + opt.network_file

# 1. train
######################################
# TODO implement your training here!
# you can get the full data from the transition table like this:
#
# # both train_data and valid_data contain tuples of images and labels
# train_data = trans.get_train()
Example #13
import numpy as np
import matplotlib.pyplot as plt
from random import randrange
# custom modules
from utils import Options, rgb2gray
from simulator import Simulator
from transitionTable import TransitionTable
from keras.models import model_from_json

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
# FIXME Check if needed
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, opt.valid_size, opt.states_fil,
                        opt.labels_fil)

# TODO: load your agent
agent = model_from_json(open(opt.network_fil, 'r').read())
agent.load_weights(opt.weights_fil)
print('Loaded model from disk')

# 1. control loop
if opt.disp_on:
    win_all = None
    win_pob = None
epi_step = 0  # #steps in current episode
nepisodes = 0  # total #episodes executed
nepisodes_solved = 0
action = 0  # action to take given by the network
Example #14
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.optimizers import SGD

# custom modules
from utils     import Options
from simulator import Simulator
from transitionTable import TransitionTable

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                             opt.minibatch_size, opt.valid_size,
                             opt.states_fil, opt.labels_fil)

# 1. train
# both train_data and valid_data contain tuples of images and labels
# labels are one hot encoded
train_data, train_labels = trans.get_train()
valid_data, valid_labels = trans.get_valid()

# reshape the data to image form (25 x 25 pixels) with the history as channels
train_data = train_data.reshape(train_data.shape[0], 25, 25, 4)
valid_data = valid_data.reshape(valid_data.shape[0], 25, 25, 4)

# Activation function
activation = 'relu'
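
The excerpt ends just before the network definition. A hedged continuation using the imports and the activation variable above could look like the sketch below; the layer sizes, optimizer settings, and epoch count are illustrative assumptions, not the original author's architecture.

# Illustrative continuation (assumed, not from the original file).
model = Sequential()
model.add(Conv2D(32, (3, 3), activation=activation, input_shape=(25, 25, 4)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation=activation))
model.add(Dropout(0.5))
model.add(Dense(opt.act_num, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.01),
              metrics=['accuracy'])
model.fit(train_data, train_labels, batch_size=opt.minibatch_size, epochs=10,
          validation_data=(valid_data, valid_labels))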
Example #15
                  figurewidth="\\matplotlibTotikzfigurewidth",
                  figureheight="\\matplotlibTotikzfigureheight",
                  strict=False)


N_EPISODES_TOTAL_TRAIN = 700  # total number of training game episodes
SAVE_AFTER_N_EPISODES = 50
DISP_PROGRESS_AFTER_N_EPISODES = 5  # show a full episode every n episodes if opt.disp_on is true
FULL_RANDOM_EPISODES = 5  # number of fully random episodes before training

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
# set up a large transition table that is filled during training
maxlen = 100000
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, maxlen)

if opt.disp_on:
    win_all = None
    win_pob = None

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# You should prepare your network training here. I suggest putting this into a
# separate class or function.
input_shape_dense = int(opt.cub_siz * opt.pob_siz * opt.cub_siz * opt.pob_siz *
                        opt.hist_len)
input_shape_conv = (opt.cub_siz * opt.pob_siz, opt.cub_siz * opt.pob_siz,
                    opt.hist_len)

use_conv = True
agent = QMazeAgent(input_shape_conv, opt.act_num, use_conv=use_conv)
Example #16
# custom modules
from utils import Options, rgb2gray
from simulator import Simulator
from transitionTable import TransitionTable
from keras.models import model_from_json

# 0. initialization
opt = Options()
sim = Simulator(opt.map_ind,
                opt.cub_siz,
                opt.pob_siz,
                opt.act_num,
                testing=True)
# FIXME Check if needed
trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
                        opt.minibatch_size, opt.valid_size, opt.states_fil,
                        opt.labels_fil, opt.targets_fil)

# TODO: load your agent
agent = model_from_json(open(opt.network_fil, 'r').read())
agent.load_weights(opt.weights_fil)
print('Loaded model from disk')

# 1. control loop
if opt.disp_on:
    win_all = None
    win_pob = None
epi_step = 0  # #steps in current episode
nepisodes = 0  # total #episodes executed
nepisodes_solved = 0
nepisodes_end_score = 0