################## saver = tf.train.Saver() init = tf.initialize_all_variables() summary = tf.merge_all_summaries() logger = tf.train.SummaryWriter(OUT_DIR, sess.graph) # initialize mdp state structure mdp = MDP_state(STATE_SIZE, FRAMES) # initialize replay buffer R = ReplayBuffer(MDP_STATE_SIZE, ACTION_SIZE, BUFFER_SIZE) buf = R.LoadBuffer(OUT_DIR + BUFFER_FILE) if buf: EXP_PROB = EPSILON populated = R.GetOccupency() print("Replay buffer loaded from disk, occupied: " + str(populated)) else: print("Creating new replay buffer") # load saved model ckpt = tf.train.get_checkpoint_state(OUT_DIR) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) print("Model loaded from disk") # define action discretization #max_a = env.action_space.high[0] #min_a = env.action_space.low[0] n = OUnoise(1, 0.5, 1)
# Epsilon-annealing setup for the exploration schedule.
# NOTE(review): `ann_fric` is presumably the per-step decrement that takes
# EXP_PROB from 1 down to EPSILON over ANNEALING steps — confirm against
# the training loop, which is outside this fragment.
ann_fric = (1-EPSILON)/ANNEALING
EXP_PROB = 1  # start fully exploratory; lowered below if a buffer is restored
# initialize environment
env = gym.make(ENVIRONMENT)
# initialize mdp state structure
mdp = MDP_state(STATE_SIZE, FRAMES)
# initialize replay buffer
# NOTE(review): action width is hard-coded to 1 here (scalar action),
# unlike the ACTION_SIZE-wide sibling fragment — verify intentional.
R = ReplayBuffer(MDP_STATE_SIZE, 1, BUFFER_SIZE)
buf = R.LoadBuffer(OUT_DIR+BUFFER_FILE)
if buf:
    # A restored buffer implies prior training: skip annealing and use
    # the exploration floor immediately.
    EXP_PROB = EPSILON
    populated = R.GetOccupency()
    print("Replay buffer loaded from disk, occupied: " + str(populated))
else:
    print("Creating new replay buffer")
# load saved model
ckpt = tf.train.get_checkpoint_state(OUT_DIR)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess,ckpt.model_checkpoint_path)
    print("Model loaded from disk")
# define action discretization
# Bounds of the (assumed 1-D continuous) Gym action space; `max_a` seeds
# the discretization helper. NOTE(review): assumes a Box action space —
# a Discrete space has no .high/.low.
max_a = env.action_space.high[0]
min_a = env.action_space.low[0]
act = actions(ACTION_SIZE, max_a)