# Training-script fragment: restore a saved run, build the per-round epsilon
# schedule, and begin the epsilon-greedy episode loop.
#
# NOTE(review): the statements below arrived collapsed onto one physical line,
# so the original indentation is not visible here. In particular it cannot be
# determined from this fragment whether `else:` pairs with
# `if opt.epsilon < 1:` or with an enclosing `if` above this chunk -- confirm
# against the properly formatted file before restructuring.
#
# What the statements do, in order:
#   * model.load(model_filename) restores the model, and `res` is unpickled
#     from results_filename.
#     NOTE(review): pickle.load must only be used on trusted files, and the
#     file handle name `input` shadows the builtin of the same name.
#   * The starting epsilon is res.epsilon, replaced by opt.epsilon when
#     opt.epsilon < 1.
#   * `epsilons` is an np.arange schedule with step (min_epsilon - 1) / n_train.
#     One variant starts at the restored/overridden epsilon and is clamped
#     from below with np.maximum(..., min_epsilon); the `else` variant starts
#     at 1 and stops before min_epsilon. Presumably min_epsilon < 1, so the
#     step is negative and epsilon decays -- TODO confirm.
#   * The inline "# size of memory" comment does not describe any adjacent
#     statement in this fragment; it presumably refers to code outside this
#     view.
#   * For each training round i_train: epsilon = epsilons[i_train] is stored
#     back into res.epsilon and printed rounded to 2 decimals, and the
#     per-round lists lengths_i_train / scores_i_train are reset.
#   * For each episode: the step counter `i` and the `done` flag are reset,
#     env.reset() produces the grid, which is reshaped to
#     (1, n_channels, env.nrow, env.ncol), and t0 records the start time.
#   * Each step (at most imax per episode) copies the grid into `source`;
#     when epsilon >= np.random.rand() a random action index in {0, 1, 2, 3}
#     is drawn with np.random.randint(4).
#     NOTE(review): the action count 4 is hard-coded.
#   * The remainder of the step loop lies beyond this fragment.
model.load(model_filename) with open(results_filename, 'rb') as input: res = pickle.load(input) epsilon = res.epsilon if opt.epsilon < 1: epsilon = opt.epsilon epsilons = np.maximum( np.arange(epsilon, epsilon - 1 + min_epsilon, (-1 + min_epsilon) / n_train), min_epsilon) else: epsilons = np.arange(1, min_epsilon, (-1 + min_epsilon) / n_train) # size of memory for i_train in range(n_train): epsilon = epsilons[i_train] res.epsilon = epsilon print("Training: round {}, epsilon = {}".format(i_train, round(epsilon, 2))) lengths_i_train = [] scores_i_train = [] for i_episode in range(n_episodes): i = 0 done = False grid = env.reset() grid = grid.reshape((1, n_channels, env.nrow, env.ncol)) t0 = time.time() while i < imax: i += 1 source = grid.copy() if epsilon >= np.random.rand(): action = np.random.randint(4)