def _init():
    """Build one environment instance and give it a rank-unique seed.

    Standard SubprocVecEnv worker-factory pattern: relies on the
    enclosing scope for ``environment``, ``inputfile``, ``gamma``,
    ``seed`` and ``rank``.
    """
    worker_env = environment(inputfile, gamma)
    worker_env.seed(seed + rank)
    return worker_env
        # NOTE(review): this chunk starts mid-method — the enclosing ``def``
        # (a model builder on DQNAgent, judging by ``self.action_size``) is
        # outside this view, so the indentation here is reconstructed.
        # Hidden layer; he_uniform init is the conventional pairing for relu.
        model.add(Dense(64, input_dim=state_size, activation='relu', kernel_initializer='he_uniform'))
        model.add(Dropout(dropout))
        # Softmax head over the discrete action space (policy output).
        model.add(Dense(self.action_size, activation='softmax'))
        #model = Model(inputs=state_input, outputs=output)
        #model = Model(input=[state_input, advantage], output=[output])
        # Categorical cross-entropy on the softmax output; ``LR_actor`` is
        # presumably a module-level constant — confirm it is defined above.
        model.compile(optimizer=Adam(lr=LR_actor), loss='categorical_crossentropy')
        return model

if __name__ == "__main__":
    # Script entry point: build the environment and agent, plus the
    # bookkeeping lists for the training loop (the loop itself continues
    # past this chunk — note the commented-out ``while`` below).
    env = environment(inputfile, gamma)
    state_size = env.flatlen #[0]
    action_size = env.Ilen * env.Jlen #.n
    agent = DQNAgent(state_size, action_size)
    agent.batch_size = env.turns #for one pass on policy algorithms
    #agent.load("model.h5")
    done = False
    episodelist = list()
    scorelist = list()
    output = list()
    # e counts episodes — TODO confirm against the loop body outside this view.
    e = 0
    # #while time.time()<end:
#LR_critic=inputarray.loc[idx].LR_critic LR = inputarray.loc[idx].LR batch_size = int(inputarray.loc[idx].batch_size) #memcap=int(inputarray.loc[idx].memcap) inputfile = inputarray.loc[idx].inputfile gamma = inputarray.loc[idx].gamma #dropout=float(inputarray.loc[idx].dropout) start = time.time() end = start + 11.5 * 60 * 60 #inputfile="BM_easy6x6x4.xlsx" #LR=0.000001 #gamma=0.995 #batch_size=32 #n_steps=5 inspectenv = environment(inputfile, gamma) episodetimesteps = int(inspectenv.turns) LR_s = str(LR).split('.')[1] inputfile_s = inputfile.split('.')[0] gamma_s = str(gamma).split('.')[1] class TimeLimit(BaseCallback): """ Callback for saving a model (the check is done every ``check_freq`` steps) based on the training reward (in practice, we recommend using ``EvalCallback``). :param check_freq: (int) :param log_dir: (str) Path to the folder where the model will be saved.
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.common import set_global_seeds, make_vec_env
from stable_baselines.common.callbacks import BaseCallback, CallbackList, EvalCallback
from stable_baselines import A2C
from OPenv_gym import environment

# Evaluation script: load a trained A2C model and roll it out on a
# rendering environment for ~half the board size in steps.
start = time.time()
# 2 h wall-clock budget — not referenced below in this chunk; TODO confirm
# it is used further down the file.
end = start + 2 * 60 * 60
inputfile = "BM_central15x15x5.xlsx"
LR = 0.001
LR2 = 0.000001
gamma = 0.95
batch_size = 64
#n_steps=5
# Throwaway env used only to read the episode length (``turns``).
inspectenv = environment(inputfile, gamma)
test = 'A2C'
episodetimesteps = int(inspectenv.turns)

eval_env = environment(inputfile, gamma, rendermode="on")
loaded_model = A2C.load("best_model2")
ob = eval_env.reset()
cum_reward = 0
for a in range(round(eval_env.flatlen * 0.5)):
    # BUG FIX: stable-baselines ``predict`` returns an (action, states)
    # tuple; the original passed the whole tuple into ``env.step``.
    # Unpack and forward only the action.
    action, _states = loaded_model.predict(ob)
    ob, reward, terminal, _ = eval_env.step(action)