# Log the environment's initial configuration parameters to the SQLite database.
val = model.env.env_method("_send_config")[0]
names, values = val[0], val[1]
for i in range(len(values)):
    sql = '''INSERT INTO parameters(simu, type, step, value) VALUES(?,?,?,?)'''
    row = (model_name, names[i], 0, float(values[i]))  # renamed from `val` to avoid shadowing the config tuple
    cur.execute(sql, row)
conn.commit()
cur.close()
conn.close()

# Train in chunks of `save_every` steps, checkpointing the model (and the
# normalization statistics) after each chunk and rendering a progress gif.
for i in range(args.total_steps // args.save_every):
    model.learn(total_timesteps=args.save_every,
                tb_log_name=model_name,
                reset_num_timesteps=False,
                callback=callback)
    if normalize:
        env.save_running_average(workDirectory + "/resultats/" + model_name + "/normalizeData")
    model.save(workDirectory + "/resultats/" + model_name + "/" + model_name)
    os.system("python3 makegif.py --algo " + args.algo
              + " --dir ./server/assets/" + model_name + "_" + str((i + 1) * args.save_every) + "_steps.gif"
              + " --name " + model_name)
    print("\n saved at " + str((i + 1) * args.save_every))

# Final save once training is complete.
model.save(workDirectory + "/resultats/" + model_name + "/" + model_name)
if normalize:
    env.save_running_average(workDirectory + "/resultats/" + model_name + "/normalizeData")

# Rebuild the (non-rendered) environment with the same reward weights.
env = DummyVecEnv([lambda: e.AidaBulletEnv(commands,
                                           render=False,
                                           on_rack=False,
                                           default_reward=args.default_reward,
                                           height_weight=args.height_weight,
                                           orientation_weight=args.orientation_weight,
                                           direction_weight=args.direction_weight,
                                           speed_weight=args.speed_weight,
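# --- Hedged sketch (not part of the original script): how a checkpoint written
# by the loop above could be reloaded for evaluation with stable-baselines 2.
# `model_name`, `workDirectory`, `e.AidaBulletEnv`, and `commands` are taken from
# the snippet above; the PPO2 algorithm class and the reduced AidaBulletEnv
# argument list are assumptions.
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

eval_env = DummyVecEnv([lambda: e.AidaBulletEnv(commands, render=True, on_rack=False)])
eval_env = VecNormalize(eval_env, training=False, norm_reward=False)
# Restore the running mean/std written by save_running_average() during training.
eval_env.load_running_average(workDirectory + "/resultats/" + model_name + "/normalizeData")
model = PPO2.load(workDirectory + "/resultats/" + model_name + "/" + model_name, env=eval_env)

obs = eval_env.reset()
for _ in range(1000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = eval_env.step(action)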
model.learn(n_timesteps, **kwargs)

if args.teleop:
    env.wait()
    env.exit()
    time.sleep(0.5)
else:
    # Close the connection properly
    env.reset()
    if isinstance(env, VecFrameStack):
        env = env.venv
    # HACK to bypass Monitor wrapper
    env.envs[0].env.exit_scene()

# Save trained model
model.save(os.path.join(save_path, ENV_ID), cloudpickle=True)

# Save hyperparams
with open(os.path.join(params_path, 'config.yml'), 'w') as f:
    yaml.dump(saved_hyperparams, f)

if args.save_vae and vae is not None:
    print("Saving VAE")
    vae.save(os.path.join(params_path, 'vae'))

if normalize:
    # Unwrap
    if isinstance(env, VecFrameStack):
        env = env.venv
    # Important: save the running average; for testing the agent we need that normalization
    env.save_running_average(params_path)
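# --- Hedged sketch (not part of the original script): the matching test-time
# restore for the save block above. `params_path`, `save_path`, `ENV_ID`,
# `normalize`, and `env` come from the snippet; the SAC algorithm class is an
# assumption.
import os
import yaml
from stable_baselines import SAC

with open(os.path.join(params_path, 'config.yml'), 'r') as f:
    saved_hyperparams = yaml.safe_load(f)

model = SAC.load(os.path.join(save_path, ENV_ID))
if normalize:
    # Reload the running average saved at train time so observations are
    # normalized with the same statistics the agent was trained on.
    env.load_running_average(params_path)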
def callback(_locals, _globals):
    """Checkpoint the model every 50k steps (and once at 10k steps)."""
    global n_steps
    n_steps += 1
    if n_steps % 50000 == 0 or n_steps == 10000:
        print('Saving: ', n_steps)
        save_path = 'checkpoints/yumi/her/her_{}_task_{}_{}.npy'.format(
            name, args.task, n_steps)
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        model.save(save_path)
    return True

model = HER('MlpPolicy', env, model_class=DDPG, verbose=1,
            tensorboard_log=log_dir, random_exploration=0.2)
model.learn(total_timesteps=total_timesteps, callback=callback)
model.save("her-yumi-{}-final".format(n_steps))
env.save_running_average(log_dir)

# Roll out the trained policy for a quick visual check.
obs = env.reset()
for i in range(100):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
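# --- Hedged sketch (not part of the original script): resuming training from
# one of the checkpoints written by the callback above. HER.load restores the
# wrapped DDPG model; the checkpoint path mirrors the format string used in the
# callback, and the 50000-step checkpoint chosen here is an assumption.
from stable_baselines import HER

ckpt = 'checkpoints/yumi/her/her_{}_task_{}_{}.npy'.format(name, args.task, 50000)
model = HER.load(ckpt, env=env)
model.learn(total_timesteps=total_timesteps, callback=callback)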