default=True, help='Image-based states') parser.add_argument('--n-steps', type=int, default=500000, help='number of steps for training') parser.add_argument('--name', help='run name (within the experiment)') parser.add_argument('--experiment-name', help='experiment name') args = parser.parse_args() env = KukaEnv(renders=args.render, is_discrete=True, max_steps=args.max_ep_len, action_repeat=args.repeat, images=False, static_all=True, static_obj_rnd_pos=False, rnd_obj_rnd_pos=False, full_color=False, width=84, height=84) env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) env = FrameStackEnv(env, 3, 'tensors', 'states') experience_generator = ExperienceGenerator(env) agent = PPOAgentStates(num_layers=3,
# --- DQN training script: CLI args, env construction, networks and replay buffer ---
parser.add_argument('--episodes', type=int, default=10000, help='number of training episodes to run')
parser.add_argument('--images', action='store_true', default=False, help='Image-based states')
parser.add_argument('--name', help='experiment name')
args = parser.parse_args()

# Static scene; image observations only when --images is passed.
env = KukaEnv(renders=args.render, is_discrete=True, max_steps=args.max_ep_len,
              action_repeat=args.repeat, images=args.images, static_all=True,
              static_obj_rnd_pos=False, rnd_obj_rnd_pos=False, full_color=False)

env.seed(args.seed)
torch.manual_seed(args.seed)
# NOTE(review): unlike the sibling training scripts, numpy's RNG is not seeded here;
# confirm whether np.random.seed(args.seed) should be added for full reproducibility.

# Container pairing an action's log-probability with the critic's value estimate.
saved_action = namedtuple('saved_action', ['log_prob', 'value'])

# Online and target networks for the 7-action discrete task.
policy_net = DQNCnn(7)
target_net = DQNCnn(7)
# 100k-transition buffer, 4 stacked frames per state.
memory = ReplayBuffer(100000, 4)
default=True, help='Image-based states') parser.add_argument('--n-steps', type=int, default=100000, help='number of steps for training') parser.add_argument('--name', help='experiment name') parser.add_argument('--experiment-name', help='experiment name') args = parser.parse_args() env = KukaEnv(renders=args.render, is_discrete=True, max_steps=args.max_ep_len, action_repeat=args.repeat, images=True, static_all=False, static_obj_rnd_pos=True, rnd_obj_rnd_pos=False, full_color=True, width=84, height=84) # env = gym.make('BowlingDeterministic') env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) env = FrameStackEnv(env, 3, 'tensors') agent = SACAgentImages(
# --- VAE training script: imports, environment, model/optimizer, loss definition ---
from environments.kuka import KukaEnv
import matplotlib.pyplot as plt
import numpy as np
import pybullet as p
import time
from PIL import Image
from models.encoders import VAE
import torch
import torch.nn.functional as F
from torchvision import transforms
from tqdm import tqdm

# Full-colour image observations; static object, discrete actions, no GUI rendering.
env = KukaEnv(images=True, static_all=False, is_discrete=True, static_obj_rnd_pos=False,
              rnd_obj_rnd_pos=False, renders=False, full_color=True)

# 32-dimensional latent space.
vae = VAE(32)
# Robustness fix: previously hard-coded vae.to('cuda:0'), which crashes on CPU-only
# hosts; fall back to CPU when CUDA is unavailable (identical behaviour on GPU machines).
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
vae.to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=0.0002)


def vae_loss(x, x_hat, mu, var, weight):
    """Return the VAE objective for input x and reconstruction x_hat.

    NOTE(review): body truncated in this fragment — only the reconstruction
    term is visible; the KL term presumably follows.
    """
    # Reconstruction error (summed BCE — assumes x and x_hat are in [0, 1]).
    recon_err = F.binary_cross_entropy(x_hat, x, reduction='sum')
    # KL