if __name__ == "__main__":
    opt = Options().parse()
    opt.output = get_output_folder(opt.output, "Paint")

    # Seed every random-number source for reproducibility
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(opt.seed)
    random.seed(opt.seed)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True

    from DRL.ddpg import DDPG
    from DRL.multi import fastenv

    # Previous call signature that passed every argument explicitly, kept commented out:
    # fenv = fastenv(args.max_step, args.env_batch, writer, args.canvas_color,
    #                args.loss_fcn, args.dataset, args.use_multiple_renderers)
    # agent = DDPG(args.batch_size, args.env_batch, args.max_step,
    #              args.tau, args.discount, args.rmsize,
    #              writer, args.resume, args.output, args.loss_fcn,
    #              args.renderer, args.use_multiple_renderers)

    fenv = fastenv(opt, writer)
    agent = DDPG(opt, writer)
    evaluate = Evaluator(opt, writer)
    print('observation_space', fenv.observation_space, 'action_space', fenv.action_space)

    # Log the run configuration to TensorBoard
    summary = 'Loss Function - {}\nRenderer - {}\nResuming Model - {}\nbatch_size - {}\nmax_step - {}\nOutput - {}' \
        .format(opt.loss_fcn, opt.renderer, opt.resume, opt.batch_size, opt.max_step, opt.output)
    writer.add_text('summary', summary, 0)
    writer.add_text('Command Line Arguments', str(opt), 0)

    train(agent, fenv, evaluate)
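# get_output_folder is not shown in this excerpt. Below is a minimal sketch of
# the behavior assumed here (create a fresh, numbered run directory such as
# ./model/Paint-run1, ./model/Paint-run2, ...); the real helper may differ:
#
# import os
#
# def get_output_folder(parent_dir, env_name):
#     os.makedirs(parent_dir, exist_ok=True)
#     run_id = 1
#     while os.path.exists(os.path.join(parent_dir, '{}-run{}'.format(env_name, run_id))):
#         run_id += 1
#     out_dir = os.path.join(parent_dir, '{}-run{}'.format(env_name, run_id))
#     os.makedirs(out_dir)
#     return out_dir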
parser.add_argument('--env_batch', default=96, type=int, help='number of concurrent environments')
parser.add_argument('--tau', default=0.001, type=float, help='moving average factor for the target network')
parser.add_argument('--max_step', default=40, type=int, help='max length of an episode')
parser.add_argument('--noise_factor', default=0.01, type=float, help='noise level for parameter space noise')
parser.add_argument('--validate_interval', default=50, type=int, help='how many episodes between validation runs')
parser.add_argument('--validate_episodes', default=5, type=int, help='how many episodes to perform during validation')
parser.add_argument('--train_times', default=2000000, type=int, help='total number of training steps')
parser.add_argument('--episode_train_times', default=10, type=int, help='number of training updates per episode')
parser.add_argument('--resume', default=None, type=str, help='Resuming model path for testing')
parser.add_argument('--output', default='./model', type=str, help='output folder for trained models')
parser.add_argument('--debug', dest='debug', action='store_true', help='print some info')
parser.add_argument('--seed', default=1234, type=int, help='random seed')

args = parser.parse_args()
args.output = get_output_folder(args.output, "Paint")

# Seed every random-number source for reproducibility
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
random.seed(args.seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True

from DRL.ddpg import DDPG
from DRL.multi import fastenv

fenv = fastenv(args.max_step, args.env_batch, writer)
agent = DDPG(args.batch_size, args.env_batch, args.max_step,
             args.tau, args.discount, args.rmsize,
             writer, args.resume, args.output)
evaluate = Evaluator(args, writer)
print('observation_space', fenv.observation_space, 'action_space', fenv.action_space)
train(agent, fenv, evaluate)
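# Example invocation (hypothetical; assumes this script is saved as train.py and
# that the flags not shown in this excerpt, e.g. --batch_size, --discount and
# --rmsize, are defined earlier in the parser). Only flags visible above are
# set; everything else keeps its default:
#
#   python train.py --max_step 40 --env_batch 96 --train_times 2000000 \
#       --output ./model --seed 1234 --debug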
parser.add_argument('--resume', default=None, type=str, help='Resuming model path for testing')
parser.add_argument('--output', default='./model', type=str, help='output folder for trained models')
parser.add_argument('--debug', dest='debug', action='store_true', help='print some info')
parser.add_argument('--seed', default=1234, type=int, help='random seed')

args = parser.parse_args()
args.output = get_output_folder(args.output, "Paint")

# Seed every random-number source for reproducibility
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
random.seed(args.seed)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False

from DRL.ddpg import DDPG
from DRL.multi import fastenv

# This variant passes the state/action dimensions to the environment and agent explicitly
fenv = fastenv(args.state_dim, args.action_dim, args.max_step, args.env_batch, writer)
agent = DDPG(args.state_dim, args.merged_state_dim, args.action_dim,
             args.batch_size, args.env_batch, args.max_step,
             args.tau, args.discount, args.rmsize,
             writer, args.resume, args.output)
evaluate = Evaluator(args, writer)
print('observation_space', fenv.observation_space, 'action_space', fenv.action_space)
train(agent, fenv, evaluate)