Example #1

if __name__ == "__main__":
    opt = Options().parse()

    opt.output = get_output_folder(opt.output, "Paint")
    # Seed every RNG in play (NumPy, PyTorch CPU/GPU, Python stdlib).
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(opt.seed)
    random.seed(opt.seed)
    # Favor speed over strict determinism: let cuDNN autotune convolution
    # algorithms for the fixed input sizes used during training.
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True
    from DRL.ddpg import DDPG
    from DRL.multi import fastenv
    # Legacy constructor signatures, kept commented out for reference;
    # the opt-based calls below replace them:
    # fenv = fastenv(args.max_step, args.env_batch, writer, args.canvas_color, args.loss_fcn, args.dataset, args.use_multiple_renderers)
    # agent = DDPG(args.batch_size, args.env_batch, args.max_step, \
    #              args.tau, args.discount, args.rmsize, \
    #              writer, args.resume, args.output, args.loss_fcn, args.renderer, args.use_multiple_renderers)
    fenv = fastenv(opt, writer)
    agent = DDPG(opt, writer)
    evaluate = Evaluator(opt, writer)
    print('observation_space', fenv.observation_space, 'action_space',
          fenv.action_space)

    summary = 'Loss Function - {}\nRenderer - {}\nResuming Model - {}\nbatch_size - {}\nmax_step - {}\nOutput - {}' \
        .format(opt.loss_fcn, opt.renderer, opt.resume, opt.batch_size, opt.max_step, opt.output)
    writer.add_text('summary', summary, 0)
    writer.add_text('Command Line Arguments', str(opt), 0)

    train(agent, fenv, evaluate)
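
All three examples repeat the same seeding preamble. A minimal sketch of a helper that consolidates it; the name seed_everything and the deterministic flag are my own, not part of the repo:

import random

import numpy as np
import torch


def seed_everything(seed, deterministic=False):
    # Seed Python, NumPy, and PyTorch (CPU and all visible GPUs).
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    # deterministic=True pins cuDNN to reproducible algorithms and disables
    # autotuning; deterministic=False keeps the faster autotuned path.
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic

With it, Example #1's preamble collapses to seed_everything(opt.seed) and Example #2's to seed_everything(args.seed, deterministic=True).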
Example #2
    parser.add_argument('--env_batch', default=96, type=int, help='number of concurrent environments')
    parser.add_argument('--tau', default=0.001, type=float, help='soft-update (moving-average) coefficient for the target network')
    parser.add_argument('--max_step', default=40, type=int, help='max episode length')
    parser.add_argument('--noise_factor', default=0.01, type=float, help='noise level for parameter-space noise')
    parser.add_argument('--validate_interval', default=50, type=int, help='run validation every this many episodes')
    parser.add_argument('--validate_episodes', default=5, type=int, help='how many episodes to run during validation')
    parser.add_argument('--train_times', default=2000000, type=int, help='total number of training steps')
    parser.add_argument('--episode_train_times', default=10, type=int, help='training updates per episode')
    parser.add_argument('--resume', default=None, type=str, help='path of a saved model to resume from (used for testing)')
    parser.add_argument('--output', default='./model', type=str, help='folder to save models and logs to')
    parser.add_argument('--debug', dest='debug', action='store_true', help='print some info')
    parser.add_argument('--seed', default=1234, type=int, help='random seed')
    
    args = parser.parse_args()    
    args.output = get_output_folder(args.output, "Paint")
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)
    # For a reproducible run, cuDNN must be pinned as well: deterministic
    # algorithms only, and no autotuning (benchmark=True would select
    # algorithms nondeterministically and defeat the seeding above).
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    from DRL.ddpg import DDPG
    from DRL.multi import fastenv
    fenv = fastenv(args.max_step, args.env_batch, writer)
    agent = DDPG(args.batch_size, args.env_batch, args.max_step,
                 args.tau, args.discount, args.rmsize,
                 writer, args.resume, args.output)
    evaluate = Evaluator(args, writer)
    print('observation_space', fenv.observation_space, 'action_space', fenv.action_space)
    train(agent, fenv, evaluate)
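
Both argparse examples begin mid-way through the option definitions, so the parser and the writer they rely on are defined off-screen. A plausible preamble, assuming writer is a TensorBoard-style summary writer (the description string and log directory here are illustrative, not from the repo):

import argparse
import random

import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

parser = argparse.ArgumentParser(description='Learning to Paint')
# Supports the writer.add_text(...) calls shown in Example #1.
writer = SummaryWriter('./train_log')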
Example #3
    parser.add_argument('--resume',
                        default=None,
                        type=str,
                        help='path of a saved model to resume from (used for testing)')
    parser.add_argument('--output',
                        default='./model',
                        type=str,
                        help='folder to save models and logs to')
    parser.add_argument('--debug',
                        dest='debug',
                        action='store_true',
                        help='print some info')
    parser.add_argument('--seed', default=1234, type=int, help='random seed')

    args = parser.parse_args()
    args.output = get_output_folder(args.output, "Paint")
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)
    # cuDNN autotuning disabled here, but determinism is not enforced either.
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = False
    from DRL.ddpg import DDPG
    from DRL.multi import fastenv
    fenv = fastenv(args.state_dim, args.action_dim, args.max_step,
                   args.env_batch, writer)
    agent = DDPG(args.state_dim, args.merged_state_dim, args.action_dim,
                 args.batch_size, args.env_batch, args.max_step,
                 args.tau, args.discount, args.rmsize,
                 writer, args.resume, args.output)
    evaluate = Evaluator(args, writer)
    print('observation_space', fenv.observation_space, 'action_space',
          fenv.action_space)
    train(agent, fenv, evaluate)
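
Every example routes its save path through get_output_folder(args.output, "Paint") without showing the function. A sketch under the assumption that the repo uses the widely-copied keras-rl helper, which allocates a fresh parent_dir/name-run{N} directory per run:

import os


def get_output_folder(parent_dir, env_name):
    # Scan parent_dir for existing '<name>-run<N>' folders, take the largest
    # N plus one, and create that directory so each run gets its own output.
    os.makedirs(parent_dir, exist_ok=True)
    experiment_id = 0
    for folder_name in os.listdir(parent_dir):
        if not os.path.isdir(os.path.join(parent_dir, folder_name)):
            continue
        try:
            run_id = int(folder_name.split('-run')[-1])
        except ValueError:
            continue
        experiment_id = max(experiment_id, run_id)
    experiment_id += 1
    output_dir = os.path.join(parent_dir, '{}-run{}'.format(env_name, experiment_id))
    os.makedirs(output_dir, exist_ok=True)
    return output_dir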