コード例 #1
0
ファイル: main.py プロジェクト: JasonTang99/A3C
def run(args):
    """Entry point for A3C on SpaceInvaders.

    Builds the shared actor-critic model and shared optimizer, optionally
    restores a checkpoint, then trains with one process per worker and/or
    runs evaluation.

    Args:
        args: parsed CLI namespace. Fields read here: lr, alpha,
            weight_decay, momentum, load_fp, save_fp, train, test,
            num_procs, verbose.
    """
    device = torch.device("cpu")
    env = gym.make('SpaceInvaders-v0')
    action_size = env.action_space.n

    # NOTE(review): the network input shape is hard-coded; presumably frames
    # are preprocessed to 4 stacked 84x84 planes — confirm against train().
    # (The raw env.observation_space.shape was fetched but never used, so
    # that dead assignment has been removed.)
    model = ActorCritic([1, 4, 84, 84], action_size).to(device)
    opt = SharedRMSprop(model.parameters(),
                        lr=args.lr,
                        alpha=args.alpha,
                        eps=1e-8,
                        weight_decay=args.weight_decay,
                        momentum=args.momentum,
                        centered=False)
    opt_lock = mp.Lock()  # serializes shared-optimizer updates across workers
    scheduler = LRScheduler(args)

    if args.load_fp:
        checkpoint = torch.load(args.load_fp)
        model.load_state_dict(checkpoint['model_state_dict'])
        opt.load_state_dict(checkpoint['optimizer_state_dict'])

    if args.train:
        start = time.time()

        # Must be called before spawning so all workers share the weights.
        model.share_memory()
        model.train()

        # Shared scalar counters the workers update under the hood.
        step_counter, max_reward, ma_reward, ma_loss = [
            mp.Value('d', 0.0) for _ in range(4)
        ]

        processes = []
        if args.num_procs == -1:
            args.num_procs = mp.cpu_count()  # -1 means "use every core"
        for rank in range(args.num_procs):
            p = mp.Process(target=train,
                           args=(rank, args, device, model, opt, opt_lock,
                                 scheduler, step_counter, max_reward,
                                 ma_reward, ma_loss))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()

        if args.verbose > 0:
            print(f"Seconds taken: {time.time() - start}")
        if args.save_fp:
            # Only the model weights are checkpointed; the optimizer state is
            # intentionally left out (see the commented line below).
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    # 'optimizer_state_dict': opt.state_dict(),
                },
                args.save_fp)

    if args.test:
        model.eval()
        test(args, device, model)
コード例 #2
0
ファイル: main.py プロジェクト: hvcl/ColorRL
def main(scripts, args):
    """Top-level driver: parse CLI args, set up data/env/model, then launch
    one evaluation process and ``args.workers`` training processes.

    NOTE(review): both parameters are immediately overwritten from
    ``sys.argv`` and the module-level ``parser``, so the values passed in
    are ignored — confirm the signature is intentional.
    """
    scripts = " ".join(sys.argv[0:])
    args = parser.parse_args()
    args.scripts = scripts

    torch.manual_seed(args.seed)
    if args.gpu_ids == -1:
        args.gpu_ids = [-1]  # sentinel meaning CPU-only
    else:
        torch.cuda.manual_seed(args.seed)
        # 'spawn' start method is required for CUDA use in subprocesses.
        mp.set_start_method('spawn')

    # Deploy mode additionally returns upsized test volumes.
    if (args.deploy):
        raw, gt_lbl, raw_valid, gt_lbl_valid, raw_test, gt_lbl_test, raw_test_upsize, gt_lbl_test_upsize = setup_data(
            args)
    else:
        raw, gt_lbl, raw_valid, gt_lbl_valid, raw_test, gt_lbl_test = setup_data(
            args)

    env_conf = setup_env_conf(args)

    shared_model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             atrous_rates=args.atr_rate,
                             num_actions=2,
                             split=args.data_channel,
                             multi=args.multi)

    # Manager-backed dict shared between all spawned processes.
    manager = mp.Manager()
    shared_dict = manager.dict()
    if args.wctrl == "s2m":
        shared_dict["spl_w"] = args.spl_w
        shared_dict["mer_w"] = args.mer_w

    if args.load:
        # map_location keeps tensors on CPU regardless of where they were saved.
        saved_state = torch.load(args.load,
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)
    # Must happen before spawning workers so they all share the weights.
    shared_model.share_memory()

    if args.shared_optimizer:
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(shared_model.parameters(),
                                   lr=args.lr,
                                   amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None  # each worker presumably builds its own — confirm

    processes = []
    if not args.no_test:
        # Single evaluation process; the argument layout passed to test_func
        # differs between deploy and normal mode.
        if raw_test is not None:
            if (args.deploy):
                p = mp.Process(target=test_func,
                               args=(args, shared_model, env_conf,
                                     [raw_valid, gt_lbl_valid],
                                     (raw_test, gt_lbl_test, raw_test_upsize,
                                      gt_lbl_test_upsize, shared_dict)))
            else:
                p = mp.Process(target=test_func,
                               args=(args, shared_model, env_conf,
                                     [raw_valid, gt_lbl_valid
                                      ], (raw_test, gt_lbl_test), shared_dict))
        else:
            p = mp.Process(target=test_func,
                           args=(args, shared_model, env_conf,
                                 [raw_valid, gt_lbl_valid], None, shared_dict))
        p.start()
        processes.append(p)

    time.sleep(0.1)  # stagger process startup

    for rank in range(0, args.workers):
        p = mp.Process(target=train_func,
                       args=(rank, args, shared_model, optimizer, env_conf,
                             [raw, gt_lbl], shared_dict))

        p.start()
        processes.append(p)
        time.sleep(0.1)

    # Block until every spawned process has exited.
    for p in processes:
        time.sleep(0.1)
        p.join()
コード例 #3
0
ファイル: main.py プロジェクト: anhtuanhsgs/MedicalRl
                               env_conf["num_action"], args.hidden_feat)
    else:
        shared_model = A3Clstm_continuous(env_conf["observation_shape"],
                                          env_conf["num_action"],
                                          args.hidden_feat)

    if args.load:
        saved_state = torch.load('{0}{1}.dat'.format(args.load_model_dir,
                                                     args.env),
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)
    shared_model.share_memory()

    if args.shared_optimizer:
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(shared_model.parameters(),
                                   lr=args.lr,
                                   amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None

    processes = []
    if "EM_env" in args.env:
        p = mp.Process(target=test,
                       args=(args, shared_model, env_conf,
                             [raw, lbl, prob, gt_lbl], True))
    else:
        p = mp.Process(target=test, args=(args, shared_model, env_conf))
コード例 #4
0
            map_location=lambda storage, loc: storage)
        for k, v in model_state.items():
            if 'pose_actor' in k:
                model_state[k] = pose_saved_state[k]
            if 'pose_BiRNN' in k:
                key = k.replace('pose_BiRNN', 'global_net.pose_BiRNN')
                model_state[k] = pose_saved_state[key]
        shared_model.load_state_dict(model_state)

    params = shared_model.parameters()
    shared_model.share_memory()

    if args.shared_optimizer:
        print('share memory')
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(params, lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(params, lr=args.lr, amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None

    current_time = datetime.now().strftime('%b%d_%H-%M')
    args.log_dir = os.path.join(args.log_dir, args.env, current_time)
    env.close()

    processes = []
    manager = mp.Manager()
    train_modes = manager.list()
    n_iters = manager.list()
コード例 #5
0
ファイル: ocpg.py プロジェクト: Aks-Dmv/AROC
        if i in args.env:
            env_conf = setup_json[i]
    # env = atari_env(args.env, env_conf, args)
    env = OC_env(args.env)
    shared_model = OCPGModel(env.observation_space.shape[0], env.action_space,
                             args.options, args.width)
    if args.load:
        saved_state = torch.load('{0}{1}.dat'.format(args.load_model_dir,
                                                     args.env),
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)
    shared_model.share_memory()

    if args.shared_optimizer:
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(shared_model.parameters(),
                                   lr=args.lr,
                                   amsgrad=args.amsgrad)
        if args.load:
            saved_state = torch.load('{0}{1}1.torch'.format(
                args.load_model_dir, args.env),
                                     map_location=lambda storage, loc: storage)
            print("load state dict")
            optimizer.load_state_dict(saved_state)
            print("loaded optimizer")
        optimizer.share_memory()
    else:
        optimizer = None
コード例 #6
0
ファイル: main.py プロジェクト: Shashank-Shet/rl_a3c_pytorch
    ]
    if args.load:
        saved_state = torch.load('{0}{1}_early.dat'.format(
            args.load_model_dir, args.env),
                                 map_location=lambda storage, loc: storage)
        shared_models[0].load_state_dict(saved_state)
        saved_state = torch.load('{0}{1}_late.dat'.format(
            args.load_model_dir, args.env),
                                 map_location=lambda storage, loc: storage)
        shared_models[1].load_state_dict(saved_state)
    shared_models[0].share_memory()
    shared_models[1].share_memory()

    if args.optimizer == 'RMSprop':
        optimizers = [
            SharedRMSprop(shared_models[0].parameters(), lr=args.lr),
            SharedRMSprop(shared_models[1].parameters(), lr=args.lr)
        ]
    if args.optimizer == 'Adam':
        optimizers = [
            SharedAdam(shared_models[0].parameters(),
                       lr=args.lr,
                       amsgrad=args.amsgrad),
            SharedAdam(shared_models[1].parameters(),
                       lr=args.lr,
                       amsgrad=args.amsgrad)
        ]
    optimizers[0].share_memory()
    optimizers[1].share_memory()

    processes = []
コード例 #7
0
ファイル: main.py プロジェクト: ddayzzz/rl_a3c_pytorch
    def run(self):
        """Build the shared UNREAL model and optimizer, spawn one training
        process per worker, and block until all of them exit.

        NOTE(review): ``args`` is read as a module-level global here, not as
        an attribute of ``self`` — confirm that is intentional.
        """

        torch.manual_seed(args.seed)
        if args.gpu_ids == -1:
            args.gpu_ids = [-1]  # sentinel meaning CPU-only
        else:
            torch.cuda.manual_seed(args.seed)
            # 'spawn' start method is required for CUDA use in subprocesses.
            mp.set_start_method('spawn')

        # env = make_env(env_type=args.env_type, env_name=args.env_name, args=args)
        shared_model = UNREAL(in_channels=3, action_size=6, enable_pixel_control=True)

        if args.load:
            # map_location keeps tensors on CPU regardless of save device.
            saved_state = torch.load(
                '{0}{1}.dat'.format(args.load_model_dir, args.env),
                map_location=lambda storage, loc: storage)
            shared_model.load_state_dict(saved_state)
        # Must happen before spawning so all workers share the weights.
        shared_model.share_memory()

        # Learning rate drawn by the project helper log_uniform; the meaning
        # of the third argument (0.5) is defined there — verify.
        lr = log_uniform(1e-4, 5e-3, 0.5)

        if args.shared_optimizer:
            if args.optimizer == 'RMSprop':
                optimizer = SharedRMSprop(shared_model.parameters(), lr=lr, eps=0.1)
            if args.optimizer == 'Adam':
                optimizer = SharedAdam(
                    shared_model.parameters(), lr=lr, amsgrad=args.amsgrad)
            optimizer.share_memory()
        else:
            optimizer = None



        # p = mp.Process(target=train, args=(args, shared_model, env_conf))
        # p.start()
        # processes.append(p)
        # time.sleep(0.1)

        self.stop_requested = False
        self.terminate_reqested = False

        # One Trainer object per worker rank; train_function presumably
        # looks these up via self.trainers[rank] — confirm.
        for rank in range(0, args.workers):
            trainer = Trainer(rank, args, shared_model=shared_model, optimizer=optimizer, lr=lr)
            self.trainers.append(trainer)

            # time.sleep(0.1)
        # Record the run start time (wall-clock offset is currently 0).
        self.start_time = time.time() - 0  # wall_t

        processes = []
        for rank in range(0, args.workers):
            # Rank 0 gets an extra True flag — presumably "is the logger/
            # summary writer"; confirm against train_function's signature.
            if rank == 0:
                p = mp.Process(target=self.train_function, args=(rank, True, True))
            else:
                p = mp.Process(target=self.train_function, args=(rank, True))
            p.start()
            processes.append(p)

        # Register the Ctrl+C (SIGINT) termination handler.
        signal.signal(signal.SIGINT, self.signal_handler)


        print('Press Ctrl+C to stop')
        for rank in range(0, args.workers):
            time.sleep(0.01)
            processes[rank].join()
コード例 #8
0
def start():
    """Launch the evaluation process and all training workers around a
    shared model/optimizer, then wait for every process to finish.

    Reads everything from the module-level ``parser``; always forces a
    shared optimizer.
    """
    args = parser.parse_args()
    args.shared_optimizer = True

    if args.gpu_ids == -1:
        # CPU-only run.
        torch.manual_seed(args.seed)
        args.gpu_ids = [-1]
        device_share = torch.device('cpu')
        mp.set_start_method('spawn')
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method('spawn', force=True)
        # With several GPUs the shared copy lives on the CPU; with exactly
        # one it lives on that device.
        if len(args.gpu_ids) > 1:
            device_share = torch.device('cpu')
        else:
            device_share = torch.device(f'cuda:{args.gpu_ids[-1]}')

    # The env is only needed to size the model; drop it right after.
    env = create_env(args.env, args)
    shared_model = build_model(env.observation_space, env.action_space, args,
                               device_share).to(device_share)
    shared_model.share_memory()  # before spawning, so workers share weights
    env.close()
    del env

    if args.load_coordinator_dir is not None:
        # map_location keeps tensors on CPU regardless of save device.
        restored = torch.load(args.load_coordinator_dir,
                              map_location=lambda storage, loc: storage)
        if args.load_coordinator_dir.endswith('pth'):
            # .pth checkpoints wrap the weights in a dict under 'model'.
            shared_model.load_state_dict(restored['model'], strict=False)
        else:
            shared_model.load_state_dict(restored)

    params = shared_model.parameters()
    if args.shared_optimizer:
        print('share memory')
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(params, lr=args.lr)
        elif args.optimizer == 'Adam':
            optimizer = SharedAdam(params, lr=args.lr, amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None

    # Time-stamped log directory: <log_dir>/<env>/<MonDD_HH-MM>.
    stamp = datetime.now().strftime('%b%d_%H-%M')
    args.log_dir = os.path.join(args.log_dir, args.env, stamp)

    processes = []
    manager = mp.Manager()
    train_modes = manager.list()
    n_iters = manager.list()

    # One evaluation process first, then the training workers, each start
    # staggered by args.sleep_time.
    tester = mp.Process(target=test,
                        args=(args, shared_model, optimizer, train_modes,
                              n_iters))
    tester.start()
    processes.append(tester)
    time.sleep(args.sleep_time)

    for rank in range(args.workers):
        worker = mp.Process(target=train,
                            args=(rank, args, shared_model, optimizer,
                                  train_modes, n_iters))
        worker.start()
        processes.append(worker)
        time.sleep(args.sleep_time)

    # Block until every spawned process has exited.
    for proc in processes:
        time.sleep(args.sleep_time)
        proc.join()
コード例 #9
0
ファイル: main.py プロジェクト: kibeomKim/tetris
        self.tau = 1.0


if __name__ == "__main__":
    params = Params()
    mp.set_start_method('spawn')
    count = mp.Value('i', 0)  # update count
    lock = mp.Lock()

    # shared_model = A3C()
    shared_model = A3C_LSTM()
    shared_model = shared_model.share_memory()

    # shared_optimizer = SharedAdam(shared_model.parameters(), lr=params.lr, amsgrad=params.amsgrad,
    #                               weight_decay=params.weight_decay)
    shared_optimizer = SharedRMSprop(shared_model.parameters(), lr=params.lr)
    shared_optimizer.share_memory()

    # run_loop(0, params, shared_model, shared_optimizer, count, lock)    # for debugging
    # test(0, params, shared_model, count, lock)

    processes = []

    # have to add test module
    p = mp.Process(target=test, args=(
        0,
        params,
        shared_model,
        count,
        lock,
    ))