Code example #1
File: online.py  Project: sundaramx/savn_online
    def load_model(self, model_path, args):
        if args.model == 'NON_ADAPTIVE_A3C':
            self.model = BaseModel(args)
        elif args.model == 'GCN':
            self.model = GCN(args)
        else:
            self.model = SAVN(args)
        saved_state = torch.load(model_path,
                                 map_location=lambda storage, loc: storage)
        self.model.load_state_dict(saved_state)

        self.model_options = ModelOptions()
        self.model_options.params = get_params(self.model, args.gpu_id)
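
The loading idiom above is reusable outside this class. Below is a minimal sketch of the same pattern for an arbitrary PyTorch module; load_checkpoint and its arguments are illustrative names, not part of the project.

import torch
import torch.nn as nn


def load_checkpoint(model: nn.Module, model_path: str) -> nn.Module:
    # map_location keeps every tensor on the CPU regardless of the device it
    # was saved from, so a GPU-trained checkpoint also loads on a CPU-only machine.
    saved_state = torch.load(model_path,
                             map_location=lambda storage, loc: storage)
    model.load_state_dict(saved_state)
    model.eval()  # switch to inference mode for online use
    return model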
Code example #2
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)

    random.seed(args.seed + rank)
    idx = [j for j in range(len(args.scene_types))]
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0

    while not end_flag.value:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(),
                                               100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(
                    player, shared_model, gpu_id)
                optimizer.step()
                # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
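
The worker above follows the usual A3C layout: a shared model in shared memory, a per-process agent, a result queue, and a stop flag. A hypothetical launcher could look like the sketch below; launch_training and num_workers are illustrative names, the pattern itself is standard A3C, and this launcher is not taken from the project.

import torch.multiprocessing as mp


def launch_training(args, create_shared_model, initialize_agent, optimizer,
                    num_workers):
    shared_model = create_shared_model(args)
    shared_model.share_memory()      # workers apply their gradients to this copy

    end_flag = mp.Value("b", False)  # workers poll this flag to know when to stop
    res_queue = mp.Queue()           # workers push per-episode results here

    processes = []
    for rank in range(num_workers):
        p = mp.Process(
            target=nonadaptivea3c_train,  # the worker defined above
            args=(rank, args, create_shared_model, shared_model,
                  initialize_agent, optimizer, res_queue, end_flag),
        )
        p.start()
        processes.append(p)
    # With CUDA workers, a "spawn" start method is usually required,
    # e.g. mp.set_start_method("spawn") before any process is created.
    return processes, res_queue, end_flag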
Code example #3
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):

    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open != "":
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state['model'])

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    model_options = ModelOptions()

    j = 0

    while count < max_count:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes, possible_targets, targets, glove=glove)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Evaluate the new episode (no parameter updates during validation).
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, False)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()
        spl, best_path_length = compute_spl(player, player_start_state)

        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            spl=spl,
            **bucketed_spl,
        )

        count += 1
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
    res_queue.put({"END": True})
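
Each validation worker streams per-episode metrics through res_queue and pushes a final {"END": True} sentinel when it is done. A minimal consumer, assuming end_episode enqueues one metrics dict per episode (which the sentinel protocol suggests), could look like this:

def collect_results(res_queue, n_workers):
    episodes = []
    finished = 0
    while finished < n_workers:
        result = res_queue.get()     # blocks until some worker reports
        if result.get("END"):
            finished += 1            # one validation worker has finished
        else:
            episodes.append(result)  # per-episode metrics: spl, total_reward, ...
    return episodes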
Code example #4
def savn_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
    glove_file=None,
    img_file=None,
):

    # glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open is not None:
        saved_state = torch.load(model_to_open,
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    player_actions = {}

    model_options = ModelOptions()

    while count < max_count:

        count += 1

        start_time = time.time()
        new_episode(args,
                    player,
                    scenes,
                    possible_targets,
                    targets,
                    glove=glove_file,
                    img_file=None)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        if args.verbose:
            print(player_start_state)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        player_actions['scene'] = player.environment.scene_name
        player_actions['target_object'] = player.episode.task_data[0]
        player_actions['positions'] = []
        player_actions['positions'].append(str(player_start_state))

        while True:
            total_reward = run_episode(player, args, total_reward,
                                       model_options, False)
            player_actions['positions'].append(
                str(player.environment.controller.state))

            if player.done:
                break

            if args.gradient_limit < 0 or episode_num < args.gradient_limit:

                num_gradients += 1

                # Compute the loss.
                learned_loss = compute_learned_loss(args, player, gpu_id,
                                                    model_options)

                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient,
                             args.inner_lr))
                model_options.params = params_list[-1]

                reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)
        player_actions['success'] = player.success

        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length,
                                            player.success, player.actions[-1],
                                            player.arrive)

        if args.record_route:
            with open(
                    '/home/duhm/Code/savn_deployment/players_action_test.json',
                    'a') as write_file:
                json.dump(player_actions, write_file)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - start_time,
            spl=spl,
            **reward_dict,
            **bucketed_spl,
        )

        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
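
During validation, SAVN adapts its parameters online: torch.autograd.grad produces the inner gradient of the learned loss, and SGD_step maps a {name: tensor} parameter dict to an updated dict. The project's SGD_step is not shown here; the sketch below is only an illustration of what such a functional update typically looks like.

def functional_sgd_step(params, gradients, lr):
    updated = {}
    for (name, value), grad in zip(params.items(), gradients):
        if grad is None:             # allow_unused=True can return None gradients
            updated[name] = value
        else:
            # No in-place update and no detach(): the new parameters stay on the
            # autograd graph, so an outer (meta) gradient can still flow through.
            updated[name] = value - lr * grad
    return updated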
Code example #5
def savn_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):

    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)

    random.seed(args.seed + rank)
    idx = [j for j in range(len(args.scene_types))]
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)

    model_options = ModelOptions()

    j = 0

    while not end_flag.value:

        start_time = time.time()
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        # Accumulate loss over all meta_train episodes.
        while True:
            # Run episode for k steps or until it is done or has made a mistake (if dynamic adapt is true).
            if args.verbose:
                print("New inner step")
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)

            if player.done:
                break

            if args.gradient_limit < 0 or episode_num < args.gradient_limit:

                num_gradients += 1

                # Compute the loss.
                learned_loss = compute_learned_loss(args, player, gpu_id,
                                                    model_options)

                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient,
                             args.inner_lr))
                model_options.params = params_list[-1]

                # reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)

        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        if args.verbose:
            print("meta gradient")

        # Compute the meta_gradient, i.e. differentiate w.r.t. theta.
        meta_gradient = torch.autograd.grad(
            loss["total_loss"],
            [v for _, v in params_list[0].items()],
            allow_unused=True,
        )

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            episode_num=0,
            total_time=time.time() - start_time,
            total_reward=total_reward,
        )

        # Copy the meta_gradient to shared_model and step.
        transfer_gradient_to_shared(meta_gradient, shared_model, gpu_id)
        optimizer.step()
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
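
The outer loop differentiates the post-adaptation loss with respect to the initial parameters (params_list[0]) and hands the result to transfer_gradient_to_shared before stepping the shared optimizer. The sketch below shows one plausible way to copy such an externally computed gradient into a shared model's .grad buffers; it assumes the gradient tuple is ordered like shared_model.parameters() and that the shared model lives on the CPU, and it is not the project's implementation.

def copy_meta_gradient_to_shared(meta_gradient, shared_model, gpu_id):
    for param, grad in zip(shared_model.parameters(), meta_gradient):
        if grad is None:             # allow_unused=True may yield None entries
            continue
        grad = grad.cpu() if gpu_id >= 0 else grad
        if param.grad is None:
            param.grad = grad.clone()
        else:
            param.grad.copy_(grad)
    # optimizer.step() on the shared model then applies this meta-gradient.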
Code example #6
File: a3c_val.py  Project: zebrajack/ECCV-VN
def a3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
    scenes,
):

    targets = AI2THOR_TARGET_CLASSES[args.num_category]

    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open != "":
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, scenes, targets, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    model_options = ModelOptions()

    while count < max_count:

        total_reward = 0
        player.eps_len = 0
        new_episode(args, player)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        while not player.done:
            player.sync_with_shared(shared_model)
            total_reward = run_episode(player, args, total_reward, model_options, False, shared_model)
            if not player.done:
                reset_player(player)

        spl, best_path_length = compute_spl(player, player_start_state)

        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            spl=spl,
            **bucketed_spl,
        )

        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
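
The headline metric in these validation loops is SPL (Success weighted by Path Length, Anderson et al. 2018). For reference, the per-episode formula is sketched below; the project's compute_spl may obtain the best path length differently (e.g. from the environment), and the reported SPL is the mean over episodes.

def episode_spl(success, best_path_length, path_length_taken):
    # SPL = S * L / max(P, L): 0 for a failed episode, 1 only for a successful
    # episode that follows a shortest path.
    if not success:
        return 0.0
    return best_path_length / max(path_length_taken, best_path_length)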
Code example #7
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
    global_ep,
):

    glove = None
    protos = None
    pre_metadata = None
    curriculum_meta = None
    scene_types = args.scene_types

    if args.glove_file:
        glove = Glove(args.glove_file)
    if args.proto_file:
        protos = Prototype(args.proto_file)

    if args.data_source == "ithor":
        from datasets.ithor_data import get_data
        scenes, possible_targets, targets = get_data(scene_types, args.train_scenes)

    elif args.data_source == "robothor":

        from datasets.robothor_data import get_data

        # Check whether pinned_scene mode is enabled.
        if args.pinned_scene:
            # TODO: design a flexible scene allocating strategy
            scene_types = [scene_types[(rank % len(scene_types))]]
            pre_metadata = preload_metadata(args, scene_types)

        scenes, possible_targets, targets = get_data(scene_types)

        if args.curriculum_learning:
            curriculum_meta = get_curriculum_meta(args, scenes)


    # If pinned_scene is set to True, all metadata for the controller
    # constructed in new_episode() has been pre-loaded above.


    random.seed(args.seed + rank)
    idx = list(range(len(scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0

    while not end_flag.value:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        # new_episode(args, player, scenes[idx[j]], possible_targets, targets[idx[j]],glove=glove, protos=protos,
        #     pre_metadata=pre_metadata, curriculum_meta=curriculum_meta, total_ep=global_ep.value)
        scene = new_episode(args, player, scenes[idx[j]], possible_targets, targets[idx[j]],glove=glove, protos=protos,
            pre_metadata=pre_metadata, curriculum_meta=curriculum_meta)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)

            # plot trajectory , by wuxiaodong
            if args.demo_trajectory and global_ep.value % args.demo_trajectory_freq == 0:
                print(len(player.episode.episode_trajectories))
                # todo delete
                # scene = 'FloorPlan_Train1_1'
                trajectory_pil = get_trajectory(scene,
                                                [str(loc) for loc in player.episode.episode_trajectories],
                                                birdview_root='./demo_robothor/data/birdview/',
                                                init_loc_str=player.episode.init_pos_str,
                                                target_loc_str=player.episode.target_pos_str,
                                                actions=player.episode.actions_taken,
                                                success=player.success, target_name=player.episode.target_object)
                demo_out_dir = os.path.join(args.log_dir, '../output_trajecgtory', args.title)
                if not os.path.exists(demo_out_dir):
                    os.makedirs(demo_out_dir)
                trajectory_pil.save(os.path.join(demo_out_dir, '{}_init_{}_target_{}_iter{}.png'.format(
                    player.episode.object_type,
                    player.episode.init_pos_str,
                    player.episode.target_pos_str,
                    global_ep.value
                )))
                print('ploting {}_init_{}_target_{}_iter{}.png'.format(
                    player.episode.object_type,
                    player.episode.init_pos_str,
                    player.episode.target_pos_str,
                    global_ep.value
                ))

            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        # print("Training Agent {}: finished episodes on {}, local loss {}".format(
        #     rank, scene, loss.cpu().detach().numpy() ))

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            policy_loss=loss['policy_loss'],
            value_loss=loss['value_loss']
        )

        reset_player(player)

        j = (j + 1) % len(scene_types)

    player.exit()
Code example #8
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):

    glove = None
    protos = None
    pre_metadata = None
    curriculum_meta = None
    scene_types = [scene_type]
    offline_shortest_data = None

    if args.glove_file:
        glove = Glove(args.glove_file)
    if args.proto_file:
        protos = Prototype(args.proto_file)

    if args.data_source == "ithor":

        from datasets.ithor_data import get_data, name_to_num

        scenes, possible_targets, targets = get_data(scene_types,
                                                     args.val_scenes)
        num = name_to_num(scene_type)
        scenes = scenes[0]
        targets = targets[0]

    elif args.data_source == "robothor":

        from datasets.robothor_data import get_data
        # TODO: design a flexible scene allocating strategy

        pre_metadata = preload_metadata(args, scene_types)

        scenes, possible_targets, targets = get_data(scene_types)
        scenes = scenes[0]
        targets = targets[0]

        if args.curriculum_learning:
            curriculum_meta = get_curriculum_meta(args, scenes)
            if args.offline_shortest_data:
                offline_shortest_data = load_offline_shortest_path_data(
                    args, scenes)

    setproctitle.setproctitle("Val Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open != "":
        saved_state = torch.load(model_to_open,
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    model_options = ModelOptions()

    while count < max_count:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        scene = new_episode(args,
                            player,
                            scenes,
                            possible_targets,
                            targets,
                            glove=glove,
                            protos=protos,
                            pre_metadata=pre_metadata,
                            curriculum_meta=curriculum_meta)
        if scene is None:  # iteration stopped
            break

        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Evaluate the new episode (no parameter updates during validation).
        while not player.done:

            # Make sure model is up to date.
            # player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward,
                                       model_options, False)
            # Compute the loss.
            # loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        # for k in loss:
        #     loss[k] = loss[k].item()
        if offline_shortest_data:  # assume data_source == robothor and curriculum_learning is True
            scene = player.environment.scene_name
            episode_id = player.episode.episode_id
            best_path_length = offline_shortest_data[scene][episode_id]
            spl = player.success * (best_path_length / float(player.eps_len))
        else:
            spl, best_path_length = compute_spl(player, player_start_state)

        bucketed_spl = get_bucketed_metrics(spl, best_path_length,
                                            player.success)
        if args.curriculum_learning:
            end_episode(player,
                        res_queue,
                        total_time=time.time() - player_start_time,
                        total_reward=total_reward,
                        spl=spl,
                        **bucketed_spl,
                        scene_type=scene_type,
                        difficulty=player.episode.difficulty)
        else:
            end_episode(
                player,
                res_queue,
                total_time=time.time() - player_start_time,
                total_reward=total_reward,
                spl=spl,
                **bucketed_spl,
                scene_type=scene_type,
            )

        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({
        "END": True,
        "scene_type": scene_type,
        "total_episodes": count
    })
Code example #9
File: a3c_train.py  Project: zebrajack/ECCV-VN
def a3c_train(
        rank,
        args,
        create_shared_model,
        shared_model,
        initialize_agent,
        optimizer,
        res_queue,
        end_flag,
        scenes,
):
    setproctitle.setproctitle('Training Agent: {}'.format(rank))

    targets = AI2THOR_TARGET_CLASSES[args.num_category]

    random.seed(args.seed + rank)
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, scenes, targets, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    episode_num = 0

    while not end_flag.value:

        total_reward = 0
        player.eps_len = 0
        player.episode.episode_times = episode_num
        new_episode(args, player)
        player_start_time = time.time()

        while not player.done:
            player.sync_with_shared(shared_model)
            total_reward = run_episode(player, args, total_reward, model_options, True)
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad and loss['total_loss'] != 0:
                player.model.zero_grad()
                loss['total_loss'].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=num_to_name(int(player.episode.scene[9:])),
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        episode_num = (episode_num + 1) % len(args.scene_types)

    player.exit()
Code example #10
File: nonadaptivea3c_train.py  Project: cspatil8/savn
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    # print('Now Im in nonadaptivea3c_train')
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)
    # print('We have glove embeddings and data wow')
    random.seed(args.seed + rank)
    idx = [j for j in range(len(args.scene_types))]
    random.shuffle(idx)
    # print('scene types have been shuffled')
    setproctitle.setproctitle("Training Agent: {}".format(rank))
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    # print('something gpu id')
    import torch
    # print('Torch imported')
    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        # print('gpu id > 0, who knows what happens next')
        torch.cuda.manual_seed(args.seed + rank)
    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    # print('agent initialized')
    compute_grad = not isinstance(player, RandomNavigationAgent)
    # print('Something something compute gradient')
    model_options = ModelOptions()

    j = 0
    print('Right before while loop')
    while not end_flag.value:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(),
                                               100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(
                    player, shared_model, gpu_id)
                optimizer.step()
                # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)
    print('End of while loop')
    player.exit()