Esempio n. 1
0
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """A3C training worker: run episodes and push gradients to the shared model.

    Runs as one of several worker processes (indexed by ``rank``).  Each
    finished episode is reported through ``res_queue``; the loop stops when
    ``end_flag.value`` becomes truthy.

    Args:
        rank: Worker index; offsets the RNG seeds and picks the GPU.
        args: Parsed experiment configuration (seeds, scenes, gpu_ids, ...).
        create_shared_model: Factory used by ``initialize_agent`` to build
            the local model.
        shared_model: Model whose parameters all workers read and update.
        initialize_agent: Factory returning the navigation agent ("player").
        optimizer: Shared optimizer stepped after each gradient transfer.
        res_queue: Queue receiving per-episode result records.
        end_flag: Shared flag; truthy value terminates training.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)

    # Per-worker deterministic shuffle of the scene-type visiting order.
    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    # BUG FIX: only bind a CUDA device when one was actually assigned;
    # torch.cuda.set_device(-1) raises on CPU-only configurations (the
    # seeding below already uses this same guard).
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    # Random agents have no trainable parameters, so skip backprop for them.
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0  # index into the shuffled scene-type order

    while not end_flag.value:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient and clip it to keep A3C updates stable.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(),
                                               100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(
                    player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden state between partial runs.
            if not player.done:
                reset_player(player)

        # Detach losses to plain floats before reporting.
        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
Esempio n. 2
0
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):
    """A3C validation worker: evaluate a saved model on one scene type.

    Runs ``max_count`` episodes on the validation scenes for ``scene_type``,
    reporting per-episode SPL and reward through ``res_queue``.  A final
    ``{"END": True}`` record signals completion to the consumer.

    Args:
        rank: Worker index; offsets the RNG seeds and picks the GPU.
        args: Parsed experiment configuration.  NOTE: ``args.max_episode_length``
            is mutated here based on ``scene_type``.
        model_to_open: Checkpoint path; empty string means evaluate a fresh
            (randomly initialized) model.
        model_create_fn: Factory that builds the model to load weights into.
        initialize_agent: Factory returning the navigation agent ("player").
        res_queue: Queue receiving per-episode result records.
        max_count: Number of validation episodes to run.
        scene_type: Scene category name (e.g. "living_room") to evaluate on.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    # Living rooms are larger, so allow longer episodes there.
    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open != "":
        # Load onto CPU first; the agent moves weights to its device on sync.
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state['model'])

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    model_options = ModelOptions()

    while count < max_count:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes, possible_targets, targets, glove=glove)
        # Remember the start pose so SPL can be computed afterwards.
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Evaluate on the new episode (no gradient updates).
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, False)
            # Compute the loss (reported only; not backpropagated).
            loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        # Detach losses to plain floats before reporting.
        for k in loss:
            loss[k] = loss[k].item()
        spl, best_path_length = compute_spl(player, player_start_state)

        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            spl=spl,
            **bucketed_spl,
        )

        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
Esempio n. 3
0
def savn_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """SAVN meta-training worker (MAML-style inner/outer loop).

    Each episode: start from the shared parameters theta, take up to
    ``args.gradient_limit`` inner SGD steps on the self-supervised
    "learned loss", then differentiate the final navigation loss with
    respect to theta and apply that meta-gradient to the shared model.

    Args:
        rank: Worker index; offsets the RNG seeds and picks the GPU.
        args: Parsed experiment configuration (seeds, inner_lr,
            gradient_limit, verbose, ...).
        create_shared_model: Factory used by ``initialize_agent``.
        shared_model: Model holding the meta-parameters theta.
        initialize_agent: Factory returning the navigation agent ("player").
        optimizer: Shared optimizer stepped after each meta-gradient transfer.
        res_queue: Queue receiving per-episode result records.
        end_flag: Shared flag; truthy value terminates training.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)

    # Per-worker deterministic shuffle of the scene-type visiting order.
    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    # BUG FIX: only bind a CUDA device when one was actually assigned;
    # torch.cuda.set_device(-1) raises on CPU-only configurations.
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)

    model_options = ModelOptions()

    j = 0  # index into the shuffled scene-type order

    while not end_flag.value:

        start_time = time.time()
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        # NOTE(review): loss_dict / reward_dict / num_gradients are collected
        # below but never reported anywhere in this function — kept for
        # debugging parity with the original; confirm whether they should be
        # passed to end_episode.
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        # Accumulate loss over all meta_train episodes.
        while True:
            # Run episode for k steps or until it is done or has made a
            # mistake (if dynamic adapt is true).
            if args.verbose:
                print("New inner step")
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)

            if player.done:
                break

            # A negative gradient_limit means "unlimited inner steps".
            if args.gradient_limit < 0 or episode_num < args.gradient_limit:

                num_gradients += 1

                # Compute the self-supervised inner loss.
                learned_loss = compute_learned_loss(args, player, gpu_id,
                                                    model_options)

                if args.verbose:
                    print("inner gradient")
                # create_graph=True keeps the inner step differentiable so
                # the meta-gradient below can flow back to theta.
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                # theta_{i+1} = theta_i - inner_lr * grad
                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient,
                             args.inner_lr))
                model_options.params = params_list[-1]

                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)

        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        if args.verbose:
            print("meta gradient")

        # Compute the meta_gradient, i.e. differentiate w.r.t. theta.
        meta_gradient = torch.autograd.grad(
            loss["total_loss"],
            [v for _, v in params_list[0].items()],
            allow_unused=True,
        )

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            episode_num=0,
            total_time=time.time() - start_time,
            total_reward=total_reward,
        )

        # Copy the meta_gradient to shared_model and step.
        transfer_gradient_to_shared(meta_gradient, shared_model, gpu_id)
        optimizer.step()
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()
Esempio n. 4
0
def savn_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
    glove_file=None,
    img_file=None,
):
    """SAVN validation worker: evaluate with test-time adaptation.

    Runs ``max_count`` episodes on the validation scenes for ``scene_type``.
    During each episode the agent still performs the SAVN inner-loop SGD
    steps on the learned loss (test-time adaptation) but no meta-gradient is
    ever applied.  SPL metrics are reported through ``res_queue``; a final
    ``{"END": True}`` record signals completion.

    Args:
        rank: Worker index; offsets the RNG seeds and picks the GPU.
        args: Parsed experiment configuration.  NOTE: ``args.max_episode_length``
            is mutated here based on ``scene_type``.
        model_to_open: Checkpoint path, or None to evaluate a fresh model.
        model_create_fn: Factory that builds the model to load weights into.
        initialize_agent: Factory returning the navigation agent ("player").
        res_queue: Queue receiving per-episode result records.
        max_count: Number of validation episodes to run.
        scene_type: Scene category name to evaluate on.
        glove_file: Optional embedding handle forwarded to ``new_episode``.
        img_file: Unused; kept for interface compatibility.
    """
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    # Living rooms are larger, so allow longer episodes there.
    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    # BUG FIX: only bind a CUDA device when one was actually assigned;
    # torch.cuda.set_device(-1) raises on CPU-only configurations.
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)

    if model_to_open is not None:
        # Load onto CPU first; the agent moves weights to its device on sync.
        saved_state = torch.load(model_to_open,
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)

    # BUG FIX: the original initialized the player twice, discarding the
    # instance that had just been synced with shared_model.  Create it once
    # and sync it.
    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)
    count = 0

    player_actions = {}

    model_options = ModelOptions()

    while count < max_count:

        count += 1

        start_time = time.time()
        new_episode(args,
                    player,
                    scenes,
                    possible_targets,
                    targets,
                    glove=glove_file,
                    img_file=None)
        # Remember the start pose so SPL can be computed afterwards.
        player_start_state = copy.deepcopy(player.environment.controller.state)
        if args.verbose:
            print(player_start_state)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        # Route recording for optional JSON dump below.
        player_actions['scene'] = player.environment.scene_name
        player_actions['target_object'] = player.episode.task_data[0]
        player_actions['positions'] = []
        player_actions['positions'].append(str(player_start_state))

        while True:
            total_reward = run_episode(player, args, total_reward,
                                       model_options, False)
            player_actions['positions'].append(
                str(player.environment.controller.state))

            if player.done:
                break

            # A negative gradient_limit means "unlimited inner steps".
            if args.gradient_limit < 0 or episode_num < args.gradient_limit:

                num_gradients += 1

                # Compute the self-supervised inner loss.
                learned_loss = compute_learned_loss(args, player, gpu_id,
                                                    model_options)

                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                # theta_{i+1} = theta_i - inner_lr * grad
                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient,
                             args.inner_lr))
                model_options.params = params_list[-1]

                reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)
        player_actions['success'] = player.success

        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length,
                                            player.success, player.actions[-1],
                                            player.arrive)

        if args.record_route:
            # NOTE(review): hard-coded absolute path — should come from args
            # (e.g. args.record_route_path) so other machines can run this.
            with open(
                    '/home/duhm/Code/savn_deployment/players_action_test.json',
                    'a') as write_file:
                json.dump(player_actions, write_file)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - start_time,
            spl=spl,
            **reward_dict,
            **bucketed_spl,
        )

        reset_player(player)

    player.exit()
    res_queue.put({"END": True})
    res_queue.put({"END": True})
Esempio n. 5
0
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    """A3C training worker: run episodes and push gradients to the shared model.

    Runs as one of several worker processes (indexed by ``rank``).  Each
    finished episode is reported through ``res_queue``; the loop stops when
    ``end_flag.value`` becomes truthy.

    Args:
        rank: Worker index; offsets the RNG seeds and picks the GPU.
        args: Parsed experiment configuration (seeds, scenes, gpu_ids, ...).
        create_shared_model: Factory used by ``initialize_agent`` to build
            the local model.
        shared_model: Model whose parameters all workers read and update.
        initialize_agent: Factory returning the navigation agent ("player").
        optimizer: Shared optimizer stepped after each gradient transfer.
        res_queue: Queue receiving per-episode result records.
        end_flag: Shared flag; truthy value terminates training.
    """
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types,
                                                 args.train_scenes)

    # Per-worker deterministic shuffle of the scene-type visiting order.
    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    import torch

    # BUG FIX: only bind a CUDA device when one was actually assigned;
    # torch.cuda.set_device(-1) raises on CPU-only configurations (the
    # seeding below already uses this same guard).  Leftover debug prints
    # and commented-out trace statements from the original were removed.
    if gpu_id >= 0:
        torch.cuda.set_device(gpu_id)

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    # Random agents have no trainable parameters, so skip backprop for them.
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0  # index into the shuffled scene-type order

    while not end_flag.value:

        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args,
                    player,
                    scenes[idx[j]],
                    possible_targets,
                    targets[idx[j]],
                    glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward,
                                       model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient and clip it to keep A3C updates stable.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(),
                                               100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(
                    player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden state between partial runs.
            if not player.done:
                reset_player(player)

        # Detach losses to plain floats before reporting.
        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()