Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='pong')
    parser.add_argument('--lambda-pi', type=float, default=1.0)
    parser.add_argument('--lambda-ve', type=float, default=1.0)
    parser.add_argument('--history-len', type=int, default=4)
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()

    utils.seed_all(seed=args.seed)

    optimizer = tf.train.AdamOptimizer(learning_rate=5e-5,
                                       epsilon=1e-4,
                                       use_locking=True)

    a3c.execute(
        lambda: make_atari_env(args.env, args.history_len),
        AtariPolicy,
        optimizer,
        discount=0.99,
        lambda_pi=args.lambda_pi,
        lambda_ve=args.lambda_ve,
        entropy_bonus=0.01,
        max_sample_length=10,
        n_actors=16,
        max_timesteps=10000000,
        grad_clip=40.,
        log_every_n_steps=50000,
    )
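The helper `utils.seed_all` used above is repo-local and not shown. A minimal sketch of what such a helper typically does for this TF1-style code (an assumption, not the repo's actual implementation):

import random

import numpy as np
import tensorflow as tf


def seed_all(seed=0):
    # Hypothetical sketch of utils.seed_all: seed every RNG in play.
    random.seed(seed)         # Python's built-in RNG
    np.random.seed(seed)      # NumPy RNG
    tf.set_random_seed(seed)  # TF1 graph-level seed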
Example #2
def main(args):

    logdir = init_logging(args)
    logger = logging.getLogger(__name__)

    args.logdir = logdir

    if args.cpu or not th.cuda.is_available():
        device = th.device('cpu')
    else:
        device = th.device('cuda')
        cudnn.enabled = True
        cudnn.benchmark = True

    if not args.devrun and not args.nosave:
        wandb.init(config=args, dir=logdir, project=args.project)

        if args.name is not None:
            wandb.run.name = args.name
        # else:
        #     wandb.run.name = wandb.run.id

    seed_all(args.seed)

    logger.info('Creating dataloader')
    loader = create_dataloader(args)

    logger.info('Creating model')
    model = create_model(args).to(device)

    logger.info('Creating optimiser')
    opt = create_optimiser(model.parameters(), args)

    logger.info('Creating loss')
    loss = create_loss(args)

    logger.info('Creating trainer')
    trainer = create_trainer(loader, model, opt, loss, device, args)

    epochs = args.epochs
    epoch_length = args.epoch_length

    logger.info('Starting trainer')
    if not args.devrun and not args.nosave:
        # wandb.init is only called under this same condition above, so guard
        # wandb.watch the same way to avoid calling it on an uninitialised run.
        wandb.watch(model, log="all", log_freq=1)
    trainer.run(loader['train'], max_epochs=epochs, epoch_length=epoch_length)
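Note that this script turns on `cudnn.benchmark` for speed, which makes convolution algorithm selection non-deterministic. If exact run-to-run reproducibility matters more than throughput, the standard PyTorch switches go the other way (a sketch, not part of the original script):

import torch.backends.cudnn as cudnn

cudnn.benchmark = False      # disable autotuning (reproducible, often slower)
cudnn.deterministic = True   # force deterministic convolution algorithms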
Example #3
def main():
    env_name = 'CartPole-v0'

    utils.seed_all(seed=0)

    optimizer = tf.train.AdamOptimizer(learning_rate=5e-5, use_locking=True)

    a3c.execute(
        lambda: make_env(env_name),
        CartPolePolicy,
        optimizer,
        discount=0.99,
        lambda_pi=1.0,
        lambda_ve=1.0,
        entropy_bonus=0.01,
        max_sample_length=20,
        n_actors=16,
        max_timesteps=1000000,
        log_every_n_steps=10000,
    )
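`make_env` is repo-local; for a plain Gym task like CartPole-v0 it is presumably little more than a `gym.make` wrapper. A sketch under that assumption:

import gym


def make_env(env_name):
    # Hypothetical stand-in for the repo's make_env helper.
    return gym.make(env_name)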
Example #4
def main(argv, common_opts):
    args = parse_args(argv)
    seed_all(12345)
    init_algorithms(deterministic=True)
    torch.set_grad_enabled(False)

    device = common_opts['device']

    assert not args.multicrop, 'TODO: Implement multi-crop for single image inference.'

    model = load_model(args.model).to(device).eval()

    input_specs: ImageSpecs = model.data_specs.input_specs

    image: PIL.Image.Image = PIL.Image.open(args.image, 'r')
    image.thumbnail((input_specs.width, input_specs.height))
    inp = input_specs.convert(image).to(device, torch.float32)

    output = model(inp[None, ...])[0]

    print(output)
    norm_skel3d = ensure_cartesian(output.to(CPU, torch.float64), d=3)

    fig = plt.figure(figsize=(16, 8))
    ax1 = fig.add_subplot(1, 2, 1)
    ax2: Axes3D = fig.add_subplot(1, 2, 2, projection='3d')

    # for x, y, z in output.to(CPU, torch.float64):
    #     ax2.scatter(x, y, z)
    ax1.imshow(input_specs.unconvert(inp.to(CPU)))
    plot_skeleton_on_axes3d(norm_skel3d,
                            CanonicalSkeletonDesc,
                            ax2,
                            invert=True)

    plt.show()
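`init_algorithms(deterministic=True)` above is repo-specific. A plausible minimal body using only standard PyTorch flags (an assumption, not the repo's code):

import torch


def init_algorithms(deterministic=False):
    # Hypothetical sketch; the real helper may configure more than cuDNN.
    if deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False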
Example #5
def experiment(device, args=None):
    """Train model.

    Args:
        device (str): device to use for training.
        args (dict): experiment arguments.
    """
    if args is None:
        args = {}

    train_config = args["train"]
    train_augmentations = albu.Compose(
        [
            albu.OneOf([
                albu.HueSaturationValue(hue_shift_limit=10,
                                        sat_shift_limit=35,
                                        val_shift_limit=25),
                albu.RandomGamma(),
                albu.CLAHE(),
            ]),
            albu.RandomBrightnessContrast(brightness_limit=[-0.3, 0.3],
                                          contrast_limit=[-0.3, 0.3],
                                          p=0.5),
            albu.OneOf([
                albu.Blur(),
                albu.MotionBlur(),
                albu.GaussNoise(),
                albu.ImageCompression(quality_lower=75)
            ]),
            albu.ShiftScaleRotate(shift_limit=0.0625,
                                  scale_limit=0.15,
                                  rotate_limit=10,
                                  border_mode=0,
                                  p=0.5),
            albu.Resize(300, 300),
            albu.Normalize(),
            ToTensorV2(),
        ],
        bbox_params=albu.BboxParams(
            format="albumentations"
        ),  # 'albumentations' because x1, y1, x2, y2 in range [0, 1]
    )
    train_dataset = COCOFileDataset(train_config["annotations"],
                                    train_config["images_dir"],
                                    transforms=train_augmentations)
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_config["batch_size"],
        num_workers=train_config["num_workers"],
        shuffle=True,
        drop_last=True,
    )
    logger.info("Train dataset information:")
    logger.info("\n" + train_dataset.info())

    valid_config = args["validation"]
    valid_augmentations = albu.Compose(
        [
            albu.Resize(300, 300),
            albu.Normalize(),
            ToTensorV2(),
        ],
        bbox_params=albu.BboxParams(
            format="albumentations"
        ),  # 'albumentations' because x1, y1, x2, y2 in range [0, 1]
    )
    valid_dataset = COCOFileDataset(valid_config["annotations"],
                                    valid_config["images_dir"],
                                    transforms=valid_augmentations)
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_config["batch_size"],
        num_workers=valid_config["num_workers"],
        shuffle=False,
        drop_last=False,
    )
    logger.info("Validation dataset information:")
    logger.info("\n" + valid_dataset.info())

    model_config = args["model"]
    num_classes = model_config["num_classes"] + 1  # +1 for background class
    seed_all(42)
    model = SSD300(model_config["backbone"], num_classes)
    model = model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=1e-3 / 2)
    # optimizer = optim.SGD(model.parameters(), lr=2.6e-3, momentum=0.9, weight_decay=0.0005)
    epoch_scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args["experiment"]["num_epochs"])
    batch_scheduler = None
    criterion = Loss(num_classes)

    experiment_config = args["experiment"]
    num_epochs = experiment_config["num_epochs"]
    for epoch_idx in range(1, num_epochs + 1):
        logger.info(f"Epoch: {epoch_idx}/{num_epochs}")
        train_metrics = train_fn(train_loader,
                                 model,
                                 device,
                                 criterion,
                                 optimizer,
                                 batch_scheduler,
                                 verbose=False)
        logger.info(f"     Train: {train_metrics}")

        # TODO: checkpoints
        valid_metrics = valid_fn(valid_loader,
                                 model,
                                 device,
                                 criterion,
                                 verbose=False)
        logger.info(f"Validation: {valid_metrics}")

        epoch_scheduler.step()

    export_to_onnx(model, torch.randn(1, 3, 300, 300),
                   experiment_config["onnx"])
    logger.info("Exported ONNX model to '{}'".format(
        experiment_config["onnx"]))
Example #6
def train_alg(model_alg, reset_optimizers_between_envs,
              reset_optimizers_every_iter, buffer_size, subsave, iteration,
              last_round_no_mer, is_evolving, seed):
    seed_all(seed)
    training_timesteps = META_TRAINING_TIMESTEPS
    params = params_list
    if not is_evolving:
        params = [params[-1]]

    start_time = time()
    env = gym.make(env_name)
    eval_env = gym.make(env_name)
    final_eval_env = gym.make(env_name)
    final_parameters_dict = params_sampler.sample1_means()
    change_env_parameters(final_eval_env, parameter_dict=final_parameters_dict)
    tensorboard_path = subsave + '/tb_' + str(iteration)

    optimizer_kwargs = {}
    policy_kwargs = {
        'optimizer_class': th.optim.Adam,
        'optimizer_kwargs': optimizer_kwargs,
    }
    model = model_alg(
        MlpPolicy,
        env,
        verbose=0,
        buffer_size=buffer_size,
        batch_size=BATCH_SIZE,
        learning_rate=LEARNING_RATE,
        learning_starts=LEARNING_STARTS,
        gradient_steps=GRADIENT_STEPS,
        policy_kwargs=policy_kwargs,
        mer_s=MER_S,
        mer_gamma=MER_GAMMA,
        monitor_wrapper=True,
        tensorboard_log=tensorboard_path,
        reset_optimizers_during_training=reset_optimizers_every_iter,
        seed=seed)

    for i_param, param in enumerate(params):
        log_name = 'run_' + str(i_param)
        if i_param == (len(params) - 1):
            if not is_evolving:
                training_timesteps = FINAL_TRAINING_TIMESTEPS + NUM_TRAINING_ENVS * META_TRAINING_TIMESTEPS
            else:
                training_timesteps = FINAL_TRAINING_TIMESTEPS
            log_name += '_final'
        change_env_parameters(env, eval_env, parameter_dict=param)
        if model_alg.__name__ == 'SACMER' and last_round_no_mer and (
                i_param == (len(params) - 1)):
            is_reservoir = False
            is_mer = False
        else:  # This will not have any effect on regular SAC
            is_reservoir = True
            is_mer = True
        model.update_env(env,
                         monitor_wrapper=False,
                         is_reservoir=is_reservoir,
                         reset_optimizers=reset_optimizers_between_envs
                         )  # environment already wrapped, so monitor_wrapper=False
        eval_callback = EvalCallback(eval_env,
                                     best_model_save_path=None,
                                     log_path=tensorboard_path + '/' +
                                     log_name + '/running_eval',
                                     eval_freq=EVAL_FREQ,
                                     n_eval_episodes=N_EVAL_EPISODES,
                                     deterministic=True,
                                     render=False)
        if is_evolving:
            final_eval_callback = EvalCallback(final_eval_env,
                                               best_model_save_path=None,
                                               log_path=tensorboard_path +
                                               '/' + log_name + '/final_eval',
                                               eval_freq=EVAL_FREQ,
                                               n_eval_episodes=N_EVAL_EPISODES,
                                               deterministic=True,
                                               render=False)
        else:
            final_eval_callback = EventCallback()
        model.learn(total_timesteps=training_timesteps,
                    log_interval=1,
                    reset_num_timesteps=False,
                    tb_log_name=log_name,
                    is_mer=is_mer,
                    callback=CallbackList([eval_callback,
                                           final_eval_callback]))
        env.reset()
        eval_env.reset()
    # Saving models fills up storage, so we only save one
    # (which we will also probably not use).
    if iteration == 0:
        model.save(subsave + '/model_' + str(iteration))
    print(f"Done. Total time = {time() - start_time} seconds.")
Example #7
def main():
    args = parser.parse_args()

    utils.seed_all(args.seed)

    if args.on_server:
        # matplotlib without monitor
        matplotlib.use("Agg")

        # pygame without monitor
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    #####for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d_%H:%M:%S")
    ###################

    if not args.save_folder:
        print("Provide save folder.")
        exit()

    policy_net_dims = "-policy_net-"
    for dim in args.policy_net_hidden_dims:
        policy_net_dims += str(dim)
        policy_net_dims += "-"

    reward_net_dims = "-reward_net-"
    for dim in args.reward_net_hidden_dims:
        reward_net_dims += str(dim)
        reward_net_dims += "-"

    parent_dir = ("./results/" + str(args.save_folder) + st + policy_net_dims +
                  reward_net_dims)
    to_save = ("./results/" + str(args.save_folder) + st + policy_net_dims +
               reward_net_dims + "-reg-" + str(args.regularizer) + "-seed-" +
               str(args.seed) + "-lr-" + str(args.lr_irl))

    log_file = "Experiment_info.txt"

    experiment_logger = Logger(to_save, log_file)
    experiment_logger.log_header("Arguments for the experiment :")
    repo = git.Repo(search_parent_directories=True)
    experiment_logger.log_info({'From branch : ': repo.active_branch.name})
    experiment_logger.log_info({'Commit number : ': repo.head.object.hexsha})
    experiment_logger.log_info(vars(args))

    # from rlmethods.rlutils import LossBasedTermination
    # for rl
    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac_pi import SoftActorCritic
    from rlmethods.soft_ac import SoftActorCritic as QSAC
    from rlmethods.rlutils import ReplayBuffer

    # for irl
    from irlmethods.deep_maxent import DeepMaxEnt
    import irlmethods.irlUtils as irlUtils
    from featureExtractor.gridworld_featureExtractor import (
        OneHot,
        LocalGlobal,
        SocialNav,
        FrontBackSideSimple,
    )
    # Drone-based feature extractors used below; import path as in Example #11
    # (missing from the original snippet).
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureSAM1,
        DroneFeatureRisk,
        DroneFeatureRisk_v2,
        DroneFeatureRisk_speed,
        DroneFeatureRisk_speedv2,
        VasquezF1,
        VasquezF2,
        VasquezF3,
        Fahad,
        GoalConditionedFahad,
    )

    agent_width = 10
    step_size = 2
    obs_width = 10
    grid_size = 10

    if args.feat_extractor is None:

        print("Feature extractor missing.")
        exit()

    # check for the feature extractor being used
    # initialize feature extractor
    if args.feat_extractor == "Onehot":
        feat_ext = OneHot(grid_rows=10, grid_cols=10)
    if args.feat_extractor == "SocialNav":
        feat_ext = SocialNav()
    if args.feat_extractor == "FrontBackSideSimple":
        feat_ext = FrontBackSideSimple(
            thresh1=1,
            thresh2=2,
            thresh3=3,
            thresh4=4,
            step_size=step_size,
            agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == "LocalGlobal":
        feat_ext = LocalGlobal(
            window_size=5,
            grid_size=grid_size,
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == "DroneFeatureSAM1":

        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=5,
            thresh2=10,
        )

    if args.feat_extractor == "DroneFeatureRisk":

        feat_ext = DroneFeatureRisk(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_v2":

        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_speed":

        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=10,
            thresh2=15,
        )

    if args.feat_extractor == "DroneFeatureRisk_speedv2":

        feat_ext = DroneFeatureRisk_speedv2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=18,
            thresh2=30,
        )

    if args.feat_extractor == 'VasquezF1':
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == 'VasquezF2':
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == 'VasquezF3':
        feat_ext = VasquezF3(agent_width)

    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    experiment_logger.log_header("Parameters of the feature extractor :")
    experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    if not args.dont_save and args.save_folder is None:
        print("Specify folder to save the results.")
        exit()
    """
    environment can now initialize without an annotation file
    if args.annotation_file is None:
        print('Specify annotation file for the environment.')
        exit()
    """
    if args.exp_trajectory_path is None:
        print("Specify expert trajectory folder.")
        exit()
    """
    env = GridWorld(display=args.render, is_onehot= False,is_random=False,
                    rows =10,
                    cols =10,
                    seed = 7,
                    obstacles = [np.asarray([5,5])],
                                
                    goal_state = np.asarray([1,5]))

    """

    env = GridWorld(
        display=args.render,
        is_random=True,
        rows=576,
        cols=720,
        agent_width=agent_width,
        step_size=step_size,
        obs_width=obs_width,
        width=grid_size,
        subject=args.subject,
        annotation_file=args.annotation_file,
        goal_state=None,
        step_wrapper=utils.step_wrapper,
        seed=args.seed,
        replace_subject=args.replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        continuous_action=False,
        reset_wrapper=utils.reset_wrapper,
        consider_heading=True,
        is_onehot=False,
    )

    experiment_logger.log_header("Environment details :")
    experiment_logger.log_info(env.__dict__)

    # CHANGE HERE
    # initialize loss based termination
    # initialize RL method
    # CHANGE HERE

    if args.rl_method == "ActorCritic":
        rl_method = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=args.rl_log_intervals,
            max_episode_length=args.rl_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=to_save,
            lr=args.lr_rl,
            max_episodes=args.rl_episodes,
        )

    if args.rl_method == "SAC":
        if not env.continuous_action:
            print("The action space needs to be continuous for SAC to work.")
            exit()
        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        rl_method = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            play_interval=500,
            learning_rate=args.lr_rl,
            buffer_sample_size=args.replay_buffer_sample_size,
        )

    if args.rl_method == "discrete_SAC":
        if not isinstance(env.action_space, gym.spaces.Discrete):
            print(
                "discrete SAC requires a discrete action space environmnet to work."
            )
            exit()

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        rl_method = QSAC(
            env,
            replay_buffer,
            feat_ext,
            args.replay_buffer_sample_size,
            learning_rate=args.lr_rl,
            entropy_tuning=True,
            entropy_target=0.3,
            play_interval=args.play_interval,
        )

    print("RL method initialized.")
    print(rl_method.policy)
    if args.policy_path is not None:
        rl_method.policy.load(args.policy_path)

    experiment_logger.log_header("Details of the RL method :")
    experiment_logger.log_info(rl_method.__dict__)

    # initialize IRL method
    # CHANGE HERE
    trajectory_path = args.exp_trajectory_path

    # Default to no scaling when --scale-svf is unset or falsy; otherwise
    # pass the value through.
    scale = args.scale_svf if args.scale_svf else False
    irl_method = DeepMaxEnt(
        trajectory_path,
        rlmethod=rl_method,
        env=env,
        iterations=args.irl_iterations,
        on_server=args.on_server,
        l1regularizer=args.regularizer,
        learning_rate=args.lr_irl,
        seed=args.seed,
        graft=False,
        scale_svf=scale,
        hidden_dims=args.reward_net_hidden_dims,
        clipping_value=args.clipping_value,
        enumerate_all=True,
        save_folder=parent_dir,
        rl_max_ep_len=args.rl_ep_length,
        rl_episodes=args.rl_episodes,
    )

    print("IRL method intialized.")
    print(irl_method.reward)

    experiment_logger.log_header("Details of the IRL method :")
    experiment_logger.log_info(irl_method.__dict__)

    smoothing_flag = False
    if args.svf_smoothing:
        smoothing_flag = True

    irl_method.train(smoothing=smoothing_flag)

    if not args.dont_save:
        pass
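The long `if args.feat_extractor == ...` chain above (repeated in Example #11) could be collapsed into a dispatch table inside main(), reusing names already defined in the example. A sketch of that hypothetical refactor with a few of the constructors:

    # Registry mapping extractor names to constructors (subset shown).
    FEATURE_EXTRACTORS = {
        'Onehot': lambda: OneHot(grid_rows=10, grid_cols=10),
        'VasquezF1': lambda: VasquezF1(agent_width * 6, 0.5, 1.0),
        'VasquezF3': lambda: VasquezF3(agent_width),
        'Fahad': lambda: Fahad(36, 60, 0.5, 1.0),
    }
    feat_ext = FEATURE_EXTRACTORS[args.feat_extractor]()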
Example #8
def main():

    args = parser.parse_args()

    utils.seed_all(args.seed)

    if args.on_server:
        # matplotlib without monitor
        matplotlib.use('Agg')

        # pygame without monitor
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
    from matplotlib import pyplot as plt

    save_folder = None
    if not args.dont_save:
        save_folder = './results/' + args.save_folder
        experiment_logger = Logger(save_folder, 'experiment_info.txt')

        experiment_logger.log_header('Arguments for the experiment :')
        experiment_logger.log_info(vars(args))
    

    mp.set_start_method('spawn')

    if args.render:
        from envs.gridworld import GridWorld
    else:
        from envs.gridworld_clockless import GridWorldClockless as GridWorld
        

    if args.feat_extractor == 'MCFeatures':
        feat_ext = MCFeatures(args.state_discretization[0], args.state_discretization[1])

    elif args.feat_extractor == 'MCFeaturesOnehot':
        feat_ext = MCFeaturesOnehot(args.state_discretization[0], args.state_discretization[1])

    else:
        print('Enter proper feature extractor value.')
        exit()

    if not args.dont_save:
        experiment_logger.log_header('Parameters of the feature extractor :')
        experiment_logger.log_info(feat_ext.__dict__)

    '''
    np.asarray([2,2]),np.asarray([7,4]),np.asarray([3,5]),
                                np.asarray([5,2]),np.asarray([8,3]),np.asarray([7,5]),
                                np.asarray([3,3]),np.asarray([3,7]),np.asarray([5,7])
                                
    env = GridWorld(display=args.render, is_onehot= False,is_random=True,
                    rows=100, agent_width=agent_width,step_size=step_size,
                    obs_width=obs_width,width=grid_size,
                    cols=100,
                    seed=7,
                    buffer_from_obs=0,
                    obstacles=3,
                                
                    goal_state=np.asarray([5,5]))
    '''
    env = gym.make('MountainCar-v0')
    env = env.unwrapped

    if not args.dont_save:

        experiment_logger.log_header('Environment details :')
        experiment_logger.log_info(env.__dict__)


    model = ActorCritic(env, feat_extractor=feat_ext, gamma=0.99, plot_loss=False,
                        log_interval=10, max_ep_length=300, hidden_dims=args.policy_net_hidden_dims,
                        max_episodes=30, save_folder=save_folder)

    if not args.dont_save:

        experiment_logger.log_header('Details of the RL method :')
        experiment_logger.log_info(model.__dict__)
    
    #pdb.set_trace()

    if args.policy_path is not None:
        policy_file_list = []
        reward_across_models = []
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, '*.pt'))
            policy_file_list = sorted(policy_names, key=numericalSort)
        
        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        if args.reward_path is None:
            model.train_mp(n_jobs=4)
        else:

            from irlmethods.deep_maxent import RewardNet
            state_size = feat_ext.state_rep_size
            reward_net = RewardNet(state_size, args.policy_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train_mp(reward_net=reward_net, n_jobs=4)

        if not args.dont_save:
            model.policy.save(save_folder + '/policy/')

    if args.play:
        xaxis = []
        counter = 1
        print(policy_file_list)
        for policy_file in policy_file_list:

            model.policy.load(policy_file)

            env.tickSpeed = 15
            assert args.policy_path is not None, 'pass a policy to play from!'

            reward_across_models.append(model.generate_trajectory(args.num_trajs, args.render))

            # plotting the 2d list
            xaxis.append(counter)
            counter += 1
            reward_across_models_np = np.array(reward_across_models)
            mean_rewards = np.mean(reward_across_models_np, axis=1)
            std_rewards = np.std(reward_across_models_np, axis=1)
            plt.plot(xaxis, mean_rewards, color='r', label='IRL trained agent')
            plt.fill_between(xaxis, mean_rewards - std_rewards,
                             mean_rewards + std_rewards, alpha=0.5, facecolor='r')
            plt.draw()
            plt.pause(0.001)
            '''
            print('RAM usage :')
            display_memory_usage(process.memory_info().rss)
            print('GPU usage :')
            display_memory_usage(torch.cuda.memory_allocated())
            torch.cuda.empty_cache()
            display_memory_usage(torch.cuda.memory_allocated())
            '''
            #plt.show()
        plt.show()
    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(args.num_trajs, './trajs/ac_gridworld_user/')
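`numericalSort` above (imported from `debugtools` in Example #11) serves as a sort key that orders checkpoint files by the numbers embedded in their names. A typical implementation of such a key function (an assumption, not the repo's code):

import re


def numericalSort(value):
    # Split 'policy_12.pt' into ['policy_', 12, '.pt'] so that filenames
    # compare by numeric value rather than lexicographically.
    parts = re.split(r'(\d+)', value)
    parts[1::2] = map(int, parts[1::2])
    return parts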
Example #9
def pretrain_gn(args, config, seed=0):
    """
    more reading: https://pytorch.org/docs/stable/distributed.html
    """
    seed_all(seed)
    rank = 0
    gpus = [int(x) for x in args.gpus.split(',')]
    num_gpus = len(gpus)
    assert num_gpus == 1
    device = gpus[rank]
    print("[{}] Using GPU {} out of {} GPUS ({} available)".format(
        rank, device, device_count(), num_gpus))
    set_device(device)
    net, hyperparams, get_train_loader, get_val_loader, logger =\
        setup_pretrain(args, config)
    per_gpu_batch_size = int(hyperparams['pretrain_batch_size'] / num_gpus)

    if 'gn_update_steps' not in net.stats:
        net.stats['gn_update_steps'] = 0
    generator_criterion = MSELoss()
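    # Resume from the epoch count recorded on the network, so an interrupted
    # pretraining run continues where it left off.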
    for epoch in range(net.stats['epochs'],
                       hyperparams['pretrain_num_epochs']):
        print("=" * 25 + f"EPOCH {epoch}" + "=" * 25)
        # 1. Train
        net.train()
        mean_generator_loss_train = \
            optimize_generator(
                data_loader=get_train_loader(per_gpu_batch_size),
                net=net,
                generator_criterion=generator_criterion,
                logger=logger,
                tqdm_prefix='Train |',
                optimize=True)
        mean_generator_loss_train = \
            mean_generator_loss_train.cpu().item() / num_gpus
        print(f"Train | G: {mean_generator_loss_train:.04f}")
        logger.log(data={'Train/Generator_Loss': mean_generator_loss_train},
                   step=epoch)

        # 2. Validate
        with no_grad():
            net.eval()
            mean_generator_loss_val = \
                optimize_generator(
                    data_loader=get_val_loader(per_gpu_batch_size),
                    net=net,
                    generator_criterion=generator_criterion,
                    logger=logger,
                    tqdm_prefix='Val |',
                    optimize=False)
        mean_generator_loss_val = \
            mean_generator_loss_val.cpu().item() / num_gpus
        print("Validation | G: {:.04f}".format(mean_generator_loss_val))
        logger.log(data={'Validation/Generator_Loss': mean_generator_loss_val},
                   step=epoch)

        net.save(
            epoch,
            '{}imprint_pretrain_gn_{:03d}.pth'.format(logger.logdir, epoch))
Example #10
def train(
    *,
    flow_constructor,
    logdir,
    lr_schedule,
    dropout_p,
    seed,
    init_bs,
    total_bs,
    val_total_bs,
    ema_decay,
    steps_per_log,
    epochs_per_val,
    max_grad_norm,
    dtype=tf.float32,
    scale_loss=None,
    restore_checkpoint=None,
    scale_grad=None,
    dataset='cifar10',
    steps_per_samples=2000,
):
    hvd, MPI, is_root, mpi_average = setup_horovod()

    # Seeding and logging setup
    seed_all(hvd.rank() + hvd.size() * seed)
    assert total_bs % hvd.size() == 0
    assert val_total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    val_local_bs = val_total_bs // hvd.size()

    # Setting up the logger
    logger = None
    logdir = '{}_mpi{}_{}'.format(os.path.expanduser(logdir), hvd.size(),
                                  time.time())
    checkpointdir = os.path.join(logdir, 'checkpoints')
    if is_root:
        print('Floating point format:', dtype)
        pprint(locals())
        os.makedirs(logdir)
        os.makedirs(checkpointdir)
        logger = TensorBoardOutput(logdir)

    # Load data
    if is_root:
        # Load once on root first to prevent downloading conflicts
        print('Loading data')
        load_data(dataset=dataset, dtype=dtype.as_numpy_dtype)

    MPI.COMM_WORLD.Barrier()

    data_train, data_val = load_data(dataset=dataset,
                                     dtype=dtype.as_numpy_dtype)
    img_shp = list(data_train.shape[1:])
    H, W, Cx = img_shp
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))
    if is_root:
        print('Training data: {}, Validation data: {}'.format(
            data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)

    # Build graph
    if is_root: print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()

    # Data-dependent init
    flops = None  # set below during init; left as None when restoring from a checkpoint
    if restore_checkpoint is None:
        if is_root: print('===== Init graph =====')
        x_init_sym = tf.placeholder(dtype, [init_bs] + img_shp)
        init_loss_sym, _ = build_forward(x=x_init_sym,
                                         dequant_flow=dequant_flow,
                                         flow=flow,
                                         posterior_flow=posterior_flow,
                                         flow_kwargs=dict(vcfg=VarConfig(
                                             init=True, ema=None, dtype=dtype),
                                                          dropout_p=dropout_p,
                                                          verbose=is_root))
        flops = int(get_flops()) / (10**9)
    # Training
    if is_root: print('===== Training graph =====')
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    loss_sym, _ = build_forward(x=x_sym,
                                dequant_flow=dequant_flow,
                                flow=flow,
                                posterior_flow=posterior_flow,
                                flow_kwargs=dict(vcfg=VarConfig(init=False,
                                                                ema=None,
                                                                dtype=dtype),
                                                 dropout_p=dropout_p,
                                                 verbose=is_root))

    # EMA
    params = tf.trainable_variables()
    if is_root: print_params()
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    maintain_averages_op = tf.group(ema.apply(params))
    # Op for setting the ema params to the current non-ema params (for use after data-dependent init)
    name2var = {v.name: v for v in tf.global_variables()}
    copy_params_to_ema = tf.group([
        name2var[p.name.replace(':0', '') +
                 '/ExponentialMovingAverage:0'].assign(p) for p in params
    ])

    val_x_sym = tf.placeholder(dtype, [val_local_bs] + img_shp)
    # Validation and sampling (with EMA)
    if is_root: print('===== Validation graph =====')
    val_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema, dtype=dtype),
                           dropout_p=0.,
                           verbose=is_root)
    val_loss_sym, _ = build_forward(x=val_x_sym,
                                    dequant_flow=dequant_flow,
                                    flow=flow,
                                    posterior_flow=posterior_flow,
                                    flow_kwargs=val_flow_kwargs)
    # for debugging invertibility
    # val_inverr_sym = tf.reduce_max(tf.abs(dequant_x - flow.inverse(y, train_flow_kwargs)[0][:,:,:,:img_shp[-1]]))

    if is_root: print('===== Sampling graph =====')
    sample_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema, dtype=dtype),
                              dropout_p=0,
                              verbose=is_root)
    samples_sym, _ = flow.sample(val_local_bs, sample_flow_kwargs)
    allgathered_samples_x_sym = hvd.allgather(tf.to_float(samples_sym))

    assert len(tf.trainable_variables()) == len(params)

    def run_validation(sess, i_step):
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {val_x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=val_local_bs,
                                          include_final_partial_batch=False)
        ])
        val_loss, total_count = mpi_average(shard_losses)
        samples = sess.run(allgathered_samples_x_sym)
        if is_root:
            logger.writekvs(
                [('val_bpd', bpd_scale_factor * val_loss),
                 ('num_val_examples', total_count * val_local_bs),
                 ('samples',
                  tile_imgs(np.clip(samples, 0, 255).astype(np.uint8)))],
                i_step)

    if is_root: print('===== Optimization graph =====')
    # Optimization
    lr_sym = tf.placeholder(dtype, [], 'lr')
    optimizer = hvd.DistributedOptimizer(tf.train.AdamOptimizer(lr_sym))

    if scale_loss is None:
        grads_and_vars = optimizer.compute_gradients(loss_sym, var_list=params)
    else:
        grads_and_vars = [(g / scale_loss, v)
                          for (g, v) in optimizer.compute_gradients(
                              loss_sym * scale_loss, var_list=params)]

    if scale_grad is not None:
        grads_and_vars = [(g / scale_grad, v) for (g, v) in grads_and_vars]
    if max_grad_norm is not None:
        clipped_grads, grad_norm_sym = tf.clip_by_global_norm(
            [g for (g, _) in grads_and_vars], max_grad_norm)
        grads_and_vars = [
            (cg, v) for (cg, (_, v)) in zip(clipped_grads, grads_and_vars)
        ]
    else:
        grad_norm_sym = tf.constant(0.)
    opt_sym = tf.group(optimizer.apply_gradients(grads_and_vars),
                       maintain_averages_op)

    def loop(sess: tf.Session):
        i_step = 0

        if is_root: print('Initializing')
        sess.run(tf.global_variables_initializer())
        if restore_checkpoint is not None:
            # Restore from checkpoint
            if is_root:
                saver = tf.train.Saver()
                print('Restoring checkpoint:', restore_checkpoint)
                restore_step = int(restore_checkpoint.split('-')[-1])
                print('Restoring from step:', restore_step)
                saver.restore(sess, restore_checkpoint)
                i_step = restore_step
            else:
                saver = None
        else:
            # No checkpoint: perform data dependent initialization
            if is_root: print('Data dependent init')
            init_loss = sess.run(
                init_loss_sym, {
                    x_init_sym:
                    data_train[np.random.randint(0, data_train.shape[0],
                                                 init_bs)]
                })
            if is_root: print('Init loss:', init_loss * bpd_scale_factor)
            sess.run(copy_params_to_ema)
            saver = tf.train.Saver() if is_root else None
        if is_root: print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        if is_root:
            print('Training')
            print(f'Total GFLOPS: {flops}')
            print_params()

        loss_hist = deque(maxlen=steps_per_log)
        gnorm_hist = deque(maxlen=steps_per_log)
        for i_epoch in range(99999999999):
            if i_epoch % epochs_per_val == 0:
                run_validation(sess, i_step=i_step)
                if saver is not None:
                    saver.save(sess,
                               os.path.join(checkpointdir, 'model'),
                               global_step=i_step)

            epoch_start_t = time.time()
            for i_epoch_step, (batch, ) in enumerate(
                    iterbatches(  # non-sharded: each gpu goes through the whole dataset
                        [data_train],
                        batch_size=local_bs,
                        include_final_partial_batch=False,
                    )):

                lr = lr_schedule(i_step)
                loss, gnorm, _ = sess.run([loss_sym, grad_norm_sym, opt_sym], {
                    x_sym: batch,
                    lr_sym: lr
                })
                loss_hist.append(loss)
                gnorm_hist.append(gnorm)

                # Skip timing the very first step, which will be unusually slow due to TF initialization
                if i_epoch == i_epoch_step == 0:
                    epoch_start_t = time.time()

                if i_step % steps_per_log == 0:
                    loss_hist_means = MPI.COMM_WORLD.gather(
                        float(np.mean(loss_hist)), root=0)
                    gnorm_hist_means = MPI.COMM_WORLD.gather(
                        float(np.mean(gnorm_hist)), root=0)
                    steps_per_sec = (i_epoch_step + 1) / (time.time() -
                                                          epoch_start_t)

                    if is_root:
                        kvs = [
                            ('iter', i_step),
                            ('epoch', i_epoch + i_epoch_step * local_bs /
                             data_train.shape[0]),  # epoch for this gpu
                            ('bpd',
                             float(
                                 np.mean(loss_hist_means) * bpd_scale_factor)),
                            ('gnorm', float(np.mean(gnorm_hist_means))),
                            ('lr', float(lr)),
                            # ('fps', steps_per_sec * total_bs),  # fps calculated over all gpus (this epoch)
                            ('sps', steps_per_sec),
                        ]
                        logger.writekvs(kvs, i_step)

                i_step += 1
            # End of epoch

    # Train
    config = tf.ConfigProto()
    # config.log_device_placement = True
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    if is_root: print('===== Creating session =====')
    with tf.Session(config=config) as sess:
        loop(sess)
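A note on the seeding line `seed_all(hvd.rank() + hvd.size() * seed)` above: for a fixed base seed it hands every Horovod worker a distinct seed, so the workers' random streams do not coincide. A quick check of the arithmetic:

def worker_seed(rank, world_size, base_seed):
    return rank + world_size * base_seed

# With 4 workers and base_seed=3, ranks 0..3 receive 12, 13, 14, 15.
assert [worker_seed(r, 4, 3) for r in range(4)] == [12, 13, 14, 15]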
Example #11
def main():

    #####for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
    ###################

    args = parser.parse_args()

    seed_all(args.seed)

    if args.on_server:

        matplotlib.use("Agg")
        # pygame without monitor
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    from matplotlib import pyplot as plt

    mp.set_start_method("spawn")

    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac import SoftActorCritic, QSoftActorCritic
    from rlmethods.rlutils import ReplayBuffer

    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureSAM1,
        DroneFeatureOccup,
        DroneFeatureRisk,
        DroneFeatureRisk_v2,
        VasquezF1,
        VasquezF2,
        VasquezF3,
        Fahad,
        GoalConditionedFahad,
    )
    from featureExtractor.gridworld_featureExtractor import (
        FrontBackSide,
        LocalGlobal,
        OneHot,
        SocialNav,
        FrontBackSideSimple,
    )
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureRisk_speed,
        DroneFeatureRisk_speedv2,
    )

    save_folder = None

    if not args.dont_save and not args.play:

        if not args.save_folder:
            print("Provide save folder.")
            exit()

        policy_net_dims = "-policy_net-"
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += "-"

        reward_net_dims = "-reward_net-"
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += "-"

        save_folder = (
            "./results/"
            + args.save_folder
            + st
            + args.feat_extractor
            + "-seed-"
            + str(args.seed)
            + policy_net_dims
            + reward_net_dims
            + "-total-ep-"
            + str(args.total_episodes)
            + "-max-ep-len-"
            + str(args.max_ep_length)
        )

        experiment_logger = Logger(save_folder, "experiment_info.txt")
        experiment_logger.log_header("Arguments for the experiment :")
        repo = git.Repo(search_parent_directories=True)
        experiment_logger.log_info({'From branch : ' : repo.active_branch.name})
        experiment_logger.log_info({'Commit number : ' : repo.head.object.hexsha})
        experiment_logger.log_info(vars(args))

    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None
    # initialize the feature extractor to be used
    if args.feat_extractor == "Onehot":
        feat_ext = OneHot(grid_rows=10, grid_cols=10)
    if args.feat_extractor == "SocialNav":
        feat_ext = SocialNav(fieldList=["agent_state", "goal_state"])
    if args.feat_extractor == "FrontBackSideSimple":
        feat_ext = FrontBackSideSimple(
            thresh1=1,
            thresh2=2,
            thresh3=3,
            thresh4=4,
            step_size=step_size,
            agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == "LocalGlobal":
        feat_ext = LocalGlobal(
            window_size=11,
            grid_size=grid_size,
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == "DroneFeatureSAM1":

        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureOccup":

        feat_ext = DroneFeatureOccup(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            window_size=window_size,
        )

    if args.feat_extractor == "DroneFeatureRisk":

        feat_ext = DroneFeatureRisk(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_v2":

        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_speed":

        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            return_tensor=False,
            thresh1=10,
            thresh2=15,
        )

    if args.feat_extractor == "DroneFeatureRisk_speedv2":

        feat_ext = DroneFeatureRisk_speedv2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            return_tensor=False,
            thresh1=18,
            thresh2=30,
        )

    if args.feat_extractor == "VasquezF1":
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF2":
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF3":
        feat_ext = VasquezF3(agent_width)

    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    if feat_ext is None:
        print("Please enter proper feature extractor!")
        exit()
    # log feature extractor info

    if not args.dont_save and not args.play:

        experiment_logger.log_header("Parameters of the feature extractor :")
        experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    replace_subject = bool(args.replace_subject)

    env = GridWorldDrone(
        display=args.render,
        is_onehot=False,
        seed=args.seed,
        obstacles=None,
        show_trail=False,
        is_random=True,
        annotation_file=args.annotation_file,
        subject=args.subject,
        tick_speed=60,
        obs_width=10,
        step_size=step_size,
        agent_width=agent_width,
        replace_subject=replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        step_reward=0.001,
        show_comparison=True,
        consider_heading=True,
        show_orientation=True,
        # rows=200, cols=200, width=grid_size)
        rows=576,
        cols=720,
        width=grid_size,
    )

    # env = gym.make('Acrobot-v1')
    # log environment info
    if not args.dont_save and not args.play:

        experiment_logger.log_header("Environment details :")
        experiment_logger.log_info(env.__dict__)

    # initialize RL

    if args.rl_method == "ActorCritic":
        model = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=100,
            max_episode_length=args.max_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=save_folder,
            lr=args.lr,
            entropy_coeff=args.entropy_coeff,
            max_episodes=args.total_episodes,
        )

    if args.rl_method == "SAC":

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        model = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            buffer_sample_size=args.replay_buffer_sample_size,
            entropy_tuning=True,
            play_interval=args.play_interval,
            entropy_target=args.entropy_target,
            gamma=args.gamma,
            learning_rate=args.lr,
        )

    if args.rl_method == "discrete_QSAC":

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        model = QSoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            buffer_sample_size=args.replay_buffer_sample_size,
            entropy_tuning=True,
            play_interval=args.play_interval,
            entropy_target=args.entropy_target,
            gamma=args.gamma,
            learning_rate=args.lr,
        )
    # log RL info
    if not args.dont_save and not args.play:

        experiment_logger.log_header("Details of the RL method :")
        experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:

        from debugtools import numericalSort

        policy_file_list = []
        reward_across_models = []
        # print(args.policy_path)
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, "*.pt"))
            policy_file_list = sorted(policy_names, key=numericalSort)

        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        # no playing of any kind, so training

        if args.reward_path is None:

            if args.policy_path:
                model.policy.load(args.policy_path)

            if args.rl_method == "SAC" or args.rl_method == "discrete_QSAC":
                model.train(args.total_episodes, args.max_ep_length)

            else:
                model.train()

        else:
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train(reward_net=reward_net)

        if not args.dont_save:
            model.policy.save(save_folder + "/policy-models/")

    if args.play:
        # env.tickSpeed = 15
        from debugtools import compile_results

        xaxis = []
        counter = 1
        plt.figure(0)
        avg_reward_list = []
        frac_good_run_list = []
        print(policy_file_list)
        for policy_file in policy_file_list:

            print("Playing for policy :", policy_file)
            model.policy.load(policy_file)
            policy_folder = policy_file.strip().split("/")[0:-2]
            save_folder = ""
            for p in policy_folder:
                save_folder = save_folder + p + "/"

            print("The final save folder ", save_folder)
            # env.tickSpeed = 10
            assert args.policy_path is not None, "pass a policy to play from!"
            if args.exp_trajectory_path is not None:
                from irlmethods.irlUtils import calculate_expert_svf

                expert_svf = calculate_expert_svf(
                    args.exp_trajectory_path,
                    max_time_steps=args.max_ep_length,
                    feature_extractor=feat_ext,
                    gamma=1,
                )
            # reward_across_models.append(model.generate_trajectory(args.num_trajs, args.render))
            if args.exp_trajectory_path is None:

                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render
                    )
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        store_raw=args.store_raw_states,
                        path=save_folder + "/agent_generated_trajectories/",
                    )
            else:

                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render, expert_svf=expert_svf
                    )
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        path=save_folder + "/agent_generated_trajectories/",
                        expert_svf=expert_svf,
                    )

            avg_reward, good_run_frac = compile_results(
                rewards, state_info, sub_info
            )

            avg_reward_list.append(avg_reward)
            frac_good_run_list.append(good_run_frac)
            plt.plot(avg_reward_list, c="r")
            plt.plot(frac_good_run_list, c="g")
            plt.draw()
        plt.show()

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(
            args.num_trajs, args.render, path="./user_generated_trajectories/"
        )
Example #12
from args import args
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from neural import get_optimizer_scheduler, get_model_loss
from loops import train_fn, valid_fn, test_fn
import wandb
import random
from utils import get_learning_rate, isclose, seed_all
from test import get_test_samples
import os
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
seed_all(args)

model_directory = "{}{}".format(args.model,
                                "_" + args.name if args.name else "")
model_directory = f"../models/{model_directory}/"
if not os.path.exists(model_directory):
    os.makedirs(model_directory)

train = pd.read_csv("../input/train.csv")
# train_nocall = pd.read_csv("../input/env/nocall.csv")
# train_nocall["folder"] = "env/audio/"

train_le = LabelEncoder().fit(train.ebird_code.values)
train["folder"] = "train_audio"
train["ebird_label"] = train_le.transform(train.ebird_code.values)
mapping = pd.Series(train.ebird_code.values,
Example #13
def main(args):
    torch.backends.cudnn.benchmark = True
    seed_all(args.seed)

    d = Dataset(train_set_size=args.train_set_sz,
                num_cls=args.num_cls,
                remove_nan_center=False)
    train = d.train_set
    valid = d.test_set

    num_cls = args.num_cls + 1  # +1 for background
    net = UNet(in_dim=1, out_dim=num_cls).cuda()
    best_net = UNet(in_dim=1, out_dim=num_cls)
    best_val_dice = -np.inf
    best_cls_val_dices = None

    optimizer = torch.optim.Adam(params=net.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler_warmup = GradualWarmupScheduler(optimizer,
                                              multiplier=10,
                                              total_epoch=50,
                                              after_scheduler=None)

    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir, exist_ok=True)

    writer = tensorboardX.SummaryWriter(log_dir=args.log_dir)

    step = 1
    for epoch in range(1, args.n_epochs + 1):
        for iteration in range(
                1,
                int(np.ceil(train.dataset_sz() / args.batch_sz)) + 1):

            net.train()

            imgs, masks, one_hot_masks, centers, _, _, _, _ = train.next_batch(
                args.batch_sz)
            imgs = make_batch_input(imgs)
            imgs = torch.cuda.FloatTensor(imgs)
            one_hot_masks = torch.cuda.FloatTensor(one_hot_masks)

            pred_logit = net(imgs)
            pred_softmax = F.softmax(pred_logit, dim=1)

            if args.use_ce:
                ce = torch.nn.CrossEntropyLoss()
                loss = ce(pred_logit, torch.cuda.LongTensor(masks))
            else:
                loss = dice_loss(pred_softmax,
                                 one_hot_masks,
                                 keep_background=False).mean()

            scheduler_warmup.step()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % args.log_freq == 0:
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\tloss={loss.data.cpu().numpy()}"
                )
                writer.add_scalar("cnn_dice_loss",
                                  loss.data.cpu().numpy(), step)
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], step)

            if step % args.train_eval_freq == 0:
                train_dice, cls_train_dices = do_eval(net, train.images,
                                                      train.onehot_masks,
                                                      args.batch_sz, num_cls)
                train_dice = train_dice.cpu().numpy()
                cls_train_dices = cls_train_dices.cpu().numpy()
                writer.add_scalar("train_dice", train_dice, step)
                # lr_sched.step(1-train_dice)
                for j, cls_train_dice in enumerate(cls_train_dices):
                    writer.add_scalar(f"train_dice/{j}", cls_train_dice, step)
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\ttrain_eval: train_dice={train_dice}"
                )

            if step % args.val_eval_freq == 0:
                _pickle.dump(
                    net.state_dict(),
                    open(os.path.join(args.log_dir, 'model.pth.tar'), 'wb'))
                val_dice, cls_val_dices = do_eval(net, valid.images,
                                                  valid.onehot_masks,
                                                  args.batch_sz, num_cls)
                val_dice = val_dice.cpu().numpy()
                cls_val_dices = cls_val_dices.cpu().numpy()
                writer.add_scalar("val_dice", val_dice, step)
                for j, cls_val_dice in enumerate(cls_val_dices):
                    writer.add_scalar(f"val_dice/{j}", cls_val_dice, step)
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\tvalid_dice={val_dice}"
                )
                if val_dice > best_val_dice:
                    best_val_dice = val_dice
                    best_cls_val_dices = cls_val_dices
                    best_net.load_state_dict(net.state_dict().copy())
                    _pickle.dump(
                        best_net.state_dict(),
                        open(os.path.join(args.log_dir, 'best_model.pth.tar'),
                             'wb'))
                    f = open(
                        os.path.join(args.log_dir, f"best_val_dice{step}.txt"),
                        'w')
                    f.write(str(best_val_dice) + "\n")
                    f.write(" ".join([
                        str(dice_score) for dice_score in best_cls_val_dices
                    ]))
                    f.close()
                    print(f"better val dice detected.")
                # if step % 5000 == 0:
                #     _pickle.dump(net.state_dict(), open(os.path.join(args.log_dir, '{}.pth.tar'.format(step)),
                #                                         'wb'))

            step += 1

    return best_val_dice, best_cls_val_dices
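

# A minimal sketch of a soft Dice loss like the dice_loss used above (the
# project's own implementation is not shown here). It assumes pred_softmax
# and one_hot_masks are [N, C, H, W] tensors; the name _dice_loss_sketch is
# hypothetical.
def _dice_loss_sketch(pred_softmax, one_hot_masks, keep_background=False,
                      eps=1e-6):
    if not keep_background:
        pred_softmax = pred_softmax[:, 1:]    # drop class 0 (background)
        one_hot_masks = one_hot_masks[:, 1:]
    dims = (2, 3)                             # sum over the spatial dims
    intersection = (pred_softmax * one_hot_masks).sum(dims)
    denom = pred_softmax.sum(dims) + one_hot_masks.sum(dims)
    dice = (2. * intersection + eps) / (denom + eps)
    return 1. - dice                          # per-sample, per-class loss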
import logging
import os

import torch

import utils

# Set up parameters and output dir.
params = utils.load_params(mode='wlnn')  # based on terminal input
params['script'] = 'run-wlnn-mnist.py'
writer, out_dir = utils.init_output(params,
                                    overwrite=params['overwrite_output'])
os.makedirs(os.path.join(out_dir, 'networks'))  # dir to store all networks

if params['use_cuda'] and not torch.cuda.is_available():
    logging.info('use_cuda was set but cuda is not available, running on cpu')
    params['use_cuda'] = False
device = 'cuda' if params['use_cuda'] else 'cpu'

# Ensure deterministic computation.
utils.seed_all(0)

# Ensure that runs are reproducible even on GPU. Note: this slows down training!
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Load dataset.
train_images, train_labels, test_images, test_labels = load_preprocessed_dataset(
    params['dataset'], flatten_images=True, use_torch=True)
train_dataset = torch.utils.data.TensorDataset(train_images, train_labels)
test_dataset = torch.utils.data.TensorDataset(test_images, test_labels)

# Create initial population.
# TODO: Make train_only_outputs a learning_rule.
train_only_outputs = (params['train_only_outputs']
                      or params['learning_rule'] == 'hebbian')
def train(
    *,
    flow_constructor,
    logdir,
    lr_schedule,
    dropout_p,
    seed,
    init_bs,
    total_bs,
    ema_decay,
    steps_per_log,
    max_grad_norm,
    dtype=tf.float32,
    scale_loss=None,
    dataset='imagenet32',
    steps_per_samples=20000,
    steps_per_dump=5000,
    n_epochs=2,
    restore_checkpoint=None,
    dump_samples_to_tensorboard=True,
    save_jpg=True,
):

    import horovod.tensorflow as hvd

    # Initialize Horovod
    hvd.init()
    # Verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()

    from mpi4py import MPI

    assert hvd.size() == MPI.COMM_WORLD.Get_size()

    is_root = hvd.rank() == 0

    def mpi_average(local_list):
        local_list = list(map(float, local_list))
        sums = MPI.COMM_WORLD.gather(sum(local_list), root=0)
        counts = MPI.COMM_WORLD.gather(len(local_list), root=0)
        sum_counts = sum(counts) if is_root else None
        avg = (sum(sums) / sum_counts) if is_root else None
        return avg, sum_counts
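    # For example (hypothetical values): with two ranks holding [1.0, 2.0]
    # and [3.0], the root gets avg == 2.0 and sum_counts == 3, while
    # non-root ranks get (None, None) since MPI gather only fills the root.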

    # Seeding and logging setup
    seed_all(hvd.rank() + hvd.size() * seed)
    assert total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
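    # Each rank is seeded differently but deterministically (rank + size * seed),
    # and the global batch is split evenly across ranks, e.g. total_bs=64 on
    # 8 ranks gives local_bs=8 per GPU.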

    logger = None
    logdir = '{}_mpi{}_{}'.format(os.path.expanduser(logdir), hvd.size(),
                                  time.time())
    checkpointdir = os.path.join(logdir, 'checkpoints')
    profiledir = os.path.join(logdir, 'profiling')
    if is_root:
        print('Floating point format:', dtype)
        pprint(locals())
        os.makedirs(logdir)
        os.makedirs(checkpointdir)
        os.makedirs(profiledir)
        logger = TensorBoardOutput(logdir)

    # Load data
    assert dataset in ['imagenet32', 'imagenet64', 'imagenet64_5bit']

    if is_root:
        print('Loading data')
    MPI.COMM_WORLD.Barrier()
    if dataset == 'imagenet32':
        """The dataset as a npy file on RAM. There are as many copies as number of MPI threads. 
           This isn't effficient and tf.Records would be better to read from disk. 
           This is just done to ensure bits/dim reported are perfect and no data loading bugs creep in.
           However, the dataset is quite small resolution and even 8 MPI threads can work on 40GB RAM."""
        data_train = np.load('../train_32x32.npy')
        data_val = np.load('../valid_32x32.npy')
        assert data_train.dtype == 'uint8'
        assert np.max(data_train) <= 255
        assert np.min(data_train) >= 0
        assert np.max(data_val) <= 255
        assert np.min(data_val) >= 0
        assert data_val.dtype == 'uint8'
    elif dataset == 'imagenet64':
        """The dataset as a npy file on RAM. There are as many copies as number of MPI threads. 
           This isn't effficient and tf.Records would be better to read from disk. 
           This is just done to ensure bits/dim reported are perfect and no data loading bugs creep in.
           If you don't have enough CPU RAM to run 8 threads, run it with fewer threads and adjust batch-size / model-size tradeoff accordingly."""
        data_train = np.load('../train_64x64.npy')
        data_val = np.load('../valid_64x64.npy')
        assert data_train.dtype == 'uint8'
        assert np.max(data_train) <= 255
        assert np.min(data_train) >= 0
        assert np.max(data_val) <= 255
        assert np.min(data_val) >= 0
    elif dataset == 'imagenet64_5bit':
        """Similar loading as above. Quantized to 5-bit while loading."""
        if is_root:
            data_train = np.load('../train_64x64.npy')
            data_train = np.floor(data_train / 8.)
            data_train = data_train.astype('uint8')
            assert np.max(data_train) <= 31
            assert np.min(data_train) >= 0
            np.save('../train_64x64_5bit.npy', data_train)
            del data_train
            data_val = np.load('../valid_64x64.npy')
            data_val = np.floor(data_val / 8.)
            data_val = data_val.astype('uint8')
            assert np.max(data_val) <= 31
            assert np.min(data_val) >= 0
            np.save('../valid_64x64_5bit.npy', data_val)
            del data_val
        MPI.COMM_WORLD.Barrier()
        data_train = np.load('../train_64x64_5bit.npy')
        data_val = np.load('../valid_64x64_5bit.npy')
    data_train = data_train.astype(dtype.as_numpy_dtype)
    data_val = data_val.astype(dtype.as_numpy_dtype)
    img_shp = list(data_train.shape[1:])
    if dataset == 'imagenet32':
        assert img_shp == [32, 32, 3]
    else:
        assert img_shp == [64, 64, 3]
    if is_root:
        print('Training data: {}, Validation data: {}'.format(
            data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))
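    # Multiplying a per-image negative log-likelihood in nats by
    # bpd_scale_factor yields bits per dimension; e.g. for 32x32x3 images the
    # divisor is log(2) * 3072.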

    # Build graph
    if is_root: print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()
    # Data-dependent init
    if restore_checkpoint is None:
        if is_root: print('===== Init graph =====')
        x_init_sym = tf.placeholder(dtype, [init_bs] + img_shp)
        init_syms, _ = build_forward(x=x_init_sym,
                                     dequant_flow=dequant_flow,
                                     flow=flow,
                                     posterior_flow=posterior_flow,
                                     flow_kwargs=dict(init=True,
                                                      dropout_p=dropout_p,
                                                      verbose=is_root))
    # Training
    if is_root: print('===== Training graph =====')
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    loss_sym, _ = build_forward(x=x_sym,
                                dequant_flow=dequant_flow,
                                flow=flow,
                                posterior_flow=posterior_flow,
                                flow_kwargs=dict(dropout_p=dropout_p,
                                                 verbose=is_root))

    # EMA
    params = tf.trainable_variables()
    if is_root:
        print('Parameters',
              sum(np.prod(p.get_shape().as_list()) for p in params))
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    maintain_averages_op = tf.group(ema.apply(params))
    # Op for setting the ema params to the current non-ema params (for use after data-dependent init)
    name2var = {v.name: v for v in tf.global_variables()}
    copy_params_to_ema = tf.group([
        name2var[p.name.replace(':0', '') +
                 '/ExponentialMovingAverage:0'].assign(p) for p in params
    ])
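    # ema.apply() creates one shadow variable per parameter (named
    # '<var>/ExponentialMovingAverage'); the op above overwrites each shadow
    # with the current parameter value, so the EMA tracks the data-dependent
    # init instead of the pre-init values.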

    # Validation and sampling (with EMA)
    if is_root: print('===== Validation graph =====')
    val_loss_sym, _ = build_forward(x=x_sym,
                                    dequant_flow=dequant_flow,
                                    flow=flow,
                                    posterior_flow=posterior_flow,
                                    flow_kwargs=dict(dropout_p=0,
                                                     ema=ema,
                                                     verbose=is_root))
    # for debugging invertibility
    # val_inverr_sym = tf.reduce_max(tf.abs(
    #     val_dequant_x_sym - flow.inverse(val_y_sym, dropout_p=0, ema=ema, verbose=is_root)[0]
    # ))

    if is_root: print('===== Sampling graph =====')
    samples_sym, _ = flow.sample(local_bs,
                                 flow_kwargs=dict(dropout_p=0.,
                                                  ema=ema,
                                                  verbose=is_root))
    allgathered_samples_sym = hvd.allgather(tf.to_float(samples_sym))

    assert len(tf.trainable_variables()) == len(params)

    def run_sampling(sess,
                     i_step,
                     *,
                     prefix=dataset,
                     dump_to_tensorboard=True,
                     save_jpg=False):
        samples = sess.run(allgathered_samples_sym)
        if is_root:
            print('samples gathered from the session')
            if dataset == 'imagenet64_5bit':
                """Quantized values. So different kind of sampling needed here."""
                samples = np.floor(np.clip(samples, 0, 31))
                samples = samples * 8
                samples = samples.astype('uint8')
            # np.save('samples_' + prefix + '.npy', samples)
            # if save_jpg:
            # samples = tile_imgs(np.floor(np.clip(samples, 0, 255)).astype('uint8'))
            # cv2.imwrite('samples_' + prefix + '_' + str(i_step) + '.jpg', samples)
            if dump_to_tensorboard:
                """You can turn this off if tensorboard crashes for sample dumps. You can view the samples from the npy file anyway"""
                logger.writekvs(
                    [('samples',
                      tile_imgs(np.clip(samples, 0, 255).astype(np.uint8)))],
                    i_step)

    def run_validation(sess, i_step):
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=local_bs,
                                          include_final_partial_batch=False)
        ])
        val_loss, total_count = mpi_average(shard_losses)
        if is_root:
            logger.writekvs([('val_bpd', bpd_scale_factor * val_loss),
                             ('num_val_examples', total_count * local_bs)],
                            i_step)

    # Optimization
    lr_sym = tf.placeholder(dtype, [], 'lr')
    optimizer = hvd.DistributedOptimizer(tf.train.AdamOptimizer(lr_sym))
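    # hvd.DistributedOptimizer wraps Adam so that compute_gradients()
    # allreduce-averages the gradients across all ranks before they are
    # applied, keeping the replicas' parameters in sync.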
    if scale_loss is None:
        grads_and_vars = optimizer.compute_gradients(loss_sym, var_list=params)
    else:
        grads_and_vars = [(g / scale_loss, v)
                          for (g, v) in optimizer.compute_gradients(
                              loss_sym * scale_loss, var_list=params)]
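    # The scale_loss path is a loss-scaling scheme (helpful with float16):
    # gradients of loss * scale are computed and then divided back by scale,
    # leaving the update unchanged while keeping small values representable.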
    if max_grad_norm is not None:
        clipped_grads, grad_norm_sym = tf.clip_by_global_norm(
            [g for (g, _) in grads_and_vars], max_grad_norm)
        grads_and_vars = [
            (cg, v) for (cg, (_, v)) in zip(clipped_grads, grads_and_vars)
        ]
    else:
        grad_norm_sym = tf.constant(0.)
    opt_sym = tf.group(optimizer.apply_gradients(grads_and_vars),
                       maintain_averages_op)

    def loop(sess: tf.Session):
        i_step = 0
        i_step_lr = 0
        if is_root: print('Initializing')
        sess.run(tf.global_variables_initializer())
        # if is_root:
        #     logger.write_graph(sess.graph)

        if restore_checkpoint is not None:
            """If restoring from an existing checkpoint whose path is specified in the launcher"""
            restore_step = int(restore_checkpoint.split('-')[-1])
            if is_root:
                saver = tf.train.Saver()
                print('Restoring checkpoint:', restore_checkpoint)
                print('Restoring from step:', restore_step)
                saver.restore(sess, restore_checkpoint)
                print('Loaded checkpoint')
            else:
                saver = None
            i_step = restore_step
            """You could re-start with the warm-up or start from wherever the checkpoint stopped depending on what is needed.
               If the session had to be stopped due to NaN/Inf, warm-up from a most recent working checkpoint is recommended.
               If it was because of Horovod Crash / Machine Shut down, re-starting from the same LR can be done in which case
               you need to uncomment the blow line. By default, it warms up."""
            i_step_lr = restore_step
        else:
            if is_root: print('Data dependent init')
            sess.run(
                init_syms, {
                    x_init_sym:
                    data_train[np.random.randint(0, data_train.shape[0],
                                                 init_bs)]
                })
            sess.run(copy_params_to_ema)
            saver = tf.train.Saver() if is_root else None
        if is_root: print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        if is_root:
            print('Training')
            print(
                'Parameters(M)',
                sum(np.prod(p.get_shape().as_list())
                    for p in params) / 1024. / 1024.)

        loss_hist = deque(maxlen=steps_per_log)
        """ 2 epochs are sufficient to see good results on Imagenet.
            After 2 epochs, gains are marginal, but important for good bits/dim."""
        for i_epoch in range(n_epochs):
            epoch_start_t = time.time()
            for i_epoch_step, (batch, ) in enumerate(
                    iterbatches(  # non-sharded: each gpu goes through the whole dataset
                        [data_train],
                        batch_size=local_bs,
                        include_final_partial_batch=False,
                    )):
                lr = lr_schedule(i_step_lr)
                loss, _ = sess.run(
                    [loss_sym, opt_sym],
                    {
                        x_sym: batch,
                        lr_sym: lr
                    },
                )
                loss_hist.append(loss)

                if i_epoch == i_epoch_step == 0:
                    epoch_start_t = time.time()

                if i_step % steps_per_log == 0:
                    loss_hist_means = MPI.COMM_WORLD.gather(float(
                        np.mean(loss_hist)),
                                                            root=0)
                    steps_per_sec = (i_epoch_step + 1) / (time.time() -
                                                          epoch_start_t)
                    if is_root:
                        kvs = [
                            ('iter', i_step),
                            ('epoch', i_epoch + i_epoch_step * local_bs /
                             data_train.shape[0]),  # epoch for this gpu
                            ('bpd',
                             float(
                                 np.mean(loss_hist_means) * bpd_scale_factor)),
                            ('lr', float(lr)),
                            ('fps', steps_per_sec * total_bs
                             ),  # fps calculated over all gpus (this epoch)
                            ('sps', steps_per_sec),
                        ]
                        logger.writekvs(kvs, i_step)
                """You could pass the validation for Imagenet because the val set is reasonably big.
                    It is extremely hard to overfit on Imagenet (if you manage to, let us know). 
                    So, skipping the validation throughout the training and validating at the end with the
                    most recent checkpoint would be okay and good for wall clock time.
                    You could also have steps_per_val specified in the launcher pretty high to find a balance."""

                if i_step > 0 and i_step % steps_per_samples == 0 and i_step_lr > 0:
                    run_sampling(
                        sess,
                        i_step=i_step,
                        dump_to_tensorboard=dump_samples_to_tensorboard,
                        save_jpg=save_jpg)
                    print('Run Validation...')
                    run_validation(sess, i_step)

                if i_step % steps_per_dump == 0 and i_step > 0 and i_step_lr > 0:
                    if saver is not None:
                        saver.save(sess,
                                   os.path.join(checkpointdir, 'model'),
                                   global_step=i_step)

                i_step += 1
                i_step_lr += 1
            # End of epoch

    # Train
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    with tf.Session(config=config) as sess:
        loop(sess)
Example #16
0
def evaluate(
    *,
    flow_constructor,
    seed,
    restore_checkpoint,
    total_bs,
    iw_samples=4096,
    dtype=tf.float32,
    dataset='cifar10',
    samples_filename='samples.png',
):
    hvd, MPI, is_root, mpi_average = setup_horovod()

    restore_checkpoint = os.path.expanduser(restore_checkpoint)

    # Seeding and logging setup
    seed_all(hvd.rank() + hvd.size() * seed)
    assert total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    assert iw_samples % total_bs == 0

    if is_root:
        print('===== EVALUATING {} ({} IW samples) ====='.format(
            restore_checkpoint, iw_samples))

    # Load data
    if is_root:
        # Load once on root first to prevent downloading conflicts
        print('Loading data')
        load_data(dataset=dataset, dtype=dtype.as_numpy_dtype)
    MPI.COMM_WORLD.Barrier()
    data_train, data_val = load_data(dataset=dataset,
                                     dtype=dtype.as_numpy_dtype)
    img_shp = list(data_train.shape[1:])
    H, W, Cx = img_shp
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))
    if is_root:
        print('Training data: {}, Validation data: {}'.format(
            data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)

    # Build graph
    if is_root: print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    # This is a fake training graph. Just used to mimic flow_training, so we can load from the saver
    build_forward(x=x_sym,
                  dequant_flow=dequant_flow,
                  flow=flow,
                  posterior_flow=posterior_flow,
                  flow_kwargs=dict(vcfg=VarConfig(init=False,
                                                  ema=None,
                                                  dtype=dtype),
                                   dropout_p=0,
                                   verbose=is_root)
                  # note dropout is 0: it doesn't matter
                  )

    # EMA
    params = tf.trainable_variables()
    if is_root: print_params()
    ema = tf.train.ExponentialMovingAverage(
        decay=0.9999999999999)  # ema turned off
    maintain_averages_op = tf.group(ema.apply(params))

    # Validation and sampling (with EMA)
    if is_root: print('===== Validation graph =====')
    val_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema, dtype=dtype),
                           dropout_p=0.,
                           verbose=is_root)
    val_loss_sym, val_logratio_sym = build_forward(
        x=x_sym,
        dequant_flow=dequant_flow,
        flow=flow,
        posterior_flow=posterior_flow,
        flow_kwargs=val_flow_kwargs)

    allgathered_val_logratios_sym = hvd.allgather(val_logratio_sym)
    # for debugging invertibility
    # val_dequant_x_sym_rep = tf.reshape(tf.tile(tf.expand_dims(val_dequant_x_sym, 0), [sampling_times, 1, 1, 1, 1]), [-1] + val_dequant_x_sym.shape.as_list()[1:])
    # val_inverr_sym = tf.reduce_max(tf.abs(val_dequant_x_sym_rep - flow.inverse(val_y_sym, **val_flow_kwargs)[0][:,:,:,:img_shp[-1]]))

    if is_root: print('===== Sampling graph =====')
    samples_sym, _ = flow.sample(64, val_flow_kwargs)
    allgathered_samples_x_sym = hvd.allgather(tf.to_float(samples_sym))

    assert len(tf.trainable_variables()) == len(params)

    def run_iw_eval(sess):
        if is_root:
            print('Running IW eval with {} samples...'.format(iw_samples))
        # Go through one example at a time
        all_val_losses = []
        for i_example in (trange if is_root else range)(len(data_val)):
            # take this single example and tile it
            batch_x = np.tile(data_val[i_example, None, ...],
                              (local_bs, 1, 1, 1))
            # repeatedly evaluate logd for the IWAE bound
            batch_logratios = np.concatenate([
                sess.run(allgathered_val_logratios_sym, {x_sym: batch_x})
                for _ in range(iw_samples // total_bs)
            ]).astype(np.float64)
            assert batch_logratios.shape == (iw_samples, )
            # log [1/n \sum_i exp(r_i)] = log [exp(-b) 1/n \sum_i exp(r_i + b)] = -b + log [1/n \sum_i exp(r_i + b)]
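            # e.g. (hypothetical numbers) for r = [0., 700.], naive
            # np.log(np.mean(np.exp(r))) overflows, while with b = 700 the
            # shifted form gives 700 + log((exp(-700) + 1) / 2) ~= 699.31.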
            shift = batch_logratios.max()
            all_val_losses.append(
                -bpd_scale_factor *
                (shift + np.log(np.mean(np.exp(batch_logratios - shift)))))
            if i_example % 100 == 0 and is_root:
                print(i_example, np.mean(all_val_losses))
        if is_root:
            print(f'Final ({len(data_val)}):', np.mean(all_val_losses))

    def run_standard_eval(sess):
        if is_root:
            print('Running standard eval...')
        # Standard validation (single sample)
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=local_bs,
                                          include_final_partial_batch=False)
        ])
        val_loss, total_count = mpi_average(shard_losses)
        if is_root:
            for k, v in [
                ('val_bpd', bpd_scale_factor * val_loss),
                ('num_val_examples', total_count * local_bs),
            ]:
                print(k, v)

    def run_sampling_only(sess):
        samples = sess.run(allgathered_samples_x_sym)
        if is_root:
            from PIL import Image
            Image.fromarray(
                tile_imgs(np.clip(samples, 0, 255).astype(
                    np.uint8))).save(samples_filename)
            print('Saved {} samples to {}'.format(len(samples),
                                                  samples_filename))
            # print('Sampled in {} seconds'.format(sample_time))

    # Run
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    with tf.Session(config=config) as sess:
        if is_root: print('Initializing')
        sess.run(tf.global_variables_initializer())
        # Restore from checkpoint
        if is_root:
            print('Restoring checkpoint:', restore_checkpoint)
            saver = tf.train.Saver()
            saver.restore(sess, restore_checkpoint)
            print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        if samples_filename:
            run_sampling_only(sess)

        # Make sure data is the same on all MPI processes
        tmp_inds = [0, 183, 3, 6, 20, 88]
        check_batch = np.ascontiguousarray(data_val[tmp_inds])
        gathered_batches = np.zeros(
            (hvd.size(),
             *check_batch.shape), check_batch.dtype) if is_root else None
        MPI.COMM_WORLD.Gather(check_batch, gathered_batches, root=0)
        if is_root:
            assert all(
                np.allclose(check_batch, b)
                for b in gathered_batches), 'data must be in the same order!'
            print('data ordering ok')

        # Run validation
        run_standard_eval(sess)
        run_iw_eval(sess)
def evaluate(
    *,
    flow_constructor,
    seed,
    restore_checkpoint,
    total_bs,
    iw_samples=1024,  # 4096 is too slow for ImageNet
    dtype=tf.float32,
    dataset='imagenet32',
    samples_filename='samples.png',
    extra_dims=3,
):
    import horovod.tensorflow as hvd

    # Initialize Horovod
    hvd.init()
    # Verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()

    from mpi4py import MPI

    assert hvd.size() == MPI.COMM_WORLD.Get_size()

    is_root = hvd.rank() == 0

    def mpi_average(local_list):
        local_list = list(map(float, local_list))
        sums = MPI.COMM_WORLD.gather(sum(local_list), root=0)
        counts = MPI.COMM_WORLD.gather(len(local_list), root=0)
        sum_counts = sum(counts) if is_root else None
        avg = (sum(sums) / sum_counts) if is_root else None
        return avg, sum_counts

    restore_checkpoint = os.path.expanduser(restore_checkpoint)

    # Seeding and logging setup
    seed_all(hvd.rank() + hvd.size() * seed)
    assert total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    assert iw_samples % total_bs == 0

    if is_root:
        print('===== EVALUATING {} ({} IW samples) ====='.format(
            restore_checkpoint, iw_samples))

    # Load data
    assert dataset in ['imagenet32', 'imagenet64', 'imagenet64_5bit']

    if is_root:
        print('Loading data')
    MPI.COMM_WORLD.Barrier()
    if dataset == 'imagenet32':
        """The dataset as a npy file on RAM. There are as many copies as number of MPI threads. 
           This isn't effficient and tf.Records would be better to read from disk. 
           This is just done to ensure bits/dim reported are perfect and no data loading bugs creep in.
           However, the dataset is quite small resolution and even 8 MPI threads can work on 40GB RAM."""
        # data_train = np.load('../train_32x32.npy')
        data_val = np.load('../valid_32x32.npy')
        # assert data_train.dtype == 'uint8'
        # assert np.max(data_train) <= 255
        # assert np.min(data_train) >= 0
        assert np.max(data_val) <= 255
        assert np.min(data_val) >= 0
        assert data_val.dtype == 'uint8'
    elif dataset == 'imagenet64':
        """The dataset as a npy file on RAM. There are as many copies as number of MPI threads. 
           This isn't effficient and tf.Records would be better to read from disk. 
           This is just done to ensure bits/dim reported are perfect and no data loading bugs creep in.
           If you don't have enough CPU RAM to run 8 threads, run it with fewer threads and adjust batch-size / model-size tradeoff accordingly."""
        data_train = np.load('../train_64x64.npy')
        data_val = np.load('../valid_64x64.npy')
        assert data_train.dtype == 'uint8'
        assert np.max(data_train) <= 255
        assert np.min(data_train) >= 0
        assert np.max(data_val) <= 255
        assert np.min(data_val) >= 0
    elif dataset == 'imagenet64_5bit':
        """Similar loading as above. Quantized to 5-bit while loading."""
        if is_root:
            data_train = np.load('../train_64x64.npy')
            data_train = np.floor(data_train / 8.)
            data_train = data_train.astype('uint8')
            assert np.max(data_train) <= 31
            assert np.min(data_train) >= 0
            np.save('../train_64x64_5bit.npy', data_train)
            del data_train
            data_val = np.load('../valid_64x64.npy')
            data_val = np.floor(data_val / 8.)
            data_val = data_val.astype('uint8')
            assert np.max(data_val) <= 31
            assert np.min(data_val) >= 0
            np.save('../valid_64x64_5bit.npy', data_val)
            del data_val
        MPI.COMM_WORLD.Barrier()
        data_train = np.load('../train_64x64_5bit.npy')
        data_val = np.load('../valid_64x64_5bit.npy')
    # data_train = data_train.astype(dtype.as_numpy_dtype)
    data_val = data_val.astype(dtype.as_numpy_dtype)
    img_shp = list(data_val.shape[1:])
    if dataset == 'imagenet32':
        assert img_shp == [32, 32, 3]
    else:
        assert img_shp == [64, 64, 3]
    if is_root:
        # print('Training data: {}, Validation data: {}'.format(data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))

    # Build graph
    if is_root: print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    # This is a fake training graph. Just used to mimic flow_training, so we can load from the saver
    build_forward(x=x_sym,
                  dequant_flow=dequant_flow,
                  flow=flow,
                  posterior_flow=posterior_flow,
                  flow_kwargs=dict(init=False,
                                   ema=None,
                                   dropout_p=0,
                                   verbose=is_root)
                  # note dropout is 0: it doesn't matter
                  )

    # EMA
    params = tf.trainable_variables()
    if is_root:
        print('Parameters',
              sum(np.prod(p.get_shape().as_list()) for p in params))
    ema = tf.train.ExponentialMovingAverage(
        decay=0.9999999999999)  # ema turned off
    maintain_averages_op = tf.group(ema.apply(params))

    # Validation and sampling (with EMA)
    if is_root: print('===== Validation graph =====')
    val_flow_kwargs = dict(init=False, dropout_p=0, ema=ema, verbose=is_root)
    val_loss_sym, val_logratio_sym, val_dequant_x_sym = build_forward(
        x=x_sym,
        dequant_flow=dequant_flow,
        flow=flow,
        posterior_flow=posterior_flow,
        flow_kwargs=val_flow_kwargs)

    allgathered_val_logratios_sym = hvd.allgather(val_logratio_sym)
    # for debugging invertibility
    # val_inverr_sym = tf.reduce_max(tf.abs(
    #     val_dequant_x_sym - flow.inverse(val_y_sym, dropout_p=0, ema=ema, verbose=is_root)[0]
    # ))

    if is_root: print('===== Sampling graph =====')
    samples_sym, _ = flow.sample(local_bs, flow_kwargs=val_flow_kwargs)
    allgathered_samples_sym = hvd.allgather(tf.to_float(samples_sym))

    assert len(tf.trainable_variables()) == len(params)

    def run_iw_eval(sess):
        if is_root:
            print('Running IW eval with {} samples...'.format(iw_samples))
        # Go through one example at a time
        all_val_losses = []
        for i_example in (trange if is_root else range)(len(data_val)):
            # take this single example and tile it
            batch_x = np.tile(data_val[i_example, None, ...],
                              (local_bs, 1, 1, 1))
            # repeatedly evaluate logd for the IWAE bound
            batch_logratios = np.concatenate([
                sess.run(allgathered_val_logratios_sym, {x_sym: batch_x})
                for _ in range(iw_samples // total_bs)
            ]).astype(np.float64)
            assert batch_logratios.shape == (iw_samples, )
            # log [1/n \sum_i exp(r_i)] = log [exp(-b) 1/n \sum_i exp(r_i + b)] = -b + log [1/n \sum_i exp(r_i + b)]
            shift = batch_logratios.max()
            all_val_losses.append(
                -bpd_scale_factor *
                (shift + np.log(np.mean(np.exp(batch_logratios - shift)))))
            if i_example % 100 == 0 and is_root:
                print(i_example, np.mean(all_val_losses))
        if is_root:
            print(f'Final ({len(data_val)}):', np.mean(all_val_losses))

    def run_sampling_only(sess,
                          *,
                          prefix=dataset,
                          dump_to_tensorboard=True,
                          save_jpg=False):
        samples = sess.run(allgathered_samples_sym)
        if is_root:
            print('samples gathered from the session')
            if dataset == 'imagenet64_5bit':
                """Quantized values. So different kind of sampling needed here."""
                samples = np.floor(np.clip(samples, 0, 31))
                samples = samples * 8
                samples = samples.astype('uint8')
            # np.save('samples_' + prefix + '.npy', samples)
            import cv2
            samples = tile_imgs(
                np.floor(np.clip(samples, 0, 255)).astype('uint8'))
            cv2.imwrite(samples_filename, samples)

    def run_validation(sess):
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        # Single-sample validation; only the bpd is reported (no correlation
        # symbol is defined in this example).
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=local_bs,
                                          include_final_partial_batch=False)
        ])
        val_loss, total_count = mpi_average(shard_losses)
        if is_root:
            for k, v in [
                ('val_bpd', bpd_scale_factor * val_loss),
                ('num_val_examples', total_count * local_bs),
            ]:
                print(k, v)

    # Run
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    with tf.Session(config=config) as sess:
        if is_root: print('Initializing')
        sess.run(tf.global_variables_initializer())
        # Restore from checkpoint
        if is_root:
            print('Restoring checkpoint:', restore_checkpoint)
            saver = tf.train.Saver()
            saver.restore(sess, restore_checkpoint)
            print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        # if samples_filename:
        # run_sampling_only(sess)

        # Make sure data is the same on all MPI processes
        tmp_inds = [0, 183, 3, 6, 20, 88]
        check_batch = np.ascontiguousarray(data_val[tmp_inds])
        gathered_batches = np.zeros(
            (hvd.size(),
             *check_batch.shape), check_batch.dtype) if is_root else None
        MPI.COMM_WORLD.Gather(check_batch, gathered_batches, root=0)
        if is_root:
            assert all(
                np.allclose(check_batch, b)
                for b in gathered_batches), 'data must be in the same order!'
            print('data ordering ok')

        # Run validation
        run_validation(sess)
        run_iw_eval(sess)
Example #18
0
    parser.add_argument("--method_name", type=str, default="ftl")

    # ours
    parser.add_argument("--k_ratio", type=float, default=0.2)
    parser.add_argument("--lr_ratio", type=float, default=1e-3)

    # precision
    parser.add_argument("--precision", type=float, default=0.2)

    # jocor
    parser.add_argument("--forget_rate", type=float, default=0.2)
    parser.add_argument("--co_lambda", type=float, default=0.9)

    args = parser.parse_args()

    seed_all(args.seed)

    device = f"cuda:{args.gpu}"

    if args.dataset_name in ["mnist", "cifar10", "cifar100", "tiny-imagenet"]:
        epochs = 201
        epoch_decay_start = 80
        batch_size = 128
        learning_rate = 1e-3

        mom1 = 0.9
        mom2 = 0.1
        alpha_plan = [learning_rate] * epochs
        beta1_plan = [mom1] * epochs
        for i in range(epoch_decay_start, epochs):
            alpha_plan[i] = (
                # assumed completion -- the source is truncated here; this is
                # the standard co-teaching-style linear decay after epoch_decay_start
                float(epochs - i) / (epochs - epoch_decay_start) * learning_rate)
            beta1_plan[i] = mom2
Example #19
0
# -*- coding: utf-8 -*-

from utils import seed_all, get_device
from models import GetEncodings, SearchSimilar

seed_all()
device = get_device()


# Inference
def inference(question, bart_tokenizer, bart_model, df_context, model_op,
              MODEL_STORE):

    # Get Pretrained BERT encodings
    ge = GetEncodings(MODEL_STORE=MODEL_STORE, type='questions')
    encoded_question = ge.encode(question, max_length=30)

    # Find top matching documents
    ss = SearchSimilar(iterator=df_context['context'].values.tolist(),
                       filename='index.bin',
                       embeddings=model_op,
                       shape=768,
                       device=device)
    similar_contexts = ss.get_n_similar_vectors(encoded_question, 3)
    similar_contexts.insert(0, question)

    combined_tokens = '</s></s>'.join(similar_contexts)
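    # '</s></s>' mirrors the separator-token pair that RoBERTa/BART-style
    # tokenizers place between two segments, so the question and retrieved
    # contexts read as one multi-segment input (assumption: the downstream
    # tokenizer treats the literal string this way).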

    print(f'Top similar document outputs is {combined_tokens}')
Example #20
0
def main(args):
    torch.backends.cudnn.benchmark = True
    seed_all(args.seed)

    num_classes = 1

    d = Dataset(train_set_size=args.train_set_sz, num_cls=num_classes)
    train = d.train_set
    valid = d.test_set

    net = UNet(in_dim=1, out_dim=4).cuda()
    snake_approx_net = UNet(in_dim=1,
                            out_dim=1,
                            wf=3,
                            padding=True,
                            first_layer_pad=None,
                            depth=4,
                            last_layer_resize=True).cuda()
    best_val_dice = -np.inf

    optimizer = torch.optim.Adam(params=net.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    snake_approx_optimizer = torch.optim.Adam(
        params=snake_approx_net.parameters(),
        lr=args.lr,
        weight_decay=args.weight_decay)
    scheduler_warmup = GradualWarmupScheduler(optimizer,
                                              multiplier=10,
                                              total_epoch=50,
                                              after_scheduler=None)

    # load model
    if args.ckpt:
        loaded = _pickle.load(open(args.ckpt, 'rb'))
        net.load_state_dict(loaded[0])
        optimizer.load_state_dict(loaded[1])
        snake_approx_net.load_state_dict(loaded[2])
        snake_approx_optimizer.load_state_dict(loaded[3])

    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir, exist_ok=True)

    writer = tensorboardX.SummaryWriter(log_dir=args.log_dir)
    snake = SnakePytorch(args.delta, args.batch_sz * args.num_samples,
                         args.num_lines, args.radius)
    snake_eval = SnakePytorch(args.delta, args.batch_sz, args.num_lines,
                              args.radius)
    noises = torch.zeros(
        (args.batch_sz, args.num_samples, args.num_lines, args.radius)).cuda()

    step = 1
    start = timeit.default_timer()
    for epoch in range(1, args.n_epochs + 1):
        for iteration in range(
                1,
                int(np.ceil(train.dataset_sz() / args.batch_sz)) + 1):

            scheduler_warmup.step()

            imgs, masks, onehot_masks, centers, dts_modified, dts_original, jitter_radius, bboxes = \
                train.next_batch(args.batch_sz)

            xs = make_batch_input(imgs)
            xs = torch.cuda.FloatTensor(xs)

            net.train()
            unet_logits = net(xs)

            center_jitters, angle_jitters = [], []
            for img, mask, center in zip(imgs, masks, centers):
                c_j, a_j = get_random_jitter_by_mask(mask, center, [1],
                                                     args.theta_jitter)
                if not args.use_center_jitter:
                    c_j = np.zeros_like(c_j)
                center_jitters.append(c_j)
                angle_jitters.append(a_j)

            center_jitters = np.asarray(center_jitters)
            angle_jitters = np.asarray(angle_jitters)

            # args.radius + 1 because we need additional outermost points for the gradient
            gs_logits_whole_img = unet_logits[:, 3, ...]
            gs_logits, coords_r, coords_c = get_star_pattern_values(
                gs_logits_whole_img,
                None,
                centers,
                args.num_lines,
                args.radius + 1,
                center_jitters=center_jitters,
                angle_jitters=angle_jitters)

            # currently only class 1 is foreground
            # if there are multiple foreground classes, use a for loop
            gs = gs_logits[:, :,
                           1:] - gs_logits[:, :, :-1]  # compute the gradient

            noises.normal_(
                0, 1
            )  # noises is only used for random exploration here, so no mirrored sampling is needed
            gs_noisy = torch.unsqueeze(gs, 1) + noises

            def batch_eval_snake(snake, inputs, batch_sz):
                n_inputs = len(inputs)
                assert n_inputs % batch_sz == 0
                n_batches = int(np.ceil(n_inputs / batch_sz))
                ind_sets = []
                for j in range(n_batches):
                    inps = inputs[j * batch_sz:(j + 1) * batch_sz]
                    batch_ind_sets = snake(inps).data.cpu().numpy()
                    ind_sets.append(batch_ind_sets)
                ind_sets = np.concatenate(ind_sets, 0)
                return ind_sets

            gs_noisy = gs_noisy.reshape((args.batch_sz * args.num_samples,
                                         args.num_lines, args.radius))
            ind_sets = batch_eval_snake(snake, gs_noisy,
                                        args.batch_sz * args.num_samples)
            ind_sets = ind_sets.reshape(
                (args.batch_sz * args.num_samples, args.num_lines))
            ind_sets = np.expand_dims(
                smooth_ind(ind_sets, args.smoothing_window), -1)

            # loss layers
            m = torch.nn.LogSoftmax(dim=1)
            loss = torch.nn.NLLLoss()

            # ===========================================================================
            # Inner loop: Train dice loss prediction network
            snake_approx_net.train()
            for _ in range(args.dice_approx_train_steps):

                snake_approx_logits = snake_approx_net(
                    gs_noisy.reshape(args.batch_sz * args.num_samples, 1,
                                     args.num_lines, args.radius).detach())
                snake_approx_train_loss = loss(
                    m(snake_approx_logits.squeeze().transpose(2, 1)),
                    torch.cuda.LongTensor(ind_sets.squeeze()))
                snake_approx_optimizer.zero_grad()
                snake_approx_train_loss.backward()
                snake_approx_optimizer.step()
            # ===========================================================================

            # ===========================================================================
            # Now, minimize the approximate dice loss
            snake_approx_net.eval()

            gt_indices = []
            for mask, center, cj, aj in zip(masks, centers, center_jitters,
                                            angle_jitters):
                gt_ind = mask_to_indices(mask, center, args.radius,
                                         args.num_lines, cj, aj)
                gt_indices.append(gt_ind)
            gt_indices = np.asarray(gt_indices).astype(int)

            gt_indices = gt_indices.reshape((args.batch_sz, args.num_lines))
            gt_indices = torch.cuda.LongTensor(gt_indices)

            snake_approx_logits = snake_approx_net(
                gs.reshape((args.batch_sz, 1, args.num_lines, args.radius)))
            nll_approx_loss = loss(
                m(snake_approx_logits.squeeze().transpose(2, 1)), gt_indices)

            total_loss = nll_approx_loss
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
            # ===========================================================================

            snake_approx_train_loss = snake_approx_train_loss.data.cpu().numpy()
            nll_approx_loss = nll_approx_loss.data.cpu().numpy()
            total_loss = snake_approx_train_loss + nll_approx_loss

            if step % args.log_freq == 0:
                stop = timeit.default_timer()
                print(f"step={step}\tepoch={epoch}\titer={iteration}"
                      f"\tloss={total_loss}"
                      f"\tsnake_approx_train_loss={snake_approx_train_loss}"
                      f"\tnll_approx_loss={nll_approx_loss}"
                      f"\tlr={optimizer.param_groups[0]['lr']}"
                      f"\ttime={stop-start}")
                start = stop
                writer.add_scalar("total_loss", total_loss, step)
                writer.add_scalar("nll_approx_loss", nll_approx_loss, step)
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], step)

            if step % args.train_eval_freq == 0:
                train_dice = do_eval(
                    net,
                    snake_eval,
                    train.images,
                    train.masks,
                    train.centers,
                    args.batch_sz,
                    args.num_lines,
                    args.radius,
                    smoothing_window=args.smoothing_window).data.cpu().numpy()
                writer.add_scalar("train_dice", train_dice, step)
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\ttrain_eval: train_dice={train_dice}"
                )

            if step % args.val_eval_freq == 0:
                val_dice = do_eval(
                    net,
                    snake_eval,
                    valid.images,
                    valid.masks,
                    valid.centers,
                    args.batch_sz,
                    args.num_lines,
                    args.radius,
                    smoothing_window=args.smoothing_window).data.cpu().numpy()
                writer.add_scalar("val_dice", val_dice, step)
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\tvalid_dice={val_dice}"
                )
                if val_dice > best_val_dice:
                    best_val_dice = val_dice
                    _pickle.dump([
                        net.state_dict(),
                        optimizer.state_dict(),
                        snake_approx_net.state_dict(),
                        snake_approx_optimizer.state_dict()
                    ],
                                 open(
                                     os.path.join(args.log_dir,
                                                  'best_model.pth.tar'), 'wb'))
                    f = open(
                        os.path.join(args.log_dir, f"best_val_dice{step}.txt"),
                        'w')
                    f.write(str(best_val_dice))
                    f.close()
                    print(f"better val dice detected.")

            step += 1

    return best_val_dice
Example #21
0
    config = confuse.Configuration('research')
    config.set_file(args.config_file)

    # set model
    model = None
    if args.model == 'classic_lm':
        model = ClassicLanguageModel(**config['model'].get(),
                                     model_name=args.model)
    elif args.model == 'attention_lm':
        model = AttentionLanguageModel(**config['model'].get(),
                                       model_name=args.model)
    else:
        raise ValueError("You have wrong --model parameter")

    # seed everything
    seed_all(config['general']['seed'].get())

    # get dataloaders and training framework
    loaders = load_dataloaders(**config['dataloaders'].get())
    framework = LMFramework(model,
                            **config['optimizer'].get(),
                            loaders=loaders)

    if not os.path.isdir(config['general']['checkpoint_path'].get()):
        os.makedirs(config['general']['checkpoint_path'].get())

    if not os.path.isdir(config['trainer_params']['default_save_path'].get()):
        os.makedirs(config['trainer_params']['default_save_path'].get())

    exp_name =  args.experiment_name + \
                '_' + \
Example #22
0
def main():
    """Runs experiment"""

    args = parser.parse_args()

    utils.seed_all(args.seed)

    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d_%H:%M:%S")

    to_save = pathlib.Path(args.save_dir)
    dir_name = args.save_folder + "_" + st
    to_save = to_save / dir_name
    to_save = str(to_save.resolve())

    log_file = "Experiment_info.txt"

    experiment_logger = Logger(to_save, log_file)
    experiment_logger.log_header("Arguments for the experiment :")
    experiment_logger.log_info(vars(args))

    feat_ext = fe_utils.load_feature_extractor(args.feat_extractor, obs_width=args.pedestrian_width, agent_width=args.pedestrian_width)

    experiment_logger.log_header("Parameters of the feature extractor :")
    experiment_logger.log_info(feat_ext.__dict__)

    env = GridWorld(
        display=args.render,
        is_random=False,
        rows=576,
        cols=720,
        agent_width=args.pedestrian_width,
        step_size=2,
        obs_width=args.pedestrian_width,
        width=10,
        subject=args.subject,
        annotation_file=args.annotation_file,
        goal_state=None,
        step_wrapper=utils.step_wrapper,
        seed=args.seed,
        replace_subject=args.replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        continuous_action=False,
        reset_wrapper=utils.reset_wrapper,
        consider_heading=True,
        is_onehot=False,
        show_orientation=True,
        show_comparison=True,
        show_trail=True,
    )

    experiment_logger.log_header("Environment details :")
    experiment_logger.log_info(env.__dict__)

    if args.rl_method == "ActorCritic":
        rl_method = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=args.rl_log_intervals,
            max_episode_length=args.rl_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=to_save,
            lr=args.lr_rl,
            max_episodes=args.rl_episodes,
        )

    if args.rl_method == "SAC":
        if not env.continuous_action:
            print("The action space needs to be continuous for SAC to work.")
            exit()

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        rl_method = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            play_interval=500,
            learning_rate=args.lr_rl,
            buffer_sample_size=args.replay_buffer_sample_size,
        )

    if args.rl_method == "discrete_QSAC":
        if not isinstance(env.action_space, gym.spaces.Discrete):
            print("discrete SAC requires a discrete action space to work.")
            exit()

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        rl_method = QSAC(
            env,
            replay_buffer,
            feat_ext,
            args.replay_buffer_sample_size,
            learning_rate=args.lr_rl,
            entropy_tuning=True,
            entropy_target=args.entropy_target,
            play_interval=args.play_interval,
            tau=args.tau,
            gamma=args.gamma,
        )

    if args.rl_method == "discrete_SAC":
        if not isinstance(env.action_space, gym.spaces.Discrete):
            print("discrete SAC requires a discrete action space to work.")
            exit()

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        rl_method = DiscreteSAC(
            env,
            replay_buffer,
            feat_ext,
            args.replay_buffer_sample_size,
            learning_rate=args.lr_rl,
            entropy_tuning=True,
            entropy_target=args.entropy_target,
            play_interval=args.play_interval,
            tau=args.tau,
            gamma=args.gamma,
        )

    print("RL method initialized.")
    print(rl_method.policy)
    if args.policy_path is not None:
        rl_method.policy.load(args.policy_path)

    experiment_logger.log_header("Details of the RL method :")
    experiment_logger.log_info(rl_method.__dict__)

    expert_trajectories = read_expert_trajectories(args.exp_trajectory_path)

    irl_method = PerTrajGCL(
        rl=rl_method,
        env=env,
        expert_trajectories=expert_trajectories,
        learning_rate=args.lr_irl,
        l2_regularization=args.regularizer,
        save_folder=to_save,
        saving_interval=args.saving_interval,
    )

    print("IRL method intialized.")
    print(irl_method.reward_net)

    experiment_logger.log_header("Details of the IRL method :")
    experiment_logger.log_info(irl_method.__dict__)

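    # Warm-start the reward network on expert samples before the main loop.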
    irl_method.pre_train(
        args.pre_train_iterations,
        args.num_expert_samples,
        account_for_terminal_state=args.account_for_terminal_state,
        gamma=args.gamma,
    )

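    # Give the policy a head start against the warm-started reward network.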
    rl_method.train(
        args.pre_train_rl_iterations,
        args.rl_ep_length,
        reward_network=irl_method.reward_net,
    )

    # save intermediate RL result
    rl_method.policy.save(to_save + "/policy")

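    # Main IRL loop: alternately update the reward network and re-train the
    # policy under the current reward estimate.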
    irl_method.train(
        args.irl_iterations,
        args.rl_episodes,
        args.rl_ep_length,
        args.rl_ep_length,
        reset_training=args.reset_training,
        account_for_terminal_state=args.account_for_terminal_state,
        gamma=args.gamma,
        stochastic_sampling=args.stochastic_sampling,
        num_expert_samples=args.num_expert_samples,
        num_policy_samples=args.num_policy_samples,
    )

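    # Roll out the learned policy (one episode per expert trajectory) and
    # score it with the LTHMP2020 metric suite.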
    metric_applicator = metric_utils.LTHMP2020()
    metric_results = metric_utils.collect_trajectories_and_metrics(
        env,
        feat_ext,
        rl_method.policy,
        len(expert_trajectories),
        args.rl_ep_length,
        metric_applicator,
        disregard_collisions=True,
    )

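    # Each metric entry is a sequence; keep only its first element.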
    pd_metrics = pd.DataFrame(metric_results).T
    pd_metrics = pd_metrics.applymap(lambda x: x[0])
    pd_metrics.to_pickle(to_save + "/metrics.pkl")

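    # Dump the training curves for the RL, IRL, and pre-training phases.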
    with open(to_save + "/rl_data.csv", "a") as f:
        rl_method.data_table.write_csv(f)

    with open(to_save + "/irl_data.csv", "a") as f:
        irl_method.data_table.write_csv(f)

    with open(to_save + "/pre_irl_data.csv", "a") as f:
        irl_method.pre_data_table.write_csv(f)
Example #23
0
def run():
    device = "cuda"
    seed_all(42)
    df = pd.read_csv("/colabdrive/train.tsv", sep="\t", engine="python")
    df = df[["Phrase", "Sentiment"]]

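    # Train and evaluate one model per cross-validation fold.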
    for num_fold, (df_train, df_valid) in enumerate(kfold_df(df)):
        print("FOLD %s: " % num_fold)
        train_dataset = SegmentDataset(df_train.Phrase.values,
                                       df_train.Sentiment.values)
        valid_dataset = SegmentDataset(df_valid.Phrase.values,
                                       df_valid.Sentiment.values)

        train_loader = DataLoader(train_dataset,
                                  batch_size=config.TRAIN_BATCH_SIZE,
                                  num_workers=4)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=config.VALID_BATCH_SIZE,
                                  num_workers=4)
        model = BertUncasedModel().to(device)
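        # Exclude bias and LayerNorm parameters from weight decay, the
        # standard AdamW setup for fine-tuning BERT-style models.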
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.001,
            },
            {
                "params": [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]

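        # Total optimiser steps across all epochs, used to size the linear
        # warmup/decay schedule below.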
        num_train_steps = int(
            len(train_dataset) / config.TRAIN_BATCH_SIZE * config.NUM_EPOCHS)

        optimizer = AdamW(optimizer_parameters, lr=5e-5)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        train_fn(model, train_loader, optimizer, scheduler, device)
        # load_state_dict mutates the model in place and returns a key
        # report, so don't rebind the model to its result.
        model.load_state_dict(torch.load(config.MODEL_PATH))
        print(eval_fn(model, valid_loader, device))