def main():
    """Entry point: parse CLI options and launch A3C training on an Atari env."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--env', type=str, default='pong')
    arg_parser.add_argument('--lambda-pi', type=float, default=1.0)
    arg_parser.add_argument('--lambda-ve', type=float, default=1.0)
    arg_parser.add_argument('--history-len', type=int, default=4)
    arg_parser.add_argument('--seed', type=int, default=0)
    cli = arg_parser.parse_args()

    utils.seed_all(seed=cli.seed)

    # Single shared Adam optimizer; use_locking=True presumably because the
    # 16 actors update it concurrently.
    adam = tf.train.AdamOptimizer(learning_rate=5e-5,
                                  epsilon=1e-4,
                                  use_locking=True)

    a3c.execute(
        lambda: make_atari_env(cli.env, cli.history_len),
        AtariPolicy,
        adam,
        discount=0.99,
        lambda_pi=cli.lambda_pi,
        lambda_ve=cli.lambda_ve,
        entropy_bonus=0.01,
        max_sample_length=10,
        n_actors=16,
        max_timesteps=10000000,
        grad_clip=40.,
        log_every_n_steps=50000,
    )
def main(args):
    """Set up device, data, model, optimiser and trainer, then run training.

    Args:
        args: parsed CLI namespace. Reads at least ``cpu``, ``devrun``,
            ``nosave``, ``project``, ``name``, ``seed``, ``epochs`` and
            ``epoch_length``; ``args.logdir`` is set here as a side effect.
    """
    logdir = init_logging(args)
    logger = logging.getLogger(__name__)
    args.logdir = logdir

    # Pick the device; cuDNN autotuning is enabled only when actually on GPU.
    if args.cpu or not th.cuda.is_available():
        device = th.device('cpu')
    else:
        device = th.device('cuda')
        cudnn.enabled = True
        cudnn.benchmark = True

    # W&B is only active for real, saved runs.
    use_wandb = not args.devrun and not args.nosave
    if use_wandb:
        wandb.init(config=args, dir=logdir, project=args.project)
        if args.name is not None:
            wandb.run.name = args.name

    seed_all(args.seed)

    logger.info('Creating dataloader')
    loader = create_dataloader(args)
    logger.info('Creating model')
    model = create_model(args).to(device)
    logger.info('Creating optimiser')
    opt = create_optimiser(model.parameters(), args)
    logger.info('Creating loss')
    loss = create_loss(args)
    logger.info('Creating trainer')
    trainer = create_trainer(loader, model, opt, loss, device, args)

    epochs = args.epochs
    epoch_length = args.epoch_length
    logger.info('Starting trainer')
    # BUG FIX: wandb.watch was previously called unconditionally, which fails
    # when wandb.init was skipped (--devrun / --nosave runs).
    if use_wandb:
        wandb.watch(model, log="all", log_freq=1)
    trainer.run(loader['train'], max_epochs=epochs, epoch_length=epoch_length)
def main():
    """Launch A3C on CartPole-v0 with fixed hyper-parameters."""
    env_name = 'CartPole-v0'
    utils.seed_all(seed=0)

    # use_locking=True presumably because the actors share this optimizer.
    shared_optimizer = tf.train.AdamOptimizer(learning_rate=5e-5,
                                              use_locking=True)

    training_options = dict(
        discount=0.99,
        lambda_pi=1.0,
        lambda_ve=1.0,
        entropy_bonus=0.01,
        max_sample_length=20,
        n_actors=16,
        max_timesteps=1000000,
        log_every_n_steps=10000,
    )
    a3c.execute(lambda: make_env(env_name),
                CartPolePolicy,
                shared_optimizer,
                **training_options)
def main(argv, common_opts):
    """Run single-image 3D pose inference and plot the input image next to
    the predicted skeleton.

    Args:
        argv: raw CLI arguments, forwarded to ``parse_args``.
        common_opts: dict of shared options; only ``'device'`` is read here.
    """
    args = parse_args(argv)
    seed_all(12345)
    init_algorithms(deterministic=True)
    torch.set_grad_enabled(False)  # inference only
    device = common_opts['device']

    # Idiom fix: was ``assert args.multicrop == False``.
    assert not args.multicrop, 'TODO: Implement multi-crop for single image inference.'

    model = load_model(args.model).to(device).eval()
    input_specs: ImageSpecs = model.data_specs.input_specs

    # Shrink the image in place to the network's input size
    # (PIL thumbnail preserves aspect ratio).
    image: PIL.Image.Image = PIL.Image.open(args.image, 'r')
    image.thumbnail((input_specs.width, input_specs.height))

    inp = input_specs.convert(image).to(device, torch.float32)
    output = model(inp[None, ...])[0]  # add batch dim, take the single result
    print(output)

    norm_skel3d = ensure_cartesian(output.to(CPU, torch.float64), d=3)

    # Left panel: input image; right panel: predicted 3D skeleton.
    fig = plt.figure(figsize=(16, 8))
    ax1 = fig.add_subplot(1, 2, 1)
    ax2: Axes3D = fig.add_subplot(1, 2, 2, projection='3d')
    ax1.imshow(input_specs.unconvert(inp.to(CPU)))
    plot_skeleton_on_axes3d(norm_skel3d, CanonicalSkeletonDesc, ax2, invert=True)
    plt.show()
def experiment(device, args=None):
    """Train an SSD300 detector and export the trained model to ONNX.

    Args:
        device (str): device to use for training.
        args (dict): experiment arguments with "train", "validation",
            "model" and "experiment" sections.

    Raises:
        ValueError: if ``args`` is not supplied.
    """
    if args is None:
        # BUG FIX: the original did ``args = dict`` (assigning the *class*),
        # so every subsequent subscript failed with an opaque TypeError.
        # Fail fast with a clear message instead.
        raise ValueError("experiment() requires an 'args' configuration dict")

    train_config = args["train"]
    train_augmentations = albu.Compose(
        [
            albu.OneOf([
                albu.HueSaturationValue(hue_shift_limit=10,
                                        sat_shift_limit=35,
                                        val_shift_limit=25),
                albu.RandomGamma(),
                albu.CLAHE(),
            ]),
            albu.RandomBrightnessContrast(brightness_limit=[-0.3, 0.3],
                                          contrast_limit=[-0.3, 0.3],
                                          p=0.5),
            albu.OneOf([
                albu.Blur(),
                albu.MotionBlur(),
                albu.GaussNoise(),
                albu.ImageCompression(quality_lower=75)
            ]),
            albu.ShiftScaleRotate(shift_limit=0.0625,
                                  scale_limit=0.15,
                                  rotate_limit=10,
                                  border_mode=0,
                                  p=0.5),
            albu.Resize(300, 300),
            albu.Normalize(),
            ToTensorV2(),
        ],
        # 'albumentations' format: x1, y1, x2, y2 in range [0, 1].
        # (Made keyword-explicit for consistency with the validation pipeline.)
        bbox_params=albu.BboxParams(format="albumentations"),
    )
    train_dataset = COCOFileDataset(train_config["annotations"],
                                    train_config["images_dir"],
                                    transforms=train_augmentations)
    train_loader = DataLoader(
        train_dataset,
        batch_size=train_config["batch_size"],
        num_workers=train_config["num_workers"],
        shuffle=True,
        drop_last=True,
    )
    logger.info("Train dataset information:")
    logger.info("\n" + train_dataset.info())

    valid_config = args["validation"]
    valid_augmentations = albu.Compose(
        [
            albu.Resize(300, 300),
            albu.Normalize(),
            ToTensorV2(),
        ],
        # 'albumentations' format: x1, y1, x2, y2 in range [0, 1].
        bbox_params=albu.BboxParams(format="albumentations"),
    )
    valid_dataset = COCOFileDataset(valid_config["annotations"],
                                    valid_config["images_dir"],
                                    transforms=valid_augmentations)
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_config["batch_size"],
        num_workers=valid_config["num_workers"],
        shuffle=False,
        drop_last=False,
    )
    logger.info("Validation dataset information:")
    logger.info("\n" + valid_dataset.info())

    model_config = args["model"]
    num_classes = model_config["num_classes"] + 1  # +1 for background class

    seed_all(42)

    model = SSD300(model_config["backbone"], num_classes)
    model = model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=1e-3 / 2)
    epoch_scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, args["experiment"]["num_epochs"])
    batch_scheduler = None  # no per-batch LR schedule
    criterion = Loss(num_classes)

    experiment_config = args["experiment"]
    num_epochs = experiment_config["num_epochs"]
    for epoch_idx in range(1, num_epochs + 1):
        logger.info(f"Epoch: {epoch_idx}/{num_epochs}")
        train_metrics = train_fn(train_loader,
                                 model,
                                 device,
                                 criterion,
                                 optimizer,
                                 batch_scheduler,
                                 verbose=False)
        logger.info(f"     Train: {train_metrics}")
        # TODO: checkpoints
        valid_metrics = valid_fn(valid_loader,
                                 model,
                                 device,
                                 criterion,
                                 verbose=False)
        logger.info(f"Validation: {valid_metrics}")
        epoch_scheduler.step()

    export_to_onnx(model, torch.randn(1, 3, 300, 300),
                   experiment_config["onnx"])
    logger.info("Exported ONNX model to '{}'".format(
        experiment_config["onnx"]))
def train_alg(model_alg, reset_optimizers_between_envs,
              reset_optimizers_every_iter, buffer_size, subsave, iteration,
              last_round_no_mer, is_evolving, seed):
    """Train one agent across a sequence of environment parameterisations.

    Args:
        model_alg: algorithm class (e.g. SAC or SACMER) instantiated below.
        reset_optimizers_between_envs: forwarded to ``model.update_env`` each
            time the environment parameters change.
        reset_optimizers_every_iter: forwarded to the model constructor as
            ``reset_optimizers_during_training``.
        buffer_size: replay buffer capacity.
        subsave: base path for tensorboard logs and the saved model.
        iteration: index of this run; used in log/model paths.
        last_round_no_mer: if True (and the algorithm is SACMER), disable
            MER/reservoir sampling for the final environment.
        is_evolving: if True, train over the full ``params_list`` curriculum;
            otherwise only on its last element.
        seed: RNG seed for both library seeding and the model.
    """
    seed_all(seed)
    training_timesteps = META_TRAINING_TIMESTEPS
    params = params_list
    if not is_evolving:
        # Non-evolving runs train only on the final parameterisation.
        params = [params[-1]]
    start_time = time()
    env = gym.make(env_name)
    eval_env = gym.make(env_name)
    final_eval_env = gym.make(env_name)
    # The "final" eval env is fixed to the mean of the parameter sampler.
    final_parameters_dict = params_sampler.sample1_means()
    change_env_parameters(final_eval_env, parameter_dict=final_parameters_dict)
    tensorboard_path = subsave + '/tb_' + str(iteration)
    optimizer_kwargs = {}
    policy_kwargs = {
        'optimizer_class': th.optim.Adam,
        'optimizer_kwargs': optimizer_kwargs,
    }
    model = model_alg(
        MlpPolicy,
        env,
        verbose=0,
        buffer_size=buffer_size,
        batch_size=BATCH_SIZE,
        learning_rate=LEARNING_RATE,
        learning_starts=LEARNING_STARTS,
        gradient_steps=GRADIENT_STEPS,
        policy_kwargs=policy_kwargs,
        mer_s=MER_S,
        mer_gamma=MER_GAMMA,
        monitor_wrapper=True,
        tensorboard_log=tensorboard_path,
        reset_optimizers_during_training=reset_optimizers_every_iter,
        seed=seed)
    for i_param, param in enumerate(params):
        log_name = 'run_' + str(i_param)
        if i_param == (len(params) - 1):
            # The last environment gets a longer training budget.
            if not is_evolving:
                training_timesteps = FINAL_TRAINING_TIMESTEPS + NUM_TRAINING_ENVS * META_TRAINING_TIMESTEPS
            else:
                training_timesteps = FINAL_TRAINING_TIMESTEPS
            log_name += '_final'
        change_env_parameters(env, eval_env, parameter_dict=param)
        if model_alg.__name__ == 'SACMER' and last_round_no_mer and (
                i_param == (len(params) - 1)):
            is_reservoir = False
            is_mer = False
        else:
            # This will not have any effect on regular SAC
            is_reservoir = True
            is_mer = True
        model.update_env(env,
                         monitor_wrapper=False,
                         is_reservoir=is_reservoir,
                         reset_optimizers=reset_optimizers_between_envs
                         )  # environment already wrapped so monitor_wrapper=False
        eval_callback = EvalCallback(eval_env,
                                     best_model_save_path=None,
                                     log_path=tensorboard_path + '/' +
                                     log_name + '/running_eval',
                                     eval_freq=EVAL_FREQ,
                                     n_eval_episodes=N_EVAL_EPISODES,
                                     deterministic=True,
                                     render=False)
        if is_evolving:
            final_eval_callback = EvalCallback(final_eval_env,
                                               best_model_save_path=None,
                                               log_path=tensorboard_path +
                                               '/' + log_name + '/final_eval',
                                               eval_freq=EVAL_FREQ,
                                               n_eval_episodes=N_EVAL_EPISODES,
                                               deterministic=True,
                                               render=False)
        else:
            # No-op callback when there is no evolving curriculum to track.
            final_eval_callback = EventCallback()
        model.learn(total_timesteps=training_timesteps,
                    log_interval=1,
                    reset_num_timesteps=False,
                    tb_log_name=log_name,
                    is_mer=is_mer,
                    callback=CallbackList([eval_callback,
                                           final_eval_callback]))
        env.reset()
        eval_env.reset()
    if iteration == 0:
        # saving models fills up storage, so we only save one (which we will
        # also probably not use)
        # NOTE(review): this path is ``subsave + 'model_...'`` with no '/'
        # while the tensorboard path above uses ``subsave + '/tb_...'`` —
        # confirm ``subsave`` ends with a separator.
        model.save(subsave + 'model_' + str(iteration))
    print(f"Done. Total time = {time() - start_time} seconds.")
def main():
    """Entry point for the deep max-ent IRL training script.

    Parses CLI arguments, sets up logging, builds the feature extractor,
    environment, RL method and IRL method, then runs IRL training.
    """
    args = parser.parse_args()
    utils.seed_all(args.seed)

    if args.on_server:
        # matplotlib without monitor
        matplotlib.use("Agg")
        # pygame without monitor
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    # Timestamp that makes the results folder name unique.
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d_%H:%M:%S")

    if not args.save_folder:
        print("Provide save folder.")
        exit()

    # Encode the network architectures into the folder names.
    policy_net_dims = "-policy_net-"
    for dim in args.policy_net_hidden_dims:
        policy_net_dims += str(dim)
        policy_net_dims += "-"

    reward_net_dims = "-reward_net-"
    for dim in args.reward_net_hidden_dims:
        reward_net_dims += str(dim)
        reward_net_dims += "-"

    parent_dir = ("./results/" + str(args.save_folder) + st +
                  policy_net_dims + reward_net_dims)
    to_save = ("./results/" + str(args.save_folder) + st + policy_net_dims +
               reward_net_dims + "-reg-" + str(args.regularizer) + "-seed-" +
               str(args.seed) + "-lr-" + str(args.lr_irl))
    log_file = "Experiment_info.txt"
    experiment_logger = Logger(to_save, log_file)
    experiment_logger.log_header("Arguments for the experiment :")
    # Record the exact code version used for this run.
    repo = git.Repo(search_parent_directories=True)
    experiment_logger.log_info({'From branch : ': repo.active_branch.name})
    experiment_logger.log_info({'Commit number : ': repo.head.object.hexsha})
    experiment_logger.log_info(vars(args))

    # Deferred imports: heavy RL/IRL modules load only after argument checks.
    # for rl
    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac_pi import SoftActorCritic
    from rlmethods.soft_ac import SoftActorCritic as QSAC
    from rlmethods.rlutils import ReplayBuffer

    # for irl
    from irlmethods.deep_maxent import DeepMaxEnt
    import irlmethods.irlUtils as irlUtils
    from featureExtractor.gridworld_featureExtractor import (
        OneHot,
        LocalGlobal,
        SocialNav,
        FrontBackSideSimple,
    )

    agent_width = 10
    step_size = 2
    obs_width = 10
    grid_size = 10

    if args.feat_extractor is None:
        print("Feature extractor missing.")
        exit()

    # Initialize the requested feature extractor.
    if args.feat_extractor == "Onehot":
        feat_ext = OneHot(grid_rows=10, grid_cols=10)
    if args.feat_extractor == "SocialNav":
        feat_ext = SocialNav()
    if args.feat_extractor == "FrontBackSideSimple":
        feat_ext = FrontBackSideSimple(
            thresh1=1,
            thresh2=2,
            thresh3=3,
            thresh4=4,
            step_size=step_size,
            agent_width=agent_width,
            obs_width=obs_width,
        )
    if args.feat_extractor == "LocalGlobal":
        feat_ext = LocalGlobal(
            window_size=5,
            grid_size=grid_size,
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
        )
    if args.feat_extractor == "DroneFeatureSAM1":
        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=5,
            thresh2=10,
        )
    if args.feat_extractor == "DroneFeatureRisk":
        feat_ext = DroneFeatureRisk(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=15,
            thresh2=30,
        )
    if args.feat_extractor == "DroneFeatureRisk_v2":
        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=15,
            thresh2=30,
        )
    if args.feat_extractor == "DroneFeatureRisk_speed":
        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=10,
            thresh2=15,
        )
    if args.feat_extractor == "DroneFeatureRisk_speedv2":
        feat_ext = DroneFeatureRisk_speedv2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=18,
            thresh2=30,
        )
    if args.feat_extractor == 'VasquezF1':
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)
    if args.feat_extractor == 'VasquezF2':
        # BUG FIX: this branch previously constructed VasquezF1 (copy-paste
        # error), so asking for VasquezF2 silently ran the wrong features.
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)
    if args.feat_extractor == 'VasquezF3':
        feat_ext = VasquezF3(agent_width)
    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)
    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    experiment_logger.log_header("Parameters of the feature extractor :")
    experiment_logger.log_info(feat_ext.__dict__)

    # Initialize the environment. The annotation file is optional now.
    if not args.dont_save and args.save_folder is None:
        print("Specify folder to save the results.")
        exit()
    if args.exp_trajectory_path is None:
        print("Specify expert trajectory folder.")
        exit()

    env = GridWorld(
        display=args.render,
        is_random=True,
        rows=576,
        cols=720,
        agent_width=agent_width,
        step_size=step_size,
        obs_width=obs_width,
        width=grid_size,
        subject=args.subject,
        annotation_file=args.annotation_file,
        goal_state=None,
        step_wrapper=utils.step_wrapper,
        seed=args.seed,
        replace_subject=args.replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        continuous_action=False,
        reset_wrapper=utils.reset_wrapper,
        consider_heading=True,
        is_onehot=False,
    )
    experiment_logger.log_header("Environment details :")
    experiment_logger.log_info(env.__dict__)

    # Initialize the RL method.
    if args.rl_method == "ActorCritic":
        rl_method = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=args.rl_log_intervals,
            max_episode_length=args.rl_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=to_save,
            lr=args.lr_rl,
            max_episodes=args.rl_episodes,
        )
    if args.rl_method == "SAC":
        if not env.continuous_action:
            print("The action space needs to be continuous for SAC to work.")
            exit()
        replay_buffer = ReplayBuffer(args.replay_buffer_size)
        rl_method = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            play_interval=500,
            learning_rate=args.lr_rl,
            buffer_sample_size=args.replay_buffer_sample_size,
        )
    if args.rl_method == "discrete_SAC":
        if not isinstance(env.action_space, gym.spaces.Discrete):
            print(
                "discrete SAC requires a discrete action space environmnet to work."
            )
            exit()
        replay_buffer = ReplayBuffer(args.replay_buffer_size)
        rl_method = QSAC(
            env,
            replay_buffer,
            feat_ext,
            args.replay_buffer_sample_size,
            learning_rate=args.lr_rl,
            entropy_tuning=True,
            entropy_target=0.3,
            play_interval=args.play_interval,
        )
    print("RL method initialized.")
    print(rl_method.policy)
    if args.policy_path is not None:
        rl_method.policy.load(args.policy_path)

    experiment_logger.log_header("Details of the RL method :")
    experiment_logger.log_info(rl_method.__dict__)

    # Initialize the IRL method.
    trajectory_path = args.exp_trajectory_path

    # BUG FIX: previously ``scale`` was only assigned when args.scale_svf was
    # None or truthy, so a falsy-but-not-None value (e.g. False) left it
    # undefined and crashed below with NameError. This covers every case.
    scale = args.scale_svf if args.scale_svf else False

    irl_method = DeepMaxEnt(
        trajectory_path,
        rlmethod=rl_method,
        env=env,
        iterations=args.irl_iterations,
        on_server=args.on_server,
        l1regularizer=args.regularizer,
        learning_rate=args.lr_irl,
        seed=args.seed,
        graft=False,
        scale_svf=scale,
        hidden_dims=args.reward_net_hidden_dims,
        clipping_value=args.clipping_value,
        enumerate_all=True,
        save_folder=parent_dir,
        rl_max_ep_len=args.rl_ep_length,
        rl_episodes=args.rl_episodes,
    )
    print("IRL method intialized.")
    print(irl_method.reward)
    experiment_logger.log_header("Details of the IRL method :")
    experiment_logger.log_info(irl_method.__dict__)

    smoothing_flag = False
    if args.svf_smoothing:
        smoothing_flag = True

    irl_method.train(smoothing=smoothing_flag)
    # TODO: the original ended with ``if not args.dont_save: pass`` — saving
    # of final artifacts was never implemented.
def main():
    """Train or replay an actor-critic agent on MountainCar-v0.

    Behaviour depends on CLI flags: trains (optionally against a learned
    reward net), or replays saved policies (``--play``), or records user
    trajectories (``--play-user``).
    """
    args = parser.parse_args()
    utils.seed_all(args.seed)

    if args.on_server:
        # matplotlib without monitor
        matplotlib.use('Agg')
        # pygame without monitor
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
    from matplotlib import pyplot as plt

    save_folder = None
    if not args.dont_save:
        save_folder = './results/' + args.save_folder
        experiment_logger = Logger(save_folder, 'experiment_info.txt')
        experiment_logger.log_header('Arguments for the experiment :')
        experiment_logger.log_info(vars(args))

    # Required for the multiprocessing training below (train_mp).
    mp.set_start_method('spawn')

    # The clockless variant skips rendering delays when no display is needed.
    if args.render:
        from envs.gridworld import GridWorld
    else:
        from envs.gridworld_clockless import GridWorldClockless as GridWorld

    # Feature extractor: state_discretization gives the two bin counts.
    if args.feat_extractor == 'MCFeatures':
        feat_ext = MCFeatures(args.state_discretization[0],
                              args.state_discretization[1])
    elif args.feat_extractor == 'MCFeaturesOnehot':
        feat_ext = MCFeaturesOnehot(args.state_discretization[0],
                                    args.state_discretization[1])
    else:
        print('Enter proper feature extractor value.')
        exit()

    if not args.dont_save:
        experiment_logger.log_header('Parameters of the feature extractor :')
        experiment_logger.log_info(feat_ext.__dict__)

    # Old gridworld setup kept for reference:
    '''
    np.asarray([2,2]),np.asarray([7,4]),np.asarray([3,5]),
    np.asarray([5,2]),np.asarray([8,3]),np.asarray([7,5]),
    np.asarray([3,3]),np.asarray([3,7]),np.asarray([5,7])
    env = GridWorld(display=args.render, is_onehot= False,is_random=True,
                    rows=100, agent_width=agent_width,step_size=step_size,
                    obs_width=obs_width,width=grid_size,
                    cols=100, seed=7, buffer_from_obs=0, obstacles=3,
                    goal_state=np.asarray([5,5]))
    '''
    env = gym.make('MountainCar-v0')
    # unwrapped: drops the time-limit wrapper so episode length is ours.
    env = env.unwrapped

    if not args.dont_save:
        experiment_logger.log_header('Environment details :')
        experiment_logger.log_info(env.__dict__)

    model = ActorCritic(env,
                        feat_extractor=feat_ext,
                        gamma=0.99,
                        plot_loss=False,
                        log_interval=10,
                        max_ep_length=300,
                        hidden_dims=args.policy_net_hidden_dims,
                        max_episodes=30,
                        save_folder=save_folder)

    if not args.dont_save:
        experiment_logger.log_header('Details of the RL method :')
        experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:
        # Collect one file, or a numerically-sorted directory of .pt files.
        policy_file_list = []
        reward_across_models = []
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, '*.pt'))
            policy_file_list = sorted(policy_names, key=numericalSort)
        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        # Training mode: plain RL, or RL against a learned reward network.
        if args.reward_path is None:
            model.train_mp(n_jobs=4)
        else:
            from irlmethods.deep_maxent import RewardNet
            state_size = feat_ext.state_rep_size
            reward_net = RewardNet(state_size, args.policy_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train_mp(reward_net=reward_net, n_jobs=4)

        if not args.dont_save:
            model.policy.save(save_folder + '/policy/')

    if args.play:
        # Replay mode: evaluate each saved policy and plot mean ± std reward.
        xaxis = []
        counter = 1
        print(policy_file_list)
        for policy_file in policy_file_list:
            model.policy.load(policy_file)
            env.tickSpeed = 15
            assert args.policy_path is not None, 'pass a policy to play from!'
            reward_across_models.append(
                model.generate_trajectory(args.num_trajs, args.render))
            xaxis.append(counter)
            counter += 1
        # plotting the 2d list
        reward_across_models_np = np.array(reward_across_models)
        mean_rewards = np.mean(reward_across_models_np, axis=1)
        std_rewards = np.std(reward_across_models_np, axis=1)
        plt.plot(xaxis, mean_rewards, color='r', label='IRL trained agent')
        plt.fill_between(xaxis, mean_rewards - std_rewards,
                         mean_rewards + std_rewards,
                         alpha=0.5, facecolor='r')
        plt.draw()
        plt.pause(0.001)
        # Memory debugging left for reference:
        '''
        print('RAM usage :')
        display_memory_usage(process.memory_info().rss)
        print('GPU usage :')
        display_memory_usage(torch.cuda.memory_allocated())
        torch.cuda.empty_cache()
        display_memory_usage(torch.cuda.memory_allocated())
        '''
        plt.show()

    if args.play_user:
        # Interactive mode: record user-controlled trajectories to disk.
        env.tickSpeed = 200
        model.generate_trajectory_user(args.num_trajs,
                                       './trajs/ac_gridworld_user/')
def pretrain_gn(args, config, seed=0):
    """
    Pre-train the generator network (single-GPU) with an MSE criterion.

    more reading: https://pytorch.org/docs/stable/distributed.html

    Args:
        args: CLI namespace; ``args.gpus`` is a comma-separated GPU list.
        config: experiment configuration forwarded to ``setup_pretrain``.
        seed (int): RNG seed.
    """
    seed_all(seed)
    # The code is written in distributed style but only supports one GPU
    # here (rank is fixed to 0 and num_gpus must be 1).
    rank = 0
    gpus = [int(x) for x in args.gpus.split(',')]
    num_gpus = len(gpus)
    assert num_gpus == 1
    device = gpus[rank]
    print("[{}] Using GPU {} out of {} GPUS ({} available)".format(
        rank, device, device_count(), num_gpus))
    set_device(device)
    net, hyperparams, get_train_loader, get_val_loader, logger =\
        setup_pretrain(args, config)
    per_gpu_batch_size = int(hyperparams['pretrain_batch_size'] / num_gpus)
    # Initialise the step counter on first run so it persists in net.stats.
    if 'gn_update_steps' not in net.stats:
        net.stats['gn_update_steps'] = 0
    generator_criterion = MSELoss()
    # Resume from net.stats['epochs'] so restarted runs continue counting.
    for epoch in range(net.stats['epochs'],
                       hyperparams['pretrain_num_epochs']):
        print("=" * 25 + f"EPOCH {epoch}" + "=" * 25)
        # 1. Train
        net.train()
        mean_generator_loss_train = \
            optimize_generator(
                data_loader=get_train_loader(per_gpu_batch_size),
                net=net,
                generator_criterion=generator_criterion,
                logger=logger,
                tqdm_prefix='Train |',
                optimize=True)
        # num_gpus == 1, so this division is a no-op kept from the
        # multi-GPU version of the code.
        mean_generator_loss_train = mean_generator_loss_train.cpu().item(
        ) / num_gpus
        print(f"Train | G: {mean_generator_loss_train:.04f}")
        logger.log(data={
            'Train/Generator_Loss': mean_generator_loss_train,
        },
                   step=epoch)
        # 2. Validate
        with no_grad():
            net.eval()
            mean_generator_loss_val = \
                optimize_generator(
                    data_loader=get_val_loader(per_gpu_batch_size),
                    net=net,
                    generator_criterion=generator_criterion,
                    logger=logger,
                    tqdm_prefix='Val |',
                    optimize=False)
            mean_generator_loss_val = mean_generator_loss_val.cpu().item(
            ) / num_gpus
            print("Validation | G: {:.04f} ".format(mean_generator_loss_val))
            logger.log(data={
                'Validation/Generator_Loss': mean_generator_loss_val,
            },
                       step=epoch)
        # Checkpoint every epoch.
        net.save(
            epoch,
            '{}imprint_pretrain_gn_{:03d}.pth'.format(logger.logdir, epoch))
def train(
        *,
        flow_constructor,
        logdir,
        lr_schedule,
        dropout_p,
        seed,
        init_bs,
        total_bs,
        val_total_bs,
        ema_decay,
        steps_per_log,
        epochs_per_val,
        max_grad_norm,
        dtype=tf.float32,
        scale_loss=None,
        restore_checkpoint=None,
        scale_grad=None,
        dataset='cifar10',
        steps_per_samples=2000,
):
    """Multi-GPU (Horovod) training loop for a flow model.

    Builds the TF1 graph (data-dependent init, training, EMA validation and
    sampling subgraphs), then runs an infinite epoch loop with periodic
    validation, sampling, checkpointing and logging.

    Keyword Args:
        flow_constructor: returns (dequant_flow, flow, posterior_flow).
        logdir: base log directory (expanded and suffixed with rank/time).
        lr_schedule: callable step -> learning rate.
        dropout_p: dropout probability for training graphs.
        seed: base RNG seed, offset per Horovod rank below.
        init_bs / total_bs / val_total_bs: batch sizes (totals are split
            evenly across ranks).
        ema_decay: decay for the ExponentialMovingAverage of the params.
        steps_per_log / epochs_per_val: logging / validation cadence.
        max_grad_norm: global-norm gradient clipping (None disables).
        dtype: TF float type used throughout.
        scale_loss / scale_grad: optional loss/grad scaling factors.
        restore_checkpoint: checkpoint path to resume from (step is parsed
            from its '-NNN' suffix).
        dataset: dataset name passed to ``load_data``.
        steps_per_samples: unused here — presumably sampling cadence; kept
            for interface compatibility.
    """
    hvd, MPI, is_root, mpi_average = setup_horovod()

    # Seeding and logging setup — each rank gets a distinct seed.
    seed_all(hvd.rank() + hvd.size() * seed)

    assert total_bs % hvd.size() == 0
    assert val_total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    val_local_bs = val_total_bs // hvd.size()

    # Setting up the logger (root rank only writes TensorBoard output).
    logger = None
    logdir = '{}_mpi{}_{}'.format(os.path.expanduser(logdir), hvd.size(),
                                  time.time())
    checkpointdir = os.path.join(logdir, 'checkpoints')
    if is_root:
        print('Floating point format:', dtype)
        pprint(locals())
        os.makedirs(logdir)
        os.makedirs(checkpointdir)
        logger = TensorBoardOutput(logdir)

    # Load data
    if is_root:
        # Load once on root first to prevent downloading conflicts
        print('Loading data')
        load_data(dataset=dataset, dtype=dtype.as_numpy_dtype)
    MPI.COMM_WORLD.Barrier()
    data_train, data_val = load_data(dataset=dataset,
                                     dtype=dtype.as_numpy_dtype)
    img_shp = list(data_train.shape[1:])
    H, W, Cx = img_shp
    # Converts summed log-likelihood (nats) to bits-per-dimension.
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))
    if is_root:
        print('Training data: {}, Validation data: {}'.format(
            data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)

    # Build graph
    if is_root: print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()

    # Data-dependent init graph (only needed for a fresh run).
    if restore_checkpoint is None:
        if is_root: print('===== Init graph =====')
        x_init_sym = tf.placeholder(dtype, [init_bs] + img_shp)
        init_loss_sym, _ = build_forward(x=x_init_sym,
                                         dequant_flow=dequant_flow,
                                         flow=flow,
                                         posterior_flow=posterior_flow,
                                         flow_kwargs=dict(vcfg=VarConfig(
                                             init=True, ema=None,
                                             dtype=dtype),
                                                          dropout_p=dropout_p,
                                                          verbose=is_root))
    flops = int(get_flops()) / (10**9)

    # Training graph
    if is_root: print('===== Training graph =====')
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    loss_sym, _ = build_forward(x=x_sym,
                                dequant_flow=dequant_flow,
                                flow=flow,
                                posterior_flow=posterior_flow,
                                flow_kwargs=dict(vcfg=VarConfig(init=False,
                                                                ema=None,
                                                                dtype=dtype),
                                                 dropout_p=dropout_p,
                                                 verbose=is_root))

    # EMA of the trainable parameters (used for validation/sampling).
    params = tf.trainable_variables()
    if is_root: print_params()
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    maintain_averages_op = tf.group(ema.apply(params))
    # Op for setting the ema params to the current non-ema params (for use
    # after data-dependent init)
    name2var = {v.name: v for v in tf.global_variables()}
    copy_params_to_ema = tf.group([
        name2var[p.name.replace(':0', '') +
                 '/ExponentialMovingAverage:0'].assign(p) for p in params
    ])

    val_x_sym = tf.placeholder(dtype, [val_local_bs] + img_shp)
    # Validation and sampling (with EMA)
    if is_root: print('===== Validation graph =====')
    val_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema, dtype=dtype),
                           dropout_p=0.,
                           verbose=is_root)
    val_loss_sym, _ = build_forward(x=val_x_sym,
                                    dequant_flow=dequant_flow,
                                    flow=flow,
                                    posterior_flow=posterior_flow,
                                    flow_kwargs=val_flow_kwargs)

    # for debugging invertibility
    # val_inverr_sym = tf.reduce_max(tf.abs(dequant_x - flow.inverse(y,
    # train_flow_kwargs)[0][:,:,:,:img_shp[-1]]))

    if is_root: print('===== Sampling graph =====')
    sample_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema,
                                             dtype=dtype),
                              dropout_p=0,
                              verbose=is_root)
    samples_sym, _ = flow.sample(val_local_bs, sample_flow_kwargs)
    allgathered_samples_x_sym = hvd.allgather(tf.to_float(samples_sym))

    # Building the extra graphs must not have created new trainables.
    assert len(tf.trainable_variables()) == len(params)

    def run_validation(sess, i_step):
        # Each rank evaluates its own shard; losses are MPI-averaged.
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {val_x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=val_local_bs,
                                          include_final_partial_batch=False)
        ])
        val_loss, total_count = mpi_average(shard_losses)
        samples = sess.run(allgathered_samples_x_sym)
        if is_root:
            logger.writekvs(
                [('val_bpd', bpd_scale_factor * val_loss),
                 ('num_val_examples', total_count * val_local_bs),
                 ('samples',
                  tile_imgs(np.clip(samples, 0, 255).astype(np.uint8)))],
                i_step)

    if is_root: print('===== Optimization graph =====')
    # Optimization
    lr_sym = tf.placeholder(dtype, [], 'lr')
    optimizer = hvd.DistributedOptimizer(tf.train.AdamOptimizer(lr_sym))

    # Optional loss scaling (gradients are rescaled back afterwards).
    if scale_loss is None:
        grads_and_vars = optimizer.compute_gradients(loss_sym,
                                                     var_list=params)
    else:
        grads_and_vars = [(g / scale_loss, v)
                          for (g, v) in optimizer.compute_gradients(
                              loss_sym * scale_loss, var_list=params)]
    if scale_grad is not None:
        grads_and_vars = [(g / scale_grad, v) for (g, v) in grads_and_vars]
    # Optional global-norm clipping; grad_norm_sym is logged either way.
    if max_grad_norm is not None:
        clipped_grads, grad_norm_sym = tf.clip_by_global_norm(
            [g for (g, _) in grads_and_vars], max_grad_norm)
        grads_and_vars = [
            (cg, v) for (cg, (_, v)) in zip(clipped_grads, grads_and_vars)
        ]
    else:
        grad_norm_sym = tf.constant(0.)
    # Apply gradients and update the EMA in one op.
    opt_sym = tf.group(optimizer.apply_gradients(grads_and_vars),
                       maintain_averages_op)

    def loop(sess: tf.Session):
        i_step = 0
        if is_root: print('Initializing')
        sess.run(tf.global_variables_initializer())

        if restore_checkpoint is not None:
            # Restore from checkpoint (root only; others get params via
            # the broadcast below).
            if is_root:
                saver = tf.train.Saver()
                print('Restoring checkpoint:', restore_checkpoint)
                restore_step = int(restore_checkpoint.split('-')[-1])
                print('Restoring from step:', restore_step)
                saver.restore(sess, restore_checkpoint)
                i_step = restore_step
            else:
                saver = None
        else:
            # No checkpoint: perform data dependent initialization
            if is_root: print('Data dependent init')
            init_loss = sess.run(
                init_loss_sym, {
                    x_init_sym:
                    data_train[np.random.randint(0, data_train.shape[0],
                                                 init_bs)]
                })
            if is_root: print('Init loss:', init_loss * bpd_scale_factor)
            sess.run(copy_params_to_ema)
            saver = tf.train.Saver() if is_root else None

        if is_root: print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        if is_root:
            print('Training')
            print(f'Total GFLOPS: {flops}')
            print_params()

        loss_hist = deque(maxlen=steps_per_log)
        gnorm_hist = deque(maxlen=steps_per_log)
        for i_epoch in range(99999999999):
            if i_epoch % epochs_per_val == 0:
                run_validation(sess, i_step=i_step)
                if saver is not None:
                    saver.save(sess,
                               os.path.join(checkpointdir, 'model'),
                               global_step=i_step)

            epoch_start_t = time.time()
            for i_epoch_step, (batch, ) in enumerate(
                    iterbatches(
                        # non-sharded: each gpu goes through the whole dataset
                        [data_train],
                        batch_size=local_bs,
                        include_final_partial_batch=False,
                    )):
                lr = lr_schedule(i_step)
                loss, gnorm, _ = sess.run([loss_sym, grad_norm_sym, opt_sym],
                                          {
                                              x_sym: batch,
                                              lr_sym: lr
                                          })
                loss_hist.append(loss)
                gnorm_hist.append(gnorm)

                # Skip timing the very first step, which will be unusually
                # slow due to TF initialization
                if i_epoch == i_epoch_step == 0:
                    epoch_start_t = time.time()

                if i_step % steps_per_log == 0:
                    loss_hist_means = MPI.COMM_WORLD.gather(float(
                        np.mean(loss_hist)),
                                                            root=0)
                    gnorm_hist_means = MPI.COMM_WORLD.gather(float(
                        np.mean(gnorm_hist)),
                                                             root=0)
                    steps_per_sec = (i_epoch_step + 1) / (time.time() -
                                                          epoch_start_t)
                    if is_root:
                        kvs = [
                            ('iter', i_step),
                            ('epoch', i_epoch + i_epoch_step * local_bs /
                             data_train.shape[0]),  # epoch for this gpu
                            ('bpd',
                             float(
                                 np.mean(loss_hist_means) *
                                 bpd_scale_factor)),
                            ('gnorm', float(np.mean(gnorm_hist_means))),
                            ('lr', float(lr)),
                            # ('fps', steps_per_sec * total_bs),  # fps calculated over all gpus (this epoch)
                            ('sps', steps_per_sec),
                        ]
                        logger.writekvs(kvs, i_step)
                i_step += 1
            # End of epoch

    # Train
    config = tf.ConfigProto()
    # config.log_device_placement = True
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    if is_root: print('===== Creating session =====')
    with tf.Session(config=config) as sess:
        loop(sess)
def main():
    """Entry point for the pedestrian-environment RL experiment.

    Parses CLI args from the module-level ``parser``, builds a feature
    extractor + GridWorldDrone environment + RL agent (ActorCritic / SAC /
    discrete QSAC), then either trains the agent or replays stored policies,
    depending on ``--play`` / ``--play-user``.

    Side effects: creates a timestamped results folder (unless --dont-save),
    logs git/arg metadata, and may open matplotlib/pygame windows.

    Fix in this revision: the "VasquezF2" branch previously instantiated
    ``VasquezF1`` (copy-paste bug); it now builds ``VasquezF2`` as requested.
    """
    ##### timestamp used to build a unique results-folder name
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
    ###################
    args = parser.parse_args()

    seed_all(args.seed)

    if args.on_server:
        matplotlib.use("Agg")
        # pygame without monitor
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    from matplotlib import pyplot as plt

    mp.set_start_method("spawn")

    # Imports are deferred until after the matplotlib backend / SDL driver
    # are configured (some of these modules import pygame/matplotlib).
    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac import SoftActorCritic, QSoftActorCritic
    from rlmethods.rlutils import ReplayBuffer
    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureSAM1,
        DroneFeatureOccup,
        DroneFeatureRisk,
        DroneFeatureRisk_v2,
        VasquezF1,
        VasquezF2,
        VasquezF3,
        Fahad,
        GoalConditionedFahad,
    )
    from featureExtractor.gridworld_featureExtractor import (
        FrontBackSide,
        LocalGlobal,
        OneHot,
        SocialNav,
        FrontBackSideSimple,
    )
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureRisk_speed,
        DroneFeatureRisk_speedv2,
    )

    save_folder = None

    if not args.dont_save and not args.play:
        if not args.save_folder:
            print("Provide save folder.")
            exit()

        # Encode the network architectures into the folder name so runs are
        # self-describing on disk.
        policy_net_dims = "-policy_net-"
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += "-"

        reward_net_dims = "-reward_net-"
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += "-"

        save_folder = (
            "./results/"
            + args.save_folder
            + st
            + args.feat_extractor
            + "-seed-"
            + str(args.seed)
            + policy_net_dims
            + reward_net_dims
            + "-total-ep-"
            + str(args.total_episodes)
            + "-max-ep-len-"
            + str(args.max_ep_length)
        )

        # Record experiment provenance: args plus the current git branch/commit.
        experiment_logger = Logger(save_folder, "experiment_info.txt")
        experiment_logger.log_header("Arguments for the experiment :")

        repo = git.Repo(search_parent_directories=True)
        experiment_logger.log_info({'From branch : ' : repo.active_branch.name})
        experiment_logger.log_info({'Commit number : ' : repo.head.object.hexsha})
        experiment_logger.log_info(vars(args))

    # Geometry shared by the feature extractors and the environment
    # (units presumably pixels — TODO confirm against GridWorldDrone).
    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None
    # initialize the feature extractor to be used
    if args.feat_extractor == "Onehot":
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == "SocialNav":
        feat_ext = SocialNav(fieldList=["agent_state", "goal_state"])

    if args.feat_extractor == "FrontBackSideSimple":
        feat_ext = FrontBackSideSimple(
            thresh1=1,
            thresh2=2,
            thresh3=3,
            thresh4=4,
            step_size=step_size,
            agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == "LocalGlobal":
        feat_ext = LocalGlobal(
            window_size=11,
            grid_size=grid_size,
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == "DroneFeatureSAM1":
        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureOccup":
        feat_ext = DroneFeatureOccup(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            window_size=window_size,
        )

    if args.feat_extractor == "DroneFeatureRisk":
        feat_ext = DroneFeatureRisk(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_v2":
        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_speed":
        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            return_tensor=False,
            thresh1=10,
            thresh2=15,
        )

    if args.feat_extractor == "DroneFeatureRisk_speedv2":
        feat_ext = DroneFeatureRisk_speedv2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            return_tensor=False,
            thresh1=18,
            thresh2=30,
        )

    if args.feat_extractor == "VasquezF1":
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF2":
        # BUGFIX: previously constructed VasquezF1 here (copy-paste error),
        # so the F2 extractor could never be selected.
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF3":
        feat_ext = VasquezF3(agent_width)

    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    if feat_ext is None:
        print("Please enter proper feature extractor!")
        exit()

    # log feature extractor info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Parameters of the feature extractor :")
        experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    if args.replace_subject:
        replace_subject = True
    else:
        replace_subject = False

    env = GridWorldDrone(
        display=args.render,
        is_onehot=False,
        seed=args.seed,
        obstacles=None,
        show_trail=False,
        is_random=True,
        annotation_file=args.annotation_file,
        subject=args.subject,
        tick_speed=60,
        obs_width=10,
        step_size=step_size,
        agent_width=agent_width,
        replace_subject=replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        step_reward=0.001,
        show_comparison=True,
        consider_heading=True,
        show_orientation=True,
        # rows=200, cols=200, width=grid_size)
        rows=576,
        cols=720,
        width=grid_size,
    )
    # env = gym.make('Acrobot-v1')

    # log environment info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Environment details :")
        experiment_logger.log_info(env.__dict__)

    # initialize RL
    if args.rl_method == "ActorCritic":
        model = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=100,
            max_episode_length=args.max_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=save_folder,
            lr=args.lr,
            entropy_coeff=args.entropy_coeff,
            max_episodes=args.total_episodes,
        )

    if args.rl_method == "SAC":
        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        model = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            buffer_sample_size=args.replay_buffer_sample_size,
            entropy_tuning=True,
            play_interval=args.play_interval,
            entropy_target=args.entropy_target,
            gamma=args.gamma,
            learning_rate=args.lr,
        )

    if args.rl_method == "discrete_QSAC":
        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        model = QSoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            buffer_sample_size=args.replay_buffer_sample_size,
            entropy_tuning=True,
            play_interval=args.play_interval,
            entropy_target=args.entropy_target,
            gamma=args.gamma,
            learning_rate=args.lr,
        )

    # log RL info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Details of the RL method :")
        experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:
        from debugtools import numericalSort

        policy_file_list = []
        reward_across_models = []
        # print(args.policy_path)
        # A file path means a single policy; a directory means every *.pt
        # checkpoint inside it, replayed in numeric order.
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, "*.pt"))
            policy_file_list = sorted(policy_names, key=numericalSort)

        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        # no playing of any kind, so training
        if args.reward_path is None:
            if args.policy_path:
                model.policy.load(args.policy_path)
            if args.rl_method == "SAC" or args.rl_method == "discrete_QSAC":
                model.train(args.total_episodes, args.max_ep_length)
            else:
                model.train()
        else:
            # Train against a learned (IRL) reward network instead of the
            # environment reward.
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train(reward_net=reward_net)

        if not args.dont_save:
            model.policy.save(save_folder + "/policy-models/")

    if args.play:
        # env.tickSpeed = 15
        from debugtools import compile_results

        xaxis = []
        counter = 1
        plt.figure(0)
        avg_reward_list = []
        frac_good_run_list = []
        print(policy_file_list)
        for policy_file in policy_file_list:

            print("Playing for policy :", policy_file)
            model.policy.load(policy_file)

            # Derive the run folder from the checkpoint path (two levels up).
            policy_folder = policy_file.strip().split("/")[0:-2]
            save_folder = ""
            for p in policy_folder:
                save_folder = save_folder + p + "/"

            print("The final save folder ", save_folder)
            # env.tickSpeed = 10
            assert args.policy_path is not None, "pass a policy to play from!"

            if args.exp_trajectory_path is not None:
                from irlmethods.irlUtils import calculate_expert_svf

                expert_svf = calculate_expert_svf(
                    args.exp_trajectory_path,
                    max_time_steps=args.max_ep_length,
                    feature_extractor=feat_ext,
                    gamma=1,
                )
            # reward_across_models.append(model.generate_trajectory(args.num_trajs, args.render))
            if args.exp_trajectory_path is None:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render
                    )
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        store_raw=args.store_raw_states,
                        path=save_folder + "/agent_generated_trajectories/",
                    )
            else:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render, expert_svf=expert_svf
                    )
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        path=save_folder + "/agent_generated_trajectories/",
                        expert_svf=expert_svf,
                    )

            avg_reward, good_run_frac = compile_results(
                rewards, state_info, sub_info
            )

            avg_reward_list.append(avg_reward)
            frac_good_run_list.append(good_run_frac)
            plt.plot(avg_reward_list, c="r")
            plt.plot(frac_good_run_list, c="g")
            plt.draw()
        plt.show()

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(
            args.num_trajs, args.render, path="./user_generated_trajectories/"
        )
from args import args import pandas as pd import numpy as np from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import StratifiedKFold from neural import get_optimizer_scheduler, get_model_loss from loops import train_fn, valid_fn, test_fn import wandb import random from utils import get_learning_rate, isclose, seed_all from test import get_test_samples import os device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Using device:", device) seed_all(args) model_directory = "{}{}".format(args.model, "_" + args.name if args.name else "") model_directory = f"../models/{model_directory}/" if not os.path.exists(model_directory): os.makedirs(model_directory) train = pd.read_csv("../input/train.csv") # train_nocall = pd.read_csv("../input/env/nocall.csv") # train_nocall["folder"] = "env/audio/" train_le = LabelEncoder().fit(train.ebird_code.values) train["folder"] = "train_audio" train["ebird_label"] = train_le.transform(train.ebird_code.values) mapping = pd.Series(train.ebird_code.values,
def main(args):
    """Train a UNet segmentation model and track the best validation Dice.

    Args:
        args: parsed CLI namespace; reads train_set_sz, num_cls, lr,
            weight_decay, log_dir, n_epochs, batch_sz, use_ce, log_freq,
            train_eval_freq, val_eval_freq, seed.

    Returns:
        (best_val_dice, best_cls_val_dices): best overall validation Dice
        seen during training and the per-class Dice scores at that point.

    Side effects: writes TensorBoard scalars, pickled state dicts
    ('model.pth.tar', 'best_model.pth.tar') and a best-score text file
    into args.log_dir. Requires a CUDA device (uses torch.cuda tensors).
    """
    torch.backends.cudnn.benchmark = True
    seed_all(args.seed)

    d = Dataset(train_set_size=args.train_set_sz,
                num_cls=args.num_cls,
                remove_nan_center=False)
    train = d.train_set
    valid = d.test_set
    num_cls = args.num_cls + 1  # +1 for background

    net = UNet(in_dim=1, out_dim=num_cls).cuda()
    # Kept on CPU; only holds a copy of the best-so-far weights.
    best_net = UNet(in_dim=1, out_dim=num_cls)
    best_val_dice = -np.inf
    best_cls_val_dices = None

    optimizer = torch.optim.Adam(params=net.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # Warm-up multiplies the LR up over the first 50 scheduler steps;
    # no follow-on scheduler afterwards.
    scheduler_warmup = GradualWarmupScheduler(optimizer,
                                              multiplier=10,
                                              total_epoch=50,
                                              after_scheduler=None)

    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir, exist_ok=True)
    writer = tensorboardX.SummaryWriter(log_dir=args.log_dir)

    step = 1  # global step counter across epochs
    for epoch in range(1, args.n_epochs + 1):
        for iteration in range(
                1,
                int(np.ceil(train.dataset_sz() / args.batch_sz)) + 1):
            net.train()
            imgs, masks, one_hot_masks, centers, _, _, _, _ = train.next_batch(
                args.batch_sz)
            imgs = make_batch_input(imgs)
            imgs = torch.cuda.FloatTensor(imgs)
            one_hot_masks = torch.cuda.FloatTensor(one_hot_masks)
            pred_logit = net(imgs)
            pred_softmax = F.softmax(pred_logit, dim=1)
            # Loss choice: plain cross-entropy on integer masks, or a soft
            # Dice loss on the one-hot masks (background excluded).
            if args.use_ce:
                ce = torch.nn.CrossEntropyLoss()
                loss = ce(pred_logit, torch.cuda.LongTensor(masks))
            else:
                loss = dice_loss(pred_softmax,
                                 one_hot_masks,
                                 keep_background=False).mean()
            # NOTE(review): scheduler stepped before optimizer.step() —
            # kept as-is to preserve the original LR schedule alignment.
            scheduler_warmup.step()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % args.log_freq == 0:
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\tloss={loss.data.cpu().numpy()}"
                )
                writer.add_scalar("cnn_dice_loss", loss.data.cpu().numpy(),
                                  step)
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], step)

            if step % args.train_eval_freq == 0:
                # Periodic evaluation on the training set itself.
                train_dice, cls_train_dices = do_eval(net, train.images,
                                                      train.onehot_masks,
                                                      args.batch_sz, num_cls)
                train_dice = train_dice.cpu().numpy()
                cls_train_dices = cls_train_dices.cpu().numpy()
                writer.add_scalar("train_dice", train_dice, step)
                # lr_sched.step(1-train_dice)
                for j, cls_train_dice in enumerate(cls_train_dices):
                    writer.add_scalar(f"train_dice/{j}", cls_train_dice, step)
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\ttrain_eval: train_dice={train_dice}"
                )

            if step % args.val_eval_freq == 0:
                # Snapshot current weights, then evaluate on the validation set.
                _pickle.dump(
                    net.state_dict(),
                    open(os.path.join(args.log_dir, 'model.pth.tar'), 'wb'))
                val_dice, cls_val_dices = do_eval(net, valid.images,
                                                  valid.onehot_masks,
                                                  args.batch_sz, num_cls)
                val_dice = val_dice.cpu().numpy()
                cls_val_dices = cls_val_dices.cpu().numpy()
                writer.add_scalar("val_dice", val_dice, step)
                for j, cls_val_dice in enumerate(cls_val_dices):
                    writer.add_scalar(f"val_dice/{j}", cls_val_dice, step)
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\tvalid_dice={val_dice}"
                )
                if val_dice > best_val_dice:
                    # New best: persist weights and the score breakdown.
                    best_val_dice = val_dice
                    best_cls_val_dices = cls_val_dices
                    best_net.load_state_dict(net.state_dict().copy())
                    _pickle.dump(
                        best_net.state_dict(),
                        open(os.path.join(args.log_dir, 'best_model.pth.tar'),
                             'wb'))
                    f = open(
                        os.path.join(args.log_dir,
                                     f"best_val_dice{step}.txt"), 'w')
                    f.write(str(best_val_dice) + "\n")
                    f.write(" ".join([
                        str(dice_score) for dice_score in best_cls_val_dices
                    ]))
                    f.close()
                    print(f"better val dice detected.")
            # if step % 5000 == 0:
            #     _pickle.dump(net.state_dict(), open(os.path.join(args.log_dir, '{}.pth.tar'.format(step)),
            #                                         'wb'))
            step += 1

    return best_val_dice, best_cls_val_dices
import utils # Set up parameters and output dir. params = utils.load_params(mode='wlnn') # based on terminal input params['script'] = 'run-wlnn-mnist.py' writer, out_dir = utils.init_output(params, overwrite=params['overwrite_output']) os.makedirs(os.path.join(out_dir, 'networks')) # dir to store all networks if params['use_cuda'] and not torch.cuda.is_available(): logging.info('use_cuda was set but cuda is not available, running on cpu') params['use_cuda'] = False device = 'cuda' if params['use_cuda'] else 'cpu' # Ensure deterministic computation. utils.seed_all(0) ### Ensure that runs are reproducible even on GPU. Note, this slows down training! torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Load dataset. train_images, train_labels, test_images, test_labels = load_preprocessed_dataset( params['dataset'], flatten_images=True, use_torch=True) train_dataset = torch.utils.data.TensorDataset(train_images, train_labels) test_dataset = torch.utils.data.TensorDataset(test_images, test_labels) # Create initial population. # TODO: Make train_only_outputs a learning_rule. train_only_outputs = (params['train_only_outputs'] or params['learning_rule'] == 'hebbian')
def train(
        *,
        flow_constructor,
        logdir,
        lr_schedule,
        dropout_p,
        seed,
        init_bs,
        total_bs,
        ema_decay,
        steps_per_log,
        max_grad_norm,
        dtype=tf.float32,
        scale_loss=None,
        dataset='imagenet32',
        steps_per_samples=20000,
        steps_per_dump=5000,
        n_epochs=2,
        restore_checkpoint=None,
        dump_samples_to_tensorboard=True,
        save_jpg=True,
):
    """Distributed (Horovod/MPI) training loop for a flow-based model.

    Builds init/train/validation/sampling graphs from ``flow_constructor``,
    trains with Adam (EMA-averaged weights, optional loss scaling and global
    gradient-norm clipping), and periodically logs bits/dim, dumps samples,
    runs validation, and saves checkpoints.

    Args:
        flow_constructor: returns (dequant_flow, flow, posterior_flow).
        logdir: base log directory; suffixed with MPI size and timestamp.
        lr_schedule: callable step -> learning rate (drives warm-up).
        dropout_p: dropout probability for the training graph.
        seed: base seed; each rank is seeded with rank + size * seed.
        init_bs / total_bs: batch sizes for data-dependent init and for
            training (total_bs is split evenly across ranks).
        ema_decay: decay for the ExponentialMovingAverage over params.
        steps_per_log / steps_per_samples / steps_per_dump: periodicities
            for logging, sampling+validation, and checkpointing.
        max_grad_norm: global-norm clip threshold, or None to disable.
        scale_loss: if set, loss is scaled up and gradients scaled back
            down (mixed-precision-style loss scaling).
        restore_checkpoint: checkpoint path to resume from; the step index
            is parsed from its '-NNN' suffix.

    Side effects: creates logdir/checkpoints/profiling dirs (root only),
    writes TensorBoard output, reads .npy datasets from '../'.
    """
    import horovod.tensorflow as hvd

    # Initialize Horovod
    hvd.init()
    # Verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()
    from mpi4py import MPI

    assert hvd.size() == MPI.COMM_WORLD.Get_size()

    is_root = hvd.rank() == 0

    def mpi_average(local_list):
        # Average a list of scalars across all ranks; result only on root.
        local_list = list(map(float, local_list))
        sums = MPI.COMM_WORLD.gather(sum(local_list), root=0)
        counts = MPI.COMM_WORLD.gather(len(local_list), root=0)
        sum_counts = sum(counts) if is_root else None
        avg = (sum(sums) / sum_counts) if is_root else None
        return avg, sum_counts

    # Seeding and logging setup
    seed_all(hvd.rank() + hvd.size() * seed)
    assert total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    logger = None
    logdir = '{}_mpi{}_{}'.format(os.path.expanduser(logdir), hvd.size(),
                                  time.time())
    checkpointdir = os.path.join(logdir, 'checkpoints')
    profiledir = os.path.join(logdir, 'profiling')
    if is_root:
        print('Floating point format:', dtype)
        pprint(locals())
        os.makedirs(logdir)
        os.makedirs(checkpointdir)
        os.makedirs(profiledir)
        logger = TensorBoardOutput(logdir)

    # Load data
    assert dataset in ['imagenet32', 'imagenet64', 'imagenet64_5bit']
    if is_root:
        print('Loading data')
    MPI.COMM_WORLD.Barrier()
    if dataset == 'imagenet32':
        """The dataset as a npy file on RAM. There are as many copies as
        number of MPI threads. This isn't effficient and tf.Records would
        be better to read from disk. This is just done to ensure bits/dim
        reported are perfect and no data loading bugs creep in.
        However, the dataset is quite small resolution and even
        8 MPI threads can work on 40GB RAM."""
        data_train = np.load('../train_32x32.npy')
        data_val = np.load('../valid_32x32.npy')
        assert data_train.dtype == 'uint8'
        assert np.max(data_train) <= 255
        assert np.min(data_train) >= 0
        assert np.max(data_val) <= 255
        assert np.min(data_val) >= 0
        assert data_val.dtype == 'uint8'
    elif dataset == 'imagenet64':
        """The dataset as a npy file on RAM. There are as many copies as
        number of MPI threads. This isn't effficient and tf.Records would
        be better to read from disk. This is just done to ensure bits/dim
        reported are perfect and no data loading bugs creep in.
        If you don't have enough CPU RAM to run 8 threads, run it with
        fewer threads and adjust batch-size / model-size tradeoff
        accordingly."""
        data_train = np.load('../train_64x64.npy')
        data_val = np.load('../valid_64x64.npy')
        assert data_train.dtype == 'uint8'
        assert np.max(data_train) <= 255
        assert np.min(data_train) >= 0
        assert np.max(data_val) <= 255
        assert np.min(data_val) >= 0
    elif dataset == 'imagenet64_5bit':
        """Similar loading as above. Quantized to 5-bit while loading."""
        # Root quantizes once and writes the 5-bit copies; other ranks wait
        # at the barrier and then load them.
        if is_root:
            data_train = np.load('../train_64x64.npy')
            data_train = np.floor(data_train / 8.)
            data_train = data_train.astype('uint8')
            assert np.max(data_train) <= 31
            assert np.min(data_train) >= 0
            np.save('../train_64x64_5bit.npy', data_train)
            del data_train
            data_val = np.load('../valid_64x64.npy')
            data_val = np.floor(data_val / 8.)
            data_val = data_val.astype('uint8')
            assert np.max(data_val) <= 31
            assert np.min(data_val) >= 0
            np.save('../valid_64x64_5bit.npy', data_val)
            del data_val
        MPI.COMM_WORLD.Barrier()
        data_train = np.load('../train_64x64_5bit.npy')
        data_val = np.load('../valid_64x64_5bit.npy')
    data_train = data_train.astype(dtype.as_numpy_dtype)
    data_val = data_val.astype(dtype.as_numpy_dtype)
    img_shp = list(data_train.shape[1:])
    if dataset == 'imagenet32':
        assert img_shp == [32, 32, 3]
    else:
        assert img_shp == [64, 64, 3]
    if is_root:
        print('Training data: {}, Validation data: {}'.format(
            data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)
    # Converts nats/image to bits/dim.
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))

    # Build graph
    if is_root:
        print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()
    # Data-dependent init
    if restore_checkpoint is None:
        if is_root:
            print('===== Init graph =====')
        x_init_sym = tf.placeholder(dtype, [init_bs] + img_shp)
        init_syms, _ = build_forward(x=x_init_sym,
                                     dequant_flow=dequant_flow,
                                     flow=flow,
                                     posterior_flow=posterior_flow,
                                     flow_kwargs=dict(init=True,
                                                      dropout_p=dropout_p,
                                                      verbose=is_root))
    # Training
    if is_root:
        print('===== Training graph =====')
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    loss_sym, _ = build_forward(x=x_sym,
                                dequant_flow=dequant_flow,
                                flow=flow,
                                posterior_flow=posterior_flow,
                                flow_kwargs=dict(dropout_p=dropout_p,
                                                 verbose=is_root))

    # EMA
    params = tf.trainable_variables()
    if is_root:
        print('Parameters',
              sum(np.prod(p.get_shape().as_list()) for p in params))
    ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
    maintain_averages_op = tf.group(ema.apply(params))
    # Op for setting the ema params to the current non-ema params (for use after data-dependent init)
    name2var = {v.name: v for v in tf.global_variables()}
    copy_params_to_ema = tf.group([
        name2var[p.name.replace(':0', '') +
                 '/ExponentialMovingAverage:0'].assign(p) for p in params
    ])

    # Validation and sampling (with EMA)
    if is_root:
        print('===== Validation graph =====')
    val_loss_sym, _ = build_forward(x=x_sym,
                                    dequant_flow=dequant_flow,
                                    flow=flow,
                                    posterior_flow=posterior_flow,
                                    flow_kwargs=dict(dropout_p=0,
                                                     ema=ema,
                                                     verbose=is_root))
    # for debugging invertibility
    # val_inverr_sym = tf.reduce_max(tf.abs(
    #     val_dequant_x_sym - flow.inverse(val_y_sym, dropout_p=0, ema=ema, verbose=is_root)[0]
    # ))
    if is_root:
        print('===== Sampling graph =====')
    samples_sym, _ = flow.sample(local_bs,
                                 flow_kwargs=dict(dropout_p=0.,
                                                  ema=ema,
                                                  verbose=is_root))
    allgathered_samples_sym = hvd.allgather(tf.to_float(samples_sym))

    # Sanity check: building the extra graphs must not create new variables.
    assert len(tf.trainable_variables()) == len(params)

    def run_sampling(sess,
                     i_step,
                     *,
                     prefix=dataset,
                     dump_to_tensorboard=True,
                     save_jpg=False):
        # Draw samples on every rank, gather them, and (root only) log them.
        samples = sess.run(allgathered_samples_sym)
        if is_root:
            print('samples gathered from the session')
            if dataset == 'imagenet64_5bit':
                """Quantized values. So different kind of sampling needed
                here."""
                # Undo the 5-bit quantization to recover 8-bit pixel values.
                samples = np.floor(np.clip(samples, 0, 31))
                samples = samples * 8
                samples = samples.astype('uint8')
            # np.save('samples_' + prefix + '.npy', samples)
            # if save_jpg:
            #     samples = tile_imgs(np.floor(np.clip(samples, 0, 255)).astype('uint8'))
            #     cv2.imwrite('samples_' + prefix + '_' + str(i_step) + '.jpg', samples)
            if dump_to_tensorboard:
                """You can turn this off if tensorboard crashes for sample
                dumps. You can view the samples from the npy file anyway"""
                logger.writekvs(
                    [('samples',
                      tile_imgs(np.clip(samples, 0, 255).astype(np.uint8)))],
                    i_step)

    def run_validation(sess, i_step):
        # Each rank evaluates its shard of the validation set; the bpd is
        # MPI-averaged and logged on root.
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=local_bs,
                                          include_final_partial_batch=False)
        ])
        val_loss, total_count = mpi_average(shard_losses)
        if is_root:
            logger.writekvs([('val_bpd', bpd_scale_factor * val_loss),
                             ('num_val_examples', total_count * local_bs)],
                            i_step)

    # Optimization
    lr_sym = tf.placeholder(dtype, [], 'lr')
    optimizer = hvd.DistributedOptimizer(tf.train.AdamOptimizer(lr_sym))
    if scale_loss is None:
        grads_and_vars = optimizer.compute_gradients(loss_sym,
                                                     var_list=params)
    else:
        # Loss scaling: scale the loss up before differentiation, scale the
        # gradients back down afterwards.
        grads_and_vars = [
            (g / scale_loss, v)
            for (g, v) in optimizer.compute_gradients(loss_sym * scale_loss,
                                                      var_list=params)
        ]
    if max_grad_norm is not None:
        clipped_grads, grad_norm_sym = tf.clip_by_global_norm(
            [g for (g, _) in grads_and_vars], max_grad_norm)
        grads_and_vars = [
            (cg, v) for (cg, (_, v)) in zip(clipped_grads, grads_and_vars)
        ]
    else:
        grad_norm_sym = tf.constant(0.)
    # One op: apply gradients and update the EMA shadow variables.
    opt_sym = tf.group(optimizer.apply_gradients(grads_and_vars),
                       maintain_averages_op)

    def loop(sess: tf.Session):
        # Main training loop: i_step counts optimizer steps; i_step_lr
        # drives the LR schedule (reset to 0 to warm up after a restore).
        i_step = 0
        i_step_lr = 0
        if is_root:
            print('Initializing')
        sess.run(tf.global_variables_initializer())
        # if is_root:
        #     logger.write_graph(sess.graph)
        if restore_checkpoint is not None:
            """If restoring from an existing checkpoint whose path is
            specified in the launcher"""
            restore_step = int(restore_checkpoint.split('-')[-1])
            if is_root:
                saver = tf.train.Saver()
                print('Restoring checkpoint:', restore_checkpoint)
                print('Restoring from step:', restore_step)
                saver.restore(sess, restore_checkpoint)
                print('Loaded checkpoint')
            else:
                saver = None
            i_step = restore_step
            """You could re-start with the warm-up or start from wherever
            the checkpoint stopped depending on what is needed. If the
            session had to be stopped due to NaN/Inf, warm-up from a most
            recent working checkpoint is recommended. If it was because of
            Horovod Crash / Machine Shut down, re-starting from the same LR
            can be done in which case you need to uncomment the blow line.
            By default, it warms up."""
            i_step_lr = restore_step
        else:
            if is_root:
                print('Data dependent init')
            sess.run(
                init_syms, {
                    x_init_sym:
                    data_train[np.random.randint(0, data_train.shape[0],
                                                 init_bs)]
                })
            sess.run(copy_params_to_ema)
            saver = tf.train.Saver() if is_root else None
        if is_root:
            print('Broadcasting initial parameters')
        # Sync all ranks to root's weights before training starts.
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        if is_root:
            print('Training')
            print(
                'Parameters(M)',
                sum(np.prod(p.get_shape().as_list())
                    for p in params) / 1024. / 1024.)

        loss_hist = deque(maxlen=steps_per_log)
        """ 2 epochs are sufficient to see good results on Imagenet.
        After 2 epochs, gains are marginal, but important for good
        bits/dim."""
        for i_epoch in range(n_epochs):
            epoch_start_t = time.time()
            for i_epoch_step, (batch, ) in enumerate(
                    iterbatches(  # non-sharded: each gpu goes through the whole dataset
                        [data_train],
                        batch_size=local_bs,
                        include_final_partial_batch=False,
                    )):
                lr = lr_schedule(i_step_lr)
                loss, _ = sess.run(
                    [loss_sym, opt_sym],
                    {
                        x_sym: batch,
                        lr_sym: lr
                    },
                )
                loss_hist.append(loss)

                # Skip timing the very first step, which is unusually slow
                # due to TF initialization.
                if i_epoch == i_epoch_step == 0:
                    epoch_start_t = time.time()

                if i_step % steps_per_log == 0:
                    loss_hist_means = MPI.COMM_WORLD.gather(float(
                        np.mean(loss_hist)),
                                                            root=0)
                    steps_per_sec = (i_epoch_step + 1) / (time.time() -
                                                          epoch_start_t)
                    if is_root:
                        kvs = [
                            ('iter', i_step),
                            ('epoch', i_epoch + i_epoch_step * local_bs /
                             data_train.shape[0]),  # epoch for this gpu
                            ('bpd',
                             float(
                                 np.mean(loss_hist_means) *
                                 bpd_scale_factor)),
                            ('lr', float(lr)),
                            ('fps', steps_per_sec * total_bs
                             ),  # fps calculated over all gpus (this epoch)
                            ('sps', steps_per_sec),
                        ]
                        logger.writekvs(kvs, i_step)
                """You could pass the validation for Imagenet because the
                val set is reasonably big. It is extremely hard to overfit
                on Imagenet (if you manage to, let us know). So, skipping
                the validation throughout the training and validating at
                the end with the most recent checkpoint would be okay and
                good for wall clock time. You could also have steps_per_val
                specified in the launcher pretty high to find a balance."""
                if i_step > 0 and i_step % steps_per_samples == 0 and i_step_lr > 0:
                    run_sampling(
                        sess,
                        i_step=i_step,
                        dump_to_tensorboard=dump_samples_to_tensorboard,
                        save_jpg=save_jpg)
                    print('Run Validation...')
                    run_validation(sess, i_step)

                if i_step % steps_per_dump == 0 and i_step > 0 and i_step_lr > 0:
                    if saver is not None:
                        saver.save(sess,
                                   os.path.join(checkpointdir, 'model'),
                                   global_step=i_step)

                i_step += 1
                i_step_lr += 1
            # End of epoch

    # Train
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    with tf.Session(config=config) as sess:
        loop(sess)
def evaluate(
        *,
        flow_constructor,
        seed,
        restore_checkpoint,
        total_bs,
        iw_samples=4096,
        dtype=tf.float32,
        dataset='cifar10',
        samples_filename='samples.png',
):
    """Evaluate a trained flow model from a checkpoint (Horovod/MPI).

    Restores weights, optionally dumps a grid of samples to
    ``samples_filename``, then reports the standard single-sample
    bits/dim and an importance-weighted (IWAE-style) bound using
    ``iw_samples`` samples per validation example.

    Args:
        flow_constructor: returns (dequant_flow, flow, posterior_flow);
            must match the architecture of the checkpoint.
        seed: base seed; each rank is seeded with rank + size * seed.
        restore_checkpoint: checkpoint path (user-expanded).
        total_bs: total batch size, split evenly across ranks; must divide
            iw_samples.
        samples_filename: output image path; falsy to skip sampling.

    Side effects: prints results to stdout; may write the samples image.
    """
    hvd, MPI, is_root, mpi_average = setup_horovod()

    restore_checkpoint = os.path.expanduser(restore_checkpoint)

    # Seeding and logging setup
    seed_all(hvd.rank() + hvd.size() * seed)
    assert total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    assert iw_samples % total_bs == 0

    if is_root:
        print('===== EVALUATING {} ({} IW samples) ====='.format(
            restore_checkpoint, iw_samples))

    # Load data
    if is_root:
        # Load once on root first to prevent downloading conflicts
        print('Loading data')
        load_data(dataset=dataset, dtype=dtype.as_numpy_dtype)
    MPI.COMM_WORLD.Barrier()
    data_train, data_val = load_data(dataset=dataset,
                                     dtype=dtype.as_numpy_dtype)
    img_shp = list(data_train.shape[1:])
    H, W, Cx = img_shp
    # Converts nats/image to bits/dim.
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))
    if is_root:
        print('Training data: {}, Validation data: {}'.format(
            data_train.shape[0], data_val.shape[0]))
        print('Image shape:', img_shp)

    # Build graph
    if is_root:
        print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    # This is a fake training graph. Just used to mimic flow_training, so we can load from the saver
    build_forward(
        x=x_sym,
        dequant_flow=dequant_flow,
        flow=flow,
        posterior_flow=posterior_flow,
        flow_kwargs=dict(vcfg=VarConfig(init=False, ema=None, dtype=dtype),
                         dropout_p=0,
                         verbose=is_root)
        # note dropout is 0: it doesn't matter
    )

    # EMA
    params = tf.trainable_variables()
    if is_root:
        print_params()
    ema = tf.train.ExponentialMovingAverage(
        decay=0.9999999999999)  # ema turned off
    maintain_averages_op = tf.group(ema.apply(params))

    # Validation and sampling (with EMA)
    if is_root:
        print('===== Validation graph =====')
    val_flow_kwargs = dict(vcfg=VarConfig(init=False, ema=ema, dtype=dtype),
                           dropout_p=0.,
                           verbose=is_root)
    val_loss_sym, val_logratio_sym = build_forward(
        x=x_sym,
        dequant_flow=dequant_flow,
        flow=flow,
        posterior_flow=posterior_flow,
        flow_kwargs=val_flow_kwargs)
    allgathered_val_logratios_sym = hvd.allgather(val_logratio_sym)
    # for debugging invertibility
    # val_dequant_x_sym_rep = tf.reshape(tf.tile(tf.expand_dims(val_dequant_x_sym, 0), [sampling_times, 1, 1, 1, 1]), [-1] + val_dequant_x_sym.shape.as_list()[1:])
    # val_inverr_sym = tf.reduce_max(tf.abs(val_dequant_x_sym_rep - flow.inverse(val_y_sym, **val_flow_kwargs)[0][:,:,:,:img_shp[-1]]))

    if is_root:
        print('===== Sampling graph =====')
    samples_sym, _ = flow.sample(64, val_flow_kwargs)
    allgathered_samples_x_sym = hvd.allgather(tf.to_float(samples_sym))

    # Sanity check: extra graphs must not have created new variables.
    assert len(tf.trainable_variables()) == len(params)

    def run_iw_eval(sess):
        # Importance-weighted bound: for each validation example, draw
        # iw_samples log-ratios and log-mean-exp them (numerically shifted).
        if is_root:
            print('Running IW eval with {} samples...'.format(iw_samples))

        # Go through one example at a time
        all_val_losses = []
        for i_example in (trange if is_root else range)(len(data_val)):
            # take this single example and tile it
            batch_x = np.tile(data_val[i_example, None, ...],
                              (local_bs, 1, 1, 1))

            # repeatedly evaluate logd for the IWAE bound
            batch_logratios = np.concatenate([
                sess.run(allgathered_val_logratios_sym, {x_sym: batch_x})
                for _ in range(iw_samples // total_bs)
            ]).astype(np.float64)
            assert batch_logratios.shape == (iw_samples, )

            # log [1/n \sum_i exp(r_i)] = log [exp(-b) 1/n \sum_i exp(r_i + b)] = -b + log [1/n \sum_i exp(r_i + b)]
            shift = batch_logratios.max()
            all_val_losses.append(
                -bpd_scale_factor *
                (shift + np.log(np.mean(np.exp(batch_logratios - shift)))))
            if i_example % 100 == 0 and is_root:
                print(i_example, np.mean(all_val_losses))

        if is_root:
            print(f'Final ({len(data_val)}):', np.mean(all_val_losses))

    def run_standard_eval(sess):
        # Single-sample bits/dim over the (sharded) validation set.
        if is_root:
            print('Running standard eval...')
        # Standard validation (single sample)
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=local_bs,
                                          include_final_partial_batch=False)
        ])
        val_loss, total_count = mpi_average(shard_losses)
        if is_root:
            for k, v in [
                    ('val_bpd', bpd_scale_factor * val_loss),
                    ('num_val_examples', total_count * local_bs),
            ]:
                print(k, v)

    def run_sampling_only(sess):
        # Draw samples on every rank, gather, and save a tiled image (root).
        samples = sess.run(allgathered_samples_x_sym)
        if is_root:
            from PIL import Image
            Image.fromarray(
                tile_imgs(np.clip(samples, 0,
                                  255).astype(np.uint8))).save(
                                      samples_filename)
            print('Saved {} samples to {}'.format(len(samples),
                                                  samples_filename))
            # print('Sampled in {} seconds'.format(sample_time))

    # Run
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    with tf.Session(config=config) as sess:
        if is_root:
            print('Initializing')
        sess.run(tf.global_variables_initializer())
        # Restore from checkpoint (root restores, then broadcasts to all).
        if is_root:
            print('Restoring checkpoint:', restore_checkpoint)
            saver = tf.train.Saver()
            saver.restore(sess, restore_checkpoint)
            print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        if samples_filename:
            run_sampling_only(sess)

        # Make sure data is the same on all MPI processes
        tmp_inds = [0, 183, 3, 6, 20, 88]
        check_batch = np.ascontiguousarray(data_val[tmp_inds])
        gathered_batches = np.zeros(
            (hvd.size(), *check_batch.shape),
            check_batch.dtype) if is_root else None
        MPI.COMM_WORLD.Gather(check_batch, gathered_batches, root=0)
        if is_root:
            assert all(
                np.allclose(check_batch, b)
                for b in gathered_batches), 'data must be in the same order!'
            print('data ordering ok')

        # Run validation
        run_standard_eval(sess)
        run_iw_eval(sess)
def evaluate(
        *,
        flow_constructor,
        seed,
        restore_checkpoint,
        total_bs,
        iw_samples=1024,  # 4096 is too slow for ImageNet
        dtype=tf.float32,
        dataset='imagenet32',
        samples_filename='samples.png',
        extra_dims=3,  # NOTE(review): unused in this function; kept for caller compatibility
):
    """Evaluate a trained flow checkpoint under MPI/Horovod.

    Restores ``restore_checkpoint``, then runs:
      1. a single-sample validation pass reporting bits/dim, and
      2. an importance-weighted (IWAE) bound using ``iw_samples`` samples
         per validation example.
    A sampling-only path (``run_sampling_only``) exists but its call is
    currently commented out below.

    Args:
        flow_constructor: zero-arg callable returning
            ``(dequant_flow, flow, posterior_flow)``.
        seed: base RNG seed; each MPI rank derives its own seed from it.
        restore_checkpoint: path to the TF checkpoint to restore.
        total_bs: global batch size; must divide evenly across MPI ranks.
        iw_samples: number of importance samples per example; must be a
            multiple of ``total_bs``.
        dtype: TF dtype for the input placeholder / data cast.
        dataset: one of 'imagenet32', 'imagenet64', 'imagenet64_5bit'.
        samples_filename: output path used by the sampling-only path.
    """
    import horovod.tensorflow as hvd

    # Initialize Horovod
    hvd.init()
    # Verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()
    from mpi4py import MPI
    assert hvd.size() == MPI.COMM_WORLD.Get_size()
    is_root = hvd.rank() == 0

    def mpi_average(local_list):
        # Average scalars across all MPI ranks. The returned average and
        # total count are only populated on the root rank (None elsewhere).
        local_list = list(map(float, local_list))
        sums = MPI.COMM_WORLD.gather(sum(local_list), root=0)
        counts = MPI.COMM_WORLD.gather(len(local_list), root=0)
        sum_counts = sum(counts) if is_root else None
        avg = (sum(sums) / sum_counts) if is_root else None
        return avg, sum_counts

    restore_checkpoint = os.path.expanduser(restore_checkpoint)

    # Seeding: each rank gets a distinct seed so ranks draw different noise.
    seed_all(hvd.rank() + hvd.size() * seed)

    assert total_bs % hvd.size() == 0
    local_bs = total_bs // hvd.size()
    assert iw_samples % total_bs == 0

    if is_root:
        print('===== EVALUATING {} ({} IW samples) ====='.format(
            restore_checkpoint, iw_samples))

    # Load data
    assert dataset in ['imagenet32', 'imagenet64', 'imagenet64_5bit']
    if is_root:
        print('Loading data')
    MPI.COMM_WORLD.Barrier()
    if dataset == 'imagenet32':
        """The dataset as a npy file on RAM. There are as many copies as
        number of MPI threads. This isn't effficient and tf.Records would be
        better to read from disk. This is just done to ensure bits/dim
        reported are perfect and no data loading bugs creep in. However, the
        dataset is quite small resolution and even 8 MPI threads can work on
        40GB RAM."""
        data_val = np.load('../valid_32x32.npy')
        assert np.max(data_val) <= 255
        assert np.min(data_val) >= 0
        assert data_val.dtype == 'uint8'
    elif dataset == 'imagenet64':
        """The dataset as a npy file on RAM. There are as many copies as
        number of MPI threads. This isn't effficient and tf.Records would be
        better to read from disk. This is just done to ensure bits/dim
        reported are perfect and no data loading bugs creep in. If you don't
        have enough CPU RAM to run 8 threads, run it with fewer threads and
        adjust batch-size / model-size tradeoff accordingly."""
        data_train = np.load('../train_64x64.npy')
        data_val = np.load('../valid_64x64.npy')
        assert data_train.dtype == 'uint8'
        assert np.max(data_train) <= 255
        assert np.min(data_train) >= 0
        assert np.max(data_val) <= 255
        assert np.min(data_val) >= 0
    elif dataset == 'imagenet64_5bit':
        """Similar loading as above. Quantized to 5-bit while loading."""
        # Root quantizes once and writes the 5-bit copies; everyone waits at
        # the barrier, then all ranks load the quantized files.
        if is_root:
            data_train = np.load('../train_64x64.npy')
            data_train = np.floor(data_train / 8.)
            data_train = data_train.astype('uint8')
            assert np.max(data_train) <= 31
            assert np.min(data_train) >= 0
            np.save('../train_64x64_5bit.npy', data_train)
            del data_train
            data_val = np.load('../valid_64x64.npy')
            data_val = np.floor(data_val / 8.)
            data_val = data_val.astype('uint8')
            assert np.max(data_val) <= 31
            assert np.min(data_val) >= 0
            np.save('../valid_64x64_5bit.npy', data_val)
            del data_val
        MPI.COMM_WORLD.Barrier()
        data_train = np.load('../train_64x64_5bit.npy')
        data_val = np.load('../valid_64x64_5bit.npy')

    data_val = data_val.astype(dtype.as_numpy_dtype)
    img_shp = list(data_val.shape[1:])
    if dataset == 'imagenet32':
        assert img_shp == [32, 32, 3]
    else:
        assert img_shp == [64, 64, 3]
    if is_root:
        print('Image shape:', img_shp)
    # Converts nats/image to bits/dim.
    bpd_scale_factor = 1. / (np.log(2) * np.prod(img_shp))

    # Build graph
    if is_root:
        print('Building graph')
    dequant_flow, flow, posterior_flow = flow_constructor()
    x_sym = tf.placeholder(dtype, [local_bs] + img_shp)
    # This is a fake training graph. Just used to mimic flow_training, so we
    # can load from the saver.
    build_forward(x=x_sym,
                  dequant_flow=dequant_flow,
                  flow=flow,
                  posterior_flow=posterior_flow,
                  flow_kwargs=dict(init=False,
                                   ema=None,
                                   dropout_p=0,
                                   verbose=is_root)
                  # note dropout is 0: it doesn't matter
                  )

    # EMA
    params = tf.trainable_variables()
    if is_root:
        print('Parameters',
              sum(np.prod(p.get_shape().as_list()) for p in params))
    ema = tf.train.ExponentialMovingAverage(
        decay=0.9999999999999)  # ema turned off
    maintain_averages_op = tf.group(ema.apply(params))

    # Validation and sampling (with EMA)
    if is_root:
        print('===== Validation graph =====')
    val_flow_kwargs = dict(init=False, dropout_p=0, ema=ema, verbose=is_root)
    val_loss_sym, val_logratio_sym, val_dequant_x_sym = build_forward(
        x=x_sym,
        dequant_flow=dequant_flow,
        flow=flow,
        posterior_flow=posterior_flow,
        flow_kwargs=val_flow_kwargs)
    allgathered_val_logratios_sym = hvd.allgather(val_logratio_sym)
    # for debugging invertibility
    # val_inverr_sym = tf.reduce_max(tf.abs(
    #     val_dequant_x_sym - flow.inverse(val_y_sym, dropout_p=0, ema=ema, verbose=is_root)[0]
    # ))

    if is_root:
        print('===== Sampling graph =====')
    samples_sym, _ = flow.sample(local_bs, flow_kwargs=val_flow_kwargs)
    allgathered_samples_sym = hvd.allgather(tf.to_float(samples_sym))
    assert len(tf.trainable_variables()) == len(params)

    def run_iw_eval(sess):
        # Importance-weighted (IWAE) bound: evaluate one example at a time,
        # tiled across the local batch, accumulating iw_samples log-ratios.
        if is_root:
            print('Running IW eval with {} samples...'.format(iw_samples))
        all_val_losses = []
        for i_example in (trange if is_root else range)(len(data_val)):
            # take this single example and tile it
            batch_x = np.tile(data_val[i_example, None, ...],
                              (local_bs, 1, 1, 1))
            # repeatedly evaluate logd for the IWAE bound
            batch_logratios = np.concatenate([
                sess.run(allgathered_val_logratios_sym, {x_sym: batch_x})
                for _ in range(iw_samples // total_bs)
            ]).astype(np.float64)
            assert batch_logratios.shape == (iw_samples, )
            # Numerically stable log-mean-exp:
            # log [1/n \sum_i exp(r_i)] = -b + log [1/n \sum_i exp(r_i + b)]
            shift = batch_logratios.max()
            all_val_losses.append(
                -bpd_scale_factor *
                (shift + np.log(np.mean(np.exp(batch_logratios - shift)))))
            if i_example % 100 == 0 and is_root:
                print(i_example, np.mean(all_val_losses))
        if is_root:
            print(f'Final ({len(data_val)}):', np.mean(all_val_losses))

    def run_sampling_only(sess,
                          *,
                          prefix=dataset,
                          dump_to_tensorboard=True,
                          save_jpg=False):
        # NOTE(review): prefix/dump_to_tensorboard/save_jpg are currently
        # unused; the function always writes samples_filename via cv2.
        samples = sess.run(allgathered_samples_sym)
        if is_root:
            print('samples gathered from the session')
            if dataset == 'imagenet64_5bit':
                """Quantized values. So different kind of sampling needed
                here."""
                # Map 5-bit values back to the 8-bit display range.
                samples = np.floor(np.clip(samples, 0, 31))
                samples = samples * 8
                samples = samples.astype('uint8')
            import cv2
            samples = tile_imgs(
                np.floor(np.clip(samples, 0, 255)).astype('uint8'))
            cv2.imwrite(samples_filename, samples)

    def run_validation(sess):
        # Standard single-sample validation: each rank evaluates its shard,
        # losses are MPI-averaged, root prints bits/dim.
        # (Fixed: previously also fetched `val_corr_sym`, which is never
        # defined in this function and raised NameError at runtime.)
        data_val_shard = np.array_split(data_val, hvd.size(),
                                        axis=0)[hvd.rank()]
        shard_losses = np.concatenate([
            sess.run([val_loss_sym], {x_sym: val_batch})
            for val_batch, in iterbatches([data_val_shard],
                                          batch_size=local_bs,
                                          include_final_partial_batch=False)
        ])
        val_loss, total_count = mpi_average(shard_losses)
        if is_root:
            for k, v in [
                ('val_bpd', bpd_scale_factor * val_loss),
                ('num_val_examples', total_count * local_bs),
            ]:
                print(k, v)

    # Run
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(
        hvd.local_rank())  # Pin GPU to local rank (one GPU per process)
    with tf.Session(config=config) as sess:
        if is_root:
            print('Initializing')
        sess.run(tf.global_variables_initializer())
        # Restore from checkpoint
        if is_root:
            print('Restoring checkpoint:', restore_checkpoint)
            saver = tf.train.Saver()
            saver.restore(sess, restore_checkpoint)
            print('Broadcasting initial parameters')
        sess.run(hvd.broadcast_global_variables(0))
        sess.graph.finalize()

        # if samples_filename:
        #     run_sampling_only(sess)

        # Make sure data is the same on all MPI processes
        tmp_inds = [0, 183, 3, 6, 20, 88]
        check_batch = np.ascontiguousarray(data_val[tmp_inds])
        gathered_batches = np.zeros(
            (hvd.size(), *check_batch.shape),
            check_batch.dtype) if is_root else None
        MPI.COMM_WORLD.Gather(check_batch, gathered_batches, root=0)
        if is_root:
            assert all(
                np.allclose(check_batch, b)
                for b in gathered_batches), 'data must be in the same order!'
            print('data ordering ok')

        # Run validation
        run_validation(sess)
        run_iw_eval(sess)
parser.add_argument("--method_name", type=str, default="ftl") # ours parser.add_argument("--k_ratio", type=float, default=0.2) parser.add_argument("--lr_ratio", type=float, default=1e-3) # precision parser.add_argument("--precision", type=float, default=0.2) # jocor parser.add_argument("--forget_rate", type=float, default=0.2) parser.add_argument("--co_lambda", type=float, default=0.9) args = parser.parse_args() seed_all(args.seed) device = f"cuda:{args.gpu}" if args.dataset_name in ["mnist", "cifar10", "cifar100", "tiny-imagenet"]: epochs = 201 epoch_decay_start = 80 batch_size = 128 learning_rate = 1e-3 mom1 = 0.9 mom2 = 0.1 alpha_plan = [learning_rate] * epochs beta1_plan = [mom1] * epochs for i in range(epoch_decay_start, epochs): alpha_plan[i] = (
# -*- coding: utf-8 -*-
# Fixed: seed_all()/get_device() were called before their import line
# (NameError on import), and a stray bare `bart_tokenizer` expression
# referenced an undefined name. Imports now come first; the stray
# statement is removed.
from utils import seed_all, get_device
from models import GetEncodings, SearchSimilar

seed_all()
device = get_device()


# Inference
def inference(question, bart_tokenizer, bart_model, df_context, model_op,
              MODEL_STORE):
    """Retrieve the top documents matching `question` and print the combined
    BART-style input string.

    Args:
        question: query string to encode and search with.
        bart_tokenizer: tokenizer passed by the caller (unused here —
            NOTE(review): presumably consumed by a later generation step).
        bart_model: model passed by the caller (also unused here).
        df_context: DataFrame with a 'context' column of candidate documents.
        model_op: precomputed context embeddings for the similarity index.
        MODEL_STORE: path/handle for the pretrained encoder weights.
    """
    # Get Pretrained BERT encodings
    ge = GetEncodings(MODEL_STORE=MODEL_STORE, type='questions')
    encoded_question = ge.encode(question, max_length=30)
    # Find top matching documents
    ss = SearchSimilar(iterator=df_context['context'].values.tolist(),
                       filename='index.bin',
                       embeddings=model_op,
                       shape=768,
                       device=device)
    similar_contexts = ss.get_n_similar_vectors(encoded_question, 3)
    # Prepend the question so the joined string is question + contexts,
    # separated by BART's '</s></s>' delimiter.
    similar_contexts.insert(0, question)
    combined_tokens = '</s></s>'.join(similar_contexts)
    print(f'Top similar document outputs is {combined_tokens}')
def main(args):
    """Train a star-pattern snake segmentation pipeline.

    Jointly trains two networks on CUDA:
      * `net` (UNet, 4 output channels) — produces per-pixel logits from
        which star-pattern radial gradients are extracted;
      * `snake_approx_net` (small UNet) — a differentiable approximation of
        the (non-differentiable) snake solver, trained on noisy gradient
        samples in an inner loop, then used to backprop a dice/NLL surrogate
        loss into `net`.

    Periodically evaluates train/validation dice, logs to TensorBoard, and
    checkpoints the best model (by validation dice) to args.log_dir.

    Returns:
        Best validation dice observed during training.
    """
    torch.backends.cudnn.benchmark = True
    seed_all(args.seed)
    num_classes = 1  # single foreground class
    d = Dataset(train_set_size=args.train_set_sz, num_cls=num_classes)
    train = d.train_set
    valid = d.test_set
    net = UNet(in_dim=1, out_dim=4).cuda()
    # Small approximation net: consumes (1, num_lines, radius) gradient maps.
    snake_approx_net = UNet(in_dim=1, out_dim=1, wf=3, padding=True,
                            first_layer_pad=None, depth=4,
                            last_layer_resize=True).cuda()
    best_val_dice = -np.inf
    optimizer = torch.optim.Adam(params=net.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    snake_approx_optimizer = torch.optim.Adam(
        params=snake_approx_net.parameters(), lr=args.lr,
        weight_decay=args.weight_decay)
    scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=10,
                                              total_epoch=50,
                                              after_scheduler=None)
    # load model
    # Checkpoint layout: [net, optimizer, snake_approx_net,
    # snake_approx_optimizer] state dicts, pickled in that order.
    if args.ckpt:
        loaded = _pickle.load(open(args.ckpt, 'rb'))
        net.load_state_dict(loaded[0])
        optimizer.load_state_dict(loaded[1])
        snake_approx_net.load_state_dict(loaded[2])
        snake_approx_optimizer.load_state_dict(loaded[3])
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir, exist_ok=True)
    writer = tensorboardX.SummaryWriter(log_dir=args.log_dir)
    # Two snake solvers: one sized for the noisy training batch
    # (batch_sz * num_samples), one sized for plain evaluation batches.
    snake = SnakePytorch(args.delta, args.batch_sz * args.num_samples,
                         args.num_lines, args.radius)
    snake_eval = SnakePytorch(args.delta, args.batch_sz, args.num_lines,
                              args.radius)
    # Reused exploration-noise buffer, refilled in-place each iteration.
    noises = torch.zeros((args.batch_sz, args.num_samples, args.num_lines,
                          args.radius)).cuda()
    step = 1
    start = timeit.default_timer()
    for epoch in range(1, args.n_epochs + 1):
        for iteration in range(
                1, int(np.ceil(train.dataset_sz() / args.batch_sz)) + 1):
            scheduler_warmup.step()
            imgs, masks, onehot_masks, centers, dts_modified, dts_original, jitter_radius, bboxes = \
                train.next_batch(args.batch_sz)
            xs = make_batch_input(imgs)
            xs = torch.cuda.FloatTensor(xs)
            net.train()
            unet_logits = net(xs)
            # Data augmentation: random per-sample center/angle jitters.
            center_jitters, angle_jitters = [], []
            for img, mask, center in zip(imgs, masks, centers):
                c_j, a_j = get_random_jitter_by_mask(mask, center, [1],
                                                     args.theta_jitter)
                if not args.use_center_jitter:
                    c_j = np.zeros_like(c_j)
                center_jitters.append(c_j)
                angle_jitters.append(a_j)
            center_jitters = np.asarray(center_jitters)
            angle_jitters = np.asarray(angle_jitters)
            # args.radius + 1 because we need additional outermost points for the gradient
            gs_logits_whole_img = unet_logits[:, 3, ...]
            gs_logits, coords_r, coords_c = get_star_pattern_values(
                gs_logits_whole_img, None, centers, args.num_lines,
                args.radius + 1, center_jitters=center_jitters,
                angle_jitters=angle_jitters)
            # currently only class 1 is foreground
            # if there's multiple foreground classes use a for loop
            gs = gs_logits[:, :, 1:] - gs_logits[:, :, :-1]  # compute the gradient
            noises.normal_(
                0, 1
            )  # noises here is only used for random exploration so no need mirrored sampling
            gs_noisy = torch.unsqueeze(gs, 1) + noises

            def batch_eval_snake(snake, inputs, batch_sz):
                # Run the snake solver over `inputs` in fixed-size batches
                # and concatenate the resulting index sets.
                n_inputs = len(inputs)
                assert n_inputs % batch_sz == 0
                n_batches = int(np.ceil(n_inputs / batch_sz))
                ind_sets = []
                for j in range(n_batches):
                    inps = inputs[j * batch_sz:(j + 1) * batch_sz]
                    batch_ind_sets = snake(inps).data.cpu().numpy()
                    ind_sets.append(batch_ind_sets)
                ind_sets = np.concatenate(ind_sets, 0)
                return ind_sets

            gs_noisy = gs_noisy.reshape((args.batch_sz * args.num_samples,
                                         args.num_lines, args.radius))
            ind_sets = batch_eval_snake(snake, gs_noisy,
                                        args.batch_sz * args.num_samples)
            ind_sets = ind_sets.reshape(
                (args.batch_sz * args.num_samples, args.num_lines))
            ind_sets = np.expand_dims(
                smooth_ind(ind_sets, args.smoothing_window), -1)
            # loss layers
            m = torch.nn.LogSoftmax(dim=1)
            loss = torch.nn.NLLLoss()
            # ===========================================================================
            # Inner loop: Train dice loss prediction network
            # Targets are the snake-solver outputs on the noisy gradients;
            # gs_noisy is detached so this loop never updates `net`.
            snake_approx_net.train()
            for _ in range(args.dice_approx_train_steps):
                snake_approx_logits = snake_approx_net(
                    gs_noisy.reshape(args.batch_sz * args.num_samples, 1,
                                     args.num_lines, args.radius).detach())
                snake_approx_train_loss = loss(
                    m(snake_approx_logits.squeeze().transpose(2, 1)),
                    torch.cuda.LongTensor(ind_sets.squeeze()))
                snake_approx_optimizer.zero_grad()
                snake_approx_train_loss.backward()
                snake_approx_optimizer.step()
            # ===========================================================================
            # ===========================================================================
            # Now, minimize the approximate dice loss
            # The frozen approximation net provides a differentiable path
            # from the ground-truth radial indices back to `net`.
            snake_approx_net.eval()
            gt_indices = []
            for mask, center, cj, aj in zip(masks, centers, center_jitters,
                                            angle_jitters):
                gt_ind = mask_to_indices(mask, center, args.radius,
                                         args.num_lines, cj, aj)
                gt_indices.append(gt_ind)
            gt_indices = np.asarray(gt_indices).astype(int)
            gt_indices = gt_indices.reshape((args.batch_sz, args.num_lines))
            gt_indices = torch.cuda.LongTensor(gt_indices)
            snake_approx_logits = snake_approx_net(
                gs.reshape((args.batch_sz, 1, args.num_lines, args.radius)))
            nll_approx_loss = loss(
                m(snake_approx_logits.squeeze().transpose(2, 1)), gt_indices)
            total_loss = nll_approx_loss
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
            # ===========================================================================
            # Detach scalars for logging only.
            snake_approx_train_loss = snake_approx_train_loss.data.cpu().numpy(
            )
            nll_approx_loss = nll_approx_loss.data.cpu().numpy()
            total_loss = snake_approx_train_loss + nll_approx_loss
            if step % args.log_freq == 0:
                stop = timeit.default_timer()
                print(f"step={step}\tepoch={epoch}\titer={iteration}"
                      f"\tloss={total_loss}"
                      f"\tsnake_approx_train_loss={snake_approx_train_loss}"
                      f"\tnll_approx_loss={nll_approx_loss}"
                      f"\tlr={optimizer.param_groups[0]['lr']}"
                      f"\ttime={stop-start}")
                start = stop
                writer.add_scalar("total_loss", total_loss, step)
                writer.add_scalar("nll_approx_loss", nll_approx_loss, step)
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], step)
            if step % args.train_eval_freq == 0:
                train_dice = do_eval(
                    net, snake_eval, train.images, train.masks, train.centers,
                    args.batch_sz, args.num_lines, args.radius,
                    smoothing_window=args.smoothing_window).data.cpu().numpy()
                writer.add_scalar("train_dice", train_dice, step)
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\ttrain_eval: train_dice={train_dice}"
                )
            if step % args.val_eval_freq == 0:
                val_dice = do_eval(
                    net, snake_eval, valid.images, valid.masks, valid.centers,
                    args.batch_sz, args.num_lines, args.radius,
                    smoothing_window=args.smoothing_window).data.cpu().numpy()
                writer.add_scalar("val_dice", val_dice, step)
                print(
                    f"step={step}\tepoch={epoch}\titer={iteration}\tvalid_dice={val_dice}"
                )
                # Checkpoint on improvement; layout matches the args.ckpt
                # loading order above.
                if val_dice > best_val_dice:
                    best_val_dice = val_dice
                    _pickle.dump([
                        net.state_dict(), optimizer.state_dict(),
                        snake_approx_net.state_dict(),
                        snake_approx_optimizer.state_dict()
                    ], open(
                        os.path.join(args.log_dir, 'best_model.pth.tar'),
                        'wb'))
                    f = open(
                        os.path.join(args.log_dir,
                                     f"best_val_dice{step}.txt"), 'w')
                    f.write(str(best_val_dice))
                    f.close()
                    print(f"better val dice detected.")
            step += 1
    return best_val_dice
config = confuse.Configuration('research') config.set_file(args.config_file) # set model model = None if args.model == 'classic_lm': model = ClassicLanguageModel(**config['model'].get(), model_name=args.model) elif args.model == 'attention_lm': model = AttentionLanguageModel(**config['model'].get(), model_name=args.model) else: raise ValueError("You have wrong --model parameter") # seed everything seed_all(config['general']['seed'].get()) # get dataloaders and training framework loaders = load_dataloaders(**config['dataloaders'].get()) framework = LMFramework(model, **config['optimizer'].get(), loaders=loaders) if not os.path.isdir(config['general']['checkpoint_path'].get()): os.makedirs(config['general']['checkpoint_path'].get()) if not os.path.isdir(config['trainer_params']['default_save_path'].get()): os.makedirs(config['trainer_params']['default_save_path'].get()) exp_name = args.experiment_name + \ '_' + \
def main():
    """Runs experiment.

    End-to-end IRL experiment driver: parses CLI args, creates a timestamped
    save directory, builds a GridWorld environment and feature extractor,
    instantiates the selected RL method (ActorCritic / SAC / discrete_QSAC /
    discrete_SAC), pre-trains on expert trajectories, runs PerTrajGCL IRL
    training, and finally collects metrics and dumps CSV/pickle results to
    the save directory.
    """
    args = parser.parse_args()
    utils.seed_all(args.seed)
    # Timestamped output directory: <save_dir>/<save_folder>_<timestamp>
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d_%H:%M:%S")
    to_save = pathlib.Path(args.save_dir)
    dir_name = args.save_folder + "_" + st
    to_save = to_save / dir_name
    to_save = str(to_save.resolve())
    log_file = "Experiment_info.txt"
    experiment_logger = Logger(to_save, log_file)
    experiment_logger.log_header("Arguments for the experiment :")
    experiment_logger.log_info(vars(args))
    feat_ext = fe_utils.load_feature_extractor(args.feat_extractor,
                                               obs_width=args.pedestrian_width,
                                               agent_width=args.pedestrian_width)
    experiment_logger.log_header("Parameters of the feature extractor :")
    experiment_logger.log_info(feat_ext.__dict__)
    # NOTE(review): rows/cols/step_size/width are hard-coded here —
    # presumably matched to the annotation data's frame size; confirm before
    # reusing with other datasets.
    env = GridWorld(
        display=args.render,
        is_random=False,
        rows=576,
        cols=720,
        agent_width=args.pedestrian_width,
        step_size=2,
        obs_width=args.pedestrian_width,
        width=10,
        subject=args.subject,
        annotation_file=args.annotation_file,
        goal_state=None,
        step_wrapper=utils.step_wrapper,
        seed=args.seed,
        replace_subject=args.replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        continuous_action=False,
        reset_wrapper=utils.reset_wrapper,
        consider_heading=True,
        is_onehot=False,
        show_orientation=True,
        show_comparison=True,
        show_trail=True,
    )
    experiment_logger.log_header("Environment details :")
    experiment_logger.log_info(env.__dict__)
    # Select the RL method. NOTE(review): these are independent `if`s, not
    # elif; an unrecognized args.rl_method leaves rl_method undefined and
    # fails below at print(rl_method.policy).
    if args.rl_method == "ActorCritic":
        rl_method = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=args.rl_log_intervals,
            max_episode_length=args.rl_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=to_save,
            lr=args.lr_rl,
            max_episodes=args.rl_episodes,
        )
    if args.rl_method == "SAC":
        if not env.continuous_action:
            print("The action space needs to be continuous for SAC to work.")
            exit()
        replay_buffer = ReplayBuffer(args.replay_buffer_size)
        rl_method = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            play_interval=500,
            learning_rate=args.lr_rl,
            buffer_sample_size=args.replay_buffer_sample_size,
        )
    if args.rl_method == "discrete_QSAC":
        if not isinstance(env.action_space, gym.spaces.Discrete):
            print("discrete SAC requires a discrete action space to work.")
            exit()
        replay_buffer = ReplayBuffer(args.replay_buffer_size)
        rl_method = QSAC(
            env,
            replay_buffer,
            feat_ext,
            args.replay_buffer_sample_size,
            learning_rate=args.lr_rl,
            entropy_tuning=True,
            entropy_target=args.entropy_target,
            play_interval=args.play_interval,
            tau=args.tau,
            gamma=args.gamma,
        )
    if args.rl_method == "discrete_SAC":
        if not isinstance(env.action_space, gym.spaces.Discrete):
            print("discrete SAC requires a discrete action space to work.")
            exit()
        replay_buffer = ReplayBuffer(args.replay_buffer_size)
        rl_method = DiscreteSAC(
            env,
            replay_buffer,
            feat_ext,
            args.replay_buffer_sample_size,
            learning_rate=args.lr_rl,
            entropy_tuning=True,
            entropy_target=args.entropy_target,
            play_interval=args.play_interval,
            tau=args.tau,
            gamma=args.gamma,
        )
    print("RL method initialized.")
    print(rl_method.policy)
    # Optionally warm-start the policy from a saved checkpoint.
    if args.policy_path is not None:
        rl_method.policy.load(args.policy_path)
    experiment_logger.log_header("Details of the RL method :")
    experiment_logger.log_info(rl_method.__dict__)
    expert_trajectories = read_expert_trajectories(args.exp_trajectory_path)
    irl_method = PerTrajGCL(
        rl=rl_method,
        env=env,
        expert_trajectories=expert_trajectories,
        learning_rate=args.lr_irl,
        l2_regularization=args.regularizer,
        save_folder=to_save,
        saving_interval=args.saving_interval,
    )
    print("IRL method intialized.")
    print(irl_method.reward_net)
    experiment_logger.log_header("Details of the IRL method :")
    experiment_logger.log_info(irl_method.__dict__)
    # Pre-train the reward network on expert samples, then pre-train the RL
    # policy against that reward before the main IRL loop.
    irl_method.pre_train(
        args.pre_train_iterations,
        args.num_expert_samples,
        account_for_terminal_state=args.account_for_terminal_state,
        gamma=args.gamma,
    )
    rl_method.train(
        args.pre_train_rl_iterations,
        args.rl_ep_length,
        reward_network=irl_method.reward_net,
    )
    # save intermediate RL result
    rl_method.policy.save(to_save + "/policy")
    # Main IRL training loop.
    # NOTE(review): args.rl_ep_length is passed twice — presumably two
    # distinct length parameters in PerTrajGCL.train's signature; verify.
    irl_method.train(
        args.irl_iterations,
        args.rl_episodes,
        args.rl_ep_length,
        args.rl_ep_length,
        reset_training=args.reset_training,
        account_for_terminal_state=args.account_for_terminal_state,
        gamma=args.gamma,
        stochastic_sampling=args.stochastic_sampling,
        num_expert_samples=args.num_expert_samples,
        num_policy_samples=args.num_policy_samples,
    )
    # Collect evaluation metrics over as many rollouts as there are expert
    # trajectories, and persist results.
    metric_applicator = metric_utils.LTHMP2020()
    metric_results = metric_utils.collect_trajectories_and_metrics(
        env,
        feat_ext,
        rl_method.policy,
        len(expert_trajectories),
        args.rl_ep_length,
        metric_applicator,
        disregard_collisions=True,
    )
    pd_metrics = pd.DataFrame(metric_results).T
    pd_metrics = pd_metrics.applymap(lambda x: x[0])
    pd_metrics.to_pickle(to_save + "/metrics.pkl")
    with open(to_save + "/rl_data.csv", "a") as f:
        rl_method.data_table.write_csv(f)
    with open(to_save + "/irl_data.csv", "a") as f:
        irl_method.data_table.write_csv(f)
    with open(to_save + "/pre_irl_data.csv", "a") as f:
        irl_method.pre_data_table.write_csv(f)
def run():
    """Train and evaluate a BERT sentiment classifier with k-fold CV.

    Reads the sentiment TSV, then for each fold: builds datasets/loaders,
    constructs the model and an AdamW optimizer with per-parameter weight
    decay (no decay on biases/LayerNorm), trains with a linear-warmup
    schedule, reloads the best checkpoint from config.MODEL_PATH, and prints
    the fold's evaluation result.
    """
    device = "cuda"
    seed_all(42)
    df = pd.read_csv("/colabdrive/train.tsv", sep="\t", engine="python")
    df = df[["Phrase", "Sentiment"]]
    for num_fold, (df_train, df_valid) in enumerate(kfold_df(df)):
        print("FOLD %s: " % num_fold)
        train_dataset = SegmentDataset(df_train.Phrase.values,
                                       df_train.Sentiment.values)
        valid_dataset = SegmentDataset(df_valid.Phrase.values,
                                       df_valid.Sentiment.values)
        train_loader = DataLoader(train_dataset,
                                  batch_size=config.TRAIN_BATCH_SIZE,
                                  num_workers=4)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=config.VALID_BATCH_SIZE,
                                  num_workers=4)
        model = BertUncasedModel()
        # Exclude bias and LayerNorm parameters from weight decay.
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.001,
            },
            {
                "params": [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]
        num_train_steps = int(
            len(train_dataset) / config.TRAIN_BATCH_SIZE * config.NUM_EPOCHS)
        optimizer = AdamW(optimizer_parameters, lr=5e-5)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)
        train_fn(model, train_loader, optimizer, scheduler, device)
        # Fixed: `model = model.load_state_dict(...)` rebound `model` to the
        # NamedTuple that load_state_dict returns (missing/unexpected keys),
        # so eval_fn received a non-module. Load in place instead.
        model.load_state_dict(torch.load(config.MODEL_PATH))
        print(eval_fn(model, valid_loader, device))