def main(args):
    # cuda
    cuda_exp = args.cuda_exp == "true"

    # Notebook experiment settings
    experiment_name = args.experiment_name
    experiment_results_folder = args.results_folder
    results_path = os.path.join("../", experiment_results_folder)
    data_folder = args.data_folder
    data_file = args.data_file

    # Regularization settings
    if args.noise_reg_scheduler == "constant":
        noise_reg_schedule = constant_regularization_schedule
    elif args.noise_reg_scheduler == "sqrt":
        noise_reg_schedule = square_root_noise_schedule
    elif args.noise_reg_scheduler == "rot":
        noise_reg_schedule = rule_of_thumb_noise_schedule
    else:
        noise_reg_schedule = constant_regularization_schedule
    noise_reg_sigma = args.noise_reg_sigma  # Used as sigma in rule of thumb and as noise scale in constant
    l2_reg = args.l2_reg
    initial_lr = args.initial_lr
    lr_factor = args.lr_factor
    lr_patience = args.lr_patience
    min_lr = args.min_lr

    # Data settings
    obs_cols = args.obs_cols
    context_cols = args.context_cols

    # Training settings
    epochs = args.epochs
    batch_size = args.batch_size
    clipped_adam = args.clipped_adam

    # Dimensions of problem
    problem_dim = len(obs_cols)
    context_dim = len(context_cols)

    # Flow settings
    flow_depth = args.flow_depth
    c_net_depth = args.c_net_depth
    c_net_h_dim = args.c_net_h_dim
    batchnorm_momentum = args.batchnorm_momentum

    # Context conditioner settings
    context_n_depth = args.context_n_depth
    context_n_h_dim = args.context_n_h_dim
    rich_context_dim = args.rich_context_dim

    settings_dict = {
        "epochs": epochs,
        "batch_size": batch_size,
        "problem_dim": problem_dim,
        "context_dim": context_dim,
        "flow_depth": flow_depth,
        "c_net_depth": c_net_depth,
        "c_net_h_dim": c_net_h_dim,
        "context_n_depth": context_n_depth,
        "context_n_h_dim": context_n_h_dim,
        "rich_context_dim": rich_context_dim,
        "obs_cols": obs_cols,
        "context_cols": context_cols,
        "batchnorm_momentum": batchnorm_momentum,
        "l2_reg": l2_reg,
        "clipped_adam": clipped_adam,
        "noise_reg_schedule": args.noise_reg_scheduler,
        "noise_reg_sigma": noise_reg_sigma,
        "initial_lr": initial_lr,
        "lr_factor": lr_factor,
        "lr_patience": lr_patience,
        "min_lr": min_lr
    }
    print(f"Settings:\n{settings_dict}")

    # Load data
    csv_path = os.path.join(data_folder, data_file)
    df = pd.read_csv(csv_path)
    train_dataloader, test_dataloader, obs_scaler, context_scaler = simple_data_split_conditional(
        df=df,
        obs_cols=obs_cols,
        context_cols=context_cols,
        batch_size=batch_size,
        cuda_exp=cuda_exp)

    # Data size and dimension used by the noise regularization schedule
    data_size = len(train_dataloader)
    data_dim = problem_dim + context_dim

    # Define normalizing flow
    normalizing_flow = combi_conditional_normalizing_flow_factory(
        flow_depth=flow_depth,
        problem_dim=problem_dim,
        c_net_depth=c_net_depth,
        c_net_h_dim=c_net_h_dim,
        context_dim=context_dim,
        context_n_h_dim=context_n_h_dim,
        context_n_depth=context_n_depth,
        rich_context_dim=rich_context_dim,
        cuda=cuda_exp,
        batchnorm_momentum=batchnorm_momentum)

    # Setup optimizer
    if clipped_adam is None:
        if l2_reg is None:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr)
        else:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr,
                                   weight_decay=l2_reg)
    else:
        if l2_reg is None:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    clip_norm=clipped_adam)
        else:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    weight_decay=l2_reg,
                                    clip_norm=clipped_adam)
    if lr_factor is not None:
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                         factor=lr_factor,
                                                         patience=lr_patience,
                                                         min_lr=min_lr,
                                                         verbose=True)

    # Setup noise regularization
    h = noise_reg_schedule(data_size, data_dim, noise_reg_sigma)
    noise_reg = NoiseRegularizer(discrete_dims=None, h=h, cuda=cuda_exp)

    # Train and test sizes
    n_train = train_dataloader.dataset.shape[0]
    n_test = test_dataloader.dataset.shape[0]

    # Training loop
    full_train_losses = []
    train_losses = []
    test_losses = []
    no_noise_losses = []
    lr_scheduler_steps = []
    for epoch in range(1, epochs + 1):
        normalizing_flow.modules.train()
        train_epoch_loss = 0
        for k, batch in enumerate(train_dataloader):
            # Add regularization noise to the batch
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            # Condition the flow on the context and calculate -log_prob = loss
            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss.item()
        full_train_losses.append(train_epoch_loss / n_train)

        # Calculate test loss
        normalizing_flow.modules.eval()
        with torch.no_grad():
            test_epoch_loss = 0
            for j, batch in enumerate(test_dataloader):
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the context and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                test_loss = -conditioned_flow_dist.log_prob(x).sum()
                test_epoch_loss += test_loss.item()

        # Log and evaluate every 10th epoch
        if epoch % 10 == 0 or epoch == epochs - 1:
            normalizing_flow.modules.eval()
            train_losses.append(train_epoch_loss / n_train)
            test_losses.append(test_epoch_loss / n_test)

            no_noise_epoch_loss = 0
            for k, batch in enumerate(train_dataloader):
                # Same training data, but without regularization noise
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the context and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                loss = -conditioned_flow_dist.log_prob(x).sum()
                no_noise_epoch_loss += loss.item()
            no_noise_losses.append(no_noise_epoch_loss / n_train)

            if epoch % 100 == 0:
                print(
                    f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss: {no_noise_losses[-1]} test_loss: {test_losses[-1]}"
                )

        # Take scheduler step if needed
        if lr_factor is not None:
            scheduler.step(test_epoch_loss / n_test)
            lr_scheduler_steps.append(epoch)

        # Final epoch: print summary and save results
        if epoch == epochs - 1:
            normalizing_flow.modules.eval()
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss: {no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )
            experiment_dict = {
                'train': train_losses,
                'test': test_losses,
                'no_noise_losses': no_noise_losses,
                'lr_steps': lr_scheduler_steps
            }
            results_dict = {
                'model': normalizing_flow,
                'settings': settings_dict,
                'logs': experiment_dict
            }
            file_name = f"{experiment_name}.pickle"
            file_path = os.path.join(results_path, file_name)
            print(f"Saving: {file_name}")
            with open(file_path, 'wb') as f:
                pickle.dump(results_dict, f)
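# --- Hedged sketch, not part of the original script: a possible argparse setup. ---
# main() above reads these attributes off `args`; the flag names below simply mirror
# those attribute names. `build_arg_parser` is a hypothetical helper, and every type,
# default, and choices list here is an assumption, not the authors' actual CLI.
import argparse


def build_arg_parser():
    parser = argparse.ArgumentParser(description="Conditional flow training (sketch)")
    # Experiment / IO flags used by main()
    parser.add_argument("--cuda_exp", type=str, default="false")  # "true" enables CUDA
    parser.add_argument("--experiment_name", type=str, required=True)
    parser.add_argument("--results_folder", type=str, default="results")
    parser.add_argument("--data_folder", type=str, default="data")
    parser.add_argument("--data_file", type=str, required=True)
    # Regularization and optimizer flags
    parser.add_argument("--noise_reg_scheduler", choices=["constant", "sqrt", "rot"],
                        default="constant")
    parser.add_argument("--noise_reg_sigma", type=float, default=0.1)
    parser.add_argument("--l2_reg", type=float, default=None)
    parser.add_argument("--initial_lr", type=float, default=1e-4)
    parser.add_argument("--lr_factor", type=float, default=None)
    parser.add_argument("--lr_patience", type=int, default=10)
    parser.add_argument("--min_lr", type=float, default=1e-6)
    parser.add_argument("--clipped_adam", type=float, default=None)  # clip_norm, if set
    # Data / training flags
    parser.add_argument("--obs_cols", nargs="+")
    parser.add_argument("--context_cols", nargs="+")
    parser.add_argument("--epochs", type=int, default=100)
    parser.add_argument("--batch_size", type=int, default=512)
    # Flow architecture flags
    parser.add_argument("--flow_depth", type=int, default=4)
    parser.add_argument("--c_net_depth", type=int, default=3)
    parser.add_argument("--c_net_h_dim", type=int, default=64)
    parser.add_argument("--batchnorm_momentum", type=float, default=0.1)
    parser.add_argument("--context_n_depth", type=int, default=3)
    parser.add_argument("--context_n_h_dim", type=int, default=64)
    parser.add_argument("--rich_context_dim", type=int, default=8)
    return parser


# Usage (sketch): main(build_arg_parser().parse_args())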
def main(args):
    # cuda
    cuda_exp = args.cuda_exp == "true"
    print(cuda_exp)

    # Notebook experiment settings
    experiment_name = args.experiment_name
    experiment_results_folder = args.results_folder
    results_path = os.path.join("../", experiment_results_folder)
    data_folder = args.data_folder
    data_file = args.data_file

    # Regularization settings
    if args.noise_reg_scheduler == "constant":
        noise_reg_schedule = constant_regularization_schedule
    elif args.noise_reg_scheduler == "sqrt":
        noise_reg_schedule = square_root_noise_schedule
    elif args.noise_reg_scheduler == "rot":
        noise_reg_schedule = rule_of_thumb_noise_schedule
    else:
        noise_reg_schedule = constant_regularization_schedule
    noise_reg_sigma = args.noise_reg_sigma  # Used as sigma in rule of thumb and as noise scale in constant

    # Data settings
    obs_cols = args.obs_cols
    context_cols = args.context_cols

    # Training settings
    epochs = args.epochs
    batch_size = args.batch_size

    # Dimensions of problem
    problem_dim = len(obs_cols)
    context_dim = len(context_cols)

    # Flow settings
    flow_depth = args.flow_depth
    c_net_depth = args.c_net_depth
    c_net_h_dim = args.c_net_h_dim

    # Context conditioner settings
    context_n_depth = args.context_n_depth
    context_n_h_dim = args.context_n_h_dim
    rich_context_dim = args.rich_context_dim

    settings_dict = {
        "epochs": epochs,
        "batch_size": batch_size,
        "problem_dim": problem_dim,
        "context_dim": context_dim,
        "flow_depth": flow_depth,
        "c_net_depth": c_net_depth,
        "c_net_h_dim": c_net_h_dim,
        "context_n_depth": context_n_depth,
        "context_n_h_dim": context_n_h_dim,
        "rich_context_dim": rich_context_dim,
        "obs_cols": obs_cols,
        "context_cols": context_cols
    }
    print(f"Settings:\n{settings_dict}")

    # Load data
    csv_path = os.path.join(data_folder, data_file)
    donkey_df = pd.read_csv(csv_path, parse_dates=[4, 11])
    train_dataloader, test_dataloader, _, _ = searchlog_day_split(
        donkey_df, obs_cols, context_cols, batch_size, cuda_exp)
    # Record the day-based train/test split so it is saved with the results
    train_idx, test_idx = get_split_idx_on_day(donkey_df)
    run_idxs = {'train': train_idx, 'test': test_idx}

    # Data size and dimension used by the noise regularization schedule
    data_size = len(train_dataloader)
    data_dim = problem_dim + context_dim

    # Define normalizing flow
    normalizing_flow = conditional_normalizing_flow_factory2(
        flow_depth=flow_depth,
        problem_dim=problem_dim,
        c_net_depth=c_net_depth,
        c_net_h_dim=c_net_h_dim,
        context_dim=context_dim,
        context_n_h_dim=context_n_h_dim,
        context_n_depth=context_n_depth,
        rich_context_dim=rich_context_dim,
        cuda=cuda_exp)

    # Setup optimizer
    optimizer = optim.Adam(normalizing_flow.modules.parameters(), lr=1e-4)
    print("number of params: ",
          sum(p.numel() for p in normalizing_flow.modules.parameters()))

    # Setup noise regularization
    h = noise_reg_schedule(data_size, data_dim, noise_reg_sigma)
    noise_reg = NoiseRegularizer(discrete_dims=None, h=h, cuda=cuda_exp)
    print(f"Data size: {train_dataloader.dataset.shape}")
    print(f"Noise scale: {h}")

    # Train and test sizes
    n_train = train_dataloader.dataset.shape[0]
    n_test = test_dataloader.dataset.shape[0]
    print(f"n_train {n_train}")
    print(f"n_test {n_test}")

    # Training loop
    full_train_losses = []
    train_losses = []
    test_losses = []
    no_noise_losses = []
    for epoch in range(1, epochs + 1):
        normalizing_flow.modules.train()
        train_epoch_loss = 0
        for k, batch in enumerate(train_dataloader):
            # Add regularization noise to the batch
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            # Condition the flow on the context and calculate -log_prob = loss
            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss.item()
        full_train_losses.append(train_epoch_loss / n_train)

        # Log and evaluate every 10th epoch
        if epoch % 10 == 0 or epoch == epochs - 1:
            normalizing_flow.modules.eval()
            train_losses.append(train_epoch_loss / n_train)

            no_noise_epoch_loss = 0
            for k, batch in enumerate(train_dataloader):
                # Same training data, but without regularization noise
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                conditioned_flow_dist = normalizing_flow.condition(context)
                loss = -conditioned_flow_dist.log_prob(x).sum()
                no_noise_epoch_loss += loss.item()
            no_noise_losses.append(no_noise_epoch_loss / n_train)

            test_epoch_loss = 0
            for j, batch in enumerate(test_dataloader):
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                conditioned_flow_dist = normalizing_flow.condition(context)
                test_loss = -conditioned_flow_dist.log_prob(x).sum()
                test_epoch_loss += test_loss.item()
            test_losses.append(test_epoch_loss / n_test)

            if epoch % 100 == 0:
                print(
                    f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss: {no_noise_losses[-1]} test_loss: {test_losses[-1]}"
                )

        # Final epoch: print summary and save results
        if epoch == epochs - 1:
            normalizing_flow.modules.eval()
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss: {no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )
            experiment_dict = {
                'train': train_losses,
                'test': test_losses,
                'no_noise_losses': no_noise_losses
            }
            results_dict = {
                'model': normalizing_flow,
                'settings': settings_dict,
                'logs': experiment_dict,
                'data_split': run_idxs
            }
            file_name = f"{experiment_name}.pickle"
            file_path = os.path.join(results_path, file_name)
            print(f"Saving: {file_name}")
            with open(file_path, 'wb') as f:
                pickle.dump(results_dict, f)
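# --- Hedged sketch, not part of the original script: reloading a saved run. ---
# The results pickle written above holds {'model', 'settings', 'logs', 'data_split'};
# `load_run` is a hypothetical helper and the paths in the usage comment are examples.
import os
import pickle


def load_run(results_path, experiment_name):
    """Reload a saved results_dict and put the flow into eval mode."""
    file_path = os.path.join(results_path, f"{experiment_name}.pickle")
    with open(file_path, "rb") as f:
        results_dict = pickle.load(f)
    flow = results_dict["model"]
    flow.modules.eval()
    return flow, results_dict["settings"], results_dict["logs"]


# Usage (sketch): score held-out rows exactly as in the test loop above.
# flow, settings, logs = load_run("../results", "my_experiment")
# with torch.no_grad():
#     dist = flow.condition(context)   # context: (N, context_dim) tensor
#     nll = -dist.log_prob(x).mean()   # x: (N, problem_dim) tensor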
def main(args):
    # cuda
    cuda_exp = args.cuda_exp == "true"

    # Notebook experiment settings
    experiment_name = args.experiment_name
    experiment_results_folder = args.results_folder
    results_path = os.path.join("../", experiment_results_folder)
    data_folder = args.data_folder
    data_file = args.data_file
    extra_data_file = args.extra_data_file

    # Regularization settings
    if args.noise_reg_scheduler == "constant":
        noise_reg_schedule = constant_regularization_schedule
    elif args.noise_reg_scheduler == "sqrt":
        noise_reg_schedule = square_root_noise_schedule
    elif args.noise_reg_scheduler == "rot":
        noise_reg_schedule = rule_of_thumb_noise_schedule
    else:
        noise_reg_schedule = constant_regularization_schedule
    noise_reg_sigma = args.noise_reg_sigma  # Used as sigma in rule of thumb and as noise scale in constant
    l2_reg = args.l2_reg
    initial_lr = args.initial_lr
    lr_factor = args.lr_factor
    lr_patience = args.lr_patience
    min_lr = args.min_lr

    # Data settings
    obs_cols = args.obs_cols
    semisup_context_cols = args.semisup_context_cols
    sup_context_cols = args.sup_context_cols
    if sup_context_cols is None:
        context_cols = semisup_context_cols
    else:
        context_cols = semisup_context_cols + sup_context_cols

    # Training settings
    epochs = args.epochs
    batch_size = args.batch_size
    clipped_adam = args.clipped_adam

    # Dimensions of problem
    problem_dim = len(obs_cols)
    context_dim = len(context_cols)

    # Flow settings
    flow_depth = args.flow_depth
    c_net_depth = args.c_net_depth
    c_net_h_dim = args.c_net_h_dim
    batchnorm_momentum = args.batchnorm_momentum

    # Context conditioner settings
    context_n_depth = args.context_n_depth
    context_n_h_dim = args.context_n_h_dim
    rich_context_dim = args.rich_context_dim

    settings_dict = {
        "epochs": epochs,
        "batch_size": batch_size,
        "problem_dim": problem_dim,
        "context_dim": context_dim,
        "flow_depth": flow_depth,
        "c_net_depth": c_net_depth,
        "c_net_h_dim": c_net_h_dim,
        "context_n_depth": context_n_depth,
        "context_n_h_dim": context_n_h_dim,
        "rich_context_dim": rich_context_dim,
        "obs_cols": obs_cols,
        "context_cols": context_cols,
        "semisup_context_cols": semisup_context_cols,
        "sup_context_cols": sup_context_cols,
        "batchnorm_momentum": batchnorm_momentum,
        "l2_reg": l2_reg,
        "clipped_adam": clipped_adam,
        "initial_lr": initial_lr,
        "lr_factor": lr_factor,
        "lr_patience": lr_patience,
        "min_lr": min_lr,
        "noise_reg_sigma": noise_reg_sigma
    }
    print(f"Settings:\n{settings_dict}")

    # Load data
    csv_path = os.path.join(data_folder, data_file)
    donkey_df = pd.read_csv(csv_path, parse_dates=[4, 11])
    csv_path = os.path.join(data_folder, extra_data_file)
    extra_df = pd.read_csv(csv_path, parse_dates=[4, 12])

    # Save the train/test split. We do use a seed, but this way we have it recorded.
    train_idx, test_idx = get_split_idx_on_day(donkey_df)
    run_idxs = {'train': train_idx, 'test': test_idx}

    train_dataloader, test_dataloader, extra_dataloader, obs_scaler, semisup_context_scaler, sup_context_scaler = searchlog_semisup_day_split(
        sup_df=donkey_df,
        unsup_df=extra_df,
        obs_cols=obs_cols,
        semisup_context_cols=semisup_context_cols,
        sup_context_cols=sup_context_cols,
        batch_size=batch_size,
        cuda_exp=True)

    # Data size and dimension used by the noise regularization schedule
    data_size = len(train_dataloader)
    data_dim = problem_dim + context_dim

    # Define normalizing flow
    normalizing_flow = conditional_normalizing_flow_factory3(
        flow_depth=flow_depth,
        problem_dim=problem_dim,
        c_net_depth=c_net_depth,
        c_net_h_dim=c_net_h_dim,
        context_dim=context_dim,
        context_n_h_dim=context_n_h_dim,
        context_n_depth=context_n_depth,
        rich_context_dim=rich_context_dim,
        cuda=cuda_exp,
        batchnorm_momentum=batchnorm_momentum)

    # Setup optimizer
    if clipped_adam is None:
        if l2_reg is None:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr)
        else:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr,
                                   weight_decay=l2_reg)
    else:
        if l2_reg is None:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    clip_norm=clipped_adam)
        else:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    weight_decay=l2_reg,
                                    clip_norm=clipped_adam)
    if lr_factor is not None:
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                         factor=lr_factor,
                                                         patience=lr_patience,
                                                         min_lr=min_lr,
                                                         verbose=True)

    # Setup noise regularization
    h = noise_reg_schedule(data_size, data_dim, noise_reg_sigma)
    noise_reg = NoiseRegularizer(discrete_dims=None, h=h, cuda=cuda_exp)

    # Train and test sizes
    n_train = train_dataloader.dataset.shape[0]
    n_test = test_dataloader.dataset.shape[0]

    # Define the possible supervised contexts to marginalize out during unsupervised training
    context_val_dict = {}
    if "wind_dir_sin" in sup_context_cols and "wind_dir_cos" in sup_context_cols:
        wind_dir_arr = np.unique(
            donkey_df[['wind_dir_sin', 'wind_dir_cos']].values.tolist(), axis=0)
        context_val_dict['wind_dir_sin'] = wind_dir_arr
        context_val_dict['wind_dir_cos'] = None
    if "windy" in sup_context_cols:
        windy_arr = donkey_df['windy'].unique()
        context_val_dict['windy'] = windy_arr
    if "air_temp" in sup_context_cols:
        air_temp_arr = donkey_df['air_temp'].unique()
        context_val_dict['air_temp'] = air_temp_arr
    if "rain" in sup_context_cols:
        rain_arr = donkey_df['rain'].unique()
        context_val_dict['rain'] = rain_arr
    context_val_arr = [
        context_val_dict[col] for col in sup_context_cols
        if context_val_dict[col] is not None
    ]
    temp_contexts = np.array(list(itertools.product(*context_val_arr)))
    contexts_arr = []
    for row in temp_contexts:
        cleaned_row = []
        for elem in row:
            if isinstance(elem, np.ndarray):
                for value in elem:
                    cleaned_row.append(value)
            else:
                cleaned_row.append(elem)
        contexts_arr.append(cleaned_row)
    possible_contexts = np.array(contexts_arr)

    # Empirical prior over the rain indicator, used to weight the marginalization terms
    prior_dict = {
        True:
        torch.tensor(donkey_df.rain.sum() / len(donkey_df)).float().cuda(),
        False:
        torch.tensor(1 - donkey_df.rain.sum() / len(donkey_df)).float().cuda()
    }
    print(len(possible_contexts))
    print(possible_contexts)
    print(prior_dict)

    # Training loop
    train_losses = []
    test_losses = []
    no_noise_losses = []
    lr_scheduler_steps = []
    for epoch in range(1, epochs + 1):
        normalizing_flow.modules.train()
        train_epoch_loss = 0
        for k, batch in enumerate(train_dataloader):
            # Add regularization noise to the batch
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            # Condition the flow on the context and calculate -log_prob = loss
            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss.item()

        # Unsupervised step on the extra data: marginalize out the supervised context.
        # This loss is not logged.
        for k, batch in enumerate(extra_dataloader):
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            semisup_context = batch[:, problem_dim:]
            loss = 0
            for unscaled_sup_context in possible_contexts:
                sup_context = sup_context_scaler.transform(
                    [unscaled_sup_context])
                sup_context = torch.tensor(sup_context).float().expand(
                    (semisup_context.shape[0], len(sup_context[0]))).cuda()
                context = torch.cat((semisup_context, sup_context), dim=1)

                # Weight each candidate supervised context by its empirical prior.
                # prior_dict is keyed by the rain indicator, so this assumes 'rain'
                # is the first supervised context column.
                conditioned_flow_dist = normalizing_flow.condition(context)
                loss += -(conditioned_flow_dist.log_prob(x) *
                          prior_dict[unscaled_sup_context[0]]).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Calculate test loss
        normalizing_flow.modules.eval()
        with torch.no_grad():
            test_epoch_loss = 0
            for j, batch in enumerate(test_dataloader):
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                # Condition the flow on the context and calculate -log_prob = loss
                conditioned_flow_dist = normalizing_flow.condition(context)
                test_loss = -conditioned_flow_dist.log_prob(x).sum()
                test_epoch_loss += test_loss.item()

        # Log and evaluate every 10th epoch
        if epoch % 10 == 0 or epoch == epochs - 1:
            normalizing_flow.modules.eval()
            train_losses.append(train_epoch_loss / n_train)
            test_losses.append(test_epoch_loss / n_test)

            no_noise_epoch_loss = 0
            for k, batch in enumerate(train_dataloader):
                # Same training data, but without regularization noise
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                conditioned_flow_dist = normalizing_flow.condition(context)
                loss = -conditioned_flow_dist.log_prob(x).sum()
                no_noise_epoch_loss += loss.item()
            no_noise_losses.append(no_noise_epoch_loss / n_train)

        # Take scheduler step if needed
        if lr_factor is not None:
            scheduler.step(test_epoch_loss / n_test)
            lr_scheduler_steps.append(epoch)

        # Final epoch: print summary and save results
        if epoch == epochs - 1:
            normalizing_flow.modules.eval()
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss: {no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )
            experiment_dict = {
                'train': train_losses,
                'test': test_losses,
                'no_noise_losses': no_noise_losses
            }
            results_dict = {
                'model': normalizing_flow,
                'settings': settings_dict,
                'logs': experiment_dict,
                'data_split': run_idxs,
                'lr_steps': lr_scheduler_steps
            }
            file_name = f"{experiment_name}.pickle"
            file_path = os.path.join(results_path, file_name)
            print(f"Saving: {file_name}")
            with open(file_path, 'wb') as f:
                pickle.dump(results_dict, f)
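# --- Hedged sketch, not part of the original scripts: plausible noise schedules. ---
# All four training scripts select one of constant_regularization_schedule,
# square_root_noise_schedule or rule_of_thumb_noise_schedule and call it as
# schedule(data_size, data_dim, sigma). The real implementations live elsewhere in
# the repo; the bodies below are assumptions that only illustrate the call signature
# and the usual shrink-with-data-size behaviour of such schedules.
def constant_regularization_schedule(data_size, data_dim, sigma):
    # Constant noise scale, independent of the data set size.
    return sigma


def square_root_noise_schedule(data_size, data_dim, sigma):
    # Noise scale decaying with the square root of the data size (assumed form).
    return sigma / data_size ** 0.5


def rule_of_thumb_noise_schedule(data_size, data_dim, sigma):
    # Kernel-style rule of thumb: bandwidth shrinking at the classic
    # n^(-1/(4+d)) rate, scaled by sigma (assumed form).
    return sigma * data_size ** (-1.0 / (4 + data_dim))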
def main(args):
    # cuda
    cuda_exp = args.cuda_exp == "true"

    # Notebook experiment settings
    experiment_name = args.experiment_name
    experiment_results_folder = args.results_folder
    results_path = os.path.join("../", experiment_results_folder)
    data_folder = args.data_folder
    data_file = args.data_file
    extra_data_file = args.extra_data_file

    # Regularization settings
    if args.noise_reg_scheduler == "constant":
        noise_reg_schedule = constant_regularization_schedule
    elif args.noise_reg_scheduler == "sqrt":
        noise_reg_schedule = square_root_noise_schedule
    elif args.noise_reg_scheduler == "rot":
        noise_reg_schedule = rule_of_thumb_noise_schedule
    else:
        noise_reg_schedule = constant_regularization_schedule
    noise_reg_sigma = args.noise_reg_sigma  # Used as sigma in rule of thumb and as noise scale in constant
    l2_reg = args.l2_reg
    initial_lr = args.initial_lr
    lr_decay = args.lr_decay

    # Data settings
    obs_cols = args.obs_cols
    semisup_context_cols = args.semisup_context_cols
    context_cols = semisup_context_cols

    # Training settings
    epochs = args.epochs
    batch_size = args.batch_size
    clipped_adam = args.clipped_adam

    # Dimensions of problem
    problem_dim = len(obs_cols)
    context_dim = len(context_cols)

    # Flow settings
    flow_depth = args.flow_depth
    c_net_depth = args.c_net_depth
    c_net_h_dim = args.c_net_h_dim
    batchnorm_momentum = args.batchnorm_momentum

    # Context conditioner settings
    context_n_depth = args.context_n_depth
    context_n_h_dim = args.context_n_h_dim
    rich_context_dim = args.rich_context_dim

    settings_dict = {
        "epochs": epochs,
        "batch_size": batch_size,
        "problem_dim": problem_dim,
        "context_dim": context_dim,
        "flow_depth": flow_depth,
        "c_net_depth": c_net_depth,
        "c_net_h_dim": c_net_h_dim,
        "context_n_depth": context_n_depth,
        "context_n_h_dim": context_n_h_dim,
        "rich_context_dim": rich_context_dim,
        "obs_cols": obs_cols,
        "context_cols": context_cols,
        "semisup_context_cols": semisup_context_cols,
        "batchnorm_momentum": batchnorm_momentum,
        "l2_reg": l2_reg,
        "clipped_adam": clipped_adam,
        "initial_lr": initial_lr,
        "lr_decay": lr_decay
    }
    print(f"Settings:\n{settings_dict}")

    # Load data
    csv_path = os.path.join(data_folder, data_file)
    donkey_df = pd.read_csv(csv_path, parse_dates=[4, 11])
    csv_path = os.path.join(data_folder, extra_data_file)
    extra_df = pd.read_csv(csv_path, parse_dates=[4, 12])

    # Save the train/test split. We do use a seed, but this way we have it recorded.
    train_idx, test_idx = get_split_idx_on_day(donkey_df)
    run_idxs = {'train': train_idx, 'test': test_idx}

    train_dataloader, test_dataloader, extra_dataloader, obs_scaler, semisup_context_scaler = searchlog_no_weather_day_split(
        sup_df=donkey_df,
        unsup_df=extra_df,
        obs_cols=obs_cols,
        semisup_context_cols=semisup_context_cols,
        batch_size=batch_size,
        cuda_exp=cuda_exp)

    # Data size and dimension used by the noise regularization schedule
    data_size = len(train_dataloader)
    data_dim = problem_dim + context_dim

    # Define normalizing flow
    normalizing_flow = conditional_normalizing_flow_factory3(
        flow_depth=flow_depth,
        problem_dim=problem_dim,
        c_net_depth=c_net_depth,
        c_net_h_dim=c_net_h_dim,
        context_dim=context_dim,
        context_n_h_dim=context_n_h_dim,
        context_n_depth=context_n_depth,
        rich_context_dim=rich_context_dim,
        cuda=cuda_exp,
        batchnorm_momentum=batchnorm_momentum)

    # Setup optimizer
    if clipped_adam is None:
        if l2_reg is None:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr)
        else:
            optimizer = optim.Adam(normalizing_flow.modules.parameters(),
                                   lr=initial_lr,
                                   weight_decay=l2_reg)
    else:
        if l2_reg is None:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    clip_norm=clipped_adam)
        else:
            optimizer = ClippedAdam(normalizing_flow.modules.parameters(),
                                    lr=initial_lr,
                                    weight_decay=l2_reg,
                                    clip_norm=clipped_adam)
    if lr_decay is not None:
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer,
                                                     gamma=lr_decay,
                                                     last_epoch=-1)

    # Setup noise regularization
    h = noise_reg_schedule(data_size, data_dim, noise_reg_sigma)
    noise_reg = NoiseRegularizer(discrete_dims=None, h=h, cuda=cuda_exp)

    # Train and test sizes
    n_train = train_dataloader.dataset.shape[0]
    n_test = test_dataloader.dataset.shape[0]

    # Training loop
    full_train_losses = []
    train_losses = []
    test_losses = []
    no_noise_losses = []
    for epoch in range(1, epochs + 1):
        normalizing_flow.modules.train()
        train_epoch_loss = 0
        for k, batch in enumerate(train_dataloader):
            # Add regularization noise to the batch
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            # Condition the flow on the context and calculate -log_prob = loss
            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_epoch_loss += loss.item()
        full_train_losses.append(train_epoch_loss / n_train)

        # Loop over the data from the "unsupervised" set. This loss is not logged;
        # we are only interested in the test loss anyway.
        for k, batch in enumerate(extra_dataloader):
            batch = noise_reg.add_noise(batch)
            x = batch[:, :problem_dim]
            context = batch[:, problem_dim:]

            conditioned_flow_dist = normalizing_flow.condition(context)
            loss = -conditioned_flow_dist.log_prob(x).sum()

            # Calculate gradients and take an optimizer step
            normalizing_flow.modules.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Log and evaluate every 10th epoch
        if epoch % 10 == 0 or epoch == epochs - 1:
            normalizing_flow.modules.eval()
            train_losses.append(train_epoch_loss / n_train)

            no_noise_epoch_loss = 0
            for k, batch in enumerate(train_dataloader):
                # Same training data, but without regularization noise
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                conditioned_flow_dist = normalizing_flow.condition(context)
                loss = -conditioned_flow_dist.log_prob(x).sum()
                no_noise_epoch_loss += loss.item()
            no_noise_losses.append(no_noise_epoch_loss / n_train)

            test_epoch_loss = 0
            for j, batch in enumerate(test_dataloader):
                x = batch[:, :problem_dim]
                context = batch[:, problem_dim:]

                conditioned_flow_dist = normalizing_flow.condition(context)
                test_loss = -conditioned_flow_dist.log_prob(x).sum()
                test_epoch_loss += test_loss.item()
            test_losses.append(test_epoch_loss / n_test)

        # Take scheduler step if needed
        if lr_decay is not None:
            scheduler.step()

        # Final epoch: print summary and save results
        if epoch == epochs - 1:
            normalizing_flow.modules.eval()
            print(
                f"Epoch {epoch}: train loss: {train_losses[-1]} no noise loss: {no_noise_losses[-1]} test_loss: {test_losses[-1]}"
            )
            experiment_dict = {
                'train': train_losses,
                'test': test_losses,
                'no_noise_losses': no_noise_losses
            }
            results_dict = {
                'model': normalizing_flow,
                'settings': settings_dict,
                'logs': experiment_dict,
                'data_split': run_idxs
            }
            file_name = f"{experiment_name}.pickle"
            file_path = os.path.join(results_path, file_name)
            print(f"Saving: {file_name}")
            with open(file_path, 'wb') as f:
                pickle.dump(results_dict, f)
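# --- Hedged sketch, not part of the original scripts: a minimal NoiseRegularizer. ---
# The scripts only rely on NoiseRegularizer(discrete_dims=None, h=h, cuda=...) and
# noise_reg.add_noise(batch); this stand-in adds zero-mean Gaussian noise of scale h
# to every column. The real class (e.g. its handling of discrete_dims or the cuda
# flag) may differ.
import torch


class NoiseRegularizer:
    def __init__(self, discrete_dims=None, h=1.0, cuda=False):
        self.discrete_dims = discrete_dims  # column indices to leave noise-free
        self.h = h
        self.cuda = cuda  # kept only to mirror the call signature used above

    def add_noise(self, batch):
        # Perturb the (already device-resident) batch with N(0, h^2) noise.
        noise = torch.randn_like(batch) * self.h
        if self.discrete_dims is not None:
            noise[:, self.discrete_dims] = 0.0  # keep discrete columns exact
        return batch + noise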