def main():
    config = get_config()
    use_cuda = config['use_cuda'] and torch.cuda.is_available()
    set_seed(config['seed'], use_cuda)  # set seeds for reproducibility
    init_cuda(config['deterministic'], config['allow_multigpu'])
    torch.multiprocessing.set_sharing_strategy('file_system')

    harness = GEORGEHarness(config, use_cuda=use_cuda)
    harness.save_full_config(config)

    dataloaders = harness.get_dataloaders(config, mode='erm')
    num_classes = dataloaders['train'].dataset.get_num_classes('superclass')
    model = harness.get_nn_model(config, num_classes=num_classes, mode='erm')
    print('Model architecture:')
    print(model)

    # Train a model with ERM
    erm_dir = harness.classify(config['classification_config'], model, dataloaders, 'erm')

    # Cluster the activations of the model
    reduction_model = UMAPReducer(random_state=12345, n_components=2, n_neighbors=10, min_dist=0)
    reduction_dir = harness.reduce(config['reduction_config'], reduction_model,
                                   inputs_path=os.path.join(erm_dir, 'outputs.pt'))
    cluster_model = GaussianMixture(covariance_type='full', n_components=5, n_init=3)
    cluster_dir = harness.cluster(config['cluster_config'], cluster_model,
                                  inputs_path=os.path.join(reduction_dir, 'outputs.pt'))

    set_seed(config['seed'], use_cuda)  # reset random state
    dataloaders = harness.get_dataloaders(config, mode='george',
                                          subclass_labels=os.path.join(cluster_dir, 'clusters.pt'))
    model = harness.get_nn_model(config, num_classes=num_classes, mode='george')

    # Train the final (GEORGE) model
    george_dir = harness.classify(config['classification_config'], model, dataloaders,
                                  mode='george')
def main(): config = get_config() use_cuda = config["use_cuda"] and torch.cuda.is_available() set_seed(config["seed"], use_cuda) # set seeds for reproducibility init_cuda(config["deterministic"], config["allow_multigpu"]) torch.multiprocessing.set_sharing_strategy("file_system") harness = GEORGEHarness(config, use_cuda=use_cuda) harness.save_full_config(config) dataloaders = harness.get_dataloaders(config, mode="erm") num_classes = dataloaders["train"].dataset.get_num_classes("superclass") model = harness.get_nn_model(config, num_classes=num_classes, mode="erm") print("Model architecture:") print(model) # Train a model with ERM erm_dir = harness.classify( config["classification_config"], model, dataloaders, "erm" ) # Cluster the activations of the model reduction_model = UMAPReducer( random_state=12345, n_components=2, n_neighbors=10, min_dist=0 ) reduction_dir = harness.reduce( config["reduction_config"], reduction_model, inputs_path=os.path.join(erm_dir, "outputs.pt"), ) cluster_model = GaussianMixture(covariance_type="full", n_components=5, n_init=3) cluster_dir = harness.cluster( config["cluster_config"], cluster_model, inputs_path=os.path.join(reduction_dir, "outputs.pt"), ) set_seed(config["seed"], use_cuda) # reset random state dataloaders = harness.get_dataloaders( config, mode="george", subclass_labels=os.path.join(cluster_dir, "clusters.pt") ) model = harness.get_nn_model(config, num_classes=num_classes, mode="george") # Train the final (GEORGE) model george_dir = harness.classify( config["classification_config"], model, dataloaders, mode="george" )
def main(): config = get_config() use_cuda = config["use_cuda"] and torch.cuda.is_available() set_seed(config["seed"], use_cuda) # set seeds for reproducibility init_cuda(config["deterministic"], config["allow_multigpu"]) # Initialize wandb with online-logging as the default local_dir = Path(".", "local_logging") local_dir.mkdir(exist_ok=True) if config.get("log_offline", False): os.environ["WANDB_MODE"] = "dryrun" cluster_model_name = config["cluster_config"]["model"] if cluster_model_name == "topograd": if config["cluster_config"]["method_kwargs"].get("iters", -1) == 0: cluster_model_name = "tomato" wandb.init( entity="predictive-analytics-lab", project="hidden-stratification", dir=str(local_dir), config=config, reinit=True, group=config.get("group", f"{config['dataset']}/{cluster_model_name}"), ) torch.multiprocessing.set_sharing_strategy("file_system") harness = GEORGEHarness(config, use_cuda=use_cuda) harness.save_full_config(config) first_mode = "erm" if (config["mode"] == "george") else config["mode"] dataloaders = harness.get_dataloaders(config, mode=first_mode) num_classes = dataloaders["train"].dataset.get_num_classes("superclass") model = harness.get_nn_model(config, num_classes=num_classes, mode=first_mode) activ_done = config["activations_dir"] != "NONE" rep_done = config["representation_dir"] != "NONE" activ_done = ( activ_done or rep_done ) # don't need to get activations if we already have reduced ones # Train a model with ERM if activ_done and not (config["classification_config"]["eval_only"] or config["classification_config"]["save_act_only"]): erm_dir = config["activations_dir"] else: if (config["classification_config"]["eval_only"] or config["classification_config"]["save_act_only"]): erm_dir = config["activations_dir"] model_path = os.path.join( erm_dir, f'{config["classification_config"]["eval_mode"]}_model.pt') print(f"Loading model from {model_path}...") model.load_state_dict(torch.load(model_path)["state_dict"]) erm_dir = harness.classify(config["classification_config"], model, dataloaders, mode=first_mode) if not config["classification_config"]["bit_pretrained"] and not rep_done: model.load_state_dict( torch.load(os.path.join(erm_dir, "best_model.pt"))["state_dict"]) set_seed(config["seed"], use_cuda) # Dimensionality-reduce the model activations if rep_done: reduction_dir = config["representation_dir"] else: reduction_model = harness.get_reduction_model(config, nn_model=model) reduction_dir = harness.reduce( config["reduction_config"], reduction_model, inputs_path=os.path.join(erm_dir, "outputs.pt"), ) cluster_model = harness.get_cluster_model(config) harness.cluster( config["cluster_config"], cluster_model, inputs_path=os.path.join(reduction_dir, "outputs.pt"), )
def main() -> None: config = get_config() with initialize(config_path="../configs"): hydra_config = compose( config_name="biased_data", overrides=[ f"data={config['data_config']}", f"bias={config['bias_config']}" ], ) hydra_config["data"]["data_split_seed"] = config.get( "data_split_seed", config["seed"]) print(hydra_config) biased_data_config = BaseConfig.from_hydra(hydra_config) use_cuda = config['use_cuda'] and torch.cuda.is_available() set_seed(config['seed'], use_cuda) # set seeds for reproducibility init_cuda(config['deterministic'], config['allow_multigpu']) # Initialize wandb with online-logging as the default local_dir = Path(".", "local_logging") local_dir.mkdir(exist_ok=True) if config.get("log_offline", False): os.environ["WANDB_MODE"] = "dryrun" cluster_model_name = config["cluster_config"]["model"] wandb.init( entity="predictive-analytics-lab", project="suds", dir=str(local_dir), config=config, reinit=True, group=config.get("group", f"{config['dataset']}.GEORGE.{cluster_model_name}"), ) torch.multiprocessing.set_sharing_strategy('file_system') harness = GEORGEHarness(config, use_cuda=use_cuda) harness.save_full_config(config) first_mode = 'erm' if (config['mode'] == 'george') else config['mode'] dataloaders = harness.get_dataloaders(config=config, data_config=biased_data_config, mode=first_mode, use_cuda=use_cuda) num_classes = dataloaders['train'].dataset.get_num_classes('superclass') model = harness.get_nn_model(config, num_classes=num_classes, mode=first_mode) activ_done = config['activations_dir'] != 'NONE' rep_done = config['representation_dir'] != 'NONE' cluster_done = config['cluster_dir'] != 'NONE' rep_done = ( rep_done or cluster_done ) # if we already have clusters, don't need to do reduction step activ_done = ( activ_done or rep_done ) # don't need to get activations if we already have reduced ones if config['classification_config']['eval_only']: assert activ_done if config['cluster_dir'] != 'NONE': dataloaders = harness.get_dataloaders( config=config, data_config=biased_data_config, mode=first_mode, use_cuda=use_cuda, subclass_labels=os.path.join( config['cluster_dir'], 'clusters.pt') if os.path.isdir( config['cluster_dir']) else config['cluster_dir'], ) # Train a model with ERM if activ_done and not (config['classification_config']['eval_only'] or config['classification_config']['save_act_only']): erm_dir = config['activations_dir'] else: if (config['classification_config']['eval_only'] or config['classification_config']['save_act_only']): erm_dir = config['activations_dir'] model_path = os.path.join( erm_dir, f'{config["classification_config"]["eval_mode"]}_model.pt') print(f'Loading model from {model_path}...') model.load_state_dict(torch.load(model_path)['state_dict']) erm_dir = harness.classify(config['classification_config'], model, dataloaders, mode=first_mode) if (config['classification_config']['eval_only'] or config['classification_config']['save_act_only']): exit() if config['mode'] == 'george': if not config['classification_config'][ 'bit_pretrained'] and not rep_done: model.load_state_dict( torch.load(os.path.join(erm_dir, 'best_model.pt'))['state_dict']) set_seed(config['seed'], use_cuda) # Dimensionality-reduce the model activations if rep_done: reduction_dir = config['representation_dir'] else: reduction_model = harness.get_reduction_model(config, nn_model=model) reduction_dir = harness.reduce( config['reduction_config'], reduction_model, inputs_path=os.path.join(erm_dir, 'outputs.pt'), ) # Cluster the per-superclass features if cluster_done: 
cluster_dir = config['cluster_dir'] else: cluster_model = harness.get_cluster_model(config) cluster_dir = harness.cluster( config['cluster_config'], cluster_model, inputs_path=os.path.join(reduction_dir, 'outputs.pt'), ) set_seed(config['seed'], use_cuda) # reset random state cluster_label_path = os.path.join(cluster_dir, 'clusters.pt') dataloaders = harness.get_dataloaders( config, mode='george', data_config=biased_data_config, subclass_labels=cluster_label_path, use_cuda=use_cuda, ) model = harness.get_nn_model(config, num_classes=num_classes, mode='george') # Train the final (GEORGE) model george_dir = harness.classify(config['classification_config'], model, dataloaders, mode='george')
def main():
    config = get_config()
    use_cuda = config['use_cuda'] and torch.cuda.is_available()
    set_seed(config['seed'], use_cuda)  # set seeds for reproducibility
    init_cuda(config['deterministic'], config['allow_multigpu'])
    torch.multiprocessing.set_sharing_strategy('file_system')

    harness = GEORGEHarness(config, use_cuda=use_cuda)
    harness.save_full_config(config)

    first_mode = 'erm' if (config['mode'] == 'george') else config['mode']
    dataloaders = harness.get_dataloaders(config, mode=first_mode)
    num_classes = dataloaders['train'].dataset.get_num_classes('superclass')
    model = harness.get_nn_model(config, num_classes=num_classes, mode=first_mode)

    activ_done = config['activations_dir'] != 'NONE'
    rep_done = config['representation_dir'] != 'NONE'
    cluster_done = config['cluster_dir'] != 'NONE'
    rep_done = rep_done or cluster_done  # if we already have clusters, don't need to do reduction step
    activ_done = activ_done or rep_done  # don't need to get activations if we already have reduced ones
    if config['classification_config']['eval_only']:
        assert activ_done

    if config['cluster_dir'] != 'NONE':
        dataloaders = harness.get_dataloaders(
            config,
            mode=first_mode,
            subclass_labels=os.path.join(config['cluster_dir'], 'clusters.pt')
            if os.path.isdir(config['cluster_dir']) else config['cluster_dir'])

    # Train a model with ERM
    if activ_done and not (config['classification_config']['eval_only']
                           or config['classification_config']['save_act_only']):
        erm_dir = config['activations_dir']
    else:
        if (config['classification_config']['eval_only']
                or config['classification_config']['save_act_only']):
            erm_dir = config['activations_dir']
            model_path = os.path.join(
                erm_dir, f'{config["classification_config"]["eval_mode"]}_model.pt')
            print(f'Loading model from {model_path}...')
            model.load_state_dict(torch.load(model_path)['state_dict'])
        erm_dir = harness.classify(config['classification_config'], model, dataloaders,
                                   mode=first_mode)

    if (config['classification_config']['eval_only']
            or config['classification_config']['save_act_only']):
        exit()

    if config['mode'] == 'george':
        if not config['classification_config']['bit_pretrained'] and not rep_done:
            model.load_state_dict(
                torch.load(os.path.join(erm_dir, 'best_model.pt'))['state_dict'])
        set_seed(config['seed'], use_cuda)

        # Dimensionality-reduce the model activations
        if rep_done:
            reduction_dir = config['representation_dir']
        else:
            reduction_model = harness.get_reduction_model(config, nn_model=model)
            reduction_dir = harness.reduce(config['reduction_config'], reduction_model,
                                           inputs_path=os.path.join(erm_dir, 'outputs.pt'))

        # Cluster the per-superclass features
        if cluster_done:
            cluster_dir = config['cluster_dir']
        else:
            cluster_model = harness.get_cluster_model(config)
            cluster_dir = harness.cluster(config['cluster_config'], cluster_model,
                                          inputs_path=os.path.join(reduction_dir, 'outputs.pt'))

        set_seed(config['seed'], use_cuda)  # reset random state
        dataloaders = harness.get_dataloaders(
            config, mode='george',
            subclass_labels=os.path.join(cluster_dir, 'clusters.pt'))
        model = harness.get_nn_model(config, num_classes=num_classes, mode='george')

        # Train the final (GEORGE) model
        george_dir = harness.classify(config['classification_config'], model, dataloaders,
                                      mode='george')
def main(): config = get_config() use_cuda = config["use_cuda"] and torch.cuda.is_available() set_seed(config["seed"], use_cuda) # set seeds for reproducibility init_cuda(config["deterministic"], config["allow_multigpu"]) torch.multiprocessing.set_sharing_strategy("file_system") harness = GEORGEHarness(config, use_cuda=use_cuda) harness.save_full_config(config) first_mode = "erm" if (config["mode"] == "george") else config["mode"] dataloaders = harness.get_dataloaders(config, mode=first_mode) num_classes = dataloaders["train"].dataset.get_num_classes("superclass") model = harness.get_nn_model(config, num_classes=num_classes, mode=first_mode) activ_done = config["activations_dir"] != "NONE" rep_done = config["representation_dir"] != "NONE" cluster_done = config["cluster_dir"] != "NONE" rep_done = ( rep_done or cluster_done ) # if we already have clusters, don't need to do reduction step activ_done = ( activ_done or rep_done ) # don't need to get activations if we already have reduced ones if config["classification_config"]["eval_only"]: assert activ_done if config["cluster_dir"] != "NONE": dataloaders = harness.get_dataloaders( config, mode=first_mode, subclass_labels=os.path.join( config["cluster_dir"], "clusters.pt") if os.path.isdir( config["cluster_dir"]) else config["cluster_dir"], ) # Train a model with ERM if activ_done and not (config["classification_config"]["eval_only"] or config["classification_config"]["save_act_only"]): erm_dir = config["activations_dir"] else: if (config["classification_config"]["eval_only"] or config["classification_config"]["save_act_only"]): erm_dir = config["activations_dir"] model_path = os.path.join( erm_dir, f'{config["classification_config"]["eval_mode"]}_model.pt') print(f"Loading model from {model_path}...") model.load_state_dict(torch.load(model_path)["state_dict"]) erm_dir = harness.classify(config["classification_config"], model, dataloaders, mode=first_mode) if (config["classification_config"]["eval_only"] or config["classification_config"]["save_act_only"]): exit() if config["mode"] == "george": if not config["classification_config"][ "bit_pretrained"] and not rep_done: model.load_state_dict( torch.load(os.path.join(erm_dir, "best_model.pt"))["state_dict"]) set_seed(config["seed"], use_cuda) # Dimensionality-reduce the model activations if rep_done: reduction_dir = config["representation_dir"] else: reduction_model = harness.get_reduction_model(config, nn_model=model) reduction_dir = harness.reduce( config["reduction_config"], reduction_model, inputs_path=os.path.join(erm_dir, "outputs.pt"), ) # Cluster the per-superclass features if cluster_done: cluster_dir = config["cluster_dir"] else: cluster_model = harness.get_cluster_model(config) cluster_dir = harness.cluster( config["cluster_config"], cluster_model, inputs_path=os.path.join(reduction_dir, "outputs.pt"), ) set_seed(config["seed"], use_cuda) # reset random state dataloaders = harness.get_dataloaders( config, mode="george", subclass_labels=os.path.join(cluster_dir, "clusters.pt"), ) model = harness.get_nn_model(config, num_classes=num_classes, mode="george") # Train the final (GEORGE) model george_dir = harness.classify(config["classification_config"], model, dataloaders, mode="george")