for model_ind in model_inds:
  out_dir = os.path.join(out_root, str(model_ind))
  net_names = [net_name_prefix + "_net.pytorch"]

  reloaded_config_path = os.path.join(out_dir, "config.pickle")
  print("Loading restarting config from: %s" % reloaded_config_path)
  with open(reloaded_config_path, "rb") as config_f:
    config = pickle.load(config_f)
  assert config.model_ind == model_ind

  if not hasattr(config, "use_doersch_datasets"):
    config.use_doersch_datasets = False

  if "Coco" in config.dataset:
    dataloaders_train, mapping_assignment_dataloader, \
    mapping_test_dataloader = make_Coco_dataloaders(config)

    all_label_names = ["sky-stuff", "plant-stuff", "ground-stuff"]
    if config.include_things_labels:
      all_label_names += ["person-things"]
    if config.incl_animal_things:
      all_label_names += ["animal-things"]
  elif config.dataset == "Potsdam":
    dataloaders_train, mapping_assignment_dataloader, \
    mapping_test_dataloader = make_Potsdam_dataloaders(config)

    if config.use_coarse_labels:
      all_label_names = ["roads and cars",
                         "buildings and clutter",
                         "vegetation and trees"]
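# Illustrative sketch (not part of the original script): how the checkpoints
# named in net_names above would typically be reloaded, assuming they were
# written with torch.save(net.state_dict(), path). make_net is a hypothetical
# factory that builds the architecture from the reloaded config; the script's
# existing os / torch imports are assumed.
def reload_nets(config, out_dir, net_names, make_net):
  nets = []
  for net_name in net_names:
    net = make_net(config)
    state = torch.load(os.path.join(out_dir, net_name), map_location="cpu")
    net.load_state_dict(state)
    net.eval()  # evaluation only: freeze dropout/batchnorm behaviour
    nets.append(net)
  return nets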
def main():
  # based on segmentation_multioutput_twohead - we pass in the config of the
  # IID run we are comparing against, so the settings can be copied
  parser = argparse.ArgumentParser()
  parser.add_argument("--model_ind", type=int, required=True)
  parser.add_argument("--out_root", type=str,
                      default="/scratch/shared/slow/xuji/iid_private")
  parser.add_argument("--IID_model_ind", type=int, required=True)
  parser.add_argument("--max_num_train", type=int, required=True)
  parser.add_argument("--test_code", default=False, action="store_true")
  parser.add_argument("--do_sift", default=False, action="store_true")
  config = parser.parse_args()

  config.out_dir = os.path.join(config.out_root, str(config.model_ind))
  if not os.path.exists(config.out_dir):
    os.makedirs(config.out_dir)

  archetype_config_path = os.path.join(config.out_root,
                                       str(config.IID_model_ind),
                                       "config.pickle")
  print("Loading archetype config from: %s" % archetype_config_path)
  with open(archetype_config_path, "rb") as config_f:
    archetype_config = pickle.load(config_f)
  assert config.IID_model_ind == archetype_config.model_ind
  assert archetype_config.mode == "IID"  # compare against fully unsupervised

  sample_fn = _get_vectorised_colour_samples
  if config.do_sift:
    sample_fn = _get_vectorised_sift_samples

  # set it to be only rgb (and IR if necessary) but no sobel - we're
  # clustering single pixel colours
  archetype_config.include_rgb = True
  archetype_config.no_sobel = True
  if "Coco" in archetype_config.dataset:
    assert not archetype_config.using_IR
    archetype_config.in_channels = 3
  elif archetype_config.dataset == "Potsdam":
    assert archetype_config.using_IR  # IR
    archetype_config.in_channels = 4

  # Data
  # ---------------------------------------------------------------------------

  if "Coco" in archetype_config.dataset:
    dataloaders_head_A, mapping_assignment_dataloader, \
    mapping_test_dataloader = make_Coco_dataloaders(archetype_config)
  elif archetype_config.dataset == "Potsdam":
    dataloaders_head_A, mapping_assignment_dataloader, \
    mapping_test_dataloader = make_Potsdam_dataloaders(archetype_config)
  else:
    raise NotImplementedError

  # unlike in the clustering script for STL, there isn't any data from
  # unknown classes
  dataloaders_head_B = dataloaders_head_A

  # networks and optimisers
  # ---------------------------------------------------------------------------

  assert archetype_config.num_dataloaders == 1
  dataloader = dataloaders_head_B[0]

  samples = sample_fn(archetype_config, dataloader)
  print("got training samples")
  sys.stdout.flush()

  if config.test_code:
    print("testing code, taking 10000 samples only")
    samples = samples[:10000, :]
  else:
    num_samples_train = min(samples.shape[0], config.max_num_train)
    print("taking %d samples" % num_samples_train)
    chosen_inds = np.random.choice(samples.shape[0], size=num_samples_train,
                                   replace=False)
    samples = samples[chosen_inds, :]

  print(samples.shape)
  sys.stdout.flush()

  kmeans = MiniBatchKMeans(n_clusters=archetype_config.gt_k,
                           verbose=1).fit(samples)
  print("trained kmeans")
  sys.stdout.flush()

  # use the mapping assignment data to assign the output_k=gt_k clusters to
  # the gt_k ground truth classes, and also assess on its predictions, since
  # it's identical to mapping_test_dataloader
  assign_samples, assign_labels = sample_fn(archetype_config,
                                            mapping_assignment_dataloader)
  num_samples = assign_samples.shape[0]
  assign_preds = kmeans.predict(assign_samples)
  print("finished prediction for mapping assign/test data")
  sys.stdout.flush()

  assign_preds = torch.from_numpy(assign_preds).cuda()
  assign_labels = torch.from_numpy(assign_labels).cuda()

  if archetype_config.eval_mode == "hung":
    match = _hungarian_match(assign_preds, assign_labels,
                             preds_k=archetype_config.gt_k,
                             targets_k=archetype_config.gt_k)
  elif archetype_config.eval_mode == "orig":  # flat!
    match = _original_match(assign_preds, assign_labels,
                            preds_k=archetype_config.gt_k,
                            targets_k=archetype_config.gt_k)
  elif archetype_config.eval_mode == "orig_soft":
    assert False  # not used

  # reorder predictions to use the same cluster assignments as the ground truth
  found = torch.zeros(archetype_config.gt_k)
  reordered_preds = torch.zeros(num_samples).to(torch.int32).cuda()
  for pred_i, target_i in match:
    reordered_preds[assign_preds == pred_i] = target_i
    found[pred_i] = 1
  assert found.sum() == archetype_config.gt_k  # each output_k must get mapped

  acc = _acc(reordered_preds, assign_labels, archetype_config.gt_k)
  print("got acc %f" % acc)

  config.epoch_acc = [acc]
  config.centroids = kmeans.cluster_centers_
  config.match = match

  # write results and centroids to the model_ind output files
  with open(os.path.join(config.out_dir, "config.pickle"), "wb") as outfile:
    pickle.dump(config, outfile)

  with open(os.path.join(config.out_dir, "config.txt"), "w") as text_file:
    text_file.write("%s" % config)
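# Illustrative sketch (an assumption, not the repo's _hungarian_match): the
# optimal one-to-one mapping between the gt_k predicted clusters and the gt_k
# ground-truth classes can be computed with scipy's linear_sum_assignment on
# the negated contingency matrix, which is exactly the Hungarian method that
# eval_mode == "hung" refers to above.
import numpy as np
from scipy.optimize import linear_sum_assignment

def hungarian_match_sketch(preds, targets, k):
  # preds, targets: equal-length 1D integer arrays with values in [0, k)
  contingency = np.zeros((k, k), dtype=np.int64)
  for p, t in zip(preds, targets):
    contingency[p, t] += 1
  # maximising matched counts == minimising negated counts
  pred_inds, target_inds = linear_sum_assignment(-contingency)
  return list(zip(pred_inds, target_inds))  # [(pred_i, target_i), ...]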
assert config.model_ind == given_config.model_ind
config.restart = True

# copy over new num_epochs and lr schedule
config.num_epochs = given_config.num_epochs
config.lr_schedule = given_config.lr_schedule

# Data
# -------------------------------------------------------------------------------

# datasets produce either 2 or 5 channel images based on config.include_rgb
# because fully unsupervised
assert config.mapping_assignment_partitions == config.mapping_test_partitions
if "Coco" in config.dataset:
  dataloaders, mapping_assignment_test_dataloader, _ = \
    make_Coco_dataloaders(config)
elif config.dataset == "Potsdam":
  dataloaders, mapping_assignment_test_dataloader, _ = \
    make_Potsdam_dataloaders(config)
else:
  raise NotImplementedError

num_train_batches = len(dataloaders[0])
print("length of train dataloader %d" % num_train_batches)
print("length of mapping assign and test dataloader %d" %
      len(mapping_assignment_test_dataloader))

assert len(dataloaders) == 1
dataloader = dataloaders[0]

# networks and optimisers ------------------------------------------------------
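# Illustrative helper (an assumption, not part of the original file): the
# "2 or 5 channel" comment above follows from the preprocessing flags -
# Sobel filtering contributes 2 gradient channels, and config.include_rgb adds
# the 3 colour channels on top (plus 1 for IR on Potsdam, consistent with the
# kmeans baseline above, which sets in_channels to 3 for Coco and 4 for
# Potsdam when Sobel is disabled).
def expected_in_channels(include_rgb, no_sobel, using_IR=False):
  channels = 0 if no_sobel else 2   # Sobel x/y gradient maps
  if include_rgb:
    channels += 3                   # raw RGB
  if using_IR:
    channels += 1                   # infrared band (Potsdam)
  return channels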