assert config.model_ind == model_ind

    if not hasattr(config, "use_doersch_datasets"):
        config.use_doersch_datasets = False

    if "Coco" in config.dataset:
        dataloaders_train, mapping_assignment_dataloader, mapping_test_dataloader = make_Coco_dataloaders(
            config)
        all_label_names = ["sky-stuff", "plant-stuff", "ground-stuff"]

        if config.include_things_labels:
            all_label_names += ["person-things"]
        if config.incl_animal_things:
            all_label_names += ["animal-things"]
    elif config.dataset == "Potsdam":
        dataloaders_train, mapping_assignment_dataloader, mapping_test_dataloader = make_Potsdam_dataloaders(
            config)
        if config.use_coarse_labels:
            all_label_names = [
                "roads and cars",
                "buildings and clutter",
                "vegetation and trees",
            ]
        else:
            all_label_names = [
                "roads",
                "buildings",
                "vegetation",
                "trees",
                "cars",
                "clutter",
            ]
Example #2
def main():
    # based on segmentation_multioutput_twohead - we pass in the config of the
    # IID run we are comparing against, so the settings can be copied

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_ind", type=int, required=True)
    parser.add_argument("--out_root",
                        type=str,
                        default="/scratch/shared/slow/xuji/iid_private")
    parser.add_argument("--IID_model_ind", type=int, required=True)
    parser.add_argument("--max_num_train", type=int, required=True)
    parser.add_argument("--test_code", default=False, action="store_true")
    parser.add_argument("--do_sift", default=False, action="store_true")

    config = parser.parse_args()
    config.out_dir = os.path.join(config.out_root, str(config.model_ind))
    if not os.path.exists(config.out_dir):
        os.makedirs(config.out_dir)

    archetype_config_path = os.path.join(config.out_root,
                                         str(config.IID_model_ind),
                                         "config.pickle")
    print("Loading archetype config from: %s" % archetype_config_path)
    with open(archetype_config_path, "rb") as config_f:
        archetype_config = pickle.load(config_f)
    assert (config.IID_model_ind == archetype_config.model_ind)
    assert (archetype_config.mode == "IID")  # compare against fully unsup

    sample_fn = _get_vectorised_colour_samples
    if config.do_sift:
        sample_fn = _get_vectorised_sift_samples

    # set input to be RGB only (plus IR if necessary) but no sobel - we're
    # clustering single-pixel colours (see the sampler sketch below)
    archetype_config.include_rgb = True
    archetype_config.no_sobel = True
    if "Coco" in archetype_config.dataset:
        assert (not archetype_config.using_IR)
        archetype_config.in_channels = 3
    elif archetype_config.dataset == "Potsdam":  # IR
        assert (archetype_config.using_IR)
        archetype_config.in_channels = 4
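
    # NOTE: _get_vectorised_colour_samples is not shown in this snippet. A
    # minimal sketch of such a per-pixel colour sampler - assuming each batch
    # yields a [N, C, H, W] float image tensor as its first element, and that
    # the real helper additionally returns labels when the loader provides
    # them - could look like this (illustration only, unused below):
    def _sketch_colour_samples(config, dataloader):
        chunks = []
        for batch in dataloader:
            imgs = batch[0]  # assumed [N, C, H, W]
            c = imgs.shape[1]
            # flatten every pixel into a C-dimensional colour vector
            chunks.append(imgs.permute(0, 2, 3, 1).reshape(-1, c).numpy())
        return np.concatenate(chunks, axis=0)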

    # Data
    # -------------------------------------------------------------------------
    if "Coco" in archetype_config.dataset:
        dataloaders_head_A, mapping_assignment_dataloader, \
        mapping_test_dataloader = \
            make_Coco_dataloaders(archetype_config)

    elif archetype_config.dataset == "Potsdam":
        dataloaders_head_A, mapping_assignment_dataloader, \
        mapping_test_dataloader = \
            make_Potsdam_dataloaders(archetype_config)
    else:
        raise NotImplementedError

    # unlike in the clustering script for STL, there isn't any data from
    # unknown classes
    dataloaders_head_B = dataloaders_head_A

    # networks and optimisers
    # ------------------------------------------------------
    assert (archetype_config.num_dataloaders == 1)
    dataloader = dataloaders_head_B[0]

    samples = sample_fn(archetype_config, dataloader)
    print("got training samples")
    sys.stdout.flush()

    if config.test_code:
        print("testing src, taking 10000 samples only")
        samples = samples[:10000, :]
    else:
        num_samples_train = min(samples.shape[0], config.max_num_train)
        print("taking %d samples" % num_samples_train)
        chosen_inds = np.random.choice(samples.shape[0],
                                       size=num_samples_train,
                                       replace=False)
        samples = samples[chosen_inds, :]
        print(samples.shape)
    sys.stdout.flush()

    kmeans = MiniBatchKMeans(n_clusters=archetype_config.gt_k,
                             verbose=1).fit(samples)
    print("trained kmeans")
    sys.stdout.flush()

    # use the mapping assignment set to match the output_k (= gt_k) clusters
    # to the gt_k labels, and also assess on its predictions, since it's
    # identical to mapping_test_dataloader
    assign_samples, assign_labels = sample_fn(archetype_config,
                                              mapping_assignment_dataloader)
    num_samples = assign_samples.shape[0]
    assign_preds = kmeans.predict(assign_samples)
    print("finished prediction for mapping assign/test data")
    sys.stdout.flush()

    assign_preds = torch.from_numpy(assign_preds).cuda()
    assign_labels = torch.from_numpy(assign_labels).cuda()

    if archetype_config.eval_mode == "hung":
        match = _hungarian_match(assign_preds,
                                 assign_labels,
                                 preds_k=archetype_config.gt_k,
                                 targets_k=archetype_config.gt_k)
    elif archetype_config.eval_mode == "orig":  # flat!
        match = _original_match(assign_preds,
                                assign_labels,
                                preds_k=archetype_config.gt_k,
                                targets_k=archetype_config.gt_k)
    elif archetype_config.eval_mode == "orig_soft":
        assert (False)  # not used
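
    # NOTE: _hungarian_match is not shown in this snippet. A minimal sketch,
    # assuming preds and targets are 1D integer tensors, could derive the
    # optimal one-to-one cluster-to-label assignment from the confusion
    # matrix via scipy (illustration only, unused below):
    def _sketch_hungarian_match(preds, targets, preds_k, targets_k):
        from scipy.optimize import linear_sum_assignment
        conf = np.zeros((preds_k, targets_k), dtype=np.int64)
        for p, t in zip(preds.cpu().numpy(), targets.cpu().numpy()):
            conf[p, t] += 1  # overlap of cluster p with label t
        rows, cols = linear_sum_assignment(-conf)  # maximise total overlap
        return list(zip(rows, cols))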

    # reorder predictions to be same cluster assignments as gt_k
    found = torch.zeros(archetype_config.gt_k)
    reordered_preds = torch.zeros(num_samples).to(torch.int32).cuda()
    for pred_i, target_i in match:
        reordered_preds[assign_preds == pred_i] = target_i
        found[pred_i] = 1
    assert found.sum() == archetype_config.gt_k  # each output_k must get mapped

    acc = _acc(reordered_preds, assign_labels, archetype_config.gt_k)

    print("got acc %f" % acc)
    config.epoch_acc = [acc]
    config.centroids = kmeans.cluster_centers_
    config.match = match

    # write results and centroids to model_ind output file
    with open(os.path.join(config.out_dir, "config.pickle"), "wb") as outfile:
        pickle.dump(config, outfile)

    with open(os.path.join(config.out_dir, "config.txt"), "w") as text_file:
        text_file.write("%s" % config)
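
# NOTE: _acc is not shown above. Under the standard clustering-accuracy
# definition - the fraction of samples whose matched prediction equals the
# ground-truth label - a minimal sketch, assuming 1D integer tensors of equal
# length, could be (illustration only; num_k mirrors the call signature):
def _sketch_acc(reordered_preds, targets, num_k):
    assert reordered_preds.shape == targets.shape
    return (reordered_preds == targets).float().mean().item()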
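
Example #3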
if config.restart:
    # copy over new num_epochs and lr schedule
    config.num_epochs = given_config.num_epochs
    config.lr_schedule = given_config.lr_schedule

# Data -------------------------------------------------------------------------

# datasets produce either 2 or 5 channel images based on config.include_rgb

# because fully unsupervised
assert config.mapping_assignment_partitions == config.mapping_test_partitions

if "Coco" in config.dataset:
    dataloaders, mapping_assignment_test_dataloader, _ = make_Coco_dataloaders(
        config)
elif config.dataset == "Potsdam":
    dataloaders, mapping_assignment_test_dataloader, _ = make_Potsdam_dataloaders(
        config)
else:
    raise NotImplementedError

num_train_batches = len(dataloaders[0])
print("length of train dataloader %d" % num_train_batches)
print("length of mapping assign and test dataloader %d" %
      len(mapping_assignment_test_dataloader))

assert len(dataloaders) == 1
dataloader = dataloaders[0]

# networks and optimisers ------------------------------------------------------

net = archs.__dict__[config.arch](config)  # type: ignore
choose_best = None
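
# NOTE: the "2 or 5 channel images" comment above presumably means 2 sobel
# channels (dx, dy), plus the 3 RGB channels when config.include_rgb is set.
# A minimal sketch of assembling such inputs, assuming img is a [N, 3, H, W]
# float tensor (illustration only; the real preprocessing may differ):
def _sketch_assemble_channels(img, include_rgb):
    import torch.nn.functional as F
    grey = img.mean(dim=1, keepdim=True)  # [N, 1, H, W]
    kx = torch.tensor([[1., 0., -1.],
                       [2., 0., -2.],
                       [1., 0., -1.]]).view(1, 1, 3, 3)
    ky = kx.transpose(2, 3).contiguous()  # sobel-y = transpose of sobel-x
    dx = F.conv2d(grey, kx, padding=1)
    dy = F.conv2d(grey, ky, padding=1)
    out = torch.cat([dx, dy], dim=1)  # 2 sobel channels
    if include_rgb:
        out = torch.cat([img, out], dim=1)  # 3 RGB + 2 sobel = 5 channels
    return out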