Example #1
# Imports these examples rely on; FLAGS, get_data, and TaskGenerator are
# defined elsewhere in the repository and are not shown here.
import time
from collections import defaultdict

import numpy as np
import tensorflow as tf
from scipy.spatial import distance
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from tqdm import tqdm


def embedding_nearest_neighbour(n_neighbors=FLAGS.n_neighbours,
                                num_classes=FLAGS.way,
                                num_shots=FLAGS.shot,
                                num_tasks=FLAGS.num_tasks,
                                num_encoding_dims=FLAGS.num_encoding_dims,
                                test_set=FLAGS.test_set,
                                dataset=FLAGS.dataset):
    print('{}-way {}-shot embedding nearest neighbour'.format(
        num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims,
                                                   test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    accuracies = []

    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, accuracy {:.5}'.format(i_task + 1,
                                                   np.mean(accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

        knn = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1)
        knn.fit(Z_train_few, Y_train_few)
        accuracy = knn.score(Z_test_few, Y_test_few)

        accuracies.append(accuracy)

    print(
        '{}-way {}-shot embedding nearest neighbour: {:.5} with 95% CI {:.5} over {} tests'
        .format(num_classes, num_shots, np.mean(accuracies),
                1.96 * np.std(accuracies) / np.sqrt(num_tasks), num_tasks))
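A hypothetical invocation of this baseline, for orientation; it assumes the module's flags have been parsed by the surrounding script, and the argument values (including the dataset name) are illustrative rather than taken from a verified entry point:

# Hedged usage sketch: overrides the FLAGS-derived defaults explicitly.
embedding_nearest_neighbour(n_neighbors=1,
                            num_classes=5,
                            num_shots=1,
                            num_tasks=1000,
                            dataset='omniglot')  # assumed dataset name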
Example #2
    def make_data_tensor(self, train=True):
        """Build a TF1 tf.data pipeline of few-shot tasks and return symbolic
        (features, labels) batches; expects an active default session."""
        if train:
            mode = FLAGS.mt_mode
            num_classes = self.num_classes_train
            num_tasks = FLAGS.metatrain_iterations * self.batch_size
            num_splits = 1000
            if FLAGS.num_partitions == -1:
                num_partitions = num_tasks
            else:
                num_partitions = FLAGS.num_partitions
            if FLAGS.datasource == 'celeba':
                assert num_classes == 2, "CelebA must have two classes"
                X, attributes, Z = self.X_train, self.attributes_train, self.Z_train
            else:
                X, Y, Z = self.X_train, self.Y_train, self.Z_train
            num_samples_per_class = self.num_samples_per_class_train
            num_train_samples_per_class = FLAGS.inner_update_batch_size_train
            print('Setting up tasks for meta-training')
        else:
            mode = FLAGS.mv_mode
            if mode == 'encenc':
                raise NotImplementedError
            num_tasks = FLAGS.num_eval_tasks
            num_splits = 100
            num_partitions = num_tasks
            if FLAGS.datasource == 'celeba':
                X, attributes, Z = self.X_val, self.attributes_val, self.Z_val
            else:
                X, Y, Z = self.X_val, self.Y_val, self.Z_val
            num_classes = self.num_classes_val
            num_samples_per_class = self.num_samples_per_class_val
            num_train_samples_per_class = FLAGS.inner_update_batch_size_val
            print('Setting up tasks for meta-val')

        task_generator = TaskGenerator(
            num_classes=num_classes,
            num_train_samples_per_class=num_train_samples_per_class,
            num_samples_per_class=num_samples_per_class)
        partition_algorithm = FLAGS.partition_algorithm
        margin = FLAGS.margin

        print('Generating indices for {} tasks'.format(num_tasks))
        # 'gtgt': build tasks from ground-truth labels (CelebA attributes or Y)
        if mode == 'gtgt':
            if FLAGS.datasource == 'celeba':
                partitions = task_generator.get_celeba_task_pool(
                    attributes=attributes)
            else:
                print('Using ground truth partition to create classes')
                partition = task_generator.get_partition_from_labels(labels=Y)
                partitions = [partition]
        # 'encenc': build tasks from unsupervised partitions of the encoding space
        elif mode == 'encenc':
            if partition_algorithm == 'hyperplanes':
                print(
                    'Using {} hyperplanes-based partition(s) of encoding space to create classes, margin={}'
                    .format(num_partitions, margin))
                partitions = task_generator.get_partitions_hyperplanes(
                    encodings=Z,
                    num_splits=num_splits,
                    margin=margin,
                    num_partitions=num_partitions)
            elif partition_algorithm == 'kmeans':
                print(
                    'Using {} k-means based partition(s) of encoding space to create classes'
                    .format(num_partitions))
                partitions = task_generator.get_partitions_kmeans(encodings=Z,
                                                                  train=train)
            else:
                raise ValueError(
                    'Unrecognized partition-generating algorithm: either hyperplanes or kmeans'
                )
        # 'randrand': build tasks from uniformly random cluster assignments
        elif mode == 'randrand':
            print('Randomly sampled and labeled tasks')
            partitions = []
            for p in tqdm(range(num_partitions)):
                labels = np.random.choice(FLAGS.num_clusters,
                                          size=Y.shape,
                                          replace=True)
                partition = task_generator.get_partition_from_labels(
                    labels=labels)
                partitions.append(partition)
        else:
            raise ValueError('Unrecognized task generation scheme')
        print('Average number of classes per partition: {}'.format(
            np.mean([len(list(partition.keys()))
                     for partition in partitions])))
        if FLAGS.on_encodings:
            features = features_ph = tf.placeholder(Z.dtype, Z.shape)
        else:
            assert X.dtype == 'uint8'
            features_ph = tf.placeholder(X.dtype, X.shape)
            features = tf.reshape(features_ph, [-1, self.dim_input])

        def gather_preprocess(task):
            for split in ['train', 'test']:
                task['{}_labels'.format(split)] = tf.one_hot(
                    task['{}_labels'.format(split)], num_classes)
                if not FLAGS.on_encodings:
                    task['{}_features'.format(split)] = tf.cast(
                        tf.gather(features, task['{}_indices'.format(split)]),
                        tf.float32) / 255.0
                else:
                    task['{}_features'.format(split)] = tf.gather(
                        features, task['{}_indices'.format(split)])
            return task

        def stack(task):
            features = tf.concat(
                (task['train_features'], task['test_features']), axis=0)
            labels = tf.concat((task['train_labels'], task['test_labels']),
                               axis=0)
            return features, labels

        tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                         partitions=partitions)
        # Unzip the per-task (train_ind, train_labels, test_ind, test_labels) tuples
        train_ind, train_labels, test_ind, test_labels = (
            list(x) for x in zip(*tasks))

        dataset = tf.data.Dataset.from_tensor_slices({
            "train_indices": train_ind,
            "train_labels": train_labels,
            "test_indices": test_ind,
            "test_labels": test_labels
        })
        dataset = dataset.map(map_func=gather_preprocess,
                              num_parallel_calls=FLAGS.num_parallel_calls)
        dataset = dataset.map(map_func=stack,
                              num_parallel_calls=FLAGS.num_parallel_calls)
        dataset = dataset.batch(batch_size=self.batch_size)
        dataset = dataset.prefetch(4)
        dataset = dataset.repeat()
        iterator = dataset.make_initializable_iterator()
        features_batch, labels_batch = iterator.get_next()

        if FLAGS.on_encodings:
            iterator.initializer.run(feed_dict={features_ph: Z})
        else:
            iterator.initializer.run(feed_dict={features_ph: X})

        return features_batch, labels_batch
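make_data_tensor builds its pipeline in TF1 graph mode and runs iterator.initializer eagerly, so a default session must already be active when it is called. A minimal sketch of that calling convention, assuming a hypothetical data_generator instance of the surrounding class:

# Sketch under stated assumptions: TF1 graph mode and a data_generator
# object (an instance of the class defining make_data_tensor above).
sess = tf.InteractiveSession()  # registers itself as the default session
features_batch, labels_batch = data_generator.make_data_tensor(train=True)
# Each fetched batch holds batch_size tasks; per task, the train and
# test splits are concatenated along the samples axis by stack() above.
features, labels = sess.run([features_batch, labels_batch])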
Example #3
def embedding_cluster_matching(num_classes=FLAGS.way,
                               num_shots=FLAGS.shot,
                               num_tasks=FLAGS.num_tasks,
                               num_clusters=FLAGS.num_clusters,
                               num_encoding_dims=FLAGS.num_encoding_dims,
                               dataset=FLAGS.dataset,
                               test_set=FLAGS.test_set):
    if dataset != 'celeba':
        _, _, Z_train, X_test, Y_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
    else:
        _, _, Z_train, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)

    start = time.time()
    # Note: precompute_distances and n_jobs were removed in scikit-learn 1.0;
    # they are omitted here so the snippet runs against modern versions.
    kmeans = KMeans(n_clusters=num_clusters,
                    init='k-means++',
                    random_state=0,
                    n_init=10,
                    max_iter=3000).fit(Z_train)
    print(
        "Ran KMeans with n_clusters={} in {:.5} seconds, objective {}.".format(
            num_clusters,
            time.time() - start, kmeans.score(Z_train)))

    if dataset != 'celeba':
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    # Single-element loop, presumably left over from a sweep over shot counts.
    for num_shots in [FLAGS.shot]:
        accuracies = []
        start = time.time()
        num_degenerate_tasks = 0

        for i_task, task in enumerate(tasks):
            if (i_task + 1) % (num_tasks // 10) == 0:
                print('test {}, accuracy {:.5}'.format(i_task + 1,
                                                       np.mean(accuracies)))

            ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
            Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

            clusters_to_labels_few = defaultdict(list)
            examples_to_clusters_few = kmeans.predict(Z_train_few)
            for i in range(len(Y_train_few)):
                clusters_to_labels_few[examples_to_clusters_few[i]].append(
                    Y_train_few[i])
            for (cluster, labels) in list(clusters_to_labels_few.items()):
                uniques, counts = np.unique(labels, return_counts=True)
                clusters_to_labels_few[cluster] = [uniques[np.argmax(counts)]]
                # if len(np.unique(labels)) > 1:      # delete degenerate clusters
                #     del clusters_to_labels_few[cluster]
            if len(clusters_to_labels_few) == 0:
                num_degenerate_tasks += 1
                continue
            centroid_ind_to_cluster = np.array(
                list(clusters_to_labels_few.keys()))
            centroids = kmeans.cluster_centers_[centroid_ind_to_cluster]
            distances = distance.cdist(Z_test_few, centroids)
            predicted_clusters = centroid_ind_to_cluster[np.argmin(distances,
                                                                   axis=1)]
            predictions = []
            for cluster in predicted_clusters:
                predictions.append(clusters_to_labels_few[cluster][0])

            accuracies.append(accuracy_score(Y_test_few, predictions))
        print('dataset={}, encoder={}, num_encoding_dims={}, num_clusters={}'.
              format(dataset, FLAGS.encoder, num_clusters, num_encoding_dims))
        print(
            '{}-way {}-shot nearest-cluster after clustering embeddings: {:.5} with 95% CI {:.5} over {} tests'
            .format(num_classes, num_shots, np.mean(accuracies),
                    1.96 * np.std(accuracies) / np.sqrt(num_tasks), num_tasks))
        print(
            '{} few-shot classification tasks: {:.5} seconds with {} degenerate tasks.'
            .format(num_tasks,
                    time.time() - start, num_degenerate_tasks))
Example #4
def cluster_color_logistic_regression(
        C=FLAGS.inverse_reg,
        penalty='l2',
        multi_class='multinomial',
        n_clusters=FLAGS.num_clusters,
        num_classes=FLAGS.way,
        num_shots=FLAGS.shot,
        num_tasks=FLAGS.num_tasks,
        num_encoding_dims=FLAGS.num_encoding_dims,
        test_set=FLAGS.test_set,
        dataset=FLAGS.dataset):
    if dataset != 'celeba':
        _, _, Z_train, X_test, Y_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
    else:
        _, _, Z_train, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)

    start = time.time()
    # As above, precompute_distances and n_jobs are omitted for compatibility
    # with scikit-learn >= 1.0.
    kmeans = KMeans(n_clusters=n_clusters,
                    n_init=100).fit(Z_train)
    print("Ran KMeans with n_clusters={} in {:.5} seconds.".format(
        n_clusters,
        time.time() - start))
    uniques, counts = np.unique(kmeans.labels_, return_counts=True)  # cluster sizes (unused below)

    if dataset != 'celeba':
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    train_accuracies, test_accuracies = [], []
    start = time.time()
    clusters_to_indices = task_generator.get_partition_from_labels(
        kmeans.labels_)
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies),
                np.mean(test_accuracies)))

        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        clusters_to_labels_few = defaultdict(list)
        indices_to_clusters_few = kmeans.predict(Z_train_few)
        for i in range(Z_train_few.shape[0]):
            clusters_to_labels_few[indices_to_clusters_few[i]].append(
                Y_train_few[i])
        Z_train_fit, Y_train_fit = [], []
        for cluster in list(clusters_to_labels_few.keys()):
            labels = clusters_to_labels_few[cluster]
            if len(np.unique(labels)) == 1:  # skip degenerate clusters
                Z_train_fit.extend(
                    Z_train[clusters_to_indices[cluster]]
                )  # propagate labels to unlabeled datapoints
                Y_train_fit.extend([
                    labels[0] for i in range(len(clusters_to_indices[cluster]))
                ])
        Z_train_fit = np.stack(Z_train_fit, axis=0)
        Y_train_fit = np.stack(Y_train_fit, axis=0)
        Z_train_fit = np.concatenate((Z_train_fit, Z_train_few), axis=0)
        Y_train_fit = np.concatenate((Y_train_fit, Y_train_few), axis=0)

        logistic_regression = LogisticRegression(n_jobs=-1,
                                                 penalty=penalty,
                                                 C=C,
                                                 multi_class=multi_class,
                                                 solver='saga',
                                                 max_iter=500)
        logistic_regression.fit(Z_train_fit, Y_train_fit)
        test_accuracies.append(
            logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(
            logistic_regression.score(Z_train_fit, Y_train_fit))
    print('n_clusters={}, penalty={}, C={}, multi_class={}'.format(
        n_clusters, penalty, C, multi_class))
    print(
        '{}-way {}-shot logistic regression after clustering: {:.5} with 95% CI {:.5} over {} tests'
        .format(num_classes, num_shots, np.mean(test_accuracies),
                1.96 * np.std(test_accuracies) / np.sqrt(num_tasks),
                num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(
        num_tasks,
        time.time() - start))
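Design note: relative to the plain logistic-regression baseline in the next example, the extra step here is a simple form of label propagation. A cluster whose few-shot training labels are unanimous donates all of its training-split members, tagged with that single label, to the regression's training set; mixed clusters contribute nothing beyond the labeled few-shot points that are concatenated in afterwards.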
Example #5
def embedding_logistic_regression(C=FLAGS.inverse_reg,
                                  penalty='l2',
                                  multi_class='multinomial',
                                  num_classes=FLAGS.way,
                                  num_shots=FLAGS.shot,
                                  num_tasks=FLAGS.num_tasks,
                                  num_encoding_dims=FLAGS.num_encoding_dims,
                                  test_set=FLAGS.test_set,
                                  dataset=FLAGS.dataset):
    print('{}-way {}-shot logistic regression'.format(num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims,
                                                   test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    train_accuracies, test_accuracies = [], []

    start = time.time()
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies),
                np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

        logistic_regression = LogisticRegression(n_jobs=-1,
                                                 penalty=penalty,
                                                 C=C,
                                                 multi_class=multi_class,
                                                 solver='saga',
                                                 max_iter=1000)
        logistic_regression.fit(Z_train_few, Y_train_few)
        test_accuracies.append(
            logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(
            logistic_regression.score(Z_train_few, Y_train_few))
    print('penalty={}, C={}, multi_class={}'.format(penalty, C, multi_class))
    print(
        '{}-way {}-shot logistic regression: {:.5} with 95% CI {:.5} over {} tests'
        .format(num_classes, num_shots, np.mean(test_accuracies),
                1.96 * np.std(test_accuracies) / np.sqrt(num_tasks),
                num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(
        num_tasks,
        time.time() - start))
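As with the nearest-neighbour baseline, the function drives itself once flags are parsed; a hypothetical call with explicit hyperparameters, values illustrative only:

embedding_logistic_regression(C=1.0,  # inverse regularization strength
                              num_classes=5,
                              num_shots=1,
                              num_tasks=1000)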
Example #6
def embedding_mlp(num_classes=FLAGS.way,
                  num_shots=FLAGS.shot,
                  num_tasks=FLAGS.num_tasks,
                  num_encoding_dims=FLAGS.num_encoding_dims,
                  test_set=FLAGS.test_set,
                  dataset=FLAGS.dataset,
                  units=FLAGS.units,
                  dropout=FLAGS.dropout):
    import keras
    from keras.layers import Dense, Dropout
    from keras.losses import categorical_crossentropy
    from keras.callbacks import EarlyStopping
    from keras import backend as K

    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims,
                                                   test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    train_accuracies, test_accuracies = [], []

    start = time.time()
    for i_task, task in enumerate(tqdm(tasks)):
        if (i_task + 1) % (num_tasks // 10) == 0:
            tqdm.write('test {}, accuracy {:.5}'.format(
                i_task + 1, np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        Y_train_few = keras.utils.to_categorical(Y_train_few, num_classes=num_classes)
        Y_test_few = keras.utils.to_categorical(Y_test_few, num_classes=num_classes)

        model = keras.Sequential()
        model.add(
            Dense(units=units,
                  activation='relu',
                  input_dim=Z_train_few.shape[1]))
        model.add(Dropout(rate=dropout))
        model.add(Dense(units=num_classes, activation='softmax'))
        model.compile(loss=categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(),
                      metrics=['accuracy'])
        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        model.fit(Z_train_few,
                  Y_train_few,
                  batch_size=Z_train_few.shape[0],
                  epochs=500,
                  verbose=0,
                  # note: early stopping monitors loss on the task's test split
                  validation_data=(Z_test_few, Y_test_few),
                  callbacks=[early_stopping])
        train_score = model.evaluate(Z_train_few, Y_train_few, verbose=0)
        train_accuracies.append(train_score[1])
        test_score = model.evaluate(Z_test_few, Y_test_few, verbose=0)
        test_accuracies.append(test_score[1])
        K.clear_session()

    print('units={}, dropout={}'.format(units, dropout))
    print(
        '{}-way {}-shot embedding mlp: {:.5} with 95% CI {:.5} over {} tests'.
        format(num_classes, num_shots, np.mean(test_accuracies),
               1.96 * np.std(test_accuracies) / np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(
        num_tasks,
        time.time() - start))
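For reference, the per-task classifier in isolation: a minimal sketch of the same small MLP (one ReLU hidden layer with dropout), where the 256-dimensional input and the concrete units/dropout values are assumptions for illustration:

import keras
from keras.layers import Dense, Dropout

model = keras.Sequential([
    Dense(units=128, activation='relu', input_dim=256),  # assumed sizes
    Dropout(rate=0.5),
    Dense(units=5, activation='softmax'),
])
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])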