Example 1
# NOTE: FINISH, PROCESSORS, the MIN/MAX processing-time constants and TaskGenerator
# are assumed to come from the surrounding module; the imports below are stdlib.
from random import randint
from time import sleep


def generate_tasks(interval, task_queue):
    global FINISH, PROCESSORS, MIN_EXPECTED_TIME_TO_PROCESS, MAX_EXPECTED_TIME_TO_PROCESS
    _taskGenerator = TaskGenerator(MIN_EXPECTED_TIME_TO_PROCESS,
                                   MAX_EXPECTED_TIME_TO_PROCESS, PROCESSORS)
    while not FINISH:
        sleep(interval)
        if randint(0, 1):  # coin flip: enqueue a new task about half the time
            task_queue.append(_taskGenerator.new_task())
Example 2
def main():

    # Step 1: init data folders
    print("init dataset")

    torch.cuda.manual_seed_all(1)

    if args.dataset == 'miniimagenet':
        if args.valid_set == 1:
            metatrain_folder, metatest_folder = mini_imagenet_folder(
                config.miniimagenet_trainvalfolder,
                config.miniimagenet_testfolder)
        else:
            metatrain_folder, metatest_folder = mini_imagenet_folder(
                config.miniimagenet_trainfolder,
                config.miniimagenet_testfolder)
        task_generator = TaskGenerator(metatrain_folder, metatest_folder)

    elif args.dataset == 'tieredimagenet':
        pass

    elif args.dataset == 'cub':
        pass

    elif args.dataset == 'car':
        pass

    elif args.dataset == 'aircraft':
        pass

    # step 2: init neural networks
    print('init neural networks')
    dcn = DCN(args.way,
              args.shot,
              args.query,
              args.embedding_class,
              with_variation=bool(args.variational),
              weight_or_not=args.weight_or_not,
              loss=args.loss)
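    # Wrap the embedding and relation sub-networks in DataParallel across two consecutive GPUs (args.gpu and args.gpu + 1).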
    dcn.embedding = nn.DataParallel(dcn.embedding,
                                    device_ids=[args.gpu, args.gpu + 1])
    dcn.relation = nn.DataParallel(dcn.relation,
                                   device_ids=[args.gpu, args.gpu + 1])
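    # Load a saved checkpoint whose filename encodes the training configuration; map_location remaps the stored CUDA tensors onto the selected GPU.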
    dcn.load_state_dict(
        torch.load("../models/VDRN-" + str(args.model_episode) + "-" +
                   str(args.embedding_class) + "-" + args.dataset + "-" +
                   args.loss + "-var" + str(args.variational) + "-shot" +
                   str(args.shot) + "-" + str(args.weight_or_not) + ".pkl",
                   map_location={'cuda:': 'cuda:' + str(args.gpu)}))
    #     dcn.load_state_dict(torch.load("../models/VDRN-"+str(args.model_episode)+"-"+str(args.embedding_class) + "-" + args.dataset + "-"+ str(args.variational) + "-shot"+ str(args.shot)+ ".pkl",map_location={'cuda:':'cuda:'+str(args.gpu)}))
    print("load model ok!")
    dcn.to(device)

    test(dcn, task_generator)
Example 3
    def execute(self):
        if len(sys.argv) > 1:
            argument = sys.argv[1]
            if argument == 'gen':
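                # 'gen' mode: build a random task set from the given count and utilization, then write it to a file.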
                if len(sys.argv) > 4:
                    task_generator = TaskGenerator(
                        int(sys.argv[2]), int(sys.argv[3])
                    )  # argv[2] = number of tasks; argv[3] = utilization percentage
                    tasks = task_generator.generate_tasks()
                    self.w_file(tasks, sys.argv[4])
                    # file = FileCreator()
                    # file.write_file(tasks, sys.argv[4])
                    # print("Tasks written successfully ")
                else:
                    print(
                        "Arguments missing: Please use the form: gen  number_of_tasks  utilization_percentage  "
                        "filename.txt")
            elif argument == 'fdms':
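                # 'fdms' mode: read tasks from a file, run Fdms on them and print the result.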
                tasks = self.r_file(sys.argv[2])
                tasks_fdms = Fdms(tasks)
                tasks_fdms.fdms()
                tasks_fdms.print_s()
            elif argument == 'simulation':
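                # 'simulation' mode: run Fdms, then simulate scheduling of the task set up to stop_time.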
                tasks = self.r_file(sys.argv[2])
                stop_time = int(sys.argv[3])
                fdms = Fdms(tasks)
                fdms.fdms()
                scheduler = SimulateDual(tasks, stop_time, False, True)
                scheduler.simulate()

            elif argument == 'simulation_graph':
                tasks = self.r_file(sys.argv[2])
                stop_time = int(sys.argv[3])
                fdms = Fdms(tasks)
                fdms.fdms()
                scheduler = SimulateDual(tasks, stop_time, True, False)
                scheduler.simulate()

        else:
            print("Insufficient arguments, please use correct syntax")
Example 4
def main():

    # Step 1: init data folders
    print("init dataset")

    if args.dataset == 'miniimagenet':
        if args.valid_set == 1:
            metatrain_folder, metatest_folder = mini_imagenet_folder(
                config.miniimagenet_trainvalfolder,
                config.miniimagenet_testfolder)
        else:
            metatrain_folder, metatest_folder = mini_imagenet_folder(
                config.miniimagenet_trainfolder, config.miniimagenet_valfolder)
        task_generator = TaskGenerator(metatrain_folder, metatest_folder)

    elif args.dataset == 'tieredimagenet':
        pass

    elif args.dataset == 'cub':
        pass

    elif args.dataset == 'car':
        pass

    elif args.dataset == 'aircraft':
        pass

    # step 2: init neural networks
    print('init neural networks')
    dcn = DCN(args.way,
              args.shot,
              args.query,
              args.embedding_class,
              with_variation=bool(args.variational),
              weight_or_not=args.weight_or_not,
              loss=args.loss)
    dcn.embedding = nn.DataParallel(dcn.embedding,
                                    device_ids=[args.gpu, args.gpu + 1])
    dcn.relation = nn.DataParallel(dcn.relation,
                                   device_ids=[args.gpu, args.gpu + 1])
    dcn.to(device)

    if args.train_embedding:
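        # Pre-train the embedding network and free cached GPU memory before training the relation network.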
        embedding_train(dcn, task_generator)
        torch.cuda.empty_cache()
    relation_train(dcn, task_generator)
Example 5
def embedding_nearest_neighbour(n_neighbors=FLAGS.n_neighbours,
                                num_classes=FLAGS.way,
                                num_shots=FLAGS.shot,
                                num_tasks=FLAGS.num_tasks,
                                num_encoding_dims=FLAGS.num_encoding_dims,
                                test_set=FLAGS.test_set,
                                dataset=FLAGS.dataset):
    print('{}-way {}-shot embedding nearest neighbour'.format(
        num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims,
                                                   test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    accuracies = []

    for i_task, task in enumerate(tasks):
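        # Fit a k-NN classifier on this task's support embeddings and score it on the query embeddings.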
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, accuracy {:.5}'.format(i_task + 1,
                                                   np.mean(accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

        knn = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1)
        knn.fit(Z_train_few, Y_train_few)
        accuracy = knn.score(Z_test_few, Y_test_few)

        accuracies.append(accuracy)

    print(
        '{}-way {}-shot embedding nearest neighbour: {:.5} with 95% CI {:.5} over {} tests'
        .format(num_classes, num_shots, np.mean(accuracies),
                1.96 * np.std(accuracies) / np.sqrt(num_tasks), num_tasks))
Example 6
    def make_data_tensor(self, train=True):
        if train:
            mode = FLAGS.mt_mode
            num_classes = self.num_classes_train
            num_tasks = FLAGS.metatrain_iterations * self.batch_size
            num_splits = 1000
            if FLAGS.num_partitions == -1:
                num_partitions = num_tasks
            else:
                num_partitions = FLAGS.num_partitions
            if FLAGS.datasource == 'celeba':
                assert num_classes == 2, "CelebA must have two classes"
                X, attributes, Z = self.X_train, self.attributes_train, self.Z_train
            else:
                X, Y, Z = self.X_train, self.Y_train, self.Z_train
            num_samples_per_class = self.num_samples_per_class_train
            num_train_samples_per_class = FLAGS.inner_update_batch_size_train
            print('Setting up tasks for meta-training')
        else:
            mode = FLAGS.mv_mode
            if mode == 'encenc':
                raise NotImplementedError
            num_tasks = FLAGS.num_eval_tasks
            num_splits = 100
            num_partitions = num_tasks
            if FLAGS.datasource == 'celeba':
                X, attributes, Z = self.X_val, self.attributes_val, self.Z_val
            else:
                X, Y, Z = self.X_val, self.Y_val, self.Z_val
            num_classes = self.num_classes_val
            num_samples_per_class = self.num_samples_per_class_val
            num_train_samples_per_class = FLAGS.inner_update_batch_size_val
            print('Setting up tasks for meta-val')

        task_generator = TaskGenerator(
            num_classes=num_classes,
            num_train_samples_per_class=num_train_samples_per_class,
            num_samples_per_class=num_samples_per_class)
        partition_algorithm = FLAGS.partition_algorithm
        margin = FLAGS.margin

        print('Generating indices for {} tasks'.format(num_tasks))
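        # Build class partitions according to the task-generation mode: ground-truth labels ('gtgt'), partitions of the encoding space ('encenc'), or random labelings ('randrand').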
        if mode == 'gtgt':
            if FLAGS.datasource == 'celeba':
                partitions = task_generator.get_celeba_task_pool(
                    attributes=attributes)
            else:
                print('Using ground truth partition to create classes')
                partition = task_generator.get_partition_from_labels(labels=Y)
                partitions = [partition]
        elif mode == 'encenc':
            if partition_algorithm == 'hyperplanes':
                print(
                    'Using {} hyperplanes-based partition(s) of encoding space to create classes, margin={}'
                    .format(num_partitions, margin))
                partitions = task_generator.get_partitions_hyperplanes(
                    encodings=Z,
                    num_splits=num_splits,
                    margin=margin,
                    num_partitions=num_partitions)
            elif partition_algorithm == 'kmeans':
                print(
                    'Using {} k-means based partition(s) of encoding space to create classes'
                    .format(num_partitions))
                partitions = task_generator.get_partitions_kmeans(encodings=Z,
                                                                  train=train)
            else:
                raise ValueError(
                    'Unrecognized partition-generating algorithm: either hyperplanes or kmeans'
                )
        elif mode == 'randrand':
            print('Randomly sampled and labeled tasks')
            partitions = []
            for p in tqdm(range(num_partitions)):
                labels = np.random.choice(FLAGS.num_clusters,
                                          size=Y.shape,
                                          replace=True)
                partition = task_generator.get_partition_from_labels(
                    labels=labels)
                partitions.append(partition)
        else:
            raise ValueError('Unrecognized task generation scheme')
        print('Average number of classes per partition: {}'.format(
            np.mean([len(list(partition.keys()))
                     for partition in partitions])))
        if FLAGS.on_encodings:
            features = features_ph = tf.placeholder(Z.dtype, Z.shape)
        else:
            assert X.dtype == 'uint8'
            features_ph = tf.placeholder(X.dtype, X.shape)
            features = tf.reshape(features_ph, [-1, self.dim_input])

        def gather_preprocess(task):
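            # One-hot encode the labels and gather the features of each split, rescaling raw images to [0, 1].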
            for split in ['train', 'test']:
                task['{}_labels'.format(split)] = tf.one_hot(
                    task['{}_labels'.format(split)], num_classes)
                if not FLAGS.on_encodings:
                    task['{}_features'.format(split)] = tf.cast(
                        tf.gather(features, task['{}_indices'.format(split)]),
                        tf.float32) / 255.0
                else:
                    task['{}_features'.format(split)] = tf.gather(
                        features, task['{}_indices'.format(split)])
            return task

        def stack(task):
            features = tf.concat(
                (task['train_features'], task['test_features']), axis=0)
            labels = tf.concat((task['train_labels'], task['test_labels']),
                               axis=0)
            return features, labels

        tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                         partitions=partitions)
        train_ind = [task[0] for task in tasks]
        train_labels = [task[1] for task in tasks]
        test_ind = [task[2] for task in tasks]
        test_labels = [task[3] for task in tasks]

        dataset = tf.data.Dataset.from_tensor_slices({
            "train_indices": train_ind,
            "train_labels": train_labels,
            "test_indices": test_ind,
            "test_labels": test_labels
        })
        dataset = dataset.map(map_func=gather_preprocess,
                              num_parallel_calls=FLAGS.num_parallel_calls)
        dataset = dataset.map(map_func=stack,
                              num_parallel_calls=FLAGS.num_parallel_calls)
        dataset = dataset.batch(batch_size=self.batch_size)
        dataset = dataset.prefetch(4)
        dataset = dataset.repeat()
        iterator = dataset.make_initializable_iterator()
        features_batch, labels_batch = iterator.get_next()

        if FLAGS.on_encodings:
            iterator.initializer.run(feed_dict={features_ph: Z})
        else:
            iterator.initializer.run(feed_dict={features_ph: X})

        return features_batch, labels_batch
Example 7
File: main.py Project: qyxqyx/LWAU
def main():
    FLAGS.logdir = 'logs/miniimagenet' + str(FLAGS.update_batch_size) + 'shot/'

    if not FLAGS.train:
        orig_meta_batch_size = FLAGS.meta_batch_size
        FLAGS.meta_batch_size = 1
        orig_update_batch_size = FLAGS.update_batch_size

    task_generator = TaskGenerator(FLAGS.update_batch_size + 15,
                                   FLAGS.meta_batch_size)
    dim_output = task_generator.dim_output
    dim_input = task_generator.dim_input

    model = LWAU(dim_input, dim_output)
    if FLAGS.train:
        model.construct_model(num_updates=FLAGS.num_updates, train=True)
    model.construct_model(num_updates=FLAGS.test_num_updates, train=False)

    # model.summ_op = tf.summary.merge_all()

    saver = loader = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES),
                                    max_to_keep=0)

    sess = tf.InteractiveSession()

    if not FLAGS.train:
        # change to original meta batch size when loading model.
        FLAGS.meta_batch_size = orig_meta_batch_size
        FLAGS.update_batch_size = orig_update_batch_size

    exp_string = str(FLAGS.num_classes) + '.mbs_' + str(FLAGS.meta_batch_size)
    exp_string += '.nstep_' + str(FLAGS.num_updates) + '.tnstep_' + str(
        FLAGS.test_num_updates)
    exp_string += '.ubs_' + str(FLAGS.update_batch_size) + '.nts_' + str(
        FLAGS.num_train_tasks)
    exp_string += '.l1_' + str(FLAGS.l1_alpha) + '.l2_' + str(FLAGS.l2_alpha)
    exp_string += '.lr_' + str(FLAGS.meta_lr) + '.ulr_' + str(FLAGS.update_lr)

    exp_string += '.drop_' + str(FLAGS.dropout_rate) + '.nfs_' + str(
        FLAGS.base_num_filters)

    resume_itr = 0
    model_file = None

    tf.global_variables_initializer().run()
    tf.train.start_queue_runners()

    if FLAGS.resume:
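        # Resume from the latest checkpoint (or the one at FLAGS.test_iter) under the experiment directory.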
        model_file = tf.train.latest_checkpoint(FLAGS.logdir + '/' +
                                                exp_string)
        if FLAGS.test_iter > 0:
            model_file = model_file[:model_file.index('model'
                                                      )] + 'model' + str(
                                                          FLAGS.test_iter)
        if model_file:
            ind1 = model_file.index('model')
            resume_itr = int(model_file[ind1 + 5:])
            print("Restoring model weights from " + model_file)
            saver.restore(sess, model_file)

    if FLAGS.train:
        train(model, saver, sess, exp_string, task_generator, resume_itr)
    else:
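        # Test mode: evaluate every saved checkpoint past metatrain_iterations / 20 and keep the best accuracy.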
        import os
        max_accs = 0
        models = os.listdir(FLAGS.logdir + exp_string)
        model_epochs = []
        for model_file in models:
            if 'model' in model_file and 'index' in model_file:
                i = model_file.find('del')
                j = model_file.find('.')
                model_epoch = model_file[i + 3:j]
                model_epochs.append(int(model_epoch))
        model_epochs.sort()

        max_epoch = 0
        for epoch in model_epochs:
            if epoch > float(FLAGS.metatrain_iterations) / 20:
                model_file = FLAGS.logdir + exp_string + '/model' + str(epoch)
                saver.restore(sess, model_file)
                print("testing model: " + model_file)
                acc = test(model, sess, task_generator)
                if acc > max_accs:
                    max_accs = acc
                    max_epoch = epoch
                print('----------max_acc:', max_accs, '-----------max_model:',
                      max_epoch)
            else:
                pass
Example 8
def cluster_fit_color(num_classes=FLAGS.way,
                      num_tasks=FLAGS.num_tasks,
                      num_clusters=FLAGS.num_clusters,
                      num_encoding_dims=FLAGS.num_encoding_dims,
                      test_set=FLAGS.test_set,
                      dataset=FLAGS.dataset):
    assert dataset == 'mnist'
    import keras
    from keras.layers import Conv2D, Flatten, Dense
    from keras.losses import categorical_crossentropy
    from keras.optimizers import Adam
    from sklearn.cluster import KMeans

    X_train, Y_train, Z_train, X_test, Y_test, Z_test = get_data(
        dataset, num_encoding_dims, test_set)
    # Z_train, Z_test = whitening(Z_train, Z_test)
    start = time.time()
    kmeans = KMeans(n_clusters=num_clusters,
                    init='k-means++',
                    random_state=0,
                    precompute_distances=True,
                    n_jobs=-1,
                    n_init=1000,
                    max_iter=100000).fit(Z_train)
    print(
        "Ran KMeans with n_clusters={} in {:.5} seconds, objective {}.".format(
            num_clusters,
            time.time() - start, kmeans.score(Z_train)))

    X_train, X_test = X_train / 255.0, X_test / 255.0
    X_train, X_test = X_train.reshape((-1, 28, 28, 1)), X_test.reshape(
        (-1, 28, 28, 1))

    cluster_labels_train = keras.utils.to_categorical(kmeans.labels_,
                                                      num_clusters)
    cluster_labels_test = keras.utils.to_categorical(kmeans.predict(Z_test),
                                                     num_clusters)

    model = keras.Sequential()
    model.add(
        Conv2D(filters=32,
               kernel_size=(3, 3),
               strides=(2, 2),
               activation='relu',
               padding='same',
               input_shape=(28, 28, 1)))
    model.add(
        Conv2D(filters=32,
               kernel_size=(3, 3),
               strides=(2, 2),
               activation='relu',
               padding='same'))
    model.add(
        Conv2D(filters=32,
               kernel_size=(3, 3),
               strides=(2, 2),
               activation='relu',
               padding='same'))
    model.add(
        Conv2D(filters=32,
               kernel_size=(3, 3),
               strides=(2, 2),
               activation='relu',
               padding='same'))
    model.add(Flatten())
    model.add(Dense(units=num_clusters, activation='softmax'))
    model.summary()
    model.compile(loss=categorical_crossentropy,
                  optimizer=Adam(),
                  metrics=['accuracy'])
    model.fit(X_train,
              cluster_labels_train,
              batch_size=500,
              epochs=25,
              verbose=1,
              validation_data=(X_test, cluster_labels_test))
    score = model.evaluate(X_test, cluster_labels_test, verbose=1)
    print('Test loss: {}\tTest accuracy: {}'.format(score[0], score[1]))
    model.compile(loss=categorical_crossentropy,
                  optimizer=keras.optimizers.SGD(lr=0.01),
                  metrics=['accuracy'])

    for num_shots in [1, 5, 10]:
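        # Map each cluster to the majority label of its support examples, then classify query images via their predicted cluster.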
        accuracies, finetuned_accuracies = [], []
        num_degenerate_tasks = 0
        start = time.time()
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        for i_test in range(num_tasks):
            if (i_test + 1) % (num_tasks // 10) == 0:
                print(
                    'test {}, accuracy {:.5}, finetuned accuracy {:.5}'.format(
                        i_test + 1, np.mean(accuracies),
                        np.mean(finetuned_accuracies)))

            task = task_generator.get_task(partition=partition)
            ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
            X_train_few, X_test_few = X_test[ind_train_few], X_test[
                ind_test_few]

            cluster_to_labels_few = defaultdict(list)
            Z_train_few = np.argmax(model.predict(X_train_few), axis=1)
            Z_test_few = np.argmax(model.predict(X_test_few), axis=1)
            for i in range(len(Y_train_few)):
                cluster_to_labels_few[Z_train_few[i]].append(Y_train_few[i])
            cluster_to_label_few = defaultdict(int)
            for (cluster, labels) in list(cluster_to_labels_few.items()):
                uniques, counts = np.unique(labels, return_counts=True)
                cluster_to_label_few[cluster] = uniques[np.argmax(counts)]
            if len(cluster_to_label_few) == 0:
                num_degenerate_tasks += 1
                continue
            predictions = []
            for z in Z_test_few:
                predictions.append(cluster_to_label_few[z])
            accuracies.append(accuracy_score(Y_test_few, predictions))

        print('num_clusters={}, num_encoding_dims={}'.format(
            num_clusters, num_encoding_dims))
        print(
            '{}-way {}-shot fit_kmeans: {:.5} with 95% CI {:.5} over {} tests'.
            format(num_classes, num_shots, np.mean(accuracies),
                   1.96 * np.std(accuracies) / np.sqrt(num_tasks), num_tasks))
        print(
            '{}-way {}-shot fit_kmeans finetuned: {:.5} with 95% CI {:.5} over {} tests'
            .format(num_classes, num_shots, np.mean(finetuned_accuracies),
                    1.96 * np.std(finetuned_accuracies) / np.sqrt(num_tasks),
                    num_tasks))
        print(
            '{} few-shot classification tasks: {:.5} seconds with {} degenerate tasks.'
            .format(num_tasks,
                    time.time() - start, num_degenerate_tasks))
Example 9
def embedding_cluster_matching(num_classes=FLAGS.way,
                               num_shots=FLAGS.shot,
                               num_tasks=FLAGS.num_tasks,
                               num_clusters=FLAGS.num_clusters,
                               num_encoding_dims=FLAGS.num_encoding_dims,
                               dataset=FLAGS.dataset,
                               test_set=FLAGS.test_set):
    if dataset != 'celeba':
        _, _, Z_train, X_test, Y_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
    else:
        _, _, Z_train, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)

    start = time.time()
    kmeans = KMeans(n_clusters=num_clusters,
                    init='k-means++',
                    random_state=0,
                    precompute_distances=True,
                    n_jobs=10,
                    n_init=10,
                    max_iter=3000).fit(Z_train)
    print(
        "Ran KMeans with n_clusters={} in {:.5} seconds, objective {}.".format(
            num_clusters,
            time.time() - start, kmeans.score(Z_train)))

    if dataset != 'celeba':
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    for num_shots in [FLAGS.shot]:
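        # Label each cluster with the majority class of its support embeddings, then assign query embeddings to the nearest labelled centroid.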
        accuracies = []
        start = time.time()
        num_degenerate_tasks = 0

        for i_task, task in enumerate(tasks):
            if (i_task + 1) % (num_tasks // 10) == 0:
                print('test {}, accuracy {:.5}'.format(i_task + 1,
                                                       np.mean(accuracies)))

            ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
            Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[
                ind_test_few]

            clusters_to_labels_few = defaultdict(list)
            examples_to_clusters_few = kmeans.predict(Z_train_few)
            for i in range(len(Y_train_few)):
                clusters_to_labels_few[examples_to_clusters_few[i]].append(
                    Y_train_few[i])
            for (cluster, labels) in list(clusters_to_labels_few.items()):
                uniques, counts = np.unique(labels, return_counts=True)
                clusters_to_labels_few[cluster] = [uniques[np.argmax(counts)]]
                # if len(np.unique(labels)) > 1:      # delete degenerate clusters
                #     del clusters_to_labels_few[cluster]
            if len(clusters_to_labels_few) == 0:
                num_degenerate_tasks += 1
                continue
            centroid_ind_to_cluster = np.array(
                list(clusters_to_labels_few.keys()))
            centroids = kmeans.cluster_centers_[centroid_ind_to_cluster]
            distances = distance.cdist(Z_test_few, centroids)
            predicted_clusters = centroid_ind_to_cluster[np.argmin(distances,
                                                                   axis=1)]
            predictions = []
            for cluster in predicted_clusters:
                predictions.append(clusters_to_labels_few[cluster][0])

            accuracies.append(accuracy_score(Y_test_few, predictions))
        print('dataset={}, encoder={}, num_encoding_dims={}, num_clusters={}'.
              format(dataset, FLAGS.encoder, num_clusters, num_encoding_dims))
        print(
            '{}-way {}-shot nearest-cluster after clustering embeddings: {:.5} with 95% CI {:.5} over {} tests'
            .format(num_classes, num_shots, np.mean(accuracies),
                    1.96 * np.std(accuracies) / np.sqrt(num_tasks), num_tasks))
        print(
            '{} few-shot classification tasks: {:.5} seconds with {} degenerate tasks.'
            .format(num_tasks,
                    time.time() - start, num_degenerate_tasks))
Example 10
def cluster_color_logistic_regression(
        C=FLAGS.inverse_reg,
        penalty='l2',
        multi_class='multinomial',
        n_clusters=FLAGS.num_clusters,
        num_classes=FLAGS.way,
        num_shots=FLAGS.shot,
        num_tasks=FLAGS.num_tasks,
        num_encoding_dims=FLAGS.num_encoding_dims,
        test_set=FLAGS.test_set,
        dataset=FLAGS.dataset):
    if dataset != 'celeba':
        _, _, Z_train, X_test, Y_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
    else:
        _, _, Z_train, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)

    start = time.time()
    kmeans = KMeans(n_clusters=n_clusters,
                    precompute_distances=True,
                    n_jobs=-1,
                    n_init=100).fit(Z_train)
    print("Ran KMeans with n_clusters={} in {:.5} seconds.".format(
        n_clusters,
        time.time() - start))
    uniques, counts = np.unique(kmeans.labels_, return_counts=True)

    if dataset != 'celeba':
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    train_accuracies, test_accuracies = [], []
    start = time.time()
    clusters_to_indices = task_generator.get_partition_from_labels(
        kmeans.labels_)
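    # Propagate support labels to the unlabeled points of pure (single-label) clusters, then fit a logistic regression on the augmented support set.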
    for i_task, task in enumerate(tasks):
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies),
                np.mean(test_accuracies)))

        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        clusters_to_labels_few = defaultdict(list)
        indices_to_clusters_few = kmeans.predict(Z_train_few)
        for i in range(Z_train_few.shape[0]):
            clusters_to_labels_few[indices_to_clusters_few[i]].append(
                Y_train_few[i])
        Z_train_fit, Y_train_fit = [], []
        for cluster in list(clusters_to_labels_few.keys()):
            labels = clusters_to_labels_few[cluster]
            if len(np.unique(labels)) == 1:  # skip degenerate clusters
                Z_train_fit.extend(
                    Z_train[clusters_to_indices[cluster]]
                )  # propagate labels to unlabeled datapoints
                Y_train_fit.extend([
                    labels[0] for i in range(len(clusters_to_indices[cluster]))
                ])
        Z_train_fit, Y_train_fit = np.stack(Z_train_fit,
                                            axis=0), np.stack(Y_train_fit,
                                                              axis=0)
        Z_train_fit = np.concatenate((Z_train_fit, Z_train_few), axis=0)
        Y_train_fit = np.concatenate((Y_train_fit, Y_train_few), axis=0)

        logistic_regression = LogisticRegression(n_jobs=-1,
                                                 penalty=penalty,
                                                 C=C,
                                                 multi_class=multi_class,
                                                 solver='saga',
                                                 max_iter=500)
        logistic_regression.fit(Z_train_fit, Y_train_fit)
        test_accuracies.append(
            logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(
            logistic_regression.score(Z_train_fit, Y_train_fit))
    print('n_clusters={}, penalty={}, C={}, multi_class={}'.format(
        n_clusters, penalty, C, multi_class))
    print(
        '{}-way {}-shot logistic regression after clustering: {:.5} with 95% CI {:.5} over {} tests'
        .format(num_classes, num_shots, np.mean(test_accuracies),
                1.96 * np.std(test_accuracies) / np.sqrt(num_tasks),
                num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(
        num_tasks,
        time.time() - start))
Example 11
def embedding_logistic_regression(C=FLAGS.inverse_reg,
                                  penalty='l2',
                                  multi_class='multinomial',
                                  num_classes=FLAGS.way,
                                  num_shots=FLAGS.shot,
                                  num_tasks=FLAGS.num_tasks,
                                  num_encoding_dims=FLAGS.num_encoding_dims,
                                  test_set=FLAGS.test_set,
                                  dataset=FLAGS.dataset):
    print('{}-way {}-shot logistic regression'.format(num_classes, num_shots))
    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims,
                                                   test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    train_accuracies, test_accuracies = [], []

    start = time.time()
    for i_task, task in enumerate(tasks):
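        # Fit a logistic regression on this task's support embeddings and score it on the query embeddings.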
        if (i_task + 1) % (num_tasks // 10) == 0:
            print('test {}, train accuracy {:.5}, test accuracy {:.5}'.format(
                i_task + 1, np.mean(train_accuracies),
                np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]

        logistic_regression = LogisticRegression(n_jobs=-1,
                                                 penalty=penalty,
                                                 C=C,
                                                 multi_class=multi_class,
                                                 solver='saga',
                                                 max_iter=1000)
        logistic_regression.fit(Z_train_few, Y_train_few)
        test_accuracies.append(
            logistic_regression.score(Z_test_few, Y_test_few))
        train_accuracies.append(
            logistic_regression.score(Z_train_few, Y_train_few))
    print('penalty={}, C={}, multi_class={}'.format(penalty, C, multi_class))
    print(
        '{}-way {}-shot logistic regression: {:.5} with 95% CI {:.5} over {} tests'
        .format(num_classes, num_shots, np.mean(test_accuracies),
                1.96 * np.std(test_accuracies) / np.sqrt(num_tasks),
                num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(
        num_tasks,
        time.time() - start))
Example 12
def embedding_mlp(num_classes=FLAGS.way,
                  num_shots=FLAGS.shot,
                  num_tasks=FLAGS.num_tasks,
                  num_encoding_dims=FLAGS.num_encoding_dims,
                  test_set=FLAGS.test_set,
                  dataset=FLAGS.dataset,
                  units=FLAGS.units,
                  dropout=FLAGS.dropout):
    import keras
    from keras.layers import Dense, Dropout
    from keras.losses import categorical_crossentropy
    from keras.callbacks import EarlyStopping
    from keras import backend as K

    if dataset != 'celeba':
        _, _, _, X_test, Y_test, Z_test = get_data(dataset, num_encoding_dims,
                                                   test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partition = task_generator.get_partition_from_labels(Y_test)
        partitions = [partition]
    else:
        _, _, _, X_test, attributes_test, Z_test = get_data(
            dataset, num_encoding_dims, test_set)
        task_generator = TaskGenerator(num_classes=num_classes,
                                       num_train_samples_per_class=num_shots,
                                       num_samples_per_class=num_shots + 5)
        partitions = task_generator.get_celeba_task_pool(attributes_test)
    tasks = task_generator.get_tasks(num_tasks=num_tasks,
                                     partitions=partitions)

    train_accuracies, test_accuracies = [], []

    start = time.time()
    for i_task, task in enumerate(tqdm(tasks)):
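        # Train a small one-hidden-layer MLP on this task's support embeddings, with early stopping monitored on the query split.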
        if (i_task + 1) % (num_tasks // 10) == 0:
            tqdm.write('test {}, accuracy {:.5}'.format(
                i_task + 1, np.mean(test_accuracies)))
        ind_train_few, Y_train_few, ind_test_few, Y_test_few = task
        Z_train_few, Z_test_few = Z_test[ind_train_few], Z_test[ind_test_few]
        Y_train_few, Y_test_few = keras.utils.to_categorical(
            Y_train_few, num_classes=num_classes), keras.utils.to_categorical(
                Y_test_few, num_classes=num_classes)

        model = keras.Sequential()
        model.add(
            Dense(units=units,
                  activation='relu',
                  input_dim=Z_train_few.shape[1]))
        model.add(Dropout(rate=dropout))
        model.add(Dense(units=num_classes, activation='softmax'))
        model.compile(loss=categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(),
                      metrics=['accuracy'])
        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        model.fit(Z_train_few,
                  Y_train_few,
                  batch_size=Z_train_few.shape[0],
                  epochs=500,
                  verbose=0,
                  validation_data=(Z_test_few, Y_test_few),
                  callbacks=[early_stopping])
        train_score = model.evaluate(Z_train_few, Y_train_few, verbose=0)
        train_accuracies.append(train_score[1])
        test_score = model.evaluate(Z_test_few, Y_test_few, verbose=0)
        test_accuracies.append(test_score[1])
        K.clear_session()

    print('units={}, dropout={}'.format(units, dropout))
    print(
        '{}-way {}-shot embedding mlp: {:.5} with 95% CI {:.5} over {} tests'.
        format(num_classes, num_shots, np.mean(test_accuracies),
               1.96 * np.std(test_accuracies) / np.sqrt(num_tasks), num_tasks))
    print('Mean training accuracy: {:.5}; standard deviation: {:.5}'.format(
        np.mean(train_accuracies), np.std(train_accuracies)))
    print('{} few-shot classification tasks: {:.5} seconds.'.format(
        num_tasks,
        time.time() - start))
Example 13
    argparse.add_argument('--ckpt_dir',
                          type=str,
                          help='Path to the checkpoint directory',
                          default='../../weights/')
    argparse.add_argument('--his_dir',
                          type=str,
                          help='Path to the training history directory',
                          default='../../history/')
    # Generate args
    args = argparse.parse_args()

    print('\nBuild segmentation model: Unet\n')
    ml = MetaLearner(args=args)
    print('Initialize model\n')
    model = ml.initialize_Unet()
    model = ml.initialize(model)
    # tf.keras.utils.plot_model(model, to_file='../model.png',show_shapes=True,show_layer_names=True,dpi=128)
    # Initialize task generator
    print("\ntasks generation based on {} clusters\n".format(args.n_clusters))
    batch_generator = TaskGenerator(args)

    model = maml_train(model, batch_generator)

    print("\nTEST PHASE\n")
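    # Restore the trained weights and evaluate the model for each value in num_steps.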
    restored_model = restore_model(
        model, '../../weights/{}/{}way{}shot'.format(args.dataset, args.n_way,
                                                     args.k_shot))
    eval_model(restored_model,
               batch_generator,
               num_steps=(0, 1, 5, 10, 50, 100, 200))
Example 14
    def make_data_tensor(self, train=True):
        if train:
            mode = FLAGS.mt_mode
            num_classes = self.num_classes_train
            num_samples_per_class = self.num_samples_per_class_train
            num_train_samples_per_class = FLAGS.inner_update_batch_size_train
            path_to_info_dict = self.split_to_path_to_info_dict['train']
            miniimagenet_path_to_info_dict = self.split_to_path_to_info_dict[
                'miniimagenet_train']
            print('Setting up tasks for meta-training')
        else:
            mode = FLAGS.mv_mode
            if mode == 'encenc':
                raise NotImplementedError
            num_tasks = FLAGS.num_eval_tasks
            if FLAGS.test_set:
                path_to_info_dict = self.split_to_path_to_info_dict['test']
            else:
                path_to_info_dict = self.split_to_path_to_info_dict['val']
            num_classes = self.num_classes_val
            num_samples_per_class = self.num_samples_per_class_val
            num_train_samples_per_class = FLAGS.inner_update_batch_size_val
            print('Setting up tasks for meta-val')

        task_generator = TaskGenerator(
            num_classes=num_classes,
            num_train_samples_per_class=num_train_samples_per_class,
            num_samples_per_class=num_samples_per_class)
        partition_algorithm = FLAGS.partition_algorithm
        margin = FLAGS.margin

        file_paths = list(path_to_info_dict.keys())
        file_path_to_ind = {
            file_path: ind
            for ind, file_path in enumerate(file_paths)
        }

        # create partitions
        partitions = []
        if not train or not FLAGS.miniimagenet_only or mode == 'semi':
            num_partitions = len([
                key for key in list(path_to_info_dict[file_paths[0]].keys())
                if 'cluster_ind' in key
            ]) if mode == 'encenc' else 1
            for i in tqdm(range(num_partitions)):
                partition = defaultdict(list)
                class_ind_key = {
                    'encenc': 'cluster_ind{}'.format(i),
                    'semi': 'cluster_ind{}'.format(i),
                    'gtgt': 'class_ind'
                }[mode]
                for file_path, info in tqdm(path_to_info_dict.items()):
                    partition[info[class_ind_key]].append(
                        file_path_to_ind[file_path])
                partition = task_generator.clean_partition(partition)
                partitions.append(partition)
        if train and (FLAGS.miniimagenet_only or mode == 'semi'):
            partition = defaultdict(list)
            class_ind_key = {'semi': 'class_ind', 'gtgt': 'class_ind'}[mode]
            for file_path, info in tqdm(
                    miniimagenet_path_to_info_dict.items()):
                partition[info[class_ind_key]].append(
                    file_path_to_ind[file_path])
            for cls, indices in partition.items():
                partition[cls] = indices[:600]
            partitions.append(partition)
        print('Number of partitions: {}'.format(len(partitions)))
        print('Average number of clusters/classes: {}'.format(
            np.mean([len(partition.keys()) for partition in partitions])))

        def sample_task():
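            # Sample tasks indefinitely; in 'semi' mode the ground-truth partition is chosen with probability FLAGS.p_gtgt.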
            if mode == 'semi':
                assert len(partitions) == 2
                assert 0 <= FLAGS.p_gtgt <= 1
                p = [1 - FLAGS.p_gtgt, FLAGS.p_gtgt]
            else:
                p = None
            while True:
                i = np.random.choice(len(partitions), replace=False, p=p)
                train_ind, train_labels, test_ind, test_labels = task_generator.get_task(
                    partition=partitions[i])
                train_ind, train_labels, test_ind, test_labels = np.array(train_ind), np.array(train_labels), \
                                                                 np.array(test_ind), np.array(test_labels)
                yield train_ind, train_labels, test_ind, test_labels

        def make_dict(train_ind, train_labels, test_ind, test_labels):
            return {
                "train_indices": train_ind,
                "train_labels": train_labels,
                "test_indices": test_ind,
                "test_labels": test_labels
            }

        def preprocess_image(file_path):
            image_string = tf.read_file(file_path)
            image = tf.image.decode_jpeg(image_string, channels=3)
            image_processed = tf.cast(tf.reshape(image, [self.dim_input]),
                                      tf.float32) / 255.0
            return image_processed

        def preprocess_feature(file_path):
            return tf.py_func(
                lambda file_path: np.load(file_path.decode('utf-8')),
                [file_path], tf.float32)

        preprocess_func = {
            'images_84x84': preprocess_image,
            'images_224x224': preprocess_image,
            'features': preprocess_feature
        }[FLAGS.input_type]
        ind_to_file_path_ph = tf.placeholder_with_default(
            file_paths, shape=len(file_paths))

        def gather_preprocess(task):
            for split in ['train', 'test']:
                task['{}_labels'.format(split)] = tf.one_hot(
                    task['{}_labels'.format(split)], num_classes)
                task['{}_inputs'.format(split)] = tf.map_fn(
                    fn=preprocess_func,
                    dtype=tf.float32,
                    elems=tf.gather(ind_to_file_path_ph,
                                    task['{}_indices'.format(split)]))
            return task

        def stack(task):
            inputs = tf.concat((task['train_inputs'], task['test_inputs']),
                               axis=0)
            labels = tf.concat((task['train_labels'], task['test_labels']),
                               axis=0)
            return inputs, labels

        #
        # tasks = task_generator.get_tasks(num_tasks=num_tasks, partitions=partitions)
        # train_ind, train_labels, test_ind, test_labels = zip(*tasks)
        #
        # train_ind, train_labels, test_ind, test_labels = np.array(train_ind), np.array(train_labels), \
        #                                                  np.array(test_ind), np.array(test_labels)
        # train_ind_ph = tf.placeholder(dtype=tf.int64, shape=train_ind.shape)
        # train_labels_ph = tf.placeholder(dtype=tf.int64, shape=train_labels.shape)
        # test_ind_ph = tf.placeholder(dtype=tf.int64, shape=test_ind.shape)
        # test_labels_ph = tf.placeholder(dtype=tf.int64, shape=test_labels.shape)
        # dataset = tf.data.Dataset.from_tensor_slices(
        #     {"train_indices": train_ind_ph, "train_labels": train_labels_ph,
        #      "test_indices": test_ind_ph, "test_labels": test_labels_ph})
        # dataset = dataset.map(map_func=gather_preprocess, num_parallel_calls=FLAGS.num_parallel_calls)
        # dataset = dataset.map(map_func=stack, num_parallel_calls=FLAGS.num_parallel_calls)
        # dataset = dataset.batch(batch_size=self.batch_size)
        # dataset = dataset.prefetch(4)
        # dataset = dataset.repeat()
        # iterator = dataset.make_initializable_iterator()
        # inputs_batch, labels_batch = iterator.get_next()
        #
        # # sess = tf.InteractiveSession()
        # iterator.initializer.run(feed_dict={train_ind_ph: train_ind,
        #                                     train_labels_ph: train_labels,
        #                                     test_ind_ph: test_ind,
        #                                     test_labels_ph: test_labels})

        dataset = tf.data.Dataset.from_generator(
            sample_task, output_types=(tf.int64, tf.int64, tf.int64, tf.int64))
        dataset = dataset.map(map_func=make_dict, num_parallel_calls=1)
        dataset = dataset.map(map_func=gather_preprocess,
                              num_parallel_calls=FLAGS.num_parallel_calls)
        dataset = dataset.map(map_func=stack,
                              num_parallel_calls=FLAGS.num_parallel_calls)
        dataset = dataset.batch(batch_size=self.batch_size)
        dataset = dataset.prefetch(4)
        dataset = dataset.repeat()
        iterator = dataset.make_one_shot_iterator()
        inputs_batch, labels_batch = iterator.get_next()

        return inputs_batch, labels_batch