Exemple #1
0
    def make_data_list(self, data_type='train'):
        """Sample task filename lists for one split and cache them on disk.

        For each of a fixed number of tasks (80000 for 'train', 600 for
        'test' and 'val'), ``self.num_classes`` class folders are drawn at
        random and ``get_images`` is asked for
        ``self.num_samples_per_class`` images per folder.  The concatenated
        filenames are written to ``<data_type>_filenames.npy`` and the
        labels to ``<data_type>_labels.npy`` inside
        ``self.this_setting_filename_dir``.  Nothing is generated when the
        filenames file already exists.

        Args:
            data_type: One of 'train', 'test' or 'val'.

        Raises:
            ValueError: If ``data_type`` is not one of the supported splits.
        """
        if data_type == 'train':
            folders = self.metatrain_character_folders
            num_total_batches = 80000
        elif data_type == 'test':
            folders = self.metatest_character_folders
            num_total_batches = 600
        elif data_type == 'val':
            folders = self.metaval_character_folders
            num_total_batches = 600
        else:
            # Fail here instead of printing and hitting an unbound local later.
            raise ValueError(
                'Please check data list type: %r' % (data_type,))

        labels_path = os.path.join(
            self.this_setting_filename_dir, data_type + '_labels.npy')
        filenames_path = os.path.join(
            self.this_setting_filename_dir, data_type + '_filenames.npy')

        if os.path.exists(filenames_path):
            print('The ' + data_type + ' filename and label lists have already been created')
            return

        print('Generating ' + data_type + ' filenames')
        all_filenames = []
        labels = []
        for _ in trange(num_total_batches):
            sampled_character_folders = random.sample(folders, self.num_classes)
            random.shuffle(sampled_character_folders)
            # shuffle=False: make sure the returned order isn't randomized
            labels_and_images = get_images(
                sampled_character_folders,
                range(self.num_classes),
                nb_samples=self.num_samples_per_class,
                shuffle=False)
            labels = [li[0] for li in labels_and_images]
            all_filenames.extend(li[1] for li in labels_and_images)

        # Only the labels of the last sampled task are saved, as before.
        # NOTE(review): presumably every task yields the same label sequence
        # (same label range, shuffle=False) -- confirm against get_images.
        np.save(labels_path, labels)
        np.save(filenames_path, all_filenames)
        print('The ' + data_type + ' filename and label lists are saved')
    def generate_data(self, data_type='train'):
        """Sample few-shot episodes for one split and cache them on disk.

        Each episode draws ``FLAGS.way_num`` class folders at random and
        requests a fixed number of images per class from ``get_images``.
        Within every class the first ``FLAGS.shot_num`` samples go to the
        'a' part of the episode ('filenamea'/'labela') and the remaining
        ones to the 'b' part ('filenameb'/'labelb').  The list of episode
        dicts is saved to ``<data_type>_data.npy`` inside
        ``self.this_setting_filename_dir``; nothing is generated when that
        file already exists.

        Args:
            data_type: One of 'train', 'test' or 'val'.

        Raises:
            ValueError: If ``data_type`` is not one of the supported splits.
        """
        if data_type == 'train':
            source_folder = FLAGS.metatrain_dir
            num_total_batches = FLAGS.metatrain_iterations * FLAGS.meta_batch_size + 10
            num_samples_per_class = FLAGS.shot_num + FLAGS.metatrain_epite_sample_num
        elif data_type in ('test', 'val'):
            if data_type == 'test':
                source_folder = FLAGS.metatest_dir
            else:
                source_folder = FLAGS.metaval_dir
            num_total_batches = 600
            # Both 'test' and 'val' size their episodes from
            # metatest_epite_sample_num; 0 means "as many as the shot number".
            if FLAGS.metatest_epite_sample_num == 0:
                num_samples_per_class = FLAGS.shot_num * 2
            else:
                num_samples_per_class = FLAGS.shot_num + FLAGS.metatest_epite_sample_num
        else:
            # Fail here instead of printing and hitting an unbound local later.
            raise ValueError(
                '[Error] Please check data list type: %r' % (data_type,))

        data_path = os.path.join(
            self.this_setting_filename_dir, data_type + '_data.npy')
        if os.path.exists(data_path):
            print('The ' + data_type + ' data has already been created')
            return

        # Only list the class folders when data actually has to be generated.
        folders = []
        for label in os.listdir(source_folder):
            class_folder = os.path.join(source_folder, label)
            if os.path.isdir(class_folder):
                folders.append(class_folder)

        epitr_sample_num = FLAGS.shot_num

        print('Generating ' + data_type + ' data')
        data_list = []
        for _ in trange(num_total_batches):
            sampled_character_folders = random.sample(folders, FLAGS.way_num)
            random.shuffle(sampled_character_folders)
            labels_and_images = get_images(
                sampled_character_folders,
                range(FLAGS.way_num),
                nb_samples=num_samples_per_class,
                shuffle=False)
            labels = [li[0] for li in labels_and_images]
            filenames = [li[1] for li in labels_and_images]

            this_task_tr_filenames = []
            this_task_tr_labels = []
            this_task_te_filenames = []
            this_task_te_labels = []
            for class_idx in range(FLAGS.way_num):
                # The slicing assumes get_images returns the samples grouped
                # by class, num_samples_per_class entries per class.
                start = class_idx * num_samples_per_class
                stop = start + num_samples_per_class
                this_class_filenames = filenames[start:stop]
                this_class_label = labels[start:stop]
                this_task_tr_filenames += this_class_filenames[:epitr_sample_num]
                this_task_tr_labels += this_class_label[:epitr_sample_num]
                this_task_te_filenames += this_class_filenames[epitr_sample_num:]
                this_task_te_labels += this_class_label[epitr_sample_num:]

            data_list.append({
                'filenamea': this_task_tr_filenames,
                'filenameb': this_task_te_filenames,
                'labela': this_task_tr_labels,
                'labelb': this_task_te_labels,
            })

        np.save(data_path, data_list)
        print('The ' + data_type + ' data is saved')
    def make_test_distractors_list(self, total_samples_per_class=100):
        """Build and cache distractor filename lists for the 600 test tasks.

        For every test task, 7 distinct class names are drawn from
        ``self.metatest_unlabeled_folder`` that are not among the task's own
        label names (read from ``test_labelnames.npy``).  For the j-th drawn
        class, ``get_images`` is asked for ``total_samples_per_class``
        images, and the resulting filenames are appended to the j-th output
        list.  The lists are written to ``test_1_dis_filenames.npy`` ...
        ``test_7_dis_filenames.npy`` under
        ``<setting dir>/unlabeled_samples_for_test/distracting_file``.

        Generation is skipped when ``test_1_dis_filenames.npy`` already
        exists.

        Args:
            total_samples_per_class: Number of images requested per
                distractor class.

        Raises:
            ValueError: If a task has fewer than 7 eligible distractor
                classes.
        """
        un_folder = self.metatest_unlabeled_folder
        num_total_batches = 600
        num_distractor_classes = 7

        unlabeled_dir = self.this_setting_filename_dir + '/unlabeled_samples_for_test/distracting_file'
        if not os.path.exists(unlabeled_dir):
            # The path is two levels deep; makedirs also creates the parent.
            os.makedirs(unlabeled_dir)

        # The outputs are numbered from 1, so probe the first file that is
        # actually written (probing "test_0" never found a cached result).
        if os.path.exists(unlabeled_dir + '/test_1_dis_filenames.npy'):
            print(
                'The test distractors filename lists have already been created'
            )
            return

        # Loaded only when generation is needed; raises if the file is missing.
        label_names = np.load(self.this_setting_filename_dir +
                              '/test_labelnames.npy')

        print('Generating test distracting unlabeled filenames')

        base_list = os.listdir(un_folder)
        dis_filenames = [[] for _ in range(num_distractor_classes)]

        for i in trange(num_total_batches):
            task_label_list = label_names[i]

            # Draw all distractor classes of the task at once, without
            # replacement, from the classes the task itself does not use.
            candidates = [
                name for name in base_list if name not in task_label_list
            ]
            if len(candidates) < num_distractor_classes:
                raise ValueError(
                    'Need %d distractor classes but only %d are available in %s'
                    % (num_distractor_classes, len(candidates), un_folder))
            task_distractors_list = random.sample(candidates,
                                                  num_distractor_classes)

            for j, distractors_label in enumerate(task_distractors_list):
                sampled_distractors_folders = [
                    (os.path.join(un_folder,
                                  distractors_label), distractors_label)
                ]
                labels_and_images = get_images(
                    sampled_distractors_folders,
                    range(1),
                    nb_samples=total_samples_per_class,
                    shuffle=False)
                # Strip the hard-coded machine-specific path prefix.
                dis_filenames[j].extend(
                    li[1].replace('/home/lxz/python_code', '')
                    for li in labels_and_images)

        for k, filenames in enumerate(dis_filenames):
            np.save(
                unlabeled_dir + '/test_' + str(k + 1) +
                '_dis_filenames.npy', filenames)

        print('The test distractors filename lists are saved')
    def make_distractors_list(self, data_type='train'):
        """Build and cache the distractor filename list for 'train' or 'val'.

        For every task (80000 for 'train', 600 for 'val'),
        ``FLAGS.num_dis`` distinct class names are drawn from the split's
        unlabeled folder that are not among the task's own label names
        (read from ``<data_type>_labelnames.npy``).  ``get_images`` is then
        asked for ``self.num_unlabeled_samples`` images per drawn class and
        all filenames are concatenated and saved to
        ``<data_type>_<num_unlabeled_samples>_dis_filenames.npy`` under
        ``<setting dir>/unlabeled_samples_for_<data_type>/distracting_file``.

        Generation is skipped when that output file already exists.

        Args:
            data_type: 'train' or 'val'.

        Raises:
            ValueError: If ``data_type`` is not supported, or a task has
                fewer than ``FLAGS.num_dis`` eligible distractor classes.
        """
        if data_type == 'train':
            un_folder = self.metatrain_unlabeled_folder
            num_total_batches = 80000
        elif data_type == 'val':
            un_folder = self.metaval_unlabeled_folder
            num_total_batches = 600
        else:
            # Fail here instead of printing and hitting an unbound local later.
            raise ValueError(
                "Please check data list type: %r (expected 'train' or 'val')"
                % (data_type,))

        unlabeled_dir = self.this_setting_filename_dir + '/unlabeled_samples_for_' + data_type + '/distracting_file'
        if not os.path.exists(unlabeled_dir):
            # The path is two levels deep; makedirs also creates the parent.
            os.makedirs(unlabeled_dir)

        output_path = (unlabeled_dir + '/' + data_type + '_' +
                       str(self.num_unlabeled_samples) + '_dis_filenames.npy')
        if os.path.exists(output_path):
            print('The ' + data_type +
                  ' distractors filename lists have already been created')
            return

        # Loaded only when generation is needed; raises if the file is missing.
        label_names = np.load(self.this_setting_filename_dir + '/' +
                              data_type + '_labelnames.npy')

        print('Generating ' + data_type +
              ' distracting unlabeled filenames')

        num_dis = FLAGS.num_dis
        base_list = os.listdir(un_folder)
        all_distractors_filenames = []

        for i in trange(num_total_batches):
            task_label_list = label_names[i]

            # Draw all distractor classes of the task at once, without
            # replacement, from the classes the task itself does not use.
            candidates = [
                name for name in base_list if name not in task_label_list
            ]
            if len(candidates) < num_dis:
                raise ValueError(
                    'Need %d distractor classes but only %d are available in %s'
                    % (num_dis, len(candidates), un_folder))
            task_distractors_list = random.sample(candidates, num_dis)

            sampled_distractors_folders = [
                (os.path.join(un_folder, name), name)
                for name in task_distractors_list
            ]
            labels_and_images = get_images(
                sampled_distractors_folders,
                range(num_dis),
                nb_samples=self.num_unlabeled_samples,
                shuffle=False)
            # Strip the hard-coded machine-specific path prefix.
            all_distractors_filenames.extend(
                li[1].replace('/home/lxz/python_code', '')
                for li in labels_and_images)

        np.save(output_path, all_distractors_filenames)

        print('The ' + data_type + ' distractors filename lists are saved')
    def make_unlabeled_test_data_list(self):
        """Build and cache the unlabeled filename/label lists for the test tasks.

        The label names of the 600 test tasks are read from
        ``test_labelnames.npy``.  For each task, ``get_images`` is asked for
        100 images per class from ``self.metatest_unlabeled_folder``; the
        result is cut into 10 splits, split ``j`` taking entries
        ``[j*10, (j+1)*10)`` of every class's block of 100.  Split ``n`` is
        saved as ``test_<n>_un_labels.npy`` / ``test_<n>_un_filenames.npy``
        under ``<setting dir>/unlabeled_samples_for_test``.

        Generation only runs when ``test_filenames.npy`` exists and
        ``test_0_un_filenames.npy`` does not; otherwise a message is printed
        and nothing is written.
        """
        source_folder = self.metatest_unlabeled_folder
        setting_dir = self.this_setting_filename_dir
        task_count = 600
        split_count = 10
        per_split = 10
        per_class = 100

        task_label_names = np.load(setting_dir + '/test_labelnames.npy')

        out_dir = setting_dir + '/unlabeled_samples_for_test'
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)

        prerequisite_present = os.path.exists(setting_dir + '/test_filenames.npy')
        already_generated = os.path.exists(out_dir + '/test_0_un_filenames.npy')
        if not prerequisite_present or already_generated:
            print(
                'The test unlabeled filename and label lists have already been created'
            )
            return

        print('Generating test unlabeled filenames')

        files_by_split = {split: [] for split in range(split_count)}
        labels_by_split = {split: [] for split in range(split_count)}

        for task_idx in trange(task_count):
            task_names = task_label_names[task_idx]
            class_folders = [(os.path.join(source_folder, name), name)
                             for name in task_names]

            # shuffle=False: make sure the returned order isn't randomized
            sampled = get_images(class_folders,
                                 range(self.num_classes),
                                 nb_samples=per_class,
                                 shuffle=False)

            for split in range(split_count):
                for class_pos in range(len(task_names)):
                    # Window of this split inside the class's block.
                    lo = split * per_split + class_pos * per_class
                    hi = (split + 1) * per_split + class_pos * per_class
                    window = sampled[lo:hi]
                    labels_by_split[split].extend(
                        [entry[0] for entry in window])
                    # Strip the hard-coded machine-specific path prefix.
                    files_by_split[split].extend(
                        [entry[1].replace('/home/lxz/python_code', '')
                         for entry in window])

        for split in range(split_count):
            np.save(out_dir + '/test_' + str(split) + '_un_labels.npy',
                    labels_by_split[split])
            np.save(out_dir + '/test_' + str(split) + '_un_filenames.npy',
                    files_by_split[split])

        print('The test unlabeded filename and label lists are saved')
    def make_unlabeled_data_list(self, data_type='train'):
        """Build and cache the unlabeled filename/label lists for 'train' or 'val'.

        For every task (80000 for 'train', 600 for 'val') the task's label
        names are read from ``<data_type>_labelnames.npy`` and
        ``get_images`` is asked for ``self.num_unlabeled_samples`` images
        per class from the split's unlabeled folder.  The concatenated
        labels and filenames are saved as
        ``<data_type>_<num_unlabeled_samples>_un_labels.npy`` and
        ``..._un_filenames.npy`` under
        ``<setting dir>/unlabeled_samples_for_<data_type>``.

        Generation only runs when ``<data_type>_filenames.npy`` exists and
        the output filenames file does not; otherwise a message is printed
        and nothing is written.

        Args:
            data_type: 'train' or 'val'.

        Raises:
            ValueError: If ``data_type`` is not 'train' or 'val'.
        """
        if data_type == 'train':
            un_folder = self.metatrain_unlabeled_folder
            num_total_batches = 80000
        elif data_type == 'val':
            un_folder = self.metaval_unlabeled_folder
            num_total_batches = 600
        else:
            # Fail here instead of printing and hitting an unbound local later.
            raise ValueError(
                "Please check data list type: %r (expected 'train' or 'val')"
                % (data_type,))

        setting_dir = self.this_setting_filename_dir
        unlabeled_dir = setting_dir + '/unlabeled_samples_for_' + data_type
        if not os.path.exists(unlabeled_dir):
            os.makedirs(unlabeled_dir)

        output_prefix = (unlabeled_dir + '/' + data_type + '_' +
                         str(self.num_unlabeled_samples))
        labels_path = output_prefix + '_un_labels.npy'
        filenames_path = output_prefix + '_un_filenames.npy'

        prerequisite_path = setting_dir + '/' + data_type + '_filenames.npy'
        if not os.path.exists(prerequisite_path) or os.path.exists(filenames_path):
            # Same message as before for both cases (missing prerequisite
            # list, or output already present).
            print(
                'The ' + data_type +
                ' unlabeled filename and label lists have already been created'
            )
            return

        # Loaded only when generation is needed; raises if the file is missing.
        label_names = np.load(setting_dir + '/' + data_type +
                              '_labelnames.npy')

        print('Generating ' + data_type + ' unlabeled filenames')
        all_unlabeled_filenames = []
        all_unlabeled_labels = []

        for i in trange(num_total_batches):
            names_for_task = label_names[i]
            sampled_character_folders = [(os.path.join(un_folder, name), name)
                                         for name in names_for_task]

            # shuffle=False: make sure the returned order isn't randomized
            labels_and_images = get_images(
                sampled_character_folders,
                range(self.num_classes),
                nb_samples=self.num_unlabeled_samples,
                shuffle=False)
            all_unlabeled_labels.extend(li[0] for li in labels_and_images)
            # Strip the hard-coded machine-specific path prefix.
            all_unlabeled_filenames.extend(
                li[1].replace('/home/lxz/python_code', '')
                for li in labels_and_images)

        np.save(labels_path, all_unlabeled_labels)
        np.save(filenames_path, all_unlabeled_filenames)

        print('The ' + data_type +
              ' unlabeded filename and label lists are saved')