def make_data_list(self, data_type='train'):
    """Sample episodes and cache their filenames and labels as ``.npy`` files.

    For the requested split, repeatedly samples ``self.num_classes`` class
    folders, collects ``self.num_samples_per_class`` images per class through
    ``get_images`` and stores the results in
    ``<this_setting_filename_dir>/<data_type>_filenames.npy`` and
    ``<this_setting_filename_dir>/<data_type>_labels.npy``.  Nothing is
    regenerated when the filenames file already exists.

    Args:
        data_type: One of ``'train'`` (80000 episodes), ``'test'`` or
            ``'val'`` (600 episodes each).

    Raises:
        ValueError: If ``data_type`` is not one of the supported splits.
    """
    if data_type == 'train':
        folders = self.metatrain_character_folders
        num_total_batches = 80000
    elif data_type == 'test':
        folders = self.metatest_character_folders
        num_total_batches = 600
    elif data_type == 'val':
        folders = self.metaval_character_folders
        num_total_batches = 600
    else:
        # Previously this only printed and then failed later on unbound locals.
        raise ValueError(
            'Please check data list type: got %r' % (data_type,))

    filenames_path = os.path.join(
        self.this_setting_filename_dir, data_type + '_filenames.npy')
    labels_path = os.path.join(
        self.this_setting_filename_dir, data_type + '_labels.npy')

    if os.path.exists(filenames_path):
        print('The ' + data_type
              + ' filename and label lists have already been created')
        return

    print('Generating ' + data_type + ' filenames')
    all_filenames = []
    labels = []
    for _ in trange(num_total_batches):
        sampled_character_folders = random.sample(folders, self.num_classes)
        random.shuffle(sampled_character_folders)
        # shuffle=False: the per-class ordering of the result must be kept.
        labels_and_images = get_images(
            sampled_character_folders,
            range(self.num_classes),
            nb_samples=self.num_samples_per_class,
            shuffle=False)
        labels = [li[0] for li in labels_and_images]
        all_filenames.extend(li[1] for li in labels_and_images)

    # Only the labels of the final episode are written, as in the original;
    # presumably every episode shares the same label layout -- TODO confirm.
    np.save(labels_path, labels)
    np.save(filenames_path, all_filenames)
    print('The ' + data_type + ' filename and label lists are saved')
def generate_data(self, data_type='train'):
    """Sample episodes and cache them as a list of support/query dicts.

    Each episode draws ``FLAGS.way_num`` class folders from the split's
    directory and asks ``get_images`` for a fixed number of images per class.
    The first ``FLAGS.shot_num`` images of every class go to the ``'a'``
    (episode-train) part, the remainder to the ``'b'`` (episode-test) part.
    The resulting list of dicts with keys ``filenamea``, ``filenameb``,
    ``labela`` and ``labelb`` is saved to
    ``<this_setting_filename_dir>/<data_type>_data.npy``.  Nothing is
    regenerated when that file already exists.

    Args:
        data_type: One of ``'train'``, ``'test'`` or ``'val'``.

    Raises:
        ValueError: If ``data_type`` is not one of the supported splits.
    """
    if data_type not in ('train', 'test', 'val'):
        # Previously this only printed and then failed later on unbound locals.
        raise ValueError(
            '[Error] Please check data list type: got %r' % (data_type,))

    data_path = os.path.join(
        self.this_setting_filename_dir, data_type + '_data.npy')
    if os.path.exists(data_path):
        print('The ' + data_type + ' data has already been created')
        return

    shot_num = FLAGS.shot_num
    way_num = FLAGS.way_num

    if data_type == 'train':
        source_dir = FLAGS.metatrain_dir
        num_total_batches = (
            FLAGS.metatrain_iterations * FLAGS.meta_batch_size + 10)
        num_samples_per_class = shot_num + FLAGS.metatrain_epite_sample_num
    else:
        if data_type == 'test':
            source_dir = FLAGS.metatest_dir
        else:
            source_dir = FLAGS.metaval_dir
        num_total_batches = 600
        # Both 'test' and 'val' read metatest_epite_sample_num; a value of 0
        # means "use as many episode-test samples as shots".
        if FLAGS.metatest_epite_sample_num == 0:
            num_samples_per_class = shot_num * 2
        else:
            num_samples_per_class = shot_num + FLAGS.metatest_epite_sample_num

    # Every sub-directory of the split directory is one class.
    folders = [
        os.path.join(source_dir, label)
        for label in os.listdir(source_dir)
        if os.path.isdir(os.path.join(source_dir, label))
    ]

    print('Generating ' + data_type + ' data')
    data_list = []
    for _ in trange(num_total_batches):
        sampled_character_folders = random.sample(folders, way_num)
        random.shuffle(sampled_character_folders)
        # shuffle=False keeps the images grouped per class, which the
        # slicing below relies on.
        labels_and_images = get_images(
            sampled_character_folders,
            range(way_num),
            nb_samples=num_samples_per_class,
            shuffle=False)
        labels = [li[0] for li in labels_and_images]
        filenames = [li[1] for li in labels_and_images]

        this_task_tr_filenames = []
        this_task_tr_labels = []
        this_task_te_filenames = []
        this_task_te_labels = []
        for class_idx in range(way_num):
            start = class_idx * num_samples_per_class
            stop = start + num_samples_per_class
            class_filenames = filenames[start:stop]
            class_labels = labels[start:stop]
            this_task_tr_filenames += class_filenames[:shot_num]
            this_task_tr_labels += class_labels[:shot_num]
            this_task_te_filenames += class_filenames[shot_num:]
            this_task_te_labels += class_labels[shot_num:]

        data_list.append({
            'filenamea': this_task_tr_filenames,
            'filenameb': this_task_te_filenames,
            'labela': this_task_tr_labels,
            'labelb': this_task_te_labels,
        })

    np.save(data_path, data_list)
    print('The ' + data_type + ' data is saved')
def make_test_distractors_list(self, total_samples_per_class=100):
    """Cache filenames of distractor-class images for the 600 test episodes.

    For every test episode, seven distinct classes are drawn from
    ``self.metatest_unlabeled_folder`` that are not among the episode's own
    classes (read from ``test_labelnames.npy``).  The images of the k-th
    distractor class of all episodes are concatenated and written to
    ``<this_setting_filename_dir>/unlabeled_samples_for_test/
    distracting_file/test_<k>_dis_filenames.npy`` for k = 1..7.

    Nothing is regenerated when all seven output files already exist.  The
    earlier check looked for ``test_0_dis_filenames.npy``, which this method
    never writes, so the lists were rebuilt on every call.

    Args:
        total_samples_per_class: Number of images requested from
            ``get_images`` for each distractor class.

    Raises:
        ValueError: If an episode has fewer than seven eligible distractor
            classes (the earlier recursive sampler never terminated cleanly
            in that case).
    """
    num_total_batches = 600
    num_distractors = 7
    un_folder = self.metatest_unlabeled_folder
    unlabeled_dir = os.path.join(
        self.this_setting_filename_dir,
        'unlabeled_samples_for_test', 'distracting_file')
    output_paths = [
        os.path.join(
            unlabeled_dir, 'test_' + str(k + 1) + '_dis_filenames.npy')
        for k in range(num_distractors)
    ]

    if all(os.path.exists(path) for path in output_paths):
        print('The test distractors filename lists have already been created')
        return

    # Loaded only when generation is actually needed; a missing file
    # surfaces as the error raised by np.load.
    label_names = np.load(
        os.path.join(self.this_setting_filename_dir, 'test_labelnames.npy'))

    # The target is two levels deep, so create missing parents as well.
    if not os.path.isdir(unlabeled_dir):
        os.makedirs(unlabeled_dir)

    print('Generating test distracting unlabeled filenames')
    base_list = os.listdir(un_folder)
    dis_filenames = [[] for _ in range(num_distractors)]

    for i in trange(num_total_batches):
        task_labels = set(label_names[i])
        candidates = [name for name in base_list if name not in task_labels]
        if len(candidates) < num_distractors:
            raise ValueError(
                'Not enough distractor classes for test task %d: need %d, '
                'found %d' % (i, num_distractors, len(candidates)))
        # Sampling without replacement yields distinct distractors, like the
        # former reject-and-retry recursion but with a bounded cost.
        task_distractors = random.sample(candidates, num_distractors)

        for j, distractors_label in enumerate(task_distractors):
            sampled_distractors_folders = [
                (os.path.join(un_folder, distractors_label),
                 distractors_label)
            ]
            labels_and_images = get_images(
                sampled_distractors_folders,
                range(1),
                nb_samples=total_samples_per_class,
                shuffle=False)
            # Strip the machine-specific prefix from the stored paths.
            dis_filenames[j].extend(
                li[1].replace('/home/lxz/python_code', '')
                for li in labels_and_images)

    for path, filenames in zip(output_paths, dis_filenames):
        np.save(path, filenames)
    print('The test distractors filename lists are saved')
def make_distractors_list(self, data_type='train'):
    """Cache filenames of distractor-class images for train or val episodes.

    For every episode, ``FLAGS.num_dis`` distinct classes are drawn from the
    split's unlabeled folder that are not among the episode's own classes
    (read from ``<data_type>_labelnames.npy``).  ``get_images`` is asked for
    ``self.num_unlabeled_samples`` images per distractor class and all
    filenames are written to
    ``<this_setting_filename_dir>/unlabeled_samples_for_<data_type>/
    distracting_file/<data_type>_<num_unlabeled_samples>_dis_filenames.npy``.
    Nothing is regenerated when that file already exists.

    Args:
        data_type: ``'train'`` (80000 episodes) or ``'val'`` (600 episodes).

    Raises:
        ValueError: If ``data_type`` is unsupported, or an episode has fewer
            eligible distractor classes than ``FLAGS.num_dis``.
    """
    if data_type == 'train':
        un_folder = self.metatrain_unlabeled_folder
        num_total_batches = 80000
    elif data_type == 'val':
        un_folder = self.metaval_unlabeled_folder
        num_total_batches = 600
    else:
        # Previously this only printed and then failed later on unbound locals.
        raise ValueError(
            'Please check data list type: got %r' % (data_type,))

    unlabeled_dir = os.path.join(
        self.this_setting_filename_dir,
        'unlabeled_samples_for_' + data_type, 'distracting_file')
    output_path = os.path.join(
        unlabeled_dir,
        data_type + '_' + str(self.num_unlabeled_samples)
        + '_dis_filenames.npy')

    if os.path.exists(output_path):
        print('The ' + data_type
              + ' distractors filename lists have already been created')
        return

    # Loaded only when generation is actually needed; a missing file
    # surfaces as the error raised by np.load.
    label_names = np.load(os.path.join(
        self.this_setting_filename_dir, data_type + '_labelnames.npy'))

    # The target is two levels deep, so create missing parents as well.
    if not os.path.isdir(unlabeled_dir):
        os.makedirs(unlabeled_dir)

    print('Generating ' + data_type + ' distracting unlabeled filenames')
    num_dis = FLAGS.num_dis
    base_list = os.listdir(un_folder)
    all_distractors_filenames = []

    for i in trange(num_total_batches):
        task_labels = set(label_names[i])
        candidates = [name for name in base_list if name not in task_labels]
        if len(candidates) < num_dis:
            raise ValueError(
                'Not enough distractor classes for %s task %d: need %d, '
                'found %d' % (data_type, i, num_dis, len(candidates)))
        # Sampling without replacement yields distinct distractors, like the
        # former reject-and-retry recursion but with a bounded cost.
        task_distractors = random.sample(candidates, num_dis)

        sampled_distractors_folders = [
            (os.path.join(un_folder, name), name)
            for name in task_distractors
        ]
        labels_and_images = get_images(
            sampled_distractors_folders,
            range(num_dis),
            nb_samples=self.num_unlabeled_samples,
            shuffle=False)
        # Strip the machine-specific prefix from the stored paths.
        all_distractors_filenames.extend(
            li[1].replace('/home/lxz/python_code', '')
            for li in labels_and_images)

    np.save(output_path, all_distractors_filenames)
    print('The ' + data_type + ' distractors filename lists are saved')
def make_unlabeled_test_data_list(self):
    """Cache ten chunks of unlabeled test filenames and labels.

    For each of the 600 test episodes, ``get_images`` is asked for 100
    images of every episode class (class names come from
    ``test_labelnames.npy``) out of ``self.metatest_unlabeled_folder``.  The
    100 images of each class are cut into ten consecutive groups of ten;
    chunk ``j`` collects the j-th group of every class of every episode and
    is written to ``unlabeled_samples_for_test/test_<j>_un_filenames.npy``
    and ``test_<j>_un_labels.npy`` for j = 0..9.

    Generation runs only when ``test_filenames.npy`` exists and
    ``test_0_un_filenames.npy`` does not.  The label-name file is read only
    in that case; it used to be loaded unconditionally, which failed even
    when the outputs were already cached.
    """
    num_total_batches = 600
    samples_per_class = 100
    num_chunks = 10
    chunk_size = 10
    un_folder = self.metatest_unlabeled_folder
    unlabeled_dir = os.path.join(
        self.this_setting_filename_dir, 'unlabeled_samples_for_test')

    if not os.path.exists(unlabeled_dir):
        os.mkdir(unlabeled_dir)

    prerequisite = os.path.join(
        self.this_setting_filename_dir, 'test_filenames.npy')
    first_output = os.path.join(unlabeled_dir, 'test_0_un_filenames.npy')
    # NOTE: as before, a missing test_filenames.npy is also reported as
    # "already created" and skips generation.
    if not os.path.exists(prerequisite) or os.path.exists(first_output):
        print(
            'The test unlabeled filename and label lists have already been created'
        )
        return

    label_names = np.load(
        os.path.join(self.this_setting_filename_dir, 'test_labelnames.npy'))

    print('Generating test unlabeled filenames')
    file_chunks = [[] for _ in range(num_chunks)]
    label_chunks = [[] for _ in range(num_chunks)]

    for i in trange(num_total_batches):
        names_for_task = label_names[i]
        sampled_character_folders = [
            (os.path.join(un_folder, name), name) for name in names_for_task
        ]
        # shuffle=False keeps the images grouped per class, which the
        # slicing below relies on.
        labels_and_images = get_images(
            sampled_character_folders,
            range(self.num_classes),
            nb_samples=samples_per_class,
            shuffle=False)

        for j in range(num_chunks):
            for k in range(len(names_for_task)):
                # j-th group of ten inside the k-th class's run of 100.
                start = j * chunk_size + k * samples_per_class
                group = labels_and_images[start:start + chunk_size]
                label_chunks[j].extend(li[0] for li in group)
                # Strip the machine-specific prefix from the stored paths.
                file_chunks[j].extend(
                    li[1].replace('/home/lxz/python_code', '')
                    for li in group)

    for n in range(num_chunks):
        np.save(
            os.path.join(unlabeled_dir, 'test_' + str(n) + '_un_labels.npy'),
            label_chunks[n])
        np.save(
            os.path.join(
                unlabeled_dir, 'test_' + str(n) + '_un_filenames.npy'),
            file_chunks[n])
    print('The test unlabeded filename and label lists are saved')
def make_unlabeled_data_list(self, data_type='train'):
    """Cache unlabeled filenames and labels for train or val episodes.

    For each episode, ``get_images`` is asked for
    ``self.num_unlabeled_samples`` images of every episode class (class names
    come from ``<data_type>_labelnames.npy``) out of the split's unlabeled
    folder.  The results are written to
    ``unlabeled_samples_for_<data_type>/
    <data_type>_<num_unlabeled_samples>_un_filenames.npy`` and the matching
    ``..._un_labels.npy``.

    Generation runs only when ``<data_type>_filenames.npy`` exists and the
    output filenames file does not.  The label-name file is read only in
    that case; it used to be loaded unconditionally, which failed even when
    the outputs were already cached.

    Args:
        data_type: ``'train'`` (80000 episodes) or ``'val'`` (600 episodes).

    Raises:
        ValueError: If ``data_type`` is not one of the supported splits.
    """
    if data_type == 'train':
        un_folder = self.metatrain_unlabeled_folder
        num_total_batches = 80000
    elif data_type == 'val':
        un_folder = self.metaval_unlabeled_folder
        num_total_batches = 600
    else:
        # Previously this only printed and then failed later on unbound locals.
        raise ValueError(
            'Please check data list type: got %r' % (data_type,))

    unlabeled_dir = os.path.join(
        self.this_setting_filename_dir, 'unlabeled_samples_for_' + data_type)
    if not os.path.exists(unlabeled_dir):
        os.mkdir(unlabeled_dir)

    output_stem = data_type + '_' + str(self.num_unlabeled_samples)
    filenames_path = os.path.join(
        unlabeled_dir, output_stem + '_un_filenames.npy')
    labels_path = os.path.join(unlabeled_dir, output_stem + '_un_labels.npy')
    prerequisite = os.path.join(
        self.this_setting_filename_dir, data_type + '_filenames.npy')

    # NOTE: as before, a missing <data_type>_filenames.npy is also reported
    # as "already created" and skips generation.
    if not os.path.exists(prerequisite) or os.path.exists(filenames_path):
        print(
            'The ' + data_type +
            ' unlabeled filename and label lists have already been created'
        )
        return

    label_names = np.load(os.path.join(
        self.this_setting_filename_dir, data_type + '_labelnames.npy'))

    print('Generating ' + data_type + ' unlabeled filenames')
    all_unlabeled_filenames = []
    all_unlabeled_labels = []
    for i in trange(num_total_batches):
        names_for_task = label_names[i]
        sampled_character_folders = [
            (os.path.join(un_folder, name), name) for name in names_for_task
        ]
        # shuffle=False: the per-class ordering of the result must be kept.
        labels_and_images = get_images(
            sampled_character_folders,
            range(self.num_classes),
            nb_samples=self.num_unlabeled_samples,
            shuffle=False)
        all_unlabeled_labels.extend(li[0] for li in labels_and_images)
        # Strip the machine-specific prefix from the stored paths.
        all_unlabeled_filenames.extend(
            li[1].replace('/home/lxz/python_code', '')
            for li in labels_and_images)

    np.save(labels_path, all_unlabeled_labels)
    np.save(filenames_path, all_unlabeled_filenames)
    print('The ' + data_type
          + ' unlabeded filename and label lists are saved')