import os
from glob import glob

import numpy as np
import joblib  # in older scikit-learn versions: from sklearn.externals import joblib
from sklearn.feature_extraction.text import TfidfVectorizer

# Project-local helpers assumed to be defined elsewhere in this repo:
# load_vocabulary, extract_patches_multidir. Hedged sketches of the
# cal_distance and convert2str helpers appear further below.


def generate_representor(data_dir, dictionary_path, subclass):
    vocabulary_dict = load_vocabulary(dictionary_path)
    shape_vocabulary = np.shape(vocabulary_dict[0])
    vocabulary_size = shape_vocabulary[0]
    representers = []
    patches, coding_labeles, labeles = extract_patches_multidir(
        data_dir, subclasses=[subclass], return_flag=True)
    for case_index, cur_patches in enumerate(patches):
        # one histogram row per coding label (10 assumed here), one column per visual word
        cur_case_representor = np.zeros([10, vocabulary_size])
        # group this case's patches by their coding label
        patches_coding_labeles = {}
        for patch_index, cur_patch in enumerate(cur_patches):
            cur_coding_label = coding_labeles[case_index][patch_index]
            if cur_coding_label not in patches_coding_labeles:
                patches_coding_labeles[cur_coding_label] = []
            patches_coding_labeles[cur_coding_label].append(cur_patch)
        # vote each patch into the nearest word of its label-specific vocabulary
        for key in patches_coding_labeles:
            cur_patches_coding_label = patches_coding_labeles[key]
            cur_vocabulary = vocabulary_dict[key]
            distance_arr = cal_distance(cur_patches_coding_label, cur_vocabulary)
            for i in range(len(distance_arr)):
                min_index = np.argmin(distance_arr[i])
                cur_case_representor[int(key), min_index] += 1
        representers.append(cur_case_representor.flatten())
    return representers, labeles
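

# `cal_distance` is a project-local helper whose definition does not appear in
# this file. The sketch below is a guess consistent with how it is called:
# given n patches and a k-word vocabulary, it returns an (n, k) matrix whose
# (i, j) entry is the Euclidean distance from patch i to word j.
def cal_distance(patches, vocabulary):
    patches = np.asarray(patches, dtype=np.float64)
    vocabulary = np.asarray(vocabulary, dtype=np.float64)
    # (n, 1, d) - (1, k, d) broadcasts to an (n, k, d) difference tensor
    diff = patches[:, None, :] - vocabulary[None, :, :]
    return np.sqrt(np.sum(diff ** 2, axis=-1))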
def generate_representor_version2(data_dir, dictionary_path, subclass, vectorizer=None):
    save_dir = '/home/give/PycharmProjects/ICPR2018/LeaningBased/BoVW-NGram/patches'
    save_dir = os.path.join(save_dir, subclass)
    paths = glob(os.path.join(save_dir, '*.npy'))
    kmeans_model = joblib.load(dictionary_path)
    shape_vocabulary = np.shape(kmeans_model.cluster_centers_)
    vocabulary_size = shape_vocabulary[0]
    if len(paths) == 0:
        # no cached patches yet: extract them from the raw data
        patches, _, labeles = extract_patches_multidir(
            data_dir, subclasses=[subclass], return_flag=True, patch_size=3)
    else:
        # reload cached patches; the label is encoded in the file name as <case>_<label>.npy
        patches = []
        labeles = []
        for path in paths:
            patches.append(np.load(path))
            labeles.append(int(os.path.basename(path).split('.npy')[0].split('_')[1]))
    all_patches = []
    counts = []
    for case_index, cur_patches in enumerate(patches):
        if len(paths) == 0:
            # cache the freshly extracted patches for the next run
            np.save(os.path.join(save_dir, str(case_index) + '_' + str(labeles[case_index])),
                    cur_patches)
        all_patches.extend(cur_patches)
        counts.append(len(cur_patches))
    print('shape of all patches:', np.shape(all_patches))
    predicted_labels = kmeans_model.predict(all_patches)
    # encode each case's sequence of visual words as a string, then vectorize
    # its character n-grams (1- to 3-grams of visual words)
    strs = convert2str(predicted_labels, counts)
    if vectorizer is None:
        vectorizer = TfidfVectorizer(analyzer='char', min_df=1, ngram_range=(1, 3),
                                     use_idf=False, stop_words=None)
        vectorizer = vectorizer.fit(strs)
    crs_matrix = vectorizer.transform(strs).toarray()
    return np.asarray(crs_matrix, np.float32), labeles, vectorizer
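

# `convert2str` is likewise a project-local helper not shown here. A sketch of
# the assumed behavior: each case's run of predicted cluster ids becomes one
# string, one character per visual word, so the char n-grams consumed by the
# TfidfVectorizer above correspond to n-grams of visual words. The +0x4E00
# offset maps ids into a caseless Unicode block, so the vectorizer's default
# lowercasing cannot merge two distinct ids. The fitted vectorizer returned by
# generate_representor_version2 should be passed back in for the test split.
def convert2str(predicted_labels, counts):
    strs = []
    start = 0
    for count in counts:
        case_labels = predicted_labels[start:start + count]
        strs.append(''.join(chr(int(label) + 0x4E00) for label in case_labels))
        start += count
    return strs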
def generate_representor(data_dir, dictionary_path, subclass, phase_name):
    dictionary = load_vocabulary(dictionary_path)
    shape_vocabulary = np.shape(dictionary)
    vocabulary_size = shape_vocabulary[0]
    representers = []
    patches, coding_labeles, labeles = extract_patches_multidir(
        data_dir, subclasses=[subclass], return_flag=True, phase_name=phase_name)
    # flatten all cases' patches so the distances can be computed in one batch
    all_patches = []
    counts = []
    for case_index, cur_patches in enumerate(patches):
        print(np.shape(cur_patches))
        all_patches.extend(cur_patches)
        counts.append(len(cur_patches))
    all_distance_arr = cal_distance(all_patches, dictionary)
    # slice the batched distance matrix back into per-case chunks
    start = 0
    for case_index, count in enumerate(counts):
        distance_arr = all_distance_arr[start:start + count]
        cur_case_representor = np.zeros([1, vocabulary_size])
        for i in range(len(distance_arr)):
            # hard assignment: vote for the nearest visual word
            min_index = np.argmin(distance_arr[i])
            cur_case_representor[0, min_index] += 1
        representers.append(cur_case_representor.squeeze())
        start += count
    return representers, labeles
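

# The voting loop above is equivalent to a bincount over per-patch argmins; a
# tiny self-contained illustration on a random distance matrix (`_rng`, `dist`
# and the function name are throwaway names for this illustration only):
def _argmin_histogram_demo():
    _rng = np.random.RandomState(0)
    dist = _rng.rand(40, 16)                    # 40 patches vs. 16 words
    votes = np.zeros(16)
    for row in dist:
        votes[np.argmin(row)] += 1              # the same voting rule as above
    assert np.array_equal(votes, np.bincount(np.argmin(dist, axis=1), minlength=16))
    return votes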
def generate_representor_version2(data_dir, dictionary_path, subclass):
    kmeans_model = joblib.load(dictionary_path)
    shape_vocabulary = np.shape(kmeans_model.cluster_centers_)
    vocabulary_size = shape_vocabulary[0]
    representers = []
    patches, coding_labeles, labeles = extract_patches_multidir(
        data_dir, subclasses=[subclass], return_flag=True)
    all_patches = []
    counts = []
    for case_index, cur_patches in enumerate(patches):
        print(np.shape(cur_patches))
        all_patches.extend(cur_patches)
        counts.append(len(cur_patches))
    # let the fitted k-means assign every patch to its nearest cluster center
    predicted_labels = kmeans_model.predict(all_patches)
    start = 0
    for case_index, count in enumerate(counts):
        cur_predicted_label = predicted_labels[start: start + count]
        # normalized histogram over the whole vocabulary; `density=True`
        # replaces the deprecated `normed=True`, and the explicit range pins
        # bin j to cluster id j even when some clusters never occur
        representer = np.histogram(cur_predicted_label, bins=vocabulary_size,
                                   range=(0, vocabulary_size), density=True)[0]
        representers.append(np.array(representer).squeeze())
        start += count
    return representers, labeles
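

# A self-contained check of the histogram step on synthetic data, using a
# small scikit-learn KMeans as a stand-in for the saved vocabulary model; the
# data and all names here are illustrative only.
def _bovw_histogram_demo():
    from sklearn.cluster import KMeans
    rng = np.random.RandomState(0)
    patches = rng.rand(200, 9)                  # 200 flattened 3x3 patches
    kmeans = KMeans(n_clusters=8, n_init=10, random_state=0).fit(patches)
    case_words = kmeans.predict(patches[:40])   # word ids for one 40-patch case
    hist = np.histogram(case_words, bins=8, range=(0, 8), density=True)[0]
    assert abs(hist.sum() - 1.0) < 1e-9         # unit bin width, so density sums to 1
    return hist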
def generate_representor(data_dir, dictionary_path, subclass):
    dictionary = load_vocabulary(dictionary_path)
    shape_vocabulary = np.shape(dictionary)
    vocabulary_size = shape_vocabulary[0]
    representers = []
    patches, coding_labeles, labeles = extract_patches_multidir(
        data_dir, subclasses=[subclass], return_flag=True)
    # flatten all cases' patches so distances are computed in one batch
    all_patches = []
    counts = []
    for case_index, cur_patches in enumerate(patches):
        print(np.shape(cur_patches))
        all_patches.extend(cur_patches)
        counts.append(len(cur_patches))
    all_distance_arr = cal_distance(all_patches, dictionary)
    # slice the batched distance matrix back into per-case chunks and vote
    start = 0
    for case_index, count in enumerate(counts):
        distance_arr = all_distance_arr[start:start + count]
        cur_case_representor = np.zeros([1, vocabulary_size])
        for i in range(len(distance_arr)):
            min_index = np.argmin(distance_arr[i])
            cur_case_representor[0, min_index] += 1
        representers.append(cur_case_representor.squeeze())
        start += count
    return representers, labeles
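

# Hypothetical end-to-end usage: build representors for a train and a test
# split and score a linear SVM. The paths and subclass id are placeholders,
# and the SVM is one reasonable downstream classifier, not something the
# functions above prescribe.
if __name__ == '__main__':
    from sklearn.svm import SVC

    train_x, train_y = generate_representor(
        '/path/to/train', '/path/to/vocabulary.npy', subclass='0')
    test_x, test_y = generate_representor(
        '/path/to/test', '/path/to/vocabulary.npy', subclass='0')
    classifier = SVC(kernel='linear').fit(train_x, train_y)
    print('accuracy:', classifier.score(test_x, test_y))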