class FaceSearch:
    def __init__(self, dims=128, metric='euclidean'):
        """
        Every time the service starts, load all embedding vectors into a tree.
        With 76 images, building time is about 0.001s.
        """
        self.dims = dims
        self.metric = metric
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.db = DBManagement()
        self.extractFeature = ExtractFeature()
        self._build_tree()

    def _build_tree(self, n_trees=18):
        """
        n_trees: should be roughly twice the number of indexed items.
        """
        # FIXME: Build annoy tree here.
        v_arr = self.db.get_features()
        for index, vector in enumerate(v_arr):
            self.feature_index.add_item(index, vector)
        # n_trees = (index + 1) * 2
        self.feature_index.build(n_trees)

    def save_tree(self, path):
        self.feature_index.save(path)

    def load_tree(self, path):
        """Not usable yet."""
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.feature_index.load(path)

    def search_index_by_vector(self, image, top_n=1):
        # FIXME: Return the clusters and distance
        data = {'success': False}
        try:
            vector = self.extractFeature.extract_feature_insight_face(image)
            # print('Vector: ', vector.shape)
            distances = self.feature_index.get_nns_by_vector(
                vector, top_n, include_distances=True)
            ids = self.db.get_ids()
            results = [{
                'id': ids[a],
                'distance': distances[1][i]
            } for i, a in enumerate(distances[0])]
            # If the closest distance is greater than or equal to
            # config.face_threshold, the face is labelled 'unknown'.
            if results[0]['distance'] >= config.face_threshold:
                results[0]['id'] = 'unknown'
            data['success'] = True
            data['results'] = results
            return data
        except Exception:
            return data
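# A minimal usage sketch, assuming cv2 is available, config.face_threshold is defined
# by the project's config module, and the image path is a placeholder; only FaceSearch's
# constructor and search_index_by_vector() come from the class above.
if __name__ == '__main__':
    import cv2

    searcher = FaceSearch(dims=128, metric='euclidean')  # loads DB embeddings and builds the Annoy tree
    query = cv2.imread('test_face.jpg')  # hypothetical query image
    response = searcher.search_index_by_vector(query, top_n=1)
    if response['success']:
        match = response['results'][0]
        # The id is 'unknown' when the best distance >= config.face_threshold
        print('Matched id: {} (distance: {:.3f})'.format(match['id'], match['distance']))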
class ImageSearch:
    def __init__(self, dims=128, metric='euclidean'):
        """
        Every time the service starts, load all embedding vectors into a tree.
        With 76 images, building time is about 0.001s.
        """
        self.dims = dims
        self.metric = metric
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.db = DBManagement()
        self.extractFeature = ExtractFeature()
        self._build_tree()

    def _build_tree(self):
        v_arr = self.db.get_features()
        for index, vector in enumerate(v_arr):
            self.feature_index.add_item(index, vector)
        # Use roughly twice as many trees as there are indexed items.
        n_trees = (index + 1) * 2
        self.feature_index.build(n_trees)

    def save_tree(self, path):
        self.feature_index.save(path)

    def load_tree(self, path):
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.feature_index.load(path)

    def search_index_by_vector(self, image, top_n=10):
        # FIXME: Return the clusters and distance
        data = {'success': False}
        try:
            vector = self.extractFeature.extract_feature_insight_face(image)
            # print('Vector: ', vector.shape)
            distances = self.feature_index.get_nns_by_vector(
                vector, top_n, include_distances=True)
            ids = self.db.get_clusters()
            results = [{'id_cluster': ids[a], 'distance': distances[1][i]}
                       for i, a in enumerate(distances[0])]
            # If the closest distance is greater than or equal to 1, the search failed.
            if results[0]['distance'] >= 1:
                return data
            data['success'] = True
            data['results'] = results
            return data
        except Exception:
            return None
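# A minimal persistence sketch, assuming the index file and query image paths are
# placeholders; it reuses save_tree()/load_tree() from the class above so the Annoy
# tree does not have to be rebuilt from the database on every start.
if __name__ == '__main__':
    import cv2

    searcher = ImageSearch(dims=128, metric='euclidean')
    searcher.save_tree('image_index.ann')   # hypothetical index file
    searcher.load_tree('image_index.ann')   # reload instead of rebuilding from the DB
    query = cv2.imread('test_image.jpg')    # hypothetical query image
    response = searcher.search_index_by_vector(query, top_n=10)
    if response and response['success']:    # the method can return None on errors
        for hit in response['results']:
            print(hit['id_cluster'], hit['distance'])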
class ExtractFeature:
    def __init__(self):
        self.load_model()

    def load_model(self):
        self.db = DBManagement()
        # Initialize the lists of extracted facial embeddings and corresponding people ids
        self.features = []
        self.ids = []
        # self.genders = []
        # self.ages = []
        # self.clusters = []
        # self.fileNames = []
        parser = argparse.ArgumentParser()
        parser.add_argument('--image-size', default='112,112', help='')
        parser.add_argument('--model',
                            default='insightface/models/model-y1-test2/model,0',
                            help='path to load model.')
        parser.add_argument('--ga-model',
                            default='insightface/models/gamodel-r50/model,0',
                            help='path to load model.')
        parser.add_argument('--gpu', default=0, type=int, help='gpu id')
        parser.add_argument('--det', default=0, type=int,
                            help='mtcnn option, 1 means using R+O, 0 means detect from beginning')
        parser.add_argument('--flip', default=0, type=int,
                            help='whether do lr flip aug')
        parser.add_argument('--threshold', default=1.24, type=float,
                            help='ver dist threshold')
        args = parser.parse_args()
        # Initialize the face embedder
        self.model = face_model.FaceModel(args)

    def extract_feature_insight_faces(self, imagePath):
        for classname in os.listdir(imagePath):
            total = 0
            for filename in os.listdir(os.path.join(imagePath, classname)):
                path = os.path.join(imagePath, classname, filename)
                # print(path)
                image = cv2.imread(path)
                face = self.model.get_input(image)
                if face is None:
                    print('No face detected\n')
                    continue
                embedding = self.model.get_feature(face)
                id = '{}_{}'.format(classname, total)
                self.ids.append(id)
                self.features.append(embedding)
                total += 1
                print("[extract_feature_insight_face]: Extract face ", id)
            print(total, " faces embedded")
        # Save the embeddings and their ids to the database
        self.db.save_data(self.features, self.ids)

    def extract_feature_insight_face(self, image):
        face = self.model.get_input(image)
        if face is None:
            return None
        embedding = self.model.get_feature(face)
        return embedding
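# A minimal usage sketch, assuming a dataset laid out as <root>/<person_name>/<image>;
# the directory and file names are placeholders. Note that load_model() calls
# argparse.parse_args(), so unrelated command-line flags will reach FaceModel.
if __name__ == '__main__':
    import cv2

    extractor = ExtractFeature()
    # Embed every face found under the dataset root and persist them through DBManagement
    extractor.extract_feature_insight_faces('dataset/faces')
    # Embed a single image; returns None when no face is detected
    embedding = extractor.extract_feature_insight_face(cv2.imread('new_face.jpg'))
    if embedding is not None:
        print('Embedding length:', len(embedding))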
def dump_data(processed_img):
    db = DBManagement()
    db.save_image_data(processed_img)
class DataGenerator(Sequence):
    def __init__(self, mode='train'):
        self.database = DBManagement()
        # Get the captions
        self.descriptions = utils.read_caption_clean_file(
            'Flickr8k_text/Flickr8k.cleaned.lemma.token.txt')
        self.idxtoword, self.wordtoidx, self.vocab_size = utils.map_w2id(
            self.descriptions.values())
        self.max_len = utils.calculate_caption_max_len(
            self.descriptions.values())
        if mode not in ('train', 'val', 'test'):
            raise ValueError("mode must be 'train', 'val' or 'test'")
        self.mode = mode
        # Choose which image vectors the generator serves
        if self.mode == 'train':
            # Train image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.trainImages.txt')
        elif self.mode == 'val':
            # Validation image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.devImages.txt')
        elif self.mode == 'test':
            # Test image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.testImages.txt')
        #### Test purpose ####
        # self.imgs = self.database.get_image_data()

    def __len__(self):
        return len(self.imgs) // cf.batch_size

    def __getitem__(self, idx):
        X1, X2, y = list(), list(), list()
        # Select the images that belong to this batch
        batch_keys = list(self.imgs.keys())[idx * cf.batch_size:(idx + 1) * cf.batch_size]
        img_dict = {k: v for k, v in self.imgs.items() if k in batch_keys}
        # print('\n{}.Batch size: {}\n'.format(idx, len(img_dict)))
        for k, v in img_dict.items():
            desc_list = self.descriptions[k.split('.')[0]]
            ### Debug ###
            # print("Length of feature vector: {} of {}\n".format(len(v), k))
            for desc in desc_list:
                seq = [self.wordtoidx[word] for word in desc.split(' ')]
                for i in range(1, len(seq)):
                    # Split into input and output pair
                    in_seq, out_seq = seq[:i], seq[i]
                    # Pad the input sequence
                    in_seq = pad_sequences([in_seq], maxlen=self.max_len)[0]
                    # One-hot encode the output word
                    out_seq = to_categorical([out_seq], num_classes=self.vocab_size)[0]
                    # Store
                    X1.append(v)
                    X2.append(in_seq)
                    y.append(out_seq)
        # A Keras Sequence should return an (inputs, targets) tuple
        return [np.array(X1), np.array(X2)], np.array(y)
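# A minimal training sketch, assuming a two-input Keras captioning model (image feature
# vector plus padded word sequence) is built elsewhere; build_model and the epoch count
# are placeholders, only DataGenerator comes from the class above.
if __name__ == '__main__':
    train_generator = DataGenerator(mode='train')
    val_generator = DataGenerator(mode='val')
    model = build_model(train_generator.vocab_size, train_generator.max_len)  # hypothetical helper
    # tf.keras accepts a Sequence directly in fit(); older standalone Keras uses fit_generator()
    model.fit(train_generator, validation_data=val_generator, epochs=20)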