예제 #1
0
 def get_wordlist():
     """Load the dataset's visual wordlist.

     Returns a dict mapping zero-based word index to its descriptor,
     decoded from the stored raw bytes as a float32 numpy array.
     """
     with db.session_scope() as session:
         return {
             entry.id - 1: np.frombuffer(entry.descriptor, dtype=np.float32)
             for entry in session.query(db.Word)
         }
예제 #2
0
 def _create_bag_of_words_for_image(self, image_id, size, wordlist):
     """Compute and store word histograms for a single image.

     Classifies the image's feature descriptors against *wordlist*,
     inserts bag-of-words rows for the whole image plus rule-of-thirds
     (3x3) and rule-of-fifths (5x5) sub-regions, then flags the image
     as processed.

     Raises RuntimeError when the features on disk were extracted with
     a different method than the dataset uses.
     """
     with db.session_scope() as session:
         image = session.query(db.Image).get(image_id)
         method, width, height, features = load_features(
             self._get_feature_file(image.path))
         if method != self.method:
             raise RuntimeError(
                 f"dataset uses '{feature_name(self.method)}' method but "
                 f"features were extracted with '{feature_name(method)}' "
                 "method")
         words = self._classify_descriptors(
             features['descriptor'], wordlist=wordlist)
         feature_tree = feature_rtree(features)
         word_histogram = partial(
             self._word_histogram, size, words, feature_tree, width, height)
         # whole image first (divisions=1), then the 3x3 and 5x5 grids
         for divisions in (1, 3, 5):
             for row in range(divisions):
                 for col in range(divisions):
                     self._insert_bag_of_words(
                         session, word_histogram, image, divisions, row, col)
         image.has_words = True
예제 #3
0
 def get_keyword_counts():
     """Return keyword names mapped to their image counts, most frequent first."""
     with db.session_scope() as session:
         counts = {kw.name: len(kw.images)
                   for kw in session.query(db.Keyword)}
     return dict(sorted(counts.items(),
                        key=operator.itemgetter(1), reverse=True))
예제 #4
0
 def get_keywords_for_image(self, image):
     """Return the set of keyword names attached to *image*.

     Unknown images yield an empty set.
     """
     relative = self.relative_path(image)
     with db.session_scope() as session:
         record = session.query(db.Image).filter(
             db.Image.path == str(relative)).one_or_none()
         if record is None:
             return set()
         return {match.keyword.name for match in record.keywords}
예제 #5
0
 def __init__(self, path=None):
     """Open the dataset at *path* (default: search upward from the cwd).

     Connects to the dataset database and instantiates the feature
     extractor recorded under the 'method' key.
     """
     self._path = find_dataset(Path.cwd()) if path is None else path
     db.connect(self._path / _DATASET_DIR / db._DATABASE_NAME)
     with db.session_scope() as session:
         row = session.query(db.KeyValue.value).filter(
             db.KeyValue.key == 'method').first()
         self._feature_extractor = create_feature_extractor(int(row[0]))
예제 #6
0
 def index_keywords(self, *, progress=None):
     """Assign keywords to every image that has none yet.

     Returns False when the dataset has no keywords to match against;
     otherwise processes each pending image (implicitly returning None).
     """
     with db.session_scope() as session:
         pending = flatten(session.query(db.Image.id).filter(
             ~db.Image.has_keywords).all())
     keywords = self.get_keywords(id=True)
     if not keywords:
         return False
     tree = self._get_keywords_rtree()
     for image_id in get_progress(progress)(pending):
         self._index_image_keywords(image_id, tree)
예제 #7
0
 def index_words(self, *, progress=None):
     """Build bag-of-words data for every image that lacks it.

     Raises RuntimeError when no wordlist has been set for the dataset.
     """
     with db.session_scope() as session:
         pending = flatten(session.query(db.Image.id).filter(
             ~db.Image.has_words).all())
     num_words = len(self.get_wordlist())
     if not num_words:
         raise RuntimeError('no wordlist set for dataset')
     tree = self._get_wordlist_rtree()
     for image_id in get_progress(progress)(pending):
         self._create_bag_of_words_for_image(image_id, num_words, tree)
예제 #8
0
 def _move_image_into(self, image_id, dest_dir):
     """Move an image and its feature file into *dest_dir*, updating the DB path."""
     target_dir = self.absolute_path(dest_dir)
     with db.session_scope() as session:
         image = session.query(db.Image).get(image_id)
         source = self.absolute_path(image.path)
         target = target_dir / source.name
         target_dir.mkdir(parents=True, exist_ok=True)
         # move the image first, then its companion feature file
         source.rename(target)
         self._get_feature_file(source).rename(self._get_feature_file(target))
         image.path = str(self.relative_path(target))
예제 #9
0
 def get_keywords(id=False):
     """Return each keyword's theme histograms stacked into one array.

     Keys are keyword ids when *id* is true, otherwise keyword names;
     values are float32 arrays with one row per theme histogram.
     """
     result = {}
     with db.session_scope() as session:
         for keyword in session.query(db.Keyword):
             stacked = np.vstack([
                 np.frombuffer(theme.word_histogram, dtype=np.float32)
                 for theme in keyword.themes])
             result[keyword.id if id else keyword.name] = stacked
     return result
예제 #10
0
 def get_images_from_keyword(keyword):
     """Return the paths of all images matching *keyword*.

     Raises ValueError when the keyword does not exist in the dataset.
     """
     # keep the requested name: the local is rebound to the DB row below,
     # so the error message must not interpolate the rebound value
     name = keyword
     images = []
     with db.session_scope() as session:
         keyword = session.query(db.Keyword).filter(
             db.Keyword.name == keyword).one_or_none()
         if keyword is None:
             # was `return ValueError(...)`: the exception was constructed
             # but never raised, and the message printed 'None'
             raise ValueError(
                 f"keyword '{name}' is not in this dataset")
         for match in keyword.images:
             images.append(match.image.path)
     return images
예제 #11
0
 def keyword_generator(self, paths):
     """Build a KeywordGenerator fed with the whole-image histograms of *paths*."""
     wanted = {str(self.relative_path(f)) for f in image_files(paths)}
     generator = KeywordGenerator()
     with db.session_scope() as session:
         # divisions == 1 selects the whole-image (undivided) histograms
         rows = (session.query(db.BagOfWords.word_histogram)
                 .join(db.BagOfWords.image)
                 .filter(db.Image.path.in_(wanted))
                 .filter(db.BagOfWords.divisions == 1))
         for (raw,) in rows:
             generator.add_histogram(np.frombuffer(raw, dtype=np.float32))
     return generator
예제 #12
0
 def index_images(self, *, progress=None):
     """Synchronise the database with the images on disk.

     Returns a tuple (added, removed, orphaned) of relative paths:
     newly indexed images, database rows whose file vanished, and
     feature files left behind without a matching image.
     """
     added = []
     on_disk = []
     with db.session_scope() as session:
         for path in get_progress(progress)(image_files(self.path)):
             rel = self.relative_path(path)
             if self._index_image(session, path):
                 added.append(rel)
             on_disk.append(rel)
         removed = self._remove_missing_images(session, on_disk)
     orphaned = self._remove_orphaned_features(on_disk)
     return added, removed, orphaned
예제 #13
0
def init_dataset(path, method_id=None):
    """Create a new dataset under *path* using feature method *method_id*.

    Raises ValueError when *path* is not a directory and FileExistsError
    when a dataset database already exists there.
    """
    path = Path(path).absolute()
    # was `if path.is_dir(): ValueError(...)`: the condition was inverted
    # and the exception was constructed but never raised
    if not path.is_dir():
        raise ValueError(f"path '{path}' is not a directory")
    dataset_path = path / _DATASET_DIR
    dataset_path.mkdir(exist_ok=True)
    if (dataset_path / Path(db._DATABASE_NAME)).is_file():
        raise FileExistsError(f"existing dataset at '{dataset_path}'")
    method_id = create_feature_extractor(method_id).id
    db.init(dataset_path)
    with db.session_scope() as session:
        # persist the chosen feature method so later opens can recreate it
        session.add(db.KeyValue(key='method', value=str(int(method_id))))
예제 #14
0
 def set_wordlist(words):
     """Replace the dataset's wordlist with *words* (one descriptor per row).

     All derived data (bags of words, keywords, matches) is wiped first,
     since it was computed against the previous wordlist.
     """
     with db.session_scope() as session:
         # derived data becomes stale once the wordlist changes
         session.query(db.Image).update(
             {'has_words': False, 'has_keywords': False})
         for table in (db.Word, db.BagOfWords, db.Keyword, db.KeywordMatch):
             session.query(table).delete()
         # fingerprint the new wordlist, then store it word by word
         digest = hashlib.sha1(words.tobytes()).hexdigest()
         session.add(db.KeyValue(key='wordlist_hash', value=digest))
         for word_id, descriptor in enumerate(words, start=1):
             session.add(db.Word(id=word_id, descriptor=descriptor.tobytes()))
예제 #15
0
 def set_keywords(self, keywords):
     """Replace all keywords with *keywords* (name -> iterable of histograms).

     Existing keyword data and per-image keyword flags are wiped first.
     """
     with db.session_scope() as session:
         # previous keyword assignments become invalid
         session.query(db.Image).update({'has_keywords': False})
         for table in (db.Keyword, db.KeywordTheme, db.KeywordMatch):
             session.query(table).delete()
         # one Keyword row per name, one KeywordTheme row per histogram
         for name, histograms in keywords.items():
             row = db.Keyword(name=name)
             session.add(row)
             for histogram in histograms:
                 session.add(db.KeywordTheme(
                     keyword=row, word_histogram=histogram.tobytes()))
예제 #16
0
 def create_clusterer(*, global_only=False, image_cohesion_factor=2):
     """Build a Clusterer over the stored bag-of-words histograms.

     With *global_only* set, only whole-image histograms (divisions == 1)
     are used. *image_cohesion_factor* controls how strongly finer
     subdivisions are down-weighted; it must not be negative.

     Raises ValueError for a negative *image_cohesion_factor*.
     """
     if image_cohesion_factor < 0:
         raise ValueError("'image_cohesion_factor' cannot be less than 0")
     clusterer = Clusterer()
     with db.session_scope() as session:
         for bag in session.query(db.BagOfWords):
             if global_only and bag.divisions > 1:
                 continue
             # finer grids contribute less, scaled by the cohesion factor
             clusterer.add_histogram(
                 bag.image_id,
                 np.frombuffer(bag.word_histogram, dtype=np.float32),
                 weight=1 / (bag.divisions ** image_cohesion_factor))
     return clusterer
예제 #17
0
 def index_features(self, *, progress=None):
     """Extract and store features for every image that lacks them.

     Returns the (relative) paths of the newly processed images.
     """
     processed = []
     with db.session_scope() as session:
         pending = session.query(db.Image).filter(
             ~db.Image.has_features).all()
         for image in get_progress(progress)(pending):
             absolute = self.absolute_path(image.path)
             data = cv_image(absolute)
             extracted = self._feature_extractor.extract(
                 data, max_features=_MAX_FEATURES_PER_IMAGE)
             self._feature_extractor.save_features(
                 self._get_feature_file(absolute), data, extracted)
             image.has_features = True
             processed.append(image.path)
     return processed
예제 #18
0
 def _index_image_keywords(self, image_id, keywords_tree):
     """Match an image's bag-of-words histograms against the keyword tree.

     Each histogram votes for its nearest keyword, weighted so coarser
     subdivisions count more (1 / divisions**1.5); keywords accumulating
     a total weight of at least 1 are stored as KeywordMatch rows, then
     the image is flagged as indexed.
     """
     with db.session_scope() as session:
         image = session.query(db.Image).get(image_id)
         votes = {}
         for bag in image.words:
             histogram = np.frombuffer(bag.word_histogram, dtype=np.float32)
             weight = 1 / (bag.divisions ** 1.5)
             # NOTE(review): the histogram is tiled x2 — presumably to form
             # the min/max coordinate pair the rtree expects; confirm
             # against _get_keywords_rtree
             for keyword_id in keywords_tree.nearest(np.tile(histogram, 2), 1):
                 votes[keyword_id] = votes.get(keyword_id, 0) + weight
         # keep only keywords with enough accumulated weight
         matched = {k for k, total in votes.items() if total >= 1}
         for keyword_id in matched:
             keyword = session.query(db.Keyword).get(keyword_id)
             session.add(db.KeywordMatch(image=image, keyword=keyword))
         image.has_keywords = True
예제 #19
0
 def _get_feature_files(self):
     """Return the feature-file path for every image in the dataset."""
     with db.session_scope() as session:
         rows = session.query(db.Image.path).all()
         return [self._get_feature_file(row[0]) for row in rows]