def setWordCount(self, category, word, count, verbose=False):
    """Store ``count`` as the number of occurrences of ``word`` in ``category``.

    A category that is not yet a key of ``self.data`` is created through
    ``addCategory`` before the count is written. An existing count for the
    word is overwritten.

    Args:
        category: Name of the category; must pass ``TypeValidator.isString``.
        word: Word whose count is set; must pass ``TypeValidator.isString``.
        count: Value stored for the word. It is not validated here.
        verbose: Forwarded to ``addCategory`` when the category is created.

    Raises:
        ValueError: If ``category`` or ``word`` is rejected by
            ``TypeValidator.isString``.
    """
    # The messages name this method (they used to say "addWord") so that a
    # failure points at the call that actually raised.
    if not TypeValidator.isString(category):
        raise ValueError(
            'wordRepository::setWordCount: expected a string for [category], got',
            type(category), '.')
    if not TypeValidator.isString(word):
        raise ValueError(
            'wordRepository::setWordCount: expected a string for [word], got',
            type(word), ' .')

    # Unknown categories are created on demand rather than rejected.
    if category not in self.data:
        self.addCategory(category, verbose=verbose)
    self.data[category][word] = count
def appendInBulk(self, url, category, words, imagesCount, autoAddCategory=True, verbose=False):
    """Merge the word counts of one article into ``category``.

    Each URL is processed at most once: a URL already present in
    ``self.urls`` is skipped without touching any counter. Otherwise the
    category's article counter is incremented by one, its image counter by
    ``imagesCount``, and every count in ``words`` is added to the count
    already stored for that word.

    Args:
        url: Identifier of the article; used only for de-duplication.
        category: Name of the category; must pass ``TypeValidator.isString``.
        words: Mapping of word to count; iterated with ``.items()``.
        imagesCount: Amount added to ``self.imagesCounts[category]``.
        autoAddCategory: When true, an unknown category is created through
            ``addCategory``; when false, an unknown category raises.
        verbose: Enables the "already added" message and is forwarded to
            ``addCategory`` and ``getWordCount``.

    Raises:
        ValueError: If ``category`` is rejected by ``TypeValidator.isString``.
        KeyError: If ``category`` is unknown and ``autoAddCategory`` is false.
    """
    if url in self.urls:
        if verbose:
            print('Url "', url, '" has been already added, skipping')
        return

    if not TypeValidator.isString(category):
        raise ValueError(
            'wordRepository::appendWordsInBulk: expected a string for [category], got',
            type(category), '.')
    if category not in self.data:
        if not autoAddCategory:
            # Report the missing key itself; its type is already known to
            # be a string at this point and says nothing useful.
            raise KeyError('wordRepository::appendWordsInBulk: key ', category, ' not found.')
        self.addCategory(category, verbose=verbose)

    # Register the URL only once the arguments have been accepted. Doing it
    # earlier would mark a rejected call as processed, so a corrected retry
    # of the same URL would be silently skipped.
    self.urls.add(url)

    self.articlesCounts[category] += 1
    self.imagesCounts[category] += imagesCount
    for word, count in words.items():
        newCount = self.getWordCount(category, word, verbose=verbose) + count
        self.setWordCount(category, word, newCount)
def getAllCategoryWords(self, category):
    """Return the word-count mapping stored for ``category``.

    The returned object is the mapping held in ``self.data`` itself, not a
    copy, so changes made by the caller are visible to this instance.

    Args:
        category: Name of the category; must pass ``TypeValidator.isString``.

    Returns:
        The value stored under ``category`` in ``self.data``.

    Raises:
        ValueError: If ``category`` is rejected by ``TypeValidator.isString``.
        KeyError: If ``category`` is not a key of ``self.data``.
    """
    if not TypeValidator.isString(category):
        raise ValueError(
            'wordRepository::getCategoryWords: expected a string for [category], got',
            type(category), '.')
    if category not in self.data:
        # Report the missing key itself rather than its type, which is
        # always a string once the check above has passed.
        raise KeyError('wordRepository::getCategoryWords: key ', category, ' not found.')
    return self.data[category]
def getWordCount(self, category, word, verbose=False):
    """Return the count stored for ``word`` in ``category``.

    A word that is not yet present in the category is treated as having a
    count of 0. Note that this lookup also writes that 0 into
    ``self.data[category]``, so the word becomes part of the category.

    Args:
        category: Name of the category; must pass ``TypeValidator.isString``.
        word: Word to look up; must pass ``TypeValidator.isString``.
        verbose: When true, print a message if the word had to be
            initialised to 0.

    Returns:
        The count stored for the word (0 if it was not present before).

    Raises:
        ValueError: If ``category`` or ``word`` is rejected by
            ``TypeValidator.isString``.
        KeyError: If ``category`` is not a key of ``self.data``.
    """
    if not TypeValidator.isString(category):
        raise ValueError(
            'wordRepository::getWordCount: expected a string for [category], got',
            type(category), '.')
    if not TypeValidator.isString(word):
        raise ValueError(
            'wordRepository::getWordCount: expected a string for [word], got',
            type(word), ' .')
    if category not in self.data:
        # Report the missing key itself rather than its type, which is
        # always a string once the checks above have passed.
        raise KeyError('wordRepository::getWordCount: key ', category, ' not found.')

    categoryWords = self.data[category]
    if word not in categoryWords:
        if verbose:
            print('wordRepository::getWordCount: didn\'t found "', word,
                  '" in "', category, '", assuming count = 0')
        # Unknown words are stored with a zero count, not just reported.
        categoryWords[word] = 0
    return categoryWords[word]
def loadData(self, dictOfWords: list, dictOfClasses: dict, tabOfImageInfo: list):
    """Load the database of words, classes and images.

    All three inputs are validated first. Only when every check passes is
    the current content discarded with ``self.clear()`` and replaced by the
    result of ``self.__fillData``, so a rejected call leaves the instance
    unchanged.

    Args:
        dictOfWords: Per-class word data. It is converted with ``list()``
            and checked by ``TypeValidator.isTabOfDictsOfInts`` together
            with the number of entries in ``dictOfClasses``.
        dictOfClasses: Class data; must pass ``TypeValidator.isDict`` and
            ``TypeValidator.isDictOfInt``.
        tabOfImageInfo: Image data; must pass
            ``TypeValidator.isTabOfInts``.

    Raises:
        ValueError: If any of the inputs fails its validation. The number
            in the message identifies the failed check.
    """
    # dictOfClasses is checked first because its length is needed to
    # validate dictOfWords below.
    if not TypeValidator.isDict(dictOfClasses):
        # The message names the argument actually tested here; it used to
        # blame dictOfWords.
        raise ValueError(
            'Bad types of input data (1). dictOfClasses must be a dict')
    if not TypeValidator.isTabOfDictsOfInts(list(dictOfWords), len(dictOfClasses)):
        raise ValueError(
            'Bad types of input data (2). dictOfWords must be a tab of dicts of ints and have this same len like tabOfClasses'
        )
    if not TypeValidator.isDictOfInt(dictOfClasses):
        raise ValueError(
            'Bad types of input data (3). dictOfClasses must be a dict of ints'
        )
    if not TypeValidator.isTabOfInts(tabOfImageInfo):
        raise ValueError(
            'Bad types of input data (4). tabOfImageInfo must be a tab of ints'
        )

    self.clear()
    self.__words, self.__classes, self.__images = self.__fillData(
        dictOfWords, dictOfClasses, tabOfImageInfo)
def addCategory(self, category, verbose=False):
    """Register ``category`` with empty word data and zeroed counters.

    Args:
        category: Name of the category; must pass ``TypeValidator.isString``.
        verbose: When true, print a message after the category is added.

    Returns:
        True if the category was created, False if it was already a key of
        ``self.data`` (in which case nothing is modified).

    Raises:
        ValueError: If ``category`` is rejected by ``TypeValidator.isString``.
    """
    nameIsString = TypeValidator.isString(category)
    if not nameIsString:
        raise ValueError(
            'wordRepository::addCategory: expected a string, got', type(category), '.')

    # Existing categories are left untouched.
    if category in self.data:
        return False

    self.data[category] = {}
    self.articlesCounts[category] = 0
    self.imagesCounts[category] = 0

    if verbose:
        print('wordRepository::addCategory: added category "', category, '".')
    return True