def getDocsStats(documents):
    """Compute word statistics for a list of tokenized documents.

    Parameters:
        documents: list of documents, each a list of word tokens.

    Returns a dict with:
        'allwords'  : Counter of every token across all documents.
        'occurences': list of ((word, total_count), hits) pairs, one per
                      distinct word in sorted order, where hits is
                      occurences(counters, word) — the (doc_index, count)
                      pairs for each document containing the word.
        'wordscount': token count of each document, in document order.
    """
    # Per-document Counters tagged with their document index.
    counters = list(enumerate(Counter(doc) for doc in documents))
    # Flatten in O(total tokens); the previous reduce(add, documents, [])
    # rebuilt the accumulator list on every step (quadratic).
    allwords = [word for doc in documents for word in doc]
    allwords_counter = Counter(allwords)
    words = sorted(set(allwords))
    occurencesIndex = [((word, allwords_counter[word]), occurences(counters, word))
                       for word in words]
    wordscount = [len(doc) for doc in documents]
    return {'allwords': allwords_counter,
            'occurences': occurencesIndex,
            'wordscount': wordscount}
def getDocsStats(documents):
    """Compute word statistics for a list of tokenized documents.

    Parameters:
        documents: list of documents, each a list of word tokens.

    Returns a dict with:
        'allwords'  : Counter of every token across all documents.
        'occurences': list of ((word, total_count), hits) pairs, one per
                      distinct word in sorted order, where hits is
                      occurences(counters, word) — the (doc_index, count)
                      pairs for each document containing the word.
        'wordscount': token count of each document, in document order.
    """
    # Per-document Counters tagged with their document index.
    counters = list(enumerate(Counter(doc) for doc in documents))
    # Flatten in O(total tokens); the previous reduce(add, documents, [])
    # rebuilt the accumulator list on every step (quadratic).
    allwords = [word for doc in documents for word in doc]
    allwords_counter = Counter(allwords)
    words = sorted(set(allwords))
    occurencesIndex = [((word, allwords_counter[word]), occurences(counters, word))
                       for word in words]
    wordscount = [len(doc) for doc in documents]
    return {'allwords': allwords_counter,
            'occurences': occurencesIndex,
            'wordscount': wordscount}
def _parse_pure_list(self, tokens):
    """Parse a flat token list into a query tree.

    'AND' tokens are dropped first — adjacency already means AND. If no
    'OR' remains the whole list is one AND group; otherwise the list is
    split on 'OR' and each sub-list becomes an AND-parsed child of an
    OR node.

    Parameters:
        tokens: list of query tokens ('AND'/'OR' connectives and words).

    Returns the node produced by self._parse_list_ands, or an OR Node
    whose children are the AND-parsed sub-lists.
    """
    # Drop explicit AND connectives; they carry no extra information.
    tokens = [tok for tok in tokens if tok != 'AND']
    if 'OR' not in tokens:
        return self._parse_list_ands(tokens)
    # NOTE(review): Node and splitlist are project helpers defined elsewhere;
    # splitlist presumably partitions tokens on the 'OR' separator — confirm.
    node = Node('OR')
    slist = splitlist(tokens, 'OR')
    node.children = [self._parse_list_ands(part) for part in slist]
    return node
def _query_to_stems(self, query):
    """Recursively replace every word leaf of a parsed query with its stem.

    Parameters:
        query: either a Node (interior of the query tree) or a leaf
               word (string, possibly empty/None).

    Returns the same tree with leaves stemmed in place. Stemming applies
    only when self.stem is truthy; empty/None leaves pass through
    untouched either way.
    """
    if isinstance(query, Node):
        # Interior node: rewrite each child, preserving tree structure.
        query.children = [self._query_to_stems(child) for child in query.children]
        return query
    # Leaf: stem non-empty words only when stemming is enabled.
    # NOTE(review): getstem is a project helper; self.lang selects the stemmer.
    if query and self.stem:
        return getstem(query, self.lang)
    return query
def _parse_pure_list(self, tokens):
    """Parse a flat token list into a query tree.

    'AND' tokens are dropped first — adjacency already means AND. If no
    'OR' remains the whole list is one AND group; otherwise the list is
    split on 'OR' and each sub-list becomes an AND-parsed child of an
    OR node.

    Parameters:
        tokens: list of query tokens ('AND'/'OR' connectives and words).

    Returns the node produced by self._parse_list_ands, or an OR Node
    whose children are the AND-parsed sub-lists.
    """
    # Drop explicit AND connectives; they carry no extra information.
    tokens = [tok for tok in tokens if tok != 'AND']
    if 'OR' not in tokens:
        return self._parse_list_ands(tokens)
    # NOTE(review): Node and splitlist are project helpers defined elsewhere;
    # splitlist presumably partitions tokens on the 'OR' separator — confirm.
    node = Node('OR')
    slist = splitlist(tokens, 'OR')
    node.children = [self._parse_list_ands(part) for part in slist]
    return node
def occurences(counters, word):
    """Return (doc_index, count) pairs for every document containing word.

    Parameters:
        counters: iterable of (doc_index, per-document Counter/mapping)
                  pairs, as built by getDocsStats.
        word: token to look up.

    Returns a list of (doc_index, count) pairs, in counters order,
    restricted to documents where the word occurs.
    """
    # Single comprehension replaces the lmap(lambda ...)/filter(lambda ...)
    # pipeline: filter-by-membership, then project (index, count).
    return [(index, counter[word])
            for index, counter in counters
            if word in counter]
def get_documents(self, parsedQuery):
    """Evaluate a parsed query tree and return the translated documents.

    Parameters:
        parsedQuery: query tree accepted by self._by_node.

    Returns a list with self._translate applied to each document
    produced by self._by_node, preserving its order.
    """
    documents = self._by_node(parsedQuery)
    return [self._translate(doc) for doc in documents]
def _by_words(self, words):
    """Return translated documents matched by ALL of the given words.

    Parameters:
        words: non-empty iterable of search words. An empty iterable
               raises TypeError (reduce with no initial value) — this
               matches the original behavior and is left unchanged.

    Combines the per-word results of self._by_word with `&` (set
    intersection, assuming _by_word returns sets — confirm), then
    applies self._translate to each surviving document.
    """
    matched = reduce(and_, map(self._by_word, words))
    return [self._translate(doc) for doc in matched]
def readfiles(paths):
    """Read every path with readfile and return the contents as a list.

    Parameters:
        paths: iterable of file paths accepted by readfile.

    Returns a list of readfile results, in input order.
    """
    return [readfile(path) for path in paths]