Beispiel #1
0
def getDocsStats(documents):
	"""Compute word statistics for a collection of tokenized documents.

	Args:
		documents: Iterable of documents, each a sized iterable of hashable,
			mutually orderable words (e.g. a list of lists of strings).

	Returns:
		A dict with three entries:
		'allwords': Counter of every word across all documents.
		'occurences': list, sorted by word, of
			((word, total_count), [(doc_index, count), ...]) where the inner
			list covers only the documents containing the word, in document
			order.
		'wordscount': list holding the number of words of each document.
	"""
	allwords_counter = Counter()
	postings = {}
	wordscount = []
	for doc_index, document in enumerate(documents):
		wordscount.append(len(document))
		doc_counter = Counter(document)
		# Accumulate in place: concatenating all documents into one list
		# first copies the growing list once per document (quadratic).
		allwords_counter.update(doc_counter)
		# Build the word -> [(doc_index, count)] index in this same pass
		# instead of rescanning every document once per distinct word.
		for word, count in doc_counter.items():
			postings.setdefault(word, []).append((doc_index, count))
	occurencesIndex = [
		((word, allwords_counter[word]), postings[word])
		for word in sorted(allwords_counter)
	]
	return {'allwords' : allwords_counter, 'occurences' : occurencesIndex, 'wordscount' : wordscount}
Beispiel #2
0
def getDocsStats(documents):
    """Compute word statistics for a collection of tokenized documents.

    Args:
        documents: Iterable of documents, each a sized iterable of hashable,
            mutually orderable words (e.g. a list of lists of strings).

    Returns:
        A dict with three entries:
        'allwords': Counter of every word across all documents.
        'occurences': list, sorted by word, of
            ((word, total_count), [(doc_index, count), ...]) where the inner
            list covers only the documents containing the word, in document
            order.
        'wordscount': list holding the number of words of each document.
    """
    allwords_counter = Counter()
    postings = {}
    wordscount = []
    for doc_index, document in enumerate(documents):
        wordscount.append(len(document))
        doc_counter = Counter(document)
        # Accumulate in place: concatenating all documents into one list
        # first copies the growing list once per document (quadratic).
        allwords_counter.update(doc_counter)
        # Build the word -> [(doc_index, count)] index in this same pass
        # instead of rescanning every document once per distinct word.
        for word, count in doc_counter.items():
            postings.setdefault(word, []).append((doc_index, count))
    occurencesIndex = [
        ((word, allwords_counter[word]), postings[word])
        for word in sorted(allwords_counter)
    ]
    return {
        'allwords': allwords_counter,
        'occurences': occurencesIndex,
        'wordscount': wordscount
    }
Beispiel #3
0
	def _parse_pure_list(self, tokens):
		"""Parse a flat token list into a single AND group or an 'OR' node.

		Tokens equal to 'AND' are filtered out first. When no 'OR' token
		remains, the filtered list goes straight to ``_parse_list_ands``.
		Otherwise the list is passed to ``splitlist(..., 'OR')`` and every
		resulting piece is parsed with ``_parse_list_ands`` and stored as a
		child of a new ``Node('OR')``.
		"""
		operands = lfilter(lambda token: token != 'AND', tokens)
		if 'OR' not in operands:
			return self._parse_list_ands(operands)
		or_node = Node('OR')
		or_node.children = lmap(self._parse_list_ands, splitlist(operands, 'OR'))
		return or_node
Beispiel #4
0
	def _query_to_stems(self, query):
		"""Recursively replace the terms of a parsed query with their stems.

		A ``Node`` gets its ``children`` rewritten in place (each child is
		processed by this same method) and is returned itself. Any other
		value is passed to ``getstem`` together with ``self.lang`` when it is
		truthy and ``self.stem`` is truthy; otherwise it is returned as is.
		"""
		if not isinstance(query, Node):
			should_stem = query and self.stem
			return getstem(query, self.lang) if should_stem else query
		query.children = lmap(self._query_to_stems, query.children)
		return query
Beispiel #5
0
 def _parse_pure_list(self, tokens):
     """Parse a flat token list into a single AND group or an 'OR' node.

     Tokens equal to 'AND' are filtered out first. When no 'OR' token
     remains, the filtered list goes straight to ``_parse_list_ands``.
     Otherwise the list is passed to ``splitlist(..., 'OR')`` and every
     resulting piece is parsed with ``_parse_list_ands`` and stored as a
     child of a new ``Node('OR')``.
     """
     operands = lfilter(lambda token: token != 'AND', tokens)
     if 'OR' not in operands:
         return self._parse_list_ands(operands)
     or_node = Node('OR')
     or_node.children = lmap(self._parse_list_ands, splitlist(operands, 'OR'))
     return or_node
Beispiel #6
0
 def _query_to_stems(self, query):
     """Recursively replace the terms of a parsed query with their stems.

     A ``Node`` gets its ``children`` rewritten in place (each child is
     processed by this same method) and is returned itself. Any other
     value is passed to ``getstem`` together with ``self.lang`` when it is
     truthy and ``self.stem`` is truthy; otherwise it is returned as is.
     """
     if not isinstance(query, Node):
         should_stem = query and self.stem
         return getstem(query, self.lang) if should_stem else query
     query.children = lmap(self._query_to_stems, query.children)
     return query
Beispiel #7
0
def occurences(counters, word):
	"""Return the per-entry counts of ``word`` for entries that contain it.

	Args:
		counters: Iterable of indexable entries whose item 0 is an
			identifier and item 1 is a container mapping words to counts
			(e.g. ``enumerate``-d ``Counter`` objects).
		word: The word to look up.

	Returns:
		A list of ``(entry[0], entry[1][word])`` tuples, in input order, for
		every entry whose item 1 contains ``word``.
	"""
	# One comprehension instead of lambda-driven map/filter; entries are
	# still accessed by index, so the accepted entry shapes are unchanged.
	return [(entry[0], entry[1][word]) for entry in counters if word in entry[1]]
Beispiel #8
0
	def get_documents(self, parsedQuery):
		"""Evaluate a parsed query and return its translated results.

		The query is resolved with ``_by_node`` and every resulting item is
		passed through ``_translate``; the outcome is collected by ``lmap``.
		"""
		matches = self._by_node(parsedQuery)
		translate = self._translate
		return lmap(translate, matches)
Beispiel #9
0
	def _by_words(self, words):
		"""Return the translated results shared by every word in ``words``.

		Each word is looked up with ``_by_word``; the per-word results are
		folded together with ``and_`` (presumably ``operator.and_``, i.e. the
		``&`` operator) and each item of the outcome is passed through
		``_translate``.

		Note: ``reduce`` is called without an initial value, so an empty
		``words`` raises ``TypeError``.
		"""
		per_word = map(self._by_word, words)
		common = reduce(and_, per_word)
		return lmap(self._translate, common)
Beispiel #10
0
def readfiles(paths):
	"""Apply ``readfile`` to each entry of ``paths`` and collect the results via ``lmap``."""
	contents = lmap(readfile, paths)
	return contents
Beispiel #11
0
def occurences(counters, word):
    """Return the per-entry counts of ``word`` for entries that contain it.

    Args:
        counters: Iterable of indexable entries whose item 0 is an
            identifier and item 1 is a container mapping words to counts
            (e.g. ``enumerate``-d ``Counter`` objects).
        word: The word to look up.

    Returns:
        A list of ``(entry[0], entry[1][word])`` tuples, in input order, for
        every entry whose item 1 contains ``word``.
    """
    # One comprehension instead of lambda-driven map/filter; entries are
    # still accessed by index, so the accepted entry shapes are unchanged.
    return [
        (entry[0], entry[1][word])
        for entry in counters
        if word in entry[1]
    ]
Beispiel #12
0
def readfiles(paths):
    """Apply ``readfile`` to each entry of ``paths`` and collect the results via ``lmap``."""
    contents = lmap(readfile, paths)
    return contents