Example #1
File: retrieve.py Project: Bekt/ir
    # Excerpt from retrieve.py; assumes `from collections import Counter`
    # and `from tok import Tokenizer` at module level.
    def top(self, query, num):
        """Return the `num` highest-scoring documents for `query`,
        along with the total number of matching documents."""
        tok = Tokenizer()
        tok.tokenize(query)

        # Accumulate per-document scores across all query tokens.
        acc = Counter()
        for token in tok._counter:
            dict_entry = self.dict_entry(token)
            print(token, dict_entry)  # debug: dictionary entry per token
            if not dict_entry:
                continue
            post_entries = self.post_entries(dict_entry[1], dict_entry[2])
            # Scale each posting's weight by the token's frequency in the
            # query, so the postings file is opened only once per distinct
            # token even when a token repeats (e.g. "dog cat dog cat dog").
            for docid in post_entries:
                post_entries[docid] *= tok._counter[token]
            acc.update(post_entries)
        return acc.most_common(num), len(acc)
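A minimal usage sketch, assuming `top` belongs to a retriever class in retrieve.py; the `Retriever` name and its no-argument constructor are assumptions for illustration, not taken from the project:

# Hypothetical caller; `Retriever` and its constructor are assumed.
from retrieve import Retriever

r = Retriever()
results, total = r.top('dog cat dog', num=10)
for docid, score in results:   # most_common() yields (docid, score) pairs
    print(docid, score)
print('matching documents:', total)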
A companion filter script that reads reviews from stdin and prints each one's tokens:

import sys

from tok import Tokenizer

tok = Tokenizer()
for review in sys.stdin:
    print(' '.join(tok.tokenize(review)))
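Both snippets rely on the project's tok.Tokenizer, whose interface can be inferred from the calls above: tokenize returns a list of tokens and also fills the _counter Counter read by top(). A minimal stand-in with that interface, for running the snippets without the project, might look like this; the lowercasing and splitting rules are assumptions:

import re
from collections import Counter

class Tokenizer:
    """Stand-in for tok.Tokenizer; the real token rules are assumed."""

    def __init__(self):
        self._counter = Counter()

    def tokenize(self, text):
        # Assumed behavior: lowercase, then split on alphanumeric runs.
        tokens = re.findall(r'[a-z0-9]+', text.lower())
        self._counter.update(tokens)  # mirrors the _counter used in top()
        return tokens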