Example #1
0
File: index.py Project: sankosk/SIW
def main(args):
    """Index every line of a text file and write the index to disk.

    ``args.texts`` is opened as UTF-8 text. Each line is stripped, wrapped
    in a ``BagOfWords`` (with ``filter_stopwords=False``) and passed to
    ``Indexer.index``.

    The output path is ``args.index`` plus an extension: ``".json.gz"``,
    opened with ``gzip.open``, when ``args.zip`` is truthy; otherwise
    ``".json"``, opened with ``io.open``. The file is opened in ``"wb"``
    mode and handed to ``Indexer.dump``.

    Returns:
        Always ``0``.
    """
    indexer = Indexer()

    with io.open(args.texts, encoding="utf-8") as texts:
        for raw_line in texts:
            indexer.index(
                BagOfWords(raw_line.strip(), filter_stopwords=False))

    # Pick the opener and the matching file extension together.
    if args.zip:
        open_func, index_ext = gzip.open, ".json.gz"
    else:
        open_func, index_ext = io.open, ".json"

    with open_func(args.index + index_ext, mode="wb") as out:
        indexer.dump(out)
    return 0
Example #2
0
File: tests.py Project: sankosk/SIW
    def test_dump(self):
        """Check the JSON produced by ``Indexer.dump`` against the fixture.

        Every stripped entry of ``self.texts`` is indexed through a
        ``BagOfWords`` built with ``enable_stemming=False`` and
        ``filter_stopwords=False``. The index is dumped into an in-memory
        buffer, parsed back with ``json.load`` and its ``"docs_index"``
        and ``"terms_index"`` entries are compared with ``self.expected``.
        """
        indexer = Indexer()
        for raw in self.texts:
            indexer.index(BagOfWords(
                raw.strip(), enable_stemming=False, filter_stopwords=False))

        buffer = StringIO()
        indexer.dump(buffer)
        buffer.seek(0)  # rewind so json.load reads from the start
        dumped = json.load(buffer)

        expected = self.expected
        self.assertSequenceEqual(
            expected["docs_index"], dumped["docs_index"])
        self.assertDictEqual(
            expected["terms_index"], dumped["terms_index"])
Example #3
0
    def test_dump(self):
        """Check that the generated JSON file is correct.

        Every stripped entry of ``self.texts`` is indexed through a
        ``BagOfWords`` built with ``enable_stemming=False`` and
        ``filter_stopwords=False``. The index is dumped into an in-memory
        buffer, parsed back with ``json.load`` and its ``"docs_index"``
        and ``"words_index"`` entries are compared with ``self.expected``.
        """
        indexer = Indexer()
        for raw in self.texts:
            indexer.index(BagOfWords(
                raw.strip(), enable_stemming=False, filter_stopwords=False))

        buffer = StringIO()
        indexer.dump(buffer)
        buffer.seek(0)  # rewind so json.load reads from the start
        dumped = json.load(buffer)

        expected = self.expected
        self.assertSequenceEqual(
            expected["docs_index"], dumped["docs_index"])
        self.assertDictEqual(
            expected["words_index"], dumped["words_index"])