def main(args):
    """Index every line of a text file and dump the resulting index to disk.

    Args:
        args: Object exposing ``texts`` (path of the UTF-8 input file, one
            document per line), ``index`` (output path without extension)
            and ``zip`` (truthy to write through ``gzip.open``).

    Returns:
        Always ``0``.
    """
    index_builder = Indexer()

    with io.open(args.texts, encoding="utf-8") as source:
        for raw_line in source:
            # Stopwords are kept; each stripped line becomes one indexed bag.
            index_builder.index(
                BagOfWords(raw_line.strip(), filter_stopwords=False)
            )

    # Pick the opener and the matching file extension together.
    if args.zip:
        opener, suffix = gzip.open, ".json.gz"
    else:
        opener, suffix = io.open, ".json"

    with opener(args.index + suffix, mode="wb") as sink:
        index_builder.dump(sink)

    return 0
def test_dump(self):
    """Check that a dumped index can be read back as JSON and matches."""
    index_builder = Indexer()
    for raw_text in self.texts:
        # Stemming and stopword filtering are both switched off here.
        index_builder.index(
            BagOfWords(
                raw_text.strip(),
                enable_stemming=False,
                filter_stopwords=False,
            )
        )

    # Dump into an in-memory buffer, then rewind and parse what was written.
    buffer = StringIO()
    index_builder.dump(buffer)
    buffer.seek(0)
    loaded = json.load(buffer)

    self.assertSequenceEqual(
        self.expected["docs_index"], loaded["docs_index"]
    )
    self.assertDictEqual(
        self.expected["terms_index"], loaded["terms_index"]
    )
def test_dump(self):
    """Check that the generated JSON file is correct."""
    index_builder = Indexer()
    for raw_text in self.texts:
        cleaned = raw_text.strip()
        words = BagOfWords(
            cleaned, enable_stemming=False, filter_stopwords=False
        )
        index_builder.index(words)

    # Serialize to an in-memory buffer and parse everything it received.
    buffer = StringIO()
    index_builder.dump(buffer)
    loaded = json.loads(buffer.getvalue())

    self.assertSequenceEqual(
        self.expected["docs_index"], loaded["docs_index"]
    )
    self.assertDictEqual(
        self.expected["words_index"], loaded["words_index"]
    )