Ejemplo n.º 1
0
    def parse_folder(path):
        """
        Parses all .bib files in the given folder.

        Files are selected by utils.search_in_folder using a ".bib" suffix
        filter. Each file is submitted to a process pool sized by
        multiprocessing.cpu_count() and parsed by its own BibParser
        instance. The collected items are sorted by const.DEFAULT_ORDER_BY,
        indexed, finalized against that index, and the index is then
        updated with the finalized items.

        Returns a tuple (parsed_items, item_index) containing all items
        found and the search index built from them.

        Raises Exception if path is not an existing directory. Exceptions
        raised while parsing a file propagate from future.result().
        """
        if not os.path.isdir(path):
            raise Exception("Path to folder expected")

        files = utils.search_in_folder(path,
                                       lambda path: path.endswith(".bib"))

        parsed_items = []
        # Use the executor as a context manager so the pool is always shut
        # down, even if one of the parsing jobs raises.
        with concurrent.futures.ProcessPoolExecutor(
                max_workers=multiprocessing.cpu_count()) as executor:
            futures = [
                executor.submit(BibParser()._parse_file,
                                os.path.join(path, filename))
                for filename in files
            ]
            # Results are gathered in submission order.
            for future in futures:
                parsed_items += future.result()

        parsed_items = sorted(
            parsed_items,
            key=BibItem.key_to_key_func(const.DEFAULT_ORDER_BY))

        # The index is built first because finalization needs it (through
        # FinalizingContext); it is refreshed afterwards with the finalized
        # items.
        item_index = search_index.Index(parsed_items)
        fin_ctx = FinalizingContext(item_index)
        for item in parsed_items:
            item.finalize_item_set(fin_ctx)
        item_index.update(parsed_items)
        return (parsed_items, item_index)
Ejemplo n.º 2
0
    def test_inverted_index_search(self):
        """
        Checks that the "keywords" subindex contains both a direct key and
        the same key carrying const.INVERTED_INDEX_KEY_PREFIX, and that
        the prefixed key selects exactly one item whose id is "id_2"
        """
        parsed = bib_parser.BibParser()._parse_string(TEST_ITEMS)
        item_index = index.Index(parsed)

        direct_key = "cinquecento"
        inverted_key = const.INVERTED_INDEX_KEY_PREFIX + direct_key

        # both forms of the key have to be present in the subindex
        keywords_index = item_index["keywords"]
        for expected_key in (direct_key, inverted_key):
            self.assertIn(expected_key, keywords_index)

        matches = item_index["keywords"][inverted_key]
        self.assertEqual(len(matches), 1)
        only_match = utils.first(matches)
        self.assertEqual(only_match.id(), "id_2")
Ejemplo n.º 3
0
    def test_search_items(self):
        """
        Checks that parsed items can be filtered by author and by several
        year ranges, and looked up through the "keywords" index
        """
        items = bib_parser.BibParser()._parse_string(TEST_ITEMS)
        item_index = index.Index(items)

        def count_matching(predicate):
            # number of parsed items accepted by the predicate
            return len(list(filter(predicate, items)))

        def year_range(year_from, year_to):
            # conjunction of the "year_from" and "year_to" searches
            return search.and_([
                search.search_for("year_from", year_from),
                search.search_for("year_to", year_to)
            ])

        by_author = search.search_for_iterable("author", "Петров")
        self.assertEqual(count_matching(by_author), 1)

        # exact match
        self.assertEqual(count_matching(year_range(1825, 1825)), 1)

        # partial intersection
        self.assertEqual(count_matching(year_range(1500, 1600)), 1)

        # inner containment
        self.assertEqual(count_matching(year_range(1499, 1501)), 1)

        # outer containment
        self.assertEqual(count_matching(year_range(1400, 1600)), 1)

        # lookup of a single keyword
        grumbling = item_index["keywords"]["grumbling"]
        self.assertEqual(len(list(grumbling)), 1)

        # intersection of two keyword lookups
        cinquecento = item_index["keywords"]["cinquecento"]
        historical_dance = item_index["keywords"]["historical dance"]
        both_keywords = cinquecento & historical_dance
        self.assertEqual(len(list(both_keywords)), 1)
Ejemplo n.º 4
0
    def test_parse_string(self):
        """
        Tests if a string can be successfully parsed by BibParser.

        Verifies the number of parsed items, the sets of languages and
        keywords exposed by the index, and that the title of item "id_1"
        contains no curly braces.
        """
        items = bib_parser.BibParser()._parse_string(TEST_ITEMS)
        item_index = index.Index(items)

        # Keys starting with "!" are excluded from the comparison;
        # presumably these are inverted-index entries (compare
        # const.INVERTED_INDEX_KEY_PREFIX) - TODO confirm.
        languages = {
            langid for langid in item_index["langid"].keys()
            if not langid.startswith("!")
        }
        keywords = set(item_index["keywords"].keys())

        self.assertEqual(len(items), 2)
        self.assertEqual(languages, EXPECTED_LANGUAGES)
        self.assertEqual(keywords, EXPECTED_KEYWORDS)

        item1 = next(iter(item_index["id"]["id_1"]))
        # assertNotIn reports the offending title on failure, unlike
        # assertTrue on a pre-computed boolean.
        title = item1.title()
        self.assertNotIn('{', title)
        self.assertNotIn('}', title)