Example #1
    def parse_folder(path):
        """
		Parses all .bib files in given folder.
		Returns a tuple (parsed_iten, search_index) containing all items found
		"""
        if not os.path.isdir(path):
            raise Exception("Path to folder expected")

        parsed_items = []
        files = utils.search_in_folder(path,
                                       lambda path: path.endswith(".bib"))
        executor = concurrent.futures.ProcessPoolExecutor(
            max_workers=multiprocessing.cpu_count())
        futures = [
            executor.submit(BibParser()._parse_file,
                            os.path.join(path, filename)) for filename in files
        ]
        for future in futures:
            parsed_items += future.result()
        executor.shutdown()

        parsed_items = list(
            sorted(parsed_items,
                   key=BibItem.key_to_key_func(const.DEFAULT_ORDER_BY)))
        item_index = search_index.Index(parsed_items)
        fin_ctx = FinalizingContext(item_index)
        for item in parsed_items:
            item.finalize_item_set(fin_ctx)
        item_index.update(parsed_items)
        return (parsed_items, item_index)
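
parse_folder fans the per-file parsing out to a process pool and joins the results afterwards. A minimal self-contained sketch of the same fan-out pattern, with the executor used as a context manager so the pool is also shut down when a worker raises (parse_file here is a stand-in for BibParser()._parse_file, which must be picklable to reach the worker processes):

import concurrent.futures
import multiprocessing
import os

def parse_folder_sketch(path, parse_file):
    # Collect the .bib files directly under `path`; parse_file stands in for
    # BibParser()._parse_file and must be picklable.
    files = sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if name.endswith(".bib")
    )
    parsed_items = []
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=multiprocessing.cpu_count()) as executor:
        # executor.map keeps the input order; leaving the with-block waits for
        # all workers and shuts the pool down even if an exception propagates.
        for items in executor.map(parse_file, files):
            parsed_items += items
    return parsed_items

The original submits one future per file and collects the results in order, which amounts to the same thing; executor.map is simply the shorter spelling when no per-future error handling is needed.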
Example #2
	def parse_folder(path):
		"""
		Parses all .bib files in given folder.
		Returns a tuple (parsed_iten, search_index) containing all items found
		"""
		if not os.path.isdir(path):
			raise Exception("Path to folder expected")

		parsed_items = []
		files = utils.search_in_folder(path, lambda path: path.endswith(".bib"))
		executor = concurrent.futures.ProcessPoolExecutor(max_workers=multiprocessing.cpu_count())
		futures = [
			executor.submit(
				BibParser()._parse_file,
				os.path.join(path, filename)
			)
			for filename in files
		]
		for future in futures:
			parsed_items += future.result()
		executor.shutdown()

		parsed_items = list(sorted(
			parsed_items,
			key=BibItem.key_to_key_func(const.DEFAULT_ORDER_BY)
		))
		item_index = search_index.Index(parsed_items)
		fin_ctx = FinalizingContext(item_index)
		for item in parsed_items:
			item.finalize_item_set(fin_ctx)
		item_index.update(parsed_items)
		return (parsed_items, item_index)
Example #3
def fetch_added_on_from_git():
	BLAME_REGEXP = re.compile(
		#commit hash
		r"^[\^0-9a-z]+\s+"
		#filename
		r"[^\s]*?\s+"
		#committer's name
		r"\([A-Za-z\-\s\\]*?\s+"
		#commit date
		r"(?P<date>\d{4}-\d{2}-\d{2})\s+"
		#commit time
		r"[\d:]+\s+"
		#commit time zone
		r"[+\d]+\s+"
		#line number
		r"\d+\)\s+"
		#item id
		r"(?P<id>[a-z_\d]+),\s*$"
	)
	def blame_file(path):
		data = subprocess.check_output([
			"git",
			"blame",
			#WARN: using show-name to guarantee output format
			"--show-name",
			#no such option in "git blame" on trusty
			#"--no-progress",
			path
		]).decode()
		result = dict()
		for line in data.split("\n"):
			match = BLAME_REGEXP.search(line)
			if not match:
				continue
			item_id = match.group("id")
			date = datetime.datetime.strptime(
				match.group("date"),
				config.parser.date_format
			)
			result[item_id] = date
		return result

	result = dict()
	filter = lambda path: path.endswith(".bib")
	for path in utils.search_in_folder(config.parser.bibdata_dir, filter):
		result.update(blame_file(path))
	return result
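
BLAME_REGEXP targets the default (non-porcelain) blame line layout that --show-name pins down: hash, filename, a parenthesised committer name, date, time, time zone, line number, and then the blamed source line itself, which is expected to end in an item id followed by a comma. A self-contained illustration against a made-up blame line (it also assumes config.parser.date_format is "%Y-%m-%d"):

import datetime
import re

# Same pattern as BLAME_REGEXP above, inlined so the snippet runs on its own.
blame_re = re.compile(
    r"^[\^0-9a-z]+\s+"                  # commit hash
    r"[^\s]*?\s+"                       # filename (guaranteed by --show-name)
    r"\([A-Za-z\-\s\\]*?\s+"            # committer's name
    r"(?P<date>\d{4}-\d{2}-\d{2})\s+"   # commit date
    r"[\d:]+\s+"                        # commit time
    r"[+\d]+\s+"                        # commit time zone
    r"\d+\)\s+"                         # line number
    r"(?P<id>[a-z_\d]+),\s*$"           # item id
)

# Hypothetical blame line; real output varies per repository.
sample = "3f2a9c1e file.bib (John Doe  2020-05-17 12:34:56 +0300  42) some_item_id,"
match = blame_re.search(sample)
assert match is not None
print(match.group("id"))  # some_item_id
print(datetime.datetime.strptime(match.group("date"), "%Y-%m-%d").date())  # 2020-05-17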
Example #4
def fetch_backups_from_fs():
    if not os.path.isdir(config.www.backup_dir):
        return set()
    FOLDERS_TO_VALIDATE = [
        "Cooking",
        "Fashion",
        "Games",
        "Images",
        "Library",
    ]
    trim_root = lambda path: os.path.relpath(path, start=config.www.backup_dir)
    filter = lambda path: const.FILENAME_REGEXP.match(os.path.basename(path))
    backups = []
    for basename in FOLDERS_TO_VALIDATE:
        folder = os.path.join(config.www.backup_dir, basename)
        backups += list(map(trim_root, utils.search_in_folder(folder, filter)))
    return set(backups)
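
One hypothetical use of the result is to diff it against the set of backups that is expected to exist; the archive names below are invented for illustration, and config.www.backup_dir has to point at the real backup root:

import os

expected = {
    os.path.join("Library", "library-2021-01-01.tar.gz"),  # made-up names
    os.path.join("Images", "images-2021-01-01.tar.gz"),
}
found = set(fetch_backups_from_fs())
missing = sorted(expected - found)
if missing:
    print("Missing backups:", missing)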
Example #5
def fetch_added_on_from_git():
    BLAME_REGEXP = re.compile(
        #commit hash
        r"^[\^0-9a-z]+\s+"
        #filename
        r"[^\s]*?\s+"
        #committer's name
        r"\([A-Za-z\-\s\\]*?\s+"
        #commit date
        r"(?P<date>\d{4}-\d{2}-\d{2})\s+"
        #commit time
        r"[\d:]+\s+"
        #commit time zone
        r"[+\d]+\s+"
        #line number
        r"\d+\)\s+"
        #item id
        r"(?P<id>[a-z_\d]+),\s*$")

    def blame_file(path):
        data = subprocess.check_output([
            "git",
            "blame",
            #WARN: using show-name to guarantee output format
            "--show-name",
            #no such option in "git blame" on trusty
            #"--no-progress",
            path
        ]).decode()
        result = dict()
        for line in data.split("\n"):
            match = BLAME_REGEXP.search(line)
            if not match:
                continue
            item_id = match.group("id")
            date = datetime.datetime.strptime(match.group("date"),
                                              config.parser.date_format)
            result[item_id] = date
        return result

    result = dict()
    filter = lambda path: path.endswith(".bib")
    for path in utils.search_in_folder(config.parser.bibdata_dir, filter):
        result.update(blame_file(path))
    return result
Example #6
def fetch_filelist_from_fs():
    if not os.path.isdir(config.www.elibrary_dir):
        return set()
    FOLDERS_TO_VALIDATE = ["Library"]
    EXCLUDED_FOLDERS = {
        "Ancillary sources (not in bibliography)",
        "Leaflets (not in bibliography)",
    }
    trim_root = lambda path: os.path.relpath(path,
                                             start=config.www.elibrary_dir)
    filter = lambda path: os.path.isfile(path) and path.endswith(".pdf")
    stored_files = []
    for basename in FOLDERS_TO_VALIDATE:
        folder = os.path.join(config.www.elibrary_dir, basename)
        stored_files += list(
            map(
                trim_root,
                utils.search_in_folder(folder,
                                       filter,
                                       excludes=EXCLUDED_FOLDERS)))
    return set(stored_files)
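
As with the backups, the returned set lends itself to a two-way comparison, for example against the relative paths the bibliography is supposed to reference. referenced_files below is a made-up stand-in; these examples do not show where such a set would come from:

import os

referenced_files = {
    os.path.join("Library", "Some Author - Some Title.pdf"),  # made-up entry
}
stored_files = set(fetch_filelist_from_fs())
print("On disk but not referenced:", sorted(stored_files - referenced_files))
print("Referenced but missing from disk:", sorted(referenced_files - stored_files))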
Example #7
def fetch_backups_from_fs():
	if not os.path.isdir(config.www.backup_dir):
		return set()
	FOLDERS_TO_VALIDATE = [
		"Cooking",
		"Fashion",
		"Games",
		"Images",
		"Library",
	]
	trim_root = lambda path: os.path.relpath(path, start=config.www.backup_dir)
	filter = lambda path: const.FILENAME_REGEXP.match(os.path.basename(path))
	backups = []
	for basename in FOLDERS_TO_VALIDATE:
		folder = os.path.join(config.www.backup_dir, basename)
		backups += list(
			map(
				trim_root,
				utils.search_in_folder(folder, filter)
			)
		)
	return set(backups)
Example #8
def fetch_filelist_from_fs():
	if not os.path.isdir(config.www.elibrary_dir):
		return set()
	FOLDERS_TO_VALIDATE = [
		"Library"
	]
	EXCLUDED_FOLDERS = {
		"Ancillary sources (not in bibliography)",
		"Leaflets (not in bibliography)",
	}
	trim_root = lambda path: os.path.relpath(path, start=config.www.elibrary_dir)
	filter = lambda path: os.path.isfile(path) and path.endswith(".pdf")
	stored_files = []
	for basename in FOLDERS_TO_VALIDATE:
		folder = os.path.join(config.www.elibrary_dir, basename)
		stored_files += list(
			map(
				trim_root,
				utils.search_in_folder(folder, filter, excludes=EXCLUDED_FOLDERS)
			)
		)
	return set(stored_files)
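
All of the examples above delegate directory traversal to utils.search_in_folder, which is not shown here. Judging only from the call sites (a root folder, a predicate that receives full paths, and an optional excludes set of folder names), a plausible os.walk-based equivalent might look roughly like the sketch below; this is an assumption about its behaviour, not the project's actual helper:

import os

def search_in_folder(path, filter_func, excludes=frozenset()):
    # Walk `path` recursively and collect every file or subfolder whose full
    # path satisfies `filter_func`, skipping folders named in `excludes`.
    found = []
    for root, dirs, files in os.walk(path):
        # Prune excluded subfolders in place so os.walk does not descend into them.
        dirs[:] = [d for d in dirs if d not in excludes]
        for name in files + dirs:
            candidate = os.path.join(root, name)
            if filter_func(candidate):
                found.append(candidate)
    return found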