def validate_single_filename(abspath, filename, item, errors):
    """
    Checks if file is accessible and matches item metadata.

    :param abspath: absolute path to the file being validated
    :param filename: filename whose embedded metadata is matched against item
    :param item: bibliography item providing expected metadata
    :param errors: set-like collector; problems are reported via errors.add()
    """
    if not os.path.isfile(abspath):
        errors.add(f"File [{abspath}] is not accessible")
    if not utils.isfile_case_sensitive(abspath):
        errors.add(f"File [{abspath}] is not accessible in case-sensitive mode")
    booktype = item.get("booktype")
    validate_periodical_filename(filename, item, errors)
    validate_short_desription_filename(filename, item, errors)
    validate_etiquette_filename(filename, item, errors)
    # Multi-entry booktypes aggregate several works in one file:
    # per-item filename metadata checks below do not apply to them.
    if booktype in MULTIENTRY_BOOKTYPES:
        return

    metadata = utils.extract_metadata_from_file(filename)
    # validating optional author, edition, tome
    # in case when item specifies value, but filename does not
    optional_meta_fields = ["author"]
    if booktype:
        optional_meta_fields += [
            "edition",
            "volume",
            # For serial books, no number is present in metadata
            # Temporary disable check here
            #"number",
            "part"
        ]
    for meta_field in optional_meta_fields:
        if item.has(meta_field) and (meta_field not in metadata):
            # FIX: message previously contained a literal "(unknown)" placeholder
            # instead of interpolating the actual filename
            errors.add(f"Field {meta_field} is not specified in filename [{filename}]")

    meta_keywords = metadata.get("keywords", {})
    source_file = item.get("source_file")
    if (const.META_INCOMPLETE in meta_keywords) and (source_file != "_problems.bib"):
        errors.add("Incomplete entries should be stored in _problems.bib")

    # Every search derived from filename metadata must match the item back.
    searches = utils.make_searches_from_metadata(metadata)
    for search_key, search_func in searches.items():
        if not search_func(item):
            errors.add(
                f"Item is not searchable by {search_key} extracted from filename {abspath}.\n"
                f"  Item has: {item.get(search_key)}\n"
                f"  Search has: {metadata[search_key]}"
            )
def validate_single_filename(abspath, filename, item, errors):
    """
    Checks if file is accessible and matches item metadata.

    :param abspath: absolute path to the file being validated
    :param filename: filename whose embedded metadata is matched against item
    :param item: bibliography item providing expected metadata
    :param errors: set-like collector; problems are reported via errors.add()
    """
    if not os.path.isfile(abspath):
        errors.add(f"File [{abspath}] is not accessible")
    if not utils.isfile_case_sensitive(abspath):
        errors.add(
            f"File [{abspath}] is not accessible in case-sensitive mode")
    booktype = item.get("booktype")
    validate_periodical_filename(filename, item, errors)
    validate_short_desription_filename(filename, item, errors)
    validate_etiquette_filename(filename, item, errors)
    # Multi-entry booktypes aggregate several works in one file:
    # per-item filename metadata checks below do not apply to them.
    if booktype in MULTIENTRY_BOOKTYPES:
        return

    metadata = utils.extract_metadata_from_file(filename)
    # validating optional author, edition, tome
    # in case when item specifies value, but filename does not
    optional_meta_fields = ["author"]
    if booktype:
        optional_meta_fields += [
            "edition",
            "volume",
            # For serial books, no number is present in metadata
            # Temporary disable check here
            #"number",
            "part"
        ]
    for meta_field in optional_meta_fields:
        if item.has(meta_field) and (meta_field not in metadata):
            # FIX: message previously contained a literal "(unknown)" placeholder
            # instead of interpolating the actual filename
            errors.add(
                f"Field {meta_field} is not specified in filename [{filename}]"
            )

    meta_keywords = metadata.get("keywords", {})
    source_file = item.get("source_file")
    if ((const.META_INCOMPLETE in meta_keywords) and
        (source_file != "_problems.bib")):
        errors.add("Incomplete entries should be stored in _problems.bib")

    # Every search derived from filename metadata must match the item back.
    searches = utils.make_searches_from_metadata(metadata)
    for search_key, search_func in searches.items():
        if not search_func(item):
            errors.add(
                f"Item is not searchable by {search_key} extracted from filename {abspath}.\n"
                f"  Item has: {item.get(search_key)}\n"
                f"  Search has: {metadata[search_key]}")
def validate_url_validity(item, errors):
    """
    Checks url for validity.

    :param item: bibliography item; its "url" field (a list) is validated,
        and self-served urls are additionally checked for item_id, owner
        metadata, and owner fullname presence in the "note" field
    :param errors: set-like collector; problems are reported via errors.add()
    """
    url = item.get("url")
    item_id = item.get("id")
    if url is None:
        return
    for idx, single_url in enumerate(url):
        if not utils.is_url_valid(single_url, item):
            errors.add(
                f"Field url with value [{single_url}] and number #{idx} is wrong"
            )
        # Remaining checks apply to self-served urls only.
        if not utils.is_url_self_served(single_url):
            continue
        match = utils.SELF_SERVED_URL_REGEXP.match(single_url)
        if not match:
            errors.add(
                f"Self served url [{single_url}] doesn't match SELF_SERVED_URL_REGEXP"
            )
            continue
        if match.group("item_id") != item_id:
            errors.add("Wrong item_id specified in self-served url")
            continue
        single_filename, single_filesize = utils.get_file_info_from_url(
            single_url, item)
        metadata = utils.extract_metadata_from_file(single_filename)
        # FIX: metadata.get("owner") is None when the key is absent (crash on
        # .split), and str.split never returns an empty list, so the
        # missing-owner check below was unreachable. Guard and drop empty tokens.
        owners = [o for o in (metadata.get("owner") or "").split("+") if o]
        if not owners:
            # FIX: used undefined name `number` (NameError) and printed the
            # whole url list; report the loop index and the single url instead.
            errors.add(
                f"Owner specification expected for self-served url #{idx} [{single_url}], stored at [{single_filename}]"
            )
            continue
        for owner in owners:
            owner_fullname = config.parser.bookkeepers.get(owner)
            if owner_fullname:
                note = item.get("note")
                if note is None:
                    errors.add(
                        f"Owner fullname ({owner_fullname}) should be present in note, but the note is missing"
                    )
                elif owner_fullname not in note:
                    errors.add(
                        f"Owner fullname ({owner_fullname}) should be present in note, but it is not"
                    )
def validate_url_validity(item, errors):
    """
    Checks url for validity.

    :param item: bibliography item; its "url" field (a list) is validated,
        and self-served urls are additionally checked for item_id, owner
        metadata, and owner fullname presence in the "note" field
    :param errors: set-like collector; problems are reported via errors.add()
    """
    url = item.get("url")
    item_id = item.get("id")
    if url is None:
        return
    for idx, single_url in enumerate(url):
        if not utils.is_url_valid(single_url, item):
            errors.add(f"Field url with value [{single_url}] and number #{idx} is wrong")
        # Remaining checks apply to self-served urls only.
        if not utils.is_url_self_served(single_url):
            continue
        match = utils.SELF_SERVED_URL_REGEXP.match(single_url)
        if not match:
            errors.add(f"Self served url [{single_url}] doesn't match SELF_SERVED_URL_REGEXP")
            continue
        if match.group("item_id") != item_id:
            errors.add("Wrong item_id specified in self-served url")
            continue
        single_filename, single_filesize = utils.get_file_info_from_url(single_url, item)
        metadata = utils.extract_metadata_from_file(single_filename)
        # FIX: metadata.get("owner") is None when the key is absent (crash on
        # .split), and str.split never returns an empty list, so the
        # missing-owner check below was unreachable. Guard and drop empty tokens.
        owners = [o for o in (metadata.get("owner") or "").split("+") if o]
        if not owners:
            # FIX: used undefined name `number` (NameError) and printed the
            # whole url list; report the loop index and the single url instead.
            errors.add(f"Owner specification expected for self-served url #{idx} [{single_url}], stored at [{single_filename}]")
            continue
        for owner in owners:
            owner_fullname = config.parser.bookkeepers.get(owner)
            if owner_fullname:
                note = item.get("note")
                if note is None:
                    errors.add(f"Owner fullname ({owner_fullname}) should be present in note, but the note is missing")
                elif owner_fullname not in note:
                    errors.add(f"Owner fullname ({owner_fullname}) should be present in note, but it is not")