def get_meta(post, lang):
    """Get post meta from compiler or source file.

    Returns a tuple ``(meta, used_extractor)``: ``meta`` is a defaultdict
    of metadata (missing keys yield '') and ``used_extractor`` is the
    extractor or compiler that actually produced the metadata (or None).
    """
    meta = defaultdict(lambda: '')
    used_extractor = None
    config = getattr(post, 'config', None)
    # Fix: use the three-argument getattr so posts without the attribute
    # fall back to the default extractors instead of raising AttributeError
    # (the original two-argument form made the `is None` check dead code).
    metadata_extractors_by = getattr(post, 'metadata_extractors_by', None)
    if metadata_extractors_by is None:
        metadata_extractors_by = metadata_extractors.default_metadata_extractors_by()

    # If meta file exists, use it
    metafile_meta, used_extractor = get_metadata_from_meta_file(
        post.metadata_path, post, config, lang, metadata_extractors_by)
    is_two_file = bool(metafile_meta)

    # Fetch compiler metadata.
    compiler_meta = {}
    if (getattr(post, 'compiler', None) and post.compiler.supports_metadata and
            metadata_extractors.check_conditions(
                post, post.source_path, post.compiler.metadata_conditions, config, None)):
        compiler_meta = post.compiler.read_metadata(post, lang=lang)
        used_extractor = post.compiler
    meta.update(compiler_meta)

    # Meta files and inter-file metadata override compiler metadata
    if not metafile_meta:
        new_meta, used_extractor = get_metadata_from_file(
            post.source_path, post, config, lang, metadata_extractors_by)
        meta.update(new_meta)
    else:
        meta.update(metafile_meta)

    # Filename-based metadata extractors (fallback only)
    if not meta:
        extractors = metadata_extractors_by['source'].get(
            metadata_extractors.MetaSource.filename, [])
        for extractor in extractors:
            if not metadata_extractors.check_conditions(
                    post, post.source_path, extractor.conditions, config, None):
                continue
            meta.update(extractor.extract_filename(post.source_path, lang))

    if lang is None:
        # Only perform these checks for the default language
        if 'slug' not in meta:
            # If no slug is found in the metadata use the filename
            meta['slug'] = slugify(unicode_str(os.path.splitext(
                os.path.basename(post.source_path))[0]), post.default_lang)
        if 'title' not in meta:
            # If no title is found, use the filename without extension
            meta['title'] = os.path.splitext(
                os.path.basename(post.source_path))[0]

    # Set one-file status basing on default language only (Issue #3191)
    if is_two_file or lang is None:
        post.is_two_file = is_two_file

    return meta, used_extractor
def get_meta(post, lang):
    """Get post meta from compiler or source file.

    Returns a tuple ``(meta, used_extractor)``: ``meta`` is a defaultdict
    of metadata (missing keys yield '') and ``used_extractor`` is the
    extractor or compiler that actually produced the metadata (or None).
    """
    meta = defaultdict(lambda: '')
    used_extractor = None
    config = getattr(post, 'config', None)
    # Fix: use the three-argument getattr so posts without the attribute
    # fall back to the default extractors instead of raising AttributeError
    # (the original two-argument form made the `is None` check dead code).
    metadata_extractors_by = getattr(post, 'metadata_extractors_by', None)
    if metadata_extractors_by is None:
        metadata_extractors_by = metadata_extractors.default_metadata_extractors_by()

    # If meta file exists, use it
    metafile_meta = get_metadata_from_meta_file(
        post.metadata_path, post, config, lang, metadata_extractors_by)
    if not metafile_meta:
        post.is_two_file = False

    # Fetch compiler metadata.
    compiler_meta = {}
    if (getattr(post, 'compiler', None) and post.compiler.supports_metadata and
            metadata_extractors.check_conditions(
                post, post.source_path, post.compiler.metadata_conditions, config, None)):
        compiler_meta = post.compiler.read_metadata(post, lang=lang)
        used_extractor = post.compiler
    meta.update(compiler_meta)

    # Meta files and inter-file metadata override compiler metadata
    if not post.is_two_file:
        new_meta, used_extractor = get_metadata_from_file(
            post.source_path, post, config, lang, metadata_extractors_by)
        meta.update(new_meta)
    else:
        meta.update(metafile_meta)

    # Filename-based metadata extractors (fallback only)
    if not meta:
        extractors = metadata_extractors_by['source'].get(
            metadata_extractors.MetaSource.filename, [])
        for extractor in extractors:
            if not metadata_extractors.check_conditions(
                    post, post.source_path, extractor.conditions, config, None):
                continue
            meta.update(extractor.extract_filename(post.source_path, lang))

    if lang is None:
        # Only perform these checks for the default language
        if 'slug' not in meta:
            # If no slug is found in the metadata use the filename
            meta['slug'] = slugify(unicode_str(os.path.splitext(
                os.path.basename(post.source_path))[0]), post.default_lang)
        if 'title' not in meta:
            # If no title is found, use the filename without extension
            meta['title'] = os.path.splitext(
                os.path.basename(post.source_path))[0]

    return meta, used_extractor
def get_meta(post, lang):
    """Get post meta from compiler or source file.

    Returns a tuple ``(meta, used_extractor)``: ``meta`` is a defaultdict
    of metadata (missing keys yield '') and ``used_extractor`` is the
    extractor or compiler that actually produced the metadata (or None).
    """
    meta = defaultdict(lambda: '')
    used_extractor = None
    config = getattr(post, 'config', None)
    # Fix: use the three-argument getattr so posts without the attribute
    # fall back to the default extractors instead of raising AttributeError
    # (the original two-argument form made the `is None` check dead code).
    metadata_extractors_by = getattr(post, 'metadata_extractors_by', None)
    if metadata_extractors_by is None:
        metadata_extractors_by = metadata_extractors.default_metadata_extractors_by()

    # If meta file exists, use it
    meta.update(get_metadata_from_meta_file(
        post.metadata_path, post, config, lang, metadata_extractors_by))
    if not meta:
        post.is_two_file = False

    # Fetch compiler metadata.
    compiler_meta = {}
    if (getattr(post, 'compiler', None) and post.compiler.supports_metadata and
            metadata_extractors.check_conditions(
                post, post.source_path, post.compiler.metadata_conditions, config, None)):
        compiler_meta = post.compiler.read_metadata(post, lang=lang)
        used_extractor = post.compiler
    meta.update(compiler_meta)

    if not post.is_two_file and not compiler_meta:
        # Meta file has precedence over file, which can contain garbage.
        # Moreover, we should not read the file if we have compiler meta.
        new_meta, used_extractor = get_metadata_from_file(
            post.source_path, post, config, lang, metadata_extractors_by)
        meta.update(new_meta)

    # Filename-based metadata extractors (fallback only)
    if not meta:
        extractors = metadata_extractors_by['source'].get(
            metadata_extractors.MetaSource.filename, [])
        for extractor in extractors:
            if not metadata_extractors.check_conditions(
                    post, post.source_path, extractor.conditions, config, None):
                continue
            meta.update(extractor.extract_filename(post.source_path, lang))

    if lang is None:
        # Only perform these checks for the default language
        if 'slug' not in meta:
            # If no slug is found in the metadata use the filename
            meta['slug'] = slugify(unicode_str(os.path.splitext(
                os.path.basename(post.source_path))[0]), post.default_lang)
        if 'title' not in meta:
            # If no title is found, use the filename without extension
            meta['title'] = os.path.splitext(
                os.path.basename(post.source_path))[0]

    return meta, used_extractor
def test_check_conditions():
    """Exercise every MetaCondition kind against a dummy post."""
    post = dummy()
    post.compiler = dummy()
    post.compiler.name = 'foo'
    filename = 'foo.bar'
    config = {'baz': True, 'quux': False}

    # Table of (conditions, expected outcome) pairs.
    cases = [
        ([(MetaCondition.config_bool, 'baz'),
          (MetaCondition.config_present, 'quux')], True),
        ([(MetaCondition.config_bool, 'quux')], False),
        ([(MetaCondition.config_present, 'foobar')], False),
        ([(MetaCondition.extension, 'bar')], True),
        ([(MetaCondition.extension, 'baz')], False),
        ([(MetaCondition.compiler, 'foo')], True),
        ([(MetaCondition.compiler, 'foobar')], False),
        ([(MetaCondition.never, None),
          (MetaCondition.config_present, 'bar')], False),
    ]
    for conditions, expected in cases:
        assert bool(check_conditions(post, filename, conditions, config, '')) == expected
def test_check_conditions():
    """Exercise every MetaCondition kind against a dummy post."""
    post = dummy()
    post.compiler = dummy()
    post.compiler.name = 'foo'
    filename = 'foo.bar'
    config = {'baz': True, 'quux': False}

    def passes(conditions):
        # Local helper: avoids repeating the invariant arguments.
        return check_conditions(post, filename, conditions, config, '')

    assert passes([(MetaCondition.config_bool, 'baz'),
                   (MetaCondition.config_present, 'quux')])
    assert not passes([(MetaCondition.config_bool, 'quux')])
    assert not passes([(MetaCondition.config_present, 'foobar')])
    assert passes([(MetaCondition.extension, 'bar')])
    assert not passes([(MetaCondition.extension, 'baz')])
    assert passes([(MetaCondition.compiler, 'foo')])
    assert not passes([(MetaCondition.compiler, 'foobar')])
    assert not passes([(MetaCondition.never, None),
                       (MetaCondition.config_present, 'bar')])
def _parse_comment(self, filename):
    """Read a comment from a file, and return metadata dict and content."""
    with io.open(filename, "r", encoding="utf-8-sig") as f:
        source_text = f.read()

    # Walk the extractors in priority order; the first one that yields
    # metadata wins.  We pass None as the post since there is no post object
    # here; the (currently only) consequence is that compiler-specific
    # plugins don't work — the compiler would itself be determined from the
    # metadata still being extracted.
    for priority in metadata_extractors.MetaPriority:
        for extractor in self.site.metadata_extractors_by['priority'].get(priority, []):
            if not metadata_extractors.check_conditions(
                    None, filename, extractor.conditions, self.site.config, source_text):
                continue
            extractor.check_requirements()
            # Split the post into metadata and content, then decode the
            # metadata with this extractor.
            meta_str, content = extractor.split_metadata_from_text(source_text)
            new_meta = extractor._extract_metadata_from_text(meta_str)
            if new_meta:
                return new_meta, content

    _LOGGER.error("Cannot identify metadata format for comment {0}!".format(filename))
    exit(1)
def _parse_comment(self, filename):
    """Read a comment from a file, and return metadata dict and content."""
    with io.open(filename, "r", encoding="utf-8-sig") as handle:
        text = handle.read()

    metadata = None
    body = None
    done = False
    for level in metadata_extractors.MetaPriority:
        if done:
            break
        for candidate in self.site.metadata_extractors_by['priority'].get(level, []):
            # check_conditions gets None for the post because no post object
            # exists for a comment; the (currently only) consequence is that
            # compiler-specific plugins don't work — the compiler would be
            # determined from the metadata still being extracted.
            if not metadata_extractors.check_conditions(
                    None, filename, candidate.conditions, self.site.config, text):
                continue
            candidate.check_requirements()
            # Split into metadata and content, then decode the metadata.
            raw_meta, body = candidate.split_metadata_from_text(text)
            decoded = candidate._extract_metadata_from_text(raw_meta)
            if decoded:
                metadata = decoded
                done = True
                break

    if metadata is None:
        _LOGGER.error("Cannot identify metadata format for comment {0}!".format(filename))
        exit(1)
    return metadata, body
def get_metadata_from_file(source_path, post, config, lang, metadata_extractors_by):
    """Extract metadata from the file itself, by parsing contents.

    Returns ``(meta, used_extractor)``; an unreadable/missing file yields
    ``({}, None)``, while a non-UTF-8 file raises ValueError.
    """
    # Fix: keep the try block around file resolution/reading only.  The
    # original wrapped the whole extraction loop, so `except Exception`
    # silently swallowed extractor errors too — not just missing files.
    try:
        if lang and config:
            source_path = get_translation_candidate(config, source_path, lang)
        elif lang:
            source_path += '.' + lang
        with io.open(source_path, "r", encoding="utf-8-sig") as meta_file:
            source_text = meta_file.read()
    except (UnicodeDecodeError, UnicodeEncodeError):
        msg = 'Error reading {0}: Nikola only supports UTF-8 files'.format(
            source_path)
        LOGGER.error(msg)
        raise ValueError(msg)
    except Exception:
        # The file may not exist, for multilingual sites
        return {}, None

    meta = {}
    used_extractor = None
    for priority in metadata_extractors.MetaPriority:
        found_in_priority = False
        for extractor in metadata_extractors_by['priority'].get(priority, []):
            if not metadata_extractors.check_conditions(
                    post, source_path, extractor.conditions, config, source_text):
                continue
            extractor.check_requirements()
            new_meta = extractor.extract_text(source_text)
            if new_meta:
                found_in_priority = True
                used_extractor = extractor
                # Map metadata from other platforms to names Nikola expects (Issue #2817)
                map_metadata(new_meta, extractor.map_from, config)
                meta.update(new_meta)
                break
        if found_in_priority:
            break
    return meta, used_extractor
def get_metadata_from_file(source_path, post, config, lang, metadata_extractors_by):
    """Extract metadata from the file itself, by parsing contents.

    Returns ``(meta, used_extractor)``; an unreadable/missing file yields
    ``({}, None)``, while a non-UTF-8 file raises ValueError.
    """
    try:
        # Resolve the language-specific source file before reading.
        if lang and config:
            source_path = get_translation_candidate(config, source_path, lang)
        elif lang:
            source_path = source_path + '.' + lang
        with io.open(source_path, "r", encoding="utf-8-sig") as handle:
            text = handle.read()
    except (UnicodeDecodeError, UnicodeEncodeError):
        message = 'Error reading {0}: Nikola only supports UTF-8 files'.format(source_path)
        LOGGER.error(message)
        raise ValueError(message)
    except Exception:
        # The file may not exist, for multilingual sites
        return {}, None

    collected = {}
    winner = None
    # Try extractors level by level; stop at the first priority level where
    # some extractor produced metadata.
    for level in metadata_extractors.MetaPriority:
        for candidate in metadata_extractors_by['priority'].get(level, []):
            if not metadata_extractors.check_conditions(
                    post, source_path, candidate.conditions, config, text):
                continue
            candidate.check_requirements()
            extracted = candidate.extract_text(text)
            if extracted:
                winner = candidate
                # Map metadata from other platforms to names Nikola expects (Issue #2817)
                # Map metadata values (Issue #3025)
                map_metadata(extracted, candidate.map_from, config)
                collected.update(extracted)
                break
        if winner is not None:
            break
    return collected, winner
def test_check_conditions(conditions, dummy_post):
    """Every parametrized condition set is expected to pass for this config."""
    config = {"baz": True, "quux": False}
    source = "foo.bar"
    result = check_conditions(dummy_post, source, conditions, config, "")
    assert result