def __init__(self, folder_strategy=None, template=None):
    """Set up the folder strategy, the formatter and the template.

    Args:
        folder_strategy: Name handed to `FolderStrategy.from_name` to
            resolve the strategy. When None, no strategy is resolved and
            the attribute stays None.
        template: Template stored as-is on the instance (may be None).
    """
    resolved_strategy = (
        FolderStrategy.from_name(folder_strategy)
        if folder_strategy is not None
        else None
    )

    self.folder_strategy = resolved_strategy
    self.formatter = PseudoFStringFormatter()
    self.template = template
class FilenameBuilder(object):
    """Callable assembling a filename from a url and/or a given filename.

    The base name comes from the given filename (extension split off) or,
    when no filename is given, from `md5(url)`. The result is optionally
    rendered through a template, passed to a folder strategy and suffixed
    with '.gz'.
    """

    def __init__(self, folder_strategy=None, template=None):
        """Store the template and resolve the optional folder strategy.

        Args:
            folder_strategy: Name handed to `FolderStrategy.from_name`.
                When None, no folder strategy is applied.
            template: Template rendered by the formatter on each call.
                When None, the filename is simply base + extension.
        """
        self.folder_strategy = None

        if folder_strategy is not None:
            self.folder_strategy = FolderStrategy.from_name(folder_strategy)

        self.formatter = PseudoFStringFormatter()
        self.template = template

    def __call__(self, url=None, filename=None, ext=None, formatter_kwargs=None,
                 compressed=False):
        """Build the final filename.

        Args:
            url: Url used to derive the base name when `filename` is None;
                also forwarded to the folder strategy.
            filename: Optional filename whose base and extension are reused.
            ext: Fallback extension, used only when `filename` carries none.
            formatter_kwargs: Optional mapping of extra keyword arguments
                forwarded to the template formatter. None means no extras.
            compressed: When true, '.gz' is appended to the result.

        Returns:
            The resulting filename as a string.

        Raises:
            FilenameFormattingError: If rendering the template fails.
        """
        # A None sentinel replaces the former `{}` default so that no dict
        # object is shared between calls.
        if formatter_kwargs is None:
            formatter_kwargs = {}

        original_ext = None

        if filename is None:
            base = md5(url)
        else:
            base, original_ext = splitext(filename)

        # We favor the extension found in given filename, else we fallback
        # on the provided one if any (usually inferred from http response)
        ext = original_ext if original_ext else (ext or '')

        if self.template is not None:
            try:
                filename = self.formatter.format(
                    self.template,
                    value=base,
                    ext=ext,
                    **formatter_kwargs
                )
            except Exception as e:
                raise FilenameFormattingError(reason=e, template=self.template)
        else:
            filename = base + ext

        if self.folder_strategy:
            filename = self.folder_strategy(filename, url=url)

        if compressed:
            filename += '.gz'

        return filename
# ============================================================================= # Minet Apply Scraper Function # ============================================================================= # # Function taking a scraper definition and applying its logic recursively # to yield its result. # import re import json from urllib.parse import urljoin from minet.utils import PseudoFStringFormatter, nested_get FORMATTER = PseudoFStringFormatter() DEFAULT_CONTEXT = {} EXTRACTOR_NAMES = set(['text', 'html', 'inner_html', 'outer_html']) TRANSFORMERS = { 'lower': lambda x: x.lower(), 'strip': lambda x: x.strip(), 'upper': lambda x: x.upper() } def get_aliases(o, aliases): for alias in aliases: if alias in o: return o[alias] def merge_contexts(global_context, local_context):
def test_pseudo_fstring_formatter(self):
    """Check that a subscript expression in the template is resolved."""
    line = {'test': 'hello'}
    rendered = PseudoFStringFormatter().format('{line["test"]}', line=line)

    assert rendered == 'hello'