Ejemplo n.º 1
0
    def __init__(self, folder_strategy=None, template=None):
        """Store the template and prepare the formatter and folder strategy.

        `folder_strategy` is handed to `FolderStrategy.from_name` unless it
        is None, in which case no strategy is set.
        """
        self.folder_strategy = (
            None
            if folder_strategy is None
            else FolderStrategy.from_name(folder_strategy)
        )

        self.template = template
        self.formatter = PseudoFStringFormatter()
Ejemplo n.º 2
0
class FilenameBuilder(object):
    """Callable building the filename under which a resource is stored.

    The name starts from an explicit filename or, failing that, from
    `md5(url)`. It can then be rewritten through an optional template,
    passed through an optional folder strategy and suffixed with `.gz`.
    """

    def __init__(self, folder_strategy=None, template=None):
        """Set up the builder.

        Args:
            folder_strategy: Name handed to `FolderStrategy.from_name` to
                obtain the strategy; None disables the folder step.
            template: Template given to the formatter along with `value`
                and `ext`; None means the result is simply base + ext.
        """
        self.folder_strategy = None

        if folder_strategy is not None:
            self.folder_strategy = FolderStrategy.from_name(folder_strategy)

        self.formatter = PseudoFStringFormatter()
        self.template = template

    def __call__(self,
                 url=None,
                 filename=None,
                 ext=None,
                 formatter_kwargs=None,
                 compressed=False):
        """Build the final filename.

        Args:
            url: Passed to `md5` to obtain the base name when `filename` is
                None, and forwarded to the folder strategy.
            filename: Explicit name; split into base and extension.
            ext: Fallback extension, used only when `filename` carries none.
            formatter_kwargs: Extra keyword arguments forwarded to the
                formatter when a template is set. None means no extras.
            compressed: When true, `.gz` is appended to the result.

        Returns:
            The resulting filename.

        Raises:
            FilenameFormattingError: If formatting the template fails.
        """
        # A None sentinel avoids sharing one mutable dict across every call.
        if formatter_kwargs is None:
            formatter_kwargs = {}

        original_ext = None

        if filename is None:
            base = md5(url)
        else:
            base, original_ext = splitext(filename)

        # We favor the extension found in given filename, else we fallback
        # on the provided one if any (usually inferred from http response)
        ext = original_ext if original_ext else (ext or '')

        if self.template is not None:
            try:
                filename = self.formatter.format(self.template,
                                                 value=base,
                                                 ext=ext,
                                                 **formatter_kwargs)
            except Exception as e:
                # Any formatting failure is reported as one error type that
                # carries the offending template.
                raise FilenameFormattingError(reason=e, template=self.template)
        else:
            filename = base + ext

        if self.folder_strategy:
            filename = self.folder_strategy(filename, url=url)

        # The compression suffix goes last, after the folder strategy ran.
        if compressed:
            filename += '.gz'

        return filename
Ejemplo n.º 3
0
# =============================================================================
# Minet Apply Scraper Function
# =============================================================================
#
# Function taking a scraper definition and applying its logic recursively
# to yield its result.
#
import re
import json
from urllib.parse import urljoin

from minet.utils import PseudoFStringFormatter, nested_get

# Module-level formatter instance and the (empty) default context.
FORMATTER = PseudoFStringFormatter()
DEFAULT_CONTEXT = {}

# Recognized extractor names.
EXTRACTOR_NAMES = {'text', 'html', 'inner_html', 'outer_html'}

# Transformations addressable by name; each one calls the matching method
# on the value it receives.
TRANSFORMERS = dict(
    lower=lambda text: text.lower(),
    strip=lambda text: text.strip(),
    upper=lambda text: text.upper(),
)


def get_aliases(o, aliases):
    """Return the value stored in `o` under the first alias it contains.

    Aliases are tried in the given order; None is returned when `o`
    contains none of them.
    """
    return next((o[name] for name in aliases if name in o), None)


def merge_contexts(global_context, local_context):
Ejemplo n.º 4
0
    def test_pseudo_fstring_formatter(self):
        # A quoted subscript inside the placeholder must resolve to the
        # matching key of the keyword argument.
        row = {'test': 'hello'}
        rendered = PseudoFStringFormatter().format('{line["test"]}', line=row)

        assert rendered == 'hello'