Ejemplo n.º 1
0
def parse_xml(site: Site, gramps_file_path: str) -> None:
    """Parse a Gramps export into ``site.ancestry``.

    The file is probed in this order:

    1. a gzipped tar archive (*.gpkg), which is extracted into a cache
       directory and whose ``data.gramps`` member is parsed;
    2. a gzipped XML file (*.gramps);
    3. plain XML (*.gramps).

    :param site: the site whose ancestry is populated and whose configured
        cache directory is used for extraction.
    :param gramps_file_path: path to the Gramps file to parse.
    """
    # The cache directory is keyed on the hash of the source path.
    cache_directory_path = path.join(
        site.configuration.cache_directory_path, Gramps.name(),
        hashlib.md5(gramps_file_path.encode('utf-8')).hexdigest())
    with suppress(FileExistsError):
        makedirs(cache_directory_path)

    logger = logging.getLogger()
    logger.info('Parsing %s...', gramps_file_path)

    try:
        # The context manager guarantees the gzip handle is closed on every
        # path, including the fall-through to the plain XML handler below.
        with gzip.open(gramps_file_path) as gramps_file:
            try:
                # Treat the file as a tar archive (*.gpkg) with media and a
                # gzipped XML file (./data.gz/data).
                # NOTE(review): extractall() trusts member paths; confirm the
                # archives handled here come from a trusted source.
                with tarfile.open(fileobj=gramps_file) as archive:
                    archive.extractall(cache_directory_path)
                gramps_file_path = path.join(cache_directory_path,
                                             'data.gramps')
                _parse_tree(site.ancestry, etree.parse(gramps_file_path),
                            cache_directory_path)
            except tarfile.ReadError:
                # Treat the file as a gzipped XML file (*.gramps).
                _parse_tree(site.ancestry, etree.parse(gramps_file),
                            rootname(gramps_file_path))
    except OSError:
        # Treat the file as plain XML (*.gramps).
        _parse_tree(site.ancestry, etree.parse(gramps_file_path),
                    rootname(gramps_file_path))
Ejemplo n.º 2
0
 def __init__(self,
              session: aiohttp.ClientSession,
              cache_directory_path: str,
              ttl: int = 86400):
     """Set up the instance and create its ``wikipedia`` cache directory.

     :param session: the HTTP client session stored on the instance.
     :param cache_directory_path: parent directory under which the
         ``wikipedia`` cache directory is created.
     :param ttl: time-to-live stored on the instance (presumably seconds;
         confirm against the code that reads it).
     """
     wikipedia_cache_path = join(cache_directory_path, 'wikipedia')
     self._cache_directory_path = wikipedia_cache_path
     makedirs(wikipedia_cache_path)
     self._ttl, self._session = ttl, session
Ejemplo n.º 3
0
async def generate_configuration_file(destination_file_path: str,
                                      jinja2_environment: Environment,
                                      **kwargs) -> None:
    """Render the bundled ``nginx.conf.j2`` template to a file.

    :param destination_file_path: where the rendered configuration is
        written. Missing parent directories are created.
    :param jinja2_environment: the Jinja2 environment used to load and
        render the template.
    :param kwargs: variables passed to the template as its render context.
    """
    configuration_file_template_path = path.join(path.dirname(__file__),
                                                 'assets', 'nginx.conf.j2')
    template = FileSystemLoader('/').load(jinja2_environment,
                                          configuration_file_template_path,
                                          jinja2_environment.globals)
    # Render before touching the destination, so a failing render cannot
    # leave behind an empty or truncated configuration file.
    rendered_configuration = await template.render_async(kwargs)

    destination_directory_path = path.dirname(destination_file_path)
    # A bare file name has no directory component; there is nothing to create.
    if destination_directory_path:
        makedirs(destination_directory_path)
    with open(destination_file_path, 'w') as f:
        f.write(rendered_configuration)
Ejemplo n.º 4
0
    def _render_config(self) -> None:
        """Write the nginx configuration and Dockerfile to ``<output>/nginx``.

        The ``nginx.conf.j2`` resource is copied into the output directory
        and handed to ``render_file``; the Dockerfile from ``DOCKER_PATH`` is
        copied next to it.
        """
        nginx_directory_path = os.path.join(
            self._site.configuration.output_directory_path, 'nginx')
        makedirs(nginx_directory_path)

        # Copy the nginx configuration template into place and render it.
        template_name = 'nginx.conf.j2'
        configuration_file_path = os.path.join(nginx_directory_path,
                                               template_name)
        self._site.resources.copy2(template_name, configuration_file_path)
        render_file(configuration_file_path, create_environment(self._site))

        # Copy the Dockerfile alongside the configuration.
        dockerfile_name = 'Dockerfile'
        copyfile(os.path.join(DOCKER_PATH, dockerfile_name),
                 os.path.join(nginx_directory_path, dockerfile_name))
Ejemplo n.º 5
0
    def one(self, link: Link) -> Optional[Entry]:
        """Return the Wikipedia entry a link points to, if any.

        Only ``http(s)://<language>.wikipedia.org/wiki/<title>`` links are
        handled. API responses are cached on disk in a ``wikipedia``
        directory, in files named after the MD5 hash of the API URI. A cached
        response is used while its modification time plus the TTL (seconds)
        lies in the future; a stale one is still used if the request fails.

        :param link: the link to look up.
        :return: the entry, or ``None`` if the link is not a Wikipedia
            article link, no response could be obtained, or the response does
            not contain the expected page data.
        """
        parts = urlparse(link.uri)
        if parts.scheme not in ['http', 'https']:
            return None
        if not re.fullmatch(r'^[a-z]+\.wikipedia\.org$', parts.netloc,
                            re.IGNORECASE):
            return None
        if not re.fullmatch(r'^/wiki/.+$', parts.path, re.IGNORECASE):
            return None
        # The host was validated above, so its first label is the language.
        language_code = parts.netloc.split('.')[0]
        # Strip the leading '/wiki/'.
        title = parts.path[6:]
        uri = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=extracts&exintro&format=json&formatversion=2' % (
            language_code, title)

        cache_directory_path = join(self._cache_directory_path, 'wikipedia')
        makedirs(cache_directory_path)
        cache_file_path = join(cache_directory_path,
                               hashlib.md5(uri.encode('utf-8')).hexdigest())

        # First choice: a cached response that has not expired yet.
        response_data = None
        try:
            if getmtime(cache_file_path) + self._ttl > time():
                with open(cache_file_path) as f:
                    response_data = load(f)
        except FileNotFoundError:
            pass

        # Second choice: a fresh response, which is cached once it has been
        # parsed successfully.
        if response_data is None:
            try:
                response = requests.get(uri)
                response_data = response.json()
                with open(cache_file_path, 'w') as f:
                    f.write(response.text)
            except (RequestException, ValueError) as e:
                logger = logging.getLogger()
                # Logger.warn() is deprecated and removed in Python 3.13.
                logger.warning(
                    'Could not retrieve content from Wikipedia at %s: %s',
                    uri, e)

        # Last resort: an expired cached response.
        if response_data is None:
            try:
                with open(cache_file_path) as f:
                    response_data = load(f)
            except FileNotFoundError:
                pass

        if response_data is None:
            return None

        # Error responses and missing pages lack some of these keys; treat
        # them as "no entry" instead of raising.
        try:
            page = response_data['query']['pages'][0]
            page_title = page['title']
            page_extract = page['extract']
        except (KeyError, IndexError, TypeError):
            return None
        return Entry(link.uri, page_title, page_extract)
Ejemplo n.º 6
0
def _filter_image(site: Site,
                  file: File,
                  width: Optional[int] = None,
                  height: Optional[int] = None) -> str:
    """Publish a resized copy of an image file and return its public path.

    The resized image is stored in the site's ``image`` cache directory and
    hard-linked into ``<www>/file``. Requested dimensions are capped at the
    source image's own dimensions.

    :param site: the site providing the www and cache directory paths.
    :param file: the source file; its ``id``, ``extension`` and ``path``
        are read.
    :param width: the requested width, or ``None`` to size by height only.
    :param height: the requested height, or ``None`` to size by width only.
    :return: the public path, ``/file/<name>``.
    :raises ValueError: if neither ``width`` nor ``height`` is given.
    """
    if width is None and height is None:
        raise ValueError('At least the width or height must be given.')

    with Image.open(file.path) as image:
        # Cap each requested dimension at the source's own dimension.
        width = None if width is None else min(width, image.width)
        height = None if height is None else min(height, image.height)

        # Pick the resize strategy and the matching file name suffix.
        if width is not None and height is not None:
            size, suffix, convert = ((width, height), '%dx%d',
                                     resizeimage.resize_cover)
        elif width is not None:
            size, suffix, convert = width, '%dx-', resizeimage.resize_width
        else:
            size, suffix, convert = height, '-x%d', resizeimage.resize_height

        destination_name = '%s-%s.%s' % (file.id, suffix % size,
                                         file.extension)
        destination_path = '/file/%s' % destination_name

        file_directory_path = os.path.join(
            site.configuration.www_directory_path, 'file')
        output_file_path = join(file_directory_path, destination_name)

        cache_directory_path = join(site.configuration.cache_directory_path,
                                    'image')
        cached_name = '%s-%s' % (hashfile(file.path), destination_name)
        cache_file_path = join(cache_directory_path, cached_name)

        try:
            os.link(cache_file_path, output_file_path)
        except FileExistsError:
            # The output file is already in place.
            pass
        except FileNotFoundError:
            # Either the cache file or the output directory is missing.
            # Regenerate unless the output file exists.
            if not exists(output_file_path):
                makedirs(cache_directory_path)
                convert(image, size).save(cache_file_path)
                makedirs(file_directory_path)
                os.link(cache_file_path, output_file_path)

    return destination_path
Ejemplo n.º 7
0
    async def _generate_config(self, event: PostGenerateEvent) -> None:
        """Write the nginx configuration and Dockerfile to ``<output>/nginx``.

        :param event: the triggering event; it is not read here.
        """
        nginx_directory_path = os.path.join(
            self._site.configuration.output_directory_path, 'nginx')
        makedirs(nginx_directory_path)

        # Copy the nginx configuration template into place and render it.
        template_name = 'nginx.conf.j2'
        configuration_file_path = os.path.join(nginx_directory_path,
                                               template_name)
        await self._site.assets.copy2(template_name, configuration_file_path)
        await self._site.renderer.render_file(configuration_file_path)

        # Copy the Dockerfile alongside the configuration.
        dockerfile_name = 'Dockerfile'
        copyfile(os.path.join(DOCKER_PATH, dockerfile_name),
                 os.path.join(nginx_directory_path, dockerfile_name))
Ejemplo n.º 8
0
async def generate_configuration_file(destination_file_path: str,
                                      jinja2_environment: Environment,
                                      **kwargs) -> None:
    """Render the bundled ``nginx.conf.j2`` template to a file.

    :param destination_file_path: where the rendered configuration is
        written. Missing parent directories are created.
    :param jinja2_environment: the Jinja2 environment used to load and
        render the template.
    :param kwargs: variables passed to the template as its render context.
    """
    # The template name is the template's path relative to root_path,
    # joined with forward slashes.
    root_path = rootname(__file__)
    configuration_file_template_name = '/'.join(
        Path(
            path.relpath(
                path.join(path.dirname(__file__), 'assets', 'nginx.conf.j2'),
                root_path)).parts)
    template = FileSystemLoader(root_path).load(
        jinja2_environment, configuration_file_template_name,
        jinja2_environment.globals)
    # Render before touching the destination, so a failing render cannot
    # leave behind an empty or truncated configuration file.
    rendered_configuration = template.render(kwargs)

    destination_directory_path = path.dirname(destination_file_path)
    # A bare file name has no directory component; there is nothing to create.
    if destination_directory_path:
        makedirs(destination_directory_path)
    with open(destination_file_path, 'w') as f:
        f.write(rendered_configuration)
Ejemplo n.º 9
0
def _filter_file(site: Site, file: File) -> str:
    """Publish a file under ``<www>/file`` and return its public path.

    The file is copied only if the output file does not exist yet.

    :param site: the site providing the www directory path.
    :param file: the source file; its ``id``, ``extension`` and ``path``
        are read.
    :return: the public path, ``/file/<id>.<extension>``.
    """
    published_name = '%s.%s' % (file.id, file.extension)
    public_path = '/file/%s' % published_name

    www_file_directory_path = os.path.join(
        site.configuration.www_directory_path, 'file')
    output_file_path = os.path.join(www_file_directory_path, published_name)

    if not exists(output_file_path):
        makedirs(www_file_directory_path)
        copy2(file.path, output_file_path)

    return public_path
Ejemplo n.º 10
0
def extract_xml_file(gramps_file_path: str, cache_directory_path: str) -> str:
    """Extract the XML from a Gramps file into the cache directory.

    The input is first treated as a gzipped tar archive, which is extracted
    into the cache directory and whose gzipped ``data.gramps`` member
    supplies the XML. If it is not a tar archive, the decompressed input
    itself is taken as the XML.

    :param gramps_file_path: path to the gzipped Gramps file.
    :param cache_directory_path: directory that receives the extracted
        files. It is created if it does not exist.
    :return: the path of the written ``data.xml`` file.
    """
    try:
        makedirs(cache_directory_path)
    except FileExistsError:
        pass
    # Context managers close the outer stream, the archive and the inner
    # stream on every path, including errors.
    with gzip.open(gramps_file_path) as ungzipped_outer_file:
        xml_file_path = join(cache_directory_path, 'data.xml')
        logger = logging.getLogger()
        logger.info('Extracting %s...', xml_file_path)
        with open(xml_file_path, 'wb') as xml_file:
            try:
                # NOTE(review): extractall() trusts member paths; confirm the
                # archives handled here come from a trusted source.
                with tarfile.open(fileobj=ungzipped_outer_file) as archive:
                    archive.extractall(cache_directory_path)
                inner_file_path = join(cache_directory_path, 'data.gramps')
                with gzip.open(inner_file_path) as ungzipped_inner_file:
                    xml_file.write(ungzipped_inner_file.read())
            except tarfile.ReadError:
                # Not a tar archive: the decompressed input is the XML.
                xml_file.write(ungzipped_outer_file.read())
    return xml_file_path
Ejemplo n.º 11
0
def _execute_filter_image(image: Image, file_path: str,
                          cache_directory_path: str,
                          destination_directory_path: str,
                          destination_name: str, width: int,
                          height: int) -> None:
    """Place a resized copy of an image in the destination directory.

    The resized image is stored in the cache directory under a name made of
    the source file's hash and the destination name. An existing cache file
    is linked or copied straight to the destination; otherwise it is created
    first. Requested dimensions are capped at the image's own dimensions.

    :param image: the opened source image. It is closed before returning.
    :param file_path: path of the source file, used for the cache key.
    :param cache_directory_path: directory holding the cached resized images.
    :param destination_directory_path: directory receiving the result.
    :param destination_name: file name of the result.
    :param width: the requested width; ``None`` means size by height only.
    :param height: the requested height; ``None`` means size by width only.
    """
    # Close the image on every path, not only after a cache miss.
    with image:
        makedirs(destination_directory_path)
        cache_file_path = join(
            cache_directory_path,
            '%s-%s' % (hashfile(file_path), destination_name))
        destination_file_path = join(destination_directory_path,
                                     destination_name)

        try:
            link_or_copy(cache_file_path, destination_file_path)
        except FileNotFoundError:
            # Cache miss: create the resized image, then publish it.
            makedirs(cache_directory_path)
            if width is not None:
                width = min(width, image.width)
            if height is not None:
                height = min(height, image.height)

            if width is None:
                size = height
                convert = resizeimage.resize_height
            elif height is None:
                size = width
                convert = resizeimage.resize_width
            else:
                size = (width, height)
                convert = resizeimage.resize_cover
            convert(image, size).save(cache_file_path)
            # The destination directory was already created above.
            link_or_copy(cache_file_path, destination_file_path)
Ejemplo n.º 12
0
def _do_filter_image(file_path: str, cache_directory_path: str,
                     destination_directory_path: str, destination_name: str,
                     convert: Callable, size: Tuple[int, int]) -> None:
    """Hard-link a converted copy of an image into the destination directory.

    The converted image is stored in the cache directory under a name made
    of the source file's hash and the destination name. It is created only
    when linking the cache file fails with ``FileNotFoundError``.

    :param file_path: path of the source image.
    :param cache_directory_path: directory holding the converted images.
    :param destination_directory_path: directory receiving the hard link.
    :param destination_name: file name of the hard link.
    :param convert: called as ``convert(image, size)``; its result is saved
        to the cache file.
    :param size: passed through to ``convert``.
    """
    makedirs(destination_directory_path)
    cached_name = '%s-%s' % (hashfile(file_path), destination_name)
    cache_file_path = join(cache_directory_path, cached_name)
    destination_file_path = join(destination_directory_path, destination_name)

    try:
        os.link(cache_file_path, destination_file_path)
    except FileNotFoundError:
        # Cache miss: convert the source image into the cache, then link it.
        makedirs(cache_directory_path)
        with Image.open(file_path) as source_image:
            converted_image = convert(source_image, size)
            converted_image.save(cache_file_path)
        makedirs(destination_directory_path)
        os.link(cache_file_path, destination_file_path)
Ejemplo n.º 13
0
async def generate_dockerfile_file(destination_file_path: str) -> None:
    """Copy the bundled Dockerfile to the given path.

    :param destination_file_path: where the Dockerfile is written. Missing
        parent directories are created.
    """
    destination_directory_path = path.dirname(destination_file_path)
    # A bare file name has no directory component; there is nothing to create.
    if destination_directory_path:
        makedirs(destination_directory_path)
    copyfile(
        path.join(path.dirname(__file__), 'assets', 'docker', 'Dockerfile'),
        destination_file_path)
Ejemplo n.º 14
0
def _create_file(path: str) -> object:
    """Open a file for writing, creating its parent directories first.

    :param path: the file to create. A bare file name is opened in the
        current working directory.
    :return: the file object opened in text write mode. The caller is
        responsible for closing it.
    """
    directory_path = os.path.dirname(path)
    # A bare file name has no directory component; there is nothing to create.
    if directory_path:
        makedirs(directory_path)
    return open(path, 'w')
Ejemplo n.º 15
0
def _do_filter_file(file_path: str, destination_directory_path: str,
                    destination_name: str) -> None:
    """Link or copy a file into the destination directory.

    :param file_path: path of the source file.
    :param destination_directory_path: directory receiving the file. It is
        created first.
    :param destination_name: file name inside the destination directory.
    """
    makedirs(destination_directory_path)
    link_or_copy(
        file_path,
        os.path.join(destination_directory_path, destination_name))
Ejemplo n.º 16
0
def _generate_openapi(www_directory_path: str, app: App) -> None:
    """Write the app's API specification to ``<www>/api/index.json``.

    :param www_directory_path: the www directory; its ``api`` subdirectory
        is created.
    :param app: the app passed to ``build_specification``.
    """
    # Build before touching the file system, so a failing build cannot
    # leave behind an empty index.json.
    specification = build_specification(app)
    api_directory_path = join(www_directory_path, 'api')
    makedirs(api_directory_path)
    with open(join(api_directory_path, 'index.json'), 'w') as f:
        dump(specification, f)
Ejemplo n.º 17
0
 def __init__(self, cache_directory_path: str, ttl: int = 86400):
     """Set up the instance and create its ``wikipedia`` cache directory.

     :param cache_directory_path: parent directory under which the
         ``wikipedia`` cache directory is created.
     :param ttl: time-to-live stored on the instance (presumably seconds;
         confirm against the code that reads it).
     """
     wikipedia_cache_path = join(cache_directory_path, 'wikipedia')
     self._cache_directory_path = wikipedia_cache_path
     makedirs(wikipedia_cache_path)
     self._ttl = ttl