Exemple #1
0
 def test_detect_root_breakout(self):
     """Joining an absolute path such as "/test" must raise RuntimeError."""
     expected_message = "Attempted to break out of output directory to /test"
     with self.assertRaisesRegex(RuntimeError, expected_message):
         utils.sanitised_join("/foo/bar", "/test")
Exemple #2
0
 def test_detect_root_breakout(self):
     """Joining an absolute path such as "/test" must raise RuntimeError."""
     expected_message = "Attempted to break out of output directory to /test"
     # The assertion helper is taken from six rather than from the TestCase.
     expect_breakout = six.assertRaisesRegex(
         self, RuntimeError, expected_message)
     with expect_breakout:
         utils.sanitised_join("/foo/bar", "/test")
Exemple #3
0
 def test_detect_root_breakout(self):
     """Joining an absolute path such as "/test" must raise RuntimeError."""
     # The optional "(.*?:)?" group accounts for the Windows root (a drive
     # prefix in front of the path).
     expected_pattern = (
         "Attempted to break out of output directory to "
         "(.*?:)?/test"
     )
     with self.assertRaisesRegex(RuntimeError, expected_pattern):
         utils.sanitised_join("/foo/bar", "/test")
Exemple #4
0
def compile_css_files(pelican_object):
    """
    Compiles specified less files into css files using lesscpy

    Every (input, output) pair is handled independently: a pair whose input
    or output path is rejected by sanitised_join is logged and skipped, while
    a failure to create the output directory or to compile is logged and
    re-raised.

    Configuration
    -------------
    pelican_object.settings['LESS_CSS_FILES']:
    Dictionary whose values are (input, output) tuples; the input is
    resolved against the current working directory, the output against
    OUTPUT_PATH. The keys are not used here.

    pelican_object.settings['OUTPUT_PATH']:
    Directory below which the css files are written
    """

    logger.info("Generating css with lesscpy")

    # Only the (input, output) pairs matter here, so iterate over the values.
    for input_rel, output_rel in pelican_object.settings["LESS_CSS_FILES"].values():
        logger.info("Generating %s from %s using lesscpy", output_rel, input_rel)
        try:
            input_path = sanitised_join(os.getcwd(), input_rel)
        except JoinError:
            logger.error(
                "Skipping: file %r would be read outside output path", input_rel,
            )
            continue
        try:
            output_path = sanitised_join(
                pelican_object.settings["OUTPUT_PATH"], output_rel
            )
        except JoinError:
            logger.error(
                "Skipping: file %r would be written outside output path", output_rel,
            )
            continue
        out_dir = os.path.dirname(output_path)
        try:
            # exist_ok avoids the check-then-create race of testing
            # os.path.exists() first: a directory that appears in between
            # no longer makes the build fail.
            os.makedirs(out_dir, exist_ok=True)
        except Exception:
            logger.error(
                "Error creating containing directory %r", out_dir,
            )
            raise
        try:
            with open(output_path, "w") as f:
                compile_css_file(input_path, f)
        except Exception:
            logger.error(
                "Error compiling %r as less file", input_rel,
            )
            raise
Exemple #5
0
    def valid_save_as(self):
        """Tell whether ``save_as`` stays inside the configured output path.

        Returns True when no OUTPUT_PATH setting exists (nothing to check
        against) or when sanitised_join accepts the path, False when
        sanitised_join raises RuntimeError.
        """
        try:
            output_root = self.settings["OUTPUT_PATH"]
        except KeyError:
            # No output path configured, so the check cannot be performed.
            return True

        try:
            sanitised_join(output_root, self.save_as)
        except RuntimeError:
            # The joined path would land outside the output directory.
            stays_inside = False
        else:
            stays_inside = True
        return stays_inside
Exemple #6
0
    def valid_save_as(self):
        """Tell whether ``save_as`` stays inside the configured output path.

        Returns True when no OUTPUT_PATH setting exists (nothing to check
        against) or when sanitised_join accepts the path, False when
        sanitised_join raises RuntimeError.
        """
        try:
            output_root = self.settings["OUTPUT_PATH"]
        except KeyError:
            # No output path configured, so the check cannot be performed.
            return True

        try:
            sanitised_join(output_root, self.save_as)
        except RuntimeError:
            # The joined path would land outside the output directory.
            stays_inside = False
        else:
            stays_inside = True
        return stays_inside
    def write_feed(self,
                   elements,
                   context,
                   path=None,
                   url=None,
                   feed_type='atom',
                   override_output=False,
                   feed_title=None):
        """Generate a feed with the list of articles provided

        Nothing is generated when is_selected_for_writing() rejects ``path``.
        Otherwise the feed is built and the ``feed_generated`` signal is
        sent; if ``path`` is truthy the feed is additionally written below
        ``self.output_path`` and the ``feed_written`` signal is sent.

        :param elements: the articles to put on the feed.
        :param context: the context to get the feed metadata.
        :param path: the path to output.
        :param url: the publicly visible feed URL; if None, path is used
            instead
        :param feed_type: the feed type to use (atom or rss)
        :param override_output: boolean telling if we can override previous
            output with the same name (and if next files written with the same
            name should be skipped to keep that one)
        :param feed_title: the title of the feed.
        :return: the feed object, or None when ``path`` is not selected for
            writing.
        """
        if not is_selected_for_writing(self.settings, path):
            return

        self.site_url = context.get('SITEURL',
                                    path_to_url(get_relative_path(path)))

        self.feed_domain = context.get('FEED_DOMAIN')
        self.feed_url = self.urljoiner(self.feed_domain, url if url else path)

        feed = self._create_new_feed(feed_type, feed_title, context)

        # A falsy FEED_MAX_ITEMS means "no limit".
        max_items = len(elements)
        if self.settings['FEED_MAX_ITEMS']:
            max_items = min(self.settings['FEED_MAX_ITEMS'], max_items)
        for i in range(max_items):
            self._add_item_to_the_feed(feed, elements[i])

        signals.feed_generated.send(context, feed=feed)
        if path:
            complete_path = sanitised_join(self.output_path, path)

            feed_dir = os.path.dirname(complete_path)
            try:
                os.makedirs(feed_dir)
            except OSError:
                # An already existing directory is the expected reason for
                # makedirs to fail; anything else (permissions, a regular
                # file in the way, ...) must not be silently hidden.
                if feed_dir and not os.path.isdir(feed_dir):
                    raise

            encoding = 'utf-8' if six.PY3 else None
            with self._open_w(complete_path, encoding, override_output) as fp:
                feed.write(fp, 'utf-8')
                logger.info('Writing %s', complete_path)

            signals.feed_written.send(complete_path,
                                      context=context,
                                      feed=feed)
        return feed
Exemple #8
0
 def test_pass_deep_subpaths(self):
     """A relative component must be joined exactly like os.path.join."""
     base_dir, relative_name = "/foo/bar", "test"
     joined = utils.sanitised_join(base_dir, relative_name)
     self.assertEqual(joined, os.path.join(base_dir, relative_name))
Exemple #9
0
 def test_pass_deep_subpaths(self):
     """A relative component must be joined exactly like os.path.join."""
     base_dir, relative_name = "/foo/bar", "test"
     joined = utils.sanitised_join(base_dir, relative_name)
     self.assertEqual(joined, os.path.join(base_dir, relative_name))
Exemple #10
0
    def _has_valid_save_as(self):
        """Return true if save_as doesn't write outside output path, false
        otherwise."""
        try:
            output_path = self.settings["OUTPUT_PATH"]
        except KeyError:
            # we cannot check
            return True

        try:
            sanitised_join(output_path, self.save_as)
        except RuntimeError:  # outside output_dir
            logger.error(
                "Skipping %s: file %r would be written outside output path",
                self,
                self.save_as,
            )
            return False

        return True
Exemple #11
0
    def _has_valid_save_as(self):
        """Return true if save_as doesn't write outside output path, false
        otherwise."""
        try:
            output_path = self.settings["OUTPUT_PATH"]
        except KeyError:
            # we cannot check
            return True

        try:
            sanitised_join(output_path, self.save_as)
        except RuntimeError:  # outside output_dir
            logger.error(
                "Skipping %s: file %r would be written outside output path",
                self,
                self.save_as,
            )
            return False

        return True
Exemple #12
0
    def write_feed(self, elements, context, path=None, url=None,
                   feed_type='atom', override_output=False, feed_title=None):
        """Generate a feed with the list of articles provided

        Nothing is generated when is_selected_for_writing() rejects ``path``.
        Otherwise the feed is built and the ``feed_generated`` signal is
        sent; if ``path`` is truthy the feed is additionally written below
        ``self.output_path`` and the ``feed_written`` signal is sent.

        :param elements: the articles to put on the feed.
        :param context: the context to get the feed metadata.
        :param path: the path to output.
        :param url: the publicly visible feed URL; if None, path is used
            instead
        :param feed_type: the feed type to use (atom or rss)
        :param override_output: boolean telling if we can override previous
            output with the same name (and if next files written with the same
            name should be skipped to keep that one)
        :param feed_title: the title of the feed.
        :return: the feed object, or None when ``path`` is not selected for
            writing.
        """
        if not is_selected_for_writing(self.settings, path):
            return

        self.site_url = context.get(
            'SITEURL', path_to_url(get_relative_path(path)))

        self.feed_domain = context.get('FEED_DOMAIN')
        self.feed_url = self.urljoiner(self.feed_domain, url if url else path)

        feed = self._create_new_feed(feed_type, feed_title, context)

        # A falsy FEED_MAX_ITEMS means "no limit".
        max_items = len(elements)
        if self.settings['FEED_MAX_ITEMS']:
            max_items = min(self.settings['FEED_MAX_ITEMS'], max_items)
        for i in range(max_items):
            self._add_item_to_the_feed(feed, elements[i])

        signals.feed_generated.send(context, feed=feed)
        if path:
            complete_path = sanitised_join(self.output_path, path)

            feed_dir = os.path.dirname(complete_path)
            try:
                os.makedirs(feed_dir)
            except OSError:
                # An already existing directory is the expected reason for
                # makedirs to fail; anything else (permissions, a regular
                # file in the way, ...) must not be silently hidden.
                if feed_dir and not os.path.isdir(feed_dir):
                    raise

            encoding = 'utf-8' if six.PY3 else None
            with self._open_w(complete_path, encoding, override_output) as fp:
                feed.write(fp, 'utf-8')
                logger.info('Writing %s', complete_path)

            signals.feed_written.send(
                complete_path, context=context, feed=feed)
        return feed
Exemple #13
0
        def _write_file(output_path, name, override):
            """Build the lunr index and write it out as a JavaScript file.

            The file defines two constants: ``lunrSerializedIdx`` (the
            serialized index) and ``lunrDocuments`` (the indexed documents
            without their "body" entry).
            """
            path = sanitised_join(output_path, name)

            # Create the containing directory on first use.
            parent_dir = os.path.dirname(path)
            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir)

            title_field = {"field_name": "title", "boost": 10}
            summary_field = {"field_name": "summary", "boost": 2}
            idx = lunr(
                ref="ref",
                fields=[title_field, summary_field, "body", "tags"],
                documents=context["index_data"],
            )

            with self._open_w(path, "utf-8", override=override) as handle:
                handle.write("const lunrSerializedIdx = ")
                handle.write(json.dumps(idx.serialize()))
                handle.write(";\n")
                handle.write("const lunrDocuments = ")

                # Work on shallow copies so the shared index data keeps its
                # "body" entries.
                slim_documents = []
                for doc in context["index_data"]:
                    slim_doc = dict(doc)
                    slim_doc.pop("body")
                    slim_documents.append(slim_doc)

                handle.write(json.dumps(slim_documents))
                handle.write(";\n")
            logger.info("Writing %s", path)

            # Send a signal to say we're writing a file with some specific
            # local context.
            signals.content_written.send(path, context=context)
Exemple #14
0
        def _write_file(template, localcontext, output_path, name, override):
            """Render the template and write the result to a file.

            :param template: object whose ``render(localcontext)`` produces
                the text to write
            :param localcontext: context handed to the template and sent
                along with the ``content_written`` signal
            :param output_path: directory the file must stay inside
            :param name: path of the file, joined onto ``output_path`` with
                sanitised_join
            :param override: forwarded to ``self._open_w`` as ``override``
            """
            # set localsiteurl for context so that Contents can adjust links
            if localcontext['localsiteurl']:
                context['localsiteurl'] = localcontext['localsiteurl']
            output = template.render(localcontext)
            path = sanitised_join(output_path, name)

            out_dir = os.path.dirname(path)
            try:
                os.makedirs(out_dir)
            except OSError:
                # An already existing directory is the expected reason for
                # makedirs to fail; anything else (permissions, a regular
                # file in the way, ...) must not be silently hidden.
                if out_dir and not os.path.isdir(out_dir):
                    raise

            with self._open_w(path, 'utf-8', override=override) as f:
                f.write(output)
            logger.info('Writing %s', path)

            # Send a signal to say we're writing a file with some specific
            # local context.
            signals.content_written.send(path, context=localcontext)
Exemple #15
0
        def _write_file(template, localcontext, output_path, name, override):
            """Render the template and write the result to a file.

            :param template: object whose ``render(localcontext)`` produces
                the text to write
            :param localcontext: context handed to the template and sent
                along with the ``content_written`` signal
            :param output_path: directory the file must stay inside
            :param name: path of the file, joined onto ``output_path`` with
                sanitised_join
            :param override: forwarded to ``self._open_w`` as ``override``
            """
            # set localsiteurl for context so that Contents can adjust links
            if localcontext['localsiteurl']:
                context['localsiteurl'] = localcontext['localsiteurl']
            output = template.render(localcontext)
            path = sanitised_join(output_path, name)

            out_dir = os.path.dirname(path)
            try:
                os.makedirs(out_dir)
            except OSError:
                # An already existing directory is the expected reason for
                # makedirs to fail; anything else (permissions, a regular
                # file in the way, ...) must not be silently hidden.
                if out_dir and not os.path.isdir(out_dir):
                    raise

            with self._open_w(path, 'utf-8', override=override) as f:
                f.write(output)
            logger.info('Writing %s', path)

            # Send a signal to say we're writing a file with some specific
            # local context.
            signals.content_written.send(path, context=localcontext)
Exemple #16
0
def _publish_readable_copy(generator, person, work, copy, from_textgrid):
    """Fetch one copy of a work, transform it and write it as a .tei file.

    Does nothing when fetch_cached_work() yields no content. Otherwise the
    transformed document is written below OUTPUT_PATH and ``copy['read_url']``
    is set to the matching URL. ``from_textgrid`` selects the additional
    TG_EXTRACT_TEXT_STYLESHEET step applied before the common transformation.
    """
    content = fetch_cached_work(copy['data']['value'])
    if not content:
        return

    doc = etree.XML(content)
    if from_textgrid:
        doc = TG_EXTRACT_TEXT_STYLESHEET(doc)
    doc = DTA_STYLESHEET(transform(doc))
    determine_tags(doc.xpath('/tei:TEI/tei:text', namespaces=NS)[0])

    # The file name is derived from the work title and the data URL.
    digest = sha256()
    digest.update(work['title'].encode('utf-8'))
    digest.update(b'$$')
    digest.update(copy['data']['value'].encode('utf-8'))
    file_name = '{0}.tei'.format(digest.hexdigest())

    # person.url without its last five characters serves as the directory
    # (presumably this strips a ".html" suffix - TODO confirm).
    copy['read_url'] = {
        'value': '{0}/{1}.tei'.format(person.url[:-5], digest.hexdigest()),
        'label': None,
    }

    target_dir = sanitised_join(generator.settings['OUTPUT_PATH'],
                                person.url[:-5])
    os.makedirs(target_dir, exist_ok=True)
    with open(sanitised_join(target_dir, file_name), 'wb') as out_f:
        out_f.write(etree.tostring(doc, pretty_print=True))


def add_readable_works(generator):
    """Write readable TEI copies for the works of a PersonGenerator's people.

    Only copies that carry both a 'provider' and a 'data' entry are
    considered, and only when the provider value is
    'https://textgridrep.org' or the provider label is
    'Deutsches Textarchiv'. Whatever the generator type, the tags in TAGS
    that are in neither KNOWN_TAGS nor IGNORED_TAGS are printed in sorted
    order at the end.
    """
    if isinstance(generator, PersonGenerator):
        for person in generator.people:
            if 'work' not in person.metadata:
                continue
            for work in person.metadata['work']:
                for copy in work['copies'].values():
                    if not ('provider' in copy and 'data' in copy):
                        continue
                    provider = copy['provider']
                    if provider['value'] == 'https://textgridrep.org':
                        _publish_readable_copy(
                            generator, person, work, copy, True)
                    elif provider['label'] == 'Deutsches Textarchiv':
                        _publish_readable_copy(
                            generator, person, work, copy, False)

    for tag in sorted(TAGS - KNOWN_TAGS - IGNORED_TAGS):
        print(tag)
Exemple #17
0
def get_css_names(generator):
    """
    Gets list of specified CSS files to be generated by lesscpy

    The less source is only compiled (in memory) when a version string or
    integrity hashes are requested; otherwise the output names are recorded
    as configured.

    Configuration
    -------------
    generator.settings['LESS_INTEGRITY']:
    List of hash names; names missing from hash_funcs are logged and skipped

    generator.settings['VERSIONED_CSS']:
    If true, "?" plus the first six hex digits of the SHA-256 of the
    compiled css is appended to the file name

    generator.settings['LESS_CSS_FILES']:
    Dictionary with keys indicating output files
    values should be a tuple (input,output)

    Output
    ------
    generator.context['compiled_css']:
    Dictionary with keys corresponding to the configurations keys
    and values are dictionaries with "css_file" (filepath relative to the
    output root, plus the optional version string) and "integrity"
    (space-separated hash values, possibly empty)
    """

    if "LESS_CSS_FILES" not in generator.settings:
        return

    hashes = generator.settings.get("LESS_INTEGRITY", [])
    versioned = generator.settings.get("VERSIONED_CSS", False)

    compiled_files = {}
    logger.info("Filling environment with lesscpy output file names")

    for key, value in generator.settings["LESS_CSS_FILES"].items():
        input_rel, output_rel = value
        hash_vals = []
        ver_string = ""

        logger.info(
            "Generating data for css file key=%s: (%s)", key, input_rel
        )

        # The compiled css is only needed for hashing/versioning. Keeping
        # all uses of the buffer inside this branch avoids touching a
        # buffer that was never created when neither feature is enabled.
        if hashes or versioned:
            try:
                input_path = sanitised_join(os.getcwd(), input_rel)
            except RuntimeError:
                logger.error(
                    "Skipping: file %r would be read outside output path", input_rel,
                )
                continue
            with io.StringIO() as tmpio:
                compile_css_file(input_path, tmpio)
                css_bytes = tmpio.getvalue().encode("utf-8")

            if versioned:
                ver_string = "?" + hashlib.sha256(css_bytes).hexdigest()[:6]

            for h in hashes:
                if h not in hash_funcs:
                    logger.error("Skipping generation of unknown hash %s", h)
                    continue
                digest = hash_funcs[h](css_bytes).digest()
                hash_vals.append(
                    "{h}-{dgst}".format(
                        h=h, dgst=base64.b64encode(digest).decode()
                    )
                )

        compiled_files[key] = {
            "css_file": output_rel + ver_string,
            "integrity": " ".join(hash_vals),
        }

    generator.context["compiled_css"] = compiled_files