Beispiel #1
0
def compute_stats(doc):
    """Gather count and readability statistics for a document.

    Args:
        doc: the document handed to ``TextStats``.

    Returns:
        The object produced by ``create_objdict()`` with two attributes:
        ``counts`` (sentence, word, character and syllable counts) and
        ``readability`` (readability scores; left empty when the document
        has no words).
    """
    text_stats = TextStats(doc)
    result = create_objdict()

    result.counts = dict_to_objdict({
        'sentences': text_stats.n_sents,
        'words': text_stats.n_words,
        'unique_words': text_stats.n_unique_words,
        'chars': text_stats.n_chars,
        'chars_per_word': text_stats.n_chars_per_word,
        'long_words': text_stats.n_long_words,
        'syllables': text_stats.n_syllables,
        'syllables_per_word': text_stats.n_syllables_per_word,
        'monosyllable_words': text_stats.n_monosyllable_words,
        'polysyllable_words': text_stats.n_polysyllable_words,
    })

    scores = {}
    # the readability scores are only read when there is at least one word
    if result.counts.words > 0:
        scores['flesch_kincaid_grade_level'] = \
            text_stats.flesch_kincaid_grade_level
        scores['flesch_reading_ease'] = text_stats.flesch_reading_ease
        # placeholder, replaced below when there are enough sentences
        scores['smog_index'] = 0
        scores['gunning_fog_index'] = text_stats.gunning_fog_index
        scores['coleman_liau_index'] = text_stats.coleman_liau_index
        scores['automated_readability_index'] = \
            text_stats.automated_readability_index
        scores['lix'] = text_stats.lix

    # the SMOG index is only read for documents of 30 sentences or more
    if result.counts.sentences >= 30:
        scores['smog_index'] = text_stats.smog_index

    result.readability = dict_to_objdict(scores)
    return result
Beispiel #2
0
def parse(post):
    """Split a post into its frontmatter metadata and its markdown.

    Args:
        post (str): post to parse

    Returns:
        list: ``[meta, md]``. ``meta`` is the objdict built from the
        parsed frontmatter fields, or ``None`` when the post has no
        frontmatter, the frontmatter is not valid YAML, or it does not
        parse to a mapping. ``md`` is the post with the text captured by
        ``frontmatter_matcher`` removed (the unmodified post when nothing
        matched).

    note: all sanity check must be done via the linter and
    used in linter.validate()
    """
    md = post
    # Bound up front: without this default the return statement raised
    # UnboundLocalError for posts without usable frontmatter.
    meta = None

    match = frontmatter_matcher.search(post)
    if match:
        frontmatter = match.group(1)
        md = md.replace(frontmatter, "")
        frontmatter = frontmatter.replace("---", '')
        try:
            # SafeLoader: frontmatter is never allowed to build arbitrary
            # Python objects
            parsed = yaml.load(frontmatter, Loader=yaml.SafeLoader)
        except yaml.YAMLError as ye:
            print(ye)
            parsed = None

        # only a YAML mapping can be turned into post metadata
        if isinstance(parsed, dict):
            meta = utils.dict_to_objdict(parse_fields(parsed))

    return [meta, md]
Beispiel #3
0
    def parse(self, md_file):
        """Turn the content of a md file into a post object.

        The returned objdict carries the frontmatter ``meta`` (extended
        with ``statistics`` and ``toc``), the raw ``md``, the rendered
        ``html``, its ``text`` version and the ``elements`` reported by
        the renderer.
        """

        # The jinja2 environment is built lazily, on the first parsed
        # post, so that plugins get a chance to modify the templates
        # before they are compiled.
        if not self.jinja2:
            template_loader = jinja2.DictLoader(self.templates)
            self.jinja2 = jinja2.Environment(loader=template_loader)

        post = utils.dict_to_objdict()

        # split the file into frontmatter metadata and markdown
        post.meta, post.md = frontmatter.parse(md_file)

        # NOTE: the renderer must be re-initialised before every parsing
        self.renderer.init(
            self.jinja2,
            self.code_formatter,
            self.site,
            post.meta,
        )

        # render the markdown, then collect what the renderer gathered
        post.html = self.md_parser.parse(post.md)
        post.text = html2text(post.html)  # used by NLP
        post.meta.statistics = self.renderer.get_stats()
        post.meta.toc = self.renderer.get_json_toc()
        post.elements = self.renderer.get_info()
        return post
Beispiel #4
0
    def init(self, jinja2, code_formatter, site, meta):
        """Reset the renderer state; meant to run before each parsing.

        Args:
            jinja2: jinja2 templates used for rendering
            code_formatter: code syntax highlight configuration
            site: the full site context; its ``plugin_data`` attribute
                is copied onto the renderer
            meta: the meta associated with the post
        Return:
            None
        Note:
            Every per-post attribute is reassigned here so nothing leaks
            from one parsing execution into the next.
        """
        # table of contents starts empty
        self.toc_tree = []
        self.toc_count = 0

        # context of the post about to be parsed
        self.jinja2 = jinja2
        self.code_formatter = code_formatter
        self.plugin_data = site.plugin_data
        self.site = site
        self.meta = meta

        # elements collected during the parsing, one fresh list per kind
        collected = {
            kind: [] for kind in ("links", "images", "videos", "code")
        }
        self.info = utils.dict_to_objdict(collected)

        # matching counters, all starting at zero
        counters = {
            "num_links": 0,
            "num_images": 0,
            "num_videos": 0,
            "num_code": 0,
        }
        self.stats = utils.dict_to_objdict(counters)
Beispiel #5
0
def myobjdict():
    """Return a small two-level objdict used as test data."""
    return utils.dict_to_objdict({'a': 'test', 'b': {"c": '2nd'}})
Beispiel #6
0
def parse_post(json_post):
    """Add the NLP analysis to a JSON-serialized post.

    Args:
        json_post (str): JSON representation of the post.

    Returns:
        str: JSON representation of the post with its ``nlp`` entry set
        to the result of ``nlp.analyze_post``.
    """
    post = utils.dict_to_objdict(json.loads(json_post))
    post.nlp = nlp.analyze_post(post)
    return json.dumps(post)
Beispiel #7
0
    def parse(self):
        """Parse md content into post objects and process each of them.

        Every file listed in ``self.filenames.posts`` is read, parsed by
        a ``Parser``, tagged with its filename and a 1-based id, sent
        through ``parse_post()`` as JSON and finally handed to
        ``self.process_post()``.

        When ``self.config.threads`` is greater than 1 the
        ``parse_post()`` step is distributed over a ``Pool`` of that many
        workers and posts are processed in the order the workers finish;
        otherwise each file is handled sequentially.

        The pool and the progress bar are released even when parsing or
        processing a post raises.
        """
        self.cnts.start('Parsing')
        filenames = self.filenames.posts
        self.posts = []

        # collections creation
        min_posts = self.config.collections.min_posts

        # posts_by_tag is what is rendered: it contains for a given post
        # both its tags and its category
        template = self.jinja2.get_template(self.config.collections.template)
        path = self.get_output_dir() / self.config.collections.output_dir

        self.posts_by_tag = PostCollections(
            site=self,
            template=template,
            output_path=path,
            web_path=self.config.collections.output_dir,
            min_posts=min_posts)

        self.posts_by_category = PostCollections(
            site=self, web_path=self.config.collections.output_dir)

        self.posts_by_template = PostCollections(site=self)

        self.posts_by_microdata = PostCollections(site=self)

        # Parsing
        cprint("\nParsing posts", "magenta")
        progress_bar = tqdm(total=len(filenames),
                            unit=' files',
                            desc="Files",
                            leave=True)
        threads = self.config.threads
        # NOTE: passing self.config.parser might seem strange
        # but it is required to get the plugins working
        parser = Parser(self.config.parser, self)

        try:
            if threads > 1:
                # Parse every file first, then fan the JSON-serialized
                # posts out to the workers.
                todo_nlp_posts = []
                for post_idx, filename in enumerate(filenames, start=1):
                    post = parser.parse(files.read_file(filename))
                    post.filename = str(filename)
                    post.id = post_idx
                    todo_nlp_posts.append(json.dumps(post))

                pool = Pool(threads)
                try:
                    for parsed_post_json in pool.imap_unordered(
                            parse_post, todo_nlp_posts):
                        post = utils.dict_to_objdict(
                            json.loads(parsed_post_json))
                        self.process_post(post)
                        progress_bar.update(1)
                except BaseException:
                    # do not leave workers running behind a failure
                    pool.terminate()
                    raise
                else:
                    pool.close()
                finally:
                    pool.join()
            else:
                for post_idx, filename in enumerate(filenames, start=1):
                    post = parser.parse(files.read_file(filename))
                    post.filename = str(filename)
                    post.id = post_idx
                    # same JSON round-trip as the pooled path so both
                    # paths hand identical objects to process_post()
                    parsed_post_json = parse_post(json.dumps(post))
                    parsed_post = json.loads(parsed_post_json)
                    self.process_post(utils.dict_to_objdict(parsed_post))
                    progress_bar.update(1)
        finally:
            progress_bar.close()

        self.cnts.stop('Parsing')
Beispiel #8
0
def test_creation(myobjdict):
    """Values of a nested dict are reachable as attributes."""
    source = {'a': 'test', 'b': {"c": '2nd'}}
    converted = utils.dict_to_objdict(source)

    # top-level key
    assert source['a'] == converted.a
    # nested key
    assert source['b']['c'] == converted.b.c
Beispiel #9
0
def test_dict_to_objdict():
    """String, int and list values survive the conversion."""
    source = {"str": "str", "int": 1, "array": [1, 2, 3]}
    converted = utils.dict_to_objdict(source)

    assert converted.str == "str"
    assert converted.int == 1
    assert isinstance(converted.array, list)
Beispiel #10
0
    def run_plugins(self, items, plugin_class, unit, site):
        """Execute a set of plugins on a given list of items

        The enabled plugins of the requested category are ordered with a
        topological sort of their same-stage dependencies, then each one
        is run on every item and its outcomes are logged and counted.

        :param list items: list of items to process
        :param str plugin_class: the plugin category to execute
        :param str unit: the unit to use in the display
        :param SiteFab site: pointer to the site object to be passed
        to the plugins

        :rtype: list(list)
        :return: plugins execution statistics, one
        ``[plugin name, counters]`` pair per executed plugin; the
        counters start at zero for ``site.OK``, ``site.SKIPPED`` and
        ``site.ERROR`` and are incremented by the first element of each
        plugin result
        """

        # used to get back from the module name to the plugin; only the
        # plugins that are to be executed are collected
        module_name_to_plugin = {}
        for plugin in self.plugins.getPluginsOfCategory(plugin_class):
            if self.is_plugin_enabled(plugin):
                module_name = self.get_plugin_module_name(plugin)
                module_name_to_plugin[module_name] = plugin

        # dependencies computation.
        # Due to potential dependencies on plugins from previous stage
        # this must be computed after collecting which
        # plugins are executed in this one.
        dependencie_map = {}
        for module_name, plugin in module_name_to_plugin.items():
            dependencies = set()  # topological sort requires use of set

            for dep_module_name in self.get_plugin_dependencies(plugin):
                if dep_module_name not in self.plugins_enabled:
                    utils.error("Plugin:%s can't be executed because "
                                "plugin %s is not enable" %
                                (module_name, dep_module_name))

                # only add to the dependencies map the plugins
                # that are from the same stage
                if dep_module_name in module_name_to_plugin:
                    dependencies.add(dep_module_name)
                elif dep_module_name not in self.plugins_executed:
                    # a dependency from another stage must already have run
                    utils.error("Plugin:%s can't be executed because "
                                "plugin %s was not executed in previous "
                                "stage" % (module_name, dep_module_name))

            dependencie_map[module_name] = dependencies

        # Topological sorting
        # NOTE(review): plugins_to_process stays unbound if utils.error()
        # returns instead of aborting - confirm that it exits.
        try:
            plugins_to_process = toposort_flatten(dependencie_map)
        except Exception as e:
            utils.error("Circular dependencies between plugins. "
                        "Can't execute plugins:%s" % e)

        desc = colored("|-%s plugins" % (unit.strip().capitalize()),
                       "magenta")
        pclass = plugin_class.lower()  # same for every plugin of the run
        results = []
        for module_name in tqdm(plugins_to_process,
                                unit=' plugin',
                                desc=desc,
                                leave=True):
            if module_name not in module_name_to_plugin:
                raise Exception("The following plugin module name listed in "
                                "dependencies don't exist %s" % module_name)
            plugin = module_name_to_plugin[module_name]

            filename = "%s.%s.html" % (pclass, module_name)
            log_id = site.logger.create_log(pclass, plugin.name, filename)

            plugin_results = utils.dict_to_objdict({
                site.OK: 0,
                site.SKIPPED: 0,
                site.ERROR: 0
            })

            config = self.get_plugin_config(plugin)

            for item in tqdm(items, unit=unit, desc=plugin.name, leave=False):
                # a result is indexed as (severity, name, details)
                result = plugin.plugin_object.process(item, site, config)
                severity = result[0]
                plugin_results[severity] += 1

                name = result[1]
                details = result[2]
                site.logger.record_event(log_id, name, severity, details)

            # remembered so later stages can check their dependencies
            self.plugins_executed[module_name] = True
            results.append([plugin.name, plugin_results])
            site.logger.write_log(log_id)
        return results