def compute_stats(doc):
    """Compute word counts and readability scores for a parsed document.

    Args:
        doc: document object accepted by TextStats (presumably a spaCy
            doc — TODO confirm against the TextStats implementation).

    Returns:
        objdict: with `.counts` (word/sentence/syllable tallies) and
        `.readability` (score dict, empty when the doc has no words).
    """
    ts = TextStats(doc)
    stats = create_objdict()

    stats.counts = dict_to_objdict({
        'sentences': ts.n_sents,
        'words': ts.n_words,
        'unique_words': ts.n_unique_words,
        'chars': ts.n_chars,
        'chars_per_word': ts.n_chars_per_word,
        'long_words': ts.n_long_words,
        'syllables': ts.n_syllables,
        'syllables_per_word': ts.n_syllables_per_word,
        'monosyllable_words': ts.n_monosyllable_words,
        'polysyllable_words': ts.n_polysyllable_words,
    })

    scores = {}
    if stats.counts.words > 0:
        scores = {
            'flesch_kincaid_grade_level': ts.flesch_kincaid_grade_level,
            'flesch_reading_ease': ts.flesch_reading_ease,
            # SMOG is only computed on long-enough texts; 0 otherwise.
            'smog_index': (ts.smog_index
                           if stats.counts.sentences >= 30 else 0),
            'gunning_fog_index': ts.gunning_fog_index,
            'coleman_liau_index': ts.coleman_liau_index,
            'automated_readability_index': ts.automated_readability_index,
            'lix': ts.lix,
        }
    stats.readability = dict_to_objdict(scores)
    return stats
def parse(post):
    """Get a post content and extract frontmatter data if it exists.

    Args:
        post (str): post to parse.

    Returns:
        list: [meta_data, md] — meta_data is an objdict built from the
        frontmatter (wrapping None when frontmatter is absent or
        invalid), md is the post with the frontmatter stripped.

    Note:
        All sanity checks must be done via the linter and used in
        linter.validate().
    """
    md = post
    meta_data = None  # default when no / invalid frontmatter
    d = frontmatter_matcher.search(post)
    if d:
        frontmatter = d.group(1)
        md = md.replace(frontmatter, "")
        # NOTE(review): this strips every "---" occurrence, which would
        # also mangle a literal "---" inside a frontmatter value —
        # confirm the matcher never captures one.
        frontmatter = frontmatter.replace("---", '')
        try:
            # SafeLoader: never construct arbitrary objects from input.
            m = yaml.load(frontmatter, Loader=yaml.SafeLoader)
        except yaml.YAMLError as ye:
            print(ye)
            m = None
        # isinstance instead of type() ==: YAML may legally yield
        # scalars or lists, which carry no usable fields.
        if isinstance(m, dict):
            meta_data = parse_fields(m)
    meta = utils.dict_to_objdict(meta_data)
    return [meta, md]
def parse(self, md_file): """ Parse a md file into a post object """ # compile the templates when we parse the first post. This is needed # to ensure that plugins get a chance to modify the templates before # we compile them. if not self.jinja2: self.jinja2 = jinja2.Environment(loader=jinja2.DictLoader( self.templates)) parsed_post = utils.dict_to_objdict() # parsing frontmatter and getting the md parsed_post.meta, parsed_post.md = frontmatter.parse(md_file) # parsing markdown and extractring info # NOTE: this must called before every parsing self.renderer.init(self.jinja2, self.code_formatter, self.site, parsed_post.meta) parsed_post.html = self.md_parser.parse(parsed_post.md) parsed_post.text = html2text(parsed_post.html) # used by NLP parsed_post.meta.statistics = self.renderer.get_stats() parsed_post.meta.toc = self.renderer.get_json_toc() parsed_post.elements = self.renderer.get_info() return parsed_post
def init(self, jinja2, code_formatter, site, meta):
    """Init function called before each parsing.

    Args:
        jinja2 (dict): jinja2 templates used for rendering.
        code_formatter (dict): code syntax highlight configuration.
        site (obj_dict): the full site context (SiteFab object).
        meta (obj_dict): the meta associated with the post.

    Returns:
        None

    Note:
        Used to ensure all the needed variables are reset between
        parsing executions.
    """
    # toc state
    self.toc_tree = []
    self.toc_count = 0

    # rendering context
    self.jinja2 = jinja2
    self.code_formatter = code_formatter
    self.site = site
    self.plugin_data = site.plugin_data
    self.meta = meta

    # information collected during the parsing
    self.info = utils.dict_to_objdict(dict(links=[], images=[],
                                           videos=[], code=[]))
    self.stats = utils.dict_to_objdict(dict(num_links=0, num_images=0,
                                            num_videos=0, num_code=0))
def myobjdict():
    """Fixture: objdict built from a small nested dict."""
    return utils.dict_to_objdict({'a': 'test', 'b': {"c": '2nd'}})
def parse_post(json_post):
    """Worker: deserialize a post, attach NLP analysis, reserialize it.

    JSON is used as the transport format so the post can cross process
    boundaries (multiprocessing pool).

    Args:
        json_post (str): JSON-encoded post.

    Returns:
        str: JSON-encoded post with its .nlp field filled in.
    """
    post = utils.dict_to_objdict(json.loads(json_post))
    post.nlp = nlp.analyze_post(post)
    return json.dumps(post)
def parse(self):
    "parse md content into post objects"
    self.cnts.start('Parsing')
    filenames = self.filenames.posts
    self.posts = []

    # collections creation
    min_posts = self.config.collections.min_posts

    # posts_by_tag is what is rendered: it contains for as given post both
    # its tags and its category
    tlp = self.jinja2.get_template(self.config.collections.template)
    path = self.get_output_dir() / self.config.collections.output_dir
    self.posts_by_tag = PostCollections(
        site=self,
        template=tlp,
        output_path=path,
        web_path=self.config.collections.output_dir,
        min_posts=min_posts)

    self.posts_by_category = PostCollections(
        site=self, web_path=self.config.collections.output_dir)

    self.posts_by_template = PostCollections(site=self)
    self.posts_by_microdata = PostCollections(site=self)

    # Parsing
    cprint("\nParsing posts", "magenta")
    progress_bar = tqdm(total=len(filenames), unit=' files',
                        desc="Files", leave=True)
    errors = []
    post_idx = 1
    threads = self.config.threads

    # NOTE: passing self.config.parser might seems strange
    # but it is required to get the pluging workings
    parser = Parser(self.config.parser, self)

    if threads > 1:
        # Multi-process path: posts are serialized to JSON so they can
        # cross process boundaries, NLP runs in the pool workers, and
        # results come back unordered (ids were assigned before fan-out).
        todo_nlp_posts = []
        for filename in filenames:
            file_content = files.read_file(filename)
            post = parser.parse(file_content)
            post.filename = str(filename)
            post.id = post_idx
            post_idx += 1
            todo_nlp_posts.append(json.dumps(post))

        pool = Pool(threads)
        for parsed_post_json in pool.imap_unordered(
                parse_post, todo_nlp_posts):
            parsed_post = json.loads(parsed_post_json)
            post = utils.dict_to_objdict(parsed_post)
            self.process_post(post)
            progress_bar.update(1)
        pool.close()
        pool.join()
    else:
        # Single-process path: same JSON round-trip as above so both
        # paths feed process_post() identically shaped posts.
        for filename in filenames:
            file_content = files.read_file(filename)
            post = parser.parse(file_content)
            post.filename = str(filename)
            post.id = post_idx
            parsed_post_json = parse_post(json.dumps(post))
            parsed_post = json.loads(parsed_post_json)
            self.process_post(utils.dict_to_objdict(parsed_post))
            progress_bar.update(1)
            post_idx += 1

    progress_bar.close()
    if len(errors):
        utils.error("\n".join(errors))
    self.cnts.stop('Parsing')
def test_creation(myobjdict):
    """Attribute access on an objdict mirrors the source dict."""
    source = {'a': 'test', 'b': {"c": '2nd'}}
    obj = utils.dict_to_objdict(source)
    assert obj.a == source['a']
    assert obj.b.c == source['b']['c']
def test_dict_to_objdict():
    """Strings, ints and lists survive the dict -> objdict conversion."""
    data = {"str": "str", "int": 1, "array": [1, 2, 3]}
    obj = utils.dict_to_objdict(data)
    assert obj.str == "str"
    assert obj.int == 1
    assert isinstance(obj.array, list)
def run_plugins(self, items, plugin_class, unit, site):
    """Execute a set of plugins on a given list of items

    :param list items: list of items to process
    :param str plugin_type: the plugin_class to use
    :param str unit: the unit to use in the display
    :param SiteFab site: pointer to the site object to be passed
    to the plugins

    :rtype: dict(dict(list))
    :return: plugins execution statistics
    """
    # dependencies map
    dependencie_map = {}

    # used to get back from the module name to the plugin
    module_name_to_plugin = {}

    plugins = self.plugins.getPluginsOfCategory(plugin_class)

    # collecting plugins that are to be executed.
    for plugin in plugins:
        if self.is_plugin_enabled(plugin):
            module_name = self.get_plugin_module_name(plugin)
            module_name_to_plugin[module_name] = plugin

    # dependencies computation.
    # Due to potential dependencies on plugins from previous stage
    # this must be computed after collecting which
    # plugins were executed.
    for plugin in module_name_to_plugin.values():
        all_dependencies = self.get_plugin_dependencies(plugin)
        dependencies = set()  # topological sort requires use of set
        module_name = self.get_plugin_module_name(plugin)
        for dep_module_name in all_dependencies:
            if dep_module_name not in self.plugins_enabled:
                utils.error("Plugin:%s can't be executed because\ plugin %s is not enable" % (module_name, dep_module_name))

            # only add to the dependencies map the plugins
            # that are from the same stage
            if dep_module_name in module_name_to_plugin:
                dependencies.add(dep_module_name)
            else:
                # check if already executed
                if dep_module_name not in self.plugins_executed:
                    utils.error("Plugin:%s can't be executed because\ plugin %s was not executed in previous\ stage" % (module_name, dep_module_name))

        dependencie_map[module_name] = dependencies

    # print dependencie_map

    # Topological sorting
    try:
        plugins_to_process = toposort_flatten(dependencie_map)
    except Exception as e:
        utils.error("Circular dependencies between plugins.\ Can't execute plugins:%s" % e)

    s = "|-%s plugins" % (unit.strip().capitalize())
    desc = colored(s, "magenta")
    results = []
    for module_name in tqdm(plugins_to_process, unit=' plugin',
                            desc=desc, leave=True):
        if module_name in module_name_to_plugin:
            plugin = module_name_to_plugin[module_name]
        else:
            raise Exception("The following plugin module name listed in\ dependencies don't exist % s " % module_name)

        pclass = plugin_class.lower()
        filename = "%s.%s.html" % (pclass, module_name)
        log_id = site.logger.create_log(pclass, plugin.name, filename)

        # per-plugin OK / SKIPPED / ERROR tallies
        plugin_results = utils.dict_to_objdict({
            site.OK: 0,
            site.SKIPPED: 0,
            site.ERROR: 0
        })

        config = self.get_plugin_config(plugin)

        for item in tqdm(items, unit=unit, desc=plugin.name,
                         leave=False):
            result = plugin.plugin_object.process(item, site, config)
            plugin_results[result[0]] += 1
            # result is a [severity, name, details] triple; each item's
            # outcome is recorded in the per-plugin HTML log.
            severity = result[0]
            name = result[1]
            details = result[2]
            site.logger.record_event(log_id, name, severity, details)

        self.plugins_executed[module_name] = True
        results.append([plugin.name, plugin_results])
        site.logger.write_log(log_id)
    return results