def process(self, unused, site, config):
    """Write the search plugin's JavaScript file and JSON search index.

    One record is built per post (id, template, cleaned title and authors,
    a conference label and the leading NLP terms). The records are dumped to
    JSON and substituted for the ``SEARCH_DOC_PLUGIN_REPLACE`` placeholder
    of the ``search.json`` file stored next to this module; ``search.json``
    and ``search.js`` are then written below the site output directory.

    Args:
        unused: not used by this plugin.
        site: site object providing ``posts`` and ``get_output_dir()``.
        config: plugin configuration providing ``output_path_js``,
            ``output_path_json`` and ``num_terms_per_post``.

    Returns:
        tuple: ``(SiteFab.OK, "search", log_info)`` where ``log_info`` is an
        HTML table listing each post title with its terms.
    """
    plugin_name = "search"
    js_filename = "search.js"
    json_filename = "search.json"

    js_subdir = config.output_path_js
    json_subdir = config.output_path_json
    max_terms = config.num_terms_per_post

    # Both base files are read from the directory containing this module.
    here = os.path.dirname(__file__)
    json_template = files.read_file(os.path.join(here, json_filename))
    js_source = files.read_file(os.path.join(here, js_filename))

    index = {}
    rows = []
    for post in site.posts:
        # each term entry is indexable; only its first element is kept
        leading_terms = [entry[0] for entry in post.nlp.terms][:max_terms]
        cleaned = post.nlp.clean_fields
        conference = "%s %s" % (cleaned.conference_short_name,
                                cleaned.conference_name)
        record = {
            "id": post.id,
            "template": post.meta.template,
            "title": cleaned.title,
            "authors": cleaned.authors,
            "conference": conference,
            "terms": leading_terms,
        }
        index[post.id] = record
        rows.append([record['title'], record['terms']])

    log_info = "" + tabulate(rows, tablefmt='html',
                             headers=['title', 'terms'])

    # output
    serialized = json.dumps(index)
    json_payload = json_template.replace("SEARCH_DOC_PLUGIN_REPLACE",
                                         serialized)

    json_dir = os.path.join(site.get_output_dir(), json_subdir)
    files.write_file(json_dir, json_filename, json_payload)

    js_dir = os.path.join(site.get_output_dir(), js_subdir)
    files.write_file(js_dir, js_filename, js_source)

    return (SiteFab.OK, plugin_name, log_info)
def test_basic_file_flow(tmp_path):
    """Exercise the file helpers end to end.

    Tested all at once because the steps are hard to split into smaller
    chunks: write -> read -> list -> clean -> list again.
    """
    fname = 'a.txt'
    content = 'this is a test'
    path = Path(tmp_path) / fname

    # write
    write_file(tmp_path, fname, content)
    assert path.exists()

    # read
    read_back = read_file(path)
    assert read_back == content

    # list
    files_list = get_files_list(tmp_path, '*.txt')
    assert path in files_list

    # clean
    clean_dir(tmp_path)
    assert not path.exists()

    # filelist again: list the cleaned directory itself (listing the path of
    # the removed file would be empty no matter what clean_dir did)
    files_list = get_files_list(tmp_path, '*.txt')
    assert not files_list  # must be empty
def test_parsing_basic_md(sitefab):
    """Parse the basic markdown fixture and check each derived field."""
    md_parser = Parser(sitefab.config.parser, sitefab)
    source = read_file(TEST_ROOT_DIR / 'data/basic.md')
    post = md_parser.parse(source)

    # frontmatter
    assert 'Elie, Bursztein' in post.meta.authors
    assert post.meta.title == 'Test'

    # html
    expected_fragments = (
        'this is a test.',
        '<h1 id="toc-0">heading 1</h1>',
        '<h2 id="toc-1">heading 2</h2>',
        '<strong>bold test</strong>',
    )
    for fragment in expected_fragments:
        assert fragment in post.html

    # text
    assert 'this is a test' in post.text
    assert '<' not in post.text

    # toc
    # format: name, heading type: {1,2,3,4...}, id
    expected_toc = [('heading 1', 1, 0), ('heading 2', 2, 1)]
    for position, entry in enumerate(expected_toc):
        assert post.meta.toc[position] == entry

    # statistics
    stats = post.meta.statistics
    assert stats.num_videos == 1
    assert stats.num_images == 1
def test_parsing_real_md(sitefab):
    """Parse a real post from the site content and spot-check the result."""
    post_path = sitefab.config.root_dir / 'content/posts/18-4-of-us-internet-users-got-at-least-one-of-their-account-compromised.md'  # noqa
    md_parser = Parser(sitefab.config.parser, sitefab)
    post = md_parser.parse(read_file(post_path))

    assert 'internet' in post.html
    first_author = post.meta.authors[0]
    assert 'Elie' in first_author
    assert post.meta.statistics.num_images == 2
def process(self, unused, site, config):
    """Write ``posts.json`` containing the selected data of every post.

    For each post a dict is built with its id, the configured meta fields
    present on the post (``permanent_url`` gets a trailing ``/``) and,
    when requested and available in ``site.plugin_data``, the responsive
    banner and thumbnail data for the post banner. The dicts are dumped to
    JSON and substituted for the ``JS_POSTS_PLUGIN_REPLACE`` placeholder of
    the ``posts.json`` file stored next to this module.

    Args:
        unused: not used by this plugin.
        site: site object providing ``posts``, ``plugin_data`` and
            ``get_output_dir()``.
        config: plugin configuration providing ``output_path``,
            ``meta_fields_to_output`` and ``plugin_data_to_output``.

    Returns:
        tuple: ``(SiteFab.OK, "js_posts", log_info)`` on success, or
        ``(SiteFab.ERROR, "js_posts", message)`` when the base file could
        not be read.
    """
    plugin_name = "js_posts"
    js_filename = "posts.json"

    # configuration
    output_path = config.output_path
    meta_fields_to_output = config.meta_fields_to_output
    plugin_data_to_output = config.plugin_data_to_output

    log_info = "base javascript: %s<br>ouput:%s%s<br>" % (
        js_filename, output_path, js_filename)
    # accumulate instead of re-assigning so the line above is kept in the log
    log_info += "meta fields to outputs:%s" % (
        ", ".join(meta_fields_to_output))

    # Reading the base JS
    plugin_dir = os.path.dirname(__file__)
    js_file = os.path.join(plugin_dir, js_filename)
    js = files.read_file(js_file)
    if not js:
        return (SiteFab.ERROR, plugin_name,
                "Base Javascript:%s not found." % js_file)

    js_posts = {}
    for post in site.posts:
        js_post = {'id': post.id}

        for field in meta_fields_to_output:
            if field not in post.meta:
                continue
            if field == "permanent_url":
                js_post[field] = "%s/" % post.meta[field]
            else:
                js_post[field] = post.meta[field]

        banner = post.meta.banner

        if ('responsive_banner' in plugin_data_to_output
                and 'responsive_images' in site.plugin_data):
            responsive_images = site.plugin_data['responsive_images']
            if banner in responsive_images:
                js_post['banner_srcsets'] = responsive_images[banner][
                    'srcsets']
                js_post['banner_allsizes'] = responsive_images[banner][
                    'allsizes']

        if ('thumbnail_banner' in plugin_data_to_output
                and 'thumbnails' in site.plugin_data):
            thumbnails = site.plugin_data['thumbnails']
            if banner in thumbnails:
                js_post['thumbnails'] = thumbnails[banner]

        js_posts[post.id] = js_post

    # replacing placeholder with post data
    output_string = json.dumps(js_posts)
    log_info += "output string:<br>%s" % output_string
    js = str(js)
    js = js.replace("JS_POSTS_PLUGIN_REPLACE", output_string)

    # output
    path = os.path.join(site.get_output_dir(), output_path)
    log_info += "output directory: %s" % path
    files.write_file(path, js_filename, js)

    return (SiteFab.OK, plugin_name, log_info)
def __init__(self, config):
    """Load the linter test definitions and compile the report template.

    Args:
        config: linter configuration; its ``report_template_file`` entry
            is read and compiled into ``self.jinja2_template``.
    """
    # tests.yaml is looked up in the directory containing this module
    tests_path = Path(__file__).parent / 'tests.yaml'
    self.test_info = files.load_config(tests_path)
    if not self.test_info:
        utils.error("Can't load linter tests")

    self.config = config
    self.results = {}

    raw_template = files.read_file(self.config.report_template_file)
    self.jinja2_template = Template(str(raw_template))
def test_unicode_support(tmp_path):
    """Round-trip a file whose name and content both contain an emoji."""
    fname = '😁.txt'
    content = 'this is a test 😁'
    target = Path(tmp_path) / fname

    # write
    write_file(tmp_path, fname, content)
    assert target.exists()

    # read
    assert read_file(target) == content

    # cleanup
    clean_dir(tmp_path)
def test_filelist_recursive(tmp_path):
    """A file in a sub directory is listed only by the recursive listing."""
    clean_dir(tmp_path)

    fname = 'test.md'
    txt = 'hello'
    nested_dir = tmp_path / 'newdir'
    write_file(nested_dir, fname, txt)

    # ensure we have a sub dir and the file in it
    assert nested_dir.exists()
    assert nested_dir.is_dir()
    assert read_file(nested_dir / fname) == txt

    # test we get a recursive listing
    recursive_listing = get_files_list(tmp_path)
    assert len(recursive_listing) == 1

    # test that non recursive returns nothing
    flat_listing = get_files_list(tmp_path, recursive=False)
    assert flat_listing == []
def test_writing_subdirectory(tmp_path):
    """Make sure the sub directory is created on write and removed on clean."""
    fname = 'myfile.md'
    content = 'this is a test 😁'
    target_dir = Path(tmp_path) / 'tobecreated'
    target_file = target_dir / fname

    # write
    write_file(target_dir, fname, content)
    assert target_file.exists()
    assert target_dir.is_dir()

    # read
    assert read_file(target_file) == content

    # cleanup
    clean_dir(tmp_path)
    assert not target_file.exists()
    assert not target_dir.exists()
def make_config(config):
    """Initialize a parser config with all the needed variables.

    Every ``*.html`` file returned by ``files.get_files_list`` for
    ``config.templates_path`` is read and stored in ``config.templates``
    under the file's stem.

    Args:
        config (obj_dict): the uninitialized configuration with basic
            variables.

    Returns:
        obj_dict: The initialized configuration.
    """
    # report invalid input through utils.detailed_error
    if not config:
        utils.detailed_error("Parser", 'make_config',
                             'supplied config is empty')

    if 'template_dir' not in config:
        utils.detailed_error("Parser", 'make_config',
                             'template_dir not found')

    config.templates = {}
    html_files = files.get_files_list(config.templates_path, "*.html")
    for template_path in html_files:
        config.templates[template_path.stem] = files.read_file(template_path)
    return config
def test_non_existing_read_file():
    """Reading a file that does not exist yields an empty string."""
    missing_path = 'asjkajlkajlksal32312ewdas3'
    result = read_file(missing_path)
    assert result == ""
def parse(self):
    """Parse the markdown content of every post file into post objects.

    Resets ``self.posts`` and the post collections, then parses every file
    listed in ``self.filenames.posts``. Each parsed post is serialized to
    JSON, run through ``parse_post`` and handed to ``self.process_post``.
    When ``self.config.threads`` is greater than 1 the ``parse_post`` step
    is run in a ``Pool`` and results are processed in completion order;
    otherwise posts are handled one by one in file order.
    """
    self.cnts.start('Parsing')
    filenames = self.filenames.posts
    self.posts = []

    # collections creation
    collections_cfg = self.config.collections
    min_posts = collections_cfg.min_posts
    # posts_by_tag is what is rendered: it contains for a given post both
    # its tags and its category
    tlp = self.jinja2.get_template(collections_cfg.template)
    path = self.get_output_dir() / collections_cfg.output_dir

    self.posts_by_tag = PostCollections(
        site=self,
        template=tlp,
        output_path=path,
        web_path=collections_cfg.output_dir,
        min_posts=min_posts)
    self.posts_by_category = PostCollections(
        site=self, web_path=collections_cfg.output_dir)
    self.posts_by_template = PostCollections(site=self)
    self.posts_by_microdata = PostCollections(site=self)

    # Parsing
    cprint("\nParsing posts", "magenta")
    progress_bar = tqdm(total=len(filenames), unit=' files', desc="Files",
                        leave=True)
    # NOTE: nothing in this method appends to errors; kept so the final
    # report below stays in place if error collection is added
    errors = []
    threads = self.config.threads

    # NOTE: passing self.config.parser might seem strange
    # but it is required to get the plugins working
    parser = Parser(self.config.parser, self)

    def load_post(post_id, filename):
        # read + parse one file and return the post serialized as JSON
        post = parser.parse(files.read_file(filename))
        post.filename = str(filename)
        post.id = post_id
        return json.dumps(post)

    def handle_result(parsed_post_json):
        # rebuild the post object from parse_post's output and register it
        parsed_post = json.loads(parsed_post_json)
        self.process_post(utils.dict_to_objdict(parsed_post))
        progress_bar.update(1)

    try:
        if threads > 1:
            # post ids start at 1 and follow the order of filenames
            todo_nlp_posts = [load_post(post_id, filename)
                              for post_id, filename
                              in enumerate(filenames, start=1)]
            # the context manager terminates the workers if anything below
            # raises, so no worker process is left behind
            with Pool(threads) as pool:
                for parsed_post_json in pool.imap_unordered(
                        parse_post, todo_nlp_posts):
                    handle_result(parsed_post_json)
                pool.close()
                pool.join()
        else:
            for post_id, filename in enumerate(filenames, start=1):
                handle_result(parse_post(load_post(post_id, filename)))
    finally:
        # always release the progress bar, even on failure
        progress_bar.close()

    if errors:
        utils.error("\n".join(errors))

    self.cnts.stop('Parsing')
def process(self, unused, site, config):
    """Write the autocomplete JavaScript file and its JSON suggestion list.

    Every post contributes its authors (full name plus each name part of
    at least two characters, one point each), its title terms (score
    doubled) and its other terms (score as is). The ``num_suggestions``
    best scoring terms are dumped as ``[term, post_frequency, score]``
    rows and substituted for the ``AUTOCOMPLETE_PLUGIN_REPLACE``
    placeholder of the ``autocomplete.json`` file stored next to this
    module.

    Args:
        unused: not used by this plugin.
        site: site object providing ``posts`` and ``get_output_dir()``.
        config: plugin configuration providing ``output_path_js``,
            ``output_path_json``, ``num_suggestions`` and
            ``excluded_terms``.

    Returns:
        tuple: ``(SiteFab.OK, "autocomplete", log_info)`` where
        ``log_info`` is an HTML summary of the selected terms.
    """
    plugin_name = "autocomplete"
    json_filename = "autocomplete.json"
    js_filename = "autocomplete.js"

    # configuration
    output_path_js = config.output_path_js
    output_path_json = config.output_path_json
    num_suggestions = config.num_suggestions
    excluded_terms = config.excluded_terms

    log_info = ""

    # Reading the base files
    plugin_dir = Path(__file__).parent
    jsondata = files.read_file(plugin_dir / json_filename)
    jsdata = files.read_file(plugin_dir / js_filename)

    term_post_frequency = defaultdict(int)
    term_score = defaultdict(float)
    for post in site.posts:
        # authors
        for author in post.nlp.clean_fields.authors:
            term_post_frequency[author] += 1
            term_score[author] += 1  # ensure authors always first

            for part in author.split(' '):
                if len(part) < 2:
                    continue
                term_post_frequency[part] += 1
                term_score[part] += 1

        # title terms
        for term in post.nlp.title_terms:
            # term entries are indexed as (text, score) below, so the
            # exclusion must test the text, not the whole entry
            if term[0] in excluded_terms:
                continue
            term_post_frequency[term[0]] += 1
            term_score[term[0]] += term[1] * 2

        # other terms
        for term in post.nlp.terms:
            if term[0] in excluded_terms:
                continue
            term_post_frequency[term[0]] += 1
            term_score[term[0]] += term[1]

    log_info += "num of terms considered: %s<br>" % len(term_score)

    top_terms = sorted(term_score, key=term_score.get, reverse=True)
    output = [[term, term_post_frequency[term], term_score[term]]
              for term in top_terms[:num_suggestions]]

    # log results
    log_info += tabulate(output,
                         headers=['term', 'post frequency', 'score'],
                         tablefmt='html')

    # replacing placeholder with computation result
    output_string = json.dumps(output)
    jsondata = jsondata.replace("AUTOCOMPLETE_PLUGIN_REPLACE", output_string)

    # output
    path_js = site.get_output_dir() / output_path_js
    path_json = site.get_output_dir() / output_path_json
    log_info += "<br> output directory for js: %s" % path_js
    log_info += "<br> output directory for json: %s" % path_json

    # write code file
    files.write_file(path_js, js_filename, jsdata)
    # write json data file
    files.write_file(path_json, json_filename, jsondata)

    return (SiteFab.OK, plugin_name, log_info)
def process(self, unused, site, config):
    """Render the RSS feed of the site into ``rss.xml``.

    Posts are sorted from newest to oldest by ``meta.creation_date_ts``.
    Hidden posts and posts whose ``meta.microdata_type`` is not one of
    ``BlogPosting``, ``ScholarlyArticle`` or ``PublicationEvent`` are
    skipped. Each kept post is re-parsed from its source file with a
    parser built from ``config.parser`` and the resulting HTML is stored
    in ``post.rss``. Collection stops once ``config.num_posts`` items
    have been gathered.

    Note that ``config`` is modified: ``banner``, ``icon`` and
    ``logo_svg`` are prefixed with the site URL, ``parser`` is replaced
    by its initialized version and ``formatted_update`` is set.

    Args:
        unused: not used by this plugin.
        site: site object providing ``config``, ``jinja2``, ``posts``,
            ``plugin_data`` and ``get_output_dir()``.
        config: plugin configuration (``template``, ``banner``, ``icon``,
            ``logo_svg``, ``parser``, ``num_posts``).

    Returns:
        tuple: ``(SiteFab.OK, "rss", log_info)`` on success, or
        ``(SiteFab.ERROR, "rss", reason)`` when the template is missing,
        no post qualifies or rendering raises.
    """
    template_name = config.template
    config.banner = "%s%s" % (site.config.url, config.banner)
    config.icon = "%s%s" % (site.config.url, config.icon)
    config.logo_svg = "%s%s" % (site.config.url, config.logo_svg)

    # rendering template
    if template_name not in site.jinja2.list_templates():
        return SiteFab.ERROR, "rss", ("template %s not found" % template_name)
    template = site.jinja2.get_template(str(template_name))

    # custom parser
    parser_tpl_path = Path(config.parser.template_dir)
    config.parser.templates_path = site.config.root_dir / parser_tpl_path
    config.parser = Parser.make_config(config.parser)
    parser = Parser(config.parser, site)

    # NOTE(review): the UTC offset is a fixed literal while fromtimestamp()
    # converts to the machine's local time - confirm this is intended
    date_format = '%a, %d %b %Y %H:%M:%S -0800'

    def format_date(timestamp):
        # timestamp -> RSS date string
        return datetime.fromtimestamp(int(timestamp)).strftime(date_format)

    # tolerate a missing image_info entry instead of raising KeyError
    if 'image_info' in site.plugin_data:
        image_info = site.plugin_data['image_info']
    else:
        image_info = {}

    allowed_types = ("BlogPosting", "ScholarlyArticle", "PublicationEvent")

    # generating feed: sort posts from newer to older
    posts = sorted(site.posts, key=lambda p: p.meta.creation_date_ts,
                   reverse=True)
    rss_items = []
    for post in posts:
        if post.meta.hidden or post.meta.microdata_type not in allowed_types:
            continue

        # parse the post with a customized parser
        file_content = files.read_file(post.filename)
        parsed_post = parser.parse(file_content)

        # adding the newly generated HTML as RSS
        post.rss = parsed_post.html

        post.meta.formatted_creation = format_date(post.meta.creation_date_ts)
        if post.meta.update_date_ts:
            post.meta.formatted_update = format_date(post.meta.update_date_ts)
        else:
            post.meta.formatted_update = post.meta.formatted_creation

        # size of image
        # NOTE(review): nesting reconstructed from a flattened source; the
        # banner URL is assumed to belong with the other banner fields
        banner = post.meta.banner
        if banner in image_info:
            post.meta.banner_size = image_info[banner]['file_size']
            post.meta.banner_mimetype = image_info[banner]['mime_type']
            post.meta.banner_fullurl = "%s%s" % (site.config.url, banner)

        post.meta.author = post.meta.authors[0].replace(",", "")
        rss_items.append(post)
        if len(rss_items) == config.num_posts:
            break

    if not rss_items:
        return (SiteFab.ERROR, "rss", 'no RSS items')

    config.formatted_update = rss_items[0].meta.formatted_update

    try:
        rv = template.render(site=site, rss=config, items=rss_items)
    except Exception as e:
        return (SiteFab.ERROR, "rss", e)

    # output
    path = site.get_output_dir()
    files.write_file(path, 'rss.xml', rv)

    log_info = "template used:%s<br>ouput:%srss.xml" % (template_name, path)
    return SiteFab.OK, "rss", log_info