Esempio n. 1
0
    def process(self, unused, site, config):
        """Generate the search plugin assets: search.json and search.js.

        The base ``search.json`` found next to this plugin has its
        ``SEARCH_DOC_PLUGIN_REPLACE`` placeholder replaced by a JSON object
        describing every post (keyed by post id); the base ``search.js`` is
        copied unchanged. Both files are written below the site output
        directory.

        Args:
            unused: Not used by this plugin.
            site: Site object; ``posts`` and ``get_output_dir()`` are read.
            config: Plugin configuration providing ``output_path_js``,
                ``output_path_json`` and ``num_terms_per_post``.

        Returns:
            tuple: ``(SiteFab.OK, plugin_name, log_info)`` where ``log_info``
            is an HTML table listing each post title and its terms.
        """
        plugin_name = "search"
        js_filename = "search.js"
        json_filename = "search.json"

        # configuration
        js_subdir = config.output_path_js
        json_subdir = config.output_path_json
        max_terms = config.num_terms_per_post

        # base files shipped in the same directory as this plugin
        base_dir = os.path.dirname(__file__)
        jsondata = files.read_file(os.path.join(base_dir, json_filename))
        jsdata = files.read_file(os.path.join(base_dir, js_filename))

        documents = {}
        rows = []
        for post in site.posts:
            fields = post.nlp.clean_fields
            conference = "%s %s" % (fields.conference_short_name,
                                    fields.conference_name)
            # each nlp term entry is indexable; only its first item is kept
            top_terms = [entry[0] for entry in post.nlp.terms][:max_terms]

            document = {
                "id": post.id,
                "template": post.meta.template,
                "title": fields.title,
                "authors": fields.authors,
                "conference": conference,
                "terms": top_terms
            }
            documents[post.id] = document
            rows.append([document['title'], document['terms']])

        log_info = "" + tabulate(rows, tablefmt='html',
                                 headers=['title', 'terms'])

        # output: data file first, then the code file
        out_dir = site.get_output_dir()
        jsondata = jsondata.replace("SEARCH_DOC_PLUGIN_REPLACE",
                                    json.dumps(documents))
        files.write_file(os.path.join(out_dir, json_subdir),
                         json_filename, jsondata)
        files.write_file(os.path.join(site.get_output_dir(), js_subdir),
                         js_filename, jsdata)
        return (SiteFab.OK, plugin_name, log_info)
Esempio n. 2
0
def test_basic_file_flow(tmp_path):
    """Exercise the basic file helpers end to end.

    Tested all at once as it is hard to do in smaller chunks:
    write -> read -> list -> clean -> list again.

    Args:
        tmp_path: pytest-provided temporary directory.
    """
    fname = 'a.txt'
    content = 'this is a test'
    path = Path(tmp_path) / fname

    # write
    write_file(tmp_path, fname, content)
    assert path.exists()

    # read
    read_back = read_file(path)
    assert read_back == content

    # list
    files_list = get_files_list(tmp_path, '*.txt')
    assert path in files_list

    # clean
    clean_dir(tmp_path)
    assert not path.exists()

    # list again: query the directory itself (not the deleted file path) so
    # the check really verifies that the cleaned directory holds no files
    files_list = get_files_list(tmp_path, '*.txt')
    assert not files_list  # must be empty
Esempio n. 3
0
def test_parsing_basic_md(sitefab):
    """Parse the basic markdown fixture and check every produced artifact.

    Covers the frontmatter, the rendered HTML, the plain text, the table of
    contents and the media statistics of the parsed post.
    """
    md_parser = Parser(sitefab.config.parser, sitefab)
    source = read_file(TEST_ROOT_DIR / 'data/basic.md')
    parsed = md_parser.parse(source)

    # frontmatter
    assert 'Elie, Bursztein' in parsed.meta.authors
    assert parsed.meta.title == 'Test'

    # html
    expected_fragments = (
        'this is a test.',
        '<h1 id="toc-0">heading 1</h1>',
        '<h2 id="toc-1">heading 2</h2>',
        '<strong>bold test</strong>',
    )
    for fragment in expected_fragments:
        assert fragment in parsed.html

    # text: content is kept while markup is stripped
    assert 'this is a test' in parsed.text
    assert '<' not in parsed.text

    # toc entries are (name, heading level, id)
    first_entry, second_entry = parsed.meta.toc[0], parsed.meta.toc[1]
    assert first_entry == ('heading 1', 1, 0)
    assert second_entry == ('heading 2', 2, 1)

    # statistics
    stats = parsed.meta.statistics
    assert stats.num_videos == 1
    assert stats.num_images == 1
Esempio n. 4
0
def test_parsing_real_md(sitefab):
    """Parse a real post from the site content and spot-check the result."""
    post_path = sitefab.config.root_dir / 'content/posts/18-4-of-us-internet-users-got-at-least-one-of-their-account-compromised.md'  # noqa
    md_parser = Parser(sitefab.config.parser, sitefab)
    parsed = md_parser.parse(read_file(post_path))

    assert 'internet' in parsed.html
    first_author = parsed.meta.authors[0]
    assert 'Elie' in first_author
    assert parsed.meta.statistics.num_images == 2
Esempio n. 5
0
    def process(self, unused, site, config):
        """Export selected post metadata through the posts.json template.

        Reads the base ``posts.json`` located next to this plugin, replaces
        its ``JS_POSTS_PLUGIN_REPLACE`` placeholder with a JSON object keyed
        by post id, and writes the result below the site output directory.

        Args:
            unused: Not used by this plugin.
            site: Site object; ``posts``, ``plugin_data`` and
                ``get_output_dir()`` are read.
            config: Plugin configuration providing ``output_path``,
                ``meta_fields_to_output`` and ``plugin_data_to_output``.

        Returns:
            tuple: ``(status, plugin_name, log_info)``. ``status`` is
            ``SiteFab.ERROR`` when the base file could not be read,
            ``SiteFab.OK`` otherwise.
        """
        plugin_name = "js_posts"
        js_filename = "posts.json"

        # configuration
        output_path = config.output_path
        meta_fields_to_output = config.meta_fields_to_output
        plugin_data_to_output = config.plugin_data_to_output

        log_info = "base javascript: %s<br>ouput:%s%s<br>" % (
            js_filename, output_path, js_filename)
        # append (the original overwrote the line above, losing it)
        log_info += "meta fields to outputs:%s" % (
            ", ".join(meta_fields_to_output))

        # Reading the base JS
        plugin_dir = os.path.dirname(__file__)
        js_file = os.path.join(plugin_dir, js_filename)
        js = files.read_file(js_file)
        if not js:
            return (SiteFab.ERROR, plugin_name,
                    "Base Javascript:%s not found." % js_file)

        # optional per-banner data produced by other plugins
        use_responsive = ('responsive_banner' in plugin_data_to_output
                          and 'responsive_images' in site.plugin_data)
        use_thumbnails = ('thumbnail_banner' in plugin_data_to_output
                          and 'thumbnails' in site.plugin_data)

        js_posts = {}
        for post in site.posts:
            js_post = {'id': post.id}
            for field in meta_fields_to_output:
                if field not in post.meta:
                    continue
                if field == "permanent_url":
                    # permanent urls are exported with a trailing slash
                    js_post[field] = "%s/" % post.meta[field]
                else:
                    js_post[field] = post.meta[field]

            if use_responsive:
                responsive = site.plugin_data['responsive_images']
                if post.meta.banner in responsive:
                    banner_info = responsive[post.meta.banner]
                    js_post['banner_srcsets'] = banner_info['srcsets']
                    js_post['banner_allsizes'] = banner_info['allsizes']

            if use_thumbnails:
                thumbnails = site.plugin_data['thumbnails']
                if post.meta.banner in thumbnails:
                    js_post['thumbnails'] = thumbnails[post.meta.banner]

            js_posts[post.id] = js_post

        # replacing placeholder with post data
        output_string = json.dumps(js_posts)
        log_info += "output string:<br>%s" % output_string
        js = str(js)
        js = js.replace("JS_POSTS_PLUGIN_REPLACE", output_string)

        # output
        path = os.path.join(site.get_output_dir(), output_path)
        log_info += "output directory: %s" % path
        files.write_file(path, js_filename, js)

        return (SiteFab.OK, plugin_name, log_info)
Esempio n. 6
0
    def __init__(self, config):
        """Load the linter test definitions and prepare the report template.

        Args:
            config: Linter configuration; ``report_template_file`` is read to
                locate the report template.
        """
        # tests.yaml lives in the same directory as this module
        tests_path = Path(__file__).parent / 'tests.yaml'
        self.test_info = files.load_config(tests_path)
        if not self.test_info:
            utils.error("Can't load linter tests")

        self.config = config
        self.results = {}

        raw_template = files.read_file(self.config.report_template_file)
        self.jinja2_template = Template(str(raw_template))
Esempio n. 7
0
def test_unicode_support(tmp_path):
    """Non-ASCII file names and contents must survive a write/read cycle."""
    fname = '😁.txt'
    content = 'this is a test 😁'
    target = Path(tmp_path) / fname

    # write
    write_file(tmp_path, fname, content)
    assert target.exists()

    # read
    assert read_file(target) == content

    # cleanup
    clean_dir(tmp_path)
Esempio n. 8
0
def test_filelist_recursive(tmp_path):
    """A file in a sub directory is listed only by the recursive listing."""
    clean_dir(tmp_path)
    fname = 'test.md'
    txt = 'hello'
    nested_dir = tmp_path / 'newdir'

    write_file(nested_dir, fname, txt)

    # ensure we have a sub dir and the file in it
    assert nested_dir.exists()
    assert nested_dir.is_dir()
    assert read_file(nested_dir / fname) == txt

    # test we get a recursive listing
    recursive_listing = get_files_list(tmp_path)
    assert len(recursive_listing) == 1

    # test that non recursive returns nothing
    flat_listing = get_files_list(tmp_path, recursive=False)
    assert flat_listing == []
Esempio n. 9
0
def test_writing_subdirectory(tmp_path):
    """Make sure a missing sub directory is created when writing a file."""
    fname = 'myfile.md'
    content = 'this is a test 😁'
    target_dir = Path(tmp_path) / 'tobecreated'
    target_file = target_dir / fname

    # write
    write_file(target_dir, fname, content)
    assert target_file.exists()
    assert target_dir.is_dir()

    # read
    assert read_file(target_file) == content

    # cleanup removes both the file and the directory that was created
    clean_dir(tmp_path)
    assert not target_file.exists()
    assert not target_dir.exists()
Esempio n. 10
0
    def make_config(config):
        """Initialize a parser config with all the needed variables.

        Every ``*.html`` file returned by ``files.get_files_list`` for
        ``config.templates_path`` is read and stored in ``config.templates``
        under the file's stem.

        Args:
            config (obj_dict): the uninitialized configuration with basic
            variables
        Returns:
            obj_dict: The initialized configuration
        """
        if not config:
            utils.detailed_error("Parser", 'make_config',
                                 'supplied config is empty')

        if 'template_dir' not in config:
            utils.detailed_error("Parser", 'make_config',
                                 'template_dir not found')

        template_files = files.get_files_list(config.templates_path, "*.html")
        config.templates = {
            template_file.stem: files.read_file(template_file)
            for template_file in template_files
        }
        return config
Esempio n. 11
0
def test_non_existing_read_file():
    """Reading a path that does not exist must yield an empty string."""
    missing_path = 'asjkajlkajlksal32312ewdas3'
    content = read_file(missing_path)
    assert content == ""
Esempio n. 12
0
    def parse(self):
        """Parse the markdown content files into post objects.

        Resets ``self.posts``, creates the post collections (by tag,
        category, template and microdata), then reads every file listed in
        ``self.filenames.posts``, parses it, runs it through ``parse_post``
        and hands the result to ``self.process_post``. Time spent is tracked
        under the 'Parsing' counter of ``self.cnts``.

        When ``self.config.threads`` is greater than 1 the ``parse_post``
        step is dispatched through a ``Pool``; otherwise it runs inline.
        """
        self.cnts.start('Parsing')
        filenames = self.filenames.posts
        self.posts = []

        # collections creation
        min_posts = self.config.collections.min_posts

        # posts_by_tag is what is rendered: it contains, for a given post,
        # both its tags and its category
        tlp = self.jinja2.get_template(self.config.collections.template)
        path = self.get_output_dir() / self.config.collections.output_dir

        self.posts_by_tag = PostCollections(
            site=self,
            template=tlp,
            output_path=path,
            web_path=self.config.collections.output_dir,
            min_posts=min_posts)

        self.posts_by_category = PostCollections(
            site=self, web_path=self.config.collections.output_dir)

        self.posts_by_template = PostCollections(site=self)

        self.posts_by_microdata = PostCollections(site=self)

        # Parsing
        cprint("\nParsing posts", "magenta")
        progress_bar = tqdm(total=len(filenames),
                            unit=' files',
                            desc="Files",
                            leave=True)
        # NOTE(review): nothing in this method appends to `errors`, so the
        # error report at the end never fires -- confirm whether error
        # collection was meant to be wired in.
        errors = []
        post_idx = 1  # post ids are assigned sequentially, starting at 1
        threads = self.config.threads
        # NOTE: passing self.config.parser might seem strange
        # but it is required to get the plugins working
        parser = Parser(self.config.parser, self)

        if threads > 1:
            # First pass: parse every file in this process and serialize the
            # resulting posts to JSON so they can be handed to the pool.
            todo_nlp_posts = []
            for filename in filenames:
                file_content = files.read_file(filename)
                post = parser.parse(file_content)
                post.filename = str(filename)
                post.id = post_idx
                post_idx += 1
                todo_nlp_posts.append(json.dumps(post))
            # Second pass: run parse_post through the pool.
            # NOTE(review): presumably a multiprocessing Pool, in which case
            # imap_unordered yields results in completion order rather than
            # submission order -- confirm against the file's imports.
            pool = Pool(threads)
            for parsed_post_json in pool.imap_unordered(
                    parse_post, todo_nlp_posts):
                parsed_post = json.loads(parsed_post_json)
                post = utils.dict_to_objdict(parsed_post)
                self.process_post(post)
                progress_bar.update(1)
            pool.close()
            pool.join()
        else:
            # Sequential path: same JSON round-trip through parse_post, one
            # post at a time.
            for filename in filenames:
                file_content = files.read_file(filename)
                post = parser.parse(file_content)
                post.filename = str(filename)
                post.id = post_idx
                parsed_post_json = parse_post(json.dumps(post))
                parsed_post = json.loads(parsed_post_json)
                self.process_post(utils.dict_to_objdict(parsed_post))
                progress_bar.update(1)
                post_idx += 1

        progress_bar.close()
        if len(errors):
            utils.error("\n".join(errors))

        self.cnts.stop('Parsing')
Esempio n. 13
0
    def process(self, unused, site, config):
        """Generate the autocomplete plugin assets.

        Scores authors, title terms and body terms across all posts, keeps
        the ``config.num_suggestions`` best-scoring ones, and injects them
        (as ``[term, post frequency, score]`` rows) into the base
        ``autocomplete.json`` in place of ``AUTOCOMPLETE_PLUGIN_REPLACE``.
        The base ``autocomplete.js`` is copied unchanged.

        Args:
            unused: Not used by this plugin.
            site: Site object; ``posts`` and ``get_output_dir()`` are read.
            config: Plugin configuration providing ``output_path_js``,
                ``output_path_json``, ``num_suggestions`` and
                ``excluded_terms``.

        Returns:
            tuple: ``(SiteFab.OK, plugin_name, log_info)`` where ``log_info``
            is an HTML summary of the selected terms and output directories.
        """
        plugin_name = "autocomplete"
        json_filename = "autocomplete.json"
        js_filename = "autocomplete.js"
        # configuration
        output_path_js = config.output_path_js
        output_path_json = config.output_path_json
        num_suggestions = config.num_suggestions
        excluded_terms = config.excluded_terms

        log_info = ""

        # Reading the base files located next to this plugin
        plugin_dir = Path(__file__).parent
        jsondata = files.read_file(plugin_dir / json_filename)
        jsdata = files.read_file(plugin_dir / js_filename)

        term_post_frequency = defaultdict(int)
        term_score = defaultdict(float)
        for post in site.posts:
            # authors: the full name and each name part (2+ chars) count
            for author in post.nlp.clean_fields.authors:
                term_post_frequency[author] += 1
                term_score[author] += 1  # ensure authors always first
                for part in author.split(' '):
                    if len(part) < 2:
                        continue
                    term_post_frequency[part] += 1
                    term_score[part] += 1

            # Term entries are indexed as (text, score). The exclusion list
            # is matched against the text: comparing the whole entry (as the
            # code used to do) never matches a plain term.
            # title terms weigh twice as much as other terms
            for term in post.nlp.title_terms:
                if term[0] in excluded_terms:
                    continue
                term_post_frequency[term[0]] += 1
                term_score[term[0]] += term[1] * 2

            # other terms
            for term in post.nlp.terms:
                if term[0] in excluded_terms:
                    continue
                term_post_frequency[term[0]] += 1
                term_score[term[0]] += term[1]

        log_info += "num of terms considered: %s<br>" % len(term_score)

        # keep the best-scoring terms
        top_terms = sorted(term_score, key=term_score.get, reverse=True)
        output = [[term, term_post_frequency[term], term_score[term]]
                  for term in top_terms[:num_suggestions]]

        # log results
        log_info += tabulate(output,
                             headers=['term', 'post frequency', 'score'],
                             tablefmt='html')

        # replacing placeholder with computation result
        output_string = json.dumps(output)
        jsondata = jsondata.replace("AUTOCOMPLETE_PLUGIN_REPLACE",
                                    output_string)

        # output
        path_js = site.get_output_dir() / output_path_js
        path_json = site.get_output_dir() / output_path_json
        log_info += "<br> output directory for js: %s" % path_js
        log_info += "<br> output directory for json: %s" % path_json
        # write code file
        files.write_file(path_js, js_filename, jsdata)
        # write data file
        files.write_file(path_json, json_filename, jsondata)

        return (SiteFab.OK, plugin_name, log_info)
Esempio n. 14
0
    def process(self, unused, site, config):
        """Render the site RSS feed to ``rss.xml`` in the output directory.

        Visible posts of type BlogPosting, ScholarlyArticle or
        PublicationEvent are taken from newest to oldest (at most
        ``config.num_posts`` of them), re-parsed with the RSS-specific
        parser configuration, decorated with the fields the template needs,
        and rendered with the ``config.template`` jinja2 template.

        Note: ``config`` and the selected posts are modified in place
        (absolute URLs, parser templates, formatted dates, banner info).

        Args:
            unused: Not used by this plugin.
            site: Site object; ``config``, ``jinja2``, ``posts``,
                ``plugin_data`` and ``get_output_dir()`` are read.
            config: Plugin configuration providing ``template``, ``banner``,
                ``icon``, ``logo_svg``, ``parser`` and ``num_posts``.

        Returns:
            tuple: ``(status, "rss", message)``. ``status`` is
            ``SiteFab.ERROR`` when the template is missing, no post
            qualifies, or rendering fails; ``SiteFab.OK`` otherwise.
        """
        template_name = config.template

        # make the asset urls absolute
        config.banner = "%s%s" % (site.config.url, config.banner)
        config.icon = "%s%s" % (site.config.url, config.icon)
        config.logo_svg = "%s%s" % (site.config.url, config.logo_svg)

        # rendering template
        if template_name not in site.jinja2.list_templates():
            return SiteFab.ERROR, "rss", ("template %s not found" %
                                          template_name)
        template = site.jinja2.get_template(str(template_name))

        # custom parser
        parser_tpl_path = Path(config.parser.template_dir)
        config.parser.templates_path = (site.config.root_dir / parser_tpl_path)
        config.parser = Parser.make_config(config.parser)
        parser = Parser(config.parser, site)

        def rss_date(timestamp):
            # NOTE(review): the UTC offset is hard-coded to -0800 while
            # fromtimestamp() uses the local timezone of the build machine.
            return datetime.fromtimestamp(int(timestamp)).strftime(
                '%a, %d %b %Y %H:%M:%S -0800')

        # Banner details come from plugin data that may be absent (e.g. the
        # producing plugin is disabled): fall back to "no info" instead of
        # raising KeyError.
        if 'image_info' in site.plugin_data:
            image_info = site.plugin_data['image_info']
        else:
            image_info = {}

        rss_types = ("BlogPosting", "ScholarlyArticle", "PublicationEvent")

        # sort posts from newer to older
        posts = sorted(site.posts,
                       key=lambda post: post.meta.creation_date_ts,
                       reverse=True)

        # generating feed
        rss_items = []
        for post in posts:
            if post.meta.hidden or post.meta.microdata_type not in rss_types:
                continue

            # parse the post with a customized parser
            file_content = files.read_file(post.filename)
            parsed_post = parser.parse(file_content)
            # adding the newly generated HTML as RSS
            post.rss = parsed_post.html

            post.meta.formatted_creation = rss_date(
                post.meta.creation_date_ts)
            if post.meta.update_date_ts:
                post.meta.formatted_update = rss_date(
                    post.meta.update_date_ts)
            else:
                # never updated: the update date is the creation date
                post.meta.formatted_update = post.meta.formatted_creation

            # size of image
            if post.meta.banner in image_info:
                banner_info = image_info[post.meta.banner]
                post.meta.banner_size = banner_info['file_size']
                post.meta.banner_mimetype = banner_info['mime_type']
                post.meta.banner_fullurl = "%s%s" % (site.config.url,
                                                     post.meta.banner)

            post.meta.author = post.meta.authors[0].replace(",", "")
            rss_items.append(post)
            if len(rss_items) == config.num_posts:
                break

        if not rss_items:
            return (SiteFab.ERROR, "rss", 'no RSS items')

        # the feed update date is the one of the newest item
        config.formatted_update = rss_items[0].meta.formatted_update

        try:
            rv = template.render(site=site, rss=config, items=rss_items)
        except Exception as e:
            # report the failure as text, like every other return path
            return (SiteFab.ERROR, "rss", str(e))

        # output
        path = site.get_output_dir()
        files.write_file(path, 'rss.xml', rv)

        log_info = "template used:%s<br>ouput:%srss.xml" % (template_name,
                                                            path)
        return SiteFab.OK, "rss", log_info