Esempio n. 1
0
def main():
    """Compute and store word and code-block counts for each package README.

    Iterates over every ``Package`` whose ``readme`` is not the empty string,
    derives two heuristic metrics from the raw README text, and writes them to
    the ``ReadmeAnalysis`` record for that package: the existing record is
    updated if one is found, otherwise a new one is created.
    """
    # Raw-string patterns: '\w' inside a normal string literal is an invalid
    # escape sequence (a warning on modern Python). Compiling once up front
    # also keeps the pattern setup out of the per-package loop.
    word_pattern = re.compile(r'\w+')
    code_block_pattern = re.compile(r'::.*\n\n')

    for p in Package.select().where(Package.readme != ''):

        # This is a heuristic for word-count.
        # It will not be precisely correct, depending on your definition of word.
        # For example, a path like 'com.app.example' is split into three words here.
        word_count = len(word_pattern.findall(p.readme))

        # Another heuristic.
        # In reStructuredText (reST), code blocks are introduced by ending a paragraph
        # with a special marker ::. The block must be indented and separated from the
        # surrounding paragraphs by blank lines. Thus, there must be at least two new line
        # characters after the special marker ::.
        #
        # The pattern counts each '::' that is followed (after any other characters on
        # the same line) by two consecutive newline characters.
        #
        # This may prove to be a broken heuristic. In that case, consider using Sphinx:
        # http://www.sphinx-doc.org/en/stable/index.html.
        block_count = len(code_block_pattern.findall(p.readme))

        try:
            analysis = ReadmeAnalysis.get(ReadmeAnalysis.package == p)
        except ReadmeAnalysis.DoesNotExist:
            # No analysis stored for this package yet: create one.
            analysis = ReadmeAnalysis.create(
                package=p, code_count=block_count, word_count=word_count
            )
            logging.debug("Created README analysis for package %s", p.name)
        else:
            # An analysis already exists: overwrite its counts with fresh values.
            analysis.code_count = block_count
            analysis.word_count = word_count
            analysis.save()
            logging.debug("Updated README analysis for package %s", p.name)

    logging.info("Finished analyzing READMEs.")
Esempio n. 2
0
def main():
    """Compute and store word and code-block counts for each package README.

    Iterates over every ``Package`` whose ``readme`` is not the empty string,
    renders the README through ``markdown.markdown`` and parses the resulting
    HTML with BeautifulSoup. The word count is taken from the parsed text and
    the code-block count from the number of ``<pre>`` tags. The values are
    written to the ``ReadmeAnalysis`` record for that package: the existing
    record is updated if one is found, otherwise a new one is created.
    """
    # Raw-string pattern: '\w' inside a normal string literal is an invalid
    # escape sequence (a warning on modern Python). Compiling once up front
    # also keeps the pattern setup out of the per-package loop.
    word_pattern = re.compile(r'\w+')

    for p in Package.select().where(Package.readme != ''):

        readme_text = p.readme
        html = markdown.markdown(readme_text)
        soup = BeautifulSoup(html, 'html.parser')

        # This is a heuristic for word-count.
        # It will not be precisely correct, depending on your definition of word.
        # For example, a path like 'com.app.example' is split into three words here.
        word_count = len(word_pattern.findall(soup.text))

        # Another heuristic.  As it's typical that inline code examples occur in <pre>
        # blocks, especially in formatted markdown, we count code blocks based
        # on the appearance of <pre> tags.
        code_blocks = soup.find_all('pre')
        block_count = len(code_blocks)

        try:
            analysis = ReadmeAnalysis.get(ReadmeAnalysis.package == p)
        except ReadmeAnalysis.DoesNotExist:
            # No analysis stored for this package yet: create one.
            analysis = ReadmeAnalysis.create(
                package=p, code_count=block_count, word_count=word_count
            )
            logging.debug("Created README analysis for package %s", p.name)
        else:
            # An analysis already exists: overwrite its counts with fresh values.
            analysis.code_count = block_count
            analysis.word_count = word_count
            analysis.save()
            logging.debug("Updated README analysis for package %s", p.name)