Ejemplo n.º 1
0
def fetch_npm_package_list():
    """Record every package listed by the npm registry's ``_all_docs`` endpoint.

    The listing is requested through ``make_request``.  When that call yields
    ``None`` nothing is done; otherwise ``NPMPackage.get_or_create`` is called
    once per entry in the response's ``rows`` list, using the entry's ``id``
    as the package name.
    """
    response = make_request(
        default_requests_session.get,
        "https://skimdb.npmjs.com/registry/_all_docs",
    )
    # Guard clause: no response object means there is nothing to record.
    if response is None:
        return

    for row in response.json()['rows']:
        NPMPackage.get_or_create(name=row['id'])
Ejemplo n.º 2
0
def fetch_npm_package_list():
    """Populate ``NPMPackage`` with the names found in the npm ``_all_docs`` listing.

    ``make_request`` is used to perform the GET.  If it returns ``None`` the
    function returns without touching the database; otherwise each element of
    the JSON body's ``rows`` list has its ``id`` passed as ``name`` to
    ``NPMPackage.get_or_create``.
    """
    listing_url = "https://skimdb.npmjs.com/registry/_all_docs"
    reply = make_request(default_requests_session.get, listing_url)
    if reply is None:
        return

    rows = reply.json()['rows']
    for entry in rows:
        NPMPackage.get_or_create(name=entry['id'])
Ejemplo n.º 3
0
def npm_analysis():
    """Compute word and code-block counts for every stored NPM README.

    Iterates over the ``NPMPackage`` rows whose ``readme`` is not the empty
    string, converts each README with ``markdown.markdown``, parses the result
    with BeautifulSoup, and stores two heuristic metrics in
    ``NPMReadmeAnalysis``: the number of ``\\w+`` matches in the parsed text
    and the number of ``<pre>`` elements.  An existing analysis row for the
    package is updated in place; otherwise a new one is created.
    """
    logging.info("Starting NPM package analysis.")

    # Raw string: '\w' in a plain literal is an invalid escape sequence that
    # newer Python versions warn about.  Compiled once, outside the loop.
    word_pattern = re.compile(r'\w+')

    for p in NPMPackage.select().where(NPMPackage.readme != ''):
        readme_text = p.readme
        html = markdown.markdown(readme_text)
        soup = BeautifulSoup(html, 'html.parser')

        # This is a heuristic for word count.
        # It will not be precisely correct, depending on your definition of word.
        # For example, a path like 'com.app.example' is split into three words here.
        word_count = len(word_pattern.findall(soup.text))

        # Another heuristic.  As it's typical that inline code examples occur in <pre>
        # blocks, especially in formatted markdown, we count code blocks based
        # on the appearance of <pre> tags.
        code_blocks = soup.find_all('pre')
        block_count = len(code_blocks)

        try:
            analysis = NPMReadmeAnalysis.get(NPMReadmeAnalysis.package == p)
        except NPMReadmeAnalysis.DoesNotExist:
            # No analysis stored for this package yet: create one.
            analysis = NPMReadmeAnalysis.create(
                package=p, code_count=block_count, word_count=word_count
            )
            logging.debug("Created README analysis for package %s", p.name)
        else:
            # An analysis already exists: overwrite its counts.
            analysis.code_count = block_count
            analysis.word_count = word_count
            analysis.save()
            logging.debug("Updated README analysis for package %s", p.name)
Ejemplo n.º 4
0
        help="how many package names to fetch (only applicable to npm)")
    parser.add_argument('--github-readmes',
                        action='store_true',
                        help="fetch Github READMEs (only applicable to npm)")
    parser.add_argument('--github-stats',
                        action='store_true',
                        help="fetch Github stats (only applicable to npm)")
    args = parser.parse_args()

    if args.db == 'npm':
        if args.package_list:
            create_npm_tables()
            fetch_npm_package_list()
        if args.data:
            if args.update:
                packages = NPMPackage.select().where(
                    NPMPackage.description != '')
            else:
                packages = NPMPackage.select().where(
                    NPMPackage.readme >> None).order_by(fn.Random())
            fetch_npm_data(packages)
        if args.lib_packages:
            create_tables()
            fetch_packagenames_from_libraryio(args.lib_package_count)
        if args.github_readmes:
            fetch_github_readmes(NPMPackage.select())
        if args.github_stats:
            fetch_github_stats(NPMPackage.select())
    elif args.db == 'pypi':
        if args.package_list:
            create_pypi_tables()
            fetch_pypi_package_list()
Ejemplo n.º 5
0
    )
    parser.add_argument(
        '--github-stats',
        action='store_true',
        help="fetch Github stats (only applicable to npm)"
    )
    args = parser.parse_args()


    if args.db == 'npm':
        if args.package_list:
            create_npm_tables()
            fetch_npm_package_list()
        if args.data:
            if args.update:
                packages = NPMPackage.select().where(NPMPackage.description != '')
            else:
                packages = NPMPackage.select().where(NPMPackage.readme >> None).order_by(fn.Random())
            fetch_npm_data(packages)
        if args.lib_packages:
            create_tables()
            fetch_packagenames_from_libraryio(args.lib_package_count)
        if args.github_readmes:
            fetch_github_readmes(NPMPackage.select())
        if args.github_stats:
            fetch_github_stats(NPMPackage.select())
    elif args.db == 'pypi':
        if args.package_list:
            create_pypi_tables()
            fetch_pypi_package_list()
        if args.data: