Exemple #1
0
def scrape_vim_scripts_repos(num):
    """Scrape at least num repos from the vim-scripts GitHub user.

    Walks the paginated ``users/vim-scripts/repos`` listing page by page.
    For every repo it stores the vim-scripts specific fields through
    ``db.plugins.add_scraped_data`` (keyed by ``vimorg_id``) and upserts the
    raw repo data into the ``PluginGithubRepos`` index.

    Args:
        num: Minimum number of repos to scrape. The limit is only checked
            between pages, so a page that has been fetched is always
            processed in full and the final count may exceed ``num``.

    Raises:
        AssertionError: If no vim.org id can be derived from a repo's
            homepage URL.
    """
    _, user_data = get_api_page('users/vim-scripts')

    # Calculate how many pages of repositories there are.
    # NOTE(review): assumes get_api_page serves 100 repos per page -- confirm.
    num_repos = user_data['public_repos']
    # Floor division keeps this an int even when `/` means true division
    # (Python 3, or `from __future__ import division`); range() below would
    # otherwise raise TypeError on a float.
    num_pages = (num_repos + 99) // 100  # ceil(num_repos / 100.0)

    num_inserted = 0
    num_scraped = 0

    for page in range(1, num_pages + 1):
        if num_scraped >= num:
            break

        _, repos_data = get_api_page('users/vim-scripts/repos', page=page)

        for repo_data in repos_data:

            # Scrape plugin-relevant data. We don't need much info from
            # vim-scripts because it's a mirror of vim.org.

            # vimorg_id is required for associating with the corresponding
            # vim.org-scraped plugin.
            vimorg_id = util.get_vimorg_id_from_url(repo_data['homepage'])
            assert vimorg_id, (
                'no vim.org id found in homepage %r of vim-scripts repo %r'
                % (repo_data['homepage'], repo_data['name']))

            repo_name = repo_data['name']

            # Carry over the plugin-manager user count from a previously
            # stored copy of this repo, if there is one.
            repo = PluginGithubRepos.get_with_owner_repo('vim-scripts',
                                                         repo_name)
            num_bundles = repo['plugin_manager_users'] if repo else 0

            db.plugins.add_scraped_data({
                'vimorg_id': vimorg_id,
                'github_vim_scripts_repo_name': repo_name,
                'github_vim_scripts_stars': repo_data['watchers'],
                'github_vim_scripts_bundles': num_bundles,
            })

            # Also add to our index of known GitHub plugins.
            inserted = PluginGithubRepos.upsert_with_owner_repo({
                'owner': 'vim-scripts',
                'repo_name': repo_name,
                'repo_data': repo_data,
            })

            num_inserted += int(inserted)
            num_scraped += 1

        # Single-argument print(...) produces the same output whether print
        # is a statement (Python 2) or a function (Python 3).
        print('    scraped %s repos' % num_scraped)

    print("\nScraped %s vim-scripts GitHub repos; inserted %s new ones." % (
        num_scraped, num_inserted))
def review_vimorg_submission(submission):
    """Prompts whether to insert data about a plugin with a vimorg_id.

    Displays info about that submission, and displays an interactive prompt
    whether to add submitted data about it. If no, will add a field to the
    submission that it was rejected. If yes, will make the submission data
    searchable by adding a vimorg_id field so that data from it can be added
    when scraping vim.org plugins.

    Args:
        submission: Dict describing the submitted plugin. It is mutated in
            place (gains either ``rejected`` or ``vimorg_id``) and then
            written to the ``submitted_plugins`` table. ``vimorg-link`` is
            read when the submission is accepted.
    """
    # Single-argument print(...) produces the same output whether print is a
    # statement (Python 2) or a function (Python 3); print('') emits the
    # blank line a bare `print` statement used to.
    print('')
    print(json.dumps(submission, indent=2))

    if _query_yes_no("Add info about this vim.org submission?"):
        print("Ok, will update from this submission data on next vim.org "
              "scrape")
        submission['vimorg_id'] = util.get_vimorg_id_from_url(
            submission['vimorg-link'])
    else:
        submission['rejected'] = True

    # Either way the annotated submission overwrites the stored copy.
    # conflict='replace' is the current spelling of the legacy upsert=True
    # insert option in the RethinkDB driver.
    r.table('submitted_plugins').insert(
        submission, conflict='replace').run(r_conn())
Exemple #3
0
def review_vimorg_submission(submission):
    """Prompts whether to insert data about a plugin with a vimorg_id.

    Displays info about that submission, and displays an interactive prompt
    whether to add submitted data about it. If no, will add a field to the
    submission that it was rejected. If yes, will make the submission data
    searchable by adding a vimorg_id field so that data from it can be added
    when scraping vim.org plugins.

    Args:
        submission: Dict describing the submitted plugin. It is mutated in
            place (gains either ``rejected`` or ``vimorg_id``) and then
            written to the ``submitted_plugins`` table, replacing any
            existing document with the same primary key. ``vimorg-link`` is
            read when the submission is accepted.
    """
    # Single-argument print(...) produces the same output whether print is a
    # statement (Python 2) or a function (Python 3); print('') emits the
    # blank line a bare `print` statement used to.
    print('')
    print(json.dumps(submission, indent=2))

    accepted = _query_yes_no("Add info about this vim.org submission?")

    if not accepted:
        submission['rejected'] = True
    else:
        print("Ok, will update from this submission data on next vim.org "
              "scrape")
        vimorg_id = util.get_vimorg_id_from_url(submission['vimorg-link'])
        submission['vimorg_id'] = vimorg_id

    # Both outcomes persist the annotated submission with the same write.
    r.table('submitted_plugins').insert(submission,
                                        conflict='replace').run(r_conn())
Exemple #4
0
def scrape_vim_scripts_repos(num):
    """Scrape at least num repos from the vim-scripts GitHub user.

    Fetches the ``users/vim-scripts/repos`` listing one page at a time. Each
    repo's vim-scripts specific fields are passed to
    ``db.plugins.add_scraped_data`` (keyed by ``vimorg_id``), and the raw
    repo data is upserted into the ``PluginGithubRepos`` index.

    Args:
        num: Minimum number of repos to scrape. The limit is only checked
            before fetching a page, so every fetched page is processed in
            full and the final count may exceed ``num``.

    Raises:
        AssertionError: If no vim.org id can be derived from a repo's
            homepage URL.
    """
    _, user_data = get_api_page('users/vim-scripts')

    # Calculate how many pages of repositories there are.
    # NOTE(review): assumes get_api_page serves 100 repos per page -- confirm.
    num_repos = user_data['public_repos']
    # `//` keeps the page count an int even when `/` is true division
    # (Python 3, or `from __future__ import division`); a float here would
    # make range() raise TypeError.
    num_pages = (num_repos + 99) // 100  # ceil(num_repos / 100.0)

    num_inserted = 0
    num_scraped = 0

    for page in range(1, num_pages + 1):
        if num_scraped >= num:
            break

        _, repos_data = get_api_page('users/vim-scripts/repos', page=page)

        for repo_data in repos_data:

            # Scrape plugin-relevant data. We don't need much info from
            # vim-scripts because it's a mirror of vim.org.

            # vimorg_id is required for associating with the corresponding
            # vim.org-scraped plugin.
            vimorg_id = util.get_vimorg_id_from_url(repo_data['homepage'])
            assert vimorg_id, (
                'no vim.org id found in homepage %r of vim-scripts repo %r' %
                (repo_data['homepage'], repo_data['name']))

            repo_name = repo_data['name']

            # Reuse the plugin-manager user count from an already stored
            # copy of this repo when one exists.
            repo = PluginGithubRepos.get_with_owner_repo(
                'vim-scripts', repo_name)
            num_bundles = repo['plugin_manager_users'] if repo else 0

            db.plugins.add_scraped_data({
                'vimorg_id': vimorg_id,
                'github_vim_scripts_repo_name': repo_name,
                'github_vim_scripts_stars': repo_data['watchers'],
                'github_vim_scripts_bundles': num_bundles,
            })

            # Also add to our index of known GitHub plugins.
            inserted = PluginGithubRepos.upsert_with_owner_repo({
                'owner': 'vim-scripts',
                'repo_name': repo_name,
                'repo_data': repo_data,
            })

            num_inserted += int(inserted)
            num_scraped += 1

        # Single-argument print(...) produces the same output whether print
        # is a statement (Python 2) or a function (Python 3).
        print('    scraped %s repos' % num_scraped)

    print("\nScraped %s vim-scripts GitHub repos; inserted %s new ones." % (
        num_scraped, num_inserted))