Beispiel #1
0
def _scrape_sections() -> dict:
    """Get listing of sections with their headings.

    Downloads the documentation index page (``github_docs``/``github_docs_version``
    from the configuration) and walks every ``<li class="js-topic">`` element.
    The second anchor of the topic heading and the anchor of every nested
    ``<li>`` are registered through ``_add_section``.

    Returns:
        The dictionary filled in by ``_add_section``; values are
        ``(topic text, None)`` for a topic itself and
        ``(topic text, entry text)`` for entries nested under it.

    Raises:
        requests.HTTPError: If the index page request fails
            (raised by ``raise_for_status``).
    """
    result = {}
    _LOG.debug("Scraping available sections")
    index_url = "{}/{}".format(Configuration().github_docs, Configuration().github_docs_version)
    response = requests.get(index_url)
    response.raise_for_status()

    soup = bs4.BeautifulSoup(response.content, 'html.parser')
    for topic in soup.find_all('li', {'class': 'js-topic'}):
        heading = topic.h3
        if heading is None:
            # A topic without an <h3> heading carries no section link to follow.
            continue

        anchors = heading.find_all('a')
        if len(anchors) != 2:
            # Only headings with exactly two anchors are handled; the second one is used.
            continue

        item = anchors[1]
        link = _get_item_href(item)
        # The None guard mirrors the check applied to nested entries below.
        if link is None or len(link) != 1:
            # Omit references to the same document e.g. '/v3/search/#search-repositories'
            _LOG.debug("Omitting in-document reference %r", link)
            continue

        _add_section(result, link[0], (item.text, None))

        # All <li> descendants of the topic are sub-sections of this topic.
        for child in topic.find_all('li'):
            link = _get_item_href(child.a)
            if link is None or len(link) != 1:
                # Omit references to the same document
                _LOG.debug("Omitting in-document reference %r", link)
                continue
            _add_section(result, link[0], (item.text, child.text))

    return result
Beispiel #2
0
def _convert2swagger(internal_representation: dict) -> dict:
    """Convert the scraped internal representation into a Swagger 2.0 document.

    Args:
        internal_representation: Mapping of URL to a mapping of item
            descriptions to scraped values. Only values holding a ``'@top'``
            entry are considered; each of its ``'@requests'`` must provide
            ``'@endpoint'``, ``'@method'`` and ``'@description'``, and the
            value itself must provide ``'@tag'``.

    Returns:
        A dictionary with the Swagger ``paths`` object (keyed by lowercased
        endpoint, then by lowercased HTTP method) together with the static
        document metadata taken from the configuration.
    """
    paths = {}
    for entries in internal_representation.values():
        for value in entries.values():
            if not value or '@top' not in value:
                continue

            for request in value['@top'].get('@requests', []):
                # Merge operations per endpoint so that several HTTP methods
                # on the same path are all kept instead of the last one
                # replacing the whole path entry.
                operations = paths.setdefault(request['@endpoint'].lower(), {})
                operations[request['@method'].lower()] = {
                    'summary': request['@description'] or "",
                    'tags': [value['@tag']]
                }

    return {
        'paths': paths,
        "produces": [
            "application/json"
        ],
        "swagger": "2.0",
        "info": {
            "description": "GitHub API v3 swagger definition."
        },
        "host": Configuration().github_api,
        "basePath": "/{}".format(Configuration().github_docs_version),
        # Swagger 2.0 defines ``schemes`` as a list of strings.
        "schemes": ["https"]
    }
Beispiel #3
0
def cli(ctx=None,
        verbose=0,
        no_color=True,
        user=None,
        password=None,
        token=None,
        no_validate_schemas=False,
        no_omit_rate_limiting=False,
        no_pagination=False,
        headers=None,
        per_page_listing=None,
        github_api=None):
    """Githubcap command line interface.

    Sets up logging and copies the supplied options into the shared
    ``Configuration``. Options left as ``None`` do not touch the
    configuration; the three ``no_*`` flags are always stored, negated.
    """
    if ctx:
        ctx.auto_envvar_prefix = 'GITHUBCAP'

    setup_logging(verbose, no_color)

    # Plain options are stored under the attribute of the same name.
    plain_options = (
        ('user', user),
        ('password', password),
        ('token', token),
        ('per_page_listing', per_page_listing),
        ('github_api', github_api),
    )
    for attribute, supplied in plain_options:
        if supplied is not None:
            setattr(Configuration(), attribute, supplied)

    # Headers arrive as a raw CLI string and need parsing first.
    if headers is not None:
        Configuration().headers = _parse_cli_headers(headers)

    # The CLI exposes negative switches; the configuration stores the positive form.
    Configuration().omit_rate_limiting = not no_omit_rate_limiting
    Configuration().pagination = not no_pagination
    Configuration().validate_schemas = not no_validate_schemas
    _LOG.debug("Configuration: %s", attr.asdict(Configuration().instance))
Beispiel #4
0
def _add_section(result: dict, link: str, text: typing.Tuple[str, typing.Union[None, str]]) -> None:
    """Add section to the resulting scraping section.

    Args:
        result: Mapping of link to section text; updated in place.
        link: Link of the section, relative to the configured ``github_docs``.
        text: Pair stored as the value for ``link``.

    Raises:
        ValueError: If ``link`` is already present in ``result``.
    """
    if link in _NO_SCRAPE_SITES:
        _LOG.debug("Skipping blacklisted section %s%s", Configuration().github_docs, link)
        return

    _LOG.debug("Found section to be scraped %r: %s%s",
               text, Configuration().github_docs, link)
    if link in result:
        # Name both the existing and the new entry so the clash can be diagnosed.
        raise ValueError(
            "Section %r is already registered as %r, cannot register it again as %r"
            % (link, result[link], text)
        )
    result[link] = text
Beispiel #5
0
def cli_config(ctx,
               no_pretty=False,
               create=False,
               overwrite=False,
               path=None,
               no_print=False):
    """Write the githubcap configuration to a file and/or print it.

    When neither writing (``create``) nor printing (``no_print`` unset) is
    requested, an error is logged and ``ctx.exit(1)`` is called.
    """
    show_config = not no_print

    if not (create or show_config):
        # Neither of the two possible actions was asked for.
        _LOG.error("Nothing to do, exiting...")
        ctx.exit(1)

    if create:
        Configuration().write2file(path, overwrite=overwrite)

    if show_config:
        print_command_result(Configuration().to_dict(), pretty=not no_pretty)
Beispiel #6
0
def _do_scrape(sections: dict, schemas_dir: Optional[str] = None, resources_dir: Optional[str] = None) -> dict:
    """Scrape remote documentation and return its parsed representation.

    For every section link the documentation page is downloaded and its single
    ``<div class="content">`` is split by ``<h2>`` headings. The siblings
    following each heading (up to the next ``<h2>``) are inspected and the
    findings are stored in the dictionary returned by ``_where_location`` for
    the current heading / ``<h3>`` section / ``<h4>`` sub-section.

    Args:
        sections: Mapping of link to a ``(tag, title)`` pair.
        schemas_dir: Directory created for schemas; skipped when ``None``.
        resources_dir: Directory created for resources; skipped when ``None``.

    Returns:
        Mapping of link to a record keyed by stripped ``<h2>`` text. Headings
        with no findings are dropped; the remaining ones additionally carry
        ``'@description'``, ``'@tag'`` and ``'@title'``.

    Raises:
        requests.HTTPError: If a documentation page request fails.
        ValueError: If a page does not contain exactly one content element.
    """
    # Both directories default to None and os.makedirs() rejects None with a
    # TypeError, so only create the directories that were actually supplied.
    if resources_dir is not None:
        _LOG.debug("Creating directory %r for resources", resources_dir)
        os.makedirs(resources_dir, exist_ok=True)
    if schemas_dir is not None:
        _LOG.debug("Creating directory %r for schemas", schemas_dir)
        os.makedirs(schemas_dir, exist_ok=True)

    all_items = {}
    for link, (item_tag, item_title) in sections.items():
        record = {}
        all_items[link] = record
        url = "{}{}".format(Configuration().github_docs, link)
        _LOG.debug("Scraping %r to automatically construct classes", url)
        response = requests.get(url)
        response.raise_for_status()

        soup = bs4.BeautifulSoup(response.content, 'html.parser')
        content = soup.find_all('div', {'class': 'content'})
        if len(content) != 1:
            # Covers both the "none found" and the "several found" case.
            raise ValueError(
                "Expected exactly one content element in %r, found %d" % (url, len(content))
            )
        content = content[0]

        # find_all('h2') yields only <h2> tags, no further filtering is needed.
        for obj in content.find_all('h2'):
            item = obj.text.strip()
            assert item not in record, "Heading %r found more than once in %r" % (item, url)
            record[item] = {}
            last_desc = None
            last_section_title = None
            last_subsection_title = None
            for sibling in obj.next_siblings:
                if sibling.name == 'h2':
                    # The next heading starts a new item.
                    break

                if isinstance(sibling, bs4.element.NavigableString):
                    continue

                if sibling.name == 'p':
                    # Remember the latest paragraph; it describes what follows.
                    last_desc = sibling

                if sibling.name == 'h3':
                    last_section_title = sibling.text.strip()
                    last_subsection_title = None
                    _LOG.debug("Found new section %r in %r", last_section_title, item)

                if sibling.name == 'h4':
                    last_subsection_title = sibling.text.strip()
                    _LOG.debug("Found new sub-section %r in %r", last_subsection_title, item)

                if sibling.name == 'pre' and \
                        sibling.text.lstrip().startswith(('GET', 'DELETE', 'PATCH', 'POST', 'PUT')):
                    where = _where_location(record, item, last_section_title, last_subsection_title)
                    where.setdefault('@requests', []).append(_parse_request_def(sibling.text, last_desc))

                if sibling.name == 'table':
                    _LOG.debug("Found table describing types for %r (subsection %r) in %r",
                               last_section_title, last_subsection_title, item)
                    where = _where_location(record, item, last_section_title, last_subsection_title)
                    type_def = _parse_type_definition(sibling)
                    if '@types' in where:
                        # A further table at the same location is stored as a
                        # sub-type named by the <code> element of the preceding
                        # paragraph.
                        # NOTE(review): assumes such a paragraph with <code>
                        # always precedes the table - confirm.
                        var_name = last_desc.find('code').text
                        where.setdefault('@subtypes', []).append({var_name: type_def})
                    else:
                        where['@types'] = type_def

                if sibling.name == 'pre' and 'highlight-json' in sibling.attrs.get('class', []):
                    _LOG.debug("Found JSON format in %r for %r, subsection %r",
                               last_section_title, item, last_subsection_title)
                    where = _where_location(record, item, last_section_title, last_subsection_title)
                    where.setdefault('@json', []).append(json.loads(sibling.text))

                if sibling.name == 'pre' and 'highlight-headers' in sibling.attrs.get('class', []):
                    _LOG.debug("Found headers in %r for %r, subsection %r",
                               last_section_title, item, last_subsection_title)
                    where = _where_location(record, item, last_section_title, last_subsection_title)
                    where.setdefault('@headers', []).append(sibling.text)

                if sibling.name == 'div' and 'note' in sibling.attrs.get('class', []):
                    for s in sibling.find_all('code'):
                        if not s.text.startswith('application/vnd.github.'):
                            continue

                        where = _where_location(record, item, last_section_title, last_subsection_title)
                        media_type = s.text.strip()
                        # The former assertion checked a misspelled key and so
                        # never fired: a later media type has always replaced
                        # an earlier one. Keep that behaviour, but report it.
                        if '@additional-headers' in where:
                            _LOG.warning("Replacing additional header %r with %r in %r",
                                         where['@additional-headers'], media_type, item)
                        where['@additional-headers'] = media_type

            if not record[item]:
                _LOG.debug("No valuable data found for %r", item)
                record.pop(item)
            else:
                record[item]['@description'] = _find_section_description(obj)
                record[item]['@tag'] = item_tag
                record[item]['@title'] = item_title

    return all_items
Beispiel #7
0
def validate_response(validator, response):
    """Run ``validator`` on ``response`` if schema validation is enabled.

    The response is returned unchanged in either case.
    """
    if not Configuration().validate_schemas:
        # Validation is switched off in the configuration.
        return response

    validator(response)
    return response
Beispiel #8
0
)
@click.option(
    '-P',
    '--no-pagination',
    is_flag=True,
    help="Respect pagination - perform multiple API calls on paginated response."
)
@click.option('-H',
              '--headers',
              type=str,
              help="A comma separated list of headers to be sent.")
@click.option('-l',
              '--per_page_listing',
              type=int,
              show_default=True,
              default=Configuration().per_page_listing,
              help="Number of entries in page listing in a single API call.")
@click.option('--github-api',
              type=str,
              default=Configuration().github_api,
              show_default=True,
              help="GitHub API endpoint.")
@click.option('--no-validate-schemas',
              '-S',
              is_flag=True,
              help="GitHub API endpoint.")
def cli(ctx=None,
        verbose=0,
        no_color=True,
        user=None,
        password=None,