예제 #1
0
def _download(source_url):
    """Fetch ``source_url`` with an HTTP GET and return the raw body bytes.

    If the response carries an HTTP error status, the response text is
    logged at error level and the ``requests.exceptions.HTTPError`` is
    re-raised to the caller.
    """
    reply = requests.get(source_url)

    try:
        reply.raise_for_status()
    except requests.exceptions.HTTPError:
        # Record the server's explanation before propagating the failure.
        logger.error("Request failed: %s", reply.text)
        raise
    else:
        return reply.content
예제 #2
0
def _make_request(url, access_token, params):
    """GET ``url`` with a bearer token and return the decoded JSON body.

    ``params`` is passed through as the query parameters and
    ``access_token`` is sent in an ``Authorization: Bearer ...`` header.
    On an HTTP error status the response text is logged at error level
    and the ``requests.exceptions.HTTPError`` is re-raised.
    """
    auth_headers = {'Authorization': f'Bearer {access_token}'}
    reply = requests.get(url, params=params, headers=auth_headers)

    try:
        reply.raise_for_status()
    except requests.exceptions.HTTPError:
        # Record the server's explanation before propagating the failure.
        logger.error('Request failed: %s', reply.text)
        raise
    else:
        return reply.json()
예제 #3
0
def _hawk_api_request(
    url: str,
    credentials: dict,
    results_key: Optional[str],
    next_key: Optional[str],
    validate_response: Optional[bool] = True,
    force_http: Optional[bool] = False,
):
    """Fetch one page from a Hawk-authenticated endpoint and return its JSON.

    Args:
        url: Address of the page to fetch. The GET request is always sent
            to this exact URL.
        credentials: Hawk credentials passed straight to ``Sender``.
        results_key: If truthy, a key that must be present at the top level
            of the JSON response.
        next_key: If truthy, a key that must be present at the top level of
            the JSON response.
        validate_response: When truthy, the response is authenticated via
            ``sender.accept_response`` using its ``Server-Authorization``
            and ``Content-Type`` headers.
        force_http: When truthy, the request is *signed* as if the URL used
            the ``http`` scheme (the request itself still goes to ``url``).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: The response has an error status
            (logged at warning level, then re-raised).
        HawkFail: Response validation failed (logged, then re-raised).
        ValueError: A required key is missing from the JSON response.
    """
    # Currently data workspace denies hawk requests signed with https urls.
    # Once fixed the protocol replacement can be removed.
    #
    # Only the leading scheme is rewritten. A blanket
    # ``url.replace('https', 'http')`` would also alter any "https" that
    # appears later in the path or query string (e.g. an embedded URL),
    # so the signed URL would no longer match the requested resource.
    signed_url = url
    if force_http and url.startswith('https://'):
        signed_url = 'http://' + url[len('https://'):]

    sender = Sender(
        credentials,
        signed_url,
        "get",
        content="",
        content_type="",
        always_hash_content=True,
    )

    logger.info(f"Fetching page {url}")
    response = requests.get(
        url,
        headers={
            "Authorization": sender.request_header,
            "Content-Type": ""
        },
        timeout=300,
    )

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.warning(f"Request failed: {response.text}")
        raise

    if validate_response:
        try:
            sender.accept_response(
                response.headers["Server-Authorization"],
                content=response.content,
                content_type=response.headers["Content-Type"],
            )
        except HawkFail as e:
            logger.error(f"HAWK Authentication failed {e}")
            raise

    response_json = response.json()

    # Both keys are checked at the top level of the payload only.
    if (next_key and next_key not in response_json) or (
            results_key and results_key not in response_json):
        raise ValueError("Unexpected response structure")

    return response_json
예제 #4
0
def _ons_sparql_request(url: str,
                        query: str,
                        page: int = 1,
                        per_page: int = 10000):
    """POST one page of a SPARQL query to ``url`` and return the JSON reply.

    A ``LIMIT``/``OFFSET`` clause derived from ``page`` and ``per_page`` is
    appended to ``query`` before it is sent as the ``query`` form field.

    Raises:
        requests.exceptions.HTTPError: The response has an error status
            (the response text is logged at error level, then re-raised).
        ValueError: The JSON reply has no ``"results"`` key.
    """
    offset = per_page * (page - 1)
    paged_query = f"{query} LIMIT {per_page} OFFSET {offset}"

    reply = requests.request(
        "POST",
        url,
        data={"query": paged_query},
        headers={"Accept": "application/json"},
    )

    try:
        reply.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error(f"Request failed: {reply.text}")
        raise

    payload = reply.json()
    if "results" in payload:
        return payload
    raise ValueError("Unexpected response structure")
def fetch_from_gtr_api(table_name: str, resource_type: str, **kwargs):
    """Page through a GtR API resource listing and store each page via S3Data.

    Requests ``{resource_type}s`` from the GtR API 100 records at a time,
    starting at page 1, and writes each page's records under a zero-padded
    ``<page>.json`` key. Paging stops once the page counter exceeds the
    ``totalPages`` value reported by the most recent response.

    Args:
        table_name: Passed to ``S3Data`` together with ``kwargs["ts_nodash"]``.
        resource_type: Singular resource name; used both to build the URL
            (with an ``s`` appended) and as the key holding the records in
            each JSON response.
        **kwargs: Must contain ``ts_nodash``.

    Raises:
        requests.exceptions.HTTPError: A response has an error status
            (the response text is logged at error level, then re-raised).
    """
    source_url = 'https://gtr.ukri.org/gtr/api'

    s3 = S3Data(table_name, kwargs["ts_nodash"])
    page = 1
    # Running count of records actually received. The previous
    # ``len(results * page)`` built a throwaway list on every iteration and
    # over-reported whenever a page (typically the last) was not full.
    total_fetched = 0

    while True:
        response = requests.get(
            f'{source_url}/{resource_type}s',
            params={
                'p': page,
                's': 100
            },
            headers={'Accept': 'application/json'},
        )

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            logger.error(f"Request failed: {response.text}")
            raise

        response_json = response.json()
        total_pages = response_json['totalPages']
        total_number_of_results = response_json['totalSize']

        results = response_json[resource_type]

        s3.write_key(f"{page:010}.json", results)

        total_fetched += len(results)
        logger.info(
            f"Fetched {total_fetched} out of {total_number_of_results} "
            f"{resource_type} records"
        )

        page += 1
        if page > total_pages:
            break

    logger.info("Fetching from source completed")
예제 #6
0
    assert not tp_repo.is_dirty()

    df_repo = git.Repo(DF_ROOT)
    assert not df_repo.is_dirty()
    df_commits = df_repo.iter_commits(paths='dataflow')
    df_latest_commit = next(df_commits)
    logger.info("DataFlow commit to match: \n" + show_commit(df_latest_commit))

    unsynced_commits = []
    for cmt in tp_repo.iter_commits(paths=['tensorpack/dataflow', 'tensorpack/utils']):
        if match_commit(cmt, df_latest_commit):
            logger.info("Matched tensorpack commit: \n" + show_commit(cmt))
            break
        unsynced_commits.append(cmt)
    else:
        logger.error("Cannot find tensorpack commit that matches the above commit.")
        sys.exit(1)
    logger.info("{} more commits to sync".format(len(unsynced_commits)))

    unsynced_commits = unsynced_commits[::-1]

    for commit_to_sync in unsynced_commits:
        tp_repo.git.checkout(commit_to_sync.hexsha)
        logger.info("-" * 60)
        logger.info("Syncing commit '{}' at {}".format(
            commit_to_sync.message.strip(), show_date(commit_to_sync.authored_date)))

        # sync files
        dst = os.path.join(DF_ROOT, 'dataflow', 'dataflow')
        logger.info("Syncing {} ...".format(dst))
        shutil.rmtree(dst)
예제 #7
0
def fetch_from_api_endpoint(
    table_name: str,
    source_url: str,
    auth_token: Optional[str] = None,
    auth_token_builder: Optional[Callable] = None,
    results_key: Optional[str] = "results",
    next_key: Optional[str] = "next",
    auth_type: Optional[str] = "Token",
    extra_headers: Optional[Mapping] = None,
    **kwargs,
):
    """Follow a paginated JSON endpoint and store every page via S3Data.

    Starting at ``source_url``, each page is fetched with an HTTP GET, its
    records are written under a zero-padded ``<page>.json`` key, and the
    next URL is read from the response using ``next_key``. The loop ends
    when ``next_key`` is falsy or the next URL is falsy.

    Args:
        table_name: Passed to ``S3Data`` together with ``kwargs["ts_nodash"]``.
        source_url: URL of the first page.
        auth_token: Static token for the ``Authorization`` header.
        auth_token_builder: Callable invoked on every request to produce the
            token; mutually exclusive with ``auth_token``.
        results_key: Key locating the records in each response. When
            ``None`` the whole response is stored.
        next_key: Key locating the next page URL in each response.
        auth_type: Prefix placed before the token in the header value.
        extra_headers: Additional headers merged over the auth header.
        **kwargs: Must contain ``ts_nodash``.

    Raises:
        ValueError: Both ``auth_token`` and ``auth_token_builder`` were
            given, or a response lacks a required top-level key.
        requests.exceptions.HTTPError: A response has an error status
            (the response text is logged at error level, then re-raised).
    """
    both_given = auth_token is not None and auth_token_builder is not None
    if both_given:
        raise ValueError(
            "You can provide at most one of `auth_token` and `auth_token_builder`"
        )

    s3 = S3Data(table_name, kwargs["ts_nodash"])
    page = 1
    total_records = 0

    while True:
        # Headers are rebuilt per request so a token builder is consulted
        # for every page.
        request_headers = {}
        if auth_token:
            request_headers["Authorization"] = f'{auth_type} {auth_token}'
        elif auth_token_builder:
            request_headers["Authorization"] = (
                f'{auth_type} {auth_token_builder()}'
            )

        if extra_headers:
            request_headers = {**request_headers, **extra_headers}

        response = requests.get(source_url, headers=request_headers)

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            logger.error(f"Request failed: {response.text}")
            raise

        response_json = response.json()

        # Each configured key must appear at the top level of the payload;
        # the next-page key is checked first, then the results key.
        for required_key in (next_key, results_key):
            if required_key and required_key not in response_json:
                raise ValueError("Unexpected response structure")

        results = (
            get_nested_key(response_json, results_key)
            if results_key is not None
            else response_json
        )

        s3.write_key(f"{page:010}.json", results)

        total_records += len(results)
        logger.info(f"Fetched {total_records} records")

        if not next_key:
            break

        source_url = get_nested_key(response_json, next_key)
        if not source_url:
            break

        page += 1

    logger.info("Fetching from source completed")