Beispiel #1
0
def download_file_from_s3(bucket_name: str, key: str, local_path: str) -> None:
    """
    Fetch an object from S3 with unsigned (anonymous) requests, unless a file
    already exists at the destination path.

    :param bucket_name: Name of the S3 bucket holding the object
    :param key: Key of the object inside the bucket
    :param local_path: Destination path on the local filesystem
    :raises KeyError: if the S3 client reports a ClientError for the object
    """
    verify_ssl = get_verify_ssl()

    # An existing local file is treated as a cache hit; nothing is fetched.
    if os.path.isfile(local_path):
        logger.info(f"Reusing cached file {local_path}...")
        return

    unsigned_config = Config(signature_version=UNSIGNED)
    s3 = boto3.client("s3", config=unsigned_config, verify=verify_ssl)

    try:
        logger.info("Downloading S3 data file...")
        # The object size is passed to the progress reporter as its total.
        object_meta = s3.head_object(Bucket=bucket_name, Key=key)
        size_bytes = object_meta["ContentLength"]
        with ProgressPercentage(s3, bucket_name, key, size_bytes) as progress:
            s3.download_file(bucket_name, key, local_path, Callback=progress)
    except ClientError:
        raise KeyError(f"File {key} not available in {bucket_name} bucket.")
Beispiel #2
0
def download_private_file_from_s3(bucket_name: str, key: str, local_path: str):
    """
    Fetch an object from S3 using the credentials found in the
    `ARMORY_PRIVATE_S3_ID` and `ARMORY_PRIVATE_S3_KEY` environment variables,
    unless a file already exists at the destination path.

    :param bucket_name: Name of the S3 bucket holding the object
    :param key: Key of the object inside the bucket
    :param local_path: Destination path on the local filesystem
    :raises KeyError: if the S3 client reports a ClientError for the object
    """
    verify_ssl = get_verify_ssl()

    # An existing local file is treated as a cache hit; nothing is fetched.
    if os.path.isfile(local_path):
        logger.info("Reusing cached S3 data file...")
        return

    access_key_id = os.getenv("ARMORY_PRIVATE_S3_ID")
    secret_access_key = os.getenv("ARMORY_PRIVATE_S3_KEY")
    s3 = boto3.client(
        "s3",
        aws_access_key_id=access_key_id,
        aws_secret_access_key=secret_access_key,
        verify=verify_ssl,
    )

    try:
        logger.info("Downloading S3 data file...")
        # The object size is passed to the progress reporter as its total.
        object_meta = s3.head_object(Bucket=bucket_name, Key=key)
        size_bytes = object_meta["ContentLength"]
        with ProgressPercentage(s3, bucket_name, key, size_bytes) as progress:
            s3.download_file(bucket_name, key, local_path, Callback=progress)
    except ClientError:
        raise KeyError(f"File {key} not available in {bucket_name} bucket.")
Beispiel #3
0
def download_requests(url: str, dirpath: str, filename: str):
    """
    Streams the content at `url` into the file `dirpath/filename` in 4096-byte
    chunks while displaying a byte-count progress bar.

    :param url: URL to download from
    :param dirpath: Directory to write the file into (not created here)
    :param filename: Name of the file to write inside `dirpath`
    """
    verify_ssl = get_verify_ssl()

    filepath = os.path.join(dirpath, filename)
    chunk_size = 4096

    # The response is used as a context manager so the streamed connection is
    # released even if writing fails part-way through.
    with requests.get(url, stream=True, verify=verify_ssl) as r:
        # NOTE(review): the HTTP status is not checked, so an error page body
        # would be written to `filepath` — confirm whether callers rely on it.

        # Servers may omit Content-Length (e.g. chunked transfer encoding);
        # tqdm accepts total=None and then shows a running count only.
        content_length = r.headers.get("Content-Length")
        total = int(content_length) if content_length is not None else None

        with open(filepath, "wb") as f, tqdm(unit="B",
                                             total=total,
                                             unit_scale=True) as progress_bar:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # filter keep-alive chunks
                    progress_bar.update(len(chunk))
                    f.write(chunk)
Beispiel #4
0
def download_and_extract_repo(external_repo_name: str,
                              external_repo_dir: str = None) -> None:
    """
    Downloads and extracts an external repository for use within ARMORY. The external
    repositories project root will be added to the sys path.

    Private repositories require an `ARMORY_GITHUB_TOKEN` environment variable.
    :param external_repo_name: String name of "organization/repo-name" or "organization/repo-name@branch"
    :param external_repo_dir: Directory to download and extract into. When None,
        `paths.runtime_paths().external_repo_dir` is used.
    :raises ConnectionError: if the GitHub API response status is not 200
    """
    verify_ssl = get_verify_ssl()

    if external_repo_dir is None:
        external_repo_dir = paths.runtime_paths().external_repo_dir

    os.makedirs(external_repo_dir, exist_ok=True)

    if "@" in external_repo_name:
        # Split on the first "@" only, so a ref that itself contains "@" does
        # not break the two-value unpacking.
        org_repo_name, branch = external_repo_name.split("@", 1)
    else:
        org_repo_name = external_repo_name
        branch = "master"
    repo_name = org_repo_name.split("/")[-1]

    # An unset or empty token means the request is sent unauthenticated.
    headers = {}
    token = os.getenv("ARMORY_GITHUB_TOKEN")
    if token:
        headers = {"Authorization": f"token {token}"}

    # The response is used as a context manager so the streamed connection is
    # released on both the success and the error path.
    with requests.get(
            f"https://api.github.com/repos/{org_repo_name}/tarball/{branch}",
            headers=headers,
            stream=True,
            verify=verify_ssl,
    ) as response:
        if response.status_code != 200:
            raise ConnectionError(
                "Unable to download repository. If it's private make sure "
                "`ARMORY_GITHUB_TOKEN` environment variable is set\n"
                f"status_code is {response.status_code}\n"
                f"full response is {response.text}")

        logger.info(f"Downloading external repo: {external_repo_name}")

        tar_filename = os.path.join(external_repo_dir, repo_name + ".tar.gz")
        with open(tar_filename, "wb") as f:
            f.write(response.raw.read())

    # Close the archive handle once extraction is done.
    with tarfile.open(tar_filename, "r:gz") as tar:
        # The first archive member is taken as the top-level directory name.
        dl_directory_name = tar.getnames()[0]
        # NOTE(review): extractall() trusts the member paths of a downloaded
        # archive; consider tarfile's extraction `filter` argument where the
        # running Python version supports it.
        tar.extractall(path=external_repo_dir)

    # Always overwrite existing repositories to keep them at HEAD
    final_dir_name = os.path.join(external_repo_dir, repo_name)
    if os.path.isdir(final_dir_name):
        shutil.rmtree(final_dir_name)
    os.rename(
        os.path.join(external_repo_dir, dl_directory_name),
        final_dir_name,
    )
    add_path(final_dir_name, include_parent=True)