Ejemplo n.º 1
0
def s3_base(worker_id):
    """
    Fixture for mocking S3 interaction.

    When the ``PANDAS_CI`` environment variable is ``"1"``, no server is
    started: the URL of the moto CI service is yielded instead (or the test
    is skipped on Windows/MacOS/ARM, where that service is unavailable).
    Otherwise a ``moto_server`` subprocess is launched locally and its URL
    is yielded.

    Parameters
    ----------
    worker_id : str
        Either ``"master"`` or an id with a leading ``"gw"`` (presumably the
        pytest-xdist worker id -- confirm against the fixture's caller).
        It selects the last digit(s) of the local port so that each worker
        gets its own server.

    Yields
    ------
    str
        Base URL of the S3 endpoint.
    """
    pytest.importorskip("s3fs")
    pytest.importorskip("boto3")

    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
        os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")
        if os.environ.get("PANDAS_CI", "0") == "1":
            if is_platform_arm() or is_platform_mac() or is_platform_windows():
                # NOT RUN on Windows/MacOS/ARM, only Ubuntu
                # - subprocess in CI can cause timeouts
                # - Azure pipelines/Github Actions do not support
                #   container services for the above OSs
                # - CircleCI will probably hit the Docker rate pull limit
                pytest.skip("S3 tests do not have a corresponding service in "
                            "Windows, MacOS or ARM platforms")
            else:
                yield "http://localhost:5000"
        else:
            requests = pytest.importorskip("requests")
            pytest.importorskip("moto", minversion="1.3.14")
            pytest.importorskip("flask")  # server mode needs flask too

            # Launching moto in server mode, i.e., as a separate process
            # with an S3 endpoint on localhost

            # "master" -> port 5555; "gw<N>" -> port 555<N>
            worker_id = "5" if worker_id == "master" else worker_id.lstrip(
                "gw")
            endpoint_port = f"555{worker_id}"
            endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"

            # pipe to null to avoid logging in terminal
            with subprocess.Popen(
                    shlex.split(f"moto_server s3 -p {endpoint_port}"),
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
            ) as proc:
                # Leaving the ``with`` block waits for the child to exit, so
                # the server must be terminated on *every* exit path
                # (normal teardown, generator close, interrupt); otherwise
                # that wait would block on a server that never stops.
                try:
                    timeout = 5
                    while timeout > 0:
                        try:
                            # OK to go once server is accepting connections
                            r = requests.get(endpoint_uri)
                            if r.ok:
                                break
                        except Exception:
                            # best effort: server is not up yet, retry
                            pass
                        timeout -= 0.1
                        time.sleep(0.1)
                    yield endpoint_uri
                finally:
                    proc.terminate()
Ejemplo n.º 2
0
def s3_resource(tips_file, jsonl_file, feather_file):
    """
    Fixture for mocking S3 interaction.

    The primary bucket name is "pandas-test". The following datasets
    are loaded.

    - tips#1.csv
    - tips.csv
    - tips.csv.gz
    - tips.csv.bz2
    - items.jsonl
    - simple_dataset.feather

    A private bucket "cant_get_it" is also created and filled with the same
    objects. The boto3 s3 resource is yielded by the fixture.

    Parameters
    ----------
    tips_file : str
        Path of the tips CSV file; files with the same path plus ``".gz"``
        and ``".bz2"`` are opened as well.
    jsonl_file : str
        Path of the file uploaded as ``items.jsonl``.
    feather_file : str
        Path of the file uploaded as ``simple_dataset.feather``.
    """
    s3fs = pytest.importorskip("s3fs")
    boto3 = pytest.importorskip("boto3")

    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
        os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")

        moto = pytest.importorskip("moto")

        test_s3_files = [
            ("tips#1.csv", tips_file),
            ("tips.csv", tips_file),
            ("tips.csv.gz", tips_file + ".gz"),
            ("tips.csv.bz2", tips_file + ".bz2"),
            ("items.jsonl", jsonl_file),
            ("simple_dataset.feather", feather_file),
        ]

        def add_tips_files(conn, bucket_name):
            # Upload every test file into ``bucket_name`` under its S3 key.
            for s3_key, file_name in test_s3_files:
                with open(file_name, "rb") as f:
                    conn.Bucket(bucket_name).put_object(Key=s3_key, Body=f)

        # Create and start the mock *before* entering ``try``: if either
        # step fails there is nothing to stop, and the ``finally`` clause
        # must not mask the real error with a NameError on ``s3``.
        s3 = moto.mock_s3()
        s3.start()
        try:
            # see gh-16135
            bucket = "pandas-test"
            conn = boto3.resource("s3", region_name="us-east-1")

            conn.create_bucket(Bucket=bucket)
            add_tips_files(conn, bucket)

            conn.create_bucket(Bucket="cant_get_it", ACL="private")
            add_tips_files(conn, "cant_get_it")
            s3fs.S3FileSystem.clear_instance_cache()
            yield conn
        finally:
            s3.stop()
Ejemplo n.º 3
0
def test_read_with_creds_from_pub_bucket():
    """Check that a public bucket can be read while credentials are set."""
    # GH 34626
    # Data comes from the Amazon Open Data Registry:
    # https://registry.opendata.aws/gdelt
    dummy_credentials = (
        ("AWS_ACCESS_KEY_ID", "foobar_key"),
        ("AWS_SECRET_ACCESS_KEY", "foobar_secret"),
    )
    public_object = "s3://gdelt-open-data/events/1981.csv"
    wanted_rows = 5

    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        # (only fills in values that are not already present)
        for variable, placeholder in dummy_credentials:
            os.environ.setdefault(variable, placeholder)

        frame = read_csv(public_object, nrows=wanted_rows, sep="\t", header=None)
        assert len(frame) == 5
Ejemplo n.º 4
0
def s3_base(worker_id):
    """
    Fixture for mocking S3 interaction.

    Sets up moto server in separate process and yields its URL; the
    process is terminated and waited for when the fixture is torn down.

    Parameters
    ----------
    worker_id : str
        Either ``"master"`` or an id with a leading ``"gw"`` (presumably the
        pytest-xdist worker id -- confirm against the fixture's caller).
        It selects the last digit(s) of the local port so that each worker
        gets its own server.

    Yields
    ------
    str
        Base URL of the local moto S3 endpoint.
    """
    pytest.importorskip("s3fs")
    pytest.importorskip("boto3")
    requests = pytest.importorskip("requests")
    logging.getLogger("requests").disabled = True

    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
        os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")

        pytest.importorskip("moto", minversion="1.3.14")
        pytest.importorskip("flask")  # server mode needs flask too

        # Launching moto in server mode, i.e., as a separate process
        # with an S3 endpoint on localhost

        # "master" -> port 5555; "gw<N>" -> port 555<N>
        worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
        endpoint_port = f"555{worker_id}"
        endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"

        # pipe to null to avoid logging in terminal
        proc = subprocess.Popen(
            shlex.split(f"moto_server s3 -p {endpoint_port}"),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # From here on the child process exists, so it must be stopped and
        # reaped on every exit path (normal teardown, generator close,
        # interrupt during the readiness poll) to avoid leaking the server.
        try:
            timeout = 5
            while timeout > 0:
                try:
                    # OK to go once server is accepting connections
                    r = requests.get(endpoint_uri)
                    if r.ok:
                        break
                except Exception:
                    # best effort: server is not up yet, retry
                    pass
                timeout -= 0.1
                time.sleep(0.1)
            yield endpoint_uri
        finally:
            proc.terminate()
            proc.wait()