def s3_base(worker_id):
    """
    Fixture for mocking S3 interaction.

    Sets up moto server in separate process locally
    Return url for motoserver/moto CI service

    Parameters
    ----------
    worker_id : str
        Identifier used to pick a per-worker port (presumably the
        pytest-xdist worker id -- confirm against the fixture wiring).
        ``"master"`` maps to port 5555; otherwise leading ``g``/``w``
        characters are stripped and the remainder is appended to ``555``.

    Yields
    ------
    str
        ``"http://localhost:5000"`` when ``PANDAS_CI`` is ``"1"`` on a
        platform that is not ARM, macOS or Windows; otherwise the URI of the
        locally launched ``moto_server`` process.
    """
    pytest.importorskip("s3fs")
    pytest.importorskip("boto3")

    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
        os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")
        if os.environ.get("PANDAS_CI", "0") == "1":
            if is_platform_arm() or is_platform_mac() or is_platform_windows():
                # NOT RUN on Windows/MacOS/ARM, only Ubuntu
                # - subprocess in CI can cause timeouts
                # - Azure pipelines/Github Actions do not support
                #   container services for the above OSs
                # - CircleCI will probably hit the Docker rate pull limit
                pytest.skip(
                    "S3 tests do not have a corresponding service in "
                    "Windows, MacOS or ARM platforms"
                )
            else:
                yield "http://localhost:5000"
        else:
            requests = pytest.importorskip("requests")
            pytest.importorskip("moto", minversion="1.3.14")
            pytest.importorskip("flask")  # server mode needs flask too

            # Launching moto in server mode, i.e., as a separate process
            # with an S3 endpoint on localhost

            worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
            endpoint_port = f"555{worker_id}"
            endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"

            # pipe to null to avoid logging in terminal
            with subprocess.Popen(
                shlex.split(f"moto_server s3 -p {endpoint_port}"),
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            ) as proc:
                # The finally clause guarantees the server is terminated even
                # when the generator is closed or an exception is raised at
                # the yield; otherwise leaving the ``with`` block would wait
                # on a process that is still running.
                try:
                    timeout = 5
                    while timeout > 0:
                        try:
                            # OK to go once server is accepting connections
                            r = requests.get(endpoint_uri)
                            if r.ok:
                                break
                        except Exception:
                            # Best effort: the server may simply not be
                            # listening yet, so keep polling.
                            pass
                        timeout -= 0.1
                        time.sleep(0.1)
                    # The URI is yielded even if the polling budget ran out.
                    yield endpoint_uri
                finally:
                    proc.terminate()
def s3_resource(tips_file, jsonl_file, feather_file):
    """
    Fixture for mocking S3 interaction.

    The primary bucket name is "pandas-test". The following datasets
    are loaded.

    - tips#1.csv
    - tips.csv
    - tips.csv.gz
    - tips.csv.bz2
    - items.jsonl
    - simple_dataset.feather

    A private bucket "cant_get_it" is also created and receives the same
    files. The boto3 s3 resource is yielded by the fixture.

    Parameters
    ----------
    tips_file : str
        Path of the tips CSV file; the files at ``tips_file + ".gz"`` and
        ``tips_file + ".bz2"`` are opened as well.
    jsonl_file : str
        Path of the file uploaded under the key "items.jsonl".
    feather_file : str
        Path of the file uploaded under the key "simple_dataset.feather".

    Yields
    ------
    boto3 S3 resource
        The resource on which both buckets were created.
    """
    s3fs = pytest.importorskip("s3fs")
    boto3 = pytest.importorskip("boto3")

    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
        os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")

        moto = pytest.importorskip("moto")

        test_s3_files = [
            ("tips#1.csv", tips_file),
            ("tips.csv", tips_file),
            ("tips.csv.gz", tips_file + ".gz"),
            ("tips.csv.bz2", tips_file + ".bz2"),
            ("items.jsonl", jsonl_file),
            ("simple_dataset.feather", feather_file),
        ]

        def add_tips_files(bucket_name):
            # ``conn`` is looked up when this helper is called, i.e. after it
            # has been bound below.
            for s3_key, file_name in test_s3_files:
                with open(file_name, "rb") as f:
                    conn.Bucket(bucket_name).put_object(Key=s3_key, Body=f)

        # Create and start the mock before entering the try block: if either
        # step fails there is nothing to stop, and the cleanup must not run
        # against an unbound ``s3`` and hide the original error.
        s3 = moto.mock_s3()
        s3.start()
        try:
            # see gh-16135
            bucket = "pandas-test"
            conn = boto3.resource("s3", region_name="us-east-1")

            conn.create_bucket(Bucket=bucket)
            add_tips_files(bucket)

            conn.create_bucket(Bucket="cant_get_it", ACL="private")
            add_tips_files("cant_get_it")
            s3fs.S3FileSystem.clear_instance_cache()
            yield conn
        finally:
            s3.stop()
def test_read_with_creds_from_pub_bucket():
    """
    Check that a public bucket can be read while credentials are set.

    GH 34626. The data comes from the Amazon Open Data Registry,
    https://registry.opendata.aws/gdelt
    """
    n_rows = 5
    placeholder_credentials = (
        ("AWS_ACCESS_KEY_ID", "foobar_key"),
        ("AWS_SECRET_ACCESS_KEY", "foobar_secret"),
    )
    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        # setdefault leaves any credentials already in the environment alone.
        for env_name, env_value in placeholder_credentials:
            os.environ.setdefault(env_name, env_value)

        frame = read_csv(
            "s3://gdelt-open-data/events/1981.csv",
            nrows=n_rows,
            sep="\t",
            header=None,
        )
        assert len(frame) == 5
def s3_base(worker_id):
    """
    Fixture for mocking S3 interaction.

    Sets up moto server in separate process

    Parameters
    ----------
    worker_id : str
        Identifier used to pick a per-worker port (presumably the
        pytest-xdist worker id -- confirm against the fixture wiring).
        ``"master"`` maps to port 5555; otherwise leading ``g``/``w``
        characters are stripped and the remainder is appended to ``555``.

    Yields
    ------
    str
        URI of the locally launched ``moto_server`` process.
    """
    pytest.importorskip("s3fs")
    pytest.importorskip("boto3")
    requests = pytest.importorskip("requests")
    logging.getLogger("requests").disabled = True

    with tm.ensure_safe_environment_variables():
        # temporary workaround as moto fails for botocore >= 1.11 otherwise,
        # see https://github.com/spulec/moto/issues/1924 & 1952
        os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
        os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")

        pytest.importorskip("moto", minversion="1.3.14")
        pytest.importorskip("flask")  # server mode needs flask too

        # Launching moto in server mode, i.e., as a separate process
        # with an S3 endpoint on localhost

        worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
        endpoint_port = f"555{worker_id}"
        endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"

        # pipe to null to avoid logging in terminal
        proc = subprocess.Popen(
            shlex.split(f"moto_server s3 -p {endpoint_port}"),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        # Everything after the launch is guarded so the child process is
        # terminated and reaped even if polling is interrupted or the
        # generator is closed / receives an exception at the yield.
        try:
            timeout = 5
            while timeout > 0:
                try:
                    # OK to go once server is accepting connections
                    r = requests.get(endpoint_uri)
                    if r.ok:
                        break
                except Exception:
                    # Best effort: the server may simply not be listening
                    # yet, so keep polling.
                    pass
                timeout -= 0.1
                time.sleep(0.1)
            # The URI is yielded even if the polling budget ran out.
            yield endpoint_uri
        finally:
            proc.terminate()
            proc.wait()