def test_execute_without_replace(self, mock_hook, mock_hook2):
    """With replace=False, keys already present in S3 must not be re-uploaded."""
    mock_hook.return_value.list.return_value = MOCK_FILES
    mock_hook.return_value.download.return_value = b"testing"
    mock_hook2.return_value.list.return_value = MOCK_FILES
    op = GCSToS3Operator(
        task_id=TASK_ID,
        bucket=GCS_BUCKET,
        prefix=PREFIX,
        delimiter=DELIMITER,
        dest_aws_conn_id=None,
        dest_s3_key=S3_BUCKET,
        replace=False,
    )
    # Pre-populate the destination bucket with every mocked file.
    s3_hook = S3Hook(aws_conn_id=None)
    dest_bucket = s3_hook.get_bucket('bucket')
    dest_bucket.create()
    for filename in MOCK_FILES:
        dest_bucket.put_object(Key=filename, Body=b'testing')
    # Every key already exists, so the operator should upload nothing,
    # while the bucket still holds all of MOCK_FILES afterwards.
    uploaded = op.execute(None)
    self.assertEqual([], uploaded)
    self.assertEqual(sorted(MOCK_FILES), sorted(s3_hook.list_keys('bucket', delimiter='/')))
def test_execute_incremental(self, mock_hook, mock_hook2):
    """With replace=False, only keys missing from S3 are uploaded (incremental sync)."""
    mock_hook.return_value.list.return_value = MOCK_FILES
    mock_hook.return_value.download.return_value = b"testing"
    mock_hook2.return_value.list.return_value = MOCK_FILES
    op = GCSToS3Operator(
        task_id=TASK_ID,
        bucket=GCS_BUCKET,
        prefix=PREFIX,
        delimiter=DELIMITER,
        dest_aws_conn_id="aws_default",
        dest_s3_key=S3_BUCKET,
        replace=False,
    )
    # Create the destination bucket and seed it with just the first mocked file.
    s3_hook = S3Hook(aws_conn_id='airflow_gcs_test')
    dest_bucket = s3_hook.get_bucket('bucket')
    dest_bucket.create()
    dest_bucket.put_object(Key=MOCK_FILES[0], Body=b'testing')
    # Only the first key pre-exists, so everything after it should be uploaded,
    # and the bucket ends up holding all of MOCK_FILES.
    uploaded = op.execute(None)
    self.assertEqual(sorted(MOCK_FILES[1:]), sorted(uploaded))
    self.assertEqual(sorted(MOCK_FILES), sorted(s3_hook.list_keys('bucket', delimiter='/')))