def test_load_df_with_data_types(self, mock_run_cli):
    """Check the CREATE TABLE statement issued by ``HiveCliHook.load_df``.

    A one-row DataFrame with ten differently typed columns is loaded into
    table ``t``; the first positional argument of the first call recorded on
    ``mock_run_cli`` is compared (ignoring repeated spaces) with the expected
    DDL.
    """
    # OrderedDict keeps the column order that the expected DDL lists.
    columns = OrderedDict([
        ('b', [True]),
        ('i', [-1]),
        ('t', [1]),
        ('f', [0.0]),
        ('c', ['c']),
        ('M', [datetime.datetime(2018, 1, 1)]),
        ('O', [object()]),
        ('S', [b'STRING']),
        ('U', ['STRING']),
        ('V', [None]),
    ])
    frame = pd.DataFrame(columns)

    HiveCliHook().load_df(frame, 't')

    expected_ddl = (
        " CREATE TABLE IF NOT EXISTS t ( "
        "b BOOLEAN, i BIGINT, t BIGINT, f DOUBLE, c STRING, "
        "M TIMESTAMP, O STRING, S STRING, U STRING, V STRING) "
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' "
        "STORED AS textfile ; "
    )
    first_call_sql = mock_run_cli.call_args_list[0][0][0]
    assertEqualIgnoreMultipleSpaces(self, first_call_sql, expected_ddl)
def test_load_df_with_data_types(self, mock_run_cli):
    """Verify the table DDL that ``load_df`` sends for mixed column types.

    Builds a single-row DataFrame (bool, ints, float, str, datetime, object,
    bytes, str and None columns), loads it into table ``t`` through
    ``HiveCliHook`` and compares the SQL of the first ``mock_run_cli`` call
    with the expected CREATE TABLE statement, ignoring repeated spaces.
    """
    sample_values = (
        ('b', True),
        ('i', -1),
        ('t', 1),
        ('f', 0.0),
        ('c', 'c'),
        ('M', datetime.datetime(2018, 1, 1)),
        ('O', object()),
        ('S', 'STRING'.encode('utf-8')),
        ('U', 'STRING'),
        ('V', None),
    )
    # Each column holds exactly one value; insertion order is the column order.
    data = OrderedDict()
    for column_name, value in sample_values:
        data[column_name] = [value]
    data_frame = pd.DataFrame(data)

    hive_hook = HiveCliHook()
    hive_hook.load_df(data_frame, 't')

    expected_sql = (
        " CREATE TABLE IF NOT EXISTS t ( b BOOLEAN, i BIGINT, t BIGINT, "
        "f DOUBLE, c STRING, M TIMESTAMP, O STRING, S STRING, U STRING, "
        "V STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' "
        "STORED AS textfile ; "
    )
    assertEqualIgnoreMultipleSpaces(
        self, mock_run_cli.call_args_list[0][0][0], expected_sql)
def test_execute(self, mock_run):
    """Execute ``S3ToSnowflakeTransfer`` and check the COPY INTO statement.

    The operator is run with two S3 keys; ``mock_run`` must have been called
    exactly once, and its first positional argument must equal the expected
    query when repeated spaces are ignored.
    """
    s3_keys = ['1.csv', '2.csv']
    target = {
        'table': 'table',
        'stage': 'stage',
        'file_format': 'file_format',
        'schema': 'schema',
    }

    operator = S3ToSnowflakeTransfer(
        s3_keys=s3_keys,
        columns_array=None,
        task_id="task_id",
        dag=None,
        **target
    )
    operator.execute(None)

    # The expected query lists the keys in parentheses rather than brackets.
    files = str(s3_keys).replace('[', '(').replace(']', ')')
    base_sql = " FROM @{stage}/ files={files} file_format={file_format} ".format(
        stage=target['stage'],
        files=files,
        file_format=target['file_format'],
    )
    copy_query = " COPY INTO {schema}.{table} {base_sql} ".format(
        schema=target['schema'],
        table=target['table'],
        base_sql=base_sql,
    )

    assert mock_run.call_count == 1
    executed_sql = mock_run.call_args[0][0]
    assertEqualIgnoreMultipleSpaces(self, executed_sql, copy_query)
def test_assertEqualIgnoreMultipleSpaces_passes(self):
    """The helper must accept two strings that differ only in surrounding whitespace."""
    compact = 'w oo f'
    padded = ' w oo f '
    assertEqualIgnoreMultipleSpaces(self, compact, padded)
def test_execute(self, table_as_file_name, expected_s3_key, mock_run, mock_session):
    """Execute ``RedshiftToS3Transfer`` and check the generated UNLOAD query.

    ``table_as_file_name`` is forwarded to the operator and ``expected_s3_key``
    is the key expected in the resulting ``s3://`` target (both are supplied
    by the caller, presumably via test parameterisation). ``mock_run`` must be
    called exactly once with the expected statement, ignoring repeated spaces.
    """
    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_session.return_value = Session(access_key, secret_key)

    schema, table = "schema", "table"
    s3_bucket, s3_key = "bucket", "key"
    header_options = ['HEADER']

    operator = RedshiftToS3Transfer(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        unload_options=header_options,
        include_header=True,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        table_as_file_name=table_as_file_name,
        dag=None,
    )
    operator.execute(None)

    joined_options = '\n\t\t\t'.join(header_options)
    select_query = "SELECT * FROM {schema}.{table}".format(schema=schema, table=table)
    unload_template = (
        " UNLOAD ('{select_query}') "
        "TO 's3://{s3_bucket}/{s3_key}' "
        "with credentials "
        "'aws_access_key_id={access_key};aws_secret_access_key={secret_key}' "
        "{unload_options}; "
    )
    unload_query = unload_template.format(
        select_query=select_query,
        s3_bucket=s3_bucket,
        s3_key=expected_s3_key,
        access_key=access_key,
        secret_key=secret_key,
        unload_options=joined_options,
    )

    assert mock_run.call_count == 1
    assertEqualIgnoreMultipleSpaces(self, mock_run.call_args[0][0], unload_query)
def test_assertEqualIgnoreMultipleSpaces_raises(self):
    """The helper must raise ``AssertionError`` for strings with different content."""
    with self.assertRaises(AssertionError):
        assertEqualIgnoreMultipleSpaces(self, 'w oo f', 'meow')
def test_mysql_hook_test_bulk_dump_mock(self, mock_get_conn):
    """Check the SQL that ``MySqlHook.bulk_dump`` passes to the cursor.

    The cursor's ``execute`` is replaced by a ``MagicMock``; after dumping a
    table to a file path it must have been called exactly once with the
    expected ``SELECT ... INTO OUTFILE`` statement (repeated spaces ignored).
    """
    execute_spy = mock.MagicMock()
    mock_get_conn.return_value.cursor.return_value.execute = execute_spy

    from airflow.hooks.mysql_hook import MySqlHook

    source_table = "INFORMATION_SCHEMA.TABLES"
    output_path = "/path/to/output/file"
    MySqlHook('airflow_ci').bulk_dump(source_table, output_path)

    from airflow.utils.tests import assertEqualIgnoreMultipleSpaces

    execute_spy.assert_called_once()
    expected_query = " SELECT * INTO OUTFILE '{tmp_file}' FROM {table} ".format(
        tmp_file=output_path,
        table=source_table,
    )
    executed_query = execute_spy.call_args[0][0]
    assertEqualIgnoreMultipleSpaces(self, executed_query, expected_query)
def test_execute(self, mock_run, mock_Session):
    """Execute ``S3ToRedshiftTransfer`` and check the generated COPY query.

    ``mock_Session`` is made to return a ``Session`` built from fixed
    credentials; ``mock_run`` must then be called exactly once with the
    expected COPY statement, ignoring repeated spaces.
    """
    values = {
        'schema': "schema",
        'table': "table",
        's3_bucket': "bucket",
        's3_key': "key",
        'access_key': "aws_access_key_id",
        'secret_key': "aws_secret_access_key",
        'copy_options': "",
    }
    mock_Session.return_value = Session(values['access_key'], values['secret_key'])

    operator = S3ToRedshiftTransfer(
        schema=values['schema'],
        table=values['table'],
        s3_bucket=values['s3_bucket'],
        s3_key=values['s3_key'],
        copy_options=values['copy_options'],
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        dag=None,
    )
    operator.execute(None)

    copy_template = (
        " COPY {schema}.{table} "
        "FROM 's3://{s3_bucket}/{s3_key}/{table}' "
        "with credentials "
        "'aws_access_key_id={access_key};aws_secret_access_key={secret_key}' "
        "{copy_options}; "
    )
    copy_query = copy_template.format(**values)

    assert mock_run.call_count == 1
    assertEqualIgnoreMultipleSpaces(self, mock_run.call_args[0][0], copy_query)
def test_execute(self, mock_get_conn, mock_run, mock_Session):
    """Execute ``RedshiftToS3Transfer`` with ``include_header=True``.

    A mocked cursor reports a single column ``col``. The test checks that the
    cursor executed the column-lookup query exactly once and that ``mock_run``
    received the expected header-producing UNLOAD statement exactly once
    (both compared with repeated spaces ignored).
    """
    column_name = "col"
    cursor = mock.MagicMock()
    cursor.fetchall.return_value = [(column_name, )]
    mock_get_conn.return_value.cursor.return_value = cursor

    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_Session.return_value = Session(access_key, secret_key)

    schema, table = "schema", "table"
    s3_bucket, s3_key = "bucket", "key"
    option_tuple = ('PARALLEL OFF', )

    operator = RedshiftToS3Transfer(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        unload_options=option_tuple,
        include_header=True,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        dag=None,
    )
    operator.execute(None)

    joined_options = '\n\t\t\t'.join(option_tuple)
    columns_template = (
        " SELECT column_name "
        "FROM information_schema.columns "
        "WHERE table_schema = '{schema}' "
        "AND table_name = '{table}' "
        "ORDER BY ordinal_position "
    )
    columns_query = columns_template.format(schema=schema, table=table)

    unload_template = (
        " UNLOAD ('SELECT {column_name} FROM "
        "(SELECT 2 sort_order, CAST({column_name} AS text) AS {column_name} "
        "FROM {schema}.{table} "
        "UNION ALL SELECT 1 sort_order, \\'{column_name}\\') "
        "ORDER BY sort_order') "
        "TO 's3://{s3_bucket}/{s3_key}/{table}_' "
        "with credentials "
        "'aws_access_key_id={access_key};aws_secret_access_key={secret_key}' "
        "{unload_options}; "
    )
    unload_query = unload_template.format(
        column_name=column_name,
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        access_key=access_key,
        secret_key=secret_key,
        unload_options=joined_options,
    )

    assert cursor.execute.call_count == 1
    assertEqualIgnoreMultipleSpaces(self, cursor.execute.call_args[0][0], columns_query)

    assert mock_run.call_count == 1
    assertEqualIgnoreMultipleSpaces(self, mock_run.call_args[0][0], unload_query)
def test_execute(self, mock_get_conn, mock_run, mock_Session):
    """Check both SQL statements issued by ``RedshiftToS3Transfer`` with a header.

    The mocked cursor's ``fetchall`` yields one column name. After executing
    the operator, the cursor must have run the column-lookup query once and
    ``mock_run`` must have been called once with the UNLOAD statement; each is
    compared with the expected text, ignoring repeated spaces.
    """
    column_name = "col"
    fake_cursor = mock.MagicMock()
    fake_cursor.fetchall.return_value = [(column_name,)]
    mock_get_conn.return_value.cursor.return_value = fake_cursor

    access_key = "aws_access_key_id"
    secret_key = "aws_secret_access_key"
    mock_Session.return_value = Session(access_key, secret_key)

    schema = "schema"
    table = "table"
    s3_bucket = "bucket"
    s3_key = "key"
    unload_options = ('PARALLEL OFF',)

    transfer = RedshiftToS3Transfer(
        schema=schema,
        table=table,
        s3_bucket=s3_bucket,
        s3_key=s3_key,
        unload_options=unload_options,
        include_header=True,
        redshift_conn_id="redshift_conn_id",
        aws_conn_id="aws_conn_id",
        task_id="task_id",
        dag=None,
    )
    transfer.execute(None)

    # One mapping feeds both templates; str.format ignores unused keywords.
    values = {
        'column_name': column_name,
        'schema': schema,
        'table': table,
        's3_bucket': s3_bucket,
        's3_key': s3_key,
        'access_key': access_key,
        'secret_key': secret_key,
        'unload_options': '\n\t\t\t'.join(unload_options),
    }
    columns_query = (
        " SELECT column_name FROM information_schema.columns "
        "WHERE table_schema = '{schema}' AND table_name = '{table}' "
        "ORDER BY ordinal_position "
    ).format(**values)
    unload_query = (
        " UNLOAD ('SELECT {column_name} FROM (SELECT 2 sort_order, "
        "CAST({column_name} AS text) AS {column_name} FROM {schema}.{table} "
        "UNION ALL SELECT 1 sort_order, \\'{column_name}\\') "
        "ORDER BY sort_order') TO 's3://{s3_bucket}/{s3_key}/{table}_' "
        "with credentials "
        "'aws_access_key_id={access_key};aws_secret_access_key={secret_key}' "
        "{unload_options}; "
    ).format(**values)

    assert fake_cursor.execute.call_count == 1
    cursor_sql = fake_cursor.execute.call_args[0][0]
    assertEqualIgnoreMultipleSpaces(self, cursor_sql, columns_query)

    assert mock_run.call_count == 1
    run_sql = mock_run.call_args[0][0]
    assertEqualIgnoreMultipleSpaces(self, run_sql, unload_query)
def test_assertEqualIgnoreMultipleSpaces_raises(self):
    """Comparing two strings with different content must raise ``AssertionError``."""
    first = 'w oo f'
    second = 'meow'

    def compare_mismatched():
        # Deferred so that assertRaises performs the call and traps the error.
        assertEqualIgnoreMultipleSpaces(self, first, second)

    self.assertRaises(AssertionError, compare_mismatched)