def test_create_operator_with_correct_parameters_mysql_proxy_socket(self, get_connections):
    """Check the MySQL connection generated for proxy access without TCP.

    The source ``gcpcloudsql`` URI sets ``use_proxy=True`` and
    ``sql_proxy_use_tcp=False``; the generated connection is expected to
    target ``localhost`` through a UNIX socket and to carry no port.

    :param get_connections: stub whose ``return_value`` supplies the source
        connection (presumably injected by a patch decorator that is not
        visible here).
    """
    source_uri = (
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=True&sql_proxy_use_tcp=False"
    )
    source_connection = Connection()
    source_connection.parse_from_uri(source_uri)
    get_connections.return_value = [source_connection]

    operator = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    operator.cloudsql_db_hook.create_connection()
    try:
        database_hook = operator.cloudsql_db_hook.get_database_hook()
        generated = database_hook._get_connections_from_db(database_hook.mysql_conn_id)[0]
    finally:
        # Always remove the generated connection, even if the lookup fails.
        operator.cloudsql_db_hook.delete_connection()

    self.assertEqual('mysql', generated.conn_type)
    self.assertEqual('localhost', generated.host)
    socket_path = generated.extra_dejson['unix_socket']
    self.assertIn('/tmp', socket_path)
    self.assertIn('example-project:europe-west1:testdb', socket_path)
    self.assertIsNone(generated.port)
    self.assertEqual('testdb', generated.schema)
def test_create_operator_with_correct_parameters_mysql_ssl(
        self, get_connections):
    """Check the MySQL connection generated for direct access over SSL.

    The source URI disables the proxy and enables SSL with certificate, key
    and root certificate paths; the generated connection must keep the
    original host/port and expose those paths in its ``ssl`` extra.

    :param get_connections: stub whose ``return_value`` supplies the source
        connection (presumably a patched lookup; decorator not visible here).
    """
    source_uri = (
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=False&use_ssl=True&sslcert=/bin/bash&"
        "sslkey=/bin/bash&sslrootcert=/bin/bash"
    )
    source_connection = Connection()
    source_connection.parse_from_uri(source_uri)
    get_connections.return_value = [source_connection]

    operator = CloudSqlQueryOperator(
        sql=['SELECT * FROM TABLE'],
        task_id='task_id',
    )
    operator.cloudsql_db_hook.create_connection()
    try:
        database_hook = operator.cloudsql_db_hook.get_database_hook()
        generated = database_hook._get_connections_from_db(database_hook.mysql_conn_id)[0]
    finally:
        # Clean up the generated connection regardless of the outcome above.
        operator.cloudsql_db_hook.delete_connection()

    self.assertEqual('mysql', generated.conn_type)
    self.assertEqual('8.8.8.8', generated.host)
    self.assertEqual(3200, generated.port)
    self.assertEqual('testdb', generated.schema)
    # The ``ssl`` extra is itself a JSON document holding the three paths.
    ssl_options = json.loads(generated.extra_dejson['ssl'])
    for option in ('cert', 'key', 'ca'):
        self.assertEqual('/bin/bash', ssl_options[option])
def test_extract_authority_uri(get_connection, mock_get_table_schemas):
    """Check ``PostgresExtractor`` output when the source carries an authority.

    The expected input dataset uses ``localhost:5432`` as authority and
    ``CONN_URI_WITHOUT_USERPASS`` as connection URL; no outputs are expected.

    :param get_connection: stub returning the connection parsed from ``CONN_URI``.
    :param mock_get_table_schemas: stub yielding the input schemas first and
        ``NO_DB_TABLE_SCHEMA`` on the second call.
    """
    mock_get_table_schemas.side_effect = [[DB_TABLE_SCHEMA], NO_DB_TABLE_SCHEMA]

    parsed_connection = Connection()
    parsed_connection.parse_from_uri(uri=CONN_URI)
    get_connection.return_value = parsed_connection

    expected_source = Source(
        scheme='postgres',
        authority='localhost:5432',
        connection_url=CONN_URI_WITHOUT_USERPASS,
    )
    expected_dataset = Dataset(
        name=f"{DB_NAME}.{DB_SCHEMA_NAME}.{DB_TABLE_NAME.name}",
        source=expected_source,
        fields=[],
    )
    expected_inputs = [expected_dataset.to_openlineage_dataset()]

    task_metadata = PostgresExtractor(TASK).extract()

    assert task_metadata.name == f"{DAG_ID}.{TASK_ID}"
    assert task_metadata.inputs == expected_inputs
    assert task_metadata.outputs == []
def test_create_operator_with_correct_parameters_mysql_ssl(self, get_connections):
    """Verify host, port, schema and SSL extras of the generated MySQL connection.

    The source ``gcpcloudsql`` URI has ``use_proxy=False`` and ``use_ssl=True``
    with ``/bin/bash`` as every certificate path, so the same path must appear
    under the ``cert``, ``key`` and ``ca`` entries of the ``ssl`` extra.

    :param get_connections: stub whose ``return_value`` supplies the source
        connection (presumably a patched lookup; decorator not visible here).
    """
    cloudsql_connection = Connection()
    cloudsql_connection.parse_from_uri(
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=False&use_ssl=True&sslcert=/bin/bash&"
        "sslkey=/bin/bash&sslrootcert=/bin/bash"
    )
    get_connections.return_value = [cloudsql_connection]

    operator = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    operator.cloudsql_db_hook.create_connection()
    try:
        mysql_hook = operator.cloudsql_db_hook.get_database_hook()
        matches = mysql_hook._get_connections_from_db(mysql_hook.mysql_conn_id)
        result = matches[0]
    finally:
        # The generated connection must not outlive the test.
        operator.cloudsql_db_hook.delete_connection()

    self.assertEqual('mysql', result.conn_type)
    self.assertEqual('8.8.8.8', result.host)
    self.assertEqual(3200, result.port)
    self.assertEqual('testdb', result.schema)
    ssl_settings = json.loads(result.extra_dejson['ssl'])
    self.assertEqual('/bin/bash', ssl_settings['cert'])
    self.assertEqual('/bin/bash', ssl_settings['key'])
    self.assertEqual('/bin/bash', ssl_settings['ca'])
def test_create_operator_with_wrong_parameters(
    self,
    project_id,
    location,
    instance_name,
    database_type,
    use_proxy,
    use_ssl,
    sql,
    message,
    get_connections,
):
    """Check that invalid settings make the operator constructor fail.

    The connection URI is built from the supplied parameter values;
    constructing ``CloudSqlQueryOperator`` must raise ``AirflowException``
    whose text contains ``message``.

    :param message: substring expected in the raised exception's text.
    :param get_connections: stub whose ``return_value`` supplies the source
        connection (presumably a patched lookup; decorator not visible here).
    """
    uri_template = (
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type={database_type}&"
        "project_id={project_id}&location={location}&instance={instance_name}&"
        "use_proxy={use_proxy}&use_ssl={use_ssl}"
    )
    uri_fields = {
        'database_type': database_type,
        'project_id': project_id,
        'location': location,
        'instance_name': instance_name,
        'use_proxy': use_proxy,
        'use_ssl': use_ssl,
    }
    broken_connection = Connection()
    broken_connection.parse_from_uri(uri_template.format(**uri_fields))
    get_connections.return_value = [broken_connection]

    with self.assertRaises(AirflowException) as raised:
        CloudSqlQueryOperator(sql=sql, task_id='task_id')

    self.assertIn(message, str(raised.exception))
def test_extract(get_connection, mock_get_table_schemas):
    """Check ``SnowflakeExtractor`` metadata for a task with one input table.

    The task's hook is replaced by a mock whose ``_get_conn_params`` reports
    the ``test_account`` account, which the expected dataset source uses as
    its authority. No outputs are expected.

    :param get_connection: stub returning the connection parsed from ``CONN_URI``.
    :param mock_get_table_schemas: stub yielding the input schemas first and
        ``NO_DB_TABLE_SCHEMA`` on the second call.
    """
    mock_get_table_schemas.side_effect = [[DB_TABLE_SCHEMA], NO_DB_TABLE_SCHEMA]

    conn = Connection()
    conn.parse_from_uri(uri=CONN_URI)
    get_connection.return_value = conn

    TASK.get_hook = mock.MagicMock()
    TASK.get_hook.return_value._get_conn_params.return_value = {
        'account': 'test_account',
        'database': DB_NAME
    }

    expected_inputs = [
        Dataset(
            name=f"{DB_NAME}.{DB_SCHEMA_NAME}.{DB_TABLE_NAME.name}",
            source=Source(
                scheme='snowflake',
                authority='test_account',
                connection_url=CONN_URI
            ),
            fields=[]
        ).to_openlineage_dataset()
    ]

    # Expose the connection through the environment only while extracting;
    # patch.dict restores os.environ afterwards so the variable cannot leak
    # into other tests running in the same process.
    env_key = f"AIRFLOW_CONN_{CONN_ID.upper()}"
    with mock.patch.dict(os.environ, {env_key: CONN_URI}):
        task_metadata = SnowflakeExtractor(TASK).extract()

    assert task_metadata.name == f"{DAG_ID}.{TASK_ID}"
    assert task_metadata.inputs == expected_inputs
    assert task_metadata.outputs == []
def test_cloudsql_hook_delete_connection_on_exception(
        self, get_connections, run, get_connection, delete_connection):
    """Check that the connection is deleted when running the query fails.

    ``run`` is configured to raise; the exception must propagate out of
    ``operator.execute`` unchanged and ``delete_connection`` must have been
    called exactly once with no arguments.

    :param get_connections: stub returning the database connection list.
    :param run: stub configured to raise when invoked.
    :param get_connection: stub returning the source ``gcpcloudsql`` connection.
    :param delete_connection: stub whose call is asserted at the end.
    """
    cloudsql_connection = Connection()
    cloudsql_connection.parse_from_uri(
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=False"
    )
    get_connection.return_value = cloudsql_connection

    extra_settings = {
        "project_id": "example-project",
        "location": "europe-west1",
        "instance": "testdb",
        "database_type": "mysql"
    }
    database_connection = Connection()
    database_connection.host = "127.0.0.1"
    database_connection.set_extra(json.dumps(extra_settings))
    get_connections.return_value = [database_connection]

    run.side_effect = Exception("Exception when running a query")
    operator = CloudSqlQueryOperator(
        sql=['SELECT * FROM TABLE'],
        task_id='task_id',
    )

    with self.assertRaises(Exception) as raised:
        operator.execute(None)

    self.assertEqual("Exception when running a query", str(raised.exception))
    delete_connection.assert_called_once_with()
def test_cloudsql_hook_delete_connection_on_exception(
    self,
    get_connections,
    run,
    get_connection,
    delete_connection,
):
    """Ensure cleanup still happens when query execution raises.

    The query runner stub raises ``Exception("Exception when running a
    query")``; the test asserts that this exact message surfaces from
    ``operator.execute(None)`` and that ``delete_connection`` was invoked
    once without arguments.

    :param get_connections: stub returning the database connection list.
    :param run: stub configured to raise when invoked.
    :param get_connection: stub returning the source ``gcpcloudsql`` connection.
    :param delete_connection: stub whose call is asserted at the end.
    """
    source_uri = (
        "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=False"
    )
    source_connection = Connection()
    source_connection.parse_from_uri(source_uri)
    get_connection.return_value = source_connection

    db_connection = Connection()
    db_connection.host = "127.0.0.1"
    serialized_extra = json.dumps({
        "project_id": "example-project",
        "location": "europe-west1",
        "instance": "testdb",
        "database_type": "mysql",
    })
    db_connection.set_extra(serialized_extra)
    get_connections.return_value = [db_connection]

    run.side_effect = Exception("Exception when running a query")
    operator = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')

    with self.assertRaises(Exception) as failure:
        operator.execute(None)

    failure_text = str(failure.exception)
    self.assertEqual("Exception when running a query", failure_text)
    delete_connection.assert_called_once_with()
def create_connection(self) -> Connection:
    """
    Build a Connection object from the generated connection URI.

    The connection is identified by ``self.db_conn_id`` and populated by
    parsing the URI returned from ``self._generate_connection_uri()``
    (which, per the original description, reflects the proxy / TCP /
    UNIX socket / SSL settings).

    :return: the populated Connection object.
    """
    new_connection = Connection(conn_id=self.db_conn_id)
    generated_uri = self._generate_connection_uri()
    self.log.info("Creating connection %s", self.db_conn_id)
    new_connection.parse_from_uri(generated_uri)
    return new_connection
def _setup_connections(get_connections, uri):
    """Prime ``get_connections`` with one GCP and two Cloud SQL connections.

    Successive calls to ``get_connections`` return, in order: a mocked GCP
    connection whose ``extra_dejson.get`` always yields ``'empty_project'``,
    then two separate ``Connection`` objects parsed from ``uri``.

    :param get_connections: stub whose ``side_effect`` is configured.
    :param uri: connection URI parsed into both Cloud SQL connections.
    """
    gcp_extra = mock.MagicMock()
    gcp_extra.get.return_value = 'empty_project'
    gcp_connection = mock.MagicMock()
    gcp_connection.extra_dejson = gcp_extra

    # Two distinct objects are needed: each lookup gets its own instance.
    cloudsql_connections = []
    for _ in range(2):
        parsed = Connection()
        parsed.parse_from_uri(uri)
        cloudsql_connections.append(parsed)

    get_connections.side_effect = (
        [[gcp_connection]] + [[parsed] for parsed in cloudsql_connections]
    )
def _setup_connections(get_connections, uri):
    """Configure the sequence of results returned by ``get_connections``.

    The first call yields a single mocked GCP connection (its
    ``extra_dejson.get`` returns ``'empty_project'``); the second and third
    calls each yield a fresh ``Connection`` parsed from ``uri``.

    :param get_connections: stub whose ``side_effect`` is configured.
    :param uri: connection URI parsed into both Cloud SQL connections.
    """
    gcp_connection = mock.MagicMock()
    gcp_connection.extra_dejson = mock.MagicMock()
    gcp_connection.extra_dejson.get.return_value = 'empty_project'

    first_cloudsql = Connection()
    first_cloudsql.parse_from_uri(uri)

    second_cloudsql = Connection()
    second_cloudsql.parse_from_uri(uri)

    lookup_results = [
        [gcp_connection],
        [first_cloudsql],
        [second_cloudsql],
    ]
    get_connections.side_effect = lookup_results
def create_connection(self, session=None):
    """
    Persist a new connection built from the generated connection URI.

    A Connection identified by ``self.db_conn_id`` is populated from
    ``self._generate_connection_uri()`` (which, per the original
    description, reflects the proxy / TCP / UNIX socket / SSL settings),
    then added to the session and committed.

    :param session: Session of the SQL Alchemy ORM. The original docs state
        it is supplied automatically by a decorator (not visible here);
        the ``None`` default itself would fail on ``session.add``.
    """
    new_connection = Connection(conn_id=self.db_conn_id)
    generated_uri = self._generate_connection_uri()
    self.log.info("Creating connection %s", self.db_conn_id)
    new_connection.parse_from_uri(generated_uri)

    session.add(new_connection)
    session.commit()
def create_connection(self, session=None):
    """
    Create connection in the Connection table from the generated URI.

    A Connection identified by ``self.db_conn_id`` is populated from
    ``self._generate_connection_uri()`` (which, per the original
    description, reflects the proxy / TCP / UNIX socket / SSL settings),
    then added to the session and committed.

    :param session: Session of the SQL Alchemy ORM. The original docs state
        it is supplied automatically by a decorator (not visible here);
        the ``None`` default itself would fail on ``session.add``.
    """
    connection = Connection(conn_id=self.db_conn_id)
    uri = self._generate_connection_uri()
    # Pass the ID as a logging argument so interpolation is deferred to the
    # logging framework and skipped when INFO is disabled; the emitted text
    # is unchanged.
    self.log.info("Creating connection %s", self.db_conn_id)
    connection.parse_from_uri(uri)
    session.add(connection)
    session.commit()
def get_connection(conn_id):
    """Look up an Airflow connection by ID, preferring the environment.

    If the ``AIRFLOW_CONN_<CONN_ID>`` environment variable holds a non-empty
    URI, a ``Connection`` parsed from it is returned. Otherwise the
    Connection table is queried and the first row whose ``conn_id`` matches
    is returned (``None`` when there is no such row).

    :param conn_id: connection ID; upper-cased to form the variable name.
    """
    # TODO: We may want to throw an exception if the connection
    # does not exist (ex: AirflowConnectionException). The connection
    # URI is required when collecting metadata for a data source.
    from airflow.models import Connection

    env_uri = os.environ.get(f"AIRFLOW_CONN_{conn_id.upper()}")
    if env_uri:
        env_connection = Connection()
        env_connection.parse_from_uri(uri=env_uri)
        return env_connection

    # The session factory lives at different import paths across Airflow
    # major versions; resolve whichever one is available.
    create_session = safe_import_airflow(
        airflow_1_path="airflow.utils.db.create_session",
        airflow_2_path="airflow.utils.session.create_session",
    )

    with create_session() as session:
        matching = session.query(Connection).filter(Connection.conn_id == conn_id)
        return matching.first()
def test_create_operator_with_correct_parameters_mysql_tcp(
        self, get_connections):
    """Check the MySQL connection generated for proxy access over TCP.

    With ``use_proxy=True`` and ``sql_proxy_use_tcp=True`` the generated
    connection must point at ``127.0.0.1`` and must not reuse the source
    port ``3200``.

    :param get_connections: stub whose ``return_value`` supplies the source
        connection (presumably a patched lookup; decorator not visible here).
    """
    source_uri = (
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=mysql&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=True&sql_proxy_use_tcp=True"
    )
    source_connection = Connection()
    source_connection.parse_from_uri(source_uri)
    get_connections.return_value = [source_connection]

    operator = CloudSqlQueryOperator(
        sql=['SELECT * FROM TABLE'],
        task_id='task_id',
    )
    operator.cloudsql_db_hook.create_connection()
    try:
        database_hook = operator.cloudsql_db_hook.get_database_hook()
        generated = database_hook._get_connections_from_db(database_hook.mysql_conn_id)[0]
    finally:
        # Always remove the generated connection, even if the lookup fails.
        operator.cloudsql_db_hook.delete_connection()

    self.assertEqual('mysql', generated.conn_type)
    self.assertEqual('127.0.0.1', generated.host)
    self.assertNotEqual(3200, generated.port)
    self.assertEqual('testdb', generated.schema)
def test_create_operator_with_correct_parameters_postgres(self, get_connections):
    """Check the Postgres connection generated for direct, non-SSL access.

    With both ``use_proxy`` and ``use_ssl`` set to ``False`` the generated
    connection must keep the source host, port and schema.

    :param get_connections: stub whose ``return_value`` supplies the source
        connection (presumably a patched lookup; decorator not visible here).
    """
    source_uri = (
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=postgres&"
        "project_id=example-project&location=europe-west1&instance=testdb&"
        "use_proxy=False&use_ssl=False"
    )
    source_connection = Connection()
    source_connection.parse_from_uri(source_uri)
    get_connections.return_value = [source_connection]

    operator = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'], task_id='task_id')
    operator.cloudsql_db_hook.create_connection()
    try:
        database_hook = operator.cloudsql_db_hook.get_database_hook()
        generated = database_hook._get_connections_from_db(database_hook.postgres_conn_id)[0]
    finally:
        # Clean up the generated connection regardless of the outcome above.
        operator.cloudsql_db_hook.delete_connection()

    self.assertEqual('postgres', generated.conn_type)
    self.assertEqual('8.8.8.8', generated.host)
    self.assertEqual(3200, generated.port)
    self.assertEqual('testdb', generated.schema)
def test_create_operator_with_wrong_parameters(self, project_id, location,
                                               instance_name, database_type,
                                               use_proxy, use_ssl, sql,
                                               message, get_connections):
    """Verify the operator rejects an invalid parameter combination.

    A ``gcpcloudsql`` URI is assembled from the given values and served by
    ``get_connections``; creating ``CloudSqlQueryOperator`` with ``sql`` is
    expected to raise ``AirflowException`` mentioning ``message``.

    :param message: substring expected in the raised exception's text.
    :param get_connections: stub whose ``return_value`` supplies the source
        connection (presumably a patched lookup; decorator not visible here).
    """
    invalid_uri = (
        "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type={database_type}&"
        "project_id={project_id}&location={location}&instance={instance_name}&"
        "use_proxy={use_proxy}&use_ssl={use_ssl}"
    ).format(
        database_type=database_type,
        project_id=project_id,
        location=location,
        instance_name=instance_name,
        use_proxy=use_proxy,
        use_ssl=use_ssl,
    )
    invalid_connection = Connection()
    invalid_connection.parse_from_uri(invalid_uri)
    get_connections.return_value = [invalid_connection]

    with self.assertRaises(AirflowException) as context:
        CloudSqlQueryOperator(
            sql=sql,
            task_id='task_id',
        )

    error_text = str(context.exception)
    self.assertIn(message, error_text)
# This script generates a correctly escaped `wasb` connection string for Airflow,
# which can be used in an environment variable to define an Azure storage connection.
# Like so: `export AIRFLOW_CONN_WASB_FILE_UPLOAD=<ScriptURLOutputHere>`
#
# Usage: <script> AzureStorageAccountName AzureStorageKey
from airflow.models import Connection
from urllib import parse
from sys import argv

if len(argv) < 3:
    # A usage error must not look like success to a calling shell: raising
    # SystemExit with a message prints it to stderr and exits with status 1
    # (the bare exit() used previously returned status 0).
    raise SystemExit("Expect 2 arguments `AzureStorageAccountName` and `AzureStorageKey`")

# Make sure the password is URL escaped. quote(..., safe="") percent-encodes
# every reserved character (including "/" and "+"), and encodes a space as
# "%20" rather than "+", which would be a literal plus sign in the userinfo
# part of a URI. NOTE(review): assumes Connection.parse_from_uri
# percent-decodes the password -- the round-trip printed below confirms it.
pw = argv[2]
quoted = parse.quote(pw, safe="")

# Build the connection URL
url = "wasb://{}:{}@azure".format(argv[1], quoted)

# Check it worked correctly by parsing the URL back and showing the result
conn = Connection()
conn.parse_from_uri(url)
print("Found these details are they correct?")
print("Account Name: {}".format(conn.login))
print("Account Key: {}".format(conn.password))

print("\nAirflow Connection URL for this account:")
print(url)
def test_get_normalized_postgres_connection_uri():
    """A ``postgresql://`` URI must normalize to ``AIRFLOW_CONN_URI``."""
    postgresql_connection = Connection()
    postgresql_connection.parse_from_uri(uri="postgresql://localhost:5432/testdb")

    normalized_uri = get_normalized_postgres_connection_uri(postgresql_connection)

    assert normalized_uri == AIRFLOW_CONN_URI
def test_get_connection_from_uri():
    """Parsing ``AIRFLOW_CONN_URI`` and normalizing it must return it unchanged."""
    parsed_connection = Connection()
    parsed_connection.parse_from_uri(uri=AIRFLOW_CONN_URI)

    normalized_uri = get_normalized_postgres_connection_uri(parsed_connection)

    assert normalized_uri == AIRFLOW_CONN_URI