Example #1
0
    def execute(self, context: Dict):
        """
        Run the Salesforce query and upload the exported results to GCS.

        The records are first written to a file inside a temporary directory
        and that file is then uploaded to the configured bucket/object.

        :param context: The task context during execution (not used here).
        :return: The ``gs://`` URI of the uploaded object.
        """
        sf_hook = SalesforceHook(conn_id=self.salesforce_conn_id)
        query_result = sf_hook.make_query(
            query=self.query,
            include_deleted=self.include_deleted,
            query_params=self.query_params,
        )

        with tempfile.TemporaryDirectory() as workdir:
            export_path = os.path.join(workdir, "salesforce_temp_file")

            # Dump the fetched records to the local scratch file.
            sf_hook.write_object_to_file(
                query_results=query_result["records"],
                filename=export_path,
                fmt=self.export_format,
                coerce_to_timestamp=self.coerce_to_timestamp,
                record_time_added=self.record_time_added,
            )

            # Push the scratch file to GCS while the directory still exists.
            gcs_hook = GCSHook(gcp_conn_id=self.gcp_conn_id)
            gcs_hook.upload(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=export_path,
                gzip=self.gzip,
            )

            gcs_uri = "gs://{}/{}".format(self.bucket_name, self.object_name)
            self.log.info("%s uploaded to GCS", gcs_uri)

        return gcs_uri
Example #2
0
    def execute(self, context: Dict) -> str:
        """
        Run the Salesforce query and upload the exported results to S3.

        The records are first written to a file inside a temporary directory
        and that file is then loaded into the configured bucket/key.

        :param context: The task context during execution (not used here).
        :return: The ``s3://`` URI of the uploaded object.
        """
        salesforce_hook = SalesforceHook(
            salesforce_conn_id=self.salesforce_conn_id)
        response = salesforce_hook.make_query(
            query=self.salesforce_query,
            include_deleted=self.include_deleted,
            query_params=self.query_params,
        )

        with tempfile.TemporaryDirectory() as tmp:
            path = os.path.join(tmp, "salesforce_temp_file")
            salesforce_hook.write_object_to_file(
                query_results=response["records"],
                filename=path,
                fmt=self.export_format,
                coerce_to_timestamp=self.coerce_to_timestamp,
                record_time_added=self.record_time_added,
            )

            s3_hook = S3Hook(aws_conn_id=self.aws_conn_id)
            s3_hook.load_file(
                filename=path,
                key=self.s3_key,
                bucket_name=self.s3_bucket_name,
                replace=self.replace,
                encrypt=self.encrypt,
                gzip=self.gzip,
                acl_policy=self.acl_policy,
            )

            s3_uri = f"s3://{self.s3_bucket_name}/{self.s3_key}"
            # Pass the URI as a lazy %-style argument so the message is only
            # formatted when the INFO level is actually enabled.
            self.log.info("Salesforce data uploaded to S3 at %s.", s3_uri)

            return s3_uri
Example #3
0
 def execute(self, context: dict) -> dict:
     """
     Call an APEX REST endpoint on Salesforce and optionally return the response.

     :param context: The task context during execution (not used here).
     :type context: dict
     :return: The Apex response when ``do_xcom_push`` is truthy, otherwise ``None``.
     """
     hook = SalesforceHook(salesforce_conn_id=self.salesforce_conn_id)
     apex_response = hook.get_conn().apexecute(
         action=self.endpoint,
         method=self.method,
         data=self.payload,
     )

     # The request is always made; only the returned value is conditional.
     if not self.do_xcom_push:
         return None
     return apex_response
Example #4
0
    def execute(self, context: 'Context'):
        """
        Run the configured Salesforce Bulk API operation.

        ``insert``, ``update``, ``upsert``, ``delete`` and ``hard_delete`` are
        dispatched to the method of the same name on the bulk handler obtained
        for ``self.object_name``. Any other operation makes no API call.

        :param context: The task context during execution (not used here).
        :return: The API response when ``do_xcom_push`` is truthy and the
            response is non-empty, otherwise ``None``.
        """
        connection = SalesforceHook(salesforce_conn_id=self.salesforce_conn_id).get_conn()

        response = []
        if self.operation in ('insert', 'update', 'upsert', 'delete', 'hard_delete'):
            # Resolve the per-object bulk handler, then the method named after
            # the requested operation.
            bulk_call = getattr(connection.bulk.__getattr__(self.object_name), self.operation)

            call_kwargs = dict(
                data=self.payload,
                batch_size=self.batch_size,
                use_serial=self.use_serial,
            )
            if self.operation == 'upsert':
                # upsert is the only operation that also takes the external id field.
                call_kwargs['external_id_field'] = self.external_id_field

            response = bulk_call(**call_kwargs)

        return response if (self.do_xcom_push and response) else None
Example #5
0
 def setUp(self):
     """Build the ``SalesforceHook`` under test and store it on the instance."""
     hook = SalesforceHook(conn_id='conn_id')
     self.salesforce_hook = hook
Example #6
0
class TestSalesforceHook(unittest.TestCase):
    """
    Unit tests for ``SalesforceHook``.

    The Salesforce client, the Airflow connection lookup and the pandas
    ``DataFrame.from_records`` constructor are patched, so no external service
    is contacted by these tests.
    """

    def setUp(self):
        """Create the hook under test with a placeholder connection id."""
        self.salesforce_hook = SalesforceHook(conn_id='conn_id')

    def test_get_conn_exists(self):
        """Call ``get_conn()`` on a hook whose ``conn`` attribute is already set."""
        self.salesforce_hook.conn = Mock(spec=Salesforce)

        self.salesforce_hook.get_conn()

        # NOTE(review): a Mock auto-creates ``return_value``, so this assertion
        # looks like it can never fail -- consider asserting ``conn`` is unchanged.
        self.assertIsNotNone(self.salesforce_hook.conn.return_value)

    # Stacked patches are applied bottom-up, so the ``Salesforce`` mock is the
    # first mock argument and the ``get_connection`` mock is the second.
    @patch('airflow.providers.salesforce.hooks.salesforce.SalesforceHook.get_connection',
           return_value=Connection(
               login='******',
               password='******',
               extra='{"security_token": "token", "sandbox": "true"}'
           ))
    @patch('airflow.providers.salesforce.hooks.salesforce.Salesforce')
    def test_get_conn(self, mock_salesforce, mock_get_connection):
        """
        ``get_conn()`` builds the client from the Airflow connection.

        Expects ``Salesforce`` to be called once with the connection's login,
        password and host plus the ``security_token`` and ``sandbox`` extras,
        and the resulting client to be stored on ``conn``.
        """
        self.salesforce_hook.get_conn()

        self.assertEqual(self.salesforce_hook.conn, mock_salesforce.return_value)
        mock_salesforce.assert_called_once_with(
            username=mock_get_connection.return_value.login,
            password=mock_get_connection.return_value.password,
            security_token=mock_get_connection.return_value.extra_dejson['security_token'],
            instance_url=mock_get_connection.return_value.host,
            sandbox=mock_get_connection.return_value.extra_dejson.get('sandbox', False)
        )

    @patch('airflow.providers.salesforce.hooks.salesforce.Salesforce')
    def test_make_query(self, mock_salesforce):
        """``make_query()`` forwards to ``query_all`` and returns its result."""
        mock_salesforce.return_value.query_all.return_value = dict(totalSize=123, done=True)
        self.salesforce_hook.conn = mock_salesforce.return_value
        query = 'SELECT * FROM table'

        query_results = self.salesforce_hook.make_query(query, include_deleted=True)

        mock_salesforce.return_value.query_all.assert_called_once_with(query, include_deleted=True)
        self.assertEqual(query_results, mock_salesforce.return_value.query_all.return_value)

    @patch('airflow.providers.salesforce.hooks.salesforce.Salesforce')
    def test_describe_object(self, mock_salesforce):
        """``describe_object()`` calls ``describe()`` on the attribute named after the object."""
        obj = 'obj_name'
        # Attach a child mock under the object's name so it can be looked up below.
        mock_salesforce.return_value.__setattr__(obj, Mock(spec=Salesforce))
        self.salesforce_hook.conn = mock_salesforce.return_value

        obj_description = self.salesforce_hook.describe_object(obj)

        mock_salesforce.return_value.__getattr__(obj).describe.assert_called_once_with()
        self.assertEqual(obj_description, mock_salesforce.return_value.__getattr__(obj).describe.return_value)

    @patch('airflow.providers.salesforce.hooks.salesforce.SalesforceHook.get_conn')
    @patch('airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object',
           return_value={'fields': [{'name': 'field_1'}, {'name': 'field_2'}]})
    def test_get_available_fields(self, mock_describe_object, mock_get_conn):
        """``get_available_fields()`` returns the ``name`` of every described field."""
        obj = 'obj_name'

        available_fields = self.salesforce_hook.get_available_fields(obj)

        mock_get_conn.assert_called_once_with()
        mock_describe_object.assert_called_once_with(obj)
        self.assertEqual(available_fields, ['field_1', 'field_2'])

    @patch('airflow.providers.salesforce.hooks.salesforce.SalesforceHook.make_query')
    def test_get_object_from_salesforce(self, mock_make_query):
        """``get_object_from_salesforce()`` builds a SELECT query and returns ``make_query``'s result."""
        salesforce_objects = self.salesforce_hook.get_object_from_salesforce(obj='obj_name',
                                                                             fields=['field_1', 'field_2'])

        mock_make_query.assert_called_once_with("SELECT field_1,field_2 FROM obj_name")
        self.assertEqual(salesforce_objects, mock_make_query.return_value)

    def test_write_object_to_file_invalid_format(self):
        """An unsupported ``fmt`` value raises ``ValueError``."""
        with self.assertRaises(ValueError):
            self.salesforce_hook.write_object_to_file(query_results=[], filename='test', fmt="test")

    @patch('airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records',
           return_value=pd.DataFrame({'test': [1, 2, 3]}))
    def test_write_object_to_file_csv(self, mock_data_frame):
        """``fmt="csv"`` writes through ``to_csv(filename, index=False)`` and returns the frame."""
        # Replace the writer so nothing is written to disk.
        mock_data_frame.return_value.to_csv = Mock()
        filename = 'test'

        data_frame = self.salesforce_hook.write_object_to_file(query_results=[], filename=filename, fmt="csv")

        mock_data_frame.return_value.to_csv.assert_called_once_with(filename, index=False)
        pd.testing.assert_frame_equal(data_frame, pd.DataFrame({'test': [1, 2, 3]}))

    @patch('airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object',
           return_value={'fields': [{'name': 'field_1', 'type': 'date'}]})
    @patch('airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records',
           return_value=pd.DataFrame({
               'test': [1, 2, 3],
               'field_1': ['2019-01-01', '2019-01-02', '2019-01-03']
           }))
    def test_write_object_to_file_json_with_timestamp_conversion(self, mock_data_frame, mock_describe_object):
        """
        ``fmt="json"`` with ``coerce_to_timestamp=True`` converts date columns.

        The object type is taken from the first record's ``attributes`` and
        described once; the ``date``-typed column is expected to come back as
        numeric values (presumably epoch seconds, compared approximately).
        """
        # Replace the writer so nothing is written to disk.
        mock_data_frame.return_value.to_json = Mock()
        filename = 'test'
        obj_name = 'obj_name'

        data_frame = self.salesforce_hook.write_object_to_file(
            query_results=[{'attributes': {'type': obj_name}}],
            filename=filename,
            fmt="json",
            coerce_to_timestamp=True
        )

        mock_describe_object.assert_called_once_with(obj_name)
        mock_data_frame.return_value.to_json.assert_called_once_with(filename, "records", date_unit="s")
        pd.testing.assert_frame_equal(data_frame, pd.DataFrame({
            'test': [1, 2, 3],
            'field_1': [1.546301e+09, 1.546387e+09, 1.546474e+09]
        }))

    @patch('airflow.providers.salesforce.hooks.salesforce.time.time', return_value=1.23)
    @patch('airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records',
           return_value=pd.DataFrame({'test': [1, 2, 3]}))
    def test_write_object_to_file_ndjson_with_record_time(self, mock_data_frame, mock_time):
        """
        ``fmt="ndjson"`` with ``record_time_added=True`` adds a fetch-time column.

        Expects ``to_json`` to be called with ``lines=True`` and every row of
        ``time_fetched_from_salesforce`` to hold the patched ``time.time()`` value.
        """
        # Replace the writer so nothing is written to disk.
        mock_data_frame.return_value.to_json = Mock()
        filename = 'test'

        data_frame = self.salesforce_hook.write_object_to_file(
            query_results=[],
            filename=filename,
            fmt="ndjson",
            record_time_added=True
        )

        mock_data_frame.return_value.to_json.assert_called_once_with(
            filename,
            "records",
            lines=True,
            date_unit="s"
        )
        pd.testing.assert_frame_equal(data_frame, pd.DataFrame({
            'test': [1, 2, 3],
            'time_fetched_from_salesforce': [
                mock_time.return_value, mock_time.return_value, mock_time.return_value
            ]
        }))
class TestSalesforceHook(unittest.TestCase):
    """
    Unit tests for ``SalesforceHook``.

    The Salesforce client, the Airflow connection lookup and the pandas
    ``DataFrame.from_records`` constructor are patched, so no external service
    is contacted by these tests.
    """

    def setUp(self):
        """Create the hook under test with a placeholder connection id."""
        self.salesforce_hook = SalesforceHook(conn_id="conn_id")

    def test_get_conn_exists(self):
        """Call ``get_conn()`` on a hook whose ``conn`` attribute is already set."""
        self.salesforce_hook.conn = Mock(spec=Salesforce)

        self.salesforce_hook.get_conn()

        # NOTE(review): a Mock auto-creates ``return_value``, so this assertion
        # looks like it can never fail -- consider asserting ``conn`` is unchanged.
        self.assertIsNotNone(self.salesforce_hook.conn.return_value)

    # Stacked patches are applied bottom-up, so the ``Salesforce`` mock is the
    # first mock argument and the ``get_connection`` mock is the second.
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.get_connection",
        return_value=Connection(
            login="******",
            password="******",
            extra='{"security_token": "token", "domain": "test"}'),
    )
    @patch("airflow.providers.salesforce.hooks.salesforce.Salesforce")
    def test_get_conn(self, mock_salesforce, mock_get_connection):
        """
        ``get_conn()`` builds the client from the Airflow connection.

        Expects ``Salesforce`` to be called once with the connection's login,
        password and host plus the ``security_token`` and ``domain`` extras,
        and the resulting client to be stored on ``conn``.
        """
        self.salesforce_hook.get_conn()

        self.assertEqual(self.salesforce_hook.conn,
                         mock_salesforce.return_value)
        mock_salesforce.assert_called_once_with(
            username=mock_get_connection.return_value.login,
            password=mock_get_connection.return_value.password,
            security_token=mock_get_connection.return_value.
            extra_dejson["security_token"],
            instance_url=mock_get_connection.return_value.host,
            domain=mock_get_connection.return_value.extra_dejson.get(
                "domain", None),
        )

    @patch("airflow.providers.salesforce.hooks.salesforce.Salesforce")
    def test_make_query(self, mock_salesforce):
        """``make_query()`` forwards to ``query_all`` and returns its result."""
        mock_salesforce.return_value.query_all.return_value = dict(
            totalSize=123, done=True)
        self.salesforce_hook.conn = mock_salesforce.return_value
        query = "SELECT * FROM table"

        query_results = self.salesforce_hook.make_query(query,
                                                        include_deleted=True)

        mock_salesforce.return_value.query_all.assert_called_once_with(
            query, include_deleted=True)
        self.assertEqual(query_results,
                         mock_salesforce.return_value.query_all.return_value)

    @patch("airflow.providers.salesforce.hooks.salesforce.Salesforce")
    def test_describe_object(self, mock_salesforce):
        """``describe_object()`` calls ``describe()`` on the attribute named after the object."""
        obj = "obj_name"
        # Attach a child mock under the object's name so it can be looked up below.
        mock_salesforce.return_value.__setattr__(obj, Mock(spec=Salesforce))
        self.salesforce_hook.conn = mock_salesforce.return_value

        obj_description = self.salesforce_hook.describe_object(obj)

        mock_salesforce.return_value.__getattr__(
            obj).describe.assert_called_once_with()
        self.assertEqual(
            obj_description,
            mock_salesforce.return_value.__getattr__(
                obj).describe.return_value)

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.get_conn"
    )
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object",
        return_value={"fields": [{
            "name": "field_1"
        }, {
            "name": "field_2"
        }]},
    )
    def test_get_available_fields(self, mock_describe_object, mock_get_conn):
        """``get_available_fields()`` returns the ``name`` of every described field."""
        obj = "obj_name"

        available_fields = self.salesforce_hook.get_available_fields(obj)

        mock_get_conn.assert_called_once_with()
        mock_describe_object.assert_called_once_with(obj)
        self.assertEqual(available_fields, ["field_1", "field_2"])

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.make_query"
    )
    def test_get_object_from_salesforce(self, mock_make_query):
        """``get_object_from_salesforce()`` builds a SELECT query and returns ``make_query``'s result."""
        salesforce_objects = self.salesforce_hook.get_object_from_salesforce(
            obj="obj_name", fields=["field_1", "field_2"])

        mock_make_query.assert_called_once_with(
            "SELECT field_1,field_2 FROM obj_name")
        self.assertEqual(salesforce_objects, mock_make_query.return_value)

    def test_write_object_to_file_invalid_format(self):
        """An unsupported ``fmt`` value raises ``ValueError``."""
        with self.assertRaises(ValueError):
            self.salesforce_hook.write_object_to_file(query_results=[],
                                                      filename="test",
                                                      fmt="test")

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame({
            "test": [1, 2, 3],
            "dict": [None, None, {
                "foo": "bar"
            }]
        }),
    )
    def test_write_object_to_file_csv(self, mock_data_frame):
        """
        ``fmt="csv"`` writes through ``to_csv(filename, index=False)``.

        The returned frame is expected to hold the ``dict`` column's values
        (including ``None``) as their ``str()`` representation.
        """
        # Replace the writer so nothing is written to disk.
        mock_data_frame.return_value.to_csv = Mock()
        filename = "test"

        data_frame = self.salesforce_hook.write_object_to_file(
            query_results=[], filename=filename, fmt="csv")

        mock_data_frame.return_value.to_csv.assert_called_once_with(
            filename, index=False)
        pd.testing.assert_frame_equal(
            data_frame,
            pd.DataFrame({
                "test": [1, 2, 3],
                "dict": ["None", "None", str({"foo": "bar"})]
            }))

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object",
        return_value={"fields": [{
            "name": "field_1",
            "type": "date"
        }]},
    )
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame({
            "test": [1, 2, 3],
            "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]
        }),
    )
    def test_write_object_to_file_json_with_timestamp_conversion(
            self, mock_data_frame, mock_describe_object):
        """
        ``fmt="json"`` with ``coerce_to_timestamp=True`` converts date columns.

        The object type is taken from the first record's ``attributes`` and
        described once; the ``date``-typed column is expected to come back as
        numeric values (presumably epoch seconds, compared approximately).
        """
        # Replace the writer so nothing is written to disk.
        mock_data_frame.return_value.to_json = Mock()
        filename = "test"
        obj_name = "obj_name"

        data_frame = self.salesforce_hook.write_object_to_file(
            query_results=[{
                "attributes": {
                    "type": obj_name
                }
            }],
            filename=filename,
            fmt="json",
            coerce_to_timestamp=True,
        )

        mock_describe_object.assert_called_once_with(obj_name)
        mock_data_frame.return_value.to_json.assert_called_once_with(
            filename, "records", date_unit="s")
        pd.testing.assert_frame_equal(
            data_frame,
            pd.DataFrame({
                "test": [1, 2, 3],
                "field_1": [1.546301e09, 1.546387e09, 1.546474e09]
            }))

    @patch("airflow.providers.salesforce.hooks.salesforce.time.time",
           return_value=1.23)
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame({"test": [1, 2, 3]}),
    )
    def test_write_object_to_file_ndjson_with_record_time(
            self, mock_data_frame, mock_time):
        """
        ``fmt="ndjson"`` with ``record_time_added=True`` adds a fetch-time column.

        Expects ``to_json`` to be called with ``lines=True`` and every row of
        ``time_fetched_from_salesforce`` to hold the patched ``time.time()`` value.
        """
        # Replace the writer so nothing is written to disk.
        mock_data_frame.return_value.to_json = Mock()
        filename = "test"

        data_frame = self.salesforce_hook.write_object_to_file(
            query_results=[],
            filename=filename,
            fmt="ndjson",
            record_time_added=True)

        mock_data_frame.return_value.to_json.assert_called_once_with(
            filename, "records", lines=True, date_unit="s")
        pd.testing.assert_frame_equal(
            data_frame,
            pd.DataFrame({
                "test": [1, 2, 3],
                "time_fetched_from_salesforce": [
                    mock_time.return_value,
                    mock_time.return_value,
                    mock_time.return_value,
                ],
            }),
        )

    @patch(
        "airflow.providers.salesforce.hooks.salesforce.SalesforceHook.describe_object",
        return_value={"fields": [{
            "name": "field_1",
            "type": "date"
        }]},
    )
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame({
            "test": [1, 2, 3],
            "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]
        }),
    )
    # NOTE(review): "obect" in the method name looks like a typo for "object".
    def test_obect_to_df_with_timestamp_conversion(self, mock_data_frame,
                                                   mock_describe_object):
        """
        ``object_to_df()`` with ``coerce_to_timestamp=True`` converts date columns.

        The object type is taken from the first record's ``attributes`` and
        described once; the ``date``-typed column is expected to come back as
        numeric values (presumably epoch seconds, compared approximately).
        """
        obj_name = "obj_name"

        data_frame = self.salesforce_hook.object_to_df(
            query_results=[{
                "attributes": {
                    "type": obj_name
                }
            }],
            coerce_to_timestamp=True,
        )

        mock_describe_object.assert_called_once_with(obj_name)
        pd.testing.assert_frame_equal(
            data_frame,
            pd.DataFrame({
                "test": [1, 2, 3],
                "field_1": [1.546301e09, 1.546387e09, 1.546474e09]
            }))

    @patch("airflow.providers.salesforce.hooks.salesforce.time.time",
           return_value=1.23)
    @patch(
        "airflow.providers.salesforce.hooks.salesforce.pd.DataFrame.from_records",
        return_value=pd.DataFrame({"test": [1, 2, 3]}),
    )
    def test_object_to_df_with_record_time(self, mock_data_frame, mock_time):
        """
        ``object_to_df()`` with ``record_time_added=True`` adds a fetch-time column.

        Every row of ``time_fetched_from_salesforce`` is expected to hold the
        patched ``time.time()`` value.
        """
        data_frame = self.salesforce_hook.object_to_df(query_results=[],
                                                       record_time_added=True)

        pd.testing.assert_frame_equal(
            data_frame,
            pd.DataFrame({
                "test": [1, 2, 3],
                "time_fetched_from_salesforce": [
                    mock_time.return_value,
                    mock_time.return_value,
                    mock_time.return_value,
                ],
            }),
        )