コード例 #1
0
    def execute(self, context: 'Context'):
        sheet_hook = GSheetsHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        # Pull data and upload
        destination_array: List[str] = []
        sheet_titles = sheet_hook.get_sheet_titles(
            spreadsheet_id=self.spreadsheet_id, sheet_filter=self.sheet_filter)
        for sheet_range in sheet_titles:
            data = sheet_hook.get_values(spreadsheet_id=self.spreadsheet_id,
                                         range_=sheet_range)
            gcs_path_to_file = self._upload_data(gcs_hook, sheet_hook,
                                                 sheet_range, data)
            destination_array.append(gcs_path_to_file)

        self.xcom_push(context, "destination_objects", destination_array)
        return destination_array
コード例 #2
0
ファイル: gcs_to_sheets.py プロジェクト: vipadm/airflow
    def execute(self, context: Any) -> None:
        sheet_hook = GSheetsHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        with NamedTemporaryFile("w+") as temp_file:
            # Download data
            gcs_hook.download(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=temp_file.name,
            )

            # Upload data
            values = list(csv.reader(temp_file))
            sheet_hook.update_values(
                spreadsheet_id=self.spreadsheet_id,
                range_=self.spreadsheet_range,
                values=values,
            )
コード例 #3
0
 def execute(self, context: Any):
     hook = GSheetsHook(gcp_conn_id=self.gcp_conn_id,
                        delegate_to=self.delegate_to)
     spreadsheet = hook.create_spreadsheet(spreadsheet=self.spreadsheet)
     self.xcom_push(context, "spreadsheet_id", spreadsheet["spreadsheetId"])
     self.xcom_push(context, "spreadsheet_url",
                    spreadsheet["spreadsheetUrl"])
     return spreadsheet
コード例 #4
0
 def execute(self, context: Any) -> Dict[str, Any]:
     hook = GSheetsHook(
         gcp_conn_id=self.gcp_conn_id,
         delegate_to=self.delegate_to,
         impersonation_chain=self.impersonation_chain,
     )
     spreadsheet = hook.create_spreadsheet(spreadsheet=self.spreadsheet)
     self.xcom_push(context, "spreadsheet_id", spreadsheet["spreadsheetId"])
     self.xcom_push(context, "spreadsheet_url", spreadsheet["spreadsheetUrl"])
     return spreadsheet
コード例 #5
0
    def _upload_data(
        self,
        gcs_hook: GCSHook,
        hook: GSheetsHook,
        sheet_range: str,
        sheet_values: List[Any],
    ) -> str:
        # Construct destination file path
        sheet = hook.get_spreadsheet(self.spreadsheet_id)
        file_name = f"{sheet['properties']['title']}_{sheet_range}.csv".replace(
            " ", "_")
        dest_file_name = (f"{self.destination_path.strip('/')}/{file_name}"
                          if self.destination_path else file_name)

        with NamedTemporaryFile("w+") as temp_file:
            # Write data
            writer = csv.writer(temp_file)
            writer.writerows(sheet_values)
            temp_file.flush()

            # Upload to GCS
            gcs_hook.upload(
                bucket_name=self.destination_bucket,
                object_name=dest_file_name,
                filename=temp_file.name,
            )
        return dest_file_name
コード例 #6
0
ファイル: sql_to_sheets.py プロジェクト: yyhecust/airflow
    def execute(self, context: Any) -> None:
        self.log.info("Getting data")
        values = list(self._get_data())

        self.log.info("Connecting to Google")
        sheet_hook = GSheetsHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        self.log.info(f"Uploading data to https://docs.google.com/spreadsheets/d/{self.spreadsheet_id}")

        sheet_hook.update_values(
            spreadsheet_id=self.spreadsheet_id,
            range_=self.spreadsheet_range,
            values=values,
        )
コード例 #7
0
 def setUp(self):
     with mock.patch(
             'airflow.providers.google.cloud.hooks.base.CloudBaseHook.__init__',
             new=mock_base_gcp_hook_default_project_id):
         self.hook = GSheetsHook(gcp_conn_id=GCP_CONN_ID)
コード例 #8
0
class TestGSheetsHook(unittest.TestCase):
    def setUp(self):
        with mock.patch(
                'airflow.providers.google.cloud.hooks.base.CloudBaseHook.__init__',
                new=mock_base_gcp_hook_default_project_id):
            self.hook = GSheetsHook(gcp_conn_id=GCP_CONN_ID)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook._authorize")
    @mock.patch("airflow.providers.google.suite.hooks.sheets.build")
    def test_gsheets_client_creation(self, mock_build, mock_authorize):
        result = self.hook.get_conn()
        mock_build.assert_called_once_with('sheets',
                                           'v4',
                                           http=mock_authorize.return_value,
                                           cache_discovery=False)
        self.assertEqual(mock_build.return_value, result)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_get_values(self, get_conn):
        get_method = get_conn.return_value.spreadsheets.return_value.values.return_value.get
        execute_method = get_method.return_value.execute
        execute_method.return_value = {"values": VALUES}
        result = self.hook.get_values(
            spreadsheet_id=SPREADHSEET_ID,
            range_=RANGE_,
            major_dimension=MAJOR_DIMENSION,
            value_render_option=VALUE_RENDER_OPTION,
            date_time_render_option=DATE_TIME_RENDER_OPTION)
        self.assertIs(result, VALUES)
        execute_method.assert_called_once_with(num_retries=NUM_RETRIES)
        get_method.assert_called_once_with(
            spreadsheetId=SPREADHSEET_ID,
            range=RANGE_,
            majorDimension=MAJOR_DIMENSION,
            valueRenderOption=VALUE_RENDER_OPTION,
            dateTimeRenderOption=DATE_TIME_RENDER_OPTION)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_batch_get_values(self, get_conn):
        batch_get_method = get_conn.return_value.spreadsheets.return_value.values.return_value.batchGet
        execute_method = batch_get_method.return_value.execute
        execute_method.return_value = API_RESPONSE
        result = self.hook.batch_get_values(
            spreadsheet_id=SPREADHSEET_ID,
            ranges=RANGES,
            major_dimension=MAJOR_DIMENSION,
            value_render_option=VALUE_RENDER_OPTION,
            date_time_render_option=DATE_TIME_RENDER_OPTION)
        self.assertIs(result, API_RESPONSE)
        execute_method.assert_called_once_with(num_retries=NUM_RETRIES)
        batch_get_method.assert_called_once_with(
            spreadsheetId=SPREADHSEET_ID,
            ranges=RANGES,
            majorDimension=MAJOR_DIMENSION,
            valueRenderOption=VALUE_RENDER_OPTION,
            dateTimeRenderOption=DATE_TIME_RENDER_OPTION)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_update_values(self, get_conn):
        update_method = get_conn.return_value.spreadsheets.return_value.values.return_value.update
        execute_method = update_method.return_value.execute
        execute_method.return_value = API_RESPONSE
        result = self.hook.update_values(
            spreadsheet_id=SPREADHSEET_ID,
            range_=RANGE_,
            values=VALUES,
            major_dimension=MAJOR_DIMENSION,
            value_input_option=VALUE_INPUT_OPTION,
            include_values_in_response=INCLUDE_VALUES_IN_RESPONSE,
            value_render_option=VALUE_RENDER_OPTION,
            date_time_render_option=DATE_TIME_RENDER_OPTION)
        body = {
            "range": RANGE_,
            "majorDimension": MAJOR_DIMENSION,
            "values": VALUES
        }
        self.assertIs(result, API_RESPONSE)
        execute_method.assert_called_once_with(num_retries=NUM_RETRIES)
        update_method.assert_called_once_with(
            spreadsheetId=SPREADHSEET_ID,
            range=RANGE_,
            valueInputOption=VALUE_INPUT_OPTION,
            includeValuesInResponse=INCLUDE_VALUES_IN_RESPONSE,
            responseValueRenderOption=VALUE_RENDER_OPTION,
            responseDateTimeRenderOption=DATE_TIME_RENDER_OPTION,
            body=body)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_batch_update_values(self, get_conn):
        batch_update_method = get_conn.return_value.spreadsheets.return_value.values.return_value.batchUpdate
        execute_method = batch_update_method.return_value.execute
        execute_method.return_value = API_RESPONSE
        result = self.hook.batch_update_values(
            spreadsheet_id=SPREADHSEET_ID,
            ranges=RANGES,
            values=VALUES_BATCH,
            major_dimension=MAJOR_DIMENSION,
            value_input_option=VALUE_INPUT_OPTION,
            include_values_in_response=INCLUDE_VALUES_IN_RESPONSE,
            value_render_option=VALUE_RENDER_OPTION,
            date_time_render_option=DATE_TIME_RENDER_OPTION)
        data = []
        for idx, range_ in enumerate(RANGES):
            value_range = {
                "range": range_,
                "majorDimension": MAJOR_DIMENSION,
                "values": VALUES_BATCH[idx]
            }
            data.append(value_range)
        body = {
            "valueInputOption": VALUE_INPUT_OPTION,
            "data": data,
            "includeValuesInResponse": INCLUDE_VALUES_IN_RESPONSE,
            "responseValueRenderOption": VALUE_RENDER_OPTION,
            "responseDateTimeRenderOption": DATE_TIME_RENDER_OPTION
        }
        self.assertIs(result, API_RESPONSE)
        execute_method.assert_called_once_with(num_retries=NUM_RETRIES)
        batch_update_method.assert_called_once_with(
            spreadsheetId=SPREADHSEET_ID, body=body)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_batch_update_values_with_bad_data(self, get_conn):
        batch_update_method = get_conn.return_value.spreadsheets.return_value.values.return_value.batchUpdate
        execute_method = batch_update_method.return_value.execute
        execute_method.return_value = API_RESPONSE
        with self.assertRaises(AirflowException) as cm:
            self.hook.batch_update_values(
                spreadsheet_id=SPREADHSEET_ID,
                ranges=['test!A1:B2', 'test!C1:C2'],
                values=[[1, 2, 3]],  # bad data
                major_dimension=MAJOR_DIMENSION,
                value_input_option=VALUE_INPUT_OPTION,
                include_values_in_response=INCLUDE_VALUES_IN_RESPONSE,
                value_render_option=VALUE_RENDER_OPTION,
                date_time_render_option=DATE_TIME_RENDER_OPTION)
        batch_update_method.assert_not_called()
        execute_method.assert_not_called()
        err = cm.exception
        self.assertIn("must be of equal length.", str(err))

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_append_values(self, get_conn):
        append_method = get_conn.return_value.spreadsheets.return_value.values.return_value.append
        execute_method = append_method.return_value.execute
        execute_method.return_value = API_RESPONSE
        result = self.hook.append_values(
            spreadsheet_id=SPREADHSEET_ID,
            range_=RANGE_,
            values=VALUES,
            major_dimension=MAJOR_DIMENSION,
            value_input_option=VALUE_INPUT_OPTION,
            insert_data_option=INSERT_DATA_OPTION,
            include_values_in_response=INCLUDE_VALUES_IN_RESPONSE,
            value_render_option=VALUE_RENDER_OPTION,
            date_time_render_option=DATE_TIME_RENDER_OPTION)
        body = {
            "range": RANGE_,
            "majorDimension": MAJOR_DIMENSION,
            "values": VALUES
        }
        self.assertIs(result, API_RESPONSE)
        execute_method.assert_called_once_with(num_retries=NUM_RETRIES)
        append_method.assert_called_once_with(
            spreadsheetId=SPREADHSEET_ID,
            range=RANGE_,
            valueInputOption=VALUE_INPUT_OPTION,
            insertDataOption=INSERT_DATA_OPTION,
            includeValuesInResponse=INCLUDE_VALUES_IN_RESPONSE,
            responseValueRenderOption=VALUE_RENDER_OPTION,
            responseDateTimeRenderOption=DATE_TIME_RENDER_OPTION,
            body=body)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_clear_values(self, get_conn):
        clear_method = get_conn.return_value.spreadsheets.return_value.values.return_value.clear
        execute_method = clear_method.return_value.execute
        execute_method.return_value = API_RESPONSE
        result = self.hook.clear(spreadsheet_id=SPREADHSEET_ID, range_=RANGE_)

        self.assertIs(result, API_RESPONSE)
        execute_method.assert_called_once_with(num_retries=NUM_RETRIES)
        clear_method.assert_called_once_with(spreadsheetId=SPREADHSEET_ID,
                                             range=RANGE_)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_batch_clear_values(self, get_conn):
        batch_clear_method = get_conn.return_value.spreadsheets.return_value.values.return_value.batchClear
        execute_method = batch_clear_method.return_value.execute
        execute_method.return_value = API_RESPONSE
        result = self.hook.batch_clear(spreadsheet_id=SPREADHSEET_ID,
                                       ranges=RANGES)
        body = {"ranges": RANGES}
        self.assertIs(result, API_RESPONSE)
        execute_method.assert_called_once_with(num_retries=NUM_RETRIES)
        batch_clear_method.assert_called_once_with(
            spreadsheetId=SPREADHSEET_ID, body=body)

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_get_spreadsheet(self, mock_get_conn):
        get_mock = mock_get_conn.return_value.spreadsheets.return_value.get
        get_mock.return_value.execute.return_value = API_RESPONSE

        result = self.hook.get_spreadsheet(spreadsheet_id=SPREADHSEET_ID)

        get_mock.assert_called_once_with(spreadsheetId=SPREADHSEET_ID)
        assert result == API_RESPONSE

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_spreadsheet"
    )
    def test_get_sheet_titles(self, mock_get_spreadsheet):
        sheet1 = {"properties": {"title": "title1"}}
        sheet2 = {"properties": {"title": "title2"}}
        mock_get_spreadsheet.return_value = {"sheets": [sheet1, sheet2]}

        result = self.hook.get_sheet_titles(spreadsheet_id=SPREADHSEET_ID)
        mock_get_spreadsheet.assert_called_once_with(
            spreadsheet_id=SPREADHSEET_ID)
        assert result == ["title1", "title2"]

        result = self.hook.get_sheet_titles(spreadsheet_id=SPREADHSEET_ID,
                                            sheet_filter=["title1"])
        assert result == ["title1"]

    @mock.patch(
        "airflow.providers.google.suite.hooks.sheets.GSheetsHook.get_conn")
    def test_create_spreadsheet(self, mock_get_conn):
        spreadsheet = mock.MagicMock()

        create_mock = mock_get_conn.return_value.spreadsheets.return_value.create
        create_mock.return_value.execute.return_value = API_RESPONSE

        result = self.hook.create_spreadsheet(spreadsheet=spreadsheet)

        create_mock.assert_called_once_with(body=spreadsheet)
        assert result == API_RESPONSE
コード例 #9
0
ファイル: test_sheets.py プロジェクト: yqian1991/airflow
 def setUp(self):
     with mock.patch('airflow.gcp.hooks.base.CloudBaseHook.__init__',
                     new=mock_base_gcp_hook_default_project_id):
         self.hook = GSheetsHook(gcp_conn_id=GCP_CONN_ID,
                                 spreadsheet_id=SPREADHSEET_ID)