def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
    """Discover one Airbyte stream per readable grid sheet in the spreadsheet.

    Empty sheets (no header row) are skipped with a warning; other per-sheet
    failures are logged and the sheet is omitted. Raises a generic Exception
    when the spreadsheet itself cannot be fetched.
    """
    client = GoogleSheetsClient(self.get_credentials(config))
    spreadsheet_id = config["spreadsheet_id"]
    try:
        logger.info(f"Running discovery on sheet {spreadsheet_id}")
        metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
        discovered = []
        for sheet_name in Helpers.get_grid_sheets(metadata):
            try:
                first_row = Helpers.get_first_row(client, spreadsheet_id, sheet_name)
                discovered.append(Helpers.headers_to_airbyte_stream(logger, sheet_name, first_row))
            except Exception as err:
                # An "exactly one row" error is how the helper signals an empty sheet.
                if str(err).startswith("Expected data for exactly one row for sheet"):
                    logger.warn(f"Skip empty sheet: {sheet_name}")
                else:
                    logger.error(str(err))
        return AirbyteCatalog(streams=discovered)
    except errors.HttpError as err:
        # Give a clearer message for the common 404 case.
        reason = "Requested spreadsheet was not found." if err.resp.status == status_codes.NOT_FOUND else str(err)
        raise Exception(f"Could not run discovery: {reason}")
def headers_to_airbyte_stream(logger: AirbyteLogger, sheet_name: str, header_row_values: List[str]) -> AirbyteStream:
    """Build an AirbyteStream describing one sheet from its header row.

    Headers are assumed to be contiguous: every header cell holds a value,
    and the first blank cell ends the header list. For example, a first row
    of "One | Two | | Three" produces the headers ["One", "Two"]. This
    simplifying assumption can be revisited later. Duplicate headers are
    dropped with a warning, and every column is typed as a string.
    """
    headers, duplicates = Helpers.get_valid_headers_and_duplicates(header_row_values)
    if duplicates:
        logger.warn(f"Duplicate headers found in {sheet_name}. Ignoring them :{duplicates}")
    schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        # For simplicity, the type of every cell is a string
        "properties": {header: {"type": "string"} for header in headers},
    }
    return AirbyteStream(name=sheet_name, json_schema=schema)
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
    """Verify the spreadsheet is reachable with the given credentials and is syncable.

    Fails fast on unparsable credentials or an unreachable spreadsheet, then
    scans every grid sheet for duplicate headers, which would make column
    mapping ambiguous during a sync.
    """
    # Credentials must at least parse before we touch the API.
    try:
        client = GoogleSheetsClient(self.get_credentials(config))
    except Exception as e:
        return AirbyteConnectionStatus(status=Status.FAILED, message=f"Please use valid credentials json file. Error: {e}")

    spreadsheet_id = config["spreadsheet_id"]
    # Attempt to get first row of sheet
    try:
        client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
    except errors.HttpError as err:
        # Give a clearer message if it's a common error like 404.
        reason = "Requested spreadsheet was not found." if err.resp.status == status_codes.NOT_FOUND else str(err)
        logger.error(f"Formatted error: {reason}")
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message=f"Unable to connect with the provided credentials to spreadsheet. Error: {reason}",
        )

    # Check for duplicate headers
    spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
    duplicate_headers_in_sheet = {}
    for sheet_name in Helpers.get_grid_sheets(spreadsheet_metadata):
        try:
            first_row = Helpers.get_first_row(client, spreadsheet_id, sheet_name)
            _, dupes = Helpers.get_valid_headers_and_duplicates(first_row)
            if dupes:
                duplicate_headers_in_sheet[sheet_name] = dupes
        except Exception as err:
            # An "exactly one row" error is how the helper signals an empty sheet.
            if str(err).startswith("Expected data for exactly one row for sheet"):
                logger.warn(f"Skip empty sheet: {sheet_name}")
            else:
                logger.error(str(err))
                return AirbyteConnectionStatus(
                    status=Status.FAILED,
                    message=f"Unable to read the schema of sheet {sheet_name}. Error: {str(err)}",
                )

    if duplicate_headers_in_sheet:
        details = ", ".join(
            f"[sheet:{sheet_name}, headers:{dupes}]" for sheet_name, dupes in duplicate_headers_in_sheet.items()
        )
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message="The following duplicate headers were found in the following sheets. Please fix them to continue: " + details,
        )

    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Tests if the input configuration can be used to successfully connect to the integration
        e.g: if a provided Stripe API token can be used to connect to the Stripe API.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config_path: Path to the file containing the configuration json config
    :param config: Json object containing the configuration of this source, content of this json is as specified in
    the properties of the spec.json file

    :return: AirbyteConnectionStatus indicating a Success or Failure
    """
    try:
        # If an app on the appstore does not support subscriptions or sales, it cannot pull the relevant reports.
        # However, the way the Appstore API expresses this is not via clear error messages. Instead it expresses it by throwing an unrelated
        # error, in this case "invalid vendor ID". There is no way to distinguish if this error is due to invalid credentials or due to
        # the account not supporting this kind of report. So to "check connection" we see if any of the reports can be pulled and if so
        # return success. If no reports can be pulled we display the exception messages generated for all reports and return failure.
        api_fields_to_test = {
            "subscription_event_report": {
                "reportType": "SUBSCRIPTION_EVENT",
                "frequency": "DAILY",
                "reportSubType": "SUMMARY",
                "version": "1_2",
            },
            "subscriber_report": {"reportType": "SUBSCRIBER", "frequency": "DAILY", "reportSubType": "DETAILED", "version": "1_2"},
            "subscription_report": {"reportType": "SUBSCRIPTION", "frequency": "DAILY", "reportSubType": "SUMMARY", "version": "1_2"},
            "sales_report": {"reportType": "SALES", "frequency": "DAILY", "reportSubType": "SUMMARY", "version": "1_0"},
        }
        api = Api(config["key_id"], config["key_file"], config["issuer_id"])
        stream_to_error = {}
        # Reports lag behind real time; two days back is the most recent reliably-available date.
        test_date = date.today() - timedelta(days=2)
        for stream, params in api_fields_to_test.items():
            report_filters = {"reportDate": test_date.strftime("%Y-%m-%d"), "vendorNumber": f"{config['vendor']}"}
            report_filters.update(params)
            try:
                rep_tsv = api.download_sales_and_trends_reports(filters=report_filters)
                # A dict payload means the API returned a JSON error body instead of a TSV report.
                if isinstance(rep_tsv, dict):
                    raise Exception(f"An exception occurred: Received a JSON response instead of" f" the report: {str(rep_tsv)}")
            except Exception as e:
                logger.warn(f"Unable to download {stream}: {e}")
                stream_to_error[stream] = e
        # All streams have failed.
        # BUGFIX: the original compared `len(...)` (an int) to `api_fields_to_test.keys()`
        # (a dict_keys view), which is always False, so total failure was never reported.
        if len(stream_to_error) == len(api_fields_to_test):
            # BUGFIX: iterate .items() — iterating the dict directly yields only the keys
            # and the two-name unpacking would raise a ValueError here.
            message = "\n".join(f"Unable to access {stream} due to error: {e}" for stream, e in stream_to_error.items())
            return AirbyteConnectionStatus(status=Status.FAILED, message=message)
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        logger.warn(e)
        return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Tests if the input configuration can be used to successfully connect to the integration
        e.g: if a provided Stripe API token can be used to connect to the Stripe API.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config_path: Path to the file containing the configuration json config
    :param config: Json object containing the configuration of this source, content of this json is as specified in
    the properties of the spec.json file

    :return: AirbyteConnectionStatus indicating a Success or Failure
    """
    try:
        # Probe connectivity by requesting a single daily subscription summary report.
        # Reports lag behind real time, so ask for one from two days ago.
        test_date = date.today() - timedelta(days=2)
        report_filters = {
            "reportDate": test_date.strftime("%Y-%m-%d"),
            "vendorNumber": "{}".format(config["vendor"]),
            "reportType": "SUBSCRIPTION",
            "frequency": "DAILY",
            "reportSubType": "SUMMARY",
            "version": "1_2",
        }
        # fetch data from appstore api
        api = Api(config["key_id"], config["key_file"], config["issuer_id"])
        report = api.download_sales_and_trends_reports(filters=report_filters)
        # A dict payload means the API returned a JSON error body rather than a TSV report.
        if isinstance(report, dict):
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=f"An exception occurred: Received a JSON response instead of" f" the report: {str(report)}",
            )
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        logger.warn(e)
        return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
def read(
    self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """Yield a RECORD message for every row of each recognized configured stream."""
    client = self._client(config)
    logger.info("Starting syncing sendgrid")
    for configured_stream in catalog.streams:
        # TODO handle incremental syncs
        source_stream = configured_stream.stream
        # Skip streams the client does not know how to fetch.
        if source_stream.name not in client.ENTITY_MAP:
            logger.warn(f"Stream '{source_stream}' not found in the recognized entities")
            continue
        for record in self._read_record(client=client, stream=source_stream.name):
            yield AirbyteMessage(type=Type.RECORD, record=record)
    logger.info("Finished syncing sendgrid")
def read(self, logger: AirbyteLogger, config_container: ConfigContainer, catalog_path, state=None) -> Generator[AirbyteMessage, None, None]:
    """Yield a RECORD message for every row of each recognized configured stream.

    The configured catalog is loaded from ``catalog_path``; ``state`` is
    currently unused (no incremental support yet).
    """
    client = self._client(config_container)
    catalog = ConfiguredAirbyteCatalog.parse_obj(self.read_config(catalog_path))
    logger.info("Starting syncing recurly")
    for configured_stream in catalog.streams:
        # TODO handle incremental syncs
        source_stream = configured_stream.stream
        # Skip streams the client does not know how to fetch.
        if source_stream.name not in client.ENTITIES:
            logger.warn(f"Stream '{source_stream}' not found in the recognized entities")
            continue
        for record in self._read_record(client=client, stream=source_stream.name):
            yield AirbyteMessage(type=Type.RECORD, record=record)
    logger.info("Finished syncing recurly")