def _check_with_catalog(logger: AirbyteLogger, streams: List, config: json):
    """
    Probe the permission-gated GitHub endpoints for the requested streams.

    ``config["repository"]`` is split on single spaces; for every repository
    each requested stream that has a probe URL is fetched with the configured
    access token. On the first response whose status is not OK the repository
    and response body are logged and the process exits with status 1.

    :param logger: Logger used to report the failing repository and response text
    :param streams: Names of the streams to check
    :param config: Source configuration; ``repository`` and ``access_token`` are read
    """
    for repository in config["repository"].split(" "):
        org = repository.split("/")[0]
        # Probe URLs keyed by stream name (status codes as noted by the original author):
        #  - collaborators: Collaborators API; answers 403 if the user is not one of the collaborators
        #  - teams: Teams API; only available to authenticated members of the team's organization,
        #    otherwise 404; answers 401 if the user doesn't have permission
        probe_urls = {
            "collaborators": f"https://api.github.com/repos/{repository}/collaborators",
            "teams": f"https://api.github.com/orgs/{org}/teams?sort=created_at&direction=desc",
        }
        for stream in streams:
            if stream not in probe_urls:
                continue
            response = requests.get(probe_urls[stream], auth=(config["access_token"], ""))
            if response.status_code == requests.codes.ok:
                continue
            logger.log_by_prefix(f"{repository} {response.text}", "ERROR")
            sys.exit(1)
def read(
    self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """
    Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
    catalog, and state.

    Each supported stream is fetched page by page: the ``OBJECTID`` of the last row of a page is stored in
    ``state`` under the stream name and sent as the ``objectid`` query parameter of the next request. Paging
    stops when a request returns an empty result. Streams with an unrecognised name are skipped.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file
    :param catalog: The input catalog is a ConfiguredAirbyteCatalog which is almost the same as AirbyteCatalog
        returned by discover(), but in addition, it's been configured in the UI
    :param state: Checkpoint mapping from stream name to the last seen ``OBJECTID``; it is updated in place.
        A stream without an entry is read from the beginning.
    :return: A generator that produces a stream of AirbyteRecordMessage contained in AirbyteMessage object.
    """
    url_builders = {
        'SiteMetaData': sitemetadata_url,
        'WellScreens': screens_url,
        'ManualGWL': manual_water_levels_url,
        'PressureGWL': pressure_water_levels_url,
        'AcousticGWL': acoustic_water_levels_url,
    }

    for stream in catalog.streams:
        name = stream.stream.name
        logger.debug(f'****** mode {stream.sync_mode} state={state}')

        url_builder = url_builders.get(name)
        if url_builder is None:
            continue
        url = url_builder(config)

        while True:
            # .get() instead of indexing: a plain dict without a checkpoint for this
            # stream must start a fresh read rather than raise KeyError.
            objectid = state.get(name)
            curl = f'{url}?objectid={objectid}' if objectid else url

            logger.info(f'fetching url={curl}')
            jobj = get_json(logger, curl)
            if not jobj:
                break

            # Remember where this page ended so the next request continues after it.
            state[name] = jobj[-1]['OBJECTID']

            for di in jobj:
                di['import_uuid'] = str(uuid.uuid4())
                yield AirbyteMessage(
                    type=Type.RECORD,
                    record=AirbyteRecordMessage(stream=name, data=di, emitted_at=int(datetime.now().timestamp()) * 1000))
def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
    """
    Build a catalog with one stream per sheet of the configured spreadsheet.

    :param logger: Logger used to announce the discovery run
    :param config: Source configuration; ``credentials_json`` and ``spreadsheet_id`` are read
    :return: AirbyteCatalog whose streams are derived from each sheet's first row
    :raises Exception: when the Sheets API reports an HTTP error
    """
    credentials = json.loads(config["credentials_json"])
    client = Helpers.get_authenticated_sheets_client(credentials)
    spreadsheet_id = config["spreadsheet_id"]
    try:
        logger.info(f"Running discovery on sheet {spreadsheet_id}")
        raw_metadata = client.get(spreadsheetId=spreadsheet_id, includeGridData=False).execute()
        spreadsheet_metadata = Spreadsheet.parse_obj(raw_metadata)
        sheet_names = [sheet.properties.title for sheet in spreadsheet_metadata.sheets]
        streams = [
            Helpers.headers_to_airbyte_stream(sheet_name, Helpers.get_first_row(client, spreadsheet_id, sheet_name))
            for sheet_name in sheet_names
        ]
        return AirbyteCatalog(streams=streams)
    except errors.HttpError as err:
        # A 404 gets a friendlier explanation than the raw HTTP error text.
        if err.resp.status == 404:
            reason = "Requested spreadsheet was not found."
        else:
            reason = str(err)
        raise Exception(f"Could not run discovery: {reason}")
def headers_to_airbyte_stream(logger: AirbyteLogger, sheet_name: str, header_row_values: List[str]) -> AirbyteStream:
    """
    Turn a sheet's header row into an AirbyteStream whose columns are all strings.

    The valid headers and the duplicated ones are obtained from
    ``Helpers.get_valid_headers_and_duplicates``. According to the original notes the data is
    assumed to be contiguous: the first empty cell marks the end of the headers, so a first row
    of "One | Two | | Three" is parsed as ["One", "Two"].

    :param logger: Logger used to warn about duplicate headers
    :param sheet_name: Name of the sheet; becomes the stream name
    :param header_row_values: Cell values of the sheet's first row
    :return: AirbyteStream named after the sheet with a draft-07 JSON schema
    """
    fields, duplicate_fields = Helpers.get_valid_headers_and_duplicates(header_row_values)
    if duplicate_fields:
        logger.warn(f"Duplicate headers found in {sheet_name}. Ignoring them :{duplicate_fields}")

    # For simplicity, the type of every cell is a string
    properties = {}
    for field in fields:
        properties[field] = {"type": "string"}

    sheet_json_schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": properties,
    }
    return AirbyteStream(name=sheet_name, json_schema=sheet_json_schema)
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Check the Slack token by listing conversations.

    :param logger: Logger used to report the Slack API error
    :param config_path: Path to the configuration file (not used here)
    :param config: Source configuration; ``token`` is read
    :return: SUCCEEDED when the call works, FAILED with the error's first argument otherwise
    """
    try:
        WebClient(token=config["token"]).conversations_list()
    except SlackApiError as e:
        detail = e.args[0]
        logger.error(f"Got an error: {detail}")
        return AirbyteConnectionStatus(status=Status.FAILED, message=str(detail))
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
    """
    Verify the spreadsheet is reachable and that no sheet has duplicate headers.

    :param logger: Logger used to report errors
    :param config: Source configuration; ``credentials_json`` and ``spreadsheet_id`` are read
    :return: SUCCEEDED, or FAILED with a message naming the connection error, the unreadable
        sheet, or the duplicated headers per sheet
    """
    client = GoogleSheetsClient(json.loads(config["credentials_json"]))
    spreadsheet_id = config["spreadsheet_id"]

    # Step 1: reachability - attempt to get the first row of the sheet.
    try:
        client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
    except errors.HttpError as err:
        # Give a clearer message if it's a common error like 404.
        if err.resp.status == status_codes.NOT_FOUND:
            reason = "Requested spreadsheet was not found."
        else:
            reason = str(err)
        logger.error(f"Formatted error: {reason}")
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message=f"Unable to connect with the provided credentials to spreadsheet. Error: {reason}",
        )

    # Step 2: collect duplicate headers sheet by sheet.
    raw_metadata = client.get(spreadsheetId=spreadsheet_id, includeGridData=False)
    spreadsheet_metadata = Spreadsheet.parse_obj(raw_metadata)
    sheet_names = [sheet.properties.title for sheet in spreadsheet_metadata.sheets]

    duplicates_by_sheet = {}
    for sheet_name in sheet_names:
        try:
            first_row = Helpers.get_first_row(client, spreadsheet_id, sheet_name)
            _, duplicate_headers = Helpers.get_valid_headers_and_duplicates(first_row)
            if duplicate_headers:
                duplicates_by_sheet[sheet_name] = duplicate_headers
        except Exception as err:
            logger.error(str(err))
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=f"Unable to read the schema of sheet {sheet_name}. Error: {str(err)}",
            )

    if not duplicates_by_sheet:
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)

    details = ", ".join(
        f"[sheet:{name}, headers:{headers}]" for name, headers in duplicates_by_sheet.items()
    )
    return AirbyteConnectionStatus(
        status=Status.FAILED,
        message="The following duplicate headers were found in the following sheets. Please fix them to continue: "
        + details,
    )
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Check the configuration by calling ``self.try_connect``.

    :param logger: Logger used to report the connection error
    :param config_path: Path to the configuration file (not used here)
    :param config: Source configuration passed to ``try_connect``
    :return: SUCCEEDED, or FAILED with the error text when ``self.api_error`` is raised
    """
    try:
        self.try_connect(logger, config)
    except self.api_error as err:
        logger.error(f"Exception while connecting to {self.tap_name}: {err}")
        # this should be in UI
        failure_text = f"Unable to connect to {self.tap_name} with the provided credentials. Error: {err}"
        return AirbyteConnectionStatus(status=Status.FAILED, message=failure_text)
    else:
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Check the Marketo connection by running discovery.

    :param logger: Logger used to report the failure and its text
    :param config_path: Path to the configuration file; forwarded to ``self.discover``
    :param config: Source configuration (not used here)
    :return: SUCCEEDED when discovery completes, FAILED with a generic message otherwise
    """
    try:
        self.discover(logger, config_path)
    except Exception as e:
        logger.error("Exception while connecting to the Marketo API")
        logger.error(str(e))
        failure_text = "Unable to connect to the Marketo API with the provided credentials. "
        return AirbyteConnectionStatus(status=Status.FAILED, message=failure_text)
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def read(
    self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """
    Yield the messages produced for every configured stream, in catalog order.

    :param logger: Logger used to mark the start and end of the sync
    :param config: Source configuration used to build the client
    :param catalog: Configured catalog whose streams are read
    :param state: State mapping forwarded to ``self._read_record``
    :return: Generator of the messages delegated from ``self._read_record``
    """
    mailchimp_client = self._client(config)

    logger.info("Starting syncing mailchimp")
    for selected_stream in catalog.streams:
        yield from self._read_record(client=mailchimp_client, configured_stream=selected_stream, state=state)
    logger.info("Finished syncing mailchimp")
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Check the Zendesk Support connection by running discovery.

    :param logger: Logger used to report the failure and the underlying exception text
    :param config_path: Path to the configuration file; forwarded to ``self.discover``
    :param config: Source configuration (not used here)
    :return: SUCCEEDED when discovery completes, FAILED with a generic message otherwise
    """
    try:
        self.discover(logger, config_path)
    except Exception as e:
        logger.error("Exception while connecting to the Zendesk Support API")
        # Log the cause as well; without it the failure cannot be diagnosed from the logs.
        logger.error(str(e))
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message="Unable to connect to the Zendesk Support API with the provided credentials. Please make sure the "
            "input credentials and environment are correct. ",
        )
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Tests if the input configuration can be used to successfully connect to the integration
    by trying to download each supported App Store report.

    The check succeeds when at least one report can be downloaded and fails, listing every
    per-report error, when none can.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config_path: Path to the file containing the configuration json config (not used here)
    :param config: Json object containing the configuration of this source; ``key_id``, ``key_file``,
        ``issuer_id`` and ``vendor`` are read
    :return: AirbyteConnectionStatus indicating a Success or Failure
    """
    try:
        # If an app on the appstore does not support subscriptions or sales, it cannot pull the relevant reports.
        # However, the way the Appstore API expresses this is not via clear error messages. Instead it expresses it
        # by throwing an unrelated error, in this case "invalid vendor ID". There is no way to distinguish if this
        # error is due to invalid credentials or due to the account not supporting this kind of report. So to
        # "check connection" we see if any of the reports can be pulled and if so return success. If no reports can
        # be pulled we display the exception messages generated for all reports and return failure.
        api_fields_to_test = {
            "subscription_event_report": {
                "reportType": "SUBSCRIPTION_EVENT",
                "frequency": "DAILY",
                "reportSubType": "SUMMARY",
                "version": "1_2",
            },
            "subscriber_report": {"reportType": "SUBSCRIBER", "frequency": "DAILY", "reportSubType": "DETAILED", "version": "1_2"},
            "subscription_report": {"reportType": "SUBSCRIPTION", "frequency": "DAILY", "reportSubType": "SUMMARY", "version": "1_2"},
            "sales_report": {"reportType": "SALES", "frequency": "DAILY", "reportSubType": "SUMMARY", "version": "1_0"},
        }

        api = Api(config["key_id"], config["key_file"], config["issuer_id"])
        # The same report date (two days ago) is used for every probe.
        test_date = date.today() - timedelta(days=2)
        stream_to_error = {}

        for stream, params in api_fields_to_test.items():
            report_filters = {"reportDate": test_date.strftime("%Y-%m-%d"), "vendorNumber": f"{config['vendor']}"}
            report_filters.update(params)
            try:
                rep_tsv = api.download_sales_and_trends_reports(filters=report_filters)
                if isinstance(rep_tsv, dict):
                    raise Exception(f"An exception occurred: Received a JSON response instead of" f" the report: {str(rep_tsv)}")
            except Exception as e:
                logger.warn(f"Unable to download {stream}: {e}")
                stream_to_error[stream] = e

        # All streams have failed. Compare the two counts: comparing a count with the
        # keys view itself is never equal, which hid every total failure.
        if len(stream_to_error) == len(api_fields_to_test):
            message = "\n".join(f"Unable to access {stream} due to error: {e}" for stream, e in stream_to_error.items())
            return AirbyteConnectionStatus(status=Status.FAILED, message=message)

        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        logger.warn(e)
        return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Tests if the input configuration can be used to successfully connect to the integration
    by downloading the daily subscription summary report dated two days ago.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config_path: Path to the file containing the configuration json config (not used here)
    :param config: Json object containing the configuration of this source; ``vendor``, ``key_id``,
        ``key_file`` and ``issuer_id`` are read
    :return: AirbyteConnectionStatus indicating a Success or Failure
    """
    try:
        # request fields for the probe report
        report_date = date.today() - timedelta(days=2)
        report_filters = {
            "reportDate": report_date.strftime("%Y-%m-%d"),
            "vendorNumber": f"{config['vendor']}",
            "reportType": "SUBSCRIPTION",
            "frequency": "DAILY",
            "reportSubType": "SUMMARY",
            "version": "1_2",
        }

        # fetch data from appstore api
        api = Api(config["key_id"], config["key_file"], config["issuer_id"])
        rep_tsv = api.download_sales_and_trends_reports(filters=report_filters)

        if not isinstance(rep_tsv, dict):
            return AirbyteConnectionStatus(status=Status.SUCCEEDED)

        # A dict means the API answered with JSON rather than the report itself.
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message=f"An exception occurred: Received a JSON response instead of" f" the report: {str(rep_tsv)}",
        )
    except Exception as e:
        logger.warn(e)
        return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Check the Shopify credentials by reading the shop's name and id through GraphQL.

    :param logger: Logger used to report the connection error
    :param config_path: Path to the configuration file (not used here)
    :param config: Source configuration; ``shop`` and ``api_key`` are read
    :return: SUCCEEDED when the query works, FAILED with a generic message otherwise
    """
    try:
        session = shopify.Session(f"{config['shop']}.myshopify.com", "2020-10", config["api_key"])
        shopify.ShopifyResource.activate_session(session)
        try:
            # try to read the name of the shop, which should be available with any level of permissions
            shopify.GraphQL().execute("{ shop { name id } }")
        finally:
            # Always deactivate the session, including when the query fails.
            shopify.ShopifyResource.clear_session()
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        logger.error(f"Exception connecting to Shopify: {e}")
        return AirbyteConnectionStatus(
            status=Status.FAILED, message="Unable to connect to the Shopify API with the provided credentials."
        )
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    """
    Yield one RECORD message per non-empty, relevant row of every selected sheet.

    Rows are fetched in batches starting at row 2 until the cursor passes the sheet's row
    count or a batch comes back without values.

    :param logger: Logger used to report progress
    :param config: Source configuration; ``credentials_json`` and ``spreadsheet_id`` are read
    :param catalog: Configured catalog from which sheet and column names are taken
    :param state: Not used by this implementation
    :return: Generator of AirbyteMessage records
    """
    client = GoogleSheetsClient(json.loads(config["credentials_json"]))
    sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog)
    spreadsheet_id = config["spreadsheet_id"]

    logger.info(f"Starting syncing spreadsheet {spreadsheet_id}")
    sheet_to_column_index_to_name = Helpers.get_available_sheets_to_column_index_to_name(
        client, spreadsheet_id, sheet_to_column_name)
    sheet_row_counts = Helpers.get_sheet_row_count(client, spreadsheet_id)
    logger.info(f"Row counts: {sheet_row_counts}")

    for sheet, column_index_to_name in sheet_to_column_index_to_name.items():
        logger.info(f"Syncing sheet {sheet}")
        row_cursor = 2  # we start syncing past the header row

        # The first row of each requested interval must exist; the last row may lie outside the
        # sheet, in which case only real data comes back and the next iteration leaves the loop.
        while row_cursor <= sheet_row_counts[sheet]:
            batch_range = f"{sheet}!{row_cursor}:{row_cursor + ROW_BATCH_SIZE}"
            logger.info(f"Fetching range {batch_range}")
            raw_batch = client.get_values(spreadsheetId=spreadsheet_id, ranges=batch_range, majorDimension="ROWS")
            row_batch = SpreadsheetValues.parse_obj(raw_batch)
            row_cursor += ROW_BATCH_SIZE + 1

            # there should always be one range since we requested only one
            row_values = row_batch.valueRanges[0].values
            if not row_values:
                break

            for row in row_values:
                if Helpers.is_row_empty(row):
                    continue
                if not Helpers.row_contains_relevant_data(row, column_index_to_name.keys()):
                    continue
                yield AirbyteMessage(
                    type=Type.RECORD,
                    record=Helpers.row_data_to_record_message(sheet, row, column_index_to_name))

    logger.info(f"Finished syncing spreadsheet {spreadsheet_id}")
def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
    """
    Check the FB Marketing connection by running discovery.

    :param logger: Logger used to report the failure and its text
    :param config_container: Configuration container forwarded to ``self.discover``
    :return: SUCCEEDED when discovery completes, FAILED with a generic message otherwise
    """
    try:
        self.discover(logger, config_container)
    except Exception as e:
        # TODO parse the exception message for a human readable error
        logger.error("Exception while connecting to the FB Marketing API")
        logger.error(str(e))
        failure_text = "Unable to connect to the FB Marketing API with the provided credentials. "
        return AirbyteConnectionStatus(status=Status.FAILED, message=failure_text)
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
    """
    Check the configuration by calling ``self.try_connect`` with the rendered config.

    :param logger: Logger used to report the connection error
    :param config_container: Container whose ``rendered_config`` is passed to ``try_connect``
    :return: SUCCEEDED, or FAILED with the error text when ``self.api_error`` is raised
    """
    try:
        json_config = config_container.rendered_config
        self.try_connect(logger, json_config)
    except self.api_error as err:
        # Build the message up front instead of passing printf-style arguments.
        # NOTE(review): AirbyteLogger.error appears to accept a single message argument,
        # in which case the extra arguments would raise TypeError - confirm.
        logger.error(f"Exception while connecting to the {self.tap_name}: {err}")
        # this should be in UI
        error_msg = f"Unable to connect to {self.tap_name} with the provided credentials. Error: {err}"
        return AirbyteConnectionStatus(status=Status.FAILED, message=error_msg)
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def read(self, logger: AirbyteLogger, config_container: ConfigContainer, catalog_path,
         state_path: str = None) -> Generator[AirbyteMessage, None, None]:
    """
    Yield the messages produced for every stream of the configured catalog.

    :param logger: Logger used to report progress
    :param config_container: Configuration container used to build the client
    :param catalog_path: Path of the configured catalog, loaded through ``self.read_config``
    :param state_path: Optional path to a JSON state file; when falsy the sync starts fresh
    :return: Generator of the messages produced by ``self._read_record``
    """
    client = self._client(config_container)

    if state_path:
        logger.info("Starting sync with provided state file")
        # Context manager so the state file handle is closed deterministically.
        with open(state_path, "r") as state_file:
            state_obj = json.load(state_file)
    else:
        logger.info("No state provided, starting fresh sync")
        state_obj = {}

    # Streams without saved state read as an empty dict.
    state = defaultdict(dict, state_obj)
    catalog = ConfiguredAirbyteCatalog.parse_obj(self.read_config(catalog_path))

    logger.info("Starting syncing mailchimp")
    for configured_stream in catalog.streams:
        stream = configured_stream.stream
        for record in self._read_record(client=client, stream=stream.name, state=state):
            yield record
    logger.info("Finished syncing mailchimp")
def run_load_dataframes(config, expected_columns=10, expected_rows=42):
    """
    Load the dataframes for ``config`` and assert the result's shape.

    :param config: Configuration passed to ``SourceFile.load_dataframes``
    :param expected_columns: Number of columns the single DataFrame must have
    :param expected_rows: Number of rows the single DataFrame must have
    :return: The single loaded DataFrame
    """
    frames = SourceFile.load_dataframes(config=config, logger=AirbyteLogger(), skip_data=False)
    # Exactly one DataFrame must be loaded
    assert len(frames) == 1
    frame = frames[0]
    # Shape must match the expected column and row counts
    assert len(frame.columns) == expected_columns
    assert len(frame.index) == expected_rows
    return frame
def read(self, logger: AirbyteLogger, config_container: ConfigContainer, catalog_path,
         state=None) -> Generator[AirbyteMessage, None, None]:
    """
    Yield a RECORD message for every record of every stream in the configured catalog.

    :param logger: Logger used to mark the start and end of the sync
    :param config_container: Configuration container used to build the client
    :param catalog_path: Path of the configured catalog, loaded through ``self.read_config``
    :param state: Not used by this implementation
    :return: Generator of AirbyteMessage records
    """
    mailchimp_client = self._client(config_container)
    raw_catalog = self.read_config(catalog_path)
    catalog = ConfiguredAirbyteCatalog.parse_obj(raw_catalog)

    logger.info("Starting syncing mailchimp")
    for configured_stream in catalog.streams:
        stream_name = configured_stream.stream.name
        for record in self._read_record(client=mailchimp_client, stream=stream_name):
            yield AirbyteMessage(type=Type.RECORD, record=record)
    logger.info("Finished syncing mailchimp")
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """
    Tests if the input configuration can be used to successfully connect to the integration
    by requesting the installs report export for the configured app.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config_path: Path to the file containing the configuration json config (not used here)
    :param config: Json object containing the configuration of this source; ``api_token`` and
        ``app_id`` are read
    :return: AirbyteConnectionStatus indicating a Success or Failure
    """
    try:
        # ``date`` carries no time of day, so the %H:%M part always renders as 00:00.
        test_date = (date.today() - timedelta(days=2)).strftime("%Y-%m-%d %H:%M")
        params = {"from": test_date, "to": test_date, "api_token": config["api_token"]}

        base_url = "https://hq.appsflyer.com"
        test_endpoint = "/export/{}/installs_report/v5".format(config["app_id"])
        url = base_url + test_endpoint

        # Only the URL is logged; the token travels in the query parameters.
        logger.info("GET {}".format(url))
        resp = requests.get(url, params=params)

        if resp.status_code == 200:
            return AirbyteConnectionStatus(status=Status.SUCCEEDED)

        # Interpolate the real status and body. The previous f-string held the literal
        # placeholders {0} and {1}, so the message always read "Status Code: 0, content: 1".
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message=f"An exception occurred: Status Code: {resp.status_code}, content: {resp.content}",
        )
    except Exception as e:
        return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
def discover(self, logger: AirbyteLogger, config: Mapping) -> AirbyteCatalog:
    """
    Returns an AirbyteCatalog representing the available streams and fields in this integration.

    The streams are taken from the client built for ``config``. If enumerating them fails, the
    error is logged together with its traceback and re-raised.

    :param logger: Logger used to announce discovery and to report failures
    :param config: Source configuration used to build the client
    :return: AirbyteCatalog holding the client's streams
    """
    client = self._get_client(config)
    name = client.stream_name

    logger.info(f"Discovering schema of {name} at {client.reader.full_url}...")
    try:
        discovered_streams = [*client.streams]
    except Exception as err:
        reason = f"Failed to discover schemas of {name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
        logger.error(reason)
        raise err
    else:
        return AirbyteCatalog(streams=discovered_streams)
def read(self, logger: AirbyteLogger, config_container, catalog_path,
         state=None) -> Generator[AirbyteMessage, None, None]:
    """
    Yield one RECORD message per relevant row of every selected sheet.

    Each sheet is read in batches starting at row 2. Reading of a sheet stops at the first
    empty row or at the first batch that comes back without values.

    :param logger: Logger used to report progress
    :param config_container: Container whose ``rendered_config`` holds ``credentials_json``
        and ``spreadsheet_id``
    :param catalog_path: Path of the catalog, loaded through ``self.read_config``
    :param state: Not used by this implementation
    :return: Generator of AirbyteMessage records
    """
    config = config_container.rendered_config
    client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"]))

    catalog = AirbyteCatalog.parse_obj(self.read_config(catalog_path))
    sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog)
    spreadsheet_id = config["spreadsheet_id"]

    logger.info(f"Starting syncing spreadsheet {spreadsheet_id}")
    sheet_to_column_index_to_name = Helpers.get_available_sheets_to_column_index_to_name(
        client, spreadsheet_id, sheet_to_column_name)

    for sheet, column_index_to_name in sheet_to_column_index_to_name.items():
        logger.info(f"Syncing sheet {sheet}")
        row_cursor = 2  # we start syncing past the header row
        hit_blank_row = False

        while not hit_blank_row:
            batch_range = f"{sheet}!{row_cursor}:{row_cursor + ROW_BATCH_SIZE}"
            logger.info(f"Fetching range {batch_range}")
            raw_batch = client.values().batchGet(
                spreadsheetId=spreadsheet_id, ranges=batch_range, majorDimension="ROWS").execute()
            row_batch = SpreadsheetValues.parse_obj(raw_batch)
            row_cursor += ROW_BATCH_SIZE + 1

            # there should always be one range since we requested only one
            row_values = row_batch.valueRanges[0].values
            if not row_values:
                break

            for row in row_values:
                if Helpers.is_row_empty(row):
                    # An empty row ends this sheet: leave the batch and then the while loop.
                    hit_blank_row = True
                    break
                if Helpers.row_contains_relevant_data(row, column_index_to_name.keys()):
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=Helpers.row_data_to_record_message(sheet, row, column_index_to_name))

    logger.info(f"Finished syncing spreadsheet {spreadsheet_id}")
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
    """
    Verify that the configured spreadsheet is reachable with the given credentials.

    :param logger: Logger used to report the HTTP error
    :param config: Source configuration; ``credentials_json`` and ``spreadsheet_id`` are read
    :return: SUCCEEDED, or FAILED with the error text when the Sheets API reports an HTTP error
    """
    credentials = json.loads(config["credentials_json"])
    client = GoogleSheetsClient(credentials)
    spreadsheet_id = config["spreadsheet_id"]

    try:
        # Attempt to get first row of sheet
        client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
    except errors.HttpError as err:
        # Give a clearer message if it's a common error like 404.
        if err.resp.status == status_codes.NOT_FOUND:
            reason = "Requested spreadsheet was not found."
        else:
            reason = str(err)
        logger.error(f"Formatted error: {reason}")
        return AirbyteConnectionStatus(status=Status.FAILED, message=str(reason))
    else:
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def _run_sync_test(conf, catalog):
    """
    Run a sync and assert it produced at least one RECORD and one STATE message.

    :param conf: Source configuration passed to ``SourceInstagram().read``
    :param catalog: Configured catalog passed to ``SourceInstagram().read``
    """
    messages = list(SourceInstagram().read(AirbyteLogger(), conf, catalog))
    records = [message for message in messages if message.type == Type.RECORD]
    state = [message for message in messages if message.type == Type.STATE]

    assert len(records) > 0
    assert len(state) > 0
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
    """
    Check the Smartsheet credentials by fetching the configured sheet.

    :param logger: Logger used to report the failure reason
    :param config: Source configuration; ``access_token`` and ``spreadsheet_id`` are read
    :return: SUCCEEDED when the sheet can be fetched, FAILED with the reason otherwise
    """
    try:
        access_token = config["access_token"]
        spreadsheet_id = config["spreadsheet_id"]

        smartsheet_client = smartsheet.Smartsheet(access_token)
        # Have the client raise on API errors so they reach the handler below.
        smartsheet_client.errors_as_exceptions(True)
        smartsheet_client.Sheets.get_sheet(spreadsheet_id)

        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        if isinstance(e, smartsheet.exceptions.ApiError):
            err = e.error.result
            # Error code 1006 is reported as 404 in the message.
            code = 404 if err.code == 1006 else err.code
            reason = f"{err.name}: {code} - {err.message} | Check your spreadsheet ID."
        else:
            reason = str(e)
        logger.error(reason)
        # Return the reason to the caller too; previously it was only logged.
        return AirbyteConnectionStatus(status=Status.FAILED, message=reason)
def test_streams_outputs_records(self, config_credentials, configured_catalog):
    """
    Using standard tests is unreliable for Agent Activities and Agent Overview streams,
    because the data there changes in real-time, therefore additional pytests are used.

    Asserts that reading the configured catalog yields at least one RECORD message.
    """
    messages = SourceZendeskTalk().read(AirbyteLogger(), config_credentials, configured_catalog)
    records = [message for message in messages if message.type == Type.RECORD]
    assert len(records) > 0
def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
    """
    Check the Braintree credentials by searching the transactions created in the last day.

    Only ``AuthenticationError`` is handled; any other exception propagates to the caller.

    :param logger: Logger used to report the authentication failure
    :param config_container: Container whose ``rendered_config`` holds ``environment``,
        ``merchant_id``, ``public_key`` and ``private_key``
    :return: SUCCEEDED when the search works, FAILED when authentication is rejected
    """
    try:
        json_config = config_container.rendered_config
        # The environment is looked up by name on braintree.Environment.
        environment = getattr(braintree.Environment, json_config["environment"])
        configuration = braintree.Configuration(
            environment=environment,
            merchant_id=json_config["merchant_id"],
            public_key=json_config["public_key"],
            private_key=json_config["private_key"],
        )
        client = braintree.BraintreeGateway(configuration)

        window_start = datetime.now() + relativedelta(days=-1)
        window_end = datetime.now()
        client.transaction.search(braintree.TransactionSearch.created_at.between(window_start, window_end))
    except AuthenticationError:
        logger.error("Exception while connecting to the Braintree API")
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message="Unable to connect to the Braintree API with the provided credentials. Please make sure the input credentials and environment are correct.",
        )
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
    """
    Build a catalog with one stream per grid sheet whose header row can be read.

    A sheet whose error text starts with "Expected data for exactly one row for sheet" is
    skipped with a warning; any other per-sheet error is logged and the sheet is left out.

    :param logger: Logger used for progress, warnings and errors
    :param config: Source configuration; ``spreadsheet_id`` is read and the whole config is
        passed to ``self.get_credentials``
    :return: AirbyteCatalog of the readable sheets
    :raises Exception: when the Sheets API reports an HTTP error
    """
    client = GoogleSheetsClient(self.get_credentials(config))
    spreadsheet_id = config["spreadsheet_id"]
    try:
        logger.info(f"Running discovery on sheet {spreadsheet_id}")
        raw_metadata = client.get(spreadsheetId=spreadsheet_id, includeGridData=False)
        spreadsheet_metadata = Spreadsheet.parse_obj(raw_metadata)
        grid_sheets = Helpers.get_grid_sheets(spreadsheet_metadata)

        streams = []
        for sheet_name in grid_sheets:
            try:
                header_row_data = Helpers.get_first_row(client, spreadsheet_id, sheet_name)
                streams.append(Helpers.headers_to_airbyte_stream(logger, sheet_name, header_row_data))
            except Exception as err:
                error_text = str(err)
                if not error_text.startswith("Expected data for exactly one row for sheet"):
                    logger.error(error_text)
                    continue
                logger.warn(f"Skip empty sheet: {sheet_name}")
        return AirbyteCatalog(streams=streams)
    except errors.HttpError as err:
        # A missing spreadsheet gets a friendlier explanation than the raw HTTP error text.
        if err.resp.status == status_codes.NOT_FOUND:
            reason = "Requested spreadsheet was not found."
        else:
            reason = str(err)
        raise Exception(f"Could not run discovery: {reason}")
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    """
    Log in, run the configured query and yield one RECORD message per result item.

    :param logger: Logger used to report the call, the query and failures
    :param config: Source configuration; ``url``, ``username``, ``access_token`` and ``query`` are read
    :param catalog: Not used by this implementation
    :param state: Not used by this implementation
    :return: Generator of AirbyteMessage records on the ``DATASET_ITEMS_STREAM_NAME`` stream
    """
    logger.info("read called")

    url = config["url"]
    username = config["username"]
    key = config["access_token"]

    client = WSClient(url)
    # The login result itself is not needed afterwards.
    client.do_login(username, key, withpassword=False)

    query = config["query"]
    logger.info(query)
    data = client.do_query(query)

    try:
        for item in data:
            record = AirbyteRecordMessage(
                stream=DATASET_ITEMS_STREAM_NAME,
                data=item,
                emitted_at=int(datetime.now().timestamp()) * 1000,
            )
            yield AirbyteMessage(type=Type.RECORD, record=record)
    except Exception as err:
        reason = f"Failed to read data of {DATASET_ITEMS_STREAM_NAME} at {url}"
        logger.error(reason)
        raise err
def read_stream(
    source: SourceHubspot, config: Mapping, catalog: ConfiguredAirbyteCatalog, state: MutableMapping = None
) -> Tuple[Mapping, List]:
    """
    Run ``source.read`` and split its output into records per stream and state payloads.

    :param source: Source whose ``read`` method is invoked
    :param config: Source configuration passed to ``read``
    :param catalog: Configured catalog passed to ``read``
    :param state: Optional state passed to ``read``
    :return: Tuple of (records grouped by stream name, list of state payloads in emission order)
    """
    records_by_stream = {}
    state_payloads = []

    for message in source.read(AirbyteLogger(), config, catalog, state):
        if message.type == Type.RECORD:
            record = message.record
            records_by_stream.setdefault(record.stream, []).append(record)
        elif message.type == Type.STATE:
            state_payloads.append(message.state)

    return records_by_stream, state_payloads