def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, any]:
    """
    Verify that the configuration can be used against the Google Ads API.

    Every entry of the optional ``custom_queries`` config key is sent once per
    customer, restricted to a single fixed date, so that invalid queries are
    reported during the check.

    :param logger: logger object
    :param config: the user-input config object
    :return Tuple[bool, any]: (True, None) when all requests went through;
        (False, reason) when a custom query contains the cursor field or when a
        GoogleAdsException was raised.
    """
    probe_date = "1980-01-01"
    try:
        logger.info("Checking the config")
        api_client = GoogleAds(credentials=self.get_credentials(config))
        customer_list = Customer.from_accounts(self.get_account_info(api_client, config))

        # Check custom query request validity by sending a metric request with a non-existent time window.
        for customer in customer_list:
            for query_config in config.get("custom_queries", []):
                gaql = query_config.get("query")

                metrics_on_manager = customer.is_manager_account and self.is_metrics_in_custom_query(gaql)
                if metrics_on_manager:
                    logger.warning(
                        f"Metrics are not available for manager account {customer.id}. "
                        f"Please remove metrics fields in your custom query: {gaql}."
                    )

                if CustomQuery.cursor_field in gaql:
                    return False, f"Custom query should not contain {CustomQuery.cursor_field}"

                dated_query = CustomQuery.insert_segments_date_expr(gaql, probe_date, probe_date)
                # Iterate over the response, otherwise exceptions will not be raised!
                for _row in api_client.send_request(dated_query, customer_id=customer.id):
                    pass
        return True, None
    except GoogleAdsException as exception:
        details = ", ".join(error.message for error in exception.failure.errors)
        logger.error(traceback.format_exc())
        return False, f"Unable to connect to Google Ads API with the provided configuration - {details}"
def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
    """
    Ping the exchange rates endpoint for the configured start date.

    :param logger: logger object
    :param config: the user-input config object; ``access_key`` and
        ``start_date`` are read, ``base`` is optional
    :return Tuple[bool, Any]: (True, None) on a 200 response; (False, message)
        built from the JSON error body otherwise; (False, exception) when
        anything raised along the way.
    """
    try:
        query_params = {"access_key": config["access_key"]}
        base_currency = config.get("base")
        if base_currency is not None:
            query_params["base"] = base_currency

        endpoint = f"{ExchangeRates.url_base}{config['start_date']}"
        response = requests.get(endpoint, params=query_params)
        status_code = response.status_code
        logger.info(f"Ping response code: {status_code}")

        if status_code != 200:
            # When the request is sent but the requested data is not available, or the API call
            # fails for some reason, a JSON error is returned.
            # https://exchangeratesapi.io/documentation/#errors
            error_body = response.json().get("error")
            error_code = error_body.get("code")
            reason = error_body.get("message") or error_body.get("info")
            # base_currency_access_restricted is caused by switching the base currency while
            # using the free plan.
            if error_code == "base_currency_access_restricted":
                reason = f"{reason} (this plan doesn't support selecting the base currency)"
            return False, reason

        return True, None
    except Exception as e:
        return False, e
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
    """
    Build the list of Slack streams and, when configured, join the channels first.

    :param config: the user-input config object; reads ``api_token``,
        ``start_date``, ``lookback_window`` and ``join_channels``
    :return List[Stream]: the Channels, ChannelMembers, ChannelMessages, Threads
        and Users streams, all sharing one token authenticator.
    """
    auth = TokenAuthenticator(config["api_token"])
    start_date = pendulum.parse(config["start_date"])
    lookback = pendulum.Duration(days=config["lookback_window"])

    available_streams = [
        Channels(authenticator=auth),
        ChannelMembers(authenticator=auth),
        ChannelMessages(authenticator=auth, default_start_date=start_date),
        Threads(authenticator=auth, default_start_date=start_date, lookback_window=lookback),
        Users(authenticator=auth),
    ]

    if not config["join_channels"]:
        return available_streams

    # To sync data from channels, the bot backed by this token needs to join all those channels.
    # This operation is idempotent.
    logger = AirbyteLogger()
    logger.info("joining Slack channels")
    joiner = JoinChannelsStream(authenticator=auth)
    for channel_slice in joiner.stream_slices():
        join_results = joiner.read_records(sync_mode=SyncMode.full_refresh, stream_slice=channel_slice)
        for result in join_results:
            logger.info(result["message"])

    return available_streams
def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, any]:
    """
    Check the config by constructing a GoogleAds client from it.

    :param logger: logger object
    :param config: the user-input config object; reads ``credentials`` and ``customer_id``
    :return Tuple[bool, any]: (True, None) when the client could be built,
        (False, message) when any exception was raised while doing so.
    """
    try:
        logger.info("Checking the config")
        GoogleAds(credentials=config["credentials"], customer_id=config["customer_id"])
    except Exception as error:
        return False, f"Unable to connect to Google Ads API with the provided credentials - {error!r}"
    return True, None
def create_firebolt_wirter(connection: Connection, config: json, logger: AirbyteLogger) -> FireboltWriter:
    """
    Pick the writer implementation that matches the configured loading method.

    :param connection: connection handed to the writer
    :param config: destination config; ``config["loading_method"]["method"]`` selects the strategy
    :param logger: logger used to report which strategy was chosen
    :return: a FireboltS3Writer when the method is "S3", a FireboltSQLWriter otherwise
    """
    loading_method = config["loading_method"]

    if loading_method["method"] != "S3":
        logger.info("Using the SQL writing strategy")
        return FireboltSQLWriter(connection)

    logger.info("Using the S3 writing strategy")
    return FireboltS3Writer(
        connection,
        loading_method["s3_bucket"],
        loading_method["aws_key_id"],
        loading_method["aws_key_secret"],
        loading_method["s3_region"],
    )
def discover(self, logger: AirbyteLogger, config: Mapping) -> AirbyteCatalog:
    """
    Build an AirbyteCatalog from the streams exposed by the configured client.

    :param logger: logger object
    :param config: the user-input config object, passed to ``self._get_client``
    :return: an AirbyteCatalog holding every stream the client reports
    :raises Exception: whatever the client raised while listing its streams; the
        failure is logged with a traceback before being re-raised.
    """
    client = self._get_client(config)
    stream_name = client.stream_name

    logger.info(f"Discovering schema of {stream_name} at {client.reader.full_url}...")
    try:
        discovered = [*client.streams]
    except Exception as err:
        logger.error(
            f"Failed to discover schemas of {stream_name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
        )
        raise err

    return AirbyteCatalog(streams=discovered)
def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
    """
    Verify that the configuration can be used against the Google Ads API.

    Reads the Accounts stream once and sends every entry of the optional
    ``custom_queries`` config key restricted to a single fixed date.

    :param logger: logger object
    :param config: the user-input config object; reads ``customer_id`` and ``custom_queries``
    :return Tuple[bool, Any]: (True, None) when all requests went through;
        (False, reason) when a custom query contains the cursor field or when a
        GoogleAdsException was raised.
    """
    try:
        logger.info("Checking the config")
        google_api = GoogleAds(credentials=self.get_credentials(config), customer_id=config["customer_id"])
        account_stream = Accounts(api=google_api)
        # Exhaust the records iterator rather than just creating it.
        list(account_stream.read_records(sync_mode=SyncMode.full_refresh))

        # Check custom query request validity by sending a metric request with a non-existent time window.
        for custom_query in config.get("custom_queries", []):
            query = custom_query.get("query")
            if CustomQuery.cursor_field in query:
                # Report the problem through the return value: a generic Exception would not be
                # caught by the handler below and would escape the check entirely.
                return False, f"Custom query should not contain {CustomQuery.cursor_field}"
            req_q = CustomQuery.insert_segments_date_expr(query, "1980-01-01", "1980-01-01")
            # NOTE(review): the response is not iterated here; if send_request is lazy, request
            # errors would not surface during the check - confirm against GoogleAds.send_request.
            google_api.send_request(req_q)
        return True, None
    except GoogleAdsException as error:
        return False, f"Unable to connect to Google Ads API with the provided credentials - {repr(error.failure)}"
def check_connection(self, logger: AirbyteLogger, config: Mapping[str, any]) -> Tuple[bool, any]:
    """
    Check the config by requesting the API root of the configured company.

    :param config: the user-input config object conforming to the connector's spec.json
    :param logger: logger object
    :return Tuple[bool, any]: (True, None) if the input config can be used to connect to the API successfully,
        (False, error) otherwise.
    """
    url_template = "https://{company}.hellobaton.com/api/"
    try:
        params = {
            "api_key": config["api_key"],
        }
        base_url = url_template.format(company=config["company"])
        # This is just going to return a mapping of available endpoints
        response = requests.get(base_url, params=params)
        status_code = response.status_code
        logger.info(f"Status code: {status_code}")
        if status_code == 200:
            return True, None
        # Any other status is a failed check; without this the method would fall off the end
        # and return None instead of the documented tuple.
        return False, f"Unexpected status code {status_code} returned by {base_url}"
    except Exception as e:
        return False, e
def read(
    self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None
) -> Iterator[AirbyteMessage]:
    """
    Overwritten to dynamically receive only those streams that are necessary for reading for significant speed gains
    (Salesforce has a strict API limit on requests).

    :param logger: logger object
    :param config: the user-input config object; split into connector and internal config
    :param catalog: the configured catalog; only its streams are read
    :param state: optional connector state; a deep copy is used so the caller's mapping is not mutated
    :return: the messages produced by reading each configured stream in catalog order
    :raises KeyError: when a configured stream is not among the generated streams
    """
    connector_state = copy.deepcopy(state or {})
    config, internal_config = split_config(config)
    # get the streams once in case the connector needs to make any queries to generate them
    logger.info("Starting generating streams")
    stream_instances = {s.name: s for s in self.streams(config, catalog=catalog)}
    logger.info(f"Starting syncing {self.name}")
    self._stream_to_instance_map = stream_instances
    for configured_stream in catalog.streams:
        stream_name = configured_stream.stream.name
        stream_instance = stream_instances.get(stream_name)
        if not stream_instance:
            raise KeyError(
                f"The requested stream {stream_name} was not found in the source. Available streams: {stream_instances.keys()}"
            )

        try:
            yield from self._read_stream(
                logger=logger,
                stream_instance=stream_instance,
                configured_stream=configured_stream,
                connector_state=connector_state,
                internal_config=internal_config,
            )
        except Exception:
            # Name the stream that failed; self.name identifies the source, not the stream.
            logger.exception(f"Encountered an exception while reading stream {stream_name}")
            raise

    logger.info(f"Finished syncing {self.name}")
def read(
    self, logger: AirbyteLogger, config: Mapping, catalog: ConfiguredAirbyteCatalog, state_path: Mapping[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """
    Yield one RECORD message per row read by the configured client.

    :param logger: logger object
    :param config: the user-input config object, passed to ``self._get_client``
    :param catalog: the configured catalog, used to determine the selected fields
    :param state_path: not used by this implementation
    :return: a generator of AirbyteMessages, one per row
    :raises Exception: whatever the client raised while reading; the failure is
        logged with a traceback before being re-raised.
    """
    client = self._get_client(config)
    selected = self.selected_fields(catalog)
    stream_name = client.stream_name

    logger.info(f"Reading {stream_name} ({client.reader.full_url})...")
    try:
        for row in client.read(fields=selected):
            # Whole seconds scaled by 1000.
            emitted_at = int(datetime.now().timestamp()) * 1000
            yield AirbyteMessage(
                type=Type.RECORD,
                record=AirbyteRecordMessage(stream=stream_name, data=row, emitted_at=emitted_at),
            )
    except Exception as err:
        logger.error(
            f"Failed to read data of {stream_name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
        )
        raise err
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
    """
    Build the list of Slack streams and join the channels before returning it.

    :param config: the user-input config object; only ``api_token`` is read
    :return List[Stream]: the Channels, ChannelMembers, ChannelMessages, Threads
        and Users streams, all sharing one token authenticator.
    """
    auth = TokenAuthenticator(config["api_token"])
    start_date = pendulum.now().subtract(days=14)  # TODO make this configurable
    lookback = {"days": 7}  # TODO make this configurable

    available_streams = [
        Channels(authenticator=auth),
        ChannelMembers(authenticator=auth),
        ChannelMessages(authenticator=auth, default_start_date=start_date),
        Threads(authenticator=auth, default_start_date=start_date, lookback_window=lookback),
        Users(authenticator=auth),
    ]

    # To sync data from channels, the bot backed by this token needs to join all those channels.
    # This operation is idempotent.
    # TODO make joining configurable. Also make joining archived and private channels configurable
    logger = AirbyteLogger()
    logger.info("joining Slack channels")
    joiner = JoinChannelsStream(authenticator=auth)
    for channel_slice in joiner.stream_slices():
        join_results = joiner.read_records(sync_mode=SyncMode.full_refresh, stream_slice=channel_slice)
        for result in join_results:
            logger.info(result["message"])

    return available_streams
def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
    """
    Describe the configured sheet as a catalog containing a single stream.

    :param logger: logger object
    :param config: the user-input config object; reads ``access_token`` and ``spreadsheet_id``
    :return: an AirbyteCatalog with one stream named after the sheet
    :raises Exception: "Could not run discovery: ..." when fetching the sheet or
        building its schema fails.
    """
    token = config["access_token"]
    sheet_id = config["spreadsheet_id"]
    client = smartsheet.Smartsheet(token)

    try:
        raw_sheet = client.Sheets.get_sheet(sheet_id)
        sheet = json.loads(str(raw_sheet))  # make it subscriptable
        schema = get_json_schema(sheet)
        logger.info(f"Running discovery on sheet: {sheet['name']} with {sheet_id}")
        sheet_stream = AirbyteStream(name=sheet["name"], json_schema=schema)
    except Exception as e:
        raise Exception(f"Could not run discovery: {e!s}")

    return AirbyteCatalog(streams=[sheet_stream])
def read(
    self,
    logger: AirbyteLogger,
    config: Mapping[str, Any],
    catalog: ConfiguredAirbyteCatalog,
    state: Optional[MutableMapping[str, Any]] = None,
) -> Iterator[AirbyteMessage]:
    """
    Overwritten to dynamically receive only those streams that are necessary for reading for significant speed gains
    (Salesforce has a strict API limit on requests).

    A 403 response carrying the REQUEST_LIMIT_EXCEEDED error code stops the sync
    early without raising; every other error is re-raised.

    :param logger: logger object
    :param config: the user-input config object; split into connector and internal config
    :param catalog: the configured catalog; only its streams are read
    :param state: optional connector state; a deep copy is used so the caller's mapping is not mutated
    :return: the messages produced by reading each configured stream in catalog order
    :raises KeyError: when a configured stream is not among the generated streams
    """
    connector_state = copy.deepcopy(state or {})
    config, internal_config = split_config(config)
    # get the streams once in case the connector needs to make any queries to generate them
    logger.info("Starting generating streams")
    stream_instances = {s.name: s for s in self.streams(config, catalog=catalog, state=state)}
    logger.info(f"Starting syncing {self.name}")
    self._stream_to_instance_map = stream_instances
    for configured_stream in catalog.streams:
        stream_name = configured_stream.stream.name
        stream_instance = stream_instances.get(stream_name)
        if not stream_instance:
            raise KeyError(
                f"The requested stream {stream_name} was not found in the source. Available streams: {stream_instances.keys()}"
            )

        try:
            yield from self._read_stream(
                logger=logger,
                stream_instance=stream_instance,
                configured_stream=configured_stream,
                connector_state=connector_state,
                internal_config=internal_config,
            )
        except exceptions.HTTPError as error:
            # NOTE(review): assumes the error body is a JSON list whose first item is a dict - confirm.
            error_data = error.response.json()[0]
            error_code = error_data.get("errorCode")
            if error.response.status_code == codes.FORBIDDEN and error_code == "REQUEST_LIMIT_EXCEEDED":
                logger.warn(f"API Call limit is exceeded. Error message: '{error_data.get('message')}'")
                break  # if got 403 rate limit response, finish the sync with success.
            raise
        except Exception:
            # Name the stream that failed; self.name identifies the source, not the stream.
            logger.exception(f"Encountered an exception while reading stream {stream_name}")
            raise

    logger.info(f"Finished syncing {self.name}")
def read(
    self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """
    Yield one RECORD message per row of the configured sheet, for each configured stream.

    Column names come from the stream's JSON schema ``properties`` and are paired
    positionally with the cell values of each row. A stream whose ``properties``
    is neither a list nor a dict is logged and skipped.

    :param logger: logger object
    :param config: the user-input config object; reads ``access_token`` and ``spreadsheet_id``
    :param catalog: the configured catalog whose streams are read
    :param state: not used by this implementation
    :return: a generator of AirbyteMessages, one per row
    :raises Exception: whatever was raised while fetching or iterating the sheet,
        after it has been logged.
    """
    access_token = config["access_token"]
    spreadsheet_id = config["spreadsheet_id"]
    smartsheet_client = smartsheet.Smartsheet(access_token)

    for configured_stream in catalog.streams:
        stream = configured_stream.stream
        properties = stream.json_schema["properties"]
        if isinstance(properties, list):
            columns = tuple(key for dct in properties for key in dct)
        elif isinstance(properties, dict):
            columns = tuple(properties)
        else:
            logger.error("Could not read properties from the JSONschema in this stream")
            # Without column names there is nothing to pair the cells with. Carrying on would
            # either hit an unbound name or silently reuse the previous stream's columns.
            continue

        name = stream.name

        try:
            sheet = smartsheet_client.Sheets.get_sheet(spreadsheet_id)
            sheet = json.loads(str(sheet))  # make it subscriptable
            logger.info(f"Starting syncing spreadsheet {sheet['name']}")
            logger.info(f"Row count: {sheet['totalRowCount']}")

            for row in sheet["rows"]:
                # Cells without a value are emitted as an empty string.
                values = tuple(cell.get("value", "") for cell in row["cells"])
                try:
                    data = dict(zip(columns, values))
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=AirbyteRecordMessage(stream=name, data=data, emitted_at=int(datetime.now().timestamp()) * 1000),
                    )
                except Exception as e:
                    logger.error(f"Unable to encode row into an AirbyteMessage with the following error: {e}")

        except Exception as e:
            logger.error(f"Could not read smartsheet: {name}")
            raise e

    logger.info(f"Finished syncing spreadsheet with ID: {spreadsheet_id}")
def read(
    self,
    logger: AirbyteLogger,
    config: Mapping[str, Any],
    catalog: ConfiguredAirbyteCatalog,
    state: MutableMapping[str, Any] = None,
) -> Iterable[AirbyteMessage]:
    """
    Log three lines (secret only, secret mixed with non-secret text, non-secret
    only) and then yield a single fixed RECORD message.

    :param logger: logger that receives the three info lines
    :param config: not used by this implementation
    :param catalog: not used by this implementation
    :param state: not used by this implementation
    :return: a generator producing exactly one AirbyteMessage
    """
    logger.info(I_AM_A_SECRET_VALUE)
    mixed_line = I_AM_A_SECRET_VALUE + " plus Some non secret Value in the same log record" + NOT_A_SECRET_VALUE
    logger.info(mixed_line)
    logger.info(NOT_A_SECRET_VALUE)

    fixed_record = AirbyteRecordMessage(stream="stream", data={"data": "stuff"}, emitted_at=1)
    yield AirbyteMessage(record=fixed_record, type=Type.RECORD)