def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
    """Build a catalog with one stream per sheet returned by ``Helpers.get_grid_sheets``.

    A sheet whose header row cannot be converted into a stream is left out of
    the catalog; the failure is logged (as a warning for the "empty sheet"
    case, as an error otherwise) and discovery continues with the next sheet.

    :param logger: receives progress and per-sheet failure messages.
    :param config: connector configuration; ``spreadsheet_id`` is read here and
        the whole object is handed to ``self.get_credentials``.
    :return: catalog holding every stream that could be built.
    :raises Exception: when an ``errors.HttpError`` escapes the metadata request.
    """
    sheets_client = GoogleSheetsClient(self.get_credentials(config))
    sheet_id = config["spreadsheet_id"]
    try:
        logger.info(f"Running discovery on sheet {sheet_id}")
        raw_metadata = sheets_client.get(spreadsheetId=sheet_id, includeGridData=False)
        metadata = Spreadsheet.parse_obj(raw_metadata)
        titles = Helpers.get_grid_sheets(metadata)
        discovered = []
        for title in titles:
            try:
                first_row = Helpers.get_first_row(sheets_client, sheet_id, title)
                discovered.append(Helpers.headers_to_airbyte_stream(logger, title, first_row))
            except Exception as sheet_err:
                text = str(sheet_err)
                # This message prefix is how an empty sheet is recognised.
                if text.startswith("Expected data for exactly one row for sheet"):
                    logger.warn(f"Skip empty sheet: {title}")
                else:
                    logger.error(text)
        return AirbyteCatalog(streams=discovered)
    except errors.HttpError as http_err:
        detail = str(http_err)
        if http_err.resp.status == status_codes.NOT_FOUND:
            # Replace the raw HTTP text with a clearer message for a 404.
            detail = "Requested spreadsheet was not found."
        raise Exception(f"Could not run discovery: {detail}")
def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
    """Build a catalog with one stream per sheet listed in the spreadsheet metadata.

    A sheet whose header row cannot be converted into a stream is logged as an
    error and left out of the catalog; discovery continues with the next sheet.

    :param logger: receives progress and per-sheet failure messages.
    :param config: connector configuration; reads ``credentials_json`` (a JSON
        string) and ``spreadsheet_id``.
    :return: catalog holding every stream that could be built.
    :raises Exception: when an ``errors.HttpError`` escapes the metadata request.
    """
    credentials = json.loads(config["credentials_json"])
    sheets_client = GoogleSheetsClient(credentials)
    sheet_id = config["spreadsheet_id"]
    try:
        logger.info(f"Running discovery on sheet {sheet_id}")
        raw_metadata = sheets_client.get(spreadsheetId=sheet_id, includeGridData=False)
        metadata = Spreadsheet.parse_obj(raw_metadata)
        # Collect all titles up front, before any per-sheet request is made.
        titles = [entry.properties.title for entry in metadata.sheets]
        discovered = []
        for title in titles:
            try:
                first_row = Helpers.get_first_row(sheets_client, sheet_id, title)
                discovered.append(Helpers.headers_to_airbyte_stream(title, first_row))
            except Exception as sheet_err:
                logger.error(str(sheet_err))
        return AirbyteCatalog(streams=discovered)
    except errors.HttpError as http_err:
        detail = str(http_err)
        if http_err.resp.status == status_codes.NOT_FOUND:
            # Replace the raw HTTP text with a clearer message for a 404.
            detail = "Requested spreadsheet was not found."
        raise Exception(f"Could not run discovery: {detail}")
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    """Run the configured query and emit each returned item as a record.

    Every record is emitted on the ``DATASET_ITEMS_STREAM_NAME`` stream.
    ``catalog`` and ``state`` are accepted for interface compatibility but are
    not consulted.

    :param logger: receives progress messages and the failure notice.
    :param config: connector configuration; reads ``url``, ``username``,
        ``access_token`` and ``query``.
    :param catalog: unused.
    :param state: unused.
    :return: generator of RECORD messages, one per item yielded by the query.
    :raises Exception: whatever is raised while iterating the query result is
        logged and re-raised unchanged.
    """
    logger.info("read called")
    url = config["url"]
    username = config["username"]
    key = config["access_token"]

    client = WSClient(url)
    # NOTE(review): the value returned by do_login is not inspected here, so a
    # rejected login only surfaces when the query runs - confirm whether
    # do_login signals failure through its return value.
    client.do_login(username, key, withpassword=False)

    query = config["query"]
    logger.info(query)
    data = client.do_query(query)

    try:
        for single_dict in data:
            yield AirbyteMessage(
                type=Type.RECORD,
                record=AirbyteRecordMessage(
                    stream=DATASET_ITEMS_STREAM_NAME,
                    data=single_dict,
                    emitted_at=int(datetime.now().timestamp()) * 1000,
                ),
            )
    except Exception:
        logger.error(f"Failed to read data of {DATASET_ITEMS_STREAM_NAME} at {url}")
        # Bare raise keeps the original traceback intact.
        raise
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """Verify the configured token by issuing a ``conversations_list`` call.

    :param logger: receives the error text when the call is rejected.
    :param config_path: unused.
    :param config: connector configuration; reads ``token``.
    :return: SUCCEEDED when the call goes through, FAILED (with the first
        argument of the ``SlackApiError`` as message) otherwise.
    """
    try:
        WebClient(token=config["token"]).conversations_list()
    except SlackApiError as api_err:
        detail = api_err.args[0]
        logger.error(f"Got an error: {detail}")
        return AirbyteConnectionStatus(status=Status.FAILED, message=str(detail))
    else:
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
    """Verify the spreadsheet is reachable and that no sheet has duplicate headers.

    The check runs in two stages: a request for range ``1:1`` proves the
    spreadsheet can be reached with the given credentials, then the header row
    of every sheet is inspected for duplicates.

    :param logger: receives the error text for any failed stage.
    :param config: connector configuration; reads ``credentials_json`` (a JSON
        string) and ``spreadsheet_id``.
    :return: SUCCEEDED when both stages pass; FAILED with an explanatory
        message when the spreadsheet is unreachable, a sheet's header row
        cannot be read, or duplicate headers are found.
    """
    sheets_client = GoogleSheetsClient(json.loads(config["credentials_json"]))
    sheet_id = config["spreadsheet_id"]

    # Stage 1: reachability probe.
    try:
        sheets_client.get(spreadsheetId=sheet_id, includeGridData=False, ranges="1:1")
    except errors.HttpError as http_err:
        detail = str(http_err)
        if http_err.resp.status == status_codes.NOT_FOUND:
            # Replace the raw HTTP text with a clearer message for a 404.
            detail = "Requested spreadsheet was not found."
        logger.error(f"Formatted error: {detail}")
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message=f"Unable to connect with the provided credentials to spreadsheet. Error: {detail}",
        )

    # Stage 2: collect duplicate headers per sheet.
    raw_metadata = sheets_client.get(spreadsheetId=sheet_id, includeGridData=False)
    metadata = Spreadsheet.parse_obj(raw_metadata)
    titles = [entry.properties.title for entry in metadata.sheets]

    duplicates_by_sheet = {}
    for title in titles:
        try:
            first_row = Helpers.get_first_row(sheets_client, sheet_id, title)
            _valid, repeated = Helpers.get_valid_headers_and_duplicates(first_row)
            if repeated:
                duplicates_by_sheet[title] = repeated
        except Exception as sheet_err:
            logger.error(str(sheet_err))
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=f"Unable to read the schema of sheet {title}. Error: {str(sheet_err)}",
            )

    if not duplicates_by_sheet:
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)

    listing = ", ".join(
        [f"[sheet:{title}, headers:{headers}]" for title, headers in duplicates_by_sheet.items()]
    )
    return AirbyteConnectionStatus(
        status=Status.FAILED,
        message="The following duplicate headers were found in the following sheets. Please fix them to continue: "
        + listing,
    )
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """Report whether ``self.try_connect`` succeeds with the given configuration.

    :param logger: passed to ``try_connect`` and used to log a failure.
    :param config_path: unused.
    :param config: connector configuration forwarded to ``try_connect``.
    :return: SUCCEEDED when ``try_connect`` returns normally, FAILED (with the
        error text in the message) when it raises ``self.api_error``.
    """
    try:
        self.try_connect(logger, config)
    except self.api_error as conn_err:
        logger.error(f"Exception while connecting to {self.tap_name}: {conn_err}")
        # this should be in UI
        failure_text = f"Unable to connect to {self.tap_name} with the provided credentials. Error: {conn_err}"
        return AirbyteConnectionStatus(status=Status.FAILED, message=failure_text)
    else:
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """Validate the connection by running ``self.discover`` against the config file.

    :param logger: passed to ``discover`` and used to log a failure.
    :param config_path: path handed to ``discover``.
    :param config: unused.
    :return: SUCCEEDED when ``discover`` returns normally, FAILED when it
        raises anything.
    """
    failure = AirbyteConnectionStatus(
        status=Status.FAILED, message="Unable to connect to the Marketo API with the provided credentials. "
    )
    try:
        self.discover(logger, config_path)
    except Exception as exc:
        logger.error("Exception while connecting to the Marketo API")
        logger.error(str(exc))
        return failure
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """Validate the connection by running ``self.discover`` against the config file.

    :param logger: passed to ``discover`` and used to log a failure.
    :param config_path: path handed to ``discover``.
    :param config: unused.
    :return: SUCCEEDED when ``discover`` returns normally, FAILED when it
        raises anything.
    """
    try:
        self.discover(logger, config_path)
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        logger.error("Exception while connecting to the Zendesk Support API")
        # Log the cause as well; without it the log gives no hint why the
        # connection attempt failed.
        logger.error(str(e))
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message="Unable to connect to the Zendesk Support API with the provided credentials. Please make sure the "
            "input credentials and environment are correct. ",
        )
def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
    """Validate the credentials by reading the shop name through GraphQL.

    :param logger: receives the error text when the probe fails.
    :param config_path: unused.
    :param config: connector configuration; reads ``shop`` and ``api_key``.
    :return: SUCCEEDED when the probe query runs, FAILED when anything raises.
    """
    try:
        session = shopify.Session(f"{config['shop']}.myshopify.com", "2020-10", config["api_key"])
        shopify.ShopifyResource.activate_session(session)
        try:
            # try to read the name of the shop, which should be available with any level of permissions
            shopify.GraphQL().execute("{ shop { name id } }")
        finally:
            # Always deactivate the session, including when the probe raises,
            # so a failed check does not leave it active.
            shopify.ShopifyResource.clear_session()
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        logger.error(f"Exception connecting to Shopify: {e}")
        return AirbyteConnectionStatus(
            status=Status.FAILED, message="Unable to connect to the Shopify API with the provided credentials."
        )
def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
    """Validate the connection by running ``self.discover`` with the given config.

    :param logger: passed to ``discover`` and used to log a failure.
    :param config_container: configuration container handed to ``discover``.
    :return: SUCCEEDED when ``discover`` returns normally, FAILED when it
        raises anything.
    """
    failure = AirbyteConnectionStatus(
        status=Status.FAILED,
        message="Unable to connect to the FB Marketing API with the provided credentials. ",
    )
    try:
        self.discover(logger, config_container)
    except Exception as exc:
        # TODO parse the exception message for a human readable error
        logger.error("Exception while connecting to the FB Marketing API")
        logger.error(str(exc))
        return failure
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
    """Report whether ``self.try_connect`` succeeds with the rendered configuration.

    :param logger: passed to ``try_connect`` and used to log a failure.
    :param config_container: container whose ``rendered_config`` is forwarded
        to ``try_connect``.
    :return: SUCCEEDED when ``try_connect`` returns normally, FAILED (with the
        error text in the message) when ``self.api_error`` is raised.
    """
    try:
        json_config = config_container.rendered_config
        self.try_connect(logger, json_config)
    except self.api_error as err:
        # Format the message before logging: every other logger call in these
        # connectors passes a single, already-formatted string, so the text
        # must not depend on printf-style argument support in the logger.
        logger.error(f"Exception while connecting to the {self.tap_name}: {err}")
        # this should be in UI
        error_msg = f"Unable to connect to {self.tap_name} with the provided credentials. Error: {err}"
        return AirbyteConnectionStatus(status=Status.FAILED, message=error_msg)
    return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def read(
    self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """Emit the records of every configured stream that the client knows about.

    Streams whose name is not a key of ``client.ENTITY_MAP`` are skipped. A
    request failure while reading one stream is logged and the sync moves on to
    the next stream.

    :param logger: receives progress and failure messages.
    :param config: connector configuration passed to ``self._get_client``.
    :param catalog: configured catalog whose streams are read in order.
    :param state: unused.
    :return: generator of RECORD messages.
    """
    client = self._get_client(config)

    logger.info(f"Starting syncing {self.__class__.__name__}")
    for configured_stream in catalog.streams:
        stream = configured_stream.stream
        if stream.name not in client.ENTITY_MAP:
            continue
        try:
            for record in self._read_record(client=client, stream=stream.name):
                yield AirbyteMessage(type=Type.RECORD, record=record)
        except requests.exceptions.RequestException as err:
            # Best effort: keep syncing the remaining streams, but record the
            # cause so the failure can be diagnosed from the log.
            logger.error(f"Get {stream.name} error: {err}")
    logger.info(f"Finished syncing {self.__class__.__name__}")
def discover(self, logger: AirbyteLogger, config: Mapping) -> AirbyteCatalog:
    """
    Return an AirbyteCatalog listing the streams exposed by the configured client.

    For example, given valid credentials to a Remote CSV File, returns an Airbyte catalog where each csv file is a stream,
    and each column is a field.

    :param logger: receives the progress message and, on failure, the error with its traceback.
    :param config: connector configuration passed to ``self._get_client``.
    :return: catalog built from ``client.streams``.
    :raises Exception: whatever is raised while collecting ``client.streams`` is logged and re-raised.
    """
    source_client = self._get_client(config)
    stream_label = source_client.stream_name

    logger.info(f"Discovering schema of {stream_label} at {source_client.reader.full_url}...")
    try:
        found = list(source_client.streams)
    except Exception as err:
        failure_text = (
            f"Failed to discover schemas of {stream_label} at {source_client.reader.full_url}: "
            f"{repr(err)}\n{traceback.format_exc()}"
        )
        logger.error(failure_text)
        raise err
    return AirbyteCatalog(streams=found)
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
    """Verify that the configured sheet can be fetched with the access token.

    :param logger: receives the failure reason.
    :param config: connector configuration; reads ``access_token`` and
        ``spreadsheet_id``.
    :return: SUCCEEDED when the sheet is fetched; FAILED, carrying the failure
        reason as its message, when anything raises.
    """
    try:
        access_token = config["access_token"]
        spreadsheet_id = config["spreadsheet_id"]

        smartsheet_client = smartsheet.Smartsheet(access_token)
        smartsheet_client.errors_as_exceptions(True)
        smartsheet_client.Sheets.get_sheet(spreadsheet_id)

        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        if isinstance(e, smartsheet.exceptions.ApiError):
            err = e.error.result
            # Error code 1006 is reported to the user as 404.
            code = 404 if err.code == 1006 else err.code
            reason = f"{err.name}: {code} - {err.message} | Check your spreadsheet ID."
        else:
            reason = str(e)
        logger.error(reason)
        # Surface the reason to the caller too, not only to the log.
        return AirbyteConnectionStatus(status=Status.FAILED, message=reason)
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
    """Verify that the specified spreadsheet is reachable with the given credentials.

    :param logger: receives the error text when the request fails.
    :param config: connector configuration; reads ``credentials_json`` (a JSON
        string) and ``spreadsheet_id``.
    :return: SUCCEEDED when the request for range ``1:1`` goes through, FAILED
        (with the error text as message) on an ``errors.HttpError``.
    """
    sheets_client = GoogleSheetsClient(json.loads(config["credentials_json"]))
    sheet_id = config["spreadsheet_id"]
    try:
        # Requesting the first row is enough to prove the sheet is reachable.
        sheets_client.get(spreadsheetId=sheet_id, includeGridData=False, ranges="1:1")
    except errors.HttpError as http_err:
        detail = str(http_err)
        if http_err.resp.status == status_codes.NOT_FOUND:
            # Replace the raw HTTP text with a clearer message for a 404.
            detail = "Requested spreadsheet was not found."
        logger.error(f"Formatted error: {detail}")
        return AirbyteConnectionStatus(status=Status.FAILED, message=str(detail))
    else:
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
    """Validate the credentials by searching transactions created in the last day.

    Only ``AuthenticationError`` is handled; any other exception propagates to
    the caller.

    :param logger: receives the failure notice.
    :param config_container: container whose ``rendered_config`` supplies
        ``environment``, ``merchant_id``, ``public_key`` and ``private_key``.
    :return: SUCCEEDED when the search call goes through, FAILED on an
        ``AuthenticationError``.
    """
    try:
        settings = config_container.rendered_config
        gateway_config = braintree.Configuration(
            environment=getattr(braintree.Environment, settings["environment"]),
            merchant_id=settings["merchant_id"],
            public_key=settings["public_key"],
            private_key=settings["private_key"],
        )
        gateway = braintree.BraintreeGateway(gateway_config)
        window_start = datetime.now() + relativedelta(days=-1)
        window_end = datetime.now()
        gateway.transaction.search(braintree.TransactionSearch.created_at.between(window_start, window_end))
    except AuthenticationError:
        logger.error("Exception while connecting to the Braintree API")
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message="Unable to connect to the Braintree API with the provided credentials. Please make sure the input credentials and environment are correct.",
        )
    else:
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
def read(
    self, logger: AirbyteLogger, config: Mapping, catalog: ConfiguredAirbyteCatalog, state_path: Mapping[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """Yield one RECORD message per row read from the configured client.

    :param logger: receives the progress message and, on failure, the error with its traceback.
    :param config: connector configuration passed to ``self._get_client``.
    :param catalog: configured catalog; ``self.selected_fields`` derives the fields to read from it.
    :param state_path: unused.
    :return: generator of RECORD messages on the client's stream.
    :raises Exception: whatever is raised while reading is logged and re-raised.
    """
    source_client = self._get_client(config)
    wanted_fields = self.selected_fields(catalog)
    stream_label = source_client.stream_name

    logger.info(f"Reading {stream_label} ({source_client.reader.full_url})...")
    try:
        for row in source_client.read(fields=wanted_fields):
            emitted_ms = int(datetime.now().timestamp()) * 1000
            yield AirbyteMessage(
                type=Type.RECORD,
                record=AirbyteRecordMessage(stream=stream_label, data=row, emitted_at=emitted_ms),
            )
    except Exception as err:
        failure_text = (
            f"Failed to read data of {stream_label} at {source_client.reader.full_url}: "
            f"{repr(err)}\n{traceback.format_exc()}"
        )
        logger.error(failure_text)
        raise err
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    """Emit every row of the configured sheet once per configured stream.

    Column names come from the ``properties`` of each stream's JSON schema and
    are paired positionally with the cell values of each row.

    :param logger: receives progress and failure messages.
    :param config: connector configuration; reads ``access_token`` and
        ``spreadsheet_id``.
    :param catalog: configured catalog whose streams are processed in order.
    :param state: unused.
    :return: generator of RECORD messages.
    :raises Exception: a failure to fetch or walk the sheet is logged and
        re-raised; a failure to encode a single row is only logged.
    """
    access_token = config["access_token"]
    spreadsheet_id = config["spreadsheet_id"]
    smartsheet_client = smartsheet.Smartsheet(access_token)

    for configured_stream in catalog.streams:
        stream = configured_stream.stream
        properties = stream.json_schema["properties"]
        if isinstance(properties, list):
            columns = tuple(key for dct in properties for key in dct)
        elif isinstance(properties, dict):
            columns = tuple(properties)
        else:
            logger.error("Could not read properties from the JSONschema in this stream")
            # Skip the stream: continuing would either hit an unbound
            # `columns` or silently reuse the previous stream's columns.
            continue

        name = stream.name
        try:
            sheet = smartsheet_client.Sheets.get_sheet(spreadsheet_id)
            sheet = json.loads(str(sheet))  # make it subscriptable
            logger.info(f"Starting syncing spreadsheet {sheet['name']}")
            logger.info(f"Row count: {sheet['totalRowCount']}")

            for row in sheet["rows"]:
                values = tuple(cell["value"] for cell in row["cells"])
                try:
                    data = dict(zip(columns, values))
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=AirbyteRecordMessage(
                            stream=name, data=data, emitted_at=int(datetime.now().timestamp()) * 1000
                        ),
                    )
                except Exception as e:
                    logger.error(f"Unable to encode row into an AirbyteMessage with the following error: {e}")
        except Exception:
            logger.error(f"Could not read smartsheet: {name}")
            # Bare raise keeps the original traceback intact.
            raise
    logger.info(f"Finished syncing spreadsheet with ID: {spreadsheet_id}")
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    """
    Returns a generator of the AirbyteMessages generated by reading the source with the given configuration.

    Three API methods are requested in turn - site metadata, sensor metadata and sensor data - and every
    returned row is emitted as a record on the stream named after the method. A failure while requesting or
    converting one stream is logged with its traceback and the next stream is attempted.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config: Json object containing the configuration of this source. The data API URL and system key
        are read from it; if it has an entry keyed by a stream name, that entry's items are added to the
        request's query parameters.
    :param catalog: The configured catalog. It is not consulted: all three streams are always read.
    :param state: Checkpoint state from previous runs. It is not consulted: every run reads everything.

    :return: A generator that produces a stream of AirbyteRecordMessage contained in AirbyteMessage object.
    """

    def get_request_url(stream, config):
        """Build the request URL for one API method from the configuration."""
        query_params = dict()
        data_api_url = config[ConfigPropDataApiUrl]
        query_params[ConfigPropSystemKey] = config[ConfigPropSystemKey]
        if stream in config:
            for stream_prop in config[stream]:
                query_params[stream_prop] = config[stream][stream_prop]
        return f'{data_api_url}?method={stream}&{urlencode(query_params)}'

    def assert_onerain_response(response_object, expect_http_code):
        """Validate a response and return its rows (an empty list when there are none)."""
        assert isinstance(expect_http_code, int)
        assert response_object.status_code == expect_http_code

        # Parse the response that was passed in rather than reaching for a
        # variable of the enclosing function.
        doc = xmltodict.parse(response_object.text)
        assert 'onerain' in doc
        if 'error' in doc['onerain']:
            err_msg = doc['onerain']['error']
            raise ValueError(err_msg)

        results = []
        try:
            rows = doc['onerain']['response']['general']['row']
            # Probe: indexing with 0 raises unless `rows` is a non-empty
            # sequence, in which case the empty result is kept.
            # NOTE(review): presumably a response holding exactly one row is
            # parsed as a single mapping and is therefore dropped here -
            # confirm that this is intended.
            rows[0]
            results = rows
        except Exception as e:
            logger.debug(f'no records: {e}')
        return results

    def record_message(stream, data):
        """Wrap one row's data in a RECORD message stamped with the current time."""
        return AirbyteMessage(
            type=Type.RECORD,
            record=AirbyteRecordMessage(
                stream=stream, data=data, emitted_at=int(datetime.now().timestamp()) * 1000),
        )

    # RETRIEVE SITE METADATA AND RETURN AS STREAM
    stream_name = StreamGetSiteMetaData
    req_url = get_request_url(stream_name, config)
    # NOTE(review): the logged URL includes the system key query parameter -
    # confirm that writing it to the log is acceptable.
    logger.info(f'requesting {req_url}')
    try:
        r = requests.get(req_url)
        results = assert_onerain_response(r, 200)
        for row in results:
            data = {
                'or_site_id': int(row['or_site_id']),
                'site_id': row['site_id'],
                'location': row['location'],
                'owner': row['owner'],
                'system_id': int(row['system_id']),
                'client_id': row['client_id'],
                'latitude_dec': float(row['latitude_dec']),
                'longitude_dec': float(row['longitude_dec']),
                'elevation': int(row['elevation']),
            }
            yield record_message(stream_name, data)
    except Exception:
        logger.error(f'failed to process stream {stream_name}: {traceback.format_exc()}')

    # RETRIEVE SENSOR METADATA AND RETURN AS STREAM
    stream_name = StreamGetSensorMetaData
    req_url = get_request_url(stream_name, config)
    logger.info(f'requesting {req_url}')
    try:
        r = requests.get(req_url)
        results = assert_onerain_response(r, 200)
        for row in results:
            # NOTE: 'change_time' is not part of the emitted record.
            data = {
                'site_id': row['site_id'],
                'sensor_id': int(row['sensor_id']),
                'or_site_id': int(row['or_site_id']),
                'or_sensor_id': int(row['or_sensor_id']),
                'location': row['location'],
                'description': row['description'],
                'sensor_class': int(row['sensor_class']),
                'sensor_type': row['sensor_type'],
                'units': row['units'],
                'translate': str_to_bool(row['translate']),
                'precision': int(row['precision']),
                'last_time': row['last_time'],
                'last_value': row['last_value'],
                'last_time_received': row['last_time_received'],
                'last_value_received': float(row['last_value_received']),
                'last_raw_value': float(row['last_raw_value']),
                'last_raw_value_received': float(row['last_raw_value_received']),
                'normal': int(row['normal']),
                'active': int(row['active']),
                'valid': int(row['valid']),
                'change_rate': float(row['change_rate']),
                'time_min_consec_zeros': int(row['time_min_consec_zeros']),
                'validation': row['validation'],
                'value_max': float(row['value_max']),
                'value_min': float(row['value_min']),
                'delta_pos': float(row['delta_pos']),
                'delta_neg': float(row['delta_neg']),
                'time_max': int(row['time_max']),
                'time_min': int(row['time_min']),
                'slope': float(row['slope']),
                'offset': float(row['offset']),
                'reference': float(row['reference']),
                'utc_offset': int(row['utc_offset']),
                'using_dst': str_to_bool(row['using_dst']),
                'conversion': row['conversion'],
                'usage': row['usage'],
                'protocol': int(row['protocol']),
            }
            yield record_message(stream_name, data)
    except Exception:
        logger.error(f'failed to process stream {stream_name}: {traceback.format_exc()}')

    # RETRIEVE SENSOR DATA AND RETURN AS STREAM
    stream_name = StreamGetSensorData
    req_url = get_request_url(stream_name, config)
    logger.info(f'requesting {req_url}')
    try:
        r = requests.get(req_url)
        results = assert_onerain_response(r, 200)
        for row in results:
            data = {
                'site_id': row['site_id'],
                'sensor_id': row['sensor_id'],
                'or_site_id': int(row['or_site_id']),
                'or_sensor_id': int(row['or_sensor_id']),
                'sensor_class': int(row['sensor_class']),
                'data_time': row['data_time'],
                'data_value': float(row['data_value']),
                'raw_value': float(row['raw_value']),
                'units': row['units'],
            }
            yield record_message(stream_name, data)
    except Exception:
        logger.error(f'failed to process stream {stream_name}: {traceback.format_exc()}')