Esempio n. 1
0
    def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
        """Build a catalog with one stream per grid sheet of the configured spreadsheet.

        The spreadsheet named by ``config["spreadsheet_id"]`` is fetched without
        grid data and the first row of every sheet returned by
        ``Helpers.get_grid_sheets`` is turned into a stream. A sheet whose
        processing raises is left out of the catalog: the failure is logged as
        a warning when its message marks the sheet as empty, and as an error
        otherwise.

        :param logger: receives progress and per-sheet failure messages
        :param config: connector configuration; ``spreadsheet_id`` is read here
        :return: catalog holding the streams that could be built
        :raises Exception: when an ``errors.HttpError`` is raised outside the
            per-sheet handling
        """
        sheets_client = GoogleSheetsClient(self.get_credentials(config))
        spreadsheet_id = config["spreadsheet_id"]
        try:
            logger.info(f"Running discovery on sheet {spreadsheet_id}")
            raw_metadata = sheets_client.get(spreadsheetId=spreadsheet_id, includeGridData=False)
            metadata = Spreadsheet.parse_obj(raw_metadata)
            sheet_titles = Helpers.get_grid_sheets(metadata)
            discovered = []
            for title in sheet_titles:
                try:
                    first_row = Helpers.get_first_row(sheets_client, spreadsheet_id, title)
                    discovered.append(Helpers.headers_to_airbyte_stream(logger, title, first_row))
                except Exception as sheet_err:
                    message = str(sheet_err)
                    # This message prefix is how an empty sheet is recognised.
                    if message.startswith("Expected data for exactly one row for sheet"):
                        logger.warn(f"Skip empty sheet: {title}")
                    else:
                        logger.error(message)
            return AirbyteCatalog(streams=discovered)

        except errors.HttpError as http_err:
            # Give a clearer message for a missing spreadsheet.
            if http_err.resp.status == status_codes.NOT_FOUND:
                detail = "Requested spreadsheet was not found."
            else:
                detail = str(http_err)
            raise Exception(f"Could not run discovery: {detail}")
Esempio n. 2
0
    def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
        """Build a catalog with one stream per sheet of the configured spreadsheet.

        Credentials are parsed from ``config["credentials_json"]``. The
        spreadsheet metadata is fetched without grid data and the first row of
        each sheet is converted into a stream. A sheet whose processing raises
        is logged as an error and omitted from the catalog.

        :param logger: receives progress and per-sheet failure messages
        :param config: connector configuration; ``credentials_json`` and
            ``spreadsheet_id`` are read here
        :return: catalog holding the streams that could be built
        :raises Exception: when an ``errors.HttpError`` is raised outside the
            per-sheet handling
        """
        sheets_client = GoogleSheetsClient(json.loads(config["credentials_json"]))
        spreadsheet_id = config["spreadsheet_id"]
        try:
            logger.info(f"Running discovery on sheet {spreadsheet_id}")
            raw_metadata = sheets_client.get(spreadsheetId=spreadsheet_id, includeGridData=False)
            metadata = Spreadsheet.parse_obj(raw_metadata)
            titles = [sheet.properties.title for sheet in metadata.sheets]
            discovered = []
            for title in titles:
                try:
                    first_row = Helpers.get_first_row(sheets_client, spreadsheet_id, title)
                    discovered.append(Helpers.headers_to_airbyte_stream(title, first_row))
                except Exception as sheet_err:
                    logger.error(str(sheet_err))
            return AirbyteCatalog(streams=discovered)

        except errors.HttpError as http_err:
            # Give a clearer message for a missing spreadsheet.
            if http_err.resp.status == status_codes.NOT_FOUND:
                detail = "Requested spreadsheet was not found."
            else:
                detail = str(http_err)
            raise Exception(f"Could not run discovery: {detail}")
Esempio n. 3
0
    def read(self, logger: AirbyteLogger, config: json,
             catalog: ConfiguredAirbyteCatalog,
             state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
        """Run the configured query and yield every result as a record message.

        A ``WSClient`` is created for ``config["url"]``, logged in with
        ``config["username"]`` and ``config["access_token"]``, and asked to run
        ``config["query"]``. Each item of the result is emitted on the
        ``DATASET_ITEMS_STREAM_NAME`` stream.

        :param logger: receives progress and failure messages
        :param config: connector configuration; ``url``, ``username``,
            ``access_token`` and ``query`` are read here
        :param catalog: accepted for interface compatibility; not consulted
        :param state: accepted for interface compatibility; not consulted
        :return: generator of RECORD messages
        :raises Exception: whatever is raised while the results are iterated,
            re-raised unchanged after being logged
        """
        logger.info("read called")

        url = config["url"]
        username = config["username"]
        key = config["access_token"]
        client = WSClient(url)
        # The return value is not used afterwards; presumably the call sets up
        # the session on the client -- TODO confirm against WSClient.
        client.do_login(username, key, withpassword=False)
        query = config["query"]
        logger.info(query)
        data = client.do_query(query)
        try:
            for single_dict in data:
                yield AirbyteMessage(
                    type=Type.RECORD,
                    record=AirbyteRecordMessage(
                        stream=DATASET_ITEMS_STREAM_NAME,
                        data=single_dict,
                        emitted_at=int(datetime.now().timestamp()) * 1000),
                )
        except Exception as err:
            # Include the underlying error so the log line is actionable.
            reason = f"Failed to read data of {DATASET_ITEMS_STREAM_NAME} at {url}: {repr(err)}"
            logger.error(reason)
            raise
Esempio n. 4
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the configured token by listing conversations through ``WebClient``.

     :param logger: receives the error text when the API call fails
     :param config_path: not used by this check
     :param config: connector configuration; ``token`` is read here
     :return: SUCCEEDED when the call completes, otherwise FAILED carrying the
         first argument of the raised ``SlackApiError`` as its message
     """
     try:
         WebClient(token=config["token"]).conversations_list()
         outcome = AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except SlackApiError as api_err:
         detail = api_err.args[0]
         logger.error(f"Got an error: {detail}")
         outcome = AirbyteConnectionStatus(status=Status.FAILED, message=str(detail))
     return outcome
Esempio n. 5
0
    def check(self, logger: AirbyteLogger,
              config: json) -> AirbyteConnectionStatus:
        """Check that the spreadsheet is reachable and has no duplicate headers.

        The check runs in two stages. First the range ``1:1`` of the
        spreadsheet is requested; an ``errors.HttpError`` here yields a FAILED
        status. Then the first row of every sheet is inspected with
        ``Helpers.get_valid_headers_and_duplicates``; a sheet that cannot be
        read, or any sheet with duplicate headers, also yields FAILED.

        :param logger: receives the text of any failure
        :param config: connector configuration; ``credentials_json`` and
            ``spreadsheet_id`` are read here
        :return: SUCCEEDED when both stages pass, otherwise FAILED with a message
        """
        sheets_client = GoogleSheetsClient(json.loads(config["credentials_json"]))
        spreadsheet_id = config["spreadsheet_id"]

        # Stage 1: is the spreadsheet reachable with these credentials?
        try:
            sheets_client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
        except errors.HttpError as http_err:
            # Give a clearer message if it's a common error like 404.
            if http_err.resp.status == status_codes.NOT_FOUND:
                reason = "Requested spreadsheet was not found."
            else:
                reason = str(http_err)
            logger.error(f"Formatted error: {reason}")
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=f"Unable to connect with the provided credentials to spreadsheet. Error: {reason}",
            )

        # Stage 2: collect duplicate headers per sheet.
        raw_metadata = sheets_client.get(spreadsheetId=spreadsheet_id, includeGridData=False)
        metadata = Spreadsheet.parse_obj(raw_metadata)
        titles = [sheet.properties.title for sheet in metadata.sheets]
        duplicates_by_sheet = {}
        for sheet_name in titles:
            try:
                first_row = Helpers.get_first_row(sheets_client, spreadsheet_id, sheet_name)
                _valid, repeated = Helpers.get_valid_headers_and_duplicates(first_row)
                if repeated:
                    duplicates_by_sheet[sheet_name] = repeated
            except Exception as err:
                logger.error(str(err))
                return AirbyteConnectionStatus(
                    status=Status.FAILED,
                    message=f"Unable to read the schema of sheet {sheet_name}. Error: {str(err)}",
                )

        if not duplicates_by_sheet:
            return AirbyteConnectionStatus(status=Status.SUCCEEDED)

        listing = ", ".join(
            f"[sheet:{sheet_name}, headers:{duplicate_sheet_headers}]"
            for sheet_name, duplicate_sheet_headers in duplicates_by_sheet.items()
        )
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message="The following duplicate headers were found in the following sheets. Please fix them to continue: "
            + listing,
        )
Esempio n. 6
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the configuration by delegating to ``self.try_connect``.

     :param logger: passed to ``try_connect`` and used to report a failure
     :param config_path: not used by this check
     :param config: connector configuration handed to ``try_connect``
     :return: SUCCEEDED when ``try_connect`` returns; FAILED with the error
         text when it raises ``self.api_error``. Other exceptions propagate.
     """
     try:
         self.try_connect(logger, config)
     except self.api_error as connect_err:
         logger.error(f"Exception while connecting to {self.tap_name}: {connect_err}")
         # this should be in UI
         return AirbyteConnectionStatus(
             status=Status.FAILED,
             message=f"Unable to connect to {self.tap_name} with the provided credentials. Error: {connect_err}",
         )
     else:
         return AirbyteConnectionStatus(status=Status.SUCCEEDED)
Esempio n. 7
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the connection by running ``self.discover`` with ``config_path``.

     :param logger: passed to ``discover`` and used to report a failure
     :param config_path: forwarded to ``discover``
     :param config: not used by this check
     :return: SUCCEEDED when ``discover`` returns; FAILED with a fixed message
         when it raises any ``Exception`` (the error text goes to the log only)
     """
     try:
         self.discover(logger, config_path)
         outcome = AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except Exception as exc:
         logger.error("Exception while connecting to the Marketo API")
         logger.error(str(exc))
         outcome = AirbyteConnectionStatus(
             status=Status.FAILED,
             message="Unable to connect to the Marketo API with the provided credentials. ",
         )
     return outcome
Esempio n. 8
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the connection by running ``self.discover`` with ``config_path``.

     :param logger: passed to ``discover`` and used to report a failure
     :param config_path: forwarded to ``discover``
     :param config: not used by this check
     :return: SUCCEEDED when ``discover`` returns; FAILED with a fixed message
         when it raises any ``Exception`` (the exception itself is not logged)
     """
     try:
         self.discover(logger, config_path)
         outcome = AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except Exception:
         logger.error("Exception while connecting to the Zendesk Support API")
         failure_text = (
             "Unable to connect to the Zendesk Support API with the provided credentials.  Please make sure the "
             "input credentials and environment are correct. "
         )
         outcome = AirbyteConnectionStatus(status=Status.FAILED, message=failure_text)
     return outcome
Esempio n. 9
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the credentials by running a minimal GraphQL query.

     A session for ``<shop>.myshopify.com`` (API version ``2020-10``) is
     activated, the shop name and id are requested, and the session is
     cleared again whether or not the query succeeded.

     :param logger: receives the error text when the check fails
     :param config_path: not used by this check
     :param config: connector configuration; ``shop`` and ``api_key`` are read
     :return: SUCCEEDED when the query completes; FAILED with a fixed message
         when any ``Exception`` is raised
     """
     try:
         session = shopify.Session(f"{config['shop']}.myshopify.com", "2020-10", config["api_key"])
         shopify.ShopifyResource.activate_session(session)
         try:
             # try to read the name of the shop, which should be available with any level of permissions
             shopify.GraphQL().execute("{ shop { name id } }")
         finally:
             # Always deactivate the session so a failed probe does not leave
             # these credentials active on ShopifyResource.
             shopify.ShopifyResource.clear_session()
         return AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except Exception as e:
         logger.error(f"Exception connecting to Shopify: {e}")
         return AirbyteConnectionStatus(
             status=Status.FAILED, message="Unable to connect to the Shopify API with the provided credentials."
         )
Esempio n. 10
0
 def check(self, logger: AirbyteLogger,
           config_container: ConfigContainer) -> AirbyteConnectionStatus:
     """Check the connection by running ``self.discover``.

     :param logger: passed to ``discover`` and used to report a failure
     :param config_container: forwarded to ``discover`` unchanged
     :return: SUCCEEDED when ``discover`` returns; FAILED with a fixed message
         when it raises any ``Exception`` (the error text goes to the log only)
     """
     try:
         self.discover(logger, config_container)
         outcome = AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except Exception as exc:
         # TODO parse the exception message for a human readable error
         logger.error("Exception while connecting to the FB Marketing API")
         logger.error(str(exc))
         outcome = AirbyteConnectionStatus(
             status=Status.FAILED,
             message="Unable to connect to the FB Marketing API with the provided credentials. ",
         )
     return outcome
Esempio n. 11
0
    def check(self, logger: AirbyteLogger,
              config_container: ConfigContainer) -> AirbyteConnectionStatus:
        """Check the configuration by delegating to ``self.try_connect``.

        :param logger: passed to ``try_connect`` and used to report a failure
        :param config_container: its ``rendered_config`` is handed to
            ``try_connect``
        :return: SUCCEEDED when ``try_connect`` returns; FAILED with the error
            text when it raises ``self.api_error``. Other exceptions propagate.
        """
        try:
            json_config = config_container.rendered_config
            self.try_connect(logger, json_config)
        except self.api_error as err:
            # Hand the logger one ready-made string. Passing printf-style
            # arguments only works for loggers that accept them and raises
            # TypeError on a logger whose error() takes a single message.
            logger.error(f"Exception while connecting to the {self.tap_name}: {err}")
            # this should be in UI
            error_msg = f"Unable to connect to {self.tap_name} with the provided credentials. Error: {err}"
            return AirbyteConnectionStatus(status=Status.FAILED,
                                           message=error_msg)

        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
Esempio n. 12
0
    def read(
        self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
    ) -> Generator[AirbyteMessage, None, None]:
        """Yield the records of every configured stream the client knows about.

        Streams whose name is not a key of ``client.ENTITY_MAP`` are skipped.
        A ``requests`` error while reading one stream is logged and the sync
        moves on to the next stream.

        :param logger: receives progress and failure messages
        :param config: connector configuration handed to ``self._get_client``
        :param catalog: configured catalog whose streams are read in order
        :param state: accepted for interface compatibility; not consulted
        :return: generator of RECORD messages
        """
        client = self._get_client(config)

        logger.info(f"Starting syncing {self.__class__.__name__}")
        for configured in catalog.streams:
            stream_name = configured.stream.name
            if stream_name in client.ENTITY_MAP.keys():
                try:
                    for entry in self._read_record(client=client, stream=stream_name):
                        yield AirbyteMessage(type=Type.RECORD, record=entry)
                except requests.exceptions.RequestException:
                    logger.error(f"Get {stream_name} error")
        logger.info(f"Finished syncing {self.__class__.__name__}")
Esempio n. 13
0
    def discover(self, logger: AirbyteLogger,
                 config: Mapping) -> AirbyteCatalog:
        """
        Return an AirbyteCatalog built from the streams exposed by the client.

        The client comes from ``self._get_client(config)`` and its ``streams``
        are materialised into a list. If that fails, the error is logged with
        its repr and traceback and then re-raised.
        """
        client = self._get_client(config)
        name = client.stream_name

        logger.info(
            f"Discovering schema of {name} at {client.reader.full_url}...")
        try:
            found = list(client.streams)
        except Exception as err:
            logger.error(
                f"Failed to discover schemas of {name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
            )
            raise err
        else:
            return AirbyteCatalog(streams=found)
Esempio n. 14
0
    def check(self, logger: AirbyteLogger,
              config: json) -> AirbyteConnectionStatus:
        """Check that the configured sheet can be fetched with the access token.

        :param logger: receives the failure reason
        :param config: connector configuration; ``access_token`` and
            ``spreadsheet_id`` are read here
        :return: SUCCEEDED when the sheet is fetched; otherwise FAILED with the
            failure reason as its message
        """
        try:
            access_token = config["access_token"]
            spreadsheet_id = config["spreadsheet_id"]

            smartsheet_client = smartsheet.Smartsheet(access_token)
            # Make the SDK raise on API errors so they reach the handlers below.
            smartsheet_client.errors_as_exceptions(True)
            smartsheet_client.Sheets.get_sheet(spreadsheet_id)

            return AirbyteConnectionStatus(status=Status.SUCCEEDED)
        except smartsheet.exceptions.ApiError as e:
            err = e.error.result
            # Error code 1006 is reported as 404 (presumably Smartsheet's
            # "not found" code -- confirm against the API error list).
            code = 404 if err.code == 1006 else err.code
            reason = f"{err.name}: {code} - {err.message} | Check your spreadsheet ID."
        except Exception as e:
            reason = str(e)
        logger.error(reason)
        # Surface the reason to the caller instead of only logging it.
        return AirbyteConnectionStatus(status=Status.FAILED, message=reason)
Esempio n. 15
0
    def check(self, logger: AirbyteLogger,
              config: json) -> AirbyteConnectionStatus:
        """Check that the configured spreadsheet is reachable with the credentials.

        The range ``1:1`` of the spreadsheet is requested; the result itself is
        not inspected.

        :param logger: receives the failure reason
        :param config: connector configuration; ``credentials_json`` and
            ``spreadsheet_id`` are read here
        :return: SUCCEEDED when the request completes; FAILED with the reason
            when it raises ``errors.HttpError``. Other exceptions propagate.
        """
        sheets_client = GoogleSheetsClient(json.loads(config["credentials_json"]))
        spreadsheet_id = config["spreadsheet_id"]
        try:
            sheets_client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
        except errors.HttpError as http_err:
            # Give a clearer message if it's a common error like 404.
            if http_err.resp.status == status_codes.NOT_FOUND:
                reason = "Requested spreadsheet was not found."
            else:
                reason = str(http_err)
            logger.error(f"Formatted error: {reason}")
            return AirbyteConnectionStatus(status=Status.FAILED, message=str(reason))
        else:
            return AirbyteConnectionStatus(status=Status.SUCCEEDED)
Esempio n. 16
0
 def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
     """Check the credentials by searching the last day's transactions.

     A gateway is built from ``environment``, ``merchant_id``, ``public_key``
     and ``private_key`` of the rendered configuration. The search result is
     not inspected.

     :param logger: receives a fixed message when authentication fails
     :param config_container: its ``rendered_config`` supplies the settings
     :return: SUCCEEDED when the search completes; FAILED with a fixed message
         on ``AuthenticationError``. Other exceptions propagate.
     """
     try:
         settings = config_container.rendered_config
         gateway_config = braintree.Configuration(
             environment=getattr(braintree.Environment, settings["environment"]),
             merchant_id=settings["merchant_id"],
             public_key=settings["public_key"],
             private_key=settings["private_key"],
         )
         gateway = braintree.BraintreeGateway(gateway_config)
         window_start = datetime.now() + relativedelta(days=-1)
         window_end = datetime.now()
         gateway.transaction.search(braintree.TransactionSearch.created_at.between(window_start, window_end))
         return AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except AuthenticationError:
         logger.error("Exception while connecting to the Braintree API")
         failure_text = "Unable to connect to the Braintree API with the provided credentials. Please make sure the input credentials and environment are correct."
         return AirbyteConnectionStatus(status=Status.FAILED, message=failure_text)
Esempio n. 17
0
    def read(
        self, logger: AirbyteLogger, config: Mapping,
        catalog: ConfiguredAirbyteCatalog,
        state_path: Mapping[str,
                            any]) -> Generator[AirbyteMessage, None, None]:
        """Yield one RECORD message per row read from the client.

        The client comes from ``self._get_client(config)`` and is asked for the
        fields chosen by ``self.selected_fields(catalog)``. If reading fails,
        the error is logged with its repr and traceback and then re-raised.
        ``state_path`` is not consulted.
        """
        client = self._get_client(config)
        wanted_fields = self.selected_fields(catalog)
        name = client.stream_name

        logger.info(f"Reading {name} ({client.reader.full_url})...")
        try:
            for row in client.read(fields=wanted_fields):
                emitted_at = int(datetime.now().timestamp()) * 1000
                yield AirbyteMessage(
                    type=Type.RECORD,
                    record=AirbyteRecordMessage(stream=name, data=row, emitted_at=emitted_at),
                )
        except Exception as err:
            logger.error(
                f"Failed to read data of {name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
            )
            raise err
Esempio n. 18
0
    def read(self, logger: AirbyteLogger, config: json,
             catalog: ConfiguredAirbyteCatalog,
             state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
        """Yield every row of the configured sheet as a RECORD message.

        For each configured stream the column names are taken from the
        ``properties`` of the stream's JSON schema, the sheet identified by
        ``config["spreadsheet_id"]`` is fetched, and each row's cell values are
        zipped with the column names. A stream whose ``properties`` is neither
        a list nor a dict is logged and skipped.

        :param logger: receives progress and failure messages
        :param config: connector configuration; ``access_token`` and
            ``spreadsheet_id`` are read here
        :param catalog: configured catalog whose streams are read in order
        :param state: accepted for interface compatibility; not consulted
        :return: generator of RECORD messages
        :raises Exception: whatever fails while fetching or walking the sheet,
            re-raised after being logged
        """
        access_token = config["access_token"]
        spreadsheet_id = config["spreadsheet_id"]
        smartsheet_client = smartsheet.Smartsheet(access_token)

        for configured_stream in catalog.streams:
            stream = configured_stream.stream
            name = stream.name
            properties = stream.json_schema["properties"]
            if isinstance(properties, list):
                columns = tuple(key for dct in properties for key in dct)
            elif isinstance(properties, dict):
                columns = tuple(properties)
            else:
                logger.error(
                    "Could not read properties from the JSONschema in this stream"
                )
                # Without column names the rows cannot be labelled. Skip the
                # stream rather than fall through with `columns` undefined or
                # left over from the previous stream.
                continue

            try:
                sheet = smartsheet_client.Sheets.get_sheet(spreadsheet_id)
                sheet = json.loads(str(sheet))  # make it subscriptable
                logger.info(f"Starting syncing spreadsheet {sheet['name']}")
                logger.info(f"Row count: {sheet['totalRowCount']}")

                for row in sheet["rows"]:
                    # NOTE(review): a cell without a "value" key raises KeyError
                    # here and aborts the sync -- confirm whether the API can
                    # omit it.
                    values = tuple(cell["value"] for cell in row["cells"])
                    try:
                        data = dict(zip(columns, values))

                        yield AirbyteMessage(
                            type=Type.RECORD,
                            record=AirbyteRecordMessage(
                                stream=name,
                                data=data,
                                emitted_at=int(datetime.now().timestamp()) *
                                1000),
                        )
                    except Exception as e:
                        logger.error(
                            f"Unable to encode row into an AirbyteMessage with the following error: {e}"
                        )

            except Exception:
                logger.error(f"Could not read smartsheet: {name}")
                raise
        logger.info(f"Finished syncing spreadsheet with ID: {spreadsheet_id}")
Esempio n. 19
0
    def read(self, logger: AirbyteLogger, config: json,
             catalog: ConfiguredAirbyteCatalog,
             state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
        """
        Fetch three OneRain datasets in turn and yield their rows as records.

        The streams ``StreamGetSiteMetaData``, ``StreamGetSensorMetaData`` and
        ``StreamGetSensorData`` are requested one after another. Each request
        is an HTTP GET whose XML answer is parsed with ``xmltodict``; every row
        is converted to a typed dict and emitted as a RECORD message. A failure
        while processing one stream is logged with its traceback and does not
        stop the following streams from being attempted.

        :param logger: receives progress, debug and failure messages
        :param config: connector configuration; ``ConfigPropDataApiUrl`` and
            ``ConfigPropSystemKey`` are read, plus an optional mapping of extra
            query parameters stored under each stream name
        :param catalog: accepted for interface compatibility; not consulted
        :param state: accepted for interface compatibility; not consulted
        :return: generator of RECORD messages
        """

        def get_request_url(stream, config):
            """Return the data API URL for ``stream`` including its query string."""
            data_api_url = config[ConfigPropDataApiUrl]
            query_params = {ConfigPropSystemKey: config[ConfigPropSystemKey]}

            # Optional per-stream parameters live under the stream's own name.
            if stream in config:
                query_params.update(config[stream])

            return f'{data_api_url}?method={stream}&{urlencode(query_params)}'

        def assert_onerain_response(response_object, expect_http_code):
            """Validate ``response_object`` and return its rows, or ``[]`` if none.

            Raises ``AssertionError`` on an unexpected status code or document
            root, and ``ValueError`` when the document carries an error element.
            """
            assert isinstance(expect_http_code, int)
            assert response_object.status_code == expect_http_code

            # Parse the response that was passed in, not a variable that happens
            # to exist in the enclosing scope.
            doc = xmltodict.parse(response_object.text)
            assert 'onerain' in doc
            if 'error' in doc['onerain']:
                raise ValueError(doc['onerain']['error'])

            results = []
            try:
                rows = doc['onerain']['response']['general']['row']
                # Probe that `rows` can be indexed by position; anything that
                # cannot (including a missing element) is treated as "no records".
                # NOTE(review): a lone row parsed as a mapping would also fail
                # this probe and be dropped -- confirm that is intended.
                _ = rows[0]
                results = rows
            except Exception as e:
                logger.debug(f'no records: {e}')

            return results

        def site_metadata_record(row):
            """Convert one site metadata row into typed record data."""
            return {
                'or_site_id': int(row['or_site_id']),
                'site_id': row['site_id'],
                'location': row['location'],
                'owner': row['owner'],
                'system_id': int(row['system_id']),
                'client_id': row['client_id'],
                'latitude_dec': float(row['latitude_dec']),
                'longitude_dec': float(row['longitude_dec']),
                'elevation': int(row['elevation']),
            }

        def sensor_metadata_record(row):
            """Convert one sensor metadata row into typed record data."""
            # 'change_time' is not emitted.
            return {
                'site_id': row['site_id'],
                'sensor_id': int(row['sensor_id']),
                'or_site_id': int(row['or_site_id']),
                'or_sensor_id': int(row['or_sensor_id']),
                'location': row['location'],
                'description': row['description'],
                'sensor_class': int(row['sensor_class']),
                'sensor_type': row['sensor_type'],
                'units': row['units'],
                'translate': str_to_bool(row['translate']),
                'precision': int(row['precision']),
                'last_time': row['last_time'],
                'last_value': row['last_value'],
                'last_time_received': row['last_time_received'],
                'last_value_received': float(row['last_value_received']),
                'last_raw_value': float(row['last_raw_value']),
                'last_raw_value_received': float(row['last_raw_value_received']),
                'normal': int(row['normal']),
                'active': int(row['active']),
                'valid': int(row['valid']),
                'change_rate': float(row['change_rate']),
                'time_min_consec_zeros': int(row['time_min_consec_zeros']),
                'validation': row['validation'],
                'value_max': float(row['value_max']),
                'value_min': float(row['value_min']),
                'delta_pos': float(row['delta_pos']),
                'delta_neg': float(row['delta_neg']),
                'time_max': int(row['time_max']),
                'time_min': int(row['time_min']),
                'slope': float(row['slope']),
                'offset': float(row['offset']),
                'reference': float(row['reference']),
                'utc_offset': int(row['utc_offset']),
                'using_dst': str_to_bool(row['using_dst']),
                'conversion': row['conversion'],
                'usage': row['usage'],
                'protocol': int(row['protocol']),
            }

        def sensor_data_record(row):
            """Convert one sensor data row into typed record data."""
            return {
                'site_id': row['site_id'],
                'sensor_id': row['sensor_id'],
                'or_site_id': int(row['or_site_id']),
                'or_sensor_id': int(row['or_sensor_id']),
                'sensor_class': int(row['sensor_class']),
                'data_time': row['data_time'],
                'data_value': float(row['data_value']),
                'raw_value': float(row['raw_value']),
                'units': row['units'],
            }

        def read_stream(stream_name, to_record):
            """Request one stream and yield a RECORD message for each of its rows."""
            req_url = get_request_url(stream_name, config)
            # NOTE(review): the logged URL contains the system key from the
            # configuration -- confirm it is safe to write to the logs.
            logger.info(f'requesting {req_url}')

            try:
                response = requests.get(req_url)
                for row in assert_onerain_response(response, 200):
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=AirbyteRecordMessage(
                            stream=stream_name,
                            data=to_record(row),
                            emitted_at=int(datetime.now().timestamp()) * 1000),
                    )
            except Exception:
                # Best effort: report the failure and let the remaining streams run.
                logger.error(
                    f'failed to process stream {stream_name}: {traceback.format_exc()}'
                )

        # RETRIEVE SITE METADATA AND RETURN AS STREAM
        yield from read_stream(StreamGetSiteMetaData, site_metadata_record)

        # RETRIEVE SENSOR METADATA AND RETURN AS STREAM
        yield from read_stream(StreamGetSensorMetaData, sensor_metadata_record)

        # RETRIEVE SENSOR DATA AND RETURN AS STREAM
        yield from read_stream(StreamGetSensorData, sensor_data_record)