Example #1
    def check_connection(self, logger: AirbyteLogger,
                         config: Mapping[str, Any]) -> Tuple[bool, Any]:
        try:
            logger.info("Checking the config")
            google_api = GoogleAds(credentials=self.get_credentials(config))

            accounts = self.get_account_info(google_api, config)
            customers = Customer.from_accounts(accounts)
            # Check custom query request validity by sending a metrics request with a non-existent time window
            for customer in customers:
                for query in config.get("custom_queries", []):
                    query = query.get("query")
                    if customer.is_manager_account and self.is_metrics_in_custom_query(
                            query):
                        logger.warning(
                            f"Metrics are not available for manager account {customer.id}. "
                            f"Please remove metrics fields in your custom query: {query}."
                        )
                    if CustomQuery.cursor_field in query:
                        return False, f"Custom query should not contain {CustomQuery.cursor_field}"
                    req_q = CustomQuery.insert_segments_date_expr(
                        query, "1980-01-01", "1980-01-01")
                    response = google_api.send_request(req_q,
                                                       customer_id=customer.id)
                    # iterate over the response otherwise exceptions will not be raised!
                    for _ in response:
                        pass
            return True, None
        except GoogleAdsException as exception:
            error_messages = ", ".join(
                [error.message for error in exception.failure.errors])
            logger.error(traceback.format_exc())
            return False, f"Unable to connect to Google Ads API with the provided configuration - {error_messages}"
Example #2
    def check_connection(self, logger: AirbyteLogger,
                         config: Mapping[str, Any]) -> Tuple[bool, Any]:
        try:
            params = {"access_key": config["access_key"]}
            base = config.get("base")
            if base is not None:
                params["base"] = base

            resp = requests.get(
                f"{ExchangeRates.url_base}{config['start_date']}",
                params=params)
            status = resp.status_code
            logger.info(f"Ping response code: {status}")
            if status == 200:
                return True, None
            # When an API request is sent but the requested data is not available, or the call fails
            # for some other reason, a JSON error body is returned.
            # https://exchangeratesapi.io/documentation/#errors
            error = resp.json().get("error", {})
            code = error.get("code")
            message = error.get("message") or error.get("info")
            # A base_currency_access_restricted error means the base currency was changed
            # while on the free plan, which does not allow it.
            if code == "base_currency_access_restricted":
                message = f"{message} (this plan doesn't support selecting the base currency)"
            return False, message
        except Exception as e:
            return False, e
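For reference, a sketch of the error payload this branch parses; the field names are assumed from the code above and the linked documentation, not verified against the live API:

sample_body = {
    "success": False,  # assumed envelope field
    "error": {
        "code": "base_currency_access_restricted",
        "message": "Changing the base currency is not supported on this plan.",
    },
}
error = sample_body.get("error", {})
print(error.get("code"), error.get("message") or error.get("info"))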
Example #3
    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        authenticator = TokenAuthenticator(config["api_token"])
        default_start_date = pendulum.parse(config["start_date"])
        threads_lookback_window = pendulum.Duration(
            days=config["lookback_window"])

        streams = [
            Channels(authenticator=authenticator),
            ChannelMembers(authenticator=authenticator),
            ChannelMessages(authenticator=authenticator,
                            default_start_date=default_start_date),
            Threads(authenticator=authenticator,
                    default_start_date=default_start_date,
                    lookback_window=threads_lookback_window),
            Users(authenticator=authenticator),
        ]

        # To sync data from channels, the bot backed by this token needs to join all those channels. This operation is idempotent.
        if config["join_channels"]:
            logger = AirbyteLogger()
            logger.info("joining Slack channels")
            join_channels_stream = JoinChannelsStream(
                authenticator=authenticator)
            for stream_slice in join_channels_stream.stream_slices():
                for message in join_channels_stream.read_records(
                        sync_mode=SyncMode.full_refresh,
                        stream_slice=stream_slice):
                    logger.info(message["message"])

        return streams
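A hypothetical config illustrating the keys this streams() implementation reads; the values are placeholders, and SourceSlack is an assumed enclosing class name:

config = {
    "api_token": "xoxb-placeholder",
    "start_date": "2021-01-01T00:00:00Z",
    "lookback_window": 7,
    "join_channels": False,
}
# streams = SourceSlack().streams(config)  # SourceSlack is assumed, not shown above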
Example #4
    def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
        try:
            logger.info("Checking the config")
            GoogleAds(credentials=config["credentials"], customer_id=config["customer_id"])
            return True, None
        except Exception as error:
            return False, f"Unable to connect to Google Ads API with the provided credentials - {repr(error)}"
Example #5
def create_firebolt_writer(connection: Connection, config: Mapping[str, Any],
                           logger: AirbyteLogger) -> FireboltWriter:
    if config["loading_method"]["method"] == "S3":
        logger.info("Using the S3 writing strategy")
        writer = FireboltS3Writer(
            connection,
            config["loading_method"]["s3_bucket"],
            config["loading_method"]["aws_key_id"],
            config["loading_method"]["aws_key_secret"],
            config["loading_method"]["s3_region"],
        )
    else:
        logger.info("Using the SQL writing strategy")
        writer = FireboltSQLWriter(connection)
    return writer
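A hypothetical loading_method config for each branch; the keys are taken from the code above, the values are placeholders:

s3_config = {
    "loading_method": {
        "method": "S3",
        "s3_bucket": "my-bucket",
        "aws_key_id": "AKIA-placeholder",
        "aws_key_secret": "placeholder",
        "s3_region": "us-east-1",
    }
}
sql_config = {"loading_method": {"method": "SQL"}}
# writer = create_firebolt_writer(connection, s3_config, logger)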
Example #6
    def discover(self, logger: AirbyteLogger,
                 config: Mapping) -> AirbyteCatalog:
        """
        Returns an AirbyteCatalog representing the available streams and fields in this integration. For example, given valid credentials to a
        remote CSV file, returns an Airbyte catalog where each CSV file is a stream and each column is a field.
        """
        client = self._get_client(config)
        name = client.stream_name

        logger.info(
            f"Discovering schema of {name} at {client.reader.full_url}...")
        try:
            streams = list(client.streams)
        except Exception as err:
            reason = f"Failed to discover schemas of {name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
            logger.error(reason)
            raise err
        return AirbyteCatalog(streams=streams)
Example #7
    def check_connection(self, logger: AirbyteLogger,
                         config: Mapping[str, Any]) -> Tuple[bool, Any]:
        try:
            logger.info("Checking the config")
            google_api = GoogleAds(credentials=self.get_credentials(config),
                                   customer_id=config["customer_id"])
            account_stream = Accounts(api=google_api)
            list(account_stream.read_records(sync_mode=SyncMode.full_refresh))
            # Check custom query request validity by sending a metrics request with a non-existent time window
            for q in config.get("custom_queries", []):
                q = q.get("query")
                if CustomQuery.cursor_field in q:
                    raise Exception(
                        f"Custom query should not contain {CustomQuery.cursor_field}"
                    )
                req_q = CustomQuery.insert_segments_date_expr(
                    q, "1980-01-01", "1980-01-01")
                google_api.send_request(req_q)
            return True, None
        except GoogleAdsException as error:
            return False, f"Unable to connect to Google Ads API with the provided credentials - {repr(error.failure)}"
Example #8
    def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
        """
        :param config:  the user-input config object conforming to the connector's spec.json
        :param logger:  logger object
        :return Tuple[bool, Any]: (True, None) if the input config can be used to connect to the API successfully, (False, error) otherwise.
        """
        url_template = "https://{company}.hellobaton.com/api/"
        try:
            params = {
                "api_key": config["api_key"],
            }
            base_url = url_template.format(company=config["company"])
            # This is just going to return a mapping of available endpoints
            response = requests.get(base_url, params=params)
            status_code = response.status_code
            logger.info(f"Status code: {status_code}")
            if status_code == 200:
                return True, None
            return False, f"Connection check failed with status code {status_code}"

        except Exception as e:
            return False, e
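A quick illustration of how the URL template expands; the company name is a placeholder:

config = {"company": "acme", "api_key": "placeholder"}
print("https://{company}.hellobaton.com/api/".format(company=config["company"]))
# https://acme.hellobaton.com/api/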
Example #9
    def read(
        self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: MutableMapping[str, Any] = None
    ) -> Iterator[AirbyteMessage]:
        """
        Overridden to dynamically receive only those streams that are necessary for reading, for significant speed gains
        (Salesforce has a strict API limit on requests).
        """
        connector_state = copy.deepcopy(state or {})
        config, internal_config = split_config(config)
        # get the streams once in case the connector needs to make any queries to generate them
        logger.info("Starting generating streams")
        stream_instances = {s.name: s for s in self.streams(config, catalog=catalog)}
        logger.info(f"Starting syncing {self.name}")
        self._stream_to_instance_map = stream_instances
        for configured_stream in catalog.streams:
            stream_instance = stream_instances.get(configured_stream.stream.name)
            if not stream_instance:
                raise KeyError(
                    f"The requested stream {configured_stream.stream.name} was not found in the source. Available streams: {stream_instances.keys()}"
                )

            try:
                yield from self._read_stream(
                    logger=logger,
                    stream_instance=stream_instance,
                    configured_stream=configured_stream,
                    connector_state=connector_state,
                    internal_config=internal_config,
                )
            except Exception as e:
                logger.exception(f"Encountered an exception while reading stream {self.name}")
                raise e

        logger.info(f"Finished syncing {self.name}")
Example #10
    def read(
        self, logger: AirbyteLogger, config: Mapping,
        catalog: ConfiguredAirbyteCatalog,
        state_path: Mapping[str, Any]) -> Generator[AirbyteMessage, None, None]:
        """Returns a generator of the AirbyteMessages generated by reading the source with the given configuration, catalog, and state."""
        client = self._get_client(config)
        fields = self.selected_fields(catalog)
        name = client.stream_name

        logger.info(f"Reading {name} ({client.reader.full_url})...")
        try:
            for row in client.read(fields=fields):
                record = AirbyteRecordMessage(
                    stream=name,
                    data=row,
                    emitted_at=int(datetime.now().timestamp() * 1000))
                yield AirbyteMessage(type=Type.RECORD, record=record)
        except Exception as err:
            reason = f"Failed to read data of {name} at {client.reader.full_url}: {repr(err)}\n{traceback.format_exc()}"
            logger.error(reason)
            raise err
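The emitted_at fix above matters for precision; a minimal illustration of the difference:

from datetime import datetime

ts = datetime.now().timestamp()
print(int(ts) * 1000)   # truncates to whole seconds before scaling
print(int(ts * 1000))   # keeps millisecond precision, as used above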
Example #11
    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        authenticator = TokenAuthenticator(config["api_token"])
        default_start_date = pendulum.now().subtract(days=14)  # TODO make this configurable
        threads_lookback_window = {"days": 7}  # TODO make this configurable

        streams = [
            Channels(authenticator=authenticator),
            ChannelMembers(authenticator=authenticator),
            ChannelMessages(authenticator=authenticator, default_start_date=default_start_date),
            Threads(authenticator=authenticator, default_start_date=default_start_date, lookback_window=threads_lookback_window),
            Users(authenticator=authenticator),
        ]

        # To sync data from channels, the bot backed by this token needs to join all those channels. This operation is idempotent.
        # TODO make joining configurable. Also make joining archived and private channels configurable
        logger = AirbyteLogger()
        logger.info("joining Slack channels")
        join_channels_stream = JoinChannelsStream(authenticator=authenticator)
        for stream_slice in join_channels_stream.stream_slices():
            for message in join_channels_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
                logger.info(message["message"])

        return streams
Example #12
    def discover(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteCatalog:
        access_token = config["access_token"]
        spreadsheet_id = config["spreadsheet_id"]
        streams = []

        smartsheet_client = smartsheet.Smartsheet(access_token)
        try:
            sheet = smartsheet_client.Sheets.get_sheet(spreadsheet_id)
            sheet = json.loads(str(sheet))  # make it subscriptable
            sheet_json_schema = get_json_schema(sheet)

            logger.info(
                f"Running discovery on sheet: {sheet['name']} with {spreadsheet_id}"
            )

            stream = AirbyteStream(name=sheet["name"],
                                   json_schema=sheet_json_schema)
            streams.append(stream)

        except Exception as e:
            raise Exception(f"Could not run discovery: {str(e)}")

        return AirbyteCatalog(streams=streams)
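A hypothetical result of get_json_schema for a two-column sheet; the real helper's output may differ, this only illustrates the JSON-schema shape AirbyteStream expects:

sheet_json_schema = {
    "type": "object",
    "properties": {
        "Name": {"type": "string"},    # assumed column
        "Amount": {"type": "string"},  # assumed column
    },
}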
Example #13
    def read(
        self,
        logger: AirbyteLogger,
        config: Mapping[str, Any],
        catalog: ConfiguredAirbyteCatalog,
        state: Optional[MutableMapping[str, Any]] = None,
    ) -> Iterator[AirbyteMessage]:
        """
        Overridden to dynamically receive only those streams that are necessary for reading, for significant speed gains
        (Salesforce has a strict API limit on requests).
        """
        connector_state = copy.deepcopy(state or {})
        config, internal_config = split_config(config)
        # get the streams once in case the connector needs to make any queries to generate them
        logger.info("Starting generating streams")
        stream_instances = {
            s.name: s
            for s in self.streams(config, catalog=catalog, state=state)
        }
        logger.info(f"Starting syncing {self.name}")
        self._stream_to_instance_map = stream_instances
        for configured_stream in catalog.streams:
            stream_instance = stream_instances.get(
                configured_stream.stream.name)
            if not stream_instance:
                raise KeyError(
                    f"The requested stream {configured_stream.stream.name} was not found in the source. Available streams: {stream_instances.keys()}"
                )

            try:
                yield from self._read_stream(
                    logger=logger,
                    stream_instance=stream_instance,
                    configured_stream=configured_stream,
                    connector_state=connector_state,
                    internal_config=internal_config,
                )
            except exceptions.HTTPError as error:
                error_data = error.response.json()[0]
                error_code = error_data.get("errorCode")
                if error.response.status_code == codes.FORBIDDEN and error_code == "REQUEST_LIMIT_EXCEEDED":
                    logger.warning(
                        f"API Call limit is exceeded. Error message: '{error_data.get('message')}'"
                    )
                    break  # if got 403 rate limit response, finish the sync with success.
                raise error

            except Exception as e:
                logger.exception(
                    f"Encountered an exception while reading stream {configured_stream.stream.name}"
                )
                raise e

        logger.info(f"Finished syncing {self.name}")
Example #14
    def read(self, logger: AirbyteLogger, config: Mapping[str, Any],
             catalog: ConfiguredAirbyteCatalog,
             state: Dict[str, Any]) -> Generator[AirbyteMessage, None, None]:

        access_token = config["access_token"]
        spreadsheet_id = config["spreadsheet_id"]
        smartsheet_client = smartsheet.Smartsheet(access_token)

        for configured_stream in catalog.streams:
            stream = configured_stream.stream
            properties = stream.json_schema["properties"]
            if isinstance(properties, list):
                columns = tuple(key for dct in properties
                                for key in dct.keys())
            elif isinstance(properties, dict):
                columns = tuple(properties.keys())
            else:
                logger.error(
                    "Could not read properties from the JSON schema in this stream"
                )
                continue  # skip this stream; columns would otherwise be undefined
            name = stream.name

            try:
                sheet = smartsheet_client.Sheets.get_sheet(spreadsheet_id)
                sheet = json.loads(str(sheet))  # make it subscriptable
                logger.info(f"Starting syncing spreadsheet {sheet['name']}")
                logger.info(f"Row count: {sheet['totalRowCount']}")

                for row in sheet["rows"]:
                    values = tuple(i["value"] if "value" in i else ""
                                   for i in row["cells"])
                    try:
                        data = dict(zip(columns, values))

                        yield AirbyteMessage(
                            type=Type.RECORD,
                            record=AirbyteRecordMessage(
                                stream=name,
                                data=data,
                                emitted_at=int(datetime.now().timestamp() * 1000)),
                        )
                    except Exception as e:
                        logger.error(
                            f"Unable to encode row into an AirbyteMessage with the following error: {e}"
                        )

            except Exception as e:
                logger.error(f"Could not read smartsheet: {name}")
                raise e
        logger.info(f"Finished syncing spreadsheet with ID: {spreadsheet_id}")
Example #15
    def read(
        self,
        logger: AirbyteLogger,
        config: Mapping[str, Any],
        catalog: ConfiguredAirbyteCatalog,
        state: Optional[MutableMapping[str, Any]] = None,
    ) -> Iterable[AirbyteMessage]:
        logger.info(I_AM_A_SECRET_VALUE)
        logger.info(I_AM_A_SECRET_VALUE + " plus Some non secret Value in the same log record" + NOT_A_SECRET_VALUE)
        logger.info(NOT_A_SECRET_VALUE)
        yield AirbyteMessage(
            record=AirbyteRecordMessage(stream="stream", data={"data": "stuff"}, emitted_at=1),
            type=Type.RECORD,
        )
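This last snippet appears to exercise log redaction of secret values. A minimal sketch of that idea, assuming secrets are known strings to mask before a log line is emitted (not the CDK's actual implementation):

def filter_secrets(line: str, secrets: list) -> str:
    """Replace any known secret substring with a mask before logging (illustrative only)."""
    for secret in secrets:
        line = line.replace(secret, "****")
    return line

print(filter_secrets("abc123 plus Some non secret Value", ["abc123"]))
# **** plus Some non secret Value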