Example #1
    def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
        streams = []

        # Get the queue name by taking the substring after the last "/"
        stream_name = self.parse_queue_name(config["queue_url"])
        logger.debug("Amazon SQS Source Stream Discovery - stream is: " +
                     stream_name)

        json_schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                "id": {
                    "type": "string"
                },
                "body": {
                    "type": "string"
                },
                "attributes": {
                    "type": ["object", "null"]
                }
            },
        }
        streams.append(
            AirbyteStream(name=stream_name,
                          json_schema=json_schema,
                          supported_sync_modes=["full_refresh"]))
        return AirbyteCatalog(streams=streams)
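The discover method above delegates to a parse_queue_name helper that is not shown in this example. A minimal sketch of what it likely does, based on the inline comment (take the substring after the last "/" of the queue URL); the body below is an assumption, not the connector's actual code:

    def parse_queue_name(self, queue_url: str) -> str:
        # Hypothetical sketch: the queue name is the last path segment of the queue URL,
        # e.g. "https://sqs.eu-west-1.amazonaws.com/123456789012/my-queue" -> "my-queue"
        return queue_url.rsplit("/", 1)[-1]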
Example #2
def establish_connection(config: json, logger: AirbyteLogger) -> Connection:
    """
    Creates a connection to Firebolt database using the parameters provided.

    :param config: Json object containing db credentials.
    :param logger: AirbyteLogger instance to print logs.

    :return: PEP-249 compliant database Connection object.
    """
    logger.debug("Connecting to Firebolt.")
    connection = connect(**parse_config(config, logger))
    logger.debug("Connection to Firebolt established.")
    return connection
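A hedged usage sketch for establish_connection: the credential keys expected by parse_config are not shown in this example, so the config below is a placeholder. Because the returned object is PEP-249 compliant, cursor(), execute() and close() are available:

logger = AirbyteLogger()
config = {}  # placeholder: fill with the Firebolt credentials that parse_config expects
connection = establish_connection(config, logger)
try:
    cursor = connection.cursor()   # PEP-249: obtain a cursor from the connection
    cursor.execute("SELECT 1")     # run a trivial query to verify the connection works
    logger.debug(f"Query returned: {cursor.fetchone()}")
finally:
    connection.close()             # always release the connection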
Example #3
async def establish_async_connection(config: json,
                                     logger: AirbyteLogger) -> AsyncConnection:
    """
    Creates an async connection to Firebolt database using the parameters provided.
    This connection can be used for parallel operations.

    :param config: Json object containing db credentials.
    :param logger: AirbyteLogger instance to print logs.

    :return: PEP-249 compliant database Connection object.
    """
    logger.debug("Connecting to Firebolt.")
    connection = await async_connect(**parse_config(config, logger))
    logger.debug("Connection to Firebolt established.")
    return connection
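A hedged sketch of how the async variant enables the parallel operations mentioned in its docstring: several coroutines can each open their own connection concurrently via asyncio.gather. Only the function defined above and the standard library are used; open_connections is an illustrative name:

import asyncio

async def open_connections(config: dict, logger: AirbyteLogger, count: int = 2):
    # Open several Firebolt connections concurrently; each coroutine awaits its own connection.
    return await asyncio.gather(
        *(establish_async_connection(config, logger) for _ in range(count))
    )

# connections = asyncio.run(open_connections(config, AirbyteLogger()))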
Example #4
    def json_type_to_pyarrow_type(
        typ: str,
        reverse: bool = False,
        logger: AirbyteLogger = AirbyteLogger()) -> str:
        """
        Converts a JSON type to a PyArrow type (or the other way around if reverse=True)

        :param typ: Json type if reverse is False, else PyArrow type
        :param reverse: switch to True for PyArrow type -> Json type, defaults to False
        :param logger: defaults to AirbyteLogger()
        :return: PyArrow type if reverse is False, else Json type
        """
        str_typ = str(typ)
        # this is a map of airbyte types to pyarrow types. The first list element of the pyarrow types should be the one to use where required.
        type_map = {
            "boolean": ("bool_", "bool"),
            "integer": ("int64", "int8", "int16", "int32", "uint8", "uint16",
                        "uint32", "uint64"),
            "number": ("float64", "float16", "float32", "decimal128",
                       "decimal256", "halffloat", "float", "double"),
            "string": ("large_string", "string"),
            # TODO: support object type rather than coercing to string
            "object": ("large_string", ),
            # TODO: support array type rather than coercing to string
            "array": ("large_string", ),
            "null": ("large_string", ),
        }
        if not reverse:
            for json_type, pyarrow_types in type_map.items():
                if str_typ.lower() == json_type:
                    return str(
                        getattr(pa, pyarrow_types[0]).__call__()
                    )  # better way might be necessary when we decide to handle more type complexity
            logger.debug(
                f"JSON type '{str_typ}' is not mapped, falling back to default conversion to large_string"
            )
            return str(pa.large_string())
        else:
            for json_type, pyarrow_types in type_map.items():
                if any(
                        str_typ.startswith(pa_type)
                        for pa_type in pyarrow_types):
                    return json_type
            logger.debug(
                f"PyArrow type '{str_typ}' is not mapped, falling back to default conversion to string"
            )
            return "string"  # default type if unspecified in map
Example #5
    def read(
        self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, Any]
    ) -> Generator[AirbyteMessage, None, None]:
        stream_name = self.parse_queue_name(config["queue_url"])
        logger.debug("Amazon SQS Source Read - stream is: " + stream_name)

        # Required properties
        queue_url = config["queue_url"]
        queue_region = config["region"]
        delete_messages = config["delete_messages"]

        # Optional Properties
        max_batch_size = config.get("max_batch_size", 10)
        max_wait_time = config.get("max_wait_time", 20)
        visibility_timeout = config.get("visibility_timeout")
        attributes_to_return = config.get("attributes_to_return")
        if attributes_to_return is None:
            attributes_to_return = ["All"]
        else:
            attributes_to_return = attributes_to_return.split(",")

        # Sensitive Properties
        access_key = config["access_key"]
        secret_key = config["secret_key"]

        logger.debug("Amazon SQS Source Read - Creating SQS connection ---")
        session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=queue_region)
        sqs = session.resource("sqs")
        queue = sqs.Queue(url=queue_url)
        logger.debug("Amazon SQS Source Read - Connected to SQS Queue ---")
        timed_out = False
        while not timed_out:
            try:
                logger.debug("Amazon SQS Source Read - Beginning message poll ---")
                messages = queue.receive_messages(
                    MessageAttributeNames=attributes_to_return, MaxNumberOfMessages=max_batch_size, WaitTimeSeconds=max_wait_time
                )

                if not messages:
                    logger.debug("Amazon SQS Source Read - No messages recieved during poll, time out reached ---")
                    timed_out = True
                    break

                for msg in messages:
                    logger.debug("Amazon SQS Source Read - Message recieved: " + msg.message_id)
                    if visibility_timeout:
                        logger.debug("Amazon SQS Source Read - Setting message visibility timeout: " + msg.message_id)
                        self.change_message_visibility(msg, visibility_timeout)
                        logger.debug("Amazon SQS Source Read - Message visibility timeout set: " + msg.message_id)

                    data = {
                        "id": msg.message_id,
                        "body": msg.body,
                        "attributes": msg.message_attributes,
                    }

                    # TODO: Support a 'BATCH OUTPUT' mode that outputs the full batch in a single AirbyteRecordMessage
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=AirbyteRecordMessage(stream=stream_name, data=data, emitted_at=int(datetime.now().timestamp()) * 1000),
                    )
                    if delete_messages:
                        logger.debug("Amazon SQS Source Read - Deleting message: " + msg.message_id)
                        self.delete_message(msg)
                        logger.debug("Amazon SQS Source Read - Message deleted: " + msg.message_id)
                        # TODO: Delete messages in batches to reduce amount of requests?

            except ClientError as error:
                raise Exception("Error in AWS Client: " + str(error))
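The read method above also relies on change_message_visibility and delete_message helpers that are not shown here. Hedged sketches of what they most likely wrap, using the boto3 SQS Message resource methods change_visibility() and delete(); the exact helper bodies are assumptions:

    def change_message_visibility(self, message, visibility_timeout: int) -> None:
        # Keep the message hidden from other consumers while it is being processed.
        message.change_visibility(VisibilityTimeout=visibility_timeout)

    def delete_message(self, message) -> None:
        # Remove the message from the queue once its record has been emitted.
        message.delete()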
Example #6
    def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
        try:
            if "max_batch_size" in config:
                # Max batch size must be between 1 and 10
                if config["max_batch_size"] > 10 or config["max_batch_size"] < 1:
                    raise Exception("max_batch_size must be between 1 and 10")
            if "max_wait_time" in config:
                # Max wait time must be between 1 and 20
                if config["max_wait_time"] > 20 or config["max_wait_time"] < 1:
                    raise Exception("max_wait_time must be between 1 and 20")

            # Required properties
            queue_url = config["queue_url"]
            logger.debug("Amazon SQS Source Config Check - queue_url: " + queue_url)
            queue_region = config["region"]
            logger.debug("Amazon SQS Source Config Check - region: " + queue_region)
            # Sensitive Properties
            access_key = config["access_key"]
            logger.debug("Amazon SQS Source Config Check - access_key (ends with): " + access_key[-1])
            secret_key = config["secret_key"]
            logger.debug("Amazon SQS Source Config Check - secret_key (ends with): " + secret_key[-1])

            logger.debug("Amazon SQS Source Config Check - Starting connection test ---")
            session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=queue_region)
            sqs = session.resource("sqs")
            queue = sqs.Queue(url=queue_url)
            if hasattr(queue, "attributes"):
                logger.debug("Amazon SQS Source Config Check - Connection test successful ---")
                return AirbyteConnectionStatus(status=Status.SUCCEEDED)
            else:
                return AirbyteConnectionStatus(status=Status.FAILED, message="Amazon SQS Source Config Check - Could not connect to queue")
        except ClientError as e:
            return AirbyteConnectionStatus(status=Status.FAILED, message=f"Amazon SQS Source Config Check - Error in AWS Client: {str(e)}")
        except Exception as e:
            return AirbyteConnectionStatus(
                status=Status.FAILED, message=f"Amazon SQS Source Config Check - An exception occurred: {str(e)}"
            )
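For reference, an illustrative config shape inferred from the keys read in the check and read methods above; every value below is a placeholder, not real credentials or a real queue:

example_config = {
    "queue_url": "https://sqs.eu-west-1.amazonaws.com/123456789012/example-queue",  # placeholder URL
    "region": "eu-west-1",
    "access_key": "<aws-access-key-id>",
    "secret_key": "<aws-secret-access-key>",
    "delete_messages": False,       # required by read(): delete messages after emitting them
    "max_batch_size": 10,           # optional, must be between 1 and 10
    "max_wait_time": 20,            # optional, must be between 1 and 20
    "visibility_timeout": 30,       # optional, seconds a received message stays hidden from other consumers
    "attributes_to_return": "All",  # optional, comma-separated list of message attributes
}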