Ejemplo n.º 1
0
 def _check_with_catalog(logger: AirbyteLogger, streams: List,
                         config: json):
     """Probe the permission-gated GitHub endpoints needed by the selected streams.

     ``config["repository"]`` is split on single spaces; for every repository
     and every entry of ``streams`` that has a probe URL, one GET request is
     sent with the access token as the basic-auth user name. The first
     response that is not OK is logged with the ``ERROR`` prefix and the
     process exits with status 1.

     :param logger: logger used to report the failing response
     :param streams: names of the streams to verify
     :param config: connector configuration; ``repository`` and
         ``access_token`` are read
     """
     for repo_name in config["repository"].split(" "):
         owner = repo_name.split("/")[0]
         # Requests used to check stream permissions:
         # - collaborators: the Collaborators API returns 403 when the user is
         #   not one of the collaborators
         # - teams: the Teams API is only available to authenticated members of
         #   the team's organization (404 otherwise); without permission it
         #   returns 401
         probe_urls = {
             "collaborators":
             f"https://api.github.com/repos/{repo_name}/collaborators",
             "teams":
             f"https://api.github.com/orgs/{owner}/teams?sort=created_at&direction=desc",
         }
         for stream_name in streams:
             if stream_name not in probe_urls:
                 continue
             reply = requests.get(probe_urls[stream_name],
                                  auth=(config["access_token"], ""))
             if reply.status_code == requests.codes.ok:
                 continue
             logger.log_by_prefix(f"{repo_name} {reply.text}", "ERROR")
             sys.exit(1)
Ejemplo n.º 2
0
    def read(
            self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
    ) -> Generator[AirbyteMessage, None, None]:
        """
        Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
        catalog, and state.

        Only the streams named SiteMetaData, WellScreens, ManualGWL, PressureGWL and AcousticGWL are read; any
        other configured stream is skipped. Each stream is fetched repeatedly until ``get_json`` returns a falsy
        value. After every fetch the stream's entry in ``state`` is set to the ``OBJECTID`` of the last returned
        record, and every record receives a fresh ``import_uuid`` before it is emitted.

        :param logger: Logging object to display debug/info/error to the logs
            (logs will not be accessible via airbyte UI if they are not passed to this logger)
        :param config: Json object containing the configuration of this source, content of this json is as specified in
            the properties of the spec.json file
        :param catalog: The input catalog is a ConfiguredAirbyteCatalog which is almost the same as AirbyteCatalog
            returned by discover(), but in addition, it's been configured in the UI! For each particular stream and
            field, there may have been provided extra modifications such as: filtering streams and/or columns out,
            renaming some entities, etc
        :param state: When Airbyte reads data from a source, it might need to keep a checkpoint cursor to resume
            replication in the future from that saved checkpoint.
            This is the object that is provided with state from previous runs and avoids replicating the entire set
            of data every time. It is updated in place; a stream without an entry is read without a cursor.

        :return: A generator that produces a stream of AirbyteRecordMessage contained in AirbyteMessage object.
        """

        for configured_stream in catalog.streams:
            name = configured_stream.stream.name
            logger.debug(f'****** mode {configured_stream.sync_mode} state={state}')
            if name == 'SiteMetaData':
                url = sitemetadata_url(config)
            elif name == 'WellScreens':
                url = screens_url(config)
            elif name == 'ManualGWL':
                url = manual_water_levels_url(config)
            elif name == 'PressureGWL':
                url = pressure_water_levels_url(config)
            elif name == 'AcousticGWL':
                url = acoustic_water_levels_url(config)
            else:
                # not a stream this source knows how to read
                continue

            while True:
                # .get() instead of [] so that a stream without a saved cursor (e.g. the first
                # sync with a plain dict as state) starts without a cursor instead of raising KeyError.
                objectid = state.get(name)
                curl = f'{url}?objectid={objectid}' if objectid else url

                logger.info(f'fetching url={curl}')
                jobj = get_json(logger, curl)
                if not jobj:
                    break
                # the next request resumes from the last record of this batch
                state[name] = jobj[-1]['OBJECTID']

                for di in jobj:
                    di['import_uuid'] = str(uuid.uuid4())
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        # epoch milliseconds, truncated to whole seconds
                        record=AirbyteRecordMessage(stream=name, data=di,
                                                    emitted_at=int(datetime.now().timestamp()) * 1000))
Ejemplo n.º 3
0
    def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
        """Build a catalog with one stream per sheet of the configured spreadsheet.

        The spreadsheet metadata is fetched without grid data, then the first
        row of every sheet is read and converted into a stream definition.

        :param logger: logger used to announce the discovery run
        :param config: connector configuration; ``credentials_json`` and
            ``spreadsheet_id`` are read
        :return: catalog holding the streams built from the sheet headers
        :raises Exception: when the API call fails with ``errors.HttpError``;
            a 404 is reported as "Requested spreadsheet was not found."
        """
        credentials = json.loads(config["credentials_json"])
        client = Helpers.get_authenticated_sheets_client(credentials)
        spreadsheet_id = config["spreadsheet_id"]
        try:
            logger.info(f"Running discovery on sheet {spreadsheet_id}")
            raw_metadata = client.get(spreadsheetId=spreadsheet_id,
                                      includeGridData=False).execute()
            metadata = Spreadsheet.parse_obj(raw_metadata)
            titles = [entry.properties.title for entry in metadata.sheets]

            streams = []
            for title in titles:
                first_row = Helpers.get_first_row(client, spreadsheet_id, title)
                streams.append(
                    Helpers.headers_to_airbyte_stream(title, first_row))
            return AirbyteCatalog(streams=streams)

        except errors.HttpError as err:
            if err.resp.status == 404:
                reason = "Requested spreadsheet was not found."
            else:
                reason = str(err)
            raise Exception(f"Could not run discovery: {reason}")
Ejemplo n.º 4
0
    def headers_to_airbyte_stream(
            logger: AirbyteLogger, sheet_name: str,
            header_row_values: List[str]) -> AirbyteStream:
        """
        Builds the stream definition of a sheet from its header row.

        Header parsing is delegated to Helpers.get_valid_headers_and_duplicates. This method assumes that data is
        contiguous, i.e. every cell contains a value and the first cell which does not contain a value denotes the
        end of the headers. For example, if the first row contains "One | Two | | Three" then the headers are parsed
        as ["One", "Two"]. This assumption is made for simplicity and can be modified later.

        Duplicate headers are reported through a warning and are not part of the resulting schema.

        :param logger: logger that receives the duplicate-header warning
        :param sheet_name: name of the sheet, used as the stream name
        :param header_row_values: values of the sheet's first row
        :return: stream whose JSON schema declares every valid header as a string property
        """
        valid_headers, repeated_headers = Helpers.get_valid_headers_and_duplicates(
            header_row_values)
        if repeated_headers:
            logger.warn(
                f"Duplicate headers found in {sheet_name}. Ignoring them :{repeated_headers}"
            )

        # For simplicity, the type of every cell is a string
        properties = {}
        for header in valid_headers:
            properties[header] = {"type": "string"}

        schema = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": properties,
        }
        return AirbyteStream(name=sheet_name, json_schema=schema)
Ejemplo n.º 5
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the Slack token by listing conversations.

     :param logger: logger that receives the error on failure
     :param config_path: not used by this check
     :param config: connector configuration; ``token`` is read
     :return: SUCCEEDED when the call works, FAILED with the first argument of
         the ``SlackApiError`` as message otherwise
     """
     try:
         WebClient(token=config["token"]).conversations_list()
     except SlackApiError as api_error:
         detail = api_error.args[0]
         logger.error(f"Got an error: {detail}")
         return AirbyteConnectionStatus(status=Status.FAILED, message=str(detail))
     return AirbyteConnectionStatus(status=Status.SUCCEEDED)
Ejemplo n.º 6
0
    def check(self, logger: AirbyteLogger,
              config: json) -> AirbyteConnectionStatus:
        """Check that the spreadsheet is reachable and that no sheet has duplicate headers.

        :param logger: logger that receives the error details
        :param config: connector configuration; ``credentials_json`` and
            ``spreadsheet_id`` are read
        :return: SUCCEEDED when every step passes; FAILED with an explanatory
            message when the spreadsheet cannot be fetched, a header row cannot
            be read, or duplicate headers are found
        """
        client = GoogleSheetsClient(json.loads(config["credentials_json"]))
        spreadsheet_id = config["spreadsheet_id"]

        # Step 1: the spreadsheet must be reachable with our credentials.
        try:
            # Attempt to get first row of sheet
            client.get(spreadsheetId=spreadsheet_id,
                       includeGridData=False,
                       ranges="1:1")
        except errors.HttpError as err:
            # Give a clearer message if it's a common error like 404.
            if err.resp.status == status_codes.NOT_FOUND:
                reason = "Requested spreadsheet was not found."
            else:
                reason = str(err)
            logger.error(f"Formatted error: {reason}")
            failure = f"Unable to connect with the provided credentials to spreadsheet. Error: {reason}"
            return AirbyteConnectionStatus(status=Status.FAILED,
                                           message=failure)

        # Step 2: collect the duplicate headers of every sheet.
        metadata = Spreadsheet.parse_obj(
            client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
        titles = [entry.properties.title for entry in metadata.sheets]
        duplicates_by_sheet = {}
        for title in titles:
            try:
                first_row = Helpers.get_first_row(client, spreadsheet_id, title)
                _, repeated = Helpers.get_valid_headers_and_duplicates(first_row)
                if repeated:
                    duplicates_by_sheet[title] = repeated
            except Exception as err:
                logger.error(str(err))
                failure = f"Unable to read the schema of sheet {title}. Error: {str(err)}"
                return AirbyteConnectionStatus(status=Status.FAILED,
                                               message=failure)

        if not duplicates_by_sheet:
            return AirbyteConnectionStatus(status=Status.SUCCEEDED)

        details = ", ".join(
            f"[sheet:{title}, headers:{repeated}]"
            for title, repeated in duplicates_by_sheet.items())
        return AirbyteConnectionStatus(
            status=Status.FAILED,
            message=
            "The following duplicate headers were found in the following sheets. Please fix them to continue: "
            + details,
        )
Ejemplo n.º 7
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the configuration by attempting a connection through ``try_connect``.

     :param logger: logger passed to ``try_connect`` and used to report a failure
     :param config_path: not used by this check
     :param config: connector configuration handed to ``try_connect``
     :return: SUCCEEDED when ``try_connect`` does not raise ``self.api_error``,
         FAILED with the error text otherwise
     """
     failure = None
     try:
         self.try_connect(logger, config)
     except self.api_error as err:
         logger.error(f"Exception while connecting to {self.tap_name}: {err}")
         # this should be in UI
         failure = f"Unable to connect to {self.tap_name} with the provided credentials. Error: {err}"

     if failure is None:
         return AirbyteConnectionStatus(status=Status.SUCCEEDED)
     return AirbyteConnectionStatus(status=Status.FAILED, message=failure)
Ejemplo n.º 8
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the Marketo configuration by running ``discover``.

     :param logger: logger passed to ``discover`` and used to report a failure
     :param config_path: path handed to ``discover``
     :param config: not used by this check
     :return: SUCCEEDED when ``discover`` completes, FAILED with a generic
         message when it raises
     """
     try:
         self.discover(logger, config_path)
         outcome = AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except Exception as exc:
         # the root cause only goes to the logs; the status carries a generic message
         logger.error("Exception while connecting to the Marketo API")
         logger.error(str(exc))
         outcome = AirbyteConnectionStatus(
             status=Status.FAILED,
             message="Unable to connect to the Marketo API with the provided credentials. ",
         )
     return outcome
Ejemplo n.º 9
0
    def read(
        self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
    ) -> Generator[AirbyteMessage, None, None]:
        """Yield the messages of every configured stream, one stream after the other.

        :param logger: logger that receives the start and end notices
        :param config: connector configuration used to build the client
        :param catalog: configured catalog whose ``streams`` are read in order
        :param state: state object forwarded unchanged to ``_read_record``
        :return: generator over everything ``_read_record`` produces
        """
        mailchimp = self._client(config)

        logger.info("Starting syncing mailchimp")
        for selected in catalog.streams:
            stream_messages = self._read_record(client=mailchimp, configured_stream=selected, state=state)
            yield from stream_messages

        logger.info("Finished syncing mailchimp")
Ejemplo n.º 10
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the Zendesk Support configuration by running ``discover``.

     :param logger: logger passed to ``discover`` and used to report a failure
     :param config_path: path handed to ``discover``
     :param config: not used by this check
     :return: SUCCEEDED when ``discover`` completes, FAILED with a generic
         message when it raises
     """
     try:
         self.discover(logger, config_path)
         return AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except Exception as e:
         logger.error("Exception while connecting to the Zendesk Support API")
         # Log the root cause as well; without it a failed check cannot be diagnosed.
         logger.error(str(e))
         return AirbyteConnectionStatus(
             status=Status.FAILED,
             message="Unable to connect to the Zendesk Support API with the provided credentials.  Please make sure the "
             "input credentials and environment are correct. ",
         )
Ejemplo n.º 11
0
    def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
        """
        Tests if the input configuration can be used to successfully connect to the integration
            e.g: if a provided Stripe API token can be used to connect to the Stripe API.

        The check succeeds as soon as at least one of the probed reports can be downloaded; it fails only when
        every report fails, in which case the message lists the error of each report.

        :param logger: Logging object to display debug/info/error to the logs
            (logs will not be accessible via airbyte UI if they are not passed to this logger)
        :param config_path: Path to the file containing the configuration json config
        :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file

        :return: AirbyteConnectionStatus indicating a Success or Failure
        """
        try:
            # If an app on the appstore does not support subscriptions or sales, it cannot pull the relevant reports.
            # However, the way the Appstore API expresses this is not via clear error messages. Instead it expresses it by throwing an unrelated
            # error, in this case "invalid vendor ID". There is no way to distinguish if this error is due to invalid credentials or due to
            # the account not supporting this kind of report. So to "check connection" we see if any of the reports can be pulled and if so
            # return success. If no reports can be pulled we display the exception messages generated for all reports and return failure.
            api_fields_to_test = {
                "subscription_event_report": {
                    "reportType": "SUBSCRIPTION_EVENT",
                    "frequency": "DAILY",
                    "reportSubType": "SUMMARY",
                    "version": "1_2",
                },
                "subscriber_report": {"reportType": "SUBSCRIBER", "frequency": "DAILY", "reportSubType": "DETAILED", "version": "1_2"},
                "subscription_report": {"reportType": "SUBSCRIPTION", "frequency": "DAILY", "reportSubType": "SUMMARY", "version": "1_2"},
                "sales_report": {"reportType": "SALES", "frequency": "DAILY", "reportSubType": "SUMMARY", "version": "1_0"},
            }

            api = Api(config["key_id"], config["key_file"], config["issuer_id"])
            # every report is probed for the same day, two days back
            report_date = (date.today() - timedelta(days=2)).strftime("%Y-%m-%d")
            stream_to_error = {}
            for stream, params in api_fields_to_test.items():
                report_filters = {"reportDate": report_date, "vendorNumber": f"{config['vendor']}"}
                report_filters.update(params)
                try:
                    rep_tsv = api.download_sales_and_trends_reports(filters=report_filters)
                    if isinstance(rep_tsv, dict):
                        raise Exception(f"An exception occurred: Received a JSON response instead of" f" the report: {str(rep_tsv)}")
                except Exception as e:
                    logger.warn(f"Unable to download {stream}: {e}")
                    stream_to_error[stream] = e

            # All streams have failed: compare the two counts (the previous comparison of a
            # count with a keys view was never true, so this branch could not be reached).
            if len(stream_to_error) == len(api_fields_to_test):
                message = "\n".join(f"Unable to access {stream} due to error: {e}" for stream, e in stream_to_error.items())
                return AirbyteConnectionStatus(status=Status.FAILED, message=message)

            return AirbyteConnectionStatus(status=Status.SUCCEEDED)
        except Exception as e:
            logger.warn(e)
            return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
Ejemplo n.º 12
0
    def check_config(self, logger: AirbyteLogger, config_path: str,
                     config: json) -> AirbyteConnectionStatus:
        """
        Tests if the input configuration can be used to successfully connect to the integration
            e.g: if a provided Stripe API token can be used to connect to the Stripe API.

        The check downloads the daily subscription summary report dated two days back; a dict result is treated
        as an error response.

        :param logger: Logging object to display debug/info/error to the logs
            (logs will not be accessible via airbyte UI if they are not passed to this logger)
        :param config_path: Path to the file containing the configuration json config
        :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file

        :return: AirbyteConnectionStatus indicating a Success or Failure
        """
        try:
            # request fields used for the probe
            probe_day = date.today() - timedelta(days=2)
            report_filters = {
                "reportDate": probe_day.strftime("%Y-%m-%d"),
                "vendorNumber": f"{config['vendor']}",
                "reportType": "SUBSCRIPTION",
                "frequency": "DAILY",
                "reportSubType": "SUMMARY",
                "version": "1_2",
            }

            # fetch data from appstore api
            api = Api(config["key_id"], config["key_file"],
                      config["issuer_id"])
            report = api.download_sales_and_trends_reports(
                filters=report_filters)

            if not isinstance(report, dict):
                return AirbyteConnectionStatus(status=Status.SUCCEEDED)

            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=
                f"An exception occurred: Received a JSON response instead of"
                f" the report: {str(report)}",
            )
        except Exception as exc:
            logger.warn(exc)
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=f"An exception occurred: {str(exc)}")
Ejemplo n.º 13
0
 def check_config(self, logger: AirbyteLogger, config_path: str, config: json) -> AirbyteConnectionStatus:
     """Check the Shopify credentials by running a minimal GraphQL query.

     :param logger: logger that receives the exception on failure
     :param config_path: not used by this check
     :param config: connector configuration; ``shop`` and ``api_key`` are read
     :return: SUCCEEDED when the query runs, FAILED with a generic message
         when anything raises
     """
     try:
         session = shopify.Session(f"{config['shop']}.myshopify.com", "2020-10", config["api_key"])
         shopify.ShopifyResource.activate_session(session)
         try:
             # try to read the name of the shop, which should be available with any level of permissions
             shopify.GraphQL().execute("{ shop { name id } }")
         finally:
             # deactivate the session even when the probe query fails
             shopify.ShopifyResource.clear_session()
         return AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except Exception as e:
         # plain {e}: the former "${e}" logged a stray "$" in front of the error
         logger.error(f"Exception connecting to Shopify: {e}")
         return AirbyteConnectionStatus(
             status=Status.FAILED, message="Unable to connect to the Shopify API with the provided credentials."
         )
Ejemplo n.º 14
0
    def read(self, logger: AirbyteLogger, config: json,
             catalog: ConfiguredAirbyteCatalog,
             state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
        """Yield one RECORD message per non-empty, relevant row of every selected sheet.

        Rows are requested in batches starting after the header row, until the
        cursor passes the sheet's row count or a batch comes back without values.

        :param logger: logger that receives progress information
        :param config: connector configuration; ``credentials_json`` and
            ``spreadsheet_id`` are read
        :param catalog: configured catalog naming the sheets and columns to sync
        :param state: not used by this method
        :return: generator of RECORD messages
        """
        client = GoogleSheetsClient(json.loads(config["credentials_json"]))

        sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(
            catalog)
        spreadsheet_id = config["spreadsheet_id"]

        logger.info(f"Starting syncing spreadsheet {spreadsheet_id}")
        # For each sheet in the spreadsheet, get a batch of rows, and as long as there hasn't been
        # a blank row, emit the row batch
        sheet_to_column_index_to_name = Helpers.get_available_sheets_to_column_index_to_name(
            client, spreadsheet_id, sheet_to_column_name)
        sheet_row_counts = Helpers.get_sheet_row_count(client, spreadsheet_id)
        logger.info(f"Row counts: {sheet_row_counts}")
        for sheet in sheet_to_column_index_to_name.keys():
            logger.info(f"Syncing sheet {sheet}")
            column_index_to_name = sheet_to_column_index_to_name[sheet]
            first_row = 2  # we start syncing past the header row
            # The first row of the requested interval has to exist when the request is sent.
            # An interval whose last row lies outside the sheet is fine: only the real data
            # comes back and the loop ends on the next iteration.
            while first_row <= sheet_row_counts[sheet]:
                cell_range = f"{sheet}!{first_row}:{first_row + ROW_BATCH_SIZE}"
                logger.info(f"Fetching range {cell_range}")
                batch = SpreadsheetValues.parse_obj(
                    client.get_values(spreadsheetId=spreadsheet_id,
                                      ranges=cell_range,
                                      majorDimension="ROWS"))

                first_row += ROW_BATCH_SIZE + 1
                # there should always be one range since we requested only one
                rows = batch.valueRanges[0].values
                if not rows:
                    break

                for row in rows:
                    if Helpers.is_row_empty(row):
                        continue
                    if not Helpers.row_contains_relevant_data(
                            row, column_index_to_name.keys()):
                        continue
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=Helpers.row_data_to_record_message(
                            sheet, row, column_index_to_name))
        logger.info(f"Finished syncing spreadsheet {spreadsheet_id}")
Ejemplo n.º 15
0
 def check(self, logger: AirbyteLogger,
           config_container: ConfigContainer) -> AirbyteConnectionStatus:
     """Check the FB Marketing configuration by running ``discover``.

     :param logger: logger passed to ``discover`` and used to report a failure
     :param config_container: configuration container handed to ``discover``
     :return: SUCCEEDED when ``discover`` completes, FAILED with a generic
         message when it raises
     """
     try:
         self.discover(logger, config_container)
         outcome = AirbyteConnectionStatus(status=Status.SUCCEEDED)
     except Exception as exc:
         # TODO parse the exception message for a human readable error
         logger.error("Exception while connecting to the FB Marketing API")
         logger.error(str(exc))
         outcome = AirbyteConnectionStatus(
             status=Status.FAILED,
             message="Unable to connect to the FB Marketing API with the provided credentials. ",
         )
     return outcome
Ejemplo n.º 16
0
    def check(self, logger: AirbyteLogger,
              config_container: ConfigContainer) -> AirbyteConnectionStatus:
        """Check the configuration by attempting a connection through ``try_connect``.

        :param logger: logger passed to ``try_connect`` and used to report a failure
        :param config_container: container whose ``rendered_config`` is used for the connection attempt
        :return: SUCCEEDED when ``try_connect`` does not raise ``self.api_error``,
            FAILED with the error text otherwise
        """
        try:
            json_config = config_container.rendered_config
            self.try_connect(logger, json_config)
        except self.api_error as err:
            # Pass one pre-formatted string, like every other logger call here:
            # %-style extra arguments depend on the logger accepting them.
            logger.error(f"Exception while connecting to the {self.tap_name}: {err}")
            # this should be in UI
            error_msg = f"Unable to connect to {self.tap_name} with the provided credentials. Error: {err}"
            return AirbyteConnectionStatus(status=Status.FAILED,
                                           message=error_msg)

        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
Ejemplo n.º 17
0
    def read(self,
             logger: AirbyteLogger,
             config_container: ConfigContainer,
             catalog_path,
             state_path: str = None) -> Generator[AirbyteMessage, None, None]:
        """Yield the messages of every configured stream, resuming from a state file when given.

        :param logger: logger that receives progress information
        :param config_container: configuration container used to build the client
        :param catalog_path: path of the configured catalog, loaded through ``read_config``
        :param state_path: optional path of a JSON state file; when falsy the sync starts
            with an empty state
        :return: generator over everything ``_read_record`` produces
        """
        client = self._client(config_container)

        if state_path:
            logger.info("Starting sync with provided state file")
            # context manager so the file handle is closed once the state is parsed
            with open(state_path, "r") as state_file:
                state_obj = json.load(state_file)
        else:
            logger.info("No state provided, starting fresh sync")
            state_obj = {}

        # streams without saved state get an empty dict on first access
        state = defaultdict(dict, state_obj)
        catalog = ConfiguredAirbyteCatalog.parse_obj(
            self.read_config(catalog_path))

        logger.info("Starting syncing mailchimp")
        for configured_stream in catalog.streams:
            stream = configured_stream.stream
            yield from self._read_record(client=client,
                                         stream=stream.name,
                                         state=state)

        logger.info("Finished syncing mailchimp")
Ejemplo n.º 18
0
def run_load_dataframes(config, expected_columns=10, expected_rows=42):
    """Load the dataframes for ``config`` and assert that exactly one comes back with the expected shape.

    :param config: configuration handed to ``SourceFile.load_dataframes``
    :param expected_columns: number of columns the DataFrame must have
    :param expected_rows: number of rows the DataFrame must have
    :return: the single loaded DataFrame
    """
    frames = SourceFile.load_dataframes(config=config, logger=AirbyteLogger(), skip_data=False)
    assert len(frames) == 1  # exactly one DataFrame must be loaded
    frame = frames[0]
    column_count = len(frame.columns)
    assert column_count == expected_columns  # column count must match the expectation
    row_count = len(frame.index)
    assert row_count == expected_rows  # row count must match the expectation
    return frame
Ejemplo n.º 19
0
    def read(self,
             logger: AirbyteLogger,
             config_container: ConfigContainer,
             catalog_path,
             state=None) -> Generator[AirbyteMessage, None, None]:
        """Yield a RECORD message for every record of every configured stream.

        :param logger: logger that receives the start and end notices
        :param config_container: configuration container used to build the client
        :param catalog_path: path of the configured catalog, loaded through ``read_config``
        :param state: not used by this method
        :return: generator of RECORD messages
        """
        client = self._client(config_container)

        raw_catalog = self.read_config(catalog_path)
        catalog = ConfiguredAirbyteCatalog.parse_obj(raw_catalog)

        logger.info("Starting syncing mailchimp")
        for selected in catalog.streams:
            stream_name = selected.stream.name
            for item in self._read_record(client=client, stream=stream_name):
                yield AirbyteMessage(type=Type.RECORD, record=item)

        logger.info("Finished syncing mailchimp")
Ejemplo n.º 20
0
    def check_config(self, logger: AirbyteLogger, config_path: str,
                     config: json) -> AirbyteConnectionStatus:
        """
        Tests if the input configuration can be used to successfully connect to the integration
            e.g: if a provided Stripe API token can be used to connect to the Stripe API.

        The check requests the installs report of the configured app for the day two days back and succeeds
        when the response status is 200.

        :param logger: Logging object to display debug/info/error to the logs
            (logs will not be accessible via airbyte UI if they are not passed to this logger)
        :param config_path: Path to the file containing the configuration json config
        :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file

        :return: AirbyteConnectionStatus indicating a Success or Failure
        """
        try:
            test_date = (date.today() -
                         timedelta(days=2)).strftime("%Y-%m-%d %H:%M")
            params = {
                "from": test_date,
                "to": test_date,
                "api_token": config["api_token"]
            }

            base_url = "https://hq.appsflyer.com"
            test_endpoint = f"/export/{config['app_id']}/installs_report/v5"

            url = base_url + test_endpoint

            logger.info(f"GET {url}")
            resp = requests.get(url, params=params)

            if resp.status_code == 200:
                return AirbyteConnectionStatus(status=Status.SUCCEEDED)

            # Interpolate the real values: the former f-string with {0}/{1} rendered
            # the literals 0 and 1, and the trailing .format() had nothing left to fill.
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=
                f"An exception occurred: Status Code: {resp.status_code}, content: {resp.content}",
            )
        except Exception as e:
            return AirbyteConnectionStatus(
                status=Status.FAILED,
                message=f"An exception occurred: {str(e)}")
Ejemplo n.º 21
0
    def discover(self, logger: AirbyteLogger,
                 config: Mapping) -> AirbyteCatalog:
        """
        Returns an AirbyteCatalog representing the available streams and fields in this integration. For example, given valid credentials to a
        Remote CSV File, returns an Airbyte catalog where each csv file is a stream, and each column is a field.

        :param logger: logger that receives the progress notice and, on failure, the error with its traceback
        :param config: connector configuration used to build the client
        :return: catalog holding the streams reported by the client
        :raises Exception: whatever the client raises while producing its streams; it is logged and re-raised
        """
        client = self._get_client(config)
        stream_name = client.stream_name

        logger.info(
            f"Discovering schema of {stream_name} at {client.reader.full_url}...")
        try:
            discovered = [*client.streams]
        except Exception as err:
            # log the full context first, then hand the same exception back to the caller
            reason = f"Failed to discover schemas of {stream_name} at {client.reader.full_url}: {err!r}\n{traceback.format_exc()}"
            logger.error(reason)
            raise err
        return AirbyteCatalog(streams=discovered)
Ejemplo n.º 22
0
    def read(self,
             logger: AirbyteLogger,
             config_container,
             catalog_path,
             state=None) -> Generator[AirbyteMessage, None, None]:
        """Yield one RECORD message per relevant row of every selected sheet.

        Rows are requested in batches starting after the header row. Reading
        of a sheet stops at the first empty row or at the first batch that
        comes back without values.

        :param logger: logger that receives progress information
        :param config_container: container whose ``rendered_config`` provides
            ``credentials_json`` and ``spreadsheet_id``
        :param catalog_path: path of the catalog, loaded through ``read_config``
        :param state: not used by this method
        :return: generator of RECORD messages
        """
        config = config_container.rendered_config
        client = Helpers.get_authenticated_sheets_client(
            json.loads(config["credentials_json"]))

        catalog = AirbyteCatalog.parse_obj(self.read_config(catalog_path))

        sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(
            catalog)
        spreadsheet_id = config["spreadsheet_id"]

        logger.info(f"Starting syncing spreadsheet {spreadsheet_id}")
        # For each sheet in the spreadsheet, get a batch of rows, and as long as there hasn't been
        # a blank row, emit the row batch
        sheet_to_column_index_to_name = Helpers.get_available_sheets_to_column_index_to_name(
            client, spreadsheet_id, sheet_to_column_name)
        for sheet in sheet_to_column_index_to_name.keys():
            logger.info(f"Syncing sheet {sheet}")
            column_index_to_name = sheet_to_column_index_to_name[sheet]
            first_row = 2  # we start syncing past the header row
            hit_blank_row = False
            while not hit_blank_row:
                cell_range = f"{sheet}!{first_row}:{first_row + ROW_BATCH_SIZE}"
                logger.info(f"Fetching range {cell_range}")
                batch = SpreadsheetValues.parse_obj(
                    client.values().batchGet(spreadsheetId=spreadsheet_id,
                                             ranges=cell_range,
                                             majorDimension="ROWS").execute())
                first_row += ROW_BATCH_SIZE + 1
                # there should always be one range since we requested only one
                rows = batch.valueRanges[0].values
                if not rows:
                    break

                for row in rows:
                    if Helpers.is_row_empty(row):
                        # an empty row marks the end of the sheet's data
                        hit_blank_row = True
                        break
                    if Helpers.row_contains_relevant_data(
                            row, column_index_to_name.keys()):
                        yield AirbyteMessage(
                            type=Type.RECORD,
                            record=Helpers.row_data_to_record_message(
                                sheet, row, column_index_to_name))
        logger.info(f"Finished syncing spreadsheet {spreadsheet_id}")
Ejemplo n.º 23
0
    def check(self, logger: AirbyteLogger,
              config: json) -> AirbyteConnectionStatus:
        """Check that the configured spreadsheet is reachable with the given credentials.

        :param logger: logger that receives the error on failure
        :param config: connector configuration; ``credentials_json`` and
            ``spreadsheet_id`` are read
        :return: SUCCEEDED when the first row can be requested, FAILED with
            the reason as message when the API raises ``errors.HttpError``
        """
        credentials = json.loads(config["credentials_json"])
        client = GoogleSheetsClient(credentials)
        spreadsheet_id = config["spreadsheet_id"]
        try:
            # Attempt to get first row of sheet
            client.get(spreadsheetId=spreadsheet_id,
                       includeGridData=False,
                       ranges="1:1")
        except errors.HttpError as err:
            # Give a clearer message if it's a common error like 404.
            if err.resp.status == status_codes.NOT_FOUND:
                reason = "Requested spreadsheet was not found."
            else:
                reason = str(err)
            logger.error(f"Formatted error: {reason}")
            return AirbyteConnectionStatus(status=Status.FAILED,
                                           message=reason)

        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
Ejemplo n.º 24
0
    def _run_sync_test(conf, catalog):
        """Read from SourceInstagram and require at least one record and one state message.

        :param conf: source configuration handed to ``read``
        :param catalog: configured catalog handed to ``read``
        """
        # Drain the generator once, then split the messages by type.
        emitted = list(SourceInstagram().read(AirbyteLogger(), conf, catalog))
        records = [msg for msg in emitted if msg.type == Type.RECORD]
        state = [msg for msg in emitted if msg.type == Type.STATE]

        assert records
        assert state
Ejemplo n.º 25
0
    def check(self, logger: AirbyteLogger,
              config: json) -> AirbyteConnectionStatus:
        """Check that the configured sheet can be fetched with the given access token.

        :param logger: logger that receives the failure reason
        :param config: source configuration; ``access_token`` and ``spreadsheet_id``
            are read from it
        :return: SUCCEEDED when the sheet is retrieved; otherwise FAILED, with the
            failure reason attached as the status message
        """
        try:
            access_token = config["access_token"]
            spreadsheet_id = config["spreadsheet_id"]

            smartsheet_client = smartsheet.Smartsheet(access_token)
            smartsheet_client.errors_as_exceptions(True)
            smartsheet_client.Sheets.get_sheet(spreadsheet_id)

            return AirbyteConnectionStatus(status=Status.SUCCEEDED)
        except smartsheet.exceptions.ApiError as e:
            err = e.error.result
            # Error code 1006 is presented to the user as a 404.
            code = 404 if err.code == 1006 else err.code
            reason = f"{err.name}: {code} - {err.message} | Check your spreadsheet ID."
        except Exception as e:
            # Anything else (missing config key, network problem, ...) is reported as-is.
            reason = str(e)

        logger.error(reason)
        # Hand the reason back to the caller as well, not only to the log.
        return AirbyteConnectionStatus(status=Status.FAILED, message=reason)
Ejemplo n.º 26
0
    def test_streams_outputs_records(self, config_credentials, configured_catalog):
        """
        Data in the Agent Activities and Agent Overview streams changes in real time, which makes
        the standard tests unreliable for them; this additional pytest only requires that a read
        yields at least one record.
        """
        emitted = SourceZendeskTalk().read(AirbyteLogger(), config_credentials, configured_catalog)
        records = [msg for msg in emitted if msg.type == Type.RECORD]

        assert records
Ejemplo n.º 27
0
 def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
     """Check the Braintree credentials by searching the transactions of the last day.

     :param logger: logger that receives the error line on authentication failure
     :param config_container: container whose ``rendered_config`` holds ``environment``,
         ``merchant_id``, ``public_key`` and ``private_key``
     :return: SUCCEEDED when the search call goes through, FAILED (with a message) when
         Braintree rejects the credentials
     """
     try:
         settings = config_container.rendered_config
         # The configured name is resolved to the attribute of the same name on braintree.Environment.
         environment = getattr(braintree.Environment, settings["environment"])
         configuration = braintree.Configuration(
             environment=environment,
             merchant_id=settings["merchant_id"],
             public_key=settings["public_key"],
             private_key=settings["private_key"],
         )
         gateway = braintree.BraintreeGateway(configuration)

         window_start = datetime.now() + relativedelta(days=-1)
         window_end = datetime.now()
         created_in_window = braintree.TransactionSearch.created_at.between(window_start, window_end)
         gateway.transaction.search(created_in_window)
     except AuthenticationError:
         logger.error("Exception while connecting to the Braintree API")
         return AirbyteConnectionStatus(
             status=Status.FAILED,
             message="Unable to connect to the Braintree API with the provided credentials. Please make sure the input credentials and environment are correct.",
         )
     else:
         return AirbyteConnectionStatus(status=Status.SUCCEEDED)
Ejemplo n.º 28
0
    def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
        """Discover one stream per grid sheet of the configured spreadsheet.

        The header row of every grid sheet is converted into a stream. A sheet that
        fails during that conversion is logged and left out of the catalog instead of
        aborting discovery.

        :param logger: logger for progress, skipped sheets and per-sheet errors
        :param config: source configuration; ``spreadsheet_id`` is read from it and the
            whole config is handed to ``self.get_credentials``
        :return: catalog holding the streams that could be built
        :raises Exception: when an ``errors.HttpError`` is raised outside the per-sheet
            handling; the original error is chained as the cause
        """
        client = GoogleSheetsClient(self.get_credentials(config))
        spreadsheet_id = config["spreadsheet_id"]
        try:
            logger.info(f"Running discovery on sheet {spreadsheet_id}")
            spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
            grid_sheets = Helpers.get_grid_sheets(spreadsheet_metadata)
            streams = []
            for sheet_name in grid_sheets:
                try:
                    header_row_data = Helpers.get_first_row(client, spreadsheet_id, sheet_name)
                    stream = Helpers.headers_to_airbyte_stream(logger, sheet_name, header_row_data)
                    streams.append(stream)
                except Exception as err:
                    # The empty-sheet case is recognised by the prefix of the error message;
                    # every other per-sheet failure is logged and the sheet is skipped too.
                    if str(err).startswith("Expected data for exactly one row for sheet"):
                        logger.warn(f"Skip empty sheet: {sheet_name}")
                    else:
                        logger.error(str(err))
            return AirbyteCatalog(streams=streams)

        except errors.HttpError as err:
            reason = str(err)
            # Give a clearer message for the common 404 case.
            if err.resp.status == status_codes.NOT_FOUND:
                reason = "Requested spreadsheet was not found."
            # Chain explicitly so the traceback shows the HTTP error as the direct cause.
            raise Exception(f"Could not run discovery: {reason}") from err
Ejemplo n.º 29
0
    def read(self, logger: AirbyteLogger, config: json,
             catalog: ConfiguredAirbyteCatalog,
             state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
        """Run the configured query and yield every returned row as a record message.

        :param logger: logger for progress and error lines
        :param config: source configuration; ``url``, ``username``, ``access_token`` and
            ``query`` are read from it
        :param catalog: not used by this implementation
        :param state: not used by this implementation
        :return: generator of RECORD messages on the ``DATASET_ITEMS_STREAM_NAME`` stream
        :raises Exception: any error raised while querying or emitting is logged and re-raised
        """
        logger.info("read called")

        url = config["url"]
        username = config["username"]
        key = config["access_token"]
        client = WSClient(url)
        # NOTE(review): the return value of do_login is ignored; confirm whether it can signal failure.
        client.do_login(username, key, withpassword=False)
        query = config["query"]
        logger.info(query)
        try:
            # The query runs inside the try so that a failing query is logged like a failing row.
            data = client.do_query(query)
            for single_dict in data:
                yield AirbyteMessage(
                    type=Type.RECORD,
                    record=AirbyteRecordMessage(
                        stream=DATASET_ITEMS_STREAM_NAME,
                        data=single_dict,
                        # Epoch milliseconds, truncated to whole seconds.
                        emitted_at=int(datetime.now().timestamp()) * 1000),
                )
        except Exception:
            reason = f"Failed to read data of {DATASET_ITEMS_STREAM_NAME} at {url}"
            logger.error(reason)
            # A bare raise keeps the original traceback intact.
            raise
Ejemplo n.º 30
0
def read_stream(
    source: SourceHubspot, config: Mapping, catalog: ConfiguredAirbyteCatalog, state: MutableMapping = None
) -> Tuple[Mapping, List]:
    """Read from ``source`` and collect what it emits.

    :param source: source whose ``read`` is called with a fresh AirbyteLogger
    :param config: configuration passed through to ``read``
    :param catalog: configured catalog passed through to ``read``
    :param state: optional state passed through to ``read``
    :return: ``(records, states)`` where ``records`` maps each stream name to the list of
        its record payloads and ``states`` lists the state payloads in emission order
    """
    records_by_stream = {}
    emitted_states = []
    for msg in source.read(AirbyteLogger(), config, catalog, state):
        if msg.type == Type.RECORD:
            # Group each record under its stream, creating the bucket on first sight.
            records_by_stream.setdefault(msg.record.stream, []).append(msg.record)
            continue
        if msg.type == Type.STATE:
            emitted_states.append(msg.state)

    return records_by_stream, emitted_states