Esempio n. 1
0
 def __init__(self):
     """Set the Hyper output paths and declare the 'results' table layout."""
     self.location = "./output/meli_challenge_result.hyper"
     self.test_location = "../output/meli_challenge_result.hyper"

     # Short aliases keep the column table below readable.
     text, integer, boolean, json_type = (
         SqlType.text, SqlType.int, SqlType.bool, SqlType.json)
     required = Nullability.NOT_NULLABLE
     optional = Nullability.NULLABLE

     # (column name, SqlType factory, nullability) in table column order.
     column_specs = (
         ('id', text, required),
         ('site_id', text, required),
         ('title', text, required),
         ('seller', text, required),
         ('price', text, required),
         ('prices', json_type, required),
         ('sale_price', text, optional),
         ('currency_id', text, optional),
         ('available_quantity', integer, optional),
         ('sold_quantity', integer, optional),
         ('buying_mode', text, optional),
         ('listing_type_id', text, optional),
         ('stop_time', text, optional),
         ('condition', text, optional),
         ('permalink', text, optional),
         ('thumbnail', text, optional),
         ('accepts_mercadopago', boolean, optional),
         ('installments', text, optional),
         ('address', text, optional),
         ('shipping', text, optional),
         ('seller_address', text, optional),
         ('attributes', text, required),
         ('original_price', text, optional),
         ('category_id', text, required),
         ('official_store_id', integer, optional),
         ('domain_id', text, optional),
         ('catalog_product_id', text, optional),
         ('tags', text, optional),
         ('catalog_listing', boolean, optional),
         ('order_backend', integer, optional),
     )
     self.searchResult_table = TableDefinition('results', [
         TableDefinition.Column(column_name, make_type(), nullability)
         for column_name, make_type, nullability in column_specs
     ])
def insert_box_events():
    """Fetch Box enterprise events and append them to the Hyper file.

    Opens (creating it if missing) the Hyper database at ``box_hyper_file``,
    ensures the ``box_schema`` schema and ``box_events_table`` table exist,
    and reads ``MAX(created_at)`` from the table to decide where the fetch
    window starts. When the table is empty the window starts
    ``month_lookback`` months before now. It then calls ``get_box_events``
    for that window, inserts the rows held in the module-level ``box_events``
    and prints the resulting row count.

    Relies on module-level names defined outside this function:
    ``box_hyper_file``, ``box_schema``, ``box_events_table``, ``box_config``,
    ``month_lookback``, ``box_events`` and ``get_box_events``.

    Returns:
        None. Progress is reported with ``print``.
    """
    path_to_database = Path(box_hyper_file)

    # (column name, SQL type) for every column; all columns are nullable.
    column_types = (
        ('event_id', SqlType.text()),
        ('event_type', SqlType.text()),
        ('created_at', SqlType.timestamp_tz()),
        ('created_by_id', SqlType.text()),
        ('created_by_name', SqlType.text()),
        ('created_by_login', SqlType.text()),
        ('source', SqlType.json()),
        ('ip_address', SqlType.text()),
        ('additional_details', SqlType.json()),
    )

    # Start the Hyper API process
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

        # CREATE_IF_NOT_EXISTS opens an existing file for appending and
        # creates a new one otherwise, so no separate (race-prone) existence
        # check is needed before connecting.
        with Connection(endpoint=hyper.endpoint,
                        database=path_to_database,
                        create_mode=CreateMode.CREATE_IF_NOT_EXISTS
                        ) as connection:
            # Create the schema if it does not exist
            connection.catalog.create_schema_if_not_exists(schema=box_schema)

            # Instantiate the table schema
            box_events_table_def = TableDefinition(
                table_name=TableName(box_schema, box_events_table),
                columns=[
                    TableDefinition.Column(name=column_name,
                                           type=sql_type,
                                           nullability=NULLABLE)
                    for column_name, sql_type in column_types
                ])
            table_name = box_events_table_def.table_name
            print('Found schema: {0} and table def: {1}'.format(
                table_name.schema_name, table_name))

            # Create the table if it does not exist
            connection.catalog.create_table_if_not_exists(
                table_definition=box_events_table_def)

            # Get the MAX row by created_at (None when the table is empty)
            last_event_created_at = connection.execute_scalar_query(
                query=f"SELECT MAX(created_at) FROM {table_name}")
            if last_event_created_at is not None:
                print('Found last event in hyper file: {0}'.format(
                    last_event_created_at.to_datetime()))

            # Get the Box service account client
            auth = JWTAuth.from_settings_file(box_config)
            box_client = Client(auth)
            service_account = box_client.user().get()
            print(
                'Found Service Account with name: {0}, id: {1}, and login: {2}'
                .format(service_account.name, service_account.id,
                        service_account.login))

            # Use the latest stored event as the window start, or look back
            # `month_lookback` months when there is no latest event.
            # NOTE(review): `today` and the fallback start are naive UTC
            # datetimes, while the start derived from the Hyper file is
            # timezone-aware local time; confirm get_box_events handles
            # both forms before normalising them.
            today = datetime.utcnow()
            if last_event_created_at is None:
                last_event_created_at = today - relativedelta.relativedelta(
                    months=month_lookback)
            else:
                last_event_created_at = last_event_created_at.to_datetime(
                ).replace(tzinfo=timezone.utc).astimezone(tz=None)

            # Get the Box enterprise events for the given date range
            print(
                'Using date range for events  today: {0} and starting datetime: {1}'
                .format(today, last_event_created_at))
            get_box_events(box_client, 0, last_event_created_at, today)

            # Insert the Box enterprise events into the Hyper file.
            # Presumably get_box_events fills the module-level `box_events`
            # -- verify against its definition.
            with Inserter(connection, box_events_table_def) as inserter:
                inserter.add_rows(rows=box_events)
                inserter.execute()

            # Number of rows in the events table.
            row_count = connection.execute_scalar_query(
                query=f"SELECT COUNT(*) FROM {table_name}")
            print(f"The number of rows in table {table_name} is {row_count}.")
        print("The connection to the Hyper file has been closed.")
    print("The Hyper process has been shut down.")