def vertica_compliant_schema(self):
        """Return the table schema translated from MySQL types to Vertica types.

        Lazily populates ``self.table_schema`` from a ``DESCRIBE`` query the
        first time it is called; later calls return the cached list of
        ``(quoted_field_name, vertica_type)`` tuples.
        """
        if self.table_schema:
            return self.table_schema

        describe_rows = get_mysql_query_results(
            self.db_credentials, self.database,
            'describe {}'.format(self.table_name))
        # MySQL integer/datetime types carry display-width suffixes such as
        # "int(11)" that must be stripped for Vertica.
        parenthesized_markers = ('tinyint', 'smallint', 'int', 'bigint', 'datetime')
        for row in describe_rows:
            name = row[0].strip()
            vertica_type = row[1].strip()
            nullable = row[2].strip()

            if any(marker in vertica_type for marker in parenthesized_markers):
                vertica_type = vertica_type.rsplit('(')[0]
            elif vertica_type == 'longtext':
                vertica_type = 'LONG VARCHAR'
            elif vertica_type == 'longblob':
                vertica_type = 'LONG VARBINARY'
            elif vertica_type == 'double':
                vertica_type = 'DOUBLE PRECISION'

            if nullable == "NO":
                vertica_type += " NOT NULL"

            # Quote field names so reserved words remain valid identifiers.
            self.table_schema.append(('"{}"'.format(name), vertica_type))

        return self.table_schema
    def get_snowflake_schema(self):
        """
        Return the table schema translated from MySQL types to Snowflake types.

        Populates ``self.table_fields`` on first use from a ``DESCRIBE``
        query, recording excluded fields in ``self.deleted_fields`` and
        double-quoting field names that collide with Snowflake reserved
        keywords.
        """
        if self.table_fields:
            return self.table_fields

        # MySQL size suffixes such as "varchar(255)" are dropped for Snowflake.
        sized_types = ('smallint', 'int', 'bigint', 'datetime', 'varchar')
        for row in get_mysql_query_results(self.db_credentials, self.database, 'describe {}'.format(self.table_name)):
            name = row[0].strip()
            mysql_type = row[1].strip()
            nullable = row[2].strip()

            if self.should_exclude_field(self.table_name, name):
                self.deleted_fields.append(name)
                continue

            # Enclose any Snowflake-reserved keyword field names within double-quotes.
            if name.upper() in SNOWFLAKE_RESERVED_KEYWORDS:
                name = '"{}"'.format(name.upper())

            if mysql_type == 'tinyint(1)':
                snowflake_type = 'BOOLEAN'
            elif any(marker in mysql_type for marker in sized_types):
                snowflake_type = mysql_type.rsplit('(')[0]
            elif mysql_type == 'longtext':
                snowflake_type = 'VARCHAR'
            elif mysql_type == 'longblob':
                snowflake_type = 'BINARY'
            else:
                snowflake_type = mysql_type

            if nullable == 'NO':
                snowflake_type += ' NOT NULL'

            self.table_fields.append((name, snowflake_type))

        return self.table_fields
 def requires(self):
     """
     Determines the required tasks given the non-excluded tables in the MySQL schema.
     """
     if not self.table_list:
         # Ask MySQL for every table, then filter out the excluded ones.
         all_tables = (row[0].strip() for row in get_mysql_query_results(self.db_credentials, self.database, 'show tables'))
         self.table_list = [name for name in all_tables if not self.should_exclude_table(name)]
     if self.required_tasks is None:
         # One load task per remaining table.
         self.required_tasks = [
             LoadMysqlToSnowflakeTableTask(
                 db_credentials=self.db_credentials,
                 sf_database=self.sf_database,
                 schema=self.schema,
                 scratch_schema=self.scratch_schema,
                 run_id=self.run_id,
                 warehouse=self.warehouse,
                 role=self.role,
                 warehouse_path=self.warehouse_path,
                 warehouse_subdirectory=self.warehouse_subdirectory,
                 database=self.database,
                 table_name=name,
                 overwrite=self.overwrite,
                 date=self.date,
                 credentials=self.credentials,
                 exclude_field=self.exclude_field,
             )
             for name in self.table_list
         ]
     return self.required_tasks
    def vertica_compliant_schema(self):
        """Transforms mysql table schema into a vertica compliant schema.

        Lazily builds ``self.table_schema`` from a ``DESCRIBE`` of the table
        and returns a list of ``(quoted_field_name, vertica_type)`` tuples.
        """
        if not self.table_schema:
            results = get_mysql_query_results(self.db_credentials, self.database, 'describe {}'.format(self.table_name))
            for result in results:
                field_name = result[0].strip()
                field_type = result[1].strip()
                field_null = result[2].strip()

                # Strip MySQL display widths, e.g. "int(11)" -> "int".
                types_with_parentheses = ['tinyint', 'smallint', 'int', 'bigint', 'datetime']
                if any(_type in field_type for _type in types_with_parentheses):
                    field_type = field_type.rsplit('(')[0]
                elif field_type == 'longtext':
                    field_type = 'LONG VARCHAR'
                elif field_type == 'longblob':
                    # Fix: "longblob" was previously passed through unmapped,
                    # which is not a valid Vertica type; map it to
                    # LONG VARBINARY as the sibling implementation does.
                    field_type = 'LONG VARBINARY'
                elif field_type == 'double':
                    field_type = 'DOUBLE PRECISION'

                if field_null == "NO":
                    field_type = field_type + " NOT NULL"

                # Quote names so reserved words stay valid identifiers.
                field_name = "\"{}\"".format(field_name)

                self.table_schema.append((field_name, field_type))

        return self.table_schema
 def requires(self):
     """Build one BigQuery load task for every non-excluded MySQL table."""
     if not self.table_list:
         # Enumerate tables and drop the excluded ones up front.
         all_tables = (row[0].strip() for row in get_mysql_query_results(self.db_credentials, self.database, 'show tables'))
         self.table_list = [name for name in all_tables if not self.should_exclude_table(name)]
     if self.required_tasks is None:
         self.required_tasks = [
             LoadMysqlToBigQueryTableTask(
                 db_credentials=self.db_credentials,
                 database=self.database,
                 warehouse_path=self.warehouse_path,
                 warehouse_subdirectory=self.warehouse_subdirectory,
                 table_name=name,
                 overwrite=self.overwrite,
                 date=self.date,
                 dataset_id=self.dataset_id,
                 credentials=self.credentials,
                 max_bad_records=self.max_bad_records,
                 skip_clear_marker=self.overwrite,
                 exclude_field=self.exclude_field,
             )
             for name in self.table_list
         ]
     return self.required_tasks
    def requires(self):
        """Yield the pre-import, per-table load, and post-import tasks in order."""
        if not self.table_list:
            rows = get_mysql_query_results(self.db_credentials,
                                           self.database, 'show tables')
            self.table_list = [row[0].strip() for row in rows]

        pre_import = PreImportDatabaseTask(
            date=self.date,
            schema=self.schema,
            credentials=self.credentials,
            marker_schema=self.marker_schema,
            overwrite=self.overwrite)
        yield pre_import

        for name in self.table_list:
            if self.should_exclude_table(name):
                continue
            # Each table loads into the "loading" schema owned by the
            # pre-import task.
            yield LoadMysqlToVerticaTableTask(
                credentials=self.credentials,
                schema=pre_import.schema_loading,
                db_credentials=self.db_credentials,
                database=self.database,
                warehouse_path=self.warehouse_path,
                table_name=name,
                overwrite=self.overwrite,
                date=self.date,
                marker_schema=self.marker_schema,
            )

        yield PostImportDatabaseTask(date=self.date,
                                     schema=self.schema,
                                     credentials=self.credentials,
                                     marker_schema=self.marker_schema,
                                     overwrite=self.overwrite)
 def requires(self):
     """Return (building lazily) the BigQuery load tasks for all included tables."""
     if not self.table_list:
         raw_names = [row[0].strip() for row in get_mysql_query_results(self.db_credentials, self.database, 'show tables')]
         # Keep only the tables that are not excluded.
         self.table_list = [name for name in raw_names if not self.should_exclude_table(name)]
     if self.required_tasks is None:
         tasks = []
         for name in self.table_list:
             tasks.append(
                 LoadMysqlToBigQueryTableTask(
                     db_credentials=self.db_credentials,
                     database=self.database,
                     warehouse_path=self.warehouse_path,
                     warehouse_subdirectory=self.warehouse_subdirectory,
                     table_name=name,
                     overwrite=self.overwrite,
                     date=self.date,
                     dataset_id=self.dataset_id,
                     credentials=self.credentials,
                     max_bad_records=self.max_bad_records,
                     skip_clear_marker=self.overwrite,
                     exclude_field=self.exclude_field,
                 )
             )
         self.required_tasks = tasks
     return self.required_tasks
    def requires(self):
        """Yield pre-import, one Vertica load per included table, then post-import."""
        if not self.table_list:
            query_rows = get_mysql_query_results(self.db_credentials, self.database, 'show tables')
            self.table_list = [query_row[0].strip() for query_row in query_rows]

        pre_import = PreImportDatabaseTask(
            date=self.date,
            schema=self.schema,
            credentials=self.credentials,
            marker_schema=self.marker_schema,
            overwrite=self.overwrite
        )
        yield pre_import

        included = (t for t in self.table_list if not self.should_exclude_table(t))
        for table in included:
            # Loads target the intermediate schema created by pre-import.
            yield LoadMysqlToVerticaTableTask(
                credentials=self.credentials,
                schema=pre_import.schema_loading,
                db_credentials=self.db_credentials,
                database=self.database,
                warehouse_path=self.warehouse_path,
                table_name=table,
                overwrite=self.overwrite,
                date=self.date,
                marker_schema=self.marker_schema,
            )

        yield PostImportDatabaseTask(
            date=self.date,
            schema=self.schema,
            credentials=self.credentials,
            marker_schema=self.marker_schema,
            overwrite=self.overwrite
        )
    def get_bigquery_schema(self):
        """Transforms the MySQL table schema into a list of BigQuery SchemaFields."""
        if self.table_schema:
            return self.table_schema

        describe_rows = get_mysql_query_results(
            self.db_credentials, self.database,
            'describe {}'.format(self.table_name))
        for row in describe_rows:
            name = row[0].strip()
            mysql_type = row[1].strip()
            nullable = row[2].strip()

            # Keep "tinyint(1)" intact (it is the boolean marker); strip the
            # size suffix from every other type, e.g. "varchar(255)" -> "varchar".
            if mysql_type != 'tinyint(1)':
                mysql_type = mysql_type.rsplit('(')[0]

            if self.should_exclude_field(name):
                self.deleted_fields.append(name)
                continue

            self.table_schema.append(
                SchemaField(
                    name,
                    MYSQL_TO_BIGQUERY_TYPE_MAP.get(mysql_type),
                    description='',
                    mode='REQUIRED' if nullable == 'NO' else 'NULLABLE',
                ))

        return self.table_schema
# Example no. 10
# 0
    def requires(self):
        """Create one S3 export task per non-excluded table, stamping a creation time."""
        if self.creation_time is None:
            # Record when this task set was first assembled.
            self.creation_time = datetime.datetime.utcnow().isoformat()

        if self.required_tasks is None:
            if not self.table_includes_list:
                rows = get_mysql_query_results(self.db_credentials,
                                               self.database, 'show tables')
                names = (row[0].strip() for row in rows)
                self.table_includes_list = [
                    name for name in names
                    if not self.should_exclude_table(name)
                ]

            self.required_tasks = [
                ExportMysqlTableToS3Task(
                    date=self.date,
                    database=self.database,
                    db_credentials=self.db_credentials,
                    exclude_field=self.exclude_field,
                    table_name=name,
                )
                for name in self.table_includes_list
            ]
        return self.required_tasks
# Example no. 11
# 0
 def rows(self):
     """Execute ``self.insert_query`` against MySQL and yield each result row."""
     result_rows = get_mysql_query_results(credentials=self.credentials,
                                           database=self.database,
                                           query=self.insert_query)
     log.info('query_sql = [{}]'.format(self.insert_query))
     yield from result_rows
# Example no. 12
# 0
 def mysql_compliant_schema(self):
     """Return cached ``(name, type, nullability)`` tuples for the table's non-excluded fields."""
     if not self.mysql_table_schema:
         describe_rows = get_mysql_query_results(self.db_credentials, self.database, 'describe {}'.format(self.table_name))
         for row in describe_rows:
             name = row[0].strip()
             field_type = row[1].strip()
             nullable = row[2].strip()
             if self.should_exclude_field(self.table_name, name):
                 # Track what was dropped so callers can report it.
                 self.deleted_fields.append(name)
             else:
                 self.mysql_table_schema.append((name, field_type, nullable))
     return self.mysql_table_schema
    def run(self):
        # Tasks yielded from run() become dynamic dependencies; the scheduler
        # reruns this method each time a yielded job completes.
        if not self.table_list:
            rows = get_mysql_query_results(self.db_credentials, self.database, 'show tables')
            self.table_list = [row[0].strip() for row in rows]

        pre_import = PreImportDatabaseTask(
            date=self.date,
            schema=self.schema,
            credentials=self.credentials,
            marker_schema=self.marker_schema,
            overwrite=self.overwrite,
        )
        yield pre_import

        included_tables = []
        for name in self.table_list:
            if self.should_exclude_table(name):
                continue
            included_tables.append(name)
            yield LoadMysqlToVerticaTableTask(
                credentials=self.credentials,
                schema=pre_import.schema_loading,
                db_credentials=self.db_credentials,
                database=self.database,
                warehouse_path=self.warehouse_path,
                warehouse_subdirectory=self.warehouse_subdirectory,
                table_name=name,
                overwrite=self.overwrite,
                date=self.date,
                marker_schema=self.marker_schema,
                exclude_field=self.exclude_field,
            )

        # The post-import step receives only the tables that were loaded.
        yield PostImportDatabaseTask(
            date=self.date,
            schema=self.schema,
            credentials=self.credentials,
            marker_schema=self.marker_schema,
            overwrite=self.overwrite,
            tables=included_tables
        )
        self.is_complete = True
    def run(self):
        # Yielded tasks act as dynamic dependencies: after each yielded job
        # finishes, this method is invoked again from the top.
        if not self.table_list:
            query_rows = get_mysql_query_results(self.db_credentials, self.database, 'show tables')
            self.table_list = [query_row[0].strip() for query_row in query_rows]

        pre_import_task = PreImportDatabaseTask(
            date=self.date,
            schema=self.schema,
            credentials=self.credentials,
            marker_schema=self.marker_schema,
            overwrite=self.overwrite,
        )
        yield pre_import_task

        loaded_tables = []
        for table in self.table_list:
            if self.should_exclude_table(table):
                continue
            loaded_tables.append(table)
            yield LoadMysqlToVerticaTableTask(
                credentials=self.credentials,
                schema=pre_import_task.schema_loading,
                db_credentials=self.db_credentials,
                database=self.database,
                warehouse_path=self.warehouse_path,
                warehouse_subdirectory=self.warehouse_subdirectory,
                table_name=table,
                overwrite=self.overwrite,
                date=self.date,
                marker_schema=self.marker_schema,
                exclude_field=self.exclude_field,
            )

        # Promote/validate only the tables that were actually loaded.
        yield PostImportDatabaseTask(
            date=self.date,
            schema=self.schema,
            credentials=self.credentials,
            marker_schema=self.marker_schema,
            overwrite=self.overwrite,
            tables=loaded_tables
        )
        self.is_complete = True
    def get_bigquery_schema(self):
        """Transforms the MySQL table schema into a list of BigQuery SchemaFields."""
        if not self.table_schema:
            for row in get_mysql_query_results(self.db_credentials, self.database, 'describe {}'.format(self.table_name)):
                name = row[0].strip()
                mysql_type = row[1].strip()
                nullable = row[2].strip()

                # "tinyint(1)" stays intact (boolean marker); all other types
                # lose their size suffix, e.g. "int(11)" -> "int".
                if mysql_type != 'tinyint(1)':
                    mysql_type = mysql_type.rsplit('(')[0]

                if self.should_exclude_field(self.table_name, name):
                    self.deleted_fields.append(name)
                else:
                    self.table_schema.append(SchemaField(
                        name,
                        MYSQL_TO_BIGQUERY_TYPE_MAP.get(mysql_type),
                        description='',
                        mode='REQUIRED' if nullable == 'NO' else 'NULLABLE',
                    ))

        return self.table_schema
# Example no. 16
# 0
 def load_data(self):
     """Log ``self.query``, run it against MySQL, and return the result rows."""
     log.info('query_sql = [{}]'.format(self.query))
     return get_mysql_query_results(credentials=self.credentials,
                                    database=self.database,
                                    query=self.query)