Beispiel #1
0
 def _get_create_lakeout_statement(self):
     """Build the statement that creates the lake-out view from a SELECT.

     The SELECT is built over ``self.db_table_lake`` using the projection
     returned by ``get_projection_columns`` for the lake column names and
     ``self.columns_lakeout``. The result is whatever
     ``HQLGenerator.generate_create_view_as_select`` returns for
     ``self.db_view_lake_out`` and that SELECT.
     """
     lake_columns = self.get_lake_column_names()
     projection = self.get_projection_columns(lake_columns, self.columns_lakeout)

     # Build the SELECT first, then wrap it into the CREATE VIEW statement.
     select_builder = HQLGenerator.SelectStatementBuilder(self.db_table_lake, projection)
     select_hql = select_builder.build()

     return HQLGenerator.generate_create_view_as_select(self.db_view_lake_out, select_hql)
Beispiel #2
0
    def create_tables(self, lake_table_location_prefix):
        """Create the landing and lake databases and tables in one Hive run.

        The generated script contains, in order: create-database statements
        for ``self.db_landing`` and ``self.db_lake``, the landing table
        creation plus its repair statement, and the lake table creation plus
        its repair statement.

        :param lake_table_location_prefix: when ``None`` the lake table is
            created at ``self.dir_lake_final``; otherwise at
            ``self.dir_lake_table`` joined with this prefix.
        :raises Exception: whatever ``self.emr_system.execute_hive`` raises;
            the failure is logged at error level and re-raised unchanged.
        """
        if lake_table_location_prefix is None:
            lake_location = self.dir_lake_final
        else:
            lake_location = os.path.join(self.dir_lake_table, lake_table_location_prefix)

        statements = [
            self._get_create_database_if_not_exists(self.db_landing),
            self._get_create_database_if_not_exists(self.db_lake),
            self._get_create_landing_statement(self.dir_landing_final).with_semicolon(),
            HQLGenerator.generate_repair_table(self.db_table_landing).with_semicolon(),
            self._get_create_lake_statement(lake_location).with_semicolon(),
            HQLGenerator.generate_repair_table(self.db_table_lake).with_semicolon(),
        ]
        hql = "\n".join(statements)

        try:
            self.emr_system.execute_hive(hql)
        except Exception:
            # Log which tables were affected, then let the caller handle it.
            logging.error("Failed to create {} and {} tables.".format(
                self.db_table_landing, self.db_table_lake))
            raise
        else:
            logging.info("Successfully created {} and {} tables.".format(
                self.db_table_landing, self.db_table_lake))
Beispiel #3
0
        def drop_table(db_table):
            """Try to drop ``db_table`` through Hive.

            :param db_table: table identifier passed to
                ``HQLGenerator.generate_drop_table``.
            :return: 1 if the Hive call completed, 0 if it raised.
            """
            statement = HQLGenerator.generate_drop_table(db_table).with_semicolon()

            try:
                logging.info("Dropping table {} from Hive".format(db_table))
                self.emr_system.execute_hive(statement)
            except Exception:
                # Best effort: a failed drop is reported but never propagated.
                logging.warning("Unable to drop {} table".format(db_table))
                return 0
            else:
                return 1
Beispiel #4
0
    def _get_create_landing_statement(self, table_location):
        """Build the external DSV table statement for the landing table.

        The table is always given the ``serialization.encoding`` property
        ``UTF-8``. When ``self.header_lines`` converts to a positive integer,
        ``skip.header.line.count`` is also set to its string form.

        :param table_location: location passed to the DSV table builder.
        :return: result of the builder's ``build(is_external=True)``.
        """
        header_line_count = int(self.header_lines)

        table_properties = {"serialization.encoding": "UTF-8"}
        if header_line_count > 0:
            # The property value is the string form of the configured value.
            table_properties["skip.header.line.count"] = str(self.header_lines)

        builder = HQLGenerator.CreateDSVTableStatementBuilder(
            self.db_table_landing,
            table_location,
            self.columns_lake,
            self.delimiter,
        )
        return builder.with_properties(table_properties).build(is_external=True)
Beispiel #5
0
        def reset_table(db_table, create_table_hql, table_location,
                        table_partitioned_flag):
            """Reset ``db_table`` in Hive.

            For a partitioned table the script drops the table, runs
            ``create_table_hql`` and then the repair-table statement. For a
            non-partitioned table only the table location is altered to
            ``table_location``.

            :param db_table: table identifier to reset.
            :param create_table_hql: creation statement, used only on the
                partitioned path. It is joined as-is, so it is presumably
                already terminated by the caller (TODO confirm).
            :param table_location: new location, used only on the
                non-partitioned path.
            :param table_partitioned_flag: truthy selects the
                drop/create/repair path.
            :raises M3DEMRStepException: re-raised unless its message contains
                "Table not found".
            """
            if table_partitioned_flag:
                hql = "\n".join([
                    HQLGenerator.generate_drop_table(db_table).with_semicolon(),
                    create_table_hql,
                    HQLGenerator.generate_repair_table(db_table).with_semicolon(),
                ])
            else:
                hql = HQLGenerator.generate_alter_table_location(
                    db_table, table_location).with_semicolon()

            try:
                logging.info("Resetting '{}' table.".format(db_table))
                self.emr_system.execute_hive(hql)
            except M3DEMRStepException as e:
                # The table might already be absent; that case is ignored.
                if "Table not found" not in str(e):
                    # A real failure is logged at error level, as in create_tables.
                    logging.error("Failed to reset '{}' table: {}".format(
                        db_table, e))
                    raise
Beispiel #6
0
 def _get_drop_lakeout_statement(self):
     """Return the drop-view-if-exists statement for ``self.db_view_lake_out``.

     Delegates to ``HQLGenerator.generate_drop_view_if_exists``.
     """
     view_name = self.db_view_lake_out
     return HQLGenerator.generate_drop_view_if_exists(view_name)
Beispiel #7
0
 def create_statement(_columns, _target_partitions=None):
     """Build the external Parquet table statement for ``self.db_table_lake``.

     ``self`` and ``table_location`` come from the enclosing scope.

     :param _columns: columns passed to the Parquet table builder.
     :param _target_partitions: passed to ``partitioned_by``; defaults to
         ``None``.
     :return: result of the builder's ``build(is_external=True)``.
     """
     builder = HQLGenerator.CreateParquetTableStatementBuilder(
         self.db_table_lake, table_location, _columns)
     builder = builder.partitioned_by(_target_partitions)
     builder = builder.with_properties({"serialization.encoding": "UTF-8"})
     return builder.build(is_external=True)
Beispiel #8
0
 def _get_create_database_if_not_exists(database_name):
     """Return the semicolon-terminated create-database statement.

     Delegates to ``HQLGenerator.generate_create_database_if_not_exits``;
     the ``exits`` spelling is the helper's name as referenced here.

     :param database_name: database name passed to the generator.
     """
     create_database = HQLGenerator.generate_create_database_if_not_exits(database_name)
     return create_database.with_semicolon()