Example #1
0
    def test_hive_error_msg(self):
        """extract_error_message should pull the errorMessage="..." payload
        out of a Thrift-style exception string, and fall back to str(e)
        when the string does not match the expected pattern."""
        msg = ('{...} errorMessage="Error while compiling statement: FAILED: '
               "SemanticException [Error 10001]: Line 4"
               ":5 Table not found 'fact_ridesfdslakj'\", statusCode=3, "
               "sqlState='42S02', errorCode=10001)){...}")
        # assertEquals is a deprecated unittest alias; use assertEqual.
        self.assertEqual(
            ("Error while compiling statement: FAILED: "
             "SemanticException [Error 10001]: Line 4:5 "
             "Table not found 'fact_ridesfdslakj'"),
            HiveEngineSpec.extract_error_message(Exception(msg)),
        )

        # A message that does not match the pattern is returned unchanged.
        e = Exception("Some string that doesn't match the regex")
        self.assertEqual(str(e), HiveEngineSpec.extract_error_message(e))

        msg = (
            "errorCode=10001, "
            'errorMessage="Error while compiling statement"), operationHandle'
            '=None)"')
        self.assertEqual(
            ("Error while compiling statement"),
            HiveEngineSpec.extract_error_message(Exception(msg)),
        )
 def test_job_2_launched_stage_2_stages_progress(
     self
 ):  # pylint: disable=invalid-name
     """A log where job 1 completed and job 2's first stage reports 40%
     map should be parsed as 60% overall progress."""
     hive_log_lines = """
         17/02/07 19:15:55 INFO ql.Driver: Total jobs = 2
         17/02/07 19:15:55 INFO ql.Driver: Launching Job 1 out of 2
         17/02/07 19:16:09 INFO exec.Task: 2017-02-07 19:16:09,173 Stage-1 map = 100%,  reduce = 0%
         17/02/07 19:15:55 INFO ql.Driver: Launching Job 2 out of 2
         17/02/07 19:16:09 INFO exec.Task: 2017-02-07 19:16:09,173 Stage-1 map = 0%,  reduce = 0%
         17/02/07 19:16:09 INFO exec.Task: 2017-02-07 19:16:09,173 Stage-1 map = 40%,  reduce = 0%
     """.split("\n")
     self.assertEqual(60, HiveEngineSpec.progress(hive_log_lines))
Example #3
0
def test_df_to_sql_if_exists_replace_with_schema(mock_upload_to_s3, mock_g):
    """With if_exists="replace" and an explicit schema, df_to_sql must issue
    a schema-qualified DROP TABLE before recreating the table."""
    mock_upload_to_s3.return_value = "mock-location"
    mock_g.user = True

    database = mock.MagicMock()
    # Pretend the table already exists so the replace path runs the DROP.
    database.get_df.return_value.empty = False
    executor = mock.MagicMock(return_value=True)
    database.get_sqla_engine.return_value.execute = executor

    target = Table(table="foobar", schema="schema")
    to_sql_kwargs = {
        "if_exists": "replace",
        "header": 1,
        "na_values": "mock",
        "sep": "mock",
    }
    HiveEngineSpec.df_to_sql(database, target, pd.DataFrame(), to_sql_kwargs)

    executor.assert_any_call("DROP TABLE IF EXISTS schema.foobar")
Example #4
0
def test_create_table_from_csv_if_exists_replace(mock_upload_to_s3, mock_table,
                                                 mock_g):
    """With if_exists="replace", create_table_from_csv must drop the target
    table (unqualified, since no schema is given) before recreating it."""
    mock_upload_to_s3.return_value = "mock-location"
    mock_table.infer.return_value = {}
    mock_g.user = True

    database = mock.MagicMock()
    # Simulate an existing non-empty table so the replace path drops it.
    database.get_df.return_value.empty = False
    executor = mock.MagicMock(return_value=True)
    database.get_sqla_engine.return_value.execute = executor

    csv_to_df_kwargs = {"sep": "mock", "header": 1, "na_values": "mock"}
    HiveEngineSpec.create_table_from_csv(
        "foo.csv",
        Table(table="foobar"),
        database,
        csv_to_df_kwargs,
        {"if_exists": "replace"},
    )

    executor.assert_any_call("DROP TABLE IF EXISTS foobar")
 def test_hive_get_view_names_return_empty_list(self):  # pylint: disable=invalid-name
     """Hive exposes no views through this spec, so get_view_names
     must return an empty list regardless of its arguments."""
     # Fixed spurious trailing comma in the parameter list: (self, ) -> (self).
     self.assertEqual(
         [], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY, mock.ANY)
     )
Example #6
0
 def epoch_to_dttm(cls) -> str:
     """Delegate the epoch-to-datetime SQL expression to HiveEngineSpec."""
     expression = HiveEngineSpec.epoch_to_dttm()
     return expression
Example #7
0
 def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
     """Delegate datetime-literal conversion to HiveEngineSpec."""
     converted = HiveEngineSpec.convert_dttm(target_type, dttm)
     return converted
Example #8
0
 def is_readonly(sql: str) -> bool:
     """Return True when *sql* parses as a read-only query under the Hive spec."""
     parsed = ParsedQuery(sql)
     return HiveEngineSpec.is_readonly_query(parsed)
Example #9
0
 def is_correct_result(data: List, result: List) -> bool:
     """Check that the latest partition extracted from *data* equals *result*."""
     frame = pd.DataFrame({"partition": data})
     latest = HiveEngineSpec._latest_partition_from_df(frame)
     return latest == result
 def test_job_1_launched_progress(self):
     """Progress is 0% when a job has launched but no stage has reported."""
     log = """
         17/02/07 19:15:55 INFO ql.Driver: Total jobs = 2
         17/02/07 19:15:55 INFO ql.Driver: Launching Job 1 out of 2
     """.split("\n")
     # assertEquals is a deprecated unittest alias; use assertEqual.
     self.assertEqual(0, HiveEngineSpec.progress(log))
Example #11
0
def test_hive_get_view_names_return_empty_list():  # pylint: disable=invalid-name
    """The Hive spec reports an empty view list regardless of arguments."""
    views = HiveEngineSpec.get_view_names(mock.ANY, mock.ANY, mock.ANY)
    assert views == []
Example #12
0
 def convert_dttm(
     cls, target_type: str, dttm: datetime, db_extra: Optional[Dict[str, Any]] = None
 ) -> Optional[str]:
     """Delegate datetime-literal conversion to HiveEngineSpec, forwarding db_extra."""
     converted = HiveEngineSpec.convert_dttm(target_type, dttm, db_extra=db_extra)
     return converted
def test_number_of_jobs_progress():
    """Knowing only the total job count should yield 0% progress."""
    hive_log = """
        17/02/07 19:15:55 INFO ql.Driver: Total jobs = 2
    """.split("\n")
    assert HiveEngineSpec.progress(hive_log) == 0
def test_fetch_data_success(fetch_data_mock):
    """fetch_data passes through whatever the patched base implementation returns."""
    expected = ["a", "b"]
    fetch_data_mock.return_value = expected
    assert HiveEngineSpec.fetch_data(mock.Mock()) == expected
def test_fetch_data_programming_error(fetch_data_mock):
    """A pyhive ProgrammingError during fetch yields an empty result list."""
    from pyhive.exc import ProgrammingError

    fetch_data_mock.side_effect = ProgrammingError
    rows = HiveEngineSpec.fetch_data(mock.Mock())
    assert rows == []
 def test_0_progress(self):
     """A log with only PerfLogger lines (no job/stage info) reports 0%."""
     log = """
         17/02/07 18:26:27 INFO log.PerfLogger: <PERFLOG method=compile from=org.apache.hadoop.hive.ql.Driver>
         17/02/07 18:26:27 INFO log.PerfLogger: <PERFLOG method=parse from=org.apache.hadoop.hive.ql.Driver>
     """.split("\n")
     # assertEquals is a deprecated unittest alias; use assertEqual.
     self.assertEqual(0, HiveEngineSpec.progress(log))
 def test_number_of_jobs_progress(self):
     """Knowing only the total job count should yield 0% progress."""
     log = """
         17/02/07 19:15:55 INFO ql.Driver: Total jobs = 2
     """.split("\n")
     # assertEquals is a deprecated unittest alias; use assertEqual.
     self.assertEqual(0, HiveEngineSpec.progress(log))
Example #18
0
def test_create_table_from_csv_append() -> None:
    """Appending via CSV upload is unsupported by the Hive spec and must raise."""
    with pytest.raises(SupersetException):
        HiveEngineSpec.create_table_from_csv(
            "foo.csv",
            Table("foobar"),
            mock.MagicMock(),
            {},
            {"if_exists": "append"},
        )
 def test_hive_get_view_names_return_empty_list(self):
     """The Hive spec reports an empty view list regardless of arguments."""
     # assertEquals is a deprecated unittest alias; use assertEqual.
     self.assertEqual(
         [], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY, mock.ANY)
     )
Example #20
0
def test_get_create_table_stmt() -> None:
    """Exercise get_create_table_stmt across header-count / null-value combos.

    The expected SQL comes in three variants: full tblproperties (header count
    plus null format), no tblproperties at all, and header count only.
    """
    table = Table("employee")
    schema_def = """eid int, name String, salary String, destination String"""
    location = "s3a://directory/table"
    from unittest import TestCase

    TestCase.maxDiff = None

    stmt_full_props = """CREATE TABLE employee ( eid int, name String, salary String, destination String )
                ROW FORMAT DELIMITED FIELDS TERMINATED BY :delim
                STORED AS TEXTFILE LOCATION :location
                tblproperties ('skip.header.line.count'=:header_line_count, 'serialization.null.format'=:null_value)"""
    stmt_no_props = """CREATE TABLE employee ( eid int, name String, salary String, destination String )
                ROW FORMAT DELIMITED FIELDS TERMINATED BY :delim
                STORED AS TEXTFILE LOCATION :location"""
    stmt_header_only = """CREATE TABLE employee ( eid int, name String, salary String, destination String )
                ROW FORMAT DELIMITED FIELDS TERMINATED BY :delim
                STORED AS TEXTFILE LOCATION :location
                tblproperties ('skip.header.line.count'=:header_line_count)"""

    # (header_line_count, null_values, expected_sql, expected_params)
    cases = [
        # header 0 -> skip count "1"; first null value becomes the null format
        (0, [""], stmt_full_props,
         {"delim": ",", "location": location,
          "header_line_count": "1", "null_value": ""}),
        (1, ["1", "2"], stmt_full_props,
         {"delim": ",", "location": location,
          "header_line_count": "2", "null_value": "1"}),
        (100, ["NaN"], stmt_full_props,
         {"delim": ",", "location": location,
          "header_line_count": "101", "null_value": "NaN"}),
        # no header count and no null values -> bare CREATE TABLE
        (None, None, stmt_no_props,
         {"delim": ",", "location": location}),
        # empty null-value list -> only the header-count property
        (100, [], stmt_header_only,
         {"delim": ",", "location": location, "header_line_count": "101"}),
    ]
    for header_count, null_values, expected_sql, expected_params in cases:
        assert HiveEngineSpec.get_create_table_stmt(
            table, schema_def, location, ",", header_count, null_values
        ) == (expected_sql, expected_params)