def test_load_df_with_data_types(self, mock_run_cli):
    """Check the CREATE TABLE statement issued by ``load_df`` for mixed column types.

    A one-row DataFrame is built with one column per kind of value (bool,
    negative int, positive int, float, str, datetime, plain object, encoded
    bytes, text and None).  The first positional argument of the first call
    recorded on ``mock_run_cli`` is then compared with the expected statement
    through ``assertEqualIgnoreMultipleSpaces``.
    """
    # Column order matters for the generated DDL, hence the OrderedDict.
    columns = OrderedDict([
        ('b', [True]),
        ('i', [-1]),
        ('t', [1]),
        ('f', [0.0]),
        ('c', ['c']),
        ('M', [datetime.datetime(2018, 1, 1)]),
        ('O', [object()]),
        ('S', ['STRING'.encode('utf-8')]),
        ('U', ['STRING']),
        ('V', [None]),
    ])
    frame = pd.DataFrame(columns)

    HiveCliHook().load_df(frame, 't')

    expected = """ CREATE TABLE IF NOT EXISTS t ( b BOOLEAN, i BIGINT, t BIGINT, f DOUBLE, c STRING, M TIMESTAMP, O STRING, S STRING, U STRING, V STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS textfile ; """
    first_call_positional = mock_run_cli.call_args_list[0][0]
    assertEqualIgnoreMultipleSpaces(self, first_call_positional[0], expected)
def test_load_df_with_data_types(self, mock_run_cli):
    """Verify the column types in the CREATE TABLE statement built by ``load_df``.

    Each sample value below becomes a single-row column of the DataFrame handed
    to ``load_df``.  The statement captured as the first positional argument of
    the first ``mock_run_cli`` call is compared with the expected DDL using
    ``assertEqualIgnoreMultipleSpaces``.
    """
    # (column name, single cell value) pairs, in the order the DDL must list them.
    samples = (
        ('b', True),
        ('i', -1),
        ('t', 1),
        ('f', 0.0),
        ('c', 'c'),
        ('M', datetime.datetime(2018, 1, 1)),
        ('O', object()),
        ('S', b'STRING'),
        ('U', 'STRING'),
        ('V', None),
    )
    data = OrderedDict((name, [value]) for name, value in samples)
    df = pd.DataFrame(data)

    hive_hook = HiveCliHook()
    hive_hook.load_df(df, 't')

    expected_query = """ CREATE TABLE IF NOT EXISTS t ( b BOOLEAN, i BIGINT, t BIGINT, f DOUBLE, c STRING, M TIMESTAMP, O STRING, S STRING, U STRING, V STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS textfile ; """
    issued_query = mock_run_cli.call_args_list[0][0][0]
    assertEqualIgnoreMultipleSpaces(self, issued_query, expected_query)
def test_load_df_with_optional_parameters(self, mock_to_csv, mock_load_file):
    """Check that ``create`` and ``recreate`` reach ``mock_load_file`` unchanged.

    All four combinations of the two boolean flags are tried.  For each one the
    mock is reset, ``load_df`` is invoked, and the keyword arguments of the
    single recorded ``mock_load_file`` call are inspected.

    ``mock_to_csv`` is not referenced in the body.
    """
    hive_hook = HiveCliHook()

    # Same ordering as product((True, False), (True, False)).
    for create, recreate in itertools.product((True, False), repeat=2):
        mock_load_file.reset_mock()

        frame = pd.DataFrame({"c": range(10)})
        hive_hook.load_df(df=frame, table="t", create=create, recreate=recreate)

        assert mock_load_file.call_count == 1
        forwarded = mock_load_file.call_args[1]
        self.assertEqual(forwarded["create"], create)
        self.assertEqual(forwarded["recreate"], recreate)
def test_load_df_with_optional_parameters(self, mock_to_csv, mock_load_file):
    """Ensure ``load_df`` forwards its ``create``/``recreate`` flags.

    Iterates over every (create, recreate) pair of booleans, calls ``load_df``
    with a ten-row single-column DataFrame, and asserts that
    ``mock_load_file`` was called exactly once with the same flag values as
    keyword arguments.

    ``mock_to_csv`` is accepted but not used inside this method.
    """
    hook = HiveCliHook()
    flag_values = (True, False)
    flag_pairs = itertools.product(flag_values, flag_values)

    for create, recreate in flag_pairs:
        # Start each combination from a clean call history.
        mock_load_file.reset_mock()

        hook.load_df(
            df=pd.DataFrame({"c": range(0, 10)}),
            table="t",
            create=create,
            recreate=recreate,
        )

        mock_load_file.assert_called_once()
        passed_kwargs = mock_load_file.call_args[1]
        self.assertEqual(passed_kwargs["create"], create)
        self.assertEqual(passed_kwargs["recreate"], recreate)
def test_load_df_with_data_types(self, mock_run_cli):
    """Check the CREATE TABLE statement produced by ``load_df`` for mixed types.

    A one-row DataFrame with one column per kind of value is passed to
    ``load_df``.  The first positional argument of the first call recorded on
    ``mock_run_cli`` must equal the expected statement once both strings have
    been stripped and had every whitespace run collapsed to a single space.
    """
    # OrderedDict keeps the column order that the expected DDL relies on.
    d = OrderedDict()
    d['b'] = [True]
    d['i'] = [-1]
    d['t'] = [1]
    d['f'] = [0.0]
    d['c'] = ['c']
    d['M'] = [datetime.datetime(2018, 1, 1)]
    d['O'] = [object()]
    d['S'] = ['STRING'.encode('utf-8')]
    d['U'] = ['STRING']
    d['V'] = [None]
    df = pd.DataFrame(d)

    hook = HiveCliHook()
    hook.load_df(df, 't')

    query = """ CREATE TABLE IF NOT EXISTS t ( b BOOLEAN, i BIGINT, t BIGINT, f DOUBLE, c STRING, M TIMESTAMP, O STRING, S STRING, U STRING, V STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS textfile ; """

    def _trim(s):
        """Strip *s* and collapse each run of whitespace into one space."""
        # Raw string: "\s" in a plain literal is an invalid escape sequence
        # and emits a warning on recent Python versions; the pattern itself
        # is unchanged.
        return re.sub(r"\s+", " ", s.strip())

    self.assertEqual(_trim(mock_run_cli.call_args_list[0][0][0]), _trim(query))
def test_load_df(self, mock_to_csv, mock_load_file):
    """Check the keyword arguments ``load_df`` hands to the two mocks.

    After a single ``load_df`` call with a three-row string column:

    * ``mock_to_csv`` must have been called once with ``header`` and ``index``
      equal to ``False`` and ``sep`` equal to the delimiter encoded with the
      given encoding;
    * ``mock_load_file`` must have been called once with the plain delimiter,
      a ``field_dict`` mapping ``"c"`` to ``"STRING"``, and the table name.
    """
    table = "t"
    delimiter = ","
    encoding = "utf-8"
    frame = pd.DataFrame({"c": ["foo", "bar", "baz"]})

    HiveCliHook().load_df(df=frame, table=table, delimiter=delimiter, encoding=encoding)

    # CSV serialisation arguments.
    mock_to_csv.assert_called_once()
    csv_kwargs = mock_to_csv.call_args[1]
    self.assertEqual(csv_kwargs["header"], False)
    self.assertEqual(csv_kwargs["index"], False)
    # This variant expects the separator in its encoded (bytes) form.
    self.assertEqual(csv_kwargs["sep"], delimiter.encode(encoding))

    # Arguments forwarded to the file loader.
    mock_load_file.assert_called_once()
    load_kwargs = mock_load_file.call_args[1]
    self.assertEqual(load_kwargs["delimiter"], delimiter)
    self.assertEqual(load_kwargs["field_dict"], {"c": u"STRING"})
    self.assertEqual(load_kwargs["table"], table)
def test_load_df(self, mock_to_csv, mock_load_file):
    """Check the keyword arguments ``load_df`` passes to the two mocks.

    After one ``load_df`` call with a three-row string column:

    * ``mock_to_csv`` must have been called once with ``header`` and ``index``
      equal to ``False`` and ``sep`` equal to the delimiter;
    * ``mock_load_file`` must have been called once with the delimiter, an
      ``OrderedDict`` ``field_dict`` equal to ``{"c": "STRING"}``, and the
      table name.
    """
    df = pd.DataFrame({"c": ["foo", "bar", "baz"]})
    table = "t"
    delimiter = ","
    encoding = "utf-8"

    hook = HiveCliHook()
    hook.load_df(df=df, table=table, delimiter=delimiter, encoding=encoding)

    # Use unittest assertions rather than bare ``assert`` so the checks stay
    # active under ``python -O`` and match the rest of this method.
    self.assertEqual(mock_to_csv.call_count, 1)
    kwargs = mock_to_csv.call_args[1]
    self.assertEqual(kwargs["header"], False)
    self.assertEqual(kwargs["index"], False)
    self.assertEqual(kwargs["sep"], delimiter)

    self.assertEqual(mock_load_file.call_count, 1)
    kwargs = mock_load_file.call_args[1]
    self.assertEqual(kwargs["delimiter"], delimiter)
    self.assertEqual(kwargs["field_dict"], {"c": "STRING"})
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(kwargs["field_dict"], OrderedDict)
    self.assertEqual(kwargs["table"], table)
def test_load_df(self, mock_to_csv, mock_load_file):
    """Check the keyword arguments ``load_df`` passes to the two mocks.

    After one ``load_df`` call with a three-row string column:

    * ``mock_to_csv`` must have been called once with ``header`` and ``index``
      equal to ``False`` and ``sep`` equal to the delimiter;
    * ``mock_load_file`` must have been called once with the delimiter, an
      ``OrderedDict`` ``field_dict`` equal to ``{"c": "STRING"}``, and the
      table name.
    """
    df = pd.DataFrame({"c": ["foo", "bar", "baz"]})
    table = "t"
    delimiter = ","
    encoding = "utf-8"

    hook = HiveCliHook()
    hook.load_df(df=df, table=table, delimiter=delimiter, encoding=encoding)

    mock_to_csv.assert_called_once()
    kwargs = mock_to_csv.call_args[1]
    self.assertEqual(kwargs["header"], False)
    self.assertEqual(kwargs["index"], False)
    self.assertEqual(kwargs["sep"], delimiter)

    mock_load_file.assert_called_once()
    kwargs = mock_load_file.call_args[1]
    self.assertEqual(kwargs["delimiter"], delimiter)
    self.assertEqual(kwargs["field_dict"], {"c": u"STRING"})
    # assertIsInstance reports the offending type when the check fails,
    # whereas assertTrue(isinstance(...)) only says "False is not true".
    self.assertIsInstance(kwargs["field_dict"], OrderedDict)
    self.assertEqual(kwargs["table"], table)
def test_load_df_with_data_types(self, mock_run_cli):
    """Check the back-quoted CREATE TABLE statement issued by ``load_df``.

    Builds a one-row DataFrame holding one column per kind of value, calls
    ``load_df`` for table ``t``, and compares the first positional argument of
    the first call recorded on ``mock_run_cli`` with the expected statement
    through ``assert_equal_ignore_multiple_spaces``.
    """
    # Ordered (name, cells) pairs: the expected DDL lists columns in this order.
    column_cells = [
        ('b', [True]),
        ('i', [-1]),
        ('t', [1]),
        ('f', [0.0]),
        ('c', ['c']),
        ('M', [datetime.datetime(2018, 1, 1)]),
        ('O', [object()]),
        ('S', [b'STRING']),
        ('U', ['STRING']),
        ('V', [None]),
    ]
    frame = pd.DataFrame(OrderedDict(column_cells))

    HiveCliHook().load_df(frame, 't')

    expected_query = """ CREATE TABLE IF NOT EXISTS t ( `b` BOOLEAN, `i` BIGINT, `t` BIGINT, `f` DOUBLE, `c` STRING, `M` TIMESTAMP, `O` STRING, `S` STRING, `U` STRING, `V` STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS textfile ; """
    executed_query = mock_run_cli.call_args_list[0][0][0]
    assert_equal_ignore_multiple_spaces(self, executed_query, expected_query)