def test_execute_query_cache_exists_does_not_call_super(self, cache_exists, super_exec):
    """
    Checks that the parent execute is left untouched when execute() is asked to use the cache.

    NOTE(review): cache_exists and super_exec are mocks injected from outside this method;
    cache_exists is presumably set up to report an existing cache -- confirm on the decorators.
    """
    query = 'Hallo'
    cursor = Phos(MagicMock())

    cursor.execute(query, _cache=True)

    failure_msg = ("Super.execute should not have been called when cache is "
                   "demanded and exists.")
    self.assertFalse(super_exec.called, failure_msg)
def test_headers(self, super_exec, description):
    """
    Checks that headers() returns the first element of every description entry, in order.

    description is a mock injected from outside this method; its return value is set here.
    """
    desc = [('int', 'int_type'),
            ('bigint', 'bigint_type'),
            ('str', 'string_type'),
            ('float', 'decimal_type')]
    description.return_value = desc
    c = Phos(MagicMock())
    # As the query text says, execute() has to run before results can be read.
    c.execute('exec needs to be called before fetchall', _cache=False)
    expected_headers = [column[0] for column in desc]
    self.assertSequenceEqual(expected_headers, c.headers())
def test_set_dag_name_generates_proper_query(self, super_exec):
    """
    Checks that set_dag_name() sends a 'set hive.query.name=' statement holding the dag name
    to the parent execute.
    """
    c = Phos(MagicMock())
    dagname = 'tst dag with UPPER and utf8 ✓'
    c.set_dag_name(dagname)
    # call_args[0] is the tuple of positional arguments of the last call; the statement is first.
    statement = super_exec.call_args[0][0]
    # I don't want to change my tests if case or spaces are changed, so fuzzy testing ftw:
    # spaces are dropped and the case is folded before looking for the setting name.
    self.assertIn('sethive.query.name=', statement.replace(' ', '').lower())
    # The dag name itself has to show up verbatim (case and non-ascii characters included).
    self.assertIn(dagname, statement)
def test_execute_progress_is_not_called_when_not_required(self):
    """
    Makes sure no truthy async_ argument reaches the parent execute when _progress=False.

    The parent execute is patched to raise, which stops the execution right after the call:
    whatever would happen afterwards is irrelevant here.
    """
    class CallException(Exception):
        pass

    with patch.object(pyhive.hive.Cursor, 'execute',
                      side_effect=CallException("Execute called")) as super_exec:
        c = Phos(MagicMock())
        # The patched parent raises as soon as it is reached; no exception means it was skipped.
        with self.assertRaises(
                CallException,
                msg="super.execute should have been called when _cache=False"):
            c.execute('Hallo', _cache=False, _progress=False)
        # Only the keyword arguments of the first call to the parent matter.
        _, kwargs = super_exec.call_args_list[0]
        self.assertFalse(kwargs.get('async_', False))
def test_fetchall_cache_not_exists_dumps_and_calls_super(self, super_exec, super_fetch):
    """
    Checks that fetchall() goes to the parent fetchall once the cache has been deleted,
    and hands back the parent's data.

    NOTE(review): 'Some data' is presumably the return value configured on the super_fetch
    mock outside this method -- confirm on the decorators.
    """
    cursor = Phos(MagicMock())
    cursor.execute('Hallo', _cache=True, _recache=True)
    cursor.del_cache()

    rows = cursor.fetchall()

    self.assertEqual(
        rows,
        'Some data',
        "Data dumped loaded from cache does not match what super.fetchall returned.")
    self.assertTrue(super_fetch.called)
def test_pformat(self, super_exec, description, fetchall):
    """
    Overrides description and fetchall (part of the Python DB API) to check the output of
    pformat(), both unsorted and sorted on one column.
    """
    desc = [('int', 'int_type'),
            ('bigint', 'bigint_type'),
            ('str', 'string_type'),
            ('float', 'decimal_type')]
    description.return_value = desc
    c = Phos(MagicMock())
    c.execute('exec needs to be called before fetchall', _cache=False)

    # NOTE(review): both table literals below are kept exactly as they appear here; their
    # line breaks and column padding look collapsed -- confirm against the real pformat() output.

    # Unsorted
    formatted = c.pformat()
    self.assertEqual(formatted['rowcount'], 2)
    # Note: dedent and the leading backslash are meant to remove the newline and indentation
    # caused by the triple quote.
    expected = textwrap.dedent("""\ +-----+---------------+------+------------+ | int | bigint | str | float | +=====+===============+======+============+ | 1 | 42 | str2 | 3.5 | | 1 | 1234567891234 | str1 | 4.45643245 | +-----+---------------+------+------------+""")
    self.assertEqual(formatted['table'], expected)

    # Sorted
    formatted_sorted = c.pformat(sort_by=[2])
    # The row count has to be checked on the sorted result, not on the unsorted one again.
    self.assertEqual(formatted_sorted['rowcount'], 2)
    expected_sorted = textwrap.dedent("""\ +-----+---------------+------+------------+ | int | bigint | str | float | +=====+===============+======+============+ | 1 | 1234567891234 | str1 | 4.45643245 | | 1 | 42 | str2 | 3.5 | +-----+---------------+------+------------+""")
    self.assertEqual(formatted_sorted['table'], expected_sorted)
def test_plot_uses_proper_arguments(self, super_exec, description, fetchall, lineplot):
    """
    Overrides description and fetchall (part of the Python DB API), then checks that
    plot_xyh(plot='line') reaches the lineplot mock and that an unknown plot type raises
    UnknownPlotTypeError.
    """
    columns = [('int', 'int_type'),
               ('bigint', 'bigint_type'),
               ('str', 'string_type'),
               ('float', 'decimal_type')]
    description.return_value = columns
    cursor = Phos(MagicMock())
    cursor.execute('exec needs to be called before fetchall', _cache=False)

    # Known plot type: the (mocked) line plotting function must have been used.
    cursor.plot_xyh(plot='line')
    lineplot.assert_called()

    # Unknown plot type: must be refused.
    with self.assertRaises(UnknownPlotTypeError):
        cursor.plot_xyh(plot='poofpoof')
def test_fetchall_cache_exists_does_not_dump_and_does_not_call_super(self, super_exec, super_fetch):
    """
    Checks that fetchall() returns what was written to the cache and never calls the
    parent fetchall.
    """
    cursor = Phos(MagicMock())
    cursor.execute('Hallo', _cache=True, _recache=True)
    # Start from a clean slate, then plant known data in the cache.
    cursor.del_cache()
    expected_rows = 'Some data'
    cursor.q.write_cache({'data': expected_rows})

    rows = cursor.fetchall()

    self.assertEqual(rows, expected_rows)
    self.assertFalse(super_fetch.called)
def test_description_cache_exists_does_not_dump_and_does_not_call_super(self, super_exec, super_desc):
    """
    Checks that description returns what was written to the cache and never calls the
    parent description.
    """
    cursor = Phos(MagicMock())
    cursor.execute('Hallo', _cache=True, _recache=True)
    # Start from a clean slate, then plant a known description in the cache.
    cursor.del_cache()
    expected_description = 'Some data'
    cursor.q.write_cache({'data': 'who cares', 'description': expected_description})

    found = cursor.description

    self.assertEqual(found, expected_description)
    self.assertFalse(super_desc.called)
def test_df(self, super_exec, description, fetchall):
    """
    Overrides description and fetchall (part of the Python DB API) to check that get_df()
    agrees with headers() and fetchall(): same columns, same row count, same rows.
    """
    columns = [('int', 'int_type'),
               ('bigint', 'bigint_type'),
               ('str', 'string_type'),
               ('float', 'decimal_type')]
    description.return_value = columns
    cursor = Phos(MagicMock())
    cursor.execute('exec needs to be called before fetchall', _cache=False)

    frame = cursor.get_df()

    self.assertSequenceEqual(cursor.headers(),
                             list(frame.columns),
                             "Fetchall() and df do not have the same headers.")
    self.assertEqual(len(cursor.fetchall()),
                     len(frame),
                     "Fetchall() and df do not have the same number of rows.")
    # With index=True every tuple starts with the row index, followed by the values.
    for row in frame.itertuples(index=True):
        row_idx = row[0]
        self.assertSequenceEqual(row[1:],
                                 cursor.fetchall()[row_idx],
                                 f"Rows {row_idx} of df and fetchall() differ.")
def test_fetchall_requires_exec_first(self):
    """Checks that fetchall() raises FetchWithoutExecuteError when execute() was never called."""
    cursor = Phos(MagicMock())
    self.assertRaises(FetchWithoutExecuteError, cursor.fetchall)
def test_fetchall_no_cache_calls_super(self, super_exec, super_fetch):
    """Checks that fetchall() calls the parent fetchall when the query ran with _cache=False."""
    cursor = Phos(MagicMock())
    cursor.execute('Hallo', _cache=False)

    cursor.fetchall()

    super_fetch.assert_called()
def test_execute_query_cache_no_exists_calls_super(self, cache_exists, super_exec):
    """
    Checks that execute() with _cache=True hands the query to the parent execute.

    NOTE(review): cache_exists is a mock injected from outside this method, presumably set up
    to report a missing cache -- confirm on the decorators.
    """
    query = 'Hallo'
    cursor = Phos(MagicMock())

    cursor.execute(query, _cache=True)

    super_exec.assert_called_with(query)
def test_execute_query_no_cache_calls_super(self, super_exec):
    """Checks that execute() with _cache=False hands the query to the parent execute."""
    query = 'Hallo'
    cursor = Phos(MagicMock())

    cursor.execute(query, _cache=False)

    super_exec.assert_called_with(query)
def test_description_requires_exec_first(self):
    """Checks that reading description raises FetchWithoutExecuteError when execute() was never called."""
    cursor = Phos(MagicMock())
    with self.assertRaises(FetchWithoutExecuteError):
        # Reading the attribute is enough to trigger the check.
        _ = cursor.description
# Reminder: partition() returns 3 strings: the one before the partitioner, the partitioner itself and the # rest. If the partitioner is not found, the 2nd and 3rd string are empty (not None). cmd, dbldash, comment = li.partition('--') cmd1, semicolon, cmd2 = cmd.partition(';') # A ';' appears outside a comment if semicolon: current_q.append(cmd1) # Cmd1 1 is complete. all_qs.append(current_q) # Put the rest (cmd2) back on the stack to process it and reset current accumulator. current_q = [] stack.append(cmd2 + dbldash + comment) else: # Nothing exciting, move on. current_q.append(li) all_qs.append(current_q) return ['\n'.join(x).strip() for x in all_qs] if __name__ == "__main__": cnx = Connection('hive-server-host') c = Phos(cnx) c.set_yarn_ts_url('timeline-server-host:8188') c.set_rm_url('resource-manager-host:8088') cli = Cli(c) cli.input_loop()
def test_description_no_cache_calls_super(self, super_exec, super_desc):
    """
    Checks that reading description reaches the parent description when the query ran
    with _cache=False.
    """
    c = Phos(MagicMock())
    q = 'Hallo'
    c.execute(q, _cache=False)
    # description is read as an attribute, as the other description tests do; calling the
    # returned value would only work by accident while it happens to be a mock.
    _ = c.description
    super_desc.assert_called()