def test_load_file_create_table(self, mock_run_cli):
    """Verify the statements issued by ``load_file`` with create/recreate.

    ``load_file`` is called with ``create=True`` and ``recreate=True`` and a
    two-column ``field_dict``; the recording mock must then have received
    both the DROP/CREATE TABLE statement and the LOAD DATA statement, in
    any order.

    :param mock_run_cli: mock that records the statements the hook runs
        (presumably injected by a patch decorator outside this view).
    """
    filepath = "/path/to/input/file"
    table = "output_table"
    field_dict = OrderedDict([("name", "string"), ("gender", "string")])

    # Render each column as "`name` type", removing any backticks already
    # present on the name so they are not doubled.
    # NOTE(review): the separator has to match the hook's generated DDL
    # byte-for-byte -- confirm its whitespace against the hook.
    column_defs = [
        '`{k}` {v}'.format(k=column.strip('`'), v=hive_type)
        for column, hive_type in field_dict.items()
    ]
    fields = ",\n ".join(column_defs)

    hook = MockHiveCliHook()
    hook.load_file(
        filepath=filepath,
        table=table,
        field_dict=field_dict,
        create=True,
        recreate=True,
    )

    create_template = (
        "DROP TABLE IF EXISTS {table};\n"
        "CREATE TABLE IF NOT EXISTS {table} (\n{fields})\n"
        "ROW FORMAT DELIMITED\n"
        "FIELDS TERMINATED BY ','\n"
        "STORED AS textfile\n;"
    )
    create_table = create_template.format(table=table, fields=fields)

    load_template = (
        "LOAD DATA LOCAL INPATH '{filepath}' "
        "OVERWRITE INTO TABLE {table} ;\n"
    )
    load_data = load_template.format(filepath=filepath, table=table)

    expected_calls = [call(create_table), call(load_data)]
    mock_run_cli.assert_has_calls(expected_calls, any_order=True)
def test_fakesock_socket_real_sendall_when_sending_data(
        POTENTIAL_HTTP_PORTS, old_socket):
    """fakesock.socket#real_sendall should connect before sending data

    ``old_socket`` is a mock whose ``return_value`` plays the real socket;
    ``POTENTIAL_HTTP_PORTS`` is a mock configured so that only port 4000
    counts as an HTTP port. Both are presumably injected by patch
    decorators outside this view.
    """
    server_address = ('foobar.com', 4000)
    server_reply = b'response from foobar :)'

    # Background: the real socket will stop returning bytes after the
    # first call
    underlying = old_socket.return_value
    underlying.recv.side_effect = [server_reply, b""]

    # And the potential http port is 4000
    def is_port_4000(other):
        return int(other) == 4000

    def union_returns_same_mock(other):
        return POTENTIAL_HTTP_PORTS

    POTENTIAL_HTTP_PORTS.__contains__.side_effect = is_port_4000
    POTENTIAL_HTTP_PORTS.union.side_effect = union_returns_same_mock

    # Given a fake socket
    fake_socket = fakesock.socket()

    # When I call connect to a server in a port that is considered HTTP
    fake_socket.connect(server_address)

    # And send some data
    fake_socket.real_sendall(b"SOMEDATA")

    # Then connect should have been called
    underlying.connect.assert_called_once_with(server_address)

    # And the socket was set to blocking
    underlying.setblocking.assert_called_once_with(1)

    # And recv was called with the bufsize
    expected_recv_calls = [call(fake_socket._bufsize)]
    underlying.recv.assert_has_calls(expected_recv_calls)

    # And the buffer should contain the data from the server
    buffered = fake_socket.fd.read()
    buffered.should.equal(b"response from foobar :)")
def test_fakesock_socket_real_sendall_when_http(old_socket):
    """fakesock.socket#real_sendall sends data and buffers the response in the file descriptor

    ``old_socket`` is a mock whose ``return_value`` plays the real socket
    (presumably injected by a patch decorator outside this view).
    """
    extra_args = (b'some extra args...',)
    extra_kwargs = {'foo': b'bar'}

    # Background: the real socket will stop returning bytes after the
    # first call
    underlying = old_socket.return_value
    underlying.recv.side_effect = [b'response from server', b""]

    # Given a fake socket
    fake_socket = fakesock.socket()
    fake_socket._address = ('1.2.3.4', 42)
    fake_socket.is_http = True

    # When I call real_sendall with data, some args and kwargs
    fake_socket.real_sendall(b"SOMEDATA", *extra_args, **extra_kwargs)

    # Then it should have called sendall in the real socket
    underlying.sendall.assert_called_once_with(
        b"SOMEDATA", *extra_args, **extra_kwargs)

    # And the socket was set to blocking
    underlying.setblocking.assert_called_once_with(1)

    # And recv was called with the bufsize
    expected_recv_calls = [call(fake_socket._bufsize)]
    underlying.recv.assert_has_calls(expected_recv_calls)

    # And the buffer should contain the data from the server
    buffered = fake_socket.fd.read()
    buffered.should.equal(b"response from server")

    # And connect was called
    underlying.connect.called.should.be.true
def test_fakesock_socket_real_sendall_continue_eagain_when_http(socket, old_socket):
    """fakesock.socket#real_sendall should continue if the socket error was EAGAIN

    ``socket`` is a mock on which ``error`` is replaced by
    ``SocketErrorStub``; ``old_socket`` is a mock whose ``return_value``
    plays the real socket. Both are presumably injected by patch decorators
    outside this view.
    """
    socket.error = SocketErrorStub
    extra_args = (b'some extra args...',)
    extra_kwargs = {'foo': b'bar'}

    # Background: the first recv raises EAGAIN, the second returns data and
    # the third returns no bytes
    underlying = old_socket.return_value
    underlying.recv.side_effect = [
        SocketErrorStub(errno.EAGAIN),
        b'after error',
        b"",
    ]

    # Given a fake socket (kept under its own name so it is not confused
    # with the ``socket`` mock parameter)
    fake_socket = fakesock.socket()
    fake_socket._address = ('1.2.3.4', 42)
    fake_socket.is_http = True

    # When I call real_sendall with data, some args and kwargs
    fake_socket.real_sendall(b"SOMEDATA", *extra_args, **extra_kwargs)

    # Then it should have called sendall in the real socket
    underlying.sendall.assert_called_once_with(
        b"SOMEDATA", *extra_args, **extra_kwargs)

    # And the socket was set to blocking
    underlying.setblocking.assert_called_once_with(1)

    # And recv was called with the bufsize
    expected_recv_calls = [call(fake_socket._bufsize)]
    underlying.recv.assert_has_calls(expected_recv_calls)

    # And the buffer should contain the data from the server
    buffered = fake_socket.fd.read()
    buffered.should.equal(b"after error")

    # And connect was called
    underlying.connect.called.should.be.true
def test_load_df_with_data_types(self, mock_run_cli):
    """Check the CREATE TABLE statement ``load_df`` issues for mixed dtypes.

    A one-row DataFrame is built with one column per kind of value (bool,
    negative and positive int, float, str, datetime, arbitrary object,
    bytes, text and ``None``). The first statement recorded by
    ``mock_run_cli`` is then compared with the expected DDL, ignoring
    differences in whitespace layout.

    :param mock_run_cli: mock that records the statements the hook runs
        (presumably injected by a patch decorator outside this view).
    """
    # Insertion order matters: it fixes the column order of the expected DDL.
    columns = OrderedDict([
        ('b', [True]),
        ('i', [-1]),
        ('t', [1]),
        ('f', [0.0]),
        ('c', ['c']),
        ('M', [datetime.datetime(2018, 1, 1)]),
        ('O', [object()]),
        ('S', [b'STRING']),
        ('U', ['STRING']),
        ('V', [None]),
    ])
    df = pd.DataFrame(columns)

    hook = MockHiveCliHook()
    hook.load_df(df, 't')

    query = (
        " CREATE TABLE IF NOT EXISTS t ("
        " `b` BOOLEAN,"
        " `i` BIGINT,"
        " `t` BIGINT,"
        " `f` DOUBLE,"
        " `c` STRING,"
        " `M` TIMESTAMP,"
        " `O` STRING,"
        " `S` STRING,"
        " `U` STRING,"
        " `V` STRING)"
        " ROW FORMAT DELIMITED"
        " FIELDS TERMINATED BY ','"
        " STORED AS textfile"
        " ;"
        " "
    )

    # First positional argument of the first recorded call.
    issued = mock_run_cli.call_args_list[0][0][0]

    # Collapse every run of whitespace to a single space on both sides so
    # that only the tokens of the statement are compared, not its layout.
    assert " ".join(issued.split()) == " ".join(query.split())
def test_load_file_without_create_table(self, mock_run_cli):
    """Verify the statement issued by ``load_file`` when ``create=False``.

    The recording mock must have received the LOAD DATA statement for the
    given file path and table.

    :param mock_run_cli: mock that records the statements the hook runs
        (presumably injected by a patch decorator outside this view).
    """
    filepath = "/path/to/input/file"
    table = "output_table"

    hook = MockHiveCliHook()
    hook.load_file(filepath=filepath, table=table, create=False)

    load_template = (
        "LOAD DATA LOCAL INPATH '{filepath}' "
        "OVERWRITE INTO TABLE {table} ;\n"
    )
    expected_statement = load_template.format(filepath=filepath, table=table)

    mock_run_cli.assert_has_calls([call(expected_statement)], any_order=True)