Example #1
0
    def test_load_file_create_table(self, mock_run_cli):
        """Check the statements issued when ``load_file`` (re)creates the table.

        ``load_file`` is invoked with ``create=True`` and ``recreate=True``;
        the injected ``mock_run_cli`` mock must then have recorded both a
        DROP/CREATE TABLE statement and a LOAD DATA statement (in any order).
        """
        source_path = "/path/to/input/file"
        target_table = "output_table"
        columns = OrderedDict([("name", "string"), ("gender", "string")])

        # Render the expected column list: one back-quoted column per line.
        column_defs = []
        for col_name, col_type in columns.items():
            column_defs.append(
                '`{k}` {v}'.format(k=col_name.strip('`'), v=col_type))
        rendered_columns = ",\n    ".join(column_defs)

        MockHiveCliHook().load_file(
            filepath=source_path,
            table=target_table,
            field_dict=columns,
            create=True,
            recreate=True,
        )

        ddl_template = (
            "DROP TABLE IF EXISTS {table};\n"
            "CREATE TABLE IF NOT EXISTS {table} (\n{fields})\n"
            "ROW FORMAT DELIMITED\n"
            "FIELDS TERMINATED BY ','\n"
            "STORED AS textfile\n;"
        )
        expected_ddl = ddl_template.format(
            table=target_table, fields=rendered_columns)

        load_template = (
            "LOAD DATA LOCAL INPATH '{filepath}' "
            "OVERWRITE INTO TABLE {table} ;\n"
        )
        expected_load = load_template.format(
            filepath=source_path, table=target_table)

        mock_run_cli.assert_has_calls(
            [call(expected_ddl), call(expected_load)], any_order=True)
Example #2
0
def test_fakesock_socket_real_sendall_when_sending_data(
        POTENTIAL_HTTP_PORTS, old_socket):
    """fakesock.socket#real_sendall should connect before sending data

    The injected ``old_socket`` mock supplies the underlying socket; its
    ``recv`` yields one payload and then an empty bytes object. Port 4000 is
    made to look like an HTTP port through the ``POTENTIAL_HTTP_PORTS`` mock.
    """
    def _is_port_4000(other):
        # Membership test used by the mocked port collection.
        return int(other) == 4000

    def _return_same_ports(other):
        # ``union`` hands back the very same mocked collection.
        return POTENTIAL_HTTP_PORTS

    POTENTIAL_HTTP_PORTS.__contains__.side_effect = _is_port_4000
    POTENTIAL_HTTP_PORTS.union.side_effect = _return_same_ports

    # The underlying socket stops producing bytes after the first read.
    upstream = old_socket.return_value
    upstream.recv.side_effect = [b'response from foobar :)', b""]

    # Connect a fake socket to the HTTP-looking port, then push some data.
    destination = ('foobar.com', 4000)
    fake_socket = fakesock.socket()
    fake_socket.connect(destination)
    fake_socket.real_sendall(b"SOMEDATA")

    # The underlying socket was connected exactly once, to the same address.
    upstream.connect.assert_called_once_with(destination)

    # It was switched to blocking mode.
    upstream.setblocking.assert_called_once_with(1)

    # The response was read using the fake socket's buffer size.
    expected_reads = [call(fake_socket._bufsize)]
    upstream.recv.assert_has_calls(expected_reads)

    # The server payload ended up in the fake socket's file descriptor.
    buffered = fake_socket.fd.read()
    buffered.should.equal(b"response from foobar :)")
Example #3
0
def test_fakesock_socket_real_sendall_when_http(old_socket):
    """fakesock.socket#real_sendall sends data and buffers the response in the file descriptor

    The injected ``old_socket`` mock supplies the underlying socket; its
    ``recv`` yields one payload and then an empty bytes object.
    """
    # The underlying socket stops producing bytes after the first read.
    upstream = old_socket.return_value
    upstream.recv.side_effect = [b'response from server', b""]

    # A fake socket flagged as HTTP with a preset address.
    fake_socket = fakesock.socket()
    fake_socket.is_http = True
    fake_socket._address = ('1.2.3.4', 42)

    # Send a payload together with extra positional and keyword arguments.
    fake_socket.real_sendall(b"SOMEDATA", b'some extra args...', foo=b'bar')

    # Everything was forwarded untouched to the underlying ``sendall``.
    upstream.sendall.assert_called_once_with(
        b"SOMEDATA", b'some extra args...', foo=b'bar')

    # The underlying socket was switched to blocking mode.
    upstream.setblocking.assert_called_once_with(1)

    # The response was read using the fake socket's buffer size.
    expected_reads = [call(fake_socket._bufsize)]
    upstream.recv.assert_has_calls(expected_reads)

    # The server payload ended up in the fake socket's file descriptor.
    buffered = fake_socket.fd.read()
    buffered.should.equal(b"response from server")

    # The underlying socket was connected along the way.
    upstream.connect.called.should.be.true
Example #4
0
def test_fakesock_socket_real_sendall_continue_eagain_when_http(socket, old_socket):
    """fakesock.socket#real_sendall should continue if the socket error was EAGAIN

    The underlying socket's first ``recv`` raises ``SocketErrorStub`` carrying
    ``errno.EAGAIN``; the following reads return a payload and then an empty
    bytes object.
    """
    # Make the injected ``socket`` mock expose the stub as its error type.
    socket.error = SocketErrorStub

    # First read fails with EAGAIN, second yields data, third ends the stream.
    upstream = old_socket.return_value
    upstream.recv.side_effect = [
        SocketErrorStub(errno.EAGAIN),
        b'after error',
        b"",
    ]

    # A fake socket flagged as HTTP with a preset address.
    fake_socket = fakesock.socket()
    fake_socket.is_http = True
    fake_socket._address = ('1.2.3.4', 42)

    # Send a payload together with extra positional and keyword arguments.
    fake_socket.real_sendall(b"SOMEDATA", b'some extra args...', foo=b'bar')

    # Everything was forwarded untouched to the underlying ``sendall``.
    upstream.sendall.assert_called_once_with(
        b"SOMEDATA", b'some extra args...', foo=b'bar')

    # The underlying socket was switched to blocking mode.
    upstream.setblocking.assert_called_once_with(1)

    # The response was read using the fake socket's buffer size.
    expected_reads = [call(fake_socket._bufsize)]
    upstream.recv.assert_has_calls(expected_reads)

    # Only the bytes received after the error were buffered.
    buffered = fake_socket.fd.read()
    buffered.should.equal(b"after error")

    # The underlying socket was connected along the way.
    upstream.connect.called.should.be.true
Example #5
0
    def test_load_df_with_data_types(self, mock_run_cli):
        """Check the column types in the CREATE TABLE statement from ``load_df``.

        A one-row DataFrame with one column per kind of value (bool, negative
        and positive int, float, str, datetime, arbitrary object, bytes, text
        and ``None``) is loaded into table ``t``. The first positional
        argument of the first call recorded on the injected ``mock_run_cli``
        mock must equal the expected DDL once whitespace is normalised.
        """
        ord_dict = OrderedDict()
        ord_dict['b'] = [True]
        ord_dict['i'] = [-1]
        ord_dict['t'] = [1]
        ord_dict['f'] = [0.0]
        ord_dict['c'] = ['c']
        ord_dict['M'] = [datetime.datetime(2018, 1, 1)]
        ord_dict['O'] = [object()]
        ord_dict['S'] = [b'STRING']
        ord_dict['U'] = ['STRING']
        ord_dict['V'] = [None]
        df = pd.DataFrame(ord_dict)

        hook = MockHiveCliHook()
        hook.load_df(df, 't')

        query = """
            CREATE TABLE IF NOT EXISTS t (
                `b` BOOLEAN,
                `i` BIGINT,
                `t` BIGINT,
                `f` DOUBLE,
                `c` STRING,
                `M` TIMESTAMP,
                `O` STRING,
                `S` STRING,
                `U` STRING,
                `V` STRING)
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY ','
            STORED AS textfile
            ;
        """

        # Merely constructing a ``call(...)`` object verifies nothing, so the
        # recorded statement is compared explicitly. Runs of whitespace are
        # collapsed on both sides because the expected query above is indented
        # for readability only.
        issued = mock_run_cli.call_args_list[0][0][0]
        assert " ".join(issued.split()) == " ".join(query.split())
Example #6
0
    def test_load_file_without_create_table(self, mock_run_cli):
        """Check the LOAD DATA statement issued by ``load_file(create=False)``.

        Only the presence of the LOAD DATA call on the injected
        ``mock_run_cli`` mock is asserted.
        """
        source_path = "/path/to/input/file"
        target_table = "output_table"

        MockHiveCliHook().load_file(
            filepath=source_path, table=target_table, create=False)

        load_template = (
            "LOAD DATA LOCAL INPATH '{filepath}' "
            "OVERWRITE INTO TABLE {table} ;\n"
        )
        expected_load = load_template.format(
            filepath=source_path, table=target_table)

        mock_run_cli.assert_has_calls([call(expected_load)], any_order=True)