def test_get_conn(self, mock_get_connections, mock_insecure_client):
    """
    Checks that ``get_conn`` keeps trying connections after a client error.

    The first construction of the insecure client is set up to raise
    ``HdfsError``; the second one hands back the mocked client.

    :param mock_get_connections: mock whose ``return_value`` is iterated as
        the connections to try (presumably injected by a patch decorator
        that is not visible here)
    :param mock_insecure_client: mock standing in for the insecure client
        class (presumably injected by a patch decorator as well)
    """
    client = mock_insecure_client.return_value
    mock_insecure_client.side_effect = [HdfsError('Error'), client]

    conn = self.webhdfs_hook.get_conn()

    # One client construction is expected per connection, in order.
    expected_calls = []
    for connection in mock_get_connections.return_value:
        url = 'http://{host}:{port}'.format(host=connection.host, port=connection.port)
        expected_calls.append(call(url, user=connection.login))
    mock_insecure_client.assert_has_calls(expected_calls)

    # The returned client must have been probed exactly once at the root path.
    client.status.assert_called_once_with('/')
    self.assertEqual(conn, client)
def _checksum_from_hdfs_file(hdfs_client, hdfs_path):
    """
    Computes the md5 hex digest of a file stored in the HDFS.

    The content is consumed in 64 KiB chunks so the whole file never has to
    be held in memory at once.

    :param hdfs_client: HDFS client; must provide ``status(path, strict=...)``
        and a ``read(path)`` usable as a context manager
    :param hdfs_path: path of the file in the HDFS.
    :return: md5 hash of the file as a hexadecimal string
    :raises HdfsError: if the status lookup returns ``None`` or the path is
        not of type ``FILE``
    """
    path_status = hdfs_client.status(hdfs_path, strict=False)

    # Guard clause: anything that is not an existing regular file is rejected.
    if path_status is None or path_status['type'] != 'FILE':
        raise HdfsError("{0} provided is not file.".format(hdfs_path))

    block_size = 64 * 1024
    digest = hashlib.md5()
    with hdfs_client.read(hdfs_path) as reader:
        while True:
            block = reader.read(block_size)
            # An empty bytes object signals end of stream.
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()
def test_get_conn(self, socket_mock, mock_get_connections, mock_insecure_client):
    """
    Checks that ``get_conn`` keeps trying connections after a client error.

    The first construction of the insecure client is set up to raise
    ``HdfsError``; the second one hands back the mocked client.

    :param socket_mock: mock replacing the socket module used by the hook
        (presumably injected by a patch decorator that is not visible here)
    :param mock_get_connections: mock whose ``return_value`` is iterated as
        the connections to try
    :param mock_insecure_client: mock standing in for the insecure client class
    """
    client = mock_insecure_client.return_value
    mock_insecure_client.side_effect = [HdfsError('Error'), client]

    # connect_ex reports 0; presumably the hook reads this as "host reachable"
    # -- confirm against the hook implementation.
    socket_mock.socket.return_value.connect_ex.return_value = 0

    conn = self.webhdfs_hook.get_conn()

    # One client construction is expected per connection, in order.
    expected_calls = []
    for connection in mock_get_connections.return_value:
        expected_calls.append(
            call(
                f'http://{connection.host}:{connection.port}',
                user=connection.login,
            )
        )
    mock_insecure_client.assert_has_calls(expected_calls)

    # The returned client must have been probed exactly once at the root path.
    client.status.assert_called_once_with('/')
    assert conn == client