def test_ha_client_socket_timeout(self):
    e = socket.timeout
    mocked_client_cat = Mock(side_effect=e)
    ha_client = HAClient([Namenode("foo"), Namenode("bar")])
    ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
    cat_result_gen = ha_client.cat(ha_client, ['foobar'])
    self.assertRaises(OutOfNNException, all, cat_result_gen)

def test_ha_client_standby_error(self):
    e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
    mocked_client_cat = Mock(side_effect=e)
    ha_client = HAClient([Namenode("foo"), Namenode("bar")])
    ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
    cat_result_gen = ha_client.cat(ha_client, ['foobar'])
    self.assertRaises(OutOfNNException, all, cat_result_gen)

def _read_config_snakebiterc(self, path=os.path.join(os.path.expanduser('~'), '.snakebiterc')):
    # Note: the first %s is the snakebite version, the second is the rc file path
    old_version_info = "You are using snakebite %s with Trash support together with an old snakebiterc, please update/remove your %s file. By default Trash is %s." % (
        version(), path, 'disabled' if not self.configs['use_trash'] else 'enabled')
    with open(path) as config_file:
        configs = json.load(config_file)

    if isinstance(configs, list):
        # Version 1: config is a list of namenode(s) - possibly HA
        for config in configs:
            nn = Namenode(config['namenode'],
                          self.__use_cl_port_first(config.get('port', Namenode.DEFAULT_PORT)),
                          config.get('version', Namenode.DEFAULT_VERSION))
            self.namenodes.append(nn)
        if self.__usetrash_unset():
            # The commandline setting has higher priority
            print_info(old_version_info)
            # There's no info about Trash in version 1, use the default policy:
            self.args.usetrash = self.configs['use_trash']
    elif isinstance(configs, dict):
        # Version 2: a dict - either the new configuration format or just one
        # namenode, which was the very first configuration syntax
        if 'config_version' in configs:
            # Config version >= 2
            for nn_config in configs['namenodes']:
                nn = Namenode(nn_config['host'],
                              self.__use_cl_port_first(nn_config.get('port', Namenode.DEFAULT_PORT)),
                              nn_config.get('version', Namenode.DEFAULT_VERSION))
                self.namenodes.append(nn)
            if self.__usetrash_unset():
                # The commandline setting has higher priority
                self.args.usetrash = configs.get("use_trash", self.configs['use_trash'])
            self.user = configs.get("user")
        else:
            # config is a single namenode - no HA
            self.namenodes.append(Namenode(configs['namenode'],
                                           self.__use_cl_port_first(configs.get('port', Namenode.DEFAULT_PORT)),
                                           configs.get('version', Namenode.DEFAULT_VERSION)))
            if self.__usetrash_unset():
                # The commandline setting has higher priority
                print_info(old_version_info)
                self.args.usetrash = self.configs['use_trash']
    else:
        print_error_exit("Config retrieved from %s is corrupted! Remove it!" % path)

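# For reference, a sketch of the two on-disk formats _read_config_snakebiterc
# accepts, derived from the key names the parser reads above (hosts, ports and
# versions below are made-up example values, not from the source):
#
# Version 1 - a JSON list of namenodes:
#   [{"namenode": "nn1.example.com", "port": 8020, "version": 9},
#    {"namenode": "nn2.example.com", "port": 8020, "version": 9}]
#
# Version 2 - a JSON dict with an explicit "config_version":
#   {"config_version": 2, "use_trash": true,
#    "namenodes": [{"host": "nn1.example.com", "port": 8020, "version": 9},
#                  {"host": "nn2.example.com", "port": 8020, "version": 9}]}
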
def test_ha_client_ehostunreach_socket_error(self):
    e = socket.error
    e.errno = errno.EHOSTUNREACH
    mocked_client_cat = Mock(side_effect=e)
    ha_client = HAClient([Namenode("foo"), Namenode("bar")])
    ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
    cat_result_gen = ha_client.cat(ha_client, ['foobar'])
    self.assertRaises(OutOfNNException, all, cat_result_gen)

def test_ha_client_econnrefused_socket_error(self):
    e = SocketError
    e.errno = errno.ECONNREFUSED
    mocked_client_cat = Mock(side_effect=e)
    ha_client = HAClient([Namenode("foo"), Namenode("bar")])
    ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
    cat_result_gen = ha_client.cat(ha_client, ['foobar'])
    self.assertRaises(OutOfNNException, all, cat_result_gen)

def test_ha_client_retry(self, rpc_call):
    # rpc_call is injected by a @patch decorator (not shown in this snippet)
    retry_attempts = 3
    e = RequestError("org.apache.hadoop.ipc.RetriableException foo bar")
    rpc_call.side_effect = e
    nns = [Namenode("foo"), Namenode("bar")]
    ha_client = HAClient(nns, max_retries=retry_attempts)
    cat_result_gen = ha_client.cat(['foobar'])
    self.assertRaises(RequestError, all, cat_result_gen)
    # One initial call plus retry_attempts retries
    self.assertEqual(rpc_call.call_count, 1 + retry_attempts)

def test_ha_client_failover_retry_for_exception(self, rpc_call):
    # rpc_call is injected by a @patch decorator (not shown in this snippet)
    failover_attempts = 3
    e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
    rpc_call.side_effect = e
    nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
    ha_client = HAClient(nns, max_failovers=failover_attempts)
    cat_result_gen = ha_client.cat(['foobar'])
    self.assertRaises(OutOfNNException, all, cat_result_gen)
    # One initial call plus failover_attempts failovers
    self.assertEqual(rpc_call.call_count, 1 + failover_attempts)

def test_ha_client_failover_retry(self, rpc_call):
    # rpc_call is injected by a @patch decorator (not shown in this snippet)
    failover_attempts = 3
    e = socket.timeout
    e.message = "socket.timeout"
    rpc_call.side_effect = e
    nns = [Namenode("foo"), Namenode("bar")]
    ha_client = HAClient(nns, max_failovers=failover_attempts)
    cat_result_gen = ha_client.cat(['foobar'])
    self.assertRaises(OutOfNNException, all, cat_result_gen)
    self.assertEqual(rpc_call.call_count, 1 + failover_attempts)

def test_ha_client_retry2(self, get_connection):
    # get_connection is injected by a @patch decorator (not shown in this snippet)
    retry_attempts = 2
    e = RequestError("org.apache.hadoop.ipc.RetriableException foo bar")
    get_connection.side_effect = e
    nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
    ha_client = HAClient(nns, max_retries=retry_attempts)
    cat_result_gen = ha_client.cat(['foobar'])
    self.assertRaises(RequestError, all, cat_result_gen)
    # Retries stay on the same namenode
    calls = [call("foo", 8020), call("foo", 8020), call("foo", 8020)]
    get_connection.assert_has_calls(calls)

def test_ha_client_failover_retry_for_exception2(self, get_connection):
    # get_connection is injected by a @patch decorator (not shown in this snippet)
    failover_attempts = 2
    e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
    get_connection.side_effect = e
    nns = [Namenode("foo"), Namenode("bar")]
    ha_client = HAClient(nns, max_failovers=failover_attempts)
    cat_result_gen = ha_client.cat(['foobar'])
    self.assertRaises(OutOfNNException, all, cat_result_gen)
    # Failovers alternate between the configured namenodes
    calls = [call("foo", 8020), call("bar", 8020), call("foo", 8020)]
    get_connection.assert_has_calls(calls)

def test_ha_client_failover_retry2(self, get_connection):
    # get_connection is injected by a @patch decorator (not shown in this snippet)
    failover_attempts = 2
    e = socket.timeout
    e.message = "socket.timeout"
    get_connection.side_effect = e
    nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
    ha_client = HAClient(nns, max_failovers=failover_attempts)
    cat_result_gen = ha_client.cat(['foobar'])
    self.assertRaises(OutOfNNException, all, cat_result_gen)
    # Failovers alternate between the configured namenodes
    calls = [call("foo", 8020), call("bar", 8020), call("foo", 8020)]
    get_connection.assert_has_calls(calls)

def get_snakebite_hdfs_client():
    """
    Get an HDFS client from the snakebite library.
    :return: a snakebite HDFS HAClient
    """
    n1 = Namenode("hadoop101", 9000)
    n2 = Namenode("hadoop102", 9000)
    client = HAClient([n1, n2], effective_user="******",
                      sock_request_timeout=10000000000)
    return client

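# A minimal usage sketch for the client returned above ('/tmp' is only an
# example path; snakebite's ls() yields one dict per directory entry):
client = get_snakebite_hdfs_client()
for entry in client.ls(['/tmp']):
    print(entry['path'])
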
def _read_config_cl(self):
    ''' Check if any directory arguments contain hdfs:// '''
    dirs_to_check = self.__get_all_directories()
    hosts, ports = [], []
    for path in dirs_to_check:
        if path.startswith('hdfs://'):
            parse_result = urlparse(path)
            hosts.append(parse_result.hostname)
            ports.append(parse_result.port)

    # Remove duplicates and None from (hosts + self.args.namenode)
    hosts = list(filter(lambda x: x is not None, set(hosts + [self.args.namenode])))
    if len(hosts) > 1:
        print_error_exit('Conflicting namenode hosts in commandline arguments, hosts: %s' % str(hosts))

    ports = list(filter(lambda x: x is not None, set(ports + [self.args.port])))
    if len(ports) > 1:
        print_error_exit('Conflicting namenode ports in commandline arguments, ports: %s' % str(ports))

    # Store the port from the commandline in arguments - the CL port has the highest priority
    if len(ports) == 1:
        self.args.port = ports[0]

    # Do we agree on one namenode?
    if len(hosts) == 1 and len(ports) <= 1:
        self.args.namenode = hosts[0]
        self.args.port = ports[0] if len(ports) == 1 else Namenode.DEFAULT_PORT
        self.namenodes.append(Namenode(self.args.namenode, self.args.port))
        # We got the info from the commandline -> if use_trash isn't set, use the default policy:
        if self.__usetrash_unset():
            self.args.usetrash = self.configs['use_trash']
        return True
    else:
        return False

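# Illustrative only - what urlparse extracts from an hdfs:// path, assuming a
# made-up host and port:
#   urlparse('hdfs://namenode.example.com:8020/user/foo').hostname  # 'namenode.example.com'
#   urlparse('hdfs://namenode.example.com:8020/user/foo').port      # 8020
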
def test_response_error_no_client_retry(self, rpc_call):
    # rpc_call is injected by a @patch decorator (not shown in this snippet)
    retry_attempts = 3
    e = RpcResponseError("Response read error")
    rpc_call.side_effect = e
    nns = [Namenode("foo")]
    ha_client = HAClient(nns, max_retries=retry_attempts)
    cat_result_gen = ha_client.rename(['foobar'], 'foo')
    self.assertRaises(RpcResponseError, all, cat_result_gen)
    # RpcResponseError is not retriable, so exactly one call is made
    self.assertEqual(rpc_call.call_count, 1)

def __create_hdfs_client__():
    try:
        namenode_conf = os.path.dirname(os.path.abspath(__file__)) + '/../conf/namenode.conf'
        config_dict = config_parse.config_parse(namenode_conf)
        if 'namenode' not in config_dict or 'host' not in config_dict['namenode'] or \
                'port' not in config_dict['namenode'] or 'second_namenode' not in config_dict or \
                'host' not in config_dict['second_namenode'] or 'port' not in config_dict['second_namenode']:
            logger.error('namenode config file:[%s] invalid' % namenode_conf)
            sys.exit(2)
        namenode_host = config_dict['namenode']['host']
        namenode_port = int(config_dict['namenode']['port'])
        second_namenode_host = config_dict['second_namenode']['host']
        second_namenode_port = int(config_dict['second_namenode']['port'])
        namenode = Namenode(namenode_host, namenode_port)
        second_namenode = Namenode(second_namenode_host, second_namenode_port)
        return HAClient([namenode, second_namenode], use_trash=True)
    except Exception as e:
        logger.error('create hdfs client exception:[%s]' % str(e))
        sys.exit(2)

def read_config(self):
    self.configs = HDFSConfig.get_external_config()

    # Try to retrieve namenode config from the commandline arguments
    if self._read_config_cl():
        return

    config_file = os.path.join(os.path.expanduser('~'), '.snakebiterc')
    if os.path.exists(config_file):
        # If ~/.snakebiterc exists - read config from it
        self._read_config_snakebiterc()
    elif os.path.exists('/etc/snakebiterc'):
        self._read_config_snakebiterc('/etc/snakebiterc')
    else:
        # If configs from HDFS config files exist and contain something
        if self.configs:
            for config in self.configs['namenodes']:
                nn = Namenode(config['namenode'], self.__use_cl_port_first(config['port']))
                self.namenodes.append(nn)
            if self.__usetrash_unset():
                self.args.usetrash = self.configs['use_trash']
            self.use_sasl = self.configs['use_sasl']

    if len(self.namenodes):
        return
    else:
        print("No ~/.snakebiterc found, no HADOOP_HOME set and no -n and -p provided")
        print("Tried to find core-site.xml in:")
        for core_conf_path in HDFSConfig.core_try_paths:
            print(" - %s" % core_conf_path)
        print("Tried to find hdfs-site.xml in:")
        for hdfs_conf_path in HDFSConfig.hdfs_try_paths:
            print(" - %s" % hdfs_conf_path)
        print("\nYou can manually create ~/.snakebiterc with the following content:")
        print('{')
        print('  "config_version": 2,')
        print('  "use_trash": true,')
        print('  "namenodes": [')
        print('    {"host": "namenode-ha1", "port": %d, "version": %d},' % (Namenode.DEFAULT_PORT, Namenode.DEFAULT_VERSION))
        print('    {"host": "namenode-ha2", "port": %d, "version": %d}' % (Namenode.DEFAULT_PORT, Namenode.DEFAULT_VERSION))
        print('  ]')
        print('}')
        sys.exit(1)

def get_client(self, instance):
    if 'namenode' in instance:
        # Backward compatibility for the old style configuration of this check
        host, port = instance['namenode'], instance.get('port', DEFAULT_PORT)
        return snakebite.client.Client(host, port)

    if type(instance['namenodes']) != list or len(instance['namenodes']) == 0:
        raise ValueError('The "namenodes" parameter should be a list of dictionaries.')

    for namenode in instance['namenodes']:
        if type(namenode) != dict:
            raise ValueError('The "namenodes" parameter should be a list of dictionaries.')
        if "url" not in namenode:
            raise ValueError('Each namenode should specify a "url" parameter.')

    if len(instance['namenodes']) == 1:
        host, port = instance['namenodes'][0]['url'], instance['namenodes'][0].get('port', DEFAULT_PORT)
        return snakebite.client.Client(host, port)
    else:
        # We are running in HA mode
        if Namenode is None:
            # We are running snakebite 1.x, which is not compatible with HA mode.
            # Display a warning and fall back to regular mode.
            self.warning(
                "HA Mode is not available with snakebite < 2.2.0. "
                "Upgrade to the latest version of snakebite by running: "
                "sudo /opt/datadog-agent/embedded/bin/pip install --upgrade snakebite")
            host, port = instance['namenodes'][0]['url'], instance['namenodes'][0].get('port', DEFAULT_PORT)
            return snakebite.client.Client(host, port)
        else:
            self.log.debug("Running in HA Mode")
            nodes = []
            for namenode in instance['namenodes']:
                nodes.append(Namenode(namenode['url'], namenode.get('port', DEFAULT_PORT)))
            return snakebite.client.HAClient(nodes)

def get_client(self, instance):
    if 'namenode' in instance:
        host, port = instance['namenode'], instance.get('port', DEFAULT_PORT)
        return snakebite.client.Client(host, port)

    if type(instance['namenodes']) != list or len(instance['namenodes']) == 0:
        raise ValueError('The "namenodes" parameter should be a list of dictionaries.')

    for namenode in instance['namenodes']:
        if type(namenode) != dict:
            raise ValueError('The "namenodes" parameter should be a list of dictionaries.')
        if "url" not in namenode:
            raise ValueError('Each namenode should specify a "url" parameter.')

    if len(instance['namenodes']) == 1:
        host, port = instance['namenodes'][0]['url'], instance['namenodes'][0].get('port', DEFAULT_PORT)
        return snakebite.client.Client(host, port)
    else:
        if Namenode is None:
            self.warning(
                "HA Mode is not available with snakebite < 2.2.0. "
                "Upgrade to the latest version of snakebite by running: "
                "sudo /opt/datamonitor-agent/embedded/bin/pip install --upgrade snakebite")
            host, port = instance['namenodes'][0]['url'], instance['namenodes'][0].get('port', DEFAULT_PORT)
            return snakebite.client.Client(host, port)
        else:
            self.log.debug("Running in HA Mode")
            nodes = []
            for namenode in instance['namenodes']:
                nodes.append(Namenode(namenode['url'], namenode.get('port', DEFAULT_PORT)))
            return snakebite.client.HAClient(nodes)

def __init__(self, namenode, path, use_trash=False, effective_user=None,
             use_sasl=True, hdfs_namenode_principal='hdfs',
             use_datanode_hostname=False):
    from snakebite.client import HAClient
    from snakebite.namenode import Namenode
    self.path = path
    namenodes = [Namenode(namenode)]
    self._client = HAClient(namenodes,
                            use_trash=use_trash,
                            effective_user=effective_user,
                            use_sasl=use_sasl,
                            hdfs_namenode_principal=hdfs_namenode_principal,
                            use_datanode_hostname=use_datanode_hostname)

import urllib
import urllib2
import string
import datetime
import xml.sax
import xml.sax.handler
import sys
import time
import os
from subprocess import *
import threading
import socket
import requests
import json

from snakebite.client import HAClient
from snakebite.namenode import Namenode

n1 = Namenode("namenode-1", 8022)
n2 = Namenode("namenode-2", 8022)

ENDPOINT = "hive-db-monitor"  # Unique identifier
STEP = 600
FALCON_AGENT_URL = "http://192.168.17.13:1988/v1/push"

this_timestamp = int(time.time())
d = datetime.datetime.now()

item = {}
item['endpoint'] = 'hive-db-monitor-folder'
item['metric'] = ''
item['timestamp'] = this_timestamp
item['step'] = STEP
item['counterType'] = 'GAUGE'
item['tags'] = ''

def ha_test():
    n1 = Namenode("192.168.24.137", 9990)
    n2 = Namenode("192.168.24.138", 9990)
    client = HAClient([n1, n2])
    for x in client.ls(['/']):
        print(x)

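# A hypothetical entry point for running the smoke test above directly:
if __name__ == '__main__':
    ha_test()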