Exemple #1
0
 def test_ha_client_socket_timeout(self):
     """Consuming the cat results raises OutOfNNException when cat always times out.

     ``cat`` is swapped for a mock that raises ``socket.timeout`` on every
     call, wrapped with ``HAClient._ha_gen_method``.
     """
     failing_cat = Mock(side_effect=socket.timeout)
     client = HAClient([Namenode("foo"), Namenode("bar")])
     client.cat = HAClient._ha_gen_method(failing_cat)
     results = client.cat(client, ['foobar'])
     # assertRaises calls all(results), which consumes the returned iterable.
     self.assertRaises(OutOfNNException, all, results)
Exemple #2
0
 def test_ha_client_standby_errror(self):
     """Consuming the cat results raises OutOfNNException on a standby error.

     The mocked ``cat`` always raises a ``RequestError`` whose message names
     ``org.apache.hadoop.ipc.StandbyException``.
     """
     standby_error = RequestError(
         "org.apache.hadoop.ipc.StandbyException foo bar")
     failing_cat = Mock(side_effect=standby_error)
     client = HAClient([Namenode("foo"), Namenode("bar")])
     client.cat = HAClient._ha_gen_method(failing_cat)
     results = client.cat(client, ['foobar'])
     # assertRaises calls all(results), which consumes the returned iterable.
     self.assertRaises(OutOfNNException, all, results)
    def _read_config_snakebiterc(self,
                                 path=os.path.join(os.path.expanduser('~'),
                                                   '.snakebiterc')):
        """Load namenode settings from a snakebiterc JSON file.

        Three layouts are accepted:

        * a list of ``{"namenode", "port", "version"}`` dicts (version 1);
        * a dict containing ``"config_version"`` with a ``"namenodes"`` list
          of ``{"host", "port", "version"}`` dicts, plus optional
          ``"use_trash"`` and ``"user"`` entries (version 2);
        * a dict without ``"config_version"`` describing one namenode.

        Every namenode found is appended to ``self.namenodes``. When the trash
        option was not set on the command line, ``self.args.usetrash`` is
        filled in from the file (version 2) or from ``self.configs``.
        Any other top-level JSON type is reported via ``print_error_exit``.

        :param path: configuration file to read; defaults to ``~/.snakebiterc``.
        """
        # The arguments used to be passed as (path, version), which printed
        # the file path as the snakebite version and the version as the file.
        old_version_info = "You are using snakebite %s with Trash support together with old snakebiterc, please update/remove your %s file. By default Trash is %s." % (
            version(), path,
            'disabled' if not self.configs['use_trash'] else 'enabled')
        with open(path) as config_file:
            configs = json.load(config_file)

        if isinstance(configs, list):
            # Version 1: List of namenodes
            # config is a list of namenode(s) - possibly HA
            for config in configs:
                nn = Namenode(
                    config['namenode'],
                    self.__use_cl_port_first(
                        config.get('port', Namenode.DEFAULT_PORT)),
                    config.get('version', Namenode.DEFAULT_VERSION))
                self.namenodes.append(nn)
            if self.__usetrash_unset():
                # commandline setting has higher priority
                print_info(old_version_info)
                # There's no info about Trash in version 1, use default policy:
                self.args.usetrash = self.configs['use_trash']
        elif isinstance(configs, dict):
            # Version 2: {}
            # Can be either new configuration or just one namenode
            # which was the very first configuration syntax
            if 'config_version' in configs:
                # Config version => 2
                for nn_config in configs['namenodes']:
                    nn = Namenode(
                        nn_config['host'],
                        self.__use_cl_port_first(
                            nn_config.get('port', Namenode.DEFAULT_PORT)),
                        nn_config.get('version', Namenode.DEFAULT_VERSION))
                    self.namenodes.append(nn)

                if self.__usetrash_unset():
                    # commandline setting has higher priority
                    self.args.usetrash = configs.get("use_trash",
                                                     self.configs['use_trash'])

                self.user = configs.get("user")
            else:
                # config is a single namenode - no HA
                self.namenodes.append(
                    Namenode(
                        configs['namenode'],
                        self.__use_cl_port_first(
                            configs.get('port', Namenode.DEFAULT_PORT)),
                        configs.get('version', Namenode.DEFAULT_VERSION)))
                if self.__usetrash_unset():
                    # commandline setting has higher priority
                    print_info(old_version_info)
                    self.args.usetrash = self.configs['use_trash']
        else:
            print_error_exit(
                "Config retrieved from %s is corrupted! Remove it!" % path)
Exemple #4
0
 def test_ha_client_ehostunreach_socket_error(self):
     """Consuming the cat results raises OutOfNNException on EHOSTUNREACH.

     The mocked ``cat`` always raises a ``socket.error`` subclass whose
     ``errno`` is ``errno.EHOSTUNREACH``.
     """
     # Use a throwaway subclass instead of assigning errno on socket.error
     # itself: mutating the shared class leaks into every other test and is
     # rejected outright when socket.error is a built-in type.
     host_unreachable = type("HostUnreachable", (socket.error,),
                             {"errno": errno.EHOSTUNREACH})
     mocked_client_cat = Mock(side_effect=host_unreachable)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Exemple #5
0
 def test_ha_client_econnrefused_socket_error(self):
     """Consuming the cat results raises OutOfNNException on ECONNREFUSED.

     The mocked ``cat`` always raises a ``SocketError`` subclass whose
     ``errno`` is ``errno.ECONNREFUSED``.
     """
     # Use a throwaway subclass instead of assigning errno on SocketError
     # itself, so the shared exception class is not modified for other tests.
     connection_refused = type("ConnectionRefused", (SocketError,),
                               {"errno": errno.ECONNREFUSED})
     mocked_client_cat = Mock(side_effect=connection_refused)
     ha_client = HAClient([Namenode("foo"), Namenode("bar")])
     ha_client.cat = HAClient._ha_gen_method(mocked_client_cat)
     cat_result_gen = ha_client.cat(ha_client, ['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
Exemple #6
0
 def test_ha_client_retry(self, rpc_call):
     """A RetriableException is retried ``max_retries`` times, then re-raised.

     :param rpc_call: mock whose ``side_effect`` and ``call_count`` are used;
         presumably injected by a patch decorator outside this view.
     """
     retry_attempts = 3
     e = RequestError("org.apache.hadoop.ipc.RetriableException foo bar")
     rpc_call.side_effect = e
     nns = [Namenode("foo"), Namenode("bar")]
     ha_client = HAClient(nns, max_retries=retry_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(RequestError, all, cat_result_gen)
     # One initial call plus retry_attempts retries.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(rpc_call.call_count, 1 + retry_attempts)
Exemple #7
0
 def test_ha_client_failover_retry_for_exception(self, rpc_call):
     """A StandbyException triggers ``max_failovers`` failovers, then OutOfNNException.

     :param rpc_call: mock whose ``side_effect`` and ``call_count`` are used;
         presumably injected by a patch decorator outside this view.
     """
     failover_attempts = 3
     e = RequestError("org.apache.hadoop.ipc.StandbyException foo bar")
     rpc_call.side_effect = e
     nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     # One initial call plus failover_attempts failovers.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(rpc_call.call_count, 1 + failover_attempts)
Exemple #8
0
 def test_ha_client_failover_retry(self, rpc_call):
     """A socket timeout triggers ``max_failovers`` failovers, then OutOfNNException.

     :param rpc_call: mock whose ``side_effect`` and ``call_count`` are used;
         presumably injected by a patch decorator outside this view.
     """
     failover_attempts = 3
     # Attach the message to a throwaway subclass instead of assigning it on
     # socket.timeout itself, so the shared class is not modified for other
     # tests.
     e = type("MessageTimeout", (socket.timeout,),
              {"message": "socket.timeout"})
     rpc_call.side_effect = e
     nns = [Namenode("foo"), Namenode("bar")]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     # One initial call plus failover_attempts failovers.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(rpc_call.call_count, 1 + failover_attempts)
Exemple #9
0
 def test_ha_client_retry2(self, get_connection):
     """With two retries, a RetriableException yields three connections to foo:8020.

     :param get_connection: mock whose ``side_effect`` is set here and whose
         recorded calls are checked; presumably injected by a patch decorator
         outside this view.
     """
     max_retries = 2
     get_connection.side_effect = RequestError(
         "org.apache.hadoop.ipc.RetriableException foo bar")
     client = HAClient([Namenode("foo", 8020), Namenode("bar", 8020)],
                       max_retries=max_retries)
     results = client.cat(['foobar'])
     self.assertRaises(RequestError, all, results)
     # Every attempt is expected to target the first namenode.
     expected_calls = [call("foo", 8020)] * 3
     get_connection.assert_has_calls(expected_calls)
Exemple #10
0
 def test_ha_client_failover_retry_for_exception2(self, get_connection):
     """With two failovers, a StandbyException alternates foo, bar, foo.

     :param get_connection: mock whose ``side_effect`` is set here and whose
         recorded calls are checked; presumably injected by a patch decorator
         outside this view.
     """
     max_failovers = 2
     get_connection.side_effect = RequestError(
         "org.apache.hadoop.ipc.StandbyException foo bar")
     client = HAClient([Namenode("foo"), Namenode("bar")],
                       max_failovers=max_failovers)
     results = client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, results)
     # The namenodes are created without a port, yet port 8020 is expected.
     expected_calls = [
         call("foo", 8020),
         call("bar", 8020),
         call("foo", 8020),
     ]
     get_connection.assert_has_calls(expected_calls)
Exemple #11
0
 def test_ha_client_failover_retry2(self, get_connection):
     """With two failovers, a socket timeout alternates foo, bar, foo.

     :param get_connection: mock whose ``side_effect`` is set here and whose
         recorded calls are checked; presumably injected by a patch decorator
         outside this view.
     """
     failover_attempts = 2
     # Attach the message to a throwaway subclass instead of assigning it on
     # socket.timeout itself, so the shared class is not modified for other
     # tests.
     e = type("MessageTimeout", (socket.timeout,),
              {"message": "socket.timeout"})
     get_connection.side_effect = e
     nns = [Namenode("foo", 8020), Namenode("bar", 8020)]
     ha_client = HAClient(nns, max_failovers=failover_attempts)
     cat_result_gen = ha_client.cat(['foobar'])
     self.assertRaises(OutOfNNException, all, cat_result_gen)
     calls = [call("foo", 8020), call("bar", 8020), call("foo", 8020)]
     get_connection.assert_has_calls(calls)
def get_snakebite_hdfs_client():
    """
    Build the snakebite HDFS client.

    The client is an ``HAClient`` over the namenodes ``hadoop101`` and
    ``hadoop102``, both on port 9000.

    :return: snakebite HDFS client
    """
    namenodes = [Namenode(host, 9000) for host in ("hadoop101", "hadoop102")]
    return HAClient(namenodes,
                    effective_user="******",
                    sock_request_timeout=10000000000)
    def _read_config_cl(self):
        """Try to determine the namenode from the command-line arguments.

        Hosts and ports are collected from every directory argument that
        starts with ``hdfs://`` and merged with ``self.args.namenode`` and
        ``self.args.port``. More than one distinct host or port is reported
        via ``print_error_exit``.

        :return: ``True`` when exactly one host was found, in which case the
            namenode is appended to ``self.namenodes``; ``False`` otherwise.
        """
        hosts, ports = [], []
        for path in self.__get_all_directories():
            if path.startswith('hdfs://'):
                parse_result = urlparse(path)
                hosts.append(parse_result.hostname)
                ports.append(parse_result.port)

        # remove duplicates and None from (hosts + self.args.namenode)
        hosts = [host for host in set(hosts + [self.args.namenode])
                 if host is not None]
        if len(hosts) > 1:
            print_error_exit('Conflicting namenode hosts in commandline arguments, hosts: %s' % str(hosts))

        ports = [port for port in set(ports + [self.args.port])
                 if port is not None]
        if len(ports) > 1:
            print_error_exit('Conflicting namenode ports in commandline arguments, ports: %s' % str(ports))

        # Store port from CL in arguments - CL port has the highest priority.
        # This happens even when no host is known, so the port is kept for
        # whatever is done with self.args.port after a False return.
        if len(ports) == 1:
            self.args.port = ports[0]

        # do we agree on one namenode?
        if len(hosts) == 1 and len(ports) <= 1:
            self.args.namenode = hosts[0]
            self.args.port = ports[0] if len(ports) == 1 else Namenode.DEFAULT_PORT
            self.namenodes.append(Namenode(self.args.namenode, self.args.port))
            # we got the info from CL -> check if use_trash is set - if not use default policy:
            if self.__usetrash_unset():
                self.args.usetrash = self.configs['use_trash']
            return True
        else:
            return False
Exemple #14
0
 def test_response_error_no_client_retry(self, rpc_call):
     """An RpcResponseError during rename is raised after one call, with no retry.

     :param rpc_call: mock whose ``side_effect`` and ``call_count`` are used;
         presumably injected by a patch decorator outside this view.
     """
     retry_attempts = 3
     e = RpcResponseError("Response read error")
     rpc_call.side_effect = e
     nns = [Namenode("foo")]
     ha_client = HAClient(nns, max_retries=retry_attempts)
     rename_result_gen = ha_client.rename(['foobar'], 'foo')
     self.assertRaises(RpcResponseError, all, rename_result_gen)
     # Exactly one call is expected even though max_retries is 3.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(rpc_call.call_count, 1)
Exemple #15
0
def __create_hdfs_client__():
    """Create an ``HAClient`` from ``../conf/namenode.conf``.

    The path is resolved relative to this file's directory. The parsed
    configuration must contain ``namenode`` and ``second_namenode`` sections,
    each with ``host`` and ``port`` entries; otherwise an error is logged and
    the process exits with status 2. Any other exception raised while
    building the client is logged and also ends the process with status 2.

    :return: ``HAClient`` over both namenodes, with ``use_trash=True``.
    """
    sections = ('namenode', 'second_namenode')
    try:
        namenode_conf = os.path.dirname(
            os.path.abspath(__file__)) + '/../conf/namenode.conf'
        config_dict = config_parse.config_parse(namenode_conf)
        for section in sections:
            if section not in config_dict or \
                    'host' not in config_dict[section] or \
                    'port' not in config_dict[section]:
                logger.error('namenode config file:[%s] invalid' % namenode_conf)
                # SystemExit is not an Exception subclass, so the handler
                # below does not intercept this exit.
                sys.exit(2)

        namenodes = [
            Namenode(config_dict[section]['host'],
                     int(config_dict[section]['port']))
            for section in sections
        ]
        return HAClient(namenodes, use_trash=True)
    # "except Exception, e" is Python-2-only syntax; "as" works on 2.6+ and 3.
    except Exception as e:
        logger.error('create hdfs client exception:[%s]' % str(e))
        sys.exit(2)
Exemple #16
0
    def read_config(self):
        """Populate ``self.namenodes`` from the first configuration source that applies.

        Sources are tried in this order: command-line arguments,
        ``~/.snakebiterc``, ``/etc/snakebiterc``, and finally the settings
        returned by ``HDFSConfig.get_external_config()``. If no namenode has
        been collected afterwards, a help text including a sample
        ``~/.snakebiterc`` is printed and the process exits with status 1.
        """
        self.configs = HDFSConfig.get_external_config()

        # Try to retrieve namenode config from within CL arguments
        if self._read_config_cl():
            return

        user_rc = os.path.join(os.path.expanduser('~'), '.snakebiterc')
        system_rc = '/etc/snakebiterc'

        if os.path.exists(user_rc):
            # Called without a path, so the reader's default path is used.
            self._read_config_snakebiterc()
        elif os.path.exists(system_rc):
            self._read_config_snakebiterc(system_rc)
        elif self.configs:
            # configs from HDFS config files exist and contain something
            for entry in self.configs['namenodes']:
                host = entry['namenode']
                port = self.__use_cl_port_first(entry['port'])
                self.namenodes.append(Namenode(host, port))
            if self.__usetrash_unset():
                self.args.usetrash = self.configs['use_trash']
            self.use_sasl = self.configs['use_sasl']

        if len(self.namenodes):
            return

        # Nothing usable was found: list what was tried and show a sample file.
        print(
            "No ~/.snakebiterc found, no HADOOP_HOME set and no -n and -p provided"
        )
        print("Tried to find core-site.xml in:")
        for core_conf_path in HDFSConfig.core_try_paths:
            print(" - %s" % core_conf_path)
        print("Tried to find hdfs-site.xml in:")
        for hdfs_conf_path in HDFSConfig.hdfs_try_paths:
            print(" - %s" % hdfs_conf_path)
        print(
            "\nYou can manually create ~/.snakebiterc with the following content:"
        )

        defaults = (Namenode.DEFAULT_PORT, Namenode.DEFAULT_VERSION)
        sample_lines = [
            '{',
            '  "config_version": 2,',
            '  "use_trash": true,',
            '  "namenodes": [',
            '    {"host": "namenode-ha1", "port": %d, "version": %d},' % defaults,
            '    {"host": "namenode-ha2", "port": %d, "version": %d}' % defaults,
            '  ]',
            '}',
        ]
        for sample_line in sample_lines:
            print(sample_line)

        sys.exit(1)
Exemple #17
0
    def get_client(self, instance):
        """Build a snakebite client from one check instance configuration.

        :param instance: mapping holding either a legacy ``namenode`` entry
            (with optional ``port``) or a ``namenodes`` list of dicts, each
            with a ``url`` and an optional ``port``.
        :return: ``snakebite.client.Client`` for a single namenode, or when
            ``Namenode`` is ``None``; otherwise ``snakebite.client.HAClient``.
        :raises ValueError: if ``namenodes`` is not a non-empty list of dicts
            or an entry has no ``url``.
        """
        if 'namenode' in instance:
            # backward compatibility for old style configuration of that check
            host, port = instance['namenode'], instance.get(
                'port', DEFAULT_PORT)
            return snakebite.client.Client(host, port)

        namenodes = instance['namenodes']
        if not isinstance(namenodes, list) or len(namenodes) == 0:
            raise ValueError(
                '"namenodes" parameter should be a list of dictionaries.')

        for namenode in namenodes:
            if not isinstance(namenode, dict):
                raise ValueError(
                    '"namenodes" parameter should be a list of dictionaries.')

            if "url" not in namenode:
                raise ValueError(
                    'Each namenode should specify a "url" parameter.')

        if len(namenodes) > 1:
            # We are running on HA mode
            if Namenode is not None:
                self.log.debug("Running in HA Mode")
                nodes = [
                    Namenode(namenode['url'],
                             namenode.get('port', DEFAULT_PORT))
                    for namenode in namenodes
                ]
                return snakebite.client.HAClient(nodes)

            # We are running snakebite 1.x which is not compatible with the HA mode
            # Let's display a warning and use regular mode
            self.warning(
                "HA Mode is not available with snakebite < 2.2.0. "
                "Upgrade to the latest version of snakebite by running: "
                "sudo /opt/datadog-agent/embedded/bin/pip install --upgrade snakebite"
            )

        # Single namenode, or HA unavailable: connect to the first entry only.
        first = namenodes[0]
        return snakebite.client.Client(first['url'],
                                       first.get('port', DEFAULT_PORT))
Exemple #18
0
    def get_client(self, instance):
        """Build a snakebite client from one check instance configuration.

        :param instance: mapping holding either a legacy ``namenode`` entry
            (with optional ``port``) or a ``namenodes`` list of dicts, each
            with a ``url`` and an optional ``port``.
        :return: ``snakebite.client.Client`` for a single namenode, or when
            ``Namenode`` is ``None``; otherwise ``snakebite.client.HAClient``.
        :raises ValueError: if ``namenodes`` is not a non-empty list of dicts
            or an entry has no ``url``.
        """
        if 'namenode' in instance:
            # Legacy single-namenode configuration.
            host, port = instance['namenode'], instance.get(
                'port', DEFAULT_PORT)
            return snakebite.client.Client(host, port)

        namenodes = instance['namenodes']
        if not isinstance(namenodes, list) or len(namenodes) == 0:
            raise ValueError(
                '"namenodes" parameter should be a list of dictionaries.')

        for namenode in namenodes:
            if not isinstance(namenode, dict):
                raise ValueError(
                    '"namenodes" parameter should be a list of dictionaries.')

            if "url" not in namenode:
                raise ValueError(
                    'Each namenode should specify a "url" parameter.')

        if len(namenodes) > 1:
            if Namenode is not None:
                self.log.debug("Running in HA Mode")
                nodes = [
                    Namenode(namenode['url'],
                             namenode.get('port', DEFAULT_PORT))
                    for namenode in namenodes
                ]
                return snakebite.client.HAClient(nodes)

            # Namenode is None here, so HA cannot be used: warn and fall
            # through to a plain client.
            self.warning(
                "HA Mode is not available with snakebite < 2.2.0. "
                "Upgrade to the latest version of snakebite by running: "
                "sudo /opt/datamonitor-agent/embedded/bin/pip install --upgrade snakebite"
            )

        # Single namenode, or HA unavailable: connect to the first entry only.
        first = namenodes[0]
        return snakebite.client.Client(first['url'],
                                       first.get('port', DEFAULT_PORT))
Exemple #19
0
 def __init__(self,
              namenode,
              path,
              use_trash=False,
              effective_user=None,
              use_sasl=True,
              hdfs_namenode_principal='hdfs',
              use_datanode_hostname=False):
     """Store ``path`` and create an ``HAClient`` over one namenode.

     ``namenode`` is passed to ``Namenode``; ``path`` is kept on the
     instance. The remaining keyword arguments are forwarded unchanged to
     ``HAClient``. snakebite is imported here rather than at module level.
     """
     from snakebite.client import HAClient
     from snakebite.namenode import Namenode
     self.path = path
     client_options = {
         'use_trash': use_trash,
         'effective_user': effective_user,
         'use_sasl': use_sasl,
         'hdfs_namenode_principal': hdfs_namenode_principal,
         'use_datanode_hostname': use_datanode_hostname,
     }
     self._client = HAClient([Namenode(namenode)], **client_options)
Exemple #20
0
import urllib, time
import urllib2
import string, datetime
import xml.sax
import xml.sax.handler
import sys, time, os, datetime
from subprocess import *
import threading
import socket
import requests
import json

from snakebite.client import HAClient
from snakebite.namenode import Namenode

# The two namenodes, both on port 8022.
n1, n2 = Namenode("namenode-1", 8022), Namenode("namenode-2", 8022)

ENDPOINT = "hive-db-monitor"  # Unique identifier
STEP = 600
FALCON_AGENT_URL = "http://192.168.17.13:1988/v1/push"

# Timestamp and datetime captured once, when the module is loaded.
this_timestamp = int(time.time())
d = datetime.datetime.now()

# Metric item; 'metric' and 'tags' start out empty.
item = {
    'endpoint': 'hive-db-monitor-folder',
    'metric': '',
    'timestamp': this_timestamp,
    'step': STEP,
    'counterType': 'GAUGE',
    'tags': '',
}
def ha_test():
    """List the HDFS root through an ``HAClient`` and print every entry.

    The client is built over the namenodes 192.168.24.137 and 192.168.24.138,
    both on port 9990.
    """
    n1 = Namenode("192.168.24.137", 9990)
    n2 = Namenode("192.168.24.138", 9990)
    client = HAClient([n1, n2])
    for x in client.ls(['/']):
        # print(x) prints the same text on Python 2 and is valid on Python 3,
        # unlike the former "print x" statement.
        print(x)