Ejemplo n.º 1
0
    def __init__(self, solr_hosts, zookeeper_hosts=None, timeout=15):
        """
        Set up the HTTP session and the host bookkeeping.

        :param solr_hosts: the SOLR hosts; stored both as the master list
            and as the initial current list.

        :param zookeeper_hosts: when not None, passed to ``Zookeeper`` to
            build the client kept on ``self.zookeeper``.

        :param timeout: stored on the instance as ``self.timeout``.
        """
        self.client = requests.Session()

        # Both names start out pointing at the same host list.
        self.master_hosts = self.current_hosts = solr_hosts
        self.servers = []
        self.timeout = timeout

        # Zookeeper is optional: keep None when no hosts were supplied.
        self.zookeeper = (Zookeeper(zookeeper_hosts)
                          if zookeeper_hosts is not None else None)

        # No error has been recorded yet.
        self.last_error = None
        # Minutes to wait after an error before reverting to the old
        # host list.
        self.check_hosts = 5
Ejemplo n.º 2
0
class SolrRequest(object):
    """
    Handle requests to SOLR and response from SOLR

    Requests are sent to one of the known SOLR hosts, picked at random.
    When a host cannot be reached or answers with a non-200 status, the
    remaining hosts are tried in turn. If a zookeeper client is
    configured, the list of hosts is refreshed from zookeeper before each
    retry.
    """
    def __init__(self, solr_hosts, zookeeper_hosts=None, timeout=15):
        """
        :param solr_hosts: the SOLR hosts to send requests to.

        :param zookeeper_hosts: when not None, used to build the
            ``Zookeeper`` client consulted after a failed request.

        :param timeout: timeout passed to every HTTP request.
        """
        self.client = requests.Session()
        self.master_hosts = solr_hosts
        self.current_hosts = solr_hosts
        self.servers = []
        self.timeout = timeout
        if zookeeper_hosts is not None:
            self.zookeeper = Zookeeper(zookeeper_hosts)
        else:
            self.zookeeper = None

        self.last_error = None
        # time to revert to old host list (in minutes) after an error
        self.check_hosts = 5

    def _restore_master_hosts_if_due(self):
        """
        Revert to the master host list once the last error is old enough.

        Does nothing when no error is recorded or when fewer than
        ``self.check_hosts`` minutes have passed since it.
        """
        if self.last_error is None:
            return

        # last_error is a time.time() value in seconds; check_hosts is in
        # minutes, so convert by dividing (not by taking the remainder).
        minutes_since_error = (time.time() - self.last_error) / 60.0
        if minutes_since_error > self.check_hosts:
            self.current_hosts = self.master_hosts
            self.last_error = None

    def _refresh_hosts_from_zookeeper(self, tried=()):
        """
        Replace the host lists with the nodes zookeeper reports as active.

        No-op when no zookeeper client is configured.

        :param tried: hosts already attempted for the current request;
            they are left out of ``self.servers`` so a retry loop ends.

        :raises SolrError: if zookeeper reports no active node.
        """
        if self.zookeeper is None:
            return

        self.last_error = time.time()
        # get the current list of active nodes from zookeeper
        active_hosts = self.zookeeper.get_active_hosts()
        if not active_hosts:
            # all nodes are down - so raise an error
            raise SolrError("SOLR reporting all nodes as down")

        # Store copies so popping from self.servers never shrinks
        # self.current_hosts.
        self.current_hosts = list(active_hosts)
        self.servers = [host for host in active_hosts if host not in tried]

    def request(self, path, params, method, body=None):
        """
        Prepare data and send request to SOLR servers

        :param path: path joined onto the chosen host URL.

        :param params: query parameters, or None. The mapping is copied,
            not modified.

        :param method: HTTP method name (e.g. 'GET', 'POST').

        :param body: optional request body.

        :return: the result of ``process_response`` applied to the first
            successful response, or to the last response received when
            every host answered with a non-200 status.

        :raises SolrError: if no host is available, if zookeeper reports
            all nodes as down, or if the last host tried could not be
            connected to.
        """
        headers = {'content-type': 'application/json'}

        # Work on a copy so the caller's dict is left untouched.
        query = dict(params) if params is not None else {}
        query.update({'wt': 'json', 'omitHeader': 'true', 'json.nl': 'map'})

        # if there hasn't been an error in 5 minutes, reset the solr_hosts
        self._restore_master_hosts_if_due()

        self.servers = list(self.current_hosts)
        if not self.servers:
            # Nothing to try: ask zookeeper (if any) for live nodes.
            self._refresh_hosts_from_zookeeper()
            if not self.servers:
                raise SolrError("No SOLR hosts available")
        random.shuffle(self.servers)

        tried = set()
        while True:
            host = self.servers.pop(0)
            tried.add(host)

            response = None
            failure = None
            try:
                response = self.client.request(method,
                                               urljoin(host, path),
                                               params=query,
                                               headers=headers,
                                               data=body,
                                               timeout=self.timeout)
            # Didn't successfully connect to the node
            except ConnectionError as e:
                failure = e

            # Connected to the node, but didn't get a successful response
            bad_status = (failure is None
                          and hasattr(response, 'status_code')
                          and response.status_code != 200)
            if failure is None and not bad_status:
                break

            # Only consult zookeeper while there is still another node to
            # fall back to; hosts already tried are filtered out so the
            # loop always terminates.
            if self.servers:
                self._refresh_hosts_from_zookeeper(tried)

            if not self.servers:
                if failure is not None:
                    raise SolrError(str(failure))
                # Out of hosts: hand the last (non-200) response on.
                break

        return process_response(response)

    def post(self, path, params=None, body=None):
        """
        Send a POST request to the SOLR servers
        """
        return self.request(path, params, 'POST', body=body)

    def get(self, path, params=None):
        """
        Send a GET request to the SOLR servers
        """
        return self.request(path, params, 'GET')
Ejemplo n.º 3
0
    def __init__(self,
                 solr_hosts,
                 solr_collection,
                 zookeeper_hosts=None,
                 timeout=15,
                 zookeeper_timeout=5):
        """
        Do all the interactions with SOLR server
        (e.g. update, select, get and delete)

        :param solr_hosts: the hosts for SOLR, as a list or as a
            comma-separated string. May be None when zookeeper_hosts is
            given; the active hosts are then looked up in zookeeper.
        :type solr_hosts: list or str

        :param solr_collection: the name of the collection in SOLR.
        :type solr_collection: str

        :param zookeeper_hosts: the hosts for zookeeper, comma-separated,
            optionally followed by a ``/chroot`` suffix.
        :type zookeeper_hosts: str

        :param timeout: the timeout for request to SOLR.
        :type timeout: int

        :param zookeeper_timeout: the timeout passed to the Zookeeper
            client used to look up the SOLR hosts.
        :type zookeeper_timeout: int

        :raises SolrError: if solr_hosts (after the zookeeper lookup) or
            solr_collection is None.
        """

        if solr_hosts is None and zookeeper_hosts is not None:
            logger.info('Getting solr hosts from zookeeper for collection %s',
                        solr_collection)
            zk = Zookeeper(zookeeper_hosts, zookeeper_timeout)
            solr_hosts = zk.get_active_hosts(collection_name=solr_collection)

        if solr_hosts is None or solr_collection is None:
            logger.error('Neither solr_hosts nor solr_collection has been set')
            raise solr_errors.SolrError(
                "Either solr_hosts or solr_collection can not be None")

        if not isinstance(solr_hosts, list):
            solr_hosts = solr_hosts.split(",")

        if zookeeper_hosts is not None:
            hostnames, sep, chroot = zookeeper_hosts.rpartition('/')

            if not hostnames:
                # No '/' in the string: it is all host list, no chroot.
                hostnames = zookeeper_hosts
                chroot = ''
            else:
                chroot = '/%s' % chroot

            logger.debug('Using solr via zookeeper at chroot %s', chroot)

            # Iterate the host part only. Splitting the full string would
            # leave the chroot glued to the last host and append it twice.
            self.zookeeper_hosts = [
                "http://%s%s" % (
                    host,
                    chroot,
                ) for host in hostnames.split(",")
            ]

            logger.info('Connected to zookeeper hosts at %s',
                        self.zookeeper_hosts)

        else:
            logger.debug('Not using zookeeper for SolrCloud')
            self.zookeeper_hosts = None

        logger.info('Connected to solr hosts %s', solr_hosts)

        self.solr_hosts = [_format_solr_url(host) for host in solr_hosts]

        self.solr_collection = solr_collection

        # The request helper receives the raw zookeeper string, not the
        # formatted self.zookeeper_hosts list.
        self.client = SolrRequest(solr_hosts=self.solr_hosts,
                                  zookeeper_hosts=zookeeper_hosts,
                                  timeout=timeout)
Ejemplo n.º 4
0
 def zookeeper(self):
     """
     Return the cached Zookeeper client, building it on first use.

     A client is only created when none is cached yet and
     ``self.zookeeper_hosts`` is truthy; otherwise the cached value
     (possibly None) is returned unchanged.
     """
     client = self._zookeeper
     if client is None and self.zookeeper_hosts:
         client = Zookeeper(self.zookeeper_hosts, self.zookeeper_timeout)
         self._zookeeper = client
     return client
Ejemplo n.º 5
0
class TestZookeeper(unittest.TestCase):
    """
    Tests for the Zookeeper helper with ``kazoo.client.KazooClient``
    patched out, so no real zookeeper connection is made.
    """

    # NOTE(review): the original address was redacted in the source this
    # file was recovered from; this value is a placeholder - confirm.
    zook_client = Zookeeper("http://localhost:2181")

    # Cluster state shared by the tests: one collection with one shard
    # and two active replicas.
    CLUSTERSTATE = {
        'test_collection_one': {
            'shards': {
                'shard1': {
                    'range': '80000000-7fffffff',
                    'state': 'active',
                    'replicas': {
                        'core_node1': {
                            'state': 'active',
                            'core': 'test_collection_one_shard1_replica2',
                            'node_name': '127.0.0.1:8080_solr',
                            'base_url': 'http://127.0.0.1:8080/solr',
                            'leader': 'true',
                        },
                        'core_node2': {
                            'state': 'active',
                            'core': 'test_collection_one_shard1_replica1',
                            'node_name': '127.0.0.1:9090_solr',
                            'base_url': 'http://127.0.0.1:9090/solr',
                        }
                    }
                }
            },
            'maxShardsPerNode': '1',
            'router': {'name': 'compositeId'},
            'replicationFactor': '2',
            'autoAddReplicas': 'false'}
    }

    @patch('kazoo.client.KazooClient')
    def test_valid_clusterstate_file(self, mock_kazoo):
        """Active hosts are read from a valid cluster state."""

        class MockKazoo(object):
            def __init__(self):
                pass

            def start(self, *args, **kwargs):
                return True

            def get_children(self, *args, **kwargs):
                return ['test_collection_one']

            def get(self, *args, **kwargs):
                if args and args[0] == '/aliases.json':
                    data = None
                else:
                    data = json.dumps(TestZookeeper.CLUSTERSTATE)

                return (data,)  # Note that this is a tuple on purpose

            def stop(self):
                return True

        mock_kazoo.return_value = MockKazoo()
        result = self.zook_client.get_active_hosts()
        # list.sort() returns None, which would make this assertion
        # compare None with None; sorted() compares the actual hosts.
        self.assertEqual(
            sorted(result),
            sorted([u'http://127.0.0.1:8080', u'http://127.0.0.1:9090'])
        )

    @patch('kazoo.client.KazooClient')
    def test_no_clusterstate_file(self, mock_kazoo):
        """No collections and no data yields an empty host list."""

        class MockKazoo(object):
            def __init__(self):
                pass

            def start(self, *args, **kwargs):
                return True

            def get_children(self, *args, **kwargs):
                return []

            def get(self, *args, **kwargs):
                return None

            def stop(self):
                return True

        mock_kazoo.return_value = MockKazoo()
        result = self.zook_client.get_active_hosts()
        self.assertEqual(result, [])

    @patch('kazoo.client.KazooClient')
    def test_bad_connection(self, mock_kazoo):
        """A connection error on start yields an empty host list."""

        class MockKazoo(object):
            def __init__(self):
                pass

            def start(self, *args, **kwargs):
                raise requests.exceptions.ConnectionError

            def get_children(self, *args, **kwargs):
                return []

            def get(self, *args, **kwargs):
                return None

            def stop(self):
                return True

        mock_kazoo.return_value = MockKazoo()
        result = self.zook_client.get_active_hosts()
        self.assertEqual(result, [])

    @patch('kazoo.client.KazooClient')
    def test_get_aliases(self, mock_kazoo):
        """An alias resolves to the same hosts as its collection."""

        class MockKazoo(object):
            def __init__(self):
                pass

            def start(self, *args, **kwargs):
                return True

            def get_children(self, *args, **kwargs):
                return ['test_collection_one']

            def get(self, *args, **kwargs):
                if args and args[0] == '/aliases.json':
                    data = json.dumps({
                        'collection': {
                            'my_alias': 'test_collection_one',
                        }
                    })
                else:
                    data = json.dumps(TestZookeeper.CLUSTERSTATE)

                return (data,)  # Note that this is a tuple on purpose

            def stop(self):
                return True

        mock_kazoo.return_value = MockKazoo()
        result = self.zook_client._get_active_hosts()
        self.assertEqual(
            result['test_collection_one'],
            result['my_alias']
        )