Example #1
    def test_locked_container_dbs(self):

        def run_test(num_locks, catch_503):
            container = 'container-%s' % uuid4()
            client.put_container(self.url, self.token, container)
            db_files = self._get_container_db_files(container)
            db_conns = []
            for i in range(num_locks):
                db_conn = connect(db_files[i])
                db_conn.execute('begin exclusive transaction')
                db_conns.append(db_conn)
            if catch_503:
                try:
                    client.delete_container(self.url, self.token, container)
                except client.ClientException as err:
                    self.assertEqual(err.http_status, 503)
                else:
                    self.fail("Expected ClientException but didn't get it")
            else:
                client.delete_container(self.url, self.token, container)

        proxy_conf = readconf(self.configs['proxy-server'],
                              section_name='app:proxy-server')
        node_timeout = int(proxy_conf.get('node_timeout', 10))
        pool = GreenPool()
        try:
            with Timeout(node_timeout + 5):
                pool.spawn(run_test, 1, False)
                pool.spawn(run_test, 2, True)
                pool.spawn(run_test, 3, True)
                pool.waitall()
        except Timeout as err:
            raise Exception(
                "The server did not return a 503 on container db locks, "
                "it just hangs: %s" % err)
Example #2
    def test_locked_container_dbs(self):

        def run_test(num_locks, catch_503):
            container = 'container-%s' % uuid4()
            client.put_container(self.url, self.token, container)
            db_files = self._get_container_db_files(container)
            db_conns = []
            for i in range(num_locks):
                db_conn = connect(db_files[i])
                db_conn.execute('begin exclusive transaction')
                db_conns.append(db_conn)
            if catch_503:
                exc = None
                try:
                    client.delete_container(self.url, self.token, container)
                except client.ClientException as err:
                    exc = err
                self.assertEqual(exc.http_status, 503)
            else:
                client.delete_container(self.url, self.token, container)

        pool = GreenPool()
        try:
            with Timeout(15):
                pool.spawn(run_test, 1, False)
                pool.spawn(run_test, 2, True)
                pool.spawn(run_test, 3, True)
                pool.waitall()
        except Timeout as err:
            raise Exception(
                "The server did not return a 503 on container db locks, "
                "it just hangs: %s" % err)
Example #3
    def test_locked_container_dbs(self):
        def run_test(num_locks, catch_503):
            container = 'container-%s' % uuid4()
            client.put_container(self.url, self.token, container)
            db_files = self._get_container_db_files(container)
            db_conns = []
            for i in range(num_locks):
                db_conn = connect(db_files[i])
                db_conn.execute('begin exclusive transaction')
                db_conns.append(db_conn)
            if catch_503:
                try:
                    client.delete_container(self.url, self.token, container)
                except client.ClientException as err:
                    self.assertEqual(err.http_status, 503)
                else:
                    self.fail("Expected ClientException but didn't get it")
            else:
                client.delete_container(self.url, self.token, container)

        pool = GreenPool()
        try:
            with Timeout(15):
                pool.spawn(run_test, 1, False)
                pool.spawn(run_test, 2, True)
                pool.spawn(run_test, 3, True)
                pool.waitall()
        except Timeout as err:
            raise Exception(
                "The server did not return a 503 on container db locks, "
                "it just hangs: %s" % err)
Example #4
    def run(self, *args, **kwargs):
        try:
            self.logger.info('conscience agent: starting')

            pool = GreenPool(len(self.watchers))
            for watcher in self.watchers:
                pool.spawn(watcher.start)

            self.running = True
            while self.running:
                sleep(1)
                for w in self.watchers:
                    if w.failed:
                        self.watchers.remove(w)
                        self.logger.warn('restart watcher "%s"', w.name)
                        new_w = ServiceWatcher(self.conf, w.service)
                        self.watchers.append(new_w)
                        pool.spawn(new_w.start)

        except Exception as e:
            self.logger.error('ERROR in main loop %s', e)
            raise e
        finally:
            self.logger.warn('conscience agent: stopping')
            self.running = False
            self.stop_watchers()
Example #5
    def test_connection_pool_timeout(self):
        orig_conn_pool = memcached.MemcacheConnPool
        try:
            connections = defaultdict(Queue)
            pending = defaultdict(int)
            served = defaultdict(int)

            class MockConnectionPool(orig_conn_pool):
                def get(self):
                    pending[self.server] += 1
                    conn = connections[self.server].get()
                    pending[self.server] -= 1
                    return conn

                def put(self, *args, **kwargs):
                    connections[self.server].put(*args, **kwargs)
                    served[self.server] += 1

            memcached.MemcacheConnPool = MockConnectionPool

            memcache_client = memcached.MemcacheRing(['1.2.3.4:11211',
                                                      '1.2.3.5:11211'],
                                                     io_timeout=0.5,
                                                     pool_timeout=0.1)

            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')

            # let everyone block
            sleep(0)
            self.assertEqual(pending['1.2.3.5:11211'], 10)

            # hand out a couple of slow connections
            mock_conn = MagicMock(), MagicMock()
            mock_conn[1].sendall = lambda x: sleep(0.2)
            connections['1.2.3.5:11211'].put(mock_conn)
            connections['1.2.3.5:11211'].put(mock_conn)

            # so far so good, everyone is still waiting
            sleep(0)
            self.assertEqual(pending['1.2.3.5:11211'], 8)
            self.assertEqual(len(memcache_client._errors['1.2.3.5:11211']), 0)

            # but they won't wait longer than pool_timeout
            mock_conn = MagicMock(), MagicMock()
            connections['1.2.3.4:11211'].put(mock_conn)
            connections['1.2.3.4:11211'].put(mock_conn)
            p.waitall()
            self.assertEqual(len(memcache_client._errors['1.2.3.5:11211']), 8)
            self.assertEqual(served['1.2.3.5:11211'], 2)
            self.assertEqual(len(memcache_client._errors['1.2.3.4:11211']), 0)
            self.assertEqual(served['1.2.3.4:11211'], 8)

            # and we never got more put in than we gave out
            self.assertEqual(connections['1.2.3.5:11211'].qsize(), 2)
            self.assertEqual(connections['1.2.3.4:11211'].qsize(), 2)
        finally:
            memcached.MemcacheConnPool = orig_conn_pool
Example #6
    def test_connection_pool_timeout(self):
        orig_conn_pool = memcached.MemcacheConnPool
        try:
            connections = defaultdict(Queue)
            pending = defaultdict(int)
            served = defaultdict(int)

            class MockConnectionPool(orig_conn_pool):
                def get(self):
                    pending[self.server] += 1
                    conn = connections[self.server].get()
                    pending[self.server] -= 1
                    return conn

                def put(self, *args, **kwargs):
                    connections[self.server].put(*args, **kwargs)
                    served[self.server] += 1

            memcached.MemcacheConnPool = MockConnectionPool

            memcache_client = memcached.MemcacheRing(
                ['1.2.3.4:11211', '1.2.3.5:11211'],
                io_timeout=0.5,
                pool_timeout=0.1)

            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')

            # let everyone block
            sleep(0)
            self.assertEqual(pending['1.2.3.5:11211'], 10)

            # hand out a couple of slow connections
            mock_conn = MagicMock(), MagicMock()
            mock_conn[1].sendall = lambda x: sleep(0.2)
            connections['1.2.3.5:11211'].put(mock_conn)
            connections['1.2.3.5:11211'].put(mock_conn)

            # so far so good, everyone is still waiting
            sleep(0)
            self.assertEqual(pending['1.2.3.5:11211'], 8)
            self.assertEqual(len(memcache_client._errors['1.2.3.5:11211']), 0)

            # but they won't wait longer than pool_timeout
            mock_conn = MagicMock(), MagicMock()
            connections['1.2.3.4:11211'].put(mock_conn)
            connections['1.2.3.4:11211'].put(mock_conn)
            p.waitall()
            self.assertEqual(len(memcache_client._errors['1.2.3.5:11211']), 8)
            self.assertEqual(served['1.2.3.5:11211'], 2)
            self.assertEqual(len(memcache_client._errors['1.2.3.4:11211']), 0)
            self.assertEqual(served['1.2.3.4:11211'], 8)

            # and we never got more put in than we gave out
            self.assertEqual(connections['1.2.3.5:11211'].qsize(), 2)
            self.assertEqual(connections['1.2.3.4:11211'].qsize(), 2)
        finally:
            memcached.MemcacheConnPool = orig_conn_pool
Example #7
    def test_connection_pooling_pre_0_9_17(self):
        with patch('swift.common.memcached.socket') as mock_module:
            connected = []
            count = [0]

            def _slow_yielding_connector(addr):
                count[0] += 1
                if count[0] % 3 == 0:
                    raise ValueError('whoops!')
                sleep(0.1)
                connected.append(addr)

            mock_module.socket.return_value.connect.side_effect = \
                _slow_yielding_connector

            # If POOL_SIZE is not small enough relative to USER_COUNT, the
            # "free_items" business in the eventlet.pools.Pool will cause
            # spurious failures below.  I found these values to work well on a
            # VM running in VirtualBox on a late 2013 Retina MacbookPro:
            POOL_SIZE = 5
            USER_COUNT = 50

            pool = memcached.MemcacheConnPool('1.2.3.4:11211', size=POOL_SIZE,
                                              connect_timeout=10)
            self.assertEqual(POOL_SIZE, pool.max_size)

            def _user():
                got = None
                while not got:
                    try:
                        got = pool.get()
                    # This was really supposed to be "except:" but ran afoul
                    # of the H201 check, which does not implement the "noqa"
                    # exception.  Once that's fixed, the except here can be
                    # changed to "except: # noqa"
                    except (Exception, BaseException):
                        pass
                pool.put(got)

            # make a bunch of requests "at the same time"
            p = GreenPool()
            for i in range(USER_COUNT):
                p.spawn(_user)
            p.waitall()

            # If the except block after the "created = self.create()" call
            # doesn't correctly decrement self.current_size, this test will
            # fail by having some number less than POOL_SIZE connections (in my
            # testing, anyway).
            self.assertEqual(POOL_SIZE, len(connected))

            # Subsequent requests should get and use the existing
            # connections, not creating any more.
            for i in range(USER_COUNT):
                p.spawn(_user)
            p.waitall()

            self.assertEqual(POOL_SIZE, len(connected))
Example #8
    def test_connection_pool_timeout(self):
        connections = defaultdict(Queue)
        pending = defaultdict(int)
        served = defaultdict(int)

        class MockConnectionPool(memcached.MemcacheConnPool):
            def get(self):
                pending[self.host] += 1
                conn = connections[self.host].get()
                pending[self.host] -= 1
                return conn

            def put(self, *args, **kwargs):
                connections[self.host].put(*args, **kwargs)
                served[self.host] += 1

        with mock.patch.object(memcached, 'MemcacheConnPool',
                               MockConnectionPool):
            memcache_client = memcached.MemcacheRing(
                ['1.2.3.4:11211', '1.2.3.5:11211'],
                io_timeout=0.5,
                pool_timeout=0.1,
                logger=self.logger)

            # Hand out a couple slow connections to 1.2.3.5, leaving 1.2.3.4
            # fast. All ten (10) clients should try to talk to .5 first, and
            # then move on to .4, and we'll assert all that below.
            mock_conn = MagicMock(), MagicMock()
            mock_conn[0].readline = lambda: b'STORED\r\n'
            mock_conn[1].sendall = lambda x: sleep(0.2)
            connections['1.2.3.5'].put(mock_conn)
            connections['1.2.3.5'].put(mock_conn)

            mock_conn = MagicMock(), MagicMock()
            mock_conn[0].readline = lambda: b'STORED\r\n'
            connections['1.2.3.4'].put(mock_conn)
            connections['1.2.3.4'].put(mock_conn)

            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')

            # Wait for the dust to settle.
            p.waitall()

        self.assertEqual(pending['1.2.3.5'], 8)
        self.assertEqual(len(memcache_client._errors['1.2.3.5:11211']), 8)
        self.assertEqual(
            self.logger.get_lines_for_level('error'),
            ['Timeout getting a connection to memcached: 1.2.3.5:11211'] * 8)
        self.assertEqual(served['1.2.3.5'], 2)
        self.assertEqual(pending['1.2.3.4'], 0)
        self.assertEqual(len(memcache_client._errors['1.2.3.4:11211']), 0)
        self.assertEqual(served['1.2.3.4'], 8)

        # and we never got more put in than we gave out
        self.assertEqual(connections['1.2.3.5'].qsize(), 2)
        self.assertEqual(connections['1.2.3.4'].qsize(), 2)
Example #9
    def test_connection_pooling(self):
        with patch('swift.common.memcached.socket') as mock_module:
            # patch socket, stub socket.socket, mock sock
            mock_sock = mock_module.socket.return_value

            # track clients waiting for connections
            connected = []
            connections = Queue()
            errors = []

            def wait_connect(addr):
                connected.append(addr)
                sleep(0.1)  # yield
                val = connections.get()
                if val is not None:
                    errors.append(val)

            mock_sock.connect = wait_connect

            memcache_client = memcached.MemcacheRing(['1.2.3.4:11211'],
                                                     connect_timeout=10)
            # sanity
            self.assertEqual(1, len(memcache_client._client_cache))
            for server, pool in memcache_client._client_cache.items():
                self.assertEqual(2, pool.max_size)

            # make 10 requests "at the same time"
            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')
            for i in range(3):
                sleep(0.1)
                self.assertEqual(2, len(connected))

            # give out a connection
            connections.put(None)

            # at this point, only one connection should have actually been
            # created, the other is in the creation step, and the rest of the
            # clients are not attempting to connect. we let this play out a
            # bit to verify.
            for i in range(3):
                sleep(0.1)
                self.assertEqual(2, len(connected))

            # finish up, this allows the final connection to be created, so
            # that all the other clients can use the two existing connections
            # and no others will be created.
            connections.put(None)
            connections.put('nono')
            self.assertEqual(2, len(connected))
            p.waitall()
            self.assertEqual(2, len(connected))
            self.assertEqual(0, len(errors),
                             "A client was allowed a third connection")
            connections.get_nowait()
            self.assertTrue(connections.empty())
Example #10
    def test_connection_pooling(self):
        with patch('swift.common.memcached.socket') as mock_module:
            # patch socket, stub socket.socket, mock sock
            mock_sock = mock_module.socket.return_value

            # track clients waiting for connections
            connected = []
            connections = Queue()
            errors = []

            def wait_connect(addr):
                connected.append(addr)
                sleep(0.1)  # yield
                val = connections.get()
                if val is not None:
                    errors.append(val)

            mock_sock.connect = wait_connect

            memcache_client = memcached.MemcacheRing(['1.2.3.4:11211'],
                                                     connect_timeout=10)
            # sanity
            self.assertEqual(1, len(memcache_client._client_cache))
            for server, pool in memcache_client._client_cache.items():
                self.assertEqual(2, pool.max_size)

            # make 10 requests "at the same time"
            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')
            for i in range(3):
                sleep(0.1)
                self.assertEqual(2, len(connected))

            # give out a connection
            connections.put(None)

            # at this point, only one connection should have actually been
            # created, the other is in the creation step, and the rest of the
            # clients are not attempting to connect. we let this play out a
            # bit to verify.
            for i in range(3):
                sleep(0.1)
                self.assertEqual(2, len(connected))

            # finish up, this allows the final connection to be created, so
            # that all the other clients can use the two existing connections
            # and no others will be created.
            connections.put(None)
            connections.put('nono')
            self.assertEqual(2, len(connected))
            p.waitall()
            self.assertEqual(2, len(connected))
            self.assertEqual(0, len(errors),
                             "A client was allowed a third connection")
            connections.get_nowait()
            self.assertTrue(connections.empty())
Example #11
    def run(self, *args, **kwargs):
        try:
            self.logger.info('event agent: starting')

            pool = GreenPool(len(self.workers))

            for worker in self.workers:
                pool.spawn(worker.start)

            def front(server, backend):
                while True:
                    msg = server.recv_multipart()
                    if validate_msg(msg):
                        try:
                            event_id = sqlite3.Binary(msg[2])
                            data = msg[3]
                            self.queue.put(event_id, data)
                            event = ['', msg[2], msg[3]]
                            backend.send_multipart(event)
                        except Exception:
                            pass
                        finally:
                            ack = msg[0:3]
                            server.send_multipart(ack)

            def back(backend):
                while True:
                    msg = backend.recv_multipart()
                    event_id = msg[1]
                    event_id = sqlite3.Binary(event_id)
                    self.queue.delete(event_id)

            boss_pool = GreenPool(2)
            boss_pool.spawn_n(front, self.server, self.backend)
            boss_pool.spawn_n(back, self.backend)
            while True:
                sleep(1)

                now = time.time()
                if now - self.last_retry > self.retry_interval:
                    self.retry()
                    self.last_retry = now

                for w in self.workers:
                    if w.failed:
                        self.workers.remove(w)
                        self.logger.warn('restart worker "%s"', w.name)
                        new_w = EventWorker(self.conf, w.name, self.context)
                        self.workers.append(new_w)
                        pool.spawn(new_w.start)

        except Exception as e:
            self.logger.error('ERROR in main loop %s', e)
            raise e
        finally:
            self.logger.warn('event agent: stopping')
            self.stop_workers()
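A quick aside on the two spawn variants the run() loop above mixes: pool.spawn() returns a GreenThread whose result (or exception) can be collected later, while pool.spawn_n() is fire-and-forget. A minimal sketch, assuming only eventlet:

from eventlet import GreenPool

pool = GreenPool(2)
gt = pool.spawn(lambda: 40 + 2)
print(gt.wait())          # -> 42, the result is retrievable from the handle
pool.spawn_n(print, "spawn_n returns no handle to wait on")
pool.waitall()            # still waits for everything in the pool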
Example #12
    def test_connection_pool_timeout(self):
        orig_conn_pool = memcached.MemcacheConnPool
        try:
            connections = defaultdict(Queue)
            pending = defaultdict(int)
            served = defaultdict(int)

            class MockConnectionPool(orig_conn_pool):
                def get(self):
                    pending[self.server] += 1
                    conn = connections[self.server].get()
                    pending[self.server] -= 1
                    return conn

                def put(self, *args, **kwargs):
                    connections[self.server].put(*args, **kwargs)
                    served[self.server] += 1

            memcached.MemcacheConnPool = MockConnectionPool

            memcache_client = memcached.MemcacheRing(
                ['1.2.3.4:11211', '1.2.3.5:11211'],
                io_timeout=0.5,
                pool_timeout=0.1)

            # Hand out a couple slow connections to 1.2.3.5, leaving 1.2.3.4
            # fast. All ten (10) clients should try to talk to .5 first, and
            # then move on to .4, and we'll assert all that below.
            mock_conn = MagicMock(), MagicMock()
            mock_conn[1].sendall = lambda x: sleep(0.2)
            connections['1.2.3.5:11211'].put(mock_conn)
            connections['1.2.3.5:11211'].put(mock_conn)

            mock_conn = MagicMock(), MagicMock()
            connections['1.2.3.4:11211'].put(mock_conn)
            connections['1.2.3.4:11211'].put(mock_conn)

            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')

            # Wait for the dust to settle.
            p.waitall()

            self.assertEqual(pending['1.2.3.5:11211'], 8)
            self.assertEqual(len(memcache_client._errors['1.2.3.5:11211']), 8)
            self.assertEqual(served['1.2.3.5:11211'], 2)
            self.assertEqual(pending['1.2.3.4:11211'], 0)
            self.assertEqual(len(memcache_client._errors['1.2.3.4:11211']), 0)
            self.assertEqual(served['1.2.3.4:11211'], 8)

            # and we never got more put in than we gave out
            self.assertEqual(connections['1.2.3.5:11211'].qsize(), 2)
            self.assertEqual(connections['1.2.3.4:11211'].qsize(), 2)
        finally:
            memcached.MemcacheConnPool = orig_conn_pool
Example #13
    def test_connection_pool_timeout(self):
        orig_conn_pool = memcached.MemcacheConnPool
        try:
            connections = defaultdict(Queue)
            pending = defaultdict(int)
            served = defaultdict(int)

            class MockConnectionPool(orig_conn_pool):
                def get(self):
                    pending[self.host] += 1
                    conn = connections[self.host].get()
                    pending[self.host] -= 1
                    return conn

                def put(self, *args, **kwargs):
                    connections[self.host].put(*args, **kwargs)
                    served[self.host] += 1

            memcached.MemcacheConnPool = MockConnectionPool

            memcache_client = memcached.MemcacheRing(['1.2.3.4:11211',
                                                      '1.2.3.5:11211'],
                                                     io_timeout=0.5,
                                                     pool_timeout=0.1)

            # Hand out a couple slow connections to 1.2.3.5, leaving 1.2.3.4
            # fast. All ten (10) clients should try to talk to .5 first, and
            # then move on to .4, and we'll assert all that below.
            mock_conn = MagicMock(), MagicMock()
            mock_conn[1].sendall = lambda x: sleep(0.2)
            connections['1.2.3.5'].put(mock_conn)
            connections['1.2.3.5'].put(mock_conn)

            mock_conn = MagicMock(), MagicMock()
            connections['1.2.3.4'].put(mock_conn)
            connections['1.2.3.4'].put(mock_conn)

            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')

            # Wait for the dust to settle.
            p.waitall()

            self.assertEqual(pending['1.2.3.5'], 8)
            self.assertEqual(len(memcache_client._errors['1.2.3.5:11211']), 8)
            self.assertEqual(served['1.2.3.5'], 2)
            self.assertEqual(pending['1.2.3.4'], 0)
            self.assertEqual(len(memcache_client._errors['1.2.3.4:11211']), 0)
            self.assertEqual(served['1.2.3.4'], 8)

            # and we never got more put in than we gave out
            self.assertEqual(connections['1.2.3.5'].qsize(), 2)
            self.assertEqual(connections['1.2.3.4'].qsize(), 2)
        finally:
            memcached.MemcacheConnPool = orig_conn_pool
Example #14
    def test_connection_pooling_pre_0_9_17(self):
        with patch('swift.common.memcached.socket') as mock_module:
            connected = []
            count = [0]

            def _slow_yielding_connector(addr):
                count[0] += 1
                if count[0] % 3 == 0:
                    raise ValueError('whoops!')
                sleep(0.1)
                connected.append(addr)

            mock_module.socket.return_value.connect.side_effect = \
                _slow_yielding_connector

            # If POOL_SIZE is not small enough relative to USER_COUNT, the
            # "free_items" business in the eventlet.pools.Pool will cause
            # spurious failures below.  I found these values to work well on a
            # VM running in VirtualBox on a late 2013 Retina MacbookPro:
            POOL_SIZE = 5
            USER_COUNT = 50

            pool = memcached.MemcacheConnPool('1.2.3.4:11211',
                                              size=POOL_SIZE,
                                              connect_timeout=10)
            self.assertEqual(POOL_SIZE, pool.max_size)

            def _user():
                got = None
                while not got:
                    try:
                        got = pool.get()
                    except:  # noqa
                        pass
                pool.put(got)

            # make a bunch of requests "at the same time"
            p = GreenPool()
            for i in range(USER_COUNT):
                p.spawn(_user)
            p.waitall()

            # If the except block after the "created = self.create()" call
            # doesn't correctly decrement self.current_size, this test will
            # fail by having some number less than POOL_SIZE connections (in my
            # testing, anyway).
            self.assertEqual(POOL_SIZE, len(connected))

            # Subsequent requests should get and use the existing
            # connections, not creating any more.
            for i in range(USER_COUNT):
                p.spawn(_user)
            p.waitall()

            self.assertEqual(POOL_SIZE, len(connected))
Example #15
    def serve_forever(self):
        self.running = True
        self.server = listen(self.address, self._family())
        pool = GreenPool()

        try:
            while self.running:
                sock, address = self.server.accept()
                pool.spawn(self.call_handler, sock, address)
                self.greenlets
        except OSError:
            pass
Example #16
    def run(self):
        signal.signal(signal.SIGINT, self.signal_handler)
        pool = GreenPool()

        with open("config.yml") as file:
            config = yaml.safe_load(file)
        for router in config["routers"]:
            printmsg("Starting trasa on %s" % router["local_address"])
            trasa = Ldp(router["local_address"])
            self.trasas.append(trasa)
            pool.spawn(self.call_handler, trasa)
        pool.waitall()
        printmsg("All greenlets gone, exiting")
Example #17
    def serve_forever(self):
        self.running = True
        self.socket = socket.socket(socket.PF_PACKET, socket.SOCK_RAW,
                                    socket.htons(self.ethertype))
        self.socket.bind((self.interface_name, 0))
        self.get_interface_index()
        self.set_socket_promiscuous()
        self.poller = select.poll()
        self.poller.register(
            self.socket, select.POLLIN | select.POLLPRI | select.POLLERR
            | select.POLLHUP | select.POLLNVAL)
        pool = GreenPool()
        self.greenlets.add(pool.spawn(self.server))
        self.greenlets.add(pool.spawn(self.dispatcher))
        pool.waitall()
Example #18
class Resources:
    def __init__(self, providerbase):
        self._providerbase = providerbase
        self._spec2thread = {}
        self._pool = GreenPool()
        self._resources = {}

    def _dispatchprovider(self, spec):
        parts = spec.split(":")
        name = parts.pop(0)
        provider = getattr(self._providerbase, "provide_" + name)
        self._resources[spec] = res = provider(*parts)
        return res

    def getresources(self, *specs):
        for spec in specs:
            if spec not in self._resources:
                if spec not in self._spec2thread:
                    t = self._pool.spawn(self._dispatchprovider, spec)
                    self._spec2thread[spec] = t
        resources = []
        for spec in specs:
            if spec not in self._resources:
                self._spec2thread[spec].wait()
            resources.append(self._resources[spec])
        return resources
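To show how the Resources helper above is meant to be driven, here is a hedged usage sketch; ProviderBase and its provide_tempdir method are hypothetical stand-ins, not taken from the source.

class ProviderBase:
    def provide_tempdir(self, suffix):
        # illustrative provider; a real one would allocate something costly
        return "/tmp/res-" + suffix

resources = Resources(ProviderBase())
# both specs are provisioned in parallel greenthreads; a spec that was
# already requested reuses the cached result instead of spawning again
tmp_a, tmp_b = resources.getresources("tempdir:a", "tempdir:b")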
Example #19
    def test_connection(self):

        """
        conn = Connection(auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
                          client_id=str(uuid.uuid4()),
                          endpoint="http://localhost:8888/v1/12345",
                          user="", key="")

        """

        conn = Connection(auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
                          client_id=str(uuid.uuid4()),
                          endpoint="http://166.78.143.130/v1/12345",
                          user="", key="")


        conn.connect(token='blah')

        def create_worker(queue_name):
            return conn.create_queue(queue_name, 100)

        def post_worker(queue):
            return queue.post_message('test_message', 10)

        def delete_worker(queue_name):
            conn.delete_queue(queue_name)
            return queue_name

        pool = GreenPool(1000)

        def on_message_posted(greenthread):
            msg = greenthread.wait()
            print(msg._href)

        def on_queue_created(greenthread):
            queue = greenthread.wait()
            print(queue.name)

            for x in range(0, 10):
                gt = pool.spawn(post_worker, queue)
                gt.link(on_message_posted)

        queue_names = ["queue-" + str(x) for x in range(5)]

        for queue_name in queue_names:
            gt = pool.spawn(create_worker, queue_name)
            gt.link(on_queue_created)

        pool.waitall()

        def delete_worker(queue_name):
            conn.delete_queue(queue_name)
            print "Queue:", queue_name, " deleted"

        for queue in conn.get_queues():
            gt = pool.spawn_n(delete_worker, queue.name)

        print "Waiting for everything to finish"
        pool.waitall()
        print "Done"
Example #20
    def runtestsmulti(self, envlist):
        pool = GreenPool(size=self._toxconfig.option.numproc)
        threads = []
        for env in envlist:
            threads.append(pool.spawn(self.runtests, env))

        for t in threads:
            # re-raises any exceptions of the worker thread
            t.wait()
        if not self.toxsession.config.option.sdistonly:
            retcode = self._toxsession._summary()
            return retcode
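The comment about wait() above relies on an eventlet behaviour worth spelling out: GreenThread.wait() re-raises whatever exception the worker raised. A minimal sketch, assuming only eventlet:

from eventlet import GreenPool

def boom():
    raise RuntimeError("worker failed")

gt = GreenPool().spawn(boom)
try:
    gt.wait()             # the worker's exception is re-raised here
except RuntimeError as err:
    print("re-raised:", err)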
Example #21
    def test_print_route_updates(self):
        fake_route_update = "FAKE ROUTE UPDATE"
        self.state_machine.route_updates.put(fake_route_update)
        pool = GreenPool()
        eventlet = pool.spawn(self.peering.print_route_updates)
        for _ in range(10):
            sleep(0)
            if self.route_catcher.route_updates:
                break
        self.assertEqual(len(self.route_catcher.route_updates), 1)
        self.assertEqual(self.route_catcher.route_updates[0], fake_route_update)
        eventlet.kill()
Example #22
    def test_connection_pooling(self):
        with patch('swift.common.memcached.socket') as mock_module:
            # patch socket, stub socket.socket, mock sock
            mock_sock = mock_module.socket.return_value

            # track clients waiting for connections
            connected = []
            connections = Queue()

            def wait_connect(addr):
                connected.append(addr)
                connections.get()
            mock_sock.connect = wait_connect

            memcache_client = memcached.MemcacheRing(['1.2.3.4:11211'],
                                                     connect_timeout=10)
            # sanity
            self.assertEqual(1, len(memcache_client._client_cache))
            for server, pool in memcache_client._client_cache.items():
                self.assertEqual(2, pool.max_size)

            # make 10 requests "at the same time"
            p = GreenPool()
            for i in range(10):
                p.spawn(memcache_client.set, 'key', 'value')
            for i in range(3):
                sleep(0.1)
                self.assertEqual(2, len(connected))
            # give out a connection
            connections.put(None)
            for i in range(3):
                sleep(0.1)
                self.assertEqual(2, len(connected))
            # finish up
            for i in range(8):
                connections.put(None)
            self.assertEqual(2, len(connected))
            p.waitall()
            self.assertEqual(2, len(connected))
Example #23
    def test_locked_container_dbs(self):
        def run_test(num_locks, catch_503):
            container = 'container-%s' % uuid4()
            client.put_container(self.url, self.token, container)
            # Get the container info into memcache (so no stray
            # get_container_info calls muck up our timings)
            client.get_container(self.url, self.token, container)
            db_files = self.get_container_db_files(container)
            db_conns = []
            for i in range(num_locks):
                db_conn = connect(db_files[i])
                db_conn.execute('begin exclusive transaction')
                db_conns.append(db_conn)
            if catch_503:
                try:
                    client.delete_container(self.url, self.token, container)
                except client.ClientException as err:
                    self.assertEqual(err.http_status, 503)
                else:
                    self.fail("Expected ClientException but didn't get it")
            else:
                client.delete_container(self.url, self.token, container)

        proxy_conf = readconf(self.configs['proxy-server'],
                              section_name='app:proxy-server')
        node_timeout = int(proxy_conf.get('node_timeout', 10))
        pool = GreenPool()
        try:
            with Timeout(node_timeout + 5):
                pool.spawn(run_test, 1, False)
                pool.spawn(run_test, 2, True)
                pool.spawn(run_test, 3, True)
                pool.waitall()
        except Timeout as err:
            raise Exception(
                "The server did not return a 503 on container db locks, "
                "it just hangs: %s" % err)
Example #24
    def run(self, *args, **kwargs):
        try:
            self.logger.info('conscience agent: starting')

            pool = GreenPool(len(self.watchers))
            for watcher in self.watchers:
                pool.spawn(watcher.start)

            while True:
                sleep(1)
                for w in self.watchers:
                    if w.failed:
                        self.watchers.remove(w)
                        self.logger.warn('restart watcher "%s"', w.name)
                        new_w = ServiceWatcher(self.conf, w.service)
                        self.watchers.append(new_w)
                        pool.spawn(new_w.start)

        except Exception as e:
            self.logger.error('ERROR in main loop %s', e)
            raise e
        finally:
            self.logger.warn('conscience agent: stopping')
            self.stop_watchers()
Example #25
class WaitPool(PoolInterface):
    def __init__(self, pool_size=1000, queue_size=1000):
        self._pool_size = int(pool_size)
        self._queue_size = int(queue_size)
        self._pool = GreenPool(self._pool_size)
        self._max_job_id = ''

    def can_spawn(self, job_id):
        if job_id <= self._max_job_id:
            return True
        if self._pool.free() > 0 or self._pool.waiting() < self._queue_size:
            self._max_job_id = job_id
            return True
        return False

    def _spawn(self, function, *args, **kwargs):
        return self._pool.spawn(function, *args, **kwargs)
Example #26
class PriorityPool(PoolInterface):

    def __init__(self, low_watermark=1000, high_watermark=1000):
        self._low_watermark = int(low_watermark)
        self._high_watermark = int(high_watermark)
        self._pool = GreenPool(self._high_watermark)
        self._max_job_id = ''

    def can_spawn(self, job_id):
        if job_id <= self._max_job_id:
            return True
        if self._pool.running() < self._low_watermark:
            self._max_job_id = job_id
            return True
        return False

    def _spawn(self, function, *args, **kwargs):
        return self._pool.spawn(function, *args, **kwargs)
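Both the WaitPool and PriorityPool wrappers above lean on GreenPool's bookkeeping methods. A minimal sketch of those primitives, assuming only eventlet (the values in the comments hold for this toy setup):

from eventlet import GreenPool, sleep

pool = GreenPool(2)       # capacity that running()/free()/waiting() report against
pool.spawn(sleep, 0.1)
print(pool.running())     # greenthreads currently held by the pool (1 here)
print(pool.free())        # spare slots before spawn() would block (1 here)
print(pool.waiting())     # callers currently blocked inside spawn() (0 here)
pool.waitall()            # drain the pool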
Example #27
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf, logger=None):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = PrefixLoggerAdapter(
            logger or get_logger(conf, log_route='object-replicator'), {})
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6200))
        self.concurrency = int(conf.get('concurrency', 1))
        self.replicator_workers = int(conf.get('replicator_workers', 0))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.replication_cycle = random.randint(0, 9)
        self.partition_times = []
        self.interval = int(conf.get('interval') or
                            conf.get('run_pause') or 30)
        if 'run_pause' in conf and 'interval' not in conf:
            self.logger.warning('Option object-replicator/run_pause '
                                'is deprecated and will be removed in a '
                                'future version. Update your configuration'
                                ' to use option object-replicator/'
                                'interval.')
        self.rsync_timeout = int(conf.get('rsync_timeout',
                                          DEFAULT_RSYNC_TIMEOUT))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
        self.rsync_compress = config_true_value(
            conf.get('rsync_compress', 'no'))
        self.rsync_module = conf.get('rsync_module', '').rstrip('/')
        if not self.rsync_module:
            self.rsync_module = '{replication_ip}::object'
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self._next_rcache_update = time.time() + self.stats_interval
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.sync_method = getattr(self, conf.get('sync_method') or 'rsync')
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.default_headers = {
            'Content-Length': '0',
            'user-agent': 'object-replicator %s' % os.getpid()}
        self.rsync_error_log_line_length = \
            int(conf.get('rsync_error_log_line_length', 0))
        self.handoffs_first = config_true_value(conf.get('handoffs_first',
                                                         False))
        self.handoff_delete = config_auto_int_value(
            conf.get('handoff_delete', 'auto'), 0)
        if any((self.handoff_delete, self.handoffs_first)):
            self.logger.warning('Handoff only mode is not intended for normal '
                                'operation, please disable handoffs_first and '
                                'handoff_delete before the next '
                                'normal rebalance')
        self.is_multiprocess_worker = None
        self._df_router = DiskFileRouter(conf, self.logger)
        self._child_process_reaper_queue = queue.LightQueue()

    def _zero_stats(self):
        self.stats_for_dev = defaultdict(Stats)

    @property
    def total_stats(self):
        return sum(self.stats_for_dev.values(), Stats())

    def _emplace_log_prefix(self, worker_index):
        self.logger.set_prefix("[worker %d/%d pid=%d] " % (
            worker_index + 1,  # use 1-based indexing for more readable logs
            self.replicator_workers,
            os.getpid()))

    def _get_my_replication_ips(self):
        my_replication_ips = set()
        ips = whataremyips()
        for policy in POLICIES:
            self.load_object_ring(policy)
            for local_dev in [dev for dev in policy.object_ring.devs
                              if dev and dev['replication_ip'] in ips and
                              dev['replication_port'] == self.port]:
                my_replication_ips.add(local_dev['replication_ip'])
        return list(my_replication_ips)

    def _child_process_reaper(self):
        """
        Consume processes from self._child_process_reaper_queue and wait() for
        them
        """
        procs = set()
        done = False
        while not done:
            timeout = 60 if procs else None
            try:
                new_proc = self._child_process_reaper_queue.get(
                    timeout=timeout)
                if new_proc is not None:
                    procs.add(new_proc)
                else:
                    done = True
            except queue.Empty:
                pass

            reaped_procs = set()
            for proc in procs:
                # this will reap the process if it has exited, but
                # otherwise will not wait
                if proc.poll() is not None:
                    reaped_procs.add(proc)
            procs -= reaped_procs

    def get_worker_args(self, once=False, **kwargs):
        if self.replicator_workers < 1:
            return []

        override_opts = parse_override_options(once=once, **kwargs)
        have_overrides = bool(override_opts.devices or override_opts.partitions
                              or override_opts.policies)

        # save this off for ring-change detection later in is_healthy()
        self.all_local_devices = self.get_local_devices()

        if override_opts.devices:
            devices_to_replicate = [
                d for d in override_opts.devices
                if d in self.all_local_devices]
        else:
            # The sort isn't strictly necessary since we're just trying to
            # spread devices around evenly, but it makes testing easier.
            devices_to_replicate = sorted(self.all_local_devices)

        # Distribute devices among workers as evenly as possible
        self.replicator_workers = min(self.replicator_workers,
                                      len(devices_to_replicate))
        return [{'override_devices': devs,
                 'override_partitions': override_opts.partitions,
                 'override_policies': override_opts.policies,
                 'have_overrides': have_overrides,
                 'multiprocess_worker_index': index}
                for index, devs in enumerate(
                    distribute_evenly(devices_to_replicate,
                                      self.replicator_workers))]

    def is_healthy(self):
        """
        Check whether our set of local devices remains the same.

        If devices have been added or removed, then we return False here so
        that we can kill off any worker processes and then distribute the
        new set of local devices across a new set of workers so that all
        devices are, once again, being worked on.

        This function may also cause recon stats to be updated.

        :returns: False if any local devices have been added or removed,
          True otherwise
        """
        # We update recon here because this is the only function we have in
        # a multiprocess replicator that gets called periodically in the
        # parent process.
        if time.time() >= self._next_rcache_update:
            update = self.aggregate_recon_update()
            dump_recon_cache(update, self.rcache, self.logger)
        return self.get_local_devices() == self.all_local_devices

    def get_local_devices(self):
        """
        Returns a set of all local devices in all replication-type storage
        policies.

        This is the device names, e.g. "sdq" or "d1234" or something, not
        the full ring entries.
        """
        ips = whataremyips(self.bind_ip)
        local_devices = set()
        for policy in POLICIES:
            if policy.policy_type != REPL_POLICY:
                continue
            self.load_object_ring(policy)
            for device in policy.object_ring.devs:
                if device and is_local_device(
                        ips, self.port,
                        device['replication_ip'],
                        device['replication_port']):
                    local_devices.add(device['device'])
        return local_devices

    # Just exists for doc anchor point
    def sync(self, node, job, suffixes, *args, **kwargs):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean and dictionary, boolean indicating success or failure
        """
        return self.sync_method(node, job, suffixes, *args, **kwargs)

    def load_object_ring(self, policy):
        """
        Make sure the policy's rings are loaded.

        :param policy: the StoragePolicy instance
        :returns: appropriate ring object
        """
        policy.load_ring(self.swift_dir)
        return policy.object_ring

    def _limit_rsync_log(self, line):
        """
        If rsync_error_log_line_length is defined then
        limit the error to that length

        :param line: rsync log line
        :return: If enabled the line limited to rsync_error_log_line_length
                 otherwise the initial line.
        """
        if self.rsync_error_log_line_length:
            return line[:self.rsync_error_log_line_length]

        return line

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        proc = None

        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(
                self._limit_rsync_log(
                    _("Killing long-running rsync: %s") % str(args)))
            if proc:
                proc.kill()
                try:
                    # Note: Python 2.7's subprocess.Popen class doesn't take
                    # any arguments for wait(), but Python 3's does.
                    # However, Eventlet's replacement Popen takes a timeout
                    # argument regardless of Python version, so we don't
                    # need any conditional code here.
                    proc.wait(timeout=1.0)
                except subprocess.TimeoutExpired:
                    # Sometimes a process won't die immediately even after a
                    # SIGKILL. This can be due to failing disks, high load,
                    # or other reasons. We can't wait for it forever since
                    # we're taking up a slot in the (green)thread pool, so
                    # we send it over to another greenthread, not part of
                    # our pool, whose sole duty is to wait for child
                    # processes to exit.
                    self._child_process_reaper_queue.put(proc)
            return 1  # failure response code

        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            self.logger.error(
                self._limit_rsync_log(
                    _('Bad rsync return code: %(ret)d <- %(args)s') %
                    {'args': str(args), 'ret': ret_val}))
        else:
            log_method = self.logger.info if results else self.logger.debug
            log_method(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """
        if not os.path.exists(job['path']):
            return False, {}
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
            '--bwlimit=%s' % self.rsync_bwlimit,
            '--exclude=.*.%s' % ''.join('[0-9a-zA-Z]' for i in range(6))
        ]
        if self.rsync_compress and \
                job['region'] != node['region']:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            args.append('--compress')
        rsync_module = rsync_module_interpolation(self.rsync_module, node)
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False, {}
        data_dir = get_data_dir(job['policy'])
        args.append(join(rsync_module, node['device'],
                    data_dir, job['partition']))
        return self._rsync(args) == 0, {}

    def ssync(self, node, job, suffixes, remote_check_objs=None):
        return ssync_sender.Sender(
            self, node, job, suffixes, remote_check_objs)()

    def check_ring(self, object_ring):
        """
        Check to see if the ring has been updated
        :param object_ring: the ring to check

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """

        def tpool_get_suffixes(path):
            return [suff for suff in os.listdir(path)
                    if len(suff) == 3 and isdir(join(path, suff))]

        stats = self.stats_for_dev[job['device']]
        stats.attempted += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'],))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        failure_devs_info = set()
        begin = time.time()
        handoff_partition_deleted = False
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            synced_remote_regions = {}
            delete_objs = None
            if suffixes:
                for node in job['nodes']:
                    stats.rsync += 1
                    kwargs = {}
                    if node['region'] in synced_remote_regions and \
                            self.conf.get('sync_method', 'rsync') == 'ssync':
                        kwargs['remote_check_objs'] = \
                            synced_remote_regions[node['region']]
                    # candidates is a dict(hash=>timestamp) of objects
                    # for deletion
                    success, candidates = self.sync(
                        node, job, suffixes, **kwargs)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(
                                node['replication_ip'],
                                node['replication_port'],
                                node['device'], job['partition'], 'REPLICATE',
                                '/' + '-'.join(suffixes), headers=headers)
                            conn.getresponse().read()
                        if node['region'] != job['region']:
                            synced_remote_regions[node['region']] = viewkeys(
                                candidates)
                    else:
                        failure_devs_info.add((node['replication_ip'],
                                               node['device']))
                    responses.append(success)
                for cand_objs in synced_remote_regions.values():
                    if delete_objs is None:
                        delete_objs = cand_objs
                    else:
                        delete_objs = delete_objs & cand_objs

            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
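            # Illustrative example: with handoff_delete = 2 and responses of
            # [True, False, True], two successful syncs meet the threshold,
            # so delete_handoff is True even though one node failed.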
            if delete_handoff:
                stats.remove += 1
                if (self.conf.get('sync_method', 'rsync') == 'ssync' and
                        delete_objs is not None):
                    self.logger.info(_("Removing %s objects"),
                                     len(delete_objs))
                    _junk, error_paths = self.delete_handoff_objs(
                        job, delete_objs)
                    # If replication from this hand-off device succeeded but
                    # the local cleanup failed, mark the remote target devices
                    # as failures: a failed cleanup means the replicator must
                    # replicate this partition again with the same info on the
                    # next pass.
                    if error_paths:
                        failure_devs_info.update(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in job['nodes']])
                else:
                    self.delete_partition(job['path'])
                    handoff_partition_deleted = True
            elif not suffixes:
                self.delete_partition(job['path'])
                handoff_partition_deleted = True
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
            stats.add_failure_stats(failure_devs_info)
        finally:
            target_devs_info = set([(target_dev['replication_ip'],
                                     target_dev['device'])
                                    for target_dev in job['nodes']])
            stats.success += len(target_devs_info - failure_devs_info)
            if not handoff_partition_deleted:
                self.handoffs_remaining += 1
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)

    def delete_partition(self, path):
        self.logger.info(_("Removing partition: %s"), path)
        try:
            tpool.execute(shutil.rmtree, path)
        except OSError as e:
            if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                # If there was a race to create or delete, don't worry
                raise

    def delete_handoff_objs(self, job, delete_objs):
        success_paths = []
        error_paths = []
        for object_hash in delete_objs:
            object_path = storage_directory(job['obj_path'], job['partition'],
                                            object_hash)
            tpool.execute(shutil.rmtree, object_path, ignore_errors=True)
            suffix_dir = dirname(object_path)
            try:
                os.rmdir(suffix_dir)
                success_paths.append(object_path)
            except OSError as e:
                if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                    error_paths.append(object_path)
                    self.logger.exception(
                        "Unexpected error trying to cleanup suffix dir:%r",
                        suffix_dir)
        return success_paths, error_paths

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        stats = self.stats_for_dev[job['device']]
        stats.attempted += 1
        self.logger.increment('partition.update.count.%s' % (job['device'],))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        df_mgr = self._df_router[job['policy']]
        try:
            hashed, local_hash = tpool.execute(
                df_mgr._get_hashes, job['device'],
                job['partition'], job['policy'],
                do_listdir=_do_listdir(
                    int(job['partition']),
                    self.replication_cycle))
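            # _do_listdir spreads the cost of a full-directory-listing hash
            # recalculation across passes: roughly one partition in ten does
            # a listdir for any given replication_cycle value.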
            stats.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            synced_remote_regions = set()
            random.shuffle(job['nodes'])
            nodes = itertools.chain(
                job['nodes'],
                job['policy'].object_ring.get_more_nodes(
                    int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node['replication_ip'], node['device']))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node['region'] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '', headers=headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(replication_ip)s/%(device)s '
                                  'responded as unmounted'), node)
                            attempts_left += 1
                            failure_devs_info.add((node['replication_ip'],
                                                   node['device']))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                              {'resp': resp.status,
                                               'ip': node['replication_ip']})
                            failure_devs_info.add((node['replication_ip'],
                                                   node['device']))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    if not suffixes:
                        stats.hashmatch += 1
                        continue
                    hashed, recalc_hash = tpool.execute(
                        df_mgr._get_hashes,
                        job['device'], job['partition'], job['policy'],
                        recalculate=suffixes)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
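                    # Hashes were recalculated for just the mismatched
                    # suffixes and re-diffed against the remote hashes, so
                    # only suffixes that still differ get synced below.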
                    stats.rsync += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers=headers)
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add((node['replication_ip'],
                                               node['device']))
                    # add only remote region when replicate succeeded
                    if success and node['region'] != job['region']:
                        synced_remote_regions.add(node['region'])
                    stats.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add((node['replication_ip'],
                                           node['device']))
                    self.logger.exception(_("Error syncing with node: %s") %
                                          node)
            stats.suffix_count += len(local_hash)
        except StopIteration:
            self.logger.error('Ran out of handoffs while replicating '
                              'partition %s of policy %d',
                              job['partition'], int(job['policy']))
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            stats.add_failure_stats(failure_devs_info)
            stats.success += len(target_devs_info - failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        stats = self.total_stats
        replication_count = stats.attempted
        if replication_count > self.last_replication_count:
            self.last_replication_count = replication_count
            elapsed = (time.time() - self.start) or 0.000001
            rate = replication_count / elapsed
            self.logger.info(
                _("%(replicated)d/%(total)d (%(percentage).2f%%)"
                  " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                  "%(remaining)s remaining)"),
                {'replicated': replication_count, 'total': self.job_count,
                 'percentage': replication_count * 100.0 / self.job_count,
                 'time': time.time() - self.start, 'rate': rate,
                 'remaining': '%d%s' % compute_eta(self.start,
                                                   replication_count,
                                                   self.job_count)})
            self.logger.info(_('%(success)s successes, %(failure)s failures')
                             % dict(success=stats.success,
                                    failure=stats.failure))

            if stats.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"),
                    {'checked': stats.suffix_count,
                     'hashed':
                     (stats.suffix_hash * 100.0) / stats.suffix_count,
                     'synced':
                     (stats.suffix_sync * 100.0) / stats.suffix_count})
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"),
                    {'max': self.partition_times[-1],
                     'min': self.partition_times[0],
                     'med': self.partition_times[
                         len(self.partition_times) // 2]})
        else:
            self.logger.info(
                _("Nothing replicated for %s seconds."),
                (time.time() - self.start))

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def build_replication_jobs(self, policy, ips, override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using a replication-style storage policy.
        """
        jobs = []
        df_mgr = self._df_router[policy]
        self.all_devs_info.update(
            [(dev['replication_ip'], dev['device'])
             for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [dev for dev in policy.object_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              and (override_devices is None
                                   or dev['device'] in override_devices))]:
            found_local = True
            local_dev_stats = self.stats_for_dev[local_dev['device']]
            try:
                dev_path = check_drive(self.devices_dir, local_dev['device'],
                                       self.mount_check)
            except ValueError as err:
                local_dev_stats.add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in policy.object_ring.devs
                     if failure_dev])
                self.logger.warning("%s", err)
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            unlink_older_than(tmp_path, time.time() -
                              df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None and partition.isdigit()
                        and int(partition) not in override_partitions):
                    continue

                if (partition.startswith('auditor_status_') and
                        partition.endswith('.json')):
                    # ignore auditor status files
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id']]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        local_dev_stats.add_failure_stats(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in nodes])
                    else:
                        local_dev_stats.add_failure_stats(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in policy.object_ring.devs
                             if failure_dev])
                    continue
        if not found_local:
            self.logger.error("Can't find itself in policy with index %d with"
                              " ips %s and with port %s in ring file, not"
                              " replicating",
                              int(policy), ", ".join(ips), self.port)
        return jobs

    def collect_jobs(self, override_devices=None, override_partitions=None,
                     override_policies=None):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.

        :param override_devices: if set, only jobs on these devices
            will be returned
        :param override_partitions: if set, only jobs on these partitions
            will be returned
        :param override_policies: if set, only jobs in these storage
            policies will be returned
        """
        jobs = []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            # Skip replication if next_part_power is set. In this case
            # every object is hard-linked twice, but the replicator can't
            # detect them and would create a second copy of the file if not
            # yet existing - and this might double the actual transferred
            # and stored data
            next_part_power = getattr(
                policy.object_ring, 'next_part_power', None)
            if next_part_power is not None:
                self.logger.warning(
                    _("next_part_power set in policy '%s'. Skipping"),
                    policy.name)
                continue

            if policy.policy_type == REPL_POLICY:
                if (override_policies is not None and
                        policy.idx not in override_policies):
                    continue
                # ensure rings are loaded for policy
                self.load_object_ring(policy)
                jobs += self.build_replication_jobs(
                    policy, ips, override_devices=override_devices,
                    override_partitions=override_partitions)
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs

    def replicate(self, override_devices=None, override_partitions=None,
                  override_policies=None, start_time=None):
        """Run a replication pass"""
        if start_time is None:
            start_time = time.time()
        self.start = start_time
        self.last_replication_count = 0
        self.replication_cycle = (self.replication_cycle + 1) % 10
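        # replication_cycle wraps around 0-9; update() passes it to
        # _do_listdir() to pick which partitions do a full listdir this pass.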
        self.partition_times = []
        self.my_replication_ips = self._get_my_replication_ips()
        self.all_devs_info = set()
        self.handoffs_remaining = 0

        stats = eventlet.spawn(self.heartbeat)
        eventlet.sleep()  # Give spawns a cycle

        current_nodes = None
        dev_stats = None
        num_jobs = 0
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs(override_devices=override_devices,
                                     override_partitions=override_partitions,
                                     override_policies=override_policies)
            for job in jobs:
                dev_stats = self.stats_for_dev[job['device']]
                num_jobs += 1
                current_nodes = job['nodes']
                try:
                    check_drive(self.devices_dir, job['device'],
                                self.mount_check)
                except ValueError as err:
                    dev_stats.add_failure_stats([
                        (failure_dev['replication_ip'], failure_dev['device'])
                        for failure_dev in job['nodes']])
                    self.logger.warning("%s", err)
                    continue
                if self.handoffs_first and not job['delete']:
                    # in handoffs_first mode, we won't process primary
                    # partitions until the rebalance has completed (i.e. no
                    # handoff partitions remain)
                    if self.handoffs_remaining:
                        self.logger.warning(_(
                            "Handoffs first mode still has handoffs "
                            "remaining.  Aborting current "
                            "replication pass."))
                        break
                if not self.check_ring(job['policy'].object_ring):
                    self.logger.info(_("Ring change detected. Aborting "
                                       "current replication pass."))
                    return

                try:
                    if isfile(job['path']):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job['path'])
                        os.remove(job['path'])
                        continue
                except OSError:
                    continue
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            current_nodes = None
            self.run_pool.waitall()
        except (Exception, Timeout) as err:
            if dev_stats:
                if current_nodes:
                    dev_stats.add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in current_nodes])
                else:
                    dev_stats.add_failure_stats(self.all_devs_info)
            self.logger.exception(
                _("Exception in top-level replication loop: %s"), err)
        finally:
            stats.kill()
            self.stats_line()

    def update_recon(self, total, end_time, override_devices):
        # Called at the end of a replication pass to update recon stats.
        if self.is_multiprocess_worker:
            # If it weren't for the failure_nodes field, we could do this as
            # a bunch of shared memory using multiprocessing.Value, which
            # would be nice because it'd avoid dealing with existing data
            # during an upgrade.
            update = {
                'object_replication_per_disk': {
                    od: {'replication_stats':
                         self.stats_for_dev[od].to_recon(),
                         'replication_time': total,
                         'replication_last': end_time,
                         'object_replication_time': total,
                         'object_replication_last': end_time}
                    for od in override_devices}}
        else:
            update = {'replication_stats': self.total_stats.to_recon(),
                      'replication_time': total,
                      'replication_last': end_time,
                      'object_replication_time': total,
                      'object_replication_last': end_time}
        dump_recon_cache(update, self.rcache, self.logger)

    def aggregate_recon_update(self):
        per_disk_stats = load_recon_cache(self.rcache).get(
            'object_replication_per_disk', {})
        recon_update = {}
        min_repl_last = float('inf')
        min_repl_time = float('inf')

        # If every child has reported some stats, then aggregate things.
        if all(ld in per_disk_stats for ld in self.all_local_devices):
            aggregated = Stats()
            for device_name, data in per_disk_stats.items():
                aggregated += Stats.from_recon(data['replication_stats'])
                min_repl_time = min(
                    min_repl_time, data['object_replication_time'])
                min_repl_last = min(
                    min_repl_last, data['object_replication_last'])
            recon_update['replication_stats'] = aggregated.to_recon()
            recon_update['replication_last'] = min_repl_last
            recon_update['replication_time'] = min_repl_time
            recon_update['object_replication_last'] = min_repl_last
            recon_update['object_replication_time'] = min_repl_time

        # Clear out entries for old local devices that we no longer have
        devices_to_remove = set(per_disk_stats) - set(self.all_local_devices)
        if devices_to_remove:
            recon_update['object_replication_per_disk'] = {
                dtr: {} for dtr in devices_to_remove}

        return recon_update

    def run_once(self, multiprocess_worker_index=None,
                 have_overrides=False, *args, **kwargs):
        if multiprocess_worker_index is not None:
            self.is_multiprocess_worker = True
            self._emplace_log_prefix(multiprocess_worker_index)

        rsync_reaper = eventlet.spawn(self._child_process_reaper)
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_opts = parse_override_options(once=True, **kwargs)
        devices = override_opts.devices or None
        partitions = override_opts.partitions or None
        policies = override_opts.policies or None

        start_time = time.time()
        self.replicate(
            override_devices=devices,
            override_partitions=partitions,
            override_policies=policies,
            start_time=start_time)
        end_time = time.time()
        total = (end_time - start_time) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)

        # If we've been manually run on a subset of
        # policies/devices/partitions, then our recon stats are not
        # representative of how replication is doing, so we don't publish
        # them.
        if self.is_multiprocess_worker:
            # The main process checked for overrides and determined that
            # there were none
            should_update_recon = not have_overrides
        else:
            # We are single-process, so update recon only if we worked on
            # everything
            should_update_recon = not (partitions or devices or policies)
        if should_update_recon:
            self.update_recon(total, end_time, devices)

        # Give rsync processes one last chance to exit, then bail out and
        # let them be init's problem
        self._child_process_reaper_queue.put(None)
        rsync_reaper.wait()

    def run_forever(self, multiprocess_worker_index=None,
                    override_devices=None, *args, **kwargs):
        if multiprocess_worker_index is not None:
            self.is_multiprocess_worker = True
            self._emplace_log_prefix(multiprocess_worker_index)
        self.logger.info(_("Starting object replicator in daemon mode."))
        eventlet.spawn_n(self._child_process_reaper)
        # Run the replicator continually
        while True:
            self._zero_stats()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            start = time.time()
            self.replicate(override_devices=override_devices)
            end = time.time()
            total = (end - start) / 60
            self.logger.info(
                _("Object replication complete. (%.02f minutes)"), total)
            self.update_recon(total, end, override_devices)
            self.logger.debug('Replication sleeping for %s seconds.',
                              self.interval)
            sleep(self.interval)

    def post_multiprocess_run(self):
        # This method is called after run_once using multiple workers.
        update = self.aggregate_recon_update()
        dump_recon_cache(update, self.rcache, self.logger)
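
# A minimal driving sketch (not part of the example above): one way to kick
# off a single replication pass by hand. The config path and section name are
# assumptions; rings and drives must already be in place for the pass to do
# any work.
from swift.common.utils import readconf
from swift.obj.replicator import ObjectReplicator

conf = readconf('/etc/swift/object-server.conf',
                section_name='object-replicator')
replicator = ObjectReplicator(conf)
replicator.run_once()  # zeroes stats, runs replicate(), updates recon when
                       # no overrides are given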
Ejemplo n.º 28
0
class Ldp(object):
    LISTEN_PORT = 646
    MULTICAST_ADDRESS = '224.0.0.2'

    def __init__(self, listen_ip):
        self.listen_ip = listen_ip
        self.running = False
        self.socket = None
        self.eventlets = []
        self.last_message_id = 0

    def get_message_id(self):
        self.last_message_id += 1
        return self.last_message_id

    def run(self):
        self.running = True
        self.pool = GreenPool()
        self.eventlets = []

        self.eventlets.append(self.pool.spawn(self.handle_packets_in))
        self.eventlets.append(self.pool.spawn(self.hello_timer))
        self.eventlets.append(self.pool.spawn(self.run_tcp_handler))

        self.pool.waitall()

    def run_tcp_handler(self):
        print("Starting TCP socket on %s:%s" %
              (self.listen_ip, self.LISTEN_PORT))
        self.stream_server = StreamServer((self.listen_ip, self.LISTEN_PORT),
                                          self.handle_tcp)
        self.stream_server.serve_forever()

    def handle_tcp(self, socket, address):
        peer_ip, peer_port = address
        messages_sent = 0
        print("Got connection from %s:%s" % (peer_ip, peer_port))
        input_stream = socket.makefile(mode="rb")
        chopper = Chopper(4, 2, 0, input_stream)
        state_machine = LdpStateMachine(self.listen_ip, peer_ip)
        try:
            while True:
                sleep(0)
                serialised_pdu = chopper.next()
                print("Got PDU from %s:%s" % (peer_ip, peer_port))
                pdu = parse_ldp_pdu(serialised_pdu)
                messages = pdu.messages
                for message in messages:
                    outbound_messages = state_machine.message_received(message)
                    outbound_pdus = []
                    for outbound_message in outbound_messages:
                        outbound_message.message_id = self.get_message_id()
                        print("Sending message %s" % outbound_message)
                        pdu = LdpPdu(1, self.listen_ip, 0,
                                     [outbound_message.pack()])
                        outbound_pdus.append(pdu)
                    for pdu in outbound_pdus:
                        socket.send(pdu.pack())
                if state_machine.state == "NONEXISTENT":
                    break
        except (SocketClosedError, StopIteration) as e:
            print("Socket closed from %s:%s" % (peer_ip, peer_port))
        print("Closing socket with %s:%s" % (peer_ip, peer_port))
        socket.close()

    def handle_packets_in(self):
        self.multicast_socket = MulticastSocket(self.MULTICAST_ADDRESS,
                                                self.LISTEN_PORT,
                                                self.listen_ip)
        self.multicast_socket.bind()

        try:
            while self.running:
                sleep(1)
                while True:
                    data, address = self.multicast_socket.recv(4096, 10)
                    if not data:
                        break

                    pdu = parse_ldp_pdu(data)
                    messages = pdu.messages
                    if len(messages) > 1:
                        print(
                            "Weird... got PDU from %s with lots of messages: %s"
                            % (address, messages))
                        continue

                    message = messages[0]
                    if not isinstance(message, LdpHelloMessage):
                        print(
                            "Got message from %s but it isn't a hello message: %s"
                            % (address, message))
                        continue

                    print("Got hello message from %s ID %s" %
                          (address, message.message_id))

        except OSError:
            pass

    def hello_timer(self):
        next_timer_at = int(time())
        while self.running:
            sleep(1)
            if int(time()) > next_timer_at:
                self.send_hello(self.get_message_id())
                next_timer_at += 5

    def send_hello(self, message_id):
        print("Sending hello message")
        tlvs = {0x0401: build_byte_string("ac1a016a")}
        message = LdpHelloMessage(message_id, 15, False, False, tlvs)
        pdu = LdpPdu(1, self.listen_ip, 0, [message.pack()])
        if self.multicast_socket:
            self.multicast_socket.send(pdu.pack())
        else:
            print("Not sending; UDP socket dead")

    def shutdown(self):
        self.running = False
        self.multicast_socket.shutdown()

        for eventlet in self.eventlets:
            eventlet.kill()
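
# Usage sketch (illustrative): start an LDP speaker bound to a local address.
# The address is an assumption; it happens to match the transport-address TLV
# hard-coded in send_hello() ("ac1a016a" == 172.26.1.106). run() blocks until
# the green pool drains.
if __name__ == '__main__':
    ldp = Ldp('172.26.1.106')
    try:
        ldp.run()
    except KeyboardInterrupt:
        ldp.shutdown()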
'''
running count() using native OS threads
'''
class CountThread(threading.Thread):
    def run(self):
        count()
 
print "running count() as two threads"
c1 = CountThread()
c2 = CountThread()
start_time = datetime.datetime.now()
c1.start()
c2.start()
c1.join()
c2.join()
end_time = datetime.datetime.now()
print end_time - start_time

'''
running count() using eventlet green threads
'''
print "running count() as two green threads"

start_time = datetime.datetime.now()

pool = GreenPool()
pool.spawn(count)
pool.spawn(count)
pool.waitall()

end_time = datetime.datetime.now()
print end_time - start_time
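
# The fragment above assumes count() and these imports were defined earlier in
# the example. A plausible (hypothetical) stand-in is a pure CPU-bound loop;
# with no I/O to yield on, green threads gain nothing here, so the timings
# mostly compare scheduling overhead.
import datetime
import threading

from eventlet import GreenPool


def count(n=10000000):
    # Busy loop with nothing to block on, so a green thread never yields.
    i = 0
    while i < n:
        i += 1
    return i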
Ejemplo n.º 30
0
    def reap_container(self, account, account_partition, account_nodes,
                       container):
        """
        Deletes the data and the container itself for the given container. This
        will call :func:`reap_object` up to sqrt(self.concurrency) times
        concurrently for the objects in the container.

        If there is any exception while deleting a single object, the process
        will continue for any other objects in the container and the failed
        objects will be tried again the next time this function is called with
        the same parameters.

        If there is any exception while listing the objects for deletion, the
        process will stop (but will obviously be tried again the next time this
        function is called with the same parameters). This is a possibility
        since the listing comes from querying just the primary remote container
        server.

        Once deletion has been attempted for every object, deletion of the
        container itself is attempted by sending a delete request to all
        container nodes. The format of the delete request is such that each
        container server will update a corresponding account server, removing
        the container from the account's listing.

        This function returns nothing and should raise no exception but only
        update various self.stats_* values for what occurs.

        :param account: The name of the account for the container.
        :param account_partition: The partition for the account on the account
                                  ring.
        :param account_nodes: The primary node dicts for the account.
        :param container: The name of the container to delete.

        * See also: :func:`swift.common.ring.Ring.get_nodes` for a description
          of the account node dicts.
        """
        account_nodes = list(account_nodes)
        part, nodes = self.get_container_ring().get_nodes(account, container)
        node = nodes[-1]
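        # Object listings are fetched from a single primary container node
        # (the last in the ring's node list), as described in the docstring.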
        pool = GreenPool(size=self.object_concurrency)
        marker = ''
        while True:
            objects = None
            try:
                headers, objects = direct_get_container(
                    node, part, account, container,
                    marker=marker,
                    conn_timeout=self.conn_timeout,
                    response_timeout=self.node_timeout)
                self.stats_return_codes[2] = \
                    self.stats_return_codes.get(2, 0) + 1
                self.logger.increment('return_codes.2')
            except ClientException as err:
                if self.logger.getEffectiveLevel() <= DEBUG:
                    self.logger.exception(
                        _('Exception with %(ip)s:%(port)s/%(device)s'), node)
                self.stats_return_codes[err.http_status / 100] = \
                    self.stats_return_codes.get(err.http_status / 100, 0) + 1
                self.logger.increment(
                    'return_codes.%d' % (err.http_status / 100,))
            if not objects:
                break
            try:
                policy_index = headers.get('X-Backend-Storage-Policy-Index', 0)
                for obj in objects:
                    if isinstance(obj['name'], unicode):
                        obj['name'] = obj['name'].encode('utf8')
                    pool.spawn(self.reap_object, account, container, part,
                               nodes, obj['name'], policy_index)
                pool.waitall()
            except (Exception, Timeout):
                self.logger.exception(_('Exception with objects for container '
                                        '%(container)s for account %(account)s'
                                        ),
                                      {'container': container,
                                       'account': account})
            marker = objects[-1]['name']
            if marker == '':
                break
        successes = 0
        failures = 0
        for node in nodes:
            anode = account_nodes.pop()
            try:
                direct_delete_container(
                    node, part, account, container,
                    conn_timeout=self.conn_timeout,
                    response_timeout=self.node_timeout,
                    headers={'X-Account-Host': '%(ip)s:%(port)s' % anode,
                             'X-Account-Partition': str(account_partition),
                             'X-Account-Device': anode['device'],
                             'X-Account-Override-Deleted': 'yes'})
                successes += 1
                self.stats_return_codes[2] = \
                    self.stats_return_codes.get(2, 0) + 1
                self.logger.increment('return_codes.2')
            except ClientException as err:
                if self.logger.getEffectiveLevel() <= DEBUG:
                    self.logger.exception(
                        _('Exception with %(ip)s:%(port)s/%(device)s'), node)
                failures += 1
                self.logger.increment('containers_failures')
                self.stats_return_codes[err.http_status / 100] = \
                    self.stats_return_codes.get(err.http_status / 100, 0) + 1
                self.logger.increment(
                    'return_codes.%d' % (err.http_status / 100,))
        if successes > failures:
            self.stats_containers_deleted += 1
            self.logger.increment('containers_deleted')
        elif not successes:
            self.stats_containers_remaining += 1
            self.logger.increment('containers_remaining')
        else:
            self.stats_containers_possibly_remaining += 1
            self.logger.increment('containers_possibly_remaining')
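
# A standalone sketch of the marker-style pagination pattern used above,
# assuming a hypothetical list_page(marker) callable that returns at most one
# page of object names and an empty list once the listing is exhausted.
def iter_listing(list_page):
    marker = ''
    while True:
        names = list_page(marker)
        if not names:
            break
        for name in names:
            yield name
        # The next page starts after the last name just processed.
        marker = names[-1]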
Ejemplo n.º 31
0
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """
    def __init__(self, conf):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.vm_test_mode = config_true_value(conf.get('vm_test_mode', 'no'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.port = int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.run_pause = int(conf.get('run_pause', 30))
        self.rsync_timeout = int(conf.get('rsync_timeout', 900))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.sync_method = getattr(self, conf.get('sync_method') or 'rsync')
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.headers = {
            'Content-Length': '0',
            'user-agent': 'obj-replicator %s' % os.getpid()
        }
        self.rsync_error_log_line_length = \
            int(conf.get('rsync_error_log_line_length', 0))
        self.handoffs_first = config_true_value(
            conf.get('handoffs_first', False))
        self.handoff_delete = config_auto_int_value(
            conf.get('handoff_delete', 'auto'), 0)
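        # 'auto' resolves to 0 here, i.e. a handoff partition is only deleted
        # when every sync succeeded (see update_deleted below).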
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def sync(self, node, job, suffixes):  # Just exists for doc anchor point
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean indicating success or failure
        """
        return self.sync_method(node, job, suffixes)

    def get_object_ring(self, policy_idx):
        """
        Get the ring object to use to handle a request based on its policy.

        :param policy_idx: policy index as defined in swift.conf
        :returns: appropriate ring object
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        ret_val = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            error_line = _('Bad rsync return code: %(ret)d <- %(args)s') % \
                {'args': str(args), 'ret': ret_val}
            if self.rsync_error_log_line_length:
                error_line = error_line[:self.rsync_error_log_line_length]
            self.logger.error(error_line)
        elif results:
            self.logger.info(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"), {
                    'src': args[-2],
                    'dst': args[-1],
                    'time': total_time
                })
        else:
            self.logger.debug(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"), {
                    'src': args[-2],
                    'dst': args[-1],
                    'time': total_time
                })
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """
        if not os.path.exists(job['path']):
            return False
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
            '--bwlimit=%s' % self.rsync_bwlimit,
        ]
        node_ip = rsync_ip(node['replication_ip'])
        if self.vm_test_mode:
            rsync_module = '%s::object%s' % (node_ip, node['replication_port'])
        else:
            rsync_module = '%s::object' % node_ip
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False
        data_dir = get_data_dir(job['policy_idx'])
        args.append(
            join(rsync_module, node['device'], data_dir, job['partition']))
        return self._rsync(args) == 0

    def ssync(self, node, job, suffixes):
        return ssync_sender.Sender(self, node, job, suffixes)()

    def check_ring(self, object_ring):
        """
        Check to see if the ring has been updated
        :param object_ring: the ring to check

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """
        def tpool_get_suffixes(path):
            return [
                suff for suff in os.listdir(path)
                if len(suff) == 3 and isdir(join(path, suff))
            ]

        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'], ))
        self.headers[POLICY_INDEX] = job['policy_idx']
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            if suffixes:
                for node in job['nodes']:
                    success = self.sync(node, job, suffixes)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(node['replication_ip'],
                                                node['replication_port'],
                                                node['device'],
                                                job['partition'],
                                                'REPLICATE',
                                                '/' + '-'.join(suffixes),
                                                headers=self.headers)
                            conn.getresponse().read()
                    responses.append(success)
            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if not suffixes or delete_handoff:
                self.logger.info(_("Removing partition: %s"), job['path'])
                tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        self.headers[POLICY_INDEX] = job['policy_idx']
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(
                get_hashes,
                job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
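            # do_listdir is only True for every tenth partition handled,
            # amortizing the expensive full directory listing across the pass.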
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            nodes = itertools.chain(
                job['nodes'],
                job['object_ring'].get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'],
                            node['replication_port'],
                            node['device'],
                            job['partition'],
                            'REPLICATE',
                            '',
                            headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(ip)s/%(device)s responded'
                                  ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['replication_ip']
                                  })
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]

                    self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers=self.headers)
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        if self.replication_count:
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.replication_count / elapsed
            self.logger.info(
                _("%(replicated)d/%(total)d (%(percentage).2f%%)"
                  " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                  "%(remaining)s remaining)"), {
                      'replicated':
                      self.replication_count,
                      'total':
                      self.job_count,
                      'percentage':
                      self.replication_count * 100.0 / self.job_count,
                      'time':
                      time.time() - self.start,
                      'rate':
                      rate,
                      'remaining':
                      '%d%s' % compute_eta(self.start, self.replication_count,
                                           self.job_count)
                  })
            if self.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"), {
                          'checked': self.suffix_count,
                          'hashed':
                          (self.suffix_hash * 100.0) / self.suffix_count,
                          'synced':
                          (self.suffix_sync * 100.0) / self.suffix_count
                      })
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"), {
                          'max': self.partition_times[-1],
                          'min': self.partition_times[0],
                          'med':
                          self.partition_times[len(self.partition_times) // 2]
                      })
        else:
            self.logger.info(_("Nothing replicated for %s seconds."),
                             (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator finishes its replication
        pass in some eventuality.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def process_repl(self, policy, jobs, ips):
        """
        Helper function for collect_jobs to build jobs for replication
        using a replication-style storage policy.
        """
        obj_ring = self.get_object_ring(policy.idx)
        data_dir = get_data_dir(policy.idx)
        for local_dev in [
                dev for dev in obj_ring.devs
                if dev and dev['replication_ip'] in ips
                and dev['replication_port'] == self.port
        ]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                try:
                    job_path = join(obj_path, partition)
                    if isfile(job_path):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job_path)
                        os.remove(job_path)
                        continue
                    part_nodes = obj_ring.get_part_nodes(int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
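                    # delete is True only when this device holds the partition
                    # as a handoff, i.e. it is not one of the partition's
                    # primary nodes, so every primary appears in nodes.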
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy_idx=policy.idx,
                             partition=partition,
                             object_ring=obj_ring))

                except (ValueError, OSError):
                    continue

    def collect_jobs(self):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.
        """
        jobs = []
        ips = whataremyips()
        for policy in POLICIES:
            if policy.policy_type == 'replication':
                self.process_repl(policy, jobs, ips)
            # add else conditions here for future policy types
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
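            # not job['delete'] is False for handoff jobs, so the stable sort
            # moves them to the front while keeping the shuffled order within
            # each group.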
        self.job_count = len(jobs)
        return jobs

    def replicate(self, override_devices=None, override_partitions=None):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []

        if override_devices is None:
            override_devices = []
        if override_partitions is None:
            override_partitions = []

        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle

        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs()
            for job in jobs:
                if override_devices and job['device'] not in override_devices:
                    continue
                if override_partitions and \
                        job['partition'] not in override_partitions:
                    continue
                dev_path = join(self.devices_dir, job['device'])
                if self.mount_check and not ismount(dev_path):
                    self.logger.warn(_('%s is not mounted'), job['device'])
                    continue
                if not self.check_ring(job['object_ring']):
                    self.logger.info(
                        _("Ring change detected. Aborting "
                          "current replication pass."))
                    return
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))
        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        self.replicate(override_devices=override_devices,
                       override_partitions=override_partitions)
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            dump_recon_cache(
                {
                    'object_replication_time': total,
                    'object_replication_last': time.time()
                }, self.rcache, self.logger)

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            start = time.time()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - start) / 60
            self.logger.info(_("Object replication complete. (%.02f minutes)"),
                             total)
            dump_recon_cache(
                {
                    'object_replication_time': total,
                    'object_replication_last': time.time()
                }, self.rcache, self.logger)
            self.logger.debug('Replication sleeping for %s seconds.',
                              self.run_pause)
            sleep(self.run_pause)
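
The replicate() pass above pairs a GreenPool of job coroutines with background greenthreads for heartbeat logging and lockup detection, all torn down in the finally block. A minimal, self-contained sketch of that pattern (illustrative names and timings, not Swift code):

import eventlet
from eventlet import GreenPool, Timeout


def job(n):
    # Stand-in for a replication job.
    eventlet.sleep(0.01)
    return n


def heartbeat(interval=0.05):
    # Periodic progress logging, like heartbeat() above.
    while True:
        eventlet.sleep(interval)
        print("still working...")


pool = GreenPool(4)
stats = eventlet.spawn(heartbeat)
eventlet.sleep()  # give the heartbeat greenthread a cycle to start
try:
    for i in range(10):
        pool.spawn(job, i)
    with Timeout(5):
        pool.waitall()
except (Exception, Timeout):
    print("pass aborted")
finally:
    stats.kill()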
Ejemplo n.º 32
    def test_connection(self):
        """
        conn = Connection(
            auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
            client_id=str(uuid.uuid4()),
            endpoint="http://localhost:8888/v1/12345",
            user="", key="")

        """

        conn = Connection(
            auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
            client_id=str(uuid.uuid4()),
            endpoint="http://166.78.143.130/v1/12345",
            user="",
            key="")

        conn.connect(token='blah')

        def create_worker(queue_name):
            return conn.create_queue(queue_name)

        def post_worker(queue):
            return queue.post_message('test_message', 10)

        def delete_worker(queue_name):
            conn.delete_queue(queue_name)
            return queue_name

        pool = GreenPool(100)

        def on_message_posted(greenthread):
            msg = greenthread.wait()
            print msg._href

        def on_queue_created(greenthread):
            queue = greenthread.wait()
            print queue.name

            for x in range(0, 10):
                gt = pool.spawn(post_worker, queue)
                gt.link(on_message_posted)

        queue_names = ["queue-" + str(x) for x in xrange(0, 5)]

        for queue_name in queue_names:
            gt = pool.spawn(create_worker, queue_name)
            gt.link(on_queue_created)

        pool.waitall()

        def delete_worker(queue_name):
            conn.delete_queue(queue_name)
            print "Queue:", queue_name, " deleted"

        for queue in conn.get_queues():
            gt = pool.spawn_n(delete_worker, queue.name)

        print "Waiting for everything to finish"
        pool.waitall()
        print "Done"
Ejemplo n.º 33
            products_updated.append(id)
        else:
            products_errored.append(id)

    if not async:
        for p in products:
            if api_call(p['cart_prod_id'], p['qty']) == 1:
                products_updated.append(p['cart_prod_id'])
            else:
                products_errored.append(p['cart_prod_id'])
        return products_updated, products_errored, retry_count

    pool = GreenPool(max_connections)
    monkey_patch()
    for p in products:
        pool.spawn(api_call, p['cart_prod_id'], p['qty']).link(callback, p['cart_prod_id'])
    pool.waitall()

    if retry and len(products_errored) > 0:
        product_dict = dict(zip([x['cart_prod_id'] for x in products], [x['qty'] for x in products]))
        while products_errored and (retry_count < len(products) * 2):  # @todo tweak the retry count
            # Retry a snapshot of the current failures: popping entries while
            # enumerating the same list skips items, and the callback may
            # append new failures to products_errored as the retries complete.
            failed_now = list(products_errored)
            del products_errored[:]
            for product_id in failed_now:
                qty = product_dict[product_id]
                retry_count += 1
                pool.spawn(api_call, product_id, qty).link(callback, product_id)
            pool.waitall()

    return products_updated, products_errored, retry_count

if __name__ == '__main__':
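
The snippet above relies on link() currying extra positional arguments: .link(callback, product_id) makes eventlet call callback(greenthread, product_id) once the job finishes. A small self-contained sketch of that behaviour (stand-in API and illustrative names, not the original code):

from eventlet import GreenPool


def api_call(prod_id, qty):
    # Stand-in for the real inventory API; pretend zero quantities fail.
    return 1 if qty else 0


def callback(gt, prod_id):
    # Extra positional args passed to link() are appended after the
    # greenthread itself.
    status = gt.wait()
    print(prod_id, "updated" if status == 1 else "errored")


pool = GreenPool(10)
for prod_id, qty in [("a", 1), ("b", 0), ("c", 2)]:
    pool.spawn(api_call, prod_id, qty).link(callback, prod_id)
pool.waitall()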
Ejemplo n.º 34
            db_conns = []
            for i in range(num_locks):
                db_conn = connect(db_files[i])
                db_conn.execute('begin exclusive transaction')
                db_conns.append(db_conn)
            if catch_503:
                exc = None
                try:
                    client.delete_container(self.url, self.token, container)
                except client.ClientException as err:
                    exc = err
                self.assertEqual(exc.http_status, 503)
            else:
                client.delete_container(self.url, self.token, container)

        pool = GreenPool()
        try:
            with Timeout(15):
                pool.spawn(run_test, 1, False)
                pool.spawn(run_test, 2, True)
                pool.spawn(run_test, 3, True)
                pool.waitall()
        except Timeout as err:
            raise Exception(
                "The server did not return a 503 on container db locks, "
                "it just hangs: %s" % err)


if __name__ == '__main__':
    main()
Ejemplo n.º 35
'''


class CountThread(threading.Thread):
    def run(self):
        count()


print "running count() as two threads"
c1 = CountThread()
c2 = CountThread()
start_time = datetime.datetime.now()
c1.start()
c2.start()
c1.join()
c2.join()
end_time = datetime.datetime.now()
print end_time - start_time
'''
running count() in a green threading manner
'''
print "running count() as two green threads"

start_time = datetime.datetime.now()

pool = GreenPool()
# Pass the callable itself; calling count() here would run it inline and hand
# its return value to spawn().
pool.spawn(count)
pool.spawn(count)
# Wait for both green threads to finish before taking the end time.
pool.waitall()

end_time = datetime.datetime.now()
print end_time - start_time
Ejemplo n.º 36
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf, logger=None):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route="object-replicator")
        self.devices_dir = conf.get("devices", "/srv/node")
        self.mount_check = config_true_value(conf.get("mount_check", "true"))
        self.swift_dir = conf.get("swift_dir", "/etc/swift")
        self.bind_ip = conf.get("bind_ip", "0.0.0.0")
        self.servers_per_port = int(conf.get("servers_per_port", "0") or 0)
        self.port = None if self.servers_per_port else int(conf.get("bind_port", 6000))
        self.concurrency = int(conf.get("concurrency", 1))
        self.stats_interval = int(conf.get("stats_interval", "300"))
        self.ring_check_interval = int(conf.get("ring_check_interval", 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get("reclaim_age", 86400 * 7))
        self.partition_times = []
        self.interval = int(conf.get("interval") or conf.get("run_pause") or 30)
        self.rsync_timeout = int(conf.get("rsync_timeout", 900))
        self.rsync_io_timeout = conf.get("rsync_io_timeout", "30")
        self.rsync_bwlimit = conf.get("rsync_bwlimit", "0")
        self.rsync_compress = config_true_value(conf.get("rsync_compress", "no"))
        self.rsync_module = conf.get("rsync_module", "").rstrip("/")
        if not self.rsync_module:
            self.rsync_module = "{replication_ip}::object"
            if config_true_value(conf.get("vm_test_mode", "no")):
                self.logger.warning(
                    "Option object-replicator/vm_test_mode "
                    "is deprecated and will be removed in a "
                    "future version. Update your "
                    "configuration to use option "
                    "object-replicator/rsync_module."
                )
                self.rsync_module += "{replication_port}"
        self.http_timeout = int(conf.get("http_timeout", 60))
        self.lockup_timeout = int(conf.get("lockup_timeout", 1800))
        self.recon_cache_path = conf.get("recon_cache_path", "/var/cache/swift")
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self.conn_timeout = float(conf.get("conn_timeout", 0.5))
        self.node_timeout = float(conf.get("node_timeout", 10))
        self.sync_method = getattr(self, conf.get("sync_method") or "rsync")
        self.network_chunk_size = int(conf.get("network_chunk_size", 65536))
        self.default_headers = {"Content-Length": "0", "user-agent": "object-replicator %s" % os.getpid()}
        self.rsync_error_log_line_length = int(conf.get("rsync_error_log_line_length", 0))
        self.handoffs_first = config_true_value(conf.get("handoffs_first", False))
        self.handoff_delete = config_auto_int_value(conf.get("handoff_delete", "auto"), 0)
        if any((self.handoff_delete, self.handoffs_first)):
            self.logger.warning(
                "Handoff only mode is not intended for normal "
                "operation, please disable handoffs_first and "
                "handoff_delete before the next "
                "normal rebalance"
            )
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def _zero_stats(self):
        """Zero out the stats."""
        self.stats = {
            "attempted": 0,
            "success": 0,
            "failure": 0,
            "hashmatch": 0,
            "rsync": 0,
            "remove": 0,
            "start": time.time(),
            "failure_nodes": {},
        }

    def _add_failure_stats(self, failure_devs_info):
        for node, dev in failure_devs_info:
            self.stats["failure"] += 1
            failure_devs = self.stats["failure_nodes"].setdefault(node, {})
            failure_devs.setdefault(dev, 0)
            failure_devs[dev] += 1

    def _get_my_replication_ips(self):
        my_replication_ips = set()
        ips = whataremyips()
        for policy in POLICIES:
            self.load_object_ring(policy)
            for local_dev in [
                dev
                for dev in policy.object_ring.devs
                if dev and dev["replication_ip"] in ips and dev["replication_port"] == self.port
            ]:
                my_replication_ips.add(local_dev["replication_ip"])
        return list(my_replication_ips)

    # Just exists for doc anchor point
    def sync(self, node, job, suffixes, *args, **kwargs):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean and dictionary, boolean indicating success or failure
        """
        return self.sync_method(node, job, suffixes, *args, **kwargs)

    def load_object_ring(self, policy):
        """
        Make sure the policy's rings are loaded.

        :param policy: the StoragePolicy instance
        :returns: appropriate ring object
        """
        policy.load_ring(self.swift_dir)
        return policy.object_ring

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        ret_val = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
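        # Relay rsync's itemized output line by line: skip blanks and
        # directory-only ("cd+...") lines, log the rest at info on success or
        # error on failure.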
        for result in results.split("\n"):
            if result == "":
                continue
            if result.startswith("cd+"):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            error_line = _("Bad rsync return code: %(ret)d <- %(args)s") % {"args": str(args), "ret": ret_val}
            if self.rsync_error_log_line_length:
                error_line = error_line[: self.rsync_error_log_line_length]
            self.logger.error(error_line)
        elif results:
            self.logger.info(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {"src": args[-2], "dst": args[-1], "time": total_time},
            )
        else:
            self.logger.debug(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {"src": args[-2], "dst": args[-1], "time": total_time},
            )
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """
        if not os.path.exists(job["path"]):
            return False, {}
        args = [
            "rsync",
            "--recursive",
            "--whole-file",
            "--human-readable",
            "--xattrs",
            "--itemize-changes",
            "--ignore-existing",
            "--timeout=%s" % self.rsync_io_timeout,
            "--contimeout=%s" % self.rsync_io_timeout,
            "--bwlimit=%s" % self.rsync_bwlimit,
        ]
        if self.rsync_compress and job["region"] != node["region"]:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            args.append("--compress")
        rsync_module = rsync_module_interpolation(self.rsync_module, node)
        had_any = False
        for suffix in suffixes:
            spath = join(job["path"], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False, {}
        data_dir = get_data_dir(job["policy"])
        args.append(join(rsync_module, node["device"], data_dir, job["partition"]))
        return self._rsync(args) == 0, {}

    def ssync(self, node, job, suffixes, remote_check_objs=None):
        return ssync_sender.Sender(self, node, job, suffixes, remote_check_objs)()

    def check_ring(self, object_ring):
        """
        Check to see if the ring has been updated.

        :param object_ring: the ring to check
        :returns: False if the ring has changed (and the current replication
                  pass should be aborted), True otherwise
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """

        def tpool_get_suffixes(path):
            return [suff for suff in os.listdir(path) if len(suff) == 3 and isdir(join(path, suff))]

        self.replication_count += 1
        self.logger.increment("partition.delete.count.%s" % (job["device"],))
        headers = dict(self.default_headers)
        headers["X-Backend-Storage-Policy-Index"] = int(job["policy"])
        failure_devs_info = set()
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job["path"])
            synced_remote_regions = {}
            delete_objs = None
            if suffixes:
                for node in job["nodes"]:
                    self.stats["rsync"] += 1
                    kwargs = {}
                    if node["region"] in synced_remote_regions and self.conf.get("sync_method", "rsync") == "ssync":
                        kwargs["remote_check_objs"] = synced_remote_regions[node["region"]]
                    # candidates is a dict(hash=>timestamp) of objects
                    # for deletion
                    success, candidates = self.sync(node, job, suffixes, **kwargs)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(
                                node["replication_ip"],
                                node["replication_port"],
                                node["device"],
                                job["partition"],
                                "REPLICATE",
                                "/" + "-".join(suffixes),
                                headers=headers,
                            )
                            conn.getresponse().read()
                        if node["region"] != job["region"]:
                            synced_remote_regions[node["region"]] = viewkeys(candidates)
                    else:
                        failure_devs_info.add((node["replication_ip"], node["device"]))
                    responses.append(success)
                for region, cand_objs in synced_remote_regions.items():
                    if delete_objs is None:
                        delete_objs = cand_objs
                    else:
                        delete_objs = delete_objs & cand_objs

            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job["nodes"]) and all(responses)
            if delete_handoff:
                self.stats["remove"] += 1
                if self.conf.get("sync_method", "rsync") == "ssync" and delete_objs is not None:
                    self.logger.info(_("Removing %s objects"), len(delete_objs))
                    _junk, error_paths = self.delete_handoff_objs(job, delete_objs)
                    # If cleanup on the hand-off device failed, mark the remote
                    # devices that were the targets of this replication as
                    # failures: the replicator will need to run this job again
                    # with the same info on a later pass.
                    if error_paths:
                        failure_devs_info.update(
                            [(failure_dev["replication_ip"], failure_dev["device"]) for failure_dev in job["nodes"]]
                        )
                else:
                    self.delete_partition(job["path"])
            elif not suffixes:
                self.delete_partition(job["path"])
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            target_devs_info = set(
                [(target_dev["replication_ip"], target_dev["device"]) for target_dev in job["nodes"]]
            )
            self.stats["success"] += len(target_devs_info - failure_devs_info)
            self._add_failure_stats(failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since("partition.delete.timing", begin)

    def delete_partition(self, path):
        self.logger.info(_("Removing partition: %s"), path)
        tpool.execute(shutil.rmtree, path)

    def delete_handoff_objs(self, job, delete_objs):
        success_paths = []
        error_paths = []
        for object_hash in delete_objs:
            object_path = storage_directory(job["obj_path"], job["partition"], object_hash)
            tpool.execute(shutil.rmtree, object_path, ignore_errors=True)
            suffix_dir = dirname(object_path)
            try:
                os.rmdir(suffix_dir)
                success_paths.append(object_path)
            except OSError as e:
                if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                    error_paths.append(object_path)
                    self.logger.exception("Unexpected error trying to cleanup suffix dir:%r", suffix_dir)
        return success_paths, error_paths

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment("partition.update.count.%s" % (job["device"],))
        headers = dict(self.default_headers)
        headers["X-Backend-Storage-Policy-Index"] = int(job["policy"])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        try:
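            # Hash the partition's suffix dirs in a native thread; a full
            # listdir (to pick up brand-new suffixes) only happens on roughly
            # every tenth partition handled in this pass.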
            hashed, local_hash = tpool_reraise(
                self._diskfile_mgr._get_hashes,
                job["path"],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age,
            )
            self.suffix_hash += hashed
            self.logger.update_stats("suffix.hashes", hashed)
            attempts_left = len(job["nodes"])
            synced_remote_regions = set()
            random.shuffle(job["nodes"])
            nodes = itertools.chain(job["nodes"], job["policy"].object_ring.get_more_nodes(int(job["partition"])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node["replication_ip"], node["device"]))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node["region"] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node["replication_ip"],
                            node["replication_port"],
                            node["device"],
                            job["partition"],
                            "REPLICATE",
                            "",
                            headers=headers,
                        ).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_("%(ip)s/%(device)s responded" " as unmounted"), node)
                            attempts_left += 1
                            failure_devs_info.add((node["replication_ip"], node["device"]))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s " "from %(ip)s"),
                                {"resp": resp.status, "ip": node["replication_ip"]},
                            )
                            failure_devs_info.add((node["replication_ip"], node["device"]))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)]
                    if not suffixes:
                        self.stats["hashmatch"] += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        self._diskfile_mgr._get_hashes, job["path"], recalculate=suffixes, reclaim_age=self.reclaim_age
                    )
                    self.logger.update_stats("suffix.hashes", hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)]
                    self.stats["rsync"] += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node["replication_ip"],
                            node["replication_port"],
                            node["device"],
                            job["partition"],
                            "REPLICATE",
                            "/" + "-".join(suffixes),
                            headers=headers,
                        )
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add((node["replication_ip"], node["device"]))
                    # record the remote region only when the sync succeeded
                    if success and node["region"] != job["region"]:
                        synced_remote_regions.add(node["region"])
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats("suffix.syncs", len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add((node["replication_ip"], node["device"]))
                    self.logger.exception(_("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.stats["success"] += len(target_devs_info - failure_devs_info)
            self._add_failure_stats(failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since("partition.update.timing", begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        if self.replication_count:
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.replication_count / elapsed
            self.logger.info(
                _(
                    "%(replicated)d/%(total)d (%(percentage).2f%%)"
                    " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                    "%(remaining)s remaining)"
                ),
                {
                    "replicated": self.replication_count,
                    "total": self.job_count,
                    "percentage": self.replication_count * 100.0 / self.job_count,
                    "time": time.time() - self.start,
                    "rate": rate,
                    "remaining": "%d%s" % compute_eta(self.start, self.replication_count, self.job_count),
                },
            )
            if self.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - " "%(hashed).2f%% hashed, %(synced).2f%% synced"),
                    {
                        "checked": self.suffix_count,
                        "hashed": (self.suffix_hash * 100.0) / self.suffix_count,
                        "synced": (self.suffix_sync * 100.0) / self.suffix_count,
                    },
                )
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, " "min %(min).4fs, med %(med).4fs"),
                    {
                        "max": self.partition_times[-1],
                        "min": self.partition_times[0],
                        "med": self.partition_times[len(self.partition_times) // 2],
                    },
                )
        else:
            self.logger.info(_("Nothing replicated for %s seconds."), (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator eventually finishes its
        replication pass.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected... killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def build_replication_jobs(self, policy, ips, override_devices=None, override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        self.all_devs_info.update([(dev["replication_ip"], dev["device"]) for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [
            dev
            for dev in policy.object_ring.devs
            if (
                dev
                and is_local_device(ips, self.port, dev["replication_ip"], dev["replication_port"])
                and (override_devices is None or dev["device"] in override_devices)
            )
        ]:
            found_local = True
            dev_path = join(self.devices_dir, local_dev["device"])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self._add_failure_stats(
                    [
                        (failure_dev["replication_ip"], failure_dev["device"])
                        for failure_dev in policy.object_ring.devs
                        if failure_dev
                    ]
                )
                self.logger.warning(_("%s is not mounted"), local_dev["device"])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception("ERROR creating %s" % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if override_partitions is not None and partition not in override_partitions:
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(int(partition))
                    nodes = [node for node in part_nodes if node["id"] != local_dev["id"]]
                    jobs.append(
                        dict(
                            path=job_path,
                            device=local_dev["device"],
                            obj_path=obj_path,
                            nodes=nodes,
                            delete=len(nodes) > len(part_nodes) - 1,
                            policy=policy,
                            partition=partition,
                            region=local_dev["region"],
                        )
                    )
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats(
                            [(failure_dev["replication_ip"], failure_dev["device"]) for failure_dev in nodes]
                        )
                    else:
                        self._add_failure_stats(
                            [
                                (failure_dev["replication_ip"], failure_dev["device"])
                                for failure_dev in policy.object_ring.devs
                                if failure_dev
                            ]
                        )
                    continue
        if not found_local:
            self.logger.error(
                "Can't find itself %s with port %s in ring " "file, not replicating", ", ".join(ips), self.port
            )
        return jobs

    def collect_jobs(self, override_devices=None, override_partitions=None, override_policies=None):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.

        :param override_devices: if set, only jobs on these devices
            will be returned
        :param override_partitions: if set, only jobs on these partitions
            will be returned
        :param override_policies: if set, only jobs in these storage
            policies will be returned
        """
        jobs = []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type == REPL_POLICY:
                if override_policies is not None and str(policy.idx) not in override_policies:
                    continue
                # ensure rings are loaded for policy
                self.load_object_ring(policy)
                jobs += self.build_replication_jobs(
                    policy, ips, override_devices=override_devices, override_partitions=override_partitions
                )
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job["delete"])
        self.job_count = len(jobs)
        return jobs

    def replicate(self, override_devices=None, override_partitions=None, override_policies=None):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []
        self.my_replication_ips = self._get_my_replication_ips()
        self.all_devs_info = set()

        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle

        current_nodes = None
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs(
                override_devices=override_devices,
                override_partitions=override_partitions,
                override_policies=override_policies,
            )
            for job in jobs:
                current_nodes = job["nodes"]
                if override_devices and job["device"] not in override_devices:
                    continue
                if override_partitions and job["partition"] not in override_partitions:
                    continue
                dev_path = join(self.devices_dir, job["device"])
                if self.mount_check and not ismount(dev_path):
                    self._add_failure_stats(
                        [(failure_dev["replication_ip"], failure_dev["device"]) for failure_dev in job["nodes"]]
                    )
                    self.logger.warning(_("%s is not mounted"), job["device"])
                    continue
                if not self.check_ring(job["policy"].object_ring):
                    self.logger.info(_("Ring change detected. Aborting " "current replication pass."))
                    return
                try:
                    if isfile(job["path"]):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning("Removing partition directory " "which was a file: %s", job["path"])
                        os.remove(job["path"])
                        continue
                except OSError:
                    continue
                if job["delete"]:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            current_nodes = None
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            if current_nodes:
                self._add_failure_stats(
                    [(failure_dev["replication_ip"], failure_dev["device"]) for failure_dev in current_nodes]
                )
            else:
                self._add_failure_stats(self.all_devs_info)
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()
            self.stats["attempted"] = self.replication_count

    def run_once(self, *args, **kwargs):
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get("devices"))
        override_partitions = list_from_csv(kwargs.get("partitions"))
        override_policies = list_from_csv(kwargs.get("policies"))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies,
        )
        total = (time.time() - self.stats["start"]) / 60
        self.logger.info(_("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            replication_last = time.time()
            dump_recon_cache(
                {
                    "replication_stats": self.stats,
                    "replication_time": total,
                    "replication_last": replication_last,
                    "object_replication_time": total,
                    "object_replication_last": replication_last,
                },
                self.rcache,
                self.logger,
            )

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            self._zero_stats()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - self.stats["start"]) / 60
            self.logger.info(_("Object replication complete. (%.02f minutes)"), total)
            replication_last = time.time()
            dump_recon_cache(
                {
                    "replication_stats": self.stats,
                    "replication_time": total,
                    "replication_last": replication_last,
                    "object_replication_time": total,
                    "object_replication_last": replication_last,
                },
                self.rcache,
                self.logger,
            )
            self.logger.debug("Replication sleeping for %s seconds.", self.interval)
            sleep(self.interval)
Ejemplo n.º 37
    def run(self, *args, **kwargs):
        try:
            self.logger.info('event agent: starting')

            pool = GreenPool(len(self.workers))

            for worker in self.workers:
                pool.spawn(worker.start)

            def front(server, backend):
                while True:
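                    # Frames appear to be [sender, '', event id, payload]:
                    # store the event, forward ['', event id, payload] to a
                    # worker, then ack the sender with the first three frames.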
                    msg = server.recv_multipart()
                    if validate_msg(msg):
                        try:
                            event_id = msg[2]
                            data = msg[3]
                            self.queue.put(event_id, data)
                            event = ['', msg[2], msg[3]]
                            backend.send_multipart(event)
                        except Exception:
                            pass
                        finally:
                            ack = msg[0:3]
                            server.send_multipart(ack)

            def back(backend):
                while True:
                    msg = backend.recv_multipart()
                    event_id = msg[1]
                    success = msg[2]
                    if not success:
                        self.queue.failed(event_id)
                        self.logger.warn('event %s moved to failed',
                                         binascii.hexlify(event_id))
                    else:
                        self.queue.delete(event_id)
                        self.logger.debug('event %s removed from queue',
                                          binascii.hexlify(event_id))

            boss_pool = GreenPool(2)
            boss_pool.spawn_n(front, self.server, self.backend)
            boss_pool.spawn_n(back, self.backend)
            while True:

                results = self.queue.load(self.batch_size)

                for event in results:
                    event_id, data = event
                    msg = ['', event_id, str(data)]
                    self.backend.send_multipart(msg)
                    self.retries_run_time = ratelimit(
                        self.retries_run_time, self.max_retries_per_second)

                for w in self.workers:
                    if w.failed:
                        self.workers.remove(w)
                        self.logger.warn('restart worker "%s"', w.name)
                        new_w = EventWorker(self.conf, w.name, self.context)
                        self.workers.append(new_w)
                        pool.spawn(new_w.start)

                sleep(SLEEP_TIME)

        except Exception as e:
            self.logger.error('ERROR in main loop %s', e)
            raise
        finally:
            self.logger.warn('event agent: stopping')
            self.stop_workers()

            self.context.destroy(linger=True)
            self.context = None
Ejemplo n.º 38
class ObjectMover(Daemon):
    def __init__(self, conf):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """

        self.conf = conf
        self.logger = get_logger(conf, log_route='object-mover')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.vm_test_mode = config_true_value(conf.get('vm_test_mode', 'no'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))

        self.handoffs_first = config_true_value(conf.get('handoffs_first',
                                                         False))

        self.data_moving_map_dump = (conf.get('data_moving_map_dump')
                                     or DEFAULT_DUMP_FILE)

        self._diskfile_mgr = DiskFileManager(conf, self.logger)

        self.mover_tmp_dir = (conf.get('mover_tmp_dir') or 'data_mover')
        self.retries = int(conf.get('retries', 3))
        self.test = bool(conf.get('test', False))
        # _rsync() consults this when logging failures; without it a bad rsync
        # return code would raise AttributeError.
        self.rsync_error_log_line_length = int(
            conf.get('rsync_error_log_line_length', 0))

        self.retrie_list = []

    def create_remote_directory(self, job):
        """
        Creates a temporary directory on the remote server.

        :param job: information about the partition being synced

        """
        node = job['node']

        args = ["ssh", rsync_ip(node['replication_ip']),
                "mkdir", "-p", job['remote_path']]

        if not self.test:
            proc = subprocess.Popen(args,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)

            results = proc.stdout.read()
            ret_val = proc.wait()

            #TODO: ret_val check
            (results, ret_val)

        else:
            print " ".join(args)

    #TODO: same as replicator load_object_ring
    def load_object_ring(self, policy):
        """
        Make sure the policy's rings are loaded.

        :param policy: the StoragePolicy instance
        :returns: appropriate ring object
        """

        policy.load_ring(self.swift_dir)
        return policy.object_ring

    #TODO: check if _rsync from replicator will be used instead
    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """

        start_time = time.time()
        ret_val = None

        proc = subprocess.Popen(args,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)

        results = proc.stdout.read()
        ret_val = proc.wait()

        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            error_line = 'Bad rsync return code: %(ret)d <- %(args)s' % \
                {'args': str(args), 'ret': ret_val}
            if self.rsync_error_log_line_length:
                error_line = error_line[:self.rsync_error_log_line_length]
            self.logger.error(error_line)
        elif results:
            self.logger.info(
                "Successful rsync of %(src)s at %(dst)s (%(time).03f)",
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        else:
            self.logger.debug(
                "Successful rsync of %(src)s at %(dst)s (%(time).03f)",
                {'src': args[-2], 'dst': args[-1], 'time': total_time})

        return ret_val

    def rsync(self, job):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """

        if not os.path.exists(job['path']):
            if self.test:
                print "Error: the path %s does not exists" % job['path']
            return False, {}

        args = [
            'rsync',
            '-a',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--ignore-existing',
        ]

        node = job['node']
        node_ip = rsync_ip(node['replication_ip'])
        rsync_module = '%s:%s' % (node_ip, job['remote_path'])

        args.append(job['path'])
        args.append(rsync_module)

        if not self.test:
            return self._rsync(args) == 0, {}
        else:
            print " ".join(args)
            return True, {}

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """

        self.logger.increment('partition.update.count.%s' % (job['device'],))

        begin = time.time()
        try:

            self.create_remote_directory(job)
            success, _junk = self.rsync(job)
            if not success:
                self.retrie_list.append(job)

        except (Exception, Timeout):
            self.logger.exception("Error syncing partition")
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)

#TODO: same as replicator kill coros
    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def build_replication_jobs(self, policy, ips, old_dict,
                               new_dict, moving_map):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy

        :param policy: swift policy object
        :param ips: the local server ips
        :param old_dict: dictionary with devices from old ring
        :param new_dict: dictionary with devices from new ring
        :param moving_map: the dictionary that contains all the partitions
            that should be moved, their sources and destinations
        """

        jobs = []
        data_dir = get_data_dir(policy)
        devices = Set(map(lambda x: x[1], moving_map.values()))
        partitions = Set(map(lambda x: x[0], moving_map.values()))

        for local_dev in [dev for dev in policy.object_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              )]:

            if self.test:
                print local_dev['id']

            if unicode(local_dev['id']) not in devices:
                continue

            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self.logger.warn('%s is not mounted' % local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)

            for partition in os.listdir(obj_path):
                partition = unicode(partition)

                if (partition not in partitions):
                    continue

                try:
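                    # moving_map is keyed by "<device id>_<partition>"; each
                    # value is a (partition, source device id, destination
                    # device id) tuple.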

                    key = "%s_%s" % (local_dev['id'], partition)
                    if key not in moving_map:
                        continue

                    job_path = join(obj_path, partition)

                    _, source_id, dest_id = moving_map[key]

                    if source_id != unicode(local_dev['id']):
                        continue

                    node = {}
                    replication_ip, replication_device = new_dict[dest_id]
                    node['replication_ip'] = replication_ip
                    node['device'] = replication_device

                    remote_path = os.path.join(self.devices_dir,
                                               node['device'],
                                               self.mover_tmp_dir)

                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             node=node,
                             policy=policy,
                             partition=partition,
                             remote_path=remote_path))

                except ValueError:
                    continue
                except Exception as e:
                    self.logger.exception(
                        "an %s exception accure at build_replication_jobs" % e)
                    if self.test:
                        print e
        return jobs

    def collect_jobs(self, old_dict, new_dict, moving_map):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.

        :param old_dict: dictionary with devices from old ring
        :param new_dict: dictionary with devices from new ring
        :param moving_map: the dictionary that contains all the partitions
            that should be moved, their sources and destinations
        """

        jobs = []
        ips = whataremyips(self.bind_ip)

        for policy in POLICIES:
            if policy.policy_type == REPL_POLICY:
                # ensure rings are loaded for policy
                self.load_object_ring(policy)
                jobs += self.build_replication_jobs(
                    policy, ips, old_dict, new_dict, moving_map)
        random.shuffle(jobs)

        return jobs

    def move(self, old_dict, new_dict, moving_map):
        """Run a move pass.

        :param old_dict: dictionary with devices from old ring
        :param new_dict: dictionary with devices from new ring
        :param moving_map: the dictionary that contains all the partitions
            that should be moved, their sources and destinations
        """

        self.start = time.time()
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []

        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs(old_dict, new_dict, moving_map)
            for job in jobs:
                dev_path = join(self.devices_dir, job['device'])
                if self.mount_check and not ismount(dev_path):
                    self.logger.warn('%s is not mounted' % job['device'])
                    continue

                try:
                    if isfile(job['path']):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job['path'])
                        os.remove(job['path'])
                        continue
                except OSError:
                    continue

                self.run_pool.spawn(self.update, job)

            self.run_pool.waitall()

        except (Exception, Timeout) as e:
            self.kill_coros()
            self.logger.exception(
                "Exception in top-level partition move loop %s" % e)
            if self.test:
                print e

    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info("Running object mover in script mode.")

        old_dict, new_dict, moving_map =\
            load_moving_map(self.data_moving_map_dump)

        self.move(old_dict, new_dict, moving_map)

        trie = 0
        while trie < self.retries:
            if len(self.retrie_list) == 0:
                break
            current_retrie_list = self.retrie_list
            self.retrie_list = []

            for job in current_retrie_list:
                self.update(job)

            trie += 1

        total = (time.time() - start) / 60
        self.logger.info(
            "Object move complete (once). "
            "(%.02f minutes), %s partition movement failed"
            % (total, len(self.retrie_list)))
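
The mover above pushes every partition job through an eventlet GreenPool and, when the
top-level loop fails, tears the pool down with kill_coros(). A minimal, self-contained
sketch of that spawn/waitall/kill pattern follows; do_work and the integer jobs are
illustrative placeholders, not part of the mover itself.

import eventlet
from eventlet import GreenPool
from greenlet import GreenletExit


def do_work(job):
    eventlet.sleep(0.1)   # stand-in for the real rsync/update work
    return job


pool = GreenPool(size=4)              # bounded concurrency, as in GreenPool(size=self.concurrency)
try:
    for job in range(10):
        pool.spawn(do_work, job)      # blocks whenever all 4 slots are busy
    pool.waitall()                    # wait for every spawned coroutine to finish
except (Exception, eventlet.Timeout):
    # on failure, kill whatever is still running, as kill_coros() does above
    for coro in list(pool.coroutines_running):
        try:
            coro.kill(GreenletExit)
        except GreenletExit:
            pass
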
Example No. 39
            db_conns = []
            for i in range(num_locks):
                db_conn = connect(db_files[i])
                db_conn.execute('begin exclusive transaction')
                db_conns.append(db_conn)
            if catch_503:
                exc = None
                try:
                    client.delete_container(self.url, self.token, container)
                except client.ClientException as err:
                    exc = err
                self.assertEqual(exc.http_status, 503)
            else:
                client.delete_container(self.url, self.token, container)

        pool = GreenPool()
        try:
            with Timeout(15):
                pool.spawn(run_test, 1, False)
                pool.spawn(run_test, 2, True)
                pool.spawn(run_test, 3, True)
                pool.waitall()
        except Timeout as err:
            raise Exception(
                "The server did not return a 503 on container db locks, "
                "it just hangs: %s" % err)


if __name__ == '__main__':
    main()
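
The test above relies on eventlet's Timeout so that a hung DELETE fails loudly instead of
blocking forever. A stripped-down sketch of that guard; check_server is a hypothetical
stand-in for the swiftclient calls.

from eventlet import GreenPool, Timeout, sleep


def check_server(seconds):
    sleep(seconds)        # stand-in for client.delete_container()


pool = GreenPool()
try:
    with Timeout(15):     # fail if the pool has not drained within 15 seconds
        pool.spawn(check_server, 1)
        pool.spawn(check_server, 2)
        pool.waitall()
except Timeout as err:
    raise Exception("the server did not respond, it just hangs: %s" % err)
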
Example No. 40
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
                              ('true', 't', '1', 'on', 'yes', 'y')
        self.vm_test_mode = conf.get(
                'vm_test_mode', 'no').lower() in ('yes', 'true', 'on', '1')
        self.chase_dir = conf.get('chase_dir', '/etc/chase')
        self.port = int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.object_ring = Ring(join(self.chase_dir, 'object.ring.gz'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.run_pause = int(conf.get('run_pause', 30))
        self.rsync_timeout = int(conf.get('rsync_timeout', 900))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_enable = conf.get(
                'recon_enable', 'no').lower() in TRUE_VALUES
        self.recon_cache_path = conf.get(
                'recon_cache_path', '/var/cache/chase')
        self.recon_object = os.path.join(self.recon_cache_path, "object.recon")

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        ret_val = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            self.logger.error(_('Bad rsync return code: %(args)s -> %(ret)d'),
                    {'args': str(args), 'ret': ret_val})
        elif results:
            self.logger.info(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        else:
            self.logger.debug(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean indicating success or failure
        """
        if not os.path.exists(job['path']):
            return False
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
        ]
        if self.vm_test_mode:
            rsync_module = '[%s]::object%s' % (node['ip'], node['port'])
        else:
            rsync_module = '[%s]::object' % node['ip']
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False
        args.append(join(rsync_module, node['device'],
                    'objects', job['partition']))
        return self._rsync(args) == 0

    def check_ring(self):
        """
        Check to see if the ring has been updated

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if self.object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """

        def tpool_get_suffixes(path):
            return [suff for suff in os.listdir(path)
                    if len(suff) == 3 and isdir(join(path, suff))]
        self.replication_count += 1
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            if suffixes:
                for node in job['nodes']:
                    success = self.rsync(node, job, suffixes)
                    if success:
                        with Timeout(self.http_timeout):
                            http_connect(node['ip'], node['port'],
                                node['device'], job['partition'], 'REPLICATE',
                                '/' + '-'.join(suffixes),
                          headers={'Content-Length': '0'}).getresponse().read()
                    responses.append(success)
            if not suffixes or (len(responses) == \
                        self.object_ring.replica_count and all(responses)):
                self.logger.info(_("Removing partition: %s"), job['path'])
                tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        begin = time.time()
        try:
            hashed, local_hash = tpool.execute(tpooled_get_hashes, job['path'],
                    do_listdir=(self.replication_count % 10) == 0,
                    reclaim_age=self.reclaim_age)
            # See tpooled_get_hashes "Hack".
            if isinstance(hashed, BaseException):
                raise hashed
            self.suffix_hash += hashed
            attempts_left = self.object_ring.replica_count - 1
            nodes = itertools.chain(job['nodes'],
                        self.object_ring.get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['ip'], node['port'],
                                node['device'], job['partition'], 'REPLICATE',
                            '', headers={'Content-Length': '0'}).getresponse()
                        if resp.status == 507:
                            self.logger.error(_('%(ip)s/%(device)s responded'
                                    ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != 200:
                            self.logger.error(_("Invalid response %(resp)s "
                                "from %(ip)s"),
                                {'resp': resp.status, 'ip': node['ip']})
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if
                            local_hash[suffix] != remote_hash.get(suffix, -1)]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool.execute(tpooled_get_hashes,
                        job['path'], recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    # See tpooled_get_hashes "Hack".
                    if isinstance(hashed, BaseException):
                        raise hashed
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if
                            local_hash[suffix] != remote_hash.get(suffix, -1)]
                    self.rsync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['ip'], node['port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers={'Content-Length': '0'})
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                except (Exception, Timeout):
                    self.logger.exception(_("Error syncing with node: %s") %
                                            node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        if self.replication_count:
            rate = self.replication_count / (time.time() - self.start)
            self.logger.info(_("%(replicated)d/%(total)d (%(percentage).2f%%)"
                " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                "%(remaining)s remaining)"),
                {'replicated': self.replication_count, 'total': self.job_count,
                 'percentage': self.replication_count * 100.0 / self.job_count,
                 'time': time.time() - self.start, 'rate': rate,
                 'remaining': '%d%s' % compute_eta(self.start,
                           self.replication_count, self.job_count)})
            if self.suffix_count:
                self.logger.info(_("%(checked)d suffixes checked - "
                    "%(hashed).2f%% hashed, %(synced).2f%% synced"),
                    {'checked': self.suffix_count,
                     'hashed': (self.suffix_hash * 100.0) / self.suffix_count,
                     'synced': (self.suffix_sync * 100.0) / self.suffix_count})
                self.partition_times.sort()
                self.logger.info(_("Partition times: max %(max).4fs, "
                    "min %(min).4fs, med %(med).4fs"),
                    {'max': self.partition_times[-1],
                     'min': self.partition_times[0],
                     'med': self.partition_times[
                                len(self.partition_times) // 2]})
        else:
            self.logger.info(_("Nothing replicated for %s seconds."),
                (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator finishes its replication
        pass in some eventuality.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def collect_jobs(self):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.
        """
        jobs = []
        ips = whataremyips()
        for local_dev in [dev for dev in self.object_ring.devs
                if dev and dev['ip'] in ips and dev['port'] == self.port]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, 'objects')
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not os.path.ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                continue
            for partition in os.listdir(obj_path):
                try:
                    nodes = [node for node in
                        self.object_ring.get_part_nodes(int(partition))
                             if node['id'] != local_dev['id']]
                    jobs.append(dict(path=join(obj_path, partition),
                        nodes=nodes,
                        delete=len(nodes) > self.object_ring.replica_count - 1,
                        partition=partition))
                except ValueError:
                    continue
        random.shuffle(jobs)
        # Partitions that need to be deleted take priority
        jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs

    def replicate(self):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []
        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs()
            for job in jobs:
                if not self.check_ring():
                    self.logger.info(_("Ring change detected. Aborting "
                            "current replication pass."))
                    return
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))
        self.replicate()
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object replication complete. (%.02f minutes)"), total)
        if self.recon_enable:
            try:
                dump_recon_cache('object_replication_time', total, \
                    self.recon_object)
            except (Exception, Timeout):
                self.logger.exception(_('Exception dumping recon cache'))

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            start = time.time()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - start) / 60
            self.logger.info(
                _("Object replication complete. (%.02f minutes)"), total)
            if self.recon_enable:
                try:
                    dump_recon_cache('object_replication_time', total, \
                        self.recon_object)
                except (Exception, Timeout):
                    self.logger.exception(_('Exception dumping recon cache'))
            self.logger.debug(_('Replication sleeping for %s seconds.'),
                self.run_pause)
            sleep(self.run_pause)
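
replicate() above pairs the worker pool with two helper greenthreads, heartbeat() and
detect_lockups(), and always kills them in the finally block so a pass never leaves them
running. A compact sketch of that arrangement using the same eventlet primitives; the
heartbeat function and the sleep calls are illustrative placeholders.

import eventlet
from eventlet import GreenPool


def heartbeat(interval):
    while True:
        eventlet.sleep(interval)
        print('still working...')        # stand-in for stats_line()


pool = GreenPool(size=2)
stats = eventlet.spawn(heartbeat, 1)     # background progress logger
eventlet.sleep()                         # give the spawn a cycle, as replicate() does
try:
    for _ in range(4):
        pool.spawn(eventlet.sleep, 0.5)  # stand-in for update()/update_deleted()
    with eventlet.Timeout(30):
        pool.waitall()
finally:
    stats.kill()                         # tear the background loop down no matter what
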
Example No. 41
class Chewie(object):
    SIOCGIFHWADDR = 0x8927
    SIOCGIFINDEX = 0x8933
    PACKET_MR_MULTICAST = 0
    PACKET_MR_PROMISC = 1
    SOL_PACKET = 263
    PACKET_ADD_MEMBERSHIP = 1
    EAP_ADDRESS = MacAddress.from_string("01:80:c2:00:00:03")

    def __init__(self, interface_name, credentials, logger=None, auth_handler=None, group_address=None):
        self.interface_name = interface_name
        self.credentials = credentials
        self.logger = logger
        self.auth_handler = auth_handler
        self.group_address = group_address
        if not group_address:
            self.group_address = self.EAP_ADDRESS

    def run(self):
        self.logger.info("CHEWIE: Starting")
        self.open_socket()
        self.get_interface_info()
        self.build_state_machine()
        self.join_multicast_group()
        self.start_threads_and_wait()

    def start_threads_and_wait(self):
        self.pool = GreenPool()
        self.eventlets = []

        self.eventlets.append(self.pool.spawn(self.send_messages))
        self.eventlets.append(self.pool.spawn(self.receive_messages))

        self.pool.waitall()

    def auth_success(self, src_mac):
        if self.auth_handler:
            self.auth_handler(src_mac, self.group_address)

    def send_messages(self):
        while True:
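            # sleep(0) cooperatively yields to the hub so the receive loop can run between sends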
            sleep(0)
            message = self.state_machine.output_messages.get()
            self.logger.info("CHEWIE: Sending message %s to %s" % (message, str(self.group_address)))
            self.socket.send(MessagePacker.pack(message, self.group_address))

    def receive_messages(self):
        while True:
            sleep(0)
            packed_message = self.socket.recv(4096)
            message = MessageParser.parse(packed_message)
            self.logger.info("CHEWIE: Received message: %s" % message)
            event = EventMessageReceived(message)
            self.state_machine.event(event)

    def open_socket(self):
        self.socket = socket.socket(socket.PF_PACKET, socket.SOCK_RAW, socket.htons(0x888e))
        self.socket.bind((self.interface_name, 0))

    def build_state_machine(self):
        self.state_machine = StateMachine(self.interface_address, self.auth_success)

    def get_interface_info(self):
        self.get_interface_address()
        self.get_interface_index()

    def get_interface_address(self):
        # http://man7.org/linux/man-pages/man7/netdevice.7.html
        ifreq = struct.pack('16sH6s', self.interface_name.encode("utf-8"), 0, b"")
        response = ioctl(self.socket, self.SIOCGIFHWADDR, ifreq)
        _interface_name, _address_family, interface_address = struct.unpack('16sH6s', response)
        self.interface_address = MacAddress(interface_address)

    def get_interface_index(self):
        # http://man7.org/linux/man-pages/man7/netdevice.7.html
        ifreq = struct.pack('16sI', self.interface_name.encode("utf-8"), 0)
        response = ioctl(self.socket, self.SIOCGIFINDEX, ifreq)
        _ifname, self.interface_index = struct.unpack('16sI', response)

    def join_multicast_group(self):
        # TODO this works but should blank out the end bytes
        mreq = struct.pack("IHH8s", self.interface_index, self.PACKET_MR_PROMISC, len(self.EAP_ADDRESS.address), self.EAP_ADDRESS.address)
        self.socket.setsockopt(self.SOL_PACKET, self.PACKET_ADD_MEMBERSHIP, mreq)
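
Chewie uses its GreenPool only to run the send and receive loops side by side and then
wait for both. A minimal sketch of that two-worker pattern, with an eventlet queue
standing in for the raw socket and the state machine's output queue; outbox, sender and
receiver are illustrative names.

import eventlet
from eventlet import GreenPool
from eventlet.queue import Queue

outbox = Queue()


def receiver():
    for i in range(3):
        eventlet.sleep(0.1)           # stand-in for socket.recv() and message parsing
        outbox.put('reply-%d' % i)
    outbox.put(None)                  # tell the sender to stop


def sender():
    while True:
        message = outbox.get()        # blocks cooperatively until something arrives
        if message is None:
            break
        print('sending %s' % message)


pool = GreenPool()
pool.spawn(sender)
pool.spawn(receiver)
pool.waitall()                        # returns once both loops have finished
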
Example No. 42
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = get_logger(conf, log_route="object-replicator")
        self.devices_dir = conf.get("devices", "/srv/node")
        self.mount_check = conf.get("mount_check", "true").lower() in ("true", "t", "1", "on", "yes", "y")
        self.vm_test_mode = conf.get("vm_test_mode", "no").lower() in ("yes", "true", "on", "1")
        self.swift_dir = conf.get("swift_dir", "/etc/swift")
        self.port = int(conf.get("bind_port", 6000))
        self.concurrency = int(conf.get("concurrency", 1))
        self.stats_interval = int(conf.get("stats_interval", "300"))
        self.object_ring = Ring(self.swift_dir, ring_name="object")
        self.ring_check_interval = int(conf.get("ring_check_interval", 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get("reclaim_age", 86400 * 7))
        self.partition_times = []
        self.run_pause = int(conf.get("run_pause", 30))
        self.rsync_timeout = int(conf.get("rsync_timeout", 900))
        self.rsync_io_timeout = conf.get("rsync_io_timeout", "30")
        self.http_timeout = int(conf.get("http_timeout", 60))
        self.lockup_timeout = int(conf.get("lockup_timeout", 1800))
        self.recon_cache_path = conf.get("recon_cache_path", "/var/cache/swift")
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        ret_val = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        for result in results.split("\n"):
            if result == "":
                continue
            if result.startswith("cd+"):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            self.logger.error(_("Bad rsync return code: %(args)s -> %(ret)d"), {"args": str(args), "ret": ret_val})
        elif results:
            self.logger.info(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {"src": args[-2], "dst": args[-1], "time": total_time},
            )
        else:
            self.logger.debug(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {"src": args[-2], "dst": args[-1], "time": total_time},
            )
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean indicating success or failure
        """
        if not os.path.exists(job["path"]):
            return False
        args = [
            "rsync",
            "--recursive",
            "--whole-file",
            "--human-readable",
            "--xattrs",
            "--itemize-changes",
            "--ignore-existing",
            "--timeout=%s" % self.rsync_io_timeout,
            "--contimeout=%s" % self.rsync_io_timeout,
        ]
        node_ip = rsync_ip(node["ip"])
        if self.vm_test_mode:
            rsync_module = "%s::object%s" % (node_ip, node["port"])
        else:
            rsync_module = "%s::object" % node_ip
        had_any = False
        for suffix in suffixes:
            spath = join(job["path"], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False
        args.append(join(rsync_module, node["device"], "objects", job["partition"]))
        return self._rsync(args) == 0

    def check_ring(self):
        """
        Check to see if the ring has been updated

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if self.object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """

        def tpool_get_suffixes(path):
            return [suff for suff in os.listdir(path) if len(suff) == 3 and isdir(join(path, suff))]

        self.replication_count += 1
        self.logger.increment("partition.delete.count.%s" % (job["device"],))
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job["path"])
            if suffixes:
                for node in job["nodes"]:
                    success = self.rsync(node, job, suffixes)
                    if success:
                        with Timeout(self.http_timeout):
                            http_connect(
                                node["ip"],
                                node["port"],
                                node["device"],
                                job["partition"],
                                "REPLICATE",
                                "/" + "-".join(suffixes),
                                headers={"Content-Length": "0"},
                            ).getresponse().read()
                    responses.append(success)
            if not suffixes or (len(responses) == len(job["nodes"]) and all(responses)):
                self.logger.info(_("Removing partition: %s"), job["path"])
                tpool.execute(shutil.rmtree, job["path"], ignore_errors=True)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since("partition.delete.timing", begin)

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment("partition.update.count.%s" % (job["device"],))
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(
                get_hashes, job["path"], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age
            )
            self.suffix_hash += hashed
            self.logger.update_stats("suffix.hashes", hashed)
            attempts_left = len(job["nodes"])
            nodes = itertools.chain(job["nodes"], self.object_ring.get_more_nodes(int(job["partition"])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node["ip"],
                            node["port"],
                            node["device"],
                            job["partition"],
                            "REPLICATE",
                            "",
                            headers={"Content-Length": "0"},
                        ).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_("%(ip)s/%(device)s responded" " as unmounted"), node)
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s " "from %(ip)s"), {"resp": resp.status, "ip": node["ip"]}
                            )
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes, job["path"], recalculate=suffixes, reclaim_age=self.reclaim_age
                    )
                    self.logger.update_stats("suffix.hashes", hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)]
                    self.rsync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node["ip"],
                            node["port"],
                            node["device"],
                            job["partition"],
                            "REPLICATE",
                            "/" + "-".join(suffixes),
                            headers={"Content-Length": "0"},
                        )
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats("suffix.syncs", len(suffixes))
                except (Exception, Timeout):
                    self.logger.exception(_("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since("partition.update.timing", begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        if self.replication_count:
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.replication_count / elapsed
            self.logger.info(
                _(
                    "%(replicated)d/%(total)d (%(percentage).2f%%)"
                    " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                    "%(remaining)s remaining)"
                ),
                {
                    "replicated": self.replication_count,
                    "total": self.job_count,
                    "percentage": self.replication_count * 100.0 / self.job_count,
                    "time": time.time() - self.start,
                    "rate": rate,
                    "remaining": "%d%s" % compute_eta(self.start, self.replication_count, self.job_count),
                },
            )
            if self.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - " "%(hashed).2f%% hashed, %(synced).2f%% synced"),
                    {
                        "checked": self.suffix_count,
                        "hashed": (self.suffix_hash * 100.0) / self.suffix_count,
                        "synced": (self.suffix_sync * 100.0) / self.suffix_count,
                    },
                )
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, " "min %(min).4fs, med %(med).4fs"),
                    {
                        "max": self.partition_times[-1],
                        "min": self.partition_times[0],
                        "med": self.partition_times[len(self.partition_times) // 2],
                    },
                )
        else:
            self.logger.info(_("Nothing replicated for %s seconds."), (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator finishes its replication
        pass in some eventuality.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def collect_jobs(self):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.
        """
        jobs = []
        ips = whataremyips()
        for local_dev in [
            dev for dev in self.object_ring.devs if dev and dev["ip"] in ips and dev["port"] == self.port
        ]:
            dev_path = join(self.devices_dir, local_dev["device"])
            obj_path = join(dev_path, "objects")
            tmp_path = join(dev_path, "tmp")
            if self.mount_check and not os.path.ismount(dev_path):
                self.logger.warn(_("%s is not mounted"), local_dev["device"])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                mkdirs(obj_path)
                continue
            for partition in os.listdir(obj_path):
                try:
                    part_nodes = self.object_ring.get_part_nodes(int(partition))
                    nodes = [node for node in part_nodes if node["id"] != local_dev["id"]]
                    jobs.append(
                        dict(
                            path=join(obj_path, partition),
                            device=local_dev["device"],
                            nodes=nodes,
                            delete=len(nodes) > len(part_nodes) - 1,
                            partition=partition,
                        )
                    )
                except ValueError:
                    continue
        random.shuffle(jobs)
        # Partitions that need to be deleted take priority
        jobs.sort(key=lambda job: not job["delete"])
        self.job_count = len(jobs)
        return jobs

    def replicate(self):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []
        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs()
            for job in jobs:
                dev_path = join(self.devices_dir, job["device"])
                if self.mount_check and not os.path.ismount(dev_path):
                    self.logger.warn(_("%s is not mounted"), job["device"])
                    continue
                if not self.check_ring():
                    self.logger.info(_("Ring change detected. Aborting " "current replication pass."))
                    return
                if job["delete"]:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))
        self.replicate()
        total = (time.time() - start) / 60
        self.logger.info(_("Object replication complete. (%.02f minutes)"), total)
        dump_recon_cache({"object_replication_time": total}, self.rcache, self.logger)

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            start = time.time()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - start) / 60
            self.logger.info(_("Object replication complete. (%.02f minutes)"), total)
            dump_recon_cache({"object_replication_time": total}, self.rcache, self.logger)
            self.logger.debug(_("Replication sleeping for %s seconds."), self.run_pause)
            sleep(self.run_pause)
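
collect_jobs() above shuffles the job list and then sorts it on not job['delete'] so that
handoff partitions are handled first: False sorts before True, and because Python's sort
is stable the shuffled order is preserved within each group. A tiny self-contained
illustration of that ordering trick:

import random

jobs = [{'partition': str(n), 'delete': n % 3 == 0} for n in range(6)]
random.shuffle(jobs)
jobs.sort(key=lambda job: not job['delete'])     # delete jobs (key False) come first
print([(j['partition'], j['delete']) for j in jobs])
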
Example No. 43
class ObjectReconstructor(Daemon):
    """
    Reconstruct objects using erasure code.  And also rebalance EC Fragment
    Archive objects off handoff nodes.

    Encapsulates most logic and data needed by the object reconstruction
    process. Each call to .reconstruct() performs one pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf, logger=None):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = logger or get_logger(
            conf, log_route='object-reconstructor')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6200))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.interval = int(conf.get('interval') or
                            conf.get('run_pause') or 30)
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        # defaults subject to change after beta
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.headers = {
            'Content-Length': '0',
            'user-agent': 'obj-reconstructor %s' % os.getpid()}
        self.handoffs_first = config_true_value(conf.get('handoffs_first',
                                                         False))
        self._df_router = DiskFileRouter(conf, self.logger)

    def load_object_ring(self, policy):
        """
        Make sure the policy's rings are loaded.

        :param policy: the StoragePolicy instance
        :returns: appropriate ring object
        """
        policy.load_ring(self.swift_dir)
        return policy.object_ring

    def check_ring(self, object_ring):
        """
        Check to see if the ring has been updated

        :param object_ring: the ring to check
        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if object_ring.has_changed():
                return False
        return True

    def _full_path(self, node, part, path, policy):
        return '%(replication_ip)s:%(replication_port)s' \
            '/%(device)s/%(part)s%(path)s ' \
            'policy#%(policy)d frag#%(frag_index)s' % {
                'replication_ip': node['replication_ip'],
                'replication_port': node['replication_port'],
                'device': node['device'],
                'part': part, 'path': path,
                'policy': policy,
                'frag_index': node.get('index', 'handoff'),
            }

    def _get_response(self, node, part, path, headers, policy):
        """
        Helper method for reconstruction that GETs a single EC fragment
        archive

        :param node: the node to GET from
        :param part: the partition
        :param path: full path of the desired EC archive
        :param headers: the headers to send
        :param policy: an instance of
                       :class:`~swift.common.storage_policy.BaseStoragePolicy`
        :returns: response
        """
        resp = None
        try:
            with ConnectionTimeout(self.conn_timeout):
                conn = http_connect(node['ip'], node['port'], node['device'],
                                    part, 'GET', path, headers=headers)
            with Timeout(self.node_timeout):
                resp = conn.getresponse()
            if resp.status not in [HTTP_OK, HTTP_NOT_FOUND]:
                self.logger.warning(
                    _("Invalid response %(resp)s from %(full_path)s"),
                    {'resp': resp.status,
                     'full_path': self._full_path(node, part, path, policy)})
                resp = None
            elif resp.status == HTTP_NOT_FOUND:
                resp = None
        except (Exception, Timeout):
            self.logger.exception(
                _("Trying to GET %(full_path)s"), {
                    'full_path': self._full_path(node, part, path, policy)})
        return resp

    def reconstruct_fa(self, job, node, datafile_metadata):
        """
        Reconstructs a fragment archive - this method is called from ssync
        after a remote node responds that is missing this object - the local
        diskfile is opened to provide metadata - but to reconstruct the
        missing fragment archive we must connect to multiple object servers.

        :param job: job from ssync_sender
        :param node: node that we're rebuilding to
        :param datafile_metadata:  the datafile metadata to attach to
                                   the rebuilt fragment archive
        :returns: a DiskFile like class for use by ssync
        :raises DiskFileError: if the fragment archive cannot be reconstructed
        """

        part_nodes = job['policy'].object_ring.get_part_nodes(
            job['partition'])
        part_nodes.remove(node)

        # the fragment index we need to reconstruct is the position index
        # of the node we're rebuilding to within the primary part list
        fi_to_rebuild = node['index']

        # KISS send out connection requests to all nodes, see what sticks
        headers = self.headers.copy()
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        pile = GreenAsyncPile(len(part_nodes))
        path = datafile_metadata['name']
        for node in part_nodes:
            pile.spawn(self._get_response, node, job['partition'],
                       path, headers, job['policy'])
        responses = []
        etag = None
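        # GreenAsyncPile yields each response as soon as its GET completes, so the
        # loop below can break as soon as ec_ndata matching fragments are in hand.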
        for resp in pile:
            if not resp:
                continue
            resp.headers = HeaderKeyDict(resp.getheaders())
            if str(fi_to_rebuild) == \
                    resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index'):
                continue
            if resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index') in set(
                    r.headers.get('X-Object-Sysmeta-Ec-Frag-Index')
                    for r in responses):
                continue
            responses.append(resp)
            etag = sorted(responses, reverse=True,
                          key=lambda r: Timestamp(
                              r.headers.get('X-Backend-Timestamp')
                          ))[0].headers.get('X-Object-Sysmeta-Ec-Etag')
            responses = [r for r in responses if
                         r.headers.get('X-Object-Sysmeta-Ec-Etag') == etag]

            if len(responses) >= job['policy'].ec_ndata:
                break
        else:
            self.logger.error(
                'Unable to get enough responses (%s/%s) '
                'to reconstruct %s with ETag %s' % (
                    len(responses), job['policy'].ec_ndata,
                    self._full_path(node, job['partition'],
                                    datafile_metadata['name'], job['policy']),
                    etag))
            raise DiskFileError('Unable to reconstruct EC archive')

        rebuilt_fragment_iter = self.make_rebuilt_fragment_iter(
            responses[:job['policy'].ec_ndata], path, job['policy'],
            fi_to_rebuild)
        return RebuildingECDiskFileStream(datafile_metadata, fi_to_rebuild,
                                          rebuilt_fragment_iter)

    def _reconstruct(self, policy, fragment_payload, frag_index):
        return policy.pyeclib_driver.reconstruct(fragment_payload,
                                                 [frag_index])[0]

    def make_rebuilt_fragment_iter(self, responses, path, policy, frag_index):
        """
        Turn a set of connections from backend object servers into a generator
        that yields up the rebuilt fragment archive for frag_index.
        """

        def _get_one_fragment(resp):
            buff = ''
            remaining_bytes = policy.fragment_size
            while remaining_bytes:
                chunk = resp.read(remaining_bytes)
                if not chunk:
                    break
                remaining_bytes -= len(chunk)
                buff += chunk
            return buff

        def fragment_payload_iter():
            # We need a fragment from each connection, so it's best to
            # use a GreenPile to keep them ordered and in sync
            pile = GreenPile(len(responses))
            while True:
                for resp in responses:
                    pile.spawn(_get_one_fragment, resp)
                try:
                    with Timeout(self.node_timeout):
                        fragment_payload = [fragment for fragment in pile]
                except (Exception, Timeout):
                    self.logger.exception(
                        _("Error trying to rebuild %(path)s "
                          "policy#%(policy)d frag#%(frag_index)s"),
                        {'path': path,
                         'policy': policy,
                         'frag_index': frag_index,
                         })
                    break
                if not all(fragment_payload):
                    break
                rebuilt_fragment = self._reconstruct(
                    policy, fragment_payload, frag_index)
                yield rebuilt_fragment

        return fragment_payload_iter()

    def stats_line(self):
        """
        Logs various stats for the currently running reconstruction pass.
        """
        if (self.device_count and self.part_count and
                self.reconstruction_device_count):
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.reconstruction_part_count / elapsed
            total_part_count = (self.part_count *
                                self.device_count /
                                self.reconstruction_device_count)
            self.logger.info(
                _("%(reconstructed)d/%(total)d (%(percentage).2f%%)"
                  " partitions of %(device)d/%(dtotal)d "
                  "(%(dpercentage).2f%%) devices"
                  " reconstructed in %(time).2fs "
                  "(%(rate).2f/sec, %(remaining)s remaining)"),
                {'reconstructed': self.reconstruction_part_count,
                 'total': self.part_count,
                 'percentage':
                 self.reconstruction_part_count * 100.0 / self.part_count,
                 'device': self.reconstruction_device_count,
                 'dtotal': self.device_count,
                 'dpercentage':
                 self.reconstruction_device_count * 100.0 / self.device_count,
                 'time': time.time() - self.start, 'rate': rate,
                 'remaining': '%d%s' %
                 compute_eta(self.start,
                             self.reconstruction_part_count,
                             total_part_count)})

            if self.suffix_count and self.partition_times:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"),
                    {'checked': self.suffix_count,
                     'hashed': (self.suffix_hash * 100.0) / self.suffix_count,
                     'synced': (self.suffix_sync * 100.0) / self.suffix_count})
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"),
                    {'max': self.partition_times[-1],
                     'min': self.partition_times[0],
                     'med': self.partition_times[
                         len(self.partition_times) // 2]})
        else:
            self.logger.info(
                _("Nothing reconstructed for %s seconds."),
                (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during reconstruction.  It
        periodically logs progress.
        """
        while True:
            sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the reconstructor eventually finishes
        its reconstruction pass even if that happens.
        """
        while True:
            sleep(self.lockup_timeout)
            if self.reconstruction_count == self.last_reconstruction_count:
                self.logger.error(_("Lockup detected... killing live coros."))
                self.kill_coros()
            self.last_reconstruction_count = self.reconstruction_count

    def _get_hashes(self, policy, path, recalculate=None, do_listdir=False):
        df_mgr = self._df_router[policy]
        hashed, suffix_hashes = tpool_reraise(
            df_mgr._get_hashes, path, recalculate=recalculate,
            do_listdir=do_listdir, reclaim_age=self.reclaim_age)
        self.logger.update_stats('suffix.hashes', hashed)
        return suffix_hashes

    def get_suffix_delta(self, local_suff, local_index,
                         remote_suff, remote_index):
        """
        Compare the local suffix hashes with the remote suffix hashes
        for the given local and remote fragment indexes.  Return those
        suffixes which should be synced.

        :param local_suff: the local suffix hashes (from _get_hashes)
        :param local_index: the local fragment index for the job
        :param remote_suff: the remote suffix hashes (from remote
                            REPLICATE request)
        :param remote_index: the remote fragment index for the job

        :returns: a list of strings, the suffix dirs to sync
        """
        suffixes = []
        for suffix, sub_dict_local in local_suff.items():
            sub_dict_remote = remote_suff.get(suffix, {})
            if (sub_dict_local.get(None) != sub_dict_remote.get(None) or
                    sub_dict_local.get(local_index) !=
                    sub_dict_remote.get(remote_index)):
                suffixes.append(suffix)
        return suffixes
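
    # Illustrative sketch with hypothetical hash dicts shaped like those
    # returned by _get_hashes (suffix -> {frag_index or None: hash}): a
    # suffix is selected when either the hash stored under the None key or
    # the hash for its fragment index differs between local and remote:
    #
    #   local_suff = {'abc': {None: 'ts1', 2: 'h1'},
    #                 'def': {None: 'ts2', 2: 'h2'}}
    #   remote_suff = {'abc': {None: 'ts1', 0: 'h1'},
    #                  'def': {None: 'tsX', 0: 'h9'}}
    #   self.get_suffix_delta(local_suff, 2, remote_suff, 0)  # -> ['def']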

    def rehash_remote(self, node, job, suffixes):
        try:
            with Timeout(self.http_timeout):
                conn = http_connect(
                    node['replication_ip'], node['replication_port'],
                    node['device'], job['partition'], 'REPLICATE',
                    '/' + '-'.join(sorted(suffixes)),
                    headers=self.headers)
                conn.getresponse().read()
        except (Exception, Timeout):
            self.logger.exception(
                _("Trying to sync suffixes with %s") % self._full_path(
                    node, job['partition'], '', job['policy']))

    def _get_suffixes_to_sync(self, job, node):
        """
        For SYNC jobs we need to make a remote REPLICATE request to get
        the remote node's current suffix's hashes and then compare to our
        local suffix's hashes to decide which suffixes (if any) are out
        of sync.

        :param job: the job dict, with the keys defined in ``_get_part_jobs``
        :param node: the remote node dict
        :returns: a (possibly empty) list of strings, the suffixes to be
                  synced with the remote node.
        """
        # get hashes from the remote node
        remote_suffixes = None
        try:
            with Timeout(self.http_timeout):
                resp = http_connect(
                    node['replication_ip'], node['replication_port'],
                    node['device'], job['partition'], 'REPLICATE',
                    '', headers=self.headers).getresponse()
            if resp.status == HTTP_INSUFFICIENT_STORAGE:
                self.logger.error(
                    _('%s responded as unmounted'),
                    self._full_path(node, job['partition'], '',
                                    job['policy']))
            elif resp.status != HTTP_OK:
                full_path = self._full_path(node, job['partition'], '',
                                            job['policy'])
                self.logger.error(
                    _("Invalid response %(resp)s from %(full_path)s"),
                    {'resp': resp.status, 'full_path': full_path})
            else:
                remote_suffixes = pickle.loads(resp.read())
        except (Exception, Timeout):
            # all exceptions are logged here so that our caller can
            # safely catch our exception and continue to the next node
            # without logging
            self.logger.exception('Unable to get remote suffix hashes '
                                  'from %r' % self._full_path(
                                      node, job['partition'], '',
                                      job['policy']))

        if remote_suffixes is None:
            raise SuffixSyncError('Unable to get remote suffix hashes')

        suffixes = self.get_suffix_delta(job['hashes'],
                                         job['frag_index'],
                                         remote_suffixes,
                                         node['index'])
        # now recalculate local hashes for suffixes that don't
        # match so we're comparing the latest
        local_suff = self._get_hashes(job['policy'], job['path'],
                                      recalculate=suffixes)

        suffixes = self.get_suffix_delta(local_suff,
                                         job['frag_index'],
                                         remote_suffixes,
                                         node['index'])

        self.suffix_count += len(suffixes)
        return suffixes
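
    # A sketch of the flow above with hypothetical suffixes: the first
    # get_suffix_delta() call compares the hashes cached in the job against
    # the remote REPLICATE response, the mismatching suffixes are re-hashed
    # locally, and a second delta is taken so ssync only gets suffixes that
    # are still out of sync, e.g.:
    #
    #   first delta from job['hashes']               -> ['1ab', 'f3e']
    #   _get_hashes(..., recalculate=['1ab', 'f3e'])  refreshes just those
    #   second delta from the fresh local hashes     -> ['f3e']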

    def delete_reverted_objs(self, job, objects, frag_index):
        """
        For EC we can potentially revert only some of a partition
        so we'll delete reverted objects here. Note that we delete
        the fragment index of the file we sent to the remote node.

        :param job: the job being processed
        :param objects: a dict of objects to be deleted, each entry maps
                        hash=>timestamp
        :param frag_index: (int) the fragment index of data files to be deleted
        """
        df_mgr = self._df_router[job['policy']]
        for object_hash, timestamps in objects.items():
            try:
                df = df_mgr.get_diskfile_from_hash(
                    job['local_dev']['device'], job['partition'],
                    object_hash, job['policy'],
                    frag_index=frag_index)
                df.purge(timestamps['ts_data'], frag_index)
            except DiskFileError:
                self.logger.exception(
                    'Unable to purge DiskFile (%r %r %r)',
                    object_hash, timestamps['ts_data'], frag_index)
                continue
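
    # Illustrative shape of the ``objects`` argument (hypothetical values):
    # each entry maps an object hash to the timestamps reported back by
    # ssync, and only 'ts_data' is used here to purge the fragment index
    # that was sent to the remote node, e.g.:
    #
    #   objects = {'a83071bd...': {'ts_data': Timestamp('1430000000.00000')},
    #              '9f02c41e...': {'ts_data': Timestamp('1430000123.00000')}}
    #   self.delete_reverted_objs(job, objects, frag_index=3)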

    def process_job(self, job):
        """
        Sync the local partition with the remote node(s) according to
        the parameters of the job.  For primary nodes, the SYNC job type
        will define both left and right hand sync_to nodes to ssync with
        as defined by this primary nodes index in the node list based on
        the fragment index found in the partition.  For non-primary
        nodes (either handoff revert, or rebalance) the REVERT job will
        define a single node in sync_to which is the proper/new home for
        the fragment index.

        N.B. Because ring rebalancing can be time consuming and handoff
        nodes' fragment indexes do not have a stable order, it's possible
        to have more than one REVERT job for a partition, and in some rare
        failure conditions there may even be a SYNC job for the same
        partition - but each one will be processed separately because
        each job will define a separate list of node(s) to 'sync_to'.

        :param job: the job dict, with the keys defined in ``_get_job_info``
        """
        self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        begin = time.time()
        if job['job_type'] == REVERT:
            self._revert(job, begin)
        else:
            self._sync(job, begin)
        self.partition_times.append(time.time() - begin)
        self.reconstruction_count += 1

    def _sync(self, job, begin):
        """
        Process a SYNC job.
        """
        self.logger.increment(
            'partition.update.count.%s' % (job['local_dev']['device'],))
        # after our left and right partners, if there's some sort of
        # failure we'll continue onto the remaining primary nodes and
        # make sure they're in sync - or potentially rebuild missing
        # fragments we find
        dest_nodes = itertools.chain(
            job['sync_to'],
            # I think we could order these based on our index to better
            # protect against a broken chain
            [
                n for n in
                job['policy'].object_ring.get_part_nodes(job['partition'])
                if n['id'] != job['local_dev']['id'] and
                n['id'] not in (m['id'] for m in job['sync_to'])
            ],
        )
        syncd_with = 0
        for node in dest_nodes:
            if syncd_with >= len(job['sync_to']):
                # success!
                break

            try:
                suffixes = self._get_suffixes_to_sync(job, node)
            except SuffixSyncError:
                continue

            if not suffixes:
                syncd_with += 1
                continue

            # ssync any out-of-sync suffixes with the remote node
            success, _ = ssync_sender(
                self, node, job, suffixes)()
            # let remote end know to rehash its suffixes
            self.rehash_remote(node, job, suffixes)
            # update stats for this attempt
            self.suffix_sync += len(suffixes)
            self.logger.update_stats('suffix.syncs', len(suffixes))
            if success:
                syncd_with += 1
        self.logger.timing_since('partition.update.timing', begin)

    def _revert(self, job, begin):
        """
        Process a REVERT job.
        """
        self.logger.increment(
            'partition.delete.count.%s' % (job['local_dev']['device'],))
        # we'd desperately like to push this partition back to its
        # primary location, but if that node is down, the next best thing
        # is one of the handoff locations - which *might* be us already!
        dest_nodes = itertools.chain(
            job['sync_to'],
            job['policy'].object_ring.get_more_nodes(job['partition']),
        )
        syncd_with = 0
        reverted_objs = {}
        for node in dest_nodes:
            if syncd_with >= len(job['sync_to']):
                break
            if node['id'] == job['local_dev']['id']:
                # this is as good a place as any for this data for now
                break
            success, in_sync_objs = ssync_sender(
                self, node, job, job['suffixes'])()
            self.rehash_remote(node, job, job['suffixes'])
            if success:
                syncd_with += 1
                reverted_objs.update(in_sync_objs)
        if syncd_with >= len(job['sync_to']):
            self.delete_reverted_objs(
                job, reverted_objs, job['frag_index'])
        self.logger.timing_since('partition.delete.timing', begin)

    def _get_part_jobs(self, local_dev, part_path, partition, policy):
        """
        Helper function to build jobs for a partition, this method will
        read the suffix hashes and create job dictionaries to describe
        the needed work.  There will be one job for each fragment index
        discovered in the partition.

        For a fragment index which corresponds to this node's ring
        index, a job with job_type SYNC will be created to ensure that
        the left and right hand primary ring nodes for the part have the
        corresponding left and right hand fragment archives.

        A fragment index (or entire partition) for which this node is
        not the primary corresponding node, will create job(s) with
        job_type REVERT to ensure that fragment archives are pushed to
        the correct node and removed from this one.

        A partition may result in multiple jobs.  Potentially many
        REVERT jobs, and zero or one SYNC job.

        :param local_dev:  the local device
        :param part_path: full path to partition
        :param partition: partition number
        :param policy: the policy

        :returns: a list of dicts of job info
        """
        # find all the fi's in the part, and which suffixes have them
        hashes = self._get_hashes(policy, part_path, do_listdir=True)
        non_data_fragment_suffixes = []
        data_fi_to_suffixes = defaultdict(list)
        for suffix, fi_hash in hashes.items():
            if not fi_hash:
                # this is for sanity and clarity, normally an empty
                # suffix would get del'd from the hashes dict, but an
                # OSError trying to re-hash the suffix could leave the
                # value empty - it will log the exception; but there's
                # no way to properly address this suffix at this time.
                continue
            data_frag_indexes = [f for f in fi_hash if f is not None]
            if not data_frag_indexes:
                non_data_fragment_suffixes.append(suffix)
            else:
                for fi in data_frag_indexes:
                    data_fi_to_suffixes[fi].append(suffix)

        # helper to ensure consistent structure of jobs
        def build_job(job_type, frag_index, suffixes, sync_to):
            return {
                'job_type': job_type,
                'frag_index': frag_index,
                'suffixes': suffixes,
                'sync_to': sync_to,
                'partition': partition,
                'path': part_path,
                'hashes': hashes,
                'policy': policy,
                'local_dev': local_dev,
                # ssync likes to have it handy
                'device': local_dev['device'],
            }

        # aggregate jobs for all the fragment indexes in this part
        jobs = []

        # check the primary nodes - to see if the part belongs here
        part_nodes = policy.object_ring.get_part_nodes(partition)
        for node in part_nodes:
            if node['id'] == local_dev['id']:
                # this partition belongs here, we'll need a sync job
                frag_index = node['index']
                try:
                    suffixes = data_fi_to_suffixes.pop(frag_index)
                except KeyError:
                    suffixes = []
                sync_job = build_job(
                    job_type=SYNC,
                    frag_index=frag_index,
                    suffixes=suffixes,
                    sync_to=_get_partners(frag_index, part_nodes),
                )
                # ssync callback to rebuild missing fragment_archives
                sync_job['sync_diskfile_builder'] = self.reconstruct_fa
                jobs.append(sync_job)
                break

        # assign remaining data fragment suffixes to revert jobs
        ordered_fis = sorted((len(suffixes), fi) for fi, suffixes
                             in data_fi_to_suffixes.items())
        for count, fi in ordered_fis:
            revert_job = build_job(
                job_type=REVERT,
                frag_index=fi,
                suffixes=data_fi_to_suffixes[fi],
                sync_to=[part_nodes[fi]],
            )
            jobs.append(revert_job)

        # now we need to assign suffixes that have no data fragments
        if non_data_fragment_suffixes:
            if jobs:
                # the first job will be either the sync_job, or the
                # revert_job for the fragment index that is most common
                # among the suffixes
                jobs[0]['suffixes'].extend(non_data_fragment_suffixes)
            else:
                # this is an unfortunate situation, we need a revert job to
                # push partitions off this node, but none of the suffixes
                # have any data fragments to hint at which node would be a
                # good candidate to receive the tombstones.
                jobs.append(build_job(
                    job_type=REVERT,
                    frag_index=None,
                    suffixes=non_data_fragment_suffixes,
                    # this is super safe
                    sync_to=part_nodes,
                    # something like this would probably be better
                    # sync_to=random.sample(part_nodes, 3),
                ))
        # return a list of jobs for this part
        return jobs
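
    # Illustrative sketch for a hypothetical partition: if this node is the
    # primary for fragment index 1 and the suffix hashes also show fragment
    # indexes 0 and 4 left behind by a rebalance, the result is one SYNC job
    # plus two REVERT jobs, roughly:
    #
    #   [{'job_type': SYNC, 'frag_index': 1,
    #     'sync_to': _get_partners(1, part_nodes), ...},
    #    {'job_type': REVERT, 'frag_index': 0,
    #     'sync_to': [part_nodes[0]], ...},
    #    {'job_type': REVERT, 'frag_index': 4,
    #     'sync_to': [part_nodes[4]], ...}]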

    def collect_parts(self, override_devices=None,
                      override_partitions=None):
        """
        Helper for yielding partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type != EC_POLICY:
                continue
            self._diskfile_mgr = self._df_router[policy]
            self.load_object_ring(policy)
            data_dir = get_data_dir(policy)
            local_devices = list(six.moves.filter(
                lambda dev: dev and is_local_device(
                    ips, self.port,
                    dev['replication_ip'], dev['replication_port']),
                policy.object_ring.devs))

            if override_devices:
                self.device_count = len(override_devices)
            else:
                self.device_count = len(local_devices)

            for local_dev in local_devices:
                if override_devices and (local_dev['device'] not in
                                         override_devices):
                    continue
                self.reconstruction_device_count += 1
                dev_path = self._df_router[policy].get_dev_path(
                    local_dev['device'])
                if not dev_path:
                    self.logger.warning(_('%s is not mounted'),
                                        local_dev['device'])
                    continue
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() -
                                  self.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception(
                            'Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception(
                        'Unable to list partitions in %r' % obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if not (partition.isdigit() and
                            os.path.isdir(part_path)):
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        remove_file(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition not in
                                                override_partitions):
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    yield part_info
                    self.reconstruction_part_count += 1

    def build_reconstruction_jobs(self, part_info):
        """
        Helper function for collect_jobs to build jobs for reconstruction
        using EC style storage policy
        """
        jobs = self._get_part_jobs(**part_info)
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff revert jobs to the front of the list
            jobs.sort(key=lambda job: job['job_type'], reverse=True)
        self.job_count += len(jobs)
        return jobs

    def _reset_stats(self):
        self.start = time.time()
        self.job_count = 0
        self.part_count = 0
        self.device_count = 0
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.reconstruction_count = 0
        self.reconstruction_part_count = 0
        self.reconstruction_device_count = 0
        self.last_reconstruction_count = -1

    def delete_partition(self, path):
        self.logger.info(_("Removing partition: %s"), path)
        tpool.execute(shutil.rmtree, path, ignore_errors=True)

    def reconstruct(self, **kwargs):
        """Run a reconstruction pass"""
        self._reset_stats()
        self.partition_times = []

        stats = spawn(self.heartbeat)
        lockup_detector = spawn(self.detect_lockups)
        sleep()  # Give spawns a cycle

        try:
            self.run_pool = GreenPool(size=self.concurrency)
            for part_info in self.collect_parts(**kwargs):
                if not self.check_ring(part_info['policy'].object_ring):
                    self.logger.info(_("Ring change detected. Aborting "
                                       "current reconstruction pass."))
                    return
                jobs = self.build_reconstruction_jobs(part_info)
                if not jobs:
                    # If this part belongs on this node, _get_part_jobs
                    # will *always* build a sync_job - even if there's
                    # no suffixes in the partition that needs to sync.
                    # If there's any suffixes in the partition then our
                    # job list would have *at least* one revert job.
                    # Therefore we know this part a) doesn't belong on
                    # this node and b) doesn't have any suffixes in it.
                    self.run_pool.spawn(self.delete_partition,
                                        part_info['part_path'])
                for job in jobs:
                    self.run_pool.spawn(self.process_job, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level "
                                    "reconstruction loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object reconstructor in script mode."))
        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = [int(p) for p in
                               list_from_csv(kwargs.get('partitions'))]
        self.reconstruct(
            override_devices=override_devices,
            override_partitions=override_partitions)
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object reconstruction complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            dump_recon_cache({'object_reconstruction_time': total,
                              'object_reconstruction_last': time.time()},
                             self.rcache, self.logger)

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object reconstructor in daemon mode."))
        # Run the reconstructor continually
        while True:
            start = time.time()
            self.logger.info(_("Starting object reconstruction pass."))
            # Run the reconstructor
            self.reconstruct()
            total = (time.time() - start) / 60
            self.logger.info(
                _("Object reconstruction complete. (%.02f minutes)"), total)
            dump_recon_cache({'object_reconstruction_time': total,
                              'object_reconstruction_last': time.time()},
                             self.rcache, self.logger)
            self.logger.debug('reconstruction sleeping for %s seconds.',
                              self.interval)
            sleep(self.interval)
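
# A minimal usage sketch, assuming an ObjectReconstructor built from a conf
# dict and a logging object (both hypothetical here); run_once() honours the
# optional 'devices' and 'partitions' CSV overrides parsed above:
#
#   reconstructor = ObjectReconstructor({'devices': '/srv/node',
#                                        'concurrency': 4}, logger=my_logger)
#   reconstructor.run_once(devices='sdb1,sdc1', partitions='1024,2048')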
Ejemplo n.º 44
0
    def reap_container(self, account, account_partition, account_nodes,
                       container):
        """
        Deletes the data and the container itself for the given container. This
        will call :func:`reap_object` up to sqrt(self.concurrency) times
        concurrently for the objects in the container.

        If there is any exception while deleting a single object, the process
        will continue for any other objects in the container and the failed
        objects will be tried again the next time this function is called with
        the same parameters.

        If there is any exception while listing the objects for deletion, the
        process will stop (but will obviously be tried again the next time this
        function is called with the same parameters). This is a possibility
        since the listing comes from querying just the primary remote container
        server.

        Once deletion has been attempted for all objects, an attempt will be
        made to delete the container itself by sending a delete request to
        all container nodes. The format of the delete request is such that each
        container server will update a corresponding account server, removing
        the container from the account's listing.

        This function returns nothing and should raise no exception but only
        update various self.stats_* values for what occurs.

        :param account: The name of the account for the container.
        :param account_partition: The partition for the account on the account
                                  ring.
        :param account_nodes: The primary node dicts for the account.
        :param container: The name of the container to delete.

        * See also: :func:`swift.common.ring.Ring.get_nodes` for a description
          of the account node dicts.
        """
        account_nodes = list(account_nodes)
        part, nodes = self.get_container_ring().get_nodes(account, container)
        node = nodes[-1]
        pool = GreenPool(size=self.object_concurrency)
        marker = ''
        while True:
            objects = None
            try:
                objects = direct_get_container(
                    node,
                    part,
                    account,
                    container,
                    marker=marker,
                    conn_timeout=self.conn_timeout,
                    response_timeout=self.node_timeout)[1]
                self.stats_return_codes[2] = \
                    self.stats_return_codes.get(2, 0) + 1
            except ClientException as err:
                if self.logger.getEffectiveLevel() <= DEBUG:
                    self.logger.exception(
                        _('Exception with %(ip)s:%(port)s/%(device)s'), node)
                self.stats_return_codes[err.http_status // 100] = \
                    self.stats_return_codes.get(err.http_status // 100, 0) + 1
            if not objects:
                break
            try:
                for obj in objects:
                    if isinstance(obj['name'], unicode):
                        obj['name'] = obj['name'].encode('utf8')
                    pool.spawn(self.reap_object, account, container, part,
                               nodes, obj['name'])
                pool.waitall()
            except Exception:
                self.logger.exception(
                    _('Exception with objects for container '
                      '%(container)s for account %(account)s'), {
                          'container': container,
                          'account': account
                      })
            marker = objects[-1]['name']
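
        # The while loop above pages through the container listing using a
        # marker; a sketch of the pattern with hypothetical object names:
        #
        #   pass 1: direct_get_container(..., marker='')   lists a, b, c
        #           each object is reaped and marker becomes 'c'
        #   pass 2: direct_get_container(..., marker='c')  lists d
        #           each object is reaped and marker becomes 'd'
        #   pass 3: direct_get_container(..., marker='d')  lists nothing,
        #           so the loop breaks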
Ejemplo n.º 45
0
class ObjectReconstructor(Daemon):
    """
    Reconstruct objects using erasure coding, and rebalance EC Fragment
    Archive objects off handoff nodes.

    Encapsulates most logic and data needed by the object reconstruction
    process. Each call to .reconstruct() performs one pass.  It's up to the
    caller to do this in a loop.
    """
    def __init__(self, conf, logger=None):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = logger or get_logger(conf,
                                           log_route='object-reconstructor')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.interval = int(
            conf.get('interval') or conf.get('run_pause') or 30)
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        # defaults subject to change after beta
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.headers = {
            'Content-Length': '0',
            'user-agent': 'obj-reconstructor %s' % os.getpid()
        }
        self.handoffs_first = config_true_value(
            conf.get('handoffs_first', False))
        self._df_router = DiskFileRouter(conf, self.logger)
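
        # Illustrative configuration (hypothetical section name and values;
        # every option shown is read above, listed here with its default):
        #
        #   [object-reconstructor]
        #   concurrency = 1
        #   stats_interval = 300
        #   ring_check_interval = 15
        #   reclaim_age = 604800
        #   interval = 30
        #   http_timeout = 60
        #   lockup_timeout = 1800
        #   node_timeout = 10
        #   handoffs_first = false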

    def load_object_ring(self, policy):
        """
        Make sure the policy's rings are loaded.

        :param policy: the StoragePolicy instance
        :returns: appropriate ring object
        """
        policy.load_ring(self.swift_dir)
        return policy.object_ring

    def check_ring(self, object_ring):
        """
        Check to see if the ring has been updated

        :param object_ring: the ring to check
        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if object_ring.has_changed():
                return False
        return True

    def _full_path(self, node, part, path, policy):
        return '%(replication_ip)s:%(replication_port)s' \
            '/%(device)s/%(part)s%(path)s ' \
            'policy#%(policy)d frag#%(frag_index)s' % {
                'replication_ip': node['replication_ip'],
                'replication_port': node['replication_port'],
                'device': node['device'],
                'part': part, 'path': path,
                'policy': policy,
                'frag_index': node.get('index', 'handoff'),
            }
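
        # Illustrative output for a hypothetical node dict and a policy whose
        # index is 1:
        #   node = {'replication_ip': '10.0.0.2', 'replication_port': 6000,
        #           'device': 'sdb1', 'index': 3}
        #   self._full_path(node, 1024, '/a/c/o', policy)
        #   -> '10.0.0.2:6000/sdb1/1024/a/c/o policy#1 frag#3'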

    def _get_response(self, node, part, path, headers, policy):
        """
        Helper method for reconstruction that GETs a single EC fragment
        archive

        :param node: the node to GET from
        :param part: the partition
        :param path: full path of the desired EC archive
        :param headers: the headers to send
        :param policy: an instance of
                       :class:`~swift.common.storage_policy.BaseStoragePolicy`
        :returns: response
        """
        resp = None
        try:
            with ConnectionTimeout(self.conn_timeout):
                conn = http_connect(node['ip'],
                                    node['port'],
                                    node['device'],
                                    part,
                                    'GET',
                                    path,
                                    headers=headers)
            with Timeout(self.node_timeout):
                resp = conn.getresponse()
            if resp.status not in [HTTP_OK, HTTP_NOT_FOUND]:
                self.logger.warning(
                    _("Invalid response %(resp)s from %(full_path)s"), {
                        'resp': resp.status,
                        'full_path': self._full_path(node, part, path, policy)
                    })
                resp = None
            elif resp.status == HTTP_NOT_FOUND:
                resp = None
        except (Exception, Timeout):
            self.logger.exception(
                _("Trying to GET %(full_path)s"),
                {'full_path': self._full_path(node, part, path, policy)})
        return resp

    def reconstruct_fa(self, job, node, datafile_metadata):
        """
        Reconstructs a fragment archive - this method is called from ssync
        after a remote node responds that it is missing this object - the local
        diskfile is opened to provide metadata - but to reconstruct the
        missing fragment archive we must connect to multiple object servers.

        :param job: job from ssync_sender
        :param node: node that we're rebuilding to
        :param datafile_metadata:  the datafile metadata to attach to
                                   the rebuilt fragment archive
        :returns: a DiskFile like class for use by ssync
        :raises DiskFileError: if the fragment archive cannot be reconstructed
        """

        part_nodes = job['policy'].object_ring.get_part_nodes(job['partition'])
        part_nodes.remove(node)

        # the fragment index we need to reconstruct is the position index
        # of the node we're rebuilding to within the primary part list
        fi_to_rebuild = node['index']

        # KISS send out connection requests to all nodes, see what sticks
        headers = self.headers.copy()
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        pile = GreenAsyncPile(len(part_nodes))
        path = datafile_metadata['name']
        for node in part_nodes:
            pile.spawn(self._get_response, node, job['partition'], path,
                       headers, job['policy'])
        responses = []
        etag = None
        for resp in pile:
            if not resp:
                continue
            resp.headers = HeaderKeyDict(resp.getheaders())
            if str(fi_to_rebuild) == \
                    resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index'):
                continue
            if resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index') in set(
                    r.headers.get('X-Object-Sysmeta-Ec-Frag-Index')
                    for r in responses):
                continue
            responses.append(resp)
            etag = sorted(
                responses,
                reverse=True,
                key=lambda r: Timestamp(r.headers.get('X-Backend-Timestamp'))
            )[0].headers.get('X-Object-Sysmeta-Ec-Etag')
            responses = [
                r for r in responses
                if r.headers.get('X-Object-Sysmeta-Ec-Etag') == etag
            ]

            if len(responses) >= job['policy'].ec_ndata:
                break
        else:
            self.logger.error('Unable to get enough responses (%s/%s) '
                              'to reconstruct %s with ETag %s' %
                              (len(responses), job['policy'].ec_ndata,
                               self._full_path(node, job['partition'],
                                               datafile_metadata['name'],
                                               job['policy']), etag))
            raise DiskFileError('Unable to reconstruct EC archive')

        rebuilt_fragment_iter = self.make_rebuilt_fragment_iter(
            responses[:job['policy'].ec_ndata], path, job['policy'],
            fi_to_rebuild)
        return RebuildingECDiskFileStream(datafile_metadata, fi_to_rebuild,
                                          rebuilt_fragment_iter)
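
    # A rough walk-through of the response selection above, with a
    # hypothetical ec_ndata of 4: GETs are fired at every other primary;
    # responses that carry the fragment index being rebuilt, or a fragment
    # index already collected, are skipped; the ETag belonging to the newest
    # X-Backend-Timestamp seen so far is treated as authoritative and older
    # ETags are discarded; the loop stops as soon as 4 usable fragment
    # archives remain, otherwise DiskFileError is raised.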

    def _reconstruct(self, policy, fragment_payload, frag_index):
        return policy.pyeclib_driver.reconstruct(fragment_payload,
                                                 [frag_index])[0]

    def make_rebuilt_fragment_iter(self, responses, path, policy, frag_index):
        """
        Turn a set of connections from backend object servers into a generator
        that yields up the rebuilt fragment archive for frag_index.
        """
        def _get_one_fragment(resp):
            buff = ''
            remaining_bytes = policy.fragment_size
            while remaining_bytes:
                chunk = resp.read(remaining_bytes)
                if not chunk:
                    break
                remaining_bytes -= len(chunk)
                buff += chunk
            return buff

        def fragment_payload_iter():
            # We need a fragment from each connection, so it's best to
            # use a GreenPile to keep them ordered and in sync
            pile = GreenPile(len(responses))
            while True:
                for resp in responses:
                    pile.spawn(_get_one_fragment, resp)
                try:
                    with Timeout(self.node_timeout):
                        fragment_payload = [fragment for fragment in pile]
                except (Exception, Timeout):
                    self.logger.exception(
                        _("Error trying to rebuild %(path)s "
                          "policy#%(policy)d frag#%(frag_index)s"), {
                              'path': path,
                              'policy': policy,
                              'frag_index': frag_index,
                          })
                    break
                if not all(fragment_payload):
                    break
                rebuilt_fragment = self._reconstruct(policy, fragment_payload,
                                                     frag_index)
                yield rebuilt_fragment

        return fragment_payload_iter()

    def stats_line(self):
        """
        Logs various stats for the currently running reconstruction pass.
        """
        if (self.device_count and self.part_count
                and self.reconstruction_device_count):
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.reconstruction_part_count / elapsed
            total_part_count = (self.part_count * self.device_count /
                                self.reconstruction_device_count)
            self.logger.info(
                _("%(reconstructed)d/%(total)d (%(percentage).2f%%)"
                  " partitions of %(device)d/%(dtotal)d "
                  "(%(dpercentage).2f%%) devices"
                  " reconstructed in %(time).2fs "
                  "(%(rate).2f/sec, %(remaining)s remaining)"), {
                      'reconstructed':
                      self.reconstruction_part_count,
                      'total':
                      self.part_count,
                      'percentage':
                      self.reconstruction_part_count * 100.0 / self.part_count,
                      'device':
                      self.reconstruction_device_count,
                      'dtotal':
                      self.device_count,
                      'dpercentage':
                      self.reconstruction_device_count * 100.0 /
                      self.device_count,
                      'time':
                      time.time() - self.start,
                      'rate':
                      rate,
                      'remaining':
                      '%d%s' %
                      compute_eta(self.start, self.reconstruction_part_count,
                                  total_part_count)
                  })

            if self.suffix_count and self.partition_times:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"), {
                          'checked': self.suffix_count,
                          'hashed':
                          (self.suffix_hash * 100.0) / self.suffix_count,
                          'synced':
                          (self.suffix_sync * 100.0) / self.suffix_count
                      })
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"), {
                          'max': self.partition_times[-1],
                          'min': self.partition_times[0],
                          'med':
                          self.partition_times[len(self.partition_times) // 2]
                      })
        else:
            self.logger.info(_("Nothing reconstructed for %s seconds."),
                             (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during reconstruction.  It
        periodically logs progress.
        """
        while True:
            sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the reconstructor eventually finishes
        its reconstruction pass even if that happens.
        """
        while True:
            sleep(self.lockup_timeout)
            if self.reconstruction_count == self.last_reconstruction_count:
                self.logger.error(_("Lockup detected... killing live coros."))
                self.kill_coros()
            self.last_reconstruction_count = self.reconstruction_count

    def _get_hashes(self, policy, path, recalculate=None, do_listdir=False):
        df_mgr = self._df_router[policy]
        hashed, suffix_hashes = tpool_reraise(df_mgr._get_hashes,
                                              path,
                                              recalculate=recalculate,
                                              do_listdir=do_listdir,
                                              reclaim_age=self.reclaim_age)
        self.logger.update_stats('suffix.hashes', hashed)
        return suffix_hashes

    def get_suffix_delta(self, local_suff, local_index, remote_suff,
                         remote_index):
        """
        Compare the local suffix hashes with the remote suffix hashes
        for the given local and remote fragment indexes.  Return those
        suffixes which should be synced.

        :param local_suff: the local suffix hashes (from _get_hashes)
        :param local_index: the local fragment index for the job
        :param remote_suff: the remote suffix hashes (from remote
                            REPLICATE request)
        :param remote_index: the remote fragment index for the job

        :returns: a list of strings, the suffix dirs to sync
        """
        suffixes = []
        for suffix, sub_dict_local in local_suff.items():
            sub_dict_remote = remote_suff.get(suffix, {})
            if (sub_dict_local.get(None) != sub_dict_remote.get(None)
                    or sub_dict_local.get(local_index) !=
                    sub_dict_remote.get(remote_index)):
                suffixes.append(suffix)
        return suffixes

    def rehash_remote(self, node, job, suffixes):
        try:
            with Timeout(self.http_timeout):
                conn = http_connect(node['replication_ip'],
                                    node['replication_port'],
                                    node['device'],
                                    job['partition'],
                                    'REPLICATE',
                                    '/' + '-'.join(sorted(suffixes)),
                                    headers=self.headers)
                conn.getresponse().read()
        except (Exception, Timeout):
            self.logger.exception(
                _("Trying to sync suffixes with %s") %
                self._full_path(node, job['partition'], '', job['policy']))

    def _get_suffixes_to_sync(self, job, node):
        """
        For SYNC jobs we need to make a remote REPLICATE request to get
        the remote node's current suffix's hashes and then compare to our
        local suffix's hashes to decide which suffixes (if any) are out
        of sync.

        :param job: the job dict, with the keys defined in ``_get_part_jobs``
        :param node: the remote node dict
        :returns: a (possibly empty) list of strings, the suffixes to be
                  synced with the remote node.
        """
        # get hashes from the remote node
        remote_suffixes = None
        try:
            with Timeout(self.http_timeout):
                resp = http_connect(node['replication_ip'],
                                    node['replication_port'],
                                    node['device'],
                                    job['partition'],
                                    'REPLICATE',
                                    '',
                                    headers=self.headers).getresponse()
            if resp.status == HTTP_INSUFFICIENT_STORAGE:
                self.logger.error(
                    _('%s responded as unmounted'),
                    self._full_path(node, job['partition'], '', job['policy']))
            elif resp.status != HTTP_OK:
                full_path = self._full_path(node, job['partition'], '',
                                            job['policy'])
                self.logger.error(
                    _("Invalid response %(resp)s from %(full_path)s"), {
                        'resp': resp.status,
                        'full_path': full_path
                    })
            else:
                remote_suffixes = pickle.loads(resp.read())
        except (Exception, Timeout):
            # all exceptions are logged here so that our caller can
            # safely catch our exception and continue to the next node
            # without logging
            self.logger.exception(
                'Unable to get remote suffix hashes '
                'from %r' %
                self._full_path(node, job['partition'], '', job['policy']))

        if remote_suffixes is None:
            raise SuffixSyncError('Unable to get remote suffix hashes')

        suffixes = self.get_suffix_delta(job['hashes'], job['frag_index'],
                                         remote_suffixes, node['index'])
        # now recalculate local hashes for suffixes that don't
        # match so we're comparing the latest
        local_suff = self._get_hashes(job['policy'],
                                      job['path'],
                                      recalculate=suffixes)

        suffixes = self.get_suffix_delta(local_suff, job['frag_index'],
                                         remote_suffixes, node['index'])

        self.suffix_count += len(suffixes)
        return suffixes

    def delete_reverted_objs(self, job, objects, frag_index):
        """
        For EC we can potentially revert only some of a partition
        so we'll delete reverted objects here. Note that we delete
        the fragment index of the file we sent to the remote node.

        :param job: the job being processed
        :param objects: a dict of objects to be deleted, each entry maps
                        hash=>timestamp
        :param frag_index: (int) the fragment index of data files to be deleted
        """
        df_mgr = self._df_router[job['policy']]
        for object_hash, timestamps in objects.items():
            try:
                df = df_mgr.get_diskfile_from_hash(job['local_dev']['device'],
                                                   job['partition'],
                                                   object_hash,
                                                   job['policy'],
                                                   frag_index=frag_index)
                df.purge(timestamps['ts_data'], frag_index)
            except DiskFileError:
                self.logger.exception('Unable to purge DiskFile (%r %r %r)',
                                      object_hash, timestamps['ts_data'],
                                      frag_index)
                continue

    def process_job(self, job):
        """
        Sync the local partition with the remote node(s) according to
        the parameters of the job.  For primary nodes, the SYNC job type
        will define both left and right hand sync_to nodes to ssync with
        as defined by this primary nodes index in the node list based on
        the fragment index found in the partition.  For non-primary
        nodes (either handoff revert, or rebalance) the REVERT job will
        define a single node in sync_to which is the proper/new home for
        the fragment index.

        N.B. Because ring rebalancing can be time consuming and handoff
        nodes' fragment indexes do not have a stable order, it's possible
        to have more than one REVERT job for a partition, and in some rare
        failure conditions there may even be a SYNC job for the same
        partition - but each one will be processed separately because
        each job will define a separate list of node(s) to 'sync_to'.

        :param job: the job dict, with the keys defined in ``_get_job_info``
        """
        self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        begin = time.time()
        if job['job_type'] == REVERT:
            self._revert(job, begin)
        else:
            self._sync(job, begin)
        self.partition_times.append(time.time() - begin)
        self.reconstruction_count += 1

    def _sync(self, job, begin):
        """
        Process a SYNC job.
        """
        self.logger.increment('partition.update.count.%s' %
                              (job['local_dev']['device'], ))
        # after our left and right partners, if there's some sort of
        # failure we'll continue onto the remaining primary nodes and
        # make sure they're in sync - or potentially rebuild missing
        # fragments we find
        dest_nodes = itertools.chain(
            job['sync_to'],
            # I think we could order these based on our index to better
            # protect against a broken chain
            [
                n for n in job['policy'].object_ring.get_part_nodes(
                    job['partition']) if n['id'] != job['local_dev']['id']
                and n['id'] not in (m['id'] for m in job['sync_to'])
            ],
        )
        syncd_with = 0
        for node in dest_nodes:
            if syncd_with >= len(job['sync_to']):
                # success!
                break

            try:
                suffixes = self._get_suffixes_to_sync(job, node)
            except SuffixSyncError:
                continue

            if not suffixes:
                syncd_with += 1
                continue

            # ssync any out-of-sync suffixes with the remote node
            success, _ = ssync_sender(self, node, job, suffixes)()
            # let remote end know to rehash its suffixes
            self.rehash_remote(node, job, suffixes)
            # update stats for this attempt
            self.suffix_sync += len(suffixes)
            self.logger.update_stats('suffix.syncs', len(suffixes))
            if success:
                syncd_with += 1
        self.logger.timing_since('partition.update.timing', begin)

    def _revert(self, job, begin):
        """
        Process a REVERT job.
        """
        self.logger.increment('partition.delete.count.%s' %
                              (job['local_dev']['device'], ))
        # we'd desperately like to push this partition back to its
        # primary location, but if that node is down, the next best thing
        # is one of the handoff locations - which *might* be us already!
        dest_nodes = itertools.chain(
            job['sync_to'],
            job['policy'].object_ring.get_more_nodes(job['partition']),
        )
        syncd_with = 0
        reverted_objs = {}
        for node in dest_nodes:
            if syncd_with >= len(job['sync_to']):
                break
            if node['id'] == job['local_dev']['id']:
                # this is as good a place as any for this data for now
                break
            success, in_sync_objs = ssync_sender(self, node, job,
                                                 job['suffixes'])()
            self.rehash_remote(node, job, job['suffixes'])
            if success:
                syncd_with += 1
                reverted_objs.update(in_sync_objs)
        if syncd_with >= len(job['sync_to']):
            self.delete_reverted_objs(job, reverted_objs, job['frag_index'])
        self.logger.timing_since('partition.delete.timing', begin)

    def _get_part_jobs(self, local_dev, part_path, partition, policy):
        """
        Helper function to build jobs for a partition, this method will
        read the suffix hashes and create job dictionaries to describe
        the needed work.  There will be one job for each fragment index
        discovered in the partition.

        For a fragment index which corresponds to this node's ring
        index, a job with job_type SYNC will be created to ensure that
        the left and right hand primary ring nodes for the part have the
        corresponding left and right hand fragment archives.

        A fragment index (or entire partition) for which this node is
        not the primary corresponding node, will create job(s) with
        job_type REVERT to ensure that fragment archives are pushed to
        the correct node and removed from this one.

        A partition may result in multiple jobs.  Potentially many
        REVERT jobs, and zero or one SYNC job.

        :param local_dev:  the local device
        :param part_path: full path to partition
        :param partition: partition number
        :param policy: the policy

        :returns: a list of dicts of job info
        """
        # find all the fi's in the part, and which suffixes have them
        hashes = self._get_hashes(policy, part_path, do_listdir=True)
        non_data_fragment_suffixes = []
        data_fi_to_suffixes = defaultdict(list)
        for suffix, fi_hash in hashes.items():
            if not fi_hash:
                # this is for sanity and clarity, normally an empty
                # suffix would get del'd from the hashes dict, but an
                # OSError trying to re-hash the suffix could leave the
                # value empty - it will log the exception; but there's
                # no way to properly address this suffix at this time.
                continue
            data_frag_indexes = [f for f in fi_hash if f is not None]
            if not data_frag_indexes:
                non_data_fragment_suffixes.append(suffix)
            else:
                for fi in data_frag_indexes:
                    data_fi_to_suffixes[fi].append(suffix)

        # helper to ensure consistent structure of jobs
        def build_job(job_type, frag_index, suffixes, sync_to):
            return {
                'job_type': job_type,
                'frag_index': frag_index,
                'suffixes': suffixes,
                'sync_to': sync_to,
                'partition': partition,
                'path': part_path,
                'hashes': hashes,
                'policy': policy,
                'local_dev': local_dev,
                # ssync likes to have it handy
                'device': local_dev['device'],
            }

        # aggregate jobs for all the fragment indexes in this part
        jobs = []

        # check the primary nodes - to see if the part belongs here
        part_nodes = policy.object_ring.get_part_nodes(partition)
        for node in part_nodes:
            if node['id'] == local_dev['id']:
                # this partition belongs here, we'll need a sync job
                frag_index = node['index']
                try:
                    suffixes = data_fi_to_suffixes.pop(frag_index)
                except KeyError:
                    suffixes = []
                sync_job = build_job(
                    job_type=SYNC,
                    frag_index=frag_index,
                    suffixes=suffixes,
                    sync_to=_get_partners(frag_index, part_nodes),
                )
                # ssync callback to rebuild missing fragment_archives
                sync_job['sync_diskfile_builder'] = self.reconstruct_fa
                jobs.append(sync_job)
                break

        # assign remaining data fragment suffixes to revert jobs
        ordered_fis = sorted((len(suffixes), fi)
                             for fi, suffixes in data_fi_to_suffixes.items())
        for count, fi in ordered_fis:
            revert_job = build_job(
                job_type=REVERT,
                frag_index=fi,
                suffixes=data_fi_to_suffixes[fi],
                sync_to=[part_nodes[fi]],
            )
            jobs.append(revert_job)

        # now we need to assign suffixes that have no data fragments
        if non_data_fragment_suffixes:
            if jobs:
                # the first job will be either the sync_job, or the
                # revert_job for the fragment index that is most common
                # among the suffixes
                jobs[0]['suffixes'].extend(non_data_fragment_suffixes)
            else:
                # this is an unfortunate situation: we need a revert job to
                # push partitions off this node, but none of the suffixes
                # have any data fragments to hint at which node would be a
                # good candidate to receive the tombstones.
                jobs.append(
                    build_job(
                        job_type=REVERT,
                        frag_index=None,
                        suffixes=non_data_fragment_suffixes,
                        # this is super safe
                        sync_to=part_nodes,
                        # something like this would probably be better
                        # sync_to=random.sample(part_nodes, 3),
                    ))
        # return a list of jobs for this part
        return jobs

    def collect_parts(self, override_devices=None, override_partitions=None):
        """
        Helper for yielding partitions in the top level reconstructor
        """
        override_devices = override_devices or []
        override_partitions = override_partitions or []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type != EC_POLICY:
                continue
            self._diskfile_mgr = self._df_router[policy]
            self.load_object_ring(policy)
            data_dir = get_data_dir(policy)
            local_devices = list(
                six.moves.filter(
                    lambda dev: dev and is_local_device(
                        ips, self.port, dev['replication_ip'], dev[
                            'replication_port']), policy.object_ring.devs))

            if override_devices:
                self.device_count = len(override_devices)
            else:
                self.device_count = len(local_devices)

            for local_dev in local_devices:
                if override_devices and (local_dev['device']
                                         not in override_devices):
                    continue
                self.reconstruction_device_count += 1
                dev_path = self._df_router[policy].get_dev_path(
                    local_dev['device'])
                if not dev_path:
                    self.logger.warning(_('%s is not mounted'),
                                        local_dev['device'])
                    continue
                obj_path = join(dev_path, data_dir)
                tmp_path = join(dev_path, get_tmp_dir(int(policy)))
                unlink_older_than(tmp_path, time.time() - self.reclaim_age)
                if not os.path.exists(obj_path):
                    try:
                        mkdirs(obj_path)
                    except Exception:
                        self.logger.exception('Unable to create %s' % obj_path)
                    continue
                try:
                    partitions = os.listdir(obj_path)
                except OSError:
                    self.logger.exception('Unable to list partitions in %r' %
                                          obj_path)
                    continue

                self.part_count += len(partitions)
                for partition in partitions:
                    part_path = join(obj_path, partition)
                    if not (partition.isdigit() and os.path.isdir(part_path)):
                        self.logger.warning(
                            'Unexpected entity in data dir: %r' % part_path)
                        remove_file(part_path)
                        self.reconstruction_part_count += 1
                        continue
                    partition = int(partition)
                    if override_partitions and (partition
                                                not in override_partitions):
                        continue
                    part_info = {
                        'local_dev': local_dev,
                        'policy': policy,
                        'partition': partition,
                        'part_path': part_path,
                    }
                    yield part_info
                    self.reconstruction_part_count += 1

    def build_reconstruction_jobs(self, part_info):
        """
        Helper function for collect_jobs to build jobs for reconstruction
        using EC style storage policy
        """
        jobs = self._get_part_jobs(**part_info)
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff revert jobs to the front of the list
            jobs.sort(key=lambda job: job['job_type'], reverse=True)
        self.job_count += len(jobs)
        return jobs

    def _reset_stats(self):
        self.start = time.time()
        self.job_count = 0
        self.part_count = 0
        self.device_count = 0
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.reconstruction_count = 0
        self.reconstruction_part_count = 0
        self.reconstruction_device_count = 0
        self.last_reconstruction_count = -1

    def delete_partition(self, path):
        self.logger.info(_("Removing partition: %s"), path)
        tpool.execute(shutil.rmtree, path, ignore_errors=True)

    def reconstruct(self, **kwargs):
        """Run a reconstruction pass"""
        self._reset_stats()
        self.partition_times = []

        stats = spawn(self.heartbeat)
        lockup_detector = spawn(self.detect_lockups)
        sleep()  # Give spawns a cycle

        try:
            self.run_pool = GreenPool(size=self.concurrency)
            for part_info in self.collect_parts(**kwargs):
                if not self.check_ring(part_info['policy'].object_ring):
                    self.logger.info(
                        _("Ring change detected. Aborting "
                          "current reconstruction pass."))
                    return
                jobs = self.build_reconstruction_jobs(part_info)
                if not jobs:
                    # If this part belongs on this node, _get_part_jobs
                    # will *always* build a sync_job - even if there are
                    # no suffixes in the partition that need to sync.
                    # If there are any suffixes in the partition then our
                    # job list would have *at least* one revert job.
                    # Therefore we know this part a) doesn't belong on
                    # this node and b) doesn't have any suffixes in it.
                    self.run_pool.spawn(self.delete_partition,
                                        part_info['part_path'])
                for job in jobs:
                    self.run_pool.spawn(self.process_job, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(
                _("Exception in top-level"
                  "reconstruction loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object reconstructor in script mode."))
        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = [
            int(p) for p in list_from_csv(kwargs.get('partitions'))
        ]
        self.reconstruct(override_devices=override_devices,
                         override_partitions=override_partitions)
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object reconstruction complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            dump_recon_cache(
                {
                    'object_reconstruction_time': total,
                    'object_reconstruction_last': time.time()
                }, self.rcache, self.logger)

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object reconstructor in daemon mode."))
        # Run the reconstructor continually
        while True:
            start = time.time()
            self.logger.info(_("Starting object reconstruction pass."))
            # Run the reconstructor
            self.reconstruct()
            total = (time.time() - start) / 60
            self.logger.info(
                _("Object reconstruction complete. (%.02f minutes)"), total)
            dump_recon_cache(
                {
                    'object_reconstruction_time': total,
                    'object_reconstruction_last': time.time()
                }, self.rcache, self.logger)
            self.logger.debug('reconstruction sleeping for %s seconds.',
                              self.interval)
            sleep(self.interval)
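A minimal sketch of driving one reconstruction pass in script mode, assuming these methods belong to swift.obj.reconstructor.ObjectReconstructor and that readconf is importable from swift.common.utils; the devices/partitions keyword arguments are the comma-separated strings that run_once() above parses with list_from_csv:

    from swift.common.utils import readconf
    from swift.obj.reconstructor import ObjectReconstructor

    # read the [object-reconstructor] section of the object server config
    conf = readconf('/etc/swift/object-server.conf', 'object-reconstructor')
    reconstructor = ObjectReconstructor(conf)
    # limit the pass to a single device and partition (illustrative values)
    reconstructor.run_once(devices='sdb1', partitions='1024')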
Example No. 46
0
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """
    def __init__(self, conf):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
                              ('true', 't', '1', 'on', 'yes', 'y')
        self.vm_test_mode = conf.get('vm_test_mode',
                                     'no').lower() in ('yes', 'true', 'on',
                                                       '1')
        self.chase_dir = conf.get('chase_dir', '/etc/chase')
        self.port = int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.object_ring = Ring(join(self.chase_dir, 'object.ring.gz'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.run_pause = int(conf.get('run_pause', 30))
        self.rsync_timeout = int(conf.get('rsync_timeout', 900))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_enable = conf.get('recon_enable',
                                     'no').lower() in TRUE_VALUES
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/chase')
        self.recon_object = os.path.join(self.recon_cache_path, "object.recon")

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        ret_val = None
        proc = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            if proc:
                proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            self.logger.error(_('Bad rsync return code: %(args)s -> %(ret)d'),
                              {
                                  'args': str(args),
                                  'ret': ret_val
                              })
        elif results:
            self.logger.info(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"), {
                    'src': args[-2],
                    'dst': args[-1],
                    'time': total_time
                })
        else:
            self.logger.debug(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"), {
                    'src': args[-2],
                    'dst': args[-1],
                    'time': total_time
                })
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean indicating success or failure
        """
        if not os.path.exists(job['path']):
            return False
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
        ]
        if self.vm_test_mode:
            rsync_module = '[%s]::object%s' % (node['ip'], node['port'])
        else:
            rsync_module = '[%s]::object' % node['ip']
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False
        args.append(
            join(rsync_module, node['device'], 'objects', job['partition']))
        return self._rsync(args) == 0

    def check_ring(self):
        """
        Check to see if the ring has been updated

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if self.object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """
        def tpool_get_suffixes(path):
            return [
                suff for suff in os.listdir(path)
                if len(suff) == 3 and isdir(join(path, suff))
            ]

        self.replication_count += 1
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            if suffixes:
                for node in job['nodes']:
                    success = self.rsync(node, job, suffixes)
                    if success:
                        with Timeout(self.http_timeout):
                            http_connect(node['ip'],
                                         node['port'],
                                         node['device'],
                                         job['partition'],
                                         'REPLICATE',
                                         '/' + '-'.join(suffixes),
                                         headers={
                                             'Content-Length': '0'
                                         }).getresponse().read()
                    responses.append(success)
            if not suffixes or (len(responses) ==
                                self.object_ring.replica_count and
                                all(responses)):
                self.logger.info(_("Removing partition: %s"), job['path'])
                tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        begin = time.time()
        try:
            hashed, local_hash = tpool.execute(
                tpooled_get_hashes,
                job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            # See tpooled_get_hashes "Hack".
            if isinstance(hashed, BaseException):
                raise hashed
            self.suffix_hash += hashed
            attempts_left = self.object_ring.replica_count - 1
            nodes = itertools.chain(
                job['nodes'],
                self.object_ring.get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['ip'],
                                            node['port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '',
                                            headers={
                                                'Content-Length': '0'
                                            }).getresponse()
                        if resp.status == 507:
                            self.logger.error(
                                _('%(ip)s/%(device)s responded'
                                  ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != 200:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['ip']
                                  })
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool.execute(
                        tpooled_get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    # See tpooled_get_hashes "Hack".
                    if isinstance(hashed, BaseException):
                        raise hashed
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    self.rsync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['ip'],
                                            node['port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers={'Content-Length': '0'})
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                except (Exception, Timeout):
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        if self.replication_count:
            rate = self.replication_count / (time.time() - self.start)
            self.logger.info(
                _("%(replicated)d/%(total)d (%(percentage).2f%%)"
                  " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                  "%(remaining)s remaining)"), {
                      'replicated':
                      self.replication_count,
                      'total':
                      self.job_count,
                      'percentage':
                      self.replication_count * 100.0 / self.job_count,
                      'time':
                      time.time() - self.start,
                      'rate':
                      rate,
                      'remaining':
                      '%d%s' % compute_eta(self.start, self.replication_count,
                                           self.job_count)
                  })
            if self.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"), {
                          'checked': self.suffix_count,
                          'hashed':
                          (self.suffix_hash * 100.0) / self.suffix_count,
                          'synced':
                          (self.suffix_sync * 100.0) / self.suffix_count
                      })
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"), {
                          'max': self.partition_times[-1],
                          'min': self.partition_times[0],
                          'med':
                          self.partition_times[len(self.partition_times) // 2]
                      })
        else:
            self.logger.info(_("Nothing replicated for %s seconds."),
                             (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator finishes its replication
        pass in some eventuality.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected... killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def collect_jobs(self):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.
        """
        jobs = []
        ips = whataremyips()
        for local_dev in [
                dev for dev in self.object_ring.devs
                if dev and dev['ip'] in ips and dev['port'] == self.port
        ]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, 'objects')
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not os.path.ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                continue
            for partition in os.listdir(obj_path):
                try:
                    nodes = [
                        node for node in self.object_ring.get_part_nodes(
                            int(partition)) if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=join(obj_path, partition),
                             nodes=nodes,
                             delete=len(nodes) >
                             self.object_ring.replica_count - 1,
                             partition=partition))
                except ValueError:
                    continue
        random.shuffle(jobs)
        # Partitions that need to be deleted take priority
        jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs

    def replicate(self):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []
        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs()
            for job in jobs:
                if not self.check_ring():
                    self.logger.info(
                        _("Ring change detected. Aborting "
                          "current replication pass."))
                    return
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))
        self.replicate()
        total = (time.time() - start) / 60
        self.logger.info(_("Object replication complete. (%.02f minutes)"),
                         total)
        if self.recon_enable:
            try:
                dump_recon_cache('object_replication_time', total,
                                 self.recon_object)
            except (Exception, Timeout):
                self.logger.exception(_('Exception dumping recon cache'))

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            start = time.time()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - start) / 60
            self.logger.info(_("Object replication complete. (%.02f minutes)"),
                             total)
            if self.recon_enable:
                try:
                    dump_recon_cache('object_replication_time', total,
                                     self.recon_object)
                except (Exception, Timeout):
                    self.logger.exception(_('Exception dumping recon cache'))
            self.logger.debug(_('Replication sleeping for %s seconds.'),
                              self.run_pause)
            sleep(self.run_pause)
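A minimal usage sketch for this replicator, mirroring a single pass of what run_forever() does above. The import path is hypothetical (the module is not named in this example), and __init__ expects an object ring at <chase_dir>/object.ring.gz:

    # hypothetical import path for this 'chase'-era replicator
    from chase.obj.replicator import ObjectReplicator

    # minimal config; anything omitted falls back to the defaults in __init__
    conf = {'devices': '/srv/node', 'chase_dir': '/etc/chase', 'concurrency': '2'}
    replicator = ObjectReplicator(conf)
    replicator.replicate()  # one full replication pass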
Example No. 47
0
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """
    def __init__(self, conf, logger=None):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = PrefixLoggerAdapter(
            logger or get_logger(conf, log_route='object-replicator'), {})
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6200))
        self.concurrency = int(conf.get('concurrency', 1))
        self.replicator_workers = int(conf.get('replicator_workers', 0))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.replication_cycle = random.randint(0, 9)
        self.partition_times = []
        self.interval = int(
            conf.get('interval') or conf.get('run_pause') or 30)
        self.rsync_timeout = int(
            conf.get('rsync_timeout', DEFAULT_RSYNC_TIMEOUT))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
        self.rsync_compress = config_true_value(
            conf.get('rsync_compress', 'no'))
        self.rsync_module = conf.get('rsync_module', '').rstrip('/')
        if not self.rsync_module:
            self.rsync_module = '{replication_ip}::object'
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self._next_rcache_update = time.time() + self.stats_interval
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.sync_method = getattr(self, conf.get('sync_method') or 'rsync')
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.default_headers = {
            'Content-Length': '0',
            'user-agent': 'object-replicator %s' % os.getpid()
        }
        self.rsync_error_log_line_length = \
            int(conf.get('rsync_error_log_line_length', 0))
        self.handoffs_first = config_true_value(
            conf.get('handoffs_first', False))
        self.handoff_delete = config_auto_int_value(
            conf.get('handoff_delete', 'auto'), 0)
        if any((self.handoff_delete, self.handoffs_first)):
            self.logger.warning('Handoff only mode is not intended for normal '
                                'operation, please disable handoffs_first and '
                                'handoff_delete before the next '
                                'normal rebalance')
        self.is_multiprocess_worker = None
        self._df_router = DiskFileRouter(conf, self.logger)
        self._child_process_reaper_queue = queue.LightQueue()
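        # Illustrative [object-replicator] config stanza covering the options
        # read above (example values only; anything omitted falls back to the
        # defaults in the conf.get() calls):
        #
        #   [object-replicator]
        #   concurrency = 1
        #   replicator_workers = 0
        #   rsync_module = {replication_ip}::object
        #   rsync_compress = no
        #   handoffs_first = false
        #   handoff_delete = auto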

    def _zero_stats(self):
        self.stats_for_dev = defaultdict(Stats)

    @property
    def total_stats(self):
        return sum(self.stats_for_dev.values(), Stats())

    def _emplace_log_prefix(self, worker_index):
        self.logger.set_prefix("[worker %d/%d pid=%d] " % (
            worker_index + 1,  # use 1-based indexing for more readable logs
            self.replicator_workers,
            os.getpid()))

    def _get_my_replication_ips(self):
        my_replication_ips = set()
        ips = whataremyips()
        for policy in POLICIES:
            self.load_object_ring(policy)
            for local_dev in [
                    dev for dev in policy.object_ring.devs
                    if dev and dev['replication_ip'] in ips
                    and dev['replication_port'] == self.port
            ]:
                my_replication_ips.add(local_dev['replication_ip'])
        return list(my_replication_ips)

    def _child_process_reaper(self):
        """
        Consume processes from self._child_process_reaper_queue and wait() for
        them
        """
        procs = set()
        done = False
        while not done:
            timeout = 60 if procs else None
            try:
                new_proc = self._child_process_reaper_queue.get(
                    timeout=timeout)
                if new_proc is not None:
                    procs.add(new_proc)
                else:
                    done = True
            except queue.Empty:
                pass

            reaped_procs = set()
            for proc in procs:
                try:
                    # this will reap the process if it has exited, but
                    # otherwise will not wait
                    proc.wait(timeout=0)
                    reaped_procs.add(proc)
                except subprocess.TimeoutExpired:
                    pass
            procs -= reaped_procs

    def get_worker_args(self, once=False, **kwargs):
        if self.replicator_workers < 1:
            return []

        override_opts = parse_override_options(once=once, **kwargs)
        have_overrides = bool(override_opts.devices or override_opts.partitions
                              or override_opts.policies)

        # save this off for ring-change detection later in is_healthy()
        self.all_local_devices = self.get_local_devices()

        if override_opts.devices:
            devices_to_replicate = [
                d for d in override_opts.devices if d in self.all_local_devices
            ]
        else:
            # The sort isn't strictly necessary since we're just trying to
            # spread devices around evenly, but it makes testing easier.
            devices_to_replicate = sorted(self.all_local_devices)

        # Distribute devices among workers as evenly as possible
        self.replicator_workers = min(self.replicator_workers,
                                      len(devices_to_replicate))
        return [{
            'override_devices': devs,
            'override_partitions': override_opts.partitions,
            'override_policies': override_opts.policies,
            'have_overrides': have_overrides,
            'multiprocess_worker_index': index
        } for index, devs in enumerate(
            distribute_evenly(devices_to_replicate, self.replicator_workers))]

    def is_healthy(self):
        """
        Check whether our set of local devices remains the same.

        If devices have been added or removed, then we return False here so
        that we can kill off any worker processes and then distribute the
        new set of local devices across a new set of workers so that all
        devices are, once again, being worked on.

        This function may also cause recon stats to be updated.

        :returns: False if any local devices have been added or removed,
          True otherwise
        """
        # We update recon here because this is the only function we have in
        # a multiprocess replicator that gets called periodically in the
        # parent process.
        if time.time() >= self._next_rcache_update:
            update = self.aggregate_recon_update()
            dump_recon_cache(update, self.rcache, self.logger)
        return self.get_local_devices() == self.all_local_devices

    def get_local_devices(self):
        """
        Returns a set of all local devices in all replication-type storage
        policies.

        These are the device names, e.g. "sdq" or "d1234", not the full
        ring entries.
        """
        ips = whataremyips(self.bind_ip)
        local_devices = set()
        for policy in POLICIES:
            if policy.policy_type != REPL_POLICY:
                continue
            self.load_object_ring(policy)
            for device in policy.object_ring.devs:
                if device and is_local_device(ips, self.port,
                                              device['replication_ip'],
                                              device['replication_port']):
                    local_devices.add(device['device'])
        return local_devices

    # Just exists for doc anchor point
    def sync(self, node, job, suffixes, *args, **kwargs):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: a (success, in_sync_objs) tuple - a boolean indicating
                  success or failure, and a dict of objects found in sync on
                  the remote node (always empty for rsync)
        """
        return self.sync_method(node, job, suffixes, *args, **kwargs)

    def load_object_ring(self, policy):
        """
        Make sure the policy's rings are loaded.

        :param policy: the StoragePolicy instance
        :returns: appropriate ring object
        """
        policy.load_ring(self.swift_dir)
        return policy.object_ring

    def _limit_rsync_log(self, line):
        """
        If rsync_error_log_line_length is defined then
        limit the error to that length

        :param line: rsync log line
        :return: if enabled, the line truncated to rsync_error_log_line_length;
                 otherwise the original line.
        """
        if self.rsync_error_log_line_length:
            return line[:self.rsync_error_log_line_length]

        return line

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        proc = None

        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(
                self._limit_rsync_log(
                    _("Killing long-running rsync: %s") % str(args)))
            if proc:
                proc.kill()
                try:
                    # Note: Python 2.7's subprocess.Popen class doesn't take
                    # any arguments for wait(), but Python 3's does.
                    # However, Eventlet's replacement Popen takes a timeout
                    # argument regardless of Python version, so we don't
                    # need any conditional code here.
                    proc.wait(timeout=1.0)
                except subprocess.TimeoutExpired:
                    # Sometimes a process won't die immediately even after a
                    # SIGKILL. This can be due to failing disks, high load,
                    # or other reasons. We can't wait for it forever since
                    # we're taking up a slot in the (green)thread pool, so
                    # we send it over to another greenthread, not part of
                    # our pool, whose sole duty is to wait for child
                    # processes to exit.
                    self._child_process_reaper_queue.put(proc)
            return 1  # failure response code

        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            self.logger.error(
                self._limit_rsync_log(
                    _('Bad rsync return code: %(ret)d <- %(args)s') % {
                        'args': str(args),
                        'ret': ret_val
                    }))
        else:
            log_method = self.logger.info if results else self.logger.debug
            log_method(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"), {
                    'src': args[-2],
                    'dst': args[-1],
                    'time': total_time
                })
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """
        if not os.path.exists(job['path']):
            return False, {}
        args = [
            'rsync', '--recursive', '--whole-file', '--human-readable',
            '--xattrs', '--itemize-changes', '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
            '--bwlimit=%s' % self.rsync_bwlimit,
            '--exclude=.*.%s' % ''.join('[0-9a-zA-Z]' for i in range(6))
        ]
        if self.rsync_compress and \
                job['region'] != node['region']:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            args.append('--compress')
        rsync_module = rsync_module_interpolation(self.rsync_module, node)
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False, {}
        data_dir = get_data_dir(job['policy'])
        args.append(
            join(rsync_module, node['device'], data_dir, job['partition']))
        return self._rsync(args) == 0, {}

    def ssync(self, node, job, suffixes, remote_check_objs=None):
        return ssync_sender.Sender(self, node, job, suffixes,
                                   remote_check_objs)()

    def check_ring(self, object_ring):
        """
        Check to see if the ring has been updated
        :param object_ring: the ring to check

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """
        def tpool_get_suffixes(path):
            return [
                suff for suff in os.listdir(path)
                if len(suff) == 3 and isdir(join(path, suff))
            ]

        stats = self.stats_for_dev[job['device']]
        stats.attempted += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        failure_devs_info = set()
        begin = time.time()
        handoff_partition_deleted = False
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            synced_remote_regions = {}
            delete_objs = None
            if suffixes:
                for node in job['nodes']:
                    stats.rsync += 1
                    kwargs = {}
                    if node['region'] in synced_remote_regions and \
                            self.conf.get('sync_method', 'rsync') == 'ssync':
                        kwargs['remote_check_objs'] = \
                            synced_remote_regions[node['region']]
                    # candidates is a dict(hash=>timestamp) of objects
                    # for deletion
                    success, candidates = self.sync(node, job, suffixes,
                                                    **kwargs)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(node['replication_ip'],
                                                node['replication_port'],
                                                node['device'],
                                                job['partition'],
                                                'REPLICATE',
                                                '/' + '-'.join(suffixes),
                                                headers=headers)
                            conn.getresponse().read()
                        if node['region'] != job['region']:
                            synced_remote_regions[node['region']] = viewkeys(
                                candidates)
                    else:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    responses.append(success)
                for cand_objs in synced_remote_regions.values():
                    if delete_objs is None:
                        delete_objs = cand_objs
                    else:
                        delete_objs = delete_objs & cand_objs

            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if delete_handoff:
                stats.remove += 1
                if (self.conf.get('sync_method', 'rsync') == 'ssync'
                        and delete_objs is not None):
                    self.logger.info(_("Removing %s objects"),
                                     len(delete_objs))
                    _junk, error_paths = self.delete_handoff_objs(
                        job, delete_objs)
                    # If replication from this hand-off device succeeded but
                    # cleanup of the synced objects failed, mark the remote
                    # devices that were the targets of the replication as
                    # failures, because a failed cleanup means the replicator
                    # will need to replicate the same data again.
                    if error_paths:
                        failure_devs_info.update([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in job['nodes']
                        ])
                else:
                    self.delete_partition(job['path'])
                    handoff_partition_deleted = True
            elif not suffixes:
                self.delete_partition(job['path'])
                handoff_partition_deleted = True
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
            stats.add_failure_stats(failure_devs_info)
        finally:
            target_devs_info = set([(target_dev['replication_ip'],
                                     target_dev['device'])
                                    for target_dev in job['nodes']])
            stats.success += len(target_devs_info - failure_devs_info)
            if not handoff_partition_deleted:
                self.handoffs_remaining += 1
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)

    def delete_partition(self, path):
        self.logger.info(_("Removing partition: %s"), path)
        tpool.execute(shutil.rmtree, path)

    def delete_handoff_objs(self, job, delete_objs):
        success_paths = []
        error_paths = []
        for object_hash in delete_objs:
            object_path = storage_directory(job['obj_path'], job['partition'],
                                            object_hash)
            tpool.execute(shutil.rmtree, object_path, ignore_errors=True)
            suffix_dir = dirname(object_path)
            try:
                os.rmdir(suffix_dir)
                success_paths.append(object_path)
            except OSError as e:
                if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                    error_paths.append(object_path)
                    self.logger.exception(
                        "Unexpected error trying to cleanup suffix dir:%r",
                        suffix_dir)
        return success_paths, error_paths

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        stats = self.stats_for_dev[job['device']]
        stats.attempted += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        df_mgr = self._df_router[job['policy']]
        try:
            hashed, local_hash = tpool_reraise(df_mgr._get_hashes,
                                               job['device'],
                                               job['partition'],
                                               job['policy'],
                                               do_listdir=_do_listdir(
                                                   int(job['partition']),
                                                   self.replication_cycle))
            stats.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            synced_remote_regions = set()
            random.shuffle(job['nodes'])
            nodes = itertools.chain(
                job['nodes'], job['policy'].object_ring.get_more_nodes(
                    int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node['replication_ip'], node['device']))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node['region'] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '',
                                            headers=headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(replication_ip)s/%(device)s '
                                  'responded as unmounted'), node)
                            attempts_left += 1
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['replication_ip']
                                  })
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        stats.hashmatch += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(df_mgr._get_hashes,
                                                        job['device'],
                                                        job['partition'],
                                                        job['policy'],
                                                        recalculate=suffixes)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    stats.rsync += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers=headers)
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    # add only remote region when replicate succeeded
                    if success and node['region'] != job['region']:
                        synced_remote_regions.add(node['region'])
                    stats.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add(
                        (node['replication_ip'], node['device']))
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            stats.suffix_count += len(local_hash)
        except StopIteration:
            self.logger.error(
                'Ran out of handoffs while replicating '
                'partition %s of policy %d', job['partition'],
                int(job['policy']))
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            stats.add_failure_stats(failure_devs_info)
            stats.success += len(target_devs_info - failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        stats = self.total_stats
        replication_count = stats.attempted
        if replication_count > self.last_replication_count:
            self.last_replication_count = replication_count
            elapsed = (time.time() - self.start) or 0.000001
            rate = replication_count / elapsed
            self.logger.info(
                _("%(replicated)d/%(total)d (%(percentage).2f%%)"
                  " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                  "%(remaining)s remaining)"),
                {
                    'replicated':
                    replication_count,
                    'total':
                    self.job_count,
                    'percentage':
                    replication_count * 100.0 / self.job_count,
                    'time':
                    time.time() - self.start,
                    'rate':
                    rate,
                    'remaining':
                    '%d%s' %
                    compute_eta(self.start, replication_count, self.job_count)
                })
            self.logger.info(
                _('%(success)s successes, %(failure)s failures') %
                dict(success=stats.success, failure=stats.failure))

            if stats.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"), {
                          'checked': stats.suffix_count,
                          'hashed':
                          (stats.suffix_hash * 100.0) / stats.suffix_count,
                          'synced':
                          (stats.suffix_sync * 100.0) / stats.suffix_count
                      })
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"), {
                          'max': self.partition_times[-1],
                          'min': self.partition_times[0],
                          'med':
                          self.partition_times[len(self.partition_times) // 2]
                      })
        else:
            self.logger.info(_("Nothing replicated for %s seconds."),
                             (time.time() - self.start))
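
    # Illustrative note (not part of the original class): the "remaining"
    # figure logged above is ordinary rate arithmetic -- with ``done``
    # partitions finished after ``elapsed`` seconds, the time left is roughly
    # (total - done) / (done / elapsed). A doctest-style sketch of that
    # arithmetic (illustrative only, not the actual compute_eta() helper):
    #
    #   >>> import time
    #   >>> def eta_seconds(start, done, total, now=None):
    #   ...     elapsed = float((now or time.time()) - start) or 0.000001
    #   ...     rate = done / elapsed
    #   ...     return (total - done) / rate if rate else float('inf')
    #   >>> eta_seconds(start=0, done=25, total=100, now=50.0)
    #   150.0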

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def build_replication_jobs(self,
                               policy,
                               ips,
                               override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        df_mgr = self._df_router[policy]
        self.all_devs_info.update([(dev['replication_ip'], dev['device'])
                                   for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [
                dev for dev in policy.object_ring.devs if
            (dev and is_local_device(ips, self.port, dev['replication_ip'],
                                     dev['replication_port']) and
             (override_devices is None or dev['device'] in override_devices))
        ]:
            found_local = True
            dev_path = check_drive(self.devices_dir, local_dev['device'],
                                   self.mount_check)
            local_dev_stats = self.stats_for_dev[local_dev['device']]
            if not dev_path:
                local_dev_stats.add_failure_stats([
                    (failure_dev['replication_ip'], failure_dev['device'])
                    for failure_dev in policy.object_ring.devs if failure_dev
                ])
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            unlink_older_than(tmp_path, time.time() - df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                if (partition.startswith('auditor_status_')
                        and partition.endswith('.json')):
                    # ignore auditor status files
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        local_dev_stats.add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device']) for failure_dev in nodes
                        ])
                    else:
                        local_dev_stats.add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in policy.object_ring.devs
                            if failure_dev
                        ])
                    continue
        if not found_local:
            self.logger.error(
                "Can't find itself in policy with index %d with"
                " ips %s and with port %s in ring file, not"
                " replicating", int(policy), ", ".join(ips), self.port)
        return jobs

    def collect_jobs(self,
                     override_devices=None,
                     override_partitions=None,
                     override_policies=None):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.

        :param override_devices: if set, only jobs on these devices
            will be returned
        :param override_partitions: if set, only jobs on these partitions
            will be returned
        :param override_policies: if set, only jobs in these storage
            policies will be returned
        """
        jobs = []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            # Skip replication if next_part_power is set. In this case
            # every object is hard-linked twice, but the replicator can't
            # detect them and would create a second copy of the file if not
            # yet existing - and this might double the actual transferred
            # and stored data
            next_part_power = getattr(policy.object_ring, 'next_part_power',
                                      None)
            if next_part_power is not None:
                self.logger.warning(
                    _("next_part_power set in policy '%s'. Skipping"),
                    policy.name)
                continue

            if policy.policy_type == REPL_POLICY:
                if (override_policies is not None
                        and policy.idx not in override_policies):
                    continue
                # ensure rings are loaded for policy
                self.load_object_ring(policy)
                jobs += self.build_replication_jobs(
                    policy,
                    ips,
                    override_devices=override_devices,
                    override_partitions=override_partitions)
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs
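
    # Illustrative note (not part of the original class): the handoffs_first
    # ordering above works because False sorts before True, so keying the sort
    # on ``not job['delete']`` moves handoff ("delete") jobs to the front while
    # the stable sort keeps the shuffled order within each group:
    #
    #   >>> jobs = [{'delete': False}, {'delete': True}, {'delete': False}]
    #   >>> [j['delete'] for j in sorted(jobs, key=lambda j: not j['delete'])]
    #   [True, False, False]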

    def replicate(self,
                  override_devices=None,
                  override_partitions=None,
                  override_policies=None,
                  start_time=None):
        """Run a replication pass"""
        if start_time is None:
            start_time = time.time()
        self.start = start_time
        self.last_replication_count = 0
        self.replication_cycle = (self.replication_cycle + 1) % 10
        self.partition_times = []
        self.my_replication_ips = self._get_my_replication_ips()
        self.all_devs_info = set()
        self.handoffs_remaining = 0

        stats = eventlet.spawn(self.heartbeat)
        eventlet.sleep()  # Give spawns a cycle

        current_nodes = None
        dev_stats = None
        num_jobs = 0
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs(override_devices=override_devices,
                                     override_partitions=override_partitions,
                                     override_policies=override_policies)
            for job in jobs:
                dev_stats = self.stats_for_dev[job['device']]
                num_jobs += 1
                current_nodes = job['nodes']
                dev_path = check_drive(self.devices_dir, job['device'],
                                       self.mount_check)
                if not dev_path:
                    dev_stats.add_failure_stats([
                        (failure_dev['replication_ip'], failure_dev['device'])
                        for failure_dev in job['nodes']
                    ])
                    self.logger.warning(_('%s is not mounted'), job['device'])
                    continue
                if self.handoffs_first and not job['delete']:
                    # in handoffs first mode, we won't process primary
                    # partitions until rebalance was successful!
                    if self.handoffs_remaining:
                        self.logger.warning(
                            _("Handoffs first mode still has handoffs "
                              "remaining.  Aborting current "
                              "replication pass."))
                        break
                if not self.check_ring(job['policy'].object_ring):
                    self.logger.info(
                        _("Ring change detected. Aborting "
                          "current replication pass."))
                    return

                try:
                    if isfile(job['path']):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job['path'])
                        os.remove(job['path'])
                        continue
                except OSError:
                    continue
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            current_nodes = None
            self.run_pool.waitall()
        except (Exception, Timeout) as err:
            if dev_stats:
                if current_nodes:
                    dev_stats.add_failure_stats([
                        (failure_dev['replication_ip'], failure_dev['device'])
                        for failure_dev in current_nodes
                    ])
                else:
                    dev_stats.add_failure_stats(self.all_devs_info)
            self.logger.exception(
                _("Exception in top-level replication loop: %s"), err)
        finally:
            stats.kill()
            self.stats_line()

    def update_recon(self, total, end_time, override_devices):
        # Called at the end of a replication pass to update recon stats.
        if self.is_multiprocess_worker:
            # If it weren't for the failure_nodes field, we could do this as
            # a bunch of shared memory using multiprocessing.Value, which
            # would be nice because it'd avoid dealing with existing data
            # during an upgrade.
            update = {
                'object_replication_per_disk': {
                    od: {
                        'replication_stats': self.stats_for_dev[od].to_recon(),
                        'replication_time': total,
                        'replication_last': end_time,
                        'object_replication_time': total,
                        'object_replication_last': end_time
                    }
                    for od in override_devices
                }
            }
        else:
            update = {
                'replication_stats': self.total_stats.to_recon(),
                'replication_time': total,
                'replication_last': end_time,
                'object_replication_time': total,
                'object_replication_last': end_time
            }
        dump_recon_cache(update, self.rcache, self.logger)

    def aggregate_recon_update(self):
        per_disk_stats = load_recon_cache(self.rcache).get(
            'object_replication_per_disk', {})
        recon_update = {}
        min_repl_last = float('inf')
        min_repl_time = float('inf')

        # If every child has reported some stats, then aggregate things.
        if all(ld in per_disk_stats for ld in self.all_local_devices):
            aggregated = Stats()
            for device_name, data in per_disk_stats.items():
                aggregated += Stats.from_recon(data['replication_stats'])
                min_repl_time = min(min_repl_time,
                                    data['object_replication_time'])
                min_repl_last = min(min_repl_last,
                                    data['object_replication_last'])
            recon_update['replication_stats'] = aggregated.to_recon()
            recon_update['replication_last'] = min_repl_last
            recon_update['replication_time'] = min_repl_time
            recon_update['object_replication_last'] = min_repl_last
            recon_update['object_replication_time'] = min_repl_time

        # Clear out entries for old local devices that we no longer have
        devices_to_remove = set(per_disk_stats) - set(self.all_local_devices)
        if devices_to_remove:
            recon_update['object_replication_per_disk'] = {
                dtr: {}
                for dtr in devices_to_remove
            }

        return recon_update

    def run_once(self,
                 multiprocess_worker_index=None,
                 have_overrides=False,
                 *args,
                 **kwargs):
        if multiprocess_worker_index is not None:
            self.is_multiprocess_worker = True
            self._emplace_log_prefix(multiprocess_worker_index)

        rsync_reaper = eventlet.spawn(self._child_process_reaper)
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_opts = parse_override_options(once=True, **kwargs)
        devices = override_opts.devices or None
        partitions = override_opts.partitions or None
        policies = override_opts.policies or None

        start_time = time.time()
        self.replicate(override_devices=devices,
                       override_partitions=partitions,
                       override_policies=policies,
                       start_time=start_time)
        end_time = time.time()
        total = (end_time - start_time) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)

        # If we've been manually run on a subset of
        # policies/devices/partitions, then our recon stats are not
        # representative of how replication is doing, so we don't publish
        # them.
        if self.is_multiprocess_worker:
            # The main process checked for overrides and determined that
            # there were none
            should_update_recon = not have_overrides
        else:
            # We are single-process, so update recon only if we worked on
            # everything
            should_update_recon = not (partitions or devices or policies)
        if should_update_recon:
            self.update_recon(total, end_time, devices)

        # Give rsync processes one last chance to exit, then bail out and
        # let them be init's problem
        self._child_process_reaper_queue.put(None)
        rsync_reaper.wait()

    def run_forever(self,
                    multiprocess_worker_index=None,
                    override_devices=None,
                    *args,
                    **kwargs):
        if multiprocess_worker_index is not None:
            self.is_multiprocess_worker = True
            self._emplace_log_prefix(multiprocess_worker_index)
        self.logger.info(_("Starting object replicator in daemon mode."))
        eventlet.spawn_n(self._child_process_reaper)
        # Run the replicator continually
        while True:
            self._zero_stats()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            start = time.time()
            self.replicate(override_devices=override_devices)
            end = time.time()
            total = (end - start) / 60
            self.logger.info(_("Object replication complete. (%.02f minutes)"),
                             total)
            self.update_recon(total, end, override_devices)
            self.logger.debug('Replication sleeping for %s seconds.',
                              self.interval)
            sleep(self.interval)

    def post_multiprocess_run(self):
        # This method is called after run_once using multiple workers.
        update = self.aggregate_recon_update()
        dump_recon_cache(update, self.rcache, self.logger)
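
The heart of the update() method shown above is a plain dictionary comparison: a
suffix has to be pushed whenever its local hash differs from, or is missing in,
the remote hash map returned by the REPLICATE request. A minimal, self-contained
sketch of that comparison (toy dicts and a hypothetical helper name, not real
Swift hash maps):

def out_of_sync_suffixes(local_hash, remote_hash):
    """Return the suffixes whose local hash differs from the remote one.

    Mirrors the list comprehension used in update(): a suffix missing on the
    remote side (remote_hash.get(suffix, -1)) always counts as out of sync.
    """
    return [suffix for suffix in local_hash
            if local_hash[suffix] != remote_hash.get(suffix, -1)]


if __name__ == '__main__':
    local = {'abc': 'h1', 'def': 'h2', '123': 'h3'}
    remote = {'abc': 'h1', 'def': 'stale'}  # '123' is missing remotely
    print(sorted(out_of_sync_suffixes(local, remote)))  # ['123', 'def']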
Ejemplo n.º 48
0
class Crawler(object):
    def __init__(self, max_connections, input_is_plain):
        self.max_connections = max_connections
        self.input_is_plain = input_is_plain

        self.queue = Queue(1)
        self.closed = False
        self._handler_pool = GreenPool(self.max_connections)
        self._robots_cache = PoolMap(self.get_robots_checker, pool_max_size=1, timeout=600)

        # Start the IO worker and die if it does.
        self.io_worker = io.Worker(lambda: self.closed)
        t = spawn(self.io_worker.run_loop)
        t.link(reraise_errors, greenthread.getcurrent())

        log.debug(u"Crawler started. Max connections: %d.", self.max_connections)

    def crawl(self, forever=True):
        # TODO: do something special about signals?

        if forever:
            self.start_queue_updater()

        while not self.closed:
            # `get_nowait` only works together with sleep() here because we
            # need a greenlet switch to re-raise exceptions from `do_process`.
            sleep()
            try:
                item = self.queue.get_nowait()
            except Empty:
                if not forever:
                    self.graceful_stop()
                sleep(0.01)
                continue
            t = self._handler_pool.spawn(self.do_process, item)
            t.link(reraise_errors, greenthread.getcurrent())

    def stop(self):
        self.closed = True

    def graceful_stop(self, timeout=None):
        """Stops crawler and waits for all already started crawling requests to finish.

        If `timeout` is supplied, it waits for at most `timeout` time to finish
            and returns True if allocated time was enough.
            Returns False if `timeout` was not enough.
        """
        self.closed = True
        if timeout is not None:
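            # Note (added): passing False as the exception argument makes the
            # Timeout silent -- if time runs out, the ``with`` block is simply
            # exited and control falls through to ``return False`` below.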
            with eventlet.Timeout(timeout, False):
                if hasattr(self, "_queue_updater_thread"):
                    self._queue_updater_thread.kill()
                self._handler_pool.waitall()
                return True
            return False
        else:
            if hasattr(self, "_queue_updater_thread"):
                self._queue_updater_thread.kill()
            self._handler_pool.waitall()

    def start_queue_updater(self):
        self._queue_updater_thread = spawn(self.queue_updater)
        self._queue_updater_thread.link(reraise_errors, greenthread.getcurrent())

    def queue_updater(self):
        log.debug("Waiting for crawl jobs on stdin.")
        for line in sys.stdin:
            if self.closed:
                break

            line = line.strip()

            if self.input_is_plain:
                job = {"url": line}
            else:
                try:
                    job = json.loads(line)
                except ValueError:
                    log.error(u"Decoding input line: %s", line)
                    continue

            # extend worker queue
            # 1. skip duplicate URLs
            for queue_item in self.queue.queue:
                if queue_item["url"] == job["url"]:  # compare URLs
                    break
            else:
                # 2. extend queue with new items
                # May block here, when queue is full. This is a feature.
                self.queue.put(job)

        # Stdin exhausted -> stop.
        while not self.queue.empty():
            sleep(0.01)

        sleep(2)  # FIXME: Crutch to prevent stopping too early.

        self.graceful_stop()

    def get_robots_checker(self, scheme, authority):
        """PoolMap func :: scheme, authority -> (agent, uri -> bool)."""
        robots_uri = "%s://%s/robots.txt" % (scheme, authority)

        fetch_result = self.io_worker.fetch(robots_uri)
        # Graceful stop thing.
        if fetch_result is None:
            return None

        if fetch_result["success"]:
            # TODO: set expiration time from headers
            # but this must be done after `self._robots_cache.put` or somehow else...
            if 200 <= fetch_result["status_code"] < 300:
                parser = robotparser.RobotFileParser()
                content_lines = fetch_result["content"].splitlines()
                try:
                    parser.parse(content_lines)
                except KeyError:
                    raise RobotsError(u"Known robotparser bug: KeyError at urllib.quote(path).")
                return parser.can_fetch
            # Authorization required and Forbidden are considered Disallow all.
            elif fetch_result["status_code"] in (401, 403):
                return lambda _agent, _uri: False
            # /robots.txt Not Found is considered Allow all.
            elif fetch_result["status_code"] == 404:
                return lambda _agent, _uri: True
            # FIXME: this is an optimistic rule and probably should be detailed with more specific checks
            elif fetch_result["status_code"] >= 400:
                return lambda _agent, _uri: True
            # What other cases left? 100 and redirects. Consider it Disallow all.
            else:
                return lambda _agent, _uri: False
        else:
            raise FetchError(u"/robots.txt fetch problem: %s" % (fetch_result["result"]))

    def ask_robots(self, uri, scheme, authority):
        key = scheme + ":" + authority
        with self._robots_cache.getc(key, scheme, authority) as checker:
            try:
                # Graceful stop thing.
                if checker is None:
                    return None
                return checker(settings.identity["name"], uri)
            except Exception as e:
                log.exception(u"Get rid of this. ask_robots @ %s", uri)
                raise RobotsError(u"Error checking robots.txt permissions for URI '%s': %s" % (uri, unicode(e)))
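
get_robots_checker() above boils down to a small status-code policy for
/robots.txt: 2xx responses are parsed, 401/403 mean "disallow everything",
404 and the remaining 4xx/5xx codes optimistically mean "allow everything",
and anything else (1xx, unfollowed redirects) is treated as "disallow
everything". A dependency-free sketch of just that decision table, with the
parse step stubbed out (robots_policy is an illustrative name, not part of
the crawler):

def robots_policy(status_code):
    """Map an HTTP status for /robots.txt to a crawl policy string."""
    if 200 <= status_code < 300:
        return 'parse'          # build and use the parsed rules
    elif status_code in (401, 403):
        return 'disallow-all'   # auth required / forbidden
    elif status_code == 404:
        return 'allow-all'      # no robots.txt at all
    elif status_code >= 400:
        return 'allow-all'      # optimistic rule for other errors
    else:
        return 'disallow-all'   # 1xx, redirects, anything unexpected


if __name__ == '__main__':
    for code in (200, 301, 403, 404, 503):
        print('%s -> %s' % (code, robots_policy(code)))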
Ejemplo n.º 49
0
class AccountReaper(Daemon):
    """
    Removes data from status=DELETED accounts. These are accounts that have
    been asked to be removed by the reseller via the services
    remove_storage_account XMLRPC call.

    The account is not deleted immediately by the services call, but instead
    the account is simply marked for deletion by setting the status column in
    the account_stat table of the account database. This account reaper scans
    for such accounts and removes the data in the background. The background
    deletion process will occur on the primary account server for the account.

    :param server_conf: The [account-server] dictionary of the account server
                        configuration file
    :param reaper_conf: The [account-reaper] dictionary of the account server
                        configuration file

    See the etc/account-server.conf-sample for information on the possible
    configuration parameters.
    """

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='account-reaper')
        self.devices = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.interval = int(conf.get('interval', 3600))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.account_ring = None
        self.container_ring = None
        self.object_ring = None
        self.node_timeout = int(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.myips = whataremyips()
        self.concurrency = int(conf.get('concurrency', 25))
        self.container_concurrency = self.object_concurrency = \
            sqrt(self.concurrency)
        self.container_pool = GreenPool(size=self.container_concurrency)
        swift.common.db.DB_PREALLOCATION = \
            config_true_value(conf.get('db_preallocation', 'f'))
        self.delay_reaping = int(conf.get('delay_reaping') or 0)
        reap_warn_after = float(conf.get('reap_warn_after') or 86400 * 30)
        self.reap_not_done_after = reap_warn_after + self.delay_reaping

    def get_account_ring(self):
        """The account :class:`swift.common.ring.Ring` for the cluster."""
        if not self.account_ring:
            self.account_ring = Ring(self.swift_dir, ring_name='account')
        return self.account_ring

    def get_container_ring(self):
        """The container :class:`swift.common.ring.Ring` for the cluster."""
        if not self.container_ring:
            self.container_ring = Ring(self.swift_dir, ring_name='container')
        return self.container_ring

    def get_object_ring(self, policy_idx):
        """
        Get the ring identified by the policy index

        :param policy_idx: Storage policy index
        :returns: A ring matching the storage policy
        """
        return POLICIES.get_object_ring(policy_idx, self.swift_dir)

    def run_forever(self, *args, **kwargs):
        """Main entry point when running the reaper in normal daemon mode.

        This repeatedly calls :func:`reap_once` no quicker than the
        configuration interval.
        """
        self.logger.debug('Daemon started.')
        sleep(random.random() * self.interval)
        while True:
            begin = time()
            self.run_once()
            elapsed = time() - begin
            if elapsed < self.interval:
                sleep(self.interval - elapsed)

    def run_once(self, *args, **kwargs):
        """
        Main entry point when running the reaper in 'once' mode, where it will
        do a single pass over all accounts on the server. This is called
        repeatedly by :func:`run_forever`. This will call :func:`reap_device`
        once for each device on the server.
        """
        self.logger.debug('Begin devices pass: %s', self.devices)
        begin = time()
        try:
            for device in os.listdir(self.devices):
                if self.mount_check and not ismount(
                        os.path.join(self.devices, device)):
                    self.logger.increment('errors')
                    self.logger.debug(
                        _('Skipping %s as it is not mounted'), device)
                    continue
                self.reap_device(device)
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level account reaper "
                                    "loop"))
        elapsed = time() - begin
        self.logger.info(_('Devices pass completed: %.02fs'), elapsed)

    def reap_device(self, device):
        """
        Called once per pass for each device on the server. This will scan the
        accounts directory for the device, looking for partitions this device
        is the primary for, then looking for account databases that are marked
        status=DELETED and still have containers and calling
        :func:`reap_account`. Account databases marked status=DELETED that no
        longer have containers will eventually be permanently removed by the
        reclaim process within the account replicator (see
        :mod:`swift.db_replicator`).

        :param device: The device to look for accounts to be deleted.
        """
        datadir = os.path.join(self.devices, device, DATADIR)
        if not os.path.exists(datadir):
            return
        for partition in os.listdir(datadir):
            partition_path = os.path.join(datadir, partition)
            if not partition.isdigit():
                continue
            nodes = self.get_account_ring().get_part_nodes(int(partition))
            if nodes[0]['ip'] not in self.myips or \
                    not os.path.isdir(partition_path):
                continue
            for suffix in os.listdir(partition_path):
                suffix_path = os.path.join(partition_path, suffix)
                if not os.path.isdir(suffix_path):
                    continue
                for hsh in os.listdir(suffix_path):
                    hsh_path = os.path.join(suffix_path, hsh)
                    if not os.path.isdir(hsh_path):
                        continue
                    for fname in sorted(os.listdir(hsh_path), reverse=True):
                        if fname.endswith('.ts'):
                            break
                        elif fname.endswith('.db'):
                            self.start_time = time()
                            broker = \
                                AccountBroker(os.path.join(hsh_path, fname))
                            if broker.is_status_deleted() and \
                                    not broker.empty():
                                self.reap_account(broker, partition, nodes)

    def reset_stats(self):
        self.stats_return_codes = {}
        self.stats_containers_deleted = 0
        self.stats_objects_deleted = 0
        self.stats_containers_remaining = 0
        self.stats_objects_remaining = 0
        self.stats_containers_possibly_remaining = 0
        self.stats_objects_possibly_remaining = 0

    def reap_account(self, broker, partition, nodes):
        """
        Called once per pass for each account this server is the primary for
        and attempts to delete the data for the given account. The reaper will
        only delete one account at any given time. It will call
        :func:`reap_container` up to sqrt(self.concurrency) times concurrently
        while reaping the account.

        If there is any exception while deleting a single container, the
        process will continue for any other containers and the failed
        containers will be tried again the next time this function is called
        with the same parameters.

        If there is any exception while listing the containers for deletion,
        the process will stop (but will obviously be tried again the next time
        this function is called with the same parameters). This isn't likely
        since the listing comes from the local database.

        After the process completes (successfully or not) statistics about what
        was accomplished will be logged.

        This function returns nothing and should raise no exception but only
        update various self.stats_* values for what occurs.

        :param broker: The AccountBroker for the account to delete.
        :param partition: The partition in the account ring the account is on.
        :param nodes: The primary node dicts for the account to delete.

        .. seealso::

            :class:`swift.account.backend.AccountBroker` for the broker class.

        .. seealso::

            :func:`swift.common.ring.Ring.get_nodes` for a description
            of the node dicts.
        """
        begin = time()
        info = broker.get_info()
        if time() - float(Timestamp(info['delete_timestamp'])) <= \
                self.delay_reaping:
            return False
        account = info['account']
        self.logger.info(_('Beginning pass on account %s'), account)
        self.reset_stats()
        try:
            marker = ''
            while True:
                containers = \
                    list(broker.list_containers_iter(1000, marker, None, None,
                                                     None))
                if not containers:
                    break
                try:
                    for (container, _junk, _junk, _junk) in containers:
                        self.container_pool.spawn(self.reap_container, account,
                                                  partition, nodes, container)
                    self.container_pool.waitall()
                except (Exception, Timeout):
                    self.logger.exception(
                        _('Exception with containers for account %s'), account)
                marker = containers[-1][0]
                if marker == '':
                    break
            log = 'Completed pass on account %s' % account
        except (Exception, Timeout):
            self.logger.exception(
                _('Exception with account %s'), account)
            log = _('Incomplete pass on account %s') % account
        if self.stats_containers_deleted:
            log += _(', %s containers deleted') % self.stats_containers_deleted
        if self.stats_objects_deleted:
            log += _(', %s objects deleted') % self.stats_objects_deleted
        if self.stats_containers_remaining:
            log += _(', %s containers remaining') % \
                self.stats_containers_remaining
        if self.stats_objects_remaining:
            log += _(', %s objects remaining') % self.stats_objects_remaining
        if self.stats_containers_possibly_remaining:
            log += _(', %s containers possibly remaining') % \
                self.stats_containers_possibly_remaining
        if self.stats_objects_possibly_remaining:
            log += _(', %s objects possibly remaining') % \
                self.stats_objects_possibly_remaining
        if self.stats_return_codes:
            log += _(', return codes: ')
            for code in sorted(self.stats_return_codes):
                log += '%s %sxxs, ' % (self.stats_return_codes[code], code)
            log = log[:-2]
        log += _(', elapsed: %.02fs') % (time() - begin)
        self.logger.info(log)
        self.logger.timing_since('timing', self.start_time)
        delete_timestamp = Timestamp(info['delete_timestamp'])
        if self.stats_containers_remaining and \
           begin - float(delete_timestamp) >= self.reap_not_done_after:
            self.logger.warn(_('Account %s has not been reaped since %s') %
                             (account, delete_timestamp.isoformat))
        return True

    def reap_container(self, account, account_partition, account_nodes,
                       container):
        """
        Deletes the data and the container itself for the given container. This
        will call :func:`reap_object` up to sqrt(self.concurrency) times
        concurrently for the objects in the container.

        If there is any exception while deleting a single object, the process
        will continue for any other objects in the container and the failed
        objects will be tried again the next time this function is called with
        the same parameters.

        If there is any exception while listing the objects for deletion, the
        process will stop (but will obviously be tried again the next time this
        function is called with the same parameters). This is a possibility
        since the listing comes from querying just the primary remote container
        server.

        Once all objects have been attempted to be deleted, the container
        itself will be attempted to be deleted by sending a delete request to
        all container nodes. The format of the delete request is such that each
        container server will update a corresponding account server, removing
        the container from the account's listing.

        This function returns nothing and should raise no exception but only
        update various self.stats_* values for what occurs.

        :param account: The name of the account for the container.
        :param account_partition: The partition for the account on the account
                                  ring.
        :param account_nodes: The primary node dicts for the account.
        :param container: The name of the container to delete.

        * See also: :func:`swift.common.ring.Ring.get_nodes` for a description
          of the account node dicts.
        """
        account_nodes = list(account_nodes)
        part, nodes = self.get_container_ring().get_nodes(account, container)
        node = nodes[-1]
        pool = GreenPool(size=self.object_concurrency)
        marker = ''
        while True:
            objects = None
            try:
                headers, objects = direct_get_container(
                    node, part, account, container,
                    marker=marker,
                    conn_timeout=self.conn_timeout,
                    response_timeout=self.node_timeout)
                self.stats_return_codes[2] = \
                    self.stats_return_codes.get(2, 0) + 1
                self.logger.increment('return_codes.2')
            except ClientException as err:
                if self.logger.getEffectiveLevel() <= DEBUG:
                    self.logger.exception(
                        _('Exception with %(ip)s:%(port)s/%(device)s'), node)
                self.stats_return_codes[err.http_status / 100] = \
                    self.stats_return_codes.get(err.http_status / 100, 0) + 1
                self.logger.increment(
                    'return_codes.%d' % (err.http_status / 100,))
            if not objects:
                break
            try:
                policy_index = headers.get('X-Backend-Storage-Policy-Index', 0)
                for obj in objects:
                    if isinstance(obj['name'], unicode):
                        obj['name'] = obj['name'].encode('utf8')
                    pool.spawn(self.reap_object, account, container, part,
                               nodes, obj['name'], policy_index)
                pool.waitall()
            except (Exception, Timeout):
                self.logger.exception(_('Exception with objects for container '
                                        '%(container)s for account %(account)s'
                                        ),
                                      {'container': container,
                                       'account': account})
            marker = objects[-1]['name']
            if marker == '':
                break
        successes = 0
        failures = 0
        for node in nodes:
            anode = account_nodes.pop()
            try:
                direct_delete_container(
                    node, part, account, container,
                    conn_timeout=self.conn_timeout,
                    response_timeout=self.node_timeout,
                    headers={'X-Account-Host': '%(ip)s:%(port)s' % anode,
                             'X-Account-Partition': str(account_partition),
                             'X-Account-Device': anode['device'],
                             'X-Account-Override-Deleted': 'yes'})
                successes += 1
                self.stats_return_codes[2] = \
                    self.stats_return_codes.get(2, 0) + 1
                self.logger.increment('return_codes.2')
            except ClientException as err:
                if self.logger.getEffectiveLevel() <= DEBUG:
                    self.logger.exception(
                        _('Exception with %(ip)s:%(port)s/%(device)s'), node)
                failures += 1
                self.logger.increment('containers_failures')
                self.stats_return_codes[err.http_status / 100] = \
                    self.stats_return_codes.get(err.http_status / 100, 0) + 1
                self.logger.increment(
                    'return_codes.%d' % (err.http_status / 100,))
        if successes > failures:
            self.stats_containers_deleted += 1
            self.logger.increment('containers_deleted')
        elif not successes:
            self.stats_containers_remaining += 1
            self.logger.increment('containers_remaining')
        else:
            self.stats_containers_possibly_remaining += 1
            self.logger.increment('containers_possibly_remaining')

    def reap_object(self, account, container, container_partition,
                    container_nodes, obj, policy_index):
        """
        Deletes the given object by issuing a delete request to each node for
        the object. The format of the delete request is such that each object
        server will update a corresponding container server, removing the
        object from the container's listing.

        This function returns nothing and should raise no exception but only
        update various self.stats_* values for what occurs.

        :param account: The name of the account for the object.
        :param container: The name of the container for the object.
        :param container_partition: The partition for the container on the
                                    container ring.
        :param container_nodes: The primary node dicts for the container.
        :param obj: The name of the object to delete.
        :param policy_index: The storage policy index of the object's container

        * See also: :func:`swift.common.ring.Ring.get_nodes` for a description
          of the container node dicts.
        """
        container_nodes = list(container_nodes)
        ring = self.get_object_ring(policy_index)
        part, nodes = ring.get_nodes(account, container, obj)
        successes = 0
        failures = 0
        for node in nodes:
            cnode = container_nodes.pop()
            try:
                direct_delete_object(
                    node, part, account, container, obj,
                    conn_timeout=self.conn_timeout,
                    response_timeout=self.node_timeout,
                    headers={'X-Container-Host': '%(ip)s:%(port)s' % cnode,
                             'X-Container-Partition': str(container_partition),
                             'X-Container-Device': cnode['device'],
                             'X-Backend-Storage-Policy-Index': policy_index})
                successes += 1
                self.stats_return_codes[2] = \
                    self.stats_return_codes.get(2, 0) + 1
                self.logger.increment('return_codes.2')
            except ClientException as err:
                if self.logger.getEffectiveLevel() <= DEBUG:
                    self.logger.exception(
                        _('Exception with %(ip)s:%(port)s/%(device)s'), node)
                failures += 1
                self.logger.increment('objects_failures')
                self.stats_return_codes[err.http_status / 100] = \
                    self.stats_return_codes.get(err.http_status / 100, 0) + 1
                self.logger.increment(
                    'return_codes.%d' % (err.http_status / 100,))
        if successes > failures:
            self.stats_objects_deleted += 1
            self.logger.increment('objects_deleted')
        elif not successes:
            self.stats_objects_remaining += 1
            self.logger.increment('objects_remaining')
        else:
            self.stats_objects_possibly_remaining += 1
            self.logger.increment('objects_possibly_remaining')
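
Both reap_container() and reap_object() above end with the same three-way
verdict: more successes than failures counts the item as deleted, zero
successes counts it as remaining, and anything in between counts it as
"possibly remaining". A tiny self-contained sketch of that verdict
(reap_verdict is an illustrative name, not Swift API):

def reap_verdict(successes, failures):
    """Classify a reap attempt the way the reaper's stats counters do."""
    if successes > failures:
        return 'deleted'
    elif not successes:
        return 'remaining'
    else:
        return 'possibly_remaining'


if __name__ == '__main__':
    # e.g. three replica nodes: 3/0, 0/3 and 1/2 successful deletes
    print(reap_verdict(3, 0))  # deleted
    print(reap_verdict(0, 3))  # remaining
    print(reap_verdict(1, 2))  # possibly_remaining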
Ejemplo n.º 50
0
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.vm_test_mode = config_true_value(conf.get('vm_test_mode', 'no'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.port = int(conf.get('bind_port', 6000))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.object_ring = Ring(self.swift_dir, ring_name='object')
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.partition_times = []
        self.run_pause = int(conf.get('run_pause', 30))
        self.rsync_timeout = int(conf.get('rsync_timeout', 900))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.sync_method = getattr(self, conf.get('sync_method') or 'rsync')
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
        self.headers = {
            'Content-Length': '0',
            'user-agent': 'obj-replicator %s' % os.getpid()}
        self.rsync_error_log_line_length = \
            int(conf.get('rsync_error_log_line_length', 0))
        self.handoffs_first = config_true_value(conf.get('handoffs_first',
                                                         False))
        self.handoff_delete = config_auto_int_value(
            conf.get('handoff_delete', 'auto'), 0)
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def sync(self, node, job, suffixes):  # Just exists for doc anchor point
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: boolean indicating success or failure
        """
        return self.sync_method(node, job, suffixes)

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        ret_val = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            error_line = _('Bad rsync return code: %(ret)d <- %(args)s') % \
                {'args': str(args), 'ret': ret_val}
            if self.rsync_error_log_line_length:
                error_line = error_line[:self.rsync_error_log_line_length]
            self.logger.error(error_line)
        elif results:
            self.logger.info(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        else:
            self.logger.debug(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """
        if not os.path.exists(job['path']):
            return False
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
            '--bwlimit=%s' % self.rsync_bwlimit,
        ]
        node_ip = rsync_ip(node['replication_ip'])
        if self.vm_test_mode:
            rsync_module = '%s::object%s' % (node_ip, node['replication_port'])
        else:
            rsync_module = '%s::object' % node_ip
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False
        args.append(join(rsync_module, node['device'],
                    'objects', job['partition']))
        return self._rsync(args) == 0

    def ssync(self, node, job, suffixes):
        return ssync_sender.Sender(self, node, job, suffixes)()

    def check_ring(self):
        """
        Check to see if the ring has been updated

        :returns: True if the ring is unchanged, False if it has changed
            since the last check
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if self.object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """

        def tpool_get_suffixes(path):
            return [suff for suff in os.listdir(path)
                    if len(suff) == 3 and isdir(join(path, suff))]
        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'],))
        begin = time.time()
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            if suffixes:
                for node in job['nodes']:
                    success = self.sync(node, job, suffixes)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(
                                node['replication_ip'],
                                node['replication_port'],
                                node['device'], job['partition'], 'REPLICATE',
                                '/' + '-'.join(suffixes), headers=self.headers)
                            conn.getresponse().read()
                    responses.append(success)
            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if not suffixes or delete_handoff:
                self.logger.info(_("Removing partition: %s"), job['path'])
                tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)
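
    # Illustrative note (not part of the original class): the delete_handoff
    # decision above has two modes. With handoff_delete set to N, the handoff
    # partition is removed once at least N syncs succeeded; with the default
    # of 0 ("auto"), it is removed only when every node succeeded:
    #
    #   >>> def can_delete(responses, num_nodes, handoff_delete):
    #   ...     if handoff_delete:
    #   ...         return len([r for r in responses if r]) >= handoff_delete
    #   ...     return len(responses) == num_nodes and all(responses)
    #   >>> can_delete([True, True, False], 3, handoff_delete=2)
    #   True
    #   >>> can_delete([True, True, False], 3, handoff_delete=0)
    #   False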

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'],))
        begin = time.time()
        try:
            # MODIFIED LightSync
            local_hash = None
            part_hash_local = tpool_reraise(get_part_hash, job['path'])
            # Partition has been modified
            if part_hash_local is None:
                hashed, local_hash = tpool_reraise(
                    get_hashes, job['path'],
                    do_listdir=(self.replication_count % 10) == 0,
                    reclaim_age=self.reclaim_age)
                self.suffix_hash += hashed
                self.logger.update_stats('suffix.hashes', hashed)  

                part_hash_local = tpool_reraise(get_part_hash, job['path'])               
            """hashed, local_hash = tpool_reraise(
                get_hashes, job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)"""
            attempts_left = True
            nodes = itertools.chain(job['nodes'])
            while True:
        ##
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                try:
                #MODIFIED LightSync
                    req_suff = ('' if part_hash_local is None
                                else '/_SHORTREP_-' + part_hash_local)
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            req_suff, headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_('%(ip)s/%(device)s responded'
                                                ' as unmounted'), node)
                            if attempts_left:
                                attempts_left = False
                                # TODO: start from the current node's hand-off
                                # position (hash the node info to find it)
                                nodes = itertools.chain(
                                    self.object_ring.get_more_nodes(
                                        int(job['partition'])),
                                    nodes)
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                                {'resp': resp.status,
                                                'ip': node['replication_ip']})
                            continue
                        part_hash_remote = pickle.loads(resp.read())
                        del resp
                    if part_hash_remote == "OK":
                        break
                    remote_hash = part_hash_remote
                    if local_hash is None:
                        hashed, local_hash = tpool_reraise(
                            get_hashes, job['path'],
                            do_listdir=(self.replication_count % 10) == 0,
                            reclaim_age=self.reclaim_age)
                        self.suffix_hash += hashed
                        self.logger.update_stats('suffix.hashes', hashed)
                    # Original (pre-LightSync) REPLICATE request, kept for
                    # reference:
                    # with Timeout(self.http_timeout):
                    #     resp = http_connect(
                    #         node['replication_ip'], node['replication_port'],
                    #         node['device'], job['partition'], 'REPLICATE',
                    #         '', headers=self.headers).getresponse()
                    #     if resp.status == HTTP_INSUFFICIENT_STORAGE:
                    #         self.logger.error(_('%(ip)s/%(device)s responded'
                    #                             ' as unmounted'), node)
                    #         attempts_left += 1
                    #         continue
                    #     if resp.status != HTTP_OK:
                    #         self.logger.error(_("Invalid response %(resp)s "
                    #                             "from %(ip)s"),
                    #                           {'resp': resp.status,
                    #                            'ip': node['replication_ip']})
                    #         continue
                    #     remote_hash = pickle.loads(resp.read())
                    #     del resp
                ##
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    if not suffixes:
                #MODIFIED LightSync
                        break
                ##
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes,
                        job['path'], recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers=self.headers)
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                #MODIFIED LightSync
                    break
                ##
                except (Exception, Timeout):
                    self.logger.exception(_("Error syncing with node: %s") %
                                          node)
            #MODIFIED LightSync  (after if)
            self.suffix_count += (len(local_hash)
                                  if local_hash is not None else 0)
            ##
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
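
    # An illustrative sketch of the LightSync short-circuit used above: when a
    # cached partition hash is available, REPLICATE is issued against a
    # '/_SHORTREP_-<hash>' path and a remote reply of "OK" means the whole
    # partition already matches, so no per-suffix hashing is needed.  The
    # helper name is an assumption for this sketch, not part of the original.
    @staticmethod
    def _sketch_shortrep_suffix(part_hash_local):
        if part_hash_local is None:
            return ''
        return '/_SHORTREP_-' + part_hash_local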

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        if self.replication_count:
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.replication_count / elapsed
            self.logger.info(
                _("%(replicated)d/%(total)d (%(percentage).2f%%)"
                  " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                  "%(remaining)s remaining)"),
                {'replicated': self.replication_count, 'total': self.job_count,
                 'percentage': self.replication_count * 100.0 / self.job_count,
                 'time': time.time() - self.start, 'rate': rate,
                 'remaining': '%d%s' % compute_eta(self.start,
                                                   self.replication_count,
                                                   self.job_count)})
            if self.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"),
                    {'checked': self.suffix_count,
                     'hashed': (self.suffix_hash * 100.0) / self.suffix_count,
                     'synced': (self.suffix_sync * 100.0) / self.suffix_count})
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"),
                    {'max': self.partition_times[-1],
                     'min': self.partition_times[0],
                     'med': self.partition_times[
                         len(self.partition_times) // 2]})
        else:
            self.logger.info(
                _("Nothing replicated for %s seconds."),
                (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator finishes its replication
        pass in some eventuality.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def collect_jobs(self):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be synced.
        """
        jobs = []
        ips = whataremyips()
        for local_dev in [dev for dev in self.object_ring.devs
                          if dev and dev['replication_ip'] in ips and
                          dev['replication_port'] == self.port]:
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, 'objects')
            tmp_path = join(dev_path, 'tmp')
            if self.mount_check and not ismount(dev_path):
                self.logger.warn(_('%s is not mounted'), local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                try:
                    job_path = join(obj_path, partition)
                    if isfile(job_path):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning('Removing partition directory '
                                            'which was a file: %s', job_path)
                        os.remove(job_path)
                        continue
                    part_nodes = \
                        self.object_ring.get_part_nodes(int(partition))
                    #MODIFIED LightSync
                    for mypos in range(len(part_nodes)):
                        if part_nodes[mypos]['id'] == local_dev['id']:
                            break
                    nodes = part_nodes[mypos + 1:] + part_nodes[:mypos]
                    ##
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             partition=partition))
                except (ValueError, OSError):
                    continue
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs
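
    # A standalone sketch of the LightSync node ordering above: the primary
    # nodes of a partition are rotated so replication targets start right
    # after this device's own position in the ring.  The dicts stand in for
    # ring device entries and the helper name is illustrative.
    @staticmethod
    def _sketch_rotate_part_nodes(part_nodes, local_id):
        mypos = next(i for i, node in enumerate(part_nodes)
                     if node['id'] == local_id)
        return part_nodes[mypos + 1:] + part_nodes[:mypos]
    # e.g. ids [1, 2, 3] with local_id=2 yields targets with ids [3, 1]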

    def replicate(self, override_devices=None, override_partitions=None):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.partition_times = []

        if override_devices is None:
            override_devices = []
        if override_partitions is None:
            override_partitions = []

        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle

        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs()
            for job in jobs:
                if override_devices and job['device'] not in override_devices:
                    continue
                if override_partitions and \
                        job['partition'] not in override_partitions:
                    continue
                dev_path = join(self.devices_dir, job['device'])
                if self.mount_check and not ismount(dev_path):
                    self.logger.warn(_('%s is not mounted'), job['device'])
                    continue
                if not self.check_ring():
                    self.logger.info(_("Ring change detected. Aborting "
                                       "current replication pass."))
                    return
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()

    def run_once(self, *args, **kwargs):
        start = time.time()
        self.logger.info(_("Running object replicator in script mode."))
        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions)
        total = (time.time() - start) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            dump_recon_cache({'object_replication_time': total,
                              'object_replication_last': time.time()},
                             self.rcache, self.logger)

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            start = time.time()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - start) / 60
            self.logger.info(
                _("Object replication complete. (%.02f minutes)"), total)
            dump_recon_cache({'object_replication_time': total,
                              'object_replication_last': time.time()},
                             self.rcache, self.logger)
            self.logger.debug('Replication sleeping for %s seconds.',
                              self.run_pause)
            sleep(self.run_pause)
Ejemplo n.º 51
0
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """

    def __init__(self, conf, logger=None):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6200))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.replication_cycle = random.randint(0, 9)
        self.partition_times = []
        self.interval = int(conf.get('interval') or
                            conf.get('run_pause') or 30)
        self.rsync_timeout = int(conf.get('rsync_timeout',
                                          DEFAULT_RSYNC_TIMEOUT))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
        self.rsync_compress = config_true_value(
            conf.get('rsync_compress', 'no'))
        self.rsync_module = conf.get('rsync_module', '').rstrip('/')
        if not self.rsync_module:
            self.rsync_module = '{replication_ip}::object'
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.sync_method = getattr(self, conf.get('sync_method') or 'rsync')
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.default_headers = {
            'Content-Length': '0',
            'user-agent': 'object-replicator %s' % os.getpid()}
        self.rsync_error_log_line_length = \
            int(conf.get('rsync_error_log_line_length', 0))
        self.handoffs_first = config_true_value(conf.get('handoffs_first',
                                                         False))
        self.handoff_delete = config_auto_int_value(
            conf.get('handoff_delete', 'auto'), 0)
        if any((self.handoff_delete, self.handoffs_first)):
            self.logger.warning('Handoff only mode is not intended for normal '
                                'operation, please disable handoffs_first and '
                                'handoff_delete before the next '
                                'normal rebalance')
        self._df_router = DiskFileRouter(conf, self.logger)

    def _zero_stats(self):
        """Zero out the stats."""
        self.stats = {'attempted': 0, 'success': 0, 'failure': 0,
                      'hashmatch': 0, 'rsync': 0, 'remove': 0,
                      'start': time.time(), 'failure_nodes': {}}

    def _add_failure_stats(self, failure_devs_info):
        for node, dev in failure_devs_info:
            self.stats['failure'] += 1
            failure_devs = self.stats['failure_nodes'].setdefault(node, {})
            failure_devs.setdefault(dev, 0)
            failure_devs[dev] += 1
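
    # A tiny usage sketch of the failure bookkeeping above, showing the shape
    # of the resulting stats; the IP and device names are made up.
    @staticmethod
    def _sketch_failure_stats(failure_devs_info):
        stats = {'failure': 0, 'failure_nodes': {}}
        for node, dev in failure_devs_info:
            stats['failure'] += 1
            stats['failure_nodes'].setdefault(node, {}).setdefault(dev, 0)
            stats['failure_nodes'][node][dev] += 1
        return stats
    # _sketch_failure_stats([('10.0.0.1', 'sdb'), ('10.0.0.1', 'sdb')])
    # -> {'failure': 2, 'failure_nodes': {'10.0.0.1': {'sdb': 2}}}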

    def _get_my_replication_ips(self):
        my_replication_ips = set()
        ips = whataremyips()
        for policy in POLICIES:
            self.load_object_ring(policy)
            for local_dev in [dev for dev in policy.object_ring.devs
                              if dev and dev['replication_ip'] in ips and
                              dev['replication_port'] == self.port]:
                my_replication_ips.add(local_dev['replication_ip'])
        return list(my_replication_ips)

    # Just exists for doc anchor point
    def sync(self, node, job, suffixes, *args, **kwargs):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: a (success, extra_info) tuple: a boolean indicating success
                  or failure, and a dictionary of additional info returned by
                  the sync method
        """
        return self.sync_method(node, job, suffixes, *args, **kwargs)

    def load_object_ring(self, policy):
        """
        Make sure the policy's rings are loaded.

        :param policy: the StoragePolicy instance
        :returns: appropriate ring object
        """
        policy.load_ring(self.swift_dir)
        return policy.object_ring

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        ret_val = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            error_line = _('Bad rsync return code: %(ret)d <- %(args)s') % \
                {'args': str(args), 'ret': ret_val}
            if self.rsync_error_log_line_length:
                error_line = error_line[:self.rsync_error_log_line_length]
            self.logger.error(error_line)
        else:
            log_method = self.logger.info if results else self.logger.debug
            log_method(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"),
                {'src': args[-2], 'dst': args[-1], 'time': total_time})
        return ret_val
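
    # A self-contained sketch of the same "bounded external command" pattern
    # using only the standard library (Python 3); the original above relies on
    # eventlet's Timeout so the greenthread is not blocked while rsync runs.
    @staticmethod
    def _sketch_run_with_timeout(args, timeout):
        import subprocess
        proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        try:
            output, _unused = proc.communicate(timeout=timeout)
            return proc.returncode, output
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.communicate()
            return 1, b''  # mirror the replicator's failure return code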

    def rsync(self, node, job, suffixes):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """
        if not os.path.exists(job['path']):
            return False, {}
        args = [
            'rsync',
            '--recursive',
            '--whole-file',
            '--human-readable',
            '--xattrs',
            '--itemize-changes',
            '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
            '--bwlimit=%s' % self.rsync_bwlimit,
            '--exclude=.*.%s' % ''.join('[0-9a-zA-Z]' for i in range(6))
        ]
        if self.rsync_compress and \
                job['region'] != node['region']:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            args.append('--compress')
        rsync_module = rsync_module_interpolation(self.rsync_module, node)
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False, {}
        data_dir = get_data_dir(job['policy'])
        args.append(join(rsync_module, node['device'],
                    data_dir, job['partition']))
        return self._rsync(args) == 0, {}

    def ssync(self, node, job, suffixes, remote_check_objs=None):
        return ssync_sender.Sender(
            self, node, job, suffixes, remote_check_objs)()

    def check_ring(self, object_ring):
        """
        Check to see if the ring has been updated
        :param object_ring: the ring to check

        :returns: boolean indicating whether or not the ring has changed
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """

        def tpool_get_suffixes(path):
            return [suff for suff in os.listdir(path)
                    if len(suff) == 3 and isdir(join(path, suff))]
        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'],))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        failure_devs_info = set()
        begin = time.time()
        handoff_partition_deleted = False
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            synced_remote_regions = {}
            delete_objs = None
            if suffixes:
                for node in job['nodes']:
                    self.stats['rsync'] += 1
                    kwargs = {}
                    if node['region'] in synced_remote_regions and \
                            self.conf.get('sync_method', 'rsync') == 'ssync':
                        kwargs['remote_check_objs'] = \
                            synced_remote_regions[node['region']]
                    # candidates is a dict(hash=>timestamp) of objects
                    # for deletion
                    success, candidates = self.sync(
                        node, job, suffixes, **kwargs)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(
                                node['replication_ip'],
                                node['replication_port'],
                                node['device'], job['partition'], 'REPLICATE',
                                '/' + '-'.join(suffixes), headers=headers)
                            conn.getresponse().read()
                        if node['region'] != job['region']:
                            synced_remote_regions[node['region']] = viewkeys(
                                candidates)
                    else:
                        failure_devs_info.add((node['replication_ip'],
                                               node['device']))
                    responses.append(success)
                for cand_objs in synced_remote_regions.values():
                    if delete_objs is None:
                        delete_objs = cand_objs
                    else:
                        delete_objs = delete_objs & cand_objs

            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if delete_handoff:
                self.stats['remove'] += 1
                if (self.conf.get('sync_method', 'rsync') == 'ssync' and
                        delete_objs is not None):
                    self.logger.info(_("Removing %s objects"),
                                     len(delete_objs))
                    _junk, error_paths = self.delete_handoff_objs(
                        job, delete_objs)
                    # If cleanup of the hand-off device's objects fails after
                    # replication, mark the remote devices that were the
                    # targets of that replication as failures, because the
                    # replicator will need to replicate to them again with
                    # the same info.
                    if error_paths:
                        failure_devs_info.update(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in job['nodes']])
                else:
                    self.delete_partition(job['path'])
                    handoff_partition_deleted = True
            elif not suffixes:
                self.delete_partition(job['path'])
                handoff_partition_deleted = True
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
            self._add_failure_stats(failure_devs_info)
        finally:
            target_devs_info = set([(target_dev['replication_ip'],
                                     target_dev['device'])
                                    for target_dev in job['nodes']])
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            if not handoff_partition_deleted:
                self.handoffs_remaining += 1
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)

    def delete_partition(self, path):
        self.logger.info(_("Removing partition: %s"), path)
        tpool.execute(shutil.rmtree, path)

    def delete_handoff_objs(self, job, delete_objs):
        success_paths = []
        error_paths = []
        for object_hash in delete_objs:
            object_path = storage_directory(job['obj_path'], job['partition'],
                                            object_hash)
            tpool.execute(shutil.rmtree, object_path, ignore_errors=True)
            suffix_dir = dirname(object_path)
            try:
                os.rmdir(suffix_dir)
                success_paths.append(object_path)
            except OSError as e:
                if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                    error_paths.append(object_path)
                    self.logger.exception(
                        "Unexpected error trying to cleanup suffix dir:%r",
                        suffix_dir)
        return success_paths, error_paths
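
    # A minimal sketch of the "best effort rmdir" convention above: failing to
    # remove a suffix directory only counts as an error when the reason is
    # something other than "already gone" or "still has entries".
    @staticmethod
    def _sketch_try_rmdir(suffix_dir):
        import errno
        import os
        try:
            os.rmdir(suffix_dir)
        except OSError as e:
            if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                return False  # unexpected failure, caller should log it
        return True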

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'],))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        df_mgr = self._df_router[job['policy']]
        try:
            hashed, local_hash = tpool_reraise(
                df_mgr._get_hashes, job['device'],
                job['partition'], job['policy'],
                do_listdir=_do_listdir(
                    int(job['partition']),
                    self.replication_cycle))
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            synced_remote_regions = set()
            random.shuffle(job['nodes'])
            nodes = itertools.chain(
                job['nodes'],
                job['policy'].object_ring.get_more_nodes(
                    int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node['replication_ip'], node['device']))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node['region'] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '', headers=headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(replication_ip)s/%(device)s '
                                  'responded as unmounted'), node)
                            attempts_left += 1
                            failure_devs_info.add((node['replication_ip'],
                                                   node['device']))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                              {'resp': resp.status,
                                               'ip': node['replication_ip']})
                            failure_devs_info.add((node['replication_ip'],
                                                   node['device']))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    if not suffixes:
                        self.stats['hashmatch'] += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        df_mgr._get_hashes,
                        job['device'], job['partition'], job['policy'],
                        recalculate=suffixes)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    self.stats['rsync'] += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers=headers)
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add((node['replication_ip'],
                                               node['device']))
                    # add only remote region when replicate succeeded
                    if success and node['region'] != job['region']:
                        synced_remote_regions.add(node['region'])
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add((node['replication_ip'],
                                           node['device']))
                    self.logger.exception(_("Error syncing with node: %s") %
                                          node)
            self.suffix_count += len(local_hash)
        except StopIteration:
            self.logger.error('Ran out of handoffs while replicating '
                              'partition %s of policy %d',
                              job['partition'], int(job['policy']))
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            self._add_failure_stats(failure_devs_info)
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
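
    # A standalone sketch of the suffix comparison that drives the sync above:
    # a suffix needs to be pushed whenever the remote hash for it is missing
    # or differs from the local one.  The hash dicts are illustrative.
    @staticmethod
    def _sketch_out_of_sync_suffixes(local_hash, remote_hash):
        return [suffix for suffix in local_hash
                if local_hash[suffix] != remote_hash.get(suffix, -1)]
    # e.g. local {'abc': 'h1', 'def': 'h2'} vs remote {'abc': 'h1'} -> ['def']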

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        if self.replication_count:
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.replication_count / elapsed
            self.logger.info(
                _("%(replicated)d/%(total)d (%(percentage).2f%%)"
                  " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                  "%(remaining)s remaining)"),
                {'replicated': self.replication_count, 'total': self.job_count,
                 'percentage': self.replication_count * 100.0 / self.job_count,
                 'time': time.time() - self.start, 'rate': rate,
                 'remaining': '%d%s' % compute_eta(self.start,
                                                   self.replication_count,
                                                   self.job_count)})
            self.logger.info(_('%(success)s successes, %(failure)s failures')
                             % self.stats)

            if self.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"),
                    {'checked': self.suffix_count,
                     'hashed': (self.suffix_hash * 100.0) / self.suffix_count,
                     'synced': (self.suffix_sync * 100.0) / self.suffix_count})
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"),
                    {'max': self.partition_times[-1],
                     'min': self.partition_times[0],
                     'med': self.partition_times[
                         len(self.partition_times) // 2]})
        else:
            self.logger.info(
                _("Nothing replicated for %s seconds."),
                (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator finishes its replication
        pass in some eventuality.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def build_replication_jobs(self, policy, ips, override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        df_mgr = self._df_router[policy]
        self.all_devs_info.update(
            [(dev['replication_ip'], dev['device'])
             for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [dev for dev in policy.object_ring.devs
                          if (dev
                              and is_local_device(ips,
                                                  self.port,
                                                  dev['replication_ip'],
                                                  dev['replication_port'])
                              and (override_devices is None
                                   or dev['device'] in override_devices))]:
            found_local = True
            dev_path = check_drive(self.devices_dir, local_dev['device'],
                                   self.mount_check)
            if not dev_path:
                self._add_failure_stats(
                    [(failure_dev['replication_ip'],
                      failure_dev['device'])
                     for failure_dev in policy.object_ring.devs
                     if failure_dev])
                self.logger.warning(
                    _('%s is not mounted'), local_dev['device'])
                continue
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            unlink_older_than(tmp_path, time.time() -
                              df_mgr.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                if (partition.startswith('auditor_status_') and
                        partition.endswith('.json')):
                    # ignore auditor status files
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [node for node in part_nodes
                             if node['id'] != local_dev['id']]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in nodes])
                    else:
                        self._add_failure_stats(
                            [(failure_dev['replication_ip'],
                              failure_dev['device'])
                             for failure_dev in policy.object_ring.devs
                             if failure_dev])
                    continue
        if not found_local:
            self.logger.error("Can't find itself in policy with index %d with"
                              " ips %s and with port %s in ring file, not"
                              " replicating",
                              int(policy), ", ".join(ips), self.port)
        return jobs
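
    # A small sketch of how a job is classified as a hand-off above: the local
    # device is dropped from the partition's primary nodes, so if nothing was
    # dropped this device is not a primary and the job should delete the
    # partition once it has been pushed elsewhere.
    @staticmethod
    def _sketch_is_handoff_job(part_nodes, local_dev_id):
        nodes = [node for node in part_nodes if node['id'] != local_dev_id]
        return len(nodes) > len(part_nodes) - 1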

    def collect_jobs(self, override_devices=None, override_partitions=None,
                     override_policies=None):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.

        :param override_devices: if set, only jobs on these devices
            will be returned
        :param override_partitions: if set, only jobs on these partitions
            will be returned
        :param override_policies: if set, only jobs in these storage
            policies will be returned
        """
        jobs = []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            # Skip replication if next_part_power is set. In this case
            # every object is hard-linked twice, but the replicator can't
            # detect them and would create a second copy of the file if not
            # yet existing - and this might double the actual transferred
            # and stored data
            next_part_power = getattr(
                policy.object_ring, 'next_part_power', None)
            if next_part_power is not None:
                self.logger.warning(
                    _("next_part_power set in policy '%s'. Skipping"),
                    policy.name)
                continue

            if policy.policy_type == REPL_POLICY:
                if (override_policies is not None and
                        str(policy.idx) not in override_policies):
                    continue
                # ensure rings are loaded for policy
                self.load_object_ring(policy)
                jobs += self.build_replication_jobs(
                    policy, ips, override_devices=override_devices,
                    override_partitions=override_partitions)
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs
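
    # A self-contained sketch of the job ordering above: jobs are shuffled,
    # then in handoffs_first mode stably re-sorted so hand-off (delete) jobs
    # run before primary-partition jobs.  The job dicts are illustrative.
    @staticmethod
    def _sketch_order_jobs(jobs, handoffs_first):
        import random
        random.shuffle(jobs)
        if handoffs_first:
            jobs.sort(key=lambda job: not job['delete'])
        return jobs
    # e.g. [{'delete': False}, {'delete': True}] with handoffs_first=True
    # always puts the {'delete': True} job first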

    def replicate(self, override_devices=None, override_partitions=None,
                  override_policies=None):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.replication_cycle = (self.replication_cycle + 1) % 10
        self.partition_times = []
        self.my_replication_ips = self._get_my_replication_ips()
        self.all_devs_info = set()
        self.handoffs_remaining = 0

        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle

        current_nodes = None
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs(override_devices=override_devices,
                                     override_partitions=override_partitions,
                                     override_policies=override_policies)
            for job in jobs:
                current_nodes = job['nodes']
                if override_devices and job['device'] not in override_devices:
                    continue
                if override_partitions and \
                        job['partition'] not in override_partitions:
                    continue
                dev_path = check_drive(self.devices_dir, job['device'],
                                       self.mount_check)
                if not dev_path:
                    self._add_failure_stats([(failure_dev['replication_ip'],
                                              failure_dev['device'])
                                             for failure_dev in job['nodes']])
                    self.logger.warning(_('%s is not mounted'), job['device'])
                    continue
                if self.handoffs_first and not job['delete']:
                    # in handoffs first mode, we won't process primary
                    # partitions until rebalance was successful!
                    if self.handoffs_remaining:
                        self.logger.warning(_(
                            "Handoffs first mode still has handoffs "
                            "remaining.  Aborting current "
                            "replication pass."))
                        break
                if not self.check_ring(job['policy'].object_ring):
                    self.logger.info(_("Ring change detected. Aborting "
                                       "current replication pass."))
                    return

                try:
                    if isfile(job['path']):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job['path'])
                        os.remove(job['path'])
                        continue
                except OSError:
                    continue
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            current_nodes = None
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            if current_nodes:
                self._add_failure_stats([(failure_dev['replication_ip'],
                                          failure_dev['device'])
                                         for failure_dev in current_nodes])
            else:
                self._add_failure_stats(self.all_devs_info)
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()
            self.stats['attempted'] = self.replication_count

    def run_once(self, *args, **kwargs):
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        override_policies = list_from_csv(kwargs.get('policies'))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(
            override_devices=override_devices,
            override_partitions=override_partitions,
            override_policies=override_policies)
        total = (time.time() - self.stats['start']) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            replication_last = time.time()
            dump_recon_cache({'replication_stats': self.stats,
                              'replication_time': total,
                              'replication_last': replication_last,
                              'object_replication_time': total,
                              'object_replication_last': replication_last},
                             self.rcache, self.logger)

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            self._zero_stats()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - self.stats['start']) / 60
            self.logger.info(
                _("Object replication complete. (%.02f minutes)"), total)
            replication_last = time.time()
            dump_recon_cache({'replication_stats': self.stats,
                              'replication_time': total,
                              'replication_last': replication_last,
                              'object_replication_time': total,
                              'object_replication_last': replication_last},
                             self.rcache, self.logger)
            self.logger.debug('Replication sleeping for %s seconds.',
                              self.interval)
            sleep(self.interval)
Ejemplo n.º 52
0
class Service(object):
    def __init__(self, check_task_internal='', *task_info):
        """
        Each element of task_info should be a dictionary with the keys
        'task_name' and 'task_desc'.
        """
        global DEBUG
        if DEBUG:
            self.logger = get_debug_logger("Service")
        else:
            self.logger = get_default_logger("Service")
        self.config = get_default_config()

        if check_task_internal and int(check_task_internal):
            self.check_task_internal = int(check_task_internal)
        else:
            self.check_task_internal = int(
                self.config.get_option_value('default', 'check_task_internal'))
        self.enable_backdoor = self.config.get_option_value(
            'eventlet_backdoor', 'enable')
        self.backdoor_port = self.config.get_option_value(
            'eventlet_backdoor', 'port')
        self.task_info = task_info

        self.pool = GreenPool()

        self.control = None
        self.task_queue_list = []
        self.task_status = {}
        self.task_thread_list = []

        self._done = event.Event()

    def _get_task_status(self, queue):
        task_status = {}
        status = queue.get()
        if status:
            for item in status.split(','):
                key, value = item.split(':')
                task_status[key.strip()] = value.strip()

        return task_status
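
    # A small standalone sketch of the status string format parsed above: a
    # comma-separated list of "key: value" pairs reported by a task on its
    # queue.  The example string is made up for illustration.
    @staticmethod
    def _parse_status_string(status):
        task_status = {}
        if status:
            for item in status.split(','):
                key, value = item.split(':')
                task_status[key.strip()] = value.strip()
        return task_status
    # _parse_status_string('task_name: backup, state: running')
    # -> {'task_name': 'backup', 'state': 'running'}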

    def launch_control_task(self, task_status, *queue_list):
        # task_status is passed in (rather than unpacked as **kwargs) so that
        # updates are visible to the shared self.task_status dict
        while True:
            eventlet.sleep(self.check_task_internal)
            for queue in queue_list:
                status = self._get_task_status(queue)
                if status:
                    task_status[status['task_name']] = status

    def _get_task_obj(self, task_name):
        task_module = import_utils(task_name)
        for task_subclass_name, task_subclass in get_subclass(
                task_module, Task):
            if task_subclass:
                return task_subclass
        return None

    def start(self):

        if len(self.task_info) == 0:
            self.logger.info("No task is here for executing!!!")
            self.stop()
        wsgi_tasks = WSGITask()
        wsgi_task_names = []
        #wsgi_url_map_app = {}
        for task_info in self.task_info:
            task_name = task_info['task_name']
            task_obj = self._get_task_obj(task_name)
            task_queue = eventlet.queue.LifoQueue()
            self.task_queue_list.append(task_queue)
            task = task_obj(task_queue)
            if task.task_type == 'standalone':
                task_thread = self.pool.spawn(task.start)
                self.task_thread_list.append(task_thread)
            elif task.task_type == 'subwsgi':
                #if task.url_map_app:
                #
                # If there is the same url mapping to the different apps,
                # then the url will direct to the last app
                #
                #    wsgi_url_map_app.update(task.url_map_app)
                task.register(wsgi_tasks.mapper, wsgi_tasks.loader)
                wsgi_task_names.append(task.task_name)

        # Start the WSGI tasks, all bound to the same port
        #for url, app in url_map_app.items():
        #    wsgi_tasks.register(url, app)
        if len(wsgi_task_names) >= 1:
            self.logger.info("Will run WSGI tasks:%s in a singal thread..." %
                             wsgi_task_names)
            task_thread = self.pool.spawn(wsgi_tasks.start)
            self.task_thread_list.append(task_thread)

        # launch the control task
        task_thread = self.pool.spawn(self.launch_control_task,
                                      self.task_status,
                                      *self.task_queue_list)
        self.task_thread_list.append(task_thread)

        if self.enable_backdoor.lower() == 'true':
            self.open_backdoor()

    def wait(self):
        self._done.wait()

    def stop(self):
        for task_thread in self.task_thread_list:
            task_thread.kill()
        if not self._done.ready():
            self._done.send()

    def restart(self):
        pass

    def list_task_threads(self):
        print(self.task_thread_list)

    def list_task_queues(self):
        print(self.task_queue_list)

    def list_task_status(self):
        print(self.task_status)

    def open_backdoor(self):
        backdoor_locals = {
            'list_task_threads': self.list_task_threads,
            'list_task_queues': self.list_task_queues,
            'list_task_status': self.list_task_status,
            'stop': self.stop
        }
        self.backdoor_port = self.config.get_option_value(
            'eventlet_backdoor', 'port')
        self.pool.spawn(backdoor.backdoor_server,
                        eventlet.listen(
                            ('localhost', int(self.backdoor_port))),
                        locals=backdoor_locals)
Ejemplo n.º 53
class ObjectReplicator(Daemon):
    """
    Replicate objects.

    Encapsulates most logic and data needed by the object replication process.
    Each call to .replicate() performs one replication pass.  It's up to the
    caller to do this in a loop.
    """
    def __init__(self, conf, logger=None):
        """
        :param conf: configuration object obtained from ConfigParser
        :param logger: logging object
        """
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='object-replicator')
        self.devices_dir = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.swift_dir = conf.get('swift_dir', '/etc/swift')
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.servers_per_port = int(conf.get('servers_per_port', '0') or 0)
        self.port = None if self.servers_per_port else \
            int(conf.get('bind_port', 6200))
        self.concurrency = int(conf.get('concurrency', 1))
        self.stats_interval = int(conf.get('stats_interval', '300'))
        self.ring_check_interval = int(conf.get('ring_check_interval', 15))
        self.next_check = time.time() + self.ring_check_interval
        self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
        self.replication_cycle = random.randint(0, 9)
        self.partition_times = []
        self.interval = int(
            conf.get('interval') or conf.get('run_pause') or 30)
        self.rsync_timeout = int(
            conf.get('rsync_timeout', DEFAULT_RSYNC_TIMEOUT))
        self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
        self.rsync_bwlimit = conf.get('rsync_bwlimit', '0')
        self.rsync_compress = config_true_value(
            conf.get('rsync_compress', 'no'))
        self.rsync_module = conf.get('rsync_module', '').rstrip('/')
        if not self.rsync_module:
            self.rsync_module = '{replication_ip}::object'
            if config_true_value(conf.get('vm_test_mode', 'no')):
                self.logger.warning('Option object-replicator/vm_test_mode '
                                    'is deprecated and will be removed in a '
                                    'future version. Update your '
                                    'configuration to use option '
                                    'object-replicator/rsync_module.')
                self.rsync_module += '{replication_port}'
        self.http_timeout = int(conf.get('http_timeout', 60))
        self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.rcache = os.path.join(self.recon_cache_path, "object.recon")
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.sync_method = getattr(self, conf.get('sync_method') or 'rsync')
        self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
        self.default_headers = {
            'Content-Length': '0',
            'user-agent': 'object-replicator %s' % os.getpid()
        }
        self.rsync_error_log_line_length = \
            int(conf.get('rsync_error_log_line_length', 0))
        self.handoffs_first = config_true_value(
            conf.get('handoffs_first', False))
        self.handoff_delete = config_auto_int_value(
            conf.get('handoff_delete', 'auto'), 0)
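        # handoff_delete is the number of successful remote syncs required
        # before a handoff partition may be removed; 0 ('auto') requires every
        # replica to sync (see update_deleted below).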
        if any((self.handoff_delete, self.handoffs_first)):
            self.logger.warning('Handoff only mode is not intended for normal '
                                'operation, please disable handoffs_first and '
                                'handoff_delete before the next '
                                'normal rebalance')
        self._diskfile_mgr = DiskFileManager(conf, self.logger)

    def _zero_stats(self):
        """Zero out the stats."""
        self.stats = {
            'attempted': 0,
            'success': 0,
            'failure': 0,
            'hashmatch': 0,
            'rsync': 0,
            'remove': 0,
            'start': time.time(),
            'failure_nodes': {}
        }

    def _add_failure_stats(self, failure_devs_info):
        for node, dev in failure_devs_info:
            self.stats['failure'] += 1
            failure_devs = self.stats['failure_nodes'].setdefault(node, {})
            failure_devs.setdefault(dev, 0)
            failure_devs[dev] += 1

    def _get_my_replication_ips(self):
        my_replication_ips = set()
        ips = whataremyips()
        for policy in POLICIES:
            self.load_object_ring(policy)
            for local_dev in [
                    dev for dev in policy.object_ring.devs
                    if dev and dev['replication_ip'] in ips
                    and dev['replication_port'] == self.port
            ]:
                my_replication_ips.add(local_dev['replication_ip'])
        return list(my_replication_ips)

    # Just exists for doc anchor point
    def sync(self, node, job, suffixes, *args, **kwargs):
        """
        Synchronize local suffix directories from a partition with a remote
        node.

        :param node: the "dev" entry for the remote node to sync with
        :param job: information about the partition being synced
        :param suffixes: a list of suffixes which need to be pushed

        :returns: a (success, candidates) 2-tuple; success is a boolean and
            candidates is a dict (hash => timestamp) of objects that are
            candidates for deletion
        """
        return self.sync_method(node, job, suffixes, *args, **kwargs)

    def load_object_ring(self, policy):
        """
        Make sure the policy's rings are loaded.

        :param policy: the StoragePolicy instance
        :returns: appropriate ring object
        """
        policy.load_ring(self.swift_dir)
        return policy.object_ring

    def _rsync(self, args):
        """
        Execute the rsync binary to replicate a partition.

        :returns: return code of rsync process. 0 is successful
        """
        start_time = time.time()
        ret_val = None
        proc = None
        try:
            with Timeout(self.rsync_timeout):
                proc = subprocess.Popen(args,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.STDOUT)
                results = proc.stdout.read()
                ret_val = proc.wait()
        except Timeout:
            self.logger.error(_("Killing long-running rsync: %s"), str(args))
            if proc:
                # guard against the (unlikely) case where the timeout fired
                # before proc was assigned
                proc.kill()
            return 1  # failure response code
        total_time = time.time() - start_time
        for result in results.split('\n'):
            if result == '':
                continue
            if result.startswith('cd+'):
                continue
            if not ret_val:
                self.logger.info(result)
            else:
                self.logger.error(result)
        if ret_val:
            error_line = _('Bad rsync return code: %(ret)d <- %(args)s') % \
                {'args': str(args), 'ret': ret_val}
            if self.rsync_error_log_line_length:
                error_line = error_line[:self.rsync_error_log_line_length]
            self.logger.error(error_line)
        else:
            log_method = self.logger.info if results else self.logger.debug
            log_method(
                _("Successful rsync of %(src)s at %(dst)s (%(time).03f)"), {
                    'src': args[-2],
                    'dst': args[-1],
                    'time': total_time
                })
        return ret_val

    def rsync(self, node, job, suffixes):
        """
        Uses rsync to implement the sync method. This was the first
        sync method in Swift.
        """
        if not os.path.exists(job['path']):
            return False, {}
        args = [
            'rsync', '--recursive', '--whole-file', '--human-readable',
            '--xattrs', '--itemize-changes', '--ignore-existing',
            '--timeout=%s' % self.rsync_io_timeout,
            '--contimeout=%s' % self.rsync_io_timeout,
            '--bwlimit=%s' % self.rsync_bwlimit,
            '--exclude=.*.%s' % ''.join('[0-9a-zA-Z]' for i in range(6))
        ]
        if self.rsync_compress and \
                job['region'] != node['region']:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            args.append('--compress')
        rsync_module = rsync_module_interpolation(self.rsync_module, node)
        had_any = False
        for suffix in suffixes:
            spath = join(job['path'], suffix)
            if os.path.exists(spath):
                args.append(spath)
                had_any = True
        if not had_any:
            return False, {}
        data_dir = get_data_dir(job['policy'])
        args.append(
            join(rsync_module, node['device'], data_dir, job['partition']))
        return self._rsync(args) == 0, {}

    def ssync(self, node, job, suffixes, remote_check_objs=None):
        return ssync_sender.Sender(self, node, job, suffixes,
                                   remote_check_objs)()

    def check_ring(self, object_ring):
        """
        Check to see if the ring has been updated.

        :param object_ring: the ring to check
        :returns: True if the ring is unchanged (or was not yet due for a
            check), False if the ring has changed and the current pass should
            be aborted
        """
        if time.time() > self.next_check:
            self.next_check = time.time() + self.ring_check_interval
            if object_ring.has_changed():
                return False
        return True

    def update_deleted(self, job):
        """
        High-level method that replicates a single partition that doesn't
        belong on this node.

        :param job: a dict containing info about the partition to be replicated
        """
        def tpool_get_suffixes(path):
            return [
                suff for suff in os.listdir(path)
                if len(suff) == 3 and isdir(join(path, suff))
            ]

        self.replication_count += 1
        self.logger.increment('partition.delete.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        failure_devs_info = set()
        begin = time.time()
        handoff_partition_deleted = False
        try:
            responses = []
            suffixes = tpool.execute(tpool_get_suffixes, job['path'])
            synced_remote_regions = {}
            delete_objs = None
            if suffixes:
                for node in job['nodes']:
                    self.stats['rsync'] += 1
                    kwargs = {}
                    if node['region'] in synced_remote_regions and \
                            self.conf.get('sync_method', 'rsync') == 'ssync':
                        kwargs['remote_check_objs'] = \
                            synced_remote_regions[node['region']]
                    # candidates is a dict(hash=>timestamp) of objects
                    # for deletion
                    success, candidates = self.sync(node, job, suffixes,
                                                    **kwargs)
                    if success:
                        with Timeout(self.http_timeout):
                            conn = http_connect(node['replication_ip'],
                                                node['replication_port'],
                                                node['device'],
                                                job['partition'],
                                                'REPLICATE',
                                                '/' + '-'.join(suffixes),
                                                headers=headers)
                            conn.getresponse().read()
                        if node['region'] != job['region']:
                            synced_remote_regions[node['region']] = viewkeys(
                                candidates)
                    else:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    responses.append(success)
                for region, cand_objs in synced_remote_regions.items():
                    if delete_objs is None:
                        delete_objs = cand_objs
                    else:
                        delete_objs = delete_objs & cand_objs

            if self.handoff_delete:
                # delete handoff if we have had handoff_delete successes
                delete_handoff = len([resp for resp in responses if resp]) >= \
                    self.handoff_delete
            else:
                # delete handoff if all syncs were successful
                delete_handoff = len(responses) == len(job['nodes']) and \
                    all(responses)
            if delete_handoff:
                self.stats['remove'] += 1
                if (self.conf.get('sync_method', 'rsync') == 'ssync'
                        and delete_objs is not None):
                    self.logger.info(_("Removing %s objects"),
                                     len(delete_objs))
                    _junk, error_paths = self.delete_handoff_objs(
                        job, delete_objs)
                    # If replication from this hand-off device succeeded but
                    # the local cleanup failed, mark the remote target devices
                    # as failures so that the replicator will retry this
                    # hand-off partition with the same info on the next pass.
                    if error_paths:
                        failure_devs_info.update([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in job['nodes']
                        ])
                else:
                    self.delete_partition(job['path'])
                    handoff_partition_deleted = True
            elif not suffixes:
                self.delete_partition(job['path'])
                handoff_partition_deleted = True
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing handoff partition"))
            self._add_failure_stats(failure_devs_info)
        finally:
            target_devs_info = set([(target_dev['replication_ip'],
                                     target_dev['device'])
                                    for target_dev in job['nodes']])
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            if not handoff_partition_deleted:
                self.handoffs_remaining += 1
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.delete.timing', begin)

    def delete_partition(self, path):
        self.logger.info(_("Removing partition: %s"), path)
        tpool.execute(shutil.rmtree, path)

    def delete_handoff_objs(self, job, delete_objs):
        success_paths = []
        error_paths = []
        for object_hash in delete_objs:
            object_path = storage_directory(job['obj_path'], job['partition'],
                                            object_hash)
            tpool.execute(shutil.rmtree, object_path, ignore_errors=True)
            suffix_dir = dirname(object_path)
            try:
                os.rmdir(suffix_dir)
                success_paths.append(object_path)
            except OSError as e:
                if e.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                    error_paths.append(object_path)
                    self.logger.exception(
                        "Unexpected error trying to cleanup suffix dir:%r",
                        suffix_dir)
        return success_paths, error_paths

    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(self._diskfile_mgr._get_hashes,
                                               job['path'],
                                               do_listdir=_do_listdir(
                                                   int(job['partition']),
                                                   self.replication_cycle),
                                               reclaim_age=self.reclaim_age)
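            # _do_listdir() spreads the cost of a full listdir-based hash
            # recalculation across passes: roughly one partition in ten gets
            # do_listdir=True each cycle, rotating with self.replication_cycle.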
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            synced_remote_regions = set()
            random.shuffle(job['nodes'])
            nodes = itertools.chain(
                job['nodes'], job['policy'].object_ring.get_more_nodes(
                    int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node['replication_ip'], node['device']))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node['region'] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '',
                                            headers=headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(replication_ip)s/%(device)s '
                                  'responded as unmounted'), node)
                            attempts_left += 1
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['replication_ip']
                                  })
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        self.stats['hashmatch'] += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        self._diskfile_mgr._get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    self.stats['rsync'] += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers=headers)
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    # add only remote region when replicate succeeded
                    if success and node['region'] != job['region']:
                        synced_remote_regions.add(node['region'])
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add(
                        (node['replication_ip'], node['device']))
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self._add_failure_stats(failure_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)

    def stats_line(self):
        """
        Logs various stats for the currently running replication pass.
        """
        if self.replication_count:
            elapsed = (time.time() - self.start) or 0.000001
            rate = self.replication_count / elapsed
            self.logger.info(
                _("%(replicated)d/%(total)d (%(percentage).2f%%)"
                  " partitions replicated in %(time).2fs (%(rate).2f/sec, "
                  "%(remaining)s remaining)"), {
                      'replicated':
                      self.replication_count,
                      'total':
                      self.job_count,
                      'percentage':
                      self.replication_count * 100.0 / self.job_count,
                      'time':
                      time.time() - self.start,
                      'rate':
                      rate,
                      'remaining':
                      '%d%s' % compute_eta(self.start, self.replication_count,
                                           self.job_count)
                  })
            self.logger.info(
                _('%(success)s successes, %(failure)s failures') % self.stats)

            if self.suffix_count:
                self.logger.info(
                    _("%(checked)d suffixes checked - "
                      "%(hashed).2f%% hashed, %(synced).2f%% synced"), {
                          'checked': self.suffix_count,
                          'hashed':
                          (self.suffix_hash * 100.0) / self.suffix_count,
                          'synced':
                          (self.suffix_sync * 100.0) / self.suffix_count
                      })
                self.partition_times.sort()
                self.logger.info(
                    _("Partition times: max %(max).4fs, "
                      "min %(min).4fs, med %(med).4fs"), {
                          'max': self.partition_times[-1],
                          'min': self.partition_times[0],
                          'med':
                          self.partition_times[len(self.partition_times) // 2]
                      })
        else:
            self.logger.info(_("Nothing replicated for %s seconds."),
                             (time.time() - self.start))

    def kill_coros(self):
        """Utility function that kills all coroutines currently running."""
        for coro in list(self.run_pool.coroutines_running):
            try:
                coro.kill(GreenletExit)
            except GreenletExit:
                pass

    def heartbeat(self):
        """
        Loop that runs in the background during replication.  It periodically
        logs progress.
        """
        while True:
            eventlet.sleep(self.stats_interval)
            self.stats_line()

    def detect_lockups(self):
        """
        In testing, the pool.waitall() call very occasionally failed to return.
        This is an attempt to make sure the replicator eventually finishes its
        replication pass.
        """
        while True:
            eventlet.sleep(self.lockup_timeout)
            if self.replication_count == self.last_replication_count:
                self.logger.error(_("Lockup detected.. killing live coros."))
                self.kill_coros()
            self.last_replication_count = self.replication_count

    def build_replication_jobs(self,
                               policy,
                               ips,
                               override_devices=None,
                               override_partitions=None):
        """
        Helper function for collect_jobs to build jobs for replication
        using replication style storage policy
        """
        jobs = []
        self.all_devs_info.update([(dev['replication_ip'], dev['device'])
                                   for dev in policy.object_ring.devs if dev])
        data_dir = get_data_dir(policy)
        found_local = False
        for local_dev in [
                dev for dev in policy.object_ring.devs if
            (dev and is_local_device(ips, self.port, dev['replication_ip'],
                                     dev['replication_port']) and
             (override_devices is None or dev['device'] in override_devices))
        ]:
            found_local = True
            dev_path = join(self.devices_dir, local_dev['device'])
            obj_path = join(dev_path, data_dir)
            tmp_path = join(dev_path, get_tmp_dir(policy))
            if self.mount_check and not ismount(dev_path):
                self._add_failure_stats([
                    (failure_dev['replication_ip'], failure_dev['device'])
                    for failure_dev in policy.object_ring.devs if failure_dev
                ])
                self.logger.warning(_('%s is not mounted'),
                                    local_dev['device'])
                continue
            unlink_older_than(tmp_path, time.time() - self.reclaim_age)
            if not os.path.exists(obj_path):
                try:
                    mkdirs(obj_path)
                except Exception:
                    self.logger.exception('ERROR creating %s' % obj_path)
                continue
            for partition in os.listdir(obj_path):
                if (override_partitions is not None
                        and partition not in override_partitions):
                    continue

                if (partition.startswith('auditor_status_')
                        and partition.endswith('.json')):
                    # ignore auditor status files
                    continue

                part_nodes = None
                try:
                    job_path = join(obj_path, partition)
                    part_nodes = policy.object_ring.get_part_nodes(
                        int(partition))
                    nodes = [
                        node for node in part_nodes
                        if node['id'] != local_dev['id']
                    ]
                    jobs.append(
                        dict(path=job_path,
                             device=local_dev['device'],
                             obj_path=obj_path,
                             nodes=nodes,
                             delete=len(nodes) > len(part_nodes) - 1,
                             policy=policy,
                             partition=partition,
                             region=local_dev['region']))
                except ValueError:
                    if part_nodes:
                        self._add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device']) for failure_dev in nodes
                        ])
                    else:
                        self._add_failure_stats([
                            (failure_dev['replication_ip'],
                             failure_dev['device'])
                            for failure_dev in policy.object_ring.devs
                            if failure_dev
                        ])
                    continue
        if not found_local:
            self.logger.error(
                "Can't find itself in policy with index %d with"
                " ips %s and with port %s in ring file, not"
                " replicating", int(policy), ", ".join(ips), self.port)
        return jobs

    def collect_jobs(self,
                     override_devices=None,
                     override_partitions=None,
                     override_policies=None):
        """
        Returns a sorted list of jobs (dictionaries) that specify the
        partitions, nodes, etc to be rsynced.

        :param override_devices: if set, only jobs on these devices
            will be returned
        :param override_partitions: if set, only jobs on these partitions
            will be returned
        :param override_policies: if set, only jobs in these storage
            policies will be returned
        """
        jobs = []
        ips = whataremyips(self.bind_ip)
        for policy in POLICIES:
            if policy.policy_type == REPL_POLICY:
                if (override_policies is not None
                        and str(policy.idx) not in override_policies):
                    continue
                # ensure rings are loaded for policy
                self.load_object_ring(policy)
                jobs += self.build_replication_jobs(
                    policy,
                    ips,
                    override_devices=override_devices,
                    override_partitions=override_partitions)
        random.shuffle(jobs)
        if self.handoffs_first:
            # Move the handoff parts to the front of the list
            jobs.sort(key=lambda job: not job['delete'])
        self.job_count = len(jobs)
        return jobs

    def replicate(self,
                  override_devices=None,
                  override_partitions=None,
                  override_policies=None):
        """Run a replication pass"""
        self.start = time.time()
        self.suffix_count = 0
        self.suffix_sync = 0
        self.suffix_hash = 0
        self.replication_count = 0
        self.last_replication_count = -1
        self.replication_cycle = (self.replication_cycle + 1) % 10
        self.partition_times = []
        self.my_replication_ips = self._get_my_replication_ips()
        self.all_devs_info = set()
        self.handoffs_remaining = 0

        stats = eventlet.spawn(self.heartbeat)
        lockup_detector = eventlet.spawn(self.detect_lockups)
        eventlet.sleep()  # Give spawns a cycle

        current_nodes = None
        try:
            self.run_pool = GreenPool(size=self.concurrency)
            jobs = self.collect_jobs(override_devices=override_devices,
                                     override_partitions=override_partitions,
                                     override_policies=override_policies)
            for job in jobs:
                current_nodes = job['nodes']
                if override_devices and job['device'] not in override_devices:
                    continue
                if override_partitions and \
                        job['partition'] not in override_partitions:
                    continue
                dev_path = join(self.devices_dir, job['device'])
                if self.mount_check and not ismount(dev_path):
                    self._add_failure_stats([(failure_dev['replication_ip'],
                                              failure_dev['device'])
                                             for failure_dev in job['nodes']])
                    self.logger.warning(_('%s is not mounted'), job['device'])
                    continue
                if self.handoffs_first and not job['delete']:
                    # in handoffs first mode, we won't process primary
                    # partitions until the rebalance has completed successfully
                    if self.handoffs_remaining:
                        self.logger.warning(
                            _("Handoffs first mode still has handoffs "
                              "remaining.  Aborting current "
                              "replication pass."))
                        break
                if not self.check_ring(job['policy'].object_ring):
                    self.logger.info(
                        _("Ring change detected. Aborting "
                          "current replication pass."))
                    return
                try:
                    if isfile(job['path']):
                        # Clean up any (probably zero-byte) files where a
                        # partition should be.
                        self.logger.warning(
                            'Removing partition directory '
                            'which was a file: %s', job['path'])
                        os.remove(job['path'])
                        continue
                except OSError:
                    continue
                if job['delete']:
                    self.run_pool.spawn(self.update_deleted, job)
                else:
                    self.run_pool.spawn(self.update, job)
            current_nodes = None
            with Timeout(self.lockup_timeout):
                self.run_pool.waitall()
        except (Exception, Timeout):
            if current_nodes:
                self._add_failure_stats([(failure_dev['replication_ip'],
                                          failure_dev['device'])
                                         for failure_dev in current_nodes])
            else:
                self._add_failure_stats(self.all_devs_info)
            self.logger.exception(_("Exception in top-level replication loop"))
            self.kill_coros()
        finally:
            stats.kill()
            lockup_detector.kill()
            self.stats_line()
            self.stats['attempted'] = self.replication_count

    def run_once(self, *args, **kwargs):
        self._zero_stats()
        self.logger.info(_("Running object replicator in script mode."))

        override_devices = list_from_csv(kwargs.get('devices'))
        override_partitions = list_from_csv(kwargs.get('partitions'))
        override_policies = list_from_csv(kwargs.get('policies'))
        if not override_devices:
            override_devices = None
        if not override_partitions:
            override_partitions = None
        if not override_policies:
            override_policies = None

        self.replicate(override_devices=override_devices,
                       override_partitions=override_partitions,
                       override_policies=override_policies)
        total = (time.time() - self.stats['start']) / 60
        self.logger.info(
            _("Object replication complete (once). (%.02f minutes)"), total)
        if not (override_partitions or override_devices):
            replication_last = time.time()
            dump_recon_cache(
                {
                    'replication_stats': self.stats,
                    'replication_time': total,
                    'replication_last': replication_last,
                    'object_replication_time': total,
                    'object_replication_last': replication_last
                }, self.rcache, self.logger)

    def run_forever(self, *args, **kwargs):
        self.logger.info(_("Starting object replicator in daemon mode."))
        # Run the replicator continually
        while True:
            self._zero_stats()
            self.logger.info(_("Starting object replication pass."))
            # Run the replicator
            self.replicate()
            total = (time.time() - self.stats['start']) / 60
            self.logger.info(_("Object replication complete. (%.02f minutes)"),
                             total)
            replication_last = time.time()
            dump_recon_cache(
                {
                    'replication_stats': self.stats,
                    'replication_time': total,
                    'replication_last': replication_last,
                    'object_replication_time': total,
                    'object_replication_last': replication_last
                }, self.rcache, self.logger)
            self.logger.debug('Replication sleeping for %s seconds.',
                              self.interval)
            sleep(self.interval)
Ejemplo n.º 54
class Chewie:
    """Facilitates EAP supplicant and RADIUS server communication"""
    RADIUS_UDP_PORT = 1812
    PAE_GROUP_ADDRESS = MacAddress.from_string("01:80:C2:00:00:03")

    DEFAULT_PORT_UP_IDENTITY_REQUEST_WAIT_PERIOD = 20
    DEFAULT_PREEMPTIVE_IDENTITY_REQUEST_INTERVAL = 60

    # pylint: disable=too-many-arguments
    def __init__(self,
                 interface_name,
                 logger=None,
                 auth_handler=None,
                 failure_handler=None,
                 logoff_handler=None,
                 radius_server_ip=None,
                 radius_server_port=None,
                 radius_server_secret=None,
                 chewie_id=None):

        self.interface_name = interface_name
        self.log_name = Chewie.__name__
        if logger:
            self.log_name = logger.name + "." + Chewie.__name__

        self.logger = get_logger(self.log_name)
        self.auth_handler = auth_handler
        self.failure_handler = failure_handler
        self.logoff_handler = logoff_handler

        self.radius_server_ip = radius_server_ip
        self.radius_secret = radius_server_secret
        self.radius_server_port = self.RADIUS_UDP_PORT
        if radius_server_port:
            self.radius_server_port = radius_server_port
        self.radius_listen_ip = "0.0.0.0"
        self.radius_listen_port = 0

        self.chewie_id = "44-44-44-44-44-44:"  # used by the RADIUS Attribute
        # 'Called-Station' in Access-Request
        if chewie_id:
            self.chewie_id = chewie_id

        self.state_machines = {}  # port_id_str: { mac : state_machine}
        # port_id: last ID used in preemptive identity request.
        self.port_to_eapol_id = {}
        # TODO for port_to_eapol_id - may want to set ID to null (-1...) if sent from the
        #  state machine.
        self.port_status = {}  # port_id: status (true=up, false=down)
        self.port_to_identity_job = {}  # port_id: timerJob

        self.eap_output_messages = Queue()
        self.radius_output_messages = Queue()

        self.radius_lifecycle = RadiusLifecycle(self.radius_secret,
                                                self.chewie_id, self.logger)
        self.timer_scheduler = timer_scheduler.TimerScheduler(self.logger)

        self.eap_socket = None
        self.mab_socket = None
        self.pool = None
        self.eventlets = None
        self.radius_socket = None
        self.interface_index = None

        self.eventlets = []
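
    # Typical embedding (hypothetical usage sketch): construct Chewie with the
    # interface to listen on plus auth/failure/logoff callbacks, then call
    # run(), which blocks in start_threads_and_wait():
    #
    #     chewie = Chewie("eth0", auth_handler=on_auth,
    #                     radius_server_ip="10.0.0.1",
    #                     radius_server_secret="SECRET")
    #     chewie.run()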

    def run(self):
        """setup chewie and start socket eventlet threads"""
        self.logger.info("Starting")
        self.setup_eap_socket()
        self.setup_mab_socket()
        self.setup_radius_socket()
        self.start_threads_and_wait()

    def running(self):  # pylint: disable=no-self-use
        """Used to nicely exit the event loops"""
        return True

    def shutdown(self):
        """kill eventlets and quit"""
        for eventlet in self.eventlets:
            eventlet.kill()

    def start_threads_and_wait(self):
        """Start the thread and wait until they complete (hopefully never)"""
        self.pool = GreenPool()

        self.eventlets.append(self.pool.spawn(self.send_eap_messages))
        self.eventlets.append(self.pool.spawn(self.receive_eap_messages))
        self.eventlets.append(self.pool.spawn(self.receive_mab_messages))

        self.eventlets.append(self.pool.spawn(self.send_radius_messages))
        self.eventlets.append(self.pool.spawn(self.receive_radius_messages))

        self.eventlets.append(self.pool.spawn(self.timer_scheduler.run))

        self.pool.waitall()

    def auth_success(self, src_mac, port_id, period, *args, **kwargs):  # pylint: disable=unused-variable
        """authentication shim between faucet and chewie
        Args:
            src_mac (MacAddress): the mac of the successful supplicant
            port_id (MacAddress): the 'mac' identifier of what switch port the success is on
            period (int): time (seconds) until the session times out.
            """

        if self.auth_handler:
            self.auth_handler(src_mac, port_id, *args, **kwargs)

        self.port_to_identity_job[port_id] = self.timer_scheduler.call_later(
            period, self.reauth_port, src_mac, port_id)

    def auth_failure(self, src_mac, port_id):
        """failure shim between faucet and chewie
        Args:
            src_mac (MacAddress): the mac of the failed supplicant
            port_id (MacAddress): the 'mac' identifier of what switch port
             the failure is on"""
        if self.failure_handler:
            self.failure_handler(src_mac, port_id)

    def auth_logoff(self, src_mac, port_id):
        """logoff shim between faucet and chewie
        Args:
            src_mac (MacAddress): the mac of the logoff supplicant
            port_id (MacAddress): the 'mac' identifier of what switch port
             the logoff is on"""
        if self.logoff_handler:
            self.logoff_handler(src_mac, port_id)

    def port_down(self, port_id):
        """
        should be called by faucet when port has gone down.
        Args:
            port_id (str): id of port.
        """
        # all chewie needs to do is change its internal state.
        # faucet will remove the acls by itself.
        self.set_port_status(port_id, False)

        job = self.port_to_identity_job.get(port_id, None)

        if port_id in self.state_machines:
            del self.state_machines[port_id]

        if job:
            job.cancel()
        self.port_to_eapol_id.pop(port_id, None)

    def port_up(self, port_id):
        """
        should be called by faucet when port has come up
        Args:
            port_id (str): id of port.
        """
        self.logger.info("port %s up", port_id)
        self.set_port_status(port_id, True)

        self.port_to_identity_job[port_id] = self.timer_scheduler.call_later(
            self.DEFAULT_PORT_UP_IDENTITY_REQUEST_WAIT_PERIOD,
            self.send_preemptive_identity_request_if_no_active_on_port,
            port_id)

    def send_preemptive_identity_request_if_no_active_on_port(self, port_id):
        """
        If there is no active (in progress, or in state success(2)) supplicant,
        send out the preemptive identity request message.
        Args:
            port_id (str):
        """
        self.logger.debug(
            "considering preemptive identity request on port %s", port_id)
        # schedule next request.
        self.port_to_identity_job[port_id] = self.timer_scheduler.call_later(
            self.DEFAULT_PREEMPTIVE_IDENTITY_REQUEST_INTERVAL,
            self.send_preemptive_identity_request_if_no_active_on_port,
            port_id)
        if not self.port_status.get(port_id, False):
            self.logger.debug("can't send output, port %s is down", port_id)
            return

        state_machines = self.state_machines.get(port_id, {})

        # pylint: disable=invalid-name
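        # for/else: the else branch runs only if no supplicant on this port is
        # in progress or already successful (i.e. the loop did not break).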
        for sm in state_machines.values():
            if sm.is_in_progress() or sm.is_success():
                self.logger.debug('port %s is active, not sending', port_id)
                break
        else:
            self.logger.debug("executing timer premptive on port %s", port_id)
            self.send_preemptive_identity_request(port_id)

    def send_preemptive_identity_request(self, port_id, state_machine=None):
        """
        Message (EAP Identity Request) that notifies supplicant that port is using 802.1X
        Args:
            port_id (str):

        """
        _id = get_random_id()
        # ID of preemptive reauth attempt must be different to ID of initial authentication.
        if state_machine is not None and hasattr(state_machine, 'current_id'):
            while _id == state_machine.current_id:
                _id = get_random_id()
        data = IdentityMessage(self.PAE_GROUP_ADDRESS, _id, Eap.REQUEST, "")
        self.port_to_eapol_id[port_id] = _id
        self.eap_output_messages.put_nowait(
            EapQueueMessage(data, self.PAE_GROUP_ADDRESS,
                            MacAddress.from_string(port_id)))
        self.logger.info("sending premptive on port %s with ID %s", port_id,
                         _id)

    def reauth_port(self, src_mac, port_id):
        """
        Send an Identity Request to src_mac on port_id, prompting
        the supplicant to re-authenticate.
        Args:
            src_mac (MacAddress):
            port_id (str):
        """
        state_machine = self.state_machines.get(port_id,
                                                {}).get(str(src_mac), None)

        if state_machine and state_machine.is_success():
            self.logger.info('reauthenticating src_mac: %s on port: %s',
                             src_mac, port_id)
            self.send_preemptive_identity_request(port_id, state_machine)
        elif state_machine is None:
            self.logger.debug(
                'not reauthing. state machine on port: %s, mac: %s is none',
                port_id, src_mac)
        else:
            self.logger.debug(
                "not reauthing, authentication is not in success(2) (state: %s)'",
                state_machine.state)

    def set_port_status(self, port_id, status):
        """
        Send status of a port at port_id
        Args:
            port_id ():
            status ():
        """
        port_id_str = str(port_id)

        self.port_status[port_id] = status

        if port_id_str not in self.state_machines:
            self.state_machines[port_id_str] = {}

        for _, state_machine in self.state_machines[port_id_str].items():
            event = EventPortStatusChange(status)
            state_machine.event(event)

    def setup_eap_socket(self):
        """Setup EAP socket"""
        log_prefix = "%s.EapSocket" % self.logger.name
        self.eap_socket = EapSocket(self.interface_name, log_prefix)
        self.eap_socket.setup()

    def setup_mab_socket(self):
        """Setup Mab socket"""
        log_prefix = "%s.MabSocket" % self.logger.name
        self.mab_socket = MabSocket(self.interface_name, log_prefix)
        self.mab_socket.setup()

    def setup_radius_socket(self):
        """Setup Radius socket"""
        log_prefix = "%s.RadiusSocket" % self.logger.name
        self.radius_socket = RadiusSocket(self.radius_listen_ip,
                                          self.radius_listen_port,
                                          self.radius_server_ip,
                                          self.radius_server_port, log_prefix)
        self.radius_socket.setup()
        self.logger.info("Radius Listening on %s:%d", self.radius_listen_ip,
                         self.radius_listen_port)

    def send_eap_messages(self):
        """Send EAP messages to Supplicant forever."""
        while self.running():
            sleep(0)
            eap_queue_message = self.eap_output_messages.get()
            self.logger.info("Sending message %s from %s to %s",
                             eap_queue_message.message,
                             str(eap_queue_message.port_mac),
                             str(eap_queue_message.src_mac))
            self.eap_socket.send(
                MessagePacker.ethernet_pack(eap_queue_message.message,
                                            eap_queue_message.port_mac,
                                            eap_queue_message.src_mac))

    def send_eth_to_state_machine(self, packed_message):
        """Send an ethernet frame to MAB State Machine"""
        ethernet_packet = EthernetPacket.parse(packed_message)
        port_id = ethernet_packet.dst_mac
        src_mac = ethernet_packet.src_mac

        self.logger.info("Sending MAC to MAB State Machine: %s", src_mac)
        message_id = -2
        state_machine = self.get_state_machine(src_mac, port_id, message_id)
        event = EventMessageReceived(ethernet_packet, port_id)
        state_machine.event(event)
        # NOTE: Should probably throttle incoming packets once one is received

    def receive_eap_messages(self):
        """receive eap messages from supplicant forever."""
        while self.running():
            sleep(0)
            self.logger.info("waiting for eap.")
            packed_message = self.eap_socket.receive()
            self.logger.info("Received packed_message: %s",
                             str(packed_message))
            try:
                eap, dst_mac = MessageParser.ethernet_parse(packed_message)
            except MessageParseError as exception:
                self.logger.warning(
                    "MessageParser.ethernet_parse threw exception.\n"
                    " packed_message: '%s'.\n"
                    " exception: '%s'.", packed_message, exception)
                continue

            self.logger.info("Received eap message: %s", str(eap))
            self.send_eap_to_state_machine(eap, dst_mac)

    def receive_mab_messages(self):
        """Receive DHCP request for MAB."""
        while self.running():
            sleep(0)
            self.logger.info("waiting for MAB activity.")
            packed_message = self.mab_socket.receive()
            self.logger.info(
                "Received DHCP packet for MAB. packed_message: %s",
                str(packed_message))
            self.send_eth_to_state_machine(packed_message)

    def send_eap_to_state_machine(self, eap, dst_mac):
        """sends an eap message to the state machine"""
        self.logger.info("eap EAP(): %s", eap)
        message_id = getattr(eap, 'message_id', -1)
        state_machine = self.get_state_machine(eap.src_mac, dst_mac,
                                               message_id)

        # Check for response to preemptive_eap
        preemptive_eap_message_id = self.port_to_eapol_id.get(str(dst_mac), -2)
        if message_id != -1 and message_id == preemptive_eap_message_id:
            self.logger.debug(
                'eap packet is response to chewie initiated authentication')
            event = EventPreemptiveEAPResponseMessageReceived(
                eap, dst_mac, preemptive_eap_message_id)
        else:
            event = EventMessageReceived(eap, dst_mac)

        state_machine.event(event)

    def send_radius_messages(self):
        """send RADIUS messages to RADIUS Server forever."""
        while self.running():
            sleep(0)
            radius_output_bits = self.radius_output_messages.get()
            packed_message = self.radius_lifecycle.process_outbound(
                radius_output_bits)
            self.radius_socket.send(packed_message)
            self.logger.info("sent radius message.")

    def receive_radius_messages(self):
        """receive RADIUS messages from RADIUS server forever."""
        while self.running():
            sleep(0)
            self.logger.info("waiting for radius.")
            packed_message = self.radius_socket.receive()
            try:
                radius = MessageParser.radius_parse(packed_message,
                                                    self.radius_secret,
                                                    self.radius_lifecycle)
            except MessageParseError as exception:
                self.logger.warning(
                    "MessageParser.radius_parse threw exception.\n"
                    " packed_message: '%s'.\n"
                    " exception: '%s'.", packed_message, exception)
                continue
            self.logger.info("Received RADIUS message: %s", str(radius))
            self.send_radius_to_state_machine(radius)

    def send_radius_to_state_machine(self, radius):
        """sends a radius message to the state machine"""
        event = self.radius_lifecycle.build_event_radius_message_received(
            radius)
        state_machine = self.get_state_machine_from_radius_packet_id(
            radius.packet_id)
        state_machine.event(event)

    def get_state_machine_from_radius_packet_id(self, packet_id):
        """Gets a FullEAPStateMachine from the RADIUS message packet_id
        Args:
            packet_id (int): id of the received RADIUS message
        Returns:
            FullEAPStateMachine
        """
        return self.get_state_machine(
            **self.radius_lifecycle.packet_id_to_mac[packet_id])

    # TODO change message_id functionality
    def get_state_machine(self, src_mac, port_id, message_id=-1):
        """Gets or creates if it does not already exist an FullEAPStateMachine for the src_mac.
        Args:
            message_id (int): eap message id, -1 means none found.
            src_mac (MacAddress): the MAC address whose state machine to get.
            port_id (MacAddress): ID of the port where the src_mac is.

        Returns:
            FullEAPStateMachine
        """
        port_id_str = str(port_id)
        src_mac_str = str(src_mac)
        port_state_machines = self.state_machines.get(port_id_str, None)
        if port_state_machines is None:
            self.state_machines[port_id_str] = {}

        self.logger.info("Port based state machines are as follows: %s",
                         self.state_machines[port_id_str])
        state_machine = self.state_machines[port_id_str].get(src_mac_str, None)

        if not state_machine and message_id == -2:
            # Do MAB
            self.logger.info("Creating MAB State Machine")
            log_prefix = "%s.SM - port: %s, client: %s" % (
                self.logger.name, port_id_str, src_mac)
            state_machine = MacAuthenticationBypassStateMachine(
                self.radius_output_messages, src_mac, self.timer_scheduler,
                self.auth_success, self.auth_failure, log_prefix)
            self.state_machines[port_id_str][src_mac_str] = state_machine
            return state_machine

        if not state_machine:
            self.logger.info("Creating EAP FULL State Machine")
            log_prefix = "%s.SM - port: %s, client: %s" % (
                self.logger.name, port_id_str, src_mac)
            state_machine = FullEAPStateMachine(self.eap_output_messages,
                                                self.radius_output_messages,
                                                src_mac, self.timer_scheduler,
                                                self.auth_success,
                                                self.auth_failure,
                                                self.auth_logoff, log_prefix)
            self.state_machines[port_id_str][src_mac_str] = state_machine
            self.logger.debug(
                "created new state machine for '%s' on port '%s'", src_mac_str,
                port_id_str)

        return state_machine
Ejemplo n.º 55
    def run(self, *args, **kwargs):
        try:
            self.logger.info('event agent: starting')

            pool = GreenPool(len(self.workers))

            for worker in self.workers:
                pool.spawn(worker.start)

            def front(server, backend):
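                # Receive multipart messages from producers, persist each
                # event locally, forward it to a worker via the backend
                # socket, and always ack the sender.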
                while True:
                    msg = server.recv_multipart()
                    if validate_msg(msg):
                        try:
                            event_id = sqlite3.Binary(msg[2])
                            data = msg[3]
                            self.queue.put(event_id, data)
                            event = ['', msg[2], msg[3]]
                            backend.send_multipart(event)
                        except Exception:
                            # Best-effort enqueue; the sender is acked below either way.
                            pass
                        finally:
                            ack = msg[0:3]
                            server.send_multipart(ack)

            def back(backend):
                while True:
                    msg = backend.recv_multipart()
                    event_id = msg[1]
                    success = msg[2]
                    event_id = sqlite3.Binary(event_id)
                    if not success:
                        self.queue.failed(event_id)
                    else:
                        self.queue.delete(event_id)

            boss_pool = GreenPool(2)
            boss_pool.spawn_n(front, self.server, self.backend)
            boss_pool.spawn_n(back, self.backend)
            while True:
                sleep(1)

                now = time.time()
                if now - self.last_retry > self.retry_interval:
                    self.retry()
                    self.last_retry = now

                for w in list(self.workers):  # iterate over a copy; the list is mutated below
                    if w.failed:
                        self.workers.remove(w)
                        self.logger.warn('restart worker "%s"', w.name)
                        new_w = EventWorker(self.conf, w.name, self.context)
                        self.workers.append(new_w)
                        pool.spawn(new_w.start)

        except Exception as e:
            self.logger.error('ERROR in main loop %s', e)
            raise  # re-raise, preserving the original traceback
        finally:
            self.logger.warn('event agent: stopping')
            self.stop_workers()
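
In the main loop above, failed workers are replaced while walking self.workers; iterating over a snapshot of the list keeps the removal from skipping entries. Here is a small sketch of just that restart step, where Worker stands in for the real EventWorker and spawn for pool.spawn (both names are assumptions for illustration).

# Sketch of the restart-failed-workers step from the loop above.
class Worker:
    def __init__(self, name, failed=False):
        self.name = name
        self.failed = failed

    def start(self):
        pass

def restart_failed(workers, spawn):
    for w in list(workers):        # copy: workers is mutated inside the loop
        if w.failed:
            workers.remove(w)
            replacement = Worker(w.name)
            workers.append(replacement)
            spawn(replacement.start)

workers = [Worker('w1'), Worker('w2', failed=True)]
restart_failed(workers, spawn=lambda fn: fn())
assert all(not w.failed for w in workers)
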
Example #56
    def reap_container(self, account, account_partition, account_nodes,
                       container):
        """
        Deletes the data and the container itself for the given container. This
        will call :func:`reap_object` up to sqrt(self.concurrency) times
        concurrently for the objects in the container.

        If there is any exception while deleting a single object, the process
        will continue for any other objects in the container and the failed
        objects will be tried again the next time this function is called with
        the same parameters.

        If there is any exception while listing the objects for deletion, the
        process will stop (but will obviously be tried again the next time this
        function is called with the same parameters). This is a possibility
        since the listing comes from querying just the primary remote container
        server.

        Once deletion has been attempted for every object, the reaper attempts
        to delete the container itself by sending a delete request to all
        container nodes. The format of the delete request is such that each
        container server will update a corresponding account server, removing
        the container from the account's listing.

        This function returns nothing and should raise no exceptions; it only
        updates the various self.stats_* values to reflect what occurred.

        :param account: The name of the account for the container.
        :param account_partition: The partition for the account on the account
                                  ring.
        :param account_nodes: The primary node dicts for the account.
        :param container: The name of the container to delete.

        * See also: :func:`chase.common.ring.Ring.get_nodes` for a description
          of the account node dicts.
        """
        account_nodes = list(account_nodes)
        part, nodes = self.get_container_ring().get_nodes(account, container)
        node = nodes[-1]
        pool = GreenPool(size=self.object_concurrency)
        marker = ''
        while True:
            objects = None
            try:
                objects = direct_get_container(node, part, account, container,
                        marker=marker, conn_timeout=self.conn_timeout,
                        response_timeout=self.node_timeout)[1]
                self.stats_return_codes[2] = \
                    self.stats_return_codes.get(2, 0) + 1
            except ClientException as err:
                if self.logger.getEffectiveLevel() <= DEBUG:
                    self.logger.exception(
                        _('Exception with %(ip)s:%(port)s/%(device)s'), node)
                self.stats_return_codes[err.http_status // 100] = \
                    self.stats_return_codes.get(err.http_status // 100, 0) + 1
            if not objects:
                break
            try:
                for obj in objects:
                    if isinstance(obj['name'], unicode):
                        obj['name'] = obj['name'].encode('utf8')
                    pool.spawn(self.reap_object, account, container, part,
                               nodes, obj['name'])
                pool.waitall()
            except (Exception, Timeout):
                self.logger.exception(_('Exception with objects for container '
                    '%(container)s for account %(account)s'),
                    {'container': container, 'account': account})
            marker = objects[-1]['name']
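
The loop above pages through the container listing with a marker, advancing it to the last name returned until a page comes back empty. A minimal sketch of the same marker-based pagination against an in-memory listing follows; list_page and iterate_all are illustrative helpers, not Swift APIs.

# Sketch of marker-based pagination as used by reap_container.
# list_page() is a hypothetical helper returning at most `limit` names
# greater than `marker`, in sorted order.
def list_page(names, marker, limit=2):
    return [n for n in sorted(names) if n > marker][:limit]

def iterate_all(names):
    marker = ''
    while True:
        page = list_page(names, marker)
        if not page:
            break
        for name in page:
            yield name           # stand-in for pool.spawn(reap_object, ...)
        marker = page[-1]        # advance the marker to the last name seen

assert list(iterate_all(['a', 'b', 'c'])) == ['a', 'b', 'c']
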