Example #1
 def _deindex_chunk(self, chunk):
     rdir = RdirClient(self.conf, pool_manager=self.conscience.pool_manager)
     url = chunk['url']
     volume_id = url.split('/', 3)[2]
     chunk_id = url.split('/', 3)[3]
     rdir.chunk_delete(volume_id, self.container_id,
                       self.content_id, chunk_id)
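For reference, a minimal sketch of how the split('/', 3) calls above decompose a chunk URL; the URL value is a made-up placeholder:

url = 'http://192.168.0.1:6201/0123456789ABCDEF'   # placeholder chunk URL
parts = url.split('/', 3)  # ['http:', '', '192.168.0.1:6201', '0123456789ABCDEF']
volume_id = parts[2]       # '192.168.0.1:6201'
chunk_id = parts[3]        # '0123456789ABCDEF'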
Example #2
class ChunkOperator(object):
    """
    Execute maintenance operations on chunks.
    """

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.rdir_client = RdirClient(conf, logger=self.logger)
        self.content_factory = ContentFactory(conf, logger=self.logger)

    def rebuild(self, container_id, content_id, chunk_id_or_pos,
                rawx_id=None, try_chunk_delete=False, allow_same_rawx=True):
        """
        Try to find the chunk in the metadata of the specified object,
        then rebuild it.
        """
        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise OrphanChunk('Content not found: possible orphan chunk')

        chunk_size = 0
        chunk_pos = None
        if len(chunk_id_or_pos) < 32:
            chunk_pos = chunk_id_or_pos
            chunk_id = None
            metapos = int(chunk_pos.split('.', 1)[0])
            chunk_size = content.chunks.filter(metapos=metapos).all()[0].size
        else:
            if '/' in chunk_id_or_pos:
                chunk_id = chunk_id_or_pos.rsplit('/', 1)[-1]
            else:
                chunk_id = chunk_id_or_pos

            chunk = content.chunks.filter(id=chunk_id).one()
            if chunk is None:
                raise OrphanChunk(
                    'Chunk not found in content: possible orphan chunk')
            elif rawx_id and chunk.host != rawx_id:
                raise ValueError('Chunk does not belong to this rawx')
            chunk_size = chunk.size

        content.rebuild_chunk(
            chunk_id, allow_same_rawx=allow_same_rawx,
            chunk_pos=chunk_pos)

        # 'chunk' is only set when a chunk ID (not a position) was given
        if try_chunk_delete and chunk_id is not None:
            try:
                content.blob_client.chunk_delete(chunk.url)
                self.logger.info("Chunk %s deleted", chunk.url)
            except NotFound as exc:
                self.logger.debug("Chunk %s: %s", chunk.url, exc)

        # This call does not raise an exception if the chunk is not referenced
        if chunk_id is not None:
            self.rdir_client.chunk_delete(
                chunk.host, container_id, content_id, chunk_id)

        return chunk_size
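A hedged usage sketch for ChunkOperator.rebuild(); the namespace and identifiers below are placeholders, and OrphanChunk is assumed to be importable from the same module as the operator:

conf = {'namespace': 'OPENIO'}  # placeholder namespace
operator = ChunkOperator(conf)
try:
    # chunk_id_or_pos accepts a chunk ID (32 characters or more)
    # or a position such as '0.1' (fewer than 32 characters)
    size = operator.rebuild('0' * 64, '0' * 32, '0' * 64,
                            try_chunk_delete=True)
    print('Rebuilt %d bytes' % size)
except OrphanChunk:
    pass  # the content or chunk is no longer referenced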
Example #3
 def __init__(self,
              conf,
              logger,
              volume,
              try_chunk_delete=False,
              beanstalkd_addr=None,
              **kwargs):
     super(BlobRebuilder, self).__init__(conf, logger, volume, **kwargs)
     # rdir
     self.rdir_client = RdirClient(conf, logger=self.logger)
     self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
     # rawx
     self.try_chunk_delete = try_chunk_delete
     # beanstalk
     if beanstalkd_addr:
         self.beanstalkd_listener = BeanstalkdListener(
             beanstalkd_addr,
             conf.get('beanstalkd_tube', DEFAULT_REBUILDER_TUBE),
             self.logger, **kwargs)
     else:
         self.beanstalkd_listener = None
     # counters
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_expected_chunks = None
     # distributed
     self.distributed = False
Example #4
 def __init__(self, conf, **kwargs):
     super(BlobIndexer, self).__init__(conf)
     self.logger = get_logger(conf)
     volume = conf.get('volume')
     if not volume:
         raise exc.ConfigurationException('No volume specified for indexer')
     self.volume = volume
     self.passes = 0
     self.errors = 0
     self.successes = 0
     self.last_reported = 0
     self.total_since_last_reported = 0
     self.chunks_run_time = 0
     self.interval = int_value(
         conf.get('interval'), 300)
     self.report_interval = int_value(
         conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(
         conf.get('chunks_per_second'), 30)
     pm = get_pool_manager(pool_connections=10)
     self.index_client = RdirClient(conf, logger=self.logger,
                                    pool_manager=pm)
     self.namespace, self.volume_id = check_volume(self.volume)
     self.convert_chunks = true_value(conf.get('convert_chunks'))
     if self.convert_chunks:
         converter_conf = self.conf.copy()
         converter_conf['no_backup'] = True
         self.converter = BlobConverter(converter_conf, logger=self.logger,
                                        pool_manager=pm)
     else:
         self.converter = None
Example #5
    def setUp(self):
        super(TestIndexerCrawler, self).setUp()

        self.namespace = self.conf['namespace']

        self.gridconf = {"namespace": self.namespace}
        self.rdir_client = RdirClient(self.gridconf)
Example #6
 def __init__(self,
              conf,
              logger,
              volume,
              input_file=None,
              try_chunk_delete=False,
              beanstalkd_addr=None):
     self.conf = conf
     self.logger = logger or get_logger(conf)
     self.volume = volume
     self.run_time = 0
     self.passes = 0
     self.errors = 0
     self.last_reported = 0
     self.chunks_run_time = 0
     self.bytes_running_time = 0
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_chunks_processed = 0
     self.dry_run = true_value(conf.get('dry_run', False))
     self.report_interval = int_value(conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                            30)
     self.max_bytes_per_second = int_value(conf.get('bytes_per_second'),
                                           10000000)
     self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
     self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
     self.input_file = input_file
     self.rdir_client = RdirClient(conf, logger=self.logger)
     self.content_factory = ContentFactory(conf)
     self.try_chunk_delete = try_chunk_delete
     self.beanstalkd_addr = beanstalkd_addr
     self.beanstalkd_tube = conf.get('beanstalkd_tube', 'rebuild')
     self.beanstalk = None
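An illustrative configuration for the rebuilder above, spelling out the options it reads together with their documented defaults (the namespace is a placeholder; the values are examples, not recommendations):

conf = {
    'namespace': 'OPENIO',         # placeholder
    'dry_run': False,
    'report_interval': 3600,       # seconds between two reports
    'chunks_per_second': 30,       # chunk rate limit
    'bytes_per_second': 10000000,  # bandwidth limit
    'rdir_fetch_limit': 100,       # entries fetched from rdir per request
    'allow_same_rawx': False,      # allow rebuilding on the same service
    'beanstalkd_tube': 'rebuild',
}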
Example #7
class Harasser(object):
    def __init__(self, ns, max_containers=256, max_contents=256):
        conf = {'namespace': ns}
        self.cs = ConscienceClient(conf)
        self.rdir = RdirClient(conf)
        self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
        self.sent = set()
        self.max_containers = max_containers
        self.max_contents = max_contents

    def harass_put(self, loops=None):
        if loops is None:
            loops = random.randint(1000, 2000)
        print "Pushing %d fake chunks" % loops
        loop = loops
        count_start_container = random.randrange(2**20)
        count_start_content = random.randrange(2**20)
        start = time.time()
        nb_rawx = len(self.rawx_list)
        while loop > 0:
            args = {'mtime': int(start)}
            # vol_id = random.choice(self.rawx_list)
            # container_id = "%064X" % (random.randrange(self.max_containers))
            # content_id = "%032X" % (random.randrange(self.max_contents))
            vol_id = self.rawx_list[loop % nb_rawx]
            container_id = "%064X" % (loop + count_start_container)
            content_id = "%032X" % (loop + count_start_content)
            chunk_id = "http://%s/%064X" \
                % (vol_id, random.randrange(2**128))
            self.rdir.chunk_push(
                vol_id, container_id, content_id, chunk_id, **args)
            self.sent.add((vol_id, container_id, content_id, chunk_id))
            loop -= 1
        end = time.time()
        print "%d pushed in %.3fs, %d req/s" \
            % (loops, end-start, loops/(end-start))

    def harass_del(self, min_loops=0):
        min_loops = min(min_loops, len(self.sent))
        loops = random.randint(min_loops, len(self.sent))
        print "Removing %d fake chunks" % loops
        loop = loops
        start = time.time()
        while loop > 0:
            args = self.sent.pop()
            self.rdir.chunk_delete(*args)
            loop -= 1
        end = time.time()
        print "%d removed in %.3fs, %d req/s" \
            % (loops, end-start, loops/(end-start))

    def __call__(self):
        try:
            while True:
                self.harass_put()
                self.harass_del()
        except KeyboardInterrupt:
            print "Cleaning..."
            self.harass_del(len(self.sent))
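A short usage sketch for the Harasser; 'OPENIO' is a placeholder namespace name:

h = Harasser('OPENIO', max_containers=128, max_contents=128)
h()  # alternates harass_put()/harass_del() until KeyboardInterrupt, then cleans up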
Example #8
class TestRdirClient(BaseTestCase):
    def setUp(self):
        super(TestRdirClient, self).setUp()
        self.namespace = self.conf['namespace']
        self.rdir_client = RdirClient({'namespace': self.namespace})
        self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567")

    def tearDown(self):
        super(TestRdirClient, self).tearDown()
        del self.rdir_client

    def test_fetch_one_req_post(self):
        self.rdir_client._direct_request = Mock(
            side_effect=[
                (
                    Mock(),
                    [
                        ["container1|content1|chunk1", {'mtime': 10}],
                        ["container2|content2|chunk2", {'mtime': 20}]
                    ]
                )
            ])
        gen = self.rdir_client.chunk_fetch("volume", limit=2)
        self.assertEqual(gen.next(),
                         ("container1", "content1", "chunk1", {'mtime': 10}))
        self.assertEqual(gen.next(),
                         ("container2", "content2", "chunk2", {'mtime': 20}))
        self.assertRaises(StopIteration, gen.next)
        self.assertEqual(self.rdir_client._direct_request.call_count, 2)

    def test_fetch_multi_req(self):
        self.rdir_client._direct_request = Mock(
            side_effect=[
                (
                    Mock(),
                    [
                        ["container1|content1|chunk1", {'mtime': 10}],
                        ["container2|content2|chunk2", {'mtime': 20}]
                    ]
                ),
                (
                    Mock(),
                    [
                        ["container3|content3|chunk3", {'mtime': 30}]
                    ]
                )
            ])
        gen = self.rdir_client.chunk_fetch("volume", limit=2)
        # print(gen.next())
        # print(gen.next())
        # print(gen.next())
        self.assertEqual(gen.next(),
                         ("container1", "content1", "chunk1", {'mtime': 10}))
        self.assertEqual(gen.next(),
                         ("container2", "content2", "chunk2", {'mtime': 20}))
        self.assertEqual(gen.next(),
                         ("container3", "content3", "chunk3", {'mtime': 30}))
        self.assertRaises(StopIteration, gen.next)
        self.assertEqual(self.rdir_client._direct_request.call_count, 3)
Example #9
    def __init__(self,
                 namespace,
                 concurrency=50,
                 error_file=None,
                 rebuild_file=None,
                 check_xattr=True,
                 limit_listings=0,
                 request_attempts=1,
                 logger=None,
                 verbose=False,
                 check_hash=False,
                 **_kwargs):
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        self.check_xattr = bool(check_xattr)
        self.check_hash = bool(check_hash)
        self.logger = logger or get_logger(
            {'namespace': namespace}, name='integrity', verbose=verbose)
        # Optimisation for when we are only checking one object
        # or one container.
        # 0 -> do not limit
        # 1 -> limit account listings (list of containers)
        # 2 -> limit container listings (list of objects)
        self.limit_listings = limit_listings
        if self.error_file:
            outfile = open(self.error_file, 'a')
            self.error_writer = csv.writer(outfile, delimiter=' ')

        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            self.fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(self.fd, delimiter='|')

        self.api = ObjectStorageApi(namespace,
                                    logger=self.logger,
                                    max_retries=request_attempts - 1,
                                    request_attempts=request_attempts)
        self.rdir_client = RdirClient({"namespace": namespace},
                                      logger=self.logger)

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        self.list_cache = {}
        self.running = {}
        self.running_lock = Semaphore(1)
        self.result_queue = Queue(concurrency)

        self.run_time = 0
Example #10
 def setUp(self):
     super(TestMeta2Indexing, self).setUp()
     self.rdir_client = RdirClient(self.conf)
     self.directory_client = DirectoryClient(self.conf)
     self.container_client = ContainerClient(self.conf)
     self.containers = [random_str(14) for _ in range(0, randint(1, 10))]
     self.containers_svcs = {}
     self.event_agent_name = 'event-agent-1'
Example #11
 def setUp(self):
     super(TestRdirMeta2Client, self).setUp()
     self.namespace = "dummy"
     self.volid = "e29b4c56-8522-4118-82ea"
     self.container_url = "OPENIO/testing/test1"
     self.container_id = "random833999id"
     self.mtime = 2874884.47
     self.rdir_client = RdirClient({'namespace': self.namespace},
                                   endpoint='127.0.0.0:6000')
Example #12
    def test_rebuild_chunk(self):
        # push a new content
        content = TestContent(self.conf, self.account,
                              self.container_name, "mycontent", "TWOCOPIES")
        data = "azerty"
        content.add_chunk(data, pos='0', rawx=0)
        content.add_chunk(data, pos='0', rawx=1)

        self._push_content(content)

        # rebuild the first rawx
        rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                        self.conf['rawx'][0]['addr'])

        rebuilder.chunk_rebuild(content.container_id, content.content_id,
                                content.chunks[0].id)

        # check meta2 information
        _, res = self.container_client.content_show(acct=content.account,
                                                    ref=content.container_name,
                                                    content=content.content_id)

        new_chunk_info = None
        for c in res:
            if (c['url'] != content.chunks[0].url and
                    c['url'] != content.chunks[1].url):
                new_chunk_info = c

        new_chunk_id = new_chunk_info['url'].split('/')[-1]

        self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash)
        self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos)
        self.assertEqual(new_chunk_info['size'], content.chunks[0].size)

        # check chunk information
        meta, stream = self.blob_client.chunk_get(new_chunk_info['url'])

        self.assertEqual(meta['content_size'], str(content.chunks[0].size))
        self.assertEqual(meta['content_path'], content.content_name)
        self.assertEqual(meta['content_cid'], content.container_id)
        self.assertEqual(meta['content_id'], content.content_id)
        self.assertEqual(meta['chunk_id'], new_chunk_id)
        self.assertEqual(meta['chunk_pos'], content.chunks[0].pos)
        self.assertEqual(meta['content_version'], content.version)
        self.assertEqual(meta['chunk_hash'], content.chunks[0].hash)

        self.assertEqual(stream.next(), content.chunks[0].data)

        # check rtime flag in rdir
        rdir_client = RdirClient(self.gridconf)
        res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr'])
        key = (content.container_id, content.content_id, content.chunks[0].id)
        for i_container, i_content, i_chunk, i_value in res:
            if (i_container, i_content, i_chunk) == key:
                check_value = i_value

        self.assertIsNotNone(check_value.get('rtime'))
Example #13
    def test_rebuild_chunk(self):
        # push a new content
        content = TestContent(self.conf, self.account, self.container_name,
                              "mycontent", "TWOCOPIES")
        data = "azerty"
        content.add_chunk(data, pos='0', rawx=0)
        content.add_chunk(data, pos='0', rawx=1)

        self._push_content(content)

        # rebuild the first rawx
        rebuilder = BlobRebuilderWorker(self.gridconf, None,
                                        self.conf['rawx'][0]['addr'])

        rebuilder.chunk_rebuild(content.container_id, content.content_id,
                                content.chunks[0].id)

        # check meta2 information
        _, res = self.container_client.content_show(acct=content.account,
                                                    ref=content.container_name,
                                                    content=content.content_id)

        new_chunk_info = None
        for c in res:
            if (c['url'] != content.chunks[0].url
                    and c['url'] != content.chunks[1].url):
                new_chunk_info = c

        new_chunk_id = new_chunk_info['url'].split('/')[-1]

        self.assertEqual(new_chunk_info['hash'], content.chunks[0].hash)
        self.assertEqual(new_chunk_info['pos'], content.chunks[0].pos)
        self.assertEqual(new_chunk_info['size'], content.chunks[0].size)

        # check chunk information
        meta, stream = self.blob_client.chunk_get(new_chunk_info['url'])

        self.assertEqual(meta['content_size'], str(content.chunks[0].size))
        self.assertEqual(meta['content_path'], content.content_name)
        self.assertEqual(meta['content_cid'], content.container_id)
        self.assertEqual(meta['content_id'], content.content_id)
        self.assertEqual(meta['chunk_id'], new_chunk_id)
        self.assertEqual(meta['chunk_pos'], content.chunks[0].pos)
        self.assertEqual(meta['content_version'], content.version)
        self.assertEqual(meta['chunk_hash'], content.chunks[0].hash)

        self.assertEqual(stream.next(), content.chunks[0].data)

        # check rtime flag in rdir
        rdir_client = RdirClient(self.gridconf)
        res = rdir_client.chunk_fetch(self.conf['rawx'][0]['addr'])
        key = (content.container_id, content.content_id, content.chunks[0].id)
        for i_container, i_content, i_chunk, i_value in res:
            if (i_container, i_content, i_chunk) == key:
                check_value = i_value

        self.assertIsNotNone(check_value.get('rtime'))
Example #14
class TestRdirClient(BaseTestCase):
    def setUp(self):
        super(TestRdirClient, self).setUp()
        self.namespace = self.conf["namespace"]
        self.rdir_client = RdirClient({"namespace": self.namespace})
        self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567")
        self.container_id_1 = random_id(64)
        self.container_id_2 = random_id(64)
        self.container_id_3 = random_id(64)
        self.content_id_1 = random_id(32)
        self.content_id_2 = random_id(32)
        self.content_id_3 = random_id(32)
        self.chunk_id_1 = random_id(64)
        self.chunk_id_2 = random_id(64)
        self.chunk_id_3 = random_id(64)

    def tearDown(self):
        super(TestRdirClient, self).tearDown()
        del self.rdir_client

    def test_fetch_one_req_post(self):
        self.rdir_client._direct_request = Mock(
            side_effect=[
                (
                    Mock(),
                    [
                        ["%s|%s|%s" % (self.container_id_1, self.content_id_1, self.chunk_id_1), {"mtime": 10}],
                        ["%s|%s|%s" % (self.container_id_2, self.content_id_2, self.chunk_id_2), {"mtime": 20}],
                    ],
                )
            ]
        )
        gen = self.rdir_client.chunk_fetch("volume", limit=2)
        self.assertEqual(gen.next(), (self.container_id_1, self.content_id_1, self.chunk_id_1, {"mtime": 10}))
        self.assertEqual(gen.next(), (self.container_id_2, self.content_id_2, self.chunk_id_2, {"mtime": 20}))
        self.assertRaises(StopIteration, gen.next)
        self.assertEqual(self.rdir_client._direct_request.call_count, 2)

    def test_fetch_multi_req(self):
        self.rdir_client._direct_request = Mock(
            side_effect=[
                (
                    Mock(),
                    [
                        ["%s|%s|%s" % (self.container_id_1, self.content_id_1, self.chunk_id_1), {"mtime": 10}],
                        ["%s|%s|%s" % (self.container_id_2, self.content_id_2, self.chunk_id_2), {"mtime": 20}],
                    ],
                ),
                (Mock(), [["%s|%s|%s" % (self.container_id_3, self.content_id_3, self.chunk_id_3), {"mtime": 30}]]),
            ]
        )
        gen = self.rdir_client.chunk_fetch("volume", limit=2)
        self.assertEqual(gen.next(), (self.container_id_1, self.content_id_1, self.chunk_id_1, {"mtime": 10}))
        self.assertEqual(gen.next(), (self.container_id_2, self.content_id_2, self.chunk_id_2, {"mtime": 20}))
        self.assertEqual(gen.next(), (self.container_id_3, self.content_id_3, self.chunk_id_3, {"mtime": 30}))
        self.assertRaises(StopIteration, gen.next)
        self.assertEqual(self.rdir_client._direct_request.call_count, 3)
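The mocked payloads above rely on rdir's 'container|content|chunk' key encoding; decoding such a key back into its components is a single split (the IDs below are placeholders):

key = '%s|%s|%s' % ('0' * 64, '1' * 32, '2' * 64)  # placeholder IDs
container_id, content_id, chunk_id = key.split('|')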
Example #15
    def setUp(self):
        super(TestIndexerCrawler, self).setUp()

        self.namespace = self.conf['namespace']

        self.rawx_conf = self.conf['services']['rawx'][0]
        self.conf = {"namespace": self.namespace,
                     "volume": self.rawx_conf['path']}
        self.rdir_client = RdirClient(self.conf)
Example #16
    def setUp(self):
        super(TestRdirClient, self).setUp()
        self.rawx_conf = random.choice(self.conf['services']['rawx'])
        self.rawx_id = self.rawx_conf.get('service_id', self.rawx_conf['addr'])
        self.rdir = RdirClient(self.conf)
        self.rdir.admin_clear(self.rawx_id, clear_all=True)

        self._push_chunks()
        self.rdir._direct_request = Mock(side_effect=self.rdir._direct_request)
Example #17
 def __init__(self, conf, logger, volume, try_chunk_delete=False, **kwargs):
     super(BlobRebuilderWorker, self).__init__(conf, logger, **kwargs)
     self.volume = volume
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.dry_run = true_value(conf.get('dry_run', False))
     self.allow_same_rawx = true_value(conf.get('allow_same_rawx'))
     self.rdir_client = RdirClient(conf, logger=self.logger)
     self.content_factory = ContentFactory(conf, logger=self.logger)
     self.try_chunk_delete = try_chunk_delete
Example #18
    def __init__(self, conf, input_file=None, service_id=None, **kwargs):
        super(Meta2Rebuilder, self).__init__(conf, **kwargs)

        # input
        self.input_file = input_file
        self.meta2_id = service_id

        # rawx/rdir
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(self.conf.get('rdir_fetch_limit'),
                                          self.DEFAULT_RDIR_FETCH_LIMIT)
Example #19
class TestRdirClient(BaseTestCase):
    def setUp(self):
        super(TestRdirClient, self).setUp()
        self.namespace = self.conf['namespace']
        self.rdir_client = RdirClient({'namespace': self.namespace})
        self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567")

    def tearDown(self):
        super(TestRdirClient, self).tearDown()
        del self.rdir_client

    def test_fetch_one_req_post(self):
        self.rdir_client._direct_request = Mock(
            side_effect=[(Mock(),
                          [["container1|content1|chunk1", {
                              'mtime': 10
                          }], ["container2|content2|chunk2", {
                              'mtime': 20
                          }]])])
        gen = self.rdir_client.chunk_fetch("volume", limit=2)
        self.assertEqual(gen.next(), ("container1", "content1", "chunk1", {
            'mtime': 10
        }))
        self.assertEqual(gen.next(), ("container2", "content2", "chunk2", {
            'mtime': 20
        }))
        self.assertRaises(StopIteration, gen.next)
        self.assertEqual(self.rdir_client._direct_request.call_count, 2)

    def test_fetch_multi_req(self):
        self.rdir_client._direct_request = Mock(side_effect=[(
            Mock(), [["container1|content1|chunk1", {
                'mtime': 10
            }], ["container2|content2|chunk2", {
                'mtime': 20
            }]]), (Mock(), [["container3|content3|chunk3", {
                'mtime': 30
            }]])])
        gen = self.rdir_client.chunk_fetch("volume", limit=2)
        # print(gen.next())
        # print(gen.next())
        # print(gen.next())
        self.assertEqual(gen.next(), ("container1", "content1", "chunk1", {
            'mtime': 10
        }))
        self.assertEqual(gen.next(), ("container2", "content2", "chunk2", {
            'mtime': 20
        }))
        self.assertEqual(gen.next(), ("container3", "content3", "chunk3", {
            'mtime': 30
        }))
        self.assertRaises(StopIteration, gen.next)
        self.assertEqual(self.rdir_client._direct_request.call_count, 3)
Example #20
    def tier_content(self, config, stats, control):
        def _set(lock_, field, value):
            lock_.acquire()
            field.value = value
            lock_.release()

        def _add(lock_, field, value):
            lock_.acquire()
            field.value += value
            lock_.release()
        lock = control.get('lock')
        try:
            src = config.get('src')
            del config['src']
            self.client.lock_score(dict(type="rawx", addr=src))
            api = ObjectStorageApi(config["namespace"])
            rdir_client = RdirClient({'namespace': config["namespace"]})

            self.log.info("Starting tierer on %s with policy %s" % (src, config["policy"]))

            policies = dict()
            for part in config["policy"].split(','):
                policies[part.split(':')[0]] = part.split(':')[1]
                self.log.info("Parsed policy: " + part.split(':')[0] + " " + part.split(':')[1])

            for marker in config["markers"]:
                req = dict(
                    start_after=marker,
                    limit=1000,
                )
                _, resp_body = rdir_client._rdir_request(src, 'POST', 'fetch', json=req)
                for (key, value) in resp_body:
                    _, _, chunk = key.split('|')
                    res = requests.head("http://" + src + "/" + chunk)
                    policy = res.headers.get("x-oio-chunk-meta-content-storage-policy", "")
                    if policy not in policies.keys():
                        _add(lock, stats.get("skip"), 1)
                        continue
                    path = res.headers.get("x-oio-chunk-meta-full-path", "///")
                    path_parts = path.split('/')
                    if len(path_parts) < 3:
                        _add(lock, stats.get("skip"), 1)
                        continue
                    try:
                        api.object_change_policy(unquote(path_parts[0]), unquote(path_parts[1]), unquote(path_parts[2]), policies[policy])
                        _add(lock, stats.get("success"), 1)
                    except Exception as e:
                        self.log.info("Operation failed %s: %s (%s)" % (path, format_exc(e), policies[policy]))
                        _add(lock, stats.get("fail"), 1)
        except Exception as e:
            self.log.error("Tierer failed with %s" % format_exc(e))
        _set(lock, control.get('status'), 2)
        _set(lock, control.get('end'), int(time.time()))
Example #21
 def __init__(self, ns, max_containers=256, max_contents=256):
     conf = {'namespace': ns}
     self.cs = ConscienceClient(conf)
     self.rdir = RdirClient(conf)
     self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
     self.sent = set()
     self.max_containers = max_containers
     self.max_contents = max_contents
     self.pushed_count = 0
     self.pushed_time = 0
     self.removed_count = 0
     self.removed_time = 0
Example #22
 def setUp(self):
     super(TestRdirClient, self).setUp()
     self.namespace = self.conf['namespace']
     self.rdir_client = RdirClient({'namespace': self.namespace})
     self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567")
     self.container_id_1 = random_id(64)
     self.container_id_2 = random_id(64)
     self.container_id_3 = random_id(64)
     self.content_id_1 = random_id(32)
     self.content_id_2 = random_id(32)
     self.content_id_3 = random_id(32)
     self.chunk_id_1 = random_id(64)
     self.chunk_id_2 = random_id(64)
     self.chunk_id_3 = random_id(64)
Example #23
    def __init__(self, volume_path, conf, pool_manager=None):
        """
        Initializes an Indexing worker for indexing meta2 databases.

        Possible values of conf relating to this worker are:
        - interval: (int) in sec time between two full scans. Default: half an
                    hour.
        - report_interval: (int) in sec, time between two reports: Default: 300
        - scanned_per_second: (int) maximum number of indexed databases /s.
        - try_removing_faulty_indexes : In the event where we encounter a
            database that's not supposed to be handled by this volume, attempt
            to remove it from this volume rdir index if it exists
            WARNING: The decision is based off of a proxy response, that could
            be affected by cache inconsistencies for example, use at your own
            risk. Default: False

        :param volume_path: The volume path to be indexed
        :param conf: The configuration to be passed to the needed services
        :param pool_manager: A connection pool manager. If none is given, a
                new one with a default size of 10 will be created.
        """
        self.logger = get_logger(conf)
        self._stop = False
        self.volume = volume_path
        self.success_nb = 0
        self.failed_nb = 0
        self.full_scan_nb = 0
        self.last_report_time = 0
        self.last_scan_time = 0
        self.last_index_time = 0
        self.start_time = 0
        self.indexed_since_last_report = 0
        self.scans_interval = int_value(
            conf.get('interval'), 1800)
        self.report_interval = int_value(
            conf.get('report_interval'), 300)
        self.max_indexed_per_second = int_value(
            conf.get('scanned_per_second'), 3000)
        self.namespace, self.volume_id = check_volume_for_service_type(
            self.volume, "meta2")
        self.attempt_bad_index_removal = boolean_value(
            conf.get('try_removing_faulty_indexes', False)
        )

        if not pool_manager:
            pool_manager = get_pool_manager(pool_connections=10)
        self.index_client = RdirClient(conf, logger=self.logger,
                                       pool_manager=pool_manager)
        self.dir_client = DirectoryClient(conf, logger=self.logger,
                                          pool_manager=pool_manager)
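An illustrative configuration dict wiring the options documented in the docstring above; the values shown are the documented defaults, not recommendations:

conf = {
    'namespace': 'OPENIO',                 # placeholder namespace
    'interval': 1800,                      # full scan every half hour
    'report_interval': 300,                # report every 5 minutes
    'scanned_per_second': 3000,            # indexing rate limit
    'try_removing_faulty_indexes': False,  # see the WARNING above
}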
Example #24
 def init(self):
     eventlet.monkey_patch(os=False)
     self.session = requests.Session()
     self.cs = ConscienceClient(self.conf)
     self.rdir = RdirClient(self.conf)
     self._acct_addr = None
     self.acct_update = 0
     self.graceful_timeout = 1
     self.acct_refresh_interval = int_value(
         self.conf.get('acct_refresh_interval'), 60)
     self.concurrency = int_value(self.conf.get('concurrency'), 1000)
     self.acct_update = true_value(self.conf.get('acct_update', True))
     self.rdir_update = true_value(self.conf.get('rdir_update', True))
     super(EventWorker, self).init()
Example #25
 def test_rdir_repartition(self):
     client = RdirClient({'namespace': self.ns})
     all_rawx = client.assign_all_rawx()
     by_rdir = dict()
     total = 0
     for rawx in all_rawx:
         count = by_rdir.get(rawx['rdir']['addr'], 0)
         total += 1
         by_rdir[rawx['rdir']['addr']] = count + 1
     avg = total / float(len(by_rdir))
     print "Ideal number of bases per rdir: ", avg
     print "Current repartition: ", by_rdir
     for count in by_rdir.itervalues():
         self.assertLessEqual(count, avg + 1)
Example #26
    def __init__(self, conf, input_file=None, service_id=None, **kwargs):
        super(BlobRebuilder, self).__init__(conf, **kwargs)

        # counters
        self.bytes_processed = 0
        self.total_bytes_processed = 0

        # input
        self.input_file = input_file
        self.rawx_id = service_id

        # rawx/rdir
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(self.conf.get('rdir_fetch_limit'),
                                          self.DEFAULT_RDIR_FETCH_LIMIT)
Example #27
 def setUp(self):
     super(TestRdirClient, self).setUp()
     self.namespace = "dummy"
     self.rdir_client = RdirClient({'namespace': self.namespace},
                                   endpoint='127.0.0.0:6000')
     self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567")
     self.container_id_1 = random_id(64)
     self.container_id_2 = random_id(64)
     self.container_id_3 = random_id(64)
     self.content_id_1 = random_id(32)
     self.content_id_2 = random_id(32)
     self.content_id_3 = random_id(32)
     self.chunk_id_1 = random_id(64)
     self.chunk_id_2 = random_id(64)
     self.chunk_id_3 = random_id(64)
Example #28
 def __init__(self, conf, logger, volume):
     self.conf = conf
     self.logger = logger
     self.volume = volume
     self.passes = 0
     self.errors = 0
     self.last_reported = 0
     self.chunks_run_time = 0
     self.total_chunks_processed = 0
     self.report_interval = int_value(
         conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(
         conf.get('chunks_per_second'), 30)
     self.index_client = RdirClient(conf)
     self.namespace, self.volume_id = check_volume(self.volume)
Example #29
    def init(self):
        self.concurrency = int_value(self.conf.get('concurrency'), 10)
        self.tube = self.conf.get("tube", DEFAULT_TUBE)
        acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 3600)
        self.app_env['account_client'] = AccountClient(
            self.conf,
            logger=self.logger,
            refresh_delay=acct_refresh_interval,
            pool_connections=3,  # 1 account, 1 proxy, 1 extra
        )
        self.app_env['rdir_client'] = RdirClient(
            self.conf,
            logger=self.logger,
            pool_maxsize=self.concurrency,  # 1 cnx per greenthread per host
        )

        if 'handlers_conf' not in self.conf:
            raise ValueError("'handlers_conf' path not defined in conf")
        self.handlers = loadhandlers(self.conf.get('handlers_conf'),
                                     global_conf=self.conf,
                                     app=self)

        for opt in ('acct_update', 'rdir_update', 'retries_per_second',
                    'batch_size'):
            if opt in self.conf:
                self.logger.warn('Deprecated option: %s', opt)

        super(EventWorker, self).init()
Example #30
 def __init__(self, conf, logger, volume):
     self.conf = conf
     self.logger = logger or get_logger(conf)
     self.volume = volume
     self.run_time = 0
     self.passes = 0
     self.errors = 0
     self.last_reported = 0
     self.chunks_run_time = 0
     self.bytes_running_time = 0
     self.bytes_processed = 0
     self.total_bytes_processed = 0
     self.total_chunks_processed = 0
     self.dry_run = true_value(
         conf.get('dry_run', False))
     self.report_interval = int_value(
         conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(
         conf.get('chunks_per_second'), 30)
     self.max_bytes_per_second = int_value(
         conf.get('bytes_per_second'), 10000000)
     self.rdir_fetch_limit = int_value(
         conf.get('rdir_fetch_limit'), 100)
     self.rdir_client = RdirClient(conf)
     self.content_factory = ContentFactory(conf)
Example #31
    def setUp(self):
        super(TestIndexerCrawler, self).setUp()

        self.namespace = self.conf['namespace']

        self.gridconf = {"namespace": self.namespace}
        self.rdir_client = RdirClient(self.gridconf)
Example #32
 def __init__(self,
              conf,
              logger,
              volume,
              try_chunk_delete=False,
              beanstalkd_addr=None,
              **kwargs):
     super(BlobRebuilder, self).__init__(conf, logger, **kwargs)
     self.volume = volume
     self.rdir_client = RdirClient(conf, logger=self.logger)
     self.try_chunk_delete = try_chunk_delete
     self.beanstalkd_addr = beanstalkd_addr
     self.beanstalkd_tube = conf.get('beanstalkd_tube',
                                     DEFAULT_REBUILDER_TUBE)
     self.beanstalk = None
     self.rdir_fetch_limit = int_value(conf.get('rdir_fetch_limit'), 100)
Example #33
 def __init__(self, conf, name, context, **kwargs):
     self.conf = conf
     self.name = name
     verbose = kwargs.pop('verbose', False)
     self.logger = get_logger(self.conf, verbose=verbose)
     self.init_zmq(context)
     self.cs = ConscienceClient(self.conf)
     self.rdir = RdirClient(self.conf)
     self._acct_addr = None
     self.acct_update = 0
     self.acct_refresh_interval = int_value(
         conf.get('acct_refresh_interval'), 60)
     self.acct_update = true_value(conf.get('acct_update', True))
     self.rdir_update = true_value(conf.get('rdir_update', True))
     self.session = requests.Session()
     self.failed = False
Example #34
 def __init__(self, ns, max_containers=256, max_contents=256):
     conf = {'namespace': ns}
     self.cs = ConscienceClient(conf)
     self.rdir = RdirClient(conf)
     self.rawx_list = [x['addr'] for x in self.cs.all_services('rawx')]
     self.sent = set()
     self.max_containers = max_containers
     self.max_contents = max_contents
Example #35
class Meta2DecommissionJob(XcuteRdirJob):

    JOB_TYPE = 'meta2-decommission'
    TASK_CLASS = Meta2DecommissionTask

    @classmethod
    def sanitize_params(cls, job_params):
        sanitized_job_params, _ = super(Meta2DecommissionJob,
                                        cls).sanitize_params(job_params)

        src = job_params.get('service_id')
        if not src:
            raise ValueError('Missing service ID')
        sanitized_job_params['service_id'] = src

        sanitized_job_params['dst'] = job_params.get('dst')

        return sanitized_job_params, 'meta2/%s' % src

    def __init__(self, conf, logger=None):
        super(Meta2DecommissionJob, self).__init__(conf, logger=logger)
        self.rdir_client = RdirClient(conf, logger=logger)

    def get_tasks(self, job_params, marker=None):
        containers = self._containers_from_rdir(job_params, marker)

        for marker, container_id in containers:
            yield marker, dict(container_id=container_id)

    def get_total_tasks(self, job_params, marker=None):
        containers = self._containers_from_rdir(job_params, marker)

        i = 0
        for i, (marker, _) in enumerate(containers, 1):
            if i % 1000 == 0:
                yield marker, 1000

        remaining = i % 1000
        if remaining == 0:
            return

        yield marker, remaining

    def _containers_from_rdir(self, job_params, marker):
        service_id = job_params['service_id']
        rdir_fetch_limit = job_params['rdir_fetch_limit']
        rdir_timeout = job_params['rdir_timeout']

        containers = self.rdir_client.meta2_index_fetch_all(
            service_id,
            marker=marker,
            timeout=rdir_timeout,
            limit=rdir_fetch_limit)
        for container_info in containers:
            container_url = container_info['container_url']
            container_id = container_info['container_id']

            yield container_url, container_id
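get_total_tasks() above reports progress in batches of 1000; here is a minimal standalone sketch of the same counting pattern (batched_counts is a hypothetical helper, not part of the job class):

def batched_counts(items, batch=1000):
    # Yield (last_item, count) after every full batch, then the remainder.
    i = 0
    last = None
    for i, last in enumerate(items, 1):
        if i % batch == 0:
            yield last, batch
    if i % batch:
        yield last, i % batch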
Example #36
 def __init__(self, conf, **kwargs):
     super(BlobIndexer, self).__init__(conf)
     self.logger = get_logger(conf)
     volume = conf.get('volume')
     if not volume:
         raise exc.ConfigurationException('No volume specified for indexer')
     self.volume = volume
     self.passes = 0
     self.errors = 0
     self.successes = 0
     self.last_reported = 0
     self.chunks_run_time = 0
     self.interval = int_value(conf.get('interval'), 300)
     self.report_interval = int_value(conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(conf.get('chunks_per_second'),
                                            30)
     self.index_client = RdirClient(conf, logger=self.logger)
     self.namespace, self.volume_id = check_volume(self.volume)
Example #37
    def setUp(self):
        super(TestIndexerCrawler, self).setUp()

        self.namespace = self.conf['namespace']

        self.rawx_conf = self.conf['services']['rawx'][0]
        self.conf = {"namespace": self.namespace,
                     "volume": self.rawx_conf['path']}
        self.rdir_client = RdirClient(self.conf)
Example #38
    def test_link_rdir_to_zero_scored_rawx(self):
        client = RdirClient({'namespace': self.ns})
        disp = RdirDispatcher({'namespace': self.ns})

        # Register a service, with score locked to zero
        new_rawx = self._srv('rawx', {'tag.loc': 'whatever'})
        new_rawx['score'] = 0
        self._register_srv(new_rawx)
        self._reload_proxy()

        all_rawx = disp.assign_all_rawx()
        all_rawx_keys = [x['addr'] for x in all_rawx]
        self.assertIn(new_rawx['addr'], all_rawx_keys)
        rdir_addr = client._get_rdir_addr(new_rawx['addr'])
        self.assertIsNotNone(rdir_addr)
        try:
            self.api.unlink('_RDIR', new_rawx['addr'], 'rdir')
            self.api.delete('_RDIR', new_rawx['addr'])
            # self._flush_cs('rawx')
        except Exception:
            pass
Example #39
 def setUp(self):
     super(TestBlobIndexer, self).setUp()
     self.rdir_client = RdirClient(self.conf)
     self.blob_client = BlobClient(self.conf)
     _, self.rawx_path, rawx_addr, _ = \
         self.get_service_url('rawx')
     services = self.conscience.all_services('rawx')
     self.rawx_id = None
     for rawx in services:
         if rawx_addr == rawx['addr']:
             self.rawx_id = rawx['tags'].get('tag.service_id', None)
     if self.rawx_id is None:
         self.rawx_id = rawx_addr
     conf = self.conf.copy()
     conf['volume'] = self.rawx_path
     self.blob_indexer = BlobIndexer(conf)
     # clear rawx/rdir
     chunk_files = paths_gen(self.rawx_path)
     for chunk_file in chunk_files:
         os.remove(chunk_file)
     self.rdir_client.admin_clear(self.rawx_id, clear_all=True)
Example #40
 def __init__(self, conf, logger, volume):
     self.conf = conf
     self.logger = logger
     self.volume = volume
     self.passes = 0
     self.errors = 0
     self.last_reported = 0
     self.chunks_run_time = 0
     self.total_chunks_processed = 0
     self.report_interval = int_value(
         conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(
         conf.get('chunks_per_second'), 30)
     self.index_client = RdirClient(conf)
Example #41
 def setUp(self):
     super(TestRdirClient, self).setUp()
     self.namespace = self.conf["namespace"]
     self.rdir_client = RdirClient({"namespace": self.namespace})
     self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567")
     self.container_id_1 = random_id(64)
     self.container_id_2 = random_id(64)
     self.container_id_3 = random_id(64)
     self.content_id_1 = random_id(32)
     self.content_id_2 = random_id(32)
     self.content_id_3 = random_id(32)
     self.chunk_id_1 = random_id(64)
     self.chunk_id_2 = random_id(64)
     self.chunk_id_3 = random_id(64)
Example #42
 def init(self):
     eventlet.monkey_patch(os=False)
     self.session = requests.Session()
     self.cs = ConscienceClient(self.conf)
     self.rdir = RdirClient(self.conf)
     self._acct_addr = None
     self.acct_update = 0
     self.graceful_timeout = 1
     self.acct_refresh_interval = int_value(
         self.conf.get('acct_refresh_interval'), 60
     )
     self.concurrency = int_value(self.conf.get('concurrency'), 1000)
     self.acct_update = true_value(self.conf.get('acct_update', True))
     self.rdir_update = true_value(self.conf.get('rdir_update', True))
     super(EventWorker, self).init()
Example #43
 def __init__(self, conf, name, context, **kwargs):
     self.conf = conf
     self.name = name
     verbose = kwargs.pop("verbose", False)
     self.logger = get_logger(self.conf, verbose=verbose)
     self.init_zmq(context)
     self.cs = ConscienceClient(self.conf)
     self.rdir = RdirClient(self.conf)
     self._acct_addr = None
     self.acct_update = 0
     self.acct_refresh_interval = int_value(conf.get("acct_refresh_interval"), 60)
     self.acct_update = true_value(conf.get("acct_update", True))
     self.rdir_update = true_value(conf.get("rdir_update", True))
     self.session = requests.Session()
     self.failed = False
Example #44
 def __init__(self, conf, **kwargs):
     super(BlobIndexer, self).__init__(conf)
     self.logger = get_logger(conf)
     volume = conf.get('volume')
     if not volume:
         raise exc.ConfigurationException('No volume specified for indexer')
     self.volume = volume
     self.passes = 0
     self.errors = 0
     self.last_reported = 0
     self.chunks_run_time = 0
     self.total_chunks_processed = 0
     self.interval = int_value(
         conf.get('interval'), 300)
     self.report_interval = int_value(
         conf.get('report_interval'), 3600)
     self.max_chunks_per_second = int_value(
         conf.get('chunks_per_second'), 30)
     self.index_client = RdirClient(conf)
     self.namespace, self.volume_id = check_volume(self.volume)
Example #45
class BlobIndexer(Daemon):
    def __init__(self, conf, **kwargs):
        super(BlobIndexer, self).__init__(conf)
        self.logger = get_logger(conf)
        volume = conf.get('volume')
        if not volume:
            raise exc.ConfigurationException('No volume specified for indexer')
        self.volume = volume
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.total_chunks_processed = 0
        self.interval = int_value(
            conf.get('interval'), 300)
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.index_client = RdirClient(conf)
        self.namespace, self.volume_id = check_volume(self.volume)

    def index_pass(self):
        start_time = report_time = time.time()

        total_errors = 0

        paths = paths_gen(self.volume)

        for path in paths:
            self.safe_update_index(path)
            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed
            }
        )
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)

    def safe_update_index(self, path):
        try:
            self.logger.debug('Updating index: %s' % path)
            self.update_index(path)
        except Exception:
            self.errors += 1
            self.logger.exception('ERROR while updating index for chunk %s',
                                  path)
        self.passes += 1

    def update_index(self, path):
        with open(path) as f:
            try:
                meta = read_chunk_metadata(f)
            except exc.MissingAttribute as e:
                raise exc.FaultyChunk(
                    'Missing extended attribute %s' % e)
            data = {
                'content_version': meta['content_version'],
                'content_nbchunks': meta['content_chunksnb'],
                'content_path': meta['content_path'],
                'content_size': meta['content_size'],
                'chunk_hash': meta['chunk_hash'],
                'chunk_position': meta['chunk_pos'],
                'chunk_size': meta['chunk_size'],
                'mtime': int(time.time())
            }
            self.index_client.chunk_push(self.volume_id,
                                         meta['content_cid'],
                                         meta['content_id'],
                                         meta['chunk_id'],
                                         **data)

    def run(self, *args, **kwargs):
        time.sleep(random() * self.interval)
        while True:
            try:
                self.index_pass()
            except Exception as e:
                self.logger.exception('ERROR during indexing: %s' % e)
Example #46
class TestIndexerCrawler(BaseTestCase):
    def setUp(self):
        super(TestIndexerCrawler, self).setUp()

        self.namespace = self.conf['namespace']

        self.gridconf = {"namespace": self.namespace}
        self.rdir_client = RdirClient(self.gridconf)

    def tearDown(self):
        super(TestIndexerCrawler, self).tearDown()

    def _create_chunk(self, rawx_path):
        container_id = generate_id(64)
        content_id = generate_id(64)
        chunk_id = generate_id(64)

        chunk_dir = "%s/%s" % (rawx_path, chunk_id[0:2])
        if not os.path.isdir(chunk_dir):
            os.makedirs(chunk_dir)

        chunk_path = "%s/%s" % (chunk_dir, chunk_id)
        with open(chunk_path, "w") as f:
            f.write("toto")

        xattr.setxattr(chunk_path, 'user.grid.chunk.hash', 32 * '0')
        xattr.setxattr(chunk_path, 'user.grid.chunk.id', chunk_id)
        xattr.setxattr(chunk_path, 'user.grid.chunk.position', '0')
        xattr.setxattr(chunk_path, 'user.grid.chunk.size', '4')
        xattr.setxattr(chunk_path, 'user.grid.content.container', container_id)
        xattr.setxattr(chunk_path, 'user.grid.content.id', content_id)
        xattr.setxattr(chunk_path, 'user.grid.content.nbchunk', '1')
        xattr.setxattr(chunk_path, 'user.grid.content.path', 'toto')
        xattr.setxattr(chunk_path, 'user.grid.content.size', '4')
        xattr.setxattr(chunk_path, 'user.grid.content.mime_type',
                                   'application/octet-stream')
        xattr.setxattr(chunk_path, 'user.grid.content.storage_policy',
                                   'TESTPOLICY')
        xattr.setxattr(chunk_path, 'user.grid.content.chunk_method',
                                   'bytes')
        xattr.setxattr(chunk_path, 'user.grid.content.version', '0')

        return chunk_path, container_id, content_id, chunk_id

    def _rdir_get(self, rawx_addr, container_id, content_id, chunk_id):
        data = self.rdir_client.chunk_fetch(rawx_addr)
        key = (container_id, content_id, chunk_id)
        for i_container, i_content, i_chunk, i_value in data:
            if (i_container, i_content, i_chunk) == key:
                return i_value
        return None

    def test_index_chunk(self):
        rawx_conf = self.conf['rawx'][0]

        # create a fake chunk
        chunk_path, container_id, content_id, chunk_id = self._create_chunk(
            rawx_conf['path'])

        # index the chunk
        indexer = BlobIndexerWorker(self.gridconf, None, rawx_conf['path'])

        with mock.patch('oio.blob.indexer.time.time',
                        mock.MagicMock(return_value=1234)):
            indexer.update_index(chunk_path)

        # check rdir
        check_value = self._rdir_get(rawx_conf['addr'], container_id,
                                     content_id, chunk_id)

        self.assertIsNotNone(check_value)

        self.assertEqual(check_value['content_nbchunks'], 1)
        self.assertEqual(check_value['chunk_hash'], 32 * '0')
        self.assertEqual(check_value['content_size'], 4)
        self.assertEqual(check_value['content_path'], 'toto')
        self.assertEqual(check_value['chunk_position'], '0')
        self.assertEqual(check_value['chunk_size'], 4)
        self.assertEqual(check_value['mtime'], 1234)
        self.assertEqual(check_value['content_version'], 0)

        # index a chunk already indexed
        with mock.patch('oio.blob.indexer.time.time',
                        mock.MagicMock(return_value=4567)):
            indexer.update_index(chunk_path)

        # check rdir
        check_value = self._rdir_get(rawx_conf['addr'], container_id,
                                     content_id, chunk_id)

        self.assertIsNotNone(check_value)

        self.assertEqual(check_value['mtime'], 4567)

    def test_index_chunk_missing_xattr(self):
        rawx_conf = self.conf['rawx'][0]

        # create a fake chunk
        chunk_path, container_id, content_id, chunk_id = self._create_chunk(
            rawx_conf['path'])

        # remove mandatory xattr
        xattr.removexattr(chunk_path, 'user.grid.chunk.hash')

        # try to index the chunk
        indexer = BlobIndexerWorker(self.gridconf, None, rawx_conf['path'])

        self.assertRaises(FaultyChunk, indexer.update_index, chunk_path)
Example #47
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.blob_client = BlobClient()
        self.container_client = ContainerClient(conf)
        self.rdir_client = RdirClient(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s"
                         % (container_id, content_id, chunk_id))
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def _meta2_get_chunks_at_pos(self, container_id, content_id, chunk_id):
        current_chunk_url = 'http://%s/%s' % (self.volume, chunk_id)

        try:
            data = self.container_client.content_show(
                cid=container_id, content=content_id)
        except exc.NotFound:
            raise exc.OrphanChunk('Content not found')

        current_chunk = None
        for c in data:
            if c['url'] == current_chunk_url:
                current_chunk = c
                break
        if not current_chunk:
            raise exc.OrphanChunk('Chunk not found in content')

        duplicate_chunks = []
        for c in data:
            if c['pos'] == current_chunk['pos'] \
                    and c['url'] != current_chunk['url']:
                duplicate_chunks.append(c)
        if len(duplicate_chunks) == 0:
            raise exc.UnrecoverableContent('No copy of missing chunk')

        return current_chunk, duplicate_chunks

    def _meta2_get_spare_chunk(self, container_id, content_id, notin, broken):
        spare_data = {'notin': notin,
                      'broken': [broken],
                      'size': 0}
        try:
            spare_resp = self.container_client.content_spare(
                cid=container_id, content=content_id, data=spare_data)
        except ClientException as e:
            raise exc.SpareChunkException('No spare chunk (%s)' % e.message)

        return spare_resp['chunks'][0]

    def _meta2_replace_chunk(self, container_id, content_id,
                             current_chunk, new_chunk):
        old = [{'type': 'chunk',
                'id': current_chunk['url'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        new = [{'type': 'chunk',
                'id': new_chunk['id'],
                'hash': current_chunk['hash'],
                'size': current_chunk['size'],
                'pos': current_chunk['pos'],
                'content': content_id}]
        update_data = {'old': old, 'new': new}

        self.container_client.container_raw_update(
            cid=container_id, data=update_data)

    # TODO rain support
    def chunk_rebuild(self, container_id, content_id, chunk_id):

        current_chunk, duplicate_chunks = self._meta2_get_chunks_at_pos(
            container_id, content_id, chunk_id)

        spare_chunk = self._meta2_get_spare_chunk(
            container_id, content_id, duplicate_chunks, current_chunk)

        uploaded = False
        for src in duplicate_chunks:
            try:
                self.blob_client.chunk_copy(src['url'], spare_chunk['id'])
                self.logger.debug('copy chunk from %s to %s',
                                  src['url'], spare_chunk['id'])
                uploaded = True
                break
            except Exception as e:
                self.logger.debug('Failed to copy chunk from %s to %s: %s',
                                  src['url'], spare_chunk['id'], type(e))
        if not uploaded:
            raise exc.UnrecoverableContent('No copy available '
                                           'of missing chunk')

        self._meta2_replace_chunk(container_id, content_id,
                                  current_chunk, spare_chunk)

        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))

        self.bytes_processed += current_chunk['size']
        self.total_bytes_processed += current_chunk['size']
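
A minimal sketch of driving the worker above, assuming a plain logging logger is acceptable wherever get_logger() would otherwise be used; the configuration values and the rawx address are placeholders:

import logging

# Placeholder configuration; the keys mirror those read in __init__ above.
conf = {
    'namespace': 'OPENIO',
    'dry_run': False,
    'rdir_fetch_limit': 100,
}
logger = logging.getLogger('blob-rebuilder')

# The volume is the rawx address under which the rdir indexed the chunks.
worker = BlobRebuilderWorker(conf, logger, '127.0.0.1:6200')

# Take the per-volume admin lock so two rebuilders cannot run at once,
# then process one batch of chunks flagged for rebuild.
worker.rebuilder_pass_with_lock()
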
Example #48
0
class EventWorker(Worker):
    def init(self):
        eventlet.monkey_patch(os=False)
        self.session = requests.Session()
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.acct_update = 0
        self.graceful_timeout = 1
        self.acct_refresh_interval = int_value(
            self.conf.get('acct_refresh_interval'), 60
        )
        self.concurrency = int_value(self.conf.get('concurrency'), 1000)
        self.acct_update = true_value(self.conf.get('acct_update', True))
        self.rdir_update = true_value(self.conf.get('rdir_update', True))
        super(EventWorker, self).init()

    def notify(self):
        """TODO"""
        pass

    def safe_decode_job(self, job):
        try:
            return json.loads(job)
        except Exception as e:
            self.logger.warn('ERROR decoding job "%s"', str(e.message))
            return None

    def run(self):
        queue_url = self.conf.get('queue_url', 'tcp://127.0.0.1:11300')
        self.beanstalk = Beanstalk.from_url(queue_url)

        gt = eventlet.spawn(self.handle)

        while self.alive:
            self.notify()
            try:
                eventlet.sleep(1.0)
            except AssertionError:
                self.alive = False
                break

        self.notify()
        try:
            with Timeout(self.graceful_timeout) as t:
                gt.kill(StopServe())
                gt.wait()
        except Timeout as te:
            if te != t:
                raise
            gt.kill()

    def handle(self):
        try:
            while True:
                job_id, data = self.beanstalk.reserve()
                try:
                    event = self.safe_decode_job(data)
                    if event:
                        self.process_event(event)
                    self.beanstalk.delete(job_id)
                except Exception:
                    self.logger.exception("ERROR handling event %s", job_id)
        except StopServe:
            self.logger.info('Stopping event handler')

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("ERROR no handler found for event")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker handle container put')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker handle container update')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, json=event)

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker handle container destroy')
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker handle object delete')
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.warn('error while deleting chunk %s "%s"',
                                 chunk['id'], str(e.message))
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.debug('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker handle object put')

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event:
        """
        self.logger.debug('worker handle reference update')

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event:
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk creation')
            return

        self.logger.debug('worker handle chunk creation')

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event:
        """
        if not self.rdir_update:
            self.logger.debug('worker skip chunk deletion')
            return

        self.logger.debug('worker handle chunk deletion')

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping.
        :param event:
        """
        self.logger.debug('worker handle ping')
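
To make the dispatch concrete, here is a hand-built chunk-creation event of the shape handle_chunk_put() expects, assuming an initialized EventWorker named worker; every field value is a placeholder, and EventType.CHUNK_PUT stands for whatever literal the producer writes into the 'event' field:

# Hypothetical event, shaped after the fields handle_chunk_put() reads.
event = {
    'event': EventType.CHUNK_PUT,
    'when': 1464000000,
    'data': {
        'volume_id': '127.0.0.1:6200',
        'container_id': 64 * '0',
        'content_id': 32 * '0',
        'chunk_id': 64 * '0',
    },
}

# get_handler() routes this to handle_chunk_put(), which pops the four
# routing fields and pushes the remainder (just the mtime) to the rdir.
worker.process_event(event)
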
Example #49
0
    def setUp(self):
        super(TestRdirClient, self).setUp()
        self.rdir_client = RdirClient({'namespace': "NS"})
        self.rdir_client._get_rdir_addr = Mock(return_value="0.1.2.3:4567")
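
The fragment above routes every rdir request to a fixed address by stubbing the directory lookup, so a unit test built on this setUp can stub the HTTP layer as well. A sketch, with the caveat that _direct_request and its (response, body) return shape are assumptions about oio's internals rather than something shown in these examples:

    def test_chunk_delete(self):
        # Stub the transport layer; the return shape is an assumption.
        self.rdir_client._direct_request = Mock(
            return_value=(Mock(status=204), None))
        self.rdir_client.chunk_delete("volume", "cid", "content", "chunk")
        self.assertEqual(self.rdir_client._direct_request.call_count, 1)
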
Example #50
0
class BlobRebuilderWorker(object):
    def __init__(self, conf, logger, volume):
        self.conf = conf
        self.logger = logger or get_logger(conf)
        self.volume = volume
        self.run_time = 0
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.chunks_run_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_chunks_processed = 0
        self.dry_run = true_value(
            conf.get('dry_run', False))
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_chunks_per_second = int_value(
            conf.get('chunks_per_second'), 30)
        self.max_bytes_per_second = int_value(
            conf.get('bytes_per_second'), 10000000)
        self.rdir_fetch_limit = int_value(
            conf.get('rdir_fetch_limit'), 100)
        self.rdir_client = RdirClient(conf)
        self.content_factory = ContentFactory(conf)

    def rebuilder_pass_with_lock(self):
        self.rdir_client.admin_lock(self.volume,
                                    "rebuilder on %s" % gethostname())
        try:
            self.rebuilder_pass()
        finally:
            self.rdir_client.admin_unlock(self.volume)

    def rebuilder_pass(self):
        start_time = report_time = time.time()

        total_errors = 0
        rebuilder_time = 0

        chunks = self.rdir_client.chunk_fetch(self.volume,
                                              limit=self.rdir_fetch_limit,
                                              rebuild=True)
        for container_id, content_id, chunk_id, data in chunks:
            loop_time = time.time()

            if self.dry_run:
                self.dryrun_chunk_rebuild(container_id, content_id, chunk_id)
            else:
                self.safe_chunk_rebuild(container_id, content_id, chunk_id)

            self.chunks_run_time = ratelimit(
                self.chunks_run_time,
                self.max_chunks_per_second
            )
            self.total_chunks_processed += 1
            now = time.time()

            if now - self.last_reported >= self.report_interval:
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(b_rate).2f '
                    '%(total).2f '
                    '%(rebuilder_time).2f '
                    '%(rebuilder_rate).2f' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / (now - report_time),
                        'b_rate': self.bytes_processed / (now - report_time),
                        'total': (now - start_time),
                        'rebuilder_time': rebuilder_time,
                        'rebuilder_rate': rebuilder_time / (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.bytes_processed = 0
                self.last_reported = now
            rebuilder_time += (now - loop_time)
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(chunk_rate).2f '
            '%(bytes_rate).2f '
            '%(rebuilder_time).2f '
            '%(rebuilder_rate).2f' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'chunk_rate': self.total_chunks_processed / elapsed,
                'bytes_rate': self.total_bytes_processed / elapsed,
                'rebuilder_time': rebuilder_time,
                'rebuilder_rate': rebuilder_time / elapsed
            }
        )

    def dryrun_chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info("[dryrun] Rebuilding "
                         "container %s, content %s, chunk %s",
                         container_id, content_id, chunk_id)
        self.passes += 1

    def safe_chunk_rebuild(self, container_id, content_id, chunk_id):
        try:
            self.chunk_rebuild(container_id, content_id, chunk_id)
        except Exception as e:
            self.errors += 1
            self.logger.error('ERROR while rebuilding chunk %s|%s|%s: %s',
                              container_id, content_id, chunk_id, e)

        self.passes += 1

    def chunk_rebuild(self, container_id, content_id, chunk_id):
        self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                         container_id, content_id, chunk_id)

        try:
            content = self.content_factory.get(container_id, content_id)
        except ContentNotFound:
            raise exc.OrphanChunk('Content not found')

        chunk = content.chunks.filter(id=chunk_id).one()
        if chunk is None:
            raise exc.OrphanChunk("Chunk not found in content")
        chunk_size = chunk.size

        content.rebuild_chunk(chunk_id)

        self.rdir_client.chunk_push(self.volume, container_id, content_id,
                                    chunk_id, rtime=int(time.time()))

        self.bytes_processed += chunk_size
        self.total_bytes_processed += chunk_size
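
Compared with Example #47, this variant delegates the spare-chunk negotiation to Content.rebuild_chunk() instead of driving meta2 by hand. A one-off rebuild can therefore be sketched without the worker loop; the import path, namespace, and identifiers below are assumptions for illustration:

from oio.content.factory import ContentFactory

# Placeholder identifiers for illustration only.
container_id = 64 * '0'
content_id = 32 * '0'
chunk_id = 64 * '0'

factory = ContentFactory({'namespace': 'OPENIO'})
content = factory.get(container_id, content_id)  # raises ContentNotFound
content.rebuild_chunk(chunk_id)  # picks a spare rawx and re-uploads a copy
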
Example #51
0
class EventWorker(object):
    def __init__(self, conf, name, context, **kwargs):
        self.conf = conf
        self.name = name
        verbose = kwargs.pop('verbose', False)
        self.logger = get_logger(self.conf, verbose=verbose)
        self.init_zmq(context)
        self.cs = ConscienceClient(self.conf)
        self.rdir = RdirClient(self.conf)
        self._acct_addr = None
        self.acct_update = 0
        self.acct_refresh_interval = int_value(
            conf.get('acct_refresh_interval'), 60
        )
        self.acct_update = true_value(
            conf.get('acct_update', True))
        self.session = requests.Session()
        self.failed = False

    def start(self):
        self.logger.info('worker "%s" starting', self.name)
        self.running = True
        self.run()

    def stop(self):
        self.logger.info('worker "%s" stopping', self.name)
        self.running = False

    def init_zmq(self, context):
        socket = context.socket(zmq.REP)
        socket.connect('inproc://event-front')
        self.socket = socket

    def safe_ack(self, msg):
        try:
            self.socket.send_multipart(msg)
        except Exception:
            self.logger.warn('Unable to ack event')

    def run(self):
        try:
            while self.running:
                msg = self.socket.recv_multipart()
                self.logger.debug("msg received: %s" % msg)
                event = decode_msg(msg)
                success = self.process_event(event)
                f = "0" if success else ""
                self.safe_ack([msg[0], f])
        except Exception as e:
            self.logger.warn('ERROR in worker "%s"', e)
            self.failed = True
            raise e
        finally:
            self.logger.info('worker "%s" stopped', self.name)

    def process_event(self, event):
        handler = self.get_handler(event)
        if not handler:
            self.logger.warn("No handler found")
            # mark as success
            return True
        success = True
        try:
            handler(event)
        except Exception:
            success = False
        finally:
            return success

    def get_handler(self, event):
        event_type = event.get('event')
        if not event_type:
            return None

        if event_type == EventType.CONTAINER_PUT:
            return self.handle_container_put
        elif event_type == EventType.CONTAINER_DESTROY:
            return self.handle_container_destroy
        elif event_type == EventType.CONTAINER_UPDATE:
            return self.handle_container_update
        elif event_type == EventType.OBJECT_PUT:
            return self.handle_object_put
        elif event_type == EventType.OBJECT_DELETE:
            return self.handle_object_delete
        elif event_type == EventType.REFERENCE_UPDATE:
            return self.handle_reference_update
        elif event_type == EventType.CHUNK_PUT:
            return self.handle_chunk_put
        elif event_type == EventType.CHUNK_DELETE:
            return self.handle_chunk_delete
        elif event_type == EventType.PING:
            return self.handle_ping
        else:
            return None

    @property
    def acct_addr(self):
        if not self._acct_addr or self.acct_refresh():
            try:
                acct_instance = self.cs.next_instance(ACCOUNT_SERVICE)
                self._acct_addr = acct_instance.get('addr')
                self.acct_update = time.time()
            except Exception:
                self.logger.warn('Unable to find account instance')
        return self._acct_addr

    def acct_refresh(self):
        return (time.time() - self.acct_update) > self.acct_refresh_interval

    def handle_container_put(self, event):
        """
        Handle container creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle container put', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'mtime': mtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_update(self, event):
        """
        Handle container update.
        :param event:
        """
        self.logger.debug('worker "%s" handle container update', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        mtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')
        bytes_count = data.get('bytes-count', 0)
        object_count = data.get('object-count', 0)

        event = {
            'mtime': mtime,
            'name': name,
            'bytes': bytes_count,
            'objects': object_count
        }
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_container_destroy(self, event):
        """
        Handle container destroy.
        :param event:
        """
        self.logger.debug('worker "%s" handle container destroy', self.name)
        if not self.acct_update:
            return
        uri = 'http://%s/v1.0/account/container/update' % self.acct_addr
        dtime = event.get('when')
        data = event.get('data')
        name = data.get('url').get('user')
        account = data.get('url').get('account')

        event = {'dtime': dtime, 'name': name}
        self.session.post(uri, params={'id': account}, data=json.dumps(event))

    def handle_object_delete(self, event):
        """
        Handle object deletion.
        Delete the chunks of the object.
        :param event:
        """
        self.logger.debug('worker "%s" handle object delete', self.name)
        pile = GreenPile(PARALLEL_CHUNKS_DELETE)

        chunks = []

        for item in event.get('data'):
            if item.get('type') == 'chunks':
                chunks.append(item)
        if not len(chunks):
            self.logger.warn('No chunks found in event data')
            return

        def delete_chunk(chunk):
            resp = None
            try:
                with Timeout(CHUNK_TIMEOUT):
                    resp = self.session.delete(chunk['id'])
            except (Exception, Timeout) as e:
                self.logger.exception(e)
            return resp

        for chunk in chunks:
            pile.spawn(delete_chunk, chunk)

        resps = [resp for resp in pile if resp]

        for resp in resps:
            if resp.status_code == 204:
                self.logger.info('deleted chunk %s' % resp.url)
            else:
                self.logger.warn('failed to delete chunk %s' % resp.url)

    def handle_object_put(self, event):
        """
        Handle object creation.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle object put', self.name)

    def handle_reference_update(self, event):
        """
        Handle reference update.
        TODO
        :param event:
        """
        self.logger.debug('worker "%s" handle reference update', self.name)

    def handle_chunk_put(self, event):
        """
        Handle chunk creation.
        :param event:
        """
        self.logger.debug('worker "%s" handle chunk creation', self.name)

        when = event.get('when')
        data = event.get('data')
        volume_id = data.get('volume_id')
        del data['volume_id']
        container_id = data.get('container_id')
        del data['container_id']
        content_id = data.get('content_id')
        del data['content_id']
        chunk_id = data.get('chunk_id')
        del data['chunk_id']
        data['mtime'] = when
        self.rdir.chunk_push(volume_id, container_id, content_id, chunk_id,
                             **data)

    def handle_chunk_delete(self, event):
        """
        Handle chunk deletion.
        :param event:
        """
        self.logger.debug('worker "%s" handle chunk deletion', self.name)

        data = event.get('data')
        volume_id = data.get('volume_id')
        container_id = data.get('container_id')
        content_id = data.get('content_id')
        chunk_id = data.get('chunk_id')
        self.rdir.chunk_delete(volume_id, container_id, content_id, chunk_id)

    def handle_ping(self, event):
        """
        Handle ping.
        :param event:
        """
        self.logger.debug('worker "%s" handle ping', self.name)
Example #52
0
class TestIndexerCrawler(BaseTestCase):
    def setUp(self):
        super(TestIndexerCrawler, self).setUp()

        self.namespace = self.conf['namespace']

        self.rawx_conf = self.conf['services']['rawx'][0]
        self.conf = {"namespace": self.namespace,
                     "volume": self.rawx_conf['path']}
        self.rdir_client = RdirClient(self.conf)

    def tearDown(self):
        super(TestIndexerCrawler, self).tearDown()

    def _create_chunk(self, rawx_path, alias="toto"):
        container_id = random_id(64)
        content_id = random_id(32)
        chunk_id = random_id(64)

        chunk_dir = "%s/%s" % (rawx_path, chunk_id[0:3])
        if not os.path.isdir(chunk_dir):
            os.makedirs(chunk_dir)

        chunk_path = "%s/%s" % (chunk_dir, chunk_id)
        with open(chunk_path, "w") as f:
            f.write("toto")

        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['chunk_hash'], 32 * '0')
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['chunk_id'], chunk_id)
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['chunk_pos'], '0')
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['chunk_size'], '4')
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['container_id'],
            container_id)
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['content_id'], content_id)
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['content_path'], alias)
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['content_policy'],
            'TESTPOLICY')
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['content_chunkmethod'],
            'plain/nb_copy=3')
        xattr.setxattr(
            chunk_path, 'user.' + chunk_xattr_keys['content_version'], '0')

        return chunk_path, container_id, content_id, chunk_id

    def _rdir_get(self, rawx_addr, container_id, content_id, chunk_id):
        data = self.rdir_client.chunk_fetch(rawx_addr)
        key = (container_id, content_id, chunk_id)
        for i_container, i_content, i_chunk, i_value in data:
            if (i_container, i_content, i_chunk) == key:
                return i_value
        return None

    def _test_index_chunk(self, alias="toto"):

        # create a fake chunk
        chunk_path, container_id, content_id, chunk_id = self._create_chunk(
            self.rawx_conf['path'], alias)

        # index the chunk
        indexer = BlobIndexer(self.conf)

        with mock.patch('oio.blob.indexer.time.time',
                        mock.MagicMock(return_value=1234)):
            indexer.update_index(chunk_path)

        # check rdir
        check_value = self._rdir_get(self.rawx_conf['addr'], container_id,
                                     content_id, chunk_id)

        self.assertIsNotNone(check_value)

        self.assertEqual(check_value['mtime'], 1234)

        # index a chunk already indexed
        with mock.patch('oio.blob.indexer.time.time',
                        mock.MagicMock(return_value=4567)):
            indexer.update_index(chunk_path)

        # check rdir
        check_value = self._rdir_get(self.rawx_conf['addr'], container_id,
                                     content_id, chunk_id)

        self.assertIsNotNone(check_value)

        self.assertEqual(check_value['mtime'], 4567)

    def test_index_chunk(self):
        return self._test_index_chunk()

    def test_index_unicode_chunk(self):
        return self._test_index_chunk('a%%%s%d%xàç"\r\n{0}€ 1+1=2/\\$\t_')

    def test_index_chunk_missing_xattr(self):
        # create a fake chunk
        chunk_path, container_id, content_id, chunk_id = self._create_chunk(
            self.rawx_conf['path'])

        # remove mandatory xattr
        xattr.removexattr(
            chunk_path, 'user.' + chunk_xattr_keys['container_id'])

        # try to index the chunk
        indexer = BlobIndexer(self.conf)

        self.assertRaises(FaultyChunk, indexer.update_index, chunk_path)
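
The fake chunk is described entirely through extended attributes, so reading it back is symmetric to _create_chunk() above. A small helper sketch, assuming the same chunk_xattr_keys mapping is importable where it runs:

import xattr

def read_chunk_meta(chunk_path):
    # Collect the chunk's user.* xattrs into a plain dict.
    meta = {}
    for key in ('chunk_id', 'chunk_pos', 'chunk_size', 'chunk_hash',
                'container_id', 'content_id', 'content_path'):
        meta[key] = xattr.getxattr(chunk_path,
                                   'user.' + chunk_xattr_keys[key])
    return meta
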
Example #53
0
class TestIndexerCrawler(BaseTestCase):
    def setUp(self):
        super(TestIndexerCrawler, self).setUp()

        self.namespace = self.conf["namespace"]

        self.gridconf = {"namespace": self.namespace}
        self.rdir_client = RdirClient(self.gridconf)

    def tearDown(self):
        super(TestIndexerCrawler, self).tearDown()

    def _create_chunk(self, rawx_path):
        container_id = generate_id(64)
        content_id = generate_id(64)
        chunk_id = generate_id(64)

        chunk_dir = "%s/%s" % (rawx_path, chunk_id[0:2])
        if not os.path.isdir(chunk_dir):
            os.makedirs(chunk_dir)

        chunk_path = "%s/%s" % (chunk_dir, chunk_id)
        with open(chunk_path, "w") as f:
            f.write("toto")

        xattr.setxattr(chunk_path, "user.grid.chunk.hash", 32 * "0")
        xattr.setxattr(chunk_path, "user.grid.chunk.id", chunk_id)
        xattr.setxattr(chunk_path, "user.grid.chunk.position", "0")
        xattr.setxattr(chunk_path, "user.grid.chunk.size", "4")
        xattr.setxattr(chunk_path, "user.grid.content.container", container_id)
        xattr.setxattr(chunk_path, "user.grid.content.id", content_id)
        xattr.setxattr(chunk_path, "user.grid.content.nbchunk", "1")
        xattr.setxattr(chunk_path, "user.grid.content.path", "toto")
        xattr.setxattr(chunk_path, "user.grid.content.size", "4")
        xattr.setxattr(chunk_path, "user.grid.content.version", "0")

        return chunk_path, container_id, content_id, chunk_id

    def _rdir_get(self, rawx_addr, container_id, content_id, chunk_id):
        data = self.rdir_client.chunk_fetch(rawx_addr)
        key = (container_id, content_id, chunk_id)
        for i_container, i_content, i_chunk, i_value in data:
            if (i_container, i_content, i_chunk) == key:
                return i_value
        return None

    def test_index_chunk(self):
        rawx_conf = self.conf["rawx"][0]

        # create a fake chunk
        chunk_path, container_id, content_id, chunk_id = self._create_chunk(
            rawx_conf["path"])

        # index the chunk
        indexer = BlobIndexerWorker(self.gridconf, None, rawx_conf["path"])

        with mock.patch("oio.blob.indexer.time.time", mock.MagicMock(return_value=1234)):
            indexer.update_index(chunk_path)

        # check rdir
        check_value = self._rdir_get(rawx_conf["addr"], container_id,
                                     content_id, chunk_id)

        self.assertIsNotNone(check_value)

        self.assertEqual(check_value["content_nbchunks"], 1)
        self.assertEqual(check_value["chunk_hash"], 32 * "0")
        self.assertEqual(check_value["content_size"], 4)
        self.assertEqual(check_value["content_path"], "toto")
        self.assertEqual(check_value["chunk_position"], "0")
        self.assertEqual(check_value["chunk_size"], 4)
        self.assertEqual(check_value["mtime"], 1234)
        self.assertEqual(check_value["content_version"], 0)

        # index a chunk already indexed
        with mock.patch("oio.blob.indexer.time.time", mock.MagicMock(return_value=4567)):
            indexer.update_index(chunk_path)

        # check rdir
        check_value = self._rdir_get(rawx_conf["addr"], container_id,
                                     content_id, chunk_id)

        self.assertIsNotNone(check_value)

        self.assertEqual(check_value["mtime"], 4567)

    def test_index_chunk_missing_xattr(self):
        rawx_conf = self.conf["rawx"][0]

        # create a fake chunk
        chunk_path, container_id, content_id, chunk_id = self._create_chunk(
            rawx_conf["path"])

        # remove mandatory xattr
        xattr.removexattr(chunk_path, "user.grid.chunk.hash")

        # try to index the chunk
        indexer = BlobIndexerWorker(self.gridconf, None, rawx_conf["path"])

        self.assertRaises(FaultyChunk, indexer.update_index, chunk_path)
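
All of these tests index a single path with update_index(); a full pass over a volume is just a walk over the hashed chunk directories. A rough sketch, assuming an indexer built as above and that every regular file under the volume path is a chunk, as in _create_chunk():

import os

def index_volume(indexer, volume_path):
    # Visit each hashed prefix directory and index every chunk file in it.
    for root, _dirs, files in os.walk(volume_path):
        for name in files:
            indexer.update_index(os.path.join(root, name))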