Example #1
 def _get_hashes(self, policy, path, recalculate=None, do_listdir=False):
     df_mgr = self._df_router[policy]
     hashed, suffix_hashes = tpool_reraise(
         df_mgr._get_hashes, path, recalculate=recalculate,
         do_listdir=do_listdir, reclaim_age=self.reclaim_age)
     self.logger.update_stats('suffix.hashes', hashed)
     return suffix_hashes
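All of these examples push the disk-heavy hash calculation through tpool_reraise so it runs in eventlet's OS-thread pool without blocking the hub, while still propagating exceptions (including Timeout) to the calling greenthread. A minimal sketch of such a helper, assuming eventlet is available; older Swift releases shipped one along these lines in swift.common.utils:

    from eventlet import tpool

    def tpool_reraise(func, *args, **kwargs):
        # Run func in the thread pool; plain tpool.execute() does not
        # reliably re-raise BaseException subclasses such as Timeout,
        # so trap everything and re-raise in the caller's greenthread.
        def inner():
            try:
                return func(*args, **kwargs)
            except BaseException as err:
                return err
        resp = tpool.execute(inner)
        if isinstance(resp, BaseException):
            raise resp
        return resp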
Example #2
 def _get_hashes(self, device, partition, policy, recalculate=None,
                 do_listdir=False):
     df_mgr = self._df_router[policy]
     hashed, suffix_hashes = tpool_reraise(
         df_mgr._get_hashes, device, partition, policy,
         recalculate=recalculate, do_listdir=do_listdir)
     self.logger.update_stats('suffix.hashes', hashed)
     return suffix_hashes
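Relative to Example #1, hashing here is addressed by (device, partition, policy) and the reclaim age lives inside the per-policy DiskFileManager, so the wrapper no longer passes it. The self._df_router[policy] lookup assumes a router that maps a storage policy to its manager; a toy version (illustrative names, not Swift's actual class):

    class DiskFileRouter(object):
        # Toy stand-in: resolves a storage policy to the
        # DiskFileManager that owns that policy's on-disk layout.
        def __init__(self, managers_by_policy):
            self._managers = dict(managers_by_policy)

        def __getitem__(self, policy):
            return self._managers[policy]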
Example #3
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(self._diskfile_mgr._get_hashes,
                                               job['path'],
                                               do_listdir=_do_listdir(
                                                   int(job['partition']),
                                                   self.replication_cycle),
                                               reclaim_age=self.reclaim_age)
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            synced_remote_regions = set()
            random.shuffle(job['nodes'])
            nodes = itertools.chain(
                job['nodes'], job['policy'].object_ring.get_more_nodes(
                    int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node['replication_ip'], node['device']))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node['region'] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '',
                                            headers=headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(replication_ip)s/%(device)s '
                                  'responded as unmounted'), node)
                            attempts_left += 1
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['replication_ip']
                                  })
                            failure_devs_info.add(
                                (node['replication_ip'], node['device']))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        self.stats['hashmatch'] += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        self._diskfile_mgr._get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    self.stats['rsync'] += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers=headers)
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add(
                            (node['replication_ip'], node['device']))
                    # add only remote region when replicate succeeded
                    if success and node['region'] != job['region']:
                        synced_remote_regions.add(node['region'])
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add(
                        (node['replication_ip'], node['device']))
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self._add_failure_stats(failure_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
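The do_listdir flag in this version comes from _do_listdir(partition, replication_cycle) rather than the replication_count % 10 check used in older code; the intent is the same, but keying on the partition spreads the forced listdirs evenly across partitions instead of bunching them on every tenth call. A sketch of the helper under that assumption:

    def _do_listdir(partition, replication_cycle):
        # Force a full listdir for roughly one partition in ten on
        # each pass; the cycle offset rotates which tenth is refreshed.
        return (((partition + replication_cycle) % 10) == 0)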
Example #4
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'],))
        headers = dict(self.default_headers)
        headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        df_mgr = self._df_router[job['policy']]
        try:
            hashed, local_hash = tpool_reraise(
                df_mgr._get_hashes, job['device'],
                job['partition'], job['policy'],
                do_listdir=_do_listdir(
                    int(job['partition']),
                    self.replication_cycle))
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            synced_remote_regions = set()
            random.shuffle(job['nodes'])
            nodes = itertools.chain(
                job['nodes'],
                job['policy'].object_ring.get_more_nodes(
                    int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node['replication_ip'], node['device']))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node['region'] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '', headers=headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(replication_ip)s/%(device)s '
                                  'responded as unmounted'), node)
                            attempts_left += 1
                            failure_devs_info.add((node['replication_ip'],
                                                   node['device']))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                              {'resp': resp.status,
                                               'ip': node['replication_ip']})
                            failure_devs_info.add((node['replication_ip'],
                                                   node['device']))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    if not suffixes:
                        self.stats['hashmatch'] += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        df_mgr._get_hashes,
                        job['device'], job['partition'], job['policy'],
                        recalculate=suffixes)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    self.stats['rsync'] += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers=headers)
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add((node['replication_ip'],
                                               node['device']))
                    # add only remote region when replicate succeeded
                    if success and node['region'] != job['region']:
                        synced_remote_regions.add(node['region'])
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add((node['replication_ip'],
                                           node['device']))
                    self.logger.exception(_("Error syncing with node: %s") %
                                          node)
            self.suffix_count += len(local_hash)
        except StopIteration:
            self.logger.error('Ran out of handoffs while replicating '
                              'partition %s of policy %d',
                              job['partition'], int(job['policy']))
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            self._add_failure_stats(failure_devs_info)
            self.stats['success'] += len(target_devs_info - failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
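Examples #3 and #4 detect out-of-sync suffixes the same way: a suffix is queued for rsync when its local hash differs from the remote's, and remote_hash.get(suffix, -1) guarantees that a suffix the remote lacks always compares unequal. A toy illustration:

    local_hash = {'abc': 'aaaa', '12f': 'bbbb', '9e0': 'cccc'}
    remote_hash = {'abc': 'aaaa', '12f': 'dddd'}  # '9e0' missing remotely
    suffixes = [s for s in local_hash
                if local_hash[s] != remote_hash.get(s, -1)]
    # suffixes contains '12f' (stale) and '9e0' (missing), but not 'abc'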
Example #5
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'], ))
        self.headers[POLICY_INDEX] = job['policy_idx']
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(
                get_hashes,
                job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            nodes = itertools.chain(
                job['nodes'],
                job['object_ring'].get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'],
                            node['replication_port'],
                            node['device'],
                            job['partition'],
                            'REPLICATE',
                            '',
                            headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(
                                _('%(ip)s/%(device)s responded'
                                  ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s "
                                  "from %(ip)s"), {
                                      'resp': resp.status,
                                      'ip': node['replication_ip']
                                  })
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes,
                        job['path'],
                        recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [
                        suffix for suffix in local_hash
                        if local_hash[suffix] != remote_hash.get(suffix, -1)
                    ]

                    self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(node['replication_ip'],
                                            node['replication_port'],
                                            node['device'],
                                            job['partition'],
                                            'REPLICATE',
                                            '/' + '-'.join(suffixes),
                                            headers=self.headers)
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    self.logger.exception(
                        _("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
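After syncing, every variant issues a second REPLICATE whose path carries the pushed suffixes joined with '-'; the receiving object server splits them back apart and rehashes just those suffix directories. The encoding round-trips cleanly because suffixes are hex digits and never contain '-':

    suffixes = ['abc', '12f', '9e0']
    path = '/' + '-'.join(suffixes)
    # path == '/abc-12f-9e0'; the server undoes it with suffix.split('-')
    assert path.lstrip('/').split('-') == suffixes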
Example #6
        by the object replicator to get hashes for directories.
        """
        try:
            device, partition, suffix = split_path(
                unquote(request.path), 2, 3, True)
            validate_device_partition(device, partition)
        except ValueError as e:
            return HTTPBadRequest(body=str(e), request=request,
                                  content_type='text/plain')
        if self.mount_check and not check_mount(self.devices, device):
            return HTTPInsufficientStorage(drive=device, request=request)
        path = os.path.join(self.devices, device, DATADIR, partition)
        if not os.path.exists(path):
            mkdirs(path)
        suffixes = suffix.split('-') if suffix else []
        _junk, hashes = tpool_reraise(get_hashes, path, recalculate=suffixes)
        return Response(body=pickle.dumps(hashes))

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
Example #7
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'],))
        begin = time.time()
        try:
            # MODIFIED LightSync
            local_hash = None
            part_hash_local = tpool_reraise(get_part_hash, job['path'])
            #Partition has been modified
            if part_hash_local is None:
                hashed, local_hash = tpool_reraise(
                    get_hashes, job['path'],
                    do_listdir=(self.replication_count % 10) == 0,
                    reclaim_age=self.reclaim_age)
                self.suffix_hash += hashed
                self.logger.update_stats('suffix.hashes', hashed)  

                part_hash_local = tpool_reraise(get_part_hash, job['path'])               
            """hashed, local_hash = tpool_reraise(
                get_hashes, job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)"""
            attempts_left = True
            nodes = itertools.chain(job['nodes'])
            while True:
            ##
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                try:
                    # MODIFIED LightSync
                    req_suff = ('' if part_hash_local is None
                                else '/_SHORTREP_-' + part_hash_local)
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            req_suff, headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_('%(ip)s/%(device)s responded'
                                                ' as unmounted'), node)
                            if attempts_left:
                                attempts_left = False
                                # TODO: start from the current node's
                                # hand-off; hash node info to get the
                                # hand-off position.
                                nodes = itertools.chain(
                                    self.object_ring.get_more_nodes(
                                        int(job['partition'])),
                                    nodes)
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                              {'resp': resp.status,
                                               'ip': node['replication_ip']})
                            continue
                        part_hash_remote = pickle.loads(resp.read())
                        del resp
                    if part_hash_remote == "OK":
                        break
                    remote_hash = part_hash_remote
                    if local_hash is None:
                        hashed, local_hash = tpool_reraise(
                            get_hashes, job['path'],
                            do_listdir=(self.replication_count % 10) == 0,
                            reclaim_age=self.reclaim_age)
                        self.suffix_hash += hashed
                        self.logger.update_stats('suffix.hashes', hashed)
                    '''
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '', headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_('%(ip)s/%(device)s responded'
                                                ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                              {'resp': resp.status,
                                               'ip': node['replication_ip']})
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    '''
                ##
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    if not suffixes:
                #MODIFIED LightSync
                        break
                ##
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes,
                        job['path'], recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers=self.headers)
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                #MODIFIED LightSync
                    break
                ##
                except (Exception, Timeout):
                    self.logger.exception(_("Error syncing with node: %s") %
                                          node)
            # MODIFIED LightSync (after if)
            self.suffix_count += (len(local_hash)
                                  if local_hash is not None else 0)
            ##
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
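The LightSync fork (Examples #6 and #7) bolts a partition-level handshake onto this flow: the sender embeds its partition hash in the REPLICATE path behind a _SHORTREP_ marker, and a peer whose partition hash matches replies "OK", letting the whole per-suffix exchange be skipped. get_part_hash and the wire format are the fork's own; a toy model of the client-side decision:

    def short_replicate_matched(part_hash_local, reply):
        # Hypothetical helper mirroring the branch above: an "OK" reply
        # means the peer's partition hash matched ours, so there is
        # nothing to sync; anything else is the peer's suffix-hash dict.
        return part_hash_local is not None and reply == "OK"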
Example #8
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment('partition.update.count.%s' % (job['device'],))
        self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx']
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(
                get_hashes, job['path'],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age)
            self.suffix_hash += hashed
            self.logger.update_stats('suffix.hashes', hashed)
            attempts_left = len(job['nodes'])
            nodes = itertools.chain(
                job['nodes'],
                job['object_ring'].get_more_nodes(int(job['partition'])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '', headers=self.headers).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_('%(ip)s/%(device)s responded'
                                                ' as unmounted'), node)
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(_("Invalid response %(resp)s "
                                                "from %(ip)s"),
                                              {'resp': resp.status,
                                               'ip': node['replication_ip']})
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes,
                        job['path'], recalculate=suffixes,
                        reclaim_age=self.reclaim_age)
                    self.logger.update_stats('suffix.hashes', hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if
                                local_hash[suffix] !=
                                remote_hash.get(suffix, -1)]
                    self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node['replication_ip'], node['replication_port'],
                            node['device'], job['partition'], 'REPLICATE',
                            '/' + '-'.join(suffixes),
                            headers=self.headers)
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats('suffix.syncs', len(suffixes))
                except (Exception, Timeout):
                    self.logger.exception(_("Error syncing with node: %s") %
                                          node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since('partition.update.timing', begin)
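The suffix-hash dict travels over the wire as a pickle: the object server's REPLICATE handler returns pickle.dumps(hashes) (see Examples #6 and #10) and the replicator rebuilds it with pickle.loads(resp.read()). In isolation the round trip is just:

    import pickle

    hashes = {'abc': 'd41d8cd98f00b204e9800998ecf8427e'}
    body = pickle.dumps(hashes)       # what the object server sends back
    remote_hash = pickle.loads(body)  # what the replicator reconstructs
    assert remote_hash == hashes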
Example #9
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment("partition.update.count.%s" % (job["device"],))
        headers = dict(self.default_headers)
        headers["X-Backend-Storage-Policy-Index"] = int(job["policy"])
        target_devs_info = set()
        failure_devs_info = set()
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(
                self._diskfile_mgr._get_hashes,
                job["path"],
                do_listdir=(self.replication_count % 10) == 0,
                reclaim_age=self.reclaim_age,
            )
            self.suffix_hash += hashed
            self.logger.update_stats("suffix.hashes", hashed)
            attempts_left = len(job["nodes"])
            synced_remote_regions = set()
            random.shuffle(job["nodes"])
            nodes = itertools.chain(job["nodes"], job["policy"].object_ring.get_more_nodes(int(job["partition"])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                target_devs_info.add((node["replication_ip"], node["device"]))
                attempts_left -= 1
                # if we have already synced to this remote region,
                # don't sync again on this replication pass
                if node["region"] in synced_remote_regions:
                    continue
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node["replication_ip"],
                            node["replication_port"],
                            node["device"],
                            job["partition"],
                            "REPLICATE",
                            "",
                            headers=headers,
                        ).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_("%(ip)s/%(device)s responded as unmounted"), node)
                            attempts_left += 1
                            failure_devs_info.add((node["replication_ip"], node["device"]))
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s " "from %(ip)s"),
                                {"resp": resp.status, "ip": node["replication_ip"]},
                            )
                            failure_devs_info.add((node["replication_ip"], node["device"]))
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)]
                    if not suffixes:
                        self.stats["hashmatch"] += 1
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        self._diskfile_mgr._get_hashes, job["path"], recalculate=suffixes, reclaim_age=self.reclaim_age
                    )
                    self.logger.update_stats("suffix.hashes", hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)]
                    self.stats["rsync"] += 1
                    success, _junk = self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node["replication_ip"],
                            node["replication_port"],
                            node["device"],
                            job["partition"],
                            "REPLICATE",
                            "/" + "-".join(suffixes),
                            headers=headers,
                        )
                        conn.getresponse().read()
                    if not success:
                        failure_devs_info.add((node["replication_ip"], node["device"]))
                    # add only remote region when replicate succeeded
                    if success and node["region"] != job["region"]:
                        synced_remote_regions.add(node["region"])
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats("suffix.syncs", len(suffixes))
                except (Exception, Timeout):
                    failure_devs_info.add((node["replication_ip"], node["device"]))
                    self.logger.exception(_("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            failure_devs_info.update(target_devs_info)
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.stats["success"] += len(target_devs_info - failure_devs_info)
            self._add_failure_stats(failure_devs_info)
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since("partition.update.timing", begin)
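Per-device accounting in the newer variants is plain set arithmetic: every (ip, device) pair the pass touches lands in target_devs_info, every failure in failure_devs_info, and the success count is the difference. For example:

    target_devs_info = {('10.0.0.2', 'sdb1'), ('10.0.0.3', 'sdc1')}
    failure_devs_info = {('10.0.0.3', 'sdc1')}
    successes = len(target_devs_info - failure_devs_info)
    # successes == 1: only devices that never failed count as successes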
Example #10
        """
        try:
            device, partition, suffix = split_path(unquote(request.path), 2, 3,
                                                   True)
            validate_device_partition(device, partition)
        except ValueError as e:
            return HTTPBadRequest(body=str(e),
                                  request=request,
                                  content_type='text/plain')
        if self.mount_check and not check_mount(self.devices, device):
            return HTTPInsufficientStorage(drive=device, request=request)
        path = os.path.join(self.devices, device, DATADIR, partition)
        if not os.path.exists(path):
            mkdirs(path)
        suffixes = suffix.split('-') if suffix else []
        _junk, hashes = tpool_reraise(get_hashes, path, recalculate=suffixes)
        return Response(body=pickle.dumps(hashes))

    def __call__(self, env, start_response):
        """WSGI Application entry point for the Swift Object Server."""
        start_time = time.time()
        req = Request(env)
        self.logger.txn_id = req.headers.get('x-trans-id', None)

        if not check_utf8(req.path_info):
            res = HTTPPreconditionFailed(body='Invalid UTF8 or contains NULL')
        else:
            try:
                # disallow methods which have not been marked 'public'
                try:
                    method = getattr(self, req.method)
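On the server side, split_path carves the device, partition, and an optional suffix list out of the request path; with minsegs=2, maxsegs=3 and rest_with_last=True the third segment comes back as None when absent, and a malformed path raises the ValueError that becomes an HTTPBadRequest. Roughly:

    from swift.common.utils import split_path

    device, partition, suffix = split_path('/sda1/1234/abc-12f', 2, 3, True)
    # device == 'sda1', partition == '1234', suffix == 'abc-12f'
    device, partition, suffix = split_path('/sda1/1234', 2, 3, True)
    # suffix is None when the optional third segment is missing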
Example #11
    def update(self, job):
        """
        High-level method that replicates a single partition.

        :param job: a dict containing info about the partition to be replicated
        """
        self.replication_count += 1
        self.logger.increment("partition.update.count.%s" % (job["device"],))
        begin = time.time()
        try:
            hashed, local_hash = tpool_reraise(
                get_hashes, job["path"], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age
            )
            self.suffix_hash += hashed
            self.logger.update_stats("suffix.hashes", hashed)
            attempts_left = len(job["nodes"])
            nodes = itertools.chain(job["nodes"], self.object_ring.get_more_nodes(int(job["partition"])))
            while attempts_left > 0:
                # If this throws StopIteration it will be caught way below
                node = next(nodes)
                attempts_left -= 1
                try:
                    with Timeout(self.http_timeout):
                        resp = http_connect(
                            node["replication_ip"],
                            node["replication_port"],
                            node["device"],
                            job["partition"],
                            "REPLICATE",
                            "",
                            headers=self.headers,
                        ).getresponse()
                        if resp.status == HTTP_INSUFFICIENT_STORAGE:
                            self.logger.error(_("%(ip)s/%(device)s responded as unmounted"), node)
                            attempts_left += 1
                            continue
                        if resp.status != HTTP_OK:
                            self.logger.error(
                                _("Invalid response %(resp)s " "from %(ip)s"),
                                {"resp": resp.status, "ip": node["replication_ip"]},
                            )
                            continue
                        remote_hash = pickle.loads(resp.read())
                        del resp
                    suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)]
                    if not suffixes:
                        continue
                    hashed, recalc_hash = tpool_reraise(
                        get_hashes, job["path"], recalculate=suffixes, reclaim_age=self.reclaim_age
                    )
                    self.logger.update_stats("suffix.hashes", hashed)
                    local_hash = recalc_hash
                    suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)]
                    self.sync(node, job, suffixes)
                    with Timeout(self.http_timeout):
                        conn = http_connect(
                            node["replication_ip"],
                            node["replication_port"],
                            node["device"],
                            job["partition"],
                            "REPLICATE",
                            "/" + "-".join(suffixes),
                            headers=self.headers,
                        )
                        conn.getresponse().read()
                    self.suffix_sync += len(suffixes)
                    self.logger.update_stats("suffix.syncs", len(suffixes))
                except (Exception, Timeout):
                    self.logger.exception(_("Error syncing with node: %s") % node)
            self.suffix_count += len(local_hash)
        except (Exception, Timeout):
            self.logger.exception(_("Error syncing partition"))
        finally:
            self.partition_times.append(time.time() - begin)
            self.logger.timing_since("partition.update.timing", begin)
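For context, update() only ever reads a handful of keys from the job dict. A hypothetical job for this oldest variant might look like the following (values are placeholders, not real cluster data):

    # Hypothetical job dict; keys mirror what update() reads above.
    job = {
        'device': 'sda1',
        'partition': '1234',
        'path': '/srv/node/sda1/objects/1234',
        'nodes': [{'replication_ip': '10.0.0.2',
                   'replication_port': 6200,
                   'device': 'sdb1'}],
    }
    # replicator.update(job)  # replicator: an ObjectReplicator instance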