def find_and_process(self):
        src_filename = time.strftime(self.filename_format)
        working_dir = os.path.join(self.target_dir, ".%s-stats_tmp" % self.stats_type)
        shutil.rmtree(working_dir, ignore_errors=True)
        mkdirs(working_dir)
        tmp_filename = os.path.join(working_dir, src_filename)
        hasher = hashlib.md5()
        try:
            with open(tmp_filename, "wb") as statfile:
                statfile.write(self.get_header())
                for device in os.listdir(self.devices):
                    if self.mount_check and not check_mount(self.devices, device):
                        self.logger.error(_("Device %s is not mounted, skipping.") % device)
                        continue
                    db_dir = os.path.join(self.devices, device, self.data_dir)
                    if not os.path.exists(db_dir):
                        self.logger.debug(_("Path %s does not exist, skipping.") % db_dir)
                        continue
                    for root, dirs, files in os.walk(db_dir, topdown=False):
                        for filename in files:
                            if filename.endswith(".db"):
                                db_path = os.path.join(root, filename)
                                try:
                                    line_data = self.get_data(db_path)
                                except sqlite3.Error as err:
                                    self.logger.info(_("Error accessing db %s: %s") % (db_path, err))
                                    continue
                                if line_data:
                                    statfile.write(line_data)
                                    hasher.update(line_data)

            src_filename += hasher.hexdigest()
            renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
        finally:
            # Mirror the rmtree() at the top of the method so the temp working
            # directory is removed whether or not the rename succeeded.
            shutil.rmtree(working_dir, ignore_errors=True)
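All of these examples call a renamer() helper from swift.common.utils; the calls show its shape (renamer(old, new), plus an optional fsync keyword in the newer examples) but never its body. The following is a minimal sketch of what such a helper typically does, written for this listing only: the makedirs-then-rename behavior matches how the callers use it, while the directory fsync step is an assumption suggested by the fsync=False arguments seen further down.

import errno
import os


def renamer(old, new, fsync=True):
    # Ensure the destination's parent directory exists, tolerating the race
    # where another process creates it first.
    dirpath = os.path.dirname(new)
    try:
        os.makedirs(dirpath)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    # Move the temp file (or directory) into place in a single rename.
    os.rename(old, new)
    if fsync:
        # Assumed behavior: fsync the parent directory so the new directory
        # entry itself is durable; callers pass fsync=False to skip this.
        dirfd = os.open(dirpath, os.O_RDONLY)
        try:
            os.fsync(dirfd)
        finally:
            os.close(dirfd)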
Example #2
    def process_object_update(self, update_path, device):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        """
        try:
            update = pickle.load(open(update_path, "rb"))
        except Exception:
            self.logger.exception(_("ERROR Pickle problem, quarantining %s"), update_path)
            renamer(update_path, os.path.join(device, "quarantined", "objects", os.path.basename(update_path)))
            return
        successes = update.get("successes", [])
        part, nodes = self.get_container_ring().get_nodes(update["account"], update["container"])
        obj = "/%s/%s/%s" % (update["account"], update["container"], update["obj"])
        success = True
        for node in nodes:
            if node["id"] not in successes:
                status = self.object_update(node, part, update["op"], obj, update["headers"])
                if not (200 <= status < 300) and status != 404:
                    success = False
                else:
                    successes.append(node["id"])
        if success:
            self.successes += 1
            self.logger.debug(_("Update sent for %(obj)s %(path)s"), {"obj": obj, "path": update_path})
            os.unlink(update_path)
        else:
            self.failures += 1
            self.logger.debug(_("Update failed for %(obj)s %(path)s"), {"obj": obj, "path": update_path})
            update["successes"] = successes
            write_pickle(update, update_path, os.path.join(device, "tmp"))
Example #3
    def copy_put(self, fd, tmppath):
        
        tpool.execute(os.fsync, fd)
        
        if self.obj_path:
            dir_objs = self.obj_path.split('/')
            tmp_path = ''
            if len(dir_objs):
                for dir_name in dir_objs:
                    if tmp_path:
                        tmp_path = tmp_path + '/' + dir_name
                    else:
                        tmp_path = dir_name
                    if not self.create_dir_object(os.path.join(self.container_path,
                            tmp_path)):
                        self.logger.error("Failed in subdir %s",\
                                        os.path.join(self.container_path,tmp_path))
                        return False

        renamer(tmppath, os.path.join(self.datadir,
                                      self.obj))
        
        do_chown(os.path.join(self.datadir, self.obj), self.uid, self.gid)
        
        return True
Example #4
    def put(self, fd, metadata):
        """
        Finalize writing the file on disk, and renames it from the temp file to
        the real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param metadata: dictionary of metadata to be written
        """
        assert self.tmppath is not None
        assert self._type == 0
        # wait, what?
        #metadata['name'] = self.name
        timestamp = normalize_timestamp(metadata['X-Timestamp'])
        base_path = os.path.join(self.datadir, timestamp)
        # P3
        fp = open("/tmp/dump","a")
        print >>fp, "posix put old", self.tmppath, "new", base_path
        fp.close()
        write_meta_file(base_path + '.meta', metadata)
        #if 'Content-Length' in metadata:
        #    self.drop_cache(fd, 0, int(metadata['Content-Length']))
        # XXX os.fsync maybe?
        #tpool.execute(fsync, fd)
        renamer(self.tmppath, base_path + ".data")
        # but not setting self.data_file here, is this right?
        self.metadata = metadata
Example #5
    def put(self, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and renames it from the temp file
        to the real location.  This should be called after the data has been
        written to the temp file.

        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        assert self.tmppath is not None
        timestamp = normalize_timestamp(metadata['X-Timestamp'])
        metadata['name'] = self.disk_file.name
        # Write the metadata before calling fsync() so that both data and
        # metadata are flushed to disk.
        write_metadata(self.fd, metadata)
        # We call fsync() before calling drop_cache() to lower the amount of
        # redundant work the drop cache code will perform on the pages (now
        # that after fsync the pages will be all clean).
        tpool.execute(fsync, self.fd)
        # From the Department of the Redundancy Department, make sure we
        # call drop_cache() after fsync() to avoid redundant work (pages
        # all clean).
        drop_buffer_cache(self.fd, 0, self.upload_size)
        invalidate_hash(os.path.dirname(self.disk_file.datadir))
        # After the rename completes, this object will be available for other
        # requests to reference.
        renamer(self.tmppath,
                os.path.join(self.disk_file.datadir, timestamp + extension))
        self.disk_file.metadata = metadata
Example #6
 def quarantine(self, reason):
     """
     The database will be quarantined and a
     sqlite3.DatabaseError will be raised indicating the action taken.
     """
     prefix_path = os.path.dirname(self.db_dir)
     partition_path = os.path.dirname(prefix_path)
     dbs_path = os.path.dirname(partition_path)
     device_path = os.path.dirname(dbs_path)
     quar_path = os.path.join(device_path, 'quarantined',
                              self.db_type + 's',
                              os.path.basename(self.db_dir))
     try:
         renamer(self.db_dir, quar_path, fsync=False)
     except OSError as e:
         if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
             raise
         quar_path = "%s-%s" % (quar_path, uuid4().hex)
         renamer(self.db_dir, quar_path, fsync=False)
     detail = _('Quarantined %(db_dir)s to %(quar_path)s due to '
                '%(reason)s') % {'db_dir': self.db_dir,
                                 'quar_path': quar_path,
                                 'reason': reason}
     self.logger.error(detail)
     raise sqlite3.DatabaseError(detail)
Example #7
def recalculate_hashes(partition_dir, suffixes, reclaim_age=ONE_WEEK):
    """
    Recalculates hashes for the given suffixes in the partition and updates
    them in the partition's hashes file.

    :param partition_dir: directory of the partition in which to recalculate
    :param suffixes: list of suffixes to recalculate
    :param reclaim_age: age in seconds at which tombstones should be removed
    """

    def tpool_listdir(partition_dir):
        return dict(((suff, None) for suff in os.listdir(partition_dir)
                     if len(suff) == 3 and isdir(join(partition_dir, suff))))
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
        except Exception:
            hashes = tpool.execute(tpool_listdir, partition_dir)
        for suffix in suffixes:
            suffix_dir = join(partition_dir, suffix)
            if os.path.exists(suffix_dir):
                hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
            elif suffix in hashes:
                del hashes[suffix]
        with open(hashes_file + '.tmp', 'wb') as fp:
            pickle.dump(hashes, fp, PICKLE_PROTOCOL)
        renamer(hashes_file + '.tmp', hashes_file)
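The '.tmp' write followed by renamer() above is the standard atomic-replace idiom: any process reading the hashes file sees either the previous pickle or the complete new one, never a torn write, because the final step is a single rename inside the same directory. A generic, self-contained sketch of the same idiom, for illustration only (atomic_write_pickle is not a Swift name):

import os
import pickle
import tempfile


def atomic_write_pickle(obj, dest_path, protocol=2):
    # Create the temp file in the destination's own directory so the final
    # os.rename() stays on one filesystem and is atomic.
    dirpath = os.path.dirname(dest_path) or '.'
    fd, tmp_path = tempfile.mkstemp(dir=dirpath, suffix='.tmp')
    try:
        with os.fdopen(fd, 'wb') as fp:
            pickle.dump(obj, fp, protocol)
        os.rename(tmp_path, dest_path)
    except Exception:
        # On any failure, drop the partial temp file before re-raising.
        os.unlink(tmp_path)
        raise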
Example #8
 def possibly_quarantine(self, exc_type, exc_value, exc_traceback):
     """
     Checks the exception info to see if it indicates a quarantine situation
     (malformed or corrupted database). If not, the original exception will
     be reraised. If so, the database will be quarantined and a new
     sqlite3.DatabaseError will be raised indicating the action taken.
     """
     if 'database disk image is malformed' in str(exc_value):
         exc_hint = 'malformed'
     elif 'file is encrypted or is not a database' in str(exc_value):
         exc_hint = 'corrupted'
     else:
         six.reraise(exc_type, exc_value, exc_traceback)
     prefix_path = os.path.dirname(self.db_dir)
     partition_path = os.path.dirname(prefix_path)
     dbs_path = os.path.dirname(partition_path)
     device_path = os.path.dirname(dbs_path)
     quar_path = os.path.join(device_path, 'quarantined',
                              self.db_type + 's',
                              os.path.basename(self.db_dir))
     try:
         renamer(self.db_dir, quar_path)
     except OSError as e:
         if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
             raise
         quar_path = "%s-%s" % (quar_path, uuid4().hex)
         renamer(self.db_dir, quar_path)
     detail = _('Quarantined %s to %s due to %s database') % \
               (self.db_dir, quar_path, exc_hint)
     self.logger.error(detail)
     raise sqlite3.DatabaseError(detail)
Example #9
    def put(self, fd, tmppath, metadata, extension=''):
        """
        Finalize writing the file on disk, and renames it from the temp file to
        the real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param tmppath: path to the temporary file being used
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        #Marker dir.
        if extension == '.ts':
            return True
        if extension == '.meta':
            self.put_metadata(metadata)
            return True
        else:
            extension = ''
        if metadata[X_OBJECT_TYPE] == MARKER_DIR:
            self.create_dir_object(os.path.join(self.datadir, self.obj))
            self.put_metadata(metadata)
            self.data_file = self.datadir + '/' + self.obj
            return True
        #Check if directory already exists.
        if self.is_dir:
            self.logger.error('Directory already exists %s/%s' %
                              (self.datadir, self.obj))
            return False
        #metadata['name'] = self.name
        timestamp = normalize_timestamp(metadata[X_TIMESTAMP])
        write_metadata(tmppath, metadata)
        if X_CONTENT_LENGTH in metadata:
            self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
        tpool.execute(os.fsync, fd)
        if self.obj_path:
            dir_objs = self.obj_path.split('/')
            tmp_path = ''
            if len(dir_objs):
                for dir_name in dir_objs:
                    if tmp_path:
                        tmp_path = tmp_path + '/' + dir_name
                    else:
                        tmp_path = dir_name
                    if not self.create_dir_object(os.path.join(self.container_path,
                            tmp_path)):
                        self.logger.error("Failed in subdir %s",\
                                        os.path.join(self.container_path,tmp_path))
                        return False

        renamer(tmppath, os.path.join(self.datadir,
                                      self.obj + extension))
        do_chown(os.path.join(self.datadir, self.obj + extension),
                 self.uid, self.gid)
        self.metadata = metadata
        #self.logger.error("Meta %s", self.metadata)
        self.data_file = self.datadir + '/' + self.obj + extension
        return True
Example #10
    def process_object_update(self, update_path, device, policy):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        :param policy: storage policy of object update
        """
        try:
            update = pickle.load(open(update_path, 'rb'))
        except Exception:
            self.logger.exception(
                _('ERROR Pickle problem, quarantining %s'), update_path)
            self.stats.quarantines += 1
            self.logger.increment('quarantines')
            target_path = os.path.join(device, 'quarantined', 'objects',
                                       os.path.basename(update_path))
            renamer(update_path, target_path, fsync=False)
            return
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
            update['account'], update['container'])
        obj = '/%s/%s/%s' % \
              (update['account'], update['container'], update['obj'])
        headers_out = HeaderKeyDict(update['headers'])
        headers_out['user-agent'] = 'object-updater %s' % os.getpid()
        headers_out.setdefault('X-Backend-Storage-Policy-Index',
                               str(int(policy)))
        events = [spawn(self.object_update,
                        node, part, update['op'], obj, headers_out)
                  for node in nodes if node['id'] not in successes]
        success = True
        new_successes = False
        for event in events:
            event_success, node_id = event.wait()
            if event_success is True:
                successes.append(node_id)
                new_successes = True
            else:
                success = False
        if success:
            self.stats.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_path})
            self.stats.unlinks += 1
            self.logger.increment('unlinks')
            os.unlink(update_path)
        else:
            self.stats.failures += 1
            self.logger.increment('failures')
            self.logger.debug('Update failed for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_path})
            if new_successes:
                update['successes'] = successes
                write_pickle(update, update_path, os.path.join(
                    device, get_tmp_dir(policy)))
Example #11
 def complete_rsync(self, drive, db_file, args):
     old_filename = os.path.join(self.root, drive, 'tmp', args[0])
     if os.path.exists(db_file):
         return HTTPNotFound()
     if not os.path.exists(old_filename):
         return HTTPNotFound()
     broker = self.broker_class(old_filename)
     broker.newid(args[0])
     renamer(old_filename, db_file)
     return HTTPNoContent()
Example #12
    def put(self, fd, tmppath, metadata, extension=''):
        """
        Finalize writing the file on disk, and renames it from the temp file to
        the real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param tmppath: path to the temporary file being used
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        #Marker dir.
        if metadata[X_OBJECT_TYPE] == MARKER_DIR:
            if os.path.exists(os.path.join(self.datadir, self.obj)) and \
               not os.path.isdir(os.path.join(self.datadir, self.obj)):
                os.unlink(os.path.join(self.datadir, self.obj))
            mkdirs(os.path.join(self.datadir, self.obj))
            os.chown(os.path.join(self.datadir, self.obj), self.uid, self.gid)
            self.put_metadata(metadata)
            self.data_file = self.datadir + '/' + self.obj
            return True
        #Check if directory already exists.
        if self.is_dir:
            logging.error('Directory already exists %s/%s' %
                          (self.datadir, self.obj))
            return False
        #metadata['name'] = self.name
        timestamp = normalize_timestamp(metadata[X_TIMESTAMP])
        write_metadata(fd, metadata)
        if X_CONTENT_LENGTH in metadata:
            self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
        tpool.execute(os.fsync, fd)
        if self.obj_path:
            dir_objs = self.obj_path.split('/')
            tmp_path = ''
            if len(dir_objs):
                for dir_name in dir_objs:
                    if tmp_path:
                        tmp_path = tmp_path + '/' + dir_name
                    else:
                        tmp_path = dir_name
                    if not self.create_dir_object(tmp_path, metadata[X_TIMESTAMP]):
                        return False
                                       
        #print 'Gaurav put tmppath', tmppath, os.path.join(self.datadir,
                                                          #self.obj+extension)
        #invalidate_hash(os.path.dirname(self.datadir))
        renamer(tmppath, os.path.join(self.datadir,
                                      self.obj + extension))
        os.chown(os.path.join(self.datadir, self.obj + extension),
                 self.uid, self.gid)
        self.metadata = metadata
        self.data_file = self.datadir + '/' + self.obj + extension
        return True
Example #13
    def process_object_update(self, update_path, device, policy_idx):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        :param policy_idx: storage policy index of object update
        """
        try:
            update = pickle.load(open(update_path, 'rb'))
        except Exception:
            self.logger.exception(
                _('ERROR Pickle problem, quarantining %s'), update_path)
            self.logger.increment('quarantines')
            renamer(update_path, os.path.join(
                    device, 'quarantined', 'objects',
                    os.path.basename(update_path)))
            return
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
            update['account'], update['container'])
        obj = '/%s/%s/%s' % \
              (update['account'], update['container'], update['obj'])
        success = True
        new_successes = False
        for node in nodes:
            if node['id'] not in successes:
                headers = update['headers'].copy()
                headers.setdefault('X-Backend-Storage-Policy-Index',
                                   str(policy_idx))
                status = self.object_update(node, part, update['op'], obj,
                                            headers)
                if not is_success(status) and status != HTTP_NOT_FOUND:
                    success = False
                else:
                    successes.append(node['id'])
                    new_successes = True
        if success:
            self.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_path})
            self.logger.increment("unlinks")
            os.unlink(update_path)
        else:
            self.failures += 1
            self.logger.increment('failures')
            self.logger.debug('Update failed for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_path})
            if new_successes:
                update['successes'] = successes
                write_pickle(update, update_path, os.path.join(
                    device, get_tmp_dir(policy_idx)))
Example #14
 def find_and_process(self):
     src_filename = time.strftime(self.filename_format)
     working_dir = os.path.join(self.target_dir, '.stats_tmp')
     shutil.rmtree(working_dir, ignore_errors=True)
     mkdirs(working_dir)
     tmp_filename = os.path.join(working_dir, src_filename)
     hasher = hashlib.md5()
     with open(tmp_filename, 'wb') as statfile:
         # csv has the following columns:
         # Account Name, Container Count, Object Count, Bytes Used
         for device in os.listdir(self.devices):
             if self.mount_check and not check_mount(self.devices, device):
                 self.logger.error(
                     _("Device %s is not mounted, skipping.") % device)
                 continue
             accounts = os.path.join(self.devices,
                                     device,
                                     account_server_data_dir)
             if not os.path.exists(accounts):
                 self.logger.debug(_("Path %s does not exist, skipping.") %
                     accounts)
                 continue
             for root, dirs, files in os.walk(accounts, topdown=False):
                 for filename in files:
                     if filename.endswith('.db'):
                         db_path = os.path.join(root, filename)
                         broker = AccountBroker(db_path)
                         if not broker.is_deleted():
                             (account_name,
                             _junk, _junk, _junk,
                             container_count,
                             object_count,
                             bytes_used,
                             _junk, _junk) = broker.get_info()
                             line_data = '"%s",%d,%d,%d\n' % (
                                 account_name, container_count,
                                 object_count, bytes_used)
                             statfile.write(line_data)
                             hasher.update(line_data)
     file_hash = hasher.hexdigest()
     hash_index = src_filename.find('*')
     if hash_index < 0:
         # if there is no * in the target filename, the uploader probably
         # won't work because we are crafting a filename that doesn't
         # fit the pattern
         src_filename = '_'.join([src_filename, file_hash])
     else:
         parts = src_filename[:hash_index], src_filename[hash_index + 1:]
         src_filename = ''.join([parts[0], file_hash, parts[1]])
     renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
     shutil.rmtree(working_dir, ignore_errors=True)
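To make the '*' substitution above concrete, here is a small worked example; the filename pattern and hash value are illustrative only:

# Illustrative values, not real output of this collector.
src_filename = 'account-stats-*.csv'            # from time.strftime(self.filename_format)
file_hash = 'd41d8cd98f00b204e9800998ecf8427e'  # hasher.hexdigest() over the csv rows
hash_index = src_filename.find('*')
parts = src_filename[:hash_index], src_filename[hash_index + 1:]
src_filename = ''.join([parts[0], file_hash, parts[1]])
# src_filename is now 'account-stats-d41d8cd98f00b204e9800998ecf8427e.csv'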
Example #15
def quarantine_db(object_file, server_type):
    """
    In the case that a corrupt file is found, move it to a quarantined area to
    allow replication to fix it.

    :param object_file: path to corrupt file
    :param server_type: type of file that is corrupt
                        ('container' or 'account')
    """
    object_dir = os.path.dirname(object_file)
    quarantine_dir = os.path.abspath(os.path.join(object_dir, '..',
        '..', '..', '..', 'quarantined', server_type + 's',
        os.path.basename(object_dir)))
    renamer(object_dir, quarantine_dir)
Example #16
    def put(self, fd, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and renames it from the temp file to
        the real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        # Our caller will use '.data' here; we just ignore it since we map the
        # URL directly to the file system.
        extension = ''

        metadata = _adjust_metadata(metadata)

        if metadata[X_OBJECT_TYPE] == MARKER_DIR:
            if not self.data_file:
                self.data_file = os.path.join(self.datadir, self._obj)
                self._create_dir_object(self.data_file)
            self.put_metadata(metadata)
            return

        # Check if directory already exists.
        if self._is_dir:
            # FIXME: How can we have a directory and it not be marked as a
            # MARKER_DIR (see above)?
            msg = 'File object exists as a directory: %s' % self.data_file
            raise AlreadyExistsAsDir(msg)

        timestamp = normalize_timestamp(metadata[X_TIMESTAMP])
        write_metadata(self.tmppath, metadata)
        if X_CONTENT_LENGTH in metadata:
            self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
        tpool.execute(os.fsync, fd)
        if self._obj_path:
            dir_objs = self._obj_path.split('/')
            assert len(dir_objs) >= 1
            tmp_path = self._container_path
            for dir_name in dir_objs:
                tmp_path = os.path.join(tmp_path, dir_name)
                self._create_dir_object(tmp_path)

        newpath = os.path.join(self.datadir, self._obj)
        renamer(self.tmppath, newpath)
        do_chown(newpath, self.uid, self.gid)
        self.metadata = metadata
        self.data_file = newpath
        self.filter_metadata()
        return
Example #17
def get_hashes(partition_dir, do_listdir=True, reclaim_age=ONE_WEEK):
    """
    Get a list of hashes for the suffix dir.  do_listdir causes it to mistrust
    the hash cache for suffix existence at the (unexpectedly high) cost of a
    listdir.  reclaim_age is just passed on to hash_suffix.

    :param partition_dir: absolute path of partition to get hashes for
    :param do_listdir: force existence check for all hashes in the partition
    :param reclaim_age: age at which to remove tombstones

    :returns: tuple of (number of suffix dirs hashed, dictionary of hashes)
    """

    def tpool_listdir(hashes, partition_dir):
        return dict(((suff, hashes.get(suff, None))
                     for suff in os.listdir(partition_dir)
                     if len(suff) == 3 and isdir(join(partition_dir, suff))))
    hashed = 0
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        modified = False
        hashes = {}
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
        except Exception:
            do_listdir = True
        if do_listdir:
            hashes = tpool.execute(tpool_listdir, hashes, partition_dir)
            modified = True
        for suffix, hash_ in hashes.items():
            if not hash_:
                suffix_dir = join(partition_dir, suffix)
                if os.path.exists(suffix_dir):
                    try:
                        hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
                        hashed += 1
                    except OSError:
                        logging.exception(_('Error hashing suffix'))
                        hashes[suffix] = None
                else:
                    del hashes[suffix]
                modified = True
                sleep()
        if modified:
            with open(hashes_file + '.tmp', 'wb') as fp:
                pickle.dump(hashes, fp, PICKLE_PROTOCOL)
            renamer(hashes_file + '.tmp', hashes_file)
        return hashed, hashes
Example #18
 def rsync_then_merge(self, drive, db_file, args):
     old_filename = os.path.join(self.root, drive, 'tmp', args[0])
     if not os.path.exists(db_file) or not os.path.exists(old_filename):
         return HTTPNotFound()
     new_broker = self.broker_class(old_filename)
     existing_broker = self.broker_class(db_file)
     point = -1
     objects = existing_broker.get_items_since(point, 1000)
     while len(objects):
         new_broker.merge_items(objects)
         point = objects[-1]['ROWID']
         objects = existing_broker.get_items_since(point, 1000)
         sleep()
     new_broker.newid(args[0])
     renamer(old_filename, db_file)
     return HTTPNoContent()
Example #19
 def finalize_put():
     # Write the metadata before calling fsync() so that both data and
     # metadata are flushed to disk.
     write_metadata(self.fd, metadata)
     # We call fsync() before calling drop_cache() to lower the amount
     # of redundant work the drop cache code will perform on the pages
     # (now that after fsync the pages will be all clean).
     fsync(self.fd)
     # From the Department of the Redundancy Department, make sure
     # we call drop_cache() after fsync() to avoid redundant work
     # (pages all clean).
     drop_buffer_cache(self.fd, 0, self.upload_size)
     invalidate_hash(os.path.dirname(self.disk_file.datadir))
     # After the rename completes, this object will be available for
     # other requests to reference.
     renamer(self.tmppath, os.path.join(self.disk_file.datadir, timestamp + extension))
Example #20
    def put(self, fd, tmppath, metadata, extension=''):
        
        if extension == '.ts':
            # TombStone marker (deleted)
            return True
        
        metadata[X_TYPE] = OBJECT
        
        if extension == '.meta':
            # Metadata recorded separately from the file
            self.meta_put_metadata(metadata)
            return True

        # Check if directory already exists.
        if self.is_dir:
            self.logger.error('Directory already exists %s/%s' %
                              (self.datadir, self.obj))
            return False

        meta_write_metadata(self.metafile, metadata)
        
        if X_CONTENT_LENGTH in metadata:
            self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
        tpool.execute(os.fsync, fd)
        
        if self.obj_path:
            dir_objs = self.obj_path.split('/')
            tmp_path = ''
            if len(dir_objs):
                for dir_name in dir_objs:
                    if tmp_path:
                        tmp_path = tmp_path + '/' + dir_name
                    else:
                        tmp_path = dir_name
                    if not self.create_dir_object(os.path.join(self.container_path,
                            tmp_path)):
                        self.logger.error("Failed in subdir %s",\
                                        os.path.join(self.container_path,tmp_path))
                        return False

        renamer(tmppath, os.path.join(self.datadir,
                                      self.obj))
        
        do_chown(os.path.join(self.datadir, self.obj), self.uid, self.gid)
        self.metadata = metadata
        
        return True
Example #21
 def rsync_then_merge(self, drive, db_file, args):
     old_filename = os.path.join(self.root, drive, 'tmp', args[0])
     if not os.path.exists(db_file) or not os.path.exists(old_filename):
         return HTTPNotFound()
     new_broker = self.broker_class(old_filename)
     existing_broker = self.broker_class(db_file)
     point = -1
     objects = existing_broker.get_items_since(point, 1000)
     while len(objects):
         new_broker.merge_items(objects)
         point = objects[-1]['ROWID']
         objects = existing_broker.get_items_since(point, 1000)
         sleep()
     new_broker.newid(args[0])
     new_broker.update_metadata(existing_broker.metadata)
     renamer(old_filename, db_file)
     return HTTPNoContent()
Example #22
    def process_object_update(self, update_path, device):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        """
        try:
            update = pickle.load(open(update_path, 'rb'))
        except Exception:
            self.logger.exception(
                _('ERROR Pickle problem, quarantining %s'), update_path)
            self.logger.increment('quarantines')
            renamer(update_path, os.path.join(device,
                'quarantined', 'objects', os.path.basename(update_path)))
            return
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
                                update['account'], update['container'])
        obj = '/%s/%s/%s' % \
              (update['account'], update['container'], update['obj'])
        success = True
        new_successes = False
        for node in nodes:
            if node['id'] not in successes:
                status = self.object_update(node, part, update['op'], obj,
                                        update['headers'])
                if not is_success(status) and status != HTTP_NOT_FOUND:
                    success = False
                else:
                    successes.append(node['id'])
                    new_successes = True
        if success:
            self.successes += 1
            self.logger.increment('successes')
            self.logger.debug(_('Update sent for %(obj)s %(path)s'),
                {'obj': obj, 'path': update_path})
            os.unlink(update_path)
        else:
            self.failures += 1
            self.logger.increment('failures')
            self.logger.debug(_('Update failed for %(obj)s %(path)s'),
                {'obj': obj, 'path': update_path})
            if new_successes:
                update['successes'] = successes
                write_pickle(update, update_path, os.path.join(device, 'tmp'))
Example #23
 def _finalize_put(self, metadata, target_path):
     # Write the metadata before calling fsync() so that both data and
     # metadata are flushed to disk.
     write_metadata(self._fd, metadata)
     # We call fsync() before calling drop_cache() to lower the amount of
     # redundant work the drop cache code will perform on the pages (now
     # that after fsync the pages will be all clean).
     fsync(self._fd)
     # From the Department of the Redundancy Department, make sure we call
     # drop_cache() after fsync() to avoid redundant work (pages all
     # clean).
     drop_buffer_cache(self._fd, 0, self._upload_size)
     invalidate_hash(dirname(self._datadir))
     # After the rename completes, this object will be available for other
     # requests to reference.
     renamer(self._tmppath, target_path)
     hash_cleanup_listdir(self._datadir)
Example #24
 def _finalize_put(self, metadata, target_path):
     # Write the metadata before calling fsync() so that both data and
     # metadata are flushed to disk.
     write_metadata(self._fd, metadata)
     # We call fsync() before calling drop_cache() to lower the amount of
     # redundant work the drop cache code will perform on the pages (now
     # that after fsync the pages will be all clean).
     fsync(self._fd)
     # From the Department of the Redundancy Department, make sure we call
     # drop_cache() after fsync() to avoid redundant work (pages all
     # clean).
     drop_buffer_cache(self._fd, 0, self._upload_size)
     invalidate_hash(dirname(self._datadir))
     # After the rename completes, this object will be available for other
     # requests to reference.
     renamer(self._tmppath, target_path)
     hash_cleanup_listdir(self._datadir)
Example #25
    def _finalize_put(self, metadata, target_path):
        # Write the metadata before calling fsync() so that both data and
        # metadata are flushed to disk.
        write_metadata(self.fd, metadata)
        # We call fsync() before calling drop_cache() to lower the amount
        # of redundant work the drop cache code will perform on the pages
        # (now that after fsync the pages will be all clean).
        fsync(self.fd)
        # From the Department of the Redundancy Department, make sure
        # we call drop_cache() after fsync() to avoid redundant work
        # (pages all clean).
        drop_buffer_cache(self.fd, 0, self.upload_size)
        invalidate_hash(dirname(self.disk_file.datadir))
        # After the rename completes, this object will be available for
        # other requests to reference.
        renamer(self.tmppath, target_path)
        hash_cleanup_listdir(self.disk_file.datadir)
Example #26
 def finalize_put():
     # Write the metadata before calling fsync() so that both data and
     # metadata are flushed to disk.
     write_metadata(self.fd, metadata)
     # We call fsync() before calling drop_cache() to lower the amount
     # of redundant work the drop cache code will perform on the pages
     # (now that after fsync the pages will be all clean).
     fsync(self.fd)
     # From the Department of the Redundancy Department, make sure
     # we call drop_cache() after fsync() to avoid redundant work
     # (pages all clean).
     drop_buffer_cache(self.fd, 0, self.upload_size)
     invalidate_hash(dirname(self.disk_file.datadir))
     # After the rename completes, this object will be available for
     # other requests to reference.
     renamer(self.tmppath,
             join(self.disk_file.datadir, timestamp + extension))
     hash_cleanup_listdir(self.disk_file.datadir)
Example #27
    def process_object_update(self, update_path, device):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        """
        try:
            update = pickle.load(open(update_path, 'rb'))
        except Exception:
            self.logger.exception(
                _('ERROR Pickle problem, quarantining %s'), update_path)
            renamer(update_path, os.path.join(device,
                'quarantined', 'objects', os.path.basename(update_path)))
            return
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
                                update['account'], update['container'])
        obj = '/%s/%s/%s' % \
              (update['account'], update['container'], update['obj'])
        success = True
        new_successes = False
        for node in nodes:
            if node['id'] not in successes:
                status = self.object_update(node, part, update['op'], obj,
                                        update['headers'])
                if not is_success(status) and status != HTTP_NOT_FOUND:
                    success = False
                else:
                    successes.append(node['id'])
                    new_successes = True
        if success:
            self.successes += 1
            self.logger.debug(_('Update sent for %(obj)s %(path)s'),
                {'obj': obj, 'path': update_path})
            os.unlink(update_path)
        else:
            self.failures += 1
            self.logger.debug(_('Update failed for %(obj)s %(path)s'),
                {'obj': obj, 'path': update_path})
            if new_successes:
                update['successes'] = successes
                write_pickle(update, update_path, os.path.join(device, 'tmp'))
Example #28
def quarantine_db(object_file, server_type):
    """
    In the case that a corrupt file is found, move it to a quarantined area to
    allow replication to fix it.

    :param object_file: path to corrupt file
    :param server_type: type of file that is corrupt
                        ('container' or 'account')
    """
    object_dir = os.path.dirname(object_file)
    quarantine_dir = os.path.abspath(os.path.join(object_dir, '..',
        '..', '..', '..', 'quarantined', server_type + 's',
        os.path.basename(object_dir)))
    try:
        renamer(object_dir, quarantine_dir)
    except OSError as e:
        if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
            raise
        quarantine_dir = "%s-%s" % (quarantine_dir, uuid.uuid4().hex)
        renamer(object_dir, quarantine_dir)
Example #29
    def put(self, fd, tmppath, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and renames it from the temp file to
        the real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param tmppath: path to the temporary file being used
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        metadata['name'] = self.name
        timestamp = normalize_timestamp(metadata['X-Timestamp'])
        write_metadata(fd, metadata)
        if 'Content-Length' in metadata:
            self.drop_cache(fd, 0, int(metadata['Content-Length']))
        tpool.execute(os.fsync, fd)
        invalidate_hash(os.path.dirname(self.datadir))
        renamer(tmppath, os.path.join(self.datadir, timestamp + extension))
        self.metadata = metadata
Example #30
    def put(self, fd, metadata, extension=".data"):
        """
        Finalize writing the file on disk, and renames it from the temp file to
        the real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        assert self.tmppath is not None
        metadata["name"] = self.name
        timestamp = normalize_timestamp(metadata["X-Timestamp"])
        write_metadata(fd, metadata)
        if "Content-Length" in metadata:
            self.drop_cache(fd, 0, int(metadata["Content-Length"]))
        tpool.execute(fsync, fd)
        invalidate_hash(os.path.dirname(self.datadir))
        renamer(self.tmppath, os.path.join(self.datadir, timestamp + extension))
        self.metadata = metadata
Example #31
def quarantine_db(object_file, server_type):
    """
    In the case that a corrupt file is found, move it to a quarantined area to
    allow replication to fix it.

    :param object_file: path to corrupt file
    :param server_type: type of file that is corrupt
                        ('container' or 'account')
    """
    object_dir = os.path.dirname(object_file)
    quarantine_dir = os.path.abspath(
        os.path.join(object_dir, '..', '..', '..', '..', 'quarantined',
                     server_type + 's', os.path.basename(object_dir)))
    try:
        renamer(object_dir, quarantine_dir, fsync=False)
    except OSError as e:
        if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
            raise
        quarantine_dir = "%s-%s" % (quarantine_dir, uuid.uuid4().hex)
        renamer(object_dir, quarantine_dir, fsync=False)
Example #32
    def put(self, fd, tmppath, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and renames it from the temp file to
        the real location.  This should be called after the data has been
        written to the temp file.

        :param fd: file descriptor of the temp file
        :param tmppath: path to the temporary file being used
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        metadata['name'] = self.name
        timestamp = normalize_timestamp(metadata['X-Timestamp'])
        write_metadata(fd, metadata)
        if 'Content-Length' in metadata:
            self.drop_cache(fd, 0, int(metadata['Content-Length']))
        tpool.execute(fsync, fd)
        invalidate_hash(os.path.dirname(self.datadir))
        renamer(tmppath, os.path.join(self.datadir, timestamp + extension))
        self.metadata = metadata
Example #33
 def possibly_quarantine(self, exc_type, exc_value, exc_traceback):
     """
     Checks the exception info to see if it indicates a quarantine situation
     (malformed or corrupted database). If not, the original exception will
     be reraised. If so, the database will be quarantined and a new
     sqlite3.DatabaseError will be raised indicating the action taken.
     """
     if 'database disk image is malformed' in str(exc_value):
         exc_hint = 'malformed'
     elif 'malformed database schema' in str(exc_value):
         exc_hint = 'malformed'
     elif ' is not a database' in str(exc_value):
         # older versions said 'file is not a database'
         # now 'file is encrypted or is not a database'
         exc_hint = 'corrupted'
     elif 'disk I/O error' in str(exc_value):
         exc_hint = 'disk error while accessing'
     else:
         six.reraise(exc_type, exc_value, exc_traceback)
     prefix_path = os.path.dirname(self.db_dir)
     partition_path = os.path.dirname(prefix_path)
     dbs_path = os.path.dirname(partition_path)
     device_path = os.path.dirname(dbs_path)
     quar_path = os.path.join(device_path, 'quarantined',
                              self.db_type + 's',
                              os.path.basename(self.db_dir))
     try:
         renamer(self.db_dir, quar_path, fsync=False)
     except OSError as e:
         if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
             raise
         quar_path = "%s-%s" % (quar_path, uuid4().hex)
         renamer(self.db_dir, quar_path, fsync=False)
     detail = _('Quarantined %(db_dir)s to %(quar_path)s due to '
                '%(exc_hint)s database') % {
                    'db_dir': self.db_dir,
                    'quar_path': quar_path,
                    'exc_hint': exc_hint
                }
     self.logger.error(detail)
     raise sqlite3.DatabaseError(detail)
Example #34
    def quarantine(self):
        """
        In the case that a file is corrupted, move it to a quarantined
        area to allow replication to fix it.

        :returns: if quarantine is successful, path to quarantined
                  directory otherwise None
        """
        #from swift.obj.replicator quarantine_renamer, get_hashes
        #if not (self.is_deleted() or self.quarantined_dir):
        #    self.quarantined_dir = quarantine_renamer(self.device_path,
        #                                              self.data_file)
        #    self.logger.increment('quarantines')
        #    return self.quarantined_dir

        # stub for now XXX
        # P3
        fp = open("/tmp/dump","a")
        print >>fp, "posix quarantine", self.data_file + ".quar"
        fp.close()
        renamer(self.data_file, self.data_file + ".quar")
Example #35
    def find_and_process(self):
        src_filename = datetime.now(self.time_zone).strftime(
            self.filename_format)
        working_dir = os.path.join(self.target_dir,
                                   '.%s-stats_tmp' % self.stats_type)
        shutil.rmtree(working_dir, ignore_errors=True)
        mkdirs(working_dir)
        tmp_filename = os.path.join(working_dir, src_filename)
        hasher = hashlib.md5()
        try:
            with open(tmp_filename, 'wb') as statfile:
                statfile.write(self.get_header())
                for device in os.listdir(self.devices):
                    if self.mount_check and not check_mount(
                            self.devices, device):
                        self.logger.error(
                            _("Device %s is not mounted, skipping.") % device)
                        continue
                    db_dir = os.path.join(self.devices, device, self.data_dir)
                    if not os.path.exists(db_dir):
                        self.logger.debug(
                            _("Path %s does not exist, skipping.") % db_dir)
                        continue
                    for root, dirs, files in os.walk(db_dir, topdown=False):
                        for filename in files:
                            if filename.endswith('.db'):
                                db_path = os.path.join(root, filename)
                                try:
                                    line_data = self.get_data(db_path)
                                except sqlite3.Error as err:
                                    self.logger.info(
                                        _("Error accessing db %s: %s") %
                                        (db_path, err))
                                    continue
                                if line_data:
                                    statfile.write(line_data)
                                    hasher.update(line_data)

            src_filename += hasher.hexdigest()
            renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
        finally:
            # Mirror the rmtree() at the top of the method so the temp working
            # directory is removed whether or not the rename succeeded.
            shutil.rmtree(working_dir, ignore_errors=True)
Example #36
 def rsync_then_merge(self, drive, db_file, args):
     tmp_filename = os.path.join(self.root, drive, 'tmp', args[0])
     if self._abort_rsync_then_merge(db_file, tmp_filename):
         return HTTPNotFound()
     new_broker = self.broker_class(tmp_filename)
     existing_broker = self.broker_class(db_file)
     db_file = existing_broker.db_file
     point = -1
     objects = existing_broker.get_items_since(point, 1000)
     while len(objects):
         new_broker.merge_items(objects)
         point = objects[-1]['ROWID']
         objects = existing_broker.get_items_since(point, 1000)
         sleep()
     new_broker.merge_syncs(existing_broker.get_syncs())
     self._post_rsync_then_merge_hook(existing_broker, new_broker)
     new_broker.newid(args[0])
     new_broker.update_metadata(existing_broker.metadata)
     if self._abort_rsync_then_merge(db_file, tmp_filename):
         return HTTPNotFound()
     renamer(tmp_filename, db_file)
     return HTTPNoContent()
Example #37
 def quarantine(self, reason):
     """
     The database will be quarantined and a
     sqlite3.DatabaseError will be raised indicating the action taken.
     """
     device_path = self.get_device_path()
     quar_path = os.path.join(device_path, 'quarantined',
                              self.db_type + 's',
                              os.path.basename(self.db_dir))
     try:
         renamer(self.db_dir, quar_path, fsync=False)
     except OSError as e:
         if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
             raise
         quar_path = "%s-%s" % (quar_path, uuid4().hex)
         renamer(self.db_dir, quar_path, fsync=False)
     detail = _('Quarantined %(db_dir)s to %(quar_path)s due to '
                '%(reason)s') % {'db_dir': self.db_dir,
                                 'quar_path': quar_path,
                                 'reason': reason}
     self.logger.error(detail)
     raise sqlite3.DatabaseError(detail)
Example #38
 def rsync_then_merge(self, drive, db_file, args):
     tmp_filename = os.path.join(self.root, drive, 'tmp', args[0])
     if self._abort_rsync_then_merge(db_file, tmp_filename):
         return HTTPNotFound()
     new_broker = self.broker_class(tmp_filename, logger=self.logger)
     existing_broker = self.broker_class(db_file, logger=self.logger)
     db_file = existing_broker.db_file
     point = -1
     objects = existing_broker.get_items_since(point, 1000)
     while len(objects):
         new_broker.merge_items(objects)
         point = objects[-1]['ROWID']
         objects = existing_broker.get_items_since(point, 1000)
         sleep()
     new_broker.merge_syncs(existing_broker.get_syncs())
     self._post_rsync_then_merge_hook(existing_broker, new_broker)
     new_broker.newid(args[0])
     new_broker.update_metadata(existing_broker.metadata)
     if self._abort_rsync_then_merge(db_file, tmp_filename):
         return HTTPNotFound()
     renamer(tmp_filename, db_file)
     return HTTPNoContent()
Example #39
 def possibly_quarantine(self, exc_type, exc_value, exc_traceback):
     """
     Checks the exception info to see if it indicates a quarantine situation
     (malformed or corrupted database). If not, the original exception will
     be reraised. If so, the database will be quarantined and a new
     sqlite3.DatabaseError will be raised indicating the action taken.
     """
     if 'database disk image is malformed' in str(exc_value):
         exc_hint = 'malformed'
     elif 'malformed database schema' in str(exc_value):
         exc_hint = 'malformed'
     elif ' is not a database' in str(exc_value):
         # older versions said 'file is not a database'
         # now 'file is encrypted or is not a database'
         exc_hint = 'corrupted'
     elif 'disk I/O error' in str(exc_value):
         exc_hint = 'disk error while accessing'
     else:
         six.reraise(exc_type, exc_value, exc_traceback)
     prefix_path = os.path.dirname(self.db_dir)
     partition_path = os.path.dirname(prefix_path)
     dbs_path = os.path.dirname(partition_path)
     device_path = os.path.dirname(dbs_path)
     quar_path = os.path.join(device_path, 'quarantined',
                              self.db_type + 's',
                              os.path.basename(self.db_dir))
     try:
         renamer(self.db_dir, quar_path, fsync=False)
     except OSError as e:
         if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
             raise
         quar_path = "%s-%s" % (quar_path, uuid4().hex)
         renamer(self.db_dir, quar_path, fsync=False)
     detail = _('Quarantined %(db_dir)s to %(quar_path)s due to '
                '%(exc_hint)s database') % {'db_dir': self.db_dir,
                                            'quar_path': quar_path,
                                            'exc_hint': exc_hint}
     self.logger.error(detail)
     raise sqlite3.DatabaseError(detail)
Example #40
def quarantine_renamer(device_path, corrupted_file_path):
    """
    In the case that a file is corrupted, move it to a quarantined
    area to allow replication to fix it.

    :param device_path: The path to the device the corrupted file is on.
    :param corrupted_file_path: The path to the file you want quarantined.

    :returns: path (str) of directory the file was moved to
    :raises OSError: re-raises non errno.EEXIST / errno.ENOTEMPTY
                     exceptions from rename
    """
    from_dir = dirname(corrupted_file_path)
    to_dir = join(device_path, 'quarantined', 'objects', basename(from_dir))
    invalidate_hash(dirname(from_dir))
    try:
        renamer(from_dir, to_dir)
    except OSError as e:
        if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
            raise
        to_dir = "%s-%s" % (to_dir, uuid.uuid4().hex)
        renamer(from_dir, to_dir)
Example #41
def quarantine_renamer(device_path, corrupted_file_path):
    """
    In the case that a file is corrupted, move it to a quarantined
    area to allow replication to fix it.

    :param device_path: The path to the device the corrupted file is on.
    :param corrupted_file_path: The path to the file you want quarantined.

    :returns: path (str) of directory the file was moved to
    :raises OSError: re-raises non errno.EEXIST / errno.ENOTEMPTY
                     exceptions from rename
    """
    from_dir = dirname(corrupted_file_path)
    to_dir = join(device_path, 'quarantined', 'objects', basename(from_dir))
    invalidate_hash(dirname(from_dir))
    try:
        renamer(from_dir, to_dir)
    except OSError as e:
        if e.errno not in (errno.EEXIST, errno.ENOTEMPTY):
            raise
        to_dir = "%s-%s" % (to_dir, uuid.uuid4().hex)
        renamer(from_dir, to_dir)
Example #42
def invalidate_hash(suffix_dir):
    """
    Invalidates the hash for a suffix_dir in the partition's hashes file.

    :param suffix_dir: absolute path to suffix dir whose hash needs
                       invalidating
    """

    suffix = os.path.basename(suffix_dir)
    partition_dir = os.path.dirname(suffix_dir)
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
            if suffix in hashes and not hashes[suffix]:
                return
        except Exception:
            return
        hashes[suffix] = None
        with open(hashes_file + '.tmp', 'wb') as fp:
            pickle.dump(hashes, fp, PICKLE_PROTOCOL)
        renamer(hashes_file + '.tmp', hashes_file)
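
The underlying idiom here is a read-modify-write of a pickle finished with an atomic rename. Below is a standalone sketch of that idiom, not an exact reimplementation: the real invalidate_hash() also holds lock_path() on the partition and returns early when the hashes file is missing.

import os
import pickle
import tempfile


def update_pickle_atomically(path, mutate):
    # Load the pickled dict (treating an unreadable file as empty), let the
    # caller mutate it, then replace the file via a temp file plus rename.
    try:
        with open(path, 'rb') as fp:
            data = pickle.load(fp)
    except (OSError, EOFError, pickle.UnpicklingError):
        data = {}
    mutate(data)
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path) or '.')
    with os.fdopen(fd, 'wb') as fp:
        pickle.dump(data, fp)
    os.rename(tmp_path, path)


# e.g. mark suffix 'ab3' as needing a rehash:
# update_pickle_atomically('hashes.pkl', lambda h: h.update({'ab3': None}))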
Example #43
class AuditorWorker(object):
    """Walk through file system to audit object"""
    def __init__(self, conf, zero_byte_only_at_fps=0):
        self.conf = conf
        self.logger = get_logger(conf, log_route='object-auditor')
        self.devices = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
            TRUE_VALUES
        self.max_files_per_second = float(conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(
            conf.get('bytes_per_second', 10000000))
        self.auditor_type = 'ALL'
        self.zero_byte_only_at_fps = zero_byte_only_at_fps
        if self.zero_byte_only_at_fps:
            self.max_files_per_second = float(self.zero_byte_only_at_fps)
            self.auditor_type = 'ZBF'
        self.log_time = int(conf.get('log_time', 3600))
        self.files_running_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0

    def audit_all_objects(self, mode='once'):
        self.logger.info(
            _('Begin object audit "%s" mode (%s)') %
            (mode, self.auditor_type))
        begin = reported = time.time()
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        files_running_time = 0
        all_locs = audit_location_generator(self.devices,
                                            object_server.DATADIR,
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.object_audit(path, device, partition)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            if time.time() - reported >= self.log_time:
                self.logger.info(
                    _('Object audit (%(type)s). '
                      'Since %(start_time)s: Locally: %(passes)d passed, '
                      '%(quars)d quarantined, %(errors)d errors '
                      'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                          'type': self.auditor_type,
                          'start_time': time.ctime(reported),
                          'passes': self.passes,
                          'quars': self.quarantines,
                          'errors': self.errors,
                          'frate': self.passes / (time.time() - reported),
                          'brate': self.bytes_processed /
                          (time.time() - reported)
                      })
                reported = time.time()
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
        elapsed = time.time() - begin
        self.logger.info(
            _('Object audit (%(type)s) "%(mode)s" mode '
              'completed: %(elapsed).02fs. '
              'Total files/sec: %(frate).2f , '
              'Total bytes/sec: %(brate).2f ') % {
                  'type': self.auditor_type,
                  'mode': mode,
                  'elapsed': elapsed,
                  'frate': self.total_files_processed / elapsed,
                  'brate': self.total_bytes_processed / elapsed
              })

    def object_audit(self, path, device, partition):
        """
        Audits the given object path.

        :param path: a path to an object
        :param device: the device the path is on
        :param partition: the partition the path is on
        """
        try:
            if not path.endswith('.data'):
                return
            try:
                name = object_server.read_metadata(path)['name']
            except Exception as exc:
                raise AuditException('Error when reading metadata: %s' % exc)
            _junk, account, container, obj = name.split('/', 3)
            df = object_server.DiskFile(self.devices,
                                        device,
                                        partition,
                                        account,
                                        container,
                                        obj,
                                        keep_data_fp=True)
            if df.data_file is None:
                # file is deleted, we found the tombstone
                return
            obj_size = os.path.getsize(df.data_file)
            if obj_size != int(df.metadata['Content-Length']):
                raise AuditException('Content-Length of %s does not match '
                                     'file size of %s' %
                                     (int(df.metadata['Content-Length']),
                                      os.path.getsize(df.data_file)))
            if self.zero_byte_only_at_fps and obj_size:
                return
            etag = md5()
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    incr_by=len(chunk))
                etag.update(chunk)
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            etag = etag.hexdigest()
            if etag != df.metadata['ETag']:
                raise AuditException("ETag of %s does not match file's md5 of "
                                     "%s" % (df.metadata['ETag'], etag))
        except AuditException as err:
            self.quarantines += 1
            self.logger.error(
                _('ERROR Object %(obj)s failed audit and will '
                  'be quarantined: %(err)s'), {
                      'obj': path,
                      'err': err
                  })
            object_dir = os.path.dirname(path)
            invalidate_hash(os.path.dirname(object_dir))
            renamer_path = os.path.dirname(path)
            to_path = os.path.join(self.devices, device, 'quarantined',
                                   'objects', os.path.basename(renamer_path))
            try:
                renamer(renamer_path, to_path)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    to_path = "%s-%s" % (to_path, uuid.uuid4().hex)
                    renamer(renamer_path, to_path)
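
Stripped of DiskFile, quarantining and rate limiting, the audit's integrity check is a size comparison plus a streamed md5. A minimal sketch, assuming a plain file path and the expected values taken from the object's metadata:

import hashlib
import os


def verify_object(path, expected_etag, expected_length, chunk_size=65536):
    # Compare the on-disk size and the streamed md5 digest against the
    # values recorded in the object's metadata.
    if os.path.getsize(path) != int(expected_length):
        return False
    md5sum = hashlib.md5()
    with open(path, 'rb') as fp:
        for chunk in iter(lambda: fp.read(chunk_size), b''):
            md5sum.update(chunk)
    return md5sum.hexdigest() == expected_etag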
Example #44
    def put(self, fd, tmppath, metadata, extension=''):
        """
        Finalize writing the file on disk by renaming it from the temp file
        to its real location.  This should be called after the data has
        been written to the temp file.

        :param fd: file descriptor of the temp file
        :param tmppath: path to the temporary file being used
        :param metadata: dictionary of metadata to be written
        :param extension: extension to be used when making the file
        """
        if extension == '.ts':
            # TombStone marker (deleted)
            return True

        # Fix up the metadata to ensure it has a proper value for the
        # Content-Type metadata, as well as an X_TYPE and X_OBJECT_TYPE
        # metadata values.

        content_type = metadata['Content-Type']
        if not content_type:
            metadata['Content-Type'] = FILE_TYPE
            x_object_type = FILE
        else:
            x_object_type = (MARKER_DIR if content_type.lower() == DIR_TYPE
                             else FILE)
        metadata[X_TYPE] = OBJECT
        metadata[X_OBJECT_TYPE] = x_object_type

        if extension == '.meta':
            # Metadata recorded separately from the file
            self.put_metadata(metadata)
            return True

        extension = ''

        if metadata[X_OBJECT_TYPE] == MARKER_DIR:
            self.create_dir_object(os.path.join(self.datadir, self.obj))
            self.put_metadata(metadata)
            self.data_file = self.datadir + '/' + self.obj
            return True

        # Check if directory already exists.
        if self.is_dir:
            self.logger.error('Directory already exists %s/%s' %
                              (self.datadir, self.obj))
            return False

        timestamp = normalize_timestamp(metadata[X_TIMESTAMP])
        write_metadata(tmppath, metadata)
        if X_CONTENT_LENGTH in metadata:
            self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
        tpool.execute(os.fsync, fd)
        if self.obj_path:
            dir_objs = self.obj_path.split('/')
            tmp_path = ''
            if len(dir_objs):
                for dir_name in dir_objs:
                    if tmp_path:
                        tmp_path = tmp_path + '/' + dir_name
                    else:
                        tmp_path = dir_name
                    if not self.create_dir_object(
                            os.path.join(self.container_path, tmp_path)):
                        self.logger.error("Failed in subdir %s",\
                                        os.path.join(self.container_path,tmp_path))
                        return False

        renamer(tmppath, os.path.join(self.datadir, self.obj + extension))
        do_chown(os.path.join(self.datadir, self.obj + extension),
                 self.uid, self.gid)
        self.metadata = metadata
        self.data_file = self.datadir + '/' + self.obj + extension
        return True
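
The finalization above follows the usual write-to-temp, fsync, rename recipe. Below is a minimal sketch of just that durability pattern; write_file_durably is a hypothetical helper and skips the metadata and ownership handling done by put():

import os
import tempfile


def write_file_durably(dest_path, data):
    # Write to a temp file in the destination directory, fsync it, rename
    # it into place, then fsync the directory so the rename itself
    # survives a crash.
    dest_dir = os.path.dirname(dest_path) or '.'
    fd, tmp_path = tempfile.mkstemp(dir=dest_dir)
    try:
        os.write(fd, data)
        os.fsync(fd)
    finally:
        os.close(fd)
    os.rename(tmp_path, dest_path)
    dir_fd = os.open(dest_dir, os.O_RDONLY)
    try:
        os.fsync(dir_fd)
    finally:
        os.close(dir_fd)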
Example #45
 def revive_drive(self, device):
     disabled_name = device + "X"
     if os.path.isdir(disabled_name):
         renamer(disabled_name, device)
     else:
         os.system('sudo mount %s' % device)
Example #46
 def kill_drive(self, device):
     if os.path.ismount(device):
         os.system('sudo umount %s' % device)
     else:
         renamer(device, device + "X")
Example #47
    def process_object_update(self, update_path, device, policy):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        :param policy: storage policy of object update
        """
        try:
            update = pickle.load(open(update_path, 'rb'))
        except Exception as e:
            if getattr(e, 'errno', None) == errno.ENOENT:
                return
            self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                                  update_path)
            self.stats.quarantines += 1
            self.logger.increment('quarantines')
            target_path = os.path.join(device, 'quarantined', 'objects',
                                       os.path.basename(update_path))
            renamer(update_path, target_path, fsync=False)
            try:
                # If this was the last async_pending in the directory,
                # then this will succeed. Otherwise, it'll fail, and
                # that's okay.
                os.rmdir(os.path.dirname(update_path))
            except OSError:
                pass
            return

        def do_update():
            successes = update.get('successes', [])
            headers_out = HeaderKeyDict(update['headers'].copy())
            headers_out['user-agent'] = 'object-updater %s' % os.getpid()
            headers_out.setdefault('X-Backend-Storage-Policy-Index',
                                   str(int(policy)))
            headers_out.setdefault('X-Backend-Accept-Redirect', 'true')
            headers_out.setdefault('X-Backend-Accept-Quoted-Location', 'true')
            container_path = update.get('container_path')
            if container_path:
                acct, cont = split_path('/' + container_path, minsegs=2)
            else:
                acct, cont = update['account'], update['container']
            part, nodes = self.get_container_ring().get_nodes(acct, cont)
            obj = '/%s/%s/%s' % (acct, cont, update['obj'])
            events = [
                spawn(self.object_update, node, part, update['op'], obj,
                      headers_out) for node in nodes
                if node['id'] not in successes
            ]
            success = True
            new_successes = rewrite_pickle = False
            redirect = None
            redirects = set()
            for event in events:
                event_success, node_id, redirect = event.wait()
                if event_success is True:
                    successes.append(node_id)
                    new_successes = True
                else:
                    success = False
                if redirect:
                    redirects.add(redirect)

            if success:
                self.stats.successes += 1
                self.logger.increment('successes')
                self.logger.debug('Update sent for %(obj)s %(path)s', {
                    'obj': obj,
                    'path': update_path
                })
                self.stats.unlinks += 1
                self.logger.increment('unlinks')
                os.unlink(update_path)
                try:
                    # If this was the last async_pending in the directory,
                    # then this will succeed. Otherwise, it'll fail, and
                    # that's okay.
                    os.rmdir(os.path.dirname(update_path))
                except OSError:
                    pass
            elif redirects:
                # erase any previous successes
                update.pop('successes', None)
                redirect = max(redirects, key=lambda x: x[-1])[0]
                redirect_history = update.setdefault('redirect_history', [])
                if redirect in redirect_history:
                    # force next update to be sent to root, reset history
                    update['container_path'] = None
                    update['redirect_history'] = []
                else:
                    update['container_path'] = redirect
                    redirect_history.append(redirect)
                self.stats.redirects += 1
                self.logger.increment("redirects")
                self.logger.debug(
                    'Update redirected for %(obj)s %(path)s to %(shard)s', {
                        'obj': obj,
                        'path': update_path,
                        'shard': update['container_path']
                    })
                rewrite_pickle = True
            else:
                self.stats.failures += 1
                self.logger.increment('failures')
                self.logger.debug('Update failed for %(obj)s %(path)s', {
                    'obj': obj,
                    'path': update_path
                })
                if new_successes:
                    update['successes'] = successes
                    rewrite_pickle = True

            return rewrite_pickle, redirect

        rewrite_pickle, redirect = do_update()
        if redirect:
            # make one immediate retry to the redirect location
            rewrite_pickle, redirect = do_update()
        if rewrite_pickle:
            write_pickle(update, update_path,
                         os.path.join(device, get_tmp_dir(policy)))
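
The redirect bookkeeping at the end of do_update() is the subtle part: a shard location is retried once, and seeing the same redirect again forces the next attempt back to the root container. A small sketch of just that logic, operating on the same update dict shape as above:

def apply_redirect(update, redirect):
    # Record a container redirect on an async_pending update dict; the
    # same shard appearing twice in a row resets the update to the root.
    update.pop('successes', None)              # stale per-node successes
    history = update.setdefault('redirect_history', [])
    if redirect in history:
        update['container_path'] = None
        update['redirect_history'] = []
    else:
        update['container_path'] = redirect
        history.append(redirect)
    return update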
Example #48
    def process_object_update(self, update_path, device, policy):
        """
        Process the object information to be updated and update.

        :param update_path: path to pickled object update file
        :param device: path to device
        :param policy: storage policy of object update
        """
        try:
            update = pickle.load(open(update_path, 'rb'))
        except Exception:
            self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                                  update_path)
            self.stats.quarantines += 1
            self.logger.increment('quarantines')
            target_path = os.path.join(device, 'quarantined', 'objects',
                                       os.path.basename(update_path))
            renamer(update_path, target_path, fsync=False)
            return
        successes = update.get('successes', [])
        part, nodes = self.get_container_ring().get_nodes(
            update['account'], update['container'])
        obj = '/%s/%s/%s' % \
              (update['account'], update['container'], update['obj'])
        headers_out = HeaderKeyDict(update['headers'])
        headers_out['user-agent'] = 'object-updater %s' % os.getpid()
        headers_out.setdefault('X-Backend-Storage-Policy-Index',
                               str(int(policy)))
        events = [
            spawn(self.object_update, node, part, update['op'], obj,
                  headers_out) for node in nodes if node['id'] not in successes
        ]
        success = True
        new_successes = False
        for event in events:
            event_success, node_id = event.wait()
            if event_success is True:
                successes.append(node_id)
                new_successes = True
            else:
                success = False
        if success:
            self.stats.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s', {
                'obj': obj,
                'path': update_path
            })
            self.stats.unlinks += 1
            self.logger.increment('unlinks')
            os.unlink(update_path)
        else:
            self.stats.failures += 1
            self.logger.increment('failures')
            self.logger.debug('Update failed for %(obj)s %(path)s', {
                'obj': obj,
                'path': update_path
            })
            if new_successes:
                update['successes'] = successes
                write_pickle(update, update_path,
                             os.path.join(device, get_tmp_dir(policy)))
Example #49
File: db.py Project: parkys1/swift
    def initialize(self, put_timestamp=None):
        """
        Create the DB

        :param put_timestamp: timestamp of initial PUT request
        """
        if self.db_file == ':memory:':
            tmp_db_file = None
            conn = get_db_connection(self.db_file, self.timeout)
        else:
            mkdirs(self.db_dir)
            fd, tmp_db_file = mkstemp(suffix='.tmp', dir=self.db_dir)
            os.close(fd)
            conn = sqlite3.connect(tmp_db_file,
                                   check_same_thread=False,
                                   factory=GreenDBConnection,
                                   timeout=0)
        # creating dbs implicitly does a lot of transactions, so we
        # pick fast, unsafe options here and do a big fsync at the end.
        with closing(conn.cursor()) as cur:
            cur.execute('PRAGMA synchronous = OFF')
            cur.execute('PRAGMA temp_store = MEMORY')
            cur.execute('PRAGMA journal_mode = MEMORY')
        conn.create_function('chexor', 3, chexor)
        conn.row_factory = sqlite3.Row
        conn.text_factory = str
        conn.executescript("""
            CREATE TABLE outgoing_sync (
                remote_id TEXT UNIQUE,
                sync_point INTEGER,
                updated_at TEXT DEFAULT 0
            );
            CREATE TABLE incoming_sync (
                remote_id TEXT UNIQUE,
                sync_point INTEGER,
                updated_at TEXT DEFAULT 0
            );
            CREATE TRIGGER outgoing_sync_insert AFTER INSERT ON outgoing_sync
            BEGIN
                UPDATE outgoing_sync
                SET updated_at = STRFTIME('%s', 'NOW')
                WHERE ROWID = new.ROWID;
            END;
            CREATE TRIGGER outgoing_sync_update AFTER UPDATE ON outgoing_sync
            BEGIN
                UPDATE outgoing_sync
                SET updated_at = STRFTIME('%s', 'NOW')
                WHERE ROWID = new.ROWID;
            END;
            CREATE TRIGGER incoming_sync_insert AFTER INSERT ON incoming_sync
            BEGIN
                UPDATE incoming_sync
                SET updated_at = STRFTIME('%s', 'NOW')
                WHERE ROWID = new.ROWID;
            END;
            CREATE TRIGGER incoming_sync_update AFTER UPDATE ON incoming_sync
            BEGIN
                UPDATE incoming_sync
                SET updated_at = STRFTIME('%s', 'NOW')
                WHERE ROWID = new.ROWID;
            END;
        """)
        if not put_timestamp:
            put_timestamp = normalize_timestamp(0)
        self._initialize(conn, put_timestamp)
        conn.commit()
        if tmp_db_file:
            conn.close()
            with open(tmp_db_file, 'r+b') as fp:
                os.fsync(fp.fileno())
            with lock_parent_directory(self.db_file, self.pending_timeout):
                if os.path.exists(self.db_file):
                    # It's as if there was a "condition" where different parts
                    # of the system were "racing" each other.
                    raise DatabaseAlreadyExists(self.db_file)
                renamer(tmp_db_file, self.db_file)
            self.conn = get_db_connection(self.db_file, self.timeout)
        else:
            self.conn = conn
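
Outside of Swift, the same create-in-temp, fsync, then rename idiom for bringing a new SQLite database online can be sketched with the standard library alone; create_db_atomically and its error handling are stand-ins here, not the project's API (which also serializes the rename with lock_parent_directory()):

import os
import sqlite3
import tempfile


def create_db_atomically(db_path, schema_sql):
    # Build the database in a temp file so a half-written DB never appears
    # at db_path, fsync it, then rename it into place.
    db_dir = os.path.dirname(db_path) or '.'
    os.makedirs(db_dir, exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(suffix='.tmp', dir=db_dir)
    os.close(fd)
    conn = sqlite3.connect(tmp_path)
    try:
        conn.executescript(schema_sql)
        conn.commit()
    finally:
        conn.close()
    with open(tmp_path, 'r+b') as fp:
        os.fsync(fp.fileno())
    if os.path.exists(db_path):
        os.unlink(tmp_path)
        raise RuntimeError('%s already exists' % db_path)
    os.rename(tmp_path, db_path)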
Example #50
class ObjectAuditor(Daemon):
    """Audit objects."""
    def __init__(self, conf):
        self.conf = conf
        self.logger = get_logger(conf, 'object-auditor')
        self.devices = conf.get('devices', '/srv/node')
        self.mount_check = conf.get('mount_check', 'true').lower() in \
                              ('true', 't', '1', 'on', 'yes', 'y')
        self.max_files_per_second = float(conf.get('files_per_second', 20))
        self.max_bytes_per_second = float(
            conf.get('bytes_per_second', 10000000))
        self.log_time = int(conf.get('log_time', 3600))
        self.files_running_time = 0
        self.bytes_running_time = 0
        self.bytes_processed = 0
        self.total_bytes_processed = 0
        self.total_files_processed = 0
        self.passes = 0
        self.quarantines = 0
        self.errors = 0

    def run_forever(self):
        """Run the object audit until stopped."""
        while True:
            self.run_once('forever')
            self.total_bytes_processed = 0
            self.total_files_processed = 0
            time.sleep(30)

    def run_once(self, mode='once'):
        """Run the object audit once."""
        self.logger.info(_('Begin object audit "%s" mode') % mode)
        begin = reported = time.time()
        all_locs = audit_location_generator(self.devices,
                                            object_server.DATADIR,
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.object_audit(path, device, partition)
            self.files_running_time = ratelimit_sleep(
                self.files_running_time, self.max_files_per_second)
            self.total_files_processed += 1
            if time.time() - reported >= self.log_time:
                self.logger.info(
                    _('Since %(start_time)s: Locally: %(passes)d passed audit, '
                      '%(quars)d quarantined, %(errors)d errors '
                      'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                          'start_time': time.ctime(reported),
                          'passes': self.passes,
                          'quars': self.quarantines,
                          'errors': self.errors,
                          'frate': self.passes / (time.time() - reported),
                          'brate': self.bytes_processed /
                          (time.time() - reported)
                      })
                reported = time.time()
                self.passes = 0
                self.quarantines = 0
                self.errors = 0
                self.bytes_processed = 0
        elapsed = time.time() - begin
        self.logger.info(
            _('Object audit "%(mode)s" mode completed: %(elapsed).02fs. '
              'Total files/sec: %(frate).2f , '
              'Total bytes/sec: %(brate).2f ') % {
                  'mode': mode,
                  'elapsed': elapsed,
                  'frate': self.total_files_processed / elapsed,
                  'brate': self.total_bytes_processed / elapsed
              })

    def object_audit(self, path, device, partition):
        """
        Audits the given object path.

        :param path: a path to an object
        :param device: the device the path is on
        :param partition: the partition the path is on
        """
        try:
            if not path.endswith('.data'):
                return
            try:
                name = object_server.read_metadata(path)['name']
            except Exception as exc:
                raise AuditException('Error when reading metadata: %s' % exc)
            _junk, account, container, obj = name.split('/', 3)
            df = object_server.DiskFile(self.devices,
                                        device,
                                        partition,
                                        account,
                                        container,
                                        obj,
                                        keep_data_fp=True)
            if df.data_file is None:
                # file is deleted, we found the tombstone
                return
            if os.path.getsize(df.data_file) != \
                    int(df.metadata['Content-Length']):
                raise AuditException('Content-Length of %s does not match '
                                     'file size of %s' %
                                     (int(df.metadata['Content-Length']),
                                      os.path.getsize(df.data_file)))
            etag = md5()
            for chunk in df:
                self.bytes_running_time = ratelimit_sleep(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    incr_by=len(chunk))
                etag.update(chunk)
                self.bytes_processed += len(chunk)
                self.total_bytes_processed += len(chunk)
            etag = etag.hexdigest()
            if etag != df.metadata['ETag']:
                raise AuditException("ETag of %s does not match file's md5 of "
                                     "%s" % (df.metadata['ETag'], etag))
        except AuditException as err:
            self.quarantines += 1
            self.logger.error(
                _('ERROR Object %(obj)s failed audit and will '
                  'be quarantined: %(err)s'), {
                      'obj': path,
                      'err': err
                  })
            invalidate_hash(os.path.dirname(path))
            renamer_path = os.path.dirname(path)
            renamer(
                renamer_path,
                os.path.join(self.devices, device, 'quarantined', 'objects',
                             os.path.basename(renamer_path)))
            return