Example #1
File: reserve.py Project: Byron/bit
    def generate_fix_script(self, report, writer):
        last_host = None
        mode = self.settings_value().mode

        for rec in report.records:
            fs, reserve = rec[1], rec[4]
            if isinstance(fs, str):
                continue
            # end skip aggregation records - filesystem is a string in that case
            if last_host and last_host != fs.host:
                writer('# Cannot proceed as the host changed during iteration - please re-run the report with the hosts=name value set\n')
                break
            # end verify we stay on a single host
            if last_host is None:
                writer("# Reservation automation for host '%s'\n" % fs.host)
            # end initial info
            last_host = fs.host
            if reserve < fs.used:
                writer("# Reserve for '%s' is already to low (%s reserved vs %s used), consider increasing its zfs:priority\n" % (fs.url(),
                                                                                                                                 int_to_size_string(reserve), 
                                                                                                                                 int_to_size_string(fs.used)))
            else:
                writer("zfs set %s=%s %s\n" % (mode, int_to_size_string(reserve), fs.name))
            # end handle reserve issue
        # end for each record
        return True
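
Every example on this page formats byte counts through int_to_size_string, whose definition is not shown here. As a rough sketch only - assuming it simply scales the count through binary-prefix units, which may not match the project's exact formatting - it could look like this:

def int_to_size_string(num_bytes):
    # Hypothetical stand-in: render a byte count as a human-readable size string,
    # e.g. 1536 -> '1.5KB'. Not the actual helper from Byron/bit.
    size = float(num_bytes)
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        if size < 1024.0:
            return '%.1f%s' % (size, unit)
        size /= 1024.0
    return '%.1fPB' % size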
Example #2
File: fsstat.py Project: Byron/bit
 def progress():
     elapsed = time() - st
     log.info(
         "Processed %i files with %s in %.2fs (%.2f files/s | %s MB/s)",
         nr,
         int_to_size_string(totalbcount),
         elapsed,
         nr / elapsed,
         mb(totalbcount) / elapsed,
     )
Example #3
File: fsstat.py Project: Byron/bit
 def _stream_end(self):
     """On-demand progress"""
     super(HashStreamer, self)._stream_end()
     if self._log:
         _mb = mb(self.bytes)
         self._log.info(
             "Done hashing %s in %.2f s (%.2f MB/s)",
             int_to_size_string(self.bytes),
             self.elapsed,
             _mb / self.elapsed,
         )
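
Examples #2, #3 and #5 also divide by an mb helper when reporting MB/s throughput. Given the explicit / (1024 ** 2) used for the same purpose in Example #7, it is presumably a one-line conversion along these lines (an assumption, not the project's actual code):

def mb(num_bytes):
    # Assumed helper: express a byte count in megabytes (MiB) as a float
    return num_bytes / float(1024 ** 2)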
Example #4
File: fsstat.py Project: Byron/bit
 def _handle_chunk(self, chunk):
     super(HashStreamer, self)._handle_chunk(chunk)
     self._hasher.update(chunk)
     if self._compressor:
         ratio = len(chunk) / float(len(self._compressor(chunk)))
         # handle first chunk
         if self.ratio is None:
             self.ratio = ratio
         else:
             self.ratio = (self.ratio + ratio) / 2.0
         # end compute average, properly
     # end handle compression
     if self._log:
         self._log.info("Hashed %s", int_to_size_string(self.bytes))
Example #5
File: io_stat.py Project: Byron/bit
    def generate(self):
        report = self.ReportType(columns=self.report_schema)
        config = self._sanitize_configuration()
        record = report.records.append
        workers = list()

        def _record_worker_result():
            # poll them, as join will block
            while workers:
                for w in workers[:]:
                    if w.is_alive():
                        continue
                    # end ignore unfinished workers
                    self._error |= w.exception is not None
                    record((    w.name,
                                w.elapsed_file_generate_read,
                                mb(w.file_size / (w.elapsed_file_generate_read or 1)),
                                w.elapsed_file_generate_write,
                                mb(w.file_size / (w.elapsed_file_generate_write or 1)),
                                w.elapsed_file_generate,
                                w.elapsed_read_volume,
                                w.read_volume,
                                mb(w.read_volume / (w.elapsed_read_volume or 1)),
                                w.elapsed_write_volume and mb(w.read_volume / w.elapsed_write_volume) or 0,
                                w.exception))
                    workers.remove(w)
                # end for each worker
                sleep(0.5)
            # end while we have workers to check
            record(report.aggregate_record())
        # end utility
        print >> sys.stderr, self.configuration()
        print >> sys.stderr, "Creating %s dataset, and a %s %s volume, in %i threads" % \
                                                    (int_to_size_string(config.num_threads * config.file_size),
                                                     int_to_size_string(config.num_threads * config.random_read_volume),
                                                     config.random_writes and 'read and write' or 'read',
                                                     config.num_threads)

        use_mmap = config.num_threads == 1
        if use_mmap:
            print >> sys.stderr, "Using mmap in single-threaded mode, hoping to perfectly workaround the system's FS cache"
        # end 
        try:
            for wid in range(config.num_threads):
                worker = StressorTerminatableThread(config, use_mmap = use_mmap)
                workers.append(worker)
                worker.start()
            # end for each worker

            _record_worker_result()
        except KeyboardInterrupt:
            print >> sys.stderr, "Sending cancellation request to all workers"
            for worker in workers:
                worker.cancel()
            # end for each worker

            print >> sys.stderr, "Waiting for workers to finish - they will stop as soon as possible"
            _record_worker_result()
        # end handle SIGTERM

        return report
Example #6
File: fsstat.py Project: Byron/bit
    def _append_path_record(self, records, path, streamer, log, ex_stat=None, digest_ratio=None):
        """Append meta-data about the given path to the given list of records
        @param stat if you have received the stat already, we will not get it again
        @param digest_ratio if not None, we will use the given digest and ration  instead of creating our own
        @return stat structure of the path, or None if the path could not be read"""
        # minimize file access
        try:
            ascii_path = to_ascii(path)
            stat = ex_stat or lstat(ascii_path)

            if digest_ratio:
                digest, ratio = digest_ratio
            else:
                digest, ratio = None, None
            # end handle digest_ratio

            ldest = None
            fd = None

            if islink(stat.st_mode):
                # Don't follow symlinks as this tricks us into thinking we have duplicates.
                # However, we would also have to check for hardlinks, but tracking those
                # can easily cost too much memory. Hardlinks are rare anyway, so it's okay.
                ldest = unicode(readlink(ascii_path))
            elif isreg(stat.st_mode) and not digest:
                fd = os.open(ascii_path, os.O_RDONLY)
            # end open file
        except OSError:
            log.error("Could not stat or open '%s' - skipping", ascii_path, exc_info=False)
            return None
        # end skip failing file

        if fd is not None:
            try:
                extra_progress = stat.st_size >= self.big_file
                if extra_progress:
                    log.info("Streaming %s file at '%s'", int_to_size_string(stat.st_size), ascii_path)
                # end extra logging

                try:
                    digest = (
                        streamer.set_stream(lambda size: os.read(fd, size))
                        .set_log(extra_progress and log or None)
                        .stream()
                        .digest()
                    )
                    ratio = streamer.ratio
                except IOError:
                    log.error("Failed to stream file '%s' - skipping", ascii_path, exc_info=True)
                    return None
                # end handle io errors gracefully
            finally:
                os.close(fd)
            # end assure we close the file
        # end handle symlink

        try:
            path = unicode(path)
        except Exception:
            log.error("Failed to handle encoding of path '%s' - skipping", ascii_path, exc_info=True)
            return None
        # end ignore unicode conversion errors

        # Symlinks get a null digest, which is what identifies them as symlinks.
        # NOTE: We don't care about their contents - it's just a filename and
        # we don't hash it, as we are not interested in its contents
        records.append(
            {
                "path": path,
                "size": stat.st_size,
                "atime": seconds_to_datetime(stat.st_atime),
                "ctime": seconds_to_datetime(stat.st_ctime),
                "mtime": seconds_to_datetime(stat.st_mtime),
                "uid": stat.st_uid,
                "gid": stat.st_gid,
                "nblocks": stat.st_blocks,
                "nlink": stat.st_nlink,
                "mode": stat.st_mode,
                "ldest": ldest,
                "sha1": digest,
                "ratio": ratio,
            }
        )

        return stat
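
Example #6 tests stat.st_mode with islink and isreg; these read like thin wrappers around the mode predicates from the standard stat module. A minimal sketch under that assumption (the project may define them differently):

from stat import S_ISLNK, S_ISREG

def islink(mode):
    # assumed wrapper: True if the mode bits describe a symbolic link
    return S_ISLNK(mode)

def isreg(mode):
    # assumed wrapper: True if the mode bits describe a regular file
    return S_ISREG(mode)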
Example #7
File: version.py Project: Byron/bit
    def _build_database(self, config):
        """@return our database ready to be used.
        It will be a list of tuples of (prefix, VersionBundleList) pairs
        Will load from cache or from an sql database (and building the cache in the process)"""
        if not config.cache_path:
            if not config.table:
                raise AssertionError("Please set either db_url and table or the cache_path to specify a data source")
            config.cache_path = self._cache_path(config.table)
            print >>sys.stderr, "Would use cache default at %s" % config.cache_path
        else:
            print >>sys.stderr, "Will attempt to use cache at %s" % config.cache_path
        # end handle cache_path

        db = None
        # prefer to use a cache
        if config.cache_path and config.cache_path.isfile():
            # LOAD EXISTING CACHE
            ######################
            st = time()
            db = self._deserialize_db(config.cache_path)
            elapsed = time() - st

            cstat = config.cache_path.stat()
            print >>sys.stderr, "Loaded cache of size %s from %s in %fs (%fMB/s)" % (
                int_to_size_string(cstat.st_size),
                config.cache_path,
                elapsed,
                (cstat.st_size / elapsed) / (1024 ** 2),
            )
        # end try loading cache
        elif config.db_url and config.table:
            # BUILD CACHE FROM DATABASE
            ############################
            print >>sys.stderr, "reading from database at '%s/%s'" % (config.db_url, config.table)

            engine = create_engine(config.db_url)
            mcon = engine.connect()
            md = MetaData(engine, reflect=True)

            if config.table not in md.tables:
                raise AssertionError(
                    "Table named '%s' didn't exist in database at '%s'" % (config.table, config.db_url)
                )
            # end verify table exists

            progress_every = 40000

            def record_iterator():
                c = md.tables[config.table].c
                selector = select(
                    [c.path, c.size, c.ctime, c.mtime, c.mode, c.ratio],
                    (c.ctime != None) & (c.mtime != None) & (c.sha1 != None),
                ).order_by(c.path)

                st = time()
                for rid, row in enumerate(mcon.execute(selector)):
                    if rid % progress_every == 0:
                        elapsed = time() - st
                        print >>sys.stderr, "Read %i records in %fs (%f records/s)" % (rid, elapsed, rid / elapsed)
                    # end handle progress
                    yield (row[0], (row[1], to_s(row[2]), to_s(row[3]), row[4], row[5] or 1.0))
                # end for each row

            # end record iterator

            st = time()
            db = FilteringVersionBundler(config).bundle(record_iterator())
            print >>sys.stderr, "Extracted version %i bundled in %fs" % (len(db), time() - st)

            # store cache file
            st = time()
            cpath = self._cache_path(config.table)
            csize = self._serialize_db(db, cpath)
            elapsed = time() - st
            print >>sys.stderr, "Wrote cache with size %s to '%s' in %fs (%f MB/s)" % (
                int_to_size_string(csize),
                cpath,
                elapsed,
                (csize / elapsed) / 1024 ** 2,
            )
        # end obtain raw database
        else:
            raise AssertionError(
                "Could not build cache database - set db_url and table, cache_path, or table to use a default cache from previous run"
            )
        # end handle cache or db url

        # REBUILD RAW CACHE
        ####################
        # finally, rebuild and filter into our actual structure
        st = time()
        db = FilteringVersionBundler(config).rebuild_bundle(db)

        def prefix(t):
            return t[0]

        # end prefix getter

        def key_factory(attr):
            def meta_get(t):
                return getattr(t[1], attr)

            return meta_get

        # end factory

        # SORT INTO FLAT LIST
        #####################
        key_fun = config.sort_by == self.report_schema[0][0] and prefix or key_factory(config.sort_by)

        db = sorted(db.iteritems(), key=key_fun, reverse=config.sort_order == self.ORDER_DESC)
        elapsed = time() - st

        print >>sys.stderr, "Filtered database in %fs" % elapsed
        return db