Beispiel #1
0
class Servlet(Thread):
    """Thread that tracks which clients have claimed which files of a context.

    The thread body (:meth:`run`) periodically reloads the configuration and
    rescans the source path.  Requests arrive through :meth:`handle`, which
    dispatches to the ``handle_*`` methods.

    ``self.clients`` maps ``filename -> {client: expiry_stamp}``, where the
    stamp is a ``time.time()`` value after which the claim is dropped by
    :meth:`expire_claims`.
    """

    def __init__(self, context):
        """Set up logger, configuration, scanner, claim table and stats.

        Args:
            context: name of the configuration section to serve; also used
                in the logger name and in the claim-state file name.
        """
        super().__init__()
        self.context = context

        logger_str = f"{utils.logger_str(__class__)} {context}"
        self.logger = logging.getLogger(logger_str)
        # self.logger.setLevel(logging.INFO)

        self.config = config.Config.instance()
        self.copies = int(self.config.get(self.context, "copies", 2))
        self.path = config.path_for(self.config.get(self.context, "source"))
        self.scanner = scanner.ScannerLite(self.context, self.path)
        self.rescan = utils.get_interval(self.config, "rescan", self.context)

        lazy_write = self.config.get(context, "LAZY WRITE", 5)
        lazy_write = utils.str_to_duration(lazy_write)
        # self.clients: { filename : { client: expiry_time, } }
        clients_state = f"/tmp/cb.{context}-clients.json.bz2"
        # Pass the configured value; it used to be computed and then ignored
        # in favour of a hard-coded 5.
        self.clients = PersistentDict(clients_state, lazy_write=lazy_write)
        self.stats = stats.Stats()
        # handle() answers None until run() has finished its first scan.
        self.handling = False
        # Defined here so the attribute exists before the thread starts;
        # run() still resets it when it begins.
        self.bailout = False

    def expire_claims(self):
        """Drop every claim whose expiry stamp is not in the future.

        Logs a warning with the number of claims about to be dropped.  Every
        entry of ``self.clients`` is re-assigned, even when nothing expired.
        """
        # One timestamp for the whole pass so that the count and the filter
        # below agree on what "expired" means.
        now = time.time()
        # Snapshot the keys so assignments in the loop never touch a live
        # iterator.
        filenames = list(self.clients)
        expires = sum(
            1
            for filename in filenames
            for stamp in self.clients[filename].values()
            if stamp <= now)
        if expires:
            self.logger.warning(f"Warning: about to expire {expires} files")
        for filename in filenames:
            self.clients[filename] = {
                client: stamp
                for client, stamp in self.clients[filename].items()
                if stamp > now}

    # metadata(): returns a dict({'copies': ##, 'rescan': ##})
    def handle_metadata(self, args):
        """Return the configured copy count and rescan interval; args unused."""
        return {'copies': self.copies, 'rescan': self.rescan}

    # list(): returns a dict( { filename : [ size, nclaims ] , })
    def handle_list(self, args):
        """Return ``{filename: [size, nclaims]}`` for every scanned file.

        Args:
            args: sequence whose first element is the requesting client
                (used for logging only).
        """
        client = args[0]
        self.logger.debug(f"Listing all for {client}")
        # Expire first so the claim counts only reflect live claims.
        self.expire_claims()
        listing = {}
        for filename in self.scanner:
            size = self.scanner[filename]
            if filename in self.clients:
                nclients = len(self.clients[filename])
            else:
                nclients = 0
            listing[filename] = [size, nclients]
        self.stats['files listed'].incr(len(listing))
        self.logger.debug(
            f"Returning {len(listing)} to {client}: {str(listing)[:200]}...")
        return listing

    # claim(client, [filename,]): increments the nclaims for each filename
    #    returns "ack" or None
    def handle_claim(self, args):
        """Record (or refresh) a claim by ``client`` on each given file.

        Args:
            args: sequence starting with ``(client, files)``.

        Returns:
            The string ``"ack"``.
        """
        client, files = args[:2]
        n = len(files)
        self.logger.debug(f"claiming {n} files for client {client}")
        for filename in files:
            if filename not in self.clients:
                self.clients[filename] = {}
            # The claim stays valid for one rescan interval from now.
            self.clients[filename][client] = time.time() + self.rescan
        self.stats['files claimed'].incr(n)
        self.logger.debug(str(self.clients.data)[:200])
        return "ack"

    # unclaim(client, [filename, ]): decrements the nclaims for each filename
    #     returns "ack" or None
    def handle_unclaim(self, args):
        """Remove the claims of ``client`` on each given file.

        A warning is logged when the file has fewer than ``self.copies``
        claims at the moment it is unclaimed.

        Args:
            args: sequence starting with ``(client, files)``.

        Returns:
            The string ``"ack"``.
        """
        client, files = args[:2]

        n = len(files)
        self.logger.debug(f"unclaiming {n} files for client {client}")
        for filename in files:
            if filename not in self.clients:
                continue
            claims = self.clients[filename]
            if len(claims) < self.copies:
                # The message is repeated ten times on purpose in the
                # original code; kept as is.
                self.logger.warning(
                    f"WARNING: {client} dropping {filename} prematurely\n"
                    * 10)
            if client in claims:
                del claims[client]
        self.stats['files unclaimed'].incr(n)
        return "ack"

    # unclaim_all(client): deletes all claims for this client
    #     returns "ack" or None
    def handle_unclaim_all(self, args):
        """Remove every claim held by the client in ``args[0]``.

        Returns:
            The string ``"ack"``.
        """
        client = args[0]

        for filename in self.clients.keys():
            if client in self.clients[filename]:
                del self.clients[filename][client]
                self.stats['files unclaimed'].incr(1)
        return "ack"

    def histogram(self):
        """Return a text histogram of scanned files grouped by claim count.

        The first line gives the total number of files and the wanted copy
        count.  Then, from the highest claim count down, one line per
        non-empty group: number of files, total size, one ``## `` per claim
        and one ``__ `` per copy still missing.
        """
        hist = f"{len(self.scanner)} total files, need {self.copies} copies\n"
        buckets = {0: 0}
        bucketsize = {0: 0}
        self.expire_claims()

        with self.scanner:
            # Materialise the keys inside the ``with`` block so the loop
            # below does not iterate a live view after the block is left.
            # NOTE(review): presumably the context manager is the scanner's
            # lock -- confirm against ScannerLite.
            scanned_files = list(self.scanner.keys())
        for filename in scanned_files:
            size = self.scanner[filename]
            if filename in self.clients:
                bucket = len(self.clients[filename])
            else:
                bucket = 0
            buckets[bucket] = buckets.get(bucket, 0) + 1
            bucketsize[bucket] = bucketsize.get(bucket, 0) + size
        for bucket in sorted(buckets, reverse=True):
            if not buckets[bucket]:
                # Empty group (only possible for bucket 0): emit nothing,
                # not even the blank line the old code produced.
                continue
            size = utils.bytes_to_str(bucketsize[bucket])
            hist += f"{buckets[bucket]:6d} files, {size.rjust(8)}: {'## ' * bucket}"
            if bucket < self.copies:
                missing = self.copies - bucket
                hist += "__ " * missing
            hist += "\n"
        return hist

    def dump(self):
        """Return one line per claimed file listing its clients, sorted.

        A client whose claim stamp is already in the past is suffixed
        with ``!``.
        """
        now = time.time()
        lines = []
        for filename in self.clients:
            claims = self.clients[filename]
            marks = "".join(
                f"{client}! " if claims[client] < now else f"{client} "
                for client in sorted(claims.keys()))
            lines.append(f"{filename}: {marks}\n")
        return "".join(lines)

    def audit(self):
        """Log the histogram at INFO and each statistic's qps() at DEBUG."""
        self.logger.info(f"Auditing:\n{self.histogram()}")
        for statistic in self.stats:
            self.logger.debug(f"{statistic}: {self.stats[statistic].qps()}")
        # self.logger.log(5, f"\n{self.dump()}")
        # self.clients.lazy_write()

    # handle an incoming action(args)
    # called in parallel from many serving threads
    def handle(self, action, args):
        """Dispatch ``action`` to the matching ``handle_*`` method.

        Args:
            action: one of ``'list'``, ``'claim'``, ``'unclaim'``,
                ``'unclaim all'`` or ``'metadata'``.
            args: argument sequence passed through to the handler.

        Returns:
            The handler's response, or ``None`` while ``self.handling`` is
            false (i.e. before run() finished its first scan).

        Raises:
            KeyError: if ``action`` is not one of the names above.
        """
        if not self.handling:
            return None
        # self.logger.debug(f"requested: {action} ({args})")
        actions = {
            'list': self.handle_list,
            'claim': self.handle_claim,
            'unclaim': self.handle_unclaim,
            'unclaim all': self.handle_unclaim_all,
            'metadata': self.handle_metadata,
        }
        response = actions[action](args)
        # self.logger.debug(f"responding: {action} {args} -> {response}")
        return response

    # Server will call into my datagram functions; I just brood
    def run(self):
        """Thread body: scan once, start handling, then rescan until bailout.

        Each loop pass reloads the configuration, refreshes the rescan
        interval, scans, and sleeps for what is left of the interval, but
        never less than 10 (passed to ``time.sleep``).
        """
        self.bailout = False
        # pre-scan
        self.scanner.scan()
        self.logger.info("Ready to serve")
        self.handling = True
        while not self.bailout:
            timer = elapsed.ElapsedTimer()
            self.config.load()
            self.rescan = utils.get_interval(self.config, "rescan",
                                             self.context)
            self.scanner.scan()
            sleepy_time = max(self.rescan - timer.elapsed(), 10)
            sleep_msg = utils.duration_to_str(sleepy_time)
            self.logger.info(f"sleeping {sleep_msg} til next rescan")
            time.sleep(sleepy_time)
class CloneDict(object):
    '''
    Mimics ordinary dict objects, but allows cheap shallow copy operation

    >>> d = CloneDict({1: 2})
    >>> d
    CloneDict({1: 2})
    >>> d[42] = 42
    >>> d == {1: 2, 42: 42}
    True
    >>> d2 = d.__copy__()
    >>> del d[1]
    >>> d
    CloneDict({42: 42})
    >>> d2 == {1: 2, 42: 42}
    True

    State is kept as a ``base`` PersistentDict plus a plain ``diff`` dict of
    changes made since the last copy.  A deleted key is stored in ``diff``
    with the NO_VALUE marker.  Lookups consult ``diff`` first and fall back
    to ``base``.
    '''

    __slots__ = [
        'base', # persistent dict
        'diff', # changes relative to it
        ]

    def __init__(self, d=None):
        '''
        Build an empty instance (d is None), a copy of another CloneDict,
        or an instance initialised from whatever PersistentDict accepts.
        '''
        if isinstance(d, CloneDict):
            # Merge the source's pending changes into its base and let both
            # objects share the result; each then starts with an empty diff.
            # This relies on PersistentDict.update() returning the updated
            # dict rather than None.
            # NOTE(review): NO_VALUE markers in d.diff are passed to
            # base.update() too; confirm PersistentDict treats them as
            # deletions.
            if d.diff:
                d.base = d.base.update(d.diff)
                d.diff = {}
            self.base = d.base
            self.diff = {}
        elif d is None:
            self.base = PersistentDict()
            self.diff = {}
        else:
            self.base = PersistentDict(d)
            self.diff = {}

    def __copy__(self):
        '''Return a new CloneDict sharing this one's (merged) base.'''
        return CloneDict(self)

    def update(self, *args, **kwargs):
        '''Record new values in the diff; accepts the same arguments as dict.update.'''
        self.diff.update(*args, **kwargs)

    def get(self, key, default=None):
        '''Return the value for key, or default if it is absent or deleted.'''
        if key not in self.diff:
            return self.base.get(key, default)

        value = self.diff[key]
        if value is NO_VALUE:
            return default
        return value

    def __getitem__(self, key):
        if key not in self.diff:
            return self.base[key]
        value = self.diff[key]
        if value is NO_VALUE:
            # Carry the key, like dict does, so the error is diagnosable.
            raise KeyError(key)
        return value

    def __setitem__(self, key, value):
        self.diff[key] = value

    def __delitem__(self, key):
        # Only records the deletion marker; unlike dict, no KeyError is
        # raised when the key was not present.
        self.diff[key] = NO_VALUE

    def __contains__(self, key):
        if key not in self.diff:
            return key in self.base
        return self.diff[key] is not NO_VALUE

    def __iter__(self):
        '''
        Iterate over live keys: first those in the diff, then the base keys
        the diff does not override.  Without this method ``for k in d``
        would fall back to calling __getitem__ with 0, 1, 2, ...
        '''
        for k, v in self.diff.items():
            if v is not NO_VALUE:
                yield k
        for k in self.base.keys():
            if k not in self.diff:
                yield k

    def keys(self):
        '''Return a list of the live keys.'''
        return list(self)

    def items(self):
        '''Return a list of (key, value) pairs for the live keys.'''
        result = [kv for kv in self.diff.items() if kv[1] is not NO_VALUE]
        result.extend(
            kv for kv in self.base.items() if kv[0] not in self.diff)
        return result

    def __len__(self):
        return sum(1 for _ in self)

    def __repr__(self):
        return 'CloneDict({!r})'.format(dict(self.items()))

    def __eq__(self, other):
        if self is other:
            return True
        try:
            other_items = other.items()
        except AttributeError:
            # Not dict-like: let Python try the reflected comparison.
            return NotImplemented
        # Compare as dicts so that unhashable values (lists, dicts, ...) work.
        return dict(self.items()) == dict(other_items)

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result