Example #1
class LRUCache(StorageInterface):
    """In memory LRU cache"""

    def __init__(self, max_size=1024):
        if max_size < 1:
            raise ValueError("max_size must be a positive integer")
        self.max_size = max_size
        self.engine = LRUCacheEngine(max_size)

    def __getitem__(self, key):
        value = self.engine.get(key)
        if value is None:
            raise KeyError(key)
        return value

    def __setitem__(self, key, value):
        self.engine.put(key, value)

    def __contains__(self, key):
        return self.engine.get(key) is not None

    def __len__(self):
        return self.max_size
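
Note that `__len__` above reports the configured capacity, not the current item count, and that values of `None` are indistinguishable from misses. `StorageInterface` and `LRUCacheEngine` are not shown in this excerpt; a minimal stand-in engine for experimenting with the wrapper (an assumption, not the project's actual implementation) could look like:

from collections import OrderedDict

class LRUCacheEngine:
    """Minimal sketch of an LRU engine: evicts the least-recently-used key."""

    def __init__(self, max_size):
        self.max_size = max_size
        self._data = OrderedDict()

    def get(self, key):
        if key not in self._data:
            return None
        self._data.move_to_end(key)  # mark as most recently used
        return self._data[key]

    def put(self, key, value):
        self._data[key] = value
        self._data.move_to_end(key)
        if len(self._data) > self.max_size:
            self._data.popitem(last=False)  # drop the least recently used entry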
Example #2
def find_injections(pcap_file):
    _cache = LRUCache(10000)  # 10,000 entries - should last at least 100 msec on a 100% utilized gigabit network
    _hitset = set()
    with open(pcap_file, "rb") as f:
        pcap = dpkt.pcap.Reader(f)
        try:
            for ts, buf in pcap:
                ip = get_ip_packet(buf, pcap)
                try:
                    if ip is not None and ip.p == dpkt.ip.IP_PROTO_TCP:
                        tcp = ip.data
                        if (tcp.sport in PORT_SET or tcp.dport in PORT_SET) and len(tcp.data) > 1:
                            key = get_key(ip, tcp)
                            #ip.len    : 11 bits (for typical MTU-sized packets)
                            #ip.ttl    : 8 bits
                            #tcp.flags : 8 bits (normally)
                            value = (ip.ttl << 24) ^ (tcp.flags << 16) ^ ip.len
                            cached_value = _cache.get(key)
                            if cached_value is None:
                                _cache.put(key, value)
                            elif cached_value != value:
                                _hitset.add(key)
                except Exception:
                    pass

        except dpkt.dpkt.NeedData:
            pass
    injection_count = 0
    if len(_hitset) > 0:
        _cache = LRUCache(1024)
        with open(pcap_file, "rb") as f:
            pcap = dpkt.pcap.Reader(f)
            try:
                for ts, buf in pcap:
                    ip = get_ip_packet(buf, pcap)
                    if ip is not None and ip.p == dpkt.ip.IP_PROTO_TCP and (ip.data.sport in PORT_SET or ip.data.dport in PORT_SET):
                        key = get_key(ip, ip.data)
                        if key in _hitset and len(ip.data.data) > 1:
                            tcp = ip.data
                            _cached_tcp_data = _cache.get(key)
                            if _cached_tcp_data is None:
                                _cache.put(key, tcp.data)
                            elif tcp.data != _cached_tcp_data:
                                if len(tcp.data) > len(_cached_tcp_data):
                                    # new data is longer; store that
                                    if tcp.data[:len(_cached_tcp_data)] != _cached_tcp_data:
                                        injection_found(ip, tcp, _cached_tcp_data)
                                        injection_count += 1
                                    _cache.put(key, tcp.data)
                                elif len(tcp.data) < len(_cached_tcp_data):
                                    if tcp.data != _cached_tcp_data[:len(tcp.data)]:
                                        injection_found(ip, tcp, _cached_tcp_data)
                                        injection_count += 1
                                else:
                                    injection_found(ip, tcp, _cached_tcp_data)
                                    injection_count += 1
            except dpkt.dpkt.NeedData:
                pass
    if injection_count == 0:
        print(" - no injections")
Example #3
File: statics.py Project: 984958198/tg2
class StaticsMiddleware(object):
    def _adapt_path(self, path):
        return normcase(normpath(path))

    def __init__(self, app, root_dir, cache_max_age=3600):
        self.app = app
        self.cache_max_age = cache_max_age
        self.doc_root = self._adapt_path(root_dir)
        self.paths_cache = LRUCache(1024)

    def __call__(self, environ, start_response):
        full_path = environ['PATH_INFO']
        filepath = self.paths_cache.get(full_path)

        if filepath is None:
            path = full_path.split('/')
            if INVALID_PATH_PARTS(path):
                return HTTPNotFound('Out of bounds: %s' % environ['PATH_INFO'])(environ, start_response)
            filepath = self._adapt_path(join(self.doc_root, *path))
            self.paths_cache.put(full_path, filepath)

        if isfile(filepath):
            return FileServeApp(filepath, self.cache_max_age)(environ, start_response)

        return self.app(environ, start_response)
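
`INVALID_PATH_PARTS` is referenced but not defined in this excerpt; it guards against path-traversal segments in the split path. A stand-in with the same call shape (an assumption, not necessarily the project's exact definition), plus a wiring example:

# Truthy when the split path contains traversal segments.
INVALID_PATH_PARTS = frozenset(['..', '.']).intersection

# Hypothetical wiring around an existing WSGI app:
# app = StaticsMiddleware(app, '/srv/myapp/public', cache_max_age=3600)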
Example #4
class AdapterRegistry(object):
    """ Registry of adapters"""

    _sentinel = object()

    def __init__(self):
        self.underlying = adapter.AdapterRegistry()
        self.cache = LRUCache(500)

    def lookup_adapter(self, typ):
        """ Lookup adapter for ``typ``"""
        adapter = self.cache.get(typ, self._sentinel)
        if adapter is self._sentinel:
            adapter = self.underlying.lookup([typ], IJSONSerializeable, "")
            self.cache.put(typ, adapter)
        return adapter

    def register_adapter(self, typ, adapter=None):
        """ Register ``adapter`` for type ``typ``

        If no ``adapter`` supplied then this method returns decorator.
        """
        if adapter is None:

            def decorator(adapter):
                self.register_adapter_impl(typ, adapter)
                return adapter

            return decorator
        return self.register_adapter_impl(typ, adapter)

    def register_adapter_impl(self, typ, adapter):
        self.underlying.register([implementedBy(typ)], IJSONSerializeable, "",
                                 adapter)
        self.cache.clear()
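
A usage sketch for the decorator form of `register_adapter` (the `datetime.date` adapter is a hypothetical example; `IJSONSerializeable` and the zope-style `adapter` module come from the surrounding project):

import datetime

registry = AdapterRegistry()

@registry.register_adapter(datetime.date)
def serialize_date(value):
    # Hypothetical adapter: render dates as ISO 8601 strings.
    return value.isoformat()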
Example #5
class RMemorySessionStore(Singleton):
    def __init__(self, config):
        self._cache = LRUCache(config.session_cache_size)

    def push(self, token_id, data):
        self._cache.put(token_id, data)

    def get(self, token):
        return self._cache.get(token, None)

    def remove(self, token_id):
        try:
            self._cache.put(token_id, None)
        except KeyError:
            pass

    def contains(self, session_id):
        return self._cache.get(session_id) is not None
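
Note that `remove` does not evict the entry; it overwrites it with `None`, which `get` and `contains` then treat as a miss. A usage sketch, assuming the `Singleton` base allows direct construction and that `config` only needs a `session_cache_size` attribute:

class _Config:
    session_cache_size = 1024  # hypothetical setting

store = RMemorySessionStore(_Config())
store.push('token-123', {'user': 'alice'})
assert store.contains('token-123')
store.remove('token-123')  # overwrites the value with None rather than evicting
assert not store.contains('token-123')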
Example #6
class VladCache(object):
    def __init__(self):
        self.word_cache = LRUCache(1)
        self.vlad_cache = LRUCache(1000)

    def load_words(self, data):
        words = self.word_cache.get('words')
        if words is None:
            words, _ = bow.load_vlad_words_and_frequencies(data.config)
            self.word_cache.put('words', words)
        return words

    def vlad_histogram(self, image, features, words):
        vlad = self.vlad_cache.get(image)
        if vlad is None:
            vlad = unnormalized_vlad(features, words)
            vlad = signed_square_root_normalize(vlad)
            self.vlad_cache.put(image, vlad)
        return vlad
Example #7
class EDBag(Counter):
    def __init__(self):
        super(EDBag, self).__init__()
        self.cache1 = LRUCache(256) # values where distance=1
        self.cache2 = LRUCache(256) # values where distance>1

    def add(self, x):
        if x not in self:
            self.cache2.clear()
        self[x] += 1

    def closest_by_edit_distance(self, x):
        if x in self:
            # Optimization: if x is in multiset, then closest
            # edit dist = 0. Nothing can be any closer.
            return (x, 0)

        # Optimization: if we've looked up this value before,
        # return the previously computed answer.
        cached_answer = self.cache1.get(x)
        if cached_answer:
            return cached_answer
        cached_answer = self.cache2.get(x)
        if cached_answer:
            return cached_answer

        closest = None
        closest_dist = None
        for y, _ in self.most_common():
            d = editdistance.eval(x, y)
            if closest_dist is None or d < closest_dist:
                closest = y
                closest_dist = d
                if d == 1:
                    # Optimization: nothing can be any closer, as
                    # we know there's nothing at edit distance 0 (x is not
                    # in the multiset).
                    self.cache1.put(x, (closest, closest_dist))
                    return (closest, closest_dist)

        self.cache2.put(x, (closest, closest_dist))
        return (closest, closest_dist)
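
A quick usage sketch (requires the `editdistance` package used by the class above):

bag = EDBag()
for word in ('kitten', 'kitten', 'sitting'):
    bag.add(word)

# 'mitten' is one substitution away from 'kitten', so the distance-1
# fast path returns immediately and caches the answer in cache1.
print(bag.closest_by_edit_distance('mitten'))  # ('kitten', 1)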
Example #8
class RMemorySessionStore(RUtils.singleton.Singleton):
    def __init__(self):
        if hasattr(self, '_init'):
            return
        self._init = True
        self.config = RUtils.config.RConfig()
        self._cache = LRUCache(self.config.session_cache_size)

    def push(self, token_id, data):
        self._cache.put(token_id, data)

    def get(self, token):
        return self._cache.get(token, None)

    def remove(self, session_id):
        try:
            self._cache.put(session_id, None)
        except KeyError:
            pass

    def contains(self, session_id):
        return self._cache.get(session_id) is not None
Example #9
def trim(flist, flowmaxbytes, trimmed_extension, preserve_times, post_process):
    cache = LRUCache(10000)
    trimmed_bytes = 0
    for pcap_file in flist:
        trimmed_file = pcap_file + trimmed_extension
        with open(pcap_file, "rb") as f:
            try:
                if pcap_file.endswith("pcapng"):
                    pcap = dpkt.pcapng.Reader(f)
                else:
                    pcap = dpkt.pcap.Reader(f)
                with open(trimmed_file, "wb") as trimmed:
                    if pcap_file.endswith("pcapng"):
                        pcap_out = dpkt.pcapng.Writer(trimmed)
                    else:
                        pcap_out = dpkt.pcap.Writer(trimmed)
                    for ts, buf in pcap:
                        fivetuple = get_fivetuple(buf, pcap, pcap_file)
                        byte_count = len(buf)
                        flow_total = cache.get(fivetuple)
                        if flow_total is not None:
                            byte_count += flow_total
                        cache.put(fivetuple, byte_count)
                        if byte_count < flowmaxbytes:
                            pcap_out.writepkt(buf, ts)
                        else:
                            trimmed_bytes += len(buf)
            except dpkt.dpkt.NeedData:
                pass
            except ValueError:
                pass
        if os.path.exists(trimmed_file):
            if preserve_times:
                stat = os.stat(pcap_file)
                os.utime(trimmed_file, (stat.st_atime, stat.st_mtime))
            if post_process:
                post_process(pcap_file, trimmed_file)
    return trimmed_bytes
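
`get_fivetuple` is not shown. Judging by its arguments it decodes the link layer (which varies by capture type) and builds a per-flow key; a hypothetical stand-in for Ethernet captures could be:

import dpkt

def get_fivetuple(buf, pcap, pcap_file):
    # Hypothetical: assumes Ethernet framing; the real helper presumably
    # uses the reader/file name to handle other datalink types as well.
    eth = dpkt.ethernet.Ethernet(buf)
    ip = eth.data
    if not isinstance(ip, dpkt.ip.IP):
        return None
    return (ip.src, ip.dst, ip.p,
            getattr(ip.data, 'sport', 0), getattr(ip.data, 'dport', 0))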
Example #10
class Collection(object):
    def __init__(self, directory, cache_size=128):
        self.dir = directory
        self.index = susi_native.load(self.dir)
        self._cache = LRUCache(cache_size)

    def lookup(self, pattern, k=10, snippet_size=1000000):
        if len(pattern) < 3:
            return []

        key = (pattern, k, snippet_size)
        cached_result = self._cache.get(key)
        if cached_result is not None:
            return cached_result

        res = susi_native.search(self.index, pattern.encode('latin1'), k,
                                 snippet_size)
        self._cache.put(key, res)
        return res
Example #11
File: shelf.py Project: topiaruss/birding
class LRUShelf(Shelf):
    """An in-memory Least-Recently Used shelf up to `maxsize`.."""
    def __init__(self, maxsize=1000):
        self.store = LRUCache(int(maxsize))

    def getitem(self, key):
        value = self.store.get(key, UNSET)
        if value is UNSET:
            raise KeyError(key)
        return value

    def setitem(self, key, value):
        self.store.put(key, value)

    def delitem(self, key):
        self.store.invalidate(key)

    def clear(self):
        self.store.clear()
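
The two-argument `get` plus `put`, `invalidate`, and `clear` calls are consistent with the `repoze.lru.LRUCache` API. A usage sketch:

shelf = LRUShelf(maxsize=2)
shelf.setitem('a', 1)
shelf.setitem('b', 2)
assert shelf.getitem('a') == 1
shelf.delitem('a')  # delegates to LRUCache.invalidate()
shelf.clear()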
Example #12
class ClusterCache(object):
    def __init__(self, cache_size):
        self.lru = LRUCache(cache_size)
        self.hits = 0
        self.misses = 0

    def get(self, file_buffer, ptr):
        v = self.lru.get((file_buffer, ptr))
        if v is not None:
            self.hits += 1
            return v
        v = ClusterData(file_buffer, ptr)
        self.lru.put((file_buffer, ptr), v)
        self.misses += 1
        return v

    def clear(self):
        logger.debug("CACHE HITS " + str(self.hits) + " VS MISSES " + str(self.misses))
        self.lru.clear()
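
A small helper for turning the counters into a hit rate (hypothetical, not part of the original class):

def hit_rate(cache):
    # Fraction of ClusterCache.get calls served from the LRU cache.
    total = cache.hits + cache.misses
    return cache.hits / total if total else 0.0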
Example #13
File: shelf.py Project: Parsely/birding
class LRUShelf(Shelf):
    """An in-memory Least-Recently Used shelf up to `maxsize`.."""

    def __init__(self, maxsize=1000):
        self.store = LRUCache(int(maxsize))

    def getitem(self, key):
        value = self.store.get(key, UNSET)
        if value is UNSET:
            raise KeyError(key)
        return value

    def setitem(self, key, value):
        self.store.put(key, value)

    def delitem(self, key):
        self.store.invalidate(key)

    def clear(self):
        self.store.clear()
Example #14
def partly_distinct(iterable):
    """
    Filters items from iterable and **tries to return only distincts**.
    Keeps order.

    :param Iterable iterable: Something iterable we have to filter.

    >>> list(partly_distinct([1, 2, 3, 2, 1, 2, 3, 4]))
    [1, 2, 3, 4]

    .. note::
        Unlike :py:func:`distinct` it won't guarantee that all elements
        will be distinct. But if the stream has a rather small cardinality,
        this will work.

    .. note::
        The current implementation tracks the 10,000 most recently seen
        distinct values. If your cardinality is bigger, there might be some
        duplicates.
    """
    cache = LRUCache(10000)
    for item in iterable:
        if not cache.get(item):
            cache.put(item, True)
            yield item
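
A small demonstration of the bounded-memory trade-off: values can repeat once 10,000 other distinct values have evicted their cache entry, but for low-cardinality streams the output is fully distinct.

stream = (i % 3 for i in range(10))
assert list(partly_distinct(stream)) == [0, 1, 2]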
Example #15
class Context():
    def __init__(self):

        self.cli = False
        self.args = {}

        self.debug = False
        self.debug2 = False

        self.quiet = False

        self.profile = False

        self.components = OrderedDict()

        self.start_item = OrderedDict()
        self.start_nodes = []
        self.config_files = []
        self.included_files = []

        self.props = {}
        self.properties = self.props

        self.var = {}

        self.working_dir = os.getcwd()
        self.library_path = os.path.dirname(
            os.path.realpath(__file__)) + "/../../library"

        self.comp = Components(self)

        self._functions = {
            "text": functions,
            "xml": xmlfunctions,
            "datetime": datetime,
            "dt": datetime,
            "re": re,
            "sys": sys,
            "urllib": urllib,
            "random": random.Random()
        }
        self._globals = self._functions

        class Functions():
            pass

        self.f = Functions()
        for k, v in self._functions.items():
            setattr(self.f, k, v)

        self._compiled = LRUCache(512)  # TODO: Configurable

    @staticmethod
    def _class_from_frame(fr):
        try:
            class_type = fr.f_locals['self'].__class__
        except KeyError:
            class_type = None

        return class_type

    def get(self, uid, fail=True):
        #logger.debug("Getting component: %s" % component_id)

        if uid is None:
            raise ETLException("Cannot retrieve component with id None.")

        comp = self.components.get(uid, None)

        if comp is None and fail:
            raise ETLException("Component not found with id '%s'" % uid)

        return comp

    def key(self, comp):
        for k, c in self.components.items():
            if c == comp:
                return k
        return None

    def find(self, type):
        result = []
        for comp in self.components.values():
            if isinstance(comp, type):
                result.append(comp)
        return result

    def add(self, urn, component, description=None):

        # FIXME: TODO: Allow anonymous components? These would be exported in-line with their parents.
        # This assumes that components are initialized completely (possibly better for config comprehension).
        # It would also serve as a hint for deep/shallow copying (anonymous components are always deep-copied?).

        if urn is None:
            raise Exception('Tried to add an object with no URN')
        if component is None:
            raise Exception('Tried to add a null object')
        if not isinstance(component, Component):
            raise Exception('Tried to add a non Component object: %s' %
                            component)
        if self.components.get(urn, None) is not None:
            raise Exception("Tried to add an already existing URN: %s" % urn)

        component.ctx = self
        component.urn = urn
        component.description = description

        self.components[urn] = component
        return component

    def interpolate(self, value, m=None, data=None):
        """
        Resolves expressions `${ ... }`, lambdas and functions in a value,
        with respect to the current context and the current message.

        Expressions are CubETL custom syntax for string interpolation.
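
        Example (hypothetical message)::

            ctx.interpolate('${ m["name"] }', m={'name': 'etl'})  # -> 'etl'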
        """

        # FIXME: TO BE REMOVED after migrating all interpolate calls
        if isinstance(value, dict):
            raise ETLException("Possibly invalid interpolate call!")

        if value is None:
            return None

        # If the value is a callable (function or lambda), inspect
        # its parameters. Acceptable signatures are:
        # (ctx), (m), (ctx, m)
        if callable(value):
            sig = inspect.signature(value)
            paramnames = list(sig.parameters.keys())
            if len(sig.parameters) == 1 and paramnames[0] == 'ctx':
                value = value(self)
            elif len(sig.parameters) == 1 and paramnames[0] == 'm':
                value = value(m)
            elif len(
                    sig.parameters
            ) == 2 and paramnames[0] == 'ctx' and paramnames[1] == 'm':
                value = value(self, m)
            else:
                raise ETLConfigurationException(
                    "Invalid lambda expression signature: %s" % sig)

        # If the value is not a string, it is immediately returned
        if not isinstance(value, str):
            return value

        # Process string values

        value = value.strip()

        pos = -1
        result = str(value)

        for dstart, dend in (('${|', '|}'), ('${', '}')):
            if (pos >= -1):
                pos = result.find(dstart)
            while (pos >= 0):
                pos_end = result.find(dend)
                expr = result[pos + len(dstart):pos_end].strip()

                compiled = self._compiled.get(expr)
                try:
                    if (not compiled):
                        compiled = compile(expr, '', 'eval')
                        self._compiled.put(expr, compiled)

                    c_locals = {
                        "m": m,
                        "ctx": self,
                        "f": self.f,
                        "props": self.props,
                        "var": self.var,
                        "cubetl": cubetl
                    }
                    if data:
                        c_locals.update(data)
                    res = eval(compiled, self._globals, c_locals)

                    if (self.debug2):
                        if (isinstance(res, str)):
                            logger.debug(
                                'Evaluated: %s = %r' %
                                (expr, res if
                                 (len(res) < 100) else res[:100] + ".."))
                        else:
                            logger.debug('Evaluated: %s = %r' % (expr, res))

                except (Exception) as e:
                    exc_type, exc_value, exc_traceback = sys.exc_info()

                    caller_component = None
                    frame = inspect.currentframe()
                    for caller in inspect.getouterframes(frame):
                        fc = Context._class_from_frame(caller[0])
                        if (isclass(fc) and issubclass(fc, Component)):
                            caller_component = caller[0].f_locals['self']
                            break

                    #logger.error("Error evaluating expression %s on data: %s" % (expr, m))
                    self._eval_error_message = m

                    logger.error(
                        'Error evaluating expression "%s" called from %s:\n%s'
                        % (expr, caller_component, ("".join(
                            traceback.format_exception_only(
                                exc_type, exc_value)))))
                    raise

                if (pos > 0) or (pos_end < len(result) - (len(dend))):
                    result = result[0:pos] + str(res) + result[pos_end +
                                                               (len(dend)):]
                    pos = result.find(dstart)
                else:
                    # Keep type of non-string types
                    result = res
                    pos = -2

        return result

    def copy_message(self, m):
        # TODO: Create a copy-on-write message instead of actually copying (?)
        if m is None:
            return {}
        else:
            return copy.copy(m)

    def _do_process(self, process, ctx, multiple):
        # TODO: When using multiple, this should allow to yield,
        # TODO: Also, this method shall be called "consume" or something, and public

        # Reduce the OrderedDict to a dict, but interpolate its attributes in order
        item = {}
        for k in ctx.start_item.keys():
            item[k] = ctx.interpolate(ctx.start_item[k], item)
        msgs = ctx.comp.process(process, item)
        count = 0
        result = [] if multiple else None
        for m in msgs:
            count = count + 1
            if multiple:
                result.append(m)
            else:
                result = m
        return (result, count)

    def run(self, start_node, multiple=False):

        ctx = self

        if isinstance(start_node, str):
            start_node_comp = ctx.get(start_node, fail=False)
        else:
            start_node_comp = start_node

        # Launch process
        if not start_node_comp:
            logger.error("Start process '%s' not found in configuration" %
                         start_node)
            if ctx.cli:
                sys.exit(1)
            else:
                raise Exception(
                    "Start process '%s' not found in configuration" %
                    start_node)

        result = None
        processed = 0

        # Launch process and consume items
        try:
            logger.debug("Initializing components")
            ctx.comp.initialize(start_node_comp)

            logger.info("Processing %s" % start_node_comp)

            if ctx.profile:
                logger.warning(
                    "Profiling execution (WARNING this is SLOW) and saving results to: %s"
                    % ctx.profile)
                cProfile.runctx(
                    "(result, processed) = self._do_process(start_node_comp, ctx, multiple=multiple)",
                    globals(), locals(), ctx.profile)
            else:
                (result, processed) = self._do_process(start_node_comp,
                                                       ctx,
                                                       multiple=multiple)

            logger.debug("%s items resulted from the process" % processed)

            logger.debug("Finalizing components")
            ctx.comp.finalize(start_node_comp)

            ctx.comp.cleanup()

        except KeyboardInterrupt as e:
            logger.error("User interrupted")
            sys.exit(1)

        except Exception as e:
            '''
            exc_type, exc_value, exc_traceback = sys.exc_info()
            logger.fatal("Error during process: %s" % ", ".join((traceback.format_exception_only(exc_type, exc_value))))

            if hasattr(ctx, "eval_error_message"):
                pp = pprint.PrettyPrinter(indent=4, depth=2)
                print(pp.pformat(ctx._eval_error_message))

            traceback.print_exception(exc_type, exc_value, exc_traceback)
            '''
            raise

        return result

    def include(self, configfile):
        configfile = self.interpolate(configfile)

        # Import only once
        abspath = os.path.abspath(configfile)
        if abspath in self.included_files:
            return
        self.included_files.append(abspath)

        logger.info("Including config file: %s", configfile)
        spec = importlib.util.spec_from_file_location("configmodule",
                                                      configfile)
        configmodule = importlib.util.module_from_spec(spec)
        try:
            spec.loader.exec_module(configmodule)
        except FileNotFoundError as e:
            raise ETLConfigurationException(
                "Config include file not found: %s" % (configfile))
        except Exception as e:
            raise ETLConfigurationException(
                "An error ocurred while loading '%s' config file: %s" %
                (configfile, e))
        configmodule.cubetl_config(self)
Example #16
class Context():
    def __init__(self):

        self.cli = False
        self.args = {}

        self.debug = False
        self.debug2 = False

        self.quiet = False

        self.profile = False

        self.config_files = []

        self.start_node = None
        self.start_message = {}  # Bunch()?  TODO: Review if this is definitive, compare performance

        self.props = {}
        self.properties = self.props

        self.var = {}

        self.working_dir = os.getcwd()
        self.library_path = os.path.dirname(
            os.path.realpath(__file__)) + "/../../library"

        self._globals = {
            "text": functions,
            "xml": xmlfunctions,
            "cubetl": cubetl,
            "datetime": datetime,
            "re": re,
            "sys": sys,
            "urllib": urllib,
            "random": random.Random()
        }

        self._compiled = LRUCache(512)  # TODO: Configurable

        self.comp = Components(self)

    @staticmethod
    def _class_from_frame(fr):
        try:
            class_type = fr.f_locals['self'].__class__
        except KeyError:
            class_type = None

        return class_type

    def interpolate(self, m, value, data={}):

        if value is None:
            return None

        if not isinstance(value, basestring):
            return value

        value = value.strip()

        pos = -1
        result = str(value)

        for dstart, dend in (('${|', '|}'), ('${', '}')):
            if (pos >= -1):
                pos = result.find(dstart)
            while (pos >= 0):
                pos_end = result.find(dend)
                expr = result[pos + len(dstart):pos_end].strip()

                compiled = self._compiled.get(expr)
                try:
                    if (not compiled):
                        compiled = compile(expr, '', 'eval')
                        self._compiled.put(expr, compiled)

                    c_locals = {
                        "m": m,
                        "ctx": self,
                        "props": self.props,
                        "var": self.var,
                        "cubetl": cubetl
                    }
                    c_locals.update(data)
                    res = eval(compiled, self._globals, c_locals)

                    if (self.debug2):
                        if (isinstance(res, basestring)):
                            logger.debug(
                                'Evaluated: %s = %r' %
                                (expr, res if
                                 (len(res) < 100) else res[:100] + ".."))
                        else:
                            logger.debug('Evaluated: %s = %r' % (expr, res))

                except (Exception) as e:
                    exc_type, exc_value, exc_traceback = sys.exc_info()

                    caller_component = None
                    frame = inspect.currentframe()
                    for caller in inspect.getouterframes(frame):
                        fc = Context._class_from_frame(caller[0])
                        if (isclass(fc) and issubclass(fc, Component)):
                            caller_component = caller[0].f_locals['self']
                            break

                    #logger.error("Error evaluating expression %s on data: %s" % (expr, m))
                    self._eval_error_message = m

                    logger.error(
                        'Error evaluating expression "%s" called from %s:\n%s'
                        % (expr, caller_component, ("".join(
                            traceback.format_exception_only(
                                exc_type, exc_value)))))
                    raise

                if ((pos > 0) or (pos_end < len(result) - (len(dend)))):
                    result = result[0:pos] + str(res) + result[pos_end +
                                                               (len(dend)):]
                    pos = result.find(dstart)
                else:
                    # Keep non-string types
                    result = res
                    pos = -2

        return result

    def copy_message(self, m):
        if m is None:
            return {}
        else:
            return copy.copy(m)
Example #17
class Mapper(SubMapperParent):
    """Mapper handles URL generation and URL recognition in a web
    application.

    Mapper is built around handling dictionaries. It is assumed that the web
    application will handle the dictionary returned by URL recognition
    to dispatch appropriately.

    URL generation is done by passing keyword parameters into the
    generate function; a URL is then returned.

    """
    def __init__(self,
                 controller_scan=controller_scan,
                 directory=None,
                 always_scan=False,
                 register=True,
                 explicit=True):
        """Create a new Mapper instance

        All keyword arguments are optional.

        ``controller_scan``
            Function reference that will be used to return a list of
            valid controllers used during URL matching. If
            ``directory`` keyword arg is present, it will be passed
            into the function during its call. This option defaults to
            a function that will scan a directory for controllers.

            Alternatively, a list of controllers or None can be passed
            in which are assumed to be the definitive list of
            controller names valid when matching 'controller'.

        ``directory``
            Passed into controller_scan for the directory to scan. It
            should be an absolute path if using the default
            ``controller_scan`` function.

        ``always_scan``
            Whether or not the ``controller_scan`` function should be
            run during every URL match. This is typically a good idea
            during development so the server won't need to be restarted
            anytime a controller is added.

        ``register``
            Boolean used to determine if the Mapper should use
            ``request_config`` to register itself as the mapper. Since
            it's done on a thread-local basis, this is typically best
            used during testing though it won't hurt in other cases.

        ``explicit``
            Boolean used to determine if routes should be connected
            with implicit defaults of::

                {'controller':'content','action':'index','id':None}

            When set to True, these defaults will not be added to route
            connections and ``url_for`` will not use Route memory.

        Additional attributes that may be set after mapper
        initialization (ie, map.ATTRIBUTE = 'something'):

        ``encoding``
            Used to indicate alternative encoding/decoding systems to
            use with both incoming URLs, and during Route generation
            when passed a Unicode string. Defaults to 'utf-8'.

        ``decode_errors``
            How to handle errors in the encoding, generally ignoring
            any chars that don't convert should be sufficient. Defaults
            to 'ignore'.

        ``minimization``
            Boolean used to indicate whether or not Routes should
            minimize URLs and the generated URLs, or require every
            part where it appears in the path. Defaults to False.

        ``hardcode_names``
            Whether or not Named Routes result in the default options
            for the route being used *or* if they actually force url
            generation to use the route. Defaults to False.

        """
        self.matchlist = []
        self.maxkeys = {}
        self.minkeys = {}
        self.urlcache = LRUCache(1600)
        self._created_regs = False
        self._created_gens = False
        self._master_regexp = None
        self.prefix = None
        self.req_data = threading.local()
        self.directory = directory
        self.always_scan = always_scan
        self.controller_scan = controller_scan
        self._regprefix = None
        self._routenames = {}
        self.debug = False
        self.append_slash = False
        self.sub_domains = False
        self.sub_domains_ignore = []
        self.domain_match = r'[^\.\/]+?\.[^\.\/]+'
        self.explicit = explicit
        self.encoding = 'utf-8'
        self.decode_errors = 'ignore'
        self.hardcode_names = True
        self.minimization = False
        self.create_regs_lock = threading.Lock()
        if register:
            config = request_config()
            config.mapper = self

    def __str__(self):
        """Generates a tabular string representation."""
        def format_methods(r):
            if r.conditions:
                method = r.conditions.get('method', '')
                return type(method) is str and method or ', '.join(method)
            else:
                return ''

        table = [('Route name', 'Methods', 'Path', 'Controller', 'action')] + \
                [(r.name or '', format_methods(r), r.routepath or '',
                  r.defaults.get('controller', ''), r.defaults.get('action', ''))
                 for r in self.matchlist]

        widths = [
            max(len(row[col]) for row in table) for col in range(len(table[0]))
        ]

        return '\n'.join(' '.join(row[col].ljust(widths[col])
                                  for col in range(len(widths)))
                         for row in table)

    def _envget(self):
        try:
            return self.req_data.environ
        except AttributeError:
            return None

    def _envset(self, env):
        self.req_data.environ = env

    def _envdel(self):
        del self.req_data.environ

    environ = property(_envget, _envset, _envdel)

    def extend(self, routes, path_prefix=''):
        """Extends the mapper routes with a list of Route objects

        If a path_prefix is provided, all the routes will have their
        path prepended with the path_prefix.

        Example::

            >>> map = Mapper(controller_scan=None)
            >>> map.connect('home', '/', controller='home', action='splash')
            >>> map.matchlist[0].name == 'home'
            True
            >>> routes = [Route('index', '/index.htm', controller='home',
            ...                 action='index')]
            >>> map.extend(routes)
            >>> len(map.matchlist) == 2
            True
            >>> map.extend(routes, path_prefix='/subapp')
            >>> len(map.matchlist) == 3
            True
            >>> map.matchlist[2].routepath == '/subapp/index.htm'
            True

        .. note::

            This function does not merely extend the mapper with the
            given list of routes, it actually creates new routes with
            identical calling arguments.

        """
        for route in routes:
            if path_prefix and route.minimization:
                routepath = '/'.join([path_prefix, route.routepath])
            elif path_prefix:
                routepath = path_prefix + route.routepath
            else:
                routepath = route.routepath
            self.connect(route.name,
                         routepath,
                         conditions=route.conditions,
                         **route._kargs)

    def make_route(self, *args, **kargs):
        """Make a new Route object

        A subclass can override this method to use a custom Route class.
        """
        return Route(*args, **kargs)

    def connect(self, *args, **kargs):
        """Create and connect a new Route to the Mapper.

        Usage:

        .. code-block:: python

            m = Mapper()
            m.connect(':controller/:action/:id')
            m.connect('date/:year/:month/:day', controller="blog",
                      action="view")
            m.connect('archives/:page', controller="blog", action="by_page",
            requirements = { 'page':'\\d{1,2}' })
            m.connect('category_list', 'archives/category/:section',
                      controller='blog', action='category',
                      section='home', type='list')
            m.connect('home', '', controller='blog', action='view',
                      section='home')

        """
        routename = None
        if len(args) > 1:
            routename = args[0]
        else:
            args = (None, ) + args
        if '_explicit' not in kargs:
            kargs['_explicit'] = self.explicit
        if '_minimize' not in kargs:
            kargs['_minimize'] = self.minimization
        route = self.make_route(*args, **kargs)

        # Apply encoding and errors if its not the defaults and the route
        # didn't have one passed in.
        if (self.encoding != 'utf-8' or self.decode_errors != 'ignore') and \
           '_encoding' not in kargs:
            route.encoding = self.encoding
            route.decode_errors = self.decode_errors

        if not route.static:
            self.matchlist.append(route)

        if routename:
            self._routenames[routename] = route
            route.name = routename
        if route.static:
            return
        exists = False
        for key in self.maxkeys:
            if key == route.maxkeys:
                self.maxkeys[key].append(route)
                exists = True
                break
        if not exists:
            self.maxkeys[route.maxkeys] = [route]
        self._created_gens = False

    def _create_gens(self):
        """Create the generation hashes for route lookups"""
        # Use keys temporarily to assemble the list to avoid excessive
        # list iteration testing with "in"
        controllerlist = {}
        actionlist = {}

        # Assemble all the hardcoded/defaulted actions/controllers used
        for route in self.matchlist:
            if route.static:
                continue
            if 'controller' in route.defaults:
                controllerlist[route.defaults['controller']] = True
            if 'action' in route.defaults:
                actionlist[route.defaults['action']] = True

        # Setup the lists of all controllers/actions we'll add each route
        # to. We include the '*' in the case that a generate contains a
        # controller/action that has no hardcodes
        controllerlist = list(controllerlist.keys()) + ['*']
        actionlist = list(actionlist.keys()) + ['*']

        # Go through our list again, assemble the controllers/actions we'll
        # add each route to. If it's hardcoded, we only add it to that dict key.
        # Otherwise we add it to every hardcode since it can be changed.
        gendict = {}  # Our generated two-deep hash
        for route in self.matchlist:
            if route.static:
                continue
            clist = controllerlist
            alist = actionlist
            if 'controller' in route.hardcoded:
                clist = [route.defaults['controller']]
            if 'action' in route.hardcoded:
                alist = [six.text_type(route.defaults['action'])]
            for controller in clist:
                for action in alist:
                    actiondict = gendict.setdefault(controller, {})
                    actiondict.setdefault(action, ([], {}))[0].append(route)
        self._gendict = gendict
        self._created_gens = True

    def create_regs(self, *args, **kwargs):
        """Atomically creates regular expressions for all connected
        routes
        """
        self.create_regs_lock.acquire()
        try:
            self._create_regs(*args, **kwargs)
        finally:
            self.create_regs_lock.release()

    def _create_regs(self, clist=None):
        """Creates regular expressions for all connected routes"""
        if clist is None:
            if self.directory:
                clist = self.controller_scan(self.directory)
            elif callable(self.controller_scan):
                clist = self.controller_scan()
            elif not self.controller_scan:
                clist = []
            else:
                clist = self.controller_scan

        for key, val in six.iteritems(self.maxkeys):
            for route in val:
                route.makeregexp(clist)

        regexps = []
        prefix2routes = collections.defaultdict(list)
        for route in self.matchlist:
            if not route.static:
                regexps.append(route.makeregexp(clist, include_names=False))
                # Group the routes by static prefix
                prefix = ''.join(
                    it.takewhile(lambda p: isinstance(p, str),
                                 route.routelist))
                if route.minimization and not prefix.startswith('/'):
                    prefix = '/' + prefix
                prefix2routes[prefix.rstrip("/")].append(route)
        self._prefix2routes = prefix2routes
        # Keep track of all possible prefix lengths in decreasing order
        self._prefix_lens = sorted(set(len(p) for p in prefix2routes),
                                   reverse=True)

        # Create our regexp to strip the prefix
        if self.prefix:
            self._regprefix = re.compile(self.prefix + '(.*)')

        # Save the master regexp
        regexp = '|'.join(['(?:%s)' % x for x in regexps])
        self._master_reg = regexp
        try:
            self._master_regexp = re.compile(regexp)
        except OverflowError:
            self._master_regexp = None
        self._created_regs = True

    def _match(self, url, environ):
        """Internal Route matcher

        Matches a URL against a route, and returns a tuple of the match
        dict and the route object if a match is successful; otherwise
        it returns empty.

        For internal use only.

        """
        if not self._created_regs and self.controller_scan:
            self.create_regs()
        elif not self._created_regs:
            raise RoutesException("You must generate the regular expressions"
                                  " before matching.")

        if self.always_scan:
            self.create_regs()

        matchlog = []
        if self.prefix:
            if re.match(self._regprefix, url):
                url = re.sub(self._regprefix, r'\1', url)
                if not url:
                    url = '/'
            else:
                return (None, None, matchlog)

        environ = environ or self.environ
        sub_domains = self.sub_domains
        sub_domains_ignore = self.sub_domains_ignore
        domain_match = self.domain_match
        debug = self.debug

        if self._master_regexp is not None:
            # Check to see if it's a valid url against the main regexp
            # Done for faster invalid URL elimination
            valid_url = re.match(self._master_regexp, url)
        else:
            # Regex is None due to OverflowError caused by too many routes.
            # This will allow larger projects to work but might increase time
            # spent invalidating URLs in the loop below.
            valid_url = True
        if not valid_url:
            return (None, None, matchlog)

        matchlist = it.chain.from_iterable(
            self._prefix2routes.get(url[:prefix_len], ())
            for prefix_len in self._prefix_lens)
        for route in matchlist:
            if route.static:
                if debug:
                    matchlog.append(dict(route=route, static=True))
                continue
            match = route.match(url, environ, sub_domains, sub_domains_ignore,
                                domain_match)
            if debug:
                matchlog.append(dict(route=route, regexp=bool(match)))
            if isinstance(match, dict) or match:
                return (match, route, matchlog)
        return (None, None, matchlog)

    def match(self, url=None, environ=None):
        """Match a URL against against one of the routes contained.

        Will return None if no valid match is found.

        .. code-block:: python

            resultdict = m.match('/joe/sixpack')

        """
        if url is None and not environ:
            raise RoutesException('URL or environ must be provided')

        if url is None:
            url = environ['PATH_INFO']

        result = self._match(url, environ)
        if self.debug:
            return result[0], result[1], result[2]
        if isinstance(result[0], dict) or result[0]:
            return result[0]
        return None

    def routematch(self, url=None, environ=None):
        """Match a URL against against one of the routes contained.

        Will return None if no valid match is found, otherwise a
        result dict and a route object is returned.

        .. code-block:: python

            resultdict, route_obj = m.match('/joe/sixpack')

        """
        if url is None and not environ:
            raise RoutesException('URL or environ must be provided')

        if url is None:
            url = environ['PATH_INFO']
        result = self._match(url, environ)
        if self.debug:
            return result[0], result[1], result[2]
        if isinstance(result[0], dict) or result[0]:
            return result[0], result[1]
        return None

    def generate(self, *args, **kargs):
        """Generate a route from a set of keywords

        Returns the url text, or None if no URL could be generated.

        .. code-block:: python

            m.generate(controller='content',action='view',id=10)

        """
        # Generate ourself if we haven't already
        if not self._created_gens:
            self._create_gens()

        if self.append_slash:
            kargs['_append_slash'] = True

        if not self.explicit:
            if 'controller' not in kargs:
                kargs['controller'] = 'content'
            if 'action' not in kargs:
                kargs['action'] = 'index'

        environ = kargs.pop('_environ', self.environ) or {}
        if 'SCRIPT_NAME' in environ:
            script_name = environ['SCRIPT_NAME']
        elif self.environ and 'SCRIPT_NAME' in self.environ:
            script_name = self.environ['SCRIPT_NAME']
        else:
            script_name = ""
        controller = kargs.get('controller', None)
        action = kargs.get('action', None)

        # If the URL didn't depend on the SCRIPT_NAME, we'll cache it
        # keyed just by kargs; otherwise we need to cache it with
        # both SCRIPT_NAME and kargs:
        cache_key = six.text_type(args).encode('utf8') + \
            six.text_type(kargs).encode('utf8')

        if self.urlcache is not None:
            if six.PY3:
                cache_key_script_name = b':'.join(
                    (script_name.encode('utf-8'), cache_key))
            else:
                cache_key_script_name = '%s:%s' % (script_name, cache_key)

            # Check the url cache to see if it exists, use it if it does
            val = self.urlcache.get(cache_key_script_name, self)
            if val != self:
                return val

        controller = as_unicode(controller, self.encoding)
        action = as_unicode(action, self.encoding)

        actionlist = self._gendict.get(controller) or self._gendict.get(
            '*', {})
        if not actionlist and not args:
            return None
        (keylist, sortcache) = actionlist.get(action) or \
            actionlist.get('*', (None, {}))
        if not keylist and not args:
            return None

        keys = frozenset(kargs.keys())
        cacheset = False
        cachekey = six.text_type(keys)
        cachelist = sortcache.get(cachekey)
        if args:
            keylist = args
        elif cachelist:
            keylist = cachelist
        else:
            cacheset = True
            newlist = []
            for route in keylist:
                if len(route.minkeys - route.dotkeys - keys) == 0:
                    newlist.append(route)
            keylist = newlist

            class KeySorter:
                def __init__(self, obj, *args):
                    self.obj = obj

                def __lt__(self, other):
                    return self._keysort(self.obj, other.obj) < 0

                def _keysort(self, a, b):
                    """Sorts two sets of sets, to order them ideally for
                    matching."""
                    a = a.maxkeys
                    b = b.maxkeys

                    lendiffa = len(keys ^ a)
                    lendiffb = len(keys ^ b)
                    # If they both match, don't switch them
                    if lendiffa == 0 and lendiffb == 0:
                        return 0

                    # First, if a matches exactly, use it
                    if lendiffa == 0:
                        return -1

                    # Or b matches exactly, use it
                    if lendiffb == 0:
                        return 1

                    # Neither matches exactly, return the one with the most in
                    # common
                    if self._compare(lendiffa, lendiffb) != 0:
                        return self._compare(lendiffa, lendiffb)

                    # Neither matches exactly, but if they both have just as
                    # much in common
                    if len(keys & b) == len(keys & a):
                        # Then we return the shortest of the two
                        return self._compare(len(a), len(b))

                    # Otherwise, we return the one that has the most in common
                    else:
                        return self._compare(len(keys & b), len(keys & a))

                def _compare(self, obj1, obj2):
                    if obj1 < obj2:
                        return -1
                    elif obj1 > obj2:
                        return 1
                    else:
                        return 0

            keylist.sort(key=KeySorter)
            if cacheset:
                sortcache[cachekey] = keylist

        # Iterate through the keylist of sorted routes (or a single route if
        # it was passed in explicitly for hardcoded named routes)
        for route in keylist:
            fail = False
            for key in route.hardcoded:
                kval = kargs.get(key)
                if not kval:
                    continue
                kval = as_unicode(kval, self.encoding)
                if kval != route.defaults[key] and \
                        not callable(route.defaults[key]):
                    fail = True
                    break
            if fail:
                continue
            path = route.generate(**kargs)
            if path:
                if self.prefix:
                    path = self.prefix + path
                external_static = route.static and route.external
                if not route.absolute and not external_static:
                    path = script_name + path
                    key = cache_key_script_name
                else:
                    key = cache_key
                if self.urlcache is not None:
                    self.urlcache.put(key, str(path))
                return str(path)
            else:
                continue
        return None

    def resource(self, member_name, collection_name, **kwargs):
        """Generate routes for a controller resource

        The member_name name should be the appropriate singular version
        of the resource given your locale and used with members of the
        collection. The collection_name name will be used to refer to
        the resource collection methods and should be a plural version
        of the member_name argument. By default, the member_name name
        will also be assumed to map to a controller you create.

        The concept of a web resource maps somewhat directly to 'CRUD'
        operations. The overarching thing to keep in mind is that
        mapping a resource is about handling creating, viewing, and
        editing that resource.

        All keyword arguments are optional.

        ``controller``
            If specified in the keyword args, the controller will be
            the actual controller used, but the rest of the naming
            conventions used for the route names and URL paths are
            unchanged.

        ``collection``
            Additional action mappings used to manipulate/view the
            entire set of resources provided by the controller.

            Example::

                map.resource('message', 'messages', collection={'rss':'GET'})
                # GET /message/rss (maps to the rss action)
                # also adds named route "rss_message"

        ``member``
            Additional action mappings used to access an individual
            'member' of this controller's resources.

            Example::

                map.resource('message', 'messages', member={'mark':'POST'})
                # POST /message/1/mark (maps to the mark action)
                # also adds named route "mark_message"

        ``new``
            Action mappings that involve dealing with a new member in
            the controller resources.

            Example::

                map.resource('message', 'messages', new={'preview':'POST'})
                # POST /message/new/preview (maps to the preview action)
                # also adds a url named "preview_new_message"

        ``path_prefix``
            Prepends the URL path for the Route with the path_prefix
            given. This is most useful for cases where you want to mix
            resources or relations between resources.

        ``name_prefix``
            Prepends the name_prefix given to the route names that
            are generated. Combined with the path_prefix option,
            it's easy to generate route names and paths that represent
            resources that are in relations.

            Example::

                map.resource('message', 'messages', controller='categories',
                    path_prefix='/category/:category_id',
                    name_prefix="category_")
                # GET /category/7/message/1
                # has named route "category_message"

        ``requirements``
            A dictionary that restricts the matching of a variable.
            Can be used when matching variables with path_prefix.

            Example::

                map.resource('message', 'messages',
                             path_prefix='{project_id}/',
                             requirements={"project_id": R"\\d+"})
                # POST /01234/message
                #    success, project_id is set to "01234"
                # POST /foo/message
                #    404 not found, won't be matched by this route

        ``parent_resource``
            A ``dict`` containing information about the parent
            resource, for creating a nested resource. It should contain
            the ``member_name`` and ``collection_name`` of the parent
            resource. This ``dict`` will
            be available via the associated ``Route`` object which can
            be accessed during a request via
            ``request.environ['routes.route']``

            If ``parent_resource`` is supplied and ``path_prefix``
            isn't, ``path_prefix`` will be generated from
            ``parent_resource`` as
            "<parent collection name>/:<parent member name>_id".

            If ``parent_resource`` is supplied and ``name_prefix``
            isn't, ``name_prefix`` will be generated from
            ``parent_resource`` as  "<parent member name>_".

            Example::

                >>> from routes.util import url_for
                >>> m = Mapper()
                >>> m.resource('location', 'locations',
                ...            parent_resource=dict(member_name='region',
                ...                                 collection_name='regions'))
                >>> # path_prefix is "regions/:region_id"
                >>> # name prefix is "region_"
                >>> url_for('region_locations', region_id=13)
                '/regions/13/locations'
                >>> url_for('region_new_location', region_id=13)
                '/regions/13/locations/new'
                >>> url_for('region_location', region_id=13, id=60)
                '/regions/13/locations/60'
                >>> url_for('region_edit_location', region_id=13, id=60)
                '/regions/13/locations/60/edit'

            Overriding generated ``path_prefix``::

                >>> m = Mapper()
                >>> m.resource('location', 'locations',
                ...            parent_resource=dict(member_name='region',
                ...                                 collection_name='regions'),
                ...            path_prefix='areas/:area_id')
                >>> # name prefix is "region_"
                >>> url_for('region_locations', area_id=51)
                '/areas/51/locations'

            Overriding generated ``name_prefix``::

                >>> m = Mapper()
                >>> m.resource('location', 'locations',
                ...            parent_resource=dict(member_name='region',
                ...                                 collection_name='regions'),
                ...            name_prefix='')
                >>> # path_prefix is "regions/:region_id"
                >>> url_for('locations', region_id=51)
                '/regions/51/locations'

        """
        collection = kwargs.pop('collection', {})
        member = kwargs.pop('member', {})
        new = kwargs.pop('new', {})
        path_prefix = kwargs.pop('path_prefix', None)
        name_prefix = kwargs.pop('name_prefix', None)
        parent_resource = kwargs.pop('parent_resource', None)

        # Generate ``path_prefix`` if ``path_prefix`` wasn't specified and
        # ``parent_resource`` was. Likewise for ``name_prefix``. Make sure
        # that ``path_prefix`` and ``name_prefix`` *always* take precedence if
        # they are specified--in particular, we need to be careful when they
        # are explicitly set to "".
        if parent_resource is not None:
            if path_prefix is None:
                path_prefix = '%s/:%s_id' % (
                    parent_resource['collection_name'],
                    parent_resource['member_name'])
            if name_prefix is None:
                name_prefix = '%s_' % parent_resource['member_name']
        else:
            if path_prefix is None:
                path_prefix = ''
            if name_prefix is None:
                name_prefix = ''

        # Ensure the edit and new actions are present and mapped to GET
        member['edit'] = 'GET'
        new.update({'new': 'GET'})

        # Make new dicts based on the old ones: the old values become keys,
        # and the old keys become items in a list as the value
        def swap(dct, newdct):
            """Swap the keys and values in the dict, and uppercase the values
            from the dict during the swap."""
            for key, val in six.iteritems(dct):
                newdct.setdefault(val.upper(), []).append(key)
            return newdct

        collection_methods = swap(collection, {})
        member_methods = swap(member, {})
        new_methods = swap(new, {})

        # Insert create, update, and destroy methods
        collection_methods.setdefault('POST', []).insert(0, 'create')
        member_methods.setdefault('PUT', []).insert(0, 'update')
        member_methods.setdefault('DELETE', []).insert(0, 'delete')

        # If there's a path prefix option, use it with the controller
        controller = strip_slashes(collection_name)
        path_prefix = strip_slashes(path_prefix)
        path_prefix = '/' + path_prefix
        if path_prefix and path_prefix != '/':
            path = path_prefix + '/' + controller
        else:
            path = '/' + controller
        collection_path = path
        new_path = path + "/new"
        member_path = path + "/:(id)"

        options = {
            'controller': kwargs.get('controller', controller),
            '_member_name': member_name,
            '_collection_name': collection_name,
            '_parent_resource': parent_resource,
            '_filter': kwargs.get('_filter')
        }
        if 'requirements' in kwargs:
            options['requirements'] = kwargs['requirements']

        def requirements_for(meth):
            """Returns a new dict to be used for all route creation as the
            route options"""
            opts = options.copy()
            if meth != 'any':
                opts['conditions'] = {'method': [meth.upper()]}
            return opts

        # Add the routes for handling collection methods
        for method, lst in six.iteritems(collection_methods):
            primary = (method != 'GET' and lst.pop(0)) or None
            route_options = requirements_for(method)
            for action in lst:
                route_options['action'] = action
                route_name = "%s%s_%s" % (name_prefix, action, collection_name)
                self.connect("formatted_" + route_name,
                             "%s/%s.:(format)" % (collection_path, action),
                             **route_options)
                self.connect(route_name, "%s/%s" % (collection_path, action),
                             **route_options)
            if primary:
                route_options['action'] = primary
                self.connect("%s.:(format)" % collection_path, **route_options)
                self.connect(collection_path, **route_options)

        # Specifically add in the built-in 'index' collection method and its
        # formatted version
        self.connect("formatted_" + name_prefix + collection_name,
                     collection_path + ".:(format)",
                     action='index',
                     conditions={'method': ['GET']},
                     **options)
        self.connect(name_prefix + collection_name,
                     collection_path,
                     action='index',
                     conditions={'method': ['GET']},
                     **options)

        # Add the routes that deal with new resource methods
        for method, lst in six.iteritems(new_methods):
            route_options = requirements_for(method)
            for action in lst:
                name = "new_" + member_name
                route_options['action'] = action
                if action == 'new':
                    path = new_path
                    formatted_path = new_path + '.:(format)'
                else:
                    path = "%s/%s" % (new_path, action)
                    name = action + "_" + name
                    formatted_path = "%s/%s.:(format)" % (new_path, action)
                self.connect("formatted_" + name_prefix + name, formatted_path,
                             **route_options)
                self.connect(name_prefix + name, path, **route_options)

        requirements_regexp = '[^\\/]+(?<!\\\\)'

        # Add the routes that deal with member methods of a resource
        for method, lst in six.iteritems(member_methods):
            route_options = requirements_for(method)
            route_options['requirements'] = {'id': requirements_regexp}
            if method not in ['POST', 'GET', 'any']:
                primary = lst.pop(0)
            else:
                primary = None
            for action in lst:
                route_options['action'] = action
                self.connect(
                    "formatted_%s%s_%s" % (name_prefix, action, member_name),
                    "%s/%s.:(format)" % (member_path, action), **route_options)
                self.connect("%s%s_%s" % (name_prefix, action, member_name),
                             "%s/%s" % (member_path, action), **route_options)
            if primary:
                route_options['action'] = primary
                self.connect("%s.:(format)" % member_path, **route_options)
                self.connect(member_path, **route_options)

        # Specifically add the member 'show' method
        route_options = requirements_for('GET')
        route_options['action'] = 'show'
        route_options['requirements'] = {'id': requirements_regexp}
        self.connect("formatted_" + name_prefix + member_name,
                     member_path + ".:(format)", **route_options)
        self.connect(name_prefix + member_name, member_path, **route_options)

    def redirect(self, match_path, destination_path, *args, **kwargs):
        """Add a redirect route to the mapper

        Redirect routes bypass the wrapped WSGI application and instead
        result in a redirect being issued by the RoutesMiddleware. As
        such, this method is only meaningful when using
        RoutesMiddleware.

        By default, a 302 Found status code is used; this can be
        changed by providing a ``_redirect_code`` keyword argument
        which will then be used instead. Note that the entire status
        code string needs to be present.

        When using keyword arguments, all arguments that apply to
        matching will be used for the match, while generation specific
        options will be used during generation. Thus all options
        normally available to connected Routes may be used with
        redirect routes as well.

        Example::

            map = Mapper()
            map.redirect('/legacyapp/archives/{url:.*}', '/archives/{url}')
            map.redirect('/home/index', '/',
                         _redirect_code='301 Moved Permanently')

        """
        both_args = ['_encoding', '_explicit', '_minimize']
        gen_args = ['_filter']

        status_code = kwargs.pop('_redirect_code', '302 Found')
        gen_dict, match_dict = {}, {}

        # Create the dict of args for the generation route
        for key in both_args + gen_args:
            if key in kwargs:
                gen_dict[key] = kwargs[key]
        gen_dict['_static'] = True

        # Create the dict of args for the matching route
        for key in kwargs:
            if key not in gen_args:
                match_dict[key] = kwargs[key]

        self.connect(match_path, **match_dict)
        match_route = self.matchlist[-1]

        self.connect('_redirect_%s' % id(match_route), destination_path,
                     **gen_dict)
        match_route.redirect = True
        match_route.redirect_status = status_code
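
A quick usage sketch of the two methods above. The route table in the
comments follows the conventions described in the resource() docstring;
the Mapper import is the standard one from the routes package, and exact
paths may vary with mapper settings such as explicit mode and minimization.

    from routes import Mapper

    m = Mapper()
    m.resource('message', 'messages')
    # Expected default routes, per the docstring conventions above:
    #   GET    /messages            -> action='index'
    #   POST   /messages            -> action='create'
    #   GET    /messages/new        -> action='new'
    #   GET    /messages/:(id)      -> action='show'
    #   PUT    /messages/:(id)      -> action='update'
    #   DELETE /messages/:(id)      -> action='delete'
    #   GET    /messages/:(id)/edit -> action='edit'
    m.redirect('/old/{url:.*}', '/new/{url}',
               _redirect_code='301 Moved Permanently')
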
Example #21
class FeatureLoader(object):
    def __init__(self):
        self.points_cache = LRUCache(1000)
        self.colors_cache = LRUCache(1000)
        self.features_cache = LRUCache(200)
        self.words_cache = LRUCache(200)
        self.masks_cache = LRUCache(1000)
        self.index_cache = LRUCache(200)

    def clear_cache(self):
        self.points_cache.clear()
        self.colors_cache.clear()
        self.features_cache.clear()
        self.words_cache.clear()
        self.masks_cache.clear()

    def load_points_colors(self, data, image):
        points = self.points_cache.get(image)
        colors = self.colors_cache.get(image)
        if points is None or colors is None:
            points, _, colors = self._load_features_nocache(data, image)
            self.points_cache.put(image, points)
            self.colors_cache.put(image, colors)
        return points, colors

    def load_masks(self, data, image):
        points, _ = self.load_points_colors(data, image)
        masks = self.masks_cache.get(image)
        if masks is None:
            masks = data.load_features_mask(image, points[:, :2])
            self.masks_cache.put(image, masks)
        return masks

    def load_features_index(self, data, image, features):
        index = self.index_cache.get(image)
        # load_points_features_colors() returns (points, features, colors);
        # unpack so the length checks compare feature arrays, not the tuple
        _, current_features, _ = self.load_points_features_colors(data, image)
        use_load = len(current_features) == len(features) and index is None
        use_rebuild = len(current_features) != len(features)
        if use_load:
            index = data.load_feature_index(image, features)
        if use_rebuild:
            index = ft.build_flann_index(features, data.config)
        if use_load or use_rebuild:
            self.index_cache.put(image, index)
        return index

    def load_points_features_colors(self, data, image):
        points = self.points_cache.get(image)
        features = self.features_cache.get(image)
        colors = self.colors_cache.get(image)
        if points is None or features is None or colors is None:
            points, features, colors = self._load_features_nocache(data, image)
            self.points_cache.put(image, points)
            self.features_cache.put(image, features)
            self.colors_cache.put(image, colors)
        return points, features, colors

    def load_words(self, data, image):
        words = self.words_cache.get(image)
        if words is None:
            words = data.load_words(image)
            self.words_cache.put(image, words)
        return words

    def _load_features_nocache(self, data, image):
        points, features, colors = data.load_features(image)
        if points is None:
            logger.error('Could not load features for image {}'.format(image))
        else:
            points = np.array(points[:, :3], dtype=float)
        return points, features, colors
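
Every loader method above follows the same cache-aside shape: check the
LRUCache, fall back to the expensive loader on a miss, then store the
result. A minimal standalone sketch of that pattern (get_or_load and the
lambda are illustrative, not part of the project):

    def get_or_load(cache, key, loader):
        # Cache-aside: return the cached value, or compute and store it.
        value = cache.get(key)
        if value is None:
            value = loader(key)
            cache.put(key, value)
        return value

    # e.g. masks = get_or_load(self.masks_cache, image,
    #                          lambda im: data.load_features_mask(im, points[:, :2]))
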
Example #22
File: replica.py Project: michael-k/devpi
class FileReplicationSharedData(object):
    QUEUE_TIMEOUT = 1
    ERROR_QUEUE_DELAY_MULTIPLIER = 1.5
    ERROR_QUEUE_REPORT_DELAY = 2 * 60
    ERROR_QUEUE_MAX_DELAY = 60 * 60

    def __init__(self, xom):
        from queue import Empty, PriorityQueue
        self.Empty = Empty
        self.xom = xom
        self.queue = PriorityQueue()
        self.error_queue = PriorityQueue()
        self.deleted = LRUCache(100)
        self.index_types = LRUCache(1000)
        self.errors = ReplicationErrors()
        self.importer = ImportFileReplica(self.xom, self.errors)
        self._replica_in_sync_cv = threading.Condition()
        self.last_added = None
        self.last_errored = None
        self.last_processed = None

    def on_import(self, conn, serial, key, val, back_serial):
        # Do not queue anything until we have been in sync for the first
        # time. The InitialQueueThread will queue in one go on initial sync
        with self._replica_in_sync_cv:
            if self.xom.replica_thread.replica_in_sync_at is None:
                return
        try:
            is_from_mirror = self.is_from_mirror(key)
        except KeyError:
            stage = self.xom.model.getstage(key.params['user'],
                                            key.params['index'])
            self.index_types.put(stage.name, stage.ixconfig['type'])
            is_from_mirror = self.is_from_mirror(key)
        # note the negated serial for the PriorityQueue
        self.queue.put(
            (is_from_mirror, -serial, key.relpath, key.name, val, back_serial))
        self.last_added = time.time()

    def next_ts(self, delay):
        return time.time() + delay

    def add_errored(self,
                    is_from_mirror,
                    serial,
                    key,
                    keyname,
                    value,
                    back_serial,
                    ts=None,
                    delay=11):
        if ts is None:
            ts = self.next_ts(min(delay, self.ERROR_QUEUE_MAX_DELAY))
        # this priority queue is ordered by time stamp
        self.error_queue.put((ts, delay, is_from_mirror, serial, key, keyname,
                              value, back_serial))
        self.last_errored = time.time()

    def is_from_mirror(self, key, default=notset):
        index_name = "%s/%s" % (key.params['user'], key.params['index'])
        result = self.index_types.get(index_name)
        if result is None:
            if default is notset:
                raise KeyError
            return default
        return result == 'mirror'

    def is_in_future(self, ts):
        return ts > time.time()

    def process_next_errored(self, handler):
        try:
            # it seems that without the timeout this isn't triggered
            # frequently enough; the thread was waiting a long time even
            # though there were already/still items in the queue
            info = self.error_queue.get(timeout=self.QUEUE_TIMEOUT)
        except self.Empty:
            return
        (ts, delay, is_from_mirror, serial, key, keyname, value,
         back_serial) = info
        try:
            if self.is_in_future(ts):
                # not current yet, so re-add it
                self.add_errored(is_from_mirror,
                                 serial,
                                 key,
                                 keyname,
                                 value,
                                 back_serial,
                                 ts=ts,
                                 delay=delay)
                return
            handler(is_from_mirror, serial, key, keyname, value, back_serial)
        except Exception:
            # another failure, re-add with longer delay
            self.add_errored(is_from_mirror,
                             serial,
                             key,
                             keyname,
                             value,
                             back_serial,
                             delay=delay * self.ERROR_QUEUE_DELAY_MULTIPLIER)
            if delay > self.ERROR_QUEUE_REPORT_DELAY:
                threadlog.exception(
                    "There repeatedly has been an error during file download.")
        finally:
            self.error_queue.task_done()
            self.last_processed = time.time()

    def process_next(self, handler):
        try:
            # it seems that without the timeout this isn't triggered
            # frequently enough; the thread was waiting a long time even
            # though there were already/still items in the queue
            info = self.queue.get(timeout=self.QUEUE_TIMEOUT)
        except self.Empty:
            # when the regular queue is empty, we retry previously errored ones
            return self.process_next_errored(handler)
        (is_from_mirror, serial, key, keyname, value, back_serial) = info
        # negate again, because it was negated for the PriorityQueue
        serial = -serial
        try:
            handler(is_from_mirror, serial, key, keyname, value, back_serial)
        except Exception as e:
            threadlog.warn("Error during file replication: %s" % ''.join(
                traceback.format_exception_only(e.__class__, e)).strip())
            self.add_errored(is_from_mirror, serial, key, keyname, value,
                             back_serial)
        finally:
            self.queue.task_done()
            self.last_processed = time.time()

    def wait(self, error_queue=False):
        self.queue.join()
        if error_queue:
            self.error_queue.join()
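
The retry delay used by add_errored() and process_next_errored() grows
geometrically by ERROR_QUEUE_DELAY_MULTIPLIER and is capped at
ERROR_QUEUE_MAX_DELAY when the timestamp is computed. A standalone sketch
of how the delay sequence evolves from the default delay=11:

    delay, cap, factor = 11, 60 * 60, 1.5
    for attempt in range(8):
        print(attempt, round(min(delay, cap), 1))
        delay *= factor
    # 11.0, 16.5, 24.8, 37.1, 55.7, 83.5, ... until the 3600-second cap
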
Example #23
File: registry.py Project: jean/reg
class CachingKeyLookup(object):
    """
    A key lookup that caches.

    Implements the read-only API of :class:`Registry`, using
    a cache to speed up access.

    The cache is LRU.

    :param key_lookup: the :class:`Registry` to cache.
    :param component_cache_size: how many cache entries to store for
      the :meth:`component` method. This is also used by dispatch
      calls.
    :param all_cache_size: how many cache entries to store for the
      :meth:`all` method.
    :param fallback_cache_size: how many cache entries to store for
      the :meth:`fallback` method.
    """
    def __init__(self, key_lookup, component_cache_size, all_cache_size,
                 fallback_cache_size):
        self.key_lookup = key_lookup
        self.predicate_key = key_lookup.predicate_key
        self.key_dict_to_predicate_key = key_lookup.key_dict_to_predicate_key
        self.component_cache = LRUCache(component_cache_size)
        self.all_cache = LRUCache(all_cache_size)
        self.fallback_cache = LRUCache(fallback_cache_size)

    def component(self, key, predicate_key):
        """Lookup value in registry based on predicate_key.

        If the value for predicate_key cannot be found, this looks up the
        first permutation of predicate_key for which there is a value. Permutations
        are made according to the predicates registered for the key.

        :param key: an immutable for which to look up the predicate_key.
        :param predicate_key: an immutable predicate key, constructed
          for predicates given for this key.
        :returns: a registered value, or ``None``.
        """
        result = self.component_cache.get((key, predicate_key), NOT_FOUND)
        if result is not NOT_FOUND:
            return result
        result = self.key_lookup.component(key, predicate_key)
        self.component_cache.put((key, predicate_key), result)
        return result

    def fallback(self, key, predicate_key):
        """Lookup fallback based on predicate_key.

        This finds the fallback for the most specific predicate
        that fails to match.

        :param key: an immutable for which to look up the predicate_key.
        :param predicate_key: an immutable predicate key, constructed
          for predicates given for this key.
        :returns: the fallback value for the most specific predicate
          that failed to match.
        """
        result = self.fallback_cache.get((key, predicate_key), NOT_FOUND)
        if result is not NOT_FOUND:
            return result
        result = self.key_lookup.fallback(key, predicate_key)
        self.fallback_cache.put((key, predicate_key), result)
        return result

    def all(self, key, predicate_key):
        """Lookup iterable of values registered for predicate_key.

        Looks up values registered for all permutations of
        predicate_key, the most specific first.

        :param key: an immutable for which to look up the values.
        :param predicate_key: an immutable predicate key, constructed for
          the predicates given for this key.
        :returns: An iterable of registered values.
        """
        result = self.all_cache.get((key, predicate_key), NOT_FOUND)
        if result is not NOT_FOUND:
            return result
        result = list(self.key_lookup.all(key, predicate_key))
        self.all_cache.put((key, predicate_key), result)
        return result

    def lookup(self):
        """A :class:`Lookup` for this registry.
        """
        return Lookup(self)
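
Note the NOT_FOUND sentinel in the three lookup methods above: because
None can be a legitimately registered result, the cache cannot use None
as its miss marker. A minimal sketch of the sentinel pattern (NOT_FOUND
here is simply a unique object, which is the usual convention):

    NOT_FOUND = object()  # unique sentinel; never equal to a real result

    def cached_lookup(cache, key, compute):
        result = cache.get(key, NOT_FOUND)
        if result is NOT_FOUND:
            result = compute(key)   # may legitimately return None
            cache.put(key, result)  # None gets cached too, avoiding re-lookups
        return result
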
Example #24
class Registry(object):
    """ A component registry.  The component registry supports the
    Python mapping interface and can be used as you might a regular
    dictionary.  It also supports more advanced registrations and
    lookups that include a ``requires`` argument and a ``name`` via
    its ``register`` and ``lookup`` methods.  Components can also be
    resolved for provided objects via its ``resolve`` method."""
    def __init__(self, dict=None, **kwargs):
        self.data = {}
        self._lkpcache = LRUCache(1000)
        if dict is not None:
            self.update(dict)
        if len(kwargs):
            self.update(kwargs)
        self.listener_registered = False # at least one listener registered

    @property
    def _dictmembers(self):
        D = {}
        norequires = self.data.get((), {})
        for k, v in norequires.items():
            provides, name = k
            if name == '':
                D[provides] = v
        return D

    def __cmp__(self, dict):
        if isinstance(dict, Registry):
            return cmp(self.data, dict.data)
        else:
            return cmp(self._dictmembers, dict)

    def __len__(self):
        return len(self._dictmembers)

    def __getitem__(self, key):
        notrequires = self.data.get((), {})
        return notrequires[(key, '')]

    def __setitem__(self, key, val):
        self.register(key, val)

    def __delitem__(self, key):
        self._lkpcache.clear()
        notrequires = self.data.get((), {})
        try:
            del notrequires[(key, '')]
        except KeyError:
            raise KeyError(key)

    def clear(self, full=False):
        if full:
            self.data = {}
        else:
            notrequires = self.data.get((), {})
            for k, v in notrequires.items():
                provides, name = k
                if name == '':
                    del notrequires[k]
        self._lkpcache.clear()

    def copy(self):
        import copy
        return copy.copy(self)

    def items(self):
        return self._dictmembers.items()

    def keys(self):
        return self._dictmembers.keys()
    
    def values(self):
        return self._dictmembers.values()

    def iteritems(self):
        return iter(self.items())
    
    def iterkeys(self):
        return iter(self.keys())
    
    def itervalues(self):
        return iter(self.values())

    def __contains__(self, key):
        return key in self._dictmembers

    has_key = __contains__

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    @classmethod
    def fromkeys(cls, iterable, value=None):
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def update(self, dict=None, **kw):
        if dict is not None:
            for k, v in dict.items():
                self.register(k, v)
        for k, v in kw.items():
            self.register(k, v)

    def setdefault(self, key, failobj=None):
        self._lkpcache.clear()
        val = self.get(key, default=failobj)
        if val is failobj:
            self[key] = failobj
        return self[key]

    def __iter__(self):
        return iter(self._dictmembers)

    def pop(self, key, *args):
        if len(args) > 1:
            raise TypeError, "pop expected at most 2 arguments, got "\
                              + repr(1 + len(args))
        try:
            value = self[key]
        except KeyError:
            if args:
                return args[0]
            raise
        del self[key]
        return value

    def popitem(self):
        try:
            k, v = self.iteritems().next()
        except StopIteration:
            raise KeyError, 'container is empty'
        del self[k]
        return (k, v)

    def register(self, provides, component, *requires, **kw):
        """ Register a component """
        name = kw.get('name', '')
        if name is ALL:
            raise ValueError('ALL cannot be used in a registration as a name')
        self._lkpcache.clear()
        if provides is _subscribers:
            self.listener_registered = True
        info = self.data.setdefault(requires, {})
        info[(provides, name)] = component
        all = info.setdefault((provides, ALL), [])
        all.append(component)

    def unregister(self, provides, component, *requires, **kw):
        self._lkpcache.clear()
        name = kw.get('name', '')
        if name is ALL:
            del self.data[requires]
            return
        info = self.data.get(requires, {})
        del info[(provides, name)]
        all = info.get((provides, ALL), [])
        all.remove(component)
        if not all:
            del self.data[requires]

    def subscribe(self, fn, *requires, **kw):
        name = kw.get('name', '')
        if name is ALL:
            raise ValueError('ALL may not be used as a name to subscribe')
        newkw = {'name':name, 'default':_marker}
        subscribers = self.lookup(_subscribers, *requires, **newkw)
        if subscribers is _marker:
            subscribers = []
        subscribers.append(fn)
        self.register(_subscribers, subscribers, *requires, **kw)

    def unsubscribe(self, fn, *requires, **kw):
        name = kw.get('name', '')
        if name is ALL:
            raise ValueError('ALL may not be used as a name to unsubscribe')
        newkw = {'name':name, 'default':_marker}
        subscribers = self.lookup(_subscribers, *requires, **newkw)
        if subscribers is _marker:
            subscribers = []
        if fn in subscribers:
            subscribers.remove(fn)

    def notify(self, *objects, **kw):
        if not self.listener_registered:
            return # optimization
        subscribers = self.resolve(_subscribers, *objects, **kw)
        name = kw.get('name', '')
        if subscribers is not None:
            if name is ALL:
                for subscriberlist in subscribers:
                    for subscriber in subscriberlist:
                        subscriber(*objects)
            else:
                for subscriber in subscribers:
                    subscriber(*objects)

    def _lookup(self, provides, name, default, requires, default_requires):
        # the requires and default_requires arguments *must* be
        # hashable sequences of tuples composed of hashable objects
        reg = self.data

        cachekey = (provides, requires, name, default_requires)
        cached = self._lkpcache.get(cachekey, _marker)

        if cached is _marker:
            combinations = cached_augmented_product(requires, default_requires)
            regkey = (provides, name)
            for combo in combinations:
                try:
                    result = reg[combo][regkey]
                    self._lkpcache.put(cachekey, result)
                    return result
                except KeyError:
                    pass

            self._lkpcache.put(cachekey, _notfound)
            cached = _notfound
            
        if cached is _notfound:
            if default is _missing:
                raise LookupError(
                    "Couldn't find a component providing %s for requires "
                    "args %r with name `%s`" % (provides, list(requires), name))
            return default

        return cached

    def lookup(self, provides, *requires, **kw):
        req = []
        for val in requires:
            if not hasattr(val, '__iter__'):
                req.append((val,))
            else:
                req.append(tuple(val))
        name = kw.get('name', '')
        extras = ((None,),) * len(req)
        default = kw.get('default', _missing)
        return self._lookup(provides, name, default, tuple(req), extras)

    def resolve(self, provides, *objects, **kw):
        requires = tuple(
            [directlyprovidedby(obj)+alsoprovidedby(obj) for obj in objects ])
        extras = tuple([defaultprovidedby(obj) for obj in objects])
        name = kw.get('name', '')
        default = kw.get('default', _missing)
        return self._lookup(provides, name, default, requires, extras)
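
A hedged usage sketch based on the docstring above (the names are
illustrative): plain registrations behave like dictionary access, while
register() and lookup() add name and requires dimensions.

    reg = Registry()
    reg['greeting'] = 'hello'   # equivalent to reg.register('greeting', 'hello')
    print(reg['greeting'])      # -> 'hello'
    print('greeting' in reg)    # -> True

    # Named registrations are reached through lookup(), not __getitem__;
    # this assumes the empty-requires combination resolves as in _lookup().
    reg.register('greeting', 'hi there', name='casual')
    print(reg.lookup('greeting', name='casual'))  # -> 'hi there'
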
Example #25
class ResolveScheduler(object):
    """ A class that can resolve multiple pages in a potentially
        multi-threaded way.
    """
    PAGE_REGISTRY_SIZE = 256

    def __init__(self, wiki, page_urls, registry_size=None):
        self.wiki = wiki
        self.page_urls = page_urls

        self._cache = LRUCache(registry_size or self.PAGE_REGISTRY_SIZE)
        self._pages_meta = None

        self._queue = None
        self._results = None
        self._pool = None
        self._done = False

    def getPage(self, url):
        page = self._cache.get(url)
        if page is None:
            logger.debug("Caching page in scheduler registry: %s" % url)
            fields = ['url', 'title', 'path', 'formatted_text', 'local_meta',
                      'local_links']
            page = self.wiki.db.getPage(url, fields=fields)
            self._cache.put(url, page)
        return page

    def getPagesMeta(self):
        if self._pages_meta is None:
            fields = ['url', 'title', 'local_meta']
            self._pages_meta = list(self.wiki.db.getPages(fields=fields))
        return self._pages_meta

    def run(self, num_workers=1):
        logger.info("Running resolve scheduler (%d workers)" % num_workers)

        if num_workers > 1:
            # Multi-threaded resolving.
            logger.debug("Main thread is %d" % threading.get_ident())

            self._done = False
            self._queue = Queue()
            self._results = Queue()

            self.getPagesMeta()

            job_count = 0
            for url in self.page_urls:
                self._queue.put_nowait(JobDesc(url))
                job_count += 1

            self._pool = []
            for i in range(num_workers):
                ctx = JobContext(self)
                self._pool.append(JobWorker(i, ctx))

            for thread in self._pool:
                thread.start()

            while job_count > 0:
                try:
                    url, page, exc = self._results.get(True, 10)
                except Empty:
                    logger.error("Resolve workers timed out, still have %d "
                                 "jobs to go." % job_count)
                    return

                job_count -= 1
                if page:
                    self.wiki.db.cachePage(page)
                if exc:
                    logger.error("Error resolving page: %s" % url)
                    logger.exception(exc)

            logger.debug("Queue is empty... terminating workers.")
            self._done = True

            for thread in self._pool:
                thread.join()
                logger.debug("Worker [%d] ended." % thread.wid)
        else:
            # Single-threaded resolving.
            for url in self.page_urls:
                page = self.getPage(url)
                r = PageResolver(
                        page,
                        page_getter=self.getPage,
                        pages_meta_getter=self.getPagesMeta)
                runner = PageResolverRunner(page, r)
                runner.run(raise_on_failure=True)
                self.wiki.db.cachePage(page)
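
The multi-threaded branch of run() is a standard producer/consumer layout:
jobs go into one Queue, workers push (url, page, exc) tuples into a results
Queue, and the main thread drains results until the job count reaches zero.
A condensed, stdlib-only sketch of that shape (the worker body is
hypothetical):

    import threading
    from queue import Queue, Empty

    jobs, results = Queue(), Queue()

    def worker():
        while True:
            try:
                url = jobs.get_nowait()
            except Empty:
                return
            try:
                page = "resolved:" + url   # stand-in for real page resolution
                results.put((url, page, None))
            except Exception as exc:       # report failures instead of dying
                results.put((url, None, exc))

    urls = ["/a", "/b", "/c"]
    for u in urls:
        jobs.put(u)
    threads = [threading.Thread(target=worker) for _ in range(2)]
    for t in threads:
        t.start()
    for _ in urls:
        url, page, exc = results.get(timeout=10)
    for t in threads:
        t.join()
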
Example #26
File: client.py Project: Roger/coucher
class Database(object):
    def __init__(self, name, server=None, create=False):
        self.server = server or Server()
        self.session = self.server.session
        self.name = name
        self.database = server.host + "/" + name

        self.cache = LRUCache(100)

        if create:
            self.create()
        else:
            response = self.session.head(self.database)
            if not response.ok:
                if response.status_code == 404:
                    raise excepts.DBNotExists
                raise Exception(response.status_code)

    def __getitem__(self, docid):
        """
        Returns a document by _id
        """

        return self.get_doc(docid)

    def __delitem__(self, docid):
        self.delete_doc(docid)

    def create(self):
        """
        Try to create a new database, or raise an error.

        Possible errors: DBExists, AuthFail
        """

        request = self.session.put(self.database)
        if not request.ok:
            if request.status_code == 401:
                raise excepts.AuthFail
            elif request.status_code == 412:
                raise excepts.DBExists
            raise Exception(request.status_code)

        response = request.json()
        ok = response.get("ok", False)
        if not ok:
            raise Exception(response)

    def delete_doc(self, doc):
        """
        Removes a document
        """

        if isinstance(doc, six.string_types):
            doc = self[doc]

        response = self.session.delete(self.database + "/" + doc["_id"],
                                       params=dict(rev=doc["_rev"]))
        if response.ok:
            return response.json()

        if response.status_code == 404:
            raise excepts.DocNotExists

    def changes(self,
                feed="continuous",
                include_docs=False,
                yield_beats=False,
                **opts):
        opts.update(dict(feed=feed, include_docs=include_docs))
        opts = encode_view_options(opts)

        if feed == "continuous":
            response = self.session.get(self.database + "/_changes",
                                        params=opts,
                                        stream=True)
            if not response.ok:
                raise Exception(response.status_code)

            for line in response.iter_lines(chunk_size=2048):
                if line:
                    yield json.loads(line.decode(response.encoding))
                elif yield_beats:
                    yield {}
        else:
            raise NotImplementedError("feed '%s' is not implemented" % feed)

    def delete(self):
        """
        Delete the database
        """

        self.server.delete_db(self.name)

    def save(self, doc, **options):
        """
        Creates or Updates a document
        """

        request = self.session.post(self.database,
                                    data=json.dumps(doc),
                                    params=options)
        if request.ok:
            response = request.json()
            doc = copy.copy(doc)
            doc["_id"] = response.get("id")
            doc["_rev"] = response.get("rev")
            if isinstance(doc, dict):
                doc = Document(doc)
            return doc

        if request.status_code == 409:
            raise excepts.DocConflict("_id: %s" % doc["_id"])

        raise Exception("Can't save doc '%s' error '%s'" %
                        (doc, request.status_code))

    def update(self, docs, **options):
        options.update(docs=docs)
        response = self.session.post(self.database + "/_bulk_docs",
                                     data=json.dumps(options))

        if response.ok:
            return response.json()
        raise Exception("Error updating docs %s" % response.status_code)

    def view(self, name, **options):
        return View(name, self, **options)

    def get_doc(self, docid, default=None):
        """
        Returns a document
        """

        old_doc = self.cache.get(docid, None)
        headers = None
        if old_doc:
            headers = {'If-None-Match': old_doc[0]}

        response = self.session.get(self.database + "/" + docid,
                                    headers=headers)
        if not response.ok:
            if response.status_code == 404:
                if default is not None:
                    return default
                raise excepts.DocNotExists
            raise Exception(response.status_code)

        if old_doc and response.headers["etag"] == old_doc[0]:
            doc = old_doc[1]
        else:
            doc = Document(response.json())
            self.cache.put(docid, (response.headers["etag"], doc))
        return doc

    def info(self):
        response = self.session.get(self.database)
        if response.ok:
            return response.json()
        raise Exception(response)

    def __repr__(self):
        return "<Database %s>" % self.name
Example #27
File: context.py Project: jjmontesl/cubetl
class Context():

    def __init__(self):

        self.cli = False
        self.args = {}

        self.debug = False
        self.debug2 = False

        self.quiet = False

        self.profile = False

        self.components = OrderedDict()

        self.start_item = OrderedDict()
        self.start_nodes = []
        self.config_files = []
        self.included_files = []

        self.props = {}
        self.properties = self.props

        self.var = {}

        self.working_dir = os.getcwd()
        self.library_path = os.path.dirname(os.path.realpath(__file__)) + "/../../library"

        self.comp = Components(self)

        self._functions = {"text": functions,
                           "xml": xmlfunctions,
                           "datetime": datetime,
                           "dt": datetime,
                           "re": re,
                           "sys": sys,
                           "urllib": urllib,
                           "random": random.Random()}
        self._globals = self._functions

        class Functions():
            pass
        self.f = Functions()
        for k, v in self._functions.items():
            setattr(self.f, k, v)

        self._compiled = LRUCache(512)  # TODO: Configurable


    @staticmethod
    def _class_from_frame(fr):
        try:
            class_type = fr.f_locals['self'].__class__
        except KeyError:
            class_type = None

        return class_type

    def get(self, uid, fail=True):
        #logger.debug("Getting component: %s" % component_id)

        if uid is None:
            raise ETLException("Cannot retrieve component with id None.")

        comp = self.components.get(uid, None)

        if comp is None and fail:
            raise ETLException("Component not found with id '%s'" % uid)

        return comp

    def key(self, comp):
        for k, c in self.components.items():
            if c == comp:
                return k
        return None

    def find(self, type):
        result = []
        for comp in self.components.values():
            if isinstance(comp, type):
                result.append(comp)
        return result

    def add(self, urn, component, description=None):

        # FIXME: TODO: Allow anonymous components? these would be exported in-line with their parents.
        # This assumes that components are initialized completely (possibly better for config comprehension)
        # Also would serve as a hint for deep/shallow copying (anonymous components are always deep copied?)

        if urn is None:
            raise Exception('Tried to add an object with no URN')
        if component is None:
            raise Exception('Tried to add a null object')
        if not isinstance(component, Component):
            raise Exception('Tried to add a non Component object: %s' % component)
        if self.components.get(urn, None) is not None:
            raise Exception("Tried to add an already existing URN: %s" % urn)

        component.ctx = self
        component.urn = urn
        component.description = description

        self.components[urn] = component
        return component

    def interpolate(self, value, m=None, data=None):
        """
        Resolves expressions `${ ... }`, lambdas and functions in a value,
        with respect to the current context and the current message.

        Expressions are CubETL custom syntax for string interpolation.
        """

        # FIXME: TO BE REMOVED after migrating all interpolate calls
        if isinstance(value, dict):
            raise ETLException("Possibly invalid interpolate call!")

        if value is None:
            return None

        # If the value is a callable (function or lambda), inspect
        # its parameters. Acceptable signatures are:
        # (ctx), (m), (ctx, m)
        if callable(value):
            spec = getargspec(value)
            if len(spec.args) == 1 and spec.args[0] == 'ctx':
                value = value(self)
            elif len(spec.args) == 1 and spec.args[0] == 'm':
                value = value(m)
            elif len(spec.args) == 2 and spec.args[0] == 'ctx' and spec.args[1] == 'm':
                value = value(self, m)
            else:
                raise ETLConfigurationException("Invalid lambda expression signature: %s" % spec.args)

        # If the value is not a string, it is immediately returned
        if not isinstance(value, str):
            return value

        # Process string values

        value = value.strip()

        pos = -1
        result = str(value)

        for dstart, dend in (('${|', '|}'), ('${', '}')):
            if (pos >= -1):
                pos = result.find(dstart)
            while (pos >= 0):
                pos_end = result.find(dend)
                expr = result[pos + len(dstart):pos_end].strip()

                compiled = self._compiled.get(expr)
                try:
                    if (not compiled):
                        compiled = compile(expr, '', 'eval')
                        self._compiled.put(expr, compiled)

                    c_locals = {"m": m, "ctx": self, "f": self.f, "props": self.props, "var": self.var, "cubetl": cubetl}
                    if data:
                        c_locals.update(data)
                    res = eval(compiled, self._globals, c_locals)

                    if (self.debug2):
                        if (isinstance(res, str)):
                            logger.debug('Evaluated: %s = %r' % (expr, res if (len(res) < 100) else res[:100] + ".."))
                        else:
                            logger.debug('Evaluated: %s = %r' % (expr, res))

                except (Exception) as e:
                    exc_type, exc_value, exc_traceback = sys.exc_info()

                    caller_component = None
                    frame = inspect.currentframe()
                    for caller in inspect.getouterframes(frame):
                        fc = Context._class_from_frame(caller[0])
                        if (isclass(fc) and issubclass(fc, Component)):
                            caller_component = caller[0].f_locals['self']
                            break

                    #logger.error("Error evaluating expression %s on data: %s" % (expr, m))
                    self._eval_error_message = m

                    logger.error('Error evaluating expression "%s" called from %s:\n%s' % (expr, caller_component, ("".join(traceback.format_exception_only(exc_type, exc_value)))))
                    raise

                if (pos > 0) or (pos_end < len(result) - (len(dend))):
                    result = result[0:pos] + str(res) + result[pos_end + (len(dend)):]
                    pos = result.find(dstart)
                else:
                    # Keep type of non-string types
                    result = res
                    pos = -2

        return result

    def copy_message(self, m):
        # TODO: Create a copy-on-write message instead of actually copying (?)
        if m is None:
            return {}
        else:
            return copy.copy(m)

    def _do_process(self, process, ctx, multiple):
        # TODO: When using multiple, this should allow to yield,
        # TODO: Also, this method shall be called "consume" or something, and public

        # Reduce the OrderedDict to a dict, but interpolate its attributes in order
        item = {}
        for k in ctx.start_item.keys():
            item[k] = ctx.interpolate(ctx.start_item[k], item)
        msgs = ctx.comp.process(process, item)
        count = 0
        result = [] if multiple else None
        for m in msgs:
            count = count + 1
            if multiple:
                result.append(m)
            else:
                result = m
        return (result, count)

    def run(self, start_node, multiple=False):

        ctx = self

        if isinstance(start_node, str):
            start_node_comp = ctx.get(start_node, fail=False)
        else:
            start_node_comp = start_node

        # Launch process
        if not start_node_comp:
            logger.error("Start process '%s' not found in configuration" % start_node)
            if ctx.cli:
                sys.exit(1)
            else:
                raise Exception("Start process '%s' not found in configuration" % start_node)

        result = None
        processed = 0

        # Launch process and consume items
        try:
            logger.debug("Initializing components")
            ctx.comp.initialize(start_node_comp)

            logger.info("Processing %s" % start_node_comp)

            if ctx.profile:
                logger.warning("Profiling execution (WARNING this is SLOW) and saving results to: %s" % ctx.profile)
                cProfile.runctx("(result, processed) = self._do_process(start_node_comp, ctx, multiple=multiple)", globals(), locals(), ctx.profile)
            else:
                (result, processed) = self._do_process(start_node_comp, ctx, multiple=multiple)

            logger.debug("%s items resulted from the process" % processed)

            logger.debug("Finalizing components")
            ctx.comp.finalize(start_node_comp)

            ctx.comp.cleanup()

        except KeyboardInterrupt as e:
            logger.error("User interrupted")
            sys.exit(1)

        except Exception as e:
            '''
            exc_type, exc_value, exc_traceback = sys.exc_info()
            logger.fatal("Error during process: %s" % ", ".join((traceback.format_exception_only(exc_type, exc_value))))

            if hasattr(ctx, "eval_error_message"):
                pp = pprint.PrettyPrinter(indent=4, depth=2)
                print(pp.pformat(ctx._eval_error_message))

            traceback.print_exception(exc_type, exc_value, exc_traceback)
            '''
            raise

        return result

    def include(self, configfile):
        configfile = self.interpolate(configfile)

        # Import only once
        abspath = os.path.abspath(configfile)
        if abspath in self.included_files:
            return
        self.included_files.append(abspath)

        logger.info("Including config file: %s", configfile)
        spec = importlib.util.spec_from_file_location("configmodule", configfile)
        configmodule = importlib.util.module_from_spec(spec)
        try:
            spec.loader.exec_module(configmodule)
        except Exception as e:
            raise ETLConfigurationException("Failed to load config include file: %s" % (configfile))
        configmodule.cubetl_config(self)
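
The heart of interpolate() is an eval loop whose compile() results are
memoized in an LRU cache keyed by the expression text, so a template that
is evaluated repeatedly only pays the compilation cost once. A trimmed,
standalone sketch of that caching idea (a plain dict stands in for the
LRUCache):

    _compiled = {}

    def eval_expr(expr, env):
        code = _compiled.get(expr)
        if code is None:
            code = compile(expr, '<expr>', 'eval')  # compile once per expression
            _compiled[expr] = code
        return eval(code, {}, env)

    print(eval_expr("m['a'] + 1", {"m": {"a": 41}}))  # -> 42
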
Example #28
File: keyfs.py Project: t-8ch/devpi
class Filesystem:
    def __init__(self, basedir, notify_on_commit):
        self.basedir = basedir
        self._notify_on_commit = notify_on_commit
        self._changelog_cache = LRUCache(1000)  # is thread safe
        with self.get_sqlconn() as conn:
            row = conn.execute("select max(serial) from changelog").fetchone()
            serial = row[0]
            if serial is None:
                self.next_serial = 0
            else:
                self.next_serial = serial + 1
                # perform some crash recovery
                data = self.get_raw_changelog_entry(serial)
                changes, rel_renames = loads(data)
                check_pending_renames(str(self.basedir), rel_renames)

    def write_transaction(self, sqlconn):
        return FSWriter(self, sqlconn)

    def get_raw_changelog_entry(self, serial):
        q = "SELECT data FROM changelog WHERE serial = ?"
        with self.get_sqlconn() as conn:
            conn.text_factory = bytes
            row = conn.execute(q, (serial, )).fetchone()
            if row is not None:
                return bytes(row[0])
            return None

    def get_changes(self, serial):
        changes = self._changelog_cache.get(serial)
        if changes is None:
            data = self.get_raw_changelog_entry(serial)
            changes, rel_renames = loads(data)
            self._changelog_cache.put(serial, changes)
        return changes

    def cache_commit_changes(self, serial, changes):
        self._changelog_cache.put(serial, changes)

    def get_sqlconn(self):
        path = self.basedir.join(".sqlite")
        if not path.exists():
            with sqlite3.connect(str(path)) as conn:
                threadlog.info("DB: Creating schema")
                c = conn.cursor()
                c.execute("""
                    CREATE TABLE kv (
                        key TEXT NOT NULL PRIMARY KEY,
                        keyname TEXT,
                        serial INTEGER
                    )
                """)
                c.execute("""
                    CREATE TABLE changelog (
                        serial INTEGER PRIMARY KEY,
                        data BLOB NOT NULL
                    )
                """)
        conn = sqlite3.connect(str(path), timeout=60)
        return conn

    def db_read_typedkey(self, relpath, conn=None):
        new_conn = conn is None
        if new_conn:
            conn = self.get_sqlconn()
        q = "SELECT keyname, serial FROM kv WHERE key = ?"
        try:
            c = conn.cursor()
            row = c.execute(q, (relpath, )).fetchone()
            if row is None:
                raise KeyError(relpath)
            return tuple(row[:2])
        finally:
            if new_conn:
                conn.close()
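
Caching changelog entries in an LRUCache is safe here because a changelog
row is immutable once written: each serial is assigned exactly once. A
minimal runnable sketch of the schema and the read path using the stdlib
sqlite3 module:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute("""
        CREATE TABLE changelog (
            serial INTEGER PRIMARY KEY,
            data BLOB NOT NULL
        )
    """)
    conn.execute("INSERT INTO changelog VALUES (?, ?)", (0, b'payload'))

    conn.text_factory = bytes
    row = conn.execute(
        "SELECT data FROM changelog WHERE serial = ?", (0,)).fetchone()
    assert bytes(row[0]) == b'payload'
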
Example #29
class Context():

    def __init__(self):

        self.args = {}

        self.debug = False
        self.debug2 = False

        self.quiet = False

        self.profile = False

        self.config_files = []

        self.start_node = None
        self.start_message = {}  # Bunch()  # {}   # TODO: Review if this is definitive, compare performance

        self.props = {}
        self.properties = self.props

        self.var = {}

        self.working_dir = os.getcwd()
        self.library_path = os.path.dirname(os.path.realpath(__file__)) + "/../../library"

        self._globals = {
                         "text": functions,
                         "xml": xmlfunctions,
                         "cubetl": cubetl,
                         "datetime": datetime,
                         "re": re,
                         "sys": sys,
                         "urllib": urllib,
                         "random": random.Random()
                         }

        self._compiled = LRUCache(512)  # TODO: Configurable

        self.comp = Components(self)


    @staticmethod
    def _class_from_frame(fr):
        try:
            class_type = fr.f_locals['self'].__class__
        except KeyError:
            class_type = None

        return class_type


    def interpolate(self, m, value, data=None):

        if value is None:
            return None

        if not isinstance(value, basestring):
            return value

        data = data or {}
        value = value.strip()

        pos = -1
        result = unicode(value)

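        # pos doubles as a sentinel: -1 means "search for the next delimiter
        # pair", >= 0 is a match offset, and -2 means the whole value was
        # replaced by a non-string result, so scanning must stop.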
        for dstart, dend in (('${|', '|}'), ('${', '}')):
            if (pos >= -1):
                pos = result.find(dstart)
            while (pos >= 0):
                pos_end = result.find(dend)
                expr = result[pos + len(dstart):pos_end].strip()

                compiled = self._compiled.get(expr)
                try:
                    if (not compiled):
                        compiled = compile(expr, '', 'eval')
                        self._compiled.put(expr, compiled)

                    c_locals = { "m": m, "ctx": self, "props": self.props, "var": self.var, "cubetl": cubetl }
                    c_locals.update(data)
                    res = eval(compiled, self._globals, c_locals)

                    if (self.debug2):
                        if (isinstance(res, basestring)):
                            logger.debug('Evaluated: %s = %r' % (expr, res if (len(res) < 100) else res[:100] + ".."))
                        else:
                            logger.debug('Evaluated: %s = %r' % (expr, res))

                except Exception:
                    exc_type, exc_value, exc_traceback = sys.exc_info()

                    caller_component = None
                    frame = inspect.currentframe()
                    for caller in inspect.getouterframes(frame):
                        fc = Context._class_from_frame(caller[0])
                        if (isclass(fc) and issubclass(fc, Component)):
                            caller_component = caller[0].f_locals['self']
                            break

                    #logger.error("Error evaluating expression %s on data: %s" % (expr, m))
                    self._eval_error_message = m

                    logger.error('Error evaluating expression "%s" called from %s:\n%s' % (expr, caller_component, ("".join(traceback.format_exception_only(exc_type, exc_value)))))
                    raise

                if (pos > 0) or (pos_end < len(result) - len(dend)):
                    result = result[0:pos] + unicode(res) + result[pos_end + (len(dend)):]
                    pos = result.find(dstart)
                else:
                    # Keep non-string types
                    result = res
                    pos = -2

        return result

    def copy_message(self, m):
        if m is None:
            return {}
        else:
            return copy.copy(m)
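
Hypothetical usage of Context.interpolate() (Python 2, assuming the module-level imports resolve): each ${ ... } expression is compiled once, cached in the LRU, and evaluated against the message and context. When the expression spans the whole value, the result keeps its original type:

ctx = Context()
m = {"name": "world", "count": 3}
print(ctx.interpolate(m, u"Hello ${ m['name'] }!"))  # -> Hello world!
print(ctx.interpolate(m, u"${ m['count'] * 2 }"))    # whole-value expression keeps its type: 6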
Example #30
class FeatureLoader(object):
    def __init__(self):
        self.points_cache = LRUCache(1000)
        self.colors_cache = LRUCache(1000)
        self.features_cache = LRUCache(200)
        self.words_cache = LRUCache(200)
        self.masks_cache = LRUCache(1000)
        self.index_cache = LRUCache(200)
        self.masked_index_cache = LRUCache(200)

    def clear_cache(self):
        self.points_cache.clear()
        self.colors_cache.clear()
        self.features_cache.clear()
        self.words_cache.clear()
        self.masks_cache.clear()
        # Also drop the FLANN index caches so they cannot serve indices built
        # from features that have just been cleared.
        self.index_cache.clear()
        self.masked_index_cache.clear()

    def load_mask(self, data, image, points=None):
        masks = self.masks_cache.get(image)
        if masks is None:
            if points is None:
                points, _ = self.load_points_colors(data, image, masked=False)
            masks = data.load_features_mask(image, points[:, :2])
            self.masks_cache.put(image, masks)
        return masks

    def load_points_colors(self, data, image, masked=False):
        points = self.points_cache.get(image)
        colors = self.colors_cache.get(image)
        if points is None or colors is None:
            points, _, colors = self._load_features_nocache(data, image)
            self.points_cache.put(image, points)
            self.colors_cache.put(image, colors)
        if masked:
            mask = self.load_mask(data, image, points)
            if mask is not None:
                points = points[mask]
                colors = colors[mask]
        return points, colors

    def load_points_features_colors(self, data, image, masked=False):
        points = self.points_cache.get(image)
        features = self.features_cache.get(image)
        colors = self.colors_cache.get(image)
        if points is None or features is None or colors is None:
            points, features, colors = self._load_features_nocache(data, image)
            self.points_cache.put(image, points)
            self.features_cache.put(image, features)
            self.colors_cache.put(image, colors)
        if masked:
            mask = self.load_mask(data, image, points)
            if mask is not None:
                points = points[mask]
                features = features[mask]
                colors = colors[mask]
        return points, features, colors

    def load_features_index(self, data, image, masked=False):
        cache = self.masked_index_cache if masked else self.index_cache
        cached = cache.get(image)
        if cached is None:
            _, features, _ = self.load_points_features_colors(data, image,
                                                              masked)
            index = ft.build_flann_index(features, data.config)
            cache.put(image, (features, index))
        else:
            features, index = cached
        return index

    def load_words(self, data, image, masked):
        words = self.words_cache.get(image)
        if words is None:
            words = data.load_words(image)
            self.words_cache.put(image, words)
        if masked and words is not None:
            mask = self.load_mask(data, image)
            if mask is not None:
                words = words[mask]
        return words

    def _load_features_nocache(self, data, image):
        points, features, colors = data.load_features(image)
        if points is None:
            logger.error('Could not load features for image {}'.format(image))
        else:
            points = np.array(points[:, :3], dtype=float)
        return points, features, colors
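
Every loader above shares one memoisation shape: cache the full, unmasked arrays once, then apply the mask on the way out, so masked and unmasked callers share a single cache entry. A minimal sketch of that shape, where load_fn is a hypothetical loader and mask any boolean index (e.g. a NumPy bool array):

class MaskedLoader(object):
    """Cache full arrays; mask per call so one entry serves both code paths."""

    def __init__(self, load_fn, cache_size=1000):
        self._load_fn = load_fn
        self._cache = LRUCache(cache_size)

    def load(self, image, mask=None):
        data = self._cache.get(image)
        if data is None:
            data = self._load_fn(image)  # slow path, runs once per image
            self._cache.put(image, data)
        if mask is not None:
            data = data[mask]  # boolean indexing copies; the cache keeps the full array
        return data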
Example #31
File: service.py Project: KavenC/Linot
class Service(ServiceBase):
    CMD = 'twitch'
    SUB_FILE = 'twitch_sublist.p'
    CHECK_PERIOD = 300

    def __init__(self, name_cache_size=512):
        ServiceBase.__init__(self)
        self._sublist_lock = Lock()
        self._twitch = TwitchEngine()
        self._channel_name_cache = LRUCache(name_cache_size)

    def _setup_argument(self, cmd_group):
        cmd_group.add_argument('-subscribe', nargs='+', func=self._subscribe,
                               help='Subscribe channels and receive notification when channel goes live.\n'
                               'ex: {} -subscribe kaydada'.format(self.CMD))
        cmd_group.add_argument('-unsubscribe', nargs='+', func=self._unsubscribe,
                               help='Unsubscribe channels.\n'
                               'ex: {} -unsubscribe kaydada'.format(self.CMD))
        cmd_group.add_argument('-unsuball', action='store_true', func=self._unsub_all,
                               help="Unsubscribe all channels in Linot. I won't send any notification to you anymore.")
        cmd_group.add_argument('-listchannel', action='store_true', func=self._list_channel,
                               help="List channels you've subscribed.")
        cmd_group.add_argument('-import', nargs=1, func=self._import,
                               help='Import the list of channels a twitch user follows.\n'
                               'ex: {} -import kaydada'.format(self.CMD))

        # below, admin only
        cmd_group.add_argument('-refresh', action='store_true', func=self._refresh,
                               help=argparse.SUPPRESS)
        cmd_group.add_argument('-listusers', nargs='*', func=self._list_users,
                               help=argparse.SUPPRESS)
        cmd_group.add_direct_command(self._sub_by_url, r'twitch\.tv/(\w+)[\s\t,]*', re.IGNORECASE)

    def _start(self):
        # Load subscribe list
        try:
            logger.debug('Loading subscribe list from file')
            with open(self.SUB_FILE, 'rb') as f:
                self._sublist = pickle.load(f)
            self._calculate_channel_sub_count()
        except IOError:
            logger.debug('Subscribe list file not found, creating an empty one.')
            self._sublist = defaultdict(list)
            self._channel_sub_count = defaultdict(int)
        self._check_thread = Checker(
            self.CHECK_PERIOD, self._twitch, self.get_sublist)
        self._check_thread.start()

    def _stop(self):
        self._check_thread.stop()

    def get_sublist(self):
        with self._sublist_lock:
            return copy.copy(self._sublist)

    def _sub_by_url(self, match_iter, cmd, sender):
        logger.debug('sub by url: ' + str(match_iter))
        logger.debug('sub by url, direct cmd: ' + cmd)
        self._subscribe(match_iter, sender)

    def _calculate_channel_sub_count(self):
        self._channel_sub_count = defaultdict(int)
        for subr in self._sublist:
            for ch in self._sublist[subr]:
                self._channel_sub_count[ch] += 1

    def _import(self, twitch_user, sender):
        # Get the channels twitch_user follows and subscribe sender to them
        user = twitch_user[0]
        followed_channels = self._twitch.get_followed_channels(user)
        if followed_channels is None:
            sender.send_message('Twitch user: {} not found'.format(user))
        else:
            if len(followed_channels) > 8:
                sender.send_message('Number of followed channels is more than 8. It may take a while to process.')
            self._subscribe(followed_channels, sender)

    def _unsub_all(self, value, sender):
        # Unsubscribe all channels for sender.
        # We cannot pass self._sublist[sender] directly, since _unsubscribe
        # mutates self._sublist.
        user_sub = copy.copy(self._sublist[sender])
        self._unsubscribe(user_sub, sender)

    def _subscribe(self, chs, sender):
        # Handles user requests for subscribing channels.
        # We actually have the LinotServant follow these channels, so that we
        # can check whether they are online via the streams/followed API.

        # Prompt a message to let the user know I am still alive...
        sender.send_message('Processing ...')
        msg = io.BytesIO()

        not_found = []
        for ch in chs:
            check_name = ch.lower()
            # Skip channels already subscribed, to reduce API invocations
            if check_name in self._sublist[sender]:  # pragma: no cover
                continue
            ch_disp_name, stat = self._twitch.follow_channel(ch)
            if stat is False:
                not_found.append(ch)
            else:
                self._sublist_lock.acquire(True)
                self._sublist[sender].append(check_name)
                self._sublist_lock.release()
                self._channel_sub_count[check_name] += 1
                self._channel_name_cache.put(ch_disp_name.lower(), ch_disp_name)
                with open(self.SUB_FILE, 'wb+') as f:
                    pickle.dump(self._sublist, f)

        if len(not_found) > 0:
            print('Channel not found: ' + ' '.join(not_found), file=msg)
        print('Done', file=msg)
        sender.send_message(msg.getvalue())
        return

    def _unsubscribe(self, chs, sender):
        # Handles user requests for unsubscribing channels.
        # Prompt a message to let the user know I am still alive...
        sender.send_message('Processing ...')
        msg = io.BytesIO()

        not_found = []
        for ch in chs:
            check_name = ch.lower()
            self._sublist_lock.acquire(True)
            try:
                self._sublist[sender].remove(check_name)
            except ValueError:
                not_found.append(ch)
                self._sublist_lock.release()
                continue
            self._sublist_lock.release()
            self._channel_sub_count[check_name] -= 1
            if self._channel_sub_count[check_name] <= 0:
                # Maybe we should not unfollow, so that we don't keep
                # generating follow messages to the caster:
                # self._twitch.unfollow_channel(ch)
                self._channel_sub_count.pop(check_name, None)

        if len(self._sublist[sender]) == 0:
            self._sublist_lock.acquire(True)
            self._sublist.pop(sender)
            self._sublist_lock.release()

        with open(self.SUB_FILE, 'wb+') as f:
            pickle.dump(self._sublist, f)
        if len(not_found) > 0:
            print('Channel not found: ' + ' '.join(not_found), file=msg)
        print('Done', file=msg)
        sender.send_message(msg.getvalue())
        return

    def _list_channel(self, value, sender):
        msg = io.BytesIO()
        print('Your subscribed channels are:', file=msg)
        live_channels = self._check_thread.get_live_channels()
        for ch in self._sublist[sender]:
            if ch in [x.lower() for x in live_channels]:
                stat = '[LIVE]'
            else:
                stat = '[OFF]'
            display_name = self._channel_name_cache.get(ch)
            if display_name is None:
                display_name = self._twitch.get_channel_info(ch)['display_name']
                self._channel_name_cache.put(ch, display_name)
            print('{}\t{}'.format(stat, display_name), file=msg)
        sender.send_message(msg.getvalue())

    def _refresh(self, value, sender):
        # <Admin only>
        if sender.code == config['interface'][sender.interface_name]['admin_id']:
            self._check_thread.refresh()
            sender.send_message('Done')

    def _list_users(self, check_users, sender):
        # List all users who have subscriptions
        # <Admin only>
        if sender.code != config['interface'][sender.interface_name]['admin_id']:
            return

        user_list = self._sublist.keys()
        msg = io.StringIO()
        if len(check_users) == 0:
            # If no check_users list is given, list all users with their sub counts
            for user_index, user in enumerate(user_list):
                print(u'#{}) {}'.format(user_index, unicode(user)), file=msg)
                print(u'Subscribed count: {}'.format(len(self._sublist[user])), file=msg)
                print(u'----------------------------', file=msg)
        else:
            # list users sub channel list
            not_found = []
            for user_index in check_users:
                try:
                    index = int(user_index)
                    user = user_list[index]
                except (ValueError, IndexError):
                    not_found.append(user_index)
                    continue

                if user not in self._sublist:
                    not_found.append(user_index)
                    continue

                print(u'#{}) {}'.format(user_index, unicode(user)), file=msg)
                print(u'- Subscribed Channels: ', file=msg)
                for ch in self._sublist[user]:
                    print(unicode(ch), end=u', ', file=msg)
                print(u'', file=msg)
                print(u'- Total Count: {}'.format(len(self._sublist[user])), file=msg)
                print(u'----------------------------', file=msg)

            if len(not_found) > 0:
                print(u'Not found: ', end=u'', file=msg)
                for na in not_found:
                    print(unicode(na), end=u', ', file=msg)
                print(u'', file=msg)

        print(u'Done', file=msg)  # make sure we are sending something to user
        sender.send_message(msg.getvalue())
        return
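
The display-name lookup in _list_channel() above is a classic LRU-fronted remote call. A minimal standalone sketch, where fetch_display_name is a hypothetical stand-in for self._twitch.get_channel_info(ch)['display_name']:

def make_name_resolver(fetch_display_name, cache_size=512):
    cache = LRUCache(cache_size)

    def resolve(channel):
        key = channel.lower()  # normalize so 'KayDaDa' and 'kaydada' share an entry
        name = cache.get(key)
        if name is None:
            name = fetch_display_name(key)  # remote call happens only on a miss
            cache.put(key, name)
        return name

    return resolve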
Example #32
class DockerClient(object):
    """
    Talk to the real Docker server directly.

    Some operations can take a while (e.g. stopping a container), so we
    use a thread pool. See https://clusterhq.atlassian.net/browse/FLOC-718
    for using a custom thread pool.

    :ivar unicode namespace: A namespace prefix to add to container names
        so we don't clobber other applications interacting with Docker.
    :ivar str base_url: URL for connection to the Docker server.
    :ivar int long_timeout: Maximum time in seconds to wait for
        long-running operations, particularly pulling an image.
    :ivar LRUCache _image_cache: Mapped cache of image IDs to their data.
    """
    def __init__(self,
                 namespace=BASE_NAMESPACE,
                 base_url=None,
                 long_timeout=600):
        self.namespace = namespace
        self._client = dockerpy_client(
            version="1.15",
            base_url=base_url,
            long_timeout=timedelta(seconds=long_timeout),
        )
        self._image_cache = LRUCache(100)

    def _to_container_name(self, unit_name):
        """
        Add the namespace to the container name.

        :param unicode unit_name: The unit's name.

        :return unicode: The container's name.
        """
        return self.namespace + unit_name

    def _parse_container_ports(self, data):
        """
        Parse the ports from a data structure representing the Ports
        configuration of a Docker container in the format returned by
        ``self._client.inspect_container`` and return a list containing
        ``PortMap`` instances mapped to the container and host exposed ports.

        :param dict data: The data structure for the representation of
            container and host port mappings in a single container.
            This takes the form of the ``NetworkSettings.Ports`` portion
            of a container's state and configuration as returned by inspecting
            the container. This is a dictionary mapping container ports to a
            list of host bindings, e.g.
            "3306/tcp": [{"HostIp": "0.0.0.0","HostPort": "53306"},
                         {"HostIp": "0.0.0.0","HostPort": "53307"}]

        :return list: A list that is either empty or contains ``PortMap``
            instances.
        """
        ports = []
        for internal, hostmap in data.items():
            internal_map = internal.split(u'/')
            internal_port = internal_map[0]
            internal_port = int(internal_port)
            if hostmap:
                for host in hostmap:
                    external_port = host[u"HostPort"]
                    external_port = int(external_port)
                    portmap = PortMap(internal_port=internal_port,
                                      external_port=external_port)
                    ports.append(portmap)
        return ports

    def _parse_restart_policy(self, data):
        """
        Parse the restart policy from the configuration of a Docker container
        in the format returned by ``self._client.inspect_container`` and return
        an ``IRestartPolicy``.

        :param dict data: The data structure representing the restart policy of
            a container, e.g.

            {"Name": "policy-name", "MaximumRetryCount": 0}

        :return IRestartPolicy: The model of the restart policy.

        :raises ValueError: if an unknown policy is passed.
        """
        POLICIES = {
            u"":
            lambda data: RestartNever(),
            u"always":
            lambda data: RestartAlways(),
            u"on-failure":
            lambda data: RestartOnFailure(maximum_retry_count=data[
                u"MaximumRetryCount"] or None)
        }
        try:
            # docker will treat an unknown policy as "never".
            # We error out here, in case new policies are added.
            return POLICIES[data[u"Name"]](data)
        except KeyError:
            raise ValueError("Unknown restart policy: %r" % (data[u"Name"], ))

    def _serialize_restart_policy(self, restart_policy):
        """
        Serialize the restart policy from an ``IRestartPolicy`` to the format
        expected by the docker API.

        :param IRestartPolicy restart_policy: The model of the restart policy.

        :returns: A dictionary suitable to pass to docker

        :raises ValueError: if an unknown policy is passed.
        """
        SERIALIZERS = {
            RestartNever: lambda policy: {
                u"Name": u""
            },
            RestartAlways: lambda policy: {
                u"Name": u"always"
            },
            RestartOnFailure: lambda policy: {
                u"Name": u"on-failure",
                u"MaximumRetryCount": policy.maximum_retry_count or 0
            },
        }
        try:
            return SERIALIZERS[restart_policy.__class__](restart_policy)
        except KeyError:
            raise ValueError("Unknown restart policy: %r" % (restart_policy, ))

    def _image_not_found(self, apierror):
        """
        Inspect a ``docker.errors.APIError`` to determine if it represents a
        failure to start a container because the container's image wasn't
        found.

        :return: ``True`` if this is the case, ``False`` if the error has
            another cause.
        :rtype: ``bool``
        """
        return apierror.response.status_code == NOT_FOUND

    def _address_in_use(self, apierror):
        """
        Inspect a ``docker.errors.APIError`` to determine if it represents a
        failure to start a container because the container is configured to use
        ports that are already in use on the system.

        :return: If this is the reason, an exception to raise describing the
            problem.  Otherwise, ``None``.
        """
        # Recognize an error (without newline) like:
        #
        # Cannot start container <name>: Error starting userland proxy:
        # listen tcp <ip>:<port>: bind: address already in use
        #
        # Or (without newline) like:
        #
        # Cannot start container <name>: Bind for <ip>:<port> failed:
        # port is already allocated
        #
        # because Docker can't make up its mind about which format to use.
        parts = apierror.explanation.split(b": ")
        if parts[-1] == b"address already in use":
            ip, port = parts[-3].split()[-1].split(b":")
        elif parts[-1] == b"port is already allocated":
            ip, port = parts[-2].split()[2].split(b":")
        else:
            return None
        return AddressInUse(address=(ip, int(port)), apierror=apierror)

    def _image_data(self, image):
        """
        Supply data about an image, by either inspecting it or returning
        cached data if available.

        :param unicode image: The ID of the image.

        :return: ``dict`` representing data about the image properties.
        """
        cached_image = self._image_cache.get(image)
        if cached_image is not None:
            LOG_CACHED_IMAGE(image=image).write()
            return cached_image
        try:
            image_data = self._client.inspect_image(image)
            Message.new(message_type="flocker:node:docker:image_inspected",
                        image=image).write()
        except APIError as e:
            if e.response.status_code == NOT_FOUND:
                # Image has been deleted, so just fill in some
                # stub data so we can return *something*. This
                # should happen only for stopped containers so
                # some inaccuracy is acceptable.
                # We won't cache stub data though.
                Message.new(message_type="flocker:node:docker:image_not_found",
                            image=image).write()
                image_data = {u"Config": {u"Env": [], u"Cmd": []}}
            else:
                raise
        cached_data = ImageDataCache(command=image_data[u"Config"][u"Cmd"],
                                     environment=image_data[u"Config"][u"Env"])
        self._image_cache.put(image, cached_data)
        Message.new(message_type="flocker:node:docker:image_data_cached",
                    image=image).write()
        return cached_data

    def add(self,
            unit_name,
            image_name,
            ports=None,
            environment=None,
            volumes=(),
            mem_limit=None,
            cpu_shares=None,
            restart_policy=RestartNever(),
            command_line=None,
            swappiness=0):
        container_name = self._to_container_name(unit_name)

        if environment is not None:
            environment = environment.to_dict()
        if ports is None:
            ports = []

        restart_policy_dict = self._serialize_restart_policy(restart_policy)

        def _create():
            binds = list(
                # The "Z" mode tells Docker to "relabel file objects" on the
                # volume.  This makes things work when SELinux is enabled, at
                # least in the default configuration on CentOS 7.  See
                # <https://docs.docker.com/reference/commandline/run/>, in the
                # `--volumes-from` section (or just search for SELinux).
                u"{}:{}:Z".format(volume.node_path.path,
                                  volume.container_path.path)
                for volume in volumes)
            port_bindings = {p.internal_port: p.external_port for p in ports}
            host_config = self._client.create_host_config(
                binds=binds,
                port_bindings=port_bindings,
                restart_policy=restart_policy_dict,
            )
            # We're likely to get e.g. pvector, so make sure we're passing
            # in something JSON serializable:
            command_line_values = command_line
            if command_line_values is not None:
                command_line_values = list(command_line_values)

            memswap_limit = -1
            if swappiness != 0:
                memswap_limit = mem_limit + mem_limit * swappiness

            self._client.create_container(
                name=container_name,
                image=image_name,
                command=command_line_values,
                environment=environment,
                ports=[p.internal_port for p in ports],
                mem_limit=mem_limit,
                cpu_shares=cpu_shares,
                host_config=host_config,
                memswap_limit=memswap_limit,
            )

        def _add():
            try:
                _create()
            except APIError as e:
                if self._image_not_found(e):
                    # Pull it and try again
                    self._client.pull(image_name)
                    _create()
                else:
                    # Unrecognized, just raise it.
                    raise

            # Just because we got a response doesn't mean Docker has
            # actually updated any internal state yet! So if e.g. we did a
            # start on this container Docker might well complain it knows
            # not the container of which we speak. To prevent this we poll
            # until it does exist.
            while True:
                try:
                    self._client.start(container_name)
                except NotFound:
                    sleep(0.01)
                else:
                    break

        d = deferToThread(_add)

        def _extract_error(failure):
            failure.trap(APIError)
            code = failure.value.response.status_code
            if code == 409:
                raise AlreadyExists(unit_name)

            in_use = self._address_in_use(failure.value)
            if in_use is not None:
                # We likely can't start the container because its
                # configuration conflicts with something else happening on
                # the system.  Reflect this failure condition in a more
                # easily recognized way.
                raise in_use

            return failure

        d.addErrback(_extract_error)
        return d

    def _blocking_exists(self, container_name):
        """
        Blocking API to check if container exists.

        :param unicode container_name: The name of the container whose
            existence we're checking.

        :return: ``True`` if unit exists, otherwise ``False``.
        """
        try:
            self._client.inspect_container(container_name)
            return True
        except APIError:
            return False

    def exists(self, unit_name):
        container_name = self._to_container_name(unit_name)
        return deferToThread(self._blocking_exists, container_name)

    def _stop_container(self, container_name):
        """Attempt to stop the given container.

        There is a race condition between a process dying and
        Docker noticing that fact:

        https://github.com/docker/docker/issues/5165#issuecomment-65753753

        If we get an error indicating that this race condition happened,
        return False. This means the caller should try again. If we *do*
        successfully stop the container, return True.

        :raise APIError: If the container failed to stop for some unknown
            reason.
        :return: True if we stopped the container, False otherwise.

        """
        try:
            with start_action(action_type='flocker:docker:container_stop',
                              container=container_name):
                self._client.stop(container_name)
        except APIError as e:
            if e.response.status_code == NOT_FOUND:
                # If the container doesn't exist, we swallow the error,
                # since this method is supposed to be idempotent.
                return True
            elif e.response.status_code == INTERNAL_SERVER_ERROR:
                # Docker returns this if the process had died, but
                # hasn't noticed it yet.
                return False
            else:
                raise
        return True

    def _remove_container(self, container_name):
        """
        Attempt to remove a container.

        Assumes the given container has already been stopped.

        :param unicode container_name: The fully-namespaced name of the
            container.
        :return: True if we removed the container, False otherwise.
        """
        try:
            # The ``docker.Client.stop`` method sometimes returns a
            # 404 error, even though the container exists.
            # See https://github.com/docker/docker/issues/13088
            # Wait until the container has actually stopped running
            # before attempting to remove it.  Otherwise we are
            # likely to see: 'docker.errors.APIError: 409 Client
            # Error: Conflict ("Conflict, You cannot remove a
            # running container. Stop the container before
            # attempting removal or use -f")'
            # This code should probably be removed once the above
            # issue has been resolved. See [FLOC-1850]
            self._client.wait(container_name)

            with start_action(action_type='flocker:docker:container_remove',
                              container=container_name):
                self._client.remove_container(container_name)
        except APIError as e:
            if e.response.status_code == NOT_FOUND:
                # If the container doesn't exist, we swallow the error,
                # since this method is supposed to be idempotent.
                return True
            elif e.response.status_code == INTERNAL_SERVER_ERROR:
                # Failure to remove container - see FLOC-3262 for an example.
                return False
            else:
                raise
        return True

    def remove(self, unit_name):
        container_name = self._to_container_name(unit_name)

        def _remove():
            # Previously, this looped forever and didn't pause between loops.
            # We've arbitrarily chosen a wait interval of 0.001 seconds and
            # 1000 retries (i.e. a second of polling). These values may need
            # tuning.
            poll_until(partial(self._stop_container, container_name),
                       repeat(0.001, 1000))

            # Previously, the container remove was only tried once. Again,
            # these parameters may need tuning.
            poll_until(partial(self._remove_container, container_name),
                       repeat(0.001, 1000))

        d = deferToThread(_remove)
        return d

    def list(self):
        def _list():
            result = set()
            ids = [
                d[u"Id"] for d in self._client.containers(quiet=True, all=True)
            ]
            for i in ids:

                try:
                    data = self._client.inspect_container(i)
                except APIError as e:
                    # The container ID returned by the list API call above, may
                    # have been removed in another thread.
                    if e.response.status_code == NOT_FOUND:
                        continue
                    else:
                        raise

                state = (u"active"
                         if data[u"State"][u"Running"] else u"inactive")
                name = data[u"Name"]
                # Since tags (e.g. "busybox") aren't stable, ensure we're
                # looking at the actual image by using the hash:
                image = data[u"Image"]
                image_tag = data[u"Config"][u"Image"]
                command = data[u"Config"][u"Cmd"]
                with start_action(
                        action_type=u"flocker:node:docker:inspect_image",
                        container=i,
                        running=data[u"State"][u"Running"]):
                    image_data = self._image_data(image)
                if image_data.command == command:
                    command = None
                port_bindings = data[u"NetworkSettings"][u"Ports"]
                if port_bindings is not None:
                    ports = self._parse_container_ports(port_bindings)
                else:
                    ports = list()
                volumes = []
                binds = data[u"HostConfig"][u"Binds"]
                if binds is not None:
                    for bind_config in binds:
                        parts = bind_config.split(':', 2)
                        node_path, container_path = parts[:2]
                        volumes.append(
                            Volume(container_path=FilePath(container_path),
                                   node_path=FilePath(node_path)))
                if name.startswith(u"/" + self.namespace):
                    name = name[1 + len(self.namespace):]
                else:
                    continue
                # Retrieve environment variables for this container,
                # disregarding any environment variables that are part
                # of the image, rather than supplied in the configuration.
                unit_environment = []
                container_environment = data[u"Config"][u"Env"]
                if image_data.environment is None:
                    image_environment = []
                else:
                    image_environment = image_data.environment
                if container_environment is not None:
                    for environment in container_environment:
                        if environment not in image_environment:
                            env_key, env_value = environment.split('=', 1)
                            unit_environment.append((env_key, env_value))
                unit_environment = (Environment(
                    variables=frozenset(unit_environment))
                                    if unit_environment else None)
                # Our Unit model counts None as the value for cpu_shares and
                # mem_limit in containers without specified limits, however
                # Docker returns the values in these cases as zero, so we
                # manually convert.
                cpu_shares = data[u"Config"][u"CpuShares"]
                cpu_shares = None if cpu_shares == 0 else cpu_shares
                mem_limit = data[u"Config"][u"Memory"]
                mem_limit = None if mem_limit == 0 else mem_limit
                restart_policy = self._parse_restart_policy(
                    data[u"HostConfig"][u"RestartPolicy"])
                result.add(
                    Unit(name=name,
                         container_name=self._to_container_name(name),
                         activation_state=state,
                         container_image=image_tag,
                         ports=frozenset(ports),
                         volumes=frozenset(volumes),
                         environment=unit_environment,
                         mem_limit=mem_limit,
                         cpu_shares=cpu_shares,
                         restart_policy=restart_policy,
                         command_line=command))
            return result

        return deferToThread(_list)
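
remove() above leans on poll_until(partial(...), repeat(0.001, 1000)) to retry the stop/remove steps. A minimal sketch of the assumed poll_until semantics — call the predicate until it returns something truthy, sleeping for the next interval after each falsy result — not flocker's actual implementation:

from functools import partial
from itertools import repeat
from time import sleep

def poll_until_sketch(predicate, intervals):
    for interval in intervals:
        result = predicate()
        if result:
            return result
        sleep(interval)
    raise RuntimeError("predicate never became truthy")

# e.g. poll_until_sketch(partial(stop_container, name), repeat(0.001, 1000))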
Example #33
class DiscoveryProtocol(kademlia.WireInterface):

    """
    ## Packet Data
    All packets contain an `Expiration` date to guard against replay attacks.
    The date should be interpreted as a UNIX timestamp.
    The receiver should discard any packet whose `Expiration` value is in the past.
    """
    version = 4
    expiration = 60  # let messages expire after N seconds
    cmd_id_map = dict(ping=1, pong=2, find_node=3, neighbours=4)
    rev_cmd_id_map = dict((v, k) for k, v in cmd_id_map.items())

    # number of required top-level list elements for each cmd_id.
    # elements beyond this length are trimmed.
    cmd_elem_count_map = dict(ping=4, pong=3, find_node=2, neighbours=2)

    encoders = dict(cmd_id=chr,
                    expiration=rlp.sedes.big_endian_int.serialize)

    decoders = dict(cmd_id=safe_ord,
                    expiration=rlp.sedes.big_endian_int.deserialize)

    def __init__(self, app, transport):
        self.app = app
        self.transport = transport
        self.privkey = decode_hex(app.config['node']['privkey_hex'])
        self.pubkey = crypto.privtopub(self.privkey)
        self.nodes = LRUCache(2048)   # nodeid -> Node; FIXME: should be loaded
        self.this_node = Node(self.pubkey, self.transport.address)
        self.kademlia = KademliaProtocolAdapter(self.this_node, wire=self)
        this_enode = utils.host_port_pubkey_to_uri(self.app.config['discovery']['listen_host'],
                                                   self.app.config['discovery']['listen_port'],
                                                   self.pubkey)
        log.info('starting discovery proto', this_enode=this_enode)

    def get_node(self, nodeid, address=None):
        """Return the existing node or create a new one; update its address if supplied."""
        assert isinstance(nodeid, bytes)
        assert len(nodeid) == 512 // 8
        assert address or self.nodes.get(nodeid)
        if not self.nodes.get(nodeid):
            self.nodes.put(nodeid, Node(nodeid, address))
        node = self.nodes.get(nodeid)
        if address:
            assert isinstance(address, Address)
            node.address = address
        assert node.address
        return node

    def sign(self, msg):
        """
        signature: sign(privkey, sha3(packet-type || packet-data))
        signature: sign(privkey, sha3(pubkey || packet-type || packet-data))
            // implementation w/MCD
        """
        msg = crypto.sha3(msg)
        return crypto.sign(msg, self.privkey)

    def pack(self, cmd_id, payload):
        """
        UDP packets are structured as follows:

        hash || signature || packet-type || packet-data
        packet-type: single byte < 2**7 // valid values are [1,4]
        packet-data: RLP encoded list. Packet properties are serialized in the order in
                    which they're defined. See packet-data below.

        Offset  |
        0       | MDC       | Ensures integrity of packet,
        65      | signature | Ensures authenticity of sender, `SIGN(sender-privkey, MDC)`
        97      | type      | Single byte in range [1, 4] that determines the structure of Data
        98      | data      | RLP encoded, see section Packet Data

        The packets are signed and authenticated. The sender's Node ID is determined by
        recovering the public key from the signature.

            sender-pubkey = ECRECOVER(Signature)

        The integrity of the packet can then be verified by computing the
        expected MDC of the packet as:

            MDC = SHA3(sender-pubkey || type || data)

        As an optimization, implementations may look up the public key by
        the UDP sending address and compute MDC before recovering the sender ID.
        If the MDC values do not match, the packet can be dropped.
        """
        assert cmd_id in self.cmd_id_map.values()
        assert isinstance(payload, list)

        cmd_id = str_to_bytes(self.encoders['cmd_id'](cmd_id))
        expiration = self.encoders['expiration'](int(time.time() + self.expiration))
        encoded_data = rlp.encode(payload + [expiration])
        signed_data = crypto.sha3(cmd_id + encoded_data)
        signature = crypto.sign(signed_data, self.privkey)
        # assert crypto.verify(self.pubkey, signature, signed_data)
        # assert self.pubkey == crypto.ecdsa_recover(signed_data, signature)
        assert len(signature) == 65
        mdc = crypto.sha3(signature + cmd_id + encoded_data)
        assert len(mdc) == 32
        return mdc + signature + cmd_id + encoded_data

    def unpack(self, message):
        """
        macSize  = 256 / 8 = 32
        sigSize  = 520 / 8 = 65
        headSize = macSize + sigSize = 97
        hash, sig, sigdata := buf[:macSize], buf[macSize:headSize], buf[headSize:]
        shouldhash := crypto.Sha3(buf[macSize:])
        """
        mdc = message[:32]
        if mdc != crypto.sha3(message[32:]):
            log.debug('packet with wrong mdc')
            raise WrongMAC()
        signature = message[32:97]
        assert len(signature) == 65
        signed_data = crypto.sha3(message[97:])
        remote_pubkey = crypto.ecdsa_recover(signed_data, signature)
        assert len(remote_pubkey) == 512 // 8
        # if not crypto.verify(remote_pubkey, signature, signed_data):
        #     raise InvalidSignature()
        cmd_id = self.decoders['cmd_id'](message[97])
        cmd = self.rev_cmd_id_map[cmd_id]
        payload = rlp.decode(message[98:], strict=False)
        assert isinstance(payload, list)
        # ignore excessive list elements as required by EIP-8.
        payload = payload[:self.cmd_elem_count_map.get(cmd, len(payload))]
        return remote_pubkey, cmd_id, payload, mdc

    def receive(self, address, message):
        log.debug('<<< message', address=address)
        assert isinstance(address, Address)
        try:
            remote_pubkey, cmd_id, payload, mdc = self.unpack(message)
            # Note: as of discovery version 4, expiration is the last element for all
            # packets. This might not be the case for a later version, but just popping
            # the last element is good enough for now.
            expiration = self.decoders['expiration'](payload.pop())
            if time.time() > expiration:
                raise PacketExpired()
        except DefectiveMessage:
            return
        cmd = getattr(self, 'recv_' + self.rev_cmd_id_map[cmd_id])
        nodeid = remote_pubkey
        remote = self.get_node(nodeid, address)
        log.debug("Dispatching received message", local=self.this_node, remoteid=remote,
                  cmd=self.rev_cmd_id_map[cmd_id])
        cmd(nodeid, payload, mdc)

    def send(self, node, message):
        assert node.address
        log.debug('>>> message', address=node.address)
        self.transport.send(node.address, message)

    def send_ping(self, node):
        """
        ### Ping (type 0x01)

        Ping packets can be sent and received at any time. The receiver should
        reply with a Pong packet and update the IP/Port of the sender in its
        node table.

        PingNode packet-type: 0x01
        struct PingNode             <= 59 bytes
        {
            h256 version = 0x3;     <= 1
            Endpoint from;          <= 23
            Endpoint to;            <= 23
            unsigned expiration;    <= 9
        };

        struct Endpoint             <= 24 == [17,3,3]
        {
            unsigned address; // BE encoded 32-bit or 128-bit unsigned (layer3 address; size determines ipv4 vs ipv6)
            unsigned udpPort; // BE encoded 16-bit unsigned
            unsigned tcpPort; // BE encoded 16-bit unsigned
        }
        """
        assert isinstance(node, type(self.this_node)) and node != self.this_node
        log.debug('>>> ping', remoteid=node)
        version = rlp.sedes.big_endian_int.serialize(self.version)
        ip = self.app.config['discovery']['listen_host']
        udp_port = self.app.config['discovery']['listen_port']
        tcp_port = self.app.config['p2p']['listen_port']
        payload = [version,
                   Address(ip, udp_port, tcp_port).to_endpoint(),
                   node.address.to_endpoint()]
        assert len(payload) == 3
        message = self.pack(self.cmd_id_map['ping'], payload)
        self.send(node, message)
        return message[:32]  # return the MDC to identify pongs

    def recv_ping(self, nodeid, payload, mdc):
        """
        update ip, port in node table
        Addresses can only be learned by ping messages
        """
        if not len(payload) == 3:
            log.error('invalid ping payload', payload=payload)
            return
        node = self.get_node(nodeid)
        log.debug('<<< ping', node=node)
        remote_address = Address.from_endpoint(*payload[1])  # from address
        #my_address = Address.from_endpoint(*payload[2])  # my address
        self.get_node(nodeid).address.update(remote_address)
        self.kademlia.recv_ping(node, echo=mdc)

    def send_pong(self, node, token):
        """
        ### Pong (type 0x02)

        Pong is the reply to a Ping packet.

        Pong packet-type: 0x02
        struct Pong                 <= 66 bytes
        {
            Endpoint to;
            h256 echo;
            unsigned expiration;
        };
        """
        log.debug('>>> pong', remoteid=node)
        payload = [node.address.to_endpoint(), token]
        assert len(payload[0][0]) in (4, 16), payload
        message = self.pack(self.cmd_id_map['pong'], payload)
        self.send(node, message)

    def recv_pong(self, nodeid, payload, mdc):
        if not len(payload) == 2:
            log.error('invalid pong payload', payload=payload)
            return
        assert len(payload[0]) == 3, payload

        # Verify address is valid
        Address.from_endpoint(*payload[0])
        echoed = payload[1]
        if self.nodes.get(nodeid):
            node = self.get_node(nodeid)
            self.kademlia.recv_pong(node, echoed)
        else:
            log.debug('<<< unexpected pong from unknown node')

    def send_find_node(self, node, target_node_id):
        """
        ### Find Node (type 0x03)

        Find Node packets are sent to locate nodes close to a given target ID.
        The receiver should reply with a Neighbors packet containing the `k`
        nodes closest to target that it knows about.

        FindNode packet-type: 0x03
        struct FindNode             <= 76 bytes
        {
            NodeId target; // Id of a node. The responding node will send back nodes closest to the target.
            unsigned expiration;
        };
        """
        assert is_integer(target_node_id)
        target_node_id = utils.int_to_big_endian(target_node_id).rjust(kademlia.k_pubkey_size // 8, b'\0')
        assert len(target_node_id) == kademlia.k_pubkey_size // 8
        log.debug('>>> find_node', remoteid=node)
        message = self.pack(self.cmd_id_map['find_node'], [target_node_id])
        self.send(node, message)

    def recv_find_node(self, nodeid, payload, mdc):
        node = self.get_node(nodeid)
        log.debug('<<< find_node', remoteid=node)
        assert len(payload[0]) == kademlia.k_pubkey_size // 8
        target = utils.big_endian_to_int(payload[0])
        self.kademlia.recv_find_node(node, target)

    def send_neighbours(self, node, neighbours):
        """
        ### Neighbors (type 0x04)

        Neighbors is the reply to Find Node. It contains up to `k` nodes that
        the sender knows which are closest to the requested `Target`.

        Neighbors packet-type: 0x04
        struct Neighbours           <= 1423
        {
            list nodes: struct Neighbour    <= 88: 1411; 76: 1219
            {
                inline Endpoint endpoint;
                NodeId node;
            };

            unsigned expiration;
        };
        """
        assert isinstance(neighbours, list)
        assert not neighbours or isinstance(neighbours[0], Node)
        nodes = []
        neighbours = sorted(neighbours)
        for n in neighbours:
            nodes.append(n.address.to_endpoint() + [n.pubkey])
        log.debug('>>> neighbours', remoteid=node, count=len(nodes), local=self.this_node,
                  neighbours=neighbours)
        # FIXME: don't break the UDP packet size limit; chunk the message (also when receiving)
        message = self.pack(self.cmd_id_map['neighbours'], [nodes[:12]])  # FIXME
        self.send(node, message)

    def recv_neighbours(self, nodeid, payload, mdc):
        remote = self.get_node(nodeid)
        assert len(payload) == 1
        neighbours_lst = payload[0]
        assert isinstance(neighbours_lst, list)

        neighbours = []
        for n in neighbours_lst:
            nodeid = n.pop()
            address = Address.from_endpoint(*n)
            node = self.get_node(nodeid, address)
            assert node.address
            neighbours.append(node)

        log.debug('<<< neighbours', remoteid=remote, local=self.this_node, neighbours=neighbours,
                  count=len(neighbours_lst))
        self.kademlia.recv_neighbours(remote, neighbours)
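
The pack()/unpack() docstrings above describe the frame layout mdc || signature || packet-type || packet-data. A toy Python 2 round-trip of just the framing, with hashlib.sha256 standing in for sha3 and a fixed dummy signature, so only the MDC integrity check is exercised:

import hashlib

DUMMY_SIG = b"\x00" * 65  # stands in for sign(privkey, sha3(cmd_id || data))

def pack_frame(cmd_id, data):
    body = DUMMY_SIG + chr(cmd_id) + data
    return hashlib.sha256(body).digest() + body  # mdc || sig || type || data

def unpack_frame(frame):
    mdc, body = frame[:32], frame[32:]
    if hashlib.sha256(body).digest() != mdc:
        raise ValueError("wrong mdc")
    return ord(body[65]), body[66:]  # (cmd_id, encoded payload)

assert unpack_frame(pack_frame(1, b"payload")) == (1, b"payload")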
Example #34
class Registry(object):
    """ A component registry.  The component registry supports the
    Python mapping interface and can be used as you might a regular
    dictionary.  It also supports more advanced registrations and
    lookups that include a ``requires`` argument and a ``name`` via
    its ``register`` and ``lookup`` methods.  It may be treated as a
    component registry by using its ``resolve`` method."""
    def __init__(self, dict=None, **kwargs):
        self.data = {}
        self._lkpcache = LRUCache(1000)
        if dict is not None:
            self.update(dict)
        if len(kwargs):
            self.update(kwargs)
        self.listener_registered = False  # becomes True once a listener is registered

    @property
    def _dictmembers(self):
        D = {}
        norequires = self.data.get((), {})
        for k, v in norequires.items():
            provides, name = k
            if name == '':
                D[provides] = v
        return D

    def __cmp__(self, dict):
        if isinstance(dict, Registry):
            return cmp(self.data, dict.data)
        else:
            return cmp(self._dictmembers, dict)

    def __len__(self):
        return len(self._dictmembers)

    def __getitem__(self, key):
        notrequires = self.data.get((), {})
        return notrequires[(key, '')]

    def __setitem__(self, key, val):
        self.register(key, val)

    def __delitem__(self, key):
        self._lkpcache.clear()
        notrequires = self.data.get((), {})
        try:
            del notrequires[(key, '')]
        except KeyError:
            raise KeyError(key)

    def clear(self, full=False):
        if full:
            self.data = {}
        else:
            notrequires = self.data.get((), {})
            for k, v in notrequires.items():
                provides, name = k
                if name == '':
                    del notrequires[k]
        self._lkpcache.clear()

    def copy(self):
        import copy
        return copy.copy(self)

    def items(self):
        return self._dictmembers.items()

    def keys(self):
        return self._dictmembers.keys()

    def values(self):
        return self._dictmembers.values()

    def iteritems(self):
        return iter(self.items())

    def iterkeys(self):
        return iter(self.keys())

    def itervalues(self):
        return iter(self.values())

    def __contains__(self, key):
        return key in self._dictmembers

    has_key = __contains__

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    @classmethod
    def fromkeys(cls, iterable, value=None):
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def update(self, dict=None, **kw):
        if dict is not None:
            for k, v in dict.items():
                self.register(k, v)
        for k, v in kw.items():
            self.register(k, v)

    def setdefault(self, key, failobj=None):
        self._lkpcache.clear()
        val = self.get(key, default=failobj)
        if val is failobj:
            self[key] = failobj
        return self[key]

    def __iter__(self):
        return iter(self._dictmembers)

    def pop(self, key, *args):
        if len(args) > 1:
            raise TypeError("pop expected at most 2 arguments, got %d"
                            % (1 + len(args)))
        try:
            value = self[key]
        except KeyError:
            if args:
                return args[0]
            raise
        del self[key]
        return value

    def popitem(self):
        try:
            k, v = self.iteritems().next()
        except StopIteration:
            raise KeyError('container is empty')
        del self[k]
        return (k, v)

    def register(self, provides, component, *requires, **kw):
        """ Register a component """
        name = kw.get('name', '')
        if name is ALL:
            raise ValueError('ALL cannot be used in a registration as a name')
        self._lkpcache.clear()
        if provides is _subscribers:
            self.listener_registered = True
        info = self.data.setdefault(requires, {})
        info[(provides, name)] = component
        components = info.setdefault((provides, ALL), [])
        components.append(component)

    def unregister(self, provides, component, *requires, **kw):
        self._lkpcache.clear()
        name = kw.get('name', '')
        if name is ALL:
            del self.data[requires]
            return
        info = self.data.get(requires, {})
        del info[(provides, name)]
        components = info.get((provides, ALL), [])
        components.remove(component)
        if not components:
            del self.data[requires]

    def subscribe(self, fn, *requires, **kw):
        name = kw.get('name', '')
        if name is ALL:
            raise ValueError('ALL may not be used as a name to subscribe')
        newkw = {'name': name, 'default': _marker}
        subscribers = self.lookup(_subscribers, *requires, **newkw)
        if subscribers is _marker:
            subscribers = []
        subscribers.append(fn)
        self.register(_subscribers, subscribers, *requires, **kw)

    def unsubscribe(self, fn, *requires, **kw):
        name = kw.get('name', '')
        if name is ALL:
            raise ValueError('ALL may not be used as a name to unsubscribe')
        newkw = {'name': name, 'default': _marker}
        subscribers = self.lookup(_subscribers, *requires, **newkw)
        if subscribers is _marker:
            subscribers = []
        if fn in subscribers:
            subscribers.remove(fn)

    def notify(self, *objects, **kw):
        if not self.listener_registered:
            return  # optimization
        subscribers = self.resolve(_subscribers, *objects, **kw)
        name = kw.get('name', '')
        if subscribers is not None:
            if name is ALL:
                for subscriberlist in subscribers:
                    for subscriber in subscriberlist:
                        subscriber(*objects)
            else:
                for subscriber in subscribers:
                    subscriber(*objects)

    def _lookup(self, provides, name, default, requires, default_requires):
        # the requires and default_requires arguments *must* be
        # hashable sequences of tuples composed of hashable objects
        reg = self.data

        cachekey = (provides, requires, name, default_requires)
        cached = self._lkpcache.get(cachekey, _marker)

        if cached is _marker:
            combinations = cached_augmented_product(requires, default_requires)
            regkey = (provides, name)
            for combo in combinations:
                try:
                    result = reg[combo][regkey]
                    self._lkpcache.put(cachekey, result)
                    return result
                except KeyError:
                    pass

            self._lkpcache.put(cachekey, _notfound)
            cached = _notfound

        if cached is _notfound:
            if default is _missing:
                raise LookupError(
                    "Couldn't find a component providing %s for requires "
                    "args %r with name `%s`" %
                    (provides, list(requires), name))
            return default

        return cached

    def lookup(self, provides, *requires, **kw):
        req = []
        for val in requires:
            if not hasattr(val, '__iter__'):
                req.append((val, ))
            else:
                req.append(tuple(val))
        name = kw.get('name', '')
        extras = ((None, ), ) * len(req)
        default = kw.get('default', _missing)
        return self._lookup(provides, name, default, tuple(req), extras)

    def resolve(self, provides, *objects, **kw):
        requires = tuple(
            [directlyprovidedby(obj) + alsoprovidedby(obj) for obj in objects])
        extras = tuple([defaultprovidedby(obj) for obj in objects])
        name = kw.get('name', '')
        default = kw.get('default', _missing)
        return self._lookup(provides, name, default, requires, extras)
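
A minimal usage sketch for the registry above. This is hedged: the enclosing class name is not shown in this excerpt, so `Registry` is an assumed name here, and the exact requires-matching depends on `cached_augmented_product`, which is defined elsewhere in the source.

registry = Registry()  # hypothetical name for the class above
GREETING = 'greeting'  # `provides` markers may be any hashable object

class Request(object):
    pass

# register() stores the component under (requires, (provides, name))
# and clears the lookup LRU cache.
registry.register(GREETING, 'hello', Request)

# lookup() wraps each requires argument in a tuple and consults the
# LRU lookup cache; `default` is returned when nothing matches.
print(registry.lookup(GREETING, Request))            # 'hello'
print(registry.lookup(GREETING, int, default=None))  # None
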
Example #35
File: context.py  Project: perryhau/cubetl
class Context:
    def __init__(self):

        self.args = {}

        self.debug = False
        self.debug2 = False

        self.quiet = False

        self.config_files = []

        self.start_node = None
        self.start_message = {}

        self.props = {}

        self._globals = {"text": text, "cubetl": cubetl}

        self._compiled = LRUCache(512)  # TODO: Configurable

        self.comp = Components(self)

    @staticmethod
    def _class_from_frame(fr):
        try:
            class_type = fr.f_locals["self"].__class__
        except KeyError:
            class_type = None

        return class_type

    def interpolate(self, m, value, data={}):

        # TODO: Naive interpolation

        # TODO: We are enforcing unicode to work around Python Spring, which seems to give strings, not unicode.
        # This should not be necessary and is possibly bad practice.
        pos = -1
        result = unicode(value)

        for dstart, dend in (("${|", "|}"), ("${", "}")):
            if pos >= -1:
                pos = result.find(dstart)
            while pos >= 0:
                pos_end = result.find(dend)
                expr = result[pos + len(dstart) : pos_end].strip()

                compiled = self._compiled.get(expr)
                try:
                    if not compiled:
                        compiled = compile(expr, "", "eval")
                        self._compiled.put(expr, compiled)

                    c_locals = {"m": m, "ctx": self, "cubetl": cubetl}
                    c_locals.update(data)
                    res = eval(compiled, self._globals, c_locals)

                    if self.debug2:
                        if isinstance(res, basestring):
                            logger.debug("Evaluated: %s = %r" % (expr, res if (len(res) < 100) else res[:100] + ".."))
                        else:
                            logger.debug("Evaluated: %s = %r" % (expr, res))

                except Exception as e:
                    exc_type, exc_value, exc_traceback = sys.exc_info()

                    caller_component = None
                    frame = inspect.currentframe()
                    for caller in inspect.getouterframes(frame):
                        fc = Context._class_from_frame(caller[0])
                        if isclass(fc) and issubclass(fc, Component):
                            caller_component = caller[0].f_locals["self"]
                            break

                    # logger.error("Error evaluating expression %s on data: %s" % (expr, m))
                    raise Exception(
                        'Error evaluating expression "%s" called from %s:\n%s'
                        % (expr, caller_component, ("".join(traceback.format_exception_only(exc_type, exc_value))))
                    )

                if (pos > 0) or (pos_end < len(result) - (len(dend))):
                    result = result[0:pos] + unicode(res) + result[pos_end + (len(dend)) :]
                    pos = result.find(dstart)
                else:
                    # Keep non-string types
                    result = res
                    pos = -2

        return result

    def copy_message(self, m):
        return copy.copy(m)
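
A hedged usage sketch of the interpolation above (it assumes the cubetl module-level imports that Context depends on are importable; the expressions are illustrative only):

ctx = Context()
m = {'name': 'world'}

# The first call compiles 'm["name"]' and stores the code object in the
# LRUCache; later calls with the same expression skip compile().
print(ctx.interpolate(m, u'Hello ${ m["name"] }!'))  # Hello world!

# When the whole value is one expression, the non-string result is
# returned as-is (the "Keep non-string types" branch above).
print(ctx.interpolate(m, u'${ 6 * 7 }'))  # 42
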
Example #36
class Service(ServiceBase):
    CMD = 'twitch'
    SUB_FILE = 'twitch_sublist.p'
    CHECK_PERIOD = 300

    def __init__(self, name_cache_size=512):
        ServiceBase.__init__(self)
        self._sublist_lock = Lock()
        self._twitch = TwitchEngine()
        self._channel_name_cache = LRUCache(name_cache_size)

    def _setup_argument(self, cmd_group):
        cmd_group.add_argument(
            '-subscribe',
            nargs='+',
            func=self._subscribe,
            help=
            'Subscribe channels and receive notification when channel goes live.\n'
            'ex: {} -subscribe kaydada'.format(self.CMD))
        cmd_group.add_argument('-unsubscribe',
                               nargs='+',
                               func=self._unsubscribe,
                               help='Unsubscribe channels.\n'
                               'ex: {} -unsubscribe kaydada'.format(self.CMD))
        cmd_group.add_argument(
            '-unsuball',
            action='store_true',
            func=self._unsub_all,
            help=
            "Unsubscribe all channels in Linot. I won't send any notification to you anymore."
        )
        cmd_group.add_argument('-listchannel',
                               action='store_true',
                               func=self._list_channel,
                               help="List channels you've subscribed.")
        cmd_group.add_argument(
            '-import',
            nargs=1,
            func=self._import,
            help='Import the following list of a twitch user.\n'
            'ex: {} -import kaydada'.format(self.CMD))

        # below, admin only
        cmd_group.add_argument('-refresh',
                               action='store_true',
                               func=self._refresh,
                               help=argparse.SUPPRESS)
        cmd_group.add_argument('-listusers',
                               nargs='*',
                               func=self._list_users,
                               help=argparse.SUPPRESS)
        cmd_group.add_direct_command(self._sub_by_url,
                                     'twitch\.tv/(\w+)[\s\t,]*', re.IGNORECASE)

    def _start(self):
        # Load subscribe list
        try:
            logger.debug('Loading subscribe list from file')
            self._sublist = pickle.load(open(self.SUB_FILE, 'rb'))
            self._calculate_channel_sub_count()
        except IOError:
            logger.debug('Subscribe list file not found, create empty.')
            self._sublist = defaultdict(list)
            self._channel_sub_count = defaultdict(int)
        self._check_thread = Checker(self.CHECK_PERIOD, self._twitch,
                                     self.get_sublist)
        self._check_thread.start()

    def _stop(self):
        self._check_thread.stop()

    def get_sublist(self):
        self._sublist_lock.acquire(True)
        local_sublist = copy.copy(self._sublist)
        self._sublist_lock.release()
        return local_sublist

    def _sub_by_url(self, match_iter, cmd, sender):
        logger.debug('sub by url: ' + str(match_iter))
        logger.debug('sub by url, direct cmd: ' + cmd)
        self._subscribe(match_iter, sender)

    def _calculate_channel_sub_count(self):
        self._channel_sub_count = defaultdict(int)
        for subr in self._sublist:
            for ch in self._sublist[subr]:
                self._channel_sub_count[ch] += 1

    def _import(self, twitch_user, sender):
        # get the following list of twitch_user and subscribe them for sender
        user = twitch_user[0]
        followed_channels = self._twitch.get_followed_channels(user)
        if followed_channels is None:
            sender.send_message('Twitch user: {} not found'.format(user))
        else:
            if len(followed_channels) > 8:
                sender.send_message(
                    'Number of followed channels is more than 8. It may take a while to process.'
                )
            self._subscribe(followed_channels, sender)

    def _unsub_all(self, value, sender):
        # unsubscribe all channels for sender
        # we can not pass self._sublist[sender] directly, since _unsubscribe
        # mutates it while iterating
        user_sub = copy.copy(self._sublist[sender])
        self._unsubscribe(user_sub, sender)

    def _subscribe(self, chs, sender):
        # Handles user request for subscribing channels
        # We actually have the LinotServant follow these channels
        # so that we can check whether they are online via the streams/followed API

        # send a message to let the user know I am still alive...
        sender.send_message('Processing ...')
        msg = io.BytesIO()

        not_found = []
        for ch in chs:
            check_name = ch.lower()
            # reduce api invocation
            if check_name in self._sublist[sender]:  # pragma: no cover
                continue
            ch_disp_name, stat = self._twitch.follow_channel(ch)
            if stat is False:
                not_found.append(ch)
            else:
                self._sublist_lock.acquire(True)
                self._sublist[sender].append(check_name)
                self._sublist_lock.release()
                self._channel_sub_count[check_name] += 1
                self._channel_name_cache.put(ch_disp_name.lower(),
                                             ch_disp_name)
                pickle.dump(self._sublist, open(self.SUB_FILE, 'wb+'))

        if len(not_found) > 0:
            print('Channel not found: ' + ' '.join(not_found), file=msg)
        print('Done', file=msg)
        sender.send_message(msg.getvalue())
        return

    def _unsubscribe(self, chs, sender):
        # send a message to let the user know I am still alive...
        sender.send_message('Processing ...')
        msg = io.BytesIO()

        # Handles user request for unsubscribing channels
        not_found = []
        for ch in chs:
            check_name = ch.lower()
            self._sublist_lock.acquire(True)
            try:
                self._sublist[sender].remove(check_name)
            except ValueError:
                not_found.append(ch)
                self._sublist_lock.release()
                continue
            self._sublist_lock.release()
            self._channel_sub_count[check_name] -= 1
            if self._channel_sub_count[check_name] <= 0:
                # maybe we can try to not unfollow, so that we don't keep
                # generating follow message to the caster
                # self._twitch.unfollow_channel(ch)
                self._channel_sub_count.pop(check_name, None)

        if len(self._sublist[sender]) == 0:
            self._sublist_lock.acquire(True)
            self._sublist.pop(sender)
            self._sublist_lock.release()

        pickle.dump(self._sublist, open(self.SUB_FILE, 'wb+'))
        if len(not_found) > 0:
            print('Channel not found: ' + ' '.join(not_found), file=msg)
        print('Done', file=msg)
        sender.send_message(msg.getvalue())
        return

    def _list_channel(self, value, sender):
        msg = io.BytesIO()
        print('Your subscribed channels are:', file=msg)
        live_channels = self._check_thread.get_live_channels()
        for ch in self._sublist[sender]:
            if ch in [x.lower() for x in live_channels]:
                stat = '[LIVE]'
            else:
                stat = '[OFF]'
            display_name = self._channel_name_cache.get(ch)
            if display_name is None:
                display_name = self._twitch.get_channel_info(
                    ch)['display_name']
                self._channel_name_cache.put(ch, display_name)
            print('{}\t{}'.format(stat, display_name), file=msg)
        sender.send_message(msg.getvalue())

    def _refresh(self, value, sender):
        # <Admin only>
        if sender.code == config['interface'][
                sender.interface_name]['admin_id']:
            self._check_thread.refresh()
            sender.send_message('Done')

    def _list_users(self, check_users, sender):
        # List all user who has subscription
        # <Admin only>
        if sender.code != config['interface'][
                sender.interface_name]['admin_id']:
            return

        user_list = self._sublist.keys()
        msg = io.StringIO()
        if len(check_users) == 0:
            # if no check_users list is given, list all users with their sub counts
            for user_index, user in enumerate(user_list):
                print(u'#{}) {}'.format(user_index, unicode(user)), file=msg)
                print(u'Subscribed count: {}'.format(len(self._sublist[user])),
                      file=msg)
                print(u'----------------------------', file=msg)
        else:
            # list users sub channel list
            not_found = []
            for user_index in check_users:
                try:
                    index = int(user_index)
                    user = user_list[index]
                except (ValueError, IndexError):
                    not_found.append(user_index)
                    continue

                if user not in self._sublist:
                    not_found.append(user_index)
                    continue

                print(u'#{}) {}'.format(user_index, unicode(user)), file=msg)
                print(u'- Subscribed Channels: ', file=msg)
                for ch in self._sublist[user]:
                    print(unicode(ch), end=u', ', file=msg)
                print(u'', file=msg)
                print(u'- Total Count: {}'.format(len(self._sublist[user])),
                      file=msg)
                print(u'----------------------------', file=msg)

            if len(not_found) > 0:
                print(u'Not found: ', end=u'', file=msg)
                for na in not_found:
                    print(unicode(na), end=u', ', file=msg)
                print(u'', file=msg)

        print(u'Done', file=msg)  # make sure we are sending something to user
        sender.send_message(msg.getvalue())
        return
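
The display-name lookup in _list_channel above is a cache-aside pattern: try the LRUCache, fall back to the Twitch API on a miss, and remember the answer. A hedged standalone restatement (the `twitch` argument stands in for a TwitchEngine-like object; this helper is not part of the original source):

def display_name(cache, twitch, channel):
    name = cache.get(channel)
    if name is None:  # miss: fetch from the API and cache it
        name = twitch.get_channel_info(channel)['display_name']
        cache.put(channel, name)
    return name

name_cache = LRUCache(512)
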
Example #37
File: keyfs.py  Project: t-8ch/devpi
class Filesystem:
    def __init__(self, basedir, notify_on_commit):
        self.basedir = basedir
        self._notify_on_commit = notify_on_commit
        self._changelog_cache = LRUCache(1000)  # is thread safe
        with self.get_sqlconn() as conn:
            row = conn.execute("select max(serial) from changelog").fetchone()
            serial = row[0]
            if serial is None:
                self.next_serial = 0
            else:
                self.next_serial = serial + 1
                # perform some crash recovery
                data = self.get_raw_changelog_entry(serial)
                changes, rel_renames = loads(data)
                check_pending_renames(str(self.basedir), rel_renames)

    def write_transaction(self, sqlconn):
        return FSWriter(self, sqlconn)

    def get_raw_changelog_entry(self, serial):
        q = "SELECT data FROM changelog WHERE serial = ?"
        with self.get_sqlconn() as conn:
            conn.text_factory = bytes
            row = conn.execute(q, (serial,)).fetchone()
            if row is not None:
                return bytes(row[0])
            return None

    def get_changes(self, serial):
        changes = self._changelog_cache.get(serial)
        if changes is None:
            data = self.get_raw_changelog_entry(serial)
            changes, rel_renames = loads(data)
            self._changelog_cache.put(serial, changes)
        return changes

    def cache_commit_changes(self, serial, changes):
        self._changelog_cache.put(serial, changes)

    def get_sqlconn(self):
        path = self.basedir.join(".sqlite")
        if not path.exists():
            with sqlite3.connect(str(path)) as conn:
                threadlog.info("DB: Creating schema")
                c = conn.cursor()
                c.execute("""
                    CREATE TABLE kv (
                        key TEXT NOT NULL PRIMARY KEY,
                        keyname TEXT,
                        serial INTEGER
                    )
                """)
                c.execute("""
                    CREATE TABLE changelog (
                        serial INTEGER PRIMARY KEY,
                        data BLOB NOT NULL
                    )
                """)
        conn = sqlite3.connect(str(path), timeout=60)
        return conn

    def db_read_typedkey(self, relpath, conn=None):
        new_conn = conn is None
        if new_conn:
            conn = self.get_sqlconn()
        q = "SELECT keyname, serial FROM kv WHERE key = ?"
        try:
            c = conn.cursor()
            row = c.execute(q, (relpath,)).fetchone()
            if row is None:
                raise KeyError(relpath)
            return tuple(row[:2])
        finally:
            if new_conn:
                conn.close()
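
A hedged usage sketch of the changelog cache above (it assumes `basedir` is a py.path.local-style object pointing at a directory with an existing changelog database):

fs = Filesystem(basedir, notify_on_commit=lambda: None)
changes = fs.get_changes(7)  # first call reads SQLite and caches the entry
changes = fs.get_changes(7)  # second call is served from the LRUCache
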
Example #38
File: _docker.py  Project: AlexRRR/flocker
class DockerClient(object):
    """
    Talk to the real Docker server directly.

    Some operations can take a while (e.g. stopping a container), so we
    use a thread pool. See https://clusterhq.atlassian.net/browse/FLOC-718
    for using a custom thread pool.

    :ivar unicode namespace: A namespace prefix to add to container names
        so we don't clobber other applications interacting with Docker.
    :ivar str base_url: URL for connection to the Docker server.
    :ivar int long_timeout: Maximum time in seconds to wait for
        long-running operations, particularly pulling an image.
    :ivar LRUCache _image_cache: Mapped cache of image IDs to their data.
    """
    def __init__(
            self, namespace=BASE_NAMESPACE, base_url=None,
            long_timeout=600):
        self.namespace = namespace
        self._client = dockerpy_client(
            version="1.15", base_url=base_url,
            long_timeout=timedelta(seconds=long_timeout),
        )
        self._image_cache = LRUCache(100)

    def _to_container_name(self, unit_name):
        """
        Add the namespace to the container name.

        :param unicode unit_name: The unit's name.

        :return unicode: The container's name.
        """
        return self.namespace + unit_name

    def _parse_container_ports(self, data):
        """
        Parse the ports from a data structure representing the Ports
        configuration of a Docker container in the format returned by
        ``self._client.inspect_container`` and return a list containing
        ``PortMap`` instances mapped to the container and host exposed ports.

        :param dict data: The data structure for the representation of
            container and host port mappings in a single container.
            This takes the form of the ``NetworkSettings.Ports`` portion
            of a container's state and configuration as returned by inspecting
            the container. This is a dictionary mapping container ports to a
            list of host bindings, e.g.
            "3306/tcp": [{"HostIp": "0.0.0.0","HostPort": "53306"},
                         {"HostIp": "0.0.0.0","HostPort": "53307"}]

        :return list: A list that is either empty or contains ``PortMap``
            instances.
        """
        ports = []
        for internal, hostmap in data.items():
            internal_map = internal.split(u'/')
            internal_port = internal_map[0]
            internal_port = int(internal_port)
            if hostmap:
                for host in hostmap:
                    external_port = host[u"HostPort"]
                    external_port = int(external_port)
                    portmap = PortMap(internal_port=internal_port,
                                      external_port=external_port)
                    ports.append(portmap)
        return ports

    def _parse_restart_policy(self, data):
        """
        Parse the restart policy from the configuration of a Docker container
        in the format returned by ``self._client.inspect_container`` and return
        an ``IRestartPolicy``.

        :param dict data: The data structure representing the restart policy of
            a container, e.g.

            {"Name": "policy-name", "MaximumRetryCount": 0}

        :return IRestartPolicy: The model of the restart policy.

        :raises ValueError: if an unknown policy is passed.
        """
        POLICIES = {
            u"": lambda data:
                RestartNever(),
            u"always": lambda data:
                RestartAlways(),
            u"on-failure": lambda data:
                RestartOnFailure(
                    maximum_retry_count=data[u"MaximumRetryCount"] or None)
        }
        try:
            # docker will treat an unknown policy as "never".
            # We error out here, in case new policies are added.
            return POLICIES[data[u"Name"]](data)
        except KeyError:
            raise ValueError("Unknown restart policy: %r" % (data[u"Name"],))

    def _serialize_restart_policy(self, restart_policy):
        """
        Serialize the restart policy from an ``IRestartPolicy`` to the format
        expected by the docker API.

        :param IRestartPolicy restart_policy: The model of the restart policy.

        :returns: A dictionary suitable to pass to docker

        :raises ValueError: if an unknown policy is passed.
        """
        SERIALIZERS = {
            RestartNever: lambda policy:
                {u"Name": u""},
            RestartAlways: lambda policy:
                {u"Name": u"always"},
            RestartOnFailure: lambda policy:
                {u"Name": u"on-failure",
                 u"MaximumRetryCount": policy.maximum_retry_count or 0},
        }
        try:
            return SERIALIZERS[restart_policy.__class__](restart_policy)
        except KeyError:
            raise ValueError("Unknown restart policy: %r" % (restart_policy,))

    def _image_not_found(self, apierror):
        """
        Inspect a ``docker.errors.APIError`` to determine if it represents a
        failure to start a container because the container's image wasn't
        found.

        :return: ``True`` if this is the case, ``False`` if the error has
            another cause.
        :rtype: ``bool``
        """
        return apierror.response.status_code == NOT_FOUND

    def _address_in_use(self, apierror):
        """
        Inspect a ``docker.errors.APIError`` to determine if it represents a
        failure to start a container because the container is configured to use
        ports that are already in use on the system.

        :return: If this is the reason, an exception to raise describing the
            problem.  Otherwise, ``None``.
        """
        # Recognize an error (without newline) like:
        #
        # Cannot start container <name>: Error starting userland proxy:
        # listen tcp <ip>:<port>: bind: address already in use
        #
        # Or (without newline) like:
        #
        # Cannot start container <name>: Bind for <ip>:<port> failed:
        # port is already allocated
        #
        # because Docker can't make up its mind about which format to use.
        parts = apierror.explanation.split(b": ")
        if parts[-1] == b"address already in use":
            ip, port = parts[-3].split()[-1].split(b":")
        elif parts[-1] == b"port is already allocated":
            ip, port = parts[-2].split()[2].split(b":")
        else:
            return None
        return AddressInUse(address=(ip, int(port)), apierror=apierror)

    def _image_data(self, image):
        """
        Supply data about an image, by either inspecting it or returning
        cached data if available.

        :param unicode image: The ID of the image.

        :return: ``dict`` representing data about the image properties.
        """
        cached_image = self._image_cache.get(image)
        if cached_image is not None:
            LOG_CACHED_IMAGE(image=image).write()
            return cached_image
        try:
            image_data = self._client.inspect_image(image)
            Message.new(
                message_type="flocker:node:docker:image_inspected",
                image=image
            ).write()
        except APIError as e:
            if e.response.status_code == NOT_FOUND:
                # Image has been deleted, so just fill in some
                # stub data so we can return *something*. This
                # should happen only for stopped containers so
                # some inaccuracy is acceptable.
                # We won't cache stub data though.
                Message.new(
                    message_type="flocker:node:docker:image_not_found",
                    image=image
                ).write()
                image_data = {u"Config": {u"Env": [], u"Cmd": []}}
            else:
                raise
        cached_data = ImageDataCache(
            command=image_data[u"Config"][u"Cmd"],
            environment=image_data[u"Config"][u"Env"]
        )
        self._image_cache.put(image, cached_data)
        Message.new(
            message_type="flocker:node:docker:image_data_cached",
            image=image
        ).write()
        return cached_data

    def add(self, unit_name, image_name, ports=None, environment=None,
            volumes=(), mem_limit=None, cpu_shares=None,
            restart_policy=RestartNever(), command_line=None):
        container_name = self._to_container_name(unit_name)

        if environment is not None:
            environment = environment.to_dict()
        if ports is None:
            ports = []

        restart_policy_dict = self._serialize_restart_policy(restart_policy)

        def _create():
            binds = list(
                # The "Z" mode tells Docker to "relabel file objects" on the
                # volume.  This makes things work when SELinux is enabled, at
                # least in the default configuration on CentOS 7.  See
                # <https://docs.docker.com/reference/commandline/run/>, in the
                # `--volumes-from` section (or just search for SELinux).
                u"{}:{}:Z".format(
                    volume.node_path.path, volume.container_path.path
                )
                for volume in volumes
            )
            port_bindings = {
                p.internal_port: p.external_port
                for p in ports
            }
            host_config = self._client.create_host_config(
                binds=binds,
                port_bindings=port_bindings,
                restart_policy=restart_policy_dict,
            )
            # We're likely to get e.g. pvector, so make sure we're passing
            # in something JSON serializable:
            command_line_values = command_line
            if command_line_values is not None:
                command_line_values = list(command_line_values)

            self._client.create_container(
                name=container_name,
                image=image_name,
                command=command_line_values,
                environment=environment,
                ports=[p.internal_port for p in ports],
                mem_limit=mem_limit,
                cpu_shares=cpu_shares,
                host_config=host_config,
            )

        def _add():
            try:
                _create()
            except APIError as e:
                if self._image_not_found(e):
                    # Pull it and try again
                    self._client.pull(image_name)
                    _create()
                else:
                    # Unrecognized, just raise it.
                    raise

            # Just because we got a response doesn't mean Docker has
            # actually updated any internal state yet! So if e.g. we did a
            # start on this container Docker might well complain it knows
            # not the container of which we speak. To prevent this we poll
            # until it does exist.
            while True:
                try:
                    self._client.start(container_name)
                except NotFound:
                    sleep(0.01)
                else:
                    break

        d = deferToThread(_add)

        def _extract_error(failure):
            failure.trap(APIError)
            code = failure.value.response.status_code
            if code == 409:
                raise AlreadyExists(unit_name)

            in_use = self._address_in_use(failure.value)
            if in_use is not None:
                # We likely can't start the container because its
                # configuration conflicts with something else happening on
                # the system.  Reflect this failure condition in a more
                # easily recognized way.
                raise in_use

            return failure
        d.addErrback(_extract_error)
        return d

    def _blocking_exists(self, container_name):
        """
        Blocking API to check if container exists.

        :param unicode container_name: The name of the container whose
            existence we're checking.

        :return: ``True`` if unit exists, otherwise ``False``.
        """
        try:
            self._client.inspect_container(container_name)
            return True
        except APIError:
            return False

    def exists(self, unit_name):
        container_name = self._to_container_name(unit_name)
        return deferToThread(self._blocking_exists, container_name)

    def _stop_container(self, container_name):
        """Attempt to stop the given container.

        There is a race condition between a process dying and
        Docker noticing that fact:

        https://github.com/docker/docker/issues/5165#issuecomment-65753753

        If we get an error indicating that this race condition happened,
        return False. This means the caller should try again. If we *do*
        successfully stop the container, return True.

        :raise APIError: If the container failed to stop for some unknown
            reason.
        :return: True if we stopped the container, False otherwise.

        """
        try:
            with start_action(
                action_type='flocker:docker:container_stop',
                container=container_name
            ):
                self._client.stop(container_name)
        except APIError as e:
            if e.response.status_code == NOT_FOUND:
                # If the container doesn't exist, we swallow the error,
                # since this method is supposed to be idempotent.
                return True
            elif e.response.status_code == INTERNAL_SERVER_ERROR:
                # Docker returns this if the process had died, but
                # hasn't noticed it yet.
                return False
            else:
                raise
        return True

    def _remove_container(self, container_name):
        """
        Attempt to remove a container.

        Assumes the given container has already been stopped.

        :param unicode container_name: The fully-namespaced name of the
            container.
        :return: True if we removed the container, False otherwise.
        """
        try:
            # The ``docker.Client.stop`` method sometimes returns a
            # 404 error, even though the container exists.
            # See https://github.com/docker/docker/issues/13088
            # Wait until the container has actually stopped running
            # before attempting to remove it.  Otherwise we are
            # likely to see: 'docker.errors.APIError: 409 Client
            # Error: Conflict ("Conflict, You cannot remove a
            # running container. Stop the container before
            # attempting removal or use -f")'
            # This code should probably be removed once the above
            # issue has been resolved. See [FLOC-1850]
            self._client.wait(container_name)

            with start_action(
                action_type='flocker:docker:container_remove',
                container=container_name
            ):
                self._client.remove_container(container_name)
        except APIError as e:
            if e.response.status_code == NOT_FOUND:
                # If the container doesn't exist, we swallow the error,
                # since this method is supposed to be idempotent.
                return True
            elif e.response.status_code == INTERNAL_SERVER_ERROR:
                # Failure to remove container - see FLOC-3262 for an example.
                return False
            else:
                raise
        return True

    def remove(self, unit_name):
        container_name = self._to_container_name(unit_name)

        def _remove():
            # Previously, this looped forever and didn't pause between loops.
            # We've arbitrarily chosen a wait interval of 0.001 seconds and
            # 1000 retries (i.e. a second of polling). These values may need
            # tuning.
            poll_until(
                partial(self._stop_container, container_name),
                repeat(0.001, 1000))

            # Previously, the container remove was only tried once. Again,
            # these parameters may need tuning.
            poll_until(
                partial(self._remove_container, container_name),
                repeat(0.001, 1000))

        d = deferToThread(_remove)
        return d

    def list(self):
        def _list():
            result = set()
            ids = [d[u"Id"] for d in
                   self._client.containers(quiet=True, all=True)]
            for i in ids:

                try:
                    data = self._client.inspect_container(i)
                except APIError as e:
                    # The container ID returned by the list API call above may
                    # have been removed in another thread.
                    if e.response.status_code == NOT_FOUND:
                        continue
                    else:
                        raise

                state = (u"active" if data[u"State"][u"Running"]
                         else u"inactive")
                name = data[u"Name"]
                # Since tags (e.g. "busybox") aren't stable, ensure we're
                # looking at the actual image by using the hash:
                image = data[u"Image"]
                image_tag = data[u"Config"][u"Image"]
                command = data[u"Config"][u"Cmd"]
                with start_action(
                    action_type=u"flocker:node:docker:inspect_image",
                    container=i,
                    running=data[u"State"][u"Running"]
                ):
                    image_data = self._image_data(image)
                if image_data.command == command:
                    command = None
                port_bindings = data[u"NetworkSettings"][u"Ports"]
                if port_bindings is not None:
                    ports = self._parse_container_ports(port_bindings)
                else:
                    ports = list()
                volumes = []
                binds = data[u"HostConfig"]['Binds']
                if binds is not None:
                    for bind_config in binds:
                        parts = bind_config.split(':', 2)
                        node_path, container_path = parts[:2]
                        volumes.append(
                            Volume(container_path=FilePath(container_path),
                                   node_path=FilePath(node_path))
                        )
                if name.startswith(u"/" + self.namespace):
                    name = name[1 + len(self.namespace):]
                else:
                    continue
                # Retrieve environment variables for this container,
                # disregarding any environment variables that are part
                # of the image, rather than supplied in the configuration.
                unit_environment = []
                container_environment = data[u"Config"][u"Env"]
                if image_data.environment is None:
                    image_environment = []
                else:
                    image_environment = image_data.environment
                if container_environment is not None:
                    for environment in container_environment:
                        if environment not in image_environment:
                            env_key, env_value = environment.split('=', 1)
                            unit_environment.append((env_key, env_value))
                unit_environment = (
                    Environment(variables=frozenset(unit_environment))
                    if unit_environment else None
                )
                # Our Unit model counts None as the value for cpu_shares and
                # mem_limit in containers without specified limits, however
                # Docker returns the values in these cases as zero, so we
                # manually convert.
                cpu_shares = data[u"Config"][u"CpuShares"]
                cpu_shares = None if cpu_shares == 0 else cpu_shares
                mem_limit = data[u"Config"][u"Memory"]
                mem_limit = None if mem_limit == 0 else mem_limit
                restart_policy = self._parse_restart_policy(
                    data[U"HostConfig"][u"RestartPolicy"])
                result.add(Unit(
                    name=name,
                    container_name=self._to_container_name(name),
                    activation_state=state,
                    container_image=image_tag,
                    ports=frozenset(ports),
                    volumes=frozenset(volumes),
                    environment=unit_environment,
                    mem_limit=mem_limit,
                    cpu_shares=cpu_shares,
                    restart_policy=restart_policy,
                    command_line=command)
                )
            return result
        return deferToThread(_list)
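
A hedged usage sketch of the client above (it must run under a Twisted reactor, since the blocking Docker calls are deferred to a thread pool; the unit and image names are illustrative):

client = DockerClient()
d = client.add(u"myapp", u"busybox")  # pulls the image first if missing
d.addCallback(lambda _: client.list())
# list() shares one LRUCache of image data, so containers built from the
# same image avoid redundant inspect_image calls.
d.addCallback(lambda units: sorted(unit.name for unit in units))
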
Example #39
class TermEval(object):
    def __init__(self, blocklen=128, deg=1, *args, **kwargs):
        # block length in bits, term size.
        self.blocklen = blocklen

        # term degree
        self.deg = deg

        # evaluated base terms of deg=1
        self.base = []
        self.cur_tv_size = None
        self.cur_evals = None
        self.last_base_size = None

        # caches
        self.sim_norm_cache = LRUCache(64)

    def base_size(self):
        """
        Returns base size of the vector - same as size of the base
        :return:
        """
        return len(self.base[0])

    def new_buffer(self):
        """
        Returns a newly allocated bitarray of the size of the base - contents uninitialized.
        :return:
        """
        return empty_bitarray(len(self.base[0]))

    def gen_term(self, indices, blocklen=None):
        """
        Generates a bit term mask from the indices created by term_generator().
        blocklen wide.
        :param indices: array of bit indices, e.g., [0,8] -> x0x8
        :param blocklen: length of the term bit representation. If none, default block len is used
        :return: bit representation of the term
        """
        if blocklen is None:
            blocklen = self.blocklen

        term = empty_bitarray(blocklen)
        for bitpos in indices:
            term[bitpos] = 1
        return term

    def mask_with_term(self, term, block):
        """
        Masks input with the term.
        block has to be a multiple of the term size. term is evaluated by sliding window of size=term.
        :param term: bit representation of the term
        :param block: bit representation of the input
        :return: bit representation of the result, size = size of block.
        """
        ln = len(block)
        lnt = len(term)
        res = empty_bitarray()
        for idx in range(0, ln, lnt):
            res.append(block[idx:idx + self.blocklen] & term)
        return res

    def eval_term_raw_single(self, term, block):
        """
        Evaluates term on the raw input - bit array. Uses [] operator to access bits in block.
        Returns a single number, evaluates polynomial on single block
        :param term: term in the index notation
        :param block: bitarray indexable by []
        :return:
        """
        cval = 1
        for idx in term:
            if block[idx] == 0:
                cval = 0
                break
        return cval

    def eval_term_raw(self, term, block):
        """
        Evaluates term on the bitarray input. Uses & on the whole term and the block slices.
        Block has to be a multiple of the term size. term is evaluated by sliding window of size=term.
        In result each bit represents a single term evaluation on the given sub-block
        :param term: bit representation of the term
        :param block: bit representation of the input
        :return: bit array of term evaluations, size = size of block / size of term
        """
        ln = len(block)
        lnt = len(term)
        ctr = 0
        res_size = ln // lnt
        res = empty_bitarray(res_size)
        for idx in range(0, ln, lnt):
            res[ctr] = ((block[idx:idx + self.blocklen] & term) == term)
            ctr += 1
        return res

    def eval_poly_raw_single(self, poly, block):
        """
        Evaluates polynomial on the raw input - bit array. Uses [] operator to access bits in block.
        Returns a single number, evaluates polynomial on single block
        :param poly: polynomial in the index notation
        :param block: bitarray indexable by []
        :return:
        """
        res = 0

        # for each term &&-operation
        for term in poly:
            cval = 1
            for idx in term:
                if block[idx] == 0:
                    cval = 0
                    break
            res ^= cval
        return res

    def hw(self, block):
        """
        Computes the Hamming weight of the block
        :param block: bit representation of the input
        :return:
        """
        if FAST_IMPL:
            return block.count()
        else:
            return block.count(True)

    def term_generator(self, deg=None):
        """
        Returns term generator for given deg (internal if none is given) and blocklen
        :return:
        """
        if deg is None:
            deg = self.deg

        return term_generator(deg, self.blocklen - 1)

    def load(self, block, **kwargs):
        """
        Precomputes data
        :param block:
        :return:
        """
        self.gen_base(block, **kwargs)

    def gen_base(self, block, eval_only_vars=None, **kwargs):
        """
        Generate base for term evaluation from the block.
        Evaluates each base term (deg=1) on the input, creates a base for further evaluation of high order terms.
        :param block: bit representation of the input
        :param eval_only_vars: if not None, evals only those variables mentioned
        :return:
        """
        if (len(block) % self.blocklen) != 0:
            raise ValueError('Input data not multiple of block length')

        self.cur_tv_size = len(block) / 8
        self.cur_evals = len(block) / self.blocklen

        ln = len(block)
        res_size = ln // self.blocklen

        if self.base is None or self.last_base_size != (self.blocklen,
                                                        res_size):
            self.base = [None] * self.blocklen

        for bitpos in range(0, self.blocklen):
            ctr = 0
            if bitpos != 0 and eval_only_vars is not None and bitpos not in eval_only_vars:
                continue

            if self.last_base_size != (self.blocklen, res_size):
                self.base[bitpos] = empty_bitarray(res_size)

            if FAST_IMPL_PH4:
                self.base[bitpos].eval_monic(block, bitpos, self.blocklen)

            else:
                # For verification purposes we also have another independent evaluation here.
                for idx in range(0, ln, self.blocklen):
                    self.base[bitpos][ctr] = block[idx + bitpos] == 1
                    ctr += 1
                assert ctr == res_size

            if not FAST_IMPL:
                self.base[bitpos] = Bits(self.base[bitpos])

        self.last_base_size = (self.blocklen, res_size)

    def num_terms(self, deg, include_all_below=False, exact=False):
        """
        Computes number of terms of given degree.
        :param deg:
        :param include_all_below: if true, all lower degree counts are summed
        :param exact: if true exact value is computed. Otherwise just approximation is given (larger).
        :return:
        """
        if deg == 1:
            return self.blocklen

        rng = range(1 if include_all_below else deg, deg + 1)
        if exact:
            return sum([(comb(self.blocklen, x, True)) for x in rng])
        else:
            return sum([(comb(self.blocklen, x, False) + 2) for x in rng])

    def eval_term(self, term, res=None):
        """
        Evaluates term on the block using the precomputed base.
        :param term: term represented as an array of bit positions
        :param res: bitarray buffer to put result to
        :return: bit representation of the result, each bit represents single term evaluation on the given sub-block
        """
        ln = len(term)
        idx_start = 1
        if res is None:
            res = to_bitarray(self.base[term[0]], const=False)
        else:
            if FAST_IMPL_PH4:
                res.fast_copy(self.base[term[0]])
            else:
                idx_start = 0
                res.setall(True)

        for i in range(idx_start, ln):
            res &= self.base[term[i]]
        return res
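
    # A hedged illustration: eval_term([1, 3]) copies base[1] and ANDs it
    # with base[3], so the i-th result bit is 1 iff bits 1 and 3 were both
    # set in the i-th input block; hw() of that result is the term's count.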

    def eval_terms(self, deg=None):
        """
        Evaluates all terms on the input data precomputed in the base.
        Returns array of hamming weights.
        :param deg: degree of the terms to generate. If none, default degree is taken.
        :return: array of hamming weights. idx = 0 -> HW for term with index 0 evaluated on input data.
        """
        if deg is None:
            deg = self.deg

        hws = [0] * self.num_terms(deg, False, exact=True)
        res = empty_bitarray(len(self.base[0]))

        if not FAST_IMPL_PH4:
            return self.eval_terms_raw_slow(deg, False, hws, res=res)

        ctr = 0
        for term in self.term_generator(deg):
            res.fast_copy(
                self.base[term[0]])  # Copy the first term to the result
            for i in range(1, deg - 1):  # And the remaining terms
                res &= self.base[term[i]]
            hws[ctr] = res.fast_hw_and(self.base[term[deg - 1]])
            ctr += 1
        assert ctr == len(hws)
        return hws

    def eval_all_terms(self, deg=None):
        """
        Evaluates all terms of deg [1, deg].

        Evaluation is done with caching of results from the higher orders: e.g., when evaluating a+b+c,
        c goes over all possible options while the a+b result stays cached until b changes.
        The last order is evaluated in memory without actually storing an AND result anywhere.

        The lower orders are evaluated as a side product of caching - each new cache entry means a new combination
        of the lower order.

        Some lower order evaluations are not included in the caching, e.g., for the (128, 3) combination, the highest
        term in the ordering is [125, 126, 127], so with caching alone you cannot get the [126, 127] evaluation.
        To fill the missing gaps we have a term generator for each lower degree; it runs in parallel with the caching,
        and if there are missing terms we compute them manually - a raw AND, without the cache.

        :warning: Works only with fast ph4r05 implementation.
        :param deg:
        :return:
        """
        if deg is None:
            deg = self.deg

        hw = [None] * (deg + 1)
        hw[0] = []
        for idx in range(1, deg + 1):
            hw[idx] = [0] * self.num_terms(idx, False, exact=True)

        # center_hw = len(self.base[0]) * 2 ** (-1 * deg)
        # logger.info('Now the funny part! %s' % center_hw)
        # arr = bitarray.eval_all_terms(self.base, deg=deg, topk=128, hw_center=center_hw)
        # logger.info('Done')
        #logger.info('heap:   %s' % arr)
        #logger.info('sorted: %s' % sorted(arr, reverse=True))

        # deg1 is simple - just use HW on the basis
        hw[1] = [x.count() for x in self.base]
        if deg <= 1:
            return hw

        # deg2 is simple to compute without optimisations, if it is the top order we are interested in.
        if deg == 2:
            hw[2] = [0] * self.num_terms(2, False, exact=True)
            for idx, term in enumerate(self.term_generator(2)):
                hw[2][idx] = self.base[term[0]].fast_hw_and(self.base[term[1]])
            return hw

        # deg3 and more - optimisations in place.
        # temp buffer for computing missing evaluations
        res = self.new_buffer()

        # Sub evaluations of high orders.
        # Has deg-1 entries, as it makes no sense to cache the last term - it is the result directly.
        # sub[0] = a      - deg1 result - basis reference
        # sub[1] = a+b    - deg2 result
        # sub[2] = a+b+c  - deg3 result
        sub = [self.new_buffer() for _ in range(0, deg - 1)]

        # Lower degree indices update here.
        subdg = [0] * deg

        # Lower degree generators for filling up missing pieces
        subgen = [self.term_generator(x) for x in range(1, deg + 1)]

        # Last term indices, to detect changes in the higher-order prefix
        # lst = [0,1,2,3,4,5] - for deg 6, triggers new caching if the top 5 indices change, e.g., to [0,1,2,3,5,6]
        lst = [-1] * deg

        # Do the combination of the highest degree, cache when lower degree sub-combination changes.
        for idx, term in enumerate(self.term_generator(deg)):
            # Has high order cached element changed?
            # Make a diff term vs lst. If there is a diff, recompute cached sub-results.
            if term[:-1] != lst[:-1]:
                # Get the leftmost index in the term list where the change happened.
                # The 0 index is not considered as this change is not interesting - it is base[] anyway.
                # Thus domain of changed_from is 1 .. deg-2
                changed_from = deg - 2
                for chidx in range(0, deg - 1):
                    changed_from = chidx
                    if term[chidx] != lst[chidx]:
                        break

                # Recompute changed, from the more general to less. e.g., from a+b to a+b+c+d+e+f....
                for chidx in range(changed_from, deg - 1):
                    if chidx == 0:
                        sub[chidx] = self.base[term[0]]
                    else:  # recursive definition - use the previous result.
                        sub[chidx].fast_copy(sub[chidx - 1])
                        sub[chidx] &= self.base[term[chidx]]

                        # Run update generator up to this position to fill missing pieces
                        # Missing piece = [126,127] for deg = 3 cos max elem is [125,126,127]
                        for missing_piece in subgen[chidx]:
                            if missing_piece == term[0:1 + chidx]:
                                break
                            res.fast_copy(self.base[missing_piece[0]])
                            for subi in range(1, 1 + chidx):
                                res &= self.base[missing_piece[subi]]

                            hw[1 + chidx][subdg[chidx]] = res.count()
                            subdg[chidx] += 1
                            # logger.info('Fill in missing: %s, cur: %s' % (missing_piece, term[0: 1 + chidx]))

                        # Update lower degree HW
                        hw[1 + chidx][subdg[chidx]] = sub[chidx].count()
                        subdg[chidx] += 1

            # Evaluate the current expression using the cached results + fast hw.
            hw[deg][idx] = sub[deg - 2].fast_hw_and(self.base[term[deg - 1]])

            # copy the last term
            lst = term

        # Finish generators - add missing combinations not reached by the caching from the higher ones.
        # E.g. for (128, 3) the higher combination is [125, 126, 127] so the maximal cached deg 2
        # is [125, 126]. This finishes the sequence for [126, 127].
        for finish_deg in range(2, deg):
            for missing_piece in subgen[finish_deg - 1]:
                res.fast_copy(self.base[missing_piece[0]])
                for subi in range(1, finish_deg):
                    res &= self.base[missing_piece[subi]]

                hw[finish_deg][subdg[finish_deg - 1]] = res.count()
                subdg[finish_deg - 1] += 1
                # logger.info('Missing piece: %s' % missing_piece)

        return hw
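
    # A hedged trace for blocklen=4, deg=3: the generator yields [0,1,2],
    # [0,1,3], [0,2,3], [1,2,3]. The prefix cache produces the deg-2
    # counts for [0,1], [0,2] and [1,2]; [0,3] is filled in by the parallel
    # lower-degree generator when the prefix jumps from [0,2] to [1,2],
    # and [1,3], [2,3] by the finishing loop above.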

    def eval_terms_raw_slow(self, deg, include_all_below, hws, res=None):
        """
        Subroutine for evaluating all terms, the slower one without our bitarray optimisations.
        Should be used only for testing.
        :param deg:
        :param include_all_below:
        :param hws: hamming weight accumulator
        :param res: working bitarray buffer
        :return:
        """
        if res is None:
            res = self.new_buffer()

        ctr = 0
        for cur_deg in range(1 if include_all_below else deg, deg + 1):
            for term in self.term_generator(cur_deg):
                res.setall(True)
                for i in range(0, cur_deg):
                    res &= self.base[term[i]]

                hw = self.hw(res)
                hws[ctr] = hw
                ctr += 1

        return hws

    def eval_poly(self, poly, res=None, subres=None):
        """
        Evaluates a polynomial on the input precomputed data
        :param poly: polynomial specified as [term, term, term], e.g. [[1,2], [3,4], [5,6]] == x1x2 + x3x4 + x5x6
        :param res: buffer to use to store the result (optimization purposes)
        :param subres: working buffer for temporary computations (optimization purposes)
        :return:
        """
        ln = len(poly)
        if res is None:
            res = self.new_buffer()
        if subres is None:
            subres = self.new_buffer()

        self.eval_term(poly[0], res=res)
        for i in range(1, ln):
            res ^= self.eval_term(poly[i], res=subres)
        return res

    def expp_term_deg(self, deg):
        """
        Returns expected probability of result=1 of a term with given degree under null hypothesis of uniformity.
        :param deg:
        :return:
        """
        return math.pow(2, -1 * deg)

    def expp_term(self, term):
        """
        Computes expected probability of result=1 of the given term under null hypothesis of uniformity.
        O(n) w.r.t. term length n (it deduplicates to the distinct bit positions).
        :param term:
        :return:
        """
        dislen = len(set(term))
        return math.pow(2, -1 * dislen)

    def expp_xor_indep(self, p1, p2):
        """
        Probability of term t1 XOR t2 being 1 if t1 is 1 with p1 and t2 is 1 with p2.
        t1 and t2 has to be independent (no common sub-term).
        Due to associativity can be computed on multiple terms: t1 ^ t2 ^ t3 ^ t4 = (((t1 ^ t2) ^ t3) ^ t4) - zipping.

        XOR:
          a b | r
          ----+---
          1 1 | 0
          1 0 | 1  = p1    * (1-p2)
          0 1 | 1  = (1-p1)* p2
          0 0 | 0

        :param p1:
        :param p2:
        :return:
        """
        return p1 * (1 - p2) + (1 - p1) * p2
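
    # A hedged worked example of the formula above: for two independent
    # degree-2 terms, p1 = p2 = 0.25 and expp_xor_indep(0.25, 0.25)
    # = 0.25 * (1 - 0.25) + (1 - 0.25) * 0.25 = 0.375, the expected
    # probability that x0x1 ^ x2x3 evaluates to 1 under uniformity.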

    def term_remap(self, term):
        """
        Remaps the term to the lower indices - for simulation.
        remapping may lead to degree reduction, e.g., x1x2x3x3 -> x0x1x2
        :param term:
        :return:
        """
        return range(0, len(set(term)))

    def poly_remap(self, poly):
        """
        Remaps the polynomial to lower indices
        e.g., x7x8x9 + x110x112 -> x0x1x2 + x3x4
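        Concretely, [[7, 8], [8, 110]] -> [[0, 1], [1, 2]] with
        idx_map_rev = {7: 0, 8: 1, 110: 2}.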
        :param poly:
        :return: new normalized polynomial, var idx -> new var idx map
        """
        # mapping phase
        idx = 0
        idx_map_rev = {}  # orig idx -> new idx

        res_poly = []
        for term in poly:
            res_term = []
            for bitpos in term:
                if bitpos not in idx_map_rev:
                    idx_map_rev[bitpos] = idx
                    res_term.append(idx)
                    idx += 1
                else:
                    res_term.append(idx_map_rev[bitpos])
            if len(res_term) > 0:
                res_poly.append(sorted(res_term))

        return res_poly, idx_map_rev

    def poly_fix_var(self, poly, neg, idx, val):
        """
        Reduces normed polynomial = fixes variable with bitpos=idx to val.

        Neg represents constant 1 as it has no representation in this polynomial form.
        Empty terms [] evaluate to 0 by definition. To be able to express a polynomial after reduction,
        like p = 1 + 1 + 1 + x1x2, the neg is a constant part XORed onto the result of the polynomial evaluation.
        In this case neg = 1 + 1 + 1 = 1. For x1=1 and x2=1, p evaluates to neg + 1 = 1 + 1 = 0.
        For x1=1 and x2=0, p evaluates to neg + 0 = 1 + 0 = 1.
        Neg thus acts as a kind of carry value.

        Method is used in the recursive polynomial evaluation with branch pruning.
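
        For example, fixing x1=1 in p = x0x1 + x1x2 + x3 yields [[0], [2], [3]]
        with neg unchanged; fixing x1=0 instead yields [[3]], since both terms
        containing x1 drop out.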

        :param poly: polynomial representation
        :param neg: carry value for the XOR constant term
        :param idx: variable idx to fix
        :param val: value to fix variable to
        :return: (poly, neg)
        """
        res_poly = []
        for term in poly:
            # variable not in term - add to the polynomial unmodified
            if idx not in term:
                res_poly.append(term)
                continue

            # idx is in the term. remove
            if val == 0:
                # val is 0 -> whole term is zero, do not add.
                continue

            # val is 1 -> remove from term as it is constant now.
            n_term = [x for x in term if x != idx]

            if len(n_term) == 0:
                # term is empty -> is 1, xor with neg
                neg ^= 1

            else:
                # term is non-empty (at least one variable), add to poly
                res_poly.append(n_term)

        return res_poly, neg

    def expnum_poly_sim(self, poly):
        """
        Simulates the given polynomial w.r.t. the null hypothesis, over all combinations of variable values.
        O(2^n)
        :param poly:
        :return: number of polynomial evaluations to 1
        """
        npoly, idx_map_rev = self.poly_remap(poly)
        return self.expnum_poly_sim_norm_cached(npoly, len(idx_map_rev))

    def expnum_poly_sim_norm_cached(self, poly, deg):
        """
        Computes how many times the given polynomial evaluates to 1 for all variable combinations.
        :param poly:
        :param deg:
        :return: number of polynomial evaluations to 1
        """
        if self.sim_norm_cache is None:
            return self.expnum_poly_sim_norm(poly, deg)

        # LRU cached sim variant
        key = ','.join(['-'.join([str(y) for y in x]) for x in poly])
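        # e.g., poly [[0, 1], [2]] maps to the cache key '0-1,2'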
        val = self.sim_norm_cache.get(key)
        if val is not None:
            return val

        val = self.expnum_poly_sim_norm(poly, deg)
        self.sim_norm_cache.put(key, val)
        return val

    def expnum_poly_sim_norm(self, poly, deg):
        """
        Computes how many times the given polynomial evaluates to 1 for all variable combinations.
        :param poly:
        :param deg:
        :return: number of polynomial evaluations to 1
        """
        # current evaluation is simple - iterate over all possible values of variables.
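        # For example, poly = [[0, 1], [2]] (x0x1 + x2) evaluates to 1 for
        # 4 of the 8 assignments of (x0, x1, x2), so the result is 4.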
        num_one = 0

        gen = pos_generator(dim=deg, maxelem=1)
        for val in gen:
            # val is [x0,x1,x2,x3,x4,...] current variable value vector.
            # res = current polynomial evaluation
            res = 0

            # for each term &&-operation
            for term in poly:
                cval = 1
                for idx in term:
                    if val[idx] == 0:
                        cval = 0
                        break
                res ^= cval

            if res > 0:
                num_one += 1

        return num_one

    def expp_poly_dep(self, poly, neg=0):
        """
        Computes expected probability of result=1 of the given polynomial under null hypothesis of uniformity.
        It is assumed each term in the polynomial shares at least one variable with a different term
        in the polynomial so it cannot be easily optimised.
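        For example, p = x0x1 + x1x2 shares x1 between its terms; simulating
        all 8 assignments gives 2 ones, i.e. probability 1/4.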
        :param poly:
        :param neg: internal, for recursion
        :return: probability of polynomial evaluating to 1 over all possibilities of variables
        """

        # at first - degenerate cases
        ln = len(poly)
        if ln == 1:
            # only one term - evaluate independently
            return self.expp_term(poly[0])

        # More than 1 term. Remap term for evaluation & degree detection.
        npoly, idx_map_rev = self.poly_remap(poly)
        deg = len(idx_map_rev)

        # do the all-combinations simulation (fine for small degrees; always used for now)
        ones = self.expnum_poly_sim_norm_cached(npoly, deg)
        return float(ones) / float(2**deg)

        # For high degrees or long polynomials we could do this instead:
        # a) Isolate independent variables, substitute them with a single one:
        #  x1x2x3x7x8 + x2x3x4x9x10x11 + x1x4x12x23 can be simplified to
        #  x1x2x3A    + x2x3x4B        + x1x4C - we don't need to iterate over independent variables
        #  here (e.g., x7x8x9x10,...). New variables A, B, C aggregate independent variables in the
        #  original equation. We evaluate A, B, C only once; A is 1 with prob 1/4, B 1/8, C 1/4. This is considered
        #  during the evaluation.
        #
        # b) recursive evaluation with branch pruning.
        # recursively do:
        #  - Is polynomial a const? Return.
        #  - Is polynomial of 1 term only? Return.
        #  - 1. fix the first variable x1=0, use poly_fix_var, (some terms drop out), evaluate recursively.
        #  - 2. fix the first variable x1=1, use poly_fix_var, evaluate recursively.
        #  - result = 0.5 * fix0 + 0.5 * fix1
        #  The pruning on the 0 branches can potentially save a lot of evaluations.

    def expp_poly(self, poly):
        """
        Computes expected probability of result=1 of the given polynomial under null hypothesis of uniformity.
        Due to non-independence between terms this evaluation can take some time - simulating.
        Dependent terms are simulated, i.e. all combinations of the involved variables are computed.
        :param poly:
        :return: probability of polynomial evaluating to 1 over all possibilities of variables
        """
        # Optimization: find independent terms, move aside, can be evaluated without simulation.
        # independent terms are XORed with expp_xor_indep()
        #
        # Non-independent terms need to be evaluated together. Naive method is trying all combinations of involved
        # variables and compute number of 1s.
        #
        # Optimization can be done if this is performed recursively: let x1=0, (and x1=1 (two rec. branches))
        # then reduce polynomial and apply recursively. Often a lot of branches can be pruned from the computation as
        # it leads to 0 due to ANDs. For non-zero branches, number of 1 evaluations need to be computed.
        # same principle can be used for this.
        #
        # Find independent terms.
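        # For example, x1x2 + x2x3 + x5x6 splits into the dependent cluster
        # {x1x2, x2x3}, which is simulated, and the independent term x5x6,
        # whose probability is folded in via expp_xor_indep().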
        ln = len(poly)

        # degenerate case = 1 term
        if ln == 1:
            return self.expp_term(poly[0])
        terms = [set(x) for x in poly]

        # degenerate case = 2 terms
        if ln == 2:
            if len(terms[0] & terms[1]) == 0:
                return self.expp_xor_indep(self.expp_term(poly[0]),
                                           self.expp_term(poly[1]))
            else:
                return self.expp_poly_dep(poly)

        # General case:
        #   Finding independent terms = create a graph from the terms in the polynomial.
        #   There is a connection if t1 and t2 share at least one element.
        #   Find connected components of the graph. Union-find (disjoint sets) data structure is helping with it.
        uf = ufh.UnionFind()
        for idx, term in enumerate(terms):
            uf.make_set(idx)
        for idx, term in enumerate(terms):
            for idx2 in range(idx + 1, ln):
                if len(term & terms[idx2]) > 0:
                    uf.union(idx, idx2)

        # Cluster terms related to each other, e.g.
        # [[t1,t2,t3], [t4], [t5]]
        clusters = [[poly[y] for y in x] for x in uf.get_set_map().values()]

        # Each cluster can be evaluated independently and XORed with the rest.
        probs = [self.expp_poly_dep(x) for x in clusters]

        # Reduce the probability list with the independent-term XOR formula.
        res = reduce(lambda x, y: self.expp_xor_indep(x, y), probs)
        return res
Example #40
0
File: mapper.py Project: ranosch/routes
class Mapper(SubMapperParent):
    """Mapper handles URL generation and URL recognition in a web
    application.
    
    Mapper deals with dictionaries. It is assumed that the web
    application will handle the dictionary returned by URL recognition
    to dispatch appropriately.
    
    URL generation is done by passing keyword parameters into the
    generate function, a URL is then returned.
    
    """
    def __init__(self, controller_scan=controller_scan, directory=None, 
                 always_scan=False, register=True, explicit=True):
        """Create a new Mapper instance
        
        All keyword arguments are optional.
        
        ``controller_scan``
            Function reference that will be used to return a list of
            valid controllers used during URL matching. If
            ``directory`` keyword arg is present, it will be passed
            into the function during its call. This option defaults to
            a function that will scan a directory for controllers.
            
            Alternatively, a list of controllers or None can be passed
            in which are assumed to be the definitive list of
            controller names valid when matching 'controller'.
        
        ``directory``
            Passed into controller_scan for the directory to scan. It
            should be an absolute path if using the default 
            ``controller_scan`` function.
        
        ``always_scan``
            Whether or not the ``controller_scan`` function should be
            run during every URL match. This is typically a good idea
            during development so the server won't need to be restarted
            anytime a controller is added.
        
        ``register``
            Boolean used to determine if the Mapper should use 
            ``request_config`` to register itself as the mapper. Since
            it's done on a thread-local basis, this is typically best
            used during testing though it won't hurt in other cases.
        
        ``explicit``
            Boolean used to determine if routes should be connected
            with implicit defaults of::
                
                {'controller':'content','action':'index','id':None}
            
            When set to True, these defaults will not be added to route
            connections and ``url_for`` will not use Route memory.
                
        Additional attributes that may be set after mapper
        initialization (ie, map.ATTRIBUTE = 'something'):
        
        ``encoding``
            Used to indicate alternative encoding/decoding systems to
            use with both incoming URLs, and during Route generation
            when passed a Unicode string. Defaults to 'utf-8'.
        
        ``decode_errors``
            How to handle errors in the encoding, generally ignoring
            any chars that don't convert should be sufficient. Defaults
            to 'ignore'.
        
        ``minimization``
            Boolean used to indicate whether or not Routes should
            minimize URLs and the generated URLs, or require every
            part where it appears in the path. Defaults to True.
        
        ``hardcode_names``
            Whether or not Named Routes result in the default options
            for the route being used *or* if they actually force url
            generation to use the route. Defaults to False.
        
        """
        self.matchlist = []
        self.maxkeys = {}
        self.minkeys = {}
        self.urlcache = LRUCache(1600)
        self._created_regs = False
        self._created_gens = False
        self._master_regexp = None
        self.prefix = None
        self.req_data = threading.local()
        self.directory = directory
        self.always_scan = always_scan
        self.controller_scan = controller_scan
        self._regprefix = None
        self._routenames = {}
        self.debug = False
        self.append_slash = False
        self.sub_domains = False
        self.sub_domains_ignore = []
        self.domain_match = r'[^\.\/]+?\.[^\.\/]+'
        self.explicit = explicit
        self.encoding = 'utf-8'
        self.decode_errors = 'ignore'
        self.hardcode_names = True
        self.minimization = False
        self.create_regs_lock = threading.Lock()
        if register:
            config = request_config()
            config.mapper = self
    
    def __str__(self):
        """Generates a tabular string representation."""
        def format_methods(r):
            if r.conditions:
                method = r.conditions.get('method', '')
                return type(method) is str and method or ', '.join(method)
            else:
                return ''

        table = [('Route name', 'Methods', 'Path')] + \
                [(r.name or '', format_methods(r), r.routepath or '')
                 for r in self.matchlist]
            
        widths = [max(len(row[col]) for row in table)
                  for col in range(len(table[0]))]
        
        return '\n'.join(
            ' '.join(row[col].ljust(widths[col])
                     for col in range(len(widths)))
            for row in table)

    def _envget(self):
        try:
            return self.req_data.environ
        except AttributeError:
            return None
    def _envset(self, env):
        self.req_data.environ = env
    def _envdel(self):
        del self.req_data.environ
    environ = property(_envget, _envset, _envdel)
    
    def extend(self, routes, path_prefix=''):
        """Extends the mapper routes with a list of Route objects
        
        If a path_prefix is provided, all the routes will have their
        path prepended with the path_prefix.
        
        Example::
            
            >>> map = Mapper(controller_scan=None)
            >>> map.connect('home', '/', controller='home', action='splash')
            >>> map.matchlist[0].name == 'home'
            True
            >>> routes = [Route('index', '/index.htm', controller='home',
            ...                 action='index')]
            >>> map.extend(routes)
            >>> len(map.matchlist) == 2
            True
            >>> map.extend(routes, path_prefix='/subapp')
            >>> len(map.matchlist) == 3
            True
            >>> map.matchlist[2].routepath == '/subapp/index.htm'
            True
        
        .. note::
            
            This function does not merely extend the mapper with the
            given list of routes, it actually creates new routes with
            identical calling arguments.
        
        """
        for route in routes:
            if path_prefix and route.minimization:
                routepath = '/'.join([path_prefix, route.routepath])
            elif path_prefix:
                routepath = path_prefix + route.routepath
            else:
                routepath = route.routepath
            self.connect(route.name, routepath, **route._kargs)
                
    def connect(self, *args, **kargs):
        """Create and connect a new Route to the Mapper.
        
        Usage:
        
        .. code-block:: python
        
            m = Mapper()
            m.connect(':controller/:action/:id')
            m.connect('date/:year/:month/:day', controller="blog", action="view")
            m.connect('archives/:page', controller="blog", action="by_page",
            requirements = { 'page':'\d{1,2}' })
            m.connect('category_list', 'archives/category/:section', controller='blog', action='category',
            section='home', type='list')
            m.connect('home', '', controller='blog', action='view', section='home')
        
        """
        routename = None
        if len(args) > 1:
            routename = args[0]
        else:
            args = (None,) + args
        if '_explicit' not in kargs:
            kargs['_explicit'] = self.explicit
        if '_minimize' not in kargs:
            kargs['_minimize'] = self.minimization
        route = Route(*args, **kargs)
                
        # Apply encoding and errors if they're not the defaults and the route
        # didn't have one passed in.
        if (self.encoding != 'utf-8' or self.decode_errors != 'ignore') and \
           '_encoding' not in kargs:
            route.encoding = self.encoding
            route.decode_errors = self.decode_errors
        
        if not route.static:
            self.matchlist.append(route)
        
        if routename:
            self._routenames[routename] = route
            route.name = routename
        if route.static:
            return
        exists = False
        for key in self.maxkeys:
            if key == route.maxkeys:
                self.maxkeys[key].append(route)
                exists = True
                break
        if not exists:
            self.maxkeys[route.maxkeys] = [route]
        self._created_gens = False
    
    def _create_gens(self):
        """Create the generation hashes for route lookups"""
        # Use dict keys temporarily to assemble the list, to avoid excessive
        # list iteration testing with "in"
        controllerlist = {}
        actionlist = {}
        
        # Assemble all the hardcoded/defaulted actions/controllers used
        for route in self.matchlist:
            if route.static:
                continue
            if route.defaults.has_key('controller'):
                controllerlist[route.defaults['controller']] = True
            if route.defaults.has_key('action'):
                actionlist[route.defaults['action']] = True
        
        # Setup the lists of all controllers/actions we'll add each route
        # to. We include the '*' in the case that a generate contains a
        # controller/action that has no hardcodes
        controllerlist = controllerlist.keys() + ['*']
        actionlist = actionlist.keys() + ['*']
        
        # Go through our list again, assemble the controllers/actions we'll
        # add each route to. If it's hardcoded, we only add it to that dict key.
        # Otherwise we add it to every hardcode since it can be changed.
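        # e.g., a route hardcoding controller='blog' but not 'action' is filed
        # under gendict['blog'][a] for every known action a, plus '*'.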
        gendict = {} # Our generated two-deep hash
        for route in self.matchlist:
            if route.static:
                continue
            clist = controllerlist
            alist = actionlist
            if 'controller' in route.hardcoded:
                clist = [route.defaults['controller']]
            if 'action' in route.hardcoded:
                alist = [unicode(route.defaults['action'])]
            for controller in clist:
                for action in alist:
                    actiondict = gendict.setdefault(controller, {})
                    actiondict.setdefault(action, ([], {}))[0].append(route)
        self._gendict = gendict
        self._created_gens = True

    def create_regs(self, *args, **kwargs):
        """Atomically creates regular expressions for all connected
        routes
        """
        self.create_regs_lock.acquire()
        try:
            self._create_regs(*args, **kwargs)
        finally:
            self.create_regs_lock.release()
    
    def _create_regs(self, clist=None):
        """Creates regular expressions for all connected routes"""
        if clist is None:
            if self.directory:
                clist = self.controller_scan(self.directory)
            elif callable(self.controller_scan):
                clist = self.controller_scan()
            elif not self.controller_scan:
                clist = []
            else:
                clist = self.controller_scan
        
        for key, val in self.maxkeys.iteritems():
            for route in val:
                route.makeregexp(clist)
        
        regexps = []
        routematches = []
        for route in self.matchlist:
            if not route.static:
                routematches.append(route)
                regexps.append(route.makeregexp(clist, include_names=False))
        self._routematches = routematches
        
        # Create our regexp to strip the prefix
        if self.prefix:
            self._regprefix = re.compile(self.prefix + '(.*)')
        
        # Save the master regexp
        regexp = '|'.join(['(?:%s)' % x for x in regexps])
        self._master_reg = regexp
        self._master_regexp = re.compile(regexp)
        self._created_regs = True
    
    def _match(self, url, environ):
        """Internal Route matcher
        
        Matches a URL against a route, and returns a tuple of the match
        dict and the route object if a match is successful, otherwise
        it returns empty.
        
        For internal use only.
        
        """
        if not self._created_regs and self.controller_scan:
            self.create_regs()
        elif not self._created_regs:
            raise RoutesException("You must generate the regular expressions"
                                 " before matching.")
        
        if self.always_scan:
            self.create_regs()
        
        matchlog = []
        if self.prefix:
            if re.match(self._regprefix, url):
                url = re.sub(self._regprefix, r'\1', url)
                if not url:
                    url = '/'
            else:
                return (None, None, matchlog)
                
        environ = environ or self.environ
        sub_domains = self.sub_domains
        sub_domains_ignore = self.sub_domains_ignore
        domain_match = self.domain_match
        debug = self.debug
        
        # Check to see if it's a valid url against the main regexp
        # Done for faster invalid URL elimination
        valid_url = re.match(self._master_regexp, url)
        if not valid_url:
            return (None, None, matchlog)
        
        for route in self.matchlist:
            if route.static:
                if debug:
                    matchlog.append(dict(route=route, static=True))
                continue
            match = route.match(url, environ, sub_domains, sub_domains_ignore,
                                domain_match)
            if debug:
                matchlog.append(dict(route=route, regexp=bool(match)))
            if isinstance(match, dict) or match:
                return (match, route, matchlog)
        return (None, None, matchlog)
    
    def match(self, url=None, environ=None):
        """Match a URL against against one of the routes contained.
        
        Will return None if no valid match is found.
        
        .. code-block:: python
            
            resultdict = m.match('/joe/sixpack')
        
        """
        if not url and not environ:
            raise RoutesException('URL or environ must be provided')
        
        if not url:
            url = environ['PATH_INFO']
                
        result = self._match(url, environ)
        if self.debug:
            return result[0], result[1], result[2]
        if isinstance(result[0], dict) or result[0]:
            return result[0]
        return None
    
    def routematch(self, url=None, environ=None):
        """Match a URL against against one of the routes contained.
        
        Will return None if no valid match is found, otherwise a
        result dict and a route object is returned.
        
        .. code-block:: python
        
            resultdict, route_obj = m.match('/joe/sixpack')
        
        """
        if not url and not environ:
            raise RoutesException('URL or environ must be provided')
        
        if not url:
            url = environ['PATH_INFO']
        result = self._match(url, environ)
        if self.debug:
            return result[0], result[1], result[2]
        if isinstance(result[0], dict) or result[0]:
            return result[0], result[1]
        return None
    
    def generate(self, *args, **kargs):
        """Generate a route from a set of keywords
        
        Returns the url text, or None if no URL could be generated.
        
        .. code-block:: python
            
            m.generate(controller='content',action='view',id=10)
        
        """
        # Generate ourself if we haven't already
        if not self._created_gens:
            self._create_gens()
        
        if self.append_slash:
            kargs['_append_slash'] = True
        
        if not self.explicit:
            if 'controller' not in kargs:
                kargs['controller'] = 'content'
            if 'action' not in kargs:
                kargs['action'] = 'index'
        
        environ = kargs.pop('_environ', self.environ)
        controller = kargs.get('controller', None)
        action = kargs.get('action', None)

        # Cache the URL keyed by SCRIPT_NAME and kargs
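        # (the key is repr(args) + repr(kargs), prefixed with SCRIPT_NAME or a
        # sentinel when no environ is set, so identical kargs under different
        # mount points cache different URLs)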

        cache_key = unicode(args).encode('utf8') + \
            unicode(kargs).encode('utf8')

        if self.environ:
            cache_key = '{0}:{1}'.format(self.environ.get('SCRIPT_NAME', '@&?NO_SCRIPT_NAME?&@'), cache_key)
        else:
            cache_key = '@&?NO_ENVIRON?&@:' + cache_key
        
        if self.urlcache is not None:
            # Check the url cache to see if it exists, use it if it does
            val = self.urlcache.get(cache_key)
            if val is not None:
                return val
        
        actionlist = self._gendict.get(controller) or self._gendict.get('*', {})
        if not actionlist and not args:
            return None
        (keylist, sortcache) = actionlist.get(action) or \
                               actionlist.get('*', (None, {}))
        if not keylist and not args:
            return None

        keys = frozenset(kargs.keys())
        cacheset = False
        cachekey = unicode(keys)
        cachelist = sortcache.get(cachekey)
        if args:
            keylist = args
        elif cachelist:
            keylist = cachelist
        else:
            cacheset = True
            newlist = []
            for route in keylist:
                if len(route.minkeys - route.dotkeys - keys) == 0:
                    newlist.append(route)
            keylist = newlist
            
            def keysort(a, b):
                """Sorts two sets of sets, to order them ideally for
                matching."""
                a = a.maxkeys
                b = b.maxkeys
                
                lendiffa = len(keys^a)
                lendiffb = len(keys^b)
                # If they both match, don't switch them
                if lendiffa == 0 and lendiffb == 0:
                    return 0
                
                # First, if a matches exactly, use it
                if lendiffa == 0:
                    return -1
                
                # Or b matches exactly, use it
                if lendiffb == 0:
                    return 1
                
                # Neither matches exactly, return the one with the most in 
                # common
                if cmp(lendiffa, lendiffb) != 0:
                    return cmp(lendiffa, lendiffb)
                
                # Neither matches exactly, but if they both have just as much 
                # in common
                if len(keys&b) == len(keys&a):
                    # Then we return the shortest of the two
                    return cmp(len(a), len(b))
                
                # Otherwise, we return the one that has the most in common
                else:
                    return cmp(len(keys&b), len(keys&a))
            
            keylist.sort(keysort)
            if cacheset:
                sortcache[cachekey] = keylist
                
        # Iterate through the keylist of sorted routes (or a single route if
        # it was passed in explicitly for hardcoded named routes)
        for route in keylist:
            fail = False
            for key in route.hardcoded:
                kval = kargs.get(key)
                if not kval:
                    continue
                if isinstance(kval, str):
                    kval = kval.decode(self.encoding)
                else:
                    kval = unicode(kval)
                if kval != route.defaults[key] and not callable(route.defaults[key]):
                    fail = True
                    break
            if fail:
                continue
            path = route.generate(**kargs)
            if path:
                if self.prefix:
                    path = self.prefix + path

                external_static = route.static and route.external
                if environ and environ.get('SCRIPT_NAME', '') != ''\
                        and not route.absolute and not external_static:
                    path = environ['SCRIPT_NAME'] + path

                if self.urlcache is not None:
                    self.urlcache.put(cache_key, str(path))
                return str(path)
            else:
                continue
        return None
    
    def resource(self, member_name, collection_name, **kwargs):
        """Generate routes for a controller resource
        
        The member_name name should be the appropriate singular version
        of the resource given your locale and used with members of the
        collection. The collection_name name will be used to refer to
        the resource collection methods and should be a plural version
        of the member_name argument. By default, the member_name name
        will also be assumed to map to a controller you create.
        
        The concept of a web resource maps somewhat directly to 'CRUD' 
        operations. The overriding thing to keep in mind is that
        mapping a resource is about handling creating, viewing, and
        editing that resource.
        
        All keyword arguments are optional.
        
        ``controller``
            If specified in the keyword args, the controller will be
            the actual controller used, but the rest of the naming
            conventions used for the route names and URL paths are
            unchanged.
        
        ``collection``
            Additional action mappings used to manipulate/view the
            entire set of resources provided by the controller.
            
            Example::
                
                map.resource('message', 'messages', collection={'rss':'GET'})
                # GET /message/rss (maps to the rss action)
                # also adds named route "rss_message"
        
        ``member``
            Additional action mappings used to access an individual
            'member' of this controllers resources.
            
            Example::
                
                map.resource('message', 'messages', member={'mark':'POST'})
                # POST /message/1/mark (maps to the mark action)
                # also adds named route "mark_message"
        
        ``new``
            Action mappings that involve dealing with a new member in
            the controller resources.
            
            Example::
                
                map.resource('message', 'messages', new={'preview':'POST'})
                # POST /message/new/preview (maps to the preview action)
                # also adds a url named "preview_new_message"
        
        ``path_prefix``
            Prepends the URL path for the Route with the path_prefix
            given. This is most useful for cases where you want to mix
            resources or relations between resources.
        
        ``name_prefix``
            Prepends the route names that are generated with the
            name_prefix given. Combined with the path_prefix option,
            it's easy to generate route names and paths that represent
            resources that are in relations.
            
            Example::
                
                map.resource('message', 'messages', controller='categories', 
                    path_prefix='/category/:category_id', 
                    name_prefix="category_")
                # GET /category/7/message/1
                # has named route "category_message"
                
        ``parent_resource`` 
            A ``dict`` containing information about the parent
            resource, for creating a nested resource. It should contain
            the ``member_name`` and ``collection_name`` of the parent
            resource. This ``dict`` will 
            be available via the associated ``Route`` object which can
            be accessed during a request via
            ``request.environ['routes.route']``
 
            If ``parent_resource`` is supplied and ``path_prefix``
            isn't, ``path_prefix`` will be generated from
            ``parent_resource`` as
            "<parent collection name>/:<parent member name>_id". 

            If ``parent_resource`` is supplied and ``name_prefix``
            isn't, ``name_prefix`` will be generated from
            ``parent_resource`` as  "<parent member name>_". 
 
            Example:: 
 
                >>> from routes.util import url_for 
                >>> m = Mapper() 
                >>> m.resource('location', 'locations', 
                ...            parent_resource=dict(member_name='region', 
                ...                                 collection_name='regions'))
                >>> # path_prefix is "regions/:region_id" 
                >>> # name prefix is "region_"  
                >>> url_for('region_locations', region_id=13) 
                '/regions/13/locations'
                >>> url_for('region_new_location', region_id=13) 
                '/regions/13/locations/new'
                >>> url_for('region_location', region_id=13, id=60) 
                '/regions/13/locations/60'
                >>> url_for('region_edit_location', region_id=13, id=60) 
                '/regions/13/locations/60/edit'

            Overriding generated ``path_prefix``::

                >>> m = Mapper()
                >>> m.resource('location', 'locations',
                ...            parent_resource=dict(member_name='region',
                ...                                 collection_name='regions'),
                ...            path_prefix='areas/:area_id')
                >>> # name prefix is "region_"
                >>> url_for('region_locations', area_id=51)
                '/areas/51/locations'

            Overriding generated ``name_prefix``::

                >>> m = Mapper()
                >>> m.resource('location', 'locations',
                ...            parent_resource=dict(member_name='region',
                ...                                 collection_name='regions'),
                ...            name_prefix='')
                >>> # path_prefix is "regions/:region_id" 
                >>> url_for('locations', region_id=51)
                '/regions/51/locations'

        """
        collection = kwargs.pop('collection', {})
        member = kwargs.pop('member', {})
        new = kwargs.pop('new', {})
        path_prefix = kwargs.pop('path_prefix', None)
        name_prefix = kwargs.pop('name_prefix', None)
        parent_resource = kwargs.pop('parent_resource', None)
        
        # Generate ``path_prefix`` if ``path_prefix`` wasn't specified and 
        # ``parent_resource`` was. Likewise for ``name_prefix``. Make sure
        # that ``path_prefix`` and ``name_prefix`` *always* take precedence if
        # they are specified--in particular, we need to be careful when they
        # are explicitly set to "".
        if parent_resource is not None: 
            if path_prefix is None: 
                path_prefix = '%s/:%s_id' % (parent_resource['collection_name'], 
                                             parent_resource['member_name']) 
            if name_prefix is None:
                name_prefix = '%s_' % parent_resource['member_name']
        else:
            if path_prefix is None: path_prefix = ''
            if name_prefix is None: name_prefix = ''
        
        # Ensure the edit and new actions are present and mapped to GET
        member['edit'] = 'GET'
        new.update({'new': 'GET'})
        
        # Make new dicts based on the old ones: the old values become keys,
        # and the old keys become items in a list as the value
        def swap(dct, newdct):
            """Swap the keys and values in the dict, and uppercase the values
            from the dict during the swap."""
            for key, val in dct.iteritems():
                newdct.setdefault(val.upper(), []).append(key)
            return newdct
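        # For example, swap({'rss': 'GET', 'mark': 'POST'}, {}) returns
        # {'GET': ['rss'], 'POST': ['mark']}.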
        collection_methods = swap(collection, {})
        member_methods = swap(member, {})
        new_methods = swap(new, {})
        
        # Insert create, update, and destroy methods
        collection_methods.setdefault('POST', []).insert(0, 'create')
        member_methods.setdefault('PUT', []).insert(0, 'update')
        member_methods.setdefault('DELETE', []).insert(0, 'delete')
        
        # If there's a path prefix option, use it with the controller
        controller = strip_slashes(collection_name)
        path_prefix = strip_slashes(path_prefix)
        path_prefix = '/' + path_prefix
        if path_prefix and path_prefix != '/':
            path = path_prefix + '/' + controller
        else:
            path = '/' + controller
        collection_path = path
        new_path = path + "/new"
        member_path = path + "/:(id)"
        
        options = { 
            'controller': kwargs.get('controller', controller),
            '_member_name': member_name,
            '_collection_name': collection_name,
            '_parent_resource': parent_resource,
            '_filter': kwargs.get('_filter')
        }
        
        def requirements_for(meth):
            """Returns a new dict to be used for all route creation as the
            route options"""
            opts = options.copy()
            if meth != 'any':
                opts['conditions'] = {'method':[meth.upper()]}
            return opts
        
        # Add the routes for handling collection methods
        for method, lst in collection_methods.iteritems():
            primary = (method != 'GET' and lst.pop(0)) or None
            route_options = requirements_for(method)
            for action in lst:
                route_options['action'] = action
                route_name = "%s%s_%s" % (name_prefix, action, collection_name)
                self.connect("formatted_" + route_name, "%s/%s.:(format)" % \
                             (collection_path, action), **route_options)
                self.connect(route_name, "%s/%s" % (collection_path, action),
                                                    **route_options)
            if primary:
                route_options['action'] = primary
                self.connect("%s.:(format)" % collection_path, **route_options)
                self.connect(collection_path, **route_options)
        
        # Specifically add in the built-in 'index' collection method and its 
        # formatted version
        self.connect("formatted_" + name_prefix + collection_name, 
            collection_path + ".:(format)", action='index', 
            conditions={'method':['GET']}, **options)
        self.connect(name_prefix + collection_name, collection_path, 
                     action='index', conditions={'method':['GET']}, **options)
        
        # Add the routes that deal with new resource methods
        for method, lst in new_methods.iteritems():
            route_options = requirements_for(method)
            for action in lst:
                path = (action == 'new' and new_path) or "%s/%s" % (new_path, 
                                                                    action)
                name = "new_" + member_name
                if action != 'new':
                    name = action + "_" + name
                route_options['action'] = action
                formatted_path = (action == 'new' and new_path + '.:(format)') or \
                    "%s/%s.:(format)" % (new_path, action)
                self.connect("formatted_" + name_prefix + name, formatted_path, 
                             **route_options)
                self.connect(name_prefix + name, path, **route_options)

        requirements_regexp = '[^\/]+(?<!\\\)'

        # Add the routes that deal with member methods of a resource
        for method, lst in member_methods.iteritems():
            route_options = requirements_for(method)
            route_options['requirements'] = {'id':requirements_regexp}
            if method not in ['POST', 'GET', 'any']:
                primary = lst.pop(0)
            else:
                primary = None
            for action in lst:
                route_options['action'] = action
                self.connect("formatted_%s%s_%s" % (name_prefix, action, 
                                                    member_name),
                    "%s/%s.:(format)" % (member_path, action), **route_options)
                self.connect("%s%s_%s" % (name_prefix, action, member_name),
                    "%s/%s" % (member_path, action), **route_options)
            if primary:
                route_options['action'] = primary
                self.connect("%s.:(format)" % member_path, **route_options)
                self.connect(member_path, **route_options)
        
        # Specifically add the member 'show' method
        route_options = requirements_for('GET')
        route_options['action'] = 'show'
        route_options['requirements'] = {'id':requirements_regexp}
        self.connect("formatted_" + name_prefix + member_name, 
                     member_path + ".:(format)", **route_options)
        self.connect(name_prefix + member_name, member_path, **route_options)
    
    def redirect(self, match_path, destination_path, *args, **kwargs):
        """Add a redirect route to the mapper
        
        Redirect routes bypass the wrapped WSGI application and instead
        result in a redirect being issued by the RoutesMiddleware. As
        such, this method is only meaningful when using
        RoutesMiddleware.
        
        By default, a 302 Found status code is used, this can be
        changed by providing a ``_redirect_code`` keyword argument
        which will then be used instead. Note that the entire status
        code string needs to be present.
        
        When using keyword arguments, all arguments that apply to
        matching will be used for the match, while generation specific
        options will be used during generation. Thus all options
        normally available to connected Routes may be used with
        redirect routes as well.
        
        Example::
            
            map = Mapper()
            map.redirect('/legacyapp/archives/{url:.*}', '/archives/{url}')
            map.redirect('/home/index', '/', _redirect_code='301 Moved Permanently')
        
        """
        both_args = ['_encoding', '_explicit', '_minimize']
        gen_args = ['_filter']
        
        status_code = kwargs.pop('_redirect_code', '302 Found')
        gen_dict, match_dict = {}, {}
        
        # Create the dict of args for the generation route
        for key in both_args + gen_args:
            if key in kwargs:
                gen_dict[key] = kwargs[key]
        gen_dict['_static'] = True
        
        # Create the dict of args for the matching route
        for key in kwargs:
            if key not in gen_args:
                match_dict[key] = kwargs[key]
        
        self.connect(match_path, **match_dict)
        match_route = self.matchlist[-1]
        
        self.connect('_redirect_%s' % id(match_route), destination_path,
                     **gen_dict)
        match_route.redirect = True
        match_route.redirect_status = status_code
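
# A minimal usage sketch (illustrative only, not part of the original example):
#
#   m = Mapper(controller_scan=None)
#   m.connect('home', '/', controller='home', action='splash')
#   m.create_regs([])
#   m.match('/')        # roughly {'controller': 'home', 'action': 'splash'}
#   m.generate(controller='home', action='splash')   # '/'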
Example #41
0
File: client.py Project: Roger/coucher
class Database(object):
    def __init__(self, name, server=None, create=False):
        self.server = server or Server()
        self.session = self.server.session
        self.name = name
        self.database = self.server.host + "/" + name

        self.cache = LRUCache(100)

        if create:
            self.create()
        else:
            response = self.session.head(self.database)
            if not response.ok:
                if response.status_code == 404:
                    raise excepts.DBNotExists
                raise Exception(response.status_code)

    def __getitem__(self, docid):
        """
        Returns a document by _id
        """

        return self.get_doc(docid)

    def __delitem__(self, docid):
        self.delete_doc(docid)

    def create(self):
        """
        Try to create a new database, or raise an error.

        Possible errors: DBExists, AuthFail
        """

        request = self.session.put(self.database)
        if not request.ok:
            if request.status_code == 401:
                raise excepts.AuthFail
            elif request.status_code == 412:
                raise excepts.DBExists
            raise Exception(request.status_code)

        response = request.json()
        ok = response.get("ok", False)
        if not ok:
            raise Exception(response)


    def delete_doc(self, doc):
        """
        Removes a document
        """

        if isinstance(doc, six.string_types):
            doc = self[doc]

        response = self.session.delete(self.database + "/" + doc["_id"],
                params=dict(rev=doc["_rev"]))
        if response.ok:
            return response.json()

        if response.status_code == 404:
            raise excepts.DocNotExists

    def changes(self, feed="continuous", include_docs=False, yield_beats=False,
                **opts):
        opts.update(dict(feed=feed, include_docs=include_docs))
        opts = encode_view_options(opts)

        if feed == "continuous":
            response = self.session.get(self.database + "/_changes",
                                        params=opts, stream=True)
            if not response.ok:
                raise Exception(response.status_code)

            for line in response.iter_lines(chunk_size=2048):
                if line:
                    yield json.loads(line.decode(response.encoding))
                elif yield_beats:
                    yield {}
        else:
            raise NotImplementedError("feed '%s' is not implemented" % feed)
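
    # A minimal consumption sketch (hypothetical server setup):
    #
    #   db = Database('mydb', server=Server())
    #   for change in db.changes(include_docs=True):
    #       print(change.get('id'))   # one JSON-decoded change per line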

    def delete(self):
        """
        Delete the database
        """

        self.server.delete_db(self.name)

    def save(self, doc, **options):
        """
        Creates or Updates a document
        """

        request = self.session.post(self.database,
                data=json.dumps(doc), params=options)
        if request.ok:
            response = request.json()
            doc = copy.copy(doc)
            doc["_id"] = response.get("id")
            doc["_rev"] = response.get("rev")
            if isinstance(doc, dict):
                doc = Document(doc)
            return doc

        if request.status_code == 409:
            raise excepts.DocConflict("_id: %s" % doc["_id"])

        raise Exception("Can't save doc '%s' error '%s'" % (doc,
                request.status_code))

    def update(self, docs, **options):
        options.update(docs=docs)
        response = self.session.post(self.database + "/_bulk_docs",
                data=json.dumps(options))

        if response.ok:
            return response.json()
        raise Exception("Error updating docs %s" % response.status_code)

    def view(self, name, **options):
        return View(name, self, **options)

    def get_doc(self, docid, default=None):
        """
        Returns a document by id, using a small ETag cache (If-None-Match)
        to avoid re-parsing documents the server reports as unchanged
        """

        old_doc = self.cache.get(docid, None)
        headers = None
        if old_doc:
            headers = {'If-None-Match': old_doc[0]}

        response = self.session.get(self.database + "/" + docid,
                                    headers=headers)
        if not response.ok:
            if response.status_code == 404:
                if default is not None:
                    return default
                raise excepts.DocNotExists
            raise Exception(response.status_code)

        if old_doc and response.headers["etag"] == old_doc[0]:
            doc = old_doc[1]
        else:
            doc = Document(response.json())
            self.cache.put(docid, (response.headers["etag"], doc))
        return doc

    def info(self):
        response = self.session.get(self.database)
        if response.ok:
            return response.json()
        raise Exception(response)

    def __repr__(self):
        return "<Database %s>" % self.name