def __init__(self, key_lookup, component_cache_size, all_cache_size,
             fallback_cache_size):
    self.key_lookup = key_lookup
    # Wrap the lookup's bound methods in separately sized LRU caches.
    self.component = lru_cache(component_cache_size)(key_lookup.component)
    self.fallback = lru_cache(fallback_cache_size)(key_lookup.fallback)
    # Materialize all()'s result so the cached value can be returned
    # more than once.
    self.all = lru_cache(all_cache_size)(
        lambda key: list(key_lookup.all(key)))
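# A minimal standalone sketch of the wrapper pattern above, using a
# hypothetical FakeLookup (not part of the original source). A bound
# method already carries its instance, so lru_cache keys only on the
# call arguments and memoizes lookups per wrapper object.
from repoze.lru import lru_cache

class FakeLookup(object):
    def component(self, key):
        return key.upper()           # stand-in for an expensive lookup

lookup = FakeLookup()
cached_component = lru_cache(16)(lookup.component)
assert cached_component("a") == "A"  # computed on the first call
assert cached_component("a") == "A"  # served from the LRU afterwards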
def test_partial(self):
    # The lru_cache decorator must not crash on functools.partial
    # instances.
    from functools import partial
    from repoze.lru import lru_cache

    def add(a, b):
        return a + b

    add_five = partial(add, 5)
    decorated = lru_cache(20)(add_five)
    self.assertEqual(decorated(3), 8)
def __init__(self, host, port=3772, timeout=None, cache_size=50,
             reconnect=False):
    self.host = host
    self.port = port
    self.timeout = timeout
    # Memoize execute() on this instance with the caller-supplied
    # cache_size.
    self.cache = lru_cache(maxsize=cache_size)
    self.execute = self.cache(self.execute)
    self.reconnect = reconnect
    if not reconnect:
        self.connect()
def __init__(self, host, port=3772, timeout=None, cache_size=50,
             reconnect=False, use_plain_sasl=False):
    self.host = host
    self.port = port
    self.timeout = timeout
    # Memoize execute() on this instance with the caller-supplied
    # cache_size.
    self.cache = lru_cache(maxsize=cache_size)
    self.execute = self.cache(self.execute)
    self.reconnect = reconnect
    self.use_plain_sasl = use_plain_sasl
    if not reconnect:
        self.connect()
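# The per-instance memoization pattern from the two constructors above,
# in isolation; Client and its execute() are hypothetical stand-ins, not
# the original class. Rebinding self.execute in __init__ gives every
# instance its own LRU, sized at construction time.
from repoze.lru import lru_cache

class Client(object):
    def __init__(self, cache_size=50):
        # shadow the class method with a per-instance cached wrapper
        self.execute = lru_cache(cache_size)(self.execute)

    def execute(self, query):
        return "result of %s" % query  # stand-in for a remote call

c = Client()
assert c.execute("SELECT 1") == "result of SELECT 1"  # computed
assert c.execute("SELECT 1") == "result of SELECT 1"  # cache hit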
def shoot(context, group_by):
    """Produce a dict of the data found in the lines, grouping it with
    group_by according to the options (which can include a data_filter).

    * group_by: a callable returning a Hankyu (dict) used to extract
      the valid information
    * option.cache: cache strategy (beaker, repoze, dict, fixed, no_cache)
    * option.data_filter: f(data) => bool applied to the data dict; if
      True, the current data is kept
    * option.diagnose (array of strings):
      * match: reports on stderr which lines were rejected
      * rejected: reports on stderr which data were filtered out
    * option.skill: enables costly extraction of:
      * geo_ip: GeoIP information
      * user_agent: user-agent parsing
    * option.log_format: apache_log_combined or lighttpd; selects the
      regexp used to extract the fields from the line, and also the
      datetime parser
    """
    context.log = dict(error=[], warning=[])
    aggregator = Hankyu({})
    if 'user_agent' in context.skill:
        import httpagentparser
    look_for = log_pattern[context.log_format].search
    match = None
    dt_format = dt_formater_from_format(date_pattern[context.log_format])
    # Memoize the costly per-line lookups: repeated user agents and IPs
    # are resolved only once per run.
    parse_user_agent = lru_cache(context.cache_size)(normalize_user_agent)
    if "geo_ip" in context.skill:
        from pygeoip import GeoIP
        gi = GeoIP(context.geoip)
        country_by_ip = lru_cache(context.cache_size)(gi.country_code_by_addr)
    _input = fileinput.input(context.files)
    if not context.silent:
        sys.stderr.write("parsing:\n %s\n" % "\n-".join(context.files))
    try:
        for line in _input:
            match = look_for(line)
            if not context.silent and not _input.lineno() % 10000:
                sys.stderr.write("*")
            if match:
                data = match.groupdict()
                if data.get("datetime"):
                    data['_datetime'] = dt_format(data["datetime"])
                if 'geo_ip' in context.skill:
                    data.update({"_country": country_by_ip(data["ip"])})
                if 'user_agent' in context.skill:
                    data.update(parse_user_agent(data["agent"]))
                if context.data_filter and not context.data_filter(data):
                    if "rejected" in context.diagnose:
                        if context.silent:
                            context.log["warning"] += [
                                "REJECTED:at %s:%s:%s" % (
                                    _input.lineno(), _input.filename(), data)]
                        else:
                            sys.stderr.write("at %s:%s:" % (
                                _input.lineno(), _input.filename()))
                            sys.stderr.write("REJECTED:{0}\n".format(data))
                else:
                    aggregator += group_by(data)
            elif "match" in context.diagnose:
                if context.silent:
                    context.log["warning"] += [
                        "NOTMATCH:at %s:%s:%s did not match" % (
                            _input.lineno(), _input.filename(), line)]
                else:
                    sys.stderr.write("at %s:%s:" % (
                        _input.lineno(), _input.filename()))
                    sys.stderr.write("NOT MATCHED:«{0}»\n".format(line))
    except Exception as e:
        if context.silent:
            context.log["error"] += ["ARRG(%s):at %s:%s" % (
                e, _input.lineno(), _input.filename())]
        else:
            sys.stderr.write("ARRG:at %s:%s\n" % (
                _input.lineno(), _input.filename()))
            sys.stderr.write("CONTEXT:match %s:data : %s\n" % (
                match and match.groupdict() or "no_match", data))
        raise
    finally:
        ## closing here causes a problem with stdin/stderr
        #_input.close()
        if not context.silent:
            sys.stderr.write("\n%s lines parsed\n" % _input.lineno())
    return aggregator
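# Isolated sketch of the two per-run caches shoot() builds; this
# normalize_user_agent is a hypothetical stand-in for the real parser.
# Wrapping pure per-line lookups in an LRU means repeated user agents
# (and, with the geo_ip skill, repeated IPs) are resolved only once.
from repoze.lru import lru_cache

def normalize_user_agent(agent):
    return {"_browser": agent.split("/", 1)[0]}  # stand-in parser

parse_user_agent = lru_cache(10000)(normalize_user_agent)
assert parse_user_agent("Mozilla/5.0") == {"_browser": "Mozilla"}
assert parse_user_agent("Mozilla/5.0") == {"_browser": "Mozilla"}  # cache hit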
def __init__(cls, name, bases, namespace):
    # Wrap every plain function defined in the class body in an LRU
    # cache; attributes that are not functions are left untouched.
    for key, value in namespace.items():
        if isinstance(value, FunctionType):
            setattr(cls, key, lru_cache(maxsize=500)(value))
    return super(Cache, cls).__init__(name, bases, namespace)
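# Hedged usage sketch for the metaclass above, assuming Cache subclasses
# type (as the __init__ signature suggests); Expensive is hypothetical.
# The wrapped object is the plain function, so self becomes part of the
# cache key and instances must be hashable.
class Expensive(metaclass=Cache):    # Python 3 spelling
    def square(self, n):
        return n * n                 # cached per (self, n) after first call

e = Expensive()
assert e.square(4) == 16
assert e.square(4) == 16             # second call served from the LRU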
import math
import os
import re
import string

from stemming.porter2 import stem
from repoze.lru import lru_cache

SPLIT_RE = re.compile(r'[\n\r\s\t' + string.punctuation + ']')
EMAIL_RE = re.compile(
    r"<?(mailto:)?[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9]"
    r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>?")
URL_RE = re.compile(r'https?://[^ \t\r\n\<]+')
DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')

# Cached version of stem; maxsize 4096 will use about 128 KB of memory.
stem = lru_cache(maxsize=4096)(stem)


class Etiquetador(object):

    def __init__(self, word_min_size=2, min_occurrences=0, max_tags=None,
                 weight_range=None, stopwords=None):
        self.word_min_size = word_min_size
        self.min_occurrences = min_occurrences
        self.max_tags = max_tags
        self.weight_range = weight_range
        self.init_tags()
        self.init_stopwords(stopwords)

    def init_stopwords(self, stopwords):
        stopwords_file_path = os.path.join(DATA_DIR, 'stop_ptbr.txt')
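# Minimal sketch restating the module-level cached-stem pattern above;
# raw_stem/cached_stem are illustrative names, not new API. Memoizing a
# pure str -> str function in a bounded LRU trades a fixed amount of
# memory for skipping repeated stemming of the frequent words that
# dominate real text.
from repoze.lru import lru_cache
from stemming.porter2 import stem as raw_stem

cached_stem = lru_cache(maxsize=4096)(raw_stem)
assert cached_stem("running") == raw_stem("running")     # same result
assert cached_stem("running") == cached_stem("running")  # second call hits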