Example #1
def cache_get_or_set(cache_name, function, timeout=3600):
    cache = FileSystemCache(cache_dir=abspath('tmp'))
    cache_data = cache.get(cache_name)
    if cache_data is None:
        cache_data = function()
        cache.set(cache_name, cache_data, timeout=timeout)
    return cache_data
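# Hypothetical usage sketch (not part of the original example): on a cache
# miss, cache_get_or_set calls the supplied function and stores the result for
# `timeout` seconds; repeat calls within that window return the cached copy.
def fetch_remote_rates():
    return {"EUR": 1.08, "USD": 1.0}  # placeholder payload, values assumed

rates = cache_get_or_set('exchange_rates', fetch_remote_rates, timeout=600)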
def number_gen(first_part, start_num, end_num, semester, dept, subjects):
    driver = webdriver.Firefox()  # Firefox used for testing. Change it to PhantomJS
    driver.implicitly_wait(30)
    base_url = "http://result.pondiuni.edu.in/candidate.asp"
    url = base_url
    driver.get(base_url)
    # os.mkdir(str(first_part))
    os.chdir("results")
    os.chdir(str(first_part))
    cache = FileSystemCache('.cachedir', threshold=100000)
    for number in range(start_num, end_num + 1):
        current_num = "%04d" % number
        numb = first_part + str(current_num)
        driver.find_element_by_id("txtregno").clear()
        driver.find_element_by_id("txtregno").send_keys(numb)
        Select(driver.find_element_by_id("cmbdegree")).select_by_visible_text(dept)
        Select(driver.find_element_by_id("cmbexamno")).select_by_visible_text(semester)
        driver.find_element_by_id("button1").click()

        # copying the content; key the cache on the registration number, since
        # each number yields a different result page (the constant form URL
        # would not make a useful cache key)
        page_source = driver.page_source
        cache.set(numb, page_source, timeout=60 * 60 * 24 * 7)  # one week in seconds
        root = html.document_fromstring(page_source)
        Cleaner(kill_tags=['noscript'], style=True)(root)  # lxml >= 2.3.1

        # pasting to file
        filename = str(numb) + ".txt"
        fp = open(filename, 'w')
        fp.write((root.text_content()).encode('utf-8'))
        fp.close()
        driver.back()
    driver.close()
    return analyze(subjects)
Example #3
def get_configuration():

    sheet_scope = ['https://www.googleapis.com/auth/spreadsheets.readonly']
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        SERVICE_KEY, sheet_scope)

    http_auth = credentials.authorize(Http())
    sheets_service = build('sheets', 'v4', http=http_auth)

    fileId = '1I8un3pP8aE3b2ixeQTN9mVPwMzTsE_TEkaFzAHEWZ-A'
    data_range = 'configuration'
    data_result = sheets_service.spreadsheets().values().get(
        spreadsheetId=fileId, range=data_range).execute()
    return_data = data_result.get('values', [])

    if not return_data:
        return -1

    configuration = {}
    for row in return_data:
        configuration[row[0]] = row[1]

    cc = FileSystemCache('/var/www/html/gs-cookies/cache/config',
                         default_timeout=300)

    if cc.get('first_monday') is None:
        for key, value in configuration.items():
            cc.set(key, value)

    return configuration
Example #4
def get_last_digests():
    cache = FileSystemCache(Config.CACHE_DIR, default_timeout=0)
    cache_key = 'last_digests'

    api = Api(host=Config.HOST)
    contract = {
        'args': '["5"]',
        'function': 'getLast',
    }

    try:
        response = api.call(
            from_addr=Config.ADDRESS,
            to_addr=Config.ADDRESS,
            value='0',
            nonce=0,
            gasprice=Config.GASPRICE,
            gaslimit=Config.GASLIMIT,
            contract=contract)
    except RequestException:
        result = cache.get(cache_key)
    else:
        if response.status_code == 200:
            result = response.json().get('result', {}).get('result', '{}')
            result = json.loads(result)
            if isinstance(result, str):
                result = json.loads(result)
            cache.set(cache_key, result)
        else:
            # fall back to the cached value, otherwise `result` is never bound
            result = cache.get(cache_key)

    return result
Example #5
class Cache:
    def __init__(self):
        self._cache = FileSystemCache(Cache.cache_dir(),
                                      default_timeout=10 * 60)

    @staticmethod
    def cache_dir():
        return os.path.join(os.getcwd(), 'forum', 'src', 'storage', 'cache')

    def set(self, key, value, timeout=None):
        try:
            return self._cache.set(key, json.dumps(value), timeout=timeout)

        except Exception as exception:
            print(exception)

    def get(self, key):
        try:
            data = self._cache.get(key)
            if data:
                return json.loads(data)

        except Exception as exception:
            print(exception)

    def has(self, key):
        # FileSystemCache.has() already returns a boolean
        return self._cache.has(key)
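# Hypothetical usage sketch (not from the original source): values pass through
# json.dumps on set() and json.loads on get(), so only JSON-serialisable data
# survives the round trip; the default timeout is ten minutes unless overridden.
cache = Cache()
cache.set('user:1', {'name': 'Ada', 'admin': True}, timeout=60)
print(cache.get('user:1'))  # {'name': 'Ada', 'admin': True}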
Example #6
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    """

    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix

    def _generate_sid(self):
        return str(uuid4())

    def open_session(self, app, request):
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid)
        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid)
    
    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return

        # Modification case.  There are upsides and downsides to
        # emitting a set-cookie header each request.  The behavior
        # is controlled by the :meth:`should_set_cookie` method
        # which performs a quick check to figure out if the cookie
        # should be set or not.  This is controlled by the
        # SESSION_REFRESH_EACH_REQUEST config flag as well as
        # the permanent flag on the session itself.
        #if not self.should_set_cookie(app, session):
        #    return

        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                         int(app.permanent_session_lifetime.total_seconds()))
        response.set_cookie(app.session_cookie_name, session.sid,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure)
Example #7
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    """

    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix

    def _generate_sid(self):
        return str(uuid4())

    def open_session(self, app, request):
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid)
        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid)
    
    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return

        # Modification case.  There are upsides and downsides to
        # emitting a set-cookie header each request.  The behavior
        # is controlled by the :meth:`should_set_cookie` method
        # which performs a quick check to figure out if the cookie
        # should be set or not.  This is controlled by the
        # SESSION_REFRESH_EACH_REQUEST config flag as well as
        # the permanent flag on the session itself.
        # if not self.should_set_cookie(app, session):
        #    return

        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                         int(app.permanent_session_lifetime.total_seconds()))
        response.set_cookie(app.session_cookie_name, session.sid,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure)
Example #8
class Message:
    def __init__(self):
        # current directory
        cur_path = os.path.split(os.path.realpath(__file__))[0]
        # parent directory
        parent_path = os.path.dirname(cur_path)
        # path of the config file
        ini_file = "%s/conf.ini" % parent_path
        # cache directory
        cache_path = "%s/cache/" % parent_path

        cf = ConfigParser.ConfigParser()
        cf.read(ini_file)
        self.sendkey = cf.get('pushbear', 'sendkey')
        self.text = cf.get('pushbear', 'sendname')
        self.push_url = 'https://pushbear.ftqq.com/sub'
        # file-system cache
        self.cache = FileSystemCache(cache_path)
        # cache key for today's push count
        self.cache_push_times_key = 'today_push_times'

    # seconds remaining until 24:00 today
    def _shengyu(self):
        h = int(time.strftime('%H',time.localtime(time.time())))
        m = int(time.strftime('%M',time.localtime(time.time())))
        s = int(time.strftime('%S',time.localtime(time.time())))

        shengyu = int(24*60 - (h*60 + m + 1)) * 60
        return shengyu + (60 - s)

    # send the message
    def _send(self, desp):
        push_data = {
            'sendkey' : self.sendkey,
            'text' : self.text,
            'desp' : desp
        }
        r = requests.post(self.push_url, data=push_data)
        #print r.text

    # check how many reminders have been sent today; stop after max_remind_times
    def push(self, desp, max_remind_times=3):
        push_times = self.cache.get(self.cache_push_times_key)


        if push_times is None:
            # no message has been sent yet today
            push_times = 1
        else:
            # a message has already been sent today
            if push_times < max_remind_times:
                push_times = push_times + 1
            else:
                print "[今日已提醒超过%s次,不再提醒]" % (max_remind_times)
                return False

        self.cache.set(self.cache_push_times_key, push_times, timeout=self._shengyu())
        desp = ("%s [今日已提醒%s次]") % (desp, push_times)
        self._send(desp)
Example #9
def check_cache( self, bib_id ):
    """ Checks cache for marc. """
    cache = FileSystemCache( self.cache_dir, threshold=500, default_timeout=self.cache_hours, mode=0o664 )  # http://werkzeug.pocoo.org/docs/0.9/contrib/cache/
    cache_key = bib_id
    marc = cache.get( cache_key )
    if marc is None:
        self.log.debug( u'in app_helper.Helper.check_cache(); marc not found in cache' )
    else:
        self.log.debug( u'in app_helper.Helper.check_cache(); marc found in cache' )
    return ( marc, cache )
Example #10
def get_content(url, params, cache):
    c = FileSystemCache('cache',
                        threshold=cache['threshold'],
                        default_timeout=cache['default_timeout'])
    cache_id = url + str(params)
    cache_content = c.get(cache_id)
    if cache_content is not None:
        return cache_content
    headers = {'user-agent': UserAgent().chrome}
    resp = requests.get(url, params=params, headers=headers)
    resp.raise_for_status()
    content = resp.text
    c.set(cache_id, content)
    return content
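# Illustrative call (assumed URL and values, not from the original source): the
# `cache` dict feeds FileSystemCache's threshold and default_timeout, and the
# cache key is simply the URL concatenated with the stringified params.
body = get_content('https://example.com/search',
                   params={'q': 'werkzeug', 'page': 1},
                   cache={'threshold': 500, 'default_timeout': 3600})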
Example #11
def test_filesystemcache_set_get():
    """
    test if FileSystemCache.set/get works
    """
    tmp_dir = tempfile.mkdtemp()
    try:
        cache = FileSystemCache(cache_dir=tmp_dir)
        for i in range(3):
            cache.set(str(i), i * i)
        for i in range(3):
            result = cache.get(str(i))
            assert result == i * i
    finally:
        shutil.rmtree(tmp_dir)
Example #13
class BaseProvider(object):

    __metaclass__ = ABCMeta

    def __init__(self, cache_dir=None, default_timeout=60 * 60 * 24,
                 api_key=None):
        # store it in cache for 1 day. using file system cache because
        # memcached is too mainstream. :)
        self.cache = FileSystemCache(cache_dir=cache_dir or '/tmp/__arcoiro__',
                                     default_timeout=default_timeout)
        self._api_key = api_key

    @abstractproperty
    def name(self):
        pass

    @abstractproperty
    def url(self):
        pass

    @abstractmethod
    def get_urls_from_tag(self, tag):
        pass

    @property
    def display_name(self):
        return self.name

    @property
    def api_key(self):
        if self._api_key is not None:
            return self._api_key
        config_key = '%s_API_KEY' % self.name.upper()
        key = current_app.config.get(config_key)
        if key is None:
            raise RuntimeError('%s not defined!' % config_key)
        return key

    def get_cached_urls_from_tag(self, tag):
        cache_key = '%s:%s' % (self.name, tag)
        urls = self.cache.get(cache_key)
        if urls is not None:
            return urls
        urls = self.get_urls_from_tag(tag)
        if urls is None:
            return None
        self.cache.set(cache_key, urls)
        return urls
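# A hypothetical concrete provider (not in the original source) showing what a
# subclass must supply before get_cached_urls_from_tag can be used.
class DummyProvider(BaseProvider):
    name = 'dummy'
    url = 'https://provider.invalid'

    def get_urls_from_tag(self, tag):
        # a real provider would query its remote API here, using self.api_key
        return ['https://provider.invalid/%s/1.jpg' % tag]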
Example #14
def weather_data(latitude, longitude):
    cache = FileSystemCache('./cache')
    cache_key = '{}.{}'.format(latitude, longitude)
    data = cache.get(cache_key)
    if data is None:
        apikey = '3a4c7bb6f812d00b3bb9d19ceace7222'
        fio = ForecastIO.ForecastIO(apikey,
                                    units=ForecastIO.ForecastIO.UNITS_SI,
                                    lang=ForecastIO.ForecastIO.LANG_ENGLISH,
                                    latitude=latitude,
                                    longitude=longitude)

        currently = fio.currently
        data = currently
        cache.set(cache_key, data, timeout=300)  # 5 minutes
    return data
Example #15
class Message:
    def __init__(self):
        self.sendkey = '1562-20254d9020f4f5883c56d0836c8bf5cb'
        self.text = 'Lazy'
        self.push_url = 'https://pushbear.ftqq.com/sub'
        # file-system cache
        self.cache = FileSystemCache('/tmp/jjz_cache')
        # cache key for today's push count
        self.cache_push_times_key = 'today_push_times'

    # seconds remaining until 24:00 today
    def _shengyu(self):
        h = int(time.strftime('%H', time.localtime(time.time())))
        m = int(time.strftime('%M', time.localtime(time.time())))
        s = int(time.strftime('%S', time.localtime(time.time())))

        shengyu = int(24 * 60 - (h * 60 + m + 1)) * 60
        return shengyu + (60 - s)

    # send the message
    def _send(self, desp):
        push_data = {
            'sendkey': self.sendkey,
            'text': self.text,
            'desp': desp
        }
        r = requests.post(self.push_url, data=push_data, verify=False)
        # print r.text

    # check how many reminders have been sent today; stop after max_remind_times
    def push(self, desp, max_remind_times=3):
        push_times = self.cache.get(self.cache_push_times_key)

        if push_times is None:
            # no message has been sent yet today
            push_times = 1
        else:
            # a message has already been sent today
            if push_times < max_remind_times:
                push_times = push_times + 1
            else:
                print "[今日已提醒超过%s次,不再提醒]" % (max_remind_times)
                return False

        self.cache.set(self.cache_push_times_key, push_times, timeout=self._shengyu())
        desp = ("%s [今日已提醒%s次]") % (desp, push_times)
        self._send(desp)
Example #16
def parse(url):
    cache = FileSystemCache('.cachedir', threshold=100000)

    # get page
    page_source = cache.get(url)
    if page_source is None:
        # use firefox to get page with javascript generated content
        with closing(Firefox()) as browser:
            browser.get(url)
            page_source = browser.page_source
            cache.set(url, page_source, timeout=60*60*24*7) # week in seconds

    # extract text
    root = html.document_fromstring(page_source)
    # remove flash, images, <script>,<style>, etc
    Cleaner(kill_tags=['noscript'], style=True)(root) # lxml >= 2.3.1
    return root.text_content() # extract text
Example #17
class WechatCache(WechatSogouBase):
    """File-based cache

    """
    def __init__(self, cache_dir='cache', default_timeout=300):
        """Initialize

        cache_dir is the cache directory
        """
        self.cache = FileSystemCache(cache_dir,
                                     default_timeout=default_timeout)

    def clear(self):
        """Clear the cache
        """
        return self.cache.clear()

    def get(self, key):
        """Get a cached value

        Returns the value cached under key,
        or None if there is no matching cache entry
        """
        return self.cache.get(key)

    def add(self, key, value, timeout=None):
        """Add a cache entry

        If no entry exists for key, store value under key with expiry timeout (300 seconds by default)
        Otherwise return False (an existing entry is not overwritten)
        """
        return self.cache.add(key, value, timeout)

    def set(self, key, value, timeout=None):
        """Set a cache entry

        Store value under key, expiring after 300 seconds by default
        """
        return self.cache.set(key, value, timeout)

    def delete(self, key):
        """Delete a cache entry

        Remove the value stored under key
        """
        return self.cache.delete(key)
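# Hypothetical usage (not from the original source): add() refuses to overwrite
# an existing key and returns False, while set() always writes.
wechat_cache = WechatCache(cache_dir='cache', default_timeout=300)
wechat_cache.set('access_token', 'abc123')
print(wechat_cache.add('access_token', 'xyz'))  # False, the key is already cached
print(wechat_cache.get('access_token'))         # 'abc123'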
Example #18
class WechatCache(object):
    """File-based cache

    """

    def __init__(self, cache_dir='cache', default_timeout=300):
        """Initialize

        cache_dir is the cache directory
        """
        self.cache = FileSystemCache(cache_dir, default_timeout=default_timeout)

    def clear(self):
        """Clear the cache
        """
        return self.cache.clear()

    def get(self, key):
        """Get a cached value

        Returns the value cached under key,
        or None if there is no matching cache entry
        """
        return self.cache.get(key)

    def add(self, key, value, timeout=None):
        """Add a cache entry

        If no entry exists for key, store value under key with expiry timeout (300 seconds by default)
        Otherwise return False (an existing entry is not overwritten)
        """
        return self.cache.add(key, value, timeout)

    def set(self, key, value, timeout=None):
        """Set a cache entry

        Store value under key, expiring after 300 seconds by default
        """
        return self.cache.set(key, value, timeout)

    def delete(self, key):
        """Delete a cache entry

        Remove the value stored under key
        """
        return self.cache.delete(key)
Example #19
def parse(url):
    cache = FileSystemCache('.cachedir', threshold=100000)

    # get page
    page_source = cache.get(url)
    if page_source is None:
        # use firefox to get page with javascript generated content
        with closing(Firefox()) as browser:
            browser.get(url)
            page_source = browser.page_source
            cache.set(url, page_source,
                      timeout=60 * 60 * 24 * 7)  # week in seconds

    # extract text
    root = html.document_fromstring(page_source)
    # remove flash, images, <script>,<style>, etc
    Cleaner(kill_tags=['noscript'], style=True)(root)  # lxml >= 2.3.1
    return root.text_content()  # extract text
Example #20
def getSpaceInfo(spaceId):
    global spaceCache
    if spaceCache is None:
        mkdir_p(DURACLOUD_SPACE_CACHE_DIR)
        spaceCache = FileSystemCache(DURACLOUD_SPACE_CACHE_DIR, threshold=50, default_timeout=(24*3600), mode=384)

    # check spaceCache, otherwise fetch info from DuraCloud
    result = spaceCache.get(spaceId)
    if result is None:
        url = DURACLOUD_URL+ "/duradmin/download/contentItem"
        auth = HTTPBasicAuth(DURACLOUD_USERNAME, DURACLOUD_PASSWORD)
        payload = {'spaceId': spaceId, 'contentId': 'info.json'}
        try:
            response = requests.get(url, params=payload, auth=auth)
            result = response.json()
            spaceCache.set(spaceId, result)
        except RequestException as e:
            print e
            raise
    return result
Example #21
class WechatCache:
    def __init__(self, cache_dir='cache', default_timeout=300):

        self.cache = FileSystemCache(cache_dir=cache_dir,
                                     default_timeout=default_timeout)

    def clear(self):
        return self.cache.clear()

    def get(self, key):
        return self.cache.get(key)

    def add(self, key, value, timeout=None):
        return self.cache.add(key, value, timeout)

    def set(self, key, value, timeout=None):
        return self.cache.set(key, value, timeout)

    def delete(self, key):
        return self.cache.delete(key)
Example #22
def number_gen(first_part, start_num, end_num, semester, dept, subjects):
    driver = webdriver.Firefox(
    )  # Firefox used for testing. Change it to PhantomJS
    driver.implicitly_wait(30)
    base_url = "http://result.pondiuni.edu.in/candidate.asp"
    url = base_url
    driver.get(base_url)
    # os.mkdir(str(first_part))
    os.chdir("results")
    os.chdir(str(first_part))
    cache = FileSystemCache('.cachedir', threshold=100000)
    for number in range(start_num, end_num + 1):
        current_num = "%04d" % number
        numb = first_part + str(current_num)
        driver.find_element_by_id("txtregno").clear()
        driver.find_element_by_id("txtregno").send_keys(numb)
        Select(driver.find_element_by_id("cmbdegree")).select_by_visible_text(
            dept)
        Select(driver.find_element_by_id("cmbexamno")).select_by_visible_text(
            semester)
        driver.find_element_by_id("button1").click()

        # copying the content; key the cache on the registration number, since
        # each number yields a different result page (the constant form URL
        # would not make a useful cache key)
        page_source = driver.page_source
        cache.set(numb, page_source,
                  timeout=60 * 60 * 24 * 7)  # one week in seconds
        root = html.document_fromstring(page_source)
        Cleaner(kill_tags=['noscript'], style=True)(root)  # lxml >= 2.3.1

        # pasting to file
        filename = str(numb) + ".txt"
        fp = open(filename, 'w')
        fp.write((root.text_content()).encode('utf-8'))
        fp.close()
        driver.back()
    driver.close()
    return analyze(subjects)
Example #23
def getSpaceInfo(spaceId):
    global spaceCache
    if spaceCache is None:
        mkdir_p(DURACLOUD_SPACE_CACHE_DIR)
        spaceCache = FileSystemCache(DURACLOUD_SPACE_CACHE_DIR,
                                     threshold=50,
                                     default_timeout=(24 * 3600),
                                     mode=384)

    # check spaceCache, otherwise fetch info from DuraCloud
    result = spaceCache.get(spaceId)
    if result is None:
        url = DURACLOUD_URL + "/duradmin/download/contentItem"
        auth = HTTPBasicAuth(DURACLOUD_USERNAME, DURACLOUD_PASSWORD)
        payload = {'spaceId': spaceId, 'contentId': 'info.json'}
        try:
            response = requests.get(url, params=payload, auth=auth)
            result = response.json()
            spaceCache.set(spaceId, result)
        except RequestException as e:
            print e
            raise
    return result
Example #24
async def get_content(url, payload, cache):
    """Fetch content from url or get it from cache."""
    cache = FileSystemCache(
        'cache',
        threshold=cache['threshold'],
        default_timeout=cache['default_timeout'],
    )
    cache_id = url + str(payload)
    cache_content = cache.get(cache_id)
    if cache_content is not None:
        return cache_content
    headers = {'user-agent': UserAgent().chrome}
    resp = await asks.get(
        url,
        params=payload,
        headers=headers,
        timeout=TIMEOUT,
        retries=LIMITS,
    )
    resp.raise_for_status()
    content = resp.text
    cache.set(cache_id, content)
    return content
Example #25
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    .. versionadded:: 0.2
        The `use_signer` parameter was added.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    :param use_signer: Whether to sign the session id cookie or not.
    :param permanent: Whether to use permanent session or not.
    """

    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix,
                 use_signer=False, permanent=True):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix
        self.use_signer = use_signer
        self.permanent = permanent

    def open_session(self, app, request):
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid, permanent=self.permanent)
        if self.use_signer:
            signer = self._get_signer(app)
            if signer is None:
                return None
            try:
                sid = signer.unsign(sid)
            except BadSignature:
                sid = self._generate_sid()
                return self.session_class(sid=sid, permanent=self.permanent)

        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid, permanent=self.permanent)

    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return

        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                       total_seconds(app.permanent_session_lifetime))
        if self.use_signer:
            session_id = self._get_signer(app).sign(session.sid)
        else:
            session_id = session.sid
        response.set_cookie(app.session_cookie_name, session_id,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure)
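# Hypothetical wiring sketch (names and values assumed, not from the original
# source): Flask exposes an app.session_interface hook, so the interface can be
# attached directly to an application.
app = Flask(__name__)
app.session_interface = FileSystemSessionInterface(
    cache_dir='/tmp/flask_session', threshold=500, mode=0o600,
    key_prefix='session:', use_signer=False, permanent=True)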
Example #26
import sys
from contextlib import closing

import lxml.html as html # pip install 'lxml>=2.3.1'
from lxml.html.clean        import Cleaner
from selenium.webdriver     import Firefox         # pip install selenium
from werkzeug.contrib.cache import FileSystemCache # pip install werkzeug

cache = FileSystemCache('.cachedir', threshold=100000)

url = sys.argv[1] if len(sys.argv) > 1 else "http://www.schibsted.cl/testqa/"


# get page
page_source = cache.get(url)
if page_source is None:
    # use firefox to get page with javascript generated content
    with closing(Firefox()) as browser:
        browser.get(url)
        page_source = browser.page_source
    cache.set(url, page_source, timeout=60*60*24*7) # week in seconds


# extract text
root = html.document_fromstring(page_source)
# remove flash, images, <script>,<style>, etc
Cleaner(kill_tags=['noscript'], style=True)(root) # lxml >= 2.3.1
webtext = root.text_content() # extract text
# write the extracted text to a file
with open("C:/schibsted/data/Test.txt", "w") as f:
    f.write(webtext.encode('utf-8'))
Example #27
        stderr.write("""Nutzung: partycluster.py [Feedliste] [Grenzwert]

    Feedliste ist eine Datei mit einem URL zu einem ATOM-Feed pro Zeile.
    Grenzwert ist die maximale Entfernung von Partyteilnehmern in Sekunden
    unter der Annahme, dass die Lichtgeschwindigkeit 1 m/s beträgt.
""")
        exit(1)

    current_events = {}
    with open(filename, 'r') as feeds:
        progress = ProgressBar(maxval=len(feeds.readlines()))

    with open(filename, 'r') as feeds:
        for line in feeds:
            feed_url = line.strip()
            cached_content = feed_cache.get(feed_url)
            if not cached_content:
                request = get(feed_url)
                events = getEvents(StringIO(request.text.encode('utf-8')))
                feed_cache.set(feed_url, request.text.encode('utf-8'))
            else:
                events = getEvents(StringIO(cached_content))
            current_events = updateEvents(current_events, events)
            progress.update(progress.currval+1)

    clustering = HierarchicalClustering(current_events.values(), timelikeInterval)
    clusters = clustering.getlevel(threshold)

    for cluster in clusters:
        if len(cluster) > 2:
            partyPrint(cluster, threshold)
Example #28
class TestSaxonStream(TestCase):
    def setUp(self):
        self.cache = FileSystemCache("./cache")
        self.saxon = SaxonStreamTransform("./jars/saxon.jar",
                                          "./tests/data/xsl/ciham.xsl",
                                          cache=self.cache)
        self.nautilus = NautilusRetriever(folders=["./tests/data/repo"])
        self.nautilus.logger.setLevel(logging.ERROR)

        app = Flask("Nemo")
        app.debug = True
        nemo = Nemo(app=app,
                    base_url="",
                    retriever=self.nautilus,
                    transform={"default": self.saxon.transform})

        self.client = app.test_client()

    def tearDown(self):
        # We clean the cache folder to ensure that no cache is passed from one test to the other
        self.cache.clear()

    def test_simple_transformation(self):
        """ Test transformation works fine"""
        read = self.client.get("/read/froLit/jns915/jns1856/ciham-fro1/1")
        data = read.data.decode()
        self.assertIn('<span class="expan">et </span>', data,
                      "Text content should be transformed")
        self.assertIn('Facsimilaire', data, "Other content should be added")

        cached = self.cache.get(
            "urn:cts:froLit:jns915.jns1856.ciham-fro1:1").decode()
        self.assertIn('<aside class="text-left">', cached,
                      "Assert cache is made")

    def test_cache_retrieved(self):
        """ Test that cache is nicely used and built """
        read = self.client.get("/read/froLit/jns915/jns1856/ciham-fro1/1")
        data = read.data.decode()
        self.assertIn('<span class="expan">et </span>', data,
                      "Text content should be transformed")
        self.assertIn('Facsimilaire', data, "Other content should be added")

        cached = self.cache.get(
            "urn:cts:froLit:jns915.jns1856.ciham-fro1:1").decode()
        self.assertIn('<aside class="text-left">', cached,
                      "Assert cache is made")

        with mock.patch("nemo_xslttwo_plugin.shell") as shell:
            read = self.client.get("/read/froLit/jns915/jns1856/ciham-fro1/1")
            cached_response = read.data.decode()
            self.assertEqual(cached_response, data,
                             "Text content should the same in cache")
            self.assertEqual(
                shell.call_count, 0,
                "Shell should not be called because we use cache")

    def test_two_transformations(self):
        """ Test transformation works fine"""
        read = self.client.get("/read/froLit/jns915/jns1856/ciham-fro1/1")
        read = self.client.get("/read/froLit/jns915/jns1856/ciham-fro1/2")
        data = read.data.decode()
        self.assertIn('<span class="expan">et </span>', data,
                      "Text content should be transformed")
        self.assertIn('Facsimilaire', data, "Other content should be added")

        cached = self.cache.get(
            "urn:cts:froLit:jns915.jns1856.ciham-fro1:1").decode()
        self.assertIn('<aside class="text-left">', cached,
                      "Assert cache is made")
Example #29
class weather():
    def __init__(self):
        self.cache = FileSystemCache('/tmp/leju_weather_cache_dir')
        self.weather_cache_key = 'dongcheng_weather'
        self.aqi_cache_key = 'aqicn_num'
        self.cache_time = 3600

    def get_weather(self):
        weather = self.__cache_get(self.weather_cache_key)
        aqi = self.__cache_get(self.aqi_cache_key)
        return [weather, aqi]

    def __get_weather(self):
        url = 'http://beijing.tianqi.com/dongchengqu/'
        headers = {
            'Accept-Language':
            'zh-CN,zh;q=0.8',
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
            'Host':
            'beijing.tianqi.com',
            'Cookie':
            'bdshare_firstime=1451003806108; cs_prov=01; cs_city=0101; ccity=101011501; a8205_pages=175; a8205_times=1; Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2=1451003806; Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2=1451004424'
        }
        #content = BeautifulSoup(requests.get(url, headers=headers, timeout=(10, 3600)).content)
        try:
            content = BeautifulSoup(
                requests.get(url, headers=headers, timeout=10).content,
                "html.parser")
            return content.find(class_='fuzhitxt')['value']
        except requests.exceptions.ConnectTimeout as e:
            # the timeout message will be cached for 3600 seconds
            #self.__get_weather()
            return '获取室外数据超时'

    def __get_usa_aqi(self):
        url = 'http://aqicn.org/city/beijing/'
        headers = {
            'Accept-Language':
            'zh-CN,zh;q=0.8',
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
            'Host':
            'aqicn.org',
            'Cookie':
            'waqi-m-history=[{%22url%22:%22http://aqicn.org/city/beijing/chaoyangaotizhongxin/cn/m/%22%2C%22id%22:%22@450%22%2C%22name%22:%22%E6%9C%9D%E9%98%B3%E5%A5%A5%E4%BD%93%E4%B8%AD%E5%BF%83%22%2C%22time%22:%222016-09-21T02:59:23.642Z%22}%2C{%22url%22:%22http://aqicn.org/city/beijing/us-embassy/cn/m/%22%2C%22id%22:%22@3303%22%2C%22name%22:%22%E5%8C%97%E4%BA%AC%E7%BE%8E%E5%9B%BD%E5%A4%A7%E4%BD%BF%E9%A6%86%22%2C%22time%22:%222016-09-21T02:59:04.177Z%22}]; __uvt=; __atuvc=2%7C42; waqi-w-station={%22url%22:%22http://aqicn.org/city/beijing/%22%2C%22name%22:%22Beijing%22%2C%22idx%22:1451%2C%22time%22:%222016-11-16T02:07:56.247Z%22}; waqi-w-history=[{%22url%22:%22http://aqicn.org/city/beijing/%22%2C%22name%22:%22Beijing%22%2C%22idx%22:1451%2C%22time%22:%222016-11-16T02:07:56.247Z%22}%2C{%22url%22:%22http://aqicn.org/city/beijing/us-embassy/%22%2C%22name%22:%22Beijing%20US%20Embassy%22%2C%22idx%22:3303%2C%22time%22:%222016-11-14T23:39:31.677Z%22}]; __utma=42180789.1319042313.1474426745.1479166782.1479261892.7; __utmb=42180789.2.10.1479261892; __utmc=42180789; __utmz=42180789.1479166782.6.4.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided); uvts=5AqenfALEYF052Ho'
        }
        #content = BeautifulSoup(requests.get(url, headers=headers, timeout=(10, 3600)).content)
        try:
            content = BeautifulSoup(
                requests.get(url, headers=headers, timeout=10).content,
                "html.parser")
            return "大使馆AQI: %s" % content.select(
                '#aqiwgtvalue')[0].get_text().encode('utf-8')
        except requests.exceptions.ConnectTimeout as e:
            # the timeout message will be cached for 3600 seconds
            #self.__get_weather()
            return '获取大使馆AQI数据超时'

    def __cache_get(self, cache_key):
        cache_val = self.cache.get(cache_key)
        if cache_val is None:
            if cache_key == 'dongcheng_weather':
                string = self.__get_weather()
            else:
                string = self.__get_usa_aqi()
            self.cache.set(cache_key, string, self.cache_time)
            return string
        else:
            return cache_val
Example #30
class Cache(object):
    """
    This class is used to control the cache objects.

    If TESTING is True it will use NullCache.
    """

    def __init__(self, app=None):
        self.cache = None

        if app is not None:
            self.init_app(app)
        else:
            self.app = None

    def init_app(self, app):
        "This is used to initialize cache with your app object"

        app.config.setdefault('CACHE_DEFAULT_TIMEOUT', 300)
        app.config.setdefault('CACHE_THRESHOLD', 500)
        app.config.setdefault('CACHE_KEY_PREFIX', None)
        app.config.setdefault('CACHE_MEMCACHED_SERVERS', None)
        app.config.setdefault('CACHE_DIR', None)
        app.config.setdefault('CACHE_TYPE', 'NullCache')

        self.app = app

        self._set_cache()

    def _set_cache(self):
        if self.app.config['TESTING']:
            self.cache = NullCache()
        else:
            if self.app.config['CACHE_TYPE'] == 'Null':
                self.cache = NullCache()
            elif self.app.config['CACHE_TYPE'] == 'Simple':
                self.cache = SimpleCache(
                    threshold=self.app.config['CACHE_THRESHOLD'],
                    default_timeout=self.app.config['CACHE_DEFAULT_TIMEOUT'])
            elif self.app.config['CACHE_TYPE'] == 'Memcached':
                self.cache = MemcachedCache(
                    self.app.config['CACHE_MEMCACHED_SERVERS'],
                    default_timeout=self.app.config['CACHE_DEFAULT_TIMEOUT'],
                    key_prefix=self.app.config['CACHE_KEY_PREFIX'])
            elif self.app.config['CACHE_TYPE'] == 'GAE':
                self.cache = GAEMemcachedCache(
                    default_timeout=self.app.config['CACHE_DEFAULT_TIMEOUT'],
                    key_prefix=self.app.config['CACHE_KEY_PREFIX'])
            elif self.app.config['CACHE_TYPE'] == 'FileSystem':
                self.cache = FileSystemCache(
                    self.app.config['CACHE_DIR'],
                    threshold=self.app.config['CACHE_THRESHOLD'],
                    default_timeout=self.app.config['CACHE_DEFAULT_TIMEOUT'])

    def get(self, *args, **kwargs):
        "Proxy function for internal cache object."
        return self.cache.get(*args, **kwargs)

    def set(self, *args, **kwargs):
        "Proxy function for internal cache object."
        self.cache.set(*args, **kwargs)

    def add(self, *args, **kwargs):
        "Proxy function for internal cache object."
        self.cache.add(*args, **kwargs)

    def delete(self, *args, **kwargs):
        "Proxy function for internal cache object."
        self.cache.delete(*args, **kwargs)

    def cached(self, timeout=None, key_prefix='view/%s', unless=None):
        """
        Decorator. Use this to cache a function. By default the cache key
        is `view/request.path`. You are able to use this decorator with any
        function by changing the `key_prefix`. If the token `%s` is located
        within the `key_prefix` then it will replace that with `request.path`

        Example::

            # An example view function
            @cache.cached(timeout=50)
            def big_foo():
                return big_bar_calc()

            # An example misc function to cache.
            @cache.cached(key_prefix='MyCachedList')
            def get_list():
                return [random.randrange(0, 1) for i in range(50000)]

        .. code-block:: pycon

            >>> my_list = get_list()

        :param timeout: Default None. If set to an integer, will cache for that
                        amount of time.
        :param key_prefix: Default 'view/%(request.path)s'. Beginning key to
                           use for the cache key.
        :param unless: Default None. A callable; when it returns True, caching
                       is bypassed entirely and the decorated function is
                       always executed.
        """

        def decorator(f):

            @wraps(f)
            def decorated_function(*args, **kwargs):
                #: Bypass the cache entirely.
                if callable(unless) and unless() is True:
                    return f(*args, **kwargs)

                if '%s' in key_prefix:
                    cache_key = key_prefix % request.path
                else:
                    cache_key = key_prefix

                rv = self.cache.get(cache_key)
                if not rv or current_app.debug:
                    rv = f(*args, **kwargs)
                    self.cache.set(cache_key, rv, timeout=timeout)
                return rv
            return decorated_function
        return decorator

    def memoize(self, timeout=None):
        """
        Use this to cache the result of a function, taking its arguments into
        account in the cache key.

        Information on
        `Memoization <http://en.wikipedia.org/wiki/Memoization>`_.

        Example::

            @cache.memoize(timeout=50)
            def big_foo(a, b):
                return a + b + random.randrange(0, 1000)

        .. code-block:: pycon

            >>> big_foo(5, 2)
            753
            >>> big_foo(5, 3)
            234
            >>> big_foo(5, 2)
            753

        :param timeout: Default None. If set to an integer, will cache for that
                        amount of time.
        """

        def memoize(f):

            @wraps(f)
            def decorated_function(*args, **kwargs):
                cache_key = (f.__name__, id(f), args, str(kwargs))

                rv = self.cache.get(cache_key)
                if rv is None:
                    rv = f(*args, **kwargs)
                    self.cache.set(cache_key, rv)
                return rv
            return decorated_function
        return memoize
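# Hypothetical configuration sketch (values assumed, not from the original
# source): _set_cache() picks the backend from app.config, so the filesystem
# cache only needs CACHE_TYPE and CACHE_DIR before init_app runs.
app = Flask(__name__)
app.config['CACHE_TYPE'] = 'FileSystem'
app.config['CACHE_DIR'] = '/tmp/flask-cache'
cache = Cache(app)

@cache.cached(timeout=60)  # mirrors the decorator example from the docstring
def show_index():
    return 'rendered page'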
Example #31
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    .. versionadded:: 0.2
        The `use_signer` parameter was added.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    :param use_signer: Whether to sign the session id cookie or not.
    """

    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix,
                 use_signer=False):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)  # FileSystemCache is imported on the previous line; threshold is the maximum number of stored entries
        self.key_prefix = key_prefix
        self.use_signer = use_signer

    def open_session(self, app, request):
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid)
        if self.use_signer:
            signer = self._get_signer(app)
            if signer is None:
                return None
            try:
                sid = signer.unsign(sid)
            except BadSignature:
                sid = None

        data = self.cache.get(self.key_prefix + sid)    # fetch the session data by session id
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid)
    
    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:     # i.e. the session is empty (an empty dict)
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return

        # Modification case.  There are upsides and downsides to
        # emitting a set-cookie header each request.  The behavior
        # is controlled by the :meth:`should_set_cookie` method
        # which performs a quick check to figure out if the cookie
        # should be set or not.  This is controlled by the
        # SESSION_REFRESH_EACH_REQUEST config flag as well as
        # the permanent flag on the session itself.
        #if not self.should_set_cookie(app, session):
        #    return

        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        #data = dict(session)
        #self.cache.set(self.key_prefix + session.sid, data,
        #                 int(app.permanent_session_lifetime.total_seconds()))
        if self.use_signer: # sign the session id
            session_id = self._get_signer(app).sign(session.sid)
        else:
            session_id = session.sid
        response.set_cookie(app.session_cookie_name, session_id,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure)

    
    def save_session_without_response(self, app, session):

        #httponly = self.get_cookie_httponly(app)
        #secure = self.get_cookie_secure(app)
        #expires = self.get_expiration_time(app, session)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,    
                     int(app.permanent_session_lifetime.total_seconds()))

    def judge_attack(self, app, request):   # treat two requests arriving less than three seconds apart as an attack
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            return False
        current_time = time.time()
        try:
            last_time = self.cache.get(self.key_prefix + sid)['time']    # fetch the stored session data by session id
        except:
            return False
        if current_time-last_time < 3:
            return True
        return False
Example #32
class Cache(object):
    """Base class for TimeGate caches."""

    def __init__(self, path, tolerance, expiration, max_values, run_tests=True,
                 max_file_size=0):
        """Constructor method.

        :param path: The path of the cache database file.
        :param tolerance: The tolerance, in seconds to which a TimeMap is
        considered young enough to be used as is.
        :param expiration: How long, in seconds, the cache entries are stored;
        after that, every get will be a CACHE MISS.
        :param max_values: The maximum number of TimeMaps stored in cache
        before some are deleted
        :param run_tests: (Optional) Tests the cache at initialization.
        :param max_file_size: (Optional) The maximum size (in Bytes) for a
        TimeMap cache value. When max_file_size=0, there is no limit to
        a cache value. When max_file_size=X > 0, the cache will not
        store TimeMap that require more than X Bytes on disk.
        """
        # Parameters Check
        if tolerance <= 0 or expiration <= 0 or max_values <= 0:
            raise CacheError("Cannot create cache: all parameters must be > 0")

        self.tolerance = relativedelta(seconds=tolerance)
        self.path = path.rstrip('/')
        self.max_file_size = max(max_file_size, 0)
        self.CHECK_SIZE = self.max_file_size > 0
        self.max_values = max_values
        self.backend = FileSystemCache(path,
                                       threshold=self.max_values,
                                       default_timeout=expiration)

        # Testing cache
        if run_tests:
            try:
                key = '1'
                val = 1
                self.backend.set(key, val)
                assert (not self.CHECK_SIZE) or self._check_size(key) > 0
                assert self.backend.get(key) == val
                os.remove(self.path + '/' + md5(key).hexdigest())
            except Exception as e:
                raise CacheError("Error testing cache: %s" % e)

        logging.debug(
            "Cache created. max_files = %d. Expiration = %d. "
            "max_file_size = %d" % (
                self.max_values, expiration, self.max_file_size))

    def get_until(self, uri_r, date):
        """Returns the TimeMap (memento,datetime)-list for the requested
        Memento. The TimeMap is guaranteed to span at least until the 'date'
        parameter, within the tolerance.

        :param uri_r: The URI-R of the resource as a string.
        :param date: The target date. It is the accept-datetime for TimeGate
        requests, and the current date. The cache will return all
        Mementos prior to this date (within cache.tolerance parameter)
        :return: [(memento_uri_string, datetime_obj),...] list if it is
        in cache and if it is within the cache tolerance for *date*,
        None otherwise.
        """
        # Query the backend for stored cache values to that memento
        key = uri_r
        try:
            val = self.backend.get(key)
        except Exception as e:
            logging.error("Exception loading cache content: %s" % e)
            return None

        if val:
            # There is a value in the cache
            timestamp, timemap = val
            logging.info("Cached value exists for %s" % uri_r)
            if date > timestamp + self.tolerance:
                logging.info("Cache MISS: value outdated for %s" % uri_r)
                timemap = None
            else:
                logging.info("Cache HIT: found value for %s" % uri_r)
        else:
            # Cache MISS: No value
            logging.info("Cache MISS: No cached value for %s" % uri_r)
            timemap = None

        return timemap

    def get_all(self, uri_r):
        """Request the whole TimeMap for that uri.

        :param uri_r: the URI-R of the resource.
        :return: [(memento_uri_string, datetime_obj),...] list if it is in
        cache and if it is within the cache tolerance, None otherwise.
        """
        return self.get_until(uri_r, timegate_utils.now())

    def refresh(self, uri_r, getter, *args, **kwargs):
        """Refreshes the cached TimeMap for a specific resource and returns it.

        :param uri_r: The original resource URI to refresh the TimeMap
        :param getter: The function to call to get a fresh TimeMap
        :param args: *getter* arguments
        :param kwargs: *getter* keywords arguments
        :return: The fresh TimeMap

        """
        timemap = parsed_request(getter, *args, **kwargs)
        # timemap,new_uri = parsed_request(getter, *args, **kwargs)
        # if new_uri:
        # uri_r = new_uri

        # Creates or refreshes the new timemap for that URI-R
        self._set(uri_r, timemap)
        return timemap

    def _set(self, uri_r, timemap):
        """Sets / refreshes the cached TimeMap for that URI-R. And appends it
        with a timestamp of when it is stored.

        :param uri_r: The URI-R of the original resource.
        :param timemap: The value to cache.
        :return: The backend setter method return value.
        """
        logging.info("Updating cache for %s" % uri_r)
        timestamp = timegate_utils.now()
        val = (timestamp, timemap)
        key = uri_r
        try:
            self.backend.set(key, val)
            if self.CHECK_SIZE:
                self._check_size(uri_r)
        except Exception as e:
            logging.error("Error setting cache value: %s" % e)

    def _check_size(self, key, delete=True):
        """Check the size that a specific TimeMap value is using on disk.

        It deletes if it is more than the maximum size.

        :param key: The TimeMap original resource.
        :param delete: (Optional) When true, the value is deleted.
        Else only a warning is raised.
        :return: The size of the value on disk (0 if it was deleted).
        """
        try:
            fname = md5(key).hexdigest()  # werkzeug key
            fpath = self.path + '/' + fname
            size = os.path.getsize(fpath)
            if size > self.max_file_size:
                message = ("Cache value too big (%dB, max %dB) "
                           "for the TimeMap of %s")
                if delete:
                    message += ". Deleting cached value."
                    os.remove(fpath)
                    size = 0
                logging.warning(message % (size, self.max_file_size, key))
            return size
        except Exception as e:
            logging.error(
                "Exception checking cache value size for TimeMap of %s "
                "Exception: %s" % (key, e))
            return 0
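# Hypothetical construction (values assumed, not from the original source):
# get_until() serves a cached TimeMap as-is while it is younger than
# `tolerance` seconds; the backend evicts entries after `expiration` seconds
# or once more than `max_values` TimeMaps are stored.
timemap_cache = Cache(path='/tmp/timegate-cache', tolerance=300,
                      expiration=86400, max_values=250, run_tests=False)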
Example #33
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    .. versionadded:: 0.2
        The `use_signer` parameter was added.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    :param use_signer: Whether to sign the session id cookie or not.
    :param permanent: Whether to use permanent session or not.
    """

    session_class = FileSystemSession

    def __init__(self,
                 cache_dir,
                 threshold,
                 mode,
                 key_prefix,
                 use_signer=False,
                 permanent=True):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix
        self.use_signer = use_signer
        self.permanent = permanent

    def open_session(self, app, request):
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid, permanent=self.permanent)
        if self.use_signer:
            signer = self._get_signer(app)
            if signer is None:
                return None
            try:
                sid_as_bytes = signer.unsign(sid)
                sid = sid_as_bytes.decode()
            except BadSignature:
                sid = self._generate_sid()
                return self.session_class(sid=sid, permanent=self.permanent)

        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid, permanent=self.permanent)

    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain,
                                       path=path)
            return

        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        samesite = self.get_cookie_samesite(app)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                       total_seconds(app.permanent_session_lifetime))
        if self.use_signer:
            session_id = self._get_signer(app).sign(want_bytes(session.sid))
        else:
            session_id = session.sid
        response.set_cookie(app.session_cookie_name,
                            session_id,
                            expires=expires,
                            httponly=httponly,
                            domain=domain,
                            path=path,
                            secure=secure,
                            samesite=samesite)
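
A minimal usage sketch, assuming the interface above (as shipped by Flask-Session) is importable and that installing it through Flask's session_interface hook suits the application; the directory, prefix, and secret key are placeholders.

from flask import Flask, session

app = Flask(__name__)
app.secret_key = 'change-me'  # only needed when use_signer=True

app.session_interface = FileSystemSessionInterface(
    cache_dir='/tmp/flask_session',  # where pickled session payloads are written
    threshold=500,                   # prune once more sessions than this are stored
    mode=0o600,                      # file permissions for the session files
    key_prefix='session:',
    use_signer=False,
    permanent=True,
)

@app.route('/')
def index():
    session['visits'] = session.get('visits', 0) + 1
    return 'visits: %d' % session['visits']
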
Example #34
class Cache(object):
    """Cache module based on werkzeug.contrib.cache. This is a mixed
    version of NullCache, SimpleCache, FileSystemCache, MemcachedCache,
    and RedisCache.

    :param app: Flask app instance.
    :param config_prefix: Define a prefix for Flask app config.
    :param kwargs: Extra parameters.

    You need to configure the type of the cache and its related settings.
    The default ``config_prefix`` is ``AUTHLIB``, so it requires a config of::

        AUTHLIB_CACHE_TYPE = 'simple'

    If ``config_prefix`` is something else, like ``EXAMPLE``, it would be::

        EXAMPLE_CACHE_TYPE = 'simple'

    The available cache types are:

    * null: It will not cache anything. No configuration.

    * simple: It caches things in memory.
      The only configuration is ``threshold``::

          AUTHLIB_CACHE_THRESHOLD = 500

    * memcache: It caches things in Memcache. Available configurations::

          AUTHLIB_CACHE_MEMCACHED_SERVERS = []
          AUTHLIB_CACHE_KEY_PREFIX = None

    * redis: It caches things in Redis. Available configurations::

          AUTHLIB_CACHE_REDIS_HOST = 'localhost'
          AUTHLIB_CACHE_REDIS_PORT = 6379
          AUTHLIB_CACHE_REDIS_PASSWORD = None
          AUTHLIB_CACHE_REDIS_DB = 0
          AUTHLIB_CACHE_KEY_PREFIX = None

    * filesystem: It caches things in local filesystem. Available
      configurations::

          AUTHLIB_CACHE_DIR = ''  # required
          AUTHLIB_CACHE_THRESHOLD = 500
    """
    def __init__(self, app, config_prefix='AUTHLIB', **kwargs):
        deprecate(DEPRECATE_MESSAGE, 0.7)

        self.config_prefix = config_prefix
        self.config = app.config

        cache_type = self._config('type')
        kwargs.update(
            dict(default_timeout=self._config('DEFAULT_TIMEOUT', 100)))

        if cache_type == 'null':
            self.cache = NullCache()
        elif cache_type == 'simple':
            kwargs.update(dict(threshold=self._config('threshold', 500)))
            self.cache = SimpleCache(**kwargs)
        elif cache_type == 'memcache':
            kwargs.update(
                dict(
                    servers=self._config('MEMCACHED_SERVERS'),
                    key_prefix=self._config('KEY_PREFIX', None),
                ))
            self.cache = MemcachedCache(**kwargs)
        elif cache_type == 'redis':
            kwargs.update(
                dict(
                    host=self._config('REDIS_HOST', 'localhost'),
                    port=self._config('REDIS_PORT', 6379),
                    password=self._config('REDIS_PASSWORD', None),
                    db=self._config('REDIS_DB', 0),
                    key_prefix=self._config('KEY_PREFIX', None),
                ))
            self.cache = RedisCache(**kwargs)
        elif cache_type == 'filesystem':
            kwargs.update(dict(threshold=self._config('threshold', 500), ))
            self.cache = FileSystemCache(self._config('DIR'), **kwargs)
        else:
            raise RuntimeError('`%s` is not a valid cache type!' % cache_type)
        app.extensions[config_prefix.lower() + '_cache'] = self.cache

    def _config(self, key, default=_missing):
        key = key.upper()
        prior = '%s_CACHE_%s' % (self.config_prefix, key)
        if prior in self.config:
            return self.config[prior]
        fallback = 'CACHE_%s' % key
        if fallback in self.config:
            return self.config[fallback]
        if default is _missing:
            raise RuntimeError('%s is missing.' % prior)
        return default

    def get(self, key):
        """Look up key in the cache and return the value for it.

        :param key: the key to be looked up.
        :returns: The value if it exists and is readable, else ``None``.
        """
        return self.cache.get(key)

    def delete(self, key):
        """Delete `key` from the cache.

        :param key: the key to delete.
        :returns: Whether the key existed and has been deleted.
        """
        return self.cache.delete(key)

    def get_many(self, *keys):
        """Returns a list of values for the given keys.
        For each key an item in the list is created::

            foo, bar = cache.get_many("foo", "bar")

        Has the same error handling as :meth:`get`.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        return [self.cache.get(k) for k in keys]

    def get_dict(self, *keys):
        """Like :meth:`get_many` but return a dict::

            d = cache.get_dict("foo", "bar")
            foo = d["foo"]
            bar = d["bar"]

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        return self.cache.get_dict(*keys)

    def set(self, key, value, timeout=None):
        """Add a new key/value to the cache (overwrites value, if key already
        exists in the cache).

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key in seconds (if not
                        specified, it uses the default timeout). A timeout of
                        0 indicates that the cache never expires.
        :returns: ``True`` if key has been updated, ``False`` for backend
                  errors. Pickling errors, however, will raise a subclass of
                  ``pickle.PickleError``.
        """
        return self.cache.set(key, value, timeout)

    def add(self, key, value, timeout=None):
        """Works like :meth:`set` but does not overwrite the values of already
        existing keys.

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key in seconds (if not
                        specified, it uses the default timeout). A timeout of
                        0 indicates that the cache never expires.
        :returns: Same as :meth:`set`, but also ``False`` for already
                  existing keys.
        """
        return self.cache.add(key, value, timeout)

    def set_many(self, mapping, timeout=None):
        """Sets multiple keys and values from a mapping.

        :param mapping: a mapping with the keys/values to set.
        :param timeout: the cache timeout for the key in seconds (if not
                        specified, it uses the default timeout). A timeout of
                        0 indicates that the cache never expires.
        :returns: Whether all given keys have been set.
        """
        return self.cache.set_many(mapping, timeout)

    def delete_many(self, *keys):
        """Deletes multiple keys at once.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        :returns: Whether all given keys have been deleted.
        :rtype: boolean
        """
        return self.cache.delete_many(*keys)

    def has(self, key):
        """Checks if a key exists in the cache without returning it. This is a
        cheap operation that bypasses loading the actual data on the backend.

        This method is optional and may not be implemented on all caches.

        :param key: the key to check
        """
        return self.cache.has(key)

    def clear(self):
        """Clears the cache.  Keep in mind that not all caches support
        completely clearing the cache.

        :returns: Whether the cache has been cleared.
        """
        return self.cache.clear()

    def inc(self, key, delta=1):
        """Increments the value of a key by `delta`.  If the key does
        not yet exist it is initialized with `delta`.

        For supporting caches this is an atomic operation.

        :param key: the key to increment.
        :param delta: the delta to add.
        :returns: The new value or ``None`` for backend errors.
        """
        return self.cache.inc(key, delta=delta)

    def dec(self, key, delta=1):
        """Decrements the value of a key by `delta`.  If the key does
        not yet exist it is initialized with `-delta`.

        For supporting caches this is an atomic operation.

        :param key: the key to decrement.
        :param delta: the delta to subtract.
        :returns: The new value or `None` for backend errors.
        """
        return self.cache.dec(key, delta=delta)
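
A minimal sketch of driving the wrapper above from Flask configuration, assuming the class and its module are importable and using the filesystem backend; the config keys follow the AUTHLIB_CACHE_* scheme described in the docstring, and the directory and stored value are placeholders.

from flask import Flask

app = Flask(__name__)
app.config.update(
    AUTHLIB_CACHE_TYPE='filesystem',
    AUTHLIB_CACHE_DIR='/tmp/authlib-cache',   # required for the filesystem backend
    AUTHLIB_CACHE_THRESHOLD=500,
    AUTHLIB_CACHE_DEFAULT_TIMEOUT=100,
)

cache = Cache(app, config_prefix='AUTHLIB')
cache.set('token:alice', {'access_token': 'placeholder'}, timeout=300)
print(cache.get('token:alice'))
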
Example #35
class Cache(object):
    """Base class for TimeGate caches."""
    def __init__(self,
                 path,
                 tolerance,
                 expiration,
                 max_values,
                 run_tests=True,
                 max_file_size=0):
        """Constructor method.

        :param path: The path of the cache database file.
        :param tolerance: The tolerance, in seconds, within which a TimeMap is
        considered young enough to be used as is.
        :param expiration: How long, in seconds, cache entries are stored
        before every get for them becomes a CACHE MISS.
        :param max_values: The maximum number of TimeMaps stored in the cache
        before some are deleted.
        :param run_tests: (Optional) Tests the cache at initialization.
        :param max_file_size: (Optional) The maximum size (in bytes) for a
        TimeMap cache value. When max_file_size=0, there is no limit on
        the size of a cache value. When max_file_size=X > 0, the cache will
        not store TimeMaps that require more than X bytes on disk.
        """
        # Parameters Check
        if tolerance <= 0 or expiration <= 0 or max_values <= 0:
            raise CacheError('Cannot create cache: all parameters must be > 0')

        self.tolerance = relativedelta(seconds=tolerance)
        self.path = path.rstrip('/')
        self.max_file_size = max(max_file_size, 0)
        self.CHECK_SIZE = self.max_file_size > 0
        self.max_values = max_values
        self.backend = FileSystemCache(path,
                                       threshold=self.max_values,
                                       default_timeout=expiration)

        # Testing cache
        if run_tests:
            try:
                key = b'1'
                val = 1
                self.backend.set(key, val)
                assert (not self.CHECK_SIZE) or self._check_size(key) > 0
                assert self.backend.get(key) == val
                os.remove(os.path.join(self.path, md5(key).hexdigest()))
            except Exception as e:
                raise CacheError('Error testing cache: %s' % e)

        logging.debug('Cache created. max_files = %d. Expiration = %d. '
                      'max_file_size = %d' %
                      (self.max_values, expiration, self.max_file_size))

    def get_until(self, uri_r, date):
        """Returns the TimeMap (memento,datetime)-list for the requested
        Memento. The TimeMap is guaranteed to span at least until the 'date'
        parameter, within the tolerance.

        :param uri_r: The URI-R of the resource as a string.
        :param date: The target date. It is the Accept-Datetime for TimeGate
        requests, and the current date for full TimeMap requests. The cache
        will return all Mementos prior to this date (within the cache
        tolerance).
        :return: [(memento_uri_string, datetime_obj),...] list if it is
        in cache and if it is within the cache tolerance for *date*,
        None otherwise.
        """
        # Query the backend for stored cache values to that memento
        key = uri_r
        try:
            val = self.backend.get(key)
        except Exception as e:
            logging.error('Exception loading cache content: %s' % e)
            return None

        if val:
            # There is a value in the cache
            timestamp, timemap = val
            logging.info('Cached value exists for %s' % uri_r)
            if date > timestamp + self.tolerance:
                logging.info('Cache MISS: value outdated for %s' % uri_r)
                timemap = None
            else:
                logging.info('Cache HIT: found value for %s' % uri_r)
        else:
            # Cache MISS: No value
            logging.info('Cache MISS: No cached value for %s' % uri_r)
            timemap = None

        return timemap

    def get_all(self, uri_r):
        """Request the whole TimeMap for that uri.

        :param uri_r: the URI-R of the resource.
        :return: [(memento_uri_string, datetime_obj),...] list if it is in
        cache and if it is within the cache tolerance, None otherwise.
        """
        until = datetime.utcnow().replace(tzinfo=tzutc())
        return self.get_until(uri_r, until)

    def set(self, uri_r, timemap):
        """Set the cached TimeMap for that URI-R.

        The TimeMap is stored together with a timestamp of when it was cached.

        :param uri_r: The URI-R of the original resource.
        :param timemap: The value to cache.
        :return: The backend setter method return value.
        """
        logging.info('Updating cache for %s' % uri_r)
        timestamp = datetime.utcnow().replace(tzinfo=tzutc())
        val = (timestamp, timemap)
        key = uri_r
        try:
            self.backend.set(key, val)
            if self.CHECK_SIZE:
                self._check_size(uri_r)
        except Exception as e:
            logging.error('Error setting cache value: %s' % e)

    def _check_size(self, key, delete=True):
        """Check the size that a specific TimeMap value is using on disk.

        The value is deleted if it uses more than the maximum allowed size on disk.

        :param key: The TimeMap original resource.
        :param delete: (Optional) When true, the value is deleted.
        Else only a warning is raised.
        :return: The size of the value on disk (0 if it was deleted).
        """
        try:
            fname = md5(key).hexdigest()  # werkzeug key
            fpath = self.path + '/' + fname
            size = os.path.getsize(fpath)
            if size > self.max_file_size:
                message = ('Cache value too big (%dB, max %dB) '
                           'for the TimeMap of %s')
                if delete:
                    message += '. Deleting cached value.'
                # Log the original on-disk size before it is reset below.
                logging.warning(message % (size, self.max_file_size, key))
                if delete:
                    os.remove(fpath)
                    size = 0
            return size
        except Exception as e:
            logging.error(
                'Exception checking cache value size for TimeMap of %s '
                'Exception: %s' % (key, e))
            return 0
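
A minimal sketch of the intended call pattern for the TimeGate cache above, assuming the class and its module-level imports (FileSystemCache, relativedelta, tzutc, CacheError) are available; the cache path, URI, and TimeMap contents are placeholders.

from datetime import datetime
from dateutil.tz import tzutc

cache = Cache(
    path='/tmp/timegate-cache',
    tolerance=60 * 60 * 24,       # reuse cached TimeMaps younger than one day
    expiration=60 * 60 * 24 * 7,  # expire stored entries after a week
    max_values=250,
    run_tests=False,
    max_file_size=0,              # 0 means no per-value size limit on disk
)

uri_r = 'http://example.org/page'
timemap = [('http://archive.example.org/20200101/page',
            datetime(2020, 1, 1, tzinfo=tzutc()))]

cache.set(uri_r, timemap)
# Stored just now, so this is a cache HIT and returns the stored TimeMap.
assert cache.get_all(uri_r) == timemap
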