def callback_read(self, query: Query) -> tuple:
    data = None
    cache_filename = self.make_cache_filename(query)
    if self.is_cached(query):
        with open(cache_filename, self.read_mode) as f:
            data = self.callback_load(f)
            Log.debug("Cache hit: [%s]" % cache_filename)
    return data
def read(self, query: Query):
    (data, success) = (None, False)
    try:
        data = self.callback_read(query)
        success = (data is not None)
    except Exception:
        Log.error(
            "CacheConnector.read(%s): Cannot read cache:\n%s" % (
                query, traceback.format_exc()
            )
        )
    return (data, success)
def write(self, query: Query, data) -> bool:
    success = True
    try:
        self.callback_write(query, data)
    except Exception:
        Log.error(
            "CacheConnector.write(%s, %s): Cannot write cache:\n%s" % (
                query, data, traceback.format_exc()
            )
        )
        success = False
    return success
def check_base_dir(cache_connectors: list, dummy_cache_connectors: list = list()):
    query = Query()
    for cache_connector in cache_connectors:
        cache_filename = cache_connector.make_cache_filename(query)
        Log.debug(cache_filename)
        assert cache_filename.startswith(DEFAULT_CACHE_STORAGE_BASE_DIR)
    for dummy_cache_connector in dummy_cache_connectors:
        cache_filename = dummy_cache_connector.make_cache_filename(query)
        Log.debug(cache_filename)
        assert cache_filename.startswith(DUMMY_BASE_DIR)
def query(self, query: Query):
    (data, success) = (None, False)
    if self.is_cached(query):
        (data, success) = self.read(query)
        if not success:
            Log.warning("CacheConnector.query(%s): Unreadable cache" % query)
    if not success:
        data = self.child.query(query)
        if query.action == ACTION_READ and self.is_cachable(query, data):
            self.write(query, data)
    return self.answer(query, data)
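# A minimal, self-contained sketch of the read-through caching pattern that
# CacheConnector.query() implements above. It uses a plain dict instead of the
# real minifold storage back-ends; DictCache and its names are illustrative
# only, not part of the minifold API.

class DictCache:
    def __init__(self, child):
        self.child = child   # wrapped data source (any callable here)
        self.cache = {}      # query -> memoized result

    def query(self, query: str):
        if query in self.cache:          # cache hit: serve the stored result
            return self.cache[query]
        result = self.child(query)       # cache miss: delegate to the child
        self.cache[query] = result       # ... and remember its answer
        return result

# The second call is served from the dict without re-running the child.
cached = DictCache(child=lambda q: q.upper())
assert cached.query("minifold") == "MINIFOLD"
assert cached.query("minifold") == "MINIFOLD"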
def _country_code_to_name(country_code: str) -> str:
    # For obsolete versions of pycountry
    Log.warning("Please update python3-pycountry; apt-get update && apt-get upgrade")
    ret = None
    try:
        country = pycountry.countries.get(alpha_2=country_code.upper())
        ret = country.name
    except KeyError:
        Log.warning("Unknown country %r" % country_code)
    return ret
def country_code_to_name(country_code: str) -> str:
    ret = None
    try:
        country = pycountry.countries.get(alpha_2=country_code.upper())
        ret = country.name
    except KeyError as e:
        if "%s" % e == "alpha_2":
            # Old pycountry releases raise KeyError("alpha_2") for this call.
            ret = _country_code_to_name(country_code)
        else:
            Log.warning("Unknown country %r" % country_code)
    return ret
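# For reference, the happy path above boils down to this pycountry call
# (modern pycountry API; "FR" is just a sample ISO 3166-1 alpha-2 code):
import pycountry

country = pycountry.countries.get(alpha_2="FR")
print(country.name)   # "France"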
def test_minifold_config_loads():
    config = Config()
    config.loads(DEFAULT_MINIFOLD_CONFIG)
    k1 = "dblp:dagstuhl"
    k2 = "dblp:uni-trier"
    assert set(config.keys()) == {k1, k2}
    from minifold.dblp import DblpConnector
    Log.debug(Connector.subclasses)
    dblp1 = config.make_connector(k1)
    assert isinstance(dblp1, DblpConnector)
    dblp2 = config.make_connector(k2)
    assert isinstance(dblp2, DblpConnector)
def test_offset_limit():
    entries_connector = EntriesConnector(ENTRIES)
    attributes = ["a", "b", "c"]
    for offset in range(len(ENTRIES)):
        for limit in range(len(ENTRIES)):
            q = Query(attributes=attributes, offset=offset, limit=limit)
            Log.debug(q)
            result = entries_connector.query(q)
            Log.debug(pformat(result))
            assert len(result) == min(limit, len(ENTRIES) - offset), \
                "Invalid #entries for %s:\n%s" % (str(q), pformat(result))
            expected = [
                {k: entry.get(k) for k in attributes}
                for entry in ENTRIES[offset:offset + limit]
            ]
            assert result == expected, \
                "Got      : %s\nExpected : %s\n" % (result, expected)
def test_offset_limit():
    for cache_connector in CACHE_CONNECTORS:
        attributes = ("a", "b", "c")
        for offset in range(len(ENTRIES)):
            for limit in range(len(ENTRIES)):
                query = Query(attributes=attributes, offset=offset, limit=limit)
                Log.info(query)
                result = cache_connector.query(query)
                Log.info(pformat(result))
                assert len(result) == min(limit, len(ENTRIES) - offset), \
                    "Invalid #entries for %s:\n%s" % (str(query), pformat(result))
                expected = [
                    {k: entry.get(k) for k in attributes}
                    for entry in ENTRIES[offset:offset + limit]
                ]
                assert result == expected, \
                    "Got      : %s\nExpected : %s\n" % (result, expected)
def test_cache_rebase():
    DUMMY_BASE_DIR = "/tmp/.minifold"

    def check_base_dir(cache_connectors: list, dummy_cache_connectors: list = list()):
        query = Query()
        for cache_connector in cache_connectors:
            cache_filename = cache_connector.make_cache_filename(query)
            Log.debug(cache_filename)
            assert cache_filename.startswith(DEFAULT_CACHE_STORAGE_BASE_DIR)
        for dummy_cache_connector in dummy_cache_connectors:
            cache_filename = dummy_cache_connector.make_cache_filename(query)
            Log.debug(cache_filename)
            assert cache_filename.startswith(DUMMY_BASE_DIR)

    # CACHE_CONNECTORS should be stored in DEFAULT_CACHE_STORAGE_BASE_DIR.
    check_base_dir(CACHE_CONNECTORS, [])

    # We now rebase the default cache directory to DUMMY_BASE_DIR.
    # Newly created caches should be stored in DUMMY_BASE_DIR, but previously
    # created caches should remain where they are.
    Log.info("Setting StorageCacheConnector.base_dir to [%s]" % DUMMY_BASE_DIR)
    StorageCacheConnector.base_dir = DUMMY_BASE_DIR
    dummy_cache_connectors = [
        cls(EntriesConnector(ENTRIES))
        for cls in STORAGE_CONNECTOR_CLASSES
    ]
    check_base_dir(CACHE_CONNECTORS, dummy_cache_connectors)

    # We now rebase the default cache directory back to the standard cache
    # directory. Newly created caches should be stored in
    # DEFAULT_CACHE_STORAGE_BASE_DIR, but previously created caches should
    # remain where they are.
    Log.info("Setting StorageCacheConnector.base_dir to [%s]" % DEFAULT_CACHE_STORAGE_BASE_DIR)
    StorageCacheConnector.base_dir = DEFAULT_CACHE_STORAGE_BASE_DIR
    check_base_dir(CACHE_CONNECTORS, dummy_cache_connectors)
def tweet_to_dict(tweet):
    # Fetch the tweet content
    content = None
    try:
        content = tweet._json["full_text"]
    except KeyError:
        content = tweet._json["text"]
    if content:
        content = content.split(" https://t.co/")[0]

    # Get the tweet image
    image = None
    try:
        image = tweet._json["entities"]["media"][0]["media_url"]
    except (KeyError, IndexError):
        # Try to get the image from YouTube, e.g.:
        # https://img.youtube.com/vi/iZCyv0e2RLM/1.jpg
        try:
            if "youtu" in tweet._json["entities"]["urls"][0]["display_url"]:
                image = "https://img.youtube.com/vi/%s/1.jpg" % (
                    tweet._json["entities"]["urls"][0]["display_url"].split("/")[-1]
                )
        except (KeyError, IndexError):
            Log.warning("No image found for tweet %s" % content)

    # Get the author name
    author_name = None
    try:
        author_name = tweet._json["user"]["name"]
    except KeyError:
        Log.warning("No author name found for tweet %s" % content)

    # Get the author image
    author_image = None
    try:
        author_image = tweet._json["user"]["profile_image_url"]
    except KeyError:
        Log.warning("No author image found for tweet %s" % content)

    # Get the tweet date
    date_list = tweet._json["created_at"].split()
    display_date = "%(m)s %(d)s, %(y)s" % {
        "d": date_list[2],
        "m": date_list[1],
        "y": date_list[5],
    }
    date = datetime.datetime.strptime(display_date, "%b %d, %Y")
    display_date = date.strftime("%d/%m/%Y")

    # Return the entry
    return {
        "text": content,
        "image": image,
        "date": date,
        "display_date": display_date,
        "author_name": author_name,
        "author_image": author_image,
    }
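# Stand-alone illustration of the date handling above, using a Twitter-style
# "created_at" string so no tweepy object or API access is needed (the value
# below is a made-up sample in the usual format).
import datetime

created_at = "Wed Oct 10 20:19:24 +0000 2018"
date_list = created_at.split()
display_date = "%(m)s %(d)s, %(y)s" % {
    "d": date_list[2],   # "10"
    "m": date_list[1],   # "Oct"
    "y": date_list[5],   # "2018"
}
date = datetime.datetime.strptime(display_date, "%b %d, %Y")
print(date.strftime("%d/%m/%Y"))   # "10/10/2018"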
__author__ = "Marc-Olivier Buob" __maintainer__ = "Marc-Olivier Buob" __email__ = "*****@*****.**" __copyright__ = "Copyright (C) 2018, Nokia" __license__ = "BSD-3" from minifold.log import Log try: import pycountry except ImportError as e: from .log import Log Log.warning( "\n".join([ "Please install requests", " APT: sudo apt install python3-pycountry", " PIP: sudo pip3 install --upgrade pycountry", ]) ) raise e def _country_code_to_name(country_code :str) -> str: # For obsolete version of pycountry Log.warning("Please update python3-pycountry; apt-get update && apt-get upgrade") ret = None try: country = pycountry.countries.get(alpha_2 = country_code.upper()) ret = country.name except KeyError as e:
def clear_cache(self):
    if os.path.exists(self.cache_dir) and os.path.isdir(self.cache_dir):
        Log.debug("StorageCacheConnector: Removing cache [%s]" % self.cache_dir)
        rm(self.cache_dir, recursive=True)
def clear_query(self, query: Query):
    cache_filename = self.make_cache_filename(query)
    if os.path.exists(cache_filename):
        Log.debug("StorageCacheConnector: Removing query [%s]" % cache_filename)
        rm(cache_filename)
#
# This file is part of the minifold project.
# https://github.com/nokia/minifold

__author__ = "Marc-Olivier Buob"
__maintainer__ = "Marc-Olivier Buob"
__email__ = "*****@*****.**"
__copyright__ = "Copyright (C) 2018, Nokia"
__license__ = "BSD-3"

from minifold.log import Log

try:
    import pycountry
except ImportError:
    Log.warning("Please install pycountry: apt-get install python3-pycountry")


def _country_code_to_name(country_code: str) -> str:
    # For obsolete versions of pycountry
    Log.warning("Please update python3-pycountry; apt-get update && apt-get upgrade")
    ret = None
    try:
        country = pycountry.countries.get(alpha_2=country_code.upper())
        ret = country.name
    except KeyError:
        Log.warning("Unknown country %r" % country_code)
    return ret
def __init__(
    self,
    load_entries,
    cache_filename: str,
    load_cache,
    save_cache,
    read_mode,
    write_mode,
    with_cache: bool = True
):
    loaded_from_cache = False
    if with_cache:
        try:
            with open(cache_filename, read_mode) as f:
                Log.info("%s: Loading cache from [%s]" % (type(self), cache_filename))
                entries = load_cache(f)
                Log.info("Loaded %d entries" % len(entries))
                loaded_from_cache = True
        except FileNotFoundError:
            Log.debug("%s: Cache [%s] not found" % (type(self), cache_filename))
        except Exception as e:
            Log.debug("%s: Cache [%s] corrupted" % (type(self), cache_filename))
            Log.error(e)

    # Parse the input data (if needed)
    if not loaded_from_cache:
        entries = load_entries()
        Log.info("Loaded %d entries" % len(entries))

    # Save into cache (if needed)
    if with_cache and not loaded_from_cache:
        Log.info("%s: Saving data into cache [%s]" % (type(self), cache_filename))
        mkdir(os.path.dirname(cache_filename))
        with open(cache_filename, write_mode) as f:
            save_cache(entries, f)

    super().__init__(entries)
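# Hedged usage sketch for the constructor above. The class name
# CachedEntriesConnector is an assumption (substitute whatever class this
# __init__ belongs to in your checkout); the JSON callbacks, the stubbed
# source, and the /tmp path are purely illustrative.
import json

connector = CachedEntriesConnector(
    load_entries=lambda: [{"name": "alice"}, {"name": "bob"}],  # slow source, stubbed here
    cache_filename="/tmp/minifold_demo/entries.json",
    load_cache=json.load,                                       # file handle -> entries
    save_cache=lambda entries, f: json.dump(entries, f),        # (entries, file handle) -> None
    read_mode="r",
    write_mode="w",
)
# The first construction calls load_entries() and writes the JSON cache;
# subsequent constructions read back from /tmp/minifold_demo/entries.json.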
def error(self, message):
    Log.error(message)
def test_query_select_where():
    query = Query(
        attributes=["a", "c", "d"],
        filters=BinaryPredicate(
            BinaryPredicate("a", "<=", 100),
            "&&",
            BinaryPredicate("b", ">", 20)
        )
    )
    expected = [
        {"a": 100, "c": 300, "d": None},
        {"a": 100, "c": None, "d": 400},
    ]

    for cache_connector in CACHE_CONNECTORS:
        Log.info("Clearing cache" + ("-" * 80))
        cache_connector.clear_query(query)

        Log.info("Non-cached query" + ("-" * 80))
        Log.info("Check if not cached")
        assert not cache_connector.is_cached(query)
        Log.info("Query")
        result = cache_connector.query(query)
        assert result == expected

        Log.info("Cached query" + ("-" * 80))
        Log.info("Check if cached")
        assert cache_connector.is_cached(query)
        Log.info("Query")
        result = cache_connector.query(query)
        assert result == expected
__author__ = "Marc-Olivier Buob" __maintainer__ = "Marc-Olivier Buob" __email__ = "*****@*****.**" __copyright__ = "Copyright (C) 2018, Nokia" __license__ = "BSD-3" import os, sys from minifold.cache import DEFAULT_CACHE_STORAGE_BASE_DIR, DEFAULT_CACHE_STORAGE_LIFETIME from minifold.filesystem import check_writable_directory, mkdir try: import requests_cache except ImportError as e: from minifold.log import Log Log.warning("Please install requests-cache.\n" " APT: sudo apt install python3-requests-cache\n" " PIP: sudo pip3 install --upgrade requests-cache\n") raise e def install_cache(cache_filename: str = None): if not cache_filename: directory = DEFAULT_CACHE_STORAGE_BASE_DIR cache_filename = os.path.join(DEFAULT_CACHE_STORAGE_BASE_DIR, "requests_cache") else: directory = os.path.dirname(cache_filename) mkdir(directory) check_writable_directory(directory) requests_cache.install_cache(cache_filename, expire_after=DEFAULT_CACHE_STORAGE_LIFETIME)