Ejemplo n.º 1
0
 def test_database_type(self):
     """Check that NoSQL rejects unknown engine names and accepts "redis".

     Every invalid name is checked in its own ``assertRaises`` block.
     Statements that follow the first raising call inside a shared
     ``assertRaises`` block are never executed, so grouping them would
     leave all but the first name untested.
     """
     param = {}
     # test wrong database type
     for bad_name in ("rdeis", "Redis", "BDb"):
         with self.assertRaises(ValueError):
             NoSQL(bad_name, param)
     # test right database type
     try:
         NoSQL("redis", param)
     except Exception:
         self.fail("Correct database name:redis, unexpected exceptions")
Ejemplo n.º 2
0
def UrlChecker(job, param, headers):
    """Decide whether the URL of ``job`` is still fresh in the URL cache.

    The cache entry is looked up by ``job.identifier`` in the store
    described by ``param["database"]`` (keys ``engine``, ``host``, ``port``
    and ``db["urlcache"]``).

    Args:
        job: object exposing ``identifier`` and ``url`` attributes.
        param: configuration mapping; besides ``database`` it must contain
            ``crawlperiod``, compared against an age expressed in hours.
        headers: mapping of response headers; ``"last-modified"`` is used
            when present and parseable.

    Returns:
        A ``(cached, result)`` tuple. ``cached`` is True only when an entry
        for the same URL exists and the new timestamp is less than
        ``crawlperiod`` hours after the cached one. In every other case the
        entry is (re)written and ``cached`` is False. ``result`` is the dict
        with keys ``"last-modified"`` and ``"url"``.
    """
    last_update = None
    if "last-modified" in headers:
        # parsedate returns None for a malformed header; storing that None
        # would make the later mktime() call raise TypeError.
        last_update = eut.parsedate(headers["last-modified"])
    if last_update is None:
        # Header missing or unparseable: fall back to the current time.
        last_update = eut.parsedate(strftime("%a, %d %b %Y %H:%M:%S GMT",
                                             gmtime()))

    db_conf = param["database"]
    db = NoSQL(db_conf["engine"],
               {"host": db_conf["host"],
                "port": db_conf["port"],
                "db": db_conf["db"]["urlcache"]})
    result_str = db.get(job.identifier)

    # For the url that has never been cached before or deleted by LRU
    if result_str is None:
        result = {
            "last-modified": last_update,
            "url": job.url,
        }
        # TODO: shouldn't pickle at this level
        db.set(job.identifier, pk.dumps(result))
        return False, result

    # NOTE(security): pickle.loads executes arbitrary code if the stored
    # bytes can be tampered with; only safe while the store is trusted.
    result = pk.loads(result_str)

    # For a url that is not cached but shares the same identifier
    if result["url"] != job.url:
        result["url"] = job.url
        result["last-modified"] = last_update
        db.set(job.identifier, pk.dumps(result))
        return False, result

    cached_date = result["last-modified"]
    # Age difference in hours between the new and the cached timestamp.
    hour_diff = (mktime(last_update) - mktime(cached_date)) / 3600
    if hour_diff >= param["crawlperiod"]:
        result["last-modified"] = last_update
        db.set(job.identifier, pk.dumps(result))
        return False, result
    return True, result
Ejemplo n.º 3
0
def UrlChecker(job, param, headers):
    """Decide whether the URL of ``job`` is still fresh in the URL cache.

    The cache entry is looked up by ``job.identifier`` in the store
    described by ``param["database"]`` (keys ``engine``, ``host``, ``port``
    and ``db["urlcache"]``).

    Args:
        job: object exposing ``identifier`` and ``url`` attributes.
        param: configuration mapping; besides ``database`` it must contain
            ``crawlperiod``, compared against an age expressed in hours.
        headers: mapping of response headers; ``"last-modified"`` is used
            when present and parseable.

    Returns:
        A ``(cached, result)`` tuple. ``cached`` is True only when an entry
        for the same URL exists and the new timestamp is less than
        ``crawlperiod`` hours after the cached one. In every other case the
        entry is (re)written and ``cached`` is False. ``result`` is the dict
        with keys ``"last-modified"`` and ``"url"``.
    """
    last_update = None
    if "last-modified" in headers:
        # parsedate returns None for a malformed header; storing that None
        # would make the later mktime() call raise TypeError.
        last_update = eut.parsedate(headers["last-modified"])
    if last_update is None:
        # Header missing or unparseable: fall back to the current time.
        last_update = eut.parsedate(
            strftime("%a, %d %b %Y %H:%M:%S GMT", gmtime()))

    db_conf = param["database"]
    db = NoSQL(
        db_conf["engine"], {
            "host": db_conf["host"],
            "port": db_conf["port"],
            "db": db_conf["db"]["urlcache"]
        })
    result_str = db.get(job.identifier)

    # For the url that has never been cached before or deleted by LRU
    if result_str is None:
        result = {
            "last-modified": last_update,
            "url": job.url,
        }
        # TODO: shouldn't pickle at this level
        db.set(job.identifier, pk.dumps(result))
        return False, result

    # NOTE(security): pickle.loads executes arbitrary code if the stored
    # bytes can be tampered with; only safe while the store is trusted.
    result = pk.loads(result_str)

    # For a url that is not cached but shares the same identifier
    if result["url"] != job.url:
        result["url"] = job.url
        result["last-modified"] = last_update
        db.set(job.identifier, pk.dumps(result))
        return False, result

    cached_date = result["last-modified"]
    # Age difference in hours between the new and the cached timestamp.
    hour_diff = (mktime(last_update) - mktime(cached_date)) / 3600
    if hour_diff >= param["crawlperiod"]:
        result["last-modified"] = last_update
        db.set(job.identifier, pk.dumps(result))
        return False, result
    return True, result
Ejemplo n.º 4
0
 def test_redis_init(self):
     """Exercise NoSQL("redis", ...) with valid and invalid parameters.

     Valid parameter sets must allow a ``set`` call without any exception.
     Each invalid set is checked in its own ``assertRaises`` block, because
     statements after the first raising call inside a shared block are
     never executed.
     """
     # Parameters that must work: port given as int, then as numeric string.
     good_params = (
         {"host": "localhost", "port": 6379, "db": 0},
         {"host": "localhost", "port": "6379", "db": 0},
     )
     for param in good_params:
         try:
             db = NoSQL("redis", param)
             db.set("foo", "bar")
         except Exception:  # TODO: Change this to dictionary no key exception
             self.fail("Unexpected redis NoSQL instance init exception")

     # test bad init parameters
     # NOTE(review): previously only the first case below was ever executed;
     # confirm that each remaining case really raises redis.ConnectionError.
     bad_params = (
         {"host": "localhost", "port": "6379", "db": "0"},
         {"host": "big", "port": "6379", "db": 0},
         {"host": "localhost", "port": "dog", "db": 0},
         {"host": "localhost", "port": "6379", "db": "5f"},
         {"host": "localhost", "port": "6d379", "db": 0},
     )
     for param in bad_params:
         with self.assertRaises(redis.ConnectionError):
             db = NoSQL("redis", param)
             db.set("foo", "bar")
Ejemplo n.º 5
0
def worker6(value):
    """Read key "foo" through a NoSQL("redis") handle opened with db=value."""
    store = NoSQL("redis", {"db": value})
    store.get("foo")
Ejemplo n.º 6
0
def worker5(value):
    """Write "foo" -> "bar" through a NoSQL("redis") handle opened with db=value."""
    store = NoSQL("redis", {"db": value})
    store.set("foo", "bar")
Ejemplo n.º 7
0
def worker4(value):
    """Read key "foo" through a default-configured NoSQL("redis") handle.

    ``value`` is accepted but not used by the body.
    """
    store = NoSQL("redis", {})
    store.get("foo")
Ejemplo n.º 8
0
def worker3(pairs):
    """Look up ``pairs[0]`` through a default-configured NoSQL("redis") handle."""
    key = pairs[0]
    NoSQL("redis", {}).get(key)
Ejemplo n.º 9
0
def worker2(value):
    """Store ``value`` under key "foo" through a default NoSQL("redis") handle."""
    store = NoSQL("redis", {})
    store.set("foo", value)
Ejemplo n.º 10
0
def worker3(pairs):
    """Look up ``pairs[0]`` through a default-configured NoSQL("redis") handle."""
    key = pairs[0]
    NoSQL("redis", {}).get(key)
Ejemplo n.º 11
0
def worker(pairs):
    """Store ``pairs[1]`` under key ``pairs[0]`` via a default NoSQL("redis") handle."""
    store = NoSQL("redis", {})
    key = pairs[0]
    payload = pairs[1]
    store.set(key, payload)
Ejemplo n.º 12
0
 def test_redis_init(self):
     """Exercise NoSQL("redis", ...) with valid and invalid parameters.

     Valid parameter sets must allow a ``set`` call without any exception.
     Each invalid set is checked in its own ``assertRaises`` block, because
     statements after the first raising call inside a shared block are
     never executed.
     """
     # Parameters that must work: port given as int, then as numeric string.
     good_params = (
         {"host": "localhost", "port": 6379, "db": 0},
         {"host": "localhost", "port": "6379", "db": 0},
     )
     for param in good_params:
         try:
             db = NoSQL("redis", param)
             db.set("foo", "bar")
         except Exception:  # TODO: Change this to dictionary no key exception
             self.fail("Unexpected redis NoSQL instance init exception")

     # test bad init parameters
     # NOTE(review): previously only the first case below was ever executed;
     # confirm that each remaining case really raises redis.ConnectionError.
     bad_params = (
         {"host": "localhost", "port": "6379", "db": "0"},
         {"host": "big", "port": "6379", "db": 0},
         {"host": "localhost", "port": "dog", "db": 0},
         {"host": "localhost", "port": "6379", "db": "5f"},
         {"host": "localhost", "port": "6d379", "db": 0},
     )
     for param in bad_params:
         with self.assertRaises(redis.ConnectionError):
             db = NoSQL("redis", param)
             db.set("foo", "bar")
Ejemplo n.º 13
0
def worker6(value):
    """Read key "foo" through a NoSQL("redis") handle opened with db=value."""
    store = NoSQL("redis", {"db": value})
    store.get("foo")
Ejemplo n.º 14
0
def worker5(value):
    """Write "foo" -> "bar" through a NoSQL("redis") handle opened with db=value."""
    store = NoSQL("redis", {"db": value})
    store.set("foo", "bar")
Ejemplo n.º 15
0
def worker4(value):
    """Read key "foo" through a default-configured NoSQL("redis") handle.

    ``value`` is accepted but not used by the body.
    """
    store = NoSQL("redis", {})
    store.get("foo")
Ejemplo n.º 16
0
def worker(pairs):
    """Store ``pairs[1]`` under key ``pairs[0]`` via a default NoSQL("redis") handle."""
    store = NoSQL("redis", {})
    key = pairs[0]
    payload = pairs[1]
    store.set(key, payload)
Ejemplo n.º 17
0
def worker2(value):
    """Store ``value`` under key "foo" through a default NoSQL("redis") handle."""
    store = NoSQL("redis", {})
    store.set("foo", value)