Esempio n. 1
0
def find_and_modify_url_info_md5(url, md5_hash):
    """
    Cache-enabled update of the "md5" field for ``url``.

    The update is conditioned on the stored md5 being different from
    ``md5_hash``. ``UrlCacheClient`` is consulted first; depending on what it
    returns, the condition-based helpers are used as well.

    Returns:
        0 -- no record was modified (duplicate md5)
        1 -- the returned record's "md5" is not None (md5 changed)
        2 -- the returned record's "md5" is None (first md5)
    """
    new_values = {"md5": md5_hash}
    increments = None
    wanted = ["md5"]

    cached = UrlCacheClient.find_and_modify_url_info_by_not_md5(
        url, md5_hash, new_values, increments, wanted)

    # Same "md5 differs" condition, expressed as a query document.
    query = default_cond(url)
    query["md5"] = {"$ne": md5_hash}

    if cached is None:
        # The cache client returned nothing: do the conditional update via
        # _cond_update_url_info and use its result.
        record = _cond_update_url_info(query, new_values, increments, wanted)
    elif cached == False:
        # A False answer from the cache client is treated as "no record".
        record = None
    else:
        # The cache client returned a record: propagate the same update
        # through _async_update_url_info and keep the cached record.
        _async_update_url_info(query, new_values, increments)
        record = cached

    if record is None:
        return 0  # duplicate md5
    # 1: md5 changed, 2: first md5
    return 1 if record["md5"] is not None else 2
Esempio n. 2
0
def update_url_info(url, update_map, inc_map=None):
    """
    Cache-enabled update of the url info for ``url``.

    Applies ``update_map`` / ``inc_map`` through ``UrlCacheClient`` first and
    then passes the same update, keyed by ``default_cond(url)``, to
    ``_async_update_url_info``. Nothing is returned.
    """
    UrlCacheClient.update_url_info(url, update_map, inc_map)
    _async_update_url_info(default_cond(url), update_map, inc_map)
Esempio n. 3
0
def get_url_info(url, fields):
    """
    Cache-enabled lookup of ``fields`` for ``url``.

    Returns whatever ``UrlCacheClient.get_url_info`` yields unless that is
    None, in which case the result of ``_cond_get_url_info`` for
    ``default_cond(url)`` is returned instead.
    """
    cached = UrlCacheClient.get_url_info(url, fields)
    if cached is None:
        return _cond_get_url_info(default_cond(url), fields)
    return cached
Esempio n. 4
0
def update_url_info_by_status(url, crawl_status, update_map, inc_map=None):
    """
    Cache-enabled update of ``url``, restricted to a given ``crawl_status``.

    The update is sent to ``UrlCacheClient`` (its return value is not used)
    and then to ``_async_update_url_info`` with a condition that adds
    ``crawl_status`` to ``default_cond(url)``. Nothing is returned.
    """
    UrlCacheClient.update_url_info_by_status(url, crawl_status, update_map, inc_map)

    query = default_cond(url)
    query["crawl_status"] = crawl_status
    _async_update_url_info(query, update_map, inc_map)
Esempio n. 5
0
def find_and_modify_url_info(url, update_map, inc_map, fields):
    """
    Cache-enabled find-and-modify for ``url``.

    When ``UrlCacheClient`` returns None, the result of
    ``_cond_update_url_info`` is returned. Otherwise the same update is passed
    to ``_async_update_url_info`` and the cache client's result is returned.
    """
    query = default_cond(url)
    cached = UrlCacheClient.find_and_modify_url_info(url, update_map, inc_map, fields)

    if cached is not None:
        _async_update_url_info(query, update_map, inc_map)
        return cached

    return _cond_update_url_info(query, update_map, inc_map, fields)
Esempio n. 6
0
def get_url_info_by_status(url, crawl_status, fields):
    """
    Cache-enabled lookup of ``fields`` for ``url`` with a given ``crawl_status``.

    Outcomes, by what ``UrlCacheClient.get_url_info_by_status`` returns:
      * None  -- return the result of ``_cond_get_url_info`` using
                 ``default_cond(url)`` plus the ``crawl_status`` condition;
      * False -- return None;
      * anything else -- return it unchanged.
    """
    cached = UrlCacheClient.get_url_info_by_status(url, crawl_status, fields)

    if cached is None:
        query = default_cond(url)
        query["crawl_status"] = crawl_status
        return _cond_get_url_info(query, fields)

    if cached == False:
        return None

    return cached
Esempio n. 7
0
def find_and_modify_url_info_by_status(url, crawl_status, update_map, inc_map, fields):
    """
    Cache-enabled find-and-modify for ``url``, restricted to ``crawl_status``.

    Outcomes, by what ``UrlCacheClient.find_and_modify_url_info_by_status``
    returns:
      * None  -- return the result of ``_cond_update_url_info``;
      * False -- return None without any further update;
      * anything else -- pass the same update to ``_async_update_url_info``
                         and return the cache client's result.
    """
    query = default_cond(url)
    query["crawl_status"] = crawl_status

    cached = UrlCacheClient.find_and_modify_url_info_by_status(
        url, crawl_status, update_map, inc_map, fields)

    if cached is None:
        return _cond_update_url_info(query, update_map, inc_map, fields)

    if cached == False:
        return None

    _async_update_url_info(query, update_map, inc_map)
    return cached
Esempio n. 8
0
def get_url_info_meta(url, fields):
    """
    Return the result of ``_cond_get_url_info_meta`` for ``url``.

    The lookup uses ``default_cond(url)`` as the condition and
    ``make_fields(fields)`` as the field specification.
    """
    return _cond_get_url_info_meta(default_cond(url), make_fields(fields))
Esempio n. 9
0
def update_raw_doc(url, update_map):
    """
    Update the ``rawDocs`` entry matching ``default_cond(url)``.

    ``update_map`` is handed to the collection's ``update`` unchanged, so any
    update operators must already be present in it.
    """
    # NOTE(review): this uses the name `db`; confirm that is the intended
    # database handle for this module.
    raw_docs = db.rawDocs
    raw_docs.update(default_cond(url), update_map)
Esempio n. 10
0
def remove_redirect_url(url):
    """Remove the ``urlRedirects`` entries matching ``default_cond(url)``."""
    redirects = _db.urlRedirects
    redirects.remove(default_cond(url))
Esempio n. 11
0
def get_redirect_url(url):
    """
    Look up the redirect target stored for ``url``.

    Returns the "redirect_url" value of the ``urlRedirects`` entry matching
    ``default_cond(url)``, or None when no such entry is found.
    """
    redirects = _db.urlRedirects
    record = redirects.find_one(default_cond(url), fields={"redirect_url" : 1})
    if record is None:
        return None
    return record["redirect_url"]
Esempio n. 12
0
def update_result(url, update):
    """
    Set the given fields on the ``results`` entry for ``url``.

    ``update`` is wrapped in a ``$set`` operator and applied to the entry
    matching ``default_cond(url)``.
    """
    selector = default_cond(url)
    _db.results.update(selector, {'$set': update})