Exemple #1
0
def get_global_meta_from_redis():
    """Return the global metadata, using a ten-minute in-process cache.

    The JSON stored in Redis under ``META_KEY`` is decoded and kept in the
    module-level ``_meta_data``. It is fetched again when nothing has been
    loaded yet, or when the last load happened more than 10 * 60 seconds ago.

    :return: the object decoded from the JSON stored under ``META_KEY``
    """
    global _meta_data, _meta_last_load_time
    never_loaded = _meta_data is None or _meta_last_load_time is None
    # Short-circuit evaluation keeps the arithmetic away from a None timestamp.
    if never_loaded or time.time() > _meta_last_load_time + 10 * 60:
        raw_meta = get_redis_cli().get(META_KEY)
        _meta_data = json.loads(raw_meta.decode("utf8"))
        _meta_last_load_time = time.time()
    return _meta_data
Exemple #2
0
def get_unfinished_images():
    """
    Generator over every image whose download has not finished yet.

    Reads the group list stored under ``META_KEY``, then loads each group's
    image list (key built with ``KEY_TEMPLATE``) and yields the image-info
    dicts whose ``"nos_key"`` is still the empty string.

    :return: generator of image-info dicts
    """
    client = get_redis_cli()
    groups = json.loads(client.get(META_KEY).decode("utf8"))
    for entry in groups:
        images_key = KEY_TEMPLATE.format(idx=entry["id"])
        raw_images = client.get(images_key)
        for image in json.loads(raw_images.decode("utf8")):
            # An empty nos_key marks an image that still has to be processed.
            if image["nos_key"] == "":
                yield image
Exemple #3
0
def revise_meta(group_id, img_id, key):
    """
    Fill in the ``nos_key`` of one image inside its group's stored metadata.

    The group's image list is read from Redis, the first entry whose ``"id"``
    equals ``img_id`` gets ``"nos_key"`` set to ``key``, and the list is
    written back (even when no entry matched). Only ``"nos_key"`` is changed;
    ``"nos_url"`` is left untouched. The whole read-modify-write runs while
    holding the module-level ``LOCK``.

    :param group_id: group id used to build the Redis key via ``KEY_TEMPLATE``
    :param img_id: id of the image entry to update
    :param key: value stored into the entry's ``"nos_key"``
    :return: None
    """
    LOCK.acquire()
    try:
        redis_cli = get_redis_cli()
        group_key = KEY_TEMPLATE.format(idx=group_id)
        img_info_list = json.loads(redis_cli.get(group_key).decode("utf8"))
        for info in img_info_list:
            if info["id"] == img_id:
                info["nos_key"] = key
                break
        redis_cli.set(group_key, json.dumps(img_info_list))
    finally:
        # Always release, otherwise an exception above (missing key, invalid
        # JSON, Redis error) would leave the lock held and block every later
        # caller.
        LOCK.release()
def single_spide_group(group_id):
    """
    Download the image list of one group and merge new images into Redis.

    The stored list under ``"group:<group_id>"`` (treated as empty when the
    key is absent) is compared with the ``"info"`` list fetched from
    ``INNER_GROUP_INFO_URL``. Remote images whose id is not stored yet are
    added in front of the stored entries, and the merged list is written back.

    :param group_id: id of the image group to crawl
    :return: ``False`` when downloading/decoding the remote info failed,
        ``True`` otherwise
    """
    # Small random delay (at most 0.1 s) before doing anything;
    # presumably to spread out concurrent requests -- confirm with the caller.
    time.sleep(random.random() / 10)
    meta_logger.info("{}: 图片组信息下载".format(group_id))
    inner_group_info_url = INNER_GROUP_INFO_URL.format(index=group_id)

    redis_cli = get_redis_cli()
    group_key = "group:{}".format(group_id)

    origin_group_info_str = redis_cli.get(group_key)
    origin_group_info_str = b"[]" if origin_group_info_str is None else origin_group_info_str
    origin_group_info = json.loads(origin_group_info_str.decode("utf8"))  # previously stored meta data
    meta_logger.info("{}: 原图片数量: {}".format(group_id, len(origin_group_info)))
    origin_img_index_set = {img_info["id"] for img_info in origin_group_info}  # ids of all images already stored

    try:
        remote_group_info = json.loads(
            proxy_request_get(inner_group_info_url).content.decode("utf8")
        )["info"]
    except Exception:
        # Exception instead of a bare except: KeyboardInterrupt / SystemExit
        # must not be reported as a failed download and swallowed.
        meta_logger.error("{}: 退出: 下载图片组信息失败".format(group_id))
        meta_logger.error(traceback.format_exc())
        return False

    # Crawl succeeded: collect the images that are not stored yet.
    new_images = [
        {
            "id": group_info["id"],
            "group_id": group_id,
            "origin_url": group_info["url"],
            "nos_key": "",
            "nos_url": "",
            "local_file_path": "",
            "file_name": str(group_info["url"]).split("/")[-1],
        }
        for group_info in remote_group_info
        if group_info["id"] not in origin_img_index_set
    ]
    # Each new image used to be prepended one at a time, which leaves the
    # last remote entry first; a single reversed prepend gives the same order
    # without copying the whole list for every new image.
    origin_group_info = new_images[::-1] + origin_group_info
    redis_cli.set(group_key, json.dumps(origin_group_info).encode("utf8"))
    meta_logger.info("{}: 退出: 新增图片 {} 张".format(group_id, len(new_images)))
    return True
def spide_group_meta():
    """
    Fetch the meta information of all image groups and store it in Redis.

    The list stored under ``"group:meta"`` (treated as empty when the key is
    absent) is compared with the ``"indexes"`` list fetched from
    ``GROUP_INFO_URL``. Remote groups whose ``"index"`` is not stored yet are
    added in front of the stored entries, and the merged list is written back.

    :return: the merged group meta list; when downloading/decoding the remote
        data fails, the previously stored list is returned unchanged and
        nothing is written
    """
    meta_logger.info("下载图片组信息")
    redis_cli = get_redis_cli()

    origin_meta_str = redis_cli.get("group:meta")
    origin_meta_str = b"[]" if origin_meta_str is None else origin_meta_str
    origin_meta = json.loads(origin_meta_str.decode("utf8"))  # previously stored meta data
    meta_logger.info("原组长: {}".format(len(origin_meta)))
    origin_group_index_set = {meta["id"] for meta in origin_meta}  # ids of all groups already stored

    try:
        remote_meta = json.loads(
            proxy_request_get(GROUP_INFO_URL).content.decode("utf8")
        )["indexes"]
    except Exception:
        # Exception instead of a bare except: KeyboardInterrupt / SystemExit
        # must not be reported as a failed download and swallowed.
        meta_logger.error('退出: 下载"全部"图片组信息失败')
        meta_logger.error(traceback.format_exc())
        return origin_meta

    # Crawl succeeded: collect the groups that are not stored yet.
    new_meta_units = [
        {
            "id": meta["index"],
            "group_name": meta["des"],
            "origin_url": meta["url"],
            "nos_key": "",
            "nos_url": "",
            "local_file_path": "",
        }
        for meta in remote_meta
        if meta["index"] not in origin_group_index_set
    ]
    # Each new group used to be prepended one at a time, which leaves the
    # last remote entry first; a single reversed prepend gives the same order
    # without copying the whole list for every new group.
    origin_meta = new_meta_units[::-1] + origin_meta
    redis_cli.set("group:meta", json.dumps(origin_meta).encode("utf8"))
    meta_logger.info("退出: 新增图片组信息 {} 组".format(len(new_meta_units)))
    return origin_meta
Exemple #6
0
def get_group_meta_from_redis(group_id):
    """Load the JSON stored in Redis under ``"group:<group_id>"``.

    :param group_id: id interpolated into the Redis key
    :return: the object decoded from the stored UTF-8 JSON
    """
    redis_key = "group:{}".format(group_id)
    raw_value = get_redis_cli().get(redis_key)
    return json.loads(raw_value.decode("utf8"))