Esempio n. 1
0
 def update_text(**kw):
     """Save the site's name, description and priority.

     ``name``, ``desc``, ``priority`` and ``domain`` are free variables
     resolved from outside this function; the first three are written with
     ``$set`` onto the ``spider_sites`` document whose ``domain`` matches.

     :param kw: accepted but not used.
     :return: the string ``"OK"`` on success, otherwise a JSON response
         built from ``JSON_SERVER_ERROR``.
     """
     update_doc = {
         "name": name,
         "description": desc,
         "priority": priority,
     }
     try:
         # update_one replaces the legacy Collection.update (deprecated in
         # PyMongo 3.0, removed in 4.0). With a $set document and the legacy
         # default multi=False, both modify a single matching document.
         mongo_t.db.spider_sites.update_one({"domain": domain},
                                            {"$set": update_doc})
     except Exception as e:
         logging.warning(e)
         return jsonify(JSON_SERVER_ERROR)
     return "OK"
Esempio n. 2
0
def remove_channel(**kwargs):
    """Endpoint for removing a second-level data source.

    The channel id is read from the ``id`` query argument. Deleting a
    channel also deletes the ``qidian_map`` and ``spider_configs`` documents
    that reference it.

    :param kwargs: accepted but not used.
    :return: JSON response built from ``JSON_SUCCESS``, or from
        ``JSON_SERVER_ERROR`` when the id is not a valid ObjectId or a
        database call raises.
    """
    cid = request.args.get("id", "").strip()
    try:
        # Parse the id before deleting anything: a blank or malformed id
        # must not wipe dependent documents and only then fail on the
        # channel lookup.
        channel_oid = ObjectId(cid)
        # delete_many/delete_one replace the legacy Collection.remove
        # (deprecated in PyMongo 3.0, removed in 4.0).
        mongo_t.db.qidian_map.delete_many({"channel": cid})
        mongo_t.db.spider_configs.delete_many({"channel": cid})
        mongo_t.db.spider_channels.delete_one({"_id": channel_oid})
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)
    return jsonify(JSON_SUCCESS)
Esempio n. 3
0
 def insert_file(**kw):
     """Store the site's icon URL.

     When the request carries an ``icon-file`` upload, the file is sent to
     OSS via ``upload_file_to_oss`` and the resulting URL is used; otherwise
     the ``icon-url`` form field is used (empty string when absent). The URL
     is written to the ``icon`` field of the ``spider_sites`` document whose
     ``domain`` matches the free variable ``domain``.

     :param kw: accepted but not used.
     :return: the string ``"OK"`` on success, otherwise a JSON response
         built from ``JSON_SERVER_ERROR`` (upload failed or the database
         call raised).
     """
     icon_file = request.files.get("icon-file")
     if icon_file:
         icon_url = upload_file_to_oss(icon_file.mimetype, icon_file.read())
         if not icon_url:
             # The uploader reports failure with a falsy value; persisting
             # it would overwrite the icon with False and still answer "OK".
             logging.warning("icon upload failed, icon left unchanged")
             return jsonify(JSON_SERVER_ERROR)
     else:
         icon_url = request.form.get("icon-url", "")
     try:
         # update_one replaces the legacy Collection.update (deprecated in
         # PyMongo 3.0, removed in 4.0); semantics are the same for $set.
         mongo_t.db.spider_sites.update_one({"domain": domain},
                                            {"$set": {
                                                "icon": icon_url
                                            }})
     except Exception as e:
         logging.warning(e)
         return jsonify(JSON_SERVER_ERROR)
     return "OK"
Esempio n. 4
0
def upload_file_to_oss(file_mime, data):
    """Upload an image to OSS and return its URL.

    :param file_mime: MIME type of the payload; it must pass ``is_allowed``.
    :param data: content handed to ``bucket.put_object``.
    :return: URL string of the stored object, or ``False`` when the MIME
        type is rejected, the upload raises, or the response status is not
        200.
    """
    if not is_allowed(file_mime):
        return False

    # Objects are stored under a generated uuid1 hex name with a fixed
    # ".jpg" suffix.
    object_key = uuid.uuid1().hex + ".jpg"
    try:
        response = bucket.put_object(object_key, data)
    except Exception as err:
        logging.warning(err)
        logging.warning("upload image exception")
        return False

    if response.status == 200:
        return 'https://oss-cn-hangzhou.aliyuncs.com/bdp-images/' + object_key

    logging.info("upload image to oss error: %s" % response.status)
    return False
Esempio n. 5
0
def add_filter_rules(**kwargs):
    """Create filter-rule documents from a comma-separated word list.

    Reads the form fields ``words`` (comma-separated) and ``channel``
    (integer id). Each word is stripped; words shorter than two characters
    are skipped. Every remaining word is inserted into
    ``qidian_filter_rule`` as ``{"word": ..., "chid": ...}``. A failed
    insert is logged and that word is left out of the response.

    :param kwargs: accepted but not used.
    :return: JSON list of the inserted documents with ``_id`` converted to
        a string, or a JSON response built from ``JSON_FIELD_LACK`` when
        ``words`` or ``channel`` is empty or ``channel`` is not an integer.
    """
    raw_words = request.form.get("words", "").strip()
    channel = request.form.get("channel", "").strip()
    # str.split always yields at least one element, so emptiness has to be
    # checked on the raw string rather than on the split result.
    if not raw_words or not channel:
        return jsonify(JSON_FIELD_LACK)
    try:
        first_channel_id = int(channel)
    except ValueError:
        # A non-numeric channel is rejected instead of escaping as an
        # unhandled exception.
        return jsonify(JSON_FIELD_LACK)

    result = []
    for word in raw_words.split(","):
        word = word.strip()
        if len(word) < 2:
            continue
        doc = {"word": word, "chid": first_channel_id}
        try:
            # insert_one replaces the legacy Collection.insert (deprecated
            # in PyMongo 3.0, removed in 4.0); it also adds "_id" to doc.
            mongo_t.db["qidian_filter_rule"].insert_one(doc)
        except Exception as e:
            # Log the exception itself: the .message attribute does not
            # exist on Python 3 exceptions and would raise in this handler.
            logging.warning(e)
        else:
            doc["_id"] = str(doc["_id"])
            result.append(doc)
    return jsonify(result)
Esempio n. 6
0
 def is_login(cls):
     """Decide from the request's token whether the caller is logged in.

     The token is taken from the ``token`` cookie, falling back to the
     ``token`` form field. It is decoded with ``Serializer(SECRET_KEY)`` and
     the ``name`` / ``pw`` entries it carries are handed to
     ``cls.verify_password``.

     :return: whatever ``cls.verify_password`` returns; ``False`` when no
         token is supplied or when decoding/verification raises.
     """
     token = request.cookies.get('token') or request.form.get("token")
     if not token:
         return False

     serializer = Serializer(SECRET_KEY)
     try:
         payload = serializer.loads(token)
         account = payload.get("name", "")
         secret = payload.get("pw", "")
         return cls.verify_password(account, secret)
     except (SignatureExpired, BadSignature, Exception) as err:
         # Expired token, tampered token and any other failure are all
         # handled the same way: log it and treat the caller as logged out.
         logging.warning(err)
         return False
Esempio n. 7
0
def modify_channel(**kwargs):
    """Endpoint for modifying a second-level channel (website channel,
    WeChat official account, etc.).

    Reads the channel id (``cid``) and its attributes from the submitted
    form and then writes, in this order: the ``spider_channels`` document,
    the ``qidian_map`` entry, the channel icon, and the ``spider_configs``
    entry. The writes are independent calls, so a failure part-way through
    leaves the earlier writes in place.

    :param kwargs: accepted but not used.
    :return: JSON response built from ``JSON_SUCCESS``; from
        ``JSON_FIELD_LACK`` when ``cname`` is empty or a numeric field is
        not an integer; from ``JSON_SERVER_ERROR`` when a database call
        raises or the icon upload fails.
    """
    form = request.form
    cid = form.get("cid", "").strip()
    cname = form.get("cname", "").strip()
    media_type = form.get("media_type", "news").strip()
    try:
        priority = int(form.get("priority", 5))
        qd_cate1 = int(form.get("qd_cate1", 0))
        qd_cate2 = int(form.get("qd_cate2", 0))
    except ValueError:
        # A blank or non-numeric value is rejected instead of escaping as
        # an unhandled exception.
        return jsonify(JSON_FIELD_LACK)
    cate1 = form.get("cate1")
    cate2 = form.get("cate2")
    target_type = "#".join(form.getlist("target_type"))

    # "0" means "not selected"; an unset first level also clears the second.
    if cate1 == "0":
        cate1 = ""
        cate2 = ""
    elif cate2 == "0":
        cate2 = ""
    if qd_cate1 == 0:
        qd_cate1 = None
        qd_cate2 = None
    elif qd_cate2 == 0:
        qd_cate2 = None
    map_info = {
        "first_cid": qd_cate1,
        "second_cid": qd_cate2,
        "type": target_type
    }

    desc = form.get("desc", "").strip()
    crawler = form.get("crawler", "").strip()
    url = form.get("url", "")
    headers = form.get("headers", "").strip()
    headers = str_to_header(headers) if headers else dict()

    # Strip leading/trailing "?" and "&" in one pass so the result does not
    # depend on the order in which the two characters appear.
    params = form.get("params", "").strip().strip("?&")
    params = url_to_dic(params) if params else dict()
    method = form.get("method")
    ua_type = form.get("ua_type")

    rules = form.get("rules", "").strip().split("#")
    is_sch = form.get("is_sch") == "1"

    author = form.get("author", "")
    biz = form.get("biz", "")
    certification = form.get("certification", "")

    if not cname:
        return jsonify(JSON_FIELD_LACK)

    update_cha_doc = {
        "category1": cate1,
        "category2": cate2,
        "name": cname,
        "form": media_type,
        "description": desc,
        "priority": priority,
        "schedule": is_sch,
        "meta": {
            "name": author,
            "_biz": biz,
            "certification": certification
        }
    }
    # update_one is used for every write below, matching the spider_configs
    # update; the legacy Collection.update was deprecated in PyMongo 3.0
    # and removed in 4.0.
    try:
        channel_oid = ObjectId(cid)
        mongo_t.db.spider_channels.update_one({"_id": channel_oid},
                                              {"$set": update_cha_doc})
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)

    try:
        mongo_t.db.qidian_map.update_one({"channel": cid},
                                         {"$set": map_info})
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)

    icon_file = request.files.get("icon-file")
    if icon_file:
        icon_url = upload_file_to_oss(icon_file.mimetype, icon_file.read())
        if not icon_url:
            # The uploader reports failure with a falsy value; writing it
            # would replace the stored icon with False.
            logging.warning("icon upload failed, icon left unchanged")
            return jsonify(JSON_SERVER_ERROR)
    else:
        icon_url = form.get("icon-url", "")
        # The literal string "None" is treated as "no icon".
        if icon_url == "None":
            icon_url = ""
    try:
        mongo_t.db.spider_channels.update_one({"_id": channel_oid},
                                              {"$set": {
                                                  "icon": icon_url
                                              }})
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)

    update_conf_doc = {
        "rules": rules,
        "request": {
            "url": url,
            "method": method,
            "user_agent_type": ua_type,
            "params": params,
            "headers": headers
        },
        "crawler": crawler
    }

    try:
        mongo_t.db.spider_configs.update_one({"channel": cid},
                                             {"$set": update_conf_doc})
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)
    return jsonify(JSON_SUCCESS)
Esempio n. 8
0
def add_channel(**kwargs):
    """Endpoint for adding a second-level data source (website channel,
    WeChat account, etc.).

    Looks up the site by the ``domain`` form field, then creates, in this
    order: the ``spider_channels`` document, its icon, the
    ``spider_configs`` document (whose id is pushed onto the channel's
    ``config`` list), and the ``qidian_map`` entry. The writes are
    independent calls, so a failure part-way through leaves the earlier
    writes in place.

    :param kwargs: accepted but not used.
    :return: JSON response built from ``JSON_SUCCESS``; from
        ``JSON_FIELD_LACK`` when ``cname`` is empty or a numeric field is
        not an integer; from ``JSON_NOT_EXIST`` when no site has the given
        domain; from ``JSON_DUP_ERROR`` when the channel insert raises
        ``DuplicateKeyError``; from ``JSON_SERVER_ERROR`` when any other
        database call raises or the icon upload fails.
    """
    form = request.form
    domain = form.get("domain", "").strip()
    cname = form.get("cname", "").strip()
    media_type = form.get("media_type", "news").strip()
    try:
        # All numeric fields are parsed before anything is written, so a
        # bad value cannot leave a channel behind without its map entry.
        priority = int(form.get("priority", 5))
        qd_cate1 = int(form.get("qd_cate1", 0))
        qd_cate2 = int(form.get("qd_cate2", 0))
    except ValueError:
        return jsonify(JSON_FIELD_LACK)
    # The defaults are the string "0" so that a missing field is normalised
    # to "" below; an integer default would never equal "0".
    cate1 = form.get("cate1", "0")
    cate2 = form.get("cate2", "0")
    target_type = "#".join(form.getlist("target_type"))

    # "0" means "not selected"; an unset first level also clears the second.
    if cate1 == "0":
        cate1 = ""
        cate2 = ""
    elif cate2 == "0":
        cate2 = ""
    if qd_cate1 == 0:
        qd_cate1 = None
        qd_cate2 = None
    elif qd_cate2 == 0:
        qd_cate2 = None

    desc = form.get("desc", "").strip()
    crawler = form.get("crawler", "").strip()
    url = form.get("url", "").strip()
    headers = form.get("headers", "").strip()
    headers = str_to_header(headers) if headers else dict()

    # Strip leading/trailing "?" and "&" in one pass so the result does not
    # depend on the order in which the two characters appear.
    params = form.get("params", "").strip().strip("?&")
    params = url_to_dic(params) if params else dict()
    method = form.get("method")
    ua_type = form.get("ua_type")

    rules = form.get("rules", "").strip().split("#")
    is_sch = form.get("is_sch") == "1"

    author = form.get("author", "")
    biz = form.get("biz", "")
    certification = form.get("certification", "")

    site_doc = mongo_t.db.spider_sites.find_one({"domain": domain})
    if not site_doc:
        return jsonify(JSON_NOT_EXIST)
    site = str(site_doc["_id"])
    if not cname:
        return jsonify(JSON_FIELD_LACK)

    insert_cha_doc = {
        "category1": cate1,
        "category2": cate2,
        "name": cname,
        "form": media_type,
        "description": desc,
        "priority": priority,
        "site": site,
        "schedule": is_sch,
        "config": [],
        "status": 0,
        "meta": {
            "name": author,
            "_biz": biz,
            "certification": certification
        }
    }
    # insert_one/update_one replace the legacy Collection.insert/update,
    # which were deprecated in PyMongo 3.0 and removed in 4.0.
    try:
        cid = mongo_t.db.spider_channels.insert_one(
            insert_cha_doc).inserted_id
    except DuplicateKeyError:
        return jsonify(JSON_DUP_ERROR)
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)

    icon_file = request.files.get("icon-file")
    if icon_file:
        icon_url = upload_file_to_oss(icon_file.mimetype, icon_file.read())
        if not icon_url:
            # The uploader reports failure with a falsy value; writing it
            # would store False as the channel icon.
            logging.warning("icon upload failed, icon not stored")
            return jsonify(JSON_SERVER_ERROR)
    else:
        icon_url = form.get("icon-url", "")
    try:
        # Address the channel by the id just created instead of by
        # site + name, so exactly the new document is updated.
        mongo_t.db.spider_channels.update_one({"_id": cid},
                                              {"$set": {
                                                  "icon": icon_url
                                              }})
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)

    insert_conf_doc = {
        "channel": str(cid),
        "rules": rules,
        "request": {
            "url": url,
            "method": method,
            "user_agent_type": ua_type,
            "params": params,
            "headers": headers
        },
        "crawler": crawler,
    }

    try:
        conf_id = mongo_t.db.spider_configs.insert_one(
            insert_conf_doc).inserted_id
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)
    try:
        mongo_t.db.spider_channels.update_one({"_id": cid},
                                              {"$push": {
                                                  "config": str(conf_id)
                                              }})
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)

    qidian_map = {
        "first_cid": qd_cate1,
        "second_cid": qd_cate2,
        "channel": str(cid),
        "online": True,
        "type": target_type
    }
    try:
        mongo_t.db.qidian_map.insert_one(qidian_map)
    except Exception as e:
        logging.warning(e)
        return jsonify(JSON_SERVER_ERROR)

    return jsonify(JSON_SUCCESS)