Exemple #1
0
 def __init__(self):
     """Create the helper objects and initial state held by this instance.

     Instantiates a ``RecipeListSpider`` and a ``DBConfig``, declares
     ``table_columns`` and starts ``handle_num`` at 0.
     """
     self.recipe_list = RecipeListSpider()
     # (column name, column type) pairs -- presumably the layout of the
     # table this class writes to; TODO confirm against the DB schema.
     self.table_columns = (
         ("id", "int"),
         ("img_url", "varchar"),
         ("video_id", "varchar"),
         ("preparation", "longtext"),
         ("ingredients", "text"),
         ("name", "varchar"),
         ("list_id", "int"),
     )
     self.handle_num = 0
     self.db = DBConfig()
 def __init__(self):
     """Create the ``DBConfig`` handle and the initial instance state.

     Declares ``table_columns`` and starts ``handle_num`` at 0.
     """
     self.db = DBConfig()
     # (column name, column type) pairs -- presumably the layout of the
     # table this class writes to; TODO confirm against the DB schema.
     self.table_columns = (
         ("id", "int"),
         ("img_src", "varchar"),
         ("origin_src", "varchar"),
         ("url", "varchar"),
         ("description", "text"),
         ("description_poster", "text"),
         ("star", "varchar"),
         ("title", "varchar"),
         ("page_views", "int"),
         ("label", "text"),
         ("category_id", "int"),
         ("director", "varchar"),
     )
     self.handle_num = 0
 def __init__(self, flfgID, zlsxid, showDetailType, province, index):
     """Initialise the thread base class and store the arguments.

     :param flfgID: kept as ``self.flfgID``
     :param zlsxid: kept as ``self.zlsxid``
     :param showDetailType: kept as ``self.showDetailType``
     :param province: kept as ``self.province``
     :param index: kept as ``self.index``
     """
     # The Thread base initialiser runs before any attribute is assigned.
     threading.Thread.__init__(self)

     self.count = 0

     # Constructor arguments are stored unchanged under the same names.
     self.flfgID = flfgID
     self.zlsxid = zlsxid
     self.showDetailType = showDetailType
     self.province = province
     self.index = index

     self.rv = 0
     self.db = DBConfig()
 def __init__(self, page_list, category):
     """Store the constructor arguments and create the initial state.

     :param page_list: kept as ``self.page_list``
     :param category: kept as ``self.category``
     """
     self.page_list = page_list
     self.category = category
     self.handle_num = 0
     # The column types are declared by hand so that they do not have to
     # be queried again each time SQL is assembled automatically.
     self.table_columns = (
         ("id", "int"),
         ("name", "varchar"),
         ("url", "varchar"),
         ("img_url", "varchar"),
         ("introduce", "text"),
         ("recipe_type_id", "int"),
         ("status", "int"),
         ("page_views", "int"),
     )
     self.db = DBConfig()
 def __init__(self):
     """Create the ``DBConfig`` handle kept on the instance as ``self.db``."""
     self.db = DBConfig()
Exemple #6
0
 def __init__(self):
     """Create the ``DBConfig`` handle and load ``self.category``.

     ``self.category`` holds whatever ``RecipeType().get()`` returns; the
     ``RecipeType`` instance itself is not kept.
     """
     self.db = DBConfig()
     self.category = RecipeType().get()
Exemple #7
0
 def __init__(self):
     """Start with an empty cookie mapping and create the ``DBConfig`` handle."""
     # NOTE: the cookie is not fetched at construction time; the call
     # below is deliberately left disabled.
     # self.get_cookie()
     self.cookie = {}
     self.db = DBConfig()
Exemple #8
0
 def __init__(self):
     """Create the ``DBConfig`` and ``RecipeType`` objects kept on the instance."""
     self.db = DBConfig()
     self.recipe_type = RecipeType()
Exemple #9
0
def getConstitutionData(flfgID, zlsxid, province):
    """Fetch one law detail page, parse it and insert it into ``constitutions``.

    The page is requested from ``flfgByID.action`` with ``flfgID``,
    ``zlsxid`` and an empty ``keyword`` as GET parameters.  The first
    ``<table>`` is read as alternating label/value ``<td>`` cells, the title
    is taken from the first ``div.bt`` and the content from the first
    ``div#content``.  Anything that cannot be extracted is recorded in the
    row (``is_get_error`` is set to 1) instead of aborting, so a malformed
    page still produces a database record.

    :param flfgID: value sent as the ``flfgID`` query parameter
    :param zlsxid: value sent as the ``zlsxid`` query parameter
    :param province: stored unchanged in the row's ``province`` field
    :return: whatever ``DBConfig.insert`` returns for the generated SQL
    """
    columns_list = [
        'type', "department_type", 'office', 'reference_num', 'issue_date',
        'execute_date', 'timeliness'
    ]
    columns_name_list = [
        '资料属性:', '部门分类:', '制定机关:', '颁布文号:', '颁布日期:', '施行日期:', '时 效 性:'
    ]
    # Label text as it appears on the page -> column name in the row.
    column_by_label = dict(zip(columns_name_list, columns_list))

    had_error = False

    # From browsing the site: the detail page takes the GET parameters
    # flfgID, zlsxid and keyword.  The first two are required and come from
    # the JS data passed along by the list page.
    base_url = "http://law.npc.gov.cn:8081/FLFG/flfgByID.action"
    query = urlencode({'flfgID': flfgID, 'zlsxid': zlsxid, 'keyword': ""})
    url = base_url + "?" + query
    data = curlData(url, query, url)
    try:
        data = data.decode("utf-8")
    except (AttributeError, UnicodeDecodeError):
        # Already text, or not valid UTF-8: hand the payload to the parser
        # unchanged, exactly as before.
        pass

    soup = BeautifulSoup(data, "html.parser")

    # Header information: the cells of the first table on the page.
    tables = soup.find_all("table")
    if tables:
        cells = tables[0].find_all("td")
    else:
        # No table at all: there is nothing to iterate.  (Previously an
        # error *string* was iterated here, which crashed the loop below.)
        cells = []
        had_error = True

    type_data = {'url': url}
    # Cells alternate label, value, label, value, ...
    for pos in range(0, len(cells), 2):
        label = cells[pos].getText().strip()
        column = column_by_label.get(label)
        if column is None:
            # Unknown or empty label cell: skip it rather than raising
            # ValueError as the former ``.index()`` lookup did.
            continue
        if pos + 1 < len(cells):
            type_data[column] = cells[pos + 1].getText().strip()
        else:
            # Known label without a value cell after it.
            type_data[column] = "数据获取出错"

    # Title.
    title_nodes = soup.find_all("div", attrs={"class": "bt"})
    if title_nodes:
        type_data['title'] = title_nodes[0].getText().strip()
    else:
        type_data['title'] = "标题获取出错"
        had_error = True

    # Content: the raw markup of the content container; the key is left
    # unset when the container is missing.
    content_nodes = soup.find_all("div", attrs={"id": "content"})
    if content_nodes:
        type_data['content'] = str(content_nodes[0])
    else:
        had_error = True

    type_data['province'] = province
    type_data['is_get_error'] = 1 if had_error else 0

    db = DBConfig()
    sql = db.getInsertSql(type_data, "constitutions")
    return db.insert(sql)
 def __init__(self):
     """Declare ``table_columns`` and create the cookie state and DB handle.

     ``cookie`` starts as an empty dict and ``cookie_get_num`` at 0.
     """
     # (column name, column type) pairs -- presumably the layout of the
     # table this class writes to; TODO confirm against the DB schema.
     self.table_columns = (
         ("id", "int"),
         ("parent_id", "int"),
         ("url", "text"),
     )
     self.cookie = {}
     self.cookie_get_num = 0
     self.db = DBConfig()
 def __init__(self):
     """Start ``count`` at 0 and create the ``DBConfig`` handle."""
     self.count = 0
     # Database connection kept on the instance (an earlier
     # ``phoenix_db.DBConfig()`` handle named ``ws_db`` is no longer created).
     self.db = DBConfig()