def __init__(self):
    self.recipe_list = RecipeListSpider()
    self.table_columns = (("id", "int"), ("img_url", "varchar"), ("video_id", "varchar"),
                          ("preparation", "longtext"), ("ingredients", "text"),
                          ("name", "varchar"), ("list_id", "int"))
    self.handle_num = 0
    self.db = DBConfig()
def __init__(self):
    self.db = DBConfig()
    self.table_columns = (("id", "int"), ("img_src", "varchar"), ("origin_src", "varchar"),
                          ("url", "varchar"), ("description", "text"),
                          ("description_poster", "text"), ("star", "varchar"),
                          ("title", "varchar"), ("page_views", "int"), ("label", "text"),
                          ("category_id", "int"), ("director", "varchar"))
    self.handle_num = 0
def __init__(self, flfgID, zlsxid, showDetailType, province, index):
    threading.Thread.__init__(self)
    self.count = 0
    self.flfgID = flfgID
    self.zlsxid = zlsxid
    self.showDetailType = showDetailType
    self.province = province
    self.index = index
    self.rv = 0
    self.db = DBConfig()
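# A minimal sketch of how a worker thread with these fields is assumed to be used:
# run() dispatches to the standalone getConstitutionData() fetcher shown further
# below. The project's real run() is not included in these excerpts, so this call
# pattern (and the unused showDetailType/index fields) is an assumption.
def run(self):
    # Keep the insert result so the caller can check whether the fetch succeeded.
    self.rv = getConstitutionData(self.flfgID, self.zlsxid, self.province)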
def __init__(self, page_list, category):
    """
    :param page_list: pages to crawl
    :param category: recipe category the pages belong to
    """
    self.page_list = page_list
    self.category = category
    self.handle_num = 0
    # Declaring the (column, type) pairs by hand avoids repeatedly querying the
    # table schema to auto-build the column/type string.
    self.table_columns = (("id", "int"), ("name", "varchar"), ("url", "varchar"),
                          ("img_url", "varchar"), ("introduce", "text"),
                          ("recipe_type_id", "int"), ("status", "int"),
                          ("page_views", "int"))
    self.db = DBConfig()
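# The comment above notes that hard-coding the (column, type) pairs spares the DB
# layer from querying the schema to discover column types. A minimal sketch of how
# such a mapping could drive statement building; build_insert_sql is a hypothetical
# helper for illustration, not the project's actual DBConfig.getInsertSql.
def build_insert_sql(table, table_columns, row):
    names = []
    values = []
    for name, col_type in table_columns:
        if name not in row:
            continue
        names.append(name)
        # The type tag decides whether the value is emitted bare or quoted.
        if col_type == "int":
            values.append(str(int(row[name])))
        else:
            values.append("'%s'" % str(row[name]).replace("'", "''"))
    return "INSERT INTO %s (%s) VALUES (%s)" % (table, ", ".join(names), ", ".join(values))

# Example:
# build_insert_sql("recipes", self.table_columns, {"name": "Mapo Tofu", "status": 1})
# -> INSERT INTO recipes (name, status) VALUES ('Mapo Tofu', 1)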
def __init__(self):
    self.db = DBConfig()
def __init__(self):
    self.db = DBConfig()
    recipe_type = RecipeType()
    self.category = recipe_type.get()
def __init__(self):
    self.cookie = {}
    # self.get_cookie()
    self.db = DBConfig()
def __init__(self):
    self.db = DBConfig()
    self.recipe_type = RecipeType()
# Assumed module-level imports for this standalone fetcher; DBConfig and curlData
# are the project's own helpers and are imported elsewhere in the original sources.
from urllib.parse import urlencode

from bs4 import BeautifulSoup


def getConstitutionData(flfgID, zlsxid, province):
    # From browsing the site, the detail page for a constitution record is the URL
    # below. It takes three GET parameters (flfgID, zlsxid, keyword); the first two
    # are required and come from the JS data passed by the listing page.
    flag = False
    url = "http://law.npc.gov.cn:8081/FLFG/flfgByID.action"
    get = dict()
    get['flfgID'] = flfgID
    get['zlsxid'] = zlsxid
    get['keyword'] = ""
    get = urlencode(get)
    url = url + "?" + get
    data = curlData(url, get, url)
    try:
        data = data.decode("utf-8")
    except Exception:
        pass
    # Debug helpers kept from development: cache the fetched page locally.
    # with open("constitution.txt", "wb") as f:
    #     f.write(data.encode("utf-8"))
    #     f.close()
    # with open("constitution.txt", "rb") as f:
    #     data = f.read().decode("utf-8")
    #     f.close()
    handleDataAll = BeautifulSoup(data, "html.parser")
    handleData = handleDataAll.find_all("table")
    columns_list = [
        'type', 'department_type', 'office', 'reference_num', 'issue_date',
        'execute_date', 'timeliness'
    ]
    columns_name_list = [
        '资料属性:', '部门分类:', '制定机关:', '颁布文号:', '颁布日期:', '施行日期:', '时 效 性:'
    ]
    # Extract the basic metadata from the header table.
    try:
        table_data = handleData[0].find_all("td")
    except Exception:
        table_data = "数据获取出错"
        flag = True
    type_data = dict()
    type_data['url'] = url
    for k, v in enumerate(table_data):
        try:
            # Cells come in label/value pairs: odd-positioned cells hold a Chinese
            # label, the following cell holds its value.
            if (k + 1) % 2 == 1:
                type_data[columns_list[columns_name_list.index(
                    table_data[k].getText().strip())]] = table_data[k + 1].getText().strip()
        except Exception:
            # Record an error marker for that column when the label is still
            # recognisable, instead of letting the whole parse abort.
            try:
                type_data[columns_list[columns_name_list.index(
                    table_data[k].getText().strip())]] = "数据获取出错"
            except Exception:
                pass
    # Next, extract the title and the content.
    try:
        type_data['title'] = handleDataAll.find_all(
            "div", attrs={"class": "bt"})[0].getText().strip()
    except Exception:
        type_data['title'] = "标题获取出错"
        flag = True
    # Extract the body content.
    try:
        type_data['content'] = str(
            handleDataAll.find_all("div", attrs={"id": "content"})[0])
    except Exception:
        flag = True
    type_data['province'] = province
    if flag:
        type_data['is_get_error'] = 1
    else:
        type_data['is_get_error'] = 0
    DB = DBConfig()
    sql = DB.getInsertSql(type_data, "constitutions")
    result = DB.insert(sql)
    return result
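# curlData() is a project helper that is called above but not shown in these
# excerpts. A minimal sketch of what it is assumed to do: fetch the page while
# sending a Referer header (the third argument) and return the raw response body.
# The real helper may use a different HTTP client or POST the data instead;
# requests is an assumed dependency here.
import requests

def curlData_sketch(url, post_data, referer):
    headers = {
        "Referer": referer,
        "User-Agent": "Mozilla/5.0",
    }
    # The caller already appended the urlencoded parameters to the URL, so a plain
    # GET of that URL is enough for this sketch.
    response = requests.get(url, headers=headers, timeout=30)
    return response.content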
def __init__(self):
    self.table_columns = (("id", "int"), ("parent_id", "int"), ("url", "text"))
    self.cookie = dict()
    self.cookie_get_num = 0
    self.db = DBConfig()
def __init__(self):
    # Global database connection handle
    # self.ws_db = phoenix_db.DBConfig()
    self.count = 0
    self.db = DBConfig()