Example 1
class MvImg(object):
    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        self.handle_data()

    def handle_data(self):
        data = self.get_img_list()
        thread_pool = ThreadPoolExecutor(max_workers=15)
        task_list = list()
        for item in data:
            task = thread_pool.submit(self.__handle_data, item)
            task_list.append(task)
            # break
        for i in as_completed(task_list):
            result = i.result()

    def __handle_data(self, item):
        url = "https://www.pelisplay.tv" + item['img_src']
        header = {
            # "Referer": "https://www.pelisplay.tv/",
            "User-Agent": getUserAgent(),
            "Accept": "image/webp,image/apng,image/*,*/*;q=0.8"
        }
        data = curlData(url, header=header)
        with open("static/images/{id}.jpg".format(id=item['id']), "wb") as f:
            try:
                data = data.encode("utf-8")
            except Exception as e:
                debug(e)
            f.write(data)
            self.__update_data(item)
            f.close()
        return {"code": 0}

    def __update_data(self, item):
        update_arr = {
            "table": "list",
            "set": {
                "img_status": 1
            },
            "condition": ['id={id}'.format(id=item['id'])]
        }
        lock.acquire()
        result = self.db.update(update_arr, is_close_db=False)
        lock.release()
        return result

    def get_img_list(self):
        select_arr = {
            "table": "list",
            "columns": ["id", "img_src"],
            "condition": ['img_status=0']
        }
        data = self.db.select(select_arr, is_close_db=False)
        return data
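All of these spider classes share the same fan-out pattern: submit one task per row to a ThreadPoolExecutor and drain the futures with as_completed. A minimal, self-contained sketch of just that pattern, standard library only and with a trivial stand-in task:

from concurrent.futures import ThreadPoolExecutor, as_completed

def work(n):
    return n * n

pool = ThreadPoolExecutor(max_workers=15)
tasks = [pool.submit(work, n) for n in range(5)]
results = [task.result() for task in as_completed(tasks)]
print(sorted(results))  # [0, 1, 4, 9, 16]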
Example 2
class GetImgUrlLarge(object):
    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        self.handle()

    def handle(self):
        data = self.get_data()
        for item in data:
            result = self.__handle(item)
            if result['result_1'] == 1 and result['result_2'] == 1 and result[
                    'result_3'] == 1:
                debug(item['img_url'])
            else:
                break

    def __handle(self, item):
        img_url = item['img_url']
        try:
            s = re.findall('squarethumbnails\/([\w\W]*.)', img_url)[0]
        except Exception as e:
            s = ''
            debug(e)
        if s == '':
            # nothing matched: report failure so handle() can stop, as it does on db errors
            return {"result_1": 0, "result_2": 0, "result_3": 0}
        s = 'http://www.laurainthekitchen.com/largethumbnails/' + s
        result = self.__update_data(s, item)
        return result

    def __update_data(self, s, item):
        update_arr_list = {
            "table": "list",
            "set": {
                "img_url_large": s,
                "status": 1
            },
            "condition": ["id={id}".format(id=item['id'])]
        }
        result_1 = self.db.update(update_arr_list, is_close_db=False)
        del update_arr_list['set']['status']
        update_arr_list['table'] = "tmp_list"
        result_2 = self.db.update(update_arr_list, is_close_db=False)
        update_arr_list['table'] = "content"
        update_arr_list['condition'] = ["list_id={id}".format(id=item['id'])]
        result_3 = self.db.update(update_arr_list, is_close_db=False)
        return {
            "result_1": result_1,
            "result_2": result_2,
            "result_3": result_3
        }

    def get_data(self):
        select_arr = {
            "table": "list",
            "columns": ["img_url", "id"],
            "condition": ["status=0"]
        }
        data = self.db.select(select_arr, is_close_db=False)
        return data
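The core of __handle is a plain regex rewrite from the square-thumbnail path to the large-thumbnail path. An isolated sketch with a made-up image path (the real values come from the list table):

import re

img_url = "http://www.laurainthekitchen.com/squarethumbnails/tiramisu.jpg"  # hypothetical row value
tail = re.findall(r'squarethumbnails/([\w\W]*.)', img_url)[0]
print("http://www.laurainthekitchen.com/largethumbnails/" + tail)
# http://www.laurainthekitchen.com/largethumbnails/tiramisu.jpg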
Example 3
class RecipeGetPage(object):
    def __init__(self):
        self.db = DBConfig()
        recipe_type = RecipeType()
        self.category = recipe_type.get()

    def __del__(self):
        self.db.closeDB()

    def get(self):
        """
        :return:
        """
        self.__get_recipe_page()

    def __get_recipe_page(self):
        """
        :return:
        """
        for item in self.category:
            url = CommonFunc().generate_url(category=item['keyword'])
            try:
                self.__get_recipe_page_data(url, item['id'])
            except Exception as e:
                debug("页面数量抓取出错,出错信息:{error}".format(error=e))

    def __get_recipe_page_data(self, url, recipe_category_id):
        """
        :param url:
        :param recipe_category_id:
        :return:
        """
        page_resource = curlData(url, open_virtual_ip=True)
        # with open("tmp/category_page_data.txt", "rb") as f:
        #     page_resource = f.read().decode("utf-8")
        #     f.close()
        bs = BeautifulSoup(page_resource, "html.parser")
        page_ul = bs.find_all("ul", attrs={"class": "page-numbers"})
        # drop the next-page link before collecting the page numbers
        for k, v in enumerate(page_ul[0]('a', attrs={"class": "next"})):
            v.extract()
        page_a = page_ul[0].find_all("a")
        page_span = page_ul[0].find("span")
        page_list = ""
        for k, v in enumerate(page_a):
            if k == 0:
                page_list = page_list + str(v.get_text()).strip()
            else:
                page_list = page_list + "," + str(v.get_text()).strip()
        page_list = page_list + "," + page_span.get_text().strip()
        page_list = {"page_list": page_list}
        # update to mysql
        update_arr = {
            "table": "type",
            "set": {
                "page_num": json.dumps(page_list)
            },
            "condition": ['id={id}'.format(id=recipe_category_id)]
        }
        result = self.db.update(update_arr, is_close_db=False)
        if result == 1:
            debug("Fetched the page data for recipe type id {id}".format(id=recipe_category_id))
        else:
            debug("Failed to fetch the page data for recipe type id {id}".format(id=recipe_category_id))
Example 4
class GetImages(object):
    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        self.handle()

    def handle(self):
        data = self.get_data()
        self.get_images(data, "large", img_url="img_url_large")

    @classmethod
    def start_thread(cls, data, fun, path, img_url, prefix):
        thread_pool = ThreadPoolExecutor(max_workers=15)
        task_list = list()
        result = list()
        for item in data:
            task = thread_pool.submit(fun, item, path, img_url, prefix)
            task_list.append(task)
        for i in as_completed(task_list):
            result.append(i.result())
        return result

    def get_images(self, data, path, img_url, prefix=""):
        self.start_thread(data, self.__get_images, path, img_url, prefix)

    def __get_images(self, item, path, img_url, prefix):
        page_resource = self.get_page_resource(prefix + item[img_url])
        with open("static/images/{path}/{id}.jpg".format(path=path, id=item['id']),
                  "wb") as f:
            try:
                # curlData may return str or bytes; encode when it is str, otherwise keep the bytes as-is
                page_resource = page_resource.encode("utf-8")
            except Exception as e:
                debug(e)
            f.write(page_resource)
        update_data = {"status": 1}
        condition = ["id={id}".format(id=item['id'])]
        self.__update_data(update_data, "list", condition)

    @classmethod
    def get_page_resource(cls, url):
        data = curlData(url, open_virtual_ip=True)
        return data

    def __update_data(self, update_data, table, condition):
        update_arr = {
            "table": table,
            "set": update_data,
            "condition": condition
        }
        lock.acquire()
        self.db.update(update_arr, is_close_db=False)
        lock.release()

    def get_data(self):
        data = self.db.select(
            {
                "table": "list",
                "columns": ["id", "img_url", "img_url_large"],
                "condition": ["status=0"]
            },
            is_close_db=False)
        return data
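start_thread is a generic fan-out helper, so it can be exercised with a stand-in task and hypothetical rows; this sketch assumes it runs in the same module as the class above, where ThreadPoolExecutor and as_completed are already imported:

def echo_url(item, path, img_url, prefix):
    # stand-in for __get_images: just report what would be downloaded and where
    return "{src} -> static/images/{path}/{id}.jpg".format(
        src=prefix + item[img_url], path=path, id=item['id'])

rows = [{"id": 1, "img_url_large": "http://example.com/a.jpg"},
        {"id": 2, "img_url_large": "http://example.com/b.jpg"}]  # hypothetical rows
print(GetImages.start_thread(rows, echo_url, "large", "img_url_large", ""))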
Example 5
class RecipeContentThread(object):
    def __init__(self):
        self.recipe_list = RecipeListSpider()
        self.table_columns = (("id", "int"), ("img_url", "varchar"),
                              ("video_id", "varchar"), ("preparation", "longtext"),
                              ("ingredients", "text"), ("name", "varchar"),
                              ("list_id", "int"))
        self.handle_num = 0
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        """
        :return:
        """
        data = self.get_list()
        self.start(data)

    def start(self, data):
        """
        :param data:
        :return:
        """
        thread_pool = ThreadPoolExecutor(max_workers=15)
        task_list = list()
        for item in data:
            task = thread_pool.submit(self.handle_data, item)
            task_list.append(task)
        for i in as_completed(task_list):
            result = i.result()
        debug("本次处理成功 {num} 个线程".format(num=self.handle_num))

    def get_list(self):
        """
        :return:
        """
        data = self.recipe_list.get_list()
        return data

    def handle_data(self, item):
        """
        :param item:
        :return:
        """
        page_resource = self.get_data(item)
        result = self.__handle_data(page_resource, item)
        if result['code'] == 0:
            debug("Failed to save recipe --> {name}".format(name=item['name']))
        else:
            debug("Recipe saved --> {name}".format(name=item['name']))
            self.__update_status(result['code'])
            lock.acquire()
            self.handle_num = self.handle_num + 1
            lock.release()
        return {"code": 0}

    @classmethod
    def get_data(cls, item):
        """
        :param item:
        :return:
        """
        url = CommonFunc().generate_content_url(item['url'])
        data = curlData(url, open_virtual_ip=True)
        return data

    def __handle_data(self, page_resource, item):
        """
        :param page_resource:
        :param item:
        :return:
        """
        bs = BeautifulSoup(page_resource, "html.parser")
        insert_arr = dict()
        insert_arr['video_id'] = self.__get_video_id(bs)
        insert_arr['img_url'] = self.__get_img_url(page_resource)
        insert_arr['name'] = item['name']
        insert_arr['preparation'] = self.__get_preparation(bs)
        insert_arr['ingredients'] = self.__get_ingredients(bs)
        insert_arr['list_id'] = item['id']
        result = self.__save_data(insert_arr)
        return {'code': result}

    def __save_data(self, insert_arr):
        lock.acquire()
        sql = self.db.getInsertSql(insert_arr,
                                   table="content",
                                   table_columns=self.table_columns)
        result = self.db.insertLastId(sql, is_close_db=False)
        lock.release()
        return result

    def __update_status(self, recipe_list_id):
        update_arr = {
            "table": "list",
            "set": {
                "status": "1"
            },
            "condition": ["id={recipe_list_id}".format(recipe_list_id=recipe_list_id)]
        }
        lock.acquire()
        self.db.update(update_arr, is_close_db=False)
        lock.release()

    @classmethod
    def __get_video_id(cls, bs):
        """
        :param bs:
        :return:
        """
        video_id = bs.find("iframe")
        try:
            video_id = video_id.attrs['src']
            video_id = re.findall('https://www.youtube.com/embed/([\w\W]*?)\?',
                                  video_id)[0]
        except Exception as e:
            video_id = ""
            debug("视频播放id获取出错,错误信息:{error}".format(error=e))
        return video_id

    @classmethod
    def __get_img_url(cls, bs):
        # img_url = bs.find("div", attrs={"class": "ytp-cued-thumbnail-overlay-image"})
        try:
            # img_url = img_url.attrs['style']
            img_url = re.findall('"image": "([\w\W]*?)"', str(bs))[0]
        except Exception as e:
            img_url = ""
            debug("菜谱图片链接获取出错,错误信息:{error}".format(error=e))
        return img_url

    @classmethod
    def __get_preparation(cls, bs):
        """
        :param bs:
        :return:
        """
        preparation = bs.find("div",
                              attrs={"class": "cs-recipe-single-preparation"})
        try:
            preparation = preparation.find("ul")
            preparation = str(preparation)
            preparation = re.findall("<ul>([\w\W]*?)<\/ul>", preparation)[0]
        except Exception as e:
            preparation = ""
            debug("菜谱做法获取出错,错误信息:{error}".format(error=e))
        return preparation

    @classmethod
    def __get_ingredients(cls, bs):
        ingredients = bs.find("div",
                              attrs={"class": "cs-ingredients-check-list"})
        ingredients_str = ""
        try:
            ingredients = ingredients.find("ul")
            ingredients = ingredients.find_all("li")
            for k, v in enumerate(ingredients):
                if k == 0:
                    ingredients_str = ingredients_str + v.get_text().strip()
                else:
                    ingredients_str = ingredients_str + "," + v.get_text().strip()
        except Exception as e:
            ingredients_str = ""
            debug("Failed to get the ingredients, error: {error}".format(error=e))
        return ingredients_str
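The video id extraction in __get_video_id boils down to one regex over the iframe src. An isolated sketch with a hypothetical embed URL:

import re

src = "https://www.youtube.com/embed/dQw4w9WgXcQ?rel=0"  # hypothetical iframe src
video_id = re.findall(r'https://www.youtube.com/embed/([\w\W]*?)\?', src)[0]
print(video_id)  # dQw4w9WgXcQ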
Example 6
class GetVideoSrc(object):
    def __init__(self):
        self.cookie = {}
        # self.get_cookie()
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        data = self.get_content_list()
        self.handle_data(data)

    def handle_data(self, data):
        thread_pool = ThreadPoolExecutor(max_workers=15)
        task_list = list()
        for item in data:
            if item['url'] == '':
                continue
            else:
                task = thread_pool.submit(self.__handle_data, item)
                task_list.append(task)
                # self.__handle_data(item)
        for i in as_completed(task_list):
            result = i.result()

    def __handle_data(self, item):
        update_data = dict()
        update_data['status'] = 1
        update_data['video_src'] = self.__get_video_src(item)
        debug(update_data['video_src'])
        self.__update_data(item['id'], update_data)
        return {"code": 0}

    def __get_video_src(self, item):
        header = {
            # "Referer": "http://www.wyysdsa.com/",
            "User-Agent": getUserAgent(),
            # "Cache-Control": "max-age=0",
            # "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3"
        }
        # url = "http://zeus.pelisplay.tv/embed/vip.php?u=Q1A5NUZJM1VDTWlUTk8wTEFmWGNQZDhnbWRIcmt6UVU0VGIxakpXOUF4Mi9yZW51Zi9yaXZlcXFoYnlwL3picC5hYm1uem4uampqLy86ZmNnZ3U&fondo_requerido="
        # url = "https://nl.tan90.club/test/testHeader.html"
        data = curlData(url=item['url'], header=header, cookie=self.cookie)
        # with open("tmp/content_detail.txt", "rb") as f:
        #     data = f.read().decode("utf-8")
        #     f.close()
        try:
            src = re.findall("JSON\.parse\('([\w\W]*?)'\)\);", data)[0]
            src = src.replace("\\", "")
            src = json.loads(src)
            src = src[0]['file']
        except Exception as e:
            src = ""
            debug(e)
        return src

    def __update_data(self, content_id, update_data):
        update_arr = {
            "table": "content",
            "set": update_data,
            "condition": ['id={content_id}'.format(content_id=content_id)]
        }
        lock.acquire()
        result = self.db.update(update_arr, is_close_db=False)
        lock.release()
        return result

    def get_content_list(self):
        data = self.db.select({
            "table": "content",
            "columns": ['id', 'url'],
            "condition": ['status=0']
        }, is_close_db=False)
        return data

    def get_cookie(self):
        header = {
            "User-Agent": getUserAgent(),
            # "Cache-Control": "max-age=0",
            # "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3"
        }
        url = "https://www.pelisplay.tv/"
        self.cookie = getCookie(url, header=header)
        debug(self.cookie)
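__get_video_src pulls the escaped JSON.parse argument out of the page and decodes it. An isolated sketch run on a hypothetical script fragment instead of a live curlData response:

import json
import re

data = r"""player.setup(JSON.parse('[{\"file\":\"https://example.com/video.mp4\"}]'));"""  # hypothetical
src = re.findall(r"JSON\.parse\('([\w\W]*?)'\)\);", data)[0]
src = src.replace("\\", "")  # strip the escaping, as __get_video_src does
print(json.loads(src)[0]['file'])  # https://example.com/video.mp4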
Example 7
class GetRecipeVideo(object):
    def __init__(self):
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        self.download()

    def download(self):
        data = self.get_tmp_content()
        self.__download(data)

    def __download(self, data):
        for item in data:
            url = "https://www.youtube.com/watch?v=%s" % item['video_id']
            debug("开始抓取:--> {video_id}".format(video_id=item['video_id']))
            try:
                youtube = YouTube(url)
                youtube.streams.filter(subtype="mp4").first().download(
                    "/Users/cpx/code/py/recipe/data/recipe/",
                    filename=item['video_id'])
                self.__update_data(item['id'])
            except Exception as e:
                debug(e)

    def __update_data(self, list_id):
        """
        :param list_id:
        :return:
        """
        update_arr = {
            "table": "tmp_content",
            "set": {
                "status": 1
            },
            "condition": ['id={list_id}'.format(list_id=str(list_id))]
        }
        result = self.db.update(update_arr, is_close_db=False)
        return result

    def get_tmp_content(self):
        data = self.db.select(
            {
                "table": "tmp_content",
                "columns": ['id', 'video_id'],
                "condition": ['status=0']
            },
            is_close_db=False)
        return data

    def handle_data(self):
        self.move_data()
        # data = self.get_data()

    def get_data(self):
        select_arr = {"table": "recipe_content"}
        data = self.db.select(select_arr, is_close_db=False)
        return data

    def move_data(self):
        category = self.get_category()
        for item in category:
            data = self.get_list_by_type_id(item['id'])
            self.__move_data(data)

    def __move_data(self, data):
        for item in data:
            content = self.get_content_by_list_id(item['id'])
            try:
                content = content[0]
                content['status'] = 0
                self.__insert_data(content)
            except Exception as e:
                debug(e)

    def __insert_data(self, insert_arr):
        sql = self.db.getInsertSql(insert_arr, "tmp_content")
        result = self.db.insert(sql, is_close_db=False)
        return result

    def get_list_by_type_id(self, type_id):
        data = self.db.select(
            {
                "table": "list",
                "condition":
                ['recipe_type_id={type_id}'.format(type_id=type_id)],
                "limit": [0, 20]
            },
            is_close_db=False)
        return data

    def get_content_by_list_id(self, list_id):
        data = self.db.select(
            {
                "table": "content",
                "columns": ['video_id', 'list_id'],
                "condition":
                ["list_id={list_id}".format(list_id=str(list_id))]
            },
            is_close_db=False)
        return data

    def get_category(self):
        data = self.db.select(
            {
                "table": "type",
                "condition": ['keyword<>""']
            },
            is_close_db=False)
        return data
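A minimal driver sketch; it assumes pytube's YouTube class, the project's DBConfig and debug helpers, and write access to the hard-coded download directory are all available exactly as the class above expects:

if __name__ == "__main__":
    # downloads the first mp4 stream of every tmp_content row with status=0
    GetRecipeVideo().run()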
Example 8
class RecipeListSpider(object):
    def __init__(self):
        self.db = DBConfig()
        self.recipe_type = RecipeType()

    def __del__(self):
        self.db.closeDB()

    def run(self):
        """
        start get recipe list
        :return:
        """
        self.get_recipe_list()

    def get_list(self, condition=None, limit=None):
        """
        :return:
        """
        select_arr = {"table": "list", "condition": ['status=0']}
        data = self.db.select(select_arr, is_close_db=False)
        return data

    def get_category(self):
        """
        get every recipe category together with its page data
        :return:
        """
        return self.recipe_type.get_category()

    def get_recipe_list(self):
        """
        :return:
        """
        self.__get_recipe_list()

    def __get_recipe_list(self):
        """
        :return:
        """
        info = self.get_category()
        for item in info:
            self.__get_recipe_list_child(item)

    def __set_status(self, category_id):
        """
        :param category_id:
        :return:
        """
        update_arr = {
            "table": "type",
            "set": {
                "status": 1
            },
            "condition": ['id={category_id}'.format(category_id=category_id)]
        }
        result = self.db.update(update_arr, is_close_db=False)
        if result == 0:
            debug("更新状态出错, 出错原因:unknown")
            return

    def __get_recipe_list_child(self, info):
        """
        :param info:
        :return:
        """
        try:
            page_list = json.loads(info['page_num'])['page_list']
        except Exception as e:
            debug(e)
            self.__set_status(info['id'])
            return
        category = info['keyword']
        if category == "":
            self.__set_status(info['id'])
            return
        page_list = page_list.split(",")
        recipe_list_thread = RecipeListThread(page_list, info)
        recipe_list_thread.run()
        self.__set_status(info['id'])
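The page_num field that __get_recipe_list_child unpacks is the JSON blob RecipeGetPage stores (Example 3). A tiny round-trip sketch with a hypothetical type row:

import json

info = {"id": 7, "keyword": "dessert",
        "page_num": json.dumps({"page_list": "1,2,3"})}  # hypothetical type row
page_list = json.loads(info['page_num'])['page_list'].split(",")
print(page_list)  # ['1', '2', '3']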
Example 9
class GetConstitutionList(object):
    def __init__(self):
        # database connection
        # self.ws_db = phoenix_db.DBConfig()
        self.count = 0
        self.db = DBConfig()

    def __del__(self):
        self.db.closeDB()
        debug("本次一共获取到了%s条数据" % str(self.count))
        # self.ws_db.closeDB()

    def getAllConstitutionStart(self):
        try:
            record = self.db.select({"table": "constitutions_record", "condition": ['is_over=0']}, is_close_db=False)
            next_page = record[0]['page']
        except:
            next_page = 1
        while True:
            try:
                data = self.getConstitutionList(next_page)
            except:
                debug("内容获取出错,重新获取")
                continue
            # get the next page number
            try:
                tmpNextPage = re.findall('href="javascript:toUpDownPage\(\'(\d+)\'\);">下一页<\/a>', data)[0]
                debug("当前的页码是:%s" % str(next_page))
                debug("获取到的下一页页码是:%s" % str(tmpNextPage))
            except:
                debug("下一页的页码获取出错")
                break
            self.getAllConstitutionHandle(data, "北京")
            update_arr = {
                "table": "constitutions_record",
                "condition": ['id=2'],
                "set": {
                    "page": tmpNextPage,
                    "is_over": 0
                }
            }
            self.db.update(update_arr, is_close_db=False)
            if int(next_page) >= int(tmpNextPage):
                break
            else:
                next_page = tmpNextPage
        debug("本次抓取完毕")
        updatetArr = {
            "table": "constitutions_record",
            "condition": ['id=2'],
            "set": {
                "is_over": 1
            }
        }
        self.db.update(updatetArr, is_close_db=False)

    def getAllConstitution(self, fun):
        """
        fetch all laws and regulations
        :param fun:
        :return:
        """
        url = "http://210.82.32.100:8081/FLFG/"
        dcap = dict(DesiredCapabilities.FIREFOX)
        ip = virtualIp()
        dcap['phantomjs.page.customHeaders.X-FORWARDED-FOR'] = ip
        dcap['phantomjs.page.customHeaders.CLIENT-IP'] = ip
        firefox_options = Options()
        firefox_options.add_argument("--headless")
        firefox_options.add_argument('--disable-gpu')
        driver = webdriver.Firefox(firefox_options=firefox_options, desired_capabilities=dcap)
        driver.get(url)
        sleep(3)
        cloumn = driver.find_elements_by_class_name("cloumn")
        # wait until the fourth "cloumn" block has rendered, re-querying each second
        while len(cloumn) < 4:
            sleep(1)
            cloumn = driver.find_elements_by_class_name("cloumn")
        cloumn = cloumn[3]
        cloumntitle = cloumn.find_elements_by_class_name("threecloumntitle")
        cloumntitleLength = len(cloumntitle)
        current_handle = driver.current_window_handle
        for i in range(cloumntitleLength):
            try:
                list_a = cloumntitle[i].find_elements_by_tag_name("a")
            except:
                list_a = list()
                debug("省份列表获取出错")
            list_a_len = len(list_a)
            for k in range(list_a_len):
                # get the province name
                try:
                    province = list_a[k].text
                except:
                    debug("省份获取出错,继续执行,省份标记锚点为" + str(k))
                    province = str(k)
                debug(province + ":")
                try:
                    list_a[k].click()
                except:
                    debug("点击失败")
                sleep(3)
                all_handles = driver.window_handles
                sleep(3)
                for handle in all_handles:
                    if handle != current_handle:
                        driver.switch_to_window(handle)
                        sleep(1)
                        data = driver.page_source
                        htmlData = BeautifulSoup(data, "html.parser")
                        try:
                            url = htmlData.find_all("iframe", attrs={"id": "rightpage"})[0].attrs['src']
                            url = re.sub("(有效)", "有效,已被修正,失效", url)
                            driver.execute_script("location.href='" + url + "'")
                            sleep(3)
                            # click the 50-items-per-page option
                            try:
                                driver.find_element_by_id("span_pagesize_50").click()
                                sleep(3)
                            except:
                                pass
                            data = driver.page_source
                            # grab the page text so the page number can be compared later
                            try:
                                nextPage = re.findall(r'<a[\w\W]*?href="([\w\W]*?)">下一页', data)
                                nextPage = re.findall(r'<a[\w\W]*?href="([\w\W]*?)\)', nextPage[0])
                                nextPage = re.findall("(\d+)", nextPage[1])
                                nextPage = nextPage[0]
                            except:
                                nextPage = 0
                            # process the data
                            while True:
                                tmpPage = int(nextPage) - 1
                                debug("第" + str(tmpPage) + "页:")
                                fun(data, province)
                                nextPageElement = driver.find_element_by_class_name("td")
                                try:
                                    nextPageElement = nextPageElement.find_elements_by_tag_name("a")[1]
                                    nextPageElement.click()
                                    sleep(3)
                                    data = driver.page_source
                                    try:
                                        tmpNextPage = re.findall(r'<a[\w\W]*?href="([\w\W]*?)">下一页', data)
                                        tmpNextPage = re.findall(r'<a[\w\W]*?href="([\w\W]*?)\)', tmpNextPage[0])
                                        tmpNextPage = re.findall("(\d+)", tmpNextPage[1])
                                        tmpNextPage = tmpNextPage[0]
                                        if nextPage == tmpNextPage:
                                            break
                                        else:
                                            nextPage = tmpNextPage
                                    except:
                                        break
                                except:
                                    break
                            # click the next page
                        except Exception as e:
                            debug(e)
                        debug("")
                        driver.close()
                        sleep(1)
                        driver.switch_to_window(all_handles[0])
                        sleep(2)
        driver.quit()

    def getConstitutionList(self, cur_page):
        url = "http://210.82.32.100:8081/FLFG/flfgGjjsAction.action"
        referer = "http://210.82.32.100:8081/FLFG/flfgGjjsAction.action"
        post = {
            "pagesize": "20",
            "pageCount": "500",
            "curPage": cur_page,
            "resultSearch": "false",
            # "lastStrWhere": "+SFYX:(有效)++^+ZLSX:(01~02~03~04~05~06~08~09~10~11~12~23)+NOT+TXTID=bj+^+SFFB=Y+",
            "lastStrWhere": "  SFYX:(有效~已被修正~失效) ^(ZLSX:1111 ~ZLSX=01)  ^ BMFL:(03)  ^ SFFB=Y ",
            "bt": "",
            "flfgnr": "",
            "sxx": "有效,已被修正,失效",
            # "sxx": "有效",
            "zlsxid": "12",
            "bmflid": "",
            "xldj": "",
            "bbrqbegin": "2018-09-01",
            "bbrqend": "2018-12-17",
            "sxrqbegin": "",
            "sxrqend": "",
            "zdjg": "",
            "bbwh": ""
        }
        header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
        }
        data = curlData(url=url, value=post, referer=referer, header=header)
        return data

    def getAllConstitutionHandle(self, data, province):
        data = re.findall(r'<a[\w\W]*?href="javascript:showLocation([\w\W]*?);"', data)
        old1 = ""
        old5 = ""
        i = 0
        thread_list = list()
        for k, v in enumerate(data):
            data[k] = tuple(v.split("'"))
            try:
                if data[k][1] == old1 and data[k][7] == old5:
                    continue
                i = i + 1
                thread_list.append(ConstitutionThread(data[k][1], data[k][7], data[k][3], province, i))
                old1 = data[k][1]
                old5 = data[k][7]
            except:
                pass
        i = len(thread_list)
        for m in range(i):
            thread_list[m].start()
        for m in range(i):
            thread_list[m].join()
        i = thread_list[0].getRv()
        # reset the counter
        thread_list[0].reset()
        self.count = self.count + i
        return 1

    def getConstitutionData(self, flfgID, zlsxid, showDetailType, province):
        # From inspecting the site, the detail data clearly comes from the url below, which takes
        # the GET parameters flfgID, zlsxid and keyword; the first two are required and are taken
        # from the js data passed by the list page
        flag = False
        url = "http://210.82.32.100:8081/FLFG/flfgByID.action"
        get = dict()
        get['flfgID'] = flfgID
        get['showDetailType'] = showDetailType
        get['zlsxid'] = zlsxid
        get['keyword'] = ""
        get = urlencode(get)
        url = url + "?" + get
        while True:
            try:
                data = curlData(url, get, url)
                break
            except:
                pass
        try:
            data = data.decode("utf-8")
        except:
            pass
        # with open("constitution.txt", "wb") as f:
        #     f.write(data.encode("utf-8"))
        #     f.close()
        # with open("constitution.txt", "rb") as f:
        #     data = f.read().decode("utf-8")
        #     f.close()
        handleDataAll = BeautifulSoup(data, "html.parser")
        handleData = handleDataAll.find_all("table")
        columns_list = ['type', "department_type", 'office', 'reference_num', 'issue_date', 'execute_date',
                        'timeliness']
        columns_name_list = ['资料属性:', '部门分类:', '制定机关:', '颁布文号:', '颁布日期:', '施行日期:', '时 效 性:']
        # get the basic header information
        try:
            table_data = handleData[0].find_all("td")
        except:
            # fall back to an empty list so the loop below is simply skipped
            table_data = []
            flag = True
        type_data = dict()
        type_data['url'] = url
        for k, v in enumerate(table_data):
            try:
                if (k + 1) % 2 == 1:
                    type_data[columns_list[columns_name_list.index(table_data[k].getText().strip())]] = table_data[
                        k + 1].getText().strip()
            except:
                type_data[columns_list[columns_name_list.index(table_data[k].getText().strip())]] = "数据获取出错"
        # next, get the title and content
        try:
            type_data['title'] = handleDataAll.find_all("div", attrs={"class": "bt"})[0].getText().strip()
        except:
            type_data['title'] = "标题获取出错"
            flag = True
        # get the content body
        try:
            type_data['content'] = str(handleDataAll.find_all("div", attrs={"id": "content"})[0])
        except:
            flag = True
        type_data['province'] = province
        if flag:
            type_data['is_get_error'] = 1
        else:
            type_data['is_get_error'] = 0
        while True:
            try:
                sql = self.db.getInsertSql(type_data, "constitutions")
                result = self.db.insert(sql, is_close_db=False)
                break
            except Exception as e:
                debug(e)
        return result
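The paging loop in getAllConstitutionStart hinges on pulling the next page number out of the 下一页 (next page) link. A trimmed, isolated sketch of that extraction on a hypothetical anchor fragment:

import re

data = """<a href="javascript:toUpDownPage('5');">下一页</a>"""  # hypothetical markup
next_page = re.findall(r"toUpDownPage\('(\d+)'\)", data)[0]
print(next_page)  # 5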