Example #1
0
 def handle_comic_data(self, data):
     """
     Parse a chapter-list JSON payload and insert each chapter row.

     :param data: raw JSON string from the comic API
     :return: None
     """
     try:
         data = json.loads(data)
     except Exception as e:
         # Bug fix: previously execution fell through with `data` still a
         # string, crashing on data["succ"] below. Bail out instead.
         debug(e)
         return
     if not data["succ"]:
         debug("not succ")
         return
     debug("succ 获取成功 ====> ", data["succ"])
     try:
         chapter_list = data["result"]["list"]
     except Exception as e:
         debug(e)
         return
     for item in chapter_list:
         # Rename the API's "id" to our schema's "comic_id"; drop unused keys.
         item["comic_id"] = item["id"]
         del item["cjid"]
         del item["id"]
         item["view"] = 0
         lock.acquire()
         try:
             sql = self.db.getInsertSql(item, self.comic_table)
             result = self.db.insert(sql, is_close_db=False)
         finally:
             # Bug fix: release the lock even if the insert raises,
             # otherwise every other worker thread deadlocks.
             lock.release()
         if result == 0:
             debug("数据插入失败了")
Example #2
0
 def __handle(self, item):
     """Open the game detail page linked from *item* and run its scraper."""
     try:
         detail_url = item.find(name="a").attrs['href']
         GamePage(detail_url, self).run()
     except Exception as e:
         debug(e)
Example #3
0
 def __handle(self, game_title):
     """
     Look up a game row by title and clone it into category 9.

     :param game_title: title substring to search for
     :return: None
     """
     # NOTE(review): game_title is interpolated straight into the SQL
     # condition — if it can contain quotes this is injectable; the db
     # helper exposes no placeholder mechanism here to verify against.
     data = self.game_spider.db.select(
         {
             "table":
             "game",
             "condition": [
                 "title like '%{title}%' and game_url not like '%youtube%' and game_url not like '%google%' and game_url != ''"
                 .format(title=game_title)
             ],
             # "columns": ['title', 'id']
         },
         get_all=False,
         is_close_db=False)
     try:
         # Drop the primary key so the insert below creates a new row.
         del data['id']
     except (KeyError, TypeError):
         # Bug fix: was a bare `except:` which also swallowed
         # SystemExit/KeyboardInterrupt. No matching row (data is None
         # or lacks 'id') means there is nothing to clone.
         return
     data['category'] = 9
     data['title'] = game_title
     result = self.__insert(data)
     if result != 0:
         debug("数据存储 =====> 成功")
     else:
         debug("数据存储 =====> 失败")
Example #4
0
def get_job():
    """
    Fetch an operator's job configuration from redis.

    :return: json reply with the config, or an error payload
    """
    err_data = {"data": {}, "error_code": 1}
    operator_code = request.values.get("operator_code", 0)
    if operator_code == 0:
        # Fall back to a JSON request body when no query/form value is given.
        try:
            body = request.get_data().decode("utf-8")
            operator_code = json.loads(body)['operator_code']
        except Exception as e:
            debug(e)
            return Reply.json(err_data)
    key = "op_{operator_code}".format(operator_code=operator_code)
    data = redis.get(key)
    if data is None:
        return Reply.json(err_data)
    try:
        data = json.loads(data)
    except Exception as e:
        data = err_data
        debug(e)
    return Reply.json(data)
 def __change_top_ncx(self, tmp_dir, item):
     """
     Rewrite the first navPoint title in the book's .ncx table of contents.

     :param tmp_dir: directory holding the unpacked epub (must end with /)
     :param item: dict with at least a 'title' key
     :return: True when an ncx file was found (even if the swap failed),
              False when tmp_dir contains no ncx file
     """
     filename = ""
     for name in os.listdir(tmp_dir):
         if name.endswith("ncx"):
             filename = tmp_dir + name
             break
     if filename == "":
         return False
     with open(filename, "rb") as f:
         data = f.read().decode("utf-8")
     bs4 = BeautifulSoup(data, "xml")
     result = bs4.find_all("navPoint", attrs={"playOrder": "1"})
     try:
         result = str(result[0].find("text"))
         replacement = "<text>{title}</text>".format(title=item['title'])
         debug(result + " ============>    " + replacement)
         # Bug fix: re.sub treated the extracted markup as a regex, so any
         # metacharacter in the old title ('(', '?', '+', ...) corrupted
         # the substitution. A literal replacement is what was intended.
         data = data.replace(result, replacement)
     except Exception as e:
         debug(e)
     with open(filename, "wb") as f:
         f.write(data.encode("utf-8"))
     return True
Example #6
0
 def run(self):
     """Insert the fixture user row, then log completion."""
     row = {"table": "p_user", "username": "******"}
     self.db.insert(row, is_close_db=False)
     debug("ok")
Example #7
0
 def __get_game_li_list(cls, bs_html):
     """Return the <li> game entries from the listing <ul>, or [] if absent."""
     uls = bs_html.find_all(name="ul", attrs={"class": "_2tY3C"})
     try:
         items = uls[0].find_all(name="li", attrs={"class": "_1cn3x"})
     except Exception as e:
         debug(e)
         items = list()
     return items
 def test(self):
     """Smoke-test fetching a Crunchyroll ajax listing page via proxy."""
     url = "https://www.crunchyroll.com/videos/anime/popular/ajax_page?pg=3"
     # The endpoint expects itself as the referer.
     response = curl_data(url, referer=url, open_virtual_ip=True)
     debug(response)
Example #9
0
 def __get_title(cls, data):
     """Extract the item title text; returns "" when the markup is missing."""
     try:
         header = data.find(name="div",
                            attrs={"class": "item-header__title"})
         title = header.find(name="h1").get_text().strip()
     except Exception as e:
         debug("get title error: {error}".format(error=e.__str__()))
         title = ""
     return title
Example #10
0
 def __get_describe(cls, data):
     """Return the description block's HTML as a string ("" on failure)."""
     try:
         node = data.find(name="div",
                          attrs={"class": "user-html__with-lazy-load"})
         html = str(node)
     except Exception as e:
         debug("get describe error: {error}".format(error=e.__str__()))
         html = ""
     return html
Example #11
0
 def __get_img(cls, data):
     """Return the preview image src attribute ("" when not present)."""
     preview = data.find(name="div", attrs={"class": "-preview-live"})
     try:
         src = preview.find(name="img").attrs['src']
     except Exception as e:
         debug("get img error: {error}".format(error=e.__str__()))
         src = ""
     return src
 def __handle(self, item):
     """
     Download one apk and store it under static/spider/game_download/.

     :param item: dict with 'download_url' and 'id'
     :return: None
     """
     with self.auto_handle_exception():
         debug("开始下载 ==========> {name}".format(name=item["download_url"]))
         data = curl_data(item["download_url"])
         debug(data)
         path = "static/spider/game_download/{name}.apk".format(
             name=item["id"])
         with open(path, "wb") as f:
             # Fix: dropped the redundant f.close(); the with-block
             # already closes the file on exit.
             f.write(data)
Example #13
0
 def __get_next_page(cls, data):
     """
     Extract the absolute URL of the pagination "next" link.

     :param data: BeautifulSoup tag for the listing page
     :return: (next_page_url, is_continue) — ("", False) when no next link
     """
     # Bug fix: attrs was the SET {"class", "k89zG"} instead of a dict, so
     # BeautifulSoup could never match the link by class as intended.
     next_page = data.find(name="a", attrs={"class": "k89zG"})
     is_continue = True
     try:
         next_page = "https://codecanyon.net" + next_page.attrs['href']
     except Exception as e:
         debug("get next page error: {error}".format(error=e.__str__()))
         next_page = ""
         is_continue = False
     return next_page, is_continue
Example #14
0
 def __insert(self, insert_arr):
     """
     Insert a game row under the shared lock and log the outcome.

     :param insert_arr: column -> value mapping for the "game" table
     :return: None
     """
     lock.acquire()
     try:
         sql = self.game_spider.db.getInsertSql(insert_arr, "game")
         result = self.game_spider.db.insert(sql, is_close_db=False)
     finally:
         # Release even if the insert raises so other threads don't block.
         lock.release()
     if result == 0:
         # Bug fix: both branches previously logged "插入成功". Elsewhere in
         # this project a 0 result from db.insert is treated as failure.
         debug("游戏:{name} ============> 插入失败".format(
             name=insert_arr['title']))
     else:
         debug("游戏:{name} ============> 插入成功".format(
             name=insert_arr['title']))
Example #15
0
 def __get_game_url(self, game_url):
     """Resolve the playable iframe src from a game's frame page."""
     page = self.__get_frame_page(game_url)
     soup = BeautifulSoup(page, "html.parser")
     frame = soup.find(name="iframe",
                       attrs={"class": "full-screen-preview__frame"})
     try:
         return frame.attrs['src']
     except Exception as e:
         debug("get game_url error: {error}".format(error=e.__str__()))
         return ""
 def vote(self):
     """Fire one vote request for this entry and log the raw response."""
     stamp = int(time.time() * 1000)
     url = ("http://fyxqt.fuyuxiangqi.cn/wxtp/web/aipainew/"
            "aipainewAction!dianji.action?t={time_stamp}").format(
                time_stamp=stamp)
     payload = {"id": self.id, "hdid": self.wx_id, "yz": ""}
     data = curl_data(url,
                      value=payload,
                      cookie=self.cookie,
                      header=self.__get_header())
     debug(data)
Example #17
0
 def test_ip(self):
     """
     Smoke-test the proxy path by downloading one video fragment.

     Saves the body to test.mp4 and logs the raw bytes.
     """
     url = "https://a-vrv.akamaized.net/evs/1631771ddd0df6e6f7c60770955fe64f/assets/p/6bbmnx58kgajfsd_,1278465.mp4,1278467.mp4,1278463.mp4,1278461.mp4,1278451.mp4,.urlset/fragment-21-f1-a1-x3.m4s?t=exp=1565753706~acl=/evs/1631771ddd0df6e6f7c60770955fe64f/assets/*~hmac=be0ef2b7b8215367e2069db78781d28627a051399f80b10240d73da945ffc162"
     data = curl_data(
         url=url,
         referer="https://static.crunchyroll.com/vilos/player.html",
         open_virtual_ip=True)
     with open("test.mp4", "wb") as f:
         # Fix: dropped the redundant f.close(); the context manager
         # closes the file on exit.
         f.write(data)
     debug(data)
 def __get_tr_data(cls, bs_4, info):
     """
     Pull the stripped text out of a table cell.

     :param bs_4: cell tag (anything exposing get_text)
     :param info: label used in the error log
     :return: stripped text, or "" when extraction fails
     """
     try:
         return bs_4.get_text().strip()
     except Exception as e:
         debug("{info}, error: {error}".format(info=info, error=e.__str__()))
         return ""
Example #19
0
 def test(self):
     """Fetch a listing page, cache it to disk, and xpath the <li> ids."""
     url = "https://www.crunchyroll.com/videos/anime/popular/ajax_page?pg=1"
     body = requests.get(url).text
     debug(body)
     with open("./test.html", "wb") as f:
         f.write(body.encode("utf-8"))
     html = etree.parse("./test.html", etree.HTMLParser())
     debug(html.xpath("//*[@id='main_content']//li/@id"))
Example #20
0
 def handle(self):
     """Copy every sheet of the source workbook and save the new file."""
     source = xlrd.open_workbook(self.origin_path)
     self.new_excel = copy(source)
     for idx, sheet in enumerate(source.sheets()):
         debug(sheet.name)
         self.__handle(sheet, self.new_excel.get_sheet(idx))
     self.new_excel.save("static/excel/new_excel.xls")
Example #21
0
 def get_comic_detail(self):
     """
     Walk every comic from the DB and fetch its detail data, pausing
     between requests to stay polite to the remote server.
     """
     comic_list = self.get_comic_list_from_db()
     for item in comic_list:
         self.get_comic_data(item)
         # Bug fix: the log claimed a 3-second wait while sleep(2) waited
         # two; the message now matches the actual delay. Also dropped the
         # unused `flag` resume-from-id scaffolding.
         debug("等待2秒再继续")
         sleep(2)
 def __handle(self, item, path, name, category_id):
     """Build a music_list row for one file and copy it on success."""
     src = path + name + "/" + item
     seconds, minutes = self.__get_music_time(src)
     singer, song = self.__get_singer_and_name(item)
     insert_arr = {
         "singer": singer,
         "minutes": minutes,
         "second": seconds,
         "name": song,
         "category": category_id,
     }
     result = self.__insert(insert_arr, "music_list")
     if result:
         # The insert result doubles as the new row id / target filename.
         shutil.copy(src, self.aim_pos + str(result) + ".mp3")
     debug(name + " => " + item)
Example #23
0
 def auto_handle_exception(self,
                           before_callback=default_callback,
                           error_callback=default_callback,
                           after_callback=default_callback,
                           throw_exception_flag=False,
                           **kwargs):
     """
     Generator body of a with-block wrapper that funnels exceptions into
     callbacks instead of letting them propagate.

     :param before_callback: called with **kwargs before the block runs
     :param error_callback: called with **kwargs when the block raises
     :param after_callback: called with **kwargs after a clean run; it is
                            skipped entirely when the block raises
     :param throw_exception_flag: despite the name, the exception is never
                                  re-raised — this flag only controls
                                  whether it is logged via debug()
     :param kwargs: forwarded verbatim to every callback
     """
     try:
         before_callback(**kwargs)
         yield
         after_callback(**kwargs)
     except Exception as e:
         error_callback(**kwargs)
         if throw_exception_flag:
             # NOTE(review): only logs; callers expecting a re-raise
             # (the flag name suggests one) should confirm this is intended.
             debug(e)
 def __get_source_url(cls, bs_4):
     """
     Find the epub download link in the book's files table.

     :param bs_4: book detail page soup
     :return: href string, or False when no epub entry exists
     """
     table = bs_4.find(name="table", attrs={"class": "files"})
     try:
         cell = table.find(name="td",
                           attrs={"content": "application/epub+zip"})
         return cell.find(name="a").attrs['href']
     except Exception as e:
         debug("资源链接获取出错,线程终止, error: {error}".format(error=e.__str__()))
         return False
 def __get_url(cls, item):
     """
     Get a book's detail-page url from a listing <li>.

     :param item: listing entry tag
     :return: href string, or False when the entry is not Dutch or
              the markup cannot be parsed
     """
     try:
         url = item.find(name="a").attrs['href']
         if "Dutch" not in item.get_text():
             return False
     except Exception as e:
         debug("书籍url获取出错, 线程停止,error: {error}".format(error=e.__str__()))
         url = False
     return url
Example #26
0
 def __get_cover_img(cls, data):
     """Collect the gallery image hrefs as one comma-joined string."""
     gallery = data.find(name="div",
                         attrs={"class": "item-preview-image__gallery"})
     try:
         anchors = gallery.find_all(name="a")
         cover_str = ",".join(a.attrs['href'] for a in anchors)
     except Exception as e:
         debug("get cover img error: {error}".format(error=e.__str__()))
         cover_str = ""
     return cover_str
 def handle(self):
     """Hit the local download endpoint through the proxy and log the body."""
     url = "http://192.168.50.177:8083/download"
     header = {
         "User-Agent":
         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
     }
     payload = {"name": "ok"}
     data = curl_data(url,
                      value=payload,
                      header=header,
                      open_virtual_ip=True)
     debug(data)
 def __handle(self, item):
     """
     Unpack one downloaded epub, extract its cover, then clean up.

     :param item: dict with at least an 'id' key
     :return: None
     """
     # Unexpected errors inside the block go through auto_handle_exception.
     with self.auto_handle_exception(throw_exception_flag=True):
         tmp_dir = self.tmp_path + "{dirname}/".format(dirname=item['id'])
         if not os.path.exists(tmp_dir):
             os.mkdir(tmp_dir)
         # Bug fix: the literal "(unknown).epub" ignored its format argument,
         # so every item resolved to the same bogus path. Books are stored
         # as <id>.epub (see the os.remove below).
         filename = self.dir + "{filename}.epub".format(filename=item['id'])
         try:
             self.__unzip(filename, tmp_dir)
             self.__get_cover(tmp_dir, item)
         except Exception as e:
             debug("此电子书 ============================>  无cover图片,删除")
             self.__delete(item)
             os.remove(self.dir + str(item['id']) + ".epub")
         shutil.rmtree(tmp_dir)
Example #29
0
def search_by_code():
    """
    Look up a stored config by operator_code.

    :return: success payload with the config, or an error reply
    """
    debug(redis.keys())
    operator_code = request.values.get("operator_code", 0)
    if operator_code == 0:
        return Reply.error("failed")
    data = redis.get("op_{operator_code}".format(operator_code=operator_code))
    if data is None:
        return Reply.error("empty")
    return Reply.success([{"operator_code": operator_code, "config": data}])
Example #30
0
 def __download(self, item):
     """
     Download one epub to static/spider/epub/<id>.epub, skipping books
     that already exist on disk.

     :param item: dict with 'id', 'title' and 'source_url'
     :return: None
     """
     # Bug fix: the path literal "(unknown).epub" ignored the filename
     # format argument, so the existence check and the save path never
     # varied per book. Use <id>.epub consistently for check and save.
     path = "static/spider/epub/{filename}.epub".format(filename=item['id'])
     if os.path.exists(path):
         debug("电子书:{title} ========> 已经存在, 跳过".format(title=item['title']))
         return
     with self.auto_handle_exception(error_callback=self.__error_callback,
                                     throw_exception_flag=True,
                                     item=item):
         data = curl_data(self.url_prefix + item['source_url'])
         with open(path, "wb") as f:
             # Redundant f.close() removed; the with-block closes it.
             f.write(data)
         debug("电子书:{title} =======> 下载成功".format(title=item['title']))