Ejemplo n.º 1
0
    def __init__(self):
        """Initialise the ablum_youku_dota spider.

        Prints the start banner, deep-copies the program template from
        BASE_CONTENT, records the seed channel pages and stores whatever
        spiderTool.openFile returns for today's dated output path.
        """
        print("Do spider ablum_youku_dota.")
        self.program = copy.deepcopy(BASE_CONTENT["program"])
        self.program_sub = PROGRAM_SUB
        # One Youku channel video page per uploader (uploader name in the
        # trailing comment). The first URL is listed twice, as in the
        # original list.
        # NOTE(review): confirm whether the duplicated first seed is needed.
        self.seedList = [
            'http://i.youku.com/i/UMzIzOTQwMTg0/videos',  # Wu'er
            'http://i.youku.com/i/UMzIzOTQwMTg0/videos',  # Wu'er
            'http://i.youku.com/i/UMTEwNDcxNDAw/videos',  # Lizi DOTA
            'http://i.youku.com/u/UNjM3ODczNjA=/videos',  # Shahei
            'http://i.youku.com/i/UMzE2OTY2NjUy/videos',  # Wusheng 2009
            'http://i.youku.com/u/UMjU3MzI2NDMy/videos',  # Xiaoman
            'http://i.youku.com/u/UMjg4MjQ1MzQ0/videos',  # Manlou Shuiping
            'http://i.youku.com/i/UNjAzOTE5NTI=/videos',  # nada
            'http://i.youku.com/i/UMzYwNTg1NjI0/videos',  # Niuwa
            'http://i.youku.com/i/UMzcxMDA5OTI4/videos',  # Kaiwen (Kevin)
            'http://i.youku.com/i/UNTA3ODY5NzUy/videos',  # Xiaoguai
            'http://i.youku.com/u/UMzcyMTMxODQ4/videos',  # Pis
            'http://i.youku.com/i/UMzE3MTg2MjQ0/videos',  # Meixi (Messi)
            'http://i.youku.com/i/UNDI1NTMxMjMy/videos',  # Qingshu
            'http://i.youku.com/i/UMTk4ODk3NzAw/videos',  # Yanyuan
            'http://i.youku.com/i/UMzAxOTg1MjU2/videos',  # Wanzi
            'http://i.youku.com/i/UMTQxMzc0MzQ2MA==/videos',  # Piao Yisheng
            'http://i.youku.com/u/UMzM2ODMxMDIw/videos',  # Yeliu Laixi
            'http://i.youku.com/i/UMzU4NjU5ODQ4/videos',  # Zero
            'http://i.youku.com/i/UMzA0MDY4OTE2/videos',  # Music Kafei
            'http://i.youku.com/i/UMTcwMDM5MjM0NA==/videos',  # Colour
            'http://i.youku.com/i/UNjA0Mzc0NjY4/videos',  # Xiao K
        ]
        self.dataDir = os.path.join('.', 'data')
        self.today = time.strftime('%Y%m%d')
        out_name = "ablum_youku_dota_" + self.today + ".txt"
        self.dataFile = spiderTool.openFile(
            os.path.join(self.dataDir, out_name))
Ejemplo n.º 2
0
 def __init__(self):
     """Initialise the mgtv_variety_1 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the seed pages and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider mgtv_variety_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # One page per show (show title, transliterated, in the trailing
     # comment). The first URL is listed twice, as in the original list.
     self.seedList = [
         "http://www.mgtv.com/v/1/294178/f/3310829.html",
         'http://www.mgtv.com/v/1/294178/f/3310829.html',  # Aidou Wanwan Sui
         'http://www.mgtv.com/v/1/102607/f/3303906.html',  # Bagua Jianding Shiwusuo
         'http://www.mgtv.com/v/1/292503/f/3309795.html',  # Aixiao Magua Xiu
         'http://www.mgtv.com/v/6/292376/f/3309069.html',  # Xiangjiao Daka
         'http://www.mgtv.com/v/1/291580/f/3308484.html',  # Zongyi Haha Xiu
         'http://www.mgtv.com/v/1/294293/f/3304436.html',  # Dabenying de Mimi Huayuan
         'http://www.mgtv.com/v/1/110061/f/3305216.html',  # OMG Wanmei Ka
         'http://www.mgtv.com/v/1/292521/f/3304855.html',  # Chaoren Mama Dai Wa Ji
         'http://www.mgtv.com/v/1/294266/f/3304312.html',  # Mimei Dabenying
         'http://www.mgtv.com/v/1/291976/f/3285994.html',  # Mingxing Da Zhentan
         'http://www.mgtv.com/v/1/291840/f/3181892.html',  # Juzi Mingxing Zhuanfang
         'http://www.mgtv.com/v/1/292098/f/3104557.html',  # Gan ZUO Gan Wei Nvsheng Xiu
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "mgtv_variety_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 3
0
    def __init__(self):
        """Initialise the spider that writes the all_bilibili_tv_* data file.

        Builds the list of base listing pages, starts with an empty seed
        list, deep-copies the program template from BASE_CONTENT and stores
        whatever spiderTool.openFile returns for today's dated output path.
        """
        # Listing pages of the "completed series" section: pages 1, 1, 2,
        # 3, 4, 5 (page 1 is listed twice, as in the original). Each entry
        # is a separate one-item dict mapping the page URL to "34".
        # NOTE(review): "34" looks like a category id - confirm with the
        # code that consumes self.base.
        page_url = "http://www.bilibili.com/video/tv-drama-1.html#!page=%d"
        self.base = [
            {page_url % page_no: "34"} for page_no in (1, 1, 2, 3, 4, 5)
        ]

        self.seedList = []
        self.program = copy.deepcopy(BASE_CONTENT["program"])
        self.program_sub = PROGRAM_SUB
        self.dataDir = os.path.join('.', 'data')
        self.today = time.strftime('%Y%m%d')
        out_name = "all_bilibili_tv_" + self.today + ".txt"
        self.dataFile = spiderTool.openFile(
            os.path.join(self.dataDir, out_name))
Ejemplo n.º 4
0
 def __init__(self):
     """Initialise the all_mgtv_movie spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the base listing pages, starts with an empty
     seed list and program-data dict, and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider all_mgtv_movie.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # Listing pages. The first URL is listed twice, as in the original.
     # NOTE(review): the last three entries repeat 2835073 pages 3-5
     # instead of continuing the 2848093 series - possibly a copy-paste
     # slip; confirm before changing.
     self.seedBase = [
         "http://list.mgtv.com/3/--------2835073-5-1-0--.html",
         "http://list.mgtv.com/3/--------2835073-5-1-0--.html",
         "http://list.mgtv.com/3/--------2835073-5-2-0--.html",
         "http://list.mgtv.com/3/--------2835073-5-3-0--.html",
         "http://list.mgtv.com/3/--------2835073-5-4-0--.html",
         "http://list.mgtv.com/3/--------2835073-5-5-0--.html",
         "http://list.mgtv.com/3/--------2848093-5-1-0--.html",  # online movies
         "http://list.mgtv.com/3/--------2848093-5-2-0--.html",
         "http://list.mgtv.com/3/--------2835073-5-3-0--.html",
         "http://list.mgtv.com/3/--------2835073-5-4-0--.html",
         "http://list.mgtv.com/3/--------2835073-5-5-0--.html",
     ]
     self.seedList = []
     self.prgdata = {}
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_mgtv_movie_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 5
0
 def __init__(self):
     """Initialise the all_mgtv_vipmovie spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, builds the base listing URLs, resets the seed list,
     sequence counter and program-data dict, and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider all_mgtv_vipmovie.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # Listing API pages pn=1, 1, 2, 3, 4, 5 (page 1 is listed twice, as
     # in the original). Pages 6-12 were commented out in the original
     # list; they follow the same URL pattern.
     page_url = ("http://s5.hunantv.com/v5/list/pc"
                 "?ic=1&ty=3&chargeInfo=b2&if=0&sort=2&pn=%d&pc=60")
     self.seedBase = [page_url % page_no for page_no in (1, 1, 2, 3, 4, 5)]
     self.seedList = []
     self.seqNocnt = 1
     self.prgdata = {}
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_mgtv_vipmovie_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 6
0
 def __init__(self):
     """Initialise the ablum_iqiyi_variety_1 spider.

     Prints the start banner, deep-copies the program and sub-program
     templates, records the seed album pages and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider ablum_iqiyi_variety_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = copy.deepcopy(PROGRAM_SUB)
     # One page per show (show title in the trailing comment). The first
     # URL is listed twice, as in the original list.
     #
     # Every entry ends with an explicit comma. The original omitted the
     # comma after the m_208247014 URL, so Python's implicit string
     # literal concatenation fused it with the following URL into one
     # invalid seed and dropped both real seeds.
     self.seedList = [
         "http://www.iqiyi.com/a_19rrguentx.html",
         "http://www.iqiyi.com/a_19rrguentx.html",  # XFUN Chihuo Julebu
         'http://www.iqiyi.com/lib/m_208247014.html?src=search',  # The Ellen Show, season 13
         "http://www.iqiyi.com/a_19rrgjdzt1.html",  # Chunse Wubian Gaoxiao Pandian
         "http://www.iqiyi.com/a_19rrhal04x.html#vfrm=2-3-0-1",  # Shisan Yi Fenbei zhi Yi Pai Fangyan
         "http://www.iqiyi.com/a_19rrhanf9p.html",  # Zanmen Chuanyue Ba, season 2
         "http://www.iqiyi.com/a_19rrhao8al.html",  # Xing Chu Jiadao, season 3
         'http://www.iqiyi.com/a_19rrhanya1.html#vfrm=2-3-0-1',  # Shuochu Wo de Shijie
         'http://www.iqiyi.com/a_19rrhasbel.html#vfrm=2-3-0-1',  # Jiayou Xiang Weilai
         'http://www.iqiyi.com/a_19rrgu9s19.html#vfrm=2-3-0-1',  # Yi Hu Bai Ying
         'http://www.iqiyi.com/a_19rrgi7art.html#vfrm=2-3-0-1',  # Diyi Shuji (2016)
         'http://www.iqiyi.com/a_19rrgucd8p.html#vfrm=2-3-0-1',  # Xingfu Zai Nali
         'http://www.iqiyi.com/a_19rrgjahfl.html#vfrm=2-3-0-1',  # Wo Shi Da Yisheng (2016)
         'http://www.iqiyi.com/a_19rrh9rtx1.html',  # Jinye Bailemen
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "ablum_iqiyi_variety_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 7
0
 def __init__(self):
     """Initialise the all_iqiyi_children spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the base listing pages, starts with an empty
     seed list and stores whatever spiderTool.openFile returns for
     today's dated output path.
     """
     print("Do spider all_iqiyi_children.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # Listing pages; the trailing comments carry the range labels from
     # the original comments (presumably age bands - confirm). The first
     # URL is listed twice, as in the original list.
     self.seedBase = [
         "http://list.iqiyi.com/www/4/---304----------4-1-1-iqiyi--.html",
         "http://list.iqiyi.com/www/4/---304----------4-1-1-iqiyi--.html",  # 0-3
         "http://list.iqiyi.com/www/4/---304----------4-2-1-iqiyi--.html",  # 0-3
         "http://list.iqiyi.com/www/4/---1283----------4-1-1-iqiyi--.html",  # 4-6
         "http://list.iqiyi.com/www/4/---1283----------4-2-1-iqiyi--.html",  # 4-6
         "http://list.iqiyi.com/www/4/---305----------4-1-1-iqiyi--.html",  # 7-13
         "http://list.iqiyi.com/www/4/---305----------4-2-1-iqiyi--.html",  # 7-13
         "http://list.iqiyi.com/www/4/---306----------4-1-1-iqiyi--.html",  # 14-17
         "http://list.iqiyi.com/www/4/---306----------4-2-1-iqiyi--.html",  # 14-17
     ]
     self.seedList = []
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_iqiyi_children_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 8
0
 def __init__(self):
     """Initialise the all_le_children spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the base listing URLs, resets the seed list
     and image dict, and stores whatever spiderTool.openFile returns for
     today's dated output path.
     """
     print("Do spider all_le_children.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # JSON listing endpoints; the trailing comments carry the range
     # labels from the original comments (presumably age bands -
     # confirm). The first URL is listed twice, as in the original list.
     self.seedBase = [
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511001&md=&o=20&p=1&s=1",  # 0-6
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511001&md=&o=20&p=1&s=1",
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511001&md=&o=20&p=2&s=1",
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511001&md=&o=20&p=3&s=1",
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511002&md=&o=20&p=1&s=1",  # 6-12
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511002&md=&o=20&p=2&s=1",  # 6-12
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511002&md=&o=20&p=3&s=1",  # 6-12
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511003&md=&o=20&p=1&s=1",  # 12-18
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511003&md=&o=20&p=2&s=1",  # 12-18
         "http://list.le.com/apin/chandata.json?a=50041&c=5&d=1&f=511003&md=&o=20&p=3&s=1",  # 12-18
     ]
     self.seedList = []
     self.images = {}
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_le_children_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 9
0
 def __init__(self):
     """Initialise the spider announced as all_le_movie.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, builds the base search-API URLs, resets the seed list,
     image dict and sequence counter, and stores whatever
     spiderTool.openFile returns for today's dated output path. Note that
     the output file name uses the "all_le_vipmovie_" prefix.
     """
     print("Do spider all_le_movie.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # Search API pages pn=1, 1, 2, 3, 4, 5 (page 1 is listed twice, as
     # in the original). Pages 6-17 were commented out in the original
     # list; they follow the same URL pattern.
     page_url = ("http://api.vip.le.com/search/interface"
                 "?from=pc_03&sales_area=cn&user_setting_country=cn"
                 "&cg=1&ph=420001&pt=141001&dt=1&src=1&ispay=1&stype=1"
                 "&lang=zh_cn&stt=1&ps=60&pn=%d&lh=0&vt=180001"
                 "&sc=&ar=&yr=&or=1&eid=8372802130253217715")
     self.seedBase = [page_url % page_no for page_no in (1, 1, 2, 3, 4, 5)]
     self.seedList = []
     self.images = {}
     self.seqNocnt = 1
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_le_vipmovie_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 10
0
 def __init__(self):
     """Initialise the ablum_qq_wdsj spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the seed channel pages and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider ablum_qq_wdsj.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # One v.qq.com channel video page per uploader (uploader name,
     # transliterated, in the trailing comment). The first URL is listed
     # twice, as in the original list.
     self.seedList = [
         'http://v.qq.com/vplus/xiaoben520/videos',  # Xiaoben
         'http://v.qq.com/vplus/xiaoben520/videos',  # Xiaoben
         'http://v.qq.com/vplus/chengzi233/videos',  # Da Chengzi
         'http://v.qq.com/vplus/zhenshiqiguaile/videos',  # Zhenshi Qiguai Le
         'http://v.qq.com/vplus/maxkim0314/videos',  # MaxKim
         'http://v.qq.com/vplus/kamu/videos',  # Kamu
         'http://v.qq.com/vplus/yanhuang/videos',  # Yanhuang
         'http://v.qq.com/vplus/kaiqi/videos',  # Kaiqi
         'http://v.qq.com/vplus/xiaobaiMC/videos',  # T-RO Xiaobai
         'http://v.qq.com/vplus/moon/videos',  # Mingyue Zhuangzhu
         'http://v.qq.com/vplus/anyijun/videos',  # Anyi Jun
         'http://v.qq.com/vplus/xiaoxianjun/videos',  # Xiaoxian Jun
         'http://v.qq.com/vplus/dahaitv/videos',  # Dahai
         'http://v.qq.com/vplus/youranxiaotian/videos',  # Youran Xiaotian
         'http://v.qq.com/vplus/pinkfish0319/videos',  # Fenyu
         'http://v.qq.com/vplus/tianluo/videos',  # Tianluo Jiang
         'http://v.qq.com/vplus/xiaoA0v0/videos',  # Xiao A
         'http://v.qq.com/vplus/biantao233/videos',  # Biantao
         'http://v.qq.com/vplus/fou4242/videos',  # Si'er Si'er
         'http://v.qq.com/vplus/fuhao/videos',  # Fuhao
         'http://v.qq.com/vplus/qingyou/videos',  # Bi'an Qingyou
         'http://v.qq.com/vplus/imaimu/videos',  # Aimu
         'http://v.qq.com/vplus/feixiongMC/videos',  # Feixiong TV Wo de Shijie (Minecraft)
         'http://v.qq.com/vplus/pika233/videos',  # Pika
         'http://v.qq.com/vplus/xiaoyujun/videos',  # Xiaoyu
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "ablum_qq_wdsj_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 11
0
    def __init__(self):
        """Initialise the spider that writes the all_bilibili_movie_* file.

        Records the base listing pages, starts with an empty seed list,
        deep-copies the program template from BASE_CONTENT and stores
        whatever spiderTool.openFile returns for today's dated output path.
        """
        # Each entry is a one-item dict mapping a listing page URL to a
        # string value (the region label is in the trailing comment). The
        # first entry is listed twice, as in the original list.
        # NOTE(review): only the "movie_west" URLs contain "#" before
        # "!page=1"; the others read ".html!page=1" - possibly a typo,
        # confirm against the code that consumes self.base.
        self.base = [
            {"http://www.bilibili.com/video/movie_west_1.html#!page=1": "145"},  # Western
            {"http://www.bilibili.com/video/movie_west_1.html#!page=1": "145"},  # Western
            {"http://www.bilibili.com/video/movie_japan_1.html!page=1": "146"},  # Japanese
            {"http://www.bilibili.com/video/movie_chinese_1.html!page=1": "147"},  # domestic (Chinese)
            {"http://www.bilibili.com/video/movie-movie-1.html!page=1": "83"},  # other
        ]

        self.seedList = []
        self.program = copy.deepcopy(BASE_CONTENT["program"])
        self.program_sub = PROGRAM_SUB
        self.dataDir = os.path.join('.', 'data')
        self.today = time.strftime('%Y%m%d')
        out_name = "all_bilibili_movie_" + self.today + ".txt"
        self.dataFile = spiderTool.openFile(
            os.path.join(self.dataDir, out_name))
Ejemplo n.º 12
0
 def __init__(self):
     """Initialise the spider that writes the ablum_acfun_game_* file.

     Records the base channel listing pages, starts with an empty seed
     list, deep-copies the program template from BASE_CONTENT and stores
     whatever spiderTool.openFile returns for today's dated output path.
     """
     # One AcFun listing page per game channel (channel label in the
     # trailing comment). The first URL is listed twice, as in the
     # original list. The list175 page ("game live streams") was
     # commented out in the original and stays excluded.
     self.base = [
         "http://www.acfun.tv/v/list84/index.htm#page=1",  # console / single-player
         "http://www.acfun.tv/v/list84/index.htm#page=1",  # console / single-player
         "http://www.acfun.tv/v/list83/index.htm#page=1",  # game highlights
         "http://www.acfun.tv/v/list145/index.htm#page=1",  # e-sports
         "http://www.acfun.tv/v/list85/index.htm#page=1",  # League of Legends
         "http://www.acfun.tv/v/list170/index.htm#page=1",  # Overwatch
         "http://www.acfun.tv/v/list165/index.htm#page=1",  # board / card games
         "http://www.acfun.tv/v/list72/index.htm#page=1",  # Mugen
     ]
     self.seedList = []
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "ablum_acfun_game_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 13
0
 def __init__(self):
     """Initialise the mgtv_short_1 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the seed pages and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider mgtv_short_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # One page per show (show title, transliterated, in the trailing
     # comment). The first URL is listed twice, as in the original list.
     self.seedList = [
         "http://www.mgtv.com/v/6/101613/f/1094144.html",
         'http://www.mgtv.com/v/6/101613/f/1094144.html',  # Meizi Shuo Reju
         'http://www.mgtv.com/v/6/158563/f/1777920.html',  # Xiaokaxiu
         'http://www.mgtv.com/v/6/292430/f/3115009.html',  # Xianzha She
         'http://www.mgtv.com/v/6/168030/f/1863425.html',  # Kan Gou Le Mei
         'http://www.mgtv.com/v/6/291965/f/3144194.html',  # Daka Toutiao
         'http://www.mgtv.com/v/6/291842/f/3012181.html',  # Hunjian Xia
         'http://www.mgtv.com/v/6/293477/f/3207951.html',  # Fan Aidou
         'http://www.mgtv.com/v/6/158354/f/2949121.html',  # Ganjue Ziji Mengmengda
         'http://www.mgtv.com/v/6/292435/f/3172352.html',  # Tiantian Da Yu
         'http://www.mgtv.com/v/6/166144/f/3268355.html',  # Mengyan Kan Zhongkou
         'http://www.mgtv.com/v/6/168584/f/2931968.html',  # Bibi Yule Xiu
         'http://www.mgtv.com/v/6/167993/f/2973185.html',  # Malanshan Niuren Guan
         'http://www.mgtv.com/v/6/294063/f/3276032.html',  # Fantuan Sihuo Pengyouquan
         'http://www.mgtv.com/v/6/293394/f/3267841.html',  # Hongren Ai Zipai
         'http://www.mgtv.com/v/6/167785/f/3143680.html',  # Malanshan Jiandaoshou
         'http://www.mgtv.com/v/6/150882/f/2928653.html',  # Wenti Da Le
         'http://www.mgtv.com/v/6/293268/f/3300386.html',  # Guai Mao Mi
         'http://www.mgtv.com/v/8/157416/f/1740557.html',  # Nvshen TV
         'http://www.mgtv.com/v/6/155923/f/1755650.html',  # Luoli Kan Ju
         'http://www.mgtv.com/v/6/292983/f/3300867.html',  # Xiaosi Bu Changming
         'http://www.mgtv.com/v/6/291094/f/3109397.html',  # Baozou Kan Sha Pian'er, season 3
         'http://www.mgtv.com/v/6/159440/f/2954242.html',  # Ergeng Shipin
         'http://www.mgtv.com/v/6/290272/f/3290851.html',  # Aidou bibi She
         'http://www.mgtv.com/v/6/294171/f/3285505.html',  # Dushe Shaonao Yanjiusuo
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "mgtv_short_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 14
0
 def __init__(self):
     """Initialise the qq_tv_1 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the single seed page and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider qq_tv_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.seedList = [
         "http://film.qq.com/cover/f/fdug0j3etx4ioja.html",
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "qq_tv_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 15
0
 def __init__(self):
     """Initialise the all_mfsp_tv spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, starts with empty base and seed lists and stores
     whatever spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider all_mfsp_tv.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # Both lists start empty here.
     self.seedBase = []
     self.seedList = []
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_mfsp_tv_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 16
0
    def __init__(self):
        """Initialise the a_tmp_seed_spider spider.

        Prints the start banner, starts with empty seed and re-seed lists
        and stores whatever spiderTool.openFile returns for today's dated
        output path ("seed<YYYYMMDD>.txt" under the data directory).
        """
        print("Do spider a_tmp_seed_spider.")

        self.seedList = []
        self.reSeedList = []
        self.dataDir = os.path.join('.', 'data')
        self.today = time.strftime('%Y%m%d')
        out_name = "seed" + self.today + ".txt"
        self.dataFile = spiderTool.openFile(
            os.path.join(self.dataDir, out_name))
Ejemplo n.º 17
0
 def __init__(self):
     """Initialise the ablum_iqiyi_cartoon_2 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the single seed page and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider ablum_iqiyi_cartoon_2.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.seedList = [
         "http://www.iqiyi.com/a_19rrhb3xvl.html#vfrm=2-3-0-1",  # Hanghai Wang (One Piece)
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "ablum_iqiyi_cartoon_2_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 18
0
 def __init__(self):
     """Initialise the beauty_mmwu_1 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the single base listing page, starts with an
     empty seed list and stores whatever spiderTool.openFile returns for
     today's dated output path.
     """
     print("Do spider beauty_mmwu_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.seedBase = [
         'http://www.mmwu.tv/vod/opt/new',
     ]
     self.seedList = []
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "beauty_mmwu_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 19
0
 def __init__(self):
     """Initialise the le_short_1 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the single seed page and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider le_short_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.seedList = [
         "http://www.le.com/ptv/vplay/24794432.html",
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "le_short_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 20
0
 def __init__(self):
     """Initialise the weixindianying spider.

     Prints the start banner, starts with an empty data dict and seed
     list, records the single base listing page and stores whatever
     spiderTool.openFile returns for today's dated output path
     ("weixindianying<YYYYMMDD>.txt" under the data directory).
     """
     print("Do spider weixindianying.")
     self.data = {}
     self.seedBase = [
         "http://www.ppypp.com/dy/index.html",
     ]
     self.seedList = []
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "weixindianying" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 21
0
 def __init__(self):
     """Initialise the all_ppypp_movie spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the single base listing page, starts with an
     empty seed list and stores whatever spiderTool.openFile returns for
     today's dated output path.
     """
     print("Do spider all_ppypp_movie.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.seedBase = [
         "http://www.ppypp.com/dy/index.html",
     ]
     self.seedList = []
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_ppypp_movie_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 22
0
 def __init__(self):
     """Initialise the youku_short_2 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the single seed channel page and stores
     whatever spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider youku_short_2.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.seedList = ["http://i.youku.com/u/UMzMzODQ1Njg5Ng"]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "youku_short_2_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 23
0
 def __init__(self):
     """Initialise the beauty_youku_1 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the base search pages, starts with an empty
     seed list and stores whatever spiderTool.openFile returns for
     today's dated output path.
     """
     print("Do spider beauty_youku_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # Soku search result pages; the trailing comment gives the search
     # phrase in English. The first URL is listed twice, as in the
     # original list.
     self.seedBase = [
         'http://www.soku.com/search_video/q_%E7%BE%8E%E5%A5%B3%E5%86%99%E7%9C%9F_limitdate_0?site=14&_lg=10&orderby=2&spm=0.0.0.0.nfPeVF',  # beauty photo shoots
         'http://www.soku.com/search_video/q_%E7%BE%8E%E5%A5%B3%E5%86%99%E7%9C%9F_limitdate_0?site=14&_lg=10&orderby=2&spm=0.0.0.0.nfPeVF',  # beauty photo shoots
         'http://www.soku.com/search_video/q_%E7%BE%8E%E5%A5%B3%E7%83%AD%E8%88%9E_limitdate_0?site=14&_lg=10&orderby=2&spm=0.0.0.0.NziPkR',  # beauty hot dance
     ]
     self.seedList = []
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "beauty_youku_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 24
0
 def __init__(self):
     """Initialise the all_sohu_cartoon spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the base listing pages, starts with an empty
     seed list and stores whatever spiderTool.openFile returns for
     today's dated output path.
     """
     print("Do spider all_sohu_cartoon.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # Sohu listing pages; "18-" is the label carried over from the
     # original comments (presumably an age band - confirm).
     self.seedBase = [
         "http://so.tv.sohu.com/list_p1115_p2_p3_p4_p5_p66_p73_p8_p9_p101_p11_p12_p13.html",  # 18-
         "http://so.tv.sohu.com/list_p1115_p2_p3_p4_p5_p66_p73_p8_p9_p102_p11_p12_p13.html",  # 18-
         "http://so.tv.sohu.com/list_p1115_p2_p3_p4_p5_p6_p73_p8_p9_p101_p11_p12_p13.html",
     ]
     self.seedList = []
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_sohu_cartoon_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 25
0
 def __init__(self):
     """Initialise the all_youku_cartoon spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the base listing pages, starts with an empty
     seed list and stores whatever spiderTool.openFile returns for
     today's dated output path. Note that the output file name uses the
     "all_youku_cartoon2_" prefix.
     """
     print("Do spider all_youku_cartoon.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # Youku category listing pages; labels are carried over from the
     # original comments. The first URL is listed twice, as in the
     # original list.
     self.seedBase = [
         "http://list.youku.com/category/show/c_100_s_6_d_1_ag_5.html",  # 16~
         "http://list.youku.com/category/show/c_100_s_6_d_1_ag_5.html",
         "http://list.youku.com/category/show/c_100_s_6_d_1_p_1.html",  # new update
     ]
     self.seedList = []
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_youku_cartoon2_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 26
0
 def __init__(self):
     """Initialise the all_iqiyi_viptv spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the base page, resets the seed list and
     sequence counter, and stores whatever spiderTool.openFile returns
     for today's dated output path.
     """
     print("Do spider all_iqiyi_viptv.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     # The same page is listed twice, as in the original list.
     self.seedBase = [
         "http://www.iqiyi.com/dianshiju/VIP.html",
         "http://www.iqiyi.com/dianshiju/VIP.html",
     ]
     self.seedList = []
     self.seqNocnt = 1
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "all_iqiyi_viptv_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 27
0
 def __init__(self):
     """Initialise the spider that writes the amuse_bilibili_1_* file.

     Records the base listing pages, starts with an empty seed list,
     deep-copies the program template from BASE_CONTENT and stores
     whatever spiderTool.openFile returns for today's dated output path.
     """
     # Each entry is a one-item dict mapping a listing page URL to a
     # string value (section label in the trailing comment). The first
     # entry is listed twice, as in the original list.
     # NOTE(review): the two "ent-circle" entries share one URL but carry
     # different values, and lack the "#" before "!page=1" that the
     # "ent-variety" URLs have - possibly a typo, confirm.
     self.base = [
         {"http://www.bilibili.com/video/ent-variety-1.html#!page=1": "71"},  # variety shows
         {"http://www.bilibili.com/video/ent-variety-1.html#!page=1": "71"},  # variety shows
         {"http://www.bilibili.com/video/ent-circle-1.html!page=1": "137"},  # celebrities
         {"http://www.bilibili.com/video/ent-circle-1.html!page=1": "131"},  # Korea-related
     ]
     self.seedList = []
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "amuse_bilibili_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 28
0
 def __init__(self):
     """Initialise the ablum_youku_cartoon_1 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, starts with an empty base list, records the single
     seed page and stores whatever spiderTool.openFile returns for
     today's dated output path.
     """
     print("Do spider ablum_youku_cartoon_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.seedBase = []
     self.seedList = [
         "http://www.youku.com/show_page/id_z31c53954e34c11e5a2a2.html",  # Shuangxing zhi Yinyangshi (Twin Star Exorcists)
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "ablum_youku_cartoon_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 29
0
 def __init__(self):
     """Initialise the youku_tv_1 spider.

     Prints the start banner, deep-copies the program template from
     BASE_CONTENT, records the seed show pages and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider youku_tv_1.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = PROGRAM_SUB
     self.seedList = [
         "http://list.youku.com/show/id_z1800388c336c11e6b432.html",
         "http://list.youku.com/show/id_zfb0e65ccdb6a11e58bfb.html",
         "http://list.youku.com/show/id_z42b9ab741ec511e5b522.html",
         "http://list.youku.com/show/id_z45f85ed6ba6d11e5b522.html",
         "http://list.youku.com/show/id_zfacfcb0cec2511e583e8.html",
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "youku_tv_1_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))
Ejemplo n.º 30
0
 def __init__(self):
     """Initialise the iqiyi_short_3 spider.

     Prints the start banner, deep-copies the program and sub-program
     templates, records the seed playlist pages and stores whatever
     spiderTool.openFile returns for today's dated output path.
     """
     print("Do spider iqiyi_short_3.")
     self.program = copy.deepcopy(BASE_CONTENT["program"])
     self.program_sub = copy.deepcopy(PROGRAM_SUB)
     # Item numbers and titles are carried over from the original
     # comments. Item 2 ("Wan'an, Pengyouquan", iQiyi,
     # http://www.iqiyi.com/lib/m_209866614.html) was commented out in
     # the original list and stays excluded.
     self.seedList = [
         "http://www.iqiyi.com/playlist295748402.html",  # 6. Shenju Liang Le (iQiyi)
         "http://www.iqiyi.com/playlist396149102.html",  # 17. Xiao Bu Neng Ting (iQiyi)
     ]
     self.dataDir = os.path.join('.', 'data')
     self.today = time.strftime('%Y%m%d')
     out_name = "iqiyi_short_3_" + self.today + ".txt"
     self.dataFile = spiderTool.openFile(
         os.path.join(self.dataDir, out_name))