Esempio n. 1
0
 def run(self):
     channels = self.parseChannel()
     dbVPN = db.DbVPN()
     ops = db_ops.DbOps(dbVPN)
     for channel in channels:
         ops.inertImgChannel(channel)
     dbVPN.commit()
     for obj in channels:
         channel = obj['url']
         for i in range(1, maxImgPage):
             page_url = obj['url']
             if i != 1:
                 page_url = "%s%s%s" % (page_url.replace('.html',
                                                         '-'), i, ".html")
             print page_url
             count = self.update(page_url, ops, channel, i)
             dbVPN.commit()
             if count == 0:
                 break
Esempio n. 2
0
 def run(self):
     print '解析列表页 channel:', self.t_channel
     dbVPN = db.DbVPN()
     ops = db_ops.DbOps(dbVPN)
     try:
         url = self.t_obj['url']
         for i in range(1, 6):
             if i!=1:
                 url = "%s%s%s"%(self.t_obj['url'].replace(".html", "-"),i,".html")
             print url
             count = self.update(url, ops)
             dbVPN.commit()
             if count == 0:
                 break
         dbVPN.close()
     except Exception as e:
         print common.format_exception(e)
         dbVPN.commit()
         dbVPN.close()
Esempio n. 3
0
    def run(self):
        dbVPN = db.DbVPN()
        ops = db_ops.DbOps(dbVPN)
        ops.inertImgChannel(self.t_obj)
        dbVPN.commit()
        # 有分页
        sortType = dateutil.y_m_d()
        #         channel = self.t_obj['url']
        #         channel = urlparse(self.t_obj['baseurl']).netloc
        for name, url in img_channels.items():
            obj = {}
            obj['name'] = name
            obj['channel'] = self.t_obj['url']
            obj['updateTime'] = datetime.datetime.now()
            obj['fileDate'] = ''
            obj['baseurl'] = baseurl
            obj['showType'] = 3
            #             obj['url'] = url.replace("&", "")
            obj['url'] = urlparse(self.t_obj['baseurl']).path
            print obj['url']
            #             obj['pics'] = len(pics)
            obj['sortType'] = sortType
            pics = []
            for i in range(1, 3):
                url = url + str(i)

                alist = self.fetchDataHead(url)
                print '解析', i, "页--", len(alist)
                for item in alist:
                    pic = self.fetchImgItemData(item.get("href"))
                    if pic == None:
                        continue
                    pics.append(pic)
            obj['picList'] = pics
            obj['pics'] = len(pics)
            ops.inertImgItems(obj)
            for picItem in obj['picList']:
                item = {}
                item['itemUrl'] = obj['url']
                item['picUrl'] = picItem
                ops.inertImgItems_item(item)
            dbVPN.commit()
Esempio n. 4
0
 def run(self):
     channels = self.parseChannel()
     dbVPN = db.DbVPN()
     ops = db_ops.DbOps(dbVPN)
     for channel in channels:
         ops.inertImgChannel(channel)
     dbVPN.commit()
     for obj in channels:
         url = obj['url']
         channel = url
         for i in range(1, maxImgPage):
             if i == 1:
                 url = url + 'index.html'
             else:
                 url = (url + 'index_%s.html') % (i)
             print url
             count = self.update(baseurl + url, ops, channel, i)
             dbVPN.commit()
             if count == 0:
                 break
Esempio n. 5
0
    def run(self):

        try:
            dbVPN = db.DbVPN()
            ops = db_ops.DbOps(dbVPN)
            sortType = dateutil.y_m_d()
            #             sortType = "2017-07-12"
            for i in range(0, 20000):
                #                 ret = ops.getTextChannelItems(self.t_item["url"], i)
                ret = ops.getTextChannelItemsById(i, sortType)
                if len(ret) == 0:
                    print '写入完毕'
                    break
                print '开始写入 channel :', self.t_item["url"],
                cloase = False
                for item in ret:
                    #                     path = filePATH + str(item['id']) + ".txt"
                    #                     if os.path.exists(path) == False:
                    #                         output = open(path, 'w')
                    #                         output.write(item['file'])
                    #                         output.close()
                    #                         print '写完文件:' + path
                    #                     path = filePATHWeb + str(item['id']) + ".txt"
                    #                     if os.path.exists(path) == False:
                    #                         output = open(path, 'w')
                    #                         output.write(html_parse.filter_tags(item['file']))
                    #                         output.close()
                    #                         print '写完文件:' + path
                    path = filePATHHtml + str(item['id']) + ".html"
                    #                     if os.path.exists(path) == False:
                    output = open(path, 'w')
                    output.write(
                        html_parse.txtToHtml(
                            html_parse.filter_tags(item['file'])))
                    output.close()
                    print '写完文件:' + path
                print '写完页', i
            print 'channel :', self.t_item["url"], '同步完成 len=', len(ret)
            dbVPN.close()
        except Exception as e:
            print common.format_exception(e)
Esempio n. 6
0
    def videoParse(self, channel, url, userId):
        dataList = []
        soup = self.fetchUrl(url)
        trs = soup.findAll("tr", {"class": "tr3 t_one"})
        for tr in trs:
            h3 = tr.first("h3")
            if h3 != None:
                ahref = h3.first("a")
                if ahref != None and ahref.get("href").count("html_data") > 0:
                    obj = {}
                    mp4Url = self.parseDomVideo(ahref.get("href"))
                    if mp4Url == None:
                        print '没有mp4 文件:', ahref.get("href")
                        continue
                    obj['url'] = mp4Url
                    obj['pic'] = ""
                    obj['name'] = h3.text

                    videourl = urlparse(obj['url'])
                    obj['path'] = "aotu" + videourl.path
                    obj['rate'] = 1.2
                    obj['updateTime'] = datetime.datetime.now()
                    obj['userId'] = userId
                    obj['baseUrl'] = baseurl
                    obj['showType'] = 3
                    if mp4Url.count("m3u8") == 0 and mp4Url.count("mp4") == 0:
                        obj['videoType'] = "webview"
                    else:
                        obj['videoType'] = "normal"
                    print obj['videoType'], obj['name'], mp4Url, obj['pic']
                    dataList.append(obj)
        dbVPN = db.DbVPN()
        ops = db_ops.DbOps(dbVPN)
        for obj in dataList:
            ops.inertVideoUserItem(obj)

        print 'clsmik video --解析完毕 ; channel =', channel, '; len=', len(
            dataList), url
        dbVPN.commit()
        dbVPN.close()
Esempio n. 7
0
    def videoParse(self, channel, url,userId):
        dataList = []
        soup = self.fetchUrl(url)
        tab = soup.first("div", {'class': 'list_video'})
        if tab != None:
            lis = tab.findAll("li")
            for li in lis:
                ahref = li.first("a")
                if ahref != None:
                    obj = {}
                    mp4Url = self.parseDomVideo(ahref.get("href"))
                    if mp4Url == None:
                        print '没有mp4 文件:', ahref.get("href")
                        continue
                    obj['url'] = mp4Url
                    obj['pic'] = li.first("img").get("src")
                    obj['name'] = li.first("img").get("alt")
        
                    videourl = urlparse(obj['url'])
                    obj['path'] = "jjr"+videourl.path
                    obj['rate'] = 1.2
                    obj['updateTime'] = datetime.datetime.now()
                    obj['userId'] = userId
                    obj['baseUrl'] = baseurl
                    obj['showType'] = 3
                    if mp4Url.count("m3u8")==0 and mp4Url.count("mp4")==0:
                        obj['videoType'] = "webview"
                    else:
                        obj['videoType'] = "normal"
                    print obj['videoType'],obj['name'],mp4Url,obj['pic']
                    dataList.append(obj)
        dbVPN = db.DbVPN()
        ops = db_ops.DbOps(dbVPN)
        for obj in dataList:
            ops.inertVideoUserItem(obj)

        print 'clsmik video --解析完毕 ; channel =', channel, '; len=', len(dataList), url
        dbVPN.commit()
        dbVPN.close()
Esempio n. 8
0
    def run(self):
        dbVPN = db.DbVPN()
        ops = db_ops.DbOps(dbVPN)
        ops.inertTextChannel(self.t_obj)
        dbVPN.commit()
        print self.t_obj
        try:
            channel = self.t_obj['url']
            for i in range(1, maxTextPage):
                url = self.t_obj['url'].replace(".html",
                                                "-") + str(i) + ".html"
                count = self.update(url, ops, channel)
                dbVPN.commit()
                if count == 0:
                    break
            else:
                self.update(url, ops, channel)
                dbVPN.commit()

            dbVPN.close()
        except Exception as e:
            print common.format_exception(e)
            dbVPN.commit()
            dbVPN.close()
Esempio n. 9
0
}


def getLocal(ip, id):
    param = {'ip': ip}
    ret = httputil.getData(url, param, headers)
    obj = {}
    obj['id'] = id
    if ret['code'] == 0:
        data = ret.get('data', {})
        obj['local'] = data.get('country') + '-' + data.get(
            'city') + '-' + data.get('isp')
        print 'ip=', ip, ' ;local=', obj['local']
    else:
        obj['local'] = ''
    return obj


if __name__ == '__main__':
    dbVPN = db.DbVPN()
    ops = db_ops.DbOps(dbVPN)
    rows = ops.getAllwannaIplocalnull()
    print 'need update len=', len(rows)
    objs = []
    for row in rows:
        item = getLocal(row['ip'], row['id'])
        objs.append(item)
        time.sleep(1)
    ops.updateWannaIpLocal(objs)
    #getLocal('223.72.96.151', 1)
Esempio n. 10
0
def getImgs():
    """Return the result of ``ops.getImgItems_itemId()``.

    Opens a dedicated ``DbVPN`` connection for the query and always closes
    it, including when the query raises.
    """
    dbVPN = db.DbVPN()
    try:
        return db_ops.DbOps(dbVPN).getImgItems_itemId()
    finally:
        dbVPN.close()
Esempio n. 11
0
 def updateImgUrl(self, itemUrl):
     """Update the stored file URL of the image item ``itemUrl``.

     Passes ``itemUrl``, ``self.imgCdnUrl`` and ``self.imgUrl`` to
     ``ops.updateImgItemsFileUrl`` and commits. The connection is closed
     even when the update or the commit raises.
     """
     dbVPN = db.DbVPN()
     try:
         ops = db_ops.DbOps(dbVPN)
         ops.updateImgItemsFileUrl(itemUrl, self.imgCdnUrl, self.imgUrl)
         dbVPN.commit()
     finally:
         dbVPN.close()