def run(self): channels = self.parseChannel() dbVPN = db.DbVPN() ops = db_ops.DbOps(dbVPN) for channel in channels: ops.inertImgChannel(channel) dbVPN.commit() for obj in channels: channel = obj['url'] for i in range(1, maxImgPage): page_url = obj['url'] if i != 1: page_url = "%s%s%s" % (page_url.replace('.html', '-'), i, ".html") print page_url count = self.update(page_url, ops, channel, i) dbVPN.commit() if count == 0: break
def run(self): print '解析列表页 channel:', self.t_channel dbVPN = db.DbVPN() ops = db_ops.DbOps(dbVPN) try: url = self.t_obj['url'] for i in range(1, 6): if i!=1: url = "%s%s%s"%(self.t_obj['url'].replace(".html", "-"),i,".html") print url count = self.update(url, ops) dbVPN.commit() if count == 0: break dbVPN.close() except Exception as e: print common.format_exception(e) dbVPN.commit() dbVPN.close()
def run(self): dbVPN = db.DbVPN() ops = db_ops.DbOps(dbVPN) ops.inertImgChannel(self.t_obj) dbVPN.commit() # 有分页 sortType = dateutil.y_m_d() # channel = self.t_obj['url'] # channel = urlparse(self.t_obj['baseurl']).netloc for name, url in img_channels.items(): obj = {} obj['name'] = name obj['channel'] = self.t_obj['url'] obj['updateTime'] = datetime.datetime.now() obj['fileDate'] = '' obj['baseurl'] = baseurl obj['showType'] = 3 # obj['url'] = url.replace("&", "") obj['url'] = urlparse(self.t_obj['baseurl']).path print obj['url'] # obj['pics'] = len(pics) obj['sortType'] = sortType pics = [] for i in range(1, 3): url = url + str(i) alist = self.fetchDataHead(url) print '解析', i, "页--", len(alist) for item in alist: pic = self.fetchImgItemData(item.get("href")) if pic == None: continue pics.append(pic) obj['picList'] = pics obj['pics'] = len(pics) ops.inertImgItems(obj) for picItem in obj['picList']: item = {} item['itemUrl'] = obj['url'] item['picUrl'] = picItem ops.inertImgItems_item(item) dbVPN.commit()
def run(self): channels = self.parseChannel() dbVPN = db.DbVPN() ops = db_ops.DbOps(dbVPN) for channel in channels: ops.inertImgChannel(channel) dbVPN.commit() for obj in channels: url = obj['url'] channel = url for i in range(1, maxImgPage): if i == 1: url = url + 'index.html' else: url = (url + 'index_%s.html') % (i) print url count = self.update(baseurl + url, ops, channel, i) dbVPN.commit() if count == 0: break
def run(self): try: dbVPN = db.DbVPN() ops = db_ops.DbOps(dbVPN) sortType = dateutil.y_m_d() # sortType = "2017-07-12" for i in range(0, 20000): # ret = ops.getTextChannelItems(self.t_item["url"], i) ret = ops.getTextChannelItemsById(i, sortType) if len(ret) == 0: print '写入完毕' break print '开始写入 channel :', self.t_item["url"], cloase = False for item in ret: # path = filePATH + str(item['id']) + ".txt" # if os.path.exists(path) == False: # output = open(path, 'w') # output.write(item['file']) # output.close() # print '写完文件:' + path # path = filePATHWeb + str(item['id']) + ".txt" # if os.path.exists(path) == False: # output = open(path, 'w') # output.write(html_parse.filter_tags(item['file'])) # output.close() # print '写完文件:' + path path = filePATHHtml + str(item['id']) + ".html" # if os.path.exists(path) == False: output = open(path, 'w') output.write( html_parse.txtToHtml( html_parse.filter_tags(item['file']))) output.close() print '写完文件:' + path print '写完页', i print 'channel :', self.t_item["url"], '同步完成 len=', len(ret) dbVPN.close() except Exception as e: print common.format_exception(e)
def videoParse(self, channel, url, userId): dataList = [] soup = self.fetchUrl(url) trs = soup.findAll("tr", {"class": "tr3 t_one"}) for tr in trs: h3 = tr.first("h3") if h3 != None: ahref = h3.first("a") if ahref != None and ahref.get("href").count("html_data") > 0: obj = {} mp4Url = self.parseDomVideo(ahref.get("href")) if mp4Url == None: print '没有mp4 文件:', ahref.get("href") continue obj['url'] = mp4Url obj['pic'] = "" obj['name'] = h3.text videourl = urlparse(obj['url']) obj['path'] = "aotu" + videourl.path obj['rate'] = 1.2 obj['updateTime'] = datetime.datetime.now() obj['userId'] = userId obj['baseUrl'] = baseurl obj['showType'] = 3 if mp4Url.count("m3u8") == 0 and mp4Url.count("mp4") == 0: obj['videoType'] = "webview" else: obj['videoType'] = "normal" print obj['videoType'], obj['name'], mp4Url, obj['pic'] dataList.append(obj) dbVPN = db.DbVPN() ops = db_ops.DbOps(dbVPN) for obj in dataList: ops.inertVideoUserItem(obj) print 'clsmik video --解析完毕 ; channel =', channel, '; len=', len( dataList), url dbVPN.commit() dbVPN.close()
def videoParse(self, channel, url,userId): dataList = [] soup = self.fetchUrl(url) tab = soup.first("div", {'class': 'list_video'}) if tab != None: lis = tab.findAll("li") for li in lis: ahref = li.first("a") if ahref != None: obj = {} mp4Url = self.parseDomVideo(ahref.get("href")) if mp4Url == None: print '没有mp4 文件:', ahref.get("href") continue obj['url'] = mp4Url obj['pic'] = li.first("img").get("src") obj['name'] = li.first("img").get("alt") videourl = urlparse(obj['url']) obj['path'] = "jjr"+videourl.path obj['rate'] = 1.2 obj['updateTime'] = datetime.datetime.now() obj['userId'] = userId obj['baseUrl'] = baseurl obj['showType'] = 3 if mp4Url.count("m3u8")==0 and mp4Url.count("mp4")==0: obj['videoType'] = "webview" else: obj['videoType'] = "normal" print obj['videoType'],obj['name'],mp4Url,obj['pic'] dataList.append(obj) dbVPN = db.DbVPN() ops = db_ops.DbOps(dbVPN) for obj in dataList: ops.inertVideoUserItem(obj) print 'clsmik video --解析完毕 ; channel =', channel, '; len=', len(dataList), url dbVPN.commit() dbVPN.close()
def run(self):
    """Store this task's text channel, then crawl its numbered pages.

    The channel in ``self.t_obj`` is inserted and committed first. Pages
    1 .. maxTextPage-1 are then visited; page ``i`` is the channel URL with
    ``.html`` replaced by ``-<i>.html``. Each page is passed to
    ``self.update`` and committed, and the loop stops when ``self.update``
    returns 0.

    Any exception raised while crawling is printed via
    ``common.format_exception``; the connection is then committed and
    closed.
    """
    dbVPN = db.DbVPN()
    ops = db_ops.DbOps(dbVPN)
    ops.inertTextChannel(self.t_obj)
    dbVPN.commit()
    print self.t_obj
    try:
        channel = self.t_obj['url']
        for i in range(1, maxTextPage):
            # Unlike sibling crawlers, page 1 also gets the "-1.html" suffix.
            url = self.t_obj['url'].replace(".html", "-") + str(i) + ".html"
            count = self.update(url, ops, channel)
            dbVPN.commit()
            if count == 0:
                break
            else:
                # NOTE(review): the original indentation was lost, so this
                # ``else`` may belong to the ``for`` loop instead of the
                # ``if`` - confirm. As written here, every page with a
                # non-zero count is passed to ``self.update`` a second time.
                self.update(url, ops, channel)
                dbVPN.commit()
        dbVPN.close()
    except Exception as e:
        print common.format_exception(e)
        dbVPN.commit()
        dbVPN.close()
} def getLocal(ip, id): param = {'ip': ip} ret = httputil.getData(url, param, headers) obj = {} obj['id'] = id if ret['code'] == 0: data = ret.get('data', {}) obj['local'] = data.get('country') + '-' + data.get( 'city') + '-' + data.get('isp') print 'ip=', ip, ' ;local=', obj['local'] else: obj['local'] = '' return obj if __name__ == '__main__': dbVPN = db.DbVPN() ops = db_ops.DbOps(dbVPN) rows = ops.getAllwannaIplocalnull() print 'need update len=', len(rows) objs = [] for row in rows: item = getLocal(row['ip'], row['id']) objs.append(item) time.sleep(1) ops.updateWannaIpLocal(objs) #getLocal('223.72.96.151', 1)
def getImgs():
    """Return the result of ``DbOps.getImgItems_itemId()``.

    A connection is opened for the query and is closed before returning,
    including when the query raises.
    """
    dbVPN = db.DbVPN()
    try:
        ops = db_ops.DbOps(dbVPN)
        return ops.getImgItems_itemId()
    finally:
        # Previously the connection leaked if the query raised.
        dbVPN.close()
def updateImgUrl(self, itemUrl):
    """Record this object's image URLs for the item identified by *itemUrl*.

    Calls ``ops.updateImgItemsFileUrl(itemUrl, self.imgCdnUrl, self.imgUrl)``
    and commits. The connection is closed even when the update or the commit
    raises.
    """
    dbVPN = db.DbVPN()
    try:
        ops = db_ops.DbOps(dbVPN)
        ops.updateImgItemsFileUrl(itemUrl, self.imgCdnUrl, self.imgUrl)
        dbVPN.commit()
    finally:
        # Previously the connection leaked if the update or commit raised.
        dbVPN.close()