def getDetail():
    """Poll the ``redios`` collection forever and enrich unprocessed records.

    Each pass loads up to 10 documents with ``status == 0`` (highest ``_id``
    first).  For every document the detail page is fetched through
    ``vdetail`` and the record is then updated: ``status`` becomes 1 and the
    returned tags are stored unless ``vdetail`` returned ``False``.

    When no document is pending the worker sleeps 10 seconds before polling
    again.  This function never returns.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('redios')
    base = 'https://www.qq.com/'
    while True:
        # find() always hands back a cursor (never None), so the batch is
        # materialised to tell "nothing pending" apart from "work to do".
        pending = list(
            curTableObj.find({"status": 0})
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not pending:
            time.sleep(10)
            continue
        for v in pending:
            # Prefer the relative path; fall back to the stored URL.
            _rel = v.get('rel', 0)
            if _rel == 0:
                _rel = v.get('url')
            curUrl = base + _rel.strip('/')
            tags = getObj.vdetail(curUrl, v['id'])
            _curUpData = {'status': 1}
            # Tags are only stored when vdetail did not return False.
            if tags != False:
                _curUpData['tags'] = tags
            # update_one replaces Collection.update (removed in PyMongo 4);
            # assumes getTbname returns a PyMongo collection.
            curTableObj.update_one({"id": v['id']}, {"$set": _curUpData})
            # Brief pause between records (presumably to rate-limit requests).
            time.sleep(0.5)
def poolDetail(start, end):
    """Process every unprocessed ``redios`` record with ``start <= _id < end``.

    Records are handled in batches of up to 10 (highest ``_id`` first).  For
    each record the detail page is fetched through ``vdetail`` and the title
    is translated.  The record is then updated:

    * ``status`` 3 when the translation came back as an empty string;
    * ``status`` 1 plus ``ctitle`` (the translation) otherwise;
    * ``tags`` is stored as well unless ``vdetail`` returned ``False``.

    Args:
        start: Inclusive lower bound of the ``_id`` range.
        end: Exclusive upper bound of the ``_id`` range.

    Returns:
        None, once no ``status == 0`` record is left in the range.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('redios')
    base = 'https://www.qq.com/'
    while True:
        # find() always returns a cursor (never None); materialise the batch
        # so that an exhausted range actually terminates the worker.
        pending = list(
            curTableObj.find({
                "status": 0,
                "_id": {"$gte": start, "$lt": end},
            })
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not pending:
            break
        g = GoogleTranslator()
        for v in pending:
            curUrl = base + v['rel'].strip('/')
            tags = getObj.vdetail(curUrl, v['id'])
            # NOTE(review): strips apostrophes and ampersands before
            # translating.  The first literal was unparseable as found (three
            # consecutive quotes); confirm it was not meant to be an HTML
            # entity such as "&#039;" / "&amp;".
            title = v['title'].replace("'", '').replace('&', '')
            s = g.translate(title)
            _curUpData = {}
            if s == '':
                _curUpData['status'] = 3
            else:
                _curUpData['status'] = 1
                _curUpData['ctitle'] = s
            # Tags are only stored when vdetail did not return False.
            if tags != False:
                _curUpData['tags'] = tags
            # update_one replaces Collection.update (removed in PyMongo 4);
            # assumes getTbname returns a PyMongo collection.
            curTableObj.update_one({"id": v['id']}, {"$set": _curUpData})
            # Brief pause between records (presumably to rate-limit requests).
            time.sleep(0.5)
def _saveStarImage(getObj, curTableObj, pattern, base, v):
    """Download the profile picture for one ``local_stars`` record.

    The record is left untouched (and will be picked up again by a later
    query) when it has no string ``rel`` path, when the page could not be
    loaded, or when the picture markup is not found in the page.
    """
    _rel = v.get('rel')
    if not isinstance(_rel, str):
        return
    curUrl = base + _rel.strip('/') + '#_tabAboutMe'
    content = getObj.mainContent(curUrl)
    if content is None:
        return
    # findall() returns a (possibly empty) list, never None.
    listCates = pattern.findall(content)
    # NOTE(review): index 1 selects the *second* profile-pic match on the
    # page; confirm whether the first match (index 0) was intended.
    if len(listCates) < 2:
        return
    img = listCates[1]
    target = starDir(v['_id'])
    # Group 1 of the match is the <img src="..."> value.
    getObj.getOtherSource(target, img[1])
    # `imgs` is written as an empty string: its mere presence removes the
    # record from the {"imgs": {"$exists": False}} query.
    # update_one replaces Collection.update (removed in PyMongo 4);
    # assumes getTbname returns a PyMongo collection.
    curTableObj.update_one({"_id": v['_id']}, {"$set": {'imgs': ''}})


def getDetail():
    """Poll ``local_stars`` forever and fetch pictures for records without ``imgs``.

    Each pass loads up to 10 documents that have no ``imgs`` field (highest
    ``_id`` first) and processes them one by one, pausing 0.5 s after every
    record.  When no document is pending the worker sleeps 10 seconds before
    polling again.  This function never returns.

    NOTE(review): records that cannot be processed keep matching the query,
    so they are retried on every pass.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('local_stars')
    base = 'https://www.xvideos.com/'
    # Compiled once; raw string so the regex escapes reach `re` unchanged.
    pattern = re.compile(
        r'<div class="profile-pic">([.|\s|\S|\n]*?)<img src="(.*)" onerror=([.|\s|\S|\n]*?)</div>'
    )
    while True:
        # find() always hands back a cursor (never None), so the batch is
        # materialised to tell "nothing pending" apart from "work to do".
        pending = list(
            curTableObj.find({"imgs": {"$exists": False}})
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not pending:
            time.sleep(10)
            continue
        for v in pending:
            _saveStarImage(getObj, curTableObj, pattern, base, v)
            # Pause after every record, skipped ones included, so a batch of
            # unprocessable records cannot turn into a tight request loop.
            time.sleep(0.5)
def failedLog(self, content):
    """Store a failure record in the ``failedLog`` collection.

    Args:
        content: The document to insert, or a list/tuple of documents
            (presumably dicts describing the failure -- confirm with callers).
    """
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('failedLog')
    # Collection.insert() was deprecated in PyMongo 3 and removed in 4;
    # dispatch to its replacements while still accepting several documents.
    # Assumes getTbname returns a PyMongo collection.
    if isinstance(content, (list, tuple)):
        curTableObj.insert_many(content)
    else:
        curTableObj.insert_one(content)
if htmlContent is None: exit(1) #print(htmlContent.encode('gbk', 'ignore').decode('gbk'));exit('3'); dbObj = Dbobj('redio', 're_') #print(dbObj.getNextValue('trends'));exit('3') pattern = '<nav aria-label="secondary">([.|\s|\S|\n]*?)</nav>' trends = re.findall(pattern, htmlContent) if trends is None: exit(1) trends = trends[0] pattern = '<li><a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a></li>' trends = re.findall(pattern, trends) #print(trends);#.encode('utf-8', 'ignore').decode('utf-8') if trends is None or len(trends) < 1: exit(2) curTableObj = dbObj.getTbname('trends') reTrends = [] for i in trends: curTitle = i[1].strip() curUrl = i[0].replace('&', '&').strip() curInfo = curTableObj.find_one( {"$or": [{ "title": curTitle }, { "url": curUrl }]}) if curInfo is None: #print(i[0].replace('&','&'));exit('1') reTrends.append({ 'url': curUrl,
tags = tags[0] #print(tags) #pattern = '<a href="(.*)"(.*)>(.*)</a>' pattern = '<a href="([.|\s|\S|\n]*?)" class="btn btn-default">([.|\s|\S|\n]*?)</a>' listTags = re.findall(pattern, tags) #print(listTags);exit(557) #&top & reTags = [] for i in listTags: if len(i[1]) > 0: reTags.append({ 'url': i[0].replace('&', '&'), 'title': i[1], '_id': dbObj.getNextValue('category') }) curTableObj = dbObj.getTbname('category') curTableObj.insert_many(reTags) arr = [] pattern = '<div class="home-trends ordered-label-list">([.|\s|\S|\n]*?)</div>' trends = re.findall(pattern, htmlContent) if trends is None or len(trends[0]) < 0: exit(3) pattern = '<a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a>' tags = re.findall(pattern, trends[0]) if tags is None or len(tags) < 1: exit(4) curTableObj = dbObj.getTbname('trends') reTrends = [] for i in tags:
def __dbInfo(self, tableName):
    """Return the table handle that ``Dbobj('redio', 're_')`` gives for *tableName*."""
    # NOTE(review): this videoDemo instance is never used afterwards; it is
    # still constructed here in case its constructor has side effects.
    _unusedDemo = videoDemo()
    return Dbobj('redio', 're_').getTbname(tableName)