def getList(obj,num):
    """Parse listing page ``num`` and insert its entries into the 'stars' table.

    The page text is obtained from ``obj.mainContent(url)``. Every block that
    matches the thumbnail markup becomes one document, and all documents are
    written with a single ``insert_many`` call.

    Args:
        obj: Object exposing ``mainContent(url)``; a ``None`` result is
            treated as "no page".
        num: Page number interpolated into the listing URL.

    Returns:
        ``False`` when no content was returned or nothing on the page
        matched; ``None`` once the documents have been inserted.
    """
    dbObj = Dbobj('redio','re_')
    curTableObj = dbObj.getTbname('stars')
    url = "https://www.123fanhao.com/special-show-p-"+str(num)+".html"
    content = obj.mainContent(url)
    if content is None:
        return False
    # Raw strings keep \s / \S as regex escapes instead of invalid Python
    # string escapes; the character class [.|\s|\S|\n] matches any character.
    pattern = (
        r'<div class="col-xs-6 col-sm-2 placeholder">([.|\s|\S|\n]*?)'
        r'<a href="([.|\s|\S|\n]*?)" target="_blank">'
        r'<img src="([.|\s|\S|\n]*?)" title="([.|\s|\S|\n]*?)" class="img-thumbnail"></a>'
        r'<h4><a href="([.|\s|\S|\n]*?)" target="_blank">([.|\s|\S|\n]*?)</a>'
        r'([.|\s|\S|\n]*?)</div>'
    )
    listCates = re.findall(pattern,content)
    # re.findall returns a (possibly empty) list, never None. An empty list
    # must not reach insert_many, which rejects empty input.
    if not listCates:
        return False
    data = []
    for v in listCates:
        # v = (leading markup, link href, image src, image title,
        #      heading href, heading text, trailing markup)
        titleParts = v[3].split('/')
        # 'ename' is the segment after the first '/' of the image title;
        # fall back to an empty string when the title has no '/'.
        ename = titleParts[1].strip() if len(titleParts) > 1 else ''
        data.append({
            '_id': dbObj.getNextValue('stars'),
            'ename': ename,
            'rel': v[1],
            'img': v[2],
            'cname': v[5].strip(),
            'status': 0,
        })
    curTableObj.insert_many(data)
def deRange(num):
    """Upsert the videos listed on page ``num`` into the 'redios' table.

    The video list comes from ``videoDemo().getVedioUrl(url)``. Each entry
    with a non-blank ``id`` is written with an upsert keyed on that id, so an
    entry whose id is already stored is left untouched (``$setOnInsert``).

    Args:
        num: Page number; values below 1 select the site root URL, any other
            value selects ``/new/<num>``.

    Returns:
        None.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio','re_')
    curTableObj = dbObj.getTbname('redios')
    if num < 1:
        url = "https://www.qq.com/"
    else:
        url = "https://www.qq.com/new/" + str(num)
    # Tolerate a None result from the scraper instead of failing on iteration.
    for v in getObj.getVedioUrl(url) or []:
        # Normalise the id once so the emptiness check, the upsert filter and
        # the stored value all agree; otherwise a padded id is stored in a
        # form that its own filter never matches again.
        videoId = v['id'].strip()
        if not videoId:
            continue
        v['id'] = videoId
        v['_id'] = dbObj.getNextValue('redios')
        v['tags'] = v['cates'] = ''
        v['status'] = 0
        # update_one replaces the deprecated Collection.update (removed in
        # PyMongo 4). NOTE(review): assumes curTableObj is a PyMongo
        # collection, as the insert_many/find_one calls elsewhere suggest.
        curTableObj.update_one({"id": videoId}, {"$setOnInsert": v}, upsert=True)
# Extract the trend links from the page and store the ones that are not yet
# in the 'trends' table. `pattern`, `htmlContent` and `dbObj` are set up
# earlier in the script.
trends = re.findall(pattern, htmlContent)
# re.findall returns a list (never None); an empty list means "no match" and
# must be caught here, before indexing [0].
if not trends:
    exit(1)
trends = trends[0]
# Raw string keeps \s / \S as regex escapes rather than invalid string escapes.
pattern = r'<li><a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a></li>'
trends = re.findall(pattern, trends)
if not trends:
    exit(2)
curTableObj = dbObj.getTbname('trends')
reTrends = []
# Titles/URLs already queued in this run, so an entry repeated on the same
# page is inserted only once (the DB lookup cannot see pending inserts).
seenTitles = set()
seenUrls = set()
for i in trends:
    curTitle = i[1].strip()
    # href values scraped from HTML encode '&' as '&amp;'; decode it so the
    # stored URL is directly usable.
    # NOTE(review): confirm stored URLs are expected in decoded form.
    curUrl = i[0].replace('&amp;', '&').strip()
    if curTitle in seenTitles or curUrl in seenUrls:
        continue
    curInfo = curTableObj.find_one(
        {"$or": [{"title": curTitle}, {"url": curUrl}]})
    if curInfo is None:
        seenTitles.add(curTitle)
        seenUrls.add(curUrl)
        reTrends.append({
            'url': curUrl,
            'title': curTitle,
            '_id': dbObj.getNextValue('trends')
        })
# insert_many rejects an empty list, so only call it when there is new data.
if reTrends:
    curTableObj.insert_many(reTrends)
# Store the category links found in the first `tags` block, then locate the
# trend links in the "home-trends" section. `tags`, `htmlContent` and `dbObj`
# are set up earlier in the script.
if not tags:
    exit(2)
tags = tags[0]
# Raw strings keep \s / \S as regex escapes rather than invalid string escapes.
pattern = r'<a href="([.|\s|\S|\n]*?)" class="btn btn-default">([.|\s|\S|\n]*?)</a>'
listTags = re.findall(pattern, tags)
reTags = []
for i in listTags:
    # Skip links without any link text.
    if len(i[1]) > 0:
        reTags.append({
            # href values scraped from HTML encode '&' as '&amp;'; decode it.
            # NOTE(review): confirm stored URLs are expected in decoded form.
            'url': i[0].replace('&amp;', '&'),
            'title': i[1],
            '_id': dbObj.getNextValue('category')
        })
curTableObj = dbObj.getTbname('category')
# insert_many rejects an empty list, so only call it when there is data.
if reTags:
    curTableObj.insert_many(reTags)
arr = []
pattern = r'<div class="home-trends ordered-label-list">([.|\s|\S|\n]*?)</div>'
trends = re.findall(pattern, htmlContent)
# re.findall returns a list (never None); check for "no match" and for an
# empty section before indexing, instead of the unreachable `len(...) < 0`.
if not trends or len(trends[0]) < 1:
    exit(3)
pattern = r'<a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a>'
tags = re.findall(pattern, trends[0])
if not tags:
    exit(4)
curTableObj = dbObj.getTbname('trends')