예제 #1
0
def getList(obj,num):
    """Scrape one listing page and bulk-insert the parsed entries into 'stars'.

    Args:
        obj: Fetcher object exposing ``mainContent(url)``; a ``None`` result
            is treated as "page could not be loaded".
        num: Page number interpolated into the listing URL.

    Returns:
        ``False`` when the page could not be fetched or no entry matched the
        listing pattern; ``None`` (implicitly) after the entries were inserted.

    Raises:
        IndexError: If a matched ``title`` attribute contains no ``/``
            separator (the part after the first ``/`` is stored as ``ename``).
    """
    dbObj = Dbobj('redio','re_')
    curTableObj = dbObj.getTbname('stars')
    url = "https://www.123fanhao.com/special-show-p-"+str(num)+".html"
    content = obj.mainContent(url)
    if content is None:
        return False
    # Raw string: avoids the invalid-escape warnings for \s and \S.
    # The class [.|\s|\S|\n] simply matches any character, newlines included.
    pattern = r'<div class="col-xs-6 col-sm-2 placeholder">([.|\s|\S|\n]*?)<a href="([.|\s|\S|\n]*?)" target="_blank"><img src="([.|\s|\S|\n]*?)" title="([.|\s|\S|\n]*?)" class="img-thumbnail"></a><h4><a href="([.|\s|\S|\n]*?)" target="_blank">([.|\s|\S|\n]*?)</a>([.|\s|\S|\n]*?)</div>'
    listCates = re.findall(pattern, content)
    # re.findall returns an empty list (never None) when nothing matches;
    # bail out here so insert_many is not handed an empty batch.
    if not listCates:
        return False
    data = []
    for v in listCates:
        # Capture groups: v[1] = link href, v[2] = image src,
        # v[3] = image title, v[5] = link text.
        data.append({
            '_id': dbObj.getNextValue('stars'),
            'ename': v[3].split('/')[1].strip(),
            'rel': v[1],
            'img': v[2],
            'cname': v[5].strip(),
            'status': 0,
        })
    curTableObj.insert_many(data)
예제 #2
0
def deRange(num):
    """Fetch one video listing page and upsert each entry into 'redios'.

    Entries are keyed by their whitespace-stripped ``id``; an entry is only
    written when no document with that ``id`` exists yet (``$setOnInsert``
    combined with ``upsert=True``).

    Args:
        num: Page number; values below 1 select the site's front page.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio','re_')
    curTableObj = dbObj.getTbname('redios')
    if num < 1:
        url = "https://www.qq.com/"
    else:
        url = "https://www.qq.com/new/"+str(num)

    for v in getObj.getVedioUrl(url):
        video_id = v['id'].strip()
        if not video_id:
            # Skip entries with an empty or whitespace-only id.
            continue
        # Store the same normalized id that the filter matches on; otherwise
        # an id with surrounding whitespace would never be found again and
        # would be re-inserted on every run.
        v['id'] = video_id
        v['_id'] = dbObj.getNextValue('redios')
        v['tags'] = v['cates'] = ''
        v['status'] = 0
        # NOTE(review): assumes curTableObj is a PyMongo collection, where
        # Collection.update is deprecated (removed in PyMongo 4) in favour of
        # update_one - confirm against Dbobj.getTbname.
        curTableObj.update_one({"id": video_id}, {"$setOnInsert": v}, upsert=True)
예제 #3
0
# Locate the trends container in the page, pull every (href, title) pair out
# of it, and insert the ones not already stored in the 'trends' collection.
trends = re.findall(pattern, htmlContent)
# re.findall returns an empty list (never None) when nothing matches, so test
# for emptiness before indexing the first match.
if not trends:
    exit(1)
trends = trends[0]
# Raw string avoids invalid-escape warnings; [.|\s|\S|\n] matches any character.
pattern = r'<li><a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a></li>'
trends = re.findall(pattern, trends)
if not trends:
    exit(2)
curTableObj = dbObj.getTbname('trends')
reTrends = []
for i in trends:
    curTitle = i[1].strip()
    # hrefs in the markup are HTML-escaped; restore plain '&' separators.
    curUrl = i[0].replace('&amp;', '&').strip()
    # A trend counts as already known if either its title or its URL is stored.
    curInfo = curTableObj.find_one(
        {"$or": [{
            "title": curTitle
        }, {
            "url": curUrl
        }]})

    if curInfo is None:
        reTrends.append({
            'url': curUrl,
            'title': curTitle,
            '_id': dbObj.getNextValue('trends')
        })

# Only call insert_many when there is at least one new document.
if reTrends:
    curTableObj.insert_many(reTrends)
예제 #4
0
# Parse the category links out of the first matched tag container, store them
# in the 'category' collection, then extract the trend links from the page.
if not tags:
    exit(2)
tags = tags[0]
# Raw string avoids invalid-escape warnings; [.|\s|\S|\n] matches any character.
pattern = r'<a href="([.|\s|\S|\n]*?)" class="btn btn-default">([.|\s|\S|\n]*?)</a>'
listTags = re.findall(pattern, tags)
reTags = []
for i in listTags:
    # Links with an empty label are ignored.
    if len(i[1]) > 0:
        reTags.append({
            # hrefs in the markup are HTML-escaped; restore plain '&'.
            'url': i[0].replace('&amp;', '&'),
            'title': i[1],
            '_id': dbObj.getNextValue('category')
        })
curTableObj = dbObj.getTbname('category')
# Skip the bulk insert when no labelled link was found, so insert_many is
# never handed an empty batch.
if reTags:
    curTableObj.insert_many(reTags)
arr = []

pattern = r'<div class="home-trends ordered-label-list">([.|\s|\S|\n]*?)</div>'
trends = re.findall(pattern, htmlContent)
# re.findall returns an empty list (never None) when nothing matches, and the
# former `len(trends[0]) < 0` test could never be true; exit when the
# container is missing or its content is empty.
if not trends or not trends[0]:
    exit(3)
pattern = r'<a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a>'
tags = re.findall(pattern, trends[0])

if not tags:
    exit(4)
curTableObj = dbObj.getTbname('trends')