コード例 #1
0
def getDetail():
    """Poll the 'redios' table and attach detail tags to pending records.

    Runs forever. Each pass reads up to 10 documents whose ``status`` is 0
    (highest ``_id`` first), calls ``vdetail`` on the ``videoDemo`` helper for
    each of them, then sets ``status`` to 1 and stores ``tags`` unless
    ``vdetail`` returned False. When a pass finds no pending document the
    loop waits 10 seconds before polling again.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('redios')
    base = 'https://www.qq.com/'
    while True:
        # A pymongo cursor is never None, so the batch has to be materialised
        # before "nothing to do" can be detected (it is capped at 10 items).
        pending = list(
            curTableObj.find({"status": 0})
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not pending:
            time.sleep(10)
            continue

        for v in pending:
            # Use 'rel' when present; fall back to 'url' when it is missing
            # or stored as 0.
            _rel = v.get('rel', 0)
            if _rel == 0:
                _rel = v.get('url')
            curUrl = base + _rel.strip('/')
            tags = getObj.vdetail(curUrl, v['id'])

            _curUpData = {'status': 1}
            # Tags are only written when vdetail did not return False.
            if tags != False:
                _curUpData['tags'] = tags
            curTableObj.update({"id": v['id']}, {"$set": _curUpData})
        # Short pause between batches.
        time.sleep(0.5)
コード例 #2
0
def poolDetail(start, end):
    """Process pending 'redios' records whose ``_id`` is in ``[start, end)``.

    Each pass reads up to 10 documents with ``status`` 0 inside the ``_id``
    range (highest ``_id`` first). For every document it calls ``vdetail``
    on the ``videoDemo`` helper, translates the cleaned title with
    ``GoogleTranslator`` and updates the record:

    * ``status`` 3 when the translation is an empty string,
    * ``status`` 1 plus ``ctitle`` (the translation) otherwise,
    * ``tags`` unless ``vdetail`` returned False.

    The function returns once a pass finds no pending document in the range.

    Args:
        start: Inclusive lower bound for ``_id``.
        end: Exclusive upper bound for ``_id``.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('redios')
    base = 'https://www.qq.com/'
    query = {
        "status": 0,
        "_id": {
            "$gte": start,
            "$lt": end
        }
    }
    while True:
        # A pymongo cursor is never None; materialise the (at most 10-item)
        # batch so that an exhausted range actually ends the loop.
        pending = list(
            curTableObj.find(query)
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not pending:
            break

        g = GoogleTranslator()
        for v in pending:
            curUrl = base + v['rel'].strip('/')
            tags = getObj.vdetail(curUrl, v['id'])

            # Strip the '&#039' entity prefix and any remaining ampersands
            # before the title is sent to the translator.
            title = v['title'].replace('&#039', '').replace('&', '')
            s = g.translate(title)

            _curUpData = {}
            if s == '':
                _curUpData['status'] = 3
            else:
                _curUpData['status'] = 1
                _curUpData['ctitle'] = s
            # Tags are only written when vdetail did not return False.
            if tags != False:
                _curUpData['tags'] = tags
            curTableObj.update({"id": v['id']}, {"$set": _curUpData})
        # Short pause between batches.
        time.sleep(0.5)
コード例 #3
0
ファイル: getStarImg.py プロジェクト: phil1988132/VEDIO
def getDetail():
    """Fetch profile pictures for 'local_stars' records that have no ``imgs``.

    Runs forever. Each pass reads up to 10 documents without an ``imgs``
    field (highest ``_id`` first). For each one it loads the page at
    ``base + rel + '#_tabAboutMe'`` through ``mainContent``, extracts an
    ``<img src>`` from the ``profile-pic`` block, hands the image URL and the
    path from ``starDir`` to ``getOtherSource`` and finally sets ``imgs`` to
    an empty string on the record. When a pass finds no matching document
    the loop waits 10 seconds before polling again.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('local_stars')
    base = 'https://www.xvideos.com/'
    # Raw string so that \s and \S are regex escapes rather than invalid
    # string escapes; the compiled expression is unchanged.
    pattern = re.compile(
        r'<div class="profile-pic">([.|\s|\S|\n]*?)<img src="(.*)" onerror=([.|\s|\S|\n]*?)</div>'
    )
    while True:
        # A pymongo cursor is never None; materialise the (at most 10-item)
        # batch so that the idle back-off below can actually trigger.
        pending = list(
            curTableObj.find({"imgs": {"$exists": False}})
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not pending:
            time.sleep(10)
            continue

        for v in pending:
            # A record without 'rel' yields 0 here and fails on .strip(),
            # exactly as before.
            _rel = v.get('rel', 0)
            curUrl = base + _rel.strip('/') + '#_tabAboutMe'
            content = getObj.mainContent(curUrl)
            if content is None:
                continue

            # findall returns a list of (group1, src, group3) tuples and
            # never None, so the length is what has to be checked.
            listCates = pattern.findall(content)
            # NOTE(review): the second match is used, as in the original
            # code; confirm that index 1 (not 0) is really intended.
            if len(listCates) < 2:
                break
            img = listCates[1]

            file = starDir(v['_id'])
            getObj.getOtherSource(file, img[1])
            curTableObj.update({"_id": v['_id']}, {"$set": {'imgs': ''}})
        # Short pause between batches.
        time.sleep(0.5)
コード例 #4
0
 def failedLog(self, content):
     """Insert ``content`` into the 'failedLog' table.

     The table object is obtained from ``Dbobj('redio', 're_')`` on every
     call and ``content`` is passed to its ``insert`` method unchanged.
     """
     failed_log_table = Dbobj('redio', 're_').getTbname('failedLog')
     failed_log_table.insert(content)
コード例 #5
0
ファイル: getNewTrends.py プロジェクト: phil1988132/VEDIO
# Abort when the page could not be fetched.
if htmlContent is None:
    exit(1)
dbObj = Dbobj('redio', 're_')

# Step 1: isolate the secondary navigation block of the page.
# Raw strings keep \s / \S as regex escapes instead of invalid string escapes.
pattern = r'<nav aria-label="secondary">([.|\s|\S|\n]*?)</nav>'
trends = re.findall(pattern, htmlContent)
# re.findall returns a (possibly empty) list, never None; an empty list
# must exit here, otherwise trends[0] below raises IndexError.
if not trends:
    exit(1)
trends = trends[0]

# Step 2: pull (href, label) pairs out of the navigation block.
pattern = r'<li><a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a></li>'
trends = re.findall(pattern, trends)
if not trends:
    exit(2)
curTableObj = dbObj.getTbname('trends')
reTrends = []
for i in trends:
    curTitle = i[1].strip()
    curUrl = i[0].replace('&amp;', '&').strip()
    curInfo = curTableObj.find_one(
        {"$or": [{
            "title": curTitle
        }, {
            "url": curUrl
        }]})

    if curInfo is None:
        #print(i[0].replace('&amp;','&'));exit('1')
        reTrends.append({
            'url': curUrl,
コード例 #6
0
tags = tags[0]
# Step 1: collect (href, label) pairs of the category buttons.
# Raw strings keep \s / \S as regex escapes instead of invalid string escapes.
pattern = r'<a href="([.|\s|\S|\n]*?)" class="btn btn-default">([.|\s|\S|\n]*?)</a>'
listTags = re.findall(pattern, tags)
reTags = []
for tagUrl, tagTitle in listTags:
    # Links without a label are ignored.
    if tagTitle:
        reTags.append({
            # Undo the HTML escaping of '&' inside the link.
            'url': tagUrl.replace('&amp;', '&'),
            'title': tagTitle,
            '_id': dbObj.getNextValue('category')
        })
curTableObj = dbObj.getTbname('category')
# insert_many rejects an empty document list, so skip the call in that case.
if reTags:
    curTableObj.insert_many(reTags)
arr = []

# Step 2: locate the home-trends block and extract its links.
pattern = r'<div class="home-trends ordered-label-list">([.|\s|\S|\n]*?)</div>'
trends = re.findall(pattern, htmlContent)
# re.findall returns a (possibly empty) list, never None; an empty list
# must exit here, otherwise trends[0] below raises IndexError.
if not trends:
    exit(3)
pattern = r'<a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a>'
tags = re.findall(pattern, trends[0])

if not tags:
    exit(4)
curTableObj = dbObj.getTbname('trends')
reTrends = []
for i in tags:
コード例 #7
0
 def __dbInfo(self, tableName):
     """Return the table object that ``Dbobj('redio', 're_')`` yields for ``tableName``."""
     # The videoDemo instance is not used afterwards; it is still constructed
     # because the original code does so on every call.
     _unused_demo = videoDemo()
     return Dbobj('redio', 're_').getTbname(tableName)