Example #1
def getData(keyWord = '',type = ''):
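    # Fetch the Tencent Video ranking page for keyWord, pair each cover link's
    # title with the <strong class="num"> view count that follows it, and append
    # the results to today's daily / weekly / monthly rank file.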
    targetUrl = getUrl(keyWord,type)
    todayTime = datetime.date.today()
    filePath = FileHelper.getPath(todayTime,'tencent',urls[keyWord])
    fileName = '%s/html.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName,targetUrl=targetUrl)
    content = open(fileName,'r')
    temp = content.read().decode('utf-8')
    soup = BeautifulSoup(temp,'html.parser')
    count = -1  # filter out the first entry (not actually used in this variant)
    Rank = 0
    resultFileName=''
    if type == TYPE_DAILY:
        resultFileName='%s/RankDaily.txt' % filePath
    if type == TYPE_WEEKLY:
        resultFileName='%s/RankWeekly.txt' % filePath
    if type == TYPE_MONTHLY:
        resultFileName='%s/RankMonthly.txt' % filePath
    for tag in soup.findAll(True):
        if(tag.name == 'a' and tag.has_attr('href') and (''.join(tag['href']).find('cover') > 0)):
            Rank = Rank+1
            FileHelper.save2File(content = 'Rank.%s %s ' % (Rank,tag['title'] ), fileName=resultFileName,type= FileHelper.TYPE_APPEND)
        if (tag.has_attr('class') and tag.name == 'strong' and ''.join(tag['class']) == 'num'):
            FileHelper.save2File(content = 'VV : %s \n' % tag.string , fileName=resultFileName,type= FileHelper.TYPE_APPEND)

    content.close()
    FileHelper.delFile(fileName)
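The Tencent variant above pairs every cover link (an 'a' tag whose href contains 'cover') with the <strong class="num"> that follows it; getUrl, urls, FileHelper and the TYPE_* constants are module-level pieces not shown in these examples. Below is a minimal, self-contained sketch of just that pairing logic, run against a hand-written HTML fragment rather than the real Tencent markup, which is an assumption:

from bs4 import BeautifulSoup

SAMPLE_HTML = """
<div>
  <a href="https://v.qq.com/x/cover/abc.html" title="Show A">Show A</a>
  <strong class="num">120,000,000</strong>
  <a href="https://v.qq.com/x/cover/def.html" title="Show B">Show B</a>
  <strong class="num">98,760,000</strong>
</div>
"""

def extract_ranks(html):
    soup = BeautifulSoup(html, 'html.parser')
    rank = 0
    rows = []
    for tag in soup.find_all(True):
        # a cover link carries the show title ...
        if tag.name == 'a' and tag.has_attr('href') and 'cover' in tag['href'] and tag.has_attr('title'):
            rank = rank + 1
            rows.append({'rank': rank, 'title': tag['title'], 'vv': None})
        # ... and the next <strong class="num"> carries its view count
        elif tag.name == 'strong' and tag.get('class') == ['num'] and rows:
            rows[-1]['vv'] = tag.string
    return rows

for row in extract_ranks(SAMPLE_HTML):
    print('Rank.%(rank)s %(title)s VV : %(vv)s' % row)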
Example #2
def getData(keyWord = '',page = -1,type = ''):
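    # Fetch one page of the Youku ranking chart for keyWord (42 entries per page),
    # pair each matching <img> tag's alt text with the <span class="p-num"> view
    # count, and append the results to today's daily / weekly rank file.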
    targetUrl = getUrl(keyWord,page,type)
    todayTime = datetime.date.today()
    filePath = FileHelper.getPath(todayTime,'youku',urls[keyWord])
    fileName = '%s/html.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName,targetUrl=targetUrl)
    content = open(fileName,'r')
    temp = content.read().decode('utf-8')
    soup = BeautifulSoup(temp,'html.parser')
    count = -1  # filter out the first entry (not actually used in this variant)
    Rank = (page-1) * 42
    resultFileName=''
    if type == TYPE_DAILY:
        resultFileName='%s/RankDaily.txt' % filePath
    if type == TYPE_WEEKLY:
        resultFileName='%s/RankWeekly.txt' % filePath
    for tag in soup.findAll(True):
        if(tag.name == 'img' and tag.has_attr('alt') and tag.has_attr('src') and not tag.has_attr('attr')):
            Rank = Rank+1
            FileHelper.save2File(content = 'Rank.%s %s ' % (Rank,tag['alt'] ), fileName=resultFileName,type= FileHelper.TYPE_APPEND)
        if (tag.has_attr('class') and tag.name == 'span' and ''.join(tag['class']) == 'p-num'):
            FileHelper.save2File(content = 'VV : %s \n' % tag.string , fileName=resultFileName,type= FileHelper.TYPE_APPEND)

    content.close()
    FileHelper.delFile(fileName)
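Example #2 is the paginated variant of the same scan: the running rank is seeded with (page - 1) * 42, i.e. 42 chart entries per page, a figure taken from this code rather than from any Youku documentation. The offset arithmetic on its own, as a hypothetical helper:

ITEMS_PER_PAGE = 42  # assumed from the (page - 1) * 42 seed in getData above

def global_rank(page, index_on_page):
    # 1-based chart rank of the index_on_page-th item (0-based) on page (1-based)
    return (page - 1) * ITEMS_PER_PAGE + index_on_page + 1

assert global_rank(1, 0) == 1     # first item on page 1
assert global_rank(2, 0) == 43    # first item on page 2
assert global_rank(3, 41) == 126  # last item on page 3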
Example #3
def getData(keyWord = '',type = ''):
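    # Fetch the 'aqy' (iQiyi) ranking feed for keyWord as JSON and append one
    # "Rank.N album_name VV : count" line per entry to today's daily or weekly
    # rank file.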
    targetUrl = getUrl(keyWord,type)
    todayTime = datetime.date.today()
    filePath = FileHelper.getPath(todayTime,'aqy',urls[keyWord])
    fileName = '%s/json.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName,targetUrl=targetUrl)
    content = open(fileName,'r')
    temp = content.read().decode('utf-8')
    dic = json.loads(temp)
    key = ''
    resultFileName = ''
    if type == TYPE_DAILY:
        resultFileName='%s/RankDaily.txt' % filePath
        key = 'album_count_yesterday'
    if type == TYPE_WEEKLY:
        resultFileName='%s/RankWeekly.txt' % filePath
        key = 'album_count_lastweek'
    count = len(dic['data'])
    for i in range(count):
        FileHelper.save2File(content= 'Rank.%s %s VV : %s \n' % (i+1,dic['data'][i]['album_name'],dic['data'][i][key]), fileName=resultFileName,type= FileHelper.TYPE_APPEND)

    content.close()
    FileHelper.delFile(fileName)
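Example #3 is the JSON variant: the response is expected to be an object with a 'data' list whose entries carry album_name plus one count field per period. A minimal sketch of that handling against an inline payload; the field names are assumptions copied from the code above, not a documented API:

import json

SAMPLE_JSON = """
{"data": [
  {"album_name": "Show A", "album_count_yesterday": 1200000, "album_count_lastweek": 8400000},
  {"album_name": "Show B", "album_count_yesterday": 950000, "album_count_lastweek": 6100000}
]}
"""

def format_rank_lines(raw, key):
    dic = json.loads(raw)
    lines = []
    for i, item in enumerate(dic['data']):
        lines.append('Rank.%s %s VV : %s' % (i + 1, item['album_name'], item[key]))
    return lines

for line in format_rank_lines(SAMPLE_JSON, 'album_count_yesterday'):
    print(line)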
Example #4
def getData(keyWord = ''):
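    # Fetch the letv (Le.com) ranking page for keyWord; the page carries the
    # daily, weekly and monthly boards back to back, 50 rows each, so the
    # running rank decides which result file a row belongs to.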
    targetUrl = getUrl(keyWord)
    todayTime = datetime.date.today()
    filePath = FileHelper.getPath(todayTime,'letv',urls[keyWord])
    fileName = '%s/html.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName,targetUrl=targetUrl)
    content = open(fileName,'r')
    temp = content.read().decode('utf-8')
    soup = BeautifulSoup(temp,'html.parser')
    rank = 0
    for tag in soup.findAll(True):
        # every 50 entries form one board: daily, then weekly, then monthly
        if(tag.name == 'a' and tag.has_attr('href') and (''.join(tag['href']).find('www.le.com') > 0) and rank<150):
            rank = rank + 1
            if rank <= 50:
                FileHelper.save2File(content = 'Rank.%s %s ' % (rank,tag.string ), fileName='%s/RankDaily.txt' % filePath,type= FileHelper.TYPE_APPEND)
            if 50 < rank <= 100:
                FileHelper.save2File(content = 'Rank.%s %s ' % (rank-50,tag.string ), fileName='%s/RankWeekly.txt' % filePath,type= FileHelper.TYPE_APPEND)
            if 100 < rank <= 150:
                FileHelper.save2File(content = 'Rank.%s %s ' % (rank-100,tag.string ), fileName='%s/RankMonthly.txt' % filePath,type= FileHelper.TYPE_APPEND)

        if (tag.has_attr('class') and tag.name == 'span' and ''.join(tag['class']) == 't-5'  and rank<150):
            if rank <= 50:
                FileHelper.save2File(content = 'VV : %s \n' % tag.string , fileName='%s/RankDaily.txt' % filePath,type= FileHelper.TYPE_APPEND)
            if 50 < rank <= 100:
                FileHelper.save2File(content = 'VV : %s \n' % tag.string , fileName='%s/RankWeekly.txt' % filePath,type= FileHelper.TYPE_APPEND)
            if 100 < rank <= 150:
                FileHelper.save2File(content = 'VV : %s \n' % tag.string , fileName='%s/RankMonthly.txt' % filePath,type= FileHelper.TYPE_APPEND)

    content.close()
    FileHelper.delFile(fileName)
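Example #4 folds three boards into one pass: the page is assumed to list the daily, weekly and monthly charts back to back, 50 rows each, and the running rank picks the target file. The same mapping as a small stand-alone helper (file names and the 50-row block size are taken from the thresholds above):

BOARD_FILES = ['RankDaily.txt', 'RankWeekly.txt', 'RankMonthly.txt']
BLOCK_SIZE = 50  # rows per board, per the rank <= 50 / 100 / 150 thresholds above

def board_for(rank):
    # map a 1-based running rank to (file name, rank within that board); None past 150
    block = (rank - 1) // BLOCK_SIZE
    if block >= len(BOARD_FILES):
        return None
    return BOARD_FILES[block], rank - block * BLOCK_SIZE

assert board_for(1) == ('RankDaily.txt', 1)
assert board_for(51) == ('RankWeekly.txt', 1)
assert board_for(150) == ('RankMonthly.txt', 50)
assert board_for(151) is None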
Example #5
def getData(keyWord = ''):
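    # Fetch the Sohu TV page for keyWord and split the matched title / view count
    # strings into daily (1-200), weekly (201-400), monthly (401-600) and total
    # (>600) result files based on a running counter.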
    if keyWord == '':
        print 'missing keyWord parameter'
        return False
    todayTime = datetime.date.today()
    targetUrl = 'http://tv.sohu.com/%s' % keyWord
    filePath = FileHelper.getPath(todayTime,'sohu',urls[keyWord])
    fileName = '%s/html.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName,targetUrl=targetUrl)
    content = open(fileName,'r')
    temp = content.read().decode('utf-8')
    soup = BeautifulSoup(temp,'html.parser')
    count = -1  # start at -1 so the very first match (count == 0) is skipped
    for tag in soup.findAll(True):
        if tag.has_attr('class'):
            #print ''.join(tag['class'])
            if (''.join(tag['class']) == 'at' and tag.name == 'a') or (tag.name == 'span' and ''.join(tag['class'])  == 'vTotal'):
                count = count + 1
                if 1 <= count <= 200:
                    if count == 1:
                        FileHelper.save2File(content= 'RankDaily \n' , fileName=('%s/RankDaily.txt'% filePath),type= FileHelper.TYPE_APPEND)
                    # daily board data
                    FileHelper.save2File(content= '%s \n' %tag.string , fileName=('%s/RankDaily.txt'% filePath ) ,type= FileHelper.TYPE_APPEND)
                if 200 < count <= 400:
                    if count == 201:
                        FileHelper.save2File(content= 'RankWeekly \n' , fileName=('%s/RankWeekly.txt'% filePath ) ,type= FileHelper.TYPE_APPEND)
                    # weekly board data
                    FileHelper.save2File(content= '%s \n' %tag.string , fileName=('%s/RankWeekly.txt'% filePath ) ,type= FileHelper.TYPE_APPEND)
                if 400 < count <= 600:
                    if count == 401:
                        FileHelper.save2File(content= 'RankMonthly \n' , fileName=('%s/RankMonthly.txt'% filePath ) ,type= FileHelper.TYPE_APPEND)
                    # monthly board data
                    FileHelper.save2File(content= '%s \n' %tag.string , fileName=('%s/RankMonthly.txt'% filePath ) ,type= FileHelper.TYPE_APPEND)
                if count > 600:
                    if count == 601:
                        FileHelper.save2File(content= 'RankTotal \n' , fileName=('%s/RankTotal.txt'% filePath ) ,type= FileHelper.TYPE_APPEND)
                    # overall (all-time) totals
                    FileHelper.save2File(content= '%s \n' %tag.string , fileName=('%s/RankTotal.txt'% filePath) ,type= FileHelper.TYPE_APPEND)
                    #print tag.string

    content.close()
    FileHelper.delFile(fileName)