def getData(keyWord = '',type = ''):
    """Scrape the Tencent video ranking page and append rank/VV rows.

    keyWord -- key into the module-level ``urls`` mapping (also passed to
               ``getUrl`` to build the target URL).
    type    -- one of TYPE_DAILY / TYPE_WEEKLY / TYPE_MONTHLY; selects the
               output file. An unrecognized value leaves the output file
               name empty (original behavior preserved).

    Side effects: downloads the page to a temporary html.txt under the
    dated path, appends parsed rows to Rank{Daily,Weekly,Monthly}.txt,
    then deletes the temporary file.
    """
    targetUrl = getUrl(keyWord, type)
    todayTime = datetime.date.today()
    filePath = FileHelper.getPath(todayTime, 'tencent', urls[keyWord])
    fileName = '%s/html.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName, targetUrl=targetUrl)
    # 'with' guarantees the handle is closed even if parsing raises
    # (the original leaked the descriptor on the exception path).
    with open(fileName, 'r') as content:
        temp = content.read().decode('utf-8')
    soup = BeautifulSoup(temp, 'html.parser')
    Rank = 0
    resultFileName = ''
    if type == TYPE_DAILY:
        resultFileName = '%s/RankDaily.txt' % filePath
    if type == TYPE_WEEKLY:
        resultFileName = '%s/RankWeekly.txt' % filePath
    if type == TYPE_MONTHLY:
        resultFileName = '%s/RankMonthly.txt' % filePath
    for tag in soup.findAll(True):
        # Anchors whose href contains 'cover' carry the ranked title in
        # their title attribute.
        if tag.name == 'a' and tag.has_attr('href') and ''.join(tag['href']).find('cover') > 0:
            Rank = Rank + 1
            FileHelper.save2File(content='Rank.%s %s ' % (Rank, tag['title']),
                                 fileName=resultFileName, type=FileHelper.TYPE_APPEND)
        # <strong class="num"> presumably holds the view count (VV) for the
        # preceding title -- verify against the live page markup.
        if tag.has_attr('class') and tag.name == 'strong' and ''.join(tag['class']) == 'num':
            FileHelper.save2File(content='VV : %s \n' % tag.string,
                                 fileName=resultFileName, type=FileHelper.TYPE_APPEND)
    # Remove the temporary raw-HTML dump.
    FileHelper.delFile(fileName)
def getData(keyWord = '',page = -1,type = ''):
    """Scrape one page of the Youku ranking list and append rank/VV rows.

    keyWord -- key into the module-level ``urls`` mapping.
    page    -- 1-based page number; Youku lists 42 entries per page, so
               numbering starts at (page-1)*42 + 1.
    type    -- TYPE_DAILY or TYPE_WEEKLY; selects the output file. An
               unrecognized value leaves the file name empty (original
               behavior preserved).

    Side effects: downloads the page to a temporary html.txt, appends
    parsed rows to Rank{Daily,Weekly}.txt, then deletes the temp file.
    """
    targetUrl = getUrl(keyWord, page, type)
    todayTime = datetime.date.today()
    filePath = FileHelper.getPath(todayTime, 'youku', urls[keyWord])
    fileName = '%s/html.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName, targetUrl=targetUrl)
    # 'with' closes the handle even when parsing raises (the original
    # leaked the descriptor on the exception path).
    with open(fileName, 'r') as content:
        temp = content.read().decode('utf-8')
    soup = BeautifulSoup(temp, 'html.parser')
    # Absolute rank of the entry just before this page's first entry.
    Rank = (page - 1) * 42
    resultFileName = ''
    if type == TYPE_DAILY:
        resultFileName = '%s/RankDaily.txt' % filePath
    if type == TYPE_WEEKLY:
        resultFileName = '%s/RankWeekly.txt' % filePath
    for tag in soup.findAll(True):
        # Poster <img> tags (alt = title) mark ranked entries; tags
        # carrying an 'attr' attribute are excluded -- presumably ad or
        # placeholder images, TODO confirm against the live markup.
        if tag.name == 'img' and tag.has_attr('alt') and tag.has_attr('src') and not tag.has_attr('attr'):
            Rank = Rank + 1
            FileHelper.save2File(content='Rank.%s %s ' % (Rank, tag['alt']),
                                 fileName=resultFileName, type=FileHelper.TYPE_APPEND)
        # <span class="p-num"> holds the view-count (VV) figure.
        if tag.has_attr('class') and tag.name == 'span' and ''.join(tag['class']) == 'p-num':
            FileHelper.save2File(content='VV : %s \n' % tag.string,
                                 fileName=resultFileName, type=FileHelper.TYPE_APPEND)
    # Remove the temporary raw-HTML dump.
    FileHelper.delFile(fileName)
def getData(keyWord = '',type = ''):
    """Fetch the iQiyi (aqy) ranking JSON and append rank/VV rows.

    keyWord -- key into the module-level ``urls`` mapping.
    type    -- TYPE_DAILY or TYPE_WEEKLY; selects both the output file
               and which count field of each album record to report
               (album_count_yesterday vs album_count_lastweek). An
               unrecognized value leaves both empty (original behavior
               preserved, which would raise KeyError('') per album).

    Side effects: downloads the JSON to a temporary json.txt, appends one
    line per album to Rank{Daily,Weekly}.txt, then deletes the temp file.
    """
    targetUrl = getUrl(keyWord, type)
    todayTime = datetime.date.today()
    filePath = FileHelper.getPath(todayTime, 'aqy', urls[keyWord])
    fileName = '%s/json.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName, targetUrl=targetUrl)
    # 'with' closes the handle even when json.loads raises (the original
    # leaked the descriptor on the exception path).
    with open(fileName, 'r') as content:
        dic = json.loads(content.read().decode('utf-8'))
    key = ''
    resultFileName = ''
    if type == TYPE_DAILY:
        resultFileName = '%s/RankDaily.txt' % filePath
        key = 'album_count_yesterday'
    if type == TYPE_WEEKLY:
        resultFileName = '%s/RankWeekly.txt' % filePath
        key = 'album_count_lastweek'
    # enumerate replaces the original index loop over range(len(...)).
    for rank, album in enumerate(dic['data'], 1):
        FileHelper.save2File(content='Rank.%s %s VV : %s \n' % (rank, album['album_name'], album[key]),
                             fileName=resultFileName, type=FileHelper.TYPE_APPEND)
    # Remove the temporary raw-JSON dump.
    FileHelper.delFile(fileName)
def getData(keyWord = ''):
    """Scrape the LeTV ranking page: one page carries three 50-item lists.

    keyWord -- key into the module-level ``urls`` mapping.

    The page lists 150 ranked anchors in sequence: items 1-50 are the
    daily list, 51-100 weekly, 101-150 monthly. Each anchor is followed
    by a <span class="t-5"> holding its view count (VV).

    Fixes vs. the original: the file handle is closed via 'with' even on
    exceptions, and the 150th item's VV is no longer dropped (the old
    code checked ``rank < 150`` *after* incrementing rank, so the last
    VV span was skipped). An explicit pairing flag also prevents stray
    t-5 spans after item 150 from being written.
    """
    targetUrl = getUrl(keyWord)
    todayTime = datetime.date.today()
    filePath = FileHelper.getPath(todayTime, 'letv', urls[keyWord])
    fileName = '%s/html.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName, targetUrl=targetUrl)
    with open(fileName, 'r') as content:
        temp = content.read().decode('utf-8')
    soup = BeautifulSoup(temp, 'html.parser')

    outFiles = ('%s/RankDaily.txt' % filePath,
                '%s/RankWeekly.txt' % filePath,
                '%s/RankMonthly.txt' % filePath)

    def _target(absoluteRank):
        # Map absolute rank 1..150 to (output file, rank within its
        # 50-item bucket).
        bucket = (absoluteRank - 1) // 50
        return outFiles[bucket], absoluteRank - bucket * 50

    rank = 0
    expectVV = False  # True while the last written title awaits its VV span
    for tag in soup.findAll(True):
        # Ranked titles are anchors pointing at www.le.com; stop at 150.
        if tag.name == 'a' and tag.has_attr('href') and ''.join(tag['href']).find('www.le.com') > 0 and rank < 150:
            rank = rank + 1
            out, localRank = _target(rank)
            FileHelper.save2File(content='Rank.%s %s ' % (localRank, tag.string),
                                 fileName=out, type=FileHelper.TYPE_APPEND)
            expectVV = True
        # The t-5 span immediately after a title carries its view count.
        if tag.has_attr('class') and tag.name == 'span' and ''.join(tag['class']) == 't-5' and expectVV:
            out, localRank = _target(rank)
            FileHelper.save2File(content='VV : %s \n' % tag.string,
                                 fileName=out, type=FileHelper.TYPE_APPEND)
            expectVV = False
    # Remove the temporary raw-HTML dump.
    FileHelper.delFile(fileName)
def getData(keyWord = ''):
    """Scrape the Sohu TV page at ``http://tv.sohu.com/<keyWord>``.

    keyWord -- URL path segment and key into the module-level ``urls``
               mapping; returns False immediately when empty.

    Matching tags (<a class="at"> titles and <span class="vTotal"> view
    counts) appear in one long sequence; their position decides the
    list: entries 1-200 daily, 201-400 weekly, 401-600 monthly, 601+
    grand total. The very first match is a header artifact and is
    skipped (count starts at -1). Every output string -- including the
    'RankMonth' header spelling -- is preserved byte-for-byte.
    """
    if keyWord == '':
        # Parenthesized form behaves identically under Python 2 and 3
        # (the original used the Py2-only print statement).
        print('params lost')
        return False
    todayTime = datetime.date.today()
    targetUrl = 'http://tv.sohu.com/%s' % keyWord
    filePath = FileHelper.getPath(todayTime, 'sohu', urls[keyWord])
    fileName = '%s/html.txt' % filePath
    FileHelper.getContent2FileWithoutProcess(fileName=fileName, targetUrl=targetUrl)
    # 'with' closes the handle even when parsing raises (the original
    # leaked the descriptor on the exception path).
    with open(fileName, 'r') as content:
        temp = content.read().decode('utf-8')
    soup = BeautifulSoup(temp, 'html.parser')

    # (first index, last index or None for unbounded, output file, header)
    sections = (
        (1, 200, '%s/RankDaily.txt' % filePath, 'RankDaily \n'),
        (201, 400, '%s/RankWeekly.txt' % filePath, 'RankWeekly \n'),
        (401, 600, '%s/RankMonthly.txt' % filePath, 'RankMonth \n'),
        (601, None, '%s/RankTotal.txt' % filePath, 'RankTotal \n'),
    )
    count = -1  # first match is a header artifact; index 0 is skipped
    for tag in soup.findAll(True):
        if not tag.has_attr('class'):
            continue
        cls = ''.join(tag['class'])
        if (cls == 'at' and tag.name == 'a') or (tag.name == 'span' and cls == 'vTotal'):
            count = count + 1
            for low, high, outFile, header in sections:
                if count >= low and (high is None or count <= high):
                    if count == low:
                        # Write the section header once, on its first entry.
                        FileHelper.save2File(content=header, fileName=outFile,
                                             type=FileHelper.TYPE_APPEND)
                    FileHelper.save2File(content='%s \n' % tag.string,
                                         fileName=outFile, type=FileHelper.TYPE_APPEND)
                    break
    # Remove the temporary raw-HTML dump.
    FileHelper.delFile(fileName)