def updateShowDetail(self, s_id):
    """Fetch one show's page and print the detail fields scraped from it.

    The page URL is ``self.config.url`` followed by the link that
    ``Database.getOneLinkBySid`` returns for ``s_id``.  The scraped values
    are gathered into a dict and printed; nothing is returned or stored.

    Args:
        s_id: Identifier of the show.  It is passed to ``getOneLinkBySid``
            and echoed back under the ``'s_id'`` key of the printed dict.

    Raises:
        AttributeError, IndexError: If the page does not contain the
            elements looked up below (a lookup that matches nothing yields
            ``None`` or a shorter list, and the following access fails).
    """
    db = Database(self.log, self.config)
    url_target = self.config.url + db.getOneLinkBySid(s_id)

    cookie_jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    request = urllib2.Request(url=url_target)

    # Close the response explicitly so the connection is released even if
    # read() raises.
    response = opener.open(request)
    try:
        html_data = response.read()
    finally:
        response.close()

    if not html_data:
        return

    soup = BeautifulSoup(html_data)

    # Description of the show.  Single quotes are backslash-escaped,
    # presumably for later use inside a quoted SQL string -- TODO confirm.
    description = soup.find('p', attrs={'class': 'sumtext'}).get_text()
    description = description.replace("'", "\\'")

    quick_info = soup.find('aside', attrs={'class': 'quikinfo'})
    info_items = quick_info.findAll('li')

    # Weekly air date: text of the link inside the first list item.
    update_time = info_items[0].a.get_text()

    # The fixed offsets below drop a leading prefix from each item's text,
    # presumably the field label -- verify against the live page markup.
    # Length of each episode.
    length = info_items[1].get_text()[17:]
    # Region and TV channel (note: channel is item 2, region is item 3).
    area = info_items[3].get_text()[10:]
    channel = info_items[2].get_text()[10:]
    status = info_items[5].get_text()[14:]

    detail_of_show = {
        's_id': s_id,
        's_description': description,
        'update_time': update_time,
        'length': length,
        'area': area,
        'channel': channel,
        'status': status,
    }
    # Single-argument form prints identically under Python 2's print
    # statement and Python 3's print function.
    print(detail_of_show)
def workWithOneShowsEp(self, s_id):
    """Fetch one show's page and print every episode of every season.

    The page URL is ``self.config.url`` followed by the link that
    ``Database.getOneLinkBySid`` returns for ``s_id``.  For each season
    (``<li class="parent">``) and each episode inside it, a dict with the
    keys ``s_id``, ``se_id``, ``e_num``, ``e_name``, ``e_status``,
    ``e_description`` and ``e_time`` is built and printed.  Nothing is
    returned or stored.

    Args:
        s_id: Identifier of the show.  It is passed to ``getOneLinkBySid``
            and echoed back in every printed episode dict.

    Raises:
        AttributeError, KeyError, TypeError: If a season or episode entry
            does not contain the elements, attributes or text patterns
            looked up below.
    """
    db = Database(self.log, self.config)
    url_target = self.config.url + db.getOneLinkBySid(s_id)

    cookie_jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    request = urllib2.Request(url=url_target)

    # Close the response explicitly so the connection is released even if
    # read() raises.
    response = opener.open(request)
    try:
        html_data = response.read()
    finally:
        response.close()

    if not html_data:
        return

    soup = BeautifulSoup(html_data)
    seasons = soup.findAll('li', attrs={'class': 'parent'})
    print('the len of biLists is ' + str(len(seasons)))

    for season in seasons:
        # The heading contains "Season <n>"; keep only the number, i.e.
        # everything after the 7-character "Season " prefix.
        heading = season.strong.get_text()
        se_id = re.search(r'Season\s\d{1,2}', heading).group()[7:]

        episodes = (
            season.findAll('li', attrs={'class': 'ep info RAWR'})
            + season.findAll('li', attrs={'class': 'ep info '})
        )
        for episode in episodes:
            # Episode number, with a single leading zero removed
            # ("07" -> "7").  The length is checked first so an empty
            # string cannot raise IndexError.
            e_num = episode.find('span', attrs={'class': 'pnumber'}).get_text()
            if len(e_num) > 1 and e_num[0] == '0':
                e_num = e_num[1:]

            # Episode title.
            e_name = episode.find('a', attrs={'itemprop': 'url'}).get_text()

            # Air time: the date comes from the span's "content" attribute,
            # the 12-hour clock time from the last 7 characters of its text.
            date_span = episode.find('span', attrs={'class': 'datepub'})
            e_time = date_span['content']
            clock_text = date_span.get_text()[-7:]

            clock = re.search(r'(\d{1,2}):(\d{2})([ap])m', clock_text)
            hour = int(clock.group(1))
            minute = clock.group(2)
            meridiem = clock.group(3)

            # Convert to a 24-hour clock: 12am is 00 and 12pm stays 12.
            # NOTE (original author): the page's times are UTC; add 8 hours
            # for local time in China.
            hour %= 12
            if meridiem == 'p':
                hour += 12
            e_time += ' %02d:%s:00' % (hour, minute)

            # A "paired" span marks an episode that has already aired;
            # otherwise it is treated as upcoming.
            if episode.find('span', attrs={'class': 'paired'}):
                e_status = u'已播放'
            else:
                e_status = u'即将播出'

            episode_info = {
                's_id': s_id,
                'se_id': se_id,
                'e_num': e_num,
                'e_name': e_name,
                'e_status': e_status,
                'e_description': '',
                'e_time': e_time,
            }
            # Single-argument form prints identically under Python 2's
            # print statement and Python 3's print function.
            print(episode_info)