コード例 #1
0
ファイル: tools.py プロジェクト: zhuyf8899/TvCalendarShellNew
 def updateShowDetail(self,s_id):
     """Download one show's page and print its scraped summary fields.

     The show's link is looked up with ``Database.getOneLinkBySid(s_id)``
     and appended to ``self.config.url``. From the fetched HTML the method
     reads the ``<p class="sumtext">`` text and the ``<li>`` items of the
     ``<aside class="quikinfo">`` block, gathers them into a dict and
     prints that dict. Nothing is returned; an empty response body makes
     the method return without printing.
     """
     database = Database(self.log, self.config)
     baseUrl = self.config.url
     pageUrl = baseUrl + database.getOneLinkBySid(s_id)
     jar = cookielib.CookieJar()
     fetcher = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
     rawHtml = fetcher.open(urllib2.Request(url=pageUrl)).read()
     if not rawHtml:
         return

     soup = BeautifulSoup(rawHtml)
     # Show synopsis, with every single quote backslash-escaped.
     synopsis = soup.find('p', attrs={'class': 'sumtext'}).get_text()
     synopsis = synopsis.replace("'", "\\'")
     infoItems = soup.find('aside', attrs={'class': 'quikinfo'}).findAll('li')

     # The fixed slice offsets below drop the leading characters of each
     # item's text -- presumably a label prefix; the offsets depend on the
     # page markup, TODO confirm against the live site.
     showDetail = {
         's_id': s_id,
         's_description': synopsis,
         # Per the original note: the weekly air day (link text of item 0).
         'update_time': infoItems[0].a.get_text(),
         # Per the original note: the length of each episode.
         'length': infoItems[1].get_text()[17:],
         # Per the original note: region (item 3) and TV channel (item 2).
         'area': infoItems[3].get_text()[10:],
         'channel': infoItems[2].get_text()[10:],
         'status': infoItems[5].get_text()[14:],
     }
     print(showDetail)
コード例 #2
0
ファイル: tools.py プロジェクト: zhuyf8899/TvCalendarShellNew
    def workWithOneShowsEp(self,s_id):
        """Scrape and print every episode of every season of one show.

        The show's link is looked up with ``Database.getOneLinkBySid(s_id)``
        and appended to ``self.config.url``. Each ``<li class="parent">``
        node of the fetched page is treated as one season, and each episode
        ``<li>`` inside it produces one printed dict with the keys ``s_id``,
        ``se_id``, ``e_num``, ``e_name``, ``e_status``, ``e_description``
        and ``e_time``.

        Args:
            s_id: Show identifier; used for the link lookup and copied into
                every printed episode dict.

        Returns:
            None. The results are only printed. An empty response body makes
            the method return without printing anything.

        Raises:
            AttributeError: If a season heading does not contain
                ``Season <n>`` or an episode's date text does not end in an
                ``h:mmam`` / ``h:mmpm`` time (``re.search`` returns ``None``).
        """
        db = Database(self.log, self.config)
        urlTarget = self.config.url + db.getOneLinkBySid(s_id)
        cookie = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
        htmlData = opener.open(urllib2.Request(url=urlTarget)).read()
        if not htmlData:
            return

        bsContent = BeautifulSoup(htmlData)
        bsLists = bsContent.findAll('li', attrs={'class': 'parent'})
        print('the len of biLists is ' + str(len(bsLists)))
        for oneSeason in bsLists:
            # The heading text contains "Season <n>"; keep only the number.
            heading = oneSeason.strong.get_text()
            se_id = re.search(r'Season\s(\d{1,2})', heading).group(1)

            # Episodes are matched under two exact class strings.
            epList = (
                oneSeason.findAll('li', attrs={'class': 'ep info  RAWR'})
                + oneSeason.findAll('li', attrs={'class': 'ep info '})
            )

            for oneEpisode in epList:
                # Episode number, with one leading zero removed ("07" -> "7").
                # The length is checked first so an empty string is left
                # untouched instead of raising IndexError.
                e_num = oneEpisode.find('span', attrs={'class': 'pnumber'}).get_text()
                if len(e_num) > 1 and e_num.startswith('0'):
                    e_num = e_num[1:]

                # Episode title.
                e_name = oneEpisode.find('a', attrs={'itemprop': 'url'}).get_text()

                # Air time: the date comes from the span's "content"
                # attribute, the clock time from the end of its text.
                # NOTE (from the original author): the scraped times are
                # UTC; add 8 hours for use in China.
                datePub = oneEpisode.find('span', attrs={'class': 'datepub'})
                e_time = datePub['content'] + ' ' + self._airClockTo24h(datePub.get_text())

                # A "paired" span marks an episode that has already aired.
                if oneEpisode.find('span', attrs={'class': 'paired'}):
                    e_status = u'已播放'
                else:
                    e_status = u'即将播出'

                episodeInfoToBeAired = {
                    's_id': s_id,
                    'se_id': se_id,
                    'e_num': e_num,
                    'e_name': e_name,
                    'e_status': e_status,
                    'e_description': '',
                    'e_time': e_time,
                }
                print(episodeInfoToBeAired)

    @staticmethod
    def _airClockTo24h(timeText):
        """Convert the trailing ``h:mmam``/``h:mmpm`` of *timeText* to ``HH:MM:00``."""
        # Only the last seven characters are inspected: the width of "hh:mmam".
        clock = re.search(r'(\d{1,2}):(\d{2})([ap])m', timeText[-7:])
        hour = int(clock.group(1))
        meridiem = clock.group(3)
        if meridiem == 'p' and hour != 12:
            hour += 12
        elif meridiem == 'a' and hour == 12:
            # 12:xxam is just after midnight, i.e. 00:xx on the 24-hour clock.
            hour = 0
        return '%02d:%s:00' % (hour, clock.group(2))