Beispiel #1
0
 def getFlesh(self):
     """Crawl every cinema in ``self.cinemas`` and fill ``self.movies``.

     For each cinema the listing page is fetched and scanned for
     ``(movieId, image URL)`` pairs. A movie seen for the first time gets
     a fresh record with ``'info'`` (seeded with the image URL) and
     ``'cinemas'`` sub-dicts. The movie page for that cinema is then
     fetched; ``getMovieInfo`` fills the info dict while it has no
     ``'title'`` yet, and ``getMovieCinema`` fills the per-cinema dict.
     The id/title lookup tables are updated alongside.

     A ``urllib.error.URLError`` stops the crawl at that point; its
     ``code`` and ``reason`` attributes are printed when present.

     Returns:
         ``self.movies``, including whatever was gathered before an error.
     """
     # NOTE: an earlier, disabled experiment extracted the cinema phone
     # number from the listing page (class "cb-tel"); it is not performed.
     listing_re = re.compile('movieId="(.*?)".*?<img src="(.*?)"', re.S)
     try:
         for cinemaId in self.cinemas:
             listing_html = getPageCode(self.getCinemaUrl(cinemaId))
             for movieId, movieImg in listing_re.findall(listing_html):
                 if movieId not in self.movies:
                     self.movies[movieId] = {
                         'info': {'img': movieImg},
                         'cinemas': {},
                     }
                 record = self.movies[movieId]
                 record['cinemas'][cinemaId] = {}
                 detail_html = getPageCode(self.getMovieUrl(cinemaId, movieId))
                 info = record['info']
                 if 'title' not in info:
                     # First visit to this movie: parse its details once
                     # and register it in both lookup tables.
                     self.getMovieInfo(detail_html, info)
                     title = info['title']
                     self.moviesIdToTitle[movieId] = title
                     self.moviesTitleToId[title] = movieId
                 self.getMovieCinema(detail_html, record['cinemas'][cinemaId])
     except urllib.error.URLError as err:
         for attr in ('code', 'reason'):
             if hasattr(err, attr):
                 print(getattr(err, attr))
     return self.movies 
Beispiel #2
0
 def getFlesh(self):
     """Crawl every cinema in ``self.cinemas`` and fill ``self.movies``.

     Each cinema page is fetched and scanned for movie entries. For every
     entry the movie's ``'info'`` dict is (re)built with its title, both
     id/title lookup tables are updated, and ``getMovieStatus`` is called
     with the raw text captured between ``"cat":`` and ``"id":`` to fill
     the per-cinema dict ``self.movies[movieId]['cinemas'][cinemaId]``.

     A ``urllib.error.URLError`` stops the crawl at that point; its
     ``code`` and ``reason`` attributes are printed when present.

     Returns:
         ``self.movies``, including whatever was gathered before an
         error, so callers can use the result directly.
     """
     # NOTE: an earlier, disabled experiment scraped the cinema phone
     # number from a separate shop page; it is not performed.
     # Groups: text between "cat": and "id":, the id value, the "nm" value.
     listing_re = re.compile('"cat":(.*?)"id":(.*?),.*?"nm":"(.*?)"', re.S)
     try:
         for cinemaId in self.cinemas:
             pagecode = getPageCode(self.getCinemaUrl(cinemaId))
             for raw_status, raw_id, raw_title in listing_re.findall(pagecode):
                 movieinfo = raw_status.strip()
                 movieId = raw_id.strip()
                 title = raw_title.strip()
                 if movieId not in self.movies:
                     self.movies[movieId] = {'cinemas': {}}
                 # The info dict is replaced on every sighting of the
                 # movie, not only the first one.
                 self.movies[movieId]['info'] = {'title': title}
                 self.moviesIdToTitle[movieId] = title
                 self.moviesTitleToId[title] = movieId
                 showings = {}
                 self.movies[movieId]['cinemas'][cinemaId] = showings
                 self.getMovieStatus(movieinfo, showings)
     except urllib.error.URLError as e:
         if hasattr(e, 'code'):
             print(e.code)
         if hasattr(e, 'reason'):
             print(e.reason)
     return self.movies
Beispiel #3
0
 def getFlesh(self):
     """Crawl every cinema in ``self.cinemas`` and fill ``self.movies``.

     For each cinema the listing page is scanned for distinct ``showId``
     values (used as movie ids). For each movie the schedule page is
     fetched; a movie seen for the first time gets a fresh record whose
     ``'info'`` dict is filled by ``getMovieInfo`` and is registered in
     both id/title lookup tables. The schedule page is then scanned for
     the distinct ``showDate`` values of that movie, each date's page is
     fetched, and ``getMovieStatus`` fills
     ``self.movies[movieId]['cinemas'][cinemaId][date]``.

     A ``urllib.error.URLError`` stops the crawl at that point; its
     ``code`` and ``reason`` attributes are printed when present.

     Returns:
         ``self.movies``, including whatever was gathered before an error.
     """
     # NOTE: an earlier, disabled experiment scraped the cinema phone
     # number from the cinema detail page; it is not performed.
     schedule_prefix = 'http://dianying.taobao.com/cinemaDetailSchedule.htm?cinemaId='
     show_id_re = re.compile('showId=(.*?)&', re.S)
     try:
         for cinemaId in self.cinemas:
             c_pageCode = getPageCode(self.getCinemaUrl(cinemaId))
             # set() collapses repeated links to the same show.
             for movieId in set(show_id_re.findall(c_pageCode)):
                 movieUrl = schedule_prefix + cinemaId + '&showId=' + movieId
                 m_pageCode = getPageCode(movieUrl)
                 if movieId not in self.movies:
                     info = {}
                     self.movies[movieId] = {'info': info, 'cinemas': {}}
                     self.getMovieInfo(m_pageCode, info)
                     title = info['title']
                     self.moviesIdToTitle[movieId] = title
                     self.moviesTitleToId[title] = movieId
                 # The id comes from scraped text, so escape it before
                 # embedding it in a pattern; otherwise regex
                 # metacharacters in it would change or break the match.
                 date_re = re.compile(
                     'showId=' + re.escape(movieId) + '&showDate=(.*?)&', re.S)
                 by_date = {}
                 self.movies[movieId]['cinemas'][cinemaId] = by_date
                 for show_date in set(date_re.findall(m_pageCode)):
                     url = movieUrl + '&showDate=' + show_date
                     d_pagecode = getPageCode(url)
                     # Key by the date with its first five characters
                     # removed (presumably a "YYYY-" prefix; confirm
                     # against the site's date format).
                     status = {}
                     by_date[show_date[5:]] = status
                     self.getMovieStatus(d_pagecode, status)
     except urllib.error.URLError as e:
         if hasattr(e, 'code'):
             print(e.code)
         if hasattr(e, 'reason'):
             print(e.reason)
     return self.movies