def getFlesh(self):
    """Refresh ``self.movies`` by crawling every cinema in ``self.cinemas``.

    Each cinema page is fetched with ``getPageCode`` and scanned for
    ``movieId``/poster-image pairs.  A movie seen for the first time gets a
    fresh ``{'info': {'img': ...}, 'cinemas': {}}`` record.  For every
    (cinema, movie) pair the movie page is fetched, the movie's ``info`` dict
    is filled via ``self.getMovieInfo`` if it has no ``'title'`` yet (the
    id<->title lookup maps are updated at the same time), and the per-cinema
    dict is reset and handed to ``self.getMovieCinema`` to be filled.

    A ``urllib.error.URLError`` stops the crawl; its ``code`` and/or
    ``reason`` are printed and whatever was collected so far is kept.

    Returns:
        ``self.movies`` (the same dict object that was updated in place).
    """
    # Loop-invariant, so it is compiled once up front.
    moviePattern = re.compile('movieId="(.*?)".*?<img src="(.*?)"', re.S)

    try:
        for cinemaId in self.cinemas:
            cinemaPage = getPageCode(self.getCinemaUrl(cinemaId))
            # NOTE: scraping the cinema phone number from the
            # <p class="cb-tel"> element used to happen here; it is disabled.

            for movieId, movieImg in moviePattern.findall(cinemaPage):
                if movieId not in self.movies:
                    self.movies[movieId] = {
                        'info': {'img': movieImg},
                        'cinemas': {},
                    }
                movie = self.movies[movieId]
                info = movie['info']

                # Reset this cinema's entry before fetching, so a failed
                # fetch leaves an empty dict rather than stale data.
                movie['cinemas'][cinemaId] = {}

                moviePage = getPageCode(self.getMovieUrl(cinemaId, movieId))

                if 'title' not in info:
                    # Movie details are only parsed once per movie.
                    self.getMovieInfo(moviePage, info)
                    title = info['title']
                    self.moviesIdToTitle[movieId] = title
                    self.moviesTitleToId[title] = movieId

                self.getMovieCinema(moviePage, movie['cinemas'][cinemaId])
    except urllib.error.URLError as e:
        # Best effort: report the failure and return what was gathered.
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, 'reason'):
            print(e.reason)

    return self.movies
def getFlesh(self):
    """Refresh ``self.movies`` by crawling every cinema in ``self.cinemas``.

    Each cinema page is fetched with ``getPageCode`` and scanned with a regex
    that captures, per movie, the text between ``"cat":`` and ``"id":``, the
    id itself, and the ``"nm"`` value.  A movie seen for the first time gets
    a fresh ``{'cinemas': {}, 'info': {'title': ...}}`` record and is added
    to the id<->title lookup maps.  For every (cinema, movie) pair the
    per-cinema dict is reset and handed, together with the captured text, to
    ``self.getMovieStatus`` to be filled.

    A ``urllib.error.URLError`` stops the crawl; its ``code`` and/or
    ``reason`` are printed and whatever was collected so far is kept.

    Returns:
        ``self.movies`` (the same dict object that was updated in place),
        matching the sibling ``getFlesh`` implementations.
    """
    # Loop-invariant, so it is compiled once up front.
    moviePattern = re.compile('"cat":(.*?)"id":(.*?),.*?"nm":"(.*?)"', re.S)

    try:
        for cinemaId in self.cinemas:
            # NOTE: scraping the cinema phone number from the shop page
            # (looked up through self.cinemasMap) used to happen here; it is
            # disabled.
            pagecode = getPageCode(self.getCinemaUrl(cinemaId))

            for rawInfo, rawId, rawTitle in moviePattern.findall(pagecode):
                movieinfo = rawInfo.strip()
                movieId = rawId.strip()

                if movieId not in self.movies:
                    title = rawTitle.strip()
                    self.movies[movieId] = {
                        'cinemas': {},
                        'info': {'title': title},
                    }
                    self.moviesIdToTitle[movieId] = title
                    self.moviesTitleToId[title] = movieId

                # Always start from an empty per-cinema entry.
                cinemaEntry = {}
                self.movies[movieId]['cinemas'][cinemaId] = cinemaEntry
                self.getMovieStatus(movieinfo, cinemaEntry)
    except urllib.error.URLError as e:
        # Best effort: report the failure and return what was gathered.
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, 'reason'):
            print(e.reason)

    return self.movies
def getFlesh(self):
    """Refresh ``self.movies`` by crawling every cinema in ``self.cinemas``.

    Each cinema page is fetched with ``getPageCode`` and scanned for
    ``showId=...&`` values, which are used as movie ids.  For every distinct
    id the cinema's schedule page for that movie is fetched.  A movie seen
    for the first time gets a fresh ``{'info': {}, 'cinemas': {}}`` record,
    its ``info`` is filled by ``self.getMovieInfo`` and the id<->title lookup
    maps are updated.  The schedule page is then scanned for the movie's
    ``showDate`` values; one page per distinct date is fetched and passed to
    ``self.getMovieStatus`` along with a fresh dict stored under
    ``self.movies[movieId]['cinemas'][cinemaId][date[5:]]``.

    A ``urllib.error.URLError`` stops the crawl; its ``code`` and/or
    ``reason`` are printed and whatever was collected so far is kept.

    Returns:
        ``self.movies`` (the same dict object that was updated in place).
    """
    # Loop-invariant, so it is compiled once up front.
    showIdPattern = re.compile('showId=(.*?)&', re.S)

    try:
        for cinemaId in self.cinemas:
            # NOTE: scraping the cinema phone number from cinemaDetail.htm
            # used to happen here; it is disabled.
            cinemaPage = getPageCode(self.getCinemaUrl(cinemaId))

            # De-duplicate while keeping first-seen page order, so the crawl
            # order is deterministic (a set would iterate arbitrarily).
            for movieId in dict.fromkeys(showIdPattern.findall(cinemaPage)):
                movieUrl = (
                    'http://dianying.taobao.com/cinemaDetailSchedule.htm?cinemaId='
                    + cinemaId + '&showId=' + movieId
                )
                moviePage = getPageCode(movieUrl)

                if movieId not in self.movies:
                    self.movies[movieId] = {'info': {}, 'cinemas': {}}
                    info = self.movies[movieId]['info']
                    self.getMovieInfo(moviePage, info)
                    title = info['title']
                    self.moviesIdToTitle[movieId] = title
                    self.moviesTitleToId[title] = movieId

                # movieId is scraped text: escape it so regex metacharacters
                # in it are matched literally instead of altering the pattern.
                datePattern = re.compile(
                    'showId=' + re.escape(movieId) + '&showDate=(.*?)&', re.S
                )

                # Always start from an empty per-cinema entry.
                byDate = {}
                self.movies[movieId]['cinemas'][cinemaId] = byDate

                for date in dict.fromkeys(datePattern.findall(moviePage)):
                    datePage = getPageCode(movieUrl + '&showDate=' + date)
                    # Key by the date minus its first five characters
                    # (presumably a "YYYY-" prefix -- TODO confirm).
                    dateKey = date[5:]
                    byDate[dateKey] = {}
                    self.getMovieStatus(datePage, byDate[dateKey])
    except urllib.error.URLError as e:
        # Best effort: report the failure and return what was gathered.
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, 'reason'):
            print(e.reason)

    return self.movies