def get_movies_amc(theater, date):
    """Scrape AMC's showtimes page for one theater on one date.

    :theater: str - must be a key of the theater table below (case-insensitive)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              a movie shown in several formats appears once per format
    """
    URL_TEMPLATE = 'https://www.amctheatres.com/movie-theatres/{}/{}/showtimes/all/{}/{}/all'
    KNOWN_THEATERS = {
        'amc boston common': ('boston', 'amc-boston-common-19'),
        'the waterfront': ('pittsburgh', 'amc-waterfront-22')
    }

    city, slug = KNOWN_THEATERS[theater.lower()]
    page = soup_me(URL_TEMPLATE.format(city, slug, date, slug))

    film_divs = page('div', class_='ShowtimesByTheatre-film')
    titles = [film.h2.text for film in film_divs]

    # per film -> per format section -> datetime strings of bookable shows
    # TODO print sold-out times as xed-out ?
    sections_per_film = []
    for film in film_divs:
        film_sections = []
        for section in film(
                'div',
                class_=re.compile('^Showtimes-Section Showtimes-Section')):
            bookable = []
            for slot in section('div', class_='Showtime'):
                status = slot.find('div', {'aria-hidden': "true"}).text
                if status != 'Sold Out':
                    bookable.append(
                        DATETIME_SEP.join((date, clean_time(slot.text))))
            film_sections.append(bookable)
        sections_per_film.append(film_sections)

    # flatten timelists for movies with multiple formats:
    # repeat each title once per format section so names and times line up
    # TODO sometimes lists separate times for same format -- combine ?
    repeated_titles = []
    for title, film_sections in zip(titles, sections_per_film):
        repeated_titles.extend([title] * len(film_sections))

    upcoming = filter_past(flatten(sections_per_film))

    # annotate with format ('Digital' is treated as the unmarked default)
    format_labels = [[heading.text for heading in film('h4')]
                     for film in film_divs]
    annotated = []
    for times, fmt in zip(upcoming, flatten(format_labels)):
        if fmt == 'Digital' or not times:
            annotated.append(times)
        else:
            annotated.append(times + [f'[ {fmt} ]'])

    # TODO combine_times does not know formats, so only filter here
    movie_names, movie_times = filter_movies(repeated_titles, annotated)
    return movie_names, movie_times
def get_movies_alamo(theater, date):
    """Get movie names and times from Alamo Drafthouse's calendar feed.

    NOTE(review): another `get_movies_alamo` appears later in this file;
    Python keeps whichever definition is executed last, so confirm which
    one is meant to be live.

    :theater: str (not used - the feed URL below is hard-coded)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the feed lists no films for `date`
    """
    BASE_URL = 'https://feeds.drafthouse.com/adcService/showtimes.svc/calendar/2101/'
    # trailing "in 35mm" / "in 70mm" on a film name, case-insensitive
    PATTERN = re.compile('in ((35|70)mm)$', re.I)

    def extract_fmt(name):
        """Split a film name into (name without format suffix, format or '')."""
        # re.split also returns the capture groups; keep only the text
        # before the match and the outer group, i.e. name and (35|70)mm
        name, *fmt = re.split(PATTERN, name)[:2]
        # NOTE(review): the name keeps the space that preceded "in" -
        # confirm downstream matching does not depend on it being stripped
        return name, ''.join(fmt).lower()

    djson = json_me(BASE_URL)

    # filter months -> weeks -> day
    weeks = flatten(month['Weeks']
                    for month in djson['Calendar']['Cinemas'][0]['Months'])
    matching_days = flatten(
        [[d for d in week['Days'] if d['Date'].startswith(date)]
         for week in weeks])

    # take the first matching day; a date absent from the calendar means
    # "nothing showing" rather than an unpacking ValueError
    day = next(iter(matching_days), None)
    if day is None:
        return [], []

    try:
        movies = day['Films']
    except KeyError:
        return [], []
    if not movies:
        # zip(*...) below cannot be unpacked into two names when empty
        return [], []

    movie_names, movie_formats = zip(
        *(extract_fmt(movie['FilmName']) for movie in movies))

    # TODO print sold-out times as xed-out ?
    # series['Formats'] doesn't seem to mean anything here - e.g. 70mm is
    # still coded as "Digital" - so the format comes from the name instead
    movie_times = [
        [
            # feed gives e.g. "7:30p" or "Noon"; normalise to "7:30pm"
            '{}m'.format(
                sesh['SessionTime'].lower().replace('noon', '12:00p'))
            for series in movie['Series']
            for f in series['Formats']
            for sesh in f['Sessions']
            if sesh['SessionStatus'] not in ('soldout', 'past')
        ]
        for movie in movies
    ]

    # annotate with formats (only when there are times and a format)
    movie_times = [
        (times if not times or not fmt else times + [f'[ {fmt} ]'])
        for times, fmt in zip(movie_times, movie_formats)
    ]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))
    return movie_names, movie_times
def get_movies_landmark(theater, date):
    """Fetch Kendall Landmark's film feed and pull out showtimes for `date`.

    :theater: str (not used - the feed URL below is hard-coded)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    FEED_URL = 'https://movie-lmt.peachdigital.com/movies/GetFilmsByCinema/21/151'

    films = json_me(FEED_URL)['Result']

    titles = []
    for film in films:
        titles.append(film['Title'])

    # one list of "<date> @ <start time>" strings per film
    showtimes = []
    for film in films:
        per_session = []
        for sesh in film['Sessions']:
            # keep only the sessions whose display date is the requested day
            per_session.append([
                '{} @ {}'.format(date, slot['StartTime'])
                for slot in sesh['Times']
                if convert_date(sesh['DisplayDate']) == date
            ])
        showtimes.append(flatten(per_session))

    upcoming = filter_past(showtimes)

    filtered = filter_movies(titles, upcoming)
    movie_names, movie_times = combine_times(*filtered)
    return movie_names, movie_times
def get_movies_alamo(theater, date):
    """Get movie names and times from Alamo Drafthouse's calendar feed.

    NOTE(review): an earlier definition with this same name exists in this
    file; being later, this is the one the name ends up bound to. It is
    therefore kept in step with that definition's handling of "noon"
    session times, 35mm/70mm name suffixes, and days without films.

    :theater: str (not used - the feed URL below is hard-coded)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the feed lists no films for `date`
    """
    BASE_URL = 'https://feeds.drafthouse.com/adcService/showtimes.svc/calendar/2101/'
    # trailing "in 35mm" / "in 70mm" on a film name, case-insensitive
    FMT_SUFFIX = re.compile('in ((35|70)mm)$', re.I)

    djson = json_me(BASE_URL)

    # filter months -> weeks -> day
    weeks = flatten(month['Weeks']
                    for month in djson['Calendar']['Cinemas'][0]['Months'])
    candidates = flatten(
        [[d for d in week['Days'] if d['Date'].startswith(date)]
         for week in weeks])

    # first matching day, if any; strict single-element unpacking would
    # raise ValueError for a date missing from (or repeated in) the feed
    day = next(iter(candidates), None)
    if day is None:
        return [], []

    try:
        movies = day['Films']
    except KeyError:
        return [], []
    if not movies:
        return [], []

    movie_names = []
    movie_formats = []
    for movie in movies:
        # re.split returns [text before match, outer group, ...] on a match
        # and [whole name] otherwise; keep only name and (35|70)mm
        name, *fmt = re.split(FMT_SUFFIX, movie['FilmName'])[:2]
        movie_names.append(name)
        movie_formats.append(''.join(fmt).lower())

    # TODO print sold-out times as xed-out ?
    # series['Formats'] doesn't seem to mean anything here - e.g. 70mm is
    # still coded as "Digital"
    movie_times = [
        [
            # e.g. "7:30p" -> "7:30pm", "Noon" -> "12:00pm"
            '{}m'.format(
                sesh['SessionTime'].lower().replace('noon', '12:00p'))
            for series in movie['Series']
            for f in series['Formats']
            for sesh in f['Sessions']
            if sesh['SessionStatus'] not in ('soldout', 'past')
        ]
        for movie in movies
    ]

    # annotate with formats (only when there are times and a format)
    movie_times = [
        (times if not times or not fmt else times + [f'[ {fmt} ]'])
        for times, fmt in zip(movie_times, movie_formats)
    ]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))
    return movie_names, movie_times
def get_movies_showtimes(theater, date):
    """Scrape a theater page on showtimes.com for movies playing on `date`.

    :theater: str - a key of the slug table below, or any other name to be
              resolved through get_theaterpg_showtimes
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    URL_TEMPLATE = 'https://www.showtimes.com/movie-theaters/{}'
    # values are callables so that the fallback page scraper is only
    # invoked when the theater is not listed here
    SLUG_LOOKUP = {
        'regal fenway': lambda *args: 'regal-fenway-stadium-13-rpx-6269',
        'ua court st': lambda *args: 'ua-court-street-stadium-12-rpx-6608'
    }

    try:
        # fallback for unlisted theater
        resolve_slug = SLUG_LOOKUP.get(theater.lower(), get_theaterpg_showtimes)
        page = soup_me(URL_TEMPLATE.format(resolve_slug(theater)))
        movies = page('li', class_='movie-info-box')
    except Exception as e:
        print(error_str.format(e))  # error msg only
        movies = []  # no matching theater

    # title = first line of each heading's text, joined per movie
    movie_names = []
    for m in movies:
        headings = m('h2', class_='media-heading')
        movie_names.append(''.join(
            re.sub('[\r\n].*', '', heading.text.strip())
            for heading in headings))

    def starts_new_day(txt):
        # day buttons are the ones containing a comma
        return ',' in txt

    # [[day, time, time, day, time], ..] -> [[[day, time, time], [day, time]], ..]
    grouped_buttons = []
    for m in movies:
        labels = (button.text for button in m('button', type='button'))
        grouped_buttons.append(list(split_before(labels, starts_new_day)))

    movie_datetimes = []
    for groups in grouped_buttons:
        on_date = []
        for day, *times in groups:
            day_label = day.replace(':', '')
            if convert_date(day_label) == date:
                on_date.append(
                    ['{} @ {}'.format(day_label, t) for t in times])
        movie_datetimes.append(flatten(on_date))

    upcoming = filter_past(movie_datetimes)

    filtered = filter_movies(movie_names, upcoming)
    movie_names, movie_times = combine_times(*filtered)
    return movie_names, movie_times
def print_fancy(beer, d_stats, sep='|', spacer=' ', **kwargs):
    """Print a header for `beer` followed by a one-row table of site ratings.

    The table has one column per key of D_ACTIONS, in D_ACTIONS order: the
    first printed line holds the ratings, centered over the second line's
    "(site)" labels.

    :beer: name shown in the header line
    :d_stats: dict of site -> stats dict; each listed site's 'rating' entry
              is shown, and sites that are absent or have no 'rating' get a
              blank cell
    :sep: str placed between columns
    :spacer: str placed on either side of each "(site)" label
    :kwargs: accepted and ignored
    :returns: None (everything goes to stdout)
    """
    PATTERN = '~*~'

    style = get_info_ranked('style')
    abv = get_info_ranked('abv')

    # header
    print('\n{pattern} {} {pattern} ({}, {})\n'.format(
        beer, style, abv, pattern=PATTERN))

    # reviews: ratings sites only, i.e. the keys of D_ACTIONS
    sites = list(D_ACTIONS)
    labels = ['{spacer}({}){spacer}'.format(site, spacer=spacer)
              for site in sites]

    # Look each rating up by site rather than walking d_stats in its own
    # order, so a rating always sits over its own label and a site missing
    # from d_stats gives an empty cell instead of an IndexError in format()
    ratings = [(d_stats.get(site) or {}).get('rating', '') for site in sites]

    # center every rating within the width of the label beneath it
    reviewtxt = sep.join('{:^{}}'.format(rating, len(label))
                         for rating, label in zip(ratings, labels))
    sitetxt = sep.join(labels)

    print('\n'.join((reviewtxt, sitetxt)))
    print()
def get_movies_google(theater, date, *args, **kwargs):
    """Get movie names and times from Google search

    :theater: str
    :date: str (yyyy-mm-dd)
    :args, kwargs: other search terms, e.g. zip code
                   (accepted but not currently used by this function)
    :returns: (list of movie names, list of lists of movie times);
              a movie listed in several formats appears once per format
    :raises: NoMoviesException when the result page has no showtimes block
             or its date differs from `date`
    """
    # search by weekday name, or 'today' when the weekday is today's
    # NOTE(review): a date exactly a week or more ahead shares today's
    # weekday and is also searched as 'today' - the date check below then
    # rejects the result; confirm that is acceptable
    weekday = convert_date(date, fmt_out='%A')  # formatted for search
    fdate = ('today' if weekday == convert_date('today', fmt_out='%A')
             else weekday)

    BASE_URL = 'https://www.google.com/search'
    PARAMS = {
        'q': safe_encode('showtimes', '"{}"'.format(theater), fdate),
        'ie': 'utf-8',
        'client': 'firefox-b-1-e'
    }

    # the query is composed by hand: passing params directly to requests
    # gives problems with extraneous % encoding
    soup = soup_me(compose_query(BASE_URL, PARAMS))
    # TODO google static html only returns up to 10 movies..

    # NOTE(review): as written the `time` pattern means
    # "starts with std-ts" OR "ends with lr_c_stnl"; if an exact match of
    # either class was intended it should be '^(std-ts|lr_c_stnl)$'
    CLASS = AttrDict(timelist='lr_c_fcc',
                     time=re.compile('^(std-ts)|(lr_c_stnl)$'),
                     fmt='lr_c_vn')

    try:
        relevant_div = soup.find('div', {'data-date': True})
        # check date (AttributeError here means no such div was found)
        date_found = relevant_div.attrs['data-date']
        if convert_date(date_found) != date:
            # raised explicitly instead of via `assert`, which is removed
            # under `python -O` and would let a wrong-date page through
            raise AssertionError('{} != {}'.format(date_found, date))
        movies = relevant_div('div', {'data-movie-name': True})
    except (AssertionError, AttributeError) as e:
        print(error_str.format('No matching theater on google'))
        raise NoMoviesException(e) from e

    movie_names = [m.span.text for m in movies]

    # one lookup of the per-format time lists per movie, shared below
    timelists_per_movie = [m('div', class_=CLASS.timelist) for m in movies]

    movie_times = [  # nested times per format per movie
        [[time.text for time in timelst('div', class_=CLASS.time)]
         for timelst in timelsts]
        for timelsts in timelists_per_movie
    ]
    movie_formats = [
        [
            # default (None) if no format listed
            getattr(timelst.find('div', class_=CLASS.fmt), 'text', None)
            for timelst in timelsts
        ]
        for timelsts in timelists_per_movie
    ]

    # flatten timelists for movies with multiple formats:
    # repeat each name once per time list so names and times stay aligned
    n_timelists_per_movie = [len(timelsts) for timelsts in movie_times]
    movie_names = list(
        chain.from_iterable(
            [name] * n
            for name, n in zip(movie_names, n_timelists_per_movie)))

    # annotate with format ('Standard' is treated as the unmarked default)
    movie_times = [
        (times if fmt == 'Standard' or not times or not fmt
         else times + ['[ {} ]'.format(fmt)])
        for times, fmt in zip(flatten(movie_times), flatten(movie_formats))
    ]

    # no need to filter - tags only correspond to upcoming movie times
    return movie_names, movie_times