def get_movies_somerville(theater, date):
    """Scrape movie names and showtimes from Somerville Theatre's XML feed

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://somervilletheatre.com/wp-content/themes/somerville/showtimes.xml'

    def to_year_first(mmddyyyy):
        # mmddyyyy -> yyyymmdd
        return mmddyyyy[-4:] + mmddyyyy[:-4]

    soup = soup_me(BASE_URL)
    films = soup('filmtitle')

    movie_names = [film.shortname.text for film in films]

    movie_datetimes = []
    for film in films:
        showings = []
        # <date> and <time> tags are paired up positionally
        for day_tag, time_tag in zip(film('date'), film('time')):
            if day_tag.text != convert_date(date, fmt_out='%m%d%Y'):
                continue  # showing is on some other day
            stamp = ' '.join((to_year_first(day_tag.text), time_tag.text))
            # yyyymmdd hhmm -> yyyy-mm-dd @ hh:mm {a,p}m
            showings.append(
                dparser.parse(stamp).strftime('%Y-%m-%d @ %l:%M%P'))
        movie_datetimes.append(showings)

    movie_times = filter_past(movie_datetimes)
    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_landmark(theater, date):
    """Scrape movie names and showtimes from Kendall Landmark's JSON endpoint

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://movie-lmt.peachdigital.com/movies/GetFilmsByCinema/21/151'

    def times_on_date(session):
        # showtimes of one session, kept only if the session falls on `date`
        return [
            '{} @ {}'.format(date, slot['StartTime'])
            for slot in session['Times']
            if convert_date(session['DisplayDate']) == date
        ]

    listings = json_me(BASE_URL)['Result']

    movie_names = [listing['Title'] for listing in listings]

    movie_datetimes = []
    for listing in listings:
        per_session = [times_on_date(sesh) for sesh in listing['Sessions']]
        movie_datetimes.append(flatten(per_session))

    movie_times = filter_past(movie_datetimes)
    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_ifc(theater, date):
    """Get movie names and times from IFC's website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times),
              or ([], []) if no schedule block is listed for `date`
    :raises: ValueError if more than one schedule block matches `date`
    """
    BASE_URL = 'http://www.ifccenter.com/'

    soup = soup_me(BASE_URL)

    matching_days = [
        day for day in soup('div', class_=re.compile('^daily-schedule'))
        if day.h3.text != 'Coming Soon' and convert_date(day.h3.text) == date
    ]
    if not matching_days:  # requested date is not on the page
        return [], []
    day, = matching_days  # an ambiguous match is still an error

    movie_divs = day('div')

    movie_names = [mdiv.h3.text for mdiv in movie_divs]

    # every <li> inside a movie's div is taken as one showtime
    movie_datetimes = [
        ['{} @ {}'.format(date, time.text) for time in mdiv('li')]
        for mdiv in movie_divs
    ]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_quad(theater, date):
    """Get movie names and times from Quad's website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times),
              or ([], []) if no day block is listed for `date`
    :raises: ValueError if more than one day block matches `date`
    """
    BASE_URL = 'https://quadcinema.com/all/'

    soup = soup_me(BASE_URL)

    matching_days = [
        d for d in soup('div', class_='now-single-day')
        if convert_date(d.h1.text) == date
    ]
    if not matching_days:  # requested date is not listed on the page
        return [], []
    day, = matching_days  # an ambiguous match is still an error

    movie_names = [movie.text for movie in day('h4')]

    # times are written like "7.30pm" on the page; swap '.' for ':'
    movie_datetimes = [
        ['{} @ {}'.format(date, time.text.replace('.', ':'))
         for time in movie('li')]
        for movie in day('div', class_='single-listing')
    ]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_mfa(theater, date):
    """Get movie names and times from Museum of Fine Arts' website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://www.mfa.org/programs/film'
    PARAMS = {'field_date_value_1': date}

    soup = soup_me(BASE_URL, PARAMS)

    # keep only listings whose <span> starts with the requested date
    relevant_movies = [
        div for div in soup('div', class_='col-sm-8')
        if div.span and convert_date(div.span.contents[0]) == date
    ]

    movie_names = [m.a.text for m in relevant_movies]

    def convert(contentlst):
        """(date, <separator>, 'start–end') -> 'yyyy-mm-dd<SEP>start'"""
        showdate, _, timestr = contentlst
        # keep only the start time; partition (unlike a two-way unpack of
        # split) also copes with a showing that lists no end time
        start = timestr.partition('–')[0]
        return DATETIME_SEP.join((convert_date(showdate), start))

    movie_datetimes = [convert(m.span.contents) for m in relevant_movies]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_moma(theater, date):
    """Scrape film names and showtimes from the Museum of Modern Art calendar

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://www.moma.org/calendar/?utf8=%E2%9C%93&happening_filter=Films&date={}&location=both'
    PATTERN = re.compile('–[0-9]*:?[0-9]*')  # trailing end of a time range

    def when_text(tile):
        # full datetime text of one calendar tile
        return tile.find('div', class_='center balance-text').text

    def is_on_date(tile):
        # extract month & day from the full datetime (nbsp -> " " first)
        month_day = when_text(tile).replace(u'\xa0', ' ').split(', ')[1]
        return date == convert_date(month_day)

    soup = soup_me(BASE_URL.format(date))

    relevant_movies = [
        tile
        for tile in soup('div', class_='calendar-tile calendar-tile--tall-image')
        if is_on_date(tile)
    ]

    # one list of titles per showing.. some showings have multiple films
    nested_movie_names = []
    for tile in relevant_movies:
        emphasized = tile.h3('em')
        if emphasized:
            nested_movie_names.append([em.text for em in emphasized])
        else:
            nested_movie_names.append([tile.h3.text])

    # main attraction is the last film; anything before it becomes a note
    movie_names = [titles[-1] for titles in nested_movie_names]
    movie_formats = []
    for titles in nested_movie_names:
        openers = titles[:-1]
        movie_formats.append(
            '+ {}'.format(','.join(openers)) if openers else '')

    # -> yyyy-mm-dd @ hh:mm {a,p}m
    fmt_out = DATETIME_SEP.join(('%Y-%m-%d', '%l:%M%P'))
    movie_datetimes = []
    for tile in relevant_movies:
        start_only = PATTERN.sub('', when_text(tile))  # remove any time ranges
        movie_datetimes.append(dparser.parse(start_only).strftime(fmt_out))

    movie_times = filter_past(movie_datetimes)

    # annotate with format
    annotated = []
    for times, fmt in zip(movie_times, movie_formats):
        if times and fmt:
            annotated.append(times + [f'[ {fmt} ]'])
        else:
            annotated.append(times)
    movie_times = annotated

    movie_names, movie_times = filter_movies(movie_names, movie_times)

    return movie_names, movie_times
def get_movies_showtimes(theater, date):
    """Scrape movie names and showtimes from showtimes.com

    :theater: str
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://www.showtimes.com/movie-theaters/{}'

    # values are callables so the fallback page scraper only runs when the
    # theater is not listed here
    D_THEATERS = {
        'regal fenway': lambda *args: 'regal-fenway-stadium-13-rpx-6269',
        'ua court st': lambda *args: 'ua-court-street-stadium-12-rpx-6608'
    }

    try:
        # fallback for unlisted theater
        slug_for = D_THEATERS.get(theater.lower(), get_theaterpg_showtimes)
        soup = soup_me(BASE_URL.format(slug_for(theater)))
        movies = soup('li', class_='movie-info-box')
    except Exception as e:
        print(error_str.format(e))  # error msg only
        movies = []  # no matching theater

    after_linebreak = re.compile('[\r\n].*')

    movie_names = []
    for movie in movies:
        headings = movie('h2', class_='media-heading')
        movie_names.append(''.join(
            after_linebreak.sub('', heading.text.strip())
            for heading in headings))

    # [[day, time, time, day, time], ..] -> [[[day, time, time], [day, time]], ..]
    # (a button whose text contains a comma starts a new day group)
    nested_buttons = []
    for movie in movies:
        labels = (button.text for button in movie('button', type='button'))
        nested_buttons.append(
            list(split_before(labels, lambda txt: ',' in txt)))

    movie_datetimes = []
    for buttons in nested_buttons:
        per_day = []
        for day, *times in buttons:
            day_label = day.replace(':', '')
            if convert_date(day_label) == date:
                per_day.append(
                    ['{} @ {}'.format(day_label, showtime)
                     for showtime in times])
        movie_datetimes.append(flatten(per_day))

    movie_times = filter_past(movie_datetimes)
    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_somerville(theater, date):
    """Get movie names and times from Somerville Theater's website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times),
              or ([], []) if the feed lists no films at all
    """
    BASE_URL = 'https://somervilletheatre.com/wp-content/themes/somerville/showtimes.xml'

    soup = soup_me(BASE_URL)

    movies = soup('filmtitle')
    if not movies:
        # nothing listed; unpacking zip(*()) below would raise ValueError
        return [], []

    movie_names = [m.shortname.text for m in movies]  # /or/ m.find('name').text

    PATTERN = re.compile(' ((35|70)mm)$', re.I)

    def extract_fmt(name):
        """'Title 35mm' -> ('Title', '35mm'); 'Title' -> ('Title', '')"""
        # split yields [name, fmt, digits, ''] on a match, else [name];
        # keep only the name and the (35|70)mm suffix, if any
        name, *fmt = re.split(PATTERN, name)[:2]
        return name, ''.join(fmt).lower()  # (cleaned) movie name, movie fmt

    movie_names, movie_formats = zip(*(extract_fmt(m) for m in movie_names))

    def convert(mmddyyyy):
        # mmddyyyy -> yyyymmdd
        return mmddyyyy[-4:] + mmddyyyy[:-4]

    # both are loop-invariant, so compute them once
    wanted = convert_date(date, fmt_out='%m%d%Y')
    fmt_out = DATETIME_SEP.join(('%Y-%m-%d', '%l:%M%P'))

    movie_datetimes = [
        [
            # yyyymmdd hhmm -> yyyy-mm-dd @ hh:mm {a,p}m
            dparser.parse(' '.join((convert(d.text), t.text))).strftime(fmt_out)
            for d, t in zip(m('date'), m('time'))
            if d.text == wanted
        ]
        for m in movies
    ]

    movie_times = filter_past(movie_datetimes)

    # annotate with formats
    movie_times = [
        (times if not times or not fmt else times + [f'[ {fmt} ]'])
        for times, fmt in zip(movie_times, movie_formats)
    ]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_pghfilmmakers(theater, date):
    """Scrape movie names and showtimes from the Pittsburgh Filmmakers website

    :theater: str (must be a key of D_THEATERS, case-insensitive)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'http://cinema.pfpca.org/films/showtimes?location={}'

    D_THEATERS = {
        'regent square theater': 24,
        'harris theater': 20,
        'melwood screening room': 18
    }

    def is_requested_day(caption):
        return caption.text == convert_date(date, fmt_out='%a, %b %-d')

    location_id = D_THEATERS[theater.lower()]
    soup = soup_me(BASE_URL.format(location_id))

    # get date block
    try:
        candidates = [cap for cap in soup('caption') if is_requested_day(cap)]
        (block,) = candidates
    except ValueError:  # unpacking failed: not exactly one matching caption
        return [], []

    # the listings live three parse-tree steps after the caption
    listings = block.next.next.next

    movie_names = [
        link.text for link in listings('a', href=re.compile('/films/*'))
    ]

    movie_datetimes = []
    for cell in listings('td', class_='views-field views-field-field-location'):
        showtime = cell.next.next.next.text.strip()
        movie_datetimes.append(' @ '.join((date, showtime)))

    movie_times = filter_past(movie_datetimes)

    # filter movies with no future times
    # & combine times for same movie
    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_quad(theater, date):
    """Get movie names and times from Quad's website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times),
              or ([], []) if there is not exactly one day block for `date`
    """
    BASE_URL = 'https://quadcinema.com/all/'

    soup = soup_me(BASE_URL)

    try:
        day, = [
            d for d in soup('div', class_='now-single-day')
            if convert_date(d.h1.text) == date
        ]
    except ValueError:  # no matching date listed yet
        return [], []

    movie_names = [movie.text for movie in day('h4')]

    movies = day('div', class_='single-listing')

    # showtime `li`s: CSS class starts with "time"
    PATTERN = re.compile('^time')
    movie_datetimes = [
        [DATETIME_SEP.join((date, time.text.replace('.', ':')))
         for time in m('li', class_=PATTERN)]
        for m in movies
    ]

    movie_times = filter_past(movie_datetimes)

    # non-showtime `li`s: non-empty CSS class that does NOT start with "time".
    # (A character class like [^(time)] would instead reject any class whose
    # first letter is one of t/i/m/e or a parenthesis.)
    ANTIPATTERN = re.compile('^(?!time).')
    movie_formats = [
        [fmt.text for fmt in m('li', class_=ANTIPATTERN)] for m in movies
    ]

    # annotate with formats
    movie_times = [
        (times if not times or not fmt
         else times + ['[ {} ]'.format(','.join(fmt))])
        for times, fmt in zip(movie_times, movie_formats)
    ]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
try: city = maybe_city theaters = get_theaters(city) date = maybe_date except (FileNotFoundError, AssertionError): # date rather than city try: city = maybe_date # could be None.. theaters = get_theaters(city) except (FileNotFoundError, AssertionError): # date rather than city city = CITY theaters = get_theaters(city) date = maybe_city if maybe_city is not None else DATE moviegetter = partial(get_movies, date=convert_date(date)) # do stuff need_ratings = args.filter_by > 0 or not args.simple if need_ratings: d_cached = {} try: from ratings import get_ratings except (Exception) as e: # e.g. missing secrets msg, = e.args print(msg + '\n\n') need_ratings = False for theater in theaters:
def get_movies_google(theater, date, *args, **kwargs):
    """Get movie names and times from Google search

    :theater: str
    :date: str (yyyy-mm-dd)
    :args, kwargs: other search terms, e.g. zip code (currently unused)
    :returns: (list of movie names, list of lists of movie times)
    :raises: NoMoviesException if the results page has no showtimes block
             or its block is for a different date
    """
    # search by weekday name, or "today" when it matches today's weekday
    fdate = convert_date(date, fmt_out='%A')  # formatted for search
    fdate = (fdate if fdate != convert_date('today', fmt_out='%A')
             else 'today')

    BASE_URL = 'https://www.google.com/search'

    PARAMS = {
        'q': safe_encode('showtimes', '"{}"'.format(theater), fdate),
        'ie': 'utf-8',
        'client': 'firefox-b-1-e'
    }

    # the query is composed by hand: passing params directly to requests
    # gives problems with extraneous % encoding
    soup = soup_me(compose_query(BASE_URL, PARAMS))

    # TODO google static html only returns up to 10 movies..

    # NOTE(review): as written the alternation binds as
    # (^std-ts) | (lr_c_stnl$); confirm whether ^(std-ts|lr_c_stnl)$ was meant
    CLASS = AttrDict(timelist='lr_c_fcc',
                     time=re.compile('^(std-ts)|(lr_c_stnl)$'),
                     fmt='lr_c_vn')

    try:
        # AttributeError below means find() returned None (no such div)
        relevant_div = soup.find('div', {'data-date': True})

        # check date -- raised explicitly rather than via `assert`, which
        # would be stripped (and the check skipped) under `python -O`
        date_found = relevant_div.attrs['data-date']
        if convert_date(date_found) != date:
            raise AssertionError('{} != {}'.format(date_found, date))

        movies = relevant_div('div', {'data-movie-name': True})

    except (AssertionError, AttributeError) as e:
        print(error_str.format('No matching theater on google'))
        raise NoMoviesException(e) from e

    movie_names = [m.span.text for m in movies]

    movie_times = [  # nested times per format per movie
        [[time.text for time in timelst('div', class_=CLASS.time)]
         for timelst in m('div', class_=CLASS.timelist)]
        for m in movies
    ]

    movie_formats = [
        [
            # None if no format listed
            getattr(timelst.find('div', class_=CLASS.fmt), 'text', None)
            for timelst in m('div', class_=CLASS.timelist)
        ]
        for m in movies
    ]

    # flatten timelists for movies with multiple formats:
    # repeat each name once per timelist so names line up with flat times
    n_timelists_per_movie = [len(timelsts) for timelsts in movie_times]
    movie_names = list(
        chain.from_iterable(
            [name] * n
            for name, n in zip(movie_names, n_timelists_per_movie)))

    # annotate with format (except the default 'Standard')
    movie_times = [
        (times if fmt == 'Standard' or not times or not fmt
         else times + ['[ {} ]'.format(fmt)])
        for times, fmt in zip(flatten(movie_times), flatten(movie_formats))
    ]

    # no need to filter - tags only correspond to upcoming movie times
    return movie_names, movie_times
def convert(contentlst):
    """Build a 'date<SEP>start time' string from a listing's span contents

    :contentlst: 3-item sequence (date, <ignored separator>, time string),
                 where the time string is 'start–end' or just 'start'
    :returns: str, converted date and start time joined by DATETIME_SEP
    """
    showdate, _, timestr = contentlst
    # keep only the start time; partition (unlike a two-way unpack of split)
    # also copes with a showing that lists no end time
    start = timestr.partition('–')[0]
    return DATETIME_SEP.join((convert_date(showdate), start))