def get_movies_film_noir(theater, date):
    """Scrape one day's showings from Film Noir Cinema's program page

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    PROGRAM_URL = 'https://www.filmnoircinema.com/program'

    page = soup_me(PROGRAM_URL)

    when = dparser.parse(date)
    # event links embed the date without zero-padding
    day_href = re.compile('/program/{}/{}/{}/'.format(
        when.year, when.month, when.day))
    links = page('a', class_='eventlist-title-link', href=day_href)

    movie_names = [link.text for link in links]

    # one flat list of "datetime @ label" strings across all matching links
    movie_datetimes = []
    for link in links:
        time_tags = link.next.next.next('time',
                                        class_='event-time-12hr-start')
        for time_tag in time_tags:
            movie_datetimes.append(
                ' @ '.join((time_tag['datetime'], time_tag.text)))

    # get times filtered by past
    movie_times = filter_past(movie_datetimes)

    # filter movies with no future times & combine times for same movie
    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_filmlinc(theater, date):
    """Fetch one day's showings from Film at Lincoln Center's events API

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), sent as both `start` and `end` of the query
    :returns: (list of movie names, list of lists of movie times)
    """
    API_URL = 'https://www.filmlinc.org/wp-content/themes/filmlinc/api-events.php'
    STAMP_FMT = '%Y-%m-%d @ %l:%M%P'  # yyyy-mm-dd @ hh:mm {a,p}m

    events = json_me(API_URL, {'start': date, 'end': date})

    movie_names = [event['title'] for event in events]

    movie_datetimes = []
    for event in events:
        # `start` is an epoch in milliseconds
        started = datetime.fromtimestamp(event['start'] / 1000)
        movie_datetimes.append(started.strftime(STAMP_FMT))

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_quad(theater, date):
    """Get movie names and times from Quad's website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the requested date is not listed exactly once
    """
    BASE_URL = 'https://quadcinema.com/all/'

    soup = soup_me(BASE_URL)

    matching_days = [
        d for d in soup('div', class_='now-single-day')
        if convert_date(d.h1.text) == date
    ]
    # previously `day, = [...]`, which raised ValueError when the date was
    # absent (e.g. not posted yet) instead of reporting "no showings"
    if len(matching_days) != 1:
        return [], []
    day = matching_days[0]

    movie_names = [movie.text for movie in day('h4')]

    movie_datetimes = [[
        '{} @ {}'.format(date, time.text.replace('.', ':'))
        for time in movie('li')
    ] for movie in day('div', class_='single-listing')]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_cinema_village(theater, date):
    """Get movie names and times from Cinema Village's website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when no tab exists for the requested date
    """
    BASE_URL = 'https://www.cinemavillage.com/showtimes/'

    soup = soup_me(BASE_URL)

    # tab labels end in a dotted date; normalize "." -> "-"
    days = [
        day.contents[-1].strip().replace('.', '-')
        for day in soup('a', {'data-toggle': 'tab'})
    ]
    try:
        iday = index_into_days(days, date=date)
    except AssertionError:
        # NOTE(review): assumes index_into_days signals "no matching day"
        # with AssertionError, as the Anthology scraper handles it -- confirm
        return [], []

    day = soup.find('div', id='tab_default_{}'.format(iday))
    if day is None:  # tab header present but its content pane is missing
        return [], []

    movie_names = [movie.text for movie in day('a')]

    movie_datetimes = [[
        '{} @ {}'.format(date, time.text) for time in times('span')
    ] for times in day('div', class_='sel-time')]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_videology(theater, date):
    """Scrape one day's showings from Videology's events page

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), appended to the events URL
    :returns: (list of movie names, list of lists of movie times)
    """
    EVENTS_URL = 'https://videologybarandcinema.com/events/{}'

    page = soup_me(EVENTS_URL.format(date))

    title_tags = page('h2', class_='tribe-events-list-event-title summary')
    movie_names = [tag.a['title'] for tag in title_tags]

    detail_tags = page('div', class_='tribe-updated published time-details')
    # first child of the span is the "date @ time" text
    movie_datetimes = [tag.span.contents[0] for tag in detail_tags]

    # get times filtered by past
    movie_times = filter_past(movie_datetimes)

    # filter movies with no future times
    movie_names, movie_times = filter_movies(movie_names, movie_times)

    return movie_names, movie_times
def get_movies_village_east_or_angelika(theater, date):
    """Scrape showings from Village East Cinema or Angelika Film Center

    :theater: str, case-insensitive key into the site table below
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    URL_TEMPLATE = 'https://www.{}/showtimes-and-tickets/now-playing/{}'
    SITE_FOR = {
        'village east cinema': 'citycinemas.com/villageeast',
        'angelika film center': 'angelikafilmcenter.com/nyc'
    }

    site = SITE_FOR[theater.lower()]
    page = soup_me(URL_TEMPLATE.format(site, date))

    movie_names = [tag.text for tag in page('h4', class_='name')]

    movie_datetimes = []
    for wrapper in page('div', class_="showtimes-wrapper"):
        buttons = wrapper('input', class_='showtime reserved-seating')
        movie_datetimes.append([
            '{} @ {}'.format(date, button.attrs['value'])
            for button in buttons
        ])

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_momi(theater, date):
    """Scrape one day's screenings from Museum of the Moving Image's calendar

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    CALENDAR_URL = 'http://www.movingimage.us/visit/calendar/{}/day/type/1'

    slashed = date.replace('-', '/')  # yyyy-mm-dd -> yyyy/mm/dd
    page = soup_me(CALENDAR_URL.format(slashed))

    # event links for the day carry the slashed date in their href
    day_href = re.compile('calendar/{}'.format(slashed))
    links = page('a', href=day_href)

    movie_names = [
        link.find('span', class_=re.compile("^color")).text for link in links
    ]

    movie_datetimes = []
    for link in links:
        # the <em> text is split on " | "; the first piece is used as the
        # time, with periods removed
        start = link.em.text.split(' | ')[0].replace('.', '')
        movie_datetimes.append([DATETIME_SEP.join((date, start))])

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_somerville(theater, date):
    """Read one day's showings from Somerville Theatre's showtimes XML feed

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    FEED_URL = 'https://somervilletheatre.com/wp-content/themes/somerville/showtimes.xml'
    STAMP_FMT = '%Y-%m-%d @ %l:%M%P'  # yyyy-mm-dd @ hh:mm {a,p}m

    def to_ymd(mdy):
        # mmddyyyy -> yyyymmdd
        return mdy[-4:] + mdy[:-4]

    feed = soup_me(FEED_URL)
    films = feed('filmtitle')

    movie_names = [film.shortname.text for film in films]  # /or/ film.find('name').text

    movie_datetimes = []
    for film in films:
        showings = []
        for day_tag, time_tag in zip(film('date'), film('time')):
            # feed dates are mmddyyyy; keep only the requested day
            if day_tag.text != convert_date(date, fmt_out='%m%d%Y'):
                continue
            parsed = dparser.parse(' '.join(
                (to_ymd(day_tag.text), time_tag.text)))  # yyyymmdd hhmm
            showings.append(parsed.strftime(STAMP_FMT))
        movie_datetimes.append(showings)

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_coolidge(theater, date):
    """Scrape one day's showings from Coolidge Corner's showtimes page

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), sent as the `date` query parameter
    :returns: (list of movie names, list of lists of movie times)
    """
    SHOWTIMES_URL = 'https://coolidge.org/showtimes'

    page = soup_me(SHOWTIMES_URL, {'date': date})
    cards = page('div', class_='film-card')

    movie_names = []
    movie_datetimes = []
    for card in cards:
        movie_names.append(card.h2.text)
        slots = card('span', class_='showtime-ticket__time')
        movie_datetimes.append(
            ['{} @ {}'.format(date, slot.text) for slot in slots])

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_hfa(theater, date):
    """Scrape one day's screenings from Harvard Film Archive's home page

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), matched against each calendar row's
           <time datetime=...> attribute
    :returns: (list of movie names, list of lists of movie times);
              ([], []) unless exactly one calendar row matches
    """
    HOME_URL = 'https://harvardfilmarchive.org'

    page = soup_me(HOME_URL)

    try:
        (row,) = [
            candidate
            for candidate in page('div', class_='grid m-calendar__row')
            if candidate.time.attrs['datetime'] == date
        ]
    except ValueError:  # no matching days
        return [], []

    movie_names = [heading.text.strip() for heading in row('h5')]

    movie_datetimes = []
    for slot in row('div', class_='event__time'):
        movie_datetimes.append('{} @ {}'.format(date, slot.text))

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_landmark(theater, date):
    """Fetch one day's showings for Kendall Landmark from its JSON feed

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    FEED_URL = 'https://movie-lmt.peachdigital.com/movies/GetFilmsByCinema/21/151'

    feed = json_me(FEED_URL)
    films = feed['Result']

    movie_names = [film['Title'] for film in films]

    movie_datetimes = []
    for film in films:
        per_session = []
        for sesh in film['Sessions']:
            # sessions for other days contribute an empty list
            per_session.append([
                '{} @ {}'.format(date, slot['StartTime'])
                for slot in sesh['Times']
                if convert_date(sesh['DisplayDate']) == date
            ])
        movie_datetimes.append(flatten(per_session))

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_nitehawk(theater, date):
    """Scrape one day's showings from a Nitehawk location's page

    :theater: str, case-insensitive key into the location table below
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    URL_TEMPLATE = 'https://nitehawkcinema.com/{}/{}'
    LOCATION_FOR = {
        'nitehawk': 'williamsburg',
        'nitehawk prospect park': 'prospectpark'
    }

    location = LOCATION_FOR[theater.lower()]
    page = soup_me(URL_TEMPLATE.format(location, date))

    movie_names = [tag.text for tag in page('div', class_='show-title')]

    movie_datetimes = []
    for container in page('div', class_='showtimes-container clearfix'):
        showings = []
        for link in container('a', class_='showtime'):
            # ignore any junk after {a,p}m
            cleaned = clean_datetime(link.text.strip())
            showings.append('{} @ {}'.format(date, cleaned))
        movie_datetimes.append(showings)

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_metrograph(theater, date):
    """Scrape one day's showings (annotated with format) from Metrograph

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), sent as the `d` query parameter
    :returns: (list of movie names, list of lists of movie times)
    """
    FILM_URL = 'http://metrograph.com/film'

    page = soup_me(FILM_URL, {'d': date})

    movie_names = [
        heading.a.contents[0] for heading in page('h4', class_='title')
    ]

    raw_times = []
    for block in page('div', class_='showtimes'):
        raw_times.append([link.contents[0] for link in block('a')])

    # the format is the last " / "-separated piece of each specs span
    movie_formats = [
        span.text.split(' / ')[-1] for span in page('span', class_='specs')
    ]

    # annotate with format (skipped for DCP, empty formats, or no showtimes)
    movie_times = []
    for times, fmt in zip(raw_times, movie_formats):
        if times and fmt and fmt != 'DCP':
            movie_times.append(times + [f'[ {fmt} ]'])
        else:
            movie_times.append(times)

    # filter movies with no future times
    movie_names, movie_times = filter_movies(movie_names, movie_times)

    return movie_names, movie_times
def get_movies_ifc(theater, date):
    """Get movie names and times from IFC's website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the requested date is not listed exactly once
    """
    BASE_URL = 'http://www.ifccenter.com/'

    soup = soup_me(BASE_URL)

    matching_days = [
        d for d in soup('div', class_=re.compile('^daily-schedule'))
        # the "Coming Soon" block shares the class but has no parseable date
        if d.h3.text != 'Coming Soon' and convert_date(d.h3.text) == date
    ]
    # previously `day, = [...]`, which raised ValueError when the date was
    # not on the page instead of reporting "no showings"
    if len(matching_days) != 1:
        return [], []
    day = matching_days[0]

    movie_divs = day('div')

    movie_names = [mdiv.h3.text for mdiv in movie_divs]

    movie_datetimes = [[
        '{} @ {}'.format(date, time.text) for time in mdiv('li')
    ] for mdiv in movie_divs]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_mfa(theater, date):
    """Get movie names and times from Museum of Fine Arts' website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), sent as `field_date_value_1` and used to filter
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://www.mfa.org/programs/film'
    PARAMS = {'field_date_value_1': date}

    soup = soup_me(BASE_URL, PARAMS)

    relevant_movies = [
        div for div in soup('div', class_='col-sm-8')
        if div.span and convert_date(div.span.contents[0]) == date
    ]

    movie_names = [m.a.text for m in relevant_movies]

    def convert(contentlst):
        """Turn a span's (date, <separator>, time range) contents into
        "yyyy-mm-dd<sep>start" """
        day_str, _, timestr = contentlst
        # keep only the start of a "start–end" range; partition (rather than
        # a strict 2-way unpack of split) also accepts a time with no end
        start = timestr.partition('–')[0]
        return DATETIME_SEP.join((convert_date(day_str), start))

    movie_datetimes = [convert(m.span.contents) for m in relevant_movies]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_village_east_or_angelika(theater, date):
    """Scrape now-playing showings (annotated with film format) from
    Village East Cinema or Angelika Film Center

    :theater: str, case-insensitive key into the site table below
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    URL_TEMPLATE = 'https://www.{}/showtimes-and-tickets/now-playing/{}'
    SITE_FOR = {
        'village east cinema': 'citycinemas.com/villageeast',
        'angelika film center': 'angelikafilmcenter.com/nyc'
    }
    FMT_SUFFIX = re.compile('in ((35|70)mm)$', re.I)

    def split_format(title):
        # -> (title before the suffix, lowercased "35mm"/"70mm" or '')
        pieces = re.split(FMT_SUFFIX, title)[:2]
        return pieces[0], ''.join(pieces[1:]).lower()

    page = soup_me(URL_TEMPLATE.format(SITE_FOR[theater.lower()], date))

    titles = [tag.text for tag in page('h4', class_='name')]

    statuses = []
    for div in page('div', class_=re.compile('^status')):
        statuses.append(
            first((cls for cls in div['class'] if cls.startswith('status'))))

    assert len(titles) == len(
        statuses), f'{len(titles)} != {len(statuses)}'

    # filter for currently playing only
    now_playing = [
        title for title, status in zip(titles, statuses)
        if not status.endswith('coming_soon')
    ]
    if not now_playing:
        return [], []

    movie_datetimes = []
    for wrapper in page('div', class_="showtimes-wrapper"):
        buttons = wrapper('input', class_='showtime reserved-seating')
        movie_datetimes.append(
            [DATETIME_SEP.join((date, button['value'])) for button in buttons])

    movie_times = filter_past(movie_datetimes)

    # extract format from name, if any
    movie_names, movie_formats = zip(*(split_format(t) for t in now_playing))

    # annotate with format
    annotated = []
    for times, fmt in zip(movie_times, movie_formats):
        if times and fmt:
            annotated.append(times + [f'[ {fmt} ]'])
        else:
            annotated.append(times)
    movie_times = annotated

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_amc(theater, date):
    """Scrape one day's unsold showings, one entry per (movie, format),
    from an AMC theatre page

    :theater: str, case-insensitive key into the theatre table below
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    URL_TEMPLATE = 'https://www.amctheatres.com/movie-theatres/{}/{}/showtimes/all/{}/{}/all'
    SLUGS_FOR = {
        'amc boston common': ('boston', 'amc-boston-common-19'),
        'the waterfront': ('pittsburgh', 'amc-waterfront-22')
    }

    place, slug = SLUGS_FOR[theater.lower()]
    page = soup_me(URL_TEMPLATE.format(place, slug, date, slug))

    films = page('div', class_='ShowtimesByTheatre-film')

    titles = [film.h2.text for film in films]

    # per film: one list of times per showtimes section
    nested_datetimes = []
    for film in films:
        per_section = []
        sections = film(
            'div', class_=re.compile('^Showtimes-Section Showtimes-Section'))
        for section in sections:
            # TODO print sold-out times as xed-out ?
            per_section.append([
                DATETIME_SEP.join((date, clean_time(slot.text)))
                for slot in section('div', class_='Showtime')
                if slot.find('div', {'aria-hidden': "true"}).text != 'Sold Out'
            ])
        nested_datetimes.append(per_section)

    # flatten timelists for movies with multiple formats: repeat each title
    # once per section so names stay aligned with the flattened timelists
    # TODO sometimes lists separate times for same format -- combine ?
    movie_names = [
        title for title, per_section in zip(titles, nested_datetimes)
        for _ in per_section
    ]
    movie_datetimes = flatten(nested_datetimes)

    movie_times = filter_past(movie_datetimes)

    # annotate with format (skipped for 'Digital' or when no times remain)
    movie_formats = [[heading.text for heading in film('h4')]
                     for film in films]
    annotated = []
    for times, fmt in zip(movie_times, flatten(movie_formats)):
        if times and fmt != 'Digital':
            annotated.append(times + [f'[ {fmt} ]'])
        else:
            annotated.append(times)
    movie_times = annotated

    # TODO combine_times does not know formats, so it is not applied here
    movie_names, movie_times = filter_movies(movie_names, movie_times)

    return movie_names, movie_times
def get_movies_alamo(theater, date):
    """Get movie names and times from Alamo's website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), matched as a prefix of each day's `Date`
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the date is not in the calendar or has no films
    """
    BASE_URL = 'https://feeds.drafthouse.com/adcService/showtimes.svc/calendar/2101/'

    djson = json_me(BASE_URL)

    # filter months -> weeks -> day
    weeks = flatten(
        month['Weeks'] for month in djson['Calendar']['Cinemas'][0]['Months'])
    matching_days = flatten(
        [[d for d in week['Days'] if d['Date'].startswith(date)]
         for week in weeks])

    # take the first match; `day, *_ = ...` raised ValueError on no match
    day = next(iter(matching_days), None)
    if day is None:
        return [], []

    # a missing or empty film list would otherwise break the zip(*...) below
    movies = day.get('Films')
    if not movies:
        return [], []

    movie_names = [movie['FilmName'] for movie in movies]

    # extract format from name, if any
    PATTERN = re.compile('in ((35|70)mm)$', re.I)

    def extract_fmt(m):
        m, *fmt = re.split(PATTERN, m)[:2]  # only name and (35|70)mm, if any
        return m, ''.join(fmt).lower()  # (cleaned) movie name, movie fmt

    movie_names, movie_formats = zip(*(extract_fmt(m) for m in movie_names))

    def session_times(movie):
        """Collect times of all sessions that are neither sold out nor past"""
        times = []
        for series in movie['Series']:
            # format doesn't seem to mean anything here
            # - e.g. 70mm still coded as "Digital"
            for f in series['Formats']:
                for sesh in f['Sessions']:
                    # TODO print sold-out times as xed-out ?
                    if sesh['SessionStatus'] in ('soldout', 'past'):
                        continue
                    times.append('{}m'.format(
                        sesh['SessionTime'].lower()  # e.g. p -> pm
                        .replace('noon', '12:00p')))
        return times

    movie_times = [session_times(movie) for movie in movies]

    # annotate with formats
    movie_times = [(times if not times or not fmt else times + [f'[ {fmt} ]'])
                   for times, fmt in zip(movie_times, movie_formats)]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_moma(theater, date):
    """Scrape one day's film screenings from the Museum of Modern Art calendar

    For tiles that list several films, the last one is reported as the movie
    name and the others are attached as a "+ ..." annotation.

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    CALENDAR_URL = 'https://www.moma.org/calendar/?utf8=%E2%9C%93&happening_filter=Films&date={}&location=both'
    TIME_RANGE_END = re.compile('–[0-9]*:?[0-9]*')
    STAMP_FMT = DATETIME_SEP.join(('%Y-%m-%d', '%l:%M%P'))  # yyyy-mm-dd @ hh:mm {a,p}m

    def when_text(tile):
        return tile.find('div', class_='center balance-text').text

    page = soup_me(CALENDAR_URL.format(date))

    relevant = []
    for tile in page('div', class_='calendar-tile calendar-tile--tall-image'):
        # extract month & day from full datetime (nbsp -> " " first)
        month_day = when_text(tile).replace(u'\xa0', ' ').split(', ')[1]
        if date == convert_date(month_day):
            relevant.append(tile)

    # list per showing.. some have multiple films
    nested_names = []
    for tile in relevant:
        emphasized = tile.h3('em')
        if emphasized:
            nested_names.append([em.text for em in emphasized])
        else:
            nested_names.append([tile.h3.text])

    # main attraction is the last film
    movie_names = [names[-1] for names in nested_names]
    movie_formats = []
    for names in nested_names:
        if len(names) > 1:
            movie_formats.append('+ {}'.format(','.join(names[:-1])))
        else:
            movie_formats.append('')

    movie_datetimes = []
    for tile in relevant:
        # remove any time ranges before parsing
        start_only = re.sub(TIME_RANGE_END, '', when_text(tile))
        movie_datetimes.append(dparser.parse(start_only).strftime(STAMP_FMT))

    movie_times = filter_past(movie_datetimes)

    # annotate with format
    annotated = []
    for times, fmt in zip(movie_times, movie_formats):
        if times and fmt:
            annotated.append(times + [f'[ {fmt} ]'])
        else:
            annotated.append(times)
    movie_times = annotated

    movie_names, movie_times = filter_movies(movie_names, movie_times)

    return movie_names, movie_times
def get_movies_nitehawk(theater, date):
    """Get movie names and times from Nitehawk's website

    :theater: str, case-insensitive key into D_THEATERS
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the page lists no titles
    """
    BASE_URL = 'https://nitehawkcinema.com/{}/{}'

    D_THEATERS = {
        'nitehawk': 'williamsburg',
        'nitehawk prospect park': 'prospectpark'
    }

    soup = soup_me(BASE_URL.format(D_THEATERS[theater.lower()], date))

    movie_names = [movie.text for movie in soup('div', class_='show-title')]

    if not movie_names:  # zip(*...) below cannot unpack an empty sequence
        return [], []

    # extract format from name, if any
    # (raw string: `\(` in a plain literal is an invalid escape sequence)
    PATTERN = re.compile(r' \(.*(DCP|(35|70)mm)\)$', re.I)

    def extract_fmt(m):
        m, *fmt = re.split(PATTERN,
                           m)[:2]  # only name and DCP / (35|70)mm, if any
        return m, ''.join(fmt).lower()  # (cleaned) movie name, movie fmt

    movie_names, movie_formats = zip(*(extract_fmt(m) for m in movie_names))

    movie_datetimes = [
        [
            DATETIME_SEP.join((
                date,
                clean_time((
                    t.contents[0]  # ignore any junk after {a,p}m
                    .strip().lower().replace(
                        'midnite', '11:59pm')))))  # else, wld be next day
            for t in times('a', class_='showtime')
        ] for times in soup('div', class_='showtimes-container clearfix')
    ]

    movie_times = filter_past(movie_datetimes)

    # annotate with format
    movie_times = [(times if fmt == 'dcp' or not times or not fmt else times +
                    [f'[ {fmt} ]'])
                   for times, fmt in zip(movie_times, movie_formats)]

    # combine_times is deliberately not applied here
    movie_names, movie_times = filter_movies(movie_names, movie_times)

    return movie_names, movie_times
def get_movies_showtimes(theater, date):
    """Scrape one day's showings for a theater from showtimes.com

    Any failure while resolving or fetching the theater page is printed via
    `error_str` and treated as "no movies".

    :theater: str, looked up (case-insensitively) in the table below, else
              resolved through `get_theaterpg_showtimes`
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    URL_TEMPLATE = 'https://www.showtimes.com/movie-theaters/{}'

    # (phrased as functions, so theaterpg scraper won't run until necessary)
    SLUG_FOR = {
        'regal fenway': lambda *args: 'regal-fenway-stadium-13-rpx-6269',
        'ua court st': lambda *args: 'ua-court-street-stadium-12-rpx-6608'
    }

    try:
        # fallback for unlisted theater
        resolve = SLUG_FOR.get(theater.lower(), get_theaterpg_showtimes)
        page = soup_me(URL_TEMPLATE.format(resolve(theater)))
        boxes = page('li', class_='movie-info-box')
    except (Exception) as e:
        print(error_str.format(e))  # error msg only
        boxes = []  # no matching theater

    # keep only the first line of each heading
    movie_names = []
    for box in boxes:
        headings = box('h2', class_='media-heading')
        movie_names.append(''.join(
            (re.sub('[\r\n].*', '', heading.text.strip())
             for heading in headings)))

    # [[day, time, time, day, time], ..] -> [[[day, time, time], [day, time]], ..]
    # (a button whose text contains "," starts a new day group)
    nested_buttons = []
    for box in boxes:
        labels = (button.text for button in box('button', type='button'))
        nested_buttons.append(
            list(split_before(labels, lambda txt: ',' in txt)))

    movie_datetimes = []
    for groups in nested_buttons:
        todays = []
        for day, *times in groups:
            day_label = day.replace(':', '')
            if convert_date(day_label) == date:
                todays.append(
                    ['{} @ {}'.format(day_label, time) for time in times])
        movie_datetimes.append(flatten(todays))

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_somerville(theater, date):
    """Get movie names and times from Somerville Theater's website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the feed lists no films
    """
    BASE_URL = 'https://somervilletheatre.com/wp-content/themes/somerville/showtimes.xml'

    soup = soup_me(BASE_URL)

    movies = soup('filmtitle')
    if not movies:  # zip(*...) below cannot unpack an empty sequence
        return [], []

    movie_names = [m.shortname.text for m in movies]  # /or/ m.find('name').text

    PATTERN = re.compile(' ((35|70)mm)$', re.I)

    def extract_fmt(m):
        m, *fmt = re.split(PATTERN, m)[:2]  # only name and (35|70)mm, if any
        return m, ''.join(fmt).lower()  # (cleaned) movie name, movie fmt

    movie_names, movie_formats = zip(*(extract_fmt(m) for m in movie_names))

    def to_ymd(mdy):
        # mmddyyyy -> yyyymmdd
        return mdy[-4:] + mdy[:-4]

    # loop invariants, computed once instead of per showtime
    wanted_day = convert_date(date, fmt_out='%m%d%Y')  # feed dates are mmddyyyy
    stamp_fmt = DATETIME_SEP.join(('%Y-%m-%d', '%l:%M%P'))  # yyyy-mm-dd @ hh:mm {a,p}m

    movie_datetimes = [
        [
            dparser.parse(' '.join((to_ymd(d.text), t.text)))  # yyyymmdd hhmm
            .strftime(stamp_fmt)
            for d, t in zip(m('date'), m('time'))
            if d.text == wanted_day
        ] for m in movies
    ]

    movie_times = filter_past(movie_datetimes)

    # annotate with formats
    movie_times = [(times if not times or not fmt else times + [f'[ {fmt} ]'])
                   for times, fmt in zip(movie_times, movie_formats)]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_pghfilmmakers(theater, date):
    """Scrape one day's showings for a Pittsburgh Filmmakers venue

    :theater: str, case-insensitive key into the location table below
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) unless exactly one caption matches the date
    """
    URL_TEMPLATE = 'http://cinema.pfpca.org/films/showtimes?location={}'
    LOCATION_ID = {
        'regent square theater': 24,
        'harris theater': 20,
        'melwood screening room': 18
    }

    page = soup_me(URL_TEMPLATE.format(LOCATION_ID[theater.lower()]))

    # get date block: captions are compared to the date in '%a, %b %-d' form
    try:
        (caption,) = [
            candidate for candidate in page('caption')
            if candidate.text == convert_date(date, fmt_out='%a, %b %-d')
        ]
    except (ValueError):  # indexing into empty list
        return [], []

    # the listings live in the third `.next` element after the caption
    listing = caption.next.next.next

    movie_names = [
        link.text for link in listing('a', href=re.compile('/films/*'))
    ]

    movie_datetimes = []
    location_cells = listing(
        'td', class_='views-field views-field-field-location')
    for cell in location_cells:
        start = cell.next.next.next.text.strip()
        movie_datetimes.append(' @ '.join((date, start)))

    movie_times = filter_past(movie_datetimes)

    # filter movies with no future times & combine times for same movie
    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_quad(theater, date):
    """Get movie names and times (annotated with format) from Quad's website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the requested date is not listed exactly once
    """
    BASE_URL = 'https://quadcinema.com/all/'

    soup = soup_me(BASE_URL)

    try:
        day, = [
            d for d in soup('div', class_='now-single-day')
            if convert_date(d.h1.text) == date
        ]
    except (ValueError):  # no matching date listed yet
        return [], []

    movie_names = [movie.text for movie in day('h4')]

    movies = day('div', class_='single-listing')

    PATTERN = re.compile('^time')  # showtime `li`s
    movie_datetimes = [[
        DATETIME_SEP.join((date, time.text.replace('.', ':')))
        for time in m('li', class_=PATTERN)
    ] for m in movies]

    movie_times = filter_past(movie_datetimes)

    # non-showtime `li`s: class does not start with "time".
    # The former '^[^(time)]' was a character class, i.e. it rejected any
    # class whose first letter is one of "(", "t", "i", "m", "e", ")".
    ANTIPATTERN = re.compile('^(?!time)')
    movie_formats = [[fmt.text for fmt in m('li', class_=ANTIPATTERN)]
                     for m in movies]

    # annotate with formats
    movie_times = [(times if not times or not fmt else times +
                    ['[ {} ]'.format(','.join(fmt))])
                   for times, fmt in zip(movie_times, movie_formats)]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_manor(theater, date):
    """Fetch one day's showings for The Manor from the retriever schedule API

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), sent as the `date` query parameter
    :returns: (list of movie names, list of lists of movie times)
    """
    SCHEDULE_URL = 'https://plugin.retrieverapi.com/getSchedule'
    STAMP_FMT = DATETIME_SEP.join(('%Y-%m-%d', '%l:%M%P'))  # yyyy-mm-dd @ hh:mm {a,p}m

    query = {'date': date}

    # request headers sent with the schedule call
    request_headers = {
        'Host': 'plugin.retrieverapi.com',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
        'Accept': 'application/json',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': 'https://plugin.retrieverapi.com/embed/4227729?print',
        'Authorization': 'Basic NDIyNzcyOToxMjM=',
        'DNT': '1',
        'Connection': 'keep-alive'
    }

    schedule = json_me(SCHEDULE_URL, query, headers=request_headers)
    films = schedule['movies']

    movie_names = [film['movie_name'] for film in films]

    movie_datetimes = []
    for film in films:
        showings = []
        for show in film['showtimes']:
            showings.append(
                dparser.parse(show['date_time']).strftime(STAMP_FMT))
        movie_datetimes.append(showings)

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times
def get_movies_syndicated(theater, date):
    """Get movie names and times from Syndicated's website

    Event text is expected to look like "<name> (<h:mm> <am|pm>)"; events
    without such a time are skipped.

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), matched against the tail of each event div's id
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when nothing is listed or the first entry is a
              private-event closure notice
    """
    BASE_URL = 'https://syndicatedbk.com/events/'

    # raw string: `\(` in a plain literal is an invalid escape sequence
    TIME_PATTERN = re.compile(r' \([0-9:]* [ap]m\)', re.I)

    soup = soup_me(BASE_URL)

    movie_strs = [
        div.text.strip() for div in soup(
            'div', id=re.compile(f'tribe-events-event-[0-9]*-{date}'))
    ]

    if not movie_strs or movie_strs[0].lower() == 'closed for private event':
        return [], []

    # pair each listing with its time match; entries with no match used to
    # crash on `None.start(0)` and are now dropped
    listings = [(movie_str, TIME_PATTERN.search(movie_str))
                for movie_str in movie_strs]
    listings = [(movie_str, m) for movie_str, m in listings if m]

    movie_names = [
        movie_str[:m.start(0)]  # extract name
        for movie_str, m in listings
    ]

    movie_datetimes = [
        DATETIME_SEP.join((
            date,
            # extract time (while removing leading " (" & trailing ")")
            movie_str[m.start(0) + 2:m.end(0) - 1]))
        for movie_str, m in listings
    ]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_anthology(theater, date):
    """Get movie names and times from Anthology's website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the requested day is not on the page
    """
    # no placeholder in the URL, so the former `.format(date)` was a no-op
    BASE_URL = 'http://anthologyfilmarchives.org/film_screenings/calendar?view=list'

    soup = soup_me(BASE_URL)

    days = soup('h3', class_='current-day')
    try:
        iday = index_into_days([
            # heading text = its direct string children only
            ''.join((_ for _ in day.contents if isinstance(_, str))).strip()
            for day in days
        ], date=date)
    except (AssertionError):  # no matching days
        return [], []

    # the day's showings sit between its heading and the next heading
    # (or the footer, for the last day on the page)
    border = (days[iday + 1]
              if iday < len(days) - 1 else soup.find('div', id='footer'))

    next_movies = days[iday].find_all_next('div', class_='showing-details')
    prev_movies = set(
        border.find_all_previous('div', class_='showing-details'))

    # get intersection b/w borders, in page order. The former
    # `list(set(a) & set(b))` returned the showings in arbitrary order;
    # dict.fromkeys keeps that version's de-duplication.
    movies = list(dict.fromkeys(m for m in next_movies if m in prev_movies))

    movie_names = [m.find('span', class_='film-title').text for m in movies]

    movie_datetimes = [[
        DATETIME_SEP.join((date, time.text))
        for time in movie('a', {'name': re.compile("^showing-")})
    ] for movie in movies]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_alamo(theater, date):
    """Get movie names and times from Alamo's website

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), matched as a prefix of each day's `Date`
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the date is not in the calendar or has no films
    """
    BASE_URL = 'https://feeds.drafthouse.com/adcService/showtimes.svc/calendar/2101/'

    djson = json_me(BASE_URL)

    # filter months -> weeks -> day
    weeks = flatten(
        month['Weeks'] for month in djson['Calendar']['Cinemas'][0]['Months'])
    matching_days = flatten(
        [[d for d in week['Days'] if d['Date'].startswith(date)]
         for week in weeks])

    # take the first match; `day, = ...` raised ValueError unless exactly
    # one day matched
    day = next(iter(matching_days), None)
    if day is None:
        return [], []

    movies = day.get('Films')  # key may be absent for a day with no films
    if not movies:
        return [], []

    movie_names = [movie['FilmName'] for movie in movies]

    def session_times(movie):
        """Collect times of all sessions that are neither sold out nor past"""
        times = []
        for series in movie['Series']:
            # format doesn't seem to mean anything here
            # - e.g. 70mm still coded as "Digital"
            for f in series['Formats']:
                for sesh in f['Sessions']:
                    # TODO print sold-out times as xed-out ?
                    if sesh['SessionStatus'] in ('soldout', 'past'):
                        continue
                    times.append('{}m'.format(
                        sesh['SessionTime'].lower()  # e.g. p -> pm
                        .replace('noon', '12:00p')))  # else "noonm"
        return times

    movie_times = [session_times(movie) for movie in movies]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
def get_movies_metrograph(theater, date):
    """Scrape one day's showings from Metrograph's film page

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd), sent as the `d` query parameter
    :returns: (list of movie names, list of lists of movie times)
    """
    FILM_URL = 'http://metrograph.com/film'

    page = soup_me(FILM_URL, {'d': date})

    movie_names = []
    for heading in page('h4', class_='title'):
        movie_names.append(heading.a.contents[0])

    movie_times = []
    for block in page('div', class_='showtimes'):
        movie_times.append([link.contents[0] for link in block('a')])

    # filter movies with no future times
    movie_names, movie_times = filter_movies(movie_names, movie_times)

    return movie_names, movie_times
def get_movies_loews_theater(theater, date):
    """Scrape one day's film screenings from Landmark Loew's Jersey calendar

    :theater: str (unused; kept for signature parity with sibling scrapers)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when no film screening is listed for the date
    """
    CALENDAR_URL = 'http://loewsjersey.org/calendar/?tribe-bar-date={}'

    # the calendar is requested by month: drop the "-dd" suffix -> yyyy-mm
    page = soup_me(CALENDAR_URL.format(date[:-3]))

    screenings = []
    for heading in page('h3', class_="tribe-events-month-event-title"):
        if not heading.text.lower().startswith("film screening"):
            continue
        # the parent's id ends with the event date (yyyy-mm-dd)
        if heading.parent.attrs['id'][-10:] == date:
            screenings.append(heading)

    if not screenings:
        return [], []

    movie_names = []
    movie_datetimes = []
    for heading in screenings:
        movie_names.append(
            heading.text.replace('Film Screening: “', '').replace('”', ''))
        event = json.loads(heading.parent.attrs['data-tribejson'])
        movie_datetimes.append(event['startTime'])  # date @ time

    movie_times = filter_past(movie_datetimes)

    kept = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(*kept)

    return movie_names, movie_times