Python flattenの例、CLIppy.flatten Pythonの例

コード例 #1

0

ファイルを表示

ファイル: scrapers.py プロジェクト: meereeum/cinematic

def get_movies_amc(theater, date):
    """Get movie names and times from AMC's website

    Every showtimes section listed under a film becomes its own entry, so
    (before the final filtering) a film with several sections appears once
    per section in both returned sequences.

    :theater: str, looked up case-insensitively in the local theater table
              (KeyError if the theater is not listed there)
    :date: str (yyyy-mm-dd)
    :returns: (movie names, movie times), as returned by `filter_movies`
    """
    BASE_URL = 'https://www.amctheatres.com/movie-theatres/{}/{}/showtimes/all/{}/{}/all'

    D_THEATERS = {
        'amc boston common': ('boston', 'amc-boston-common-19'),
        'the waterfront': ('pittsburgh', 'amc-waterfront-22')
    }
    theaterplace, theatername = D_THEATERS[theater.lower()]

    url = BASE_URL.format(theaterplace, theatername, date, theatername)
    soup = soup_me(url)

    movies = soup('div', class_='ShowtimesByTheatre-film')

    movie_names = [film.h2.text for film in movies]

    def is_sold_out(showtime):
        # the hidden-from-screen-readers div carries the 'Sold Out' label
        return showtime.find('div', {'aria-hidden': "true"}).text == 'Sold Out'

    section_class = re.compile('^Showtimes-Section Showtimes-Section')

    # movie -> showtimes sections -> "<date><sep><time>" strings
    # TODO print sold-out times as xed-out ?
    movie_datetimes = []
    for film in movies:
        per_section = []
        for section in film('div', class_=section_class):
            stamps = []
            for showtime in section('div', class_='Showtime'):
                if is_sold_out(showtime):
                    continue
                stamps.append(
                    DATETIME_SEP.join((date, clean_time(showtime.text))))
            per_section.append(stamps)
        movie_datetimes.append(per_section)

    # flatten timelists for movies with multiple formats:
    # repeat each name once per section so names and timelists stay aligned
    # TODO sometimes lists separate times for same format -- combine ?
    repeated_names = []
    for name, sections in zip(movie_names, movie_datetimes):
        repeated_names.extend([name] * len(sections))
    movie_names = repeated_names
    movie_datetimes = flatten(movie_datetimes)

    movie_times = filter_past(movie_datetimes)

    # annotate with format (skipped for 'Digital' and for empty timelists)
    movie_formats = [[heading.text for heading in film('h4')]
                     for film in movies]
    annotated = []
    for times, fmt in zip(movie_times, flatten(movie_formats)):
        if times and fmt != 'Digital':
            times = times + [f'[ {fmt} ]']
        annotated.append(times)
    movie_times = annotated

    # TODO combine_times does not know formats, so entries are filtered only
    movie_names, movie_times = filter_movies(movie_names, movie_times)

    return movie_names, movie_times

コード例 #2

0

ファイルを表示

ファイル: scrapers.py プロジェクト: meereeum/cinematic

def get_movies_alamo(theater, date):
    """Get movie names and times from Alamo's website

    The calendar feed for the cinema fixed in BASE_URL is fetched and
    searched for the first day whose 'Date' starts with `date`. If no day
    matches, the day has no 'Films' key, or its film list is empty, the
    result is a pair of empty lists.

    :theater: str (not used here; the cinema is fixed by BASE_URL)
    :date: str (yyyy-mm-dd)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://feeds.drafthouse.com/adcService/showtimes.svc/calendar/2101/'

    djson = json_me(BASE_URL)

    # filter months -> weeks -> day
    matching_days = [
        d
        for month in djson['Calendar']['Cinemas'][0]['Months']
        for week in month['Weeks']
        for d in week['Days']
        if d['Date'].startswith(date)
    ]
    if not matching_days:
        # date is not in the published calendar: nothing to show, rather
        # than failing on an empty unpack
        return [], []
    day = matching_days[0]

    try:
        movies = day['Films']
    except KeyError:
        return [], []
    if not movies:
        # an empty film list would otherwise break the zip(*...) unpacking
        return [], []

    movie_names = [movie['FilmName'] for movie in movies]

    # extract format from name, if any
    PATTERN = re.compile(r'in ((35|70)mm)$', re.I)

    def extract_fmt(name):
        # the split yields [name, '35mm', '35', ''] on a match and [name]
        # otherwise; keep only the name and the full format, if any
        name, *fmt = PATTERN.split(name)[:2]
        return name, ''.join(fmt).lower()  # movie name, movie fmt ('' if none)

    movie_names, movie_formats = zip(*(extract_fmt(m) for m in movie_names))

    def onsale_times(movie):
        # format doesn't seem to mean anything in the feed - e.g. 70mm still
        # coded as "Digital" - so sessions of every format are pooled
        return [
            '{}m'.format(
                sesh['SessionTime'].lower()  # e.g. p -> pm
                .replace('noon', '12:00p'))
            for series in movie['Series']
            for f in series['Formats']
            for sesh in f['Sessions']
            if sesh['SessionStatus'] not in ('soldout', 'past')  # `onsale` only
        ]

    # TODO print sold-out times as xed-out ?
    movie_times = [onsale_times(movie) for movie in movies]

    # annotate with formats (only when there are times and a format was found)
    movie_times = [(times if not times or not fmt else times + [f'[ {fmt} ]'])
                   for times, fmt in zip(movie_times, movie_formats)]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times

コード例 #3

0

ファイルを表示

def get_movies_landmark(theater, date):
    """Get movie names and times from Kendall Landmark's website

    :theater: str (not used here; the cinema is fixed by BASE_URL)
    :date: str (yyyy-mm-dd)
    :returns: (movie names, movie times), as returned by `combine_times`
    """
    BASE_URL = 'https://movie-lmt.peachdigital.com/movies/GetFilmsByCinema/21/151'

    listings = json_me(BASE_URL)['Result']

    movie_names = [entry['Title'] for entry in listings]

    # one list of "<date> @ <start time>" strings per movie, keeping only
    # the sessions whose display date converts to the requested date
    movie_datetimes = []
    for entry in listings:
        per_session = []
        for sesh in entry['Sessions']:
            per_session.append([
                '{} @ {}'.format(date, showing['StartTime'])
                for showing in sesh['Times']
                if convert_date(sesh['DisplayDate']) == date
            ])
        movie_datetimes.append(flatten(per_session))

    upcoming = filter_past(movie_datetimes)
    kept_names, kept_times = filter_movies(movie_names, upcoming)
    movie_names, movie_times = combine_times(kept_names, kept_times)

    return movie_names, movie_times

コード例 #4

0

ファイルを表示

def get_movies_alamo(theater, date):
    """Get movie names and times from Alamo's website

    Exactly one calendar day must match `date`; otherwise the unpacking of
    the matching days raises ValueError.

    :theater: str (not used here; the cinema is fixed by BASE_URL)
    :date: str (yyyy-mm-dd)
    :returns: (movie names, movie times), as returned by `combine_times`
    """
    BASE_URL = 'https://feeds.drafthouse.com/adcService/showtimes.svc/calendar/2101/'

    djson = json_me(BASE_URL)

    # filter months -> weeks -> day
    weeks = flatten(
        month['Weeks']
        for month in djson['Calendar']['Cinemas'][0]['Months'])
    days_by_week = [
        [d for d in week['Days'] if d['Date'].startswith(date)]
        for week in weeks
    ]
    (day,) = flatten(days_by_week)
    movies = day['Films']

    movie_names = [movie['FilmName'] for movie in movies]

    def open_sessions(fmt_entry):
        # `onsale` only: drop sessions that are sold out or already past
        return [
            '{}m'.format(sesh['SessionTime'])  # e.g. p -> pm
            for sesh in fmt_entry['Sessions']
            if not (sesh['SessionStatus'] == 'soldout'
                    or sesh['SessionStatus'] == 'past')
        ]

    # TODO print sold-out times as xed-out ?
    movie_times = []
    for movie in movies:
        per_series = []
        for series in movie['Series']:
            # format doesn't seem to mean anything here - e.g. 70mm still
            # coded as "Digital" - so all formats are pooled together
            per_format = [open_sessions(f) for f in series['Formats']]
            per_series.append(flatten(per_format))
        movie_times.append(flatten(per_series))

    kept_names, kept_times = filter_movies(movie_names, movie_times)
    movie_names, movie_times = combine_times(kept_names, kept_times)

    return movie_names, movie_times

コード例 #5

0

ファイルを表示

def get_movies_showtimes(theater, date):
    """Get movie names and times from Showtimes' website

    Any exception raised while resolving or fetching the theater page is
    printed (message only) and treated as "no movies".

    :theater: str
    :date: str (yyyy-mm-dd)
    :returns: (movie names, movie times), as returned by `combine_times`
    """
    BASE_URL = 'https://www.showtimes.com/movie-theaters/{}'

    # values are callables so that the fallback page lookup
    # (get_theaterpg_showtimes) only runs for theaters not listed here
    D_THEATERS = {
        'regal fenway': lambda *args: 'regal-fenway-stadium-13-rpx-6269',
        'ua court st': lambda *args: 'ua-court-street-stadium-12-rpx-6608'
    }

    try:
        resolve_page = D_THEATERS.get(theater.lower(),
                                      get_theaterpg_showtimes)
        soup = soup_me(BASE_URL.format(resolve_page(theater)))
        movies = soup('li', class_='movie-info-box')
    except Exception as e:
        print(error_str.format(e))  # error msg only
        movies = []  # no matching theater

    def heading_text(box):
        # keep each heading's text up to its first line break
        pieces = [
            re.sub('[\r\n].*', '', heading.text.strip())
            for heading in box('h2', class_='media-heading')
        ]
        return ''.join(pieces)

    movie_names = [heading_text(box) for box in movies]

    def starts_new_day(txt):
        # button texts containing a comma mark the start of a new group
        return ',' in txt

    # [[day, time, time, day, time], ..] -> [[[day, time, time], [day, time]], ..]
    nested_buttons = []
    for box in movies:
        labels = (button.text for button in box('button', type='button'))
        nested_buttons.append(list(split_before(labels, starts_new_day)))

    movie_datetimes = []
    for buttons in nested_buttons:
        groups = []
        for day, *times in buttons:
            day_label = day.replace(':', '')
            if convert_date(day_label) == date:
                groups.append(
                    ['{} @ {}'.format(day_label, t) for t in times])
        movie_datetimes.append(flatten(groups))

    upcoming = filter_past(movie_datetimes)
    kept_names, kept_times = filter_movies(movie_names, upcoming)
    movie_names, movie_times = combine_times(kept_names, kept_times)

    return movie_names, movie_times

コード例 #6

0

ファイルを表示

def print_fancy(beer, d_stats, sep='|', spacer='  ', **kwargs):
    """Print a header line for a beer, then its ratings above the site names.

    The site columns are the keys of `D_ACTIONS`, in that order. Each rating
    is centered over the label of the site it belongs to; a site that is
    missing from `d_stats` (or whose stats are empty) gets a blank cell.

    :beer: name shown in the header
    :d_stats: mapping of site name -> stats mapping (its 'rating' is shown)
    :sep: str placed between columns
    :spacer: str padding placed around each site label
    :kwargs: accepted and ignored
    """
    PATTERN = '~*~'

    # NOTE(review): get_info_ranked is not defined in this block; it is
    # assumed to be available from the enclosing scope - confirm
    style = get_info_ranked('style')
    abv = get_info_ranked('abv')

    # header
    print('\n{pattern} {} {pattern} ({}, {})\n'.format(beer,
                                                       style,
                                                       abv,
                                                       pattern=PATTERN))

    # site labels, e.g. "  (site_a)  |  (site_b)  "
    sites = list(D_ACTIONS)
    sitetxt = ''.join((
        '{spacer}',
        '{spacer}{sep}{spacer}'.join(['({})'] * len(sites)),
        '{spacer}')).format(*sites, spacer=spacer, sep=sep)

    # each rating is centered within the width of its site's label column
    widths = [len(column) for column in sitetxt.split(sep)]

    # look ratings up per site so that they line up with the labels above
    # even when d_stats is ordered differently or lacks some of the sites
    ratings = [(d_stats.get(site) or {}).get('rating', '') for site in sites]

    reviewtxt = sep.join('{:^{}}'.format(rating, width)
                         for rating, width in zip(ratings, widths))

    print('\n'.join((reviewtxt, sitetxt)))
    print()

コード例 #7

0

ファイルを表示

def get_movies_google(theater, date, *args, **kwargs):
    """Get movie names and times from Google search

    A film with several time lists (one per format) appears once per time
    list in both returned lists.

    :theater: str
    :date: str (yyyy-mm-dd)
    :args, kwargs: other search terms, e.g. zip code (accepted, but not
                   currently added to the query)
    :returns: (list of movie names, list of lists of movie times)
    :raises: NoMoviesException if the result page has no showtimes block or
             its date does not match `date`
    """
    fdate = convert_date(date, fmt_out='%A')  # formatted for search
    # search for 'today' rather than the weekday name when they coincide
    fdate = fdate if fdate != convert_date('today',
                                           fmt_out='%A') else 'today'

    BASE_URL = 'https://www.google.com/search'

    PARAMS = {
        'q': safe_encode('showtimes', '"{}"'.format(theater), fdate),
        'ie': 'utf-8',
        'client': 'firefox-b-1-e'
    }

    # passing params directly to requests gives problems with extraneous
    # % encoding, so the full query URL is composed up front
    soup = soup_me(compose_query(BASE_URL, PARAMS))

    # TODO google static html only returns up to 10 movies..

    # NOTE(review): `|` binds looser than the anchors, so the `time` pattern
    # means '^(std-ts)' OR '(lr_c_stnl)$' - confirm whether
    # '^(std-ts|lr_c_stnl)$' was intended
    CLASS = AttrDict(timelist='lr_c_fcc',
                     time=re.compile('^(std-ts)|(lr_c_stnl)$'),
                     fmt='lr_c_vn')

    try:
        relevant_div = soup.find('div', {'data-date': True})

        # check date; raised explicitly (not via `assert`) so that the check
        # still runs when Python is started with -O
        date_found = relevant_div.attrs['data-date']
        if convert_date(date_found) != date:
            raise AssertionError('{} != {}'.format(date_found, date))

        movies = relevant_div('div', {'data-movie-name': True})

    except (AssertionError, AttributeError) as e:
        # no movies found for desired theater/date
        print(error_str.format('No matching theater on google'))
        raise NoMoviesException(e) from e

    movie_names = [m.span.text for m in movies]

    # time-list elements per movie (one per listed format)
    timelists_per_movie = [m('div', class_=CLASS.timelist) for m in movies]

    movie_times = [  # nested times per format per movie
        [[time.text for time in timelst('div', class_=CLASS.time)]
         for timelst in timelsts] for timelsts in timelists_per_movie
    ]

    movie_formats = [
        [
            getattr(timelst.find('div', class_=CLASS.fmt), 'text',
                    None)  # default if no format listed
            for timelst in timelsts
        ] for timelsts in timelists_per_movie
    ]

    # flatten timelists for movies with multiple formats:
    # repeat each name once per time list so names and times stay aligned
    n_timelists_per_movie = [len(timelsts) for timelsts in movie_times]
    movie_names = list(
        chain.from_iterable(
            [name] * n for name, n in zip(movie_names, n_timelists_per_movie)))

    # annotate with format (skipped for 'Standard', missing formats and
    # empty time lists)
    movie_times = [
        (times if fmt == 'Standard' or not times or not fmt else times +
         ['[ {} ]'.format(fmt)])
        for times, fmt in zip(flatten(movie_times), flatten(movie_formats))
    ]

    # no need to filter - tags only correspond to upcoming movie times
    return movie_names, movie_times