Example #1
 def delete(self):
     """ Deletes the list from Letterboxd. This cannot be undone!
     NOTE: after deleting a list, the instance will become unusable. 
     """
     if not util.yn(
             "Are you sure you want to delete the list? This cannot be undone!"
     ):
         return
     SESSION.request("POST", self.suburl_delete)
     self.soup = None
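A usage sketch (the class name LetterboxdList is an assumption):

# Hypothetical: load an existing list, then delete it (prompts for confirmation first)
LetterboxdList("old watchlist").delete()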
Example #2
    @classmethod
    def new(cls, name, **kwargs):
        """
        :: Alternative Constructor ::

        Creates a new list, as opposed to initialising this class
        regularly, which expects the name passed to already exist as a list on Letterboxd.
        This method makes a request first to create the list
        It then returns an instance in the regular way by calling the __init__ method(),
        which anticipates an existing list. Since we have already created the list, this is fine.
        Parameters:
            - name (str) - the name of the list 
        
        Optional Parameters
            - tags (list) - e.g. [horror, 1980s]
            - public (bool)
            - ranked (bool)
            - description (str) - e.g. "These are my favourite films"
            - entries (list of dicts) - films in the list and any notes about them
        """

        # Edge case - list name passed is not type string
        if not name or not isinstance(name, str):
            raise TypeError(f"name must be non-empty string, not {name}")

        # Default values for the list which will be used
        # in the event that the corresponding keyword arguments are not provided
        default_values = {
            'tags': [],
            'public': False,
            'ranked': False,
            'description': '',
            'entries': []
        }

        ## Add default values for any missing keys
        list_data = {
            attribute: kwargs.get(attribute, default)
            for attribute, default in default_values.items()
        }

        ## Add list_name and empty_id
        # (the id_ will be generated automatically when making the list creation request)
        list_data['name'] = name
        list_data['list_id'] = ''

        ## Convert the list_data into values which can be passed to a request
        # This involves changing the types of some of the values
        post_data = cls.make_post_data(list_data)

        ## Create list
        SESSION.request("POST", suburl=cls.save_url, data=post_data)

        # Since the list has been created, creating an instance should now work
        # the same way that it would with any existing list
        return cls(name)
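A usage sketch of the alternative constructor. The class name LetterboxdList and the shape of the entry dicts are assumptions; the keyword arguments mirror the optional parameters listed in the docstring:

# Hypothetical: create a new ranked, public list and get back a regular instance
favourites = LetterboxdList.new(
    "my favourite films",
    tags=['personal', 'favourites'],
    public=True,
    ranked=True,
    description="These are my favourite films",
    entries=[{'filmId': 12345}]   # assumed entry format; see make_post_data for the real one
)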
Example #3
 def comment_soup(self):
     """ Returns the soup containing information about the list's existing comments."""
     response = SESSION.request("GET",
                                f"csi/list/{self._id}/comments-section/?",
                                params={'esiAllowUser': True})
     soup = make_soup(response)
     return soup
Example #4
 def __get_rating_soup(self):
     """ The film's rating info is loaded from a different page
     Hence we make the request to this separate page to get it
     r-type: BeautifulSoup """
     suburl = f"csi/film/{self.path}/rating-histogram/"
     request = SESSION.request("GET", suburl)
     return make_soup(request)
Example #5
 def __get_info_soup(self):
     """ Go the main film_page and grab the soup. 
     r-type: BeautifulSoup"""
     request = SESSION.request("GET", self.suburl)
     soup = make_soup(request)
     page_wrapper = soup.find('div', id='film-page-wrapper')
     return page_wrapper
Example #6
 def load(self, *args):
     """ Overload of load from parent class.
     Uses the edit view rather than standard list view. """
     list_name = self.get_formatted_name()
     edit_url = f"{SESSION.username}/list/{list_name}/edit"
     request = SESSION.request("GET", edit_url)
     soup = make_soup(request)
     self.soup = soup
Example #7
    def get_film_names(self):
        """ Returns each id in the film list together with the corresponding film_name. """

        response = SESSION.request("GET", self.view_list)
        soup = make_soup(response)

        if not (page_navigator := soup.find('div', class_='pagination')):
            last_page = 1
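The snippet above cuts off right after the pagination check. As a hedged sketch only: assuming the pagination block lists page links as <a> tags whose last entry holds the final page number, and reusing get_page_of_film_names from Example #10, the rest of the method might look like this (the else branch and the loop are assumptions, not the original code):

    def get_film_names(self):
        """ Returns each id in the film list together with the corresponding film_name. """
        response = SESSION.request("GET", self.view_list)
        soup = make_soup(response)

        # Single-page lists have no pagination block
        if not (page_navigator := soup.find('div', class_='pagination')):
            last_page = 1
        else:
            # Assumption: the last page link's text is the final page number
            last_page = int(page_navigator.find_all('a')[-1].get_text())

        # Merge the per-page dictionaries into one {film_id: film_name} mapping
        film_names = {}
        for page_num in range(1, last_page + 1):
            film_names.update(self.get_page_of_film_names(page_num))
        return film_names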
Example #8
def get_blocked():
    """ Returns a list of the users in your block list.
    NOTE: You can only see who you've blocked, so this function takes no
    username argument, unlike following and followers. """
    username = SESSION.username
    request = SESSION.request("GET", f"{username}/blocked/")
    soup = make_soup(request)
    return __get_people(soup)
Example #9
    def load(self, username):
        """ load an instance for an existing list, given its name. """
        list_name = self.get_formatted_name()
        view_list = f"{username}/list/{list_name}/"

        # Make request to list url on Letterboxd
        response = SESSION.request("GET", view_list)
        soup = make_soup(response)
        self.soup = soup
Example #10
    def get_page_of_film_names(self, page_num):
        """ Returns a dictionary 
            key: film_id
            value: film_name
        for all the films on that page of the list. 
            
        Example: {film_id: film_name}
        """
        response = SESSION.request("GET", f"{self.view_list}page/{page_num}/")
        soup = make_soup(response)

        ul = soup.find('ul', class_='film-list')
        page_results = {
            int(li.find('div').get('data-film-id')): li.find('img').get('alt')
            for li in ul.find_all('li')
        }
        return page_results
Example #11
 def add_comment(self, comment):
     """ Adds a comment to the list. """
     SESSION.request("POST",
                     self.add_comment_url,
                     data={'comment': comment})
Example #12
 def get_comment_text(suburl):
     """ Returns the body of the comment. """
     response = SESSION.request("GET", suburl)
     return make_soup(response).get_text()
Example #13
        if not (comments := self.comments):
            raise Exception("No comments to delete!")
        if type(comment_id) not in (str, int):
            raise TypeError(
                f"Invalid type for comment_id: {type(comment_id)}. Should be int or str"
            )
        if isinstance(comment_id, str):
            comment_id = int(comment_id)

        if comment_id not in [i['id'] for i in comments]:
            raise Exception(f"Unable to locate id: {comment_id}")

        delete_comment_url = f"ajax/filmListComment:{comment_id}/delete-comment/"

        # Make post request to delete comment
        SESSION.request("POST", suburl=delete_comment_url)

    """
    ** Film names **
    """

    def get_page_of_film_names(self, page_num):
        """ Returns a dictionary 
            key: film_id
            value: film_name
        for all the films on that page of the list. 
            
        Example: {film_id: film_name}
        """
        response = SESSION.request("GET", f"{self.view_list}page/{page_num}/")
        soup = make_soup(response)
Example #14
def get_followers(username=SESSION.username):
    """ Returns a list of the users a given user is followed by. """
    request = SESSION.request("GET", f"{username}/followers/")
    soup = make_soup(request)
    return __get_people(soup)
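Both get_blocked and get_followers hand the parsed page to a private __get_people helper that is not shown in these examples. As a loose sketch only, assuming the people pages list each user as an <a class="avatar" href="/username/"> link (the same pattern FilmRaters parses further below), the helper could look roughly like this:

def __get_people(soup):
    """ Sketch: extract usernames from a people-listing page (assumed markup). """
    # Assumption: each listed user is an <a class="avatar"> whose href is "/<username>/"
    return [a.get('href').strip('/') for a in soup.find_all('a', class_='avatar')]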
Example #15
    def __call__(self, **kwargs):
        """
        Returns a list of film_ids that correspond with the given search parameters.

        If no parameters are given, all film_ids in the watched_list will be returned.

        Keyword Arguments:

            rated_only(bool)

            year(str or None):
                Options :-
                - 4 digits e.g. 1975
                - 4 digits + s e.g. 1970s # functions as decade

            genre(str or None):
                Constraints :-
                - must be in genre_list

            service(str or None):
                Constraints :-
                - must be in service_list

            rating(float or None):
                Constraints :-
                    - must be in inclusive range (0.5, 5)
                    - decimal must be 0.5 or 0, like Letterboxd ratings

            sort_by(str):
                How do you want the results sorted?
                Constraints :-
                - must be in sort_list
                Options :-
                - name
                - popular
                - date-earliest (release date)
                - date-latest
                - rating (average rating)
                - rating-lowest
                - your-rating (session user's rating)
                - your-rating-lowest
                - entry-rating (username's rating)
                - entry-rating-lowest
                - shortest (film length)
                - longest

            filters(list):
                Constraints :-
                - must be in SESSION's filters_dict
                Options :- (updated: 2020-11-20)
                - show-liked OR hide-liked
                - show-logged OR hide-logged
                - show-reviewed OR hide-reviewed
                - show-watchlisted OR hide-watchlisted
                - show-shorts OR hide-shorts
                - hide-docs
                - hide-unreleased

        Example suburl in full:
        - username/films/ratings/year(or decade)/2015/genre/horror/on/amazon-gbr/by/rating
        """

        # Get valid filters for the request
        if 'filters' in kwargs:
            filters = self.get_valid_filters(kwargs.pop('filters'))
        else:
            filters = ''

        # Set cookie according to filters
        requests_jar = requests.cookies.RequestsCookieJar()
        requests_jar.set('filmFilter', filters)

        # Get the suburl for request
        suburl = self.build_suburl(**kwargs)

        film_ids = []
        page_num = 1
        while len(film_ids) % 18 == 0:
            print("page", page_num)
            request = SESSION.request("GET",
                                      suburl + f"page/{page_num}/",
                                      cookies=requests_jar)
            soup = make_soup(request)

            films_on_page = [
                i.find('div').get('data-film-id')
                for i in soup.find_all('li', class_='poster-container')
            ]
            """ Edge case: the last page has exactly 18 films.
            The scraper goes to the next page which is blank, 
            This means that the films_on_page list is empty, so can use this to break from the loop. """
            if not films_on_page:
                break

            film_ids += films_on_page
            page_num += 1
        return film_ids
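A usage sketch of the search call above. The class name WatchedFilms is an assumption; the keyword values are taken from the options listed in the docstring:

# Hypothetical: 1970s horror films from the watched list, sorted by average rating
watched = WatchedFilms()
film_ids = watched(
    year='1970s',                        # decade form
    genre='horror',
    sort_by='rating',
    filters=['hide-shorts', 'hide-docs']
)
print(len(film_ids), "film ids returned")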
class FilmRaters():
    
    ## Max pages Letterboxd allows
    page_limit = 10

    ## Ratings per page
    ratings_per_page = 500

    ## Max results in total
    max_results = page_limit * ratings_per_page

    ## suburls for getting ratings
    suburl_rating_highest = 'ratings/'
    suburl_rating_lowest = 'ratings/by/entry-rating-lowest/'

    def __init__(self, film):
        """ Ensure film in correct format """
        if not film or type(film) is not str:
            raise Exception(f"Invalid film name {film}, must be string")
        self.film = film.replace(' ', '-')

        ## Get information about the film's overall rating and ratings spread
        try:
            film_ratings = FilmInfo(self.film).ratings
            self.film_ratings = [v for k,v in sorted(film_ratings.items())]
        except Exception as exc:
            raise Exception(f"Failed to obtain film data for film: {self.film}") from exc

    def __repr__(self):
        """ Example:
            < FilmRaters  Film: Citizen-kane > """
        return f"< {self.__class__.__name__}\tFilm: {self.film.title()} >"

    def __len__(self):
        """ Returns the total number of ratings a film has. """
        return sum(self.film_ratings)

    @property
    def suburl_film(self):
        return f"film/{self.film}/"

    def __get_route(self, target_rating, target_rating_count):
        """ Determines the method (in a non-pythonic sense) by which
        to sort the request url in order to make sure all results 
        can be obtained.
        It will return the appropriate suburl, whether it be
        a regular or reverese (starting from the lowest rating) search.
        In addition, it will return the page number at which
        the rating starts. """

        ## Total ratings lower and higher than the target rating.
        # For example if the target_rating is 3, 
        # and there are 10 ratings of 1 and 5 ratings of 2,
        # the lower_ratings would be 15
        lower_ratings = sum(self.film_ratings[:target_rating - 1])
        higher_ratings = sum(self.film_ratings[target_rating:])

        # Cannot get users with this rating because there are not enough pages to get to the middle
        # ratings. Since you can only view ratings from the top or bottom.
        if not any([i < self.max_results for i in (lower_ratings, higher_ratings)]):
            return False

        # There are less ratings above than below the target_rating
        # So we'll scrape by sorting ratings highest to lowest
        elif higher_ratings <= lower_ratings:
            page_start = ( higher_ratings // self.ratings_per_page ) + 1
            page_end = ( ( higher_ratings + target_rating_count ) // self.ratings_per_page ) + 1
            sort_by = self.suburl_rating_highest
        
        # The opposite is true: there are less ratings below than above
        # So we'll scrape by lowest to highest
        elif lower_ratings < higher_ratings:
            page_start = ( lower_ratings // self.ratings_per_page ) + 1
            page_end = ( ( lower_ratings + target_rating_count ) // self.ratings_per_page ) + 1
            sort_by = self.suburl_rating_lowest

        # Ensure that target_rating has not pushed us over the maximum page limit
        if page_end > self.page_limit: page_end = self.page_limit
        
        return sort_by, page_start, page_end
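    # A worked example of the page arithmetic in __get_route, with made-up counts
    # (illustrative only): with 500 ratings per page, suppose 1200 ratings sit above
    # the target rating and 700 ratings are at the target. Sorting highest-to-lowest:
    #   page_start = 1200 // 500 + 1                -> 3
    #   page_end   = (1200 + 700) // 500 + 1        -> 4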

    def __call__(self, target_rating=4, limit=None):
        """ Returns a list of users who've rated a film
        a given rating.
        In some instances there are too many ratings to obtain middle-ground
        ratings like 5 or 6. This is because Letterboxd limits the number of pages
        to 10, and you can only sort by highest or lowest.
        In such instances, the function will simply return False. 
        
        r-type: list (or False, if could not get results)
        """

        ## Edge cases
        if type(target_rating) is not int or target_rating not in range(1,11):
            raise ValueError("Rating must be int value within inclusive range 1-10")

        target_rating_count = self.film_ratings[target_rating-1]

        ## Get route to getting results
        if not (route := self.__get_route(target_rating, target_rating_count)):
            # Could not get any results
            return False
        sort_by, page_start, page_end = route

        ## Begin scraping process
        users = [] # results list
        if not limit: limit = target_rating_count # loop will break at result limit
        suburl = f"{self.suburl_film}{sort_by}"
        page_num = page_start
        while page_num in range(page_start, page_end+1) and len(users) < limit:

            ## Make request to each page
            full_suburl = f"{suburl}page/{page_num}"
            request = SESSION.request("GET", full_suburl)
            soup = make_soup(request)

            ## Could not find the tag associated with target_rating
            # (bind the tag to a separate name so the loop's target_rating int is not overwritten)
            if not (rating_tag := soup.find('span', class_=f'rated-large-{target_rating}')):
                if not users:
                    # Failed to get any results
                    raise Exception("Could not get results")
                else:
                    # There is no section for the int(rating) on this page
                    break

            # Parent tag that contains the information on users listed under each rating
            rating_group = rating_tag.parent.parent
            page_results = [i.get('href')[1:-1] for i in rating_group.find_all('a', class_='avatar')]

            users += page_results
            page_num += 1

        return users
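A usage sketch (the film title and arguments are illustrative; target_rating uses the 1-10 scale validated in __call__):

# Hypothetical: users who gave Citizen Kane four stars (8 on the 1-10 scale)
raters = FilmRaters("citizen kane")
four_star_users = raters(target_rating=8, limit=50)
if four_star_users is False:
    print("Too many ratings on either side to reach this rating band")
else:
    print(four_star_users[:10])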