def delete(self):
    """Delete this list from Letterboxd after confirming with the user.

    This cannot be undone! After deleting a list, the instance becomes
    unusable (its soup is discarded).
    """
    prompt = "Are you sure you want to delete the list? This cannot be undone!"
    # Bail out unless the user explicitly confirms.
    if not util.yn(prompt):
        return
    SESSION.request("POST", self.suburl_delete)
    # Invalidate the instance now that the list no longer exists.
    self.soup = None
def new(cls, name, **kwargs):
    """:: Alternative Constructor ::

    Creates a brand-new list on Letterboxd, then returns an instance for it.

    The regular constructor expects the named list to already exist, so this
    method first makes the list-creation request and only then calls cls(name).

    Parameters:
    - name (str): the name of the list (must be a non-empty string)

    Optional keyword arguments:
    - tags (list): e.g. [horror, 1980s]
    - public (bool)
    - ranked (bool)
    - description (str): e.g. "These are my favourite films"
    - entries (list of dicts): films in the list and any notes about them
    """
    # Reject anything that is not a non-empty string up front.
    if not name or not isinstance(name, str):
        raise TypeError(f"name must be non-empty string, not {name}")

    # Fall back to these values for any settings the caller didn't provide.
    defaults = {
        'tags': [],
        'public': False,
        'ranked': False,
        'description': '',
        'entries': []
    }
    list_data = {setting: kwargs.get(setting, fallback)
                 for setting, fallback in defaults.items()}

    # The name is supplied by the caller; the id is generated by Letterboxd
    # when the creation request is made, so it is left empty here.
    list_data['name'] = name
    list_data['list_id'] = ''

    # Convert the values into the types the save endpoint expects.
    post_data = cls.make_post_data(list_data)

    # Create the list; the regular constructor will then find it.
    SESSION.request("POST", suburl=cls.save_url, data=post_data)
    return cls(name)
def comment_soup(self):
    """Fetch and return the soup for this list's existing comments section."""
    suburl = f"csi/list/{self._id}/comments-section/?"
    response = SESSION.request("GET", suburl, params={'esiAllowUser': True})
    return make_soup(response)
def __get_rating_soup(self):
    """Fetch the separate page that holds the film's rating histogram.

    The rating info is not on the main film page, hence the extra request.

    r-type: BeautifulSoup
    """
    response = SESSION.request("GET", f"csi/film/{self.path}/rating-histogram/")
    return make_soup(response)
def __get_info_soup(self):
    """Fetch the main film page and return its film-page-wrapper div.

    r-type: BeautifulSoup
    """
    soup = make_soup(SESSION.request("GET", self.suburl))
    return soup.find('div', id='film-page-wrapper')
def get_film_names(self):
    """ Returns each id in the film list together with the corresponding film_name. """
    # Fetch the list's main page so we can work out how many pages of films exist.
    response = SESSION.request("GET", self.view_list)
    soup = make_soup(response)
    # No pagination widget means the whole list fits on a single page.
    # NOTE(review): this function appears truncated — `last_page` is assigned but
    # never used, the multi-page branch is missing, and nothing is returned.
    # Confirm against the full source before relying on it.
    if not (page_navigator := soup.find('div', class_='pagination')):
        last_page = 1
def load(self, *args):
    """Overload of load from the parent class.

    Uses the edit view rather than the standard list view.
    """
    list_name = self.get_formatted_name()
    response = SESSION.request("GET", f"{SESSION.username}/list/{list_name}/edit")
    self.soup = make_soup(response)
def get_blocked():
    """Return a list of the users in the session user's block list.

    NOTE: You can only see who *you* have blocked, hence there is no
    username argument, unlike following and followers.
    """
    username = SESSION.username
    soup = make_soup(SESSION.request("GET", f"{username}/blocked/"))
    return __get_people(soup)
def load(self, username):
    """Load the soup for an existing list belonging to the given username."""
    formatted_name = self.get_formatted_name()
    target = f"{username}/list/{formatted_name}/"
    # Request the list's page on Letterboxd and keep the parsed soup.
    self.soup = make_soup(SESSION.request("GET", target))
def delete_post(post_id):
    """Delete the post with the given id; requires a valid auth token."""
    token = request.headers.get('Authorization')
    user = SESSION.get(token)
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Post
    post = Post.find(post_id)
    if post is None:
        abort(400, {'message': 'POST_NOT_FOUND'})
    post.delete()
    return "DELETED"
def delete_comment(post_id, comment_id):
    """Delete a comment by id; requires a valid auth token."""
    token = request.headers.get('Authorization')
    user = SESSION.get(token)
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Comment
    comment = Comment.find(comment_id)
    if comment is None:
        abort(400, {'message': 'COMMENT_NOT_FOUND'})
    comment.delete()
    return "DELETED"
def delete_like(post_id, like_id):
    """Remove a like by id; requires a valid auth token."""
    token = request.headers.get('Authorization')
    user = SESSION.get(token)
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Like
    like = Like.find(like_id)
    if like is None:
        abort(400, {'message': 'NOT_YET_LIKED'})
    like.delete()
    return jsonify("Deleted")
def create_post():
    """Create a post for the authenticated user from the request's JSON body."""
    user = SESSION.get(request.headers.get('Authorization'))
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Post
    payload = request.get_json()
    new_post = Post()
    new_post.user_id = user.id
    new_post.content = payload['content']
    new_post.save()
    return jsonify(new_post.to_dict())
def update_post(post_id):
    """Replace a post's content with the JSON body's 'content' field."""
    user = SESSION.get(request.headers.get('Authorization'))
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Post
    payload = request.get_json()
    post = Post.find(post_id)
    if post is None:
        abort(400, {'message': 'POST_NOT_FOUND'})
    post.content = payload['content']
    post.save()
    return jsonify(post.to_dict())
def get_post(post_id):
    """Return a single post's content as JSON; requires a valid auth token."""
    user = SESSION.get(request.headers.get('Authorization'))
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Post
    found = Post.find(post_id)
    if found is None:
        abort(400, {'message': 'POST_NOT_FOUND'})
    return jsonify({'content': found.content})
def list_posts():
    """List all posts belonging to the authenticated user as JSON."""
    user = SESSION.get(request.headers.get('Authorization'))
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import User
    posts_response = [
        {'id': post.id, 'content': post.content}
        for post in User.find(user.id).posts
    ]
    return jsonify(posts_response)
def create_like(post_id):
    """Like a post on behalf of the authenticated user.

    Aborts with 400 if the auth token is missing/invalid or the post
    does not exist.
    """
    user = SESSION.get(request.headers.get('Authorization'))
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Like, Post
    post_exist = Post.find(post_id)
    if post_exist is None:
        # BUG FIX: was {'message','POST_NOT_FOUND'} — a set literal (comma
        # instead of colon), which doesn't carry the intended key/value.
        abort(400, {'message': 'POST_NOT_FOUND'})
    like = Like()
    like.post_id = post_id
    like.user_id = user.id
    like.save()
    return "Liked."
def list_profiles():
    """Return a summary (name, dob, img, star, age) of every profile as JSON."""
    viewer = SESSION.get(request.headers.get('Authorization'))
    if viewer is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Profile
    # Distinct loop-variable name so the authenticated user isn't shadowed.
    profile_response = [
        {
            'name': p.name,
            'dob': p.dob,
            'img': p.img,
            'star': p.star,
            'age': p.age
        }
        for p in Profile.query.all()
    ]
    return jsonify(profile_response)
def create_comment(post_id):
    """Create a comment on a post from the JSON body's 'body' field.

    Aborts with 400 if the auth token is missing/invalid or the post
    does not exist.
    """
    user = SESSION.get(request.headers.get('Authorization'))
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    input_data = request.get_json()
    from model import Comment, Post
    post_exist = Post.find(post_id)
    if post_exist is None:
        # BUG FIX: was {'message','POST_NOT_FOUND'} — a set literal (comma
        # instead of colon), which doesn't carry the intended key/value.
        abort(400, {'message': 'POST_NOT_FOUND'})
    comment = Comment()
    comment.post_id = post_id
    comment.user_id = user.id
    comment.body = input_data['body']
    comment.save()
    return jsonify("Created.")
def get_likes(post_id):
    """Return the user ids that liked a post, as a JSON list.

    Aborts with 400 if the auth token is missing/invalid or the post
    does not exist.
    """
    user = SESSION.get(request.headers.get('Authorization'))
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Post
    post = Post.find(post_id)
    # BUG FIX: original read `p.likes` *before* checking `p is None`,
    # raising AttributeError on a missing post; also removed a leftover
    # debug print of each user_id.
    if post is None:
        abort(400, {'message': 'NO_LIKES_FOUND'})
    likes_response = [{'user': like.user_id} for like in post.likes]
    return jsonify(likes_response)
def get_comments(post_id):
    """Return the bodies of a post's comments, as a JSON list.

    Aborts with 400 if the auth token is missing/invalid or the post
    does not exist.
    """
    user = SESSION.get(request.headers.get('Authorization'))
    if user is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Post
    post = Post.find(post_id)
    # BUG FIX: original read `p.comments` *before* checking `p is None`,
    # raising AttributeError on a missing post; also removed a leftover
    # debug print of each comment body.
    if post is None:
        abort(400, {'message': 'COMMENT_NOT_FOUND'})
    comments_response = [{'comment': c.body} for c in post.comments]
    return jsonify(comments_response)
def get_page_of_film_names(self, page_num):
    """Return {film_id: film_name} for every film on one page of the list.

    Parameters:
    - page_num (int): which page of the list to scrape.
    """
    response = SESSION.request("GET", f"{self.view_list}page/{page_num}/")
    soup = make_soup(response)
    film_list = soup.find('ul', class_='film-list')
    results = {}
    for item in film_list.find_all('li'):
        # The poster div carries the id; the img's alt text is the title.
        film_id = int(item.find('div').get('data-film-id'))
        results[film_id] = item.find('img').get('alt')
    return results
class FilmRaters():
    """ Scrapes the Letterboxd members who gave a film a specific rating.

    NOTE(review): this class looks truncated — __call__ collects `users`
    but ends without an explicit `return users`, so the success path
    currently returns None. Confirm against the full source.
    """

    ## Max pages Letterboxd allows
    page_limit = 10
    ## Ratings per page
    ratings_per_page = 500
    ## Max results in total
    max_results = page_limit * ratings_per_page
    ## suburls for getting ratings
    suburl_rating_highest = 'ratings/'
    suburl_rating_lowest = 'ratings/by/entry-rating-lowest/'

    def __init__(self, film):
        """ Ensure film in correct format.

        Parameters:
        - film (str): the film's name; spaces become hyphens to match
          Letterboxd's URL slugs.
        """
        if not film or type(film) is not str:
            raise Exception(f"Invalid film name {film}, must be string")
        self.film = film.replace(' ', '-')
        ## Get information about the film's overall rating and ratings spread
        # film_ratings presumably maps rating -> count; sorting by key yields
        # counts ordered lowest rating (index 0) to highest — TODO confirm
        # against FilmInfo.
        try:
            film_ratings = FilmInfo(self.film).ratings
            self.film_ratings = [v for k,v in sorted(film_ratings.items())]
        # NOTE(review): bare except swallows the real cause; consider narrowing.
        except:
            raise Exception("Failed to obtain film data for film:", self.film)

    def __repr__(self):
        """ Example: < FilmRaters Film: Citizen-kane > """
        return f"< {self.__class__.__name__}\tFilm: {self.film.title()} >"

    def __len__(self):
        """ Returns the total number of ratings a film has. """
        return sum(self.film_ratings)

    @property
    def suburl_film(self):
        # Base suburl for this film's pages.
        return f"film/{self.film}/"

    def __get_route(self, target_rating, target_rating_count):
        """ Determines the method (in a non-pythonic sense) by which to sort the
        request url in order to make sure all results can be obtained.
        It will return the appropriate suburl, whether it be a regular or reverse
        (starting from the lowest rating) search.
        In addition, it will return the page numbers at which the rating starts/ends.

        Returns False when the target rating cannot be reached within the
        page limit from either end of the sort order.
        """
        ## Total ratings lower and higher than the target rating.
        # For example if the target_rating is 3,
        # and there are 10 ratings of 1 and 5 ratings of 2,
        # the lower_ratings would be 15
        lower_ratings = sum([v for v in self.film_ratings[0:target_rating-1]])
        higher_ratings = sum([v for v in self.film_ratings[target_rating+1:len(self.film_ratings)]])

        # Cannot get users with this rating because there are not enough pages
        # to get to the middle ratings — you can only view ratings from the
        # top or the bottom of the sort order.
        if not any([i < self.max_results for i in (lower_ratings, higher_ratings)]):
            return False
        # There are fewer ratings above than below the target_rating,
        # so scrape with the highest-to-lowest sort.
        elif higher_ratings <= lower_ratings:
            page_start = ( higher_ratings // self.ratings_per_page ) + 1
            page_end = ( ( higher_ratings + target_rating_count ) // self.ratings_per_page ) + 1
            sort_by = self.suburl_rating_highest
        # The opposite: fewer ratings below than above,
        # so scrape with the lowest-to-highest sort.
        elif lower_ratings < higher_ratings:
            page_start = ( lower_ratings // self.ratings_per_page ) + 1
            page_end = ( ( lower_ratings + target_rating_count ) // self.ratings_per_page ) + 1
            sort_by = self.suburl_rating_lowest

        # Ensure that target_rating has not pushed us over maximum page limit.
        # NOTE(review): the literal 10 presumably mirrors self.page_limit —
        # confirm, and consider using the attribute.
        if page_end > 10:
            page_end = 10
        return sort_by, page_start, page_end

    def __call__(self, target_rating=4, limit=None):
        """ Returns a list of users who've rated a film a given rating.

        In some instances there are too many ratings to obtain middle-ground
        ratings like 5 or 6, because Letterboxd limits the number of pages to
        10 and you can only sort by highest or lowest. In such instances the
        function simply returns False.

        r-type: list (or False, if could not get results)
        """
        ## Edge cases
        if type(target_rating) is not int or target_rating not in range(1,11):
            raise ValueError("Rating must be int value within inclusive range 1-10")
        target_rating_count = self.film_ratings[target_rating-1]

        ## Get route to getting results
        if not (route := self.__get_route(target_rating, target_rating_count)):
            # Could not get any results
            return False
        sort_by, page_start, page_end = route

        ## Begin scraping process
        users = [] # results list
        if not limit: limit = target_rating_count # loop will break at result limit
        suburl = f"{self.suburl_film}{sort_by}"
        page_num = page_start
        while page_num in range(page_start, page_end+1) and len(users) < limit:
            ## Make request to each page
            full_suburl = f"{suburl}page/{page_num}"
            request = SESSION.request("GET", full_suburl)
            soup = make_soup(request)
            ## Could not find tag associated with target_rating
            # NOTE(review): this walrus rebinds `target_rating` from the int
            # rating to a soup tag, so on the next loop iteration the
            # f'rated-large-{target_rating}' class name is built from the tag
            # rather than the number — likely a bug; confirm against the
            # full source.
            if not (target_rating := soup.find('span', class_=f'rated-large-{target_rating}')):
                if not users:
                    # Failed to get any results
                    raise Exception("Could not get results")
                else:
                    # There is no section for the int(rating) on this page
                    break
            # Parent tag that contains the information on users listed under each rating
            rating_group = target_rating.parent.parent
            page_results = [i.get('href')[1:-1] for i in rating_group.find_all('a', class_='avatar')]
            users += page_results
            page_num += 1
def get_followers(username=None):
    """Return a list of the users a given user is followed by.

    Parameters:
    - username (str | None): the user whose followers to fetch;
      defaults to the session user's name.
    """
    # BUG FIX: the original default `username=SESSION.username` was evaluated
    # once at import time, so it would go stale if the session user changed.
    # Resolving it at call time keeps the same behavior for current callers.
    if username is None:
        username = SESSION.username
    request = SESSION.request("GET", f"{username}/followers/")
    soup = make_soup(request)
    return __get_people(soup)
def __call__(self, **kwargs):
    """ Returns a list of film_ids that correspond with the given search parameters.
    If no parameters are given, all film_ids in the watched_list will be returned.

    Keyword Arguments:
        rated_only (bool)
        year (str or None): 4 digits e.g. 1975, or 4 digits + s
            e.g. 1970s (functions as a decade)
        genre (str or None): must be in genre_list
        service (str or None): must be in service_list
        rating (float or None): inclusive range (0.5, 5); decimal must be
            0.5 or 0, like Letterboxd ratings
        sort_by (str): must be in sort_list. Options:
            name, popular, date-earliest (release date), date-latest,
            rating (average rating), rating-lowest,
            your-rating (session user's rating), your-rating-lowest,
            entry-rating (username's rating), entry-rating-lowest,
            shortest (film length), longest
        filters (list): must be in SESSION's filters_dict.
            Options (updated 2020-11-20):
            show-liked OR hide-liked, show-logged OR hide-logged,
            show-reviewed OR hide-reviewed,
            show-watchlisted OR hide-watchlisted,
            show-shorts OR hide-shorts, hide-docs, hide-unreleased

    Example suburl in full:
        username/films/ratings/year(or decade)/2015/genre/horror/on/amazon-gbr/by/rating
    """
    # Translate the caller's filters into the cookie value Letterboxd expects.
    if 'filters' in kwargs:
        filters = self.get_valid_filters(kwargs.pop('filters'))
    else:
        filters = ''

    # Filters are applied via a cookie rather than the URL path.
    requests_jar = requests.cookies.RequestsCookieJar()
    requests_jar.set('filmFilter', filters)

    # The remaining keyword arguments become path segments of the request URL.
    suburl = self.build_suburl(**kwargs)

    film_ids = []
    page_num = 1
    # A page holds up to 18 posters; a short page must be the last one.
    # (BUG FIX: removed a leftover debug print of the page number.)
    while len(film_ids) % 18 == 0:
        request = SESSION.request("GET", suburl + f"page/{page_num}/", cookies=requests_jar)
        soup = make_soup(request)

        films_on_page = [
            i.find('div').get('data-film-id')
            for i in soup.find_all('li', class_='poster-container')
        ]

        # Edge case: if the last page has exactly 18 films, the scraper walks
        # onto the following blank page; the empty result list is the signal
        # to stop.
        if not films_on_page:
            break

        film_ids += films_on_page
        page_num += 1

    return film_ids
def view_profile():
    """Return every profile in the database, fully serialized, as a JSON list.

    Requires a valid auth token in the 'Authorization' header.

    BUG FIX: the original read `request.heades` (typo), so every call raised
    AttributeError before the token was ever checked; it also reused the name
    `profile` for both the authenticated user and the loop variable.
    """
    viewer = SESSION.get(request.headers.get('Authorization'))
    if viewer is None:
        abort(400, {'message': 'TOKEN_NOT_FOUND'})
    from model import Profile

    # All columns serialized per profile, in the original response order.
    fields = [
        'id', 'imageUrls', 'name', 'email', 'password', 'gender', 'dob',
        'birth_time', 'birth_place', 'religion', 'caste', 'subcaste',
        'gothram', 'star', 'qualification', 'job', 'workplace', 'income',
        'height', 'weight', 'mother_tongue', 'known_language', 'nativity',
        'marital_status', 'talents', 'hobbies', 'vehicle_driving',
        'disabilities',
        'box11', 'box12', 'box13', 'box14', 'box15', 'box16', 'box17',
        'box18', 'box19', 'box110', 'box111', 'box112',
        'box21', 'box22', 'box23', 'box24', 'box25', 'box26', 'box27',
        'box28', 'box29', 'box210', 'box211', 'box212',
        'father_name', 'father_occupation', 'mother_name',
        'mother_occupation', 'contact1', 'contact2', 'sibiling_count',
        'family_status', 'properties', 'anydetails',
        'expected_qualification', 'expected_place', 'expected_income',
        'expected_caste', 'expected_subcaste', 'age_difference',
        'expected_height', 'expected_weight', 'expectations',
    ]
    # SECURITY NOTE: this response includes 'password' and extensive PII for
    # every user, exposed to any valid token holder — consider whitelisting
    # safe fields instead.
    profile_response = [
        {field: getattr(p, field) for field in fields}
        for p in Profile.query.all()
    ]
    return jsonify(profile_response)
def add_comment(self, comment):
    """Post a comment to this list."""
    payload = {'comment': comment}
    SESSION.request("POST", self.add_comment_url, data=payload)
def get_comment_text(suburl):
    """Fetch a comment page and return its text body."""
    soup = make_soup(SESSION.request("GET", suburl))
    return soup.get_text()
if not (comments := self.comments): raise Exception("No comments to delete!") if type(comment_id) not in (str, int): raise TypeError( f"Invalid type for comment_id: {type(comment_id)}. Should be int" ) if isinstance(comment_id, str): comment_id = int(comment_id) if comment_id not in [i['id'] for i in comments]: raise Exception(f"Unable to locate id: {comment_id}") delete_comment_url = f"ajax/filmListComment:{comment_id}/delete-comment/" # Make post request to delete comment SESSION.request("POST", suburl=delete_comment_url) """ ** Film names ** """ def get_page_of_film_names(self, page_num): """ Returns a dictionary key: film_id value: film_name for all the films on that page of the list. Example: {film_id: film_name} """ response = SESSION.request("GET", f"{self.view_list}page/{page_num}/") soup = make_soup(response)