def movie_launcher(request_timeout=30):
    """Scrape the IMDb feature-film listing and store every movie in the CSV file.

    Walks the paginated search listing, follows the link of every listed
    title, and hands the listing entry together with the parsed detail page
    to a ``Manager`` that serialises the movie and appends it to the CSV.

    Listing pages or detail pages that answer with a non-OK status are
    skipped silently.

    :param request_timeout: seconds to wait for each HTTP response before
        ``requests`` gives up; without a timeout a stalled connection would
        block the whole run indefinitely.
    :raises requests.exceptions.RequestException: on connection failures or
        timeouts (not caught here, so the run stops).
    """
    entity = Movie()  # one Movie instance, reused for every scraped title

    # ``start`` is the rank of the first title on a listing page; the range
    # covers the 13117 ranked titles in steps of 50 (presumably the page size).
    for step in range(1, 13117, 50):
        url = ('https://www.imdb.com/search/title/?at=0&num_votes=5000,&sort=user_rating,desc&start='
               + str(step) + '&title_type=feature')
        res = requests.get(url, timeout=request_timeout)
        if not res.ok:
            continue

        # Undecodable bytes are dropped rather than aborting the page.
        soup = BeautifulSoup(res.content.decode('utf-8', 'ignore'), 'html.parser')
        tags = soup.find_all('div', attrs={'class': 'lister-item-content'})

        for tag in tags:
            # e.g. 'https://www.imdb.com/title/tt0111161/?ref_=adv_li_tt'
            r = requests.get(_link(getlink(tag)), timeout=request_timeout)
            if not r.ok:
                continue

            print('i')  # progress marker, one per scraped title
            content = BeautifulSoup(r.text, 'html.parser')
            manager = Manager(entity, [content, tag])
            manager.parse_json(entity)  # convert the current movie to a dictionary
            manager.to_csv()  # append the current movie to the CSV file
            # NOTE(review): throttle between detail-page requests; the exact
            # nesting level of this pause in the original should be confirmed.
            time.sleep(4)
def test_getMovieById(self):
    """getMovieById returns the stored movie for an id that is present.

    The fixture movie is inserted first when the lookup reports it missing
    (the controller signals "not found" with False).
    """
    movie_id = '11111111'
    details = ('sfsdfsf', 'sdfdsfsfdsfdsfs', 'action')

    missing = self.controller.getMovieById(movie_id) is False
    if missing:
        self.controller.addMovie(movie_id, *details)

    found = self.controller.getMovieById(movie_id)
    self.assertEqual(found, Movie(movie_id, *details))
def updateMovieId(self, movieId, newId):
    '''
    Change the id of a stored movie, validating both ids first.

    :param movieId: current id of the movie; must be convertible to int
    :param newId: id to assign; must be convertible to int and not in use
    :return: the result of ``updateItemById`` when the movie exists,
             False when no movie is stored under ``movieId``
    :raises ValueError: if either id is not an integer or ``newId`` is
                        already taken
    '''
    try:
        movieId = int(movieId)
    except ValueError as err:
        raise ValueError("Error: \n Movie Id cannot be string") from err
    try:
        newId = int(newId)
    except ValueError as err:
        raise ValueError("Error: \n New Movie Id cannot be string") from err

    item = self.getItemById(movieId)
    # The target id must be free; note this also rejects newId == movieId
    # for an existing movie, exactly as before.
    if self.getItemById(newId):
        raise ValueError("Error:\n The given id is already taken")

    # getItemById signals "not found" with False: report that explicitly
    # instead of falling off the end of the function.
    if item is False:
        return False

    movie = Movie(*item)
    movie.id = newId
    return self.updateItemById(movieId, movie)
def test_strMovie(self):
    """Check Movie equality and the comma-separated form returned by str()."""
    # Reset the fixture movie's fields through its public attributes.
    for attribute, value in (('id', 3),
                             ('genre', 'action'),
                             ('title', 'Titanic4'),
                             ('description', 'supers')):
        setattr(self.movie, attribute, value)

    # Same id and genre but different title/description: must not compare equal.
    different = Movie(3, 'sd', 'sds', 'action')
    self.assertNotEqual(different, self.movie)

    # The very same object compares equal to itself.
    same = self.movie
    self.assertEqual(same, self.movie)

    self.assertEqual(str(self.movie), '3, Titanic4, supers, action')
def test_search(self):
    """Searches return [] when nothing matches and reject an empty query.

    The temporary movie is removed in a ``finally`` clause so that a failing
    assertion cannot leave it behind in the "testSearch" repository.
    """
    c = MovieController(Repository("testSearch", "d"))

    # Movie(id, title, description, genre): the *title* is "action" while the
    # genre is "family", so a genre search for "action" must find nothing.
    c.addItem(Movie(3, "action", "sdf", "family"))
    try:
        self.assertEqual(c.searchMovieByGenre("action"), [])
    finally:
        c.removeMovie(3)

    self.assertEqual(c.searchMovieByTitle("d"), [])
    self.assertEqual(c.searchMovieByDescription("descriere"), [])

    # An empty search term is invalid for every search flavour.
    self.assertRaises(ValueError, c.searchMovieByDescription, "")
    self.assertRaises(ValueError, c.searchMovieByGenre, "")
    self.assertRaises(ValueError, c.searchMovieByTitle, "")
def addMovie(self, movieId, title, description, genre):
    '''
    Validate the given fields and store a new movie in the repository.

    :param movieId: id of the movie; converted to int after validation
    :param title: string / cannot be empty
    :param description: string / cannot be empty
    :param genre: one of MOVIE_GENRE
    :return: True once the movie has been added; None if the validator
             reports failure without raising
    :raises ValueError: if a movie with the same id is already stored
                        (validation problems are raised by MovieValidator)
    '''
    if not MovieValidator.validate(movieId, title, description, genre):
        return None

    numeric_id = int(movieId)
    new_movie = Movie(numeric_id, title, description, genre)

    # The repository signals "not found" with False; anything else means
    # the id is in use.
    id_in_use = self.repo.getItemById(numeric_id) is not False
    if id_in_use:
        raise ValueError("Error:\n The given id is already taken")

    self.addItem(new_movie)
    return True
def updateMovieGenre(self, movieId, newGenre):
    '''
    Change the genre of a stored movie, validating the input first.

    :param movieId: id of the movie; must be convertible to int
    :param newGenre: string, one of MOVIE_GENRE, must not contain ','
    :return: the result of the repository's ``updateItemById`` when the
             movie exists, False when no movie is stored under ``movieId``
    :raises ValueError: if ``newGenre`` contains a comma or is not a known
                        genre, or if ``movieId`` is not an integer
    '''
    if ',' in newGenre:
        raise ValueError(
            "Error:\n Parameters should not contain the ',' character")
    try:
        movieId = int(movieId)
    except ValueError as err:
        raise ValueError("Error: \n Movie Id cannot be string") from err
    if newGenre not in MOVIE_GENRE:
        raise ValueError("Error: \n Movie genre isn't valid")

    item = self.getItemById(movieId)
    # getItemById signals "not found" with False: report that explicitly
    # instead of falling off the end of the function.
    if item is False:
        return False

    movie = Movie(*item)
    movie.genre = newGenre
    return self.repo.updateItemById(movieId, movie)
def setUp(self):
    """Build the controller under test and normalise the stored fixture movies.

    The 'testMovies' repository may hold state from earlier runs, so before
    each test every id the tests rely on is forced to a known state: either
    present (added with placeholder data when missing) or absent (removed
    when found).

    Each step checks the same id it then acts on. Previously the removal of
    99 was guarded by a lookup of 90, and the creation of 333 by a lookup
    of 33, so 333 was never actually created.
    """
    self.movie = Movie(3, 'Titanic4', 'supers', 'action')
    self.repo = Repository('testMovies', 'testMovies')
    self.controller = MovieController(self.repo)

    # Ordered exactly like the original sequence of checks.
    fixture_steps = (
        ('absent', 77),
        ('absent', 772),
        ('absent', 773),
        ('present', 23),
        ('absent', '233333'),
        ('present', 20),
        ('present', 200),
        ('present', 90),
        ('present', 123),
        ('present', 234),
        ('present', 33),
        ('absent', 99),
        ('present', 323),
        ('absent', 929),
        ('present', 333),
        ('absent', 939),
    )
    for wanted, movie_id in fixture_steps:
        # The controller signals "not found" with False.
        exists = self.controller.getMovieById(movie_id) is not False
        if wanted == 'present' and not exists:
            # addMovie is given the id as a string, as in the original calls.
            self.controller.addMovie(str(movie_id), "title", "description", "action")
        elif wanted == 'absent' and exists:
            self.controller.removeMovie(movie_id)
def updateMovieName(self, movieId, movieTitle):
    '''
    Change the title of a stored movie, validating the input first.

    :param movieId: id of the movie; must be convertible to int
    :param movieTitle: string, must not be blank and must not contain ','
    :return: the result of ``updateItemById`` for the modified movie
    :raises ValueError: if the title contains a comma or is blank, if
                        ``movieId`` is not an integer, or if no movie is
                        stored under ``movieId``
    '''
    if ',' in movieTitle:
        raise ValueError(
            "Error:\n Parameters should not contain the ',' character")
    try:
        movieId = int(movieId)
    except ValueError as err:
        raise ValueError("Error:\n Movie id must be integer") from err
    if not movieTitle.strip():
        raise ValueError("Error: \n Movie Title should not be empty")

    item = self.getItemById(movieId)
    # getItemById signals "not found" with False.
    if item is False:
        raise ValueError("Error:\n Movie Id cannot be find")

    # The title is known to be non-blank here, so it is applied
    # unconditionally; movieId is already an int.
    movie = Movie(*item)
    movie.title = movieTitle
    return self.updateItemById(movieId, movie)
def test_UpdateMovie5(self):
    """Insert a movie with a very large id directly through addItem.

    NOTE(review): this test makes no assertion and never removes the movie
    it adds -- it only verifies that addItem does not raise. Confirm whether
    the rest of the test body is missing.
    """
    large_id_movie = Movie(20000000000, "sds", "sdfs", "action")
    self.controller.addItem(large_id_movie)
def test_UpdateMovie4(self):
    """updateMovie must raise ValueError for the given invalid update.

    The third argument is an empty string (presumably the new title -- to be
    confirmed against updateMovie's signature). The temporary movie is
    removed in a ``finally`` clause so that a failing assertion cannot leave
    it behind in the repository.
    """
    self.controller.addItem(Movie(20000000000, "sds", "sdfs", "action"))
    try:
        self.assertRaises(ValueError, self.controller.updateMovie,
                          '23', '233333', "", "sdfsdfssfs", "action")
    finally:
        self.controller.removeMovie(20000000000)
def test_GetAllMovies(self):
    """getAllMovies returns exactly the single movie kept in the repository.

    Relies on the 'movieUpdateTest' repository already containing the movie
    ("2", "223", "22320", "action") and nothing else.
    """
    repo2 = Repository('movieUpdateTest', "test2")
    controller2 = MovieController(repo2)
    expected = Movie("2", "223", '22320', 'action')
    # The earlier unused extra getAllMovies() call was dropped; one call
    # is enough for the assertion.
    self.assertEqual(controller2.getAllMovies(), [expected])