def __init__(self):
    """Initialise award lookup lists, result buffers, the DB and the browser.

    Side effects visible in this method: a ``db.DataBase('oscars')`` handle
    is created and ``create_table()`` is called on it, and a Chrome
    WebDriver session is started via ``webdriver.Chrome()``.
    """
    # URL template with a single ``{}`` placeholder (presumably filled with
    # the ceremony year -- confirm against the code that formats it).
    self.base_url = 'https://www.imdb.com/event/ev0000003/{}/1/'

    # Category titles handled as movie-level awards.  Several categories
    # are listed under two spellings (e.g. 'Best Picture' and
    # 'Best Motion Picture of the Year').
    # 'Best Animated Feature Film' used to be listed twice; the redundant
    # trailing entry is dropped so each title appears exactly once.
    self.list_movie_awards = [
        'Best Motion Picture of the Year',
        'Best Picture',
        'Best Animated Feature Film',
        'Best Adapted Screenplay',
        'Best Achievement in Cinematography',
        'Best Cinematography',
        'Best Achievement in Film Editing',
        'Best Film Editing',
        'Best Achievement in Production Design',
        'Best Achievement in Costume Design',
        'Best Achievement in Visual Effects',
        'Best Documentary Feature',
        'Best Documentary Short Subject',
    ]

    # Category titles handled as person-level (credit) awards.
    self.list_people_awards = [
        'Best Performance by an Actor in a Leading Role',
        'Best Actor in a Leading Role',
        'Best Performance by an Actress in a Leading Role',
        'Best Actress in a Leading Role',
        'Best Performance by an Actor in a Supporting Role',
        'Best Actor in a Supporting Role',
        'Best Performance by an Actress in a Supporting Role',
        'Best Actress in a Supporting Role',
        'Best Achievement in Directing',
        'Best Director',
    ]

    # Buffers for movie-level results; all start empty and are filled by
    # other methods of the class.
    self.movie_awards_idx = []
    self.movie_winners = []
    self.movie_nominees = []

    # Buffers for person-level (credit) results; same pattern as above.
    self.credit_awards_idx = []
    self.credit_winners = []
    self.credit_nominees = []

    # Database handle first, then the browser (original order preserved).
    self.db = db.DataBase('oscars')
    self.db.create_table()
    self.browser = webdriver.Chrome()
def __init__(self, base_url, idx, *, timeout=30):
    """Create empty per-title state, open the DB and fetch/parse the page.

    Args:
        base_url: URL template containing one ``{}`` placeholder.  It is
            filled with ``idx`` rendered as text and left-padded with
            ``'0'`` to a width of 12 characters.
        idx: Identifier of the page to fetch (presumably a numeric IMDb
            title id -- confirm against callers).
        timeout: Seconds to wait for the HTTP response, forwarded to
            ``requests.get``.  Without a timeout ``requests`` waits
            indefinitely, so one stalled connection would hang the whole
            scrape.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds ``timeout``.
    """
    # Containers populated later by the scraping methods.
    self.movie = {}
    self.person = {}
    self.credits = []

    # Database handle is set up before any network access (original order).
    self.db = db.DataBase('data')
    self.db.create_table()

    padded_idx = f'{idx}'.rjust(12, '0')
    response = requests.get(base_url.format(padded_idx), timeout=timeout)
    # Parse the raw response bytes with the lxml parser.
    self.page = BeautifulSoup(response.content, 'lxml')
sql.insert_movie_oscars(movie_nominee, 'nominee') except Exception as ex: print(ex) pass for credit_winner in self.credit_winners: try: sql.insert_credit_oscars(credit_winner, 'winner') except Exception as ex: print(ex) pass for credit_nominee in self.credit_nominees: try: sql.insert_credit_oscars(credit_nominee, 'nominee') except Exception as ex: print(ex) pass if __name__ == '__main__': sql = db.DataBase('oscars') scrapper = Scrapper() for year in range(2010, 2012): print(year) scrapper.oscars(year) sql.conn.commit() scrapper.browser.close() sql.close_db() print('done')
def __init__(self, url, *, timeout=30):
    """Open the 'data' database and download the text of ``url``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the HTTP response, forwarded to
            ``requests.get``.  Without a timeout ``requests`` waits
            indefinitely, so one stalled connection would hang the caller.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds ``timeout``.
    """
    # Database handle is set up before any network access (original order).
    self.db = db.DataBase('data')
    self.db.create_table()

    # Decoded response body, kept for later processing by other methods.
    response = requests.get(url, timeout=timeout)
    self.source = response.text
elif self.person.get('actress') is not None: self.person['acting'] = int(self.person.get('actress')) else: pass self.db.insert_people(pidx, self.person) def movie_credits_commit(self, idx, credits): self.db.insert_credits(idx, credits) def commit(self): self.db.conn.commit() if __name__ == '__main__': sql = db.DataBase('data') sql.cursor.execute('SELECT id FROM Movies') data = sql.cursor.fetchall()[:5] for idx in data: scrapper = Scrapper('https://www.imdb.com/title/tt000000{}/', idx[0]) scrapper.details(idx[0]) scrapper.companies(idx[0]) scrapper.countries(idx[0]) scrapper.genres(idx[0]) scrapper.languages(idx[0]) scrapper.ratings(idx[0]) scrapper.commit() scrapper = Scrapper('https://www.imdb.com/title/tt{}/plotsummary', idx[0])