def download_movie(self, thread_id, url, movie):
    """
    Queue one task per song listed on a movie page.

    The page at ``self.start_url + url`` is fetched and parsed with
    ``get_songs_with_url``. If the database already reports as many songs
    for this movie as the page lists, only the last-crawl marker is
    updated and nothing is queued.

    :param thread_id: Identifier printed in the warning message
    :param url: URL of movie (appended to ``self.start_url``)
    :param movie: Name of movie
    """
    page_html = open_request(
        self.start_url + url, delayed=self.delay_request
    )
    listed_songs = self.get_songs_with_url(page_html)

    # Equal counts are treated as "nothing new on this page"
    stored_count = db_operations.number_of_songs(self.start_url, url)
    if stored_count == len(listed_songs):
        db_operations.update_last_crawl(self.start_url, url)
        message = '{0} --> Movie {1} contains no new songs. Skipping.'.format(
            thread_id, movie
        )
        print_util.print_warning(message)
        return

    # One task per (song_url, song) pair found on the page
    for song_url, song in listed_songs:
        task = {
            'type': 2,
            'url': song_url,
            'song': song,
            'movie': movie,
            'movie_url': url,
            'n_errors': 0,
        }
        self.task_queue.put(task)
def get_song(self, thread_id, url, song, artist):
    """
    Fetch a song page and save the song in the database.

    If the song is already stored, a warning is printed and the method
    returns without downloading or saving anything.

    :param thread_id: Identifier printed in the warning message
    :param url: URL of song (appended to ``self.start_url``)
    :param song: Song title
    :param artist: Artist of song
    """
    if db_operations.exists_song(self.start_url, url):
        print_util.print_warning(
            '{0} --> Song {1} already exists. Skipping.'.format(
                thread_id, song
            )
        )
        # Actually skip: without this return the song was fetched and
        # saved again despite the "Skipping" message.
        return

    complete_url = self.start_url + url
    raw_html = open_request(complete_url, delayed=self.delay_request)

    album, lyrics, lyricist, additional_artists = self.get_song_details(
        raw_html
    )

    # Note: additional_artists are artist(s) featured in the song
    # NOTE(review): the song URL is passed both as the 2nd and the 4th
    # positional argument, as in the original call - confirm this is
    # intended.
    db_operations.save(
        song,
        url,
        album,
        url,
        self.start_url,
        lyrics,
        additional_artists + [artist],
        [artist],
        lyricist,
    )
def get_artist(self, thread_id, url, artist):
    """
    Queue tasks for an artist starting from the artist's first page.

    1. Every song found on the page at ``url`` is queued as a
       ``'type': 3`` task.
    2. Every page returned by ``get_pages_for_artist`` except the first
       one is queued as a ``'type': 2`` task.

    :param thread_id: Not used by this method
    :param url: URL of the artist page (appended to ``self.start_url``)
    :param artist: Artist name attached to every queued task
    """
    page_html = open_request(
        self.start_url + url, delayed=self.delay_request
    )
    artist_pages = self.get_pages_for_artist(page_html)

    # Songs on the page that was just fetched
    for song_url, song in self.get_songs(page_html):
        song_task = {
            'type': 3,
            'url': song_url,
            'song': song,
            'artist': artist,
            'n_errors': 0,
        }
        self.task_queue.put(song_task)

    # The first entry of the page list is skipped; the rest are queued
    remaining_pages = artist_pages[1:]
    for page in remaining_pages:
        page_task = {
            'type': 2,
            'url': page,
            'artist': artist,
            'n_errors': 0,
        }
        self.task_queue.put(page_task)
def get_song(self, thread_id, url, song, album, album_url, artist):
    """
    Fetch the lyrics of a song and save the song in the database.

    Songs that already exist in the database are skipped with a warning.
    The artist is stored as singer, director and lyricist alike.

    :param thread_id: Identifier printed in the warning message
    :param url: URL of song (appended to ``self.start_url``)
    :param song: Song title
    :param album: Album name
    :param album_url: URL of album (same as artist) on the website
    :param artist: Artist name
    """
    already_stored = db_operations.exists_song(self.start_url, url)
    if already_stored:
        message = '{0} -> Song {1} already exists. Skipping'.format(
            thread_id, song
        )
        print_util.print_warning(message)
        return

    page_html = open_request(
        self.start_url + url, delayed=self.delay_request
    )
    song_lyrics = self.get_song_details(page_html)

    db_operations.save(
        song=song,
        song_url=url,
        movie=album,
        movie_url=album_url,
        start_url=self.start_url,
        lyrics=song_lyrics,
        singers=artist,
        director=artist,
        lyricist=artist,
    )
def get_song(self, thread_id, url, song, album, album_url, artist):
    """
    Download one song page and store its lyrics.

    Returns early, after printing a warning, when the song is already
    present in the database. Otherwise the page is fetched, the lyrics
    are extracted with ``get_song_details`` and the record is saved with
    the artist filling the singers, director and lyricist fields.

    :param thread_id: Identifier printed in the warning message
    :param url: URL of song (appended to ``self.start_url``)
    :param song: Song title
    :param album: Album name
    :param album_url: URL of album (same as artist) on the website
    :param artist: Artist name
    """
    if db_operations.exists_song(self.start_url, url):
        warning_template = '{0} -> Song {1} already exists. Skipping'
        print_util.print_warning(warning_template.format(thread_id, song))
        return

    full_url = self.start_url + url
    lyrics_text = self.get_song_details(
        open_request(full_url, delayed=self.delay_request)
    )

    record = {
        'song': song,
        'song_url': url,
        'movie': album,
        'movie_url': album_url,
        'start_url': self.start_url,
        'lyrics': lyrics_text,
        'singers': artist,
        'director': artist,
        'lyricist': artist,
    }
    db_operations.save(**record)
def download_song(self, thread_id, url, song, movie, movie_url):
    """
    Fetch a song page and save its details in the database.

    Songs that already exist in the database are skipped with a warning.

    :param thread_id: Identifier printed in the warning message
    :param url: URL of song (appended to ``self.start_url``)
    :param song: Name of song
    :param movie: Name of movie
    :param movie_url: URL of movie
    """
    # Nothing to do when the song is already stored
    already_stored = db_operations.exists_song(self.start_url, url)
    if already_stored:
        message = '{0} -> Song {1} already exists. Skipping.'.format(
            thread_id, song
        )
        print_util.print_warning(message)
        return

    # Download and parse the song page
    page_html = open_request(
        self.start_url + url, delayed=self.delay_request
    )
    details = self.get_song_details(page_html)
    lyrics, singers, music_by, lyricist = details

    # Persist the record
    db_operations.save(
        song=song,
        song_url=url,
        movie=movie,
        movie_url=movie_url,
        start_url=self.start_url,
        lyrics=lyrics,
        singers=singers,
        director=music_by,
        lyricist=lyricist,
    )
def get_song(self, thread_id, url, song, artist):
    """
    Get a song from a URL and save it in the database.

    A song that already exists in the database is skipped: a warning is
    printed and nothing is downloaded or saved.

    :param thread_id: Identifier printed in the warning message
    :param url: URL of song (appended to ``self.start_url``)
    :param song: Song title
    :param artist: Artist of song
    """
    if db_operations.exists_song(self.start_url, url):
        print_util.print_warning(
            '{0} --> Song {1} already exists. Skipping.'.format(
                thread_id, song
            )
        )
        # The original fell through here and re-downloaded/re-saved the
        # song even though it announced that it was skipping it.
        return

    complete_url = self.start_url + url
    raw_html = open_request(complete_url, delayed=self.delay_request)

    album, lyrics, lyricist, additional_artists = self.get_song_details(
        raw_html
    )

    # Note: additional_artists are artist(s) featured in the song
    # NOTE(review): ``url`` is passed twice (2nd and 4th positional
    # arguments), exactly as the original call did - confirm intended.
    db_operations.save(
        song,
        url,
        album,
        url,
        self.start_url,
        lyrics,
        additional_artists + [artist],
        [artist],
        lyricist,
    )
def get_artist_albums(self, thread_id, url, artist):
    """
    Queue a task for every song of every album on an artist page.

    The page address is ``self.start_url``, a ``'/'`` and ``url`` joined
    together. ``url`` is also recorded as the album URL of each task.

    :param thread_id: Not used by this method
    :param url: URL of the artist page
    :param artist: Artist name
    """
    page_address = self.start_url + '/' + url
    page_html = open_request(page_address, delayed=self.delay_request)

    for album, album_songs in self.get_albums_with_songs(page_html):
        for song_url, song in album_songs:
            task = {
                'type': 2,
                'song': song,
                'url': song_url,
                'album': album,
                'album_url': url,
                'artist': artist,
                'n_errors': 0,
            }
            self.task_queue.put(task)
def get_songs_from_page(self, thread_id, url, artist):
    """
    Queue a ``'type': 3`` task for every song found on one artist page.

    :param thread_id: Not used by this method
    :param url: URL of the page (appended to ``self.start_url``)
    :param artist: Artist name attached to every queued task
    """
    page_html = open_request(
        self.start_url + url, delayed=self.delay_request
    )

    for song_url, song in self.get_songs(page_html):
        task = {
            'type': 3,
            'url': song_url,
            'song': song,
            'artist': artist,
            'n_errors': 0,
        }
        self.task_queue.put(task)
def get_artists(self, thread_id, url):
    """
    Queue a ``'type': 1`` task for every artist found on a page.

    :param thread_id: Not used by this method
    :param url: URL of the page (appended to ``self.start_url``)
    """
    page_html = open_request(
        self.start_url + url, delayed=self.delay_request
    )

    for artist_url, artist in self.get_artist_with_url(page_html):
        task = {
            'type': 1,
            'url': artist_url,
            'artist': artist,
            'n_errors': 0,
        }
        self.task_queue.put(task)
def get_movies(self, thread_id, url):
    """
    Queue a ``'type': 1`` task for every movie listed on a page.

    :param thread_id: Not used by this method
    :param url: URL of website from which movies are to be fetched
        (appended to ``self.start_url``)
    """
    # Fetch the listing page
    listing_html = open_request(
        self.start_url + url, delayed=self.delay_request
    )

    # One task per (movie_url, movie) pair on the listing
    for movie_url, movie in self.get_movies_with_url(listing_html):
        task = {
            'type': 1,
            'url': movie_url,
            'movie': movie,
            'n_errors': 0,
        }
        self.task_queue.put(task)
def get_songs_from_page(self, thread_id, url, artist):
    """
    Fetch one artist page and queue each song on it.

    Each ``(song_url, song)`` pair returned by ``get_songs`` becomes a
    ``'type': 3`` task with ``n_errors`` set to 0.

    :param thread_id: Not used by this method
    :param url: URL of the page (appended to ``self.start_url``)
    :param artist: Artist name attached to every queued task
    """
    address = self.start_url + url
    songs_on_page = self.get_songs(
        open_request(address, delayed=self.delay_request)
    )

    enqueue = self.task_queue.put
    for song_url, song in songs_on_page:
        enqueue({
            'type': 3,
            'url': song_url,
            'song': song,
            'artist': artist,
            'n_errors': 0,
        })
def get_artists(self, thread_id, url):
    """
    Fetch a page and queue each artist found on it.

    Each ``(artist_url, artist)`` pair returned by
    ``get_artists_with_url`` becomes a ``'type': 1`` task.

    :param thread_id: Not used by this method
    :param url: URL of the page (appended to ``self.start_url``)
    """
    page_html = open_request(
        self.start_url + url, delayed=self.delay_request
    )

    for artist_url, artist in self.get_artists_with_url(page_html):
        task = {
            'type': 1,
            'url': artist_url,
            'artist': artist,
            'n_errors': 0,
        }
        self.task_queue.put(task)
def get_movies(self, thread_id, url):
    """
    Get the movie list from a page and queue one task per movie.

    :param thread_id: Not used by this method
    :param url: URL of website from which movies are to be fetched
        (appended to ``self.start_url``)
    """
    # Get website HTML
    address = self.start_url + url
    listing_html = open_request(address, delayed=self.delay_request)

    # Add movies to task queue
    enqueue = self.task_queue.put
    for movie_url, movie in self.get_movies_with_url(listing_html):
        enqueue({
            'type': 1,
            'url': movie_url,
            'movie': movie,
            'n_errors': 0,
        })
def get_artist_albums(self, thread_id, url, artist):
    """
    Fetch an artist page and queue every song of every album on it.

    The page address is built as ``self.start_url + '/' + url``; each
    queued task is of ``'type': 2`` and records ``url`` as the album URL.

    :param thread_id: Not used by this method
    :param url: URL of the artist page
    :param artist: Artist name
    """
    page_html = open_request(
        self.start_url + '/' + url, delayed=self.delay_request
    )
    albums = self.get_albums_with_songs(page_html)

    enqueue = self.task_queue.put
    for album, album_songs in albums:
        for song_url, song in album_songs:
            enqueue({
                'type': 2,
                'song': song,
                'url': song_url,
                'album': album,
                'album_url': url,
                'artist': artist,
                'n_errors': 0,
            })
def download_song(self, thread_id, url, song, movie, movie_url):
    """
    Get song details from the website and store them.

    Returns early, after printing a warning, when the song already exists
    in the database.

    :param thread_id: Identifier printed in the warning message
    :param url: URL of song (appended to ``self.start_url``)
    :param song: Name of song
    :param movie: Name of movie
    :param movie_url: URL of movie
    """
    # Song already exists
    if db_operations.exists_song(self.start_url, url):
        warning_template = '{0} -> Song {1} already exists. Skipping.'
        print_util.print_warning(warning_template.format(thread_id, song))
        return

    # Get HTML and extract the four detail fields
    full_url = self.start_url + url
    lyrics, singers, music_by, lyricist = self.get_song_details(
        open_request(full_url, delayed=self.delay_request)
    )

    # Save in database
    record = {
        'song': song,
        'song_url': url,
        'movie': movie,
        'movie_url': movie_url,
        'start_url': self.start_url,
        'lyrics': lyrics,
        'singers': singers,
        'director': music_by,
        'lyricist': lyricist,
    }
    db_operations.save(**record)
def download_movie(self, thread_id, url, movie):
    """
    Get all songs from a movie page and queue them for download.

    When the number of songs stored for the movie equals the number
    listed on the page, the last-crawl marker is updated, a warning is
    printed and no task is queued.

    :param thread_id: Identifier printed in the warning message
    :param url: URL of movie (appended to ``self.start_url``)
    :param movie: Name of movie
    """
    address = self.start_url + url
    listed_songs = self.get_songs_with_url(
        open_request(address, delayed=self.delay_request)
    )

    # No new songs added
    stored_count = db_operations.number_of_songs(self.start_url, url)
    if stored_count == len(listed_songs):
        db_operations.update_last_crawl(self.start_url, url)
        warning_template = (
            '{0} --> Movie {1} contains no new songs. Skipping.'
        )
        print_util.print_warning(warning_template.format(thread_id, movie))
        return

    # Add all songs
    enqueue = self.task_queue.put
    for song_url, song in listed_songs:
        enqueue({
            'type': 2,
            'url': song_url,
            'song': song,
            'movie': movie,
            'movie_url': url,
            'n_errors': 0,
        })
def get_artist(self, thread_id, url, artist):
    """
    Get songs for an artist from a URL in two parts:

    1. Queue the songs found on the first page (``url``) as
       ``'type': 3`` tasks.
    2. Queue all pages after the first as ``'type': 2`` tasks.

    :param thread_id: Not used by this method
    :param url: URL of the artist page (appended to ``self.start_url``)
    :param artist: Artist name attached to every queued task
    """
    address = self.start_url + url
    page_html = open_request(address, delayed=self.delay_request)
    artist_pages = self.get_pages_for_artist(page_html)

    enqueue = self.task_queue.put

    # Add all songs from current page
    for song_url, song in self.get_songs(page_html):
        enqueue({
            'type': 3,
            'url': song_url,
            'song': song,
            'artist': artist,
            'n_errors': 0,
        })

    # Add rest of pages in task queue (the first entry is skipped)
    for page in artist_pages[1:]:
        enqueue({
            'type': 2,
            'url': page,
            'artist': artist,
            'n_errors': 0,
        })