コード例 #1
0
    def get_song(self, thread_id, url, song, artist):
        """
        Fetch a single song page and store its details in the database.

        If ``db_operations.exists_song`` reports the song for this site, a
        warning is logged and nothing is downloaded or saved.

        :param thread_id: ID of the worker thread (used only in log output)
        :param url: URL of the song, appended to ``self.start_url``
        :param song: Title of the song
        :param artist: Artist of song
        """
        if db_operations.exists_song(self.start_url, url):
            print_util.print_warning(
                '{0} --> Song {1} already exists. Skipping.'.format(
                    thread_id, song))
            # Actually skip: without this return the page was fetched and
            # saved again despite the warning above.
            return

        complete_url = self.start_url + url
        raw_html = open_request(complete_url, delayed=self.delay_request)

        # Note: additional_artists are artist(s) featured in the song
        album, lyrics, lyricist, additional_artists = self.get_song_details(
            raw_html
        )

        # NOTE(review): the song URL is passed in two positions; the second
        # presumably fills the album URL slot -- confirm against
        # db_operations.save.
        db_operations.save(song, url, album, url, self.start_url, lyrics,
                           additional_artists + [artist], [artist], lyricist)
コード例 #2
0
    def download_song(self, thread_id, url, song, movie, movie_url):
        """
        Fetch one song page and store the parsed details in the database.

        :param thread_id: ID of the worker thread (used in log output)
        :param url: URL of song, appended to ``self.start_url``
        :param song: Name of song
        :param movie: Name of movie
        :param movie_url: URL of movie
        """
        # Nothing to do when the song is already stored
        if db_operations.exists_song(self.start_url, url):
            message = '{0} -> Song {1} already exists. Skipping.'.format(
                thread_id, song)
            print_util.print_warning(message)
            return

        # Download and parse the song page
        page = open_request(self.start_url + url, delayed=self.delay_request)
        details = self.get_song_details(page)
        lyrics, singers, music_by, lyricist = details

        # Persist everything in one call
        record = {
            'song': song,
            'song_url': url,
            'movie': movie,
            'movie_url': movie_url,
            'start_url': self.start_url,
            'lyrics': lyrics,
            'singers': singers,
            'director': music_by,
            'lyricist': lyricist,
        }
        db_operations.save(**record)
コード例 #3
0
    def get_song(self, thread_id, url, song, album, album_url, artist):
        """
        Fetch the lyrics of a song and save them in the database.

        The artist is stored as singer, director and lyricist alike.

        :param thread_id: ID of the worker thread (used in log output)
        :param url: URL of the song, appended to ``self.start_url``
        :param song: Song title
        :param album: Album name
        :param album_url: URL of album (same as artist) on the website
        :param artist: Artist name
        """
        already_stored = db_operations.exists_song(self.start_url, url)
        if already_stored:
            notice = '{0} -> Song {1} already exists. Skipping'.format(
                thread_id, song)
            print_util.print_warning(notice)
            return

        page = open_request(self.start_url + url, delayed=self.delay_request)
        lyrics = self.get_song_details(page)

        record = {
            'song': song,
            'song_url': url,
            'movie': album,
            'movie_url': album_url,
            'start_url': self.start_url,
            'lyrics': lyrics,
            'singers': artist,
            'director': artist,
            'lyricist': artist,
        }
        db_operations.save(**record)
コード例 #4
0
ファイル: base_crawler.py プロジェクト: iiitv/lyrics-crawler
    def get_song(self, thread_id, url, song, artist):
        """
        Fetch a single song page and store its details in the database.

        If ``db_operations.exists_song`` reports the song for this site, a
        warning is logged and nothing is downloaded or saved.

        :param thread_id: ID of the worker thread (used only in log output)
        :param url: URL of the song, appended to ``self.start_url``
        :param song: Title of the song
        :param artist: Artist of song
        """
        if db_operations.exists_song(self.start_url, url):
            print_util.print_warning(
                '{0} --> Song {1} already exists. Skipping.'.format(
                    thread_id,
                    song
                )
            )
            # Honour the "Skipping" message: previously execution fell
            # through and the song was downloaded and saved again.
            return

        complete_url = self.start_url + url
        raw_html = open_request(complete_url, delayed=self.delay_request)

        # Note: additional_artists are artist(s) featured in the song
        album, lyrics, lyricist, additional_artists = self.get_song_details(
            raw_html
        )

        # NOTE(review): the song URL is passed in two positions; the second
        # presumably fills the album URL slot -- confirm against
        # db_operations.save.
        db_operations.save(
            song,
            url,
            album,
            url,
            self.start_url,
            lyrics,
            additional_artists + [artist],
            [artist],
            lyricist
        )
コード例 #5
0
    def download_movie(self, thread_id, url, movie):
        """
        Fetch a movie page and queue a download task for each of its songs.

        When the number of songs found on the page equals the number
        reported by ``db_operations.number_of_songs``, only the last-crawl
        record is updated and nothing is queued.

        :param thread_id: ID of the worker thread (used in log output)
        :param url: URL of movie, appended to ``self.start_url``
        :param movie: Name of movie
        """
        page = open_request(self.start_url + url, delayed=self.delay_request)
        listed_songs = self.get_songs_with_url(page)

        stored_count = db_operations.number_of_songs(self.start_url, url)
        if stored_count == len(listed_songs):
            # Same count as already stored: treated as nothing new
            db_operations.update_last_crawl(self.start_url, url)
            notice = '{0} --> Movie {1} contains no new songs. Skipping.'
            print_util.print_warning(notice.format(thread_id, movie))
            return

        # Queue one type-2 task per song on the page
        for link, title in listed_songs:
            song_task = {
                'type': 2,
                'url': link,
                'song': title,
                'movie': movie,
                'movie_url': url,
                'n_errors': 0,
            }
            self.task_queue.put(song_task)
コード例 #6
0
ファイル: base_crawler.py プロジェクト: iiitv/lyrics-crawler
    def get_song(self, thread_id, url, song, album, album_url, artist):
        """
        Fetch the lyrics of a song and save them in the database.

        The artist is stored as singer, director and lyricist alike.

        :param thread_id: ID of the worker thread (used in log output)
        :param url: URL of the song, appended to ``self.start_url``
        :param song: Song title
        :param album: Album name
        :param album_url: URL of album (same as artist) on the website
        :param artist: Artist name
        """
        if db_operations.exists_song(self.start_url, url):
            template = '{0} -> Song {1} already exists. Skipping'
            print_util.print_warning(template.format(thread_id, song))
            return

        full_url = self.start_url + url
        lyrics = self.get_song_details(
            open_request(full_url, delayed=self.delay_request)
        )

        fields = {
            'song': song,
            'song_url': url,
            'movie': album,
            'movie_url': album_url,
            'start_url': self.start_url,
            'lyrics': lyrics,
            'singers': artist,
            'director': artist,
            'lyricist': artist,
        }
        db_operations.save(**fields)
コード例 #7
0
    def threader(self, thread_id):
        """
        Worker loop: take tasks off the queue and run them until it is empty.

        Task types are dispatched as 0 -> ``get_movies``,
        1 -> ``download_movie``, 2 -> ``download_song``. A task that raises
        is logged, has its ``n_errors`` counter incremented and is put back
        on the queue; a task whose counter has reached
        ``self.max_allowed_errors`` is dropped with a warning.

        :param thread_id: Assigned ID of thread.
        """
        while True:
            if self.task_queue.empty():  # Nothing left to do
                break

            job = self.task_queue.get()

            # Drop tasks that have already failed too often
            if job['n_errors'] >= self.max_allowed_errors:
                skipped = (
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id, job))
                print_util.print_warning(skipped)
                continue

            started = '{0} --> New task : {1}'.format(thread_id, job)
            print_util.print_info(started)

            try:
                kind = job['type']
                if kind == 0:
                    self.get_movies(thread_id, job['url'])
                elif kind == 1:
                    self.download_movie(thread_id, job['url'], job['movie'])
                elif kind == 2:
                    self.download_song(
                        thread_id,
                        job['url'],
                        job['song'],
                        job['movie'],
                        job['movie_url'],
                    )

                # Success is logged inside the try so a logging failure is
                # handled like any other task error
                finished = '{0} --> Task complete : {1}'.format(
                    thread_id, job)
                print_util.print_info(finished, Colors.GREEN)

            except Exception as err:
                failure = '{0} --> Error : {1}'.format(thread_id, err)
                print_util.print_error(failure)
                job['n_errors'] += 1
                self.task_queue.put(job)  # Retry later
コード例 #8
0
ファイル: base_crawler.py プロジェクト: iiitv/lyrics-crawler
    def download_song(self, thread_id, url, song, movie, movie_url):
        """
        Fetch one song page and store the parsed details in the database.

        :param thread_id: ID of the worker thread (used in log output)
        :param url: URL of song, appended to ``self.start_url``
        :param song: Name of song
        :param movie: Name of movie
        :param movie_url: URL of movie
        """
        # Already stored: log and stop
        if db_operations.exists_song(self.start_url, url):
            template = '{0} -> Song {1} already exists. Skipping.'
            print_util.print_warning(template.format(thread_id, song))
            return

        # Download the page, then pull the details out of it
        full_url = self.start_url + url
        parsed = self.get_song_details(
            open_request(full_url, delayed=self.delay_request)
        )
        lyrics, singers, music_by, lyricist = parsed

        # Store the record
        fields = {
            'song': song,
            'song_url': url,
            'movie': movie,
            'movie_url': movie_url,
            'start_url': self.start_url,
            'lyrics': lyrics,
            'singers': singers,
            'director': music_by,
            'lyricist': lyricist,
        }
        db_operations.save(**fields)
コード例 #9
0
    def threader(self, thread_id):
        """
        Worker loop: take tasks off the queue and run them until it is empty.

        Task types are dispatched as 0 -> ``get_artists``,
        1 -> ``get_artist``, 2 -> ``get_songs_from_page``, 3 -> ``get_song``.
        A task that raises is logged, has its ``n_errors`` counter
        incremented and is put back on the queue; a task whose counter has
        reached ``self.max_allowed_errors`` is dropped with a warning.

        :param thread_id: ID of the worker thread (used in log output)
        """
        while True:
            if self.task_queue.empty():  # Nothing left to do
                break

            job = self.task_queue.get()

            # Drop tasks that have already failed too often
            if job['n_errors'] >= self.max_allowed_errors:
                skipped = (
                    '{0} --> Too many errors in task {1}. Skipping.'.format(
                        thread_id, job))
                print_util.print_warning(skipped)
                continue

            started = '{0} --> New task : {1}'.format(thread_id, job)
            print_util.print_info(started)

            try:
                kind = job['type']
                if kind == 0:
                    self.get_artists(thread_id, job['url'])
                elif kind == 1:
                    self.get_artist(thread_id, job['url'], job['artist'])
                elif kind == 2:
                    self.get_songs_from_page(
                        thread_id, job['url'], job['artist'])
                elif kind == 3:
                    self.get_song(
                        thread_id, job['url'], job['song'], job['artist'])

                # Success is logged inside the try so a logging failure is
                # handled like any other task error
                finished = '{0} --> Task complete : {1}'.format(
                    thread_id, job)
                print_util.print_info(finished, Colors.GREEN)
            except Exception as err:
                failure = '{0} --> Error : {1}'.format(thread_id, err)
                print_util.print_error(failure)
                job['n_errors'] += 1
                self.task_queue.put(job)  # Retry later
コード例 #10
0
ファイル: base_crawler.py プロジェクト: iiitv/lyrics-crawler
    def download_movie(self, thread_id, url, movie):
        """
        Fetch a movie page and queue a download task for each of its songs.

        When the number of songs found on the page equals the number
        reported by ``db_operations.number_of_songs``, only the last-crawl
        record is updated and nothing is queued.

        :param thread_id: ID of the worker thread (used in log output)
        :param url: URL of movie, appended to ``self.start_url``
        :param movie: Name of movie
        """
        full_url = self.start_url + url
        page = open_request(full_url, delayed=self.delay_request)
        found = self.get_songs_with_url(page)

        already_stored = db_operations.number_of_songs(self.start_url, url)
        nothing_new = already_stored == len(found)

        if nothing_new:
            db_operations.update_last_crawl(self.start_url, url)
            template = '{0} --> Movie {1} contains no new songs. Skipping.'
            print_util.print_warning(template.format(thread_id, movie))
            return

        # Every song on the page becomes a type-2 task
        for song_link, song_name in found:
            queued = {
                'type': 2,
                'url': song_link,
                'song': song_name,
                'movie': movie,
                'movie_url': url,
                'n_errors': 0,
            }
            self.task_queue.put(queued)
コード例 #11
0
ファイル: base_crawler.py プロジェクト: iiitv/lyrics-crawler
    def threader(self, thread_id):
        """
        Worker loop: take tasks off the queue and run them until it is empty.

        Task types are dispatched as 0 -> ``get_artists``,
        1 -> ``get_artist``, 2 -> ``get_songs_from_page``, 3 -> ``get_song``.
        A task that raises is logged, has its ``n_errors`` counter
        incremented and is put back on the queue; a task whose counter has
        reached ``self.max_allowed_errors`` is dropped with a warning.

        :param thread_id: ID of the worker thread (used in log output)
        """
        while True:
            if self.task_queue.empty():
                break

            current = self.task_queue.get()

            # Give up on tasks that keep failing
            if current['n_errors'] >= self.max_allowed_errors:
                template = '{0} --> Too many errors in task {1}. Skipping.'
                print_util.print_warning(template.format(thread_id, current))
                continue

            print_util.print_info(
                '{0} --> New task : {1}'.format(thread_id, current))

            try:
                task_type = current['type']
                if task_type == 0:
                    self.get_artists(thread_id, current['url'])
                elif task_type == 1:
                    self.get_artist(
                        thread_id, current['url'], current['artist'])
                elif task_type == 2:
                    self.get_songs_from_page(
                        thread_id, current['url'], current['artist'])
                elif task_type == 3:
                    self.get_song(
                        thread_id,
                        current['url'],
                        current['song'],
                        current['artist'],
                    )

                # Logged inside the try so a logging failure is handled
                # like any other task error
                done = '{0} --> Task complete : {1}'.format(
                    thread_id, current)
                print_util.print_info(done, Colors.GREEN)
            except Exception as exc:
                print_util.print_error(
                    '{0} --> Error : {1}'.format(thread_id, exc))
                current['n_errors'] += 1
                self.task_queue.put(current)  # Back on the queue for a retry
コード例 #12
0
ファイル: base_crawler.py プロジェクト: iiitv/lyrics-crawler
    def threader(self, thread_id):
        """
        Worker loop: take tasks off the queue and run them until it is empty.

        Task types are dispatched as 0 -> ``get_movies``,
        1 -> ``download_movie``, 2 -> ``download_song``. A task that raises
        is logged, has its ``n_errors`` counter incremented and is put back
        on the queue; a task whose counter has reached
        ``self.max_allowed_errors`` is dropped with a warning.

        :param thread_id: Assigned ID of thread.
        """
        while True:
            if self.task_queue.empty():  # Queue drained
                break

            current = self.task_queue.get()  # Next pending task

            # Give up on tasks that keep failing
            if current['n_errors'] >= self.max_allowed_errors:
                template = '{0} --> Too many errors in task {1}. Skipping.'
                print_util.print_warning(template.format(thread_id, current))
                continue

            # Log the task
            print_util.print_info(
                '{0} --> New task : {1}'.format(thread_id, current))

            try:
                # Run the handler matching the task type
                task_type = current['type']
                if task_type == 0:
                    self.get_movies(thread_id, current['url'])
                elif task_type == 1:
                    self.download_movie(
                        thread_id, current['url'], current['movie'])
                elif task_type == 2:
                    self.download_song(
                        thread_id,
                        current['url'],
                        current['song'],
                        current['movie'],
                        current['movie_url'],
                    )

                # Log success (inside the try, so a logging failure is
                # handled like any other task error)
                done = '{0} --> Task complete : {1}'.format(
                    thread_id, current)
                print_util.print_info(done, Colors.GREEN)

            except Exception as exc:
                print_util.print_error(
                    '{0} --> Error : {1}'.format(thread_id, exc))
                current['n_errors'] += 1  # Count the failure
                self.task_queue.put(current)  # Back on the queue for a retry