def update_albums_with_artist():
    """Copy each album's artist name onto the album record.

    Every album returned by ``get_all_albums()`` has its ``artist_name`` set
    to the ``name`` of the artist looked up via ``album.artist_id``. Albums
    whose artist cannot be found are left untouched instead of aborting the
    whole pass.
    """
    _albums = get_all_albums()
    with managedSession() as session:
        for album in _albums:
            _artist = Artist.find_by_id(session, album.artist_id)
            if not _artist:
                # Orphaned album: there is no name to copy, so skip it rather
                # than fail on ``None.name`` and stop processing the rest.
                continue
            album.artist_name = _artist.name
            session.commit()
def get_album_by_artist(album_name, artist_name):
    """Look up an album by its name and the name of its artist.

    :param album_name: name passed to ``Album.find_by_name``
    :param artist_name: name passed to ``Artist.find_by_name``
    :return: the result of ``Album.find_by_name``, or ``None`` when the
        artist is not found or has no ``id``
    """
    with managedSession() as session:
        owner = Artist.find_by_name(session, artist_name)
        if owner and owner.id:
            return Album.find_by_name(session, album_name, owner.id)
        return None
def thread_songs(songs: List[GeniusSongLite], artistId, artistName):
    """Resolve a chunk of Genius songs to song records.

    Each song is first looked up with ``Song.find_by_name``; when that finds
    nothing, ``search_song`` is tried with the title and artist name. Songs
    that neither lookup resolves are dropped.

    :param songs: songs to resolve; only ``title`` is read from each one
    :param artistId: artist id passed to ``Song.find_by_name``
    :param artistName: artist name passed to ``search_song``
    :return: list of the resolved songs, in input order
    """
    with managedSession() as session:
        resolved = []
        for candidate in songs:
            hit = (Song.find_by_name(session, candidate.title, artistId)
                   or search_song(candidate.title, artistName))
            if not hit:
                continue
            resolved.append(hit)
            session.flush()
        session.commit()
        return resolved
def artist_album_words(artist, web_request=False):
    """Build the ``ArtistData`` for an artist and their albums.

    :param artist: artist name passed to ``Artist.find_by_name``
    :param web_request: when true, the assembled ``ArtistData`` is returned;
        otherwise the function returns ``None``
    :return: ``ArtistData`` or ``None`` (also ``None`` when the artist is not
        found, has no ``id``, or has no albums)
    """
    with managedSession() as session:
        found_artist = Artist.find_by_name(session, artist)
        if not found_artist or not found_artist.id:
            return None

        artist_albums = Album.find_albums_by_artist(session, found_artist.id)
        if not artist_albums:
            return None

        # Each album gets its own empty song list.
        album_list: List[AlbumData] = [
            AlbumData(album_data=entry, song_list=[])
            for entry in artist_albums
        ]
        artist_data = ArtistData(found_artist, album_list)
        return artist_data if web_request else None
def update_all_with_word_count():
    """Recompute ``word_count`` for every song, album and artist.

    The count is the number of whitespace-separated tokens in ``song.lyrics``
    (songs) or ``total_words`` (albums and artists). A missing (``None``)
    text is counted as zero words instead of raising ``AttributeError``.
    A commit is issued after every updated record.
    """
    with managedSession() as session:
        artists = get_all_artists()
        for artist in artists:
            albums: List[Album] = get_all_artist_albums(artist.id)
            for album in albums or []:
                if not album:
                    continue
                songs: List[Song] = get_all_album_songs(album.id)
                for song in songs or []:
                    song.word_count = len((song.lyrics or '').split())
                    session.commit()
                album.word_count = len((album.total_words or '').split())
                session.commit()
            artist.word_count = len((artist.total_words or '').split())
            session.commit()
def get_artist_by_name(artistName):  # noqa: E501
    """Get lyrical data for artist  # noqa: E501

    :param artistName: artist name to look up
    :type artistName: str

    :return: the result of ``artist_album_words(artistName, web_request=True)``
        when the artist exists, otherwise the tuple ``(NoContent, 404)``
    """
    with managedSession() as session:
        if Artist.find_by_name(session, artistName):
            return artist_album_words(artistName, web_request=True)
        return NoContent, 404
def new_build(artist_name):
    """Build (or extend) the stored data for one artist.

    The artist is fetched from the Genius API and from MusicBrainz, and the
    artist's valid albums are collected. If any of these is missing, the
    function logs (for the two artist lookups) and returns ``None`` without
    building anything. Otherwise the build is delegated to
    ``build_with_existing`` or ``build_from_scratch`` depending on whether
    the artist is already in the database.

    :param artist_name: name of the artist to build
    :return: ``None``
    """
    # Get Artist and Songs from Genius API
    genius_artist = get_artist_from_genius(artist_name)
    if not genius_artist:
        logger.error(
            LOGGER,
            'Artist data not found via Genius API: {}'.format(artist_name))
        return None

    # Get Artist from MusicBrainz Database
    mb_artist = get_artist_data(artist_name)
    if not mb_artist:
        logger.error(
            LOGGER,
            'Artist data not found in MusicBrainz database: {}'.format(
                artist_name))
        return None

    # Generate a list of valid albums
    valid_albums = get_artist_albums_from_mb(mb_artist)
    if not valid_albums:
        return None

    # Open a session here: the function previously referenced a ``session``
    # name it never bound, which only worked when a module-level ``session``
    # happened to exist.
    with managedSession() as session:
        # Check if artist already exists in the database
        # NOTE(review): other call sites pass an artist *name* to
        # Artist.find_by_name; confirm that passing genius_artist is intended.
        db_artist = Artist.find_by_name(session, genius_artist)
        if db_artist:
            build_with_existing(db_artist, genius_artist, mb_artist,
                                valid_albums)
        else:
            build_from_scratch(genius_artist, mb_artist, valid_albums)
# NOTE(review): the text below first repeats the tail of new_build() (from its
# MusicBrainz check onward) and then holds the script entry point. The entry
# point calls logger.set_logging(False) and initialize(get_sql_file()), then
# runs new_build() for every artist returned by get_all_artists(). The
# module-level ``with managedSession() as session`` also binds the global
# ``session`` name that new_build() reads.
if not mb_artist: logger.error( LOGGER, 'Artist data not found in MusicBrainz database: {}'.format( artist_name)) return None # Generate a list of valid albums valid_albums = get_artist_albums_from_mb(mb_artist) # If no valid albums exist for the artist, return None if not valid_albums: return None # with managedSession() as session: # Check if artist already exists in the database db_artist = Artist.find_by_name(session, genius_artist) if db_artist: build_with_existing(db_artist, genius_artist, mb_artist, valid_albums) else: build_from_scratch(genius_artist, mb_artist, valid_albums) if __name__ == '__main__': logger.set_logging(False) initialize(get_sql_file()) with managedSession() as session: artists: List[Artist] = get_all_artists() for artist in artists: new_build(artist.name)
def _song_mb_id_and_duration(mb_data):
    """Return ``(mb_id, duration)`` read from a song's MusicBrainz dict.

    Defaults are ``''`` and ``1``. ``duration`` comes from the top-level
    ``'length'`` key; when that value is falsy, the ``'length'`` of the
    nested ``'recording'`` entry is used if present.
    """
    mb_id = ''
    duration = 1
    if not mb_data:
        return mb_id, duration
    if 'id' in mb_data:
        mb_id = mb_data['id']
    if 'length' in mb_data:
        duration = mb_data['length']
    if not duration and 'recording' in mb_data:
        recording = mb_data['recording']
        if recording and 'length' in recording:
            duration = recording['length']
    return mb_id, duration


def migrate_database():
    """Rebuild every artist, album and song record with word statistics.

    For each artist from ``get_all_artists()`` the MusicBrainz data is
    fetched; the process exits with status 1 if it is missing. Every song is
    re-inserted with its simple/complex unique-word dictionaries and
    percentages, and those dictionaries are merged upward into album and
    artist totals, which are inserted as well. Artists without albums, albums
    without songs, and artists whose albums yielded no words are skipped.

    :raises SystemExit: when an artist is not found in MusicBrainz
    """
    with managedSession() as session:
        artists: List[Artist] = get_all_artists()
        for artist in artists:
            mb_data: ArtistResult = get_artist_data(artist.name)
            if not mb_data:
                print('{} not found from MB!'.format(artist.name))
                raise SystemExit(1)
            artist_mb_id = mb_data.id

            albums: List[Album] = get_all_artist_albums(artist.id)
            if not albums:
                print('{} has no albums!'.format(artist.name))
                continue

            artist_simple_words = {}
            artist_complex_words = {}
            total_artist_words = []

            for album in albums:
                songs: List[Song] = get_all_album_songs(album.id)
                if not songs:
                    print('{} -- {} has no songs'.format(
                        artist.name, album.name))
                    continue

                album_simple_words = {}
                album_complex_words = {}
                total_album_words = []

                for song in songs:
                    mb_id, duration = _song_mb_id_and_duration(
                        song.music_brainz_data)

                    song_words = song.lyrics.split()
                    simple_dict = dict(count_words_simple(song.lyrics))
                    complex_dict = dict(count_words_complex(song.lyrics))
                    song_percent_simple = prcnt(len(simple_dict),
                                                len(song_words))
                    song_percent_complex = prcnt(len(complex_dict),
                                                 len(song_words))

                    db_song = Song(title=song.title,
                                   has_lyrics=True,
                                   lyrics=song.lyrics,
                                   is_bonus_track=False,
                                   duration=duration,
                                   mb_id=mb_id,
                                   music_brainz_data=song.music_brainz_data,
                                   genius_id=song.genius_id,
                                   genius_data=song.genius_data,
                                   unique_simple=simple_dict,
                                   unique_complex=complex_dict,
                                   percent_simple=song_percent_simple,
                                   percent_complex=song_percent_complex,
                                   total_simple=len(simple_dict),
                                   total_complex=len(complex_dict),
                                   artist_id=song.artist_id,
                                   album_id=song.album_id)
                    insert_song(session, db_song)

                    album_simple_words = merge_dict(simple_dict,
                                                    album_simple_words)
                    album_complex_words = merge_dict(complex_dict,
                                                     album_complex_words)
                    total_album_words.extend(song_words)

                album_percent_simple = prcnt(len(album_simple_words),
                                             len(total_album_words))
                album_percent_complex = prcnt(len(album_complex_words),
                                              len(total_album_words))
                total_album_words_str = ' '.join(total_album_words)

                # album_art is copied from the existing album like every
                # other field; the bare name ``album_art`` was never defined
                # in this function.
                db_album = Album(id=album.id,
                                 name=album.name,
                                 year=album.year,
                                 label=album.label,
                                 album_art=album.album_art,
                                 mb_id=album.mb_id,
                                 music_brainz_data=album.music_brainz_data,
                                 artist_name=album.artist_name,
                                 artist_id=album.artist_id,
                                 total_words=total_album_words_str,
                                 unique_simple=album_simple_words,
                                 unique_complex=album_complex_words,
                                 percent_simple=album_percent_simple,
                                 percent_complex=album_percent_complex,
                                 total_simple=len(album_simple_words),
                                 total_complex=len(album_complex_words))
                insert_album(session, db_album)

                artist_simple_words = merge_dict(album_simple_words,
                                                 artist_simple_words)
                artist_complex_words = merge_dict(album_complex_words,
                                                  artist_complex_words)
                total_artist_words.extend(total_album_words)

            # No words were collected from any album: nothing to store for
            # the artist.
            if len(total_artist_words) == 0:
                continue

            artist_percent_simple = prcnt(len(artist_simple_words),
                                          len(total_artist_words))
            artist_percent_complex = prcnt(len(artist_complex_words),
                                           len(total_artist_words))
            total_artist_words_words_str = ' '.join(total_artist_words)

            db_artist = Artist(id=artist.id,
                               name=artist.name,
                               image_url=artist.image_url,
                               mb_id=artist_mb_id,
                               genius_id=artist.genius_id,
                               mb_data=mb_data.to_dict(),
                               total_words=total_artist_words_words_str,
                               unique_simple=artist_simple_words,
                               unique_complex=artist_complex_words,
                               percent_simple=artist_percent_simple,
                               percent_complex=artist_percent_complex,
                               total_simple=len(artist_simple_words),
                               total_complex=len(artist_complex_words))
            insert_artist(session, db_artist)
            print('Finished building {}'.format(artist.name))
def get_all_album_songs(album_id):
    """Return the result of ``Song.find_songs_by_album`` for ``album_id``.

    The lookup runs inside its own ``managedSession``.
    """
    with managedSession() as session:
        return Song.find_songs_by_album(session, album_id)
def get_all_artist_albums(artist_id):
    """Return the result of ``Album.find_albums_by_artist`` for ``artist_id``.

    The lookup runs inside its own ``managedSession``.
    """
    with managedSession() as session:
        return Album.find_albums_by_artist(session, artist_id)
def get_artist_by_name(artist_name):
    """Return the artist found for ``artist_name``, or ``None``.

    Any falsy result from ``Artist.find_by_name`` is normalised to ``None``.
    """
    with managedSession() as session:
        return Artist.find_by_name(session, artist_name) or None
def get_all_artists():
    """Return the result of ``Artist.get_all_artists``.

    The query runs inside its own ``managedSession``.
    """
    with managedSession() as session:
        return Artist.get_all_artists(session)
def get_all_albums():
    """Return the result of ``Album.get_all_albums``.

    The query runs inside its own ``managedSession``.
    """
    with managedSession() as session:
        return Album.get_all_albums(session)
def clear_artist_entries(artist_name):
    """Remove the stored songs and albums of the named artist.

    Does nothing when no artist with that name is found.

    :param artist_name: name passed to ``Artist.find_by_name``
    """
    with managedSession() as session:
        # Called on the class, as every other lookup in this file does; no
        # throwaway Artist instance is needed.
        artist = Artist.find_by_name(session, artist_name)
        if not artist:
            # Unknown artist: nothing to remove (previously this failed on
            # ``None.id``).
            return
        Song().remove_songs_by_artist(session, artist.id)
        Album().remove_album_by_artist(session, artist.id)
def build_artist_data(artist):
    """Build album data for ``artist`` from MusicBrainz and Genius.

    Steps, all within one ``managedSession``:

    1. Look the artist up in the database and in MusicBrainz, and collect the
       artist's valid albums. Returns ``None`` if MusicBrainz has no artist
       or there are no valid albums.
    2. Fetch the artist's songs from Genius, inserting the artist into the
       database first when it is not stored yet.
    3. Resolve the Genius songs in parallel via ``thread_songs``.
    4. Group the resolved songs by album and call ``build_album_data`` for
       every album that matches a valid album title, either exactly (after
       ``clean_text``) or as the single substring match among the valid
       titles that are still unmatched.

    NOTE(review): block nesting was reconstructed from flattened source;
    confirm against the original layout.

    :param artist: artist name
    :return: ``None``
    """
    with managedSession() as session:
        db_artist = Artist.find_by_name(session, artist)
        mb_artist = get_artist_data(artist)
        if not mb_artist:
            return None
        valid_albums = get_artist_albums_from_mb(mb_artist)
        if not valid_albums:
            return None
        valid_album_titles = [clean_text(x.title) for x in valid_albums]
        valid_songs = []
        existing_songs = []
        # Titles of the songs already stored for this artist.
        if db_artist:
            if db_artist.id:
                db_songs: List[Song] = Song.find_songs_by_artist(session, db_artist.id)
                if db_songs:
                    for db_song in db_songs:
                        if db_song.title:
                            existing_songs.append(db_song.title)
        # Track titles from the valid albums that are not stored yet.
        # valid_songs is only read by the logging below.
        for va in valid_albums:
            if va.tracks:
                for track in va.tracks:
                    if track.title:
                        exists = False
                        for e_song in existing_songs:
                            if compare_strings(e_song, track.title):
                                exists = True
                        if not exists:
                            valid_songs.append(remove_punc(track.title))
        if logger.is_logging():
            logger.log_title(LOGGER, 'Valid song titles:')
            for s_title in valid_songs:
                logger.debug(LOGGER, s_title)
        genius_artist, genius_songs = get_songs_from_genius(artist)
        # First time this artist is seen: store it so db_artist.id exists.
        if not db_artist:
            s_artist = create_artist_for_db(genius_artist, mb_artist)
            db_artist = insert_artist(session, s_artist)
        albums = {}
        num_threads = 15
        # Chunk size is len // num_threads + 1, so there are at most
        # num_threads chunks and the size is never zero.
        songs_chunks = list(divide_chunks(genius_songs, ((len(genius_songs) // num_threads) + 1)))
        processes = []
        songs_list = []
        # One thread_songs task per chunk; results are gathered as the tasks
        # finish, so songs_list is not in Genius order.
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            for song_list in songs_chunks:
                processes.append(executor.submit(thread_songs, song_list, db_artist.id, db_artist.name))
        for task in as_completed(processes):
            songs_list.extend(task.result())
        if logger.is_logging():
            logger.log_title(LOGGER, 'Missing Titles')
            _songs_titles = [x.title for x in genius_songs]
            song_list_titles = [y.title for y in songs_list]
            for tit in _songs_titles:
                if tit not in song_list_titles:
                    logger.debug(LOGGER, tit)
            logger.log_title(LOGGER, 'All Titles')
            for tit in _songs_titles:
                logger.debug(LOGGER, tit)
        # Split the resolved songs by album: those whose cleaned album name
        # is a valid title go to `albums`, the rest to `remaining_albums`.
        # Songs without an album are ignored.
        remaining_albums = {}
        for _song in songs_list:
            if _song.album:
                c_album = clean_text(_song.album)
                if c_album in valid_album_titles:
                    if _song.album not in albums:
                        albums[_song.album] = []
                    albums[_song.album].append(_song)
                else:
                    if _song.album not in remaining_albums:
                        remaining_albums[_song.album] = []
                    remaining_albums[_song.album].append(_song)
                    logger.debug(LOGGER, "Album wasn't found in valid album titles: {}".format(c_album))
        # Build every album that matched a valid title exactly.
        valid_matches = []
        for album_key in albums.keys():
            a_key = clean_text(album_key)
            if a_key in valid_album_titles:
                valid_album = get_valid_album(valid_albums, a_key)
                build_album_data(session, db_artist.id, album_key, albums[album_key], valid_album, db_artist.name)
                valid_matches.append(a_key)
        # Second chance for the leftovers: a leftover album is accepted only
        # when exactly one still-unmatched valid title is contained in its
        # name (punctuation removed on both sides).
        missing_valid_matches = list(set(valid_album_titles).difference(valid_matches))
        for ra_key in remaining_albums.keys():
            found = 0
            match = None
            for mvm in missing_valid_matches:
                if remove_punc(clean_text(mvm)) in remove_punc(clean_text(ra_key)):
                    found = found + 1
                    match = mvm
            if found == 1 and match:
                valid_album = get_valid_album(valid_albums, match)
                # match.title() is called on an entry of valid_album_titles
                # (presumably str.title(), if clean_text returns str).
                build_album_data(session, db_artist.id, match.title(), remaining_albums[ra_key], valid_album, db_artist.name)
            else:
                logger.debug(LOGGER, '{} was not found in valid or remaining albums {}'.format(ra_key, remaining_albums.keys()))
        logger.info(LOGGER, 'artist data built {}'.format(artist))