def compute_melgram():
    """Compute and cache a mel spectrogram for every track in the dataset.

    For each row of the ``metadata`` table, loads ``tracks_wav/<id>.wav``,
    computes its mel spectrogram, trims it to exactly 1366 frames and saves
    it as ``features_melgram/<id>.npy``. Tracks shorter than 1366 frames are
    removed from the database. Tracks whose melgram file already exists are
    skipped.
    """
    # Libraries: Import local
    import numpy as np
    import pandas as pd
    import sqlite3
    import os

    # Libraries: Import custom
    from masters.paths import path_dataset_lastfm2020_db, path_dataset_lastfm2020
    from masters.tools_audio import melgram
    from masters.tools_dataset import folderstruct_lastfm_dataset, dataset_delete

    # Expected melgram width in frames; shorter tracks are dropped.
    n_frames = 1366

    # Check if required folder structure is present
    print('Checking required folder structure.')
    folderstruct_lastfm_dataset()

    # Connect to the database
    conn = sqlite3.connect(path_dataset_lastfm2020_db)
    c = conn.cursor()

    # Select all metadata from the database and make pd.DataFrame
    c.execute('SELECT * FROM metadata')
    columns_names = [description[0] for description in c.description]
    metadata = pd.DataFrame(c.fetchall(), columns=columns_names)

    # Ids whose melgram is already on disk (set: O(1) membership tests)
    dir_features = os.listdir(path_dataset_lastfm2020 + 'features_melgram/')
    computed_ids = {str(name.split('.', 1)[0]) for name in dir_features}

    # Compute mel spectrogram for each of the tracks
    for i in range(len(metadata)):
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        print('Computing melgram for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])
        id_dataset = metadata['id_dataset'][i]

        if id_dataset in computed_ids:
            print('Melgram is already computed: ' + metadata['artist'][i]
                  + ' - ' + metadata['name'][i])
            continue

        path_track_wav = (path_dataset_lastfm2020 + 'tracks_wav/'
                          + id_dataset + '.wav')
        melgram_computed = melgram(path_track_wav)

        if np.size(melgram_computed, axis=1) >= n_frames:
            # Trim extra frames if the track is longer than expected
            # (slicing replaces the original np.delete over a range).
            melgram_computed = melgram_computed[:, :n_frames]
            np.save(path_dataset_lastfm2020 + 'features_melgram/'
                    + id_dataset + '.npy', melgram_computed)
            print('Successfully computed.')
        else:
            # Remove track from the database if it is shorter than expected
            dataset_delete(conn, c, id_dataset)
            print('The track is not 1366 samples long. '
                  'Successfully deleted from the database.')

    # Close connection to the database file
    conn.close()
    print('Done.')
def pair_spotify_preview_url():
    """Fill in the Spotify 30-second preview URL for tracks that lack one.

    Selects every metadata row whose ``url_spotify_preview`` column still
    holds the placeholder string 'None', queries the Spotify API for each
    track and writes the preview URL back to the database. Tracks with no
    preview available are deleted from the database.

    NOTE(review): ``api_key_spotify`` is referenced below but is neither a
    parameter nor imported anywhere in this module — as written this raises
    NameError. Confirm where the key is supposed to come from.
    """
    # Libraries: Import local
    import pandas as pd
    import sqlite3

    # Libraries: Import custom
    from masters.paths import path_dataset_lastfm2020_db
    from masters.tools_dataset import get_spotify, dataset_delete, folderstruct_lastfm_dataset

    # Check if required folder structure is present
    print('Checking required folder structure.')
    folderstruct_lastfm_dataset()

    # Connect to the database
    conn = sqlite3.connect(path_dataset_lastfm2020_db)
    c = conn.cursor()

    # Select rows still missing a preview URL and make pd.DataFrame.
    # Parameterized query: the placeholder value 'None' is a stored string.
    c.execute('SELECT * FROM metadata WHERE url_spotify_preview = ?', ('None',))
    columns_names = [description[0] for description in c.description]
    metadata = pd.DataFrame(c.fetchall(), columns=columns_names)

    # Use Spotify API to get the preview URL for each of the tracks
    for i in range(len(metadata)):
        id_dataset = metadata['id_dataset'][i]
        id_spotify = metadata['id_spotify'][i]
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        print('Pairing Spotify preview URL for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])

        # Send Spotify API request and try to get the preview URL
        track_spotify_json = get_spotify(id_spotify, api_key_spotify)
        url_spotify_preview = track_spotify_json["preview_url"]

        if url_spotify_preview is None:
            # Remove track from the database if there is no preview URL
            print('This track does not have preview associated with Spotify.')
            dataset_delete(conn, c, id_dataset)
            print('Successfully deleted from the database.')
        else:
            print('Spotify preview URL found: ' + url_spotify_preview)
            # Parameterized UPDATE — never splice external strings into SQL.
            c.execute('UPDATE metadata SET url_spotify_preview = ? '
                      'WHERE id_spotify = ?',
                      (url_spotify_preview, id_spotify))
            conn.commit()
            print('Successfully saved into the database.')

    # Close connection to the database file
    conn.close()
    print('Done.')
def convert_to_wav():
    """Convert every downloaded MP3 preview to WAV and verify the result.

    Pass 1: for each metadata row, converts ``tracks_mp3/<id>.mp3`` to
    ``tracks_wav/<id>.wav`` unless the WAV file already exists.
    Pass 2: re-lists the WAV folder and deletes from the database any track
    whose WAV file is still missing (i.e. conversion failed or the MP3 was
    absent).
    """
    # Libraries: Import local
    import pandas as pd
    import sqlite3
    import os

    # Libraries: Import custom
    from masters.paths import path_dataset_lastfm2020_db, path_dataset_lastfm2020
    from masters.tools_audio import mp3_to_wav
    from masters.tools_dataset import folderstruct_lastfm_dataset, dataset_delete

    # Check if required folder structure is present
    print('Checking required folder structure.')
    folderstruct_lastfm_dataset()

    # Connect to the database
    conn = sqlite3.connect(path_dataset_lastfm2020_db)
    c = conn.cursor()

    # Select all metadata from the database and make pd.DataFrame
    c.execute('SELECT * FROM metadata')
    columns_names = [description[0] for description in c.description]
    metadata = pd.DataFrame(c.fetchall(), columns=columns_names)

    def _wav_ids():
        # Ids (filenames without extension) already present in tracks_wav/.
        # Set gives O(1) membership tests inside the loops below.
        names = os.listdir(path_dataset_lastfm2020 + 'tracks_wav/')
        return {str(name.split('.', 1)[0]) for name in names}

    # Pass 1: convert each track to WAV
    converted_ids = _wav_ids()
    for i in range(len(metadata)):
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        id_dataset = metadata['id_dataset'][i]
        if id_dataset in converted_ids:
            print('Track is already converted: ' + metadata['artist'][i]
                  + ' - ' + metadata['name'][i])
            continue
        print('Converting preview for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])
        path_track_mp3 = path_dataset_lastfm2020 + 'tracks_mp3/' + id_dataset + '.mp3'
        path_track_wav = path_dataset_lastfm2020 + 'tracks_wav/' + id_dataset + '.wav'
        mp3_to_wav(path_track_mp3, path_track_wav)
        print('Successfully converted.')

    # Pass 2: check if converting was successful
    converted_ids = _wav_ids()
    for i in range(len(metadata['id_dataset'])):
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        print('Verifying preview for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])
        id_dataset = str(metadata['id_dataset'][i])
        if id_dataset not in converted_ids:
            dataset_delete(conn, c, id_dataset)
            print('Track preview is missing. Successfully deleted from the database.')
        else:
            print('Successfully verified.')

    # Close connection to the database file
    conn.close()
    print('Done.')
def pair_spotify_id():
    """Scrape the Spotify ID for every track that does not have one yet.

    Selects metadata rows whose ``id_spotify`` column still holds the
    placeholder string 'None', opens each track's Last.fm page and extracts
    the Spotify ID from the external-link anchor. Tracks with no Spotify
    link, HTTP 451 responses, or too many redirects are deleted from the
    database.

    NOTE(review): on any other non-200 status the loop retries the same
    index without limit — confirm an infinite retry is intended here.
    """
    # Libraries: Import local
    import pandas as pd
    import sqlite3
    import requests
    from bs4 import BeautifulSoup

    # Libraries: Import custom
    from masters.paths import path_dataset_lastfm2020_db
    from masters.tools_dataset import dataset_delete, folderstruct_lastfm_dataset

    # Check if required folder structure is present
    print('Checking required folder structure.')
    folderstruct_lastfm_dataset()

    # Connect to the database
    conn = sqlite3.connect(path_dataset_lastfm2020_db)
    c = conn.cursor()

    # Select rows still missing a Spotify ID and make pd.DataFrame.
    # Parameterized query: the placeholder value 'None' is a stored string.
    c.execute('SELECT * FROM metadata WHERE id_spotify = ?', ('None',))
    columns_names = [description[0] for description in c.description]
    metadata = pd.DataFrame(c.fetchall(), columns=columns_names)

    # Pair Spotify ID for each of the tracks using its Last.fm webpage
    i = 0
    while i < len(metadata):
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        print('Pair Spotify ID for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])
        url = metadata['url_lastfm'][i]
        id_dataset = metadata['id_dataset'][i]

        # Load track's Last.fm webpage
        try:
            resp = requests.get(url)
        except requests.exceptions.TooManyRedirects as e:
            print('Request error: ' + str(e) + '. Deleting track...')
            dataset_delete(conn, c, id_dataset)
            print('Successfully deleted from the database.')
            i = i + 1
            continue

        if resp.status_code == 200:
            # Use BeautifulSoup to get the Spotify ID from the page
            print('Successfully opened Last.fm page: ' + url)
            soup = BeautifulSoup(resp.text, 'html.parser')
            spotify_class = soup.find(
                'a',
                {'class': 'resource-external-link resource-external-link--spotify'})
            if spotify_class is None:
                # Remove track from the database if it has no Spotify link
                print('This track does not have Spotify ID associated with Last.fm.')
                dataset_delete(conn, c, id_dataset)
                print('Successfully deleted from the database.')
            else:
                # Update database when a Spotify ID is present.
                # href looks like .../track/<id>; index 4 is the ID segment.
                spotify_id = spotify_class['href'].split('/')[4]
                print('Spotify ID found: ' + spotify_id)
                # Parameterized UPDATE — never splice external strings into SQL.
                c.execute('UPDATE metadata SET id_spotify = ? WHERE url_lastfm = ?',
                          (spotify_id, url))
                conn.commit()
                print('Successfully saved into the database.')
            i = i + 1
        elif resp.status_code == 451:
            # Workaround for regional restrictions (HTTP 451)
            print('Unauthorized access. Deleting track...')
            dataset_delete(conn, c, id_dataset)
            print('Successfully deleted from the database.')
            i = i + 1
        else:
            # Any other status: retry the same track (i is not advanced).
            print("Error. Trying to refresh page.")

    # Close connection to the database file
    conn.close()
    print('Done.')