def compute_melgram():
    """Compute and cache a mel spectrogram for every track in the dataset.

    For each row of the ``metadata`` table, loads ``tracks_wav/<id>.wav``,
    computes its mel spectrogram, trims it to exactly 1366 frames and saves
    it as ``features_melgram/<id>.npy``. Tracks shorter than 1366 frames are
    removed from the database. Tracks whose melgram file already exists are
    skipped.
    """
    # Libraries: Import local
    import numpy as np
    import pandas as pd
    import sqlite3
    import os

    # Libraries: Import custom
    from masters.paths import path_dataset_lastfm2020_db, path_dataset_lastfm2020
    from masters.tools_audio import melgram
    from masters.tools_dataset import folderstruct_lastfm_dataset, dataset_delete

    # Expected melgram width in frames; shorter tracks are dropped.
    n_frames = 1366

    # Check if required folder structure is present
    print('Checking required folder structure.')
    folderstruct_lastfm_dataset()

    # Connect to the database
    conn = sqlite3.connect(path_dataset_lastfm2020_db)
    c = conn.cursor()

    # Select all metadata from the database and make pd.DataFrame
    c.execute('SELECT * FROM metadata')
    columns_names = [description[0] for description in c.description]
    metadata = pd.DataFrame(c.fetchall(), columns=columns_names)

    # Ids whose melgram is already on disk (set: O(1) membership tests)
    dir_features = os.listdir(path_dataset_lastfm2020 + 'features_melgram/')
    computed_ids = {str(name.split('.', 1)[0]) for name in dir_features}

    # Compute mel spectrogram for each of the tracks
    for i in range(len(metadata)):
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        print('Computing melgram for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])
        id_dataset = metadata['id_dataset'][i]

        if id_dataset in computed_ids:
            print('Melgram is already computed: ' + metadata['artist'][i]
                  + ' - ' + metadata['name'][i])
            continue

        path_track_wav = (path_dataset_lastfm2020 + 'tracks_wav/'
                          + id_dataset + '.wav')
        melgram_computed = melgram(path_track_wav)

        if np.size(melgram_computed, axis=1) >= n_frames:
            # Trim extra frames if the track is longer than expected
            # (slicing replaces the original np.delete over a range).
            melgram_computed = melgram_computed[:, :n_frames]
            np.save(path_dataset_lastfm2020 + 'features_melgram/'
                    + id_dataset + '.npy', melgram_computed)
            print('Successfully computed.')
        else:
            # Remove track from the database if it is shorter than expected
            dataset_delete(conn, c, id_dataset)
            print('The track is not 1366 samples long. '
                  'Successfully deleted from the database.')

    # Close connection to the database file
    conn.close()
    print('Done.')
def pair_spotify_preview_url():
    """Fill in the Spotify 30-second preview URL for tracks that lack one.

    Selects every metadata row whose ``url_spotify_preview`` column still
    holds the placeholder string 'None', queries the Spotify API for each
    track and writes the preview URL back to the database. Tracks with no
    preview available are deleted from the database.

    NOTE(review): ``api_key_spotify`` is referenced below but is neither a
    parameter nor imported anywhere in this module — as written this raises
    NameError. Confirm where the key is supposed to come from.
    """
    # Libraries: Import local
    import pandas as pd
    import sqlite3

    # Libraries: Import custom
    from masters.paths import path_dataset_lastfm2020_db
    from masters.tools_dataset import get_spotify, dataset_delete, folderstruct_lastfm_dataset

    # Check if required folder structure is present
    print('Checking required folder structure.')
    folderstruct_lastfm_dataset()

    # Connect to the database
    conn = sqlite3.connect(path_dataset_lastfm2020_db)
    c = conn.cursor()

    # Select rows still missing a preview URL and make pd.DataFrame.
    # Parameterized query: the placeholder value 'None' is a stored string.
    c.execute('SELECT * FROM metadata WHERE url_spotify_preview = ?', ('None',))
    columns_names = [description[0] for description in c.description]
    metadata = pd.DataFrame(c.fetchall(), columns=columns_names)

    # Use Spotify API to get the preview URL for each of the tracks
    for i in range(len(metadata)):
        id_dataset = metadata['id_dataset'][i]
        id_spotify = metadata['id_spotify'][i]
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        print('Pairing Spotify preview URL for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])

        # Send Spotify API request and try to get the preview URL
        track_spotify_json = get_spotify(id_spotify, api_key_spotify)
        url_spotify_preview = track_spotify_json["preview_url"]

        if url_spotify_preview is None:
            # Remove track from the database if there is no preview URL
            print('This track does not have preview associated with Spotify.')
            dataset_delete(conn, c, id_dataset)
            print('Successfully deleted from the database.')
        else:
            print('Spotify preview URL found: ' + url_spotify_preview)
            # Parameterized UPDATE — never splice external strings into SQL.
            c.execute('UPDATE metadata SET url_spotify_preview = ? '
                      'WHERE id_spotify = ?',
                      (url_spotify_preview, id_spotify))
            conn.commit()
            print('Successfully saved into the database.')

    # Close connection to the database file
    conn.close()
    print('Done.')
def convert_to_wav():
    """Convert every downloaded MP3 preview to WAV and verify the result.

    Pass 1: for each metadata row, converts ``tracks_mp3/<id>.mp3`` to
    ``tracks_wav/<id>.wav`` unless the WAV file already exists.
    Pass 2: re-lists the WAV folder and deletes from the database any track
    whose WAV file is still missing (i.e. conversion failed or the MP3 was
    absent).
    """
    # Libraries: Import local
    import pandas as pd
    import sqlite3
    import os

    # Libraries: Import custom
    from masters.paths import path_dataset_lastfm2020_db, path_dataset_lastfm2020
    from masters.tools_audio import mp3_to_wav
    from masters.tools_dataset import folderstruct_lastfm_dataset, dataset_delete

    # Check if required folder structure is present
    print('Checking required folder structure.')
    folderstruct_lastfm_dataset()

    # Connect to the database
    conn = sqlite3.connect(path_dataset_lastfm2020_db)
    c = conn.cursor()

    # Select all metadata from the database and make pd.DataFrame
    c.execute('SELECT * FROM metadata')
    columns_names = [description[0] for description in c.description]
    metadata = pd.DataFrame(c.fetchall(), columns=columns_names)

    def _wav_ids():
        # Ids (filenames without extension) already present in tracks_wav/.
        # Set gives O(1) membership tests inside the loops below.
        names = os.listdir(path_dataset_lastfm2020 + 'tracks_wav/')
        return {str(name.split('.', 1)[0]) for name in names}

    # Pass 1: convert each track to WAV
    converted_ids = _wav_ids()
    for i in range(len(metadata)):
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        id_dataset = metadata['id_dataset'][i]
        if id_dataset in converted_ids:
            print('Track is already converted: ' + metadata['artist'][i]
                  + ' - ' + metadata['name'][i])
            continue
        print('Converting preview for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])
        path_track_mp3 = path_dataset_lastfm2020 + 'tracks_mp3/' + id_dataset + '.mp3'
        path_track_wav = path_dataset_lastfm2020 + 'tracks_wav/' + id_dataset + '.wav'
        mp3_to_wav(path_track_mp3, path_track_wav)
        print('Successfully converted.')

    # Pass 2: check if converting was successful
    converted_ids = _wav_ids()
    for i in range(len(metadata['id_dataset'])):
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        print('Verifying preview for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])
        id_dataset = str(metadata['id_dataset'][i])
        if id_dataset not in converted_ids:
            dataset_delete(conn, c, id_dataset)
            print('Track preview is missing. Successfully deleted from the database.')
        else:
            print('Successfully verified.')

    # Close connection to the database file
    conn.close()
    print('Done.')
def pair_spotify_id():
    """Scrape the Spotify ID for every track that does not have one yet.

    Selects metadata rows whose ``id_spotify`` column still holds the
    placeholder string 'None', opens each track's Last.fm page and extracts
    the Spotify ID from the external-link anchor. Tracks with no Spotify
    link, HTTP 451 responses, or too many redirects are deleted from the
    database.

    NOTE(review): on any other non-200 status the loop retries the same
    index without limit — confirm an infinite retry is intended here.
    """
    # Libraries: Import local
    import pandas as pd
    import sqlite3
    import requests
    from bs4 import BeautifulSoup

    # Libraries: Import custom
    from masters.paths import path_dataset_lastfm2020_db
    from masters.tools_dataset import dataset_delete, folderstruct_lastfm_dataset

    # Check if required folder structure is present
    print('Checking required folder structure.')
    folderstruct_lastfm_dataset()

    # Connect to the database
    conn = sqlite3.connect(path_dataset_lastfm2020_db)
    c = conn.cursor()

    # Select rows still missing a Spotify ID and make pd.DataFrame.
    # Parameterized query: the placeholder value 'None' is a stored string.
    c.execute('SELECT * FROM metadata WHERE id_spotify = ?', ('None',))
    columns_names = [description[0] for description in c.description]
    metadata = pd.DataFrame(c.fetchall(), columns=columns_names)

    # Pair Spotify ID for each of the tracks using its Last.fm webpage
    i = 0
    while i < len(metadata):
        print('\nTrack ' + str(i + 1) + '/' + str(len(metadata)))
        print('Pair Spotify ID for track: ' + metadata['artist'][i]
              + ' - ' + metadata['name'][i])
        url = metadata['url_lastfm'][i]
        id_dataset = metadata['id_dataset'][i]

        # Load track's Last.fm webpage
        try:
            resp = requests.get(url)
        except requests.exceptions.TooManyRedirects as e:
            print('Request error: ' + str(e) + '. Deleting track...')
            dataset_delete(conn, c, id_dataset)
            print('Successfully deleted from the database.')
            i = i + 1
            continue

        if resp.status_code == 200:
            # Use BeautifulSoup to get the Spotify ID from the page
            print('Successfully opened Last.fm page: ' + url)
            soup = BeautifulSoup(resp.text, 'html.parser')
            spotify_class = soup.find(
                'a',
                {'class': 'resource-external-link resource-external-link--spotify'})
            if spotify_class is None:
                # Remove track from the database if it has no Spotify link
                print('This track does not have Spotify ID associated with Last.fm.')
                dataset_delete(conn, c, id_dataset)
                print('Successfully deleted from the database.')
            else:
                # Update database when a Spotify ID is present.
                # href looks like .../track/<id>; index 4 is the ID segment.
                spotify_id = spotify_class['href'].split('/')[4]
                print('Spotify ID found: ' + spotify_id)
                # Parameterized UPDATE — never splice external strings into SQL.
                c.execute('UPDATE metadata SET id_spotify = ? WHERE url_lastfm = ?',
                          (spotify_id, url))
                conn.commit()
                print('Successfully saved into the database.')
            i = i + 1
        elif resp.status_code == 451:
            # Workaround for regional restrictions (HTTP 451)
            print('Unauthorized access. Deleting track...')
            dataset_delete(conn, c, id_dataset)
            print('Successfully deleted from the database.')
            i = i + 1
        else:
            # Any other status: retry the same track (i is not advanced).
            print("Error. Trying to refresh page.")

    # Close connection to the database file
    conn.close()
    print('Done.')