コード例 #1
0
def get_metadata(filename):
    """
    Deprecated: look up the metadata of an audio file in the dataset's ``metadata.csv``.

    The file name is made relative to the dataset base directory, converted to
    forward slashes and used as the key into the ``Path`` column of the CSV.

    :param filename: path of the audio file, normally located under the dataset base directory
    :return: tuple ``(artist, genre, lyrics)`` taken from the ``Artist`` and ``Raga`` columns;
        ``lyrics`` is always ``''``. When no row matches, ``('unknown', 'Celtic', '')`` is returned.
    """
    import csv
    import os

    import pandas as pd

    base_path = platform_details.get_platform_path(
        r"Users\theko\Documents\Dataset")
    metadata_path = os.path.join(base_path, 'metadata.csv')

    # str.strip(base_path) removes any *characters* contained in base_path from
    # both ends of the string, which can eat into the relative path itself.
    # Remove the base directory as a prefix instead.
    if filename.startswith(base_path):
        relative = filename[len(base_path):].lstrip("\\/")
    else:
        relative = filename
    filename_new = relative.replace("\\", "/")

    # Reading with sep='\n' and splitting every line by hand is rejected by
    # current pandas; let read_csv split on commas directly. Quoting is disabled
    # and every value is kept as a plain string so that fields are interpreted
    # exactly like the former literal split on ','.
    df = pd.read_csv(metadata_path,
                     dtype=str,
                     keep_default_na=False,
                     quoting=csv.QUOTE_NONE,
                     index_col=False)
    df = df[['Path', 'Artist', 'Raga', 'RagaId', 'fileId']]
    # De-duplicate on the lookup key so that .loc below yields a single value
    # and distinct files are never collapsed into one row.
    df = df.drop_duplicates(subset='Path').set_index('Path')

    try:
        artist = df.loc[filename_new, 'Artist']
        genre = df.loc[filename_new, 'Raga']
    except KeyError:
        # No row for this file: report what was looked up and fall back.
        print(filename, filename_new, base_path)
        return 'unknown', 'Celtic', ''
    return artist, genre, ''
コード例 #2
0
def _creat_artist_ID_dictionary():
    """
    Load the artist id table from ``v4_artist_ids.txt``.

    Every non-blank line of the file is expected to have the form
    ``<artist>;<integer id>``.

    :return: tuple ``(artist_ids, artists)`` where ``artist_ids`` maps artist name -> id
        and ``artists`` maps id -> artist name
    :raises ValueError: if a non-blank line has no ``;`` or its id is not an integer
    """
    artist_ids = {}
    artists = {}
    artist_ids_file = platform_details.get_platform_path(
        r"Users\theko\Documents\Dataset\v4_artist_ids.txt")
    with open(artist_ids_file, 'r', encoding="utf-8") as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # Tolerate blank lines instead of failing on the unpacking below.
                continue
            # Split on the last ';' so a name that itself contains ';' still parses.
            artist, raw_id = entry.rsplit(';', 1)
            artist_id = int(raw_id)
            artist_ids[artist] = artist_id
            artists[artist_id] = artist
    return artist_ids, artists
コード例 #3
0
def _create_Raga_ID_ditionary():
    """
    Load the genre (raga) id table from ``v4_genre_ids.txt``.

    Every non-blank line of the file is expected to have the form
    ``<genre>;<integer id>``.

    :return: tuple ``(genre_ids, genres)`` where ``genre_ids`` maps genre name -> id
        and ``genres`` maps id -> genre name
    :raises ValueError: if a non-blank line has no ``;`` or its id is not an integer
    """
    genre_ids = {}
    genres = {}
    genre_ids_file = platform_details.get_platform_path(
        r"Users\theko\Documents\Dataset\v4_genre_ids.txt")
    with open(genre_ids_file, 'r', encoding="utf-8") as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # Tolerate blank lines instead of failing on the unpacking below.
                continue
            # Split on the last ';' so a name that itself contains ';' still parses.
            genre, raw_id = entry.rsplit(';', 1)
            genre_id = int(raw_id)
            genre_ids[genre] = genre_id
            genres[genre_id] = genre
    return genre_ids, genres
コード例 #4
0
def add_genre_to_dataset(genreID, genreFilesPath, copy=True):
    """
    Copy the audio files of one genre into the dataset and append their rows to ``metadata.csv``.

    Every audio file found under ``genreFilesPath`` gets a nine-digit file id:
    the zero-padded genre id, a fixed ``000`` artist id and the 1-based position
    of the file in the listing. Each file is copied into ``<copy_base>/<file id>/``
    and a line ``path,artist,genre,genre-lowercase,file id`` is appended to
    ``<copy_base>/metadata.csv``.

    :param genreID: id of the genre; looked up in the table loaded by ``_create_Raga_ID_ditionary``
    :param genreFilesPath: directory searched for audio files (resolved through
        ``platform_details.get_platform_path``)
    :param copy: when false, nothing is copied and ``metadata.csv`` is not touched
    :return: None
    :raises Exception: if ``genreID`` is not in the genre table and the genre name
        cannot be read from the tags of the first file
    """
    _, genre = _create_Raga_ID_ditionary()
    files_list = librosa.util.find_files(
        platform_details.get_platform_path(genreFilesPath))

    artists = []
    fileIDs = []
    for position, audio_file in enumerate(files_list, start=1):
        tags = audio_metadata.load(audio_file)['tags']
        artists.append(tags.artist if 'artist' in tags else '')
        fileIDs.append(
            str(genreID).zfill(3) + str(0).zfill(3) + str(position).zfill(3))

    if genreID in genre:
        gen = genre[genreID]
    else:
        # Unknown id: fall back to the genre tag of the first file, if any.
        try:
            gen = audio_metadata.load(files_list[0])['tags'].genre
        except Exception as err:
            raise Exception(
                "Add genre name to the 'v4_genre_ids' first") from err

    ### assigning artist ID := 0 for now, will edit the code later
    ### TODO: add get_artist() and get_artist_id()
    if not copy:
        return

    metadata_path = os.path.join(copy_base, 'metadata.csv')
    total = len(files_list)
    with open(metadata_path, 'a', encoding='utf8') as f:
        for position, (audio_file, artist, file_id) in enumerate(
                zip(files_list, artists, fileIDs), start=1):
            copy_dest = os.path.join(copy_base, file_id)
            # An already existing directory is fine; any other failure is reported.
            os.makedirs(copy_dest, exist_ok=True)
            new_path = shutil.copy2(audio_file, copy_dest)
            # str.strip(copy_base) removes *characters*, not a prefix, and can
            # truncate the stored path (e.g. ".opus" -> ".op"); compute the
            # path relative to the dataset root instead.
            new_path = os.path.relpath(new_path, copy_base)
            f.write(
                f"{new_path},{artist},{gen},{gen.lower()},{file_id}\n"
            )
            print(f"file {position} of {total} copied!!")
コード例 #5
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import shutil
import librosa
import platform_details
from numba import vectorize
import audio_metadata
#
# Dataset root directory, as returned by platform_details.get_platform_path.
copy_base = platform_details.get_platform_path('Users/theko/Documents/Dataset')
# Names of the entries directly under the dataset root that are directories.
ALL_COMPOSITIONS = list(
    filter(lambda entry: os.path.isdir(os.path.join(copy_base, entry)),
           os.listdir(copy_base)))


# The following functions (`collate_raga_data`, `collate_and_add_celtic`, and `create_file_artist_genre_ids`) are
# specific to the current dataset and must not be used.
def collate_raga_data(base_dir, labels_json):
    IDX = os.listdir(base_dir)
    print(labels_json)
    LABELS = pd.read_json(labels_json, orient='index')
    AUDIO_DATA = pd.DataFrame()
    local_artists = {'': []}
    AUDIOS = []
    Audios_artist = []
    Audios_raga = []
    Audios_ragaId = []
    Audios_fileId = []
    genre_id = 0
コード例 #6
0
                name = new_name[0]
            else:
                new_name = fname.replace('.opus', '')
                name = new_name
            name = name + '.mp3'
            out = ffmpeg.input(folder + '/' + fname).output(filename=folder +
                                                            "mp3/" + name,
                                                            format='mp3')
            out.run()
            i = i + 1


if __name__ == '__main__':
    # files = librosa.util.find_files(copy_base)
    # fname = np.random.choice(files)
    path = platform_details.get_platform_path("/vismaya/")
    extract_audio_from_videos(path)
    # print(fname)
    # cut_file_1min_segments(fname)
    # signal, sr = librosa.load(fname)
    # three_mins = 3*get_one_min(sr)
    # one_min = get_one_min(sr)
    # signal = signal[-three_mins:-one_min]
    # raga = get_metadata_windows(fname)
    # print(raga)
    # pcd_X, pcd_Y = calculate_pcd(signal, sr)
    # plt.plot(pcd_X, pcd_Y, label=raga)
    # plt.show()

    # sample= (1, 2, 3)
    # X = np.zeros(5)