Esempio n. 1
0
def load_eval_data():
    """Build the zero-padded feature batch used for evaluation.

    The entries come from ``create_train_data()``. When ``hp.sanity_check``
    is set, the first ``hp.batch_size`` entries are used (the same ones used
    for training); otherwise the last ``hp.num_samples`` entries are used.

    Returns:
        A NumPy array of shape ``(len(texts), hp.max_len, hp.n_mels * hp.r)``.
        Row ``i`` holds the frame-reduced spectrogram of entry ``i`` followed
        by zero padding.
    """
    texts, _ = create_train_data()
    if hp.sanity_check:
        texts = texts[:hp.batch_size]
    else:
        texts = texts[-hp.num_samples:]

    X = np.zeros([len(texts), hp.max_len, hp.n_mels * hp.r])
    # Parenthesised so this debug print is valid on both Python 2 and 3.
    print(texts)
    for i, text in enumerate(texts):
        # NOTE(review): every entry is handed straight to get_spectrograms,
        # so despite the name these are presumably audio paths -- confirm.
        spectrogram, _ = get_spectrograms(text)
        spectrogram = reduce_frames(spectrogram,
                                    hp.win_length // hp.hop_length, hp.r)
        # Only the leading frames are filled; the rest of the row stays zero.
        X[i, :spectrogram.shape[0], :] = spectrogram

    return X
Esempio n. 2
0
def generarDatos(dataset_path):
    """Compute and store the mel spectrogram of every WAV file in a tree.

    Walks ``dataset_path`` recursively. For each file whose name ends in
    ``.wav`` the mel spectrogram returned by ``get_spectrograms`` is saved
    with ``np.save`` under ``datos/audioProcesado/<stem>.pt`` (``np.save``
    appends ``.npy`` because the name does not already end with it).

    Args:
        dataset_path: Root directory to scan for WAV files.
    """
    for path, _, files in os.walk(dataset_path, topdown=False):
        for file in files:
            # Consider only WAV files.
            if not file.endswith(".wav"):
                continue
            # Join with the directory currently being walked, not the root,
            # so files located in subdirectories resolve to a valid path.
            mel, _ = get_spectrograms(os.path.join(path, file))
            np.save("datos/audioProcesado/" + file[:-4] + '.pt', mel)
Esempio n. 3
0
    def __getitem__(self, idx):
        """Return the spectrograms of sample ``idx`` and cache them on disk.

        The WAV path is ``self.root_dir`` joined with the first column of row
        ``idx`` of ``self.landmarks_frame`` plus ``'.wav'``. Both arrays are
        also written next to the WAV file with ``np.save`` (which appends
        ``.npy`` to the ``.pt`` / ``.mag`` names).

        Args:
            idx: Positional row index into ``self.landmarks_frame``.

        Returns:
            A dict with keys ``'mel'`` and ``'mag'`` holding the two arrays
            returned by ``get_spectrograms``.
        """
        # ``DataFrame.ix`` was removed in pandas 1.0; ``iloc`` is the
        # positional accessor for this row/column lookup.
        wav_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0]) + '.wav'
        mel, mag = get_spectrograms(wav_name)

        # Strip the '.wav' suffix to build the sibling output names.
        np.save(wav_name[:-4] + '.pt', mel)
        np.save(wav_name[:-4] + '.mag', mag)

        sample = {'mel': mel, 'mag': mag}

        return sample
Esempio n. 4
0
    def __getitem__(self, idx):
        """Compute, persist and return the spectrograms of sample ``idx``.

        The WAV path is obtained from ``self.get_wav_path`` using the entry
        of ``self.array_indexes`` at position ``idx``. Both arrays returned
        by ``get_spectrograms`` are saved with ``np.save`` next to the WAV
        file, using its path minus the last four characters plus ``'.pt'``
        and ``'.mag'``.

        Returns:
            A dict with keys ``'mel'`` and ``'mag'``.
        """
        wav_path = self.get_wav_path(self.array_indexes[idx])
        mel, mag = get_spectrograms(wav_path)

        # Drop the 4-character extension to build the sibling output names.
        stem = wav_path[:-4]
        for suffix, array in (('.pt', mel), ('.mag', mag)):
            np.save(stem + suffix, array)

        return {'mel': mel, 'mag': mag}
Esempio n. 5
0
def process(args):
    """Serialize one shard of the dataset into a TFRecord file.

    Args:
        args: A ``(tfid, split_dataset)`` pair. ``tfid`` is interpolated into
            the output name ``train_{tfid}.tfrecord`` under
            ``hp.TRAIN_DATASET_PATH``. ``split_dataset`` is an iterable of
            items whose element 0 is the text and element 1 is the audio
            path.

    Each record stores three flattened features: ``'x'`` (vocabulary indices
    from ``match_vocab``), ``'y'`` (mel spectrogram) and ``'z'`` (magnitude
    spectrogram).
    """
    (tfid, split_dataset) = args
    record_path = os.path.join(hp.TRAIN_DATASET_PATH, f'train_{tfid}.tfrecord')
    # The context manager guarantees the writer is flushed and closed even
    # if feature extraction fails part-way through the shard.
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for i in tqdm(split_dataset):
            text = i[0]
            fpath = i[1]
            idxs = match_vocab(text)
            mel, mag = get_spectrograms(fpath)
            example = tf.train.Example(features=tf.train.Features(feature={
                'x': tf.train.Feature(int64_list=tf.train.Int64List(value=idxs.reshape(-1))),
                'y': tf.train.Feature(float_list=tf.train.FloatList(value=mel.reshape(-1))),
                'z': tf.train.Feature(float_list=tf.train.FloatList(value=mag.reshape(-1)))
            }))
            # Without this write the example is discarded and the shard
            # file stays empty.
            writer.write(example.SerializeToString())
    def __getitem__(self, idx):
        """Compute, persist and return the spectrograms of sample ``idx``.

        The first column of row ``idx`` of ``self.landmarks_frame`` is a name
        of the form ``spkr_emo_fnum``. The audio is read from
        ``<root_dir>/<spkr>/<emo>/wav_22/<fnum zero-padded to 5 digits>.wav``
        and both arrays are saved with ``np.save`` under
        ``hp.preprocess_path`` using the original name plus ``'.pt'`` and
        ``'.mag'``.

        Returns:
            A dict with keys ``'mel'`` and ``'mag'``.
        """
        fname = self.landmarks_frame.iloc[idx, 0]

        # Name layout: spkr_emo_fnum
        speaker, emotion, file_number = fname.strip().split('_')
        wav_file = "{:05d}".format(int(file_number)) + '.wav'
        wav_path = os.path.join(self.root_dir, speaker, emotion, 'wav_22',
                                wav_file)
        out_prefix = os.path.join(hp.preprocess_path, fname)

        mel, mag = get_spectrograms(wav_path)
        for suffix, array in (('.pt', mel), ('.mag', mag)):
            np.save(out_prefix + suffix, array)

        return {'mel': mel, 'mag': mag}
Esempio n. 7
0
def _classify_songs(song_paths, model, device, genrelist, silent):
    """Classify each song and collect the per-genre scores in a DataFrame.

    Args:
        song_paths: Iterable of audio file paths.
        model: Model in eval mode, already moved to ``device``.
        device: Torch device the spectrograms are moved to.
        genrelist: Genre names, in the order of the model outputs.
        silent: When true, do not print the predicted genre of each song.

    Returns:
        A DataFrame with a ``song_path`` column and one column per genre.
    """
    df = pd.DataFrame(columns=['song_path'] + genrelist)
    for idx, song in enumerate(song_paths):
        segments = segment_audio(song)
        audio_spectrograms = get_spectrograms(segments)
        with t.no_grad():
            pred = model(audio_spectrograms.to(device))
        # Presumably the model emits log-probabilities per segment; they are
        # exponentiated and averaged over the segments of the song.
        aux = t.exp(pred)
        percentage = (aux.sum(dim=0) / len(aux)).tolist()
        df.loc[idx] = [song] + percentage
        if not silent:
            print("Song '...{:.30}' is genre {:10}".format(
                song[-30:], genrelist[np.argmax(percentage)]))
    return df


def main():
    """Classify the genre of audio files and write the scores to a CSV.

    The positional ``input`` argument is either a text file with one audio
    path per line or a directory whose entries are all classified.

    Raises:
        IOError: If the model state file does not exist, or if ``input`` is
            neither an existing file nor an existing directory.
    """
    genrelist = ['reggaeton', 'bachata', 'salsa', 'merengue', 'chachacha']
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input',
        type=str,
        help=
        'File containing the path to the audios to classify, one path per line.'
        'It can also be the path to a folder with audios instead of a file.')

    parser.add_argument('-o',
                        '--output_file',
                        type=str,
                        default='output.csv',
                        help='Output file with the classified audios')

    parser.add_argument('-s',
                        '--silent',
                        action='store_true',
                        help='Dont print the results, only write the file')

    parser.add_argument('-m',
                        '--model',
                        type=str,
                        default='model.pt',
                        help='File with the model dict_state')

    args = parser.parse_args()
    mod_state = args.model

    if not os.path.isfile(mod_state):
        raise IOError(
            'Model state dictionary {} doesn not exist'.format(mod_state))

    # Resolve the list of songs before loading the model so that a bad input
    # path fails fast instead of being silently ignored.
    if os.path.isfile(args.input):
        with open(args.input, 'r') as inputfile:
            songs = [line.rstrip('\r\n') for line in inputfile]
        # Blank lines (e.g. a trailing newline) are not song paths.
        songs = [song for song in songs if song.strip()]
    elif os.path.isdir(args.input):
        songs = [
            os.path.join(args.input, name) for name in os.listdir(args.input)
        ]
    else:
        raise IOError(
            'Input {} is neither a file nor a directory'.format(args.input))

    # Load the model
    device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    model = GenreClassifier()
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    model.load_state_dict(t.load(mod_state, map_location=device))
    model.eval()
    print('Using device:', device)
    print('---------------')
    model.to(device)
    # Additional Info when using cuda
    if device.type == 'cuda':
        # memory_cached was renamed memory_reserved in newer PyTorch; fall
        # back to the old name on versions that predate the rename.
        reserved = getattr(t.cuda, 'memory_reserved',
                           None) or t.cuda.memory_cached
        print(t.cuda.get_device_name(0))
        print('Memory Usage:')
        print('Allocated:', round(t.cuda.memory_allocated(0) / 1024**3, 1),
              'GB')
        print('Cached:   ', round(reserved(0) / 1024**3, 1), 'GB')

    # Segment every song, predict its genre and store the scores.
    df = _classify_songs(songs, model, device, genrelist, args.silent)
    df.to_csv(args.output_file, index=False)
Esempio n. 8
0
def main():
    """Score every ``*.wav`` file under ``--rootdir`` with a pretrained MOSNet.

    Prints the average predicted score and writes the per-file scores plus
    the average to ``MOSnet_result_raw.txt`` inside ``--rootdir``.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate custom waveform files using pretrained MOSnet.")
    # Required: with no root directory the file search and the result path
    # below have nothing to work with.
    parser.add_argument("--rootdir",
                        required=True,
                        type=str,
                        help="rootdir of the waveforms to be evaluated")
    parser.add_argument("--pretrained_model",
                        default="./pre_trained/cnn_blstm.h5",
                        type=str,
                        help="pretrained model file")
    args = parser.parse_args()

    #### tensorflow & gpu settings ####

    # 0 = all messages are logged (default behavior)
    # 1 = INFO messages are not printed
    # 2 = INFO and WARNING messages are not printed
    # 3 = INFO, WARNING, and ERROR messages are not printed
    # NOTE(review): this is set after tensorflow has presumably been imported
    # at module level, which may limit its effect -- confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}

    tf.debugging.set_log_device_placement(False)
    # set memory growth
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)

            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus),
                  "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)

    ###################################

    # find waveform files
    wavfiles = sorted(find_files(args.rootdir, "*.wav"))

    # init model
    print("Loading model weights")
    MOSNet = CNN_BLSTM()
    model = MOSNet.build()
    model.load_weights(args.pretrained_model)

    # evaluation
    print("Start evaluating {} waveforms...".format(len(wavfiles)))
    results = []
    scores = []

    for wavfile in tqdm(wavfiles):

        # spectrogram
        mag_sgram = utils.get_spectrograms(wavfile)
        timestep = mag_sgram.shape[0]
        mag_sgram = np.reshape(mag_sgram, (1, timestep, utils.SGRAM_DIM))

        # make prediction
        Average_score, Frame_score = model.predict(mag_sgram,
                                                   verbose=0,
                                                   batch_size=1)

        # write to list
        score_text = "{:.3f}".format(Average_score[0][0])
        results.append(wavfile + " " + score_text)
        # Keep the value exactly as written (3 decimals) so the reported
        # average matches the per-file scores in the result file.
        scores.append(float(score_text))

    # print average (NaN when no waveform was found, without the NumPy
    # empty-mean warning)
    average = np.mean(np.array(scores)) if scores else float("nan")
    print("Average: {}".format(average))

    # write final raw result
    resultrawpath = os.path.join(args.rootdir, "MOSnet_result_raw.txt")
    with open(resultrawpath, "w") as outfile:
        outfile.write("\n".join(sorted(results)))
        outfile.write("\nAverage: {}\n".format(average))