def getFbankFeatures(wavFile, destFile, median, variance, numFrames):
    """Extract filterbank features for one wav file and update running sums.

    Runs ``HCopy`` with the ``./fbank-48.conf`` configuration to write
    ``destFile + '.htk'``, loads that file back, saves the loaded array to
    ``destFile + '.npy'`` and then accumulates per-row statistics.

    Despite their names, ``median`` and ``variance`` are used here as running
    accumulators: for each of the first ``nfilt`` rows (``nfilt`` is a
    module-level name) the row sum is added to ``median[i]`` and the sum of
    squares to ``variance[i]``.
    NOTE(review): this indexes the loaded array as (row=i, columns) and counts
    the columns of row 0 as frames -- confirm the layout of ``htk.data``.

    Args:
        wavFile: Input wav path passed to ``HCopy``.
        destFile: Output path prefix (``.htk`` and ``.npy`` are appended).
        median: Indexable accumulator of row sums, updated in place.
        variance: Indexable accumulator of row sums of squares, updated in place.
        numFrames: Running count; the number of columns in row 0 is added.

    Returns:
        The tuple ``(median, variance, numFrames)`` after the update.
    """
    htk_path = destFile + '.htk'
    HCopy('./fbank-48.conf', wavFile, htk_path)

    reader = HTKFile()
    reader.load(htk_path)
    fbank = np.asarray(reader.data)
    np.save(destFile + '.npy', fbank)

    numFrames += np.size(fbank[0, :])

    for band in range(nfilt):
        row = fbank[band, :].flatten('F')
        row_sum = np.sum(row)
        row_sum_of_squares = np.sum(row * row)
        median[band] += row_sum
        variance[band] += row_sum_of_squares

    return median, variance, numFrames
# --- Example #2 (scraped-snippet separator; original marker text: "Beispiel #2" / "0") ---
def predict_vcm(model, input, mean_var):
    """Classify one HTK feature file and return the label with its score.

    Args:
        model: Callable applied to the normalised feature tensor; its result
            must expose ``.data`` and convert to a NumPy array.
        input: Path of the HTK feature file to load.
        mean_var: Path of a pickle file holding a mapping with the keys
            ``'mean'`` and ``'var'``.

    Returns:
        A tuple ``(label, confidence)`` where ``label`` is one of
        ``'NCS'``, ``'CNS'``, ``'CRY'``, ``'OTH'`` (chosen by the arg-max of
        the model output) and ``confidence`` is the maximum output value.

    Raises:
        AssertionError: If ``mean_var`` does not exist.
    """
    # Read the normalisation parameters.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use with trusted parameter files.
    assert os.path.exists(mean_var)
    with open(mean_var, 'rb') as stats_file:
        stats = pickle.load(stats_file)
        offset, divisor = stats['mean'], stats['var']

    # Load the input features and normalise them.
    # NOTE(review): the value stored under 'var' is used directly as the
    # divisor (no square root) -- confirm what the file actually stores.
    reader = HTKFile()
    reader.load(input)
    raw_features = np.array(reader.data)
    normalised = (raw_features - offset) / divisor

    # Run the model on a float32 tensor and pull the scores back to NumPy.
    tensor = Variable(torch.from_numpy(normalised.astype('float32')))  # .cuda()
    scores = model(tensor).data.data.cpu().numpy()
    prediction_confidence = scores.max()  # posterior probability

    labels = ['NCS', 'CNS', 'CRY', 'OTH']
    best = np.argmax(scores)
    return labels[best], prediction_confidence
# --- Example #3 (scraped-snippet separator; original marker text: "Beispiel #3" / "0") ---
def code_data_to_MFCC(filepath, outputFilePath, configPath, filenameList=None):
    """Convert wav files to MFCC with HCopy and dump each result as CSV text.

    Writes an HTK script file ``codetr.scp`` into ``filepath`` listing one
    ``<input>.wav <output>.mfc`` pair per line, runs ``HTK.HCopy`` on it,
    then loads every produced ``.mfc`` file and saves its data as
    comma-separated text to ``outputFilePath/<basename>.out``.

    Args:
        filepath: Directory containing the wav files (a ``/`` is appended).
        outputFilePath: Directory receiving the ``.out`` files (a ``/`` is
            appended).
        configPath: HCopy configuration file passed to ``HTK.HCopy``.
        filenameList: Optional iterable of wav file names relative to
            ``filepath``; when ``None`` the names come from
            ``find_all_files(filepath, ".wav")``.

    Raises:
        ValueError: If a name in ``filenameList`` does not contain ``.wav``.
    """
    filepath = filepath + "/"
    outputFilePath = outputFilePath + "/"
    # `is None` rather than `== None`: the latter misbehaves for array-like
    # arguments that overload equality.
    if filenameList is None:
        filenameList = find_all_files(filepath, ".wav")

    # Raw strings with an escaped dot: the original patterns contained the
    # invalid escape "\/" and matched any character in place of the dot.
    wav_pattern = re.compile(r"(.+)\.wav")
    mfc_pattern = re.compile(r".+/(.+)\.mfc")

    script_path = filepath + "codetr.scp"
    allOutputFiles = []
    # The context manager guarantees the script is flushed and closed before
    # HCopy reads it, even if a file name is rejected part-way through.
    with open(script_path, "w+") as script:
        for filename in sorted(filenameList):
            match = wav_pattern.search(filename)
            if match is None:
                raise ValueError("Not a .wav file name: %r" % (filename,))
            stem = filepath + match.group(1)
            inputFileName = stem + ".wav"
            outputFileName = stem + ".mfc"
            allOutputFiles.append(outputFileName)
            script.write(inputFileName + " " + outputFileName + "\n")

    HTK.HCopy(configPath, script_path)

    htk_reader = HTKFile()
    for filename in sorted(allOutputFiles):
        htk_reader.load(filename)
        result = numpy.array(htk_reader.data)
        # Keep only the base name (text after the last "/") for the output.
        base_name = mfc_pattern.search(filename).group(1)
        numpy.savetxt(outputFilePath + base_name + ".out", result,
                      delimiter=",")
def datatest_generator(filelistpath, batch_size=32, shuffle=False):
    """Yield batches of test spectrograms indefinitely.

    File ids are read from ``filelistpath`` (one per line).  For each id the
    feature matrix is loaded in the format chosen by the module-level
    ``features`` setting (``'h5'`` or ``'mfc'``), rescaled, forced to
    ``expected_shape[0]`` rows, optionally passed through a per-dataset
    domain-adaptation transform, and stored in the current batch.

    The function relies on module-level names defined elsewhere in the file:
    ``LABELPATH``, ``SPECTPATH``, ``features``, ``spect``, ``expected_shape``,
    ``domain_adaptation`` and the ``transform_for_*`` matrices.

    Args:
        filelistpath: Path of a text file with one file id per line.
        batch_size: Number of spectrograms per yielded batch.
        shuffle: When true, the file list is reshuffled each time iteration
            wraps around to the first entry.

    Yields:
        A one-element list ``[spect_batch]`` where ``spect_batch`` has shape
        ``(batch_size, spect.shape[0], spect.shape[1], 1)``.
    """
    batch_index = 0
    image_index = -1

    with open(filelistpath, 'r') as filelist:
        filenames = filelist.readlines()

    dataset = ['Chernobyl.csv', 'PolandNFC.csv', 'warblrb10k-eval.csv']

    # The labels are not part of the yielded batches; they are still read so
    # that a missing or malformed label file fails up front as before.  Each
    # file is now closed as soon as it has been consumed.
    labels_dict = {}
    for label_file in dataset:
        with open(LABELPATH + label_file, 'r') as label_handle:
            labels_list = csv.reader(label_handle)
            next(labels_list)  # skip the header row
            for k, r, v in labels_list:
                labels_dict[r + '/' + k] = v

    while True:
        image_index = (image_index + 1) % len(filenames)

        # Reshuffle whenever a new pass over the file list begins.
        if shuffle and image_index == 0:
            random.shuffle(filenames)

        file_id = filenames[image_index].rstrip()

        if batch_index == 0:
            # Start a fresh spectrogram batch.
            spect_batch = np.zeros(
                [batch_size, spect.shape[0], spect.shape[1], 1])

        # NOTE(review): any other value of `features` leaves `imagedata`
        # unset (or stale from the previous file) -- confirm only 'h5' and
        # 'mfc' are ever configured.
        if features == 'h5':
            # [:-4] strips a four-character suffix from the id (used for the
            # evaluation dataset according to the original comment).
            with h5py.File(SPECTPATH + file_id[:-4] + '.h5', 'r') as hf:
                imagedata = np.array(hf.get('features'))

            # Normalise spectrogram intensities from
            # [-15.0966, 2.25745] to the [0, 1] range.
            imagedata = (imagedata + 15.0966) / (15.0966 + 2.25745)

        elif features == 'mfc':
            htk_reader = HTKFile()
            # NOTE(review): eight characters are stripped here versus four in
            # the h5 branch -- confirm the id suffix this expects.
            htk_reader.load(SPECTPATH + file_id[:-8] + '.mfc')
            imagedata = np.array(htk_reader.data)
            imagedata = imagedata / 17.0

        # Force files whose frame count differs from the expected one (seen
        # in the warblr dataset) to exactly expected_shape[0] rows.
        if imagedata.shape[0] != expected_shape[0]:
            old_imagedata = imagedata
            imagedata = np.zeros(expected_shape)
            n_old = old_imagedata.shape[0]
            n_expected = expected_shape[0]

            if n_old < n_expected:
                diff_in_frames = n_expected - n_old
                if diff_in_frames < n_expected / 2:
                    # Slightly short: pad by repeating the last frames.
                    imagedata = np.vstack((
                        old_imagedata,
                        old_imagedata[range(n_old - diff_in_frames, n_old)]))
                else:
                    # At most half as long: tile the whole matrix, then top
                    # up with its last frames.  Using `else` (instead of a
                    # strict `>` test) also covers a difference of exactly
                    # half, which previously fell through and produced an
                    # all-zero matrix.
                    count = int(np.floor(n_expected / n_old))
                    remaining_diff = n_expected - n_old * count
                    imagedata = np.vstack([old_imagedata] * count)
                    imagedata = np.vstack((
                        imagedata,
                        old_imagedata[range(n_old - remaining_diff, n_old)]))

            elif n_old > n_expected:
                diff_in_frames = n_old - n_expected

                if diff_in_frames < n_expected / 2:
                    # Slightly long: fold the surplus tail onto the head by
                    # averaging, keep the rest unchanged.
                    head = range(0, diff_in_frames + 1)
                    imagedata[head, :] = np.mean(
                        np.array([
                            old_imagedata[head, :],
                            old_imagedata[range(n_old - diff_in_frames - 1,
                                                n_old), :]
                        ]),
                        axis=0)
                    rest = range(diff_in_frames + 1, n_expected)
                    imagedata[rest, :] = old_imagedata[rest]
                else:
                    # Much longer: combine full-length chunks, folding the
                    # leftover tail onto the head in every iteration (kept
                    # exactly as in the original).  `else` also covers a
                    # difference of exactly half, see above.
                    count = int(np.floor(n_old / n_expected))
                    remaining_diff = n_old - n_expected * count
                    for index in range(0, count):
                        imagedata[range(0, n_expected), :] = np.sum(
                            [
                                imagedata,
                                old_imagedata[range(index * n_expected,
                                                    (index + 1) * n_expected)]
                            ],
                            axis=0) / count
                        imagedata[range(0, remaining_diff), :] = np.mean(
                            np.array([
                                old_imagedata[range(n_old - remaining_diff,
                                                    n_old), :],
                                imagedata[range(0, remaining_diff), :]
                            ]),
                            axis=0)

        if domain_adaptation == True:
            # The dataset name is the part of the id before the last '/'.
            filedataset = file_id[:file_id.rfind('/')]
            if filedataset == 'BirdVox-DCASE-20k':
                imagedata = np.matmul(imagedata, transform_for_birdvox)
                # observed min: 3.4020782, max: 6.9419036
                imagedata = (imagedata - 3.4) / (6.95 - 3.4)

            elif filedataset == 'ff1010bird':
                imagedata = np.matmul(imagedata, transform_for_ff1010bird)
                # observed min: 1.4374458, max: 7.363845
                imagedata = (imagedata - 1.4) / (7.37 - 1.4)

            elif filedataset == 'Chernobyl':
                imagedata = np.matmul(imagedata, transform_for_chern)
                # observed min: 3.7511292, max: 7.00125
                imagedata = (imagedata - 3.75) / (7 - 3.75)

            elif filedataset == 'PolandNFC':
                imagedata = np.matmul(imagedata, transform_for_poland)
                # observed min: -10.796116, max: 7.4045897
                imagedata = (imagedata + 10.8) / (10.8 + 7.40)

        imagedata = np.reshape(imagedata,
                               (1, imagedata.shape[0], imagedata.shape[1], 1))

        spect_batch[batch_index, :, :, :] = imagedata

        batch_index += 1

        if batch_index >= batch_size:
            batch_index = 0
            inputs = [spect_batch]
            yield inputs
def data_generator(filelistpath, batch_size=16, shuffle=False):
    """Yield augmented training batches of spectrograms and labels forever.

    File ids are read from ``filelistpath`` (one per line).  Each file's
    feature matrix is loaded in the format chosen by the module-level
    ``features`` setting (``'h5'`` or ``'mfc'``), rescaled and forced to
    ``expected_shape[0]`` rows.  The original matrix plus
    ``AUGMENT_SIZE - 1`` samples drawn from ``datagen.flow`` are written into
    the batch.

    The function relies on module-level names defined elsewhere in the file:
    ``LABELPATH``, ``SPECTPATH``, ``features``, ``spect``, ``expected_shape``,
    ``datagen`` and ``AUGMENT_SIZE``.

    NOTE(review): the batch-full check only runs inside the augmentation
    loop, so nothing is yielded when ``AUGMENT_SIZE`` is 1, and the indexing
    appears to assume ``batch_size`` is a multiple of ``AUGMENT_SIZE`` --
    confirm against the configured values.

    Args:
        filelistpath: Path of a text file with one file id per line.
        batch_size: Number of samples per yielded batch.
        shuffle: When true, the file list is shuffled once up front and again
            each time iteration wraps around to the first entry.

    Yields:
        A tuple ``([aug_spect_batch], [aug_label_batch])`` with shapes
        ``(batch_size, spect.shape[0], spect.shape[1], 1)`` and
        ``(batch_size, 1)``.
    """
    batch_index = 0
    image_index = -1
    with open(filelistpath, 'r') as filelist:
        filenames = filelist.readlines()

    # Initial shuffle of the file list.
    if shuffle:
        random.shuffle(filenames)

    dataset = ['BirdVox-DCASE-20k.csv', 'ff1010bird.csv', 'warblrb10k.csv']

    # Map "<dataset>/<item>.wav" to its label; each CSV is closed as soon as
    # it has been read (the original left the handles open).
    labels_dict = {}
    for label_file in dataset:
        with open(LABELPATH + label_file, 'r') as label_handle:
            labels_list = csv.reader(label_handle)
            next(labels_list)  # skip the header row
            for k, r, v in labels_list:
                labels_dict[r + '/' + k + '.wav'] = v

    while True:
        image_index = (image_index + 1) % len(filenames)

        # Reshuffle whenever a new pass over the file list begins.
        if shuffle and image_index == 0:
            random.shuffle(filenames)

        file_id = filenames[image_index].rstrip()

        if batch_index == 0:
            # Start fresh single-sample and augmented batch buffers.
            spect_batch = np.zeros([1, spect.shape[0], spect.shape[1], 1])
            label_batch = np.zeros([1, 1])
            aug_spect_batch = np.zeros(
                [batch_size, spect.shape[0], spect.shape[1], 1])
            aug_label_batch = np.zeros([batch_size, 1])

        # NOTE(review): any other value of `features` leaves `imagedata`
        # unset (or stale from the previous file) -- confirm only 'h5' and
        # 'mfc' are ever configured.
        if features == 'h5':
            with h5py.File(SPECTPATH + file_id + '.h5', 'r') as hf:
                imagedata = np.array(hf.get('features'))
            # Normalise spectrogram intensities from
            # [-15.0966, 2.25745] to the [0, 1] range.
            imagedata = (imagedata + 15.0966) / (15.0966 + 2.25745)
        elif features == 'mfc':
            htk_reader = HTKFile()
            htk_reader.load(SPECTPATH + file_id[:-4] + '.mfc')
            imagedata = np.array(htk_reader.data)
            imagedata = imagedata / 17.0

        # Force files whose frame count differs from the expected one (seen
        # in the warblr dataset) to exactly expected_shape[0] rows.
        if imagedata.shape[0] != expected_shape[0]:
            old_imagedata = imagedata
            imagedata = np.zeros(expected_shape)
            n_old = old_imagedata.shape[0]
            n_expected = expected_shape[0]

            if n_old < n_expected:
                diff_in_frames = n_expected - n_old
                if diff_in_frames < n_expected / 2:
                    # Slightly short: pad by repeating the last frames.
                    imagedata = np.vstack((
                        old_imagedata,
                        old_imagedata[range(n_old - diff_in_frames, n_old)]))
                else:
                    # At most half as long: tile the whole matrix, then top
                    # up with its last frames.  Using `else` (instead of a
                    # strict `>` test) also covers a difference of exactly
                    # half, which previously fell through and produced an
                    # all-zero matrix.
                    count = int(np.floor(n_expected / n_old))
                    remaining_diff = n_expected - n_old * count
                    imagedata = np.vstack([old_imagedata] * count)
                    imagedata = np.vstack((
                        imagedata,
                        old_imagedata[range(n_old - remaining_diff, n_old)]))

            elif n_old > n_expected:
                diff_in_frames = n_old - n_expected

                if diff_in_frames < n_expected / 2:
                    # Slightly long: fold the surplus tail onto the head by
                    # averaging, keep the rest unchanged.
                    head = range(0, diff_in_frames + 1)
                    imagedata[head, :] = np.mean(
                        np.array([
                            old_imagedata[head, :],
                            old_imagedata[range(n_old - diff_in_frames - 1,
                                                n_old), :]
                        ]),
                        axis=0)
                    rest = range(diff_in_frames + 1, n_expected)
                    imagedata[rest, :] = old_imagedata[rest]
                else:
                    # Much longer: combine full-length chunks, folding the
                    # leftover tail onto the head in every iteration (kept
                    # exactly as in the original).  `else` also covers a
                    # difference of exactly half, see above.
                    count = int(np.floor(n_old / n_expected))
                    remaining_diff = n_old - n_expected * count
                    for index in range(0, count):
                        imagedata[range(0, n_expected), :] = np.sum(
                            [
                                imagedata,
                                old_imagedata[range(index * n_expected,
                                                    (index + 1) * n_expected)]
                            ],
                            axis=0) / count
                        imagedata[range(0, remaining_diff), :] = np.mean(
                            np.array([
                                old_imagedata[range(n_old - remaining_diff,
                                                    n_old), :],
                                imagedata[range(0, remaining_diff), :]
                            ]),
                            axis=0)

        imagedata = np.reshape(imagedata,
                               (1, imagedata.shape[0], imagedata.shape[1], 1))

        spect_batch[0, :, :, :] = imagedata
        label_batch[0, :] = labels_dict[file_id]

        # Augmentation stream seeded with this single sample and its label.
        gen_img = datagen.flow(imagedata,
                               label_batch[0, :],
                               batch_size=1,
                               shuffle=False,
                               save_to_dir=None)

        # The unmodified sample always goes into the batch first.
        aug_spect_batch[batch_index, :, :, :] = imagedata
        aug_label_batch[batch_index, :] = label_batch[0, :]
        batch_index += 1

        for n in range(AUGMENT_SIZE - 1):
            aug_spect_batch[batch_index, :, :, :], aug_label_batch[
                batch_index, :] = gen_img.next()
            batch_index += 1
            if batch_index >= batch_size:
                batch_index = 0
                inputs = [aug_spect_batch]
                outputs = [aug_label_batch]
                yield inputs, outputs
# --- Example #6 (scraped-snippet separator; original marker text: "Beispiel #6" / "0") ---
def main(session_key, config_file, segment_size, step_size):
    """Extract features for an uploaded audio file and write 2-D embeddings.

    Picks the first non-hidden file in ``static/uploads/<session_key>/``,
    converts it to wav when its path ends in ``mp3``, splits recordings
    longer than one hour into one-hour chunks with ``sox``, extracts features
    (a scipy spectrogram reduced with truncated SVD when ``config_file`` is
    ``"spectrogram"``, otherwise openSMILE with the given config), groups
    every ten consecutive feature vectors into one, and then runs t-SNE, PCA,
    SOM, UMAP, Isomap and several K-means clusterings.  Results are written
    to ``static/data/<session_key>/data.csv`` and ``metadata.csv``.

    Relies on module-level names defined elsewhere in the file, among them
    ``config_dir``, ``smilextract``, ``update_config``, ``convert_range`` and
    ``color_dict``.

    Args:
        session_key: Name of the per-session upload/output sub-directory.
        config_file: ``"spectrogram"`` or an openSMILE config file name
            relative to ``config_dir``.
        segment_size: Segment length; divided by 10000 before use.
        step_size: Step length; divided by 10000 for the config and
            multiplied by the row index for each row's ``"start"`` value.

    Raises:
        FileNotFoundError: If the upload directory has no non-hidden file.
    """
    # Get the audio file name: the first directory entry that is not hidden.
    audio_dir = "static/uploads/" + session_key + "/"
    audio_name = next(
        (name for name in os.listdir(audio_dir) if name[0] != "."), None)
    if audio_name is None:
        # The original failed later with a NameError in this situation.
        raise FileNotFoundError("No audio file found in " + audio_dir)
    audio_path = audio_dir + audio_name

    # If mp3, convert to wav
    if audio_path[-3:] == "mp3":
        wav_audio = AudioSegment.from_mp3(audio_path)
        audio_path = audio_path[:-3] + "wav"  # set new audio_path
        wav_audio.export(audio_path, format="wav")

    # Get metadata (len() of the loaded segment is compared against
    # 3600000 below, i.e. it is treated as milliseconds).
    loaded_sound = AudioSegment.from_wav(audio_path)
    audio_duration = len(loaded_sound)
    frame_rate = loaded_sound.frame_rate

    # If duration is longer than 1 hour, segment into chunks
    if audio_duration > 3600000:
        chunks = []
        chunk_start_time = 0  # seconds
        while chunk_start_time * 1000 < audio_duration:
            chunk_path = (audio_dir +
                          str(int((chunk_start_time / 3600) + 1)) + ".wav")
            subprocess.call(["sox", audio_path, chunk_path, "trim",
                             str(chunk_start_time), "3600"])
            chunks.append(chunk_path)
            chunk_start_time += 3600
    else:
        chunks = [audio_path]

    # Create dir for output and set filenames.  os.makedirs replaces the
    # unchecked `mkdir` subprocess: it also creates missing parents and does
    # not complain when the directory already exists.
    output_dir = "static/data/" + session_key + "/"
    os.makedirs(output_dir, exist_ok=True)
    output_path = output_dir + audio_name.split(".")[0] + ".mfcc.htk"

    if config_file == "spectrogram":
        waveform = wavfile.read(audio_path)[1]
        print(frame_rate)
        print(segment_size)
        print(int(frame_rate*segment_size))
        f, t, Sxx = signal.spectrogram(
            waveform, fs=frame_rate,
            nperseg=int(frame_rate*(segment_size/10000)), noverlap=0)
        Sxx_transpose = Sxx.transpose()
        print("scipy shape: ", Sxx_transpose.shape)

        # Reduce dimensionality to 39 with svd
        svd = TruncatedSVD(n_components=39)
        result = svd.fit_transform(Sxx_transpose)
        print("scipy shape2: ", result.shape)
    else:
        # Prepend path to config file
        config_file = config_dir + config_file

        # Update config file with segment- and step length.
        # NOTE(review): the original comment said "divided by 1000 to get
        # second-format" but the code divides by 10000 -- confirm the unit.
        update_config(config_file, str(segment_size/10000),
                      str(step_size/10000))

        # Run opensmile to output features in output dir
        subprocess.call([smilextract, "-C", config_file, "-I", audio_path,
                         "-O", output_path])

        # Read file, and return formatted data
        htk_reader = HTKFile()
        htk_reader.load(output_path)
        result = np.array(htk_reader.data)

    # Concatenate ten vectors at a time into one snippet vector; a trailing
    # group of fewer than ten vectors is dropped.
    new_result = []
    temp_list = []
    for vec in result:
        temp_list.append(vec)
        if len(temp_list) == 10:
            new_result.append(np.concatenate(tuple(temp_list), axis=0))
            temp_list = []
    result = np.array(new_result)

    # Run data through t-SNE
    tsne = TSNE(n_components=2, perplexity=25)
    Y1 = convert_range(tsne.fit_transform(result))
    print("t-SNE done")

    # Run data through PCA
    pca = PCA(n_components=2)
    Y2 = convert_range(pca.fit_transform(result))
    print("PCA done")

    # Run data through SOM (the flag is a developer switch; the fallback
    # produces random coordinates).
    som = True
    if som:
        side = int(len(result)**0.5)
        print("SOM-grid-size: ", side)
        mapsize = [side, side]
        if mapsize[0] > 100:
            mapsize = [100, 100]
        som = sompy.SOMFactory.build(
            result, mapsize, mask=None, mapshape='planar', lattice='rect',
            normalization='var', initialization='pca',
            neighborhood='gaussian', training='batch', name='sompy')
        # verbose='debug' prints more, verbose=None prints nothing
        som.train(n_job=1, verbose='info')
        # Turn each flat best-matching-unit index into grid coordinates.
        som_output = np.array(np.array([
            np.array(np.unravel_index(int(bmu), (mapsize[0], mapsize[0])))
            for bmu in som._bmu[0]]))
        Y3 = convert_range(som_output)
        print("SOM done")
    else:
        Y3 = convert_range(np.array([
            np.array([random.randint(-50, 50), random.randint(-50, 50)])
            for i in range(len(Y2))]))

    # Run data through UMAP (same developer switch pattern as above)
    run_umap = True
    if run_umap:
        Y4 = convert_range(umap.UMAP().fit_transform(result))
        print("UMAP done")
    else:
        Y4 = convert_range(np.array([
            np.array([random.randint(-50, 50), random.randint(-50, 50)])
            for i in range(len(Y2))]))

    # Run data through isomap.  Its coordinates are written under the
    # "aeX"/"aeY" keys below (the autoencoder experiment those keys were
    # named after is disabled).
    IM = Isomap(n_components=2)
    Y5 = convert_range(IM.fit_transform(result))
    print("Isomap done")

    # K-means on raw features, one fit per cluster count.
    cluster_counts = (2, 3, 4, 5, 6, 7, 8, 20)
    kmeans_labels = {}
    for n_clusters in cluster_counts:
        kmeans_labels[n_clusters] = KMeans(
            n_clusters=n_clusters, random_state=0).fit(result).labels_
        print("kmeans" + str(n_clusters) + " done")

    # Format the embeddings and cluster colours as one dictionary per row.
    data = []
    for i, (coord1, coord2, coord3, coord4, coord5) in enumerate(
            zip(Y1, Y2, Y3, Y4, Y5)):
        row = {
            "id": i,
            "tsneX": float(coord1[0]),
            "tsneY": float(coord1[1]),
            "pcaX": float(coord2[0]),
            "pcaY": float(coord2[1]),
            "somX": float(coord3[0]),
            "somY": float(coord3[1]),
            "umapX": float(coord4[0]),
            "umapY": float(coord4[1]),
            "aeX": float(coord5[0]),
            "aeY": float(coord5[1]),
            "start": int(i*step_size),
            "active": 1,
            "color": "black",
        }
        for n_clusters in cluster_counts:
            row["kcolor" + str(n_clusters)] = color_dict[
                str(kmeans_labels[n_clusters][i])]
        data.append(row)

    # Save data as csv to be able to load later
    keys = data[0].keys()
    with open(output_dir + "data.csv", 'w') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)

    # Save metadata as csv to be able to load later
    metadata = [{"audio_duration": audio_duration,
                 "audio_path": audio_path,
                 "segment_size": segment_size,
                 "step_size": step_size,
                 "chunks": ",".join(chunks)}]
    keys = metadata[0].keys()
    with open(output_dir + "metadata.csv", 'w') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(metadata)
# --- Example #7 (scraped-snippet separator; original marker text: "Beispiel #7" / "0") ---
def retrain(valid_points, session_key, old_session_key, segment_size, step_size):
    """Recompute the 2-D embeddings for a subset of previously extracted rows.

    Copies the HTK feature file of ``old_session_key`` into the output
    directory of ``session_key``, keeps only the feature rows whose index is
    listed in ``valid_points``, runs t-SNE, PCA, SOM, UMAP and K-means on
    them, and writes ``data.csv`` and ``metadata.csv`` to
    ``static/data/<session_key>/``.

    Relies on module-level names defined elsewhere in the file, among them
    ``convert_range`` and ``color_dict``.

    Args:
        valid_points: Sequence whose first element is skipped; every other
            element is indexed as ``[row_index, start_time, color]``.
        session_key: Name of the new session's upload/output sub-directory.
        old_session_key: Session whose ``.mfcc.htk`` file is reused.
        segment_size: Stored in the metadata unchanged.
        step_size: Stored in the metadata unchanged.

    Raises:
        FileNotFoundError: If the upload directory has no non-hidden file.
    """
    # Get the audio file name: the first directory entry that is not hidden.
    audio_dir = "static/uploads/" + session_key + "/"
    audio_name = next(
        (name for name in os.listdir(audio_dir) if name[0] != "."), None)
    if audio_name is None:
        # The original failed later with a NameError in this situation.
        raise FileNotFoundError("No audio file found in " + audio_dir)
    audio_path = audio_dir + audio_name

    # If mp3, convert to wav
    if audio_path[-3:] == "mp3":
        wav_audio = AudioSegment.from_mp3(audio_path)
        audio_path = audio_path[:-3] + "wav"  # set new audio_path
        wav_audio.export(audio_path, format="wav")

    # Get metadata
    audio_duration = len(AudioSegment.from_wav(audio_path))

    # Create dir for output.  os.makedirs replaces the unchecked `mkdir`
    # subprocess: it also creates missing parents and does not complain when
    # the directory already exists.
    output_dir = "static/data/" + session_key + "/"
    os.makedirs(output_dir, exist_ok=True)

    # Copy the previously extracted feature file into the new session.
    htk_name = audio_name.split(".")[0] + ".mfcc.htk"
    path_to_old_htk = "static/data/" + old_session_key + "/" + htk_name
    path_to_new_htk = "static/data/" + session_key + "/" + htk_name
    subprocess.call(["cp", path_to_old_htk, path_to_new_htk])

    # Read file, and return formatted data
    htk_reader = HTKFile()
    htk_reader.load(path_to_old_htk)
    result = np.array(htk_reader.data)

    # The first entry of valid_points is skipped (presumably a header row --
    # TODO confirm with the caller).
    start_times = [point[1] for point in valid_points[1:]]
    colors = [point[2] for point in valid_points[1:]]
    # A set makes the per-row membership test O(1) instead of scanning a list.
    valid_index_set = {point[0] for point in valid_points[1:]}
    # NOTE(review): rows are kept in feature-file order while start_times and
    # colors follow valid_points order -- this assumes valid_points is sorted
    # by row index.
    new_result = np.array(
        [line for i, line in enumerate(result) if i in valid_index_set])

    # Run data through t-SNE
    tsne = TSNE(n_components=2, perplexity=25)
    Y1 = convert_range(tsne.fit_transform(new_result))
    print("t-SNE done")

    # Run data through PCA
    pca = PCA(n_components=2)
    Y2 = convert_range(pca.fit_transform(new_result))
    print("PCA done")

    # Run data through SOM (the flag is a developer switch; the fallback
    # produces random coordinates).
    som = True
    if som:
        side = int(len(new_result)**0.5)
        print("SOM-grid-size: ", side)
        mapsize = [side, side]
        if mapsize[0] > 100:
            mapsize = [100, 100]
        som = sompy.SOMFactory.build(
            new_result, mapsize, mask=None, mapshape='planar',
            lattice='rect', normalization='var', initialization='pca',
            neighborhood='gaussian', training='batch', name='sompy')
        # verbose='debug' prints more, verbose=None prints nothing
        som.train(n_job=1, verbose='info')
        # Turn each flat best-matching-unit index into grid coordinates.
        som_output = np.array(np.array([
            np.array(np.unravel_index(int(bmu), (mapsize[0], mapsize[0])))
            for bmu in som._bmu[0]]))
        Y3 = convert_range(som_output)
        print("SOM done")
    else:
        Y3 = convert_range(np.array([
            np.array([random.randint(-50, 50), random.randint(-50, 50)])
            for i in range(len(Y2))]))

    # Run data through UMAP (same developer switch pattern as above)
    run_umap = True
    if run_umap:
        Y4 = convert_range(umap.UMAP().fit_transform(new_result))
        print("UMAP done")
    else:
        Y4 = convert_range(np.array([
            np.array([random.randint(-50, 50), random.randint(-50, 50)])
            for i in range(len(Y2))]))

    # Autoencoder is disabled; random placeholder coordinates are emitted.
    ae = False
    if ae:
        # Uses the filtered rows so the output lines up with Y1..Y4 (the
        # original passed the unfiltered `result` here).
        Y5 = convert_range(AE(new_result))
    else:
        Y5 = convert_range(np.array([
            np.array([random.randint(-50, 50), random.randint(-50, 50)])
            for i in range(len(Y2))]))
    print("Autoencoder done")

    # K-means on raw features, one fit per cluster count.
    cluster_counts = (2, 3, 4, 5, 6, 7, 8)
    kmeans_labels = {}
    for n_clusters in cluster_counts:
        kmeans_labels[n_clusters] = KMeans(
            n_clusters=n_clusters, random_state=0).fit(new_result).labels_
        print("kmeans" + str(n_clusters) + " done")

    # Format the embeddings and cluster colours as one dictionary per row.
    data = []
    for i, (coord1, coord2, coord3, coord4, coord5, start_time,
            color) in enumerate(zip(Y1, Y2, Y3, Y4, Y5, start_times, colors)):
        row = {
            "id": i,
            "tsneX": float(coord1[0]),
            "tsneY": float(coord1[1]),
            "pcaX": float(coord2[0]),
            "pcaY": float(coord2[1]),
            "somX": float(coord3[0]),
            "somY": float(coord3[1]),
            "umapX": float(coord4[0]),
            "umapY": float(coord4[1]),
            "aeX": float(coord5[0]),
            "aeY": float(coord5[1]),
            "start": start_time,
            "active": 1,
            "color": color,
        }
        for n_clusters in cluster_counts:
            row["kcolor" + str(n_clusters)] = color_dict[
                str(kmeans_labels[n_clusters][i])]
        data.append(row)

    # Save data as csv to be able to load later
    keys = data[0].keys()
    with open(output_dir + "data.csv", 'w') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(data)

    # Save metadata as csv to be able to load later
    metadata = [{"audio_duration": audio_duration,
                 "audio_path": audio_path,
                 "segment_size": segment_size,
                 "step_size": step_size}]
    keys = metadata[0].keys()
    with open(output_dir + "metadata.csv", 'w') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(metadata)
def _load_validation_features(feature_format, spect_path, file_id,
                              expected_shape):
    """Load one feature matrix and return it as a (1, rows, cols, 1) array.

    Args:
        feature_format: ``'h5'`` or ``'mfc'``.
        spect_path: Directory prefix of the feature files.
        file_id: File id from the file list; four trailing characters are
            stripped for h5 files and eight for mfc files.
        expected_shape: Target shape handed to ``correct_dimensions``.

    Raises:
        ValueError: If ``feature_format`` is not a supported format.
    """
    if feature_format == 'h5':
        with h5py.File(spect_path + file_id[:-4] + '.h5', 'r') as hf:
            imagedata = np.array(hf.get('features'))
        # Same [-15.0966, 2.25745] -> [0, 1] rescaling for every network.
        imagedata = (imagedata + 15.0966) / (15.0966 + 2.25745)
    elif feature_format == 'mfc':
        htk_reader = HTKFile()
        htk_reader.load(spect_path + file_id[:-8] + '.mfc')
        imagedata = np.array(htk_reader.data)
        imagedata = imagedata / 18.0
    else:
        # The original fell through here and either crashed with an unbound
        # local or silently reused the previous file's matrix.
        raise ValueError(
            "Unsupported feature format: %r" % (feature_format,))

    imagedata = correct_dimensions(imagedata, expected_shape)
    return np.reshape(imagedata,
                      (1, imagedata.shape[0], imagedata.shape[1], 1))


def dataval_generator(filelistpath, batch_size=32, shuffle=False):
    """Yield validation batches for three networks indefinitely.

    For every file id the feature matrix of each of the three networks is
    loaded with that network's own module-level settings (``features1..3``,
    ``SPECTPATH1..3``, ``expected_shape1..3``) and stored at the same batch
    position in three parallel batch arrays sized from ``spect1..3``.

    Also relies on the module-level names ``dataset``, ``LABELPATH`` and
    ``correct_dimensions`` defined elsewhere in the file.

    Args:
        filelistpath: Indexable whose first element is the path of a text
            file with one file id per line (only element 0 is used).
        batch_size: Number of samples per yielded batch.
        shuffle: When true, the file list is reshuffled each time iteration
            wraps around to the first entry.

    Yields:
        A list ``[batch1, batch2, batch3]`` of arrays, one per network.
    """
    batch_index = 0
    image_index = -1

    with open(filelistpath[0], 'r') as filelist:
        filenames = filelist.readlines()

    # The labels are not part of the yielded batches; they are still read so
    # that a missing or malformed label file fails up front as before.  Each
    # file is now closed as soon as it has been consumed.
    labels_dict = {}
    for label_file in dataset:
        with open(LABELPATH + label_file, 'r') as label_handle:
            labels_list = csv.reader(label_handle)
            next(labels_list)  # skip the header row
            for k, r, v in labels_list:
                labels_dict[r + '/' + k + '.wav'] = v

    while True:
        image_index = (image_index + 1) % len(filenames)

        # Reshuffle whenever a new pass over the file list begins.
        if shuffle and image_index == 0:
            random.shuffle(filenames)

        file_id = filenames[image_index].rstrip()

        if batch_index == 0:
            # Start fresh spectrogram batches, one per network.
            spect_batch1 = np.zeros(
                [batch_size, spect1.shape[0], spect1.shape[1], 1])
            spect_batch2 = np.zeros(
                [batch_size, spect2.shape[0], spect2.shape[1], 1])
            spect_batch3 = np.zeros(
                [batch_size, spect3.shape[0], spect3.shape[1], 1])

        # Feature matrix for network 1
        spect_batch1[batch_index, :, :, :] = _load_validation_features(
            features1, SPECTPATH1, file_id, expected_shape1)

        # Feature matrix for network 2
        spect_batch2[batch_index, :, :, :] = _load_validation_features(
            features2, SPECTPATH2, file_id, expected_shape2)

        # Feature matrix for network 3
        spect_batch3[batch_index, :, :, :] = _load_validation_features(
            features3, SPECTPATH3, file_id, expected_shape3)

        batch_index += 1

        if batch_index >= batch_size:
            batch_index = 0
            inp = [spect_batch1, spect_batch2, spect_batch3]

            yield inp
# --- Example #9 (scraped-snippet separator; original marker text: "Beispiel #9" / "0") ---
def _fit_frame_count(frames, target_shape):
    """Pad or fold *frames* so that it has ``target_shape[0]`` rows.

    Args:
        frames: Feature array with one row per frame (2-D is assumed, since
            the caller later reads ``shape[0]`` and ``shape[1]``).
        target_shape: Shape of the array to produce; only ``target_shape[0]``
            drives the padding/folding decisions.

    Returns:
        *frames* itself when the row count already matches, otherwise a new
        array with ``target_shape[0]`` rows.
    """
    target_len = target_shape[0]
    n_frames = frames.shape[0]
    if n_frames == target_len:
        return frames

    if n_frames < target_len:
        missing = target_len - n_frames
        # `<=` (the original used `<` here and `>` below): when exactly half
        # of the frames were missing neither branch ran and an all-zero
        # matrix was returned.
        if missing <= target_len / 2:
            # Pad by repeating the last `missing` frames once.
            return np.vstack((frames, frames[n_frames - missing:]))

        # Less than half is available: tile the whole input as often as it
        # fits, then append its trailing frames to fill the remainder.
        repeats = target_len // n_frames
        leftover = target_len - n_frames * repeats
        tiled = np.vstack([frames] * repeats)
        return np.vstack((tiled, frames[n_frames - leftover:]))

    # More frames than expected: fold the surplus back onto the start.
    fitted = np.zeros(target_shape)
    excess = n_frames - target_len
    if excess <= target_len / 2:  # `<=` for the same reason as above
        head = excess + 1
        # The first `head` rows are the average of the first and the last
        # `head` input frames; the rest is copied from the input unchanged.
        fitted[:head, :] = np.mean(
            np.array([frames[:head, :], frames[n_frames - head:, :]]),
            axis=0)
        fitted[head:target_len, :] = frames[head:target_len]
    else:
        chunks = n_frames // target_len
        leftover = n_frames - target_len * chunks
        # NOTE(review): the accumulator is divided by `chunks` on every
        # iteration and the leftover frames are blended in on every
        # iteration as well, so this is not a plain mean over the chunks.
        # Kept exactly as in the original -- confirm whether it is intended.
        for index in range(chunks):
            fitted[:target_len, :] = np.sum(
                [
                    fitted,
                    frames[index * target_len:(index + 1) * target_len]
                ],
                axis=0) / chunks
            fitted[:leftover, :] = np.mean(
                np.array([
                    frames[n_frames - leftover:, :],
                    fitted[:leftover, :]
                ]),
                axis=0)
    return fitted


def datatest_generator(filelistpath, batch_size=32, shuffle=False):
    """Endlessly yield batches of feature matrices for prediction.

    Relies on module-level configuration defined elsewhere in the file:
    ``dataset``, ``LABELPATH``, ``SPECTPATH``, ``features``, ``spect``,
    ``expected_shape`` and, for ``'npy'`` features, ``min_value`` /
    ``max_value``.

    Args:
        filelistpath: Path to a text file with one file id per line.
        batch_size: Number of feature matrices per yielded batch.
        shuffle: When true, the file list is reshuffled every time the
            generator wraps around to its first entry.

    Yields:
        A one-element list holding an array of shape
        ``(batch_size, spect.shape[0], spect.shape[1], 1)``.

    Raises:
        ValueError: If the file list is empty or ``features`` is not one of
            ``'h5'``, ``'npy'`` or ``'mfc'``.
    """
    with open(filelistpath, 'r') as filelist:
        filenames = filelist.readlines()
    if not filenames:
        raise ValueError('file list is empty: {}'.format(filelistpath))

    # Read labels and save them in a dict.
    # NOTE(review): the dict is never read in this generator; the loading is
    # kept so that missing or malformed label files still fail early.
    labels_dict = {}
    for label_file in dataset:
        with open(LABELPATH + label_file, 'r') as handle:
            rows = csv.reader(handle)
            next(rows)  # skip the first (header) row
            for k, r, v in rows:
                labels_dict[r + '/' + k] = v

    batch_index = 0
    image_index = -1
    while True:
        image_index = (image_index + 1) % len(filenames)

        # Reshuffle at the start of every pass over the file list.
        if shuffle and image_index == 0:
            random.shuffle(filenames)

        file_id = filenames[image_index].rstrip()

        if batch_index == 0:
            # Start a fresh batch so an already yielded one is not modified.
            spect_batch = np.zeros(
                [batch_size, spect.shape[0], spect.shape[1], 1])

        # Load the features in the selected format.
        if features == 'h5':
            # Original note: use file_id[:-4] for the evaluation dataset.
            with h5py.File(SPECTPATH + file_id + '.h5', 'r') as hf:
                imagedata = np.array(hf.get('features'))
            imagedata = (imagedata + 15.0966) / (15.0966 + 2.25745)
        elif features == 'npy':
            imagedata = np.load(SPECTPATH + file_id + '.npy')
            # Min-max scaling is applied only when both bounds are non-zero.
            if max_value != 0 and min_value != 0:
                imagedata = (imagedata - min_value) / (max_value - min_value)
        elif features == 'mfc':
            htk_reader = HTKFile()
            # The last eight characters of the id are dropped before
            # appending '.mfc'.
            htk_reader.load(SPECTPATH + file_id[:-8] + '.mfc')
            imagedata = np.array(htk_reader.data)
            imagedata = imagedata / 17.0
        else:
            raise ValueError(
                'unsupported feature format: {!r}'.format(features))

        # Files whose frame count differs from the expected shape (warblr
        # dataset, per the original comment) are padded or folded to fit.
        imagedata = _fit_frame_count(imagedata, expected_shape)

        imagedata = np.reshape(imagedata,
                               (1, imagedata.shape[0], imagedata.shape[1], 1))
        spect_batch[batch_index, :, :, :] = imagedata
        batch_index += 1

        # Hand the batch over once it is full.
        if batch_index >= batch_size:
            batch_index = 0
            inputs = [spect_batch]
            yield inputs
Beispiel #10
0
                win_shift=80)

# Read the raw audio samples from disk.
sig = mfcc.load_raw_signal('file.raw')

# Static MFCC+energy coefficients, then the deltas of those coefficients and
# the deltas of the deltas (acceleration).
static_feat = mfcc.get_feats(sig)
delta = mfcc.get_delta(static_feat, 2)
acc = mfcc.get_delta(delta, 2)

# Stack static, delta and acceleration columns side by side
# (39 features in total, per the original author's note).
feat = np.hstack((static_feat, delta, acc))

# Produce the reference features with HTK through HCopy.
# Comment this call out if HTK is not installed.
HCopy('hcopy8k.conf', 'file.raw', 'file8k.htk')

# Load the feature file written by the call above.
htk = HTKFile()
htk.load('file8k.htk')

# Element-wise difference between the two implementations.
diff = feat - htk.data

# Report the largest absolute deviation between the two methods.
max_abs_diff = np.abs(diff).max()
print("Maximum difference: {}".format(max_abs_diff))

# Plot the difference matrix and save it as an image.
P.pcolormesh(diff.T)
P.savefig('diff.png')
Beispiel #11
0
def _augmented_samples(imagedata, label):
    """Yield the original sample, then ``AUGMENT_SIZE - 1`` augmented ones.

    Args:
        imagedata: Feature array of shape ``(1, rows, cols, 1)``.
        label: One-element array holding the label of *imagedata*.

    Yields:
        ``(features, label)`` pairs; the first is the input itself, the rest
        come from the module-level ``datagen`` (presumably a Keras
        ``ImageDataGenerator`` -- confirm where it is defined).
    """
    gen_img = datagen.flow(imagedata,
                           label,
                           batch_size=1,
                           shuffle=False,
                           save_to_dir=None)
    yield imagedata, label
    for _ in range(AUGMENT_SIZE - 1):
        yield gen_img.next()


def data_generator(filelistpath, batch_size=16, shuffle=False):
    """Endlessly yield augmented training batches of features and labels.

    Every file contributes its own feature matrix plus ``AUGMENT_SIZE - 1``
    augmented variants. Relies on module-level configuration defined
    elsewhere in the file: ``LABELPATH``, ``SPECTPATH``, ``features``,
    ``spect``, ``datagen`` and ``AUGMENT_SIZE``.

    Args:
        filelistpath: Path to a text file with one file id per line.
        batch_size: Number of samples per yielded batch.
        shuffle: When true, the file list is shuffled once up front and again
            every time the generator wraps around to its first entry.

    Yields:
        ``([features], [labels])`` where ``features`` has shape
        ``(batch_size, spect.shape[0], spect.shape[1], 1)`` and ``labels``
        has shape ``(batch_size, 1)``.

    Raises:
        ValueError: If the file list is empty or ``features`` is neither
            ``'h5'`` nor ``'mfc'``.
        KeyError: If a file id has no entry in the label files.
    """
    with open(filelistpath, 'r') as filelist:
        filenames = filelist.readlines()
    if not filenames:
        raise ValueError('file list is empty: {}'.format(filelistpath))

    # Shuffling the file list.
    if shuffle:
        random.shuffle(filenames)

    dataset = ['BirdVox-DCASE-20k.csv', 'ff1010bird.csv', 'warblrb10k.csv']

    # Map '<second column>/<first column>.wav' to the third column of every
    # label row.
    labels_dict = {}
    for label_file in dataset:
        with open(LABELPATH + label_file, 'r') as handle:
            rows = csv.reader(handle)
            next(rows)  # skip the first (header) row
            for k, r, v in rows:
                labels_dict[r + '/' + k + '.wav'] = v

    batch_index = 0
    image_index = -1
    while True:
        image_index = (image_index + 1) % len(filenames)

        # Reshuffle at the start of every pass over the file list.
        if shuffle and image_index == 0:
            random.shuffle(filenames)

        file_id = filenames[image_index].rstrip()

        if features == 'h5':
            with h5py.File(SPECTPATH + file_id + '.h5', 'r') as hf:
                imagedata = np.array(hf.get('features'))
            # Normalizing intensity values of the spectrogram from
            # [-15.0966, 2.25745] to the [0, 1] range.
            imagedata = (imagedata + 15.0966) / (15.0966 + 2.25745)
        elif features == 'mfc':
            htk_reader = HTKFile()
            # The last four characters of the id (presumably the '.wav'
            # extension) are replaced by '.mfc'.
            htk_reader.load(SPECTPATH + file_id[:-4] + '.mfc')
            imagedata = np.array(htk_reader.data)
            imagedata = imagedata / 17.0
        else:
            raise ValueError(
                'unsupported feature format: {!r}'.format(features))

        imagedata = np.reshape(imagedata,
                               (1, imagedata.shape[0], imagedata.shape[1], 1))

        # A fresh one-element array per file; the assignment converts the CSV
        # string to a float exactly as the original array assignment did.
        label = np.zeros([1])
        label[:] = labels_dict[file_id]

        for sample, sample_label in _augmented_samples(imagedata, label):
            if batch_index == 0:
                # Allocate new arrays for every batch so that a batch that
                # was already yielded is never overwritten.
                aug_spect_batch = np.zeros(
                    [batch_size, spect.shape[0], spect.shape[1], 1])
                aug_label_batch = np.zeros([batch_size, 1])

            aug_spect_batch[batch_index, :, :, :] = sample
            aug_label_batch[batch_index, :] = sample_label
            batch_index += 1

            # Checked after every sample (the original checked only after
            # augmented ones, so a batch filled by an original sample made
            # the next write index past the end of the batch).
            if batch_index >= batch_size:
                batch_index = 0
                inputs = [aug_spect_batch]
                outputs = [aug_label_batch]
                yield inputs, outputs
Beispiel #12
0
# Create the feature extractor, configured with the filter file.
mfcc = MFCC_HTK(filter_file='filter.csv')

# Read the raw audio samples from disk.
sig = mfcc.load_raw_signal('file.raw')

# Static MFCC+energy coefficients, then the deltas of those coefficients and
# the deltas of the deltas (acceleration).
static_feat = mfcc.get_feats(sig)
delta = mfcc.get_delta(static_feat, 2)
acc = mfcc.get_delta(delta, 2)

# Stack static, delta and acceleration columns side by side
# (39 features in total, per the original author's note).
feat = np.hstack((static_feat, delta, acc))

# Produce the reference features with HTK through HCopy.
# Comment this call out if HTK is not installed.
HCopy('hcopy.conf', 'file.raw', 'file.htk')

# Load the feature file written by the call above.
htk = HTKFile()
htk.load('file.htk')

# Element-wise difference between the two implementations.
diff = feat - htk.data

# Report the largest absolute deviation between the two methods.
max_abs_diff = np.abs(diff).max()
print("Maximum difference: {}".format(max_abs_diff))

# Plot the difference matrix and save it as an image.
P.pcolormesh(diff.T)
P.savefig('diff.png')
Beispiel #13
0
import numpy as np
import os
from HTK import HTKFile
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

# Label names; these must match the labels used in answer.mlf.
label_list = ["ling", "yi", "er", "san", "si", "wu", "liu", "qi", "ba", "jiu"]
labels = {}
MFCC = HTKFile()

# Read every feature file of the test set in sorted filename order, keeping
# the feature matrix and its number of frames side by side.
filelist = sorted(os.listdir(os.getcwd() + "/MFCC/testing"))
test_x = []
input_lengths = []
for filename in filelist:
    MFCC.load("MFCC/testing/" + filename)
    frames = np.array(MFCC.data)
    test_x.append(frames)
    input_lengths.append(len(frames))

# Pad all sequences at the end so they share one length.
test_x = pad_sequences(test_x, dtype='float', padding='post')
input_lengths = np.array(input_lengths)

# Run the stored model on the padded test set.
test_model = load_model("model.h5")
pred = test_model.predict(test_x)

# decode CTC and output answer
with open("result/result_nn.mlf", "w") as f:
    f.write("#!MLF!#\n")
    for i in range(len(filelist)):
Beispiel #14
0
    "liN": 0,
    "#i": 1,
    "#er": 2,
    "san": 3,
    "sy": 4,
    "#u": 5,
    "liou": 6,
    "qi": 7,
    "ba": 8,
    "jiou": 9,
    "blank": 10
}

# Parse the label file into a dict of {utterance key: list of label ids}.
labels = {}
MFCC = HTKFile()
with open("labels/Clean08TR.mlf") as f:
    f.readline()  # the first line is skipped (presumably the MLF header)
    key = None
    value = []
    for line in f:
        marker = line[0]
        if marker == '"':
            # A quoted line starts a new utterance; the first four and the
            # last six characters are cut away to obtain the key.
            key = line[4:-6]
            continue
        if marker == '.':
            # A line starting with a dot ends the utterance: store what was
            # collected and start a new list.
            labels[key] = value
            value = []
            continue
        if line == "sil\n":
            # Silence entries are not part of the label sequence.
            continue
        # Any other line is a label name followed by a newline.
        value.append(label_map[line[:-1]])

# load training data
train_x = []