Example #1
    def benchmarkOrder(self, folder, maxOrder, train=0.8, saveFig=False):

        np.random.seed(0)
        # We get all the midi files
        files = []
        for filename in glob(folder + '/**', recursive=True):
            if filename[filename.rfind("."):] in [".mid", ".midi"]:
                files.append(filename)

        np.random.shuffle(files)

        print("____ PROCESSING THE DATA")

        trainData = data.data()
        trainData.addFiles(files[:int(train * len(files))], augmentation=True)

        testData = data.data()
        testData.addFiles(files[int(train * len(files)):], augmentation=False)

        retMeans = np.zeros(maxOrder)
        retStd = np.zeros(maxOrder)

        print("There is", trainData.getSize(), "scores for training")

        for order in range(1, maxOrder):
            self.cleanWeights(order=order)
            self.train(trainData)

            tmp = self.getLikelihoodfromData(testData)
            means = np.zeros(testData.getSize())

            for i in range(len(tmp)):
                means[i] = np.mean(tmp[i])

            retMeans[order] = np.mean(means)
            retStd[order] = np.std(means)

        plt.plot(retMeans)
        plt.ylabel('Likelihood over dataset')
        plt.xlabel('Max order of the model')
        plt.fill_between(range(len(retMeans)),
                         retMeans + retStd,
                         retMeans - retStd,
                         alpha=.5)
        if saveFig is False:
            plt.show()
        else:
            plt.savefig("Benchmark.eps")

        print("TRAIN DATA")
        print(files[:int(train * len(files))])

        for i in range(len(means)):
            print(files[int(train * len(files)):][i], "->", means[i])

        return (retMeans, retStd)
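A minimal usage sketch (an assumption: benchmarkOrder is taken to be a method of the idyom.idyom class, as the self.train and self.cleanWeights calls suggest; the dataset path is hypothetical):

# Hypothetical usage: sweep the maximum order on a folder of MIDI files.
model = idyom.idyom(maxOrder=24)
means, stds = model.benchmarkOrder("dataset/bach", maxOrder=24, train=0.8, saveFig=True)
# means[k] and stds[k] hold the mean and std of the per-piece likelihood for order k (index 0 stays unused).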
Example #2
    def benchmarkQuantization(
            self,
            folder,
            quantizations=[1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24, 32, 64],
            train=0.8):

        # We get all the midi files
        files = []
        for filename in glob(folder + '/**', recursive=True):
            if filename[filename.rfind("."):] in [".mid", ".midi"]:
                files.append(filename)

        np.random.shuffle(files)

        print("____ PROCESSING THE DATA")

        retMeans = np.zeros(len(quantizations))
        retStd = np.zeros(len(quantizations))
        k = 0
        for quantization in quantizations:

            trainData = data.data(quantization=quantization)
            trainData.addFiles(files[:int(train * len(files))])

            testData = data.data(quantization=quantization)
            testData.addFiles(files[int(train * len(files)):],
                              augmentation=False)

            print(trainData.getData("length")[0])

            self.cleanWeights(order=self.maxOrder)
            self.train(trainData)

            tmp = self.getLikelihoodfromData(testData)
            means = np.zeros(testData.getSize())

            for i in range(len(tmp)):
                means[i] = np.mean(tmp[i])

            retMeans[k] = np.mean(means)
            retStd[k] = np.std(means)
            k += 1

        plt.plot(retMeans)
        plt.xticks(np.arange(len(retMeans)), quantizations)
        plt.ylabel('Likelihood over dataset')
        plt.xlabel('Quantization')
        plt.fill_between(range(len(retMeans)),
                         retMeans + retStd,
                         retMeans - retStd,
                         alpha=.5)
        plt.show()

        return (retMeans, retStd)
Example #3
def Train(folder, k_fold=5, quantization=24, maxOrder=20, time_representation=False, \
    zero_padding=True, long_term_only=False, short_term_only=False):

    if folder[-1] == "/":
        folder = folder[:-1]

    if os.path.isfile("models/" + str(folder[folder.rfind("/") + 1:]) +
                      "_quantization_" + str(quantization) + "_maxOrder_" +
                      str(maxOrder) + ".model"):
        print(
            "There is already a model saved for these data, would you like to train again? (y/N)\n"
        )
        rep = input("")
        while rep not in ["y", "Y", "n", "N", "", "\n"]:
            rep = input("We did not understand, please type again (y/N).")

        if rep.lower() == "y":
            pass
        else:
            return

    L = idyom.idyom(maxOrder=maxOrder)
    M = data.data(quantization=quantization)
    M.parse(folder)
    L.train(M)

    L.save("models/" + str(folder[folder.rfind("/") + 1:]) + "_quantization_" +
           str(quantization) + "_maxOrder_" + str(maxOrder) + ".model")
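A hedged usage sketch of Train; the saved model path below follows the string construction above (the dataset folder is hypothetical):

# Hypothetical call: trains on dataset/bach and writes
# models/bach_quantization_24_maxOrder_20.model
Train("dataset/bach", quantization=24, maxOrder=20)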
Example #4
def Train(folder, jump=False):

    L = idyom.idyom(jump=jump, maxOrder=100)
    M = data.data(quantization=24)
    M.parse(folder)
    L.train(M)

    L.save("models/jump_" + str(jump) + ".model")
Example #5
def cross_validation(folder, k_fold=10, maxOrder=20, quantization=24, time_representation=False, \
          zero_padding=True, long_term_only=False, short_term_only=False):
    """

	"""

    np.random.seed(0)

    Likelihoods = []

    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)

    np.random.shuffle(files)

    if int(k_fold) == -1:
        k_fold = len(files)

    if int(k_fold) > len(files):
        raise ValueError(
            "Cannot process with k_fold greater than number of files. Please use -k options to specify a smaller k for cross validation."
        )

    k_fold = len(files) // int(k_fold)

    validationFiles = []

    for i in tqdm(range(math.ceil(len(files) / k_fold))):
        trainData = files[:i * k_fold] + files[(i + 1) * k_fold:]
        evalData = files[i * k_fold:(i + 1) * k_fold]

        # Our IDyOM
        L = idyom.idyom(maxOrder=maxOrder)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)

        L.train(M)

        for file in evalData:
            tmp = L.getLikelihoodfromFile(file,
                                          long_term_only=long_term_only,
                                          short_term_only=short_term_only)
            # Replace NaN likelihoods (tmp[j] != tmp[j]) with the 1/30 floor probability
            for j in range(len(tmp)):
                if tmp[j] != tmp[j]:
                    tmp[j] = 1 / 30
            Likelihoods.append(np.mean(tmp))
            filename = file[file.rfind("/") + 1:file.rfind(".")]
            validationFiles.append(filename)

    return Likelihoods, validationFiles
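A small worked sketch of the fold arithmetic above: the k_fold argument is reinterpreted as a fold size, so the actual number of iterations can exceed the requested k when len(files) is not divisible by it (the numbers below are hypothetical):

import math

files = list(range(23))                      # stand-in for 23 MIDI paths
k_fold = 5
fold_size = len(files) // k_fold             # 4 files per fold
n_folds = math.ceil(len(files) / fold_size)  # 6 iterations, the last one over only 3 files
print(fold_size, n_folds)                    # -> 4 6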
Example #6
def checkDataSet(folder):
    """
	Function that checks whether the dataset is corrupted (contains duplicates).
	Does not delete automatically!
	"""

    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)

    D = data.data(deleteDuplicates=False)
    D.addFiles(files)
    DATA = D.getData("pitch")

    delete = []
    delete_pitches = []

    for i in range(len(files)):
        for j in range(i, len(files)):
            if i != j and comparePitches(DATA[i], DATA[j]):

                print(files[i], "matches", files[j])

                # We recommend deleting the smallest one
                if len(DATA[i]) > len(DATA[j]):
                    for d in delete_pitches:
                        if comparePitches(d, DATA[i]):
                            delete.append(files[i])
                            delete_pitches.append(DATA[i])
                            break

                    delete.append(files[j])
                    delete_pitches.append(DATA[j])
                else:
                    for d in delete_pitches:
                        if comparePitches(d, DATA[j]):
                            delete.append(files[j])
                            delete_pitches.append(DATA[j])
                            break

                    delete.append(files[i])
                    delete_pitches.append(DATA[i])

    if len(delete) > 0:
        print(
            "We recommend deleting the following files because they are duplicates:"
        )
        print(list(set(delete)))
    else:
        print("We did not find any duplicates.")
Example #7
    def getSurprisefromFile(self,
                            file,
                            zero_padding=False,
                            time_representation=False,
                            short_term_only=False,
                            long_term_only=False):
        """
		Return surprise (-log2(p)) over a score

		:param file: file to compute surprise on
		:param zero_padding: return surprise as spikes if True

		:type file: str
		:type zero_padding: bool

		:return: list of float

		"""

        probas, entropies = self.getLikelihoodfromFile(
            file,
            short_term_only=short_term_only,
            long_term_only=long_term_only)

        # We compute the surprise by using -log2(probas)
        probas = -np.log(probas + sys.float_info.epsilon) / np.log(2)

        if time_representation is False:
            return probas, entropies

        D = data.data()
        D.addFile(file)
        # We get the length of the notes
        lengths = D.getData("length")[0]

        surprise = []
        entropy = []
        for i in range(len(probas)):
            surprise.append(probas[i])
            entropy.append(entropies[i])
            for j in range(int(lengths[i])):
                if zero_padding:
                    surprise.append(0)
                    entropy.append(0)
                else:
                    surprise.append(probas[i])
                    entropy.append(entropies[i])

        return surprise, entropy
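A toy illustration of the time expansion above, with hypothetical surprise values and note lengths; each note contributes its surprise once and is then repeated (or zero-padded) over its duration:

probas = [1.0, 2.0, 3.0]             # hypothetical per-note surprises (bits)
lengths = [2, 1, 0]                  # hypothetical note lengths in grid steps

surprise = []
for p, l in zip(probas, lengths):
    surprise.append(p)
    surprise.extend([p] * l)         # zero_padding=False repeats the note's value
print(surprise)                      # -> [1.0, 1.0, 1.0, 2.0, 2.0, 3.0]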
Example #8
def Train(folder, k_fold=5, quantization=24, maxOrder=20, time_representation=False, \
    zero_padding=True, long_term_only=False, short_term_only=False, viewPoints="both"):
    """
	Train a model with the passed parameters and then save it to the hard drive.
	"""

    if folder[-1] == "/":
        folder = folder[:-1]

    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]
    else:
        raise ValueError("We do not know these viewpoints ... ")

    if os.path.isfile("models/" + str(folder[folder.rfind("/") + 1:]) +
                      "_quantization_" + str(quantization) + "_maxOrder_" +
                      str(maxOrder) + "_viewpoints_" + str(viewPoints) +
                      ".model"):
        print(
            "There is already a model saved for these data, would you like to train again? (y/N)\n"
        )
        rep = input("")
        while rep not in ["y", "Y", "n", "N", "", "\n"]:
            rep = input("We did not understand, please type again (y/N).")

        if rep.lower() == "y":
            pass
        else:
            return

    preComputeEntropies = not (
        long_term_only or short_term_only
    )  # We only precompute if we need to combine short and long term models

    L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o)
    M = data.data(quantization=quantization)
    M.parse(folder, augment=True)
    L.train(M, preComputeEntropies=preComputeEntropies)

    L.save("models/" + str(folder[folder.rfind("/") + 1:]) + "_quantization_" +
           str(quantization) + "_maxOrder_" + str(maxOrder) + "_viewpoints_" +
           str(viewPoints) + ".model")
Example #9
def plotLikelihood(folder, k_fold=2):
    """
	Plot the likelihood of the jump model over the dataset and project it onto the score feature space.
	"""

    likelihood1, files = cross_validation(folder, k_fold=k_fold, jump=True)

    print(likelihood1)
    print(files)

    plt.ylabel("Likelihood")
    plt.bar([0], [np.mean(likelihood1)], color="b", yerr=[np.std(likelihood1)])
    plt.show()

    print()
    print()
    print()

    print("Mean:", np.mean(likelihood1))
    print("Std:", np.std(likelihood1))

    M = data.data()
    M.parse(folder)
    dat, files2 = M.getScoresFeatures()

    dico = dict(zip(files, likelihood1))

    weights = []

    for file in files2:
        if file in dico:
            weights.append(500 * dico[file]**2)
        else:
            weights.append(0)

    plt.scatter(dat[0][:len(dat[1])], dat[1], s=weights)

    plt.title('Database')
    plt.xlabel('Average 1-note interval')
    plt.ylabel('Average note onset')

    plt.show()
Example #10
def cross_validation(folder,
                     k_fold=10,
                     maxOrder=20,
                     quantization=24,
                     jump=False):
    """

	"""

    np.random.seed(0)

    Likelihoods = []

    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)

    np.random.shuffle(files)

    k_fold = len(files) // int(k_fold)

    validationFiles = []

    for i in range(len(files) // k_fold):
        trainData = files[:i * k_fold] + files[(i + 1) * k_fold:]
        evalData = files[i * k_fold:(i + 1) * k_fold]

        # Our IDyOM
        L = idyom.idyom(maxOrder=maxOrder, jump=jump)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)
        L.train(M)

        for file in evalData:
            Likelihoods.append(np.mean(L.getLikelihoodfromFile(file)))
            validationFiles.append(file)

    return Likelihoods, validationFiles
Example #11
	l3[file] = np.nan_to_num(l3[file])
	l1[file] = np.nan_to_num(l1[file])

	likelihoods1.append(np.mean(l1[file]))
	likelihoods2.append(2**-np.mean(l2[file]))
	likelihoods3.append(2**-np.mean(l3[file]))

plt.bar([1,2,3], [np.mean(likelihoods1), np.mean(likelihoods2), np.mean(likelihoods3)], yerr=[np.std(likelihoods1), np.std(likelihoods2), np.std(likelihoods3)])
plt.savefig(folder+"comparisonsIDYOM_IDYOMpy_JUMP.eps")
plt.show()

compareLikelihoods(likelihoods2, likelihoods1, name=folder+"compareLikelihoodsIDyOMpy_VS_IDyOM")

# plotting in the music space

M = data.data()
M.parse("../", augment=False)

dat2, files4 = M.getScoresFeatures()

weights = []
colors = []

for file in range(len(likelihoods1)):
	weights.append(80000*abs(likelihoods1[file]-likelihoods2[file])**2)
	if likelihoods1[file]-likelihoods2[file] < 0:
		colors.append('coral')
	elif likelihoods1[file]-likelihoods2[file] > 0:
		colors.append('deepskyblue')
	else:
		colors.append('black')
Example #12
    def getDistributionsfromFile(self,
                                 file,
                                 threshold,
                                 short_term_only=False,
                                 long_term_only=False,
                                 normalization=True):
        """
		Return the time-expanded surprise over a score together with the probabilities of "missing" notes
		(durations shorter than the played one whose normalized probability exceeds the threshold)

		:param file: file to compute the distributions on
		:param threshold: minimal normalized probability for a duration to be reported

		:type file: str
		:type threshold: float

		:return: (notes_surprise, missing_notes), two lists aligned on the time grid

		"""

        D = data.data()
        D.addFile(file)

        distribution = []

        for model in self.LTM:
            if model.viewPoint == "length":
                dat = D.getData(model.viewPoint)[0]

                STM = longTermModel.longTermModel(model.viewPoint,
                                                  maxOrder=20,
                                                  STM=True,
                                                  init=dat)

                for i in tqdm(range(1, len(dat))):
                    # we instantiate a Short Term Model for the current viewpoint

                    STM.train([dat[:i]], shortTerm=True)
                    predictions_LTM = model.getPrediction(dat[:i])
                    predictions_STM = STM.getPrediction(dat[:i])

                    durations = []
                    for duration in predictions_LTM:
                        if duration not in durations and predictions_LTM[
                                duration] != 0:
                            durations.append(duration)

                    for duration in predictions_STM:
                        if duration not in durations and predictions_STM[
                                duration] != 0:
                            durations.append(duration)

                    distribution_note = {}
                    for duration in durations:
                        if duration in predictions_LTM:
                            p1 = predictions_LTM[duration]
                            flag = True
                        else:
                            p1 = 1 / 30
                            flag = None
                        if duration in predictions_STM:
                            p2 = predictions_STM[duration]
                        else:
                            p2 = None

                        if self.stm and p2 is not None:
                            if flag is not None:
                                p = self.mergeProbas([p1, p2], [
                                    model.getRelativeEntropy(dat[:i]),
                                    STM.getRelativeEntropy(dat[:i])
                                ])
                            else:
                                p = p2
                        else:
                            p = p1

                        if long_term_only:
                            p = p1
                        if short_term_only:
                            p = p2
                            if p is None:
                                p = 1 / 30
                        distribution_note[duration] = p

                    distribution.append(distribution_note)

        ### Time Representation

        D = data.data()
        D.addFile(file)

        probas, entropies = self.getLikelihoodfromFile(
            file,
            short_term_only=short_term_only,
            long_term_only=long_term_only)

        # We compute the surprise by using -log2(probas)
        probas = -np.log(probas + sys.float_info.epsilon) / np.log(2)

        # We get the length of the notes
        lengths = D.getData("length")[0]

        ret = []
        for i in range(len(probas)):
            ret.append(probas[i])
            for j in range(int(lengths[i])):
                ret.append(0)

        notes_surprise = ret

        indexes = []
        probas = []
        current_index = 1
        for i in range(len(distribution)):
            sum_distribution = sum(distribution[i].values())
            keys = np.array(list(distribution[i])).astype(int)
            keys.sort()
            for duration in keys:
                duration = str(duration)
                if int(duration) < int(
                        lengths[i]
                ) and distribution[i][duration] / sum_distribution > threshold:
                    indexes.append(current_index + int(duration))
                    probas.append(distribution[i][duration] / sum_distribution)

                if normalization:
                    sum_distribution -= distribution[i][duration]
            current_index += int(lengths[i]) + 1

        missing_notes = np.zeros(len(notes_surprise))
        missing_notes[indexes] = probas

        plt.plot(notes_surprise)
        plt.plot(missing_notes)
        plt.legend(["surprise", "missing notes"])
        plt.show()

        return notes_surprise, missing_notes
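A toy walk-through of the threshold/renormalization step above (hypothetical numbers; duration keys stored as strings, as in the prediction dictionaries):

import numpy as np

dist = {"2": 0.5, "6": 0.3, "12": 0.2}   # hypothetical duration distribution for one note
length, threshold, normalization = 8, 0.4, True

flagged = []
sum_distribution = sum(dist.values())    # ~1.0
keys = np.array(list(dist)).astype(int)
keys.sort()
for duration in keys:
    duration = str(duration)
    if int(duration) < length and dist[duration] / sum_distribution > threshold:
        flagged.append((int(duration), dist[duration] / sum_distribution))
    if normalization:
        sum_distribution -= dist[duration]   # shorter durations leave the remaining mass
print(flagged)   # -> [(2, ~0.5), (6, ~0.6)]: 0.3 / 0.5 = 0.6 once duration 2 is removed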
Example #13
import sys 
sys.path.append('../')

from idyom import longTermModel
from idyom import data
from idyom import score
from idyom import idyom

import numpy as np
import matplotlib.pyplot as plt

L = idyom.idyom(maxOrder=20, jump=False, maxDepth=10)

M = data.data(quantization=6)

#M.parse("../dataset/")
#M.parse("../datasetprout/")
M.parse("../examples/dataBaseTest")

L.train(M)

L.sample([{"pitch": 74, "length": 24}])

s = L.generate(20)

print(s.getData())

s.plot()

s.writeToMidi("exGen.mid")
Example #14
def cross_validation(folder, k_fold=10, maxOrder=20, quantization=24, time_representation=False, \
          zero_padding=True, long_term_only=False, short_term_only=False,\
          viewPoints="both"):
    """
	Cross-validate a single folder using k-fold

	"""
    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]

    np.random.seed(0)

    ICs = []
    Entropies = []

    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)

    np.random.shuffle(files)

    if int(k_fold) == -1:
        k_fold = len(files)

    if int(k_fold) > len(files):
        raise ValueError(
            "Cannot process with k_fold greater than number of files. Please use -k options to specify a smaller k for cross validation."
        )

    k_fold = len(files) // int(k_fold)

    validationFiles = []

    for i in tqdm(range(math.ceil(len(files) / k_fold))):
        trainData = files[:i * k_fold] + files[(i + 1) * k_fold:]
        evalData = files[i * k_fold:(i + 1) * k_fold]

        L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)

        L.train(M)

        for file in evalData:
            IC, E = L.getSurprisefromFile(
                file,
                long_term_only=long_term_only,
                short_term_only=short_term_only,
                time_representation=time_representation,
                zero_padding=zero_padding)
            ICs.append(IC)
            Entropies.append(E)
            filename = file[file.rfind("/") + 1:file.rfind(".")]
            filename = filename.replace("-", "_")
            validationFiles.append(filename)

    return ICs, Entropies, validationFiles
Example #15
def Train_by_piece(folder, nb_pieces=20, quantization=24, maxOrder=20, time_representation=False, \
    zero_padding=True, long_term_only=False, short_term_only=False, viewPoints="both", \
    europa_init=True):
    """
	Train and evaluate a model piece by piece. This shows the evolution of the generalization error
	over the course of training and can, for instance, help identify how much data is needed to converge.
	"""

    name_temp_file = ".tmp_test_folder_" + folder[folder.rfind("/") +
                                                  1:] + "_" + str(
                                                      np.random.randint(
                                                          100, 999))

    if folder[-1] == "/":
        folder = folder[:-1]

    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]
    else:
        raise ValueError("We do not know this viewpoint ... ")

    L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o, evolutive=True)

    files = glob(folder + '/**.mid', recursive=True) + glob(
        folder + '/**.midi', recursive=True)

    random.shuffle(files)
    train = files[:-nb_pieces]
    test = files[-nb_pieces:]

    if europa_init:
        europe_files = glob('dataset/mixed2/**.mid', recursive=True) + glob(
            'dataset/mixed2/**.midi', recursive=True)
        train = europe_files[:100] + train

    if os.path.exists(name_temp_file):
        if os.path.isdir(name_temp_file):
            rmtree(name_temp_file)
        else:
            os.remove(name_temp_file)
    os.mkdir(name_temp_file)

    for file in test:
        copyfile(file, name_temp_file + file[file.rfind("/"):])

    note_counter = []
    dicos = []
    matrix = np.zeros((len(train), nb_pieces))
    print("___ Starting Training ___")
    k = 0
    for file in tqdm(train):
        try:
            M = data.data(quantization=quantization)
            M.parseFile(file)
            L.train(M, preComputeEntropies=False)

            S, E, files = L.getSurprisefromFolder(
                name_temp_file,
                time_representation=time_representation,
                long_term_only=long_term_only,
                short_term_only=short_term_only)
            note_counter.append(len(M.viewPointRepresentation["pitch"][0]))

            dico = {}
            for i in range(len(files)):
                dico[files[i]] = S[i]

            dicos.append(dico)
            tmp = []
            for s in S:
                tmp.append(np.mean(s))

            matrix[k, :] = tmp
            k += 1
        except (FileNotFoundError, RuntimeError, ValueError):
            print(file + " skipped.")

    for i in range(1, len(note_counter)):
        note_counter[i] += note_counter[i - 1]

    saving = {}
    saving['matrix'] = matrix
    saving['note_counter'] = note_counter
    saving['dico'] = dico

    if not os.path.exists("out/" + folder[folder.rfind("/"):]):
        os.makedirs("out/" + folder[folder.rfind("/"):])

    if not os.path.exists("out/" + folder[folder.rfind("/"):] + "/evolution/"):
        os.makedirs("out/" + folder[folder.rfind("/"):] + "/evolution/")

    pickle.dump(
        saving,
        open(
            "out/" + folder[folder.rfind("/") + 1:] + "/evolution/" +
            folder[folder.rfind("/") + 1:] + '.pickle', "wb"))
    sio.savemat(
        "out/" + folder[folder.rfind("/") + 1:] + "/evolution/" +
        folder[folder.rfind("/") + 1:] + '.mat', saving)

    print()
    print()
    print()
    print("Data saved at " + "out/" + folder[folder.rfind("/") + 1:] +
          "/evolution/" + folder[folder.rfind("/") + 1:] + '.pickle')
    print(
        "Including a .mat for matlab purpose and a .pickle for python purpose."
    )
    print()
    print()

    if not SERVER:
        plt.errorbar(note_counter,
                     np.mean(matrix, 1),
                     yerr=np.std(matrix, 1) / np.sqrt(nb_pieces))
        plt.title("Evolution of the mean IC over Learning (" +
                  folder[folder.rfind("/") + 1:] + ")")
        plt.ylabel("Mean IC (generlization error)")
        plt.xlabel("Learning (in notes)")
        plt.show()

    rmtree(name_temp_file)
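A hedged sketch of how the saved results might be reloaded afterwards (the path assumes the folder was "dataset/bach" during training, which is hypothetical):

import pickle
import numpy as np

saving = pickle.load(open("out/bach/evolution/bach.pickle", "rb"))
matrix = saving['matrix']              # one row per training piece, one column per test piece
note_counter = saving['note_counter']  # cumulative number of notes seen after each training piece
print(np.mean(matrix, 1)[:5])          # mean IC on the test set after the first five pieces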
Example #16
import sys 
sys.path.append('../')

from idyom import markovChain
from idyom import data
from idyom import score

import numpy as np

M = markovChain.markovChain(3)

D = data.data()
D.parse("dataBaseTest/")
M.train(D.getData("pitch"))

print(D.getData("pitch"))


S = M.generate(500)

S.writeToMidi("generation1.mid")

S.toWaveForm("generation1.wav")

print(S.getData())


quit()

matrix = M.getStatesMatrix()
print(M.transitions)
Example #17
def compareWithLISP(folder):
    """
	Start comparisons between our IDyOM and the LISP one.
	This function will add the dataset to LISP and start the training.
	You should have LISP and IDyOM already installed.
	"""

    if not os.path.exists("lisp/midis/"):
        os.makedirs("lisp/midis/")

    os.system("rm -rf lisp/midis/*")

    # Add folder to lisp database

    replaceinFile("lisp/compute.lisp", "FOLDER", folder)

    # Compute with LISP IDyOM

    os.system("sbcl --noinform --load lisp/compute.lisp")

    replaceinFile("lisp/compute.lisp", folder, "FOLDER")

    folder = "lisp/midis/"
    folder = "dataset/bach_sub/"

    # Our IDyOM
    now = time.time()
    likelihoods1, files1 = cross_validation(folder,
                                            maxOrder=20,
                                            quantization=24,
                                            k_fold=5)  #k-fold=10
    print("execution:", time.time() - now)

    # LISP version

    L2 = lisp.getDico(
        "lisp/12-cpitch_onset-cpitch_onset-nil-nil-melody-nil-10-both-nil-t-nil-c-nil-t-t-x-3.dat"
    )

    likelihoods2, files2 = lisp.getLikelihoods(L2)

    likelihood2 = np.mean(likelihoods2), np.std(likelihoods2), len(
        likelihoods2)

    plt.ylabel("Likelihood")
    plt.bar([0, 1], [np.mean(likelihoods1), likelihood2[0]],
            color="b",
            yerr=[
                1.96 * np.std(likelihoods1) / np.sqrt(len(likelihoods1)),
                1.96 * likelihood2[1] / np.sqrt(likelihood2[2])
            ])

    if not SERVER:
        plt.show()
    else:
        plt.savefig("figs/server/Lisp/likelihood.eps")
        plt.close()

    print("IDyOMpy:", likelihoods1)
    print("LISP:", likelihoods2)

    # Comparing models on pieces

    M = data.data()
    M.parse(folder, augment=False)
    dat1, files3 = M.getScoresFeatures()

    dico = dict(zip(files1, likelihoods1))

    dico2 = dict(zip(files2, likelihoods2))

    x1 = []
    x2 = []

    for file in files1:
        if file in dico2 and dico[file] is not None and dico2[file] is not None:
            x1.append(dico[file])
            x2.append(dico2[file])

    compareLikelihoods(x1, x2, name="Lisp/compareLikelihoods")

    # plotting in the music space

    dat2, files4 = M.getScoresFeatures()

    dico2 = dict(zip(files2, likelihoods2))

    weights = []
    colors = []

    for file in files1:
        if file in dico2 and dico2[file] is not None:
            weights.append(500 * abs(dico[file] - dico2[file])**2)
            if dico[file] - dico2[file] < 0:
                colors.append('coral')
            elif dico[file] - dico2[file] > 0:
                colors.append('deepskyblue')
            else:
                colors.append('black')
        else:
            weights.append(10)
            colors.append('black')

    plt.scatter(dat2[0][:len(dat2[1])], dat2[1], s=weights, c=colors)

    plt.title('Python - Lisp')
    plt.xlabel('Average 1-note interval')
    plt.ylabel('Average note onset')

    if not SERVER:
        plt.show()
    else:
        plt.savefig("figs/server/Lisp/scoreSpace.eps")
        plt.close()

    # LATER
    quit()
    plt.ylabel("Likelihood")
    plt.xlabel("time")
    plt.plot(L2['1']["probability"])
    plt.plot(
        L.getLikelihoodfromFile(folder + L2['1']["melody.name"][0][1:-1] +
                                ".mid"))
    plt.show()
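replaceinFile is not shown in this example; a minimal sketch of such a helper, assuming it simply does an in-place string substitution, could look like:

def replaceinFile(path, old, new):
    # Read the file, substitute the placeholder and write it back in place.
    with open(path, "r") as f:
        content = f.read()
    with open(path, "w") as f:
        f.write(content.replace(old, new))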
Example #18
    def getLikelihoodfromFile(self,
                              file,
                              short_term_only=False,
                              long_term_only=False):
        """
		Return likelihood over a score
		
		:param file: file to compute likelihood on

		:type file: str

		:return: np.array(length)

		"""

        D = data.data()
        D.addFile(file)

        probas = np.ones(D.getSizeofPiece(0))
        probas[0] = 1 / len(self.LTM[0].models[0].alphabet)

        for model in self.LTM:
            dat = D.getData(model.viewPoint)[0]

            STM = longTermModel.longTermModel(model.viewPoint,
                                              maxOrder=20,
                                              STM=True,
                                              init=dat)

            for i in tqdm(range(1, len(dat))):
                # we instantiate a Short Term Model for the current viewpoint

                STM.train([dat[:i]], shortTerm=True)

                p1 = model.getLikelihood(dat[:i], dat[i])

                flag = True

                # This happens when the state never happened in the training data
                if p1 is None:
                    p1 = 1 / 30
                    flag = None

                p2 = STM.getLikelihood(dat[:i], dat[i])

                if self.stm and p2 is not None:

                    if flag is not None:
                        p = self.mergeProbas([p1, p2], [
                            model.getRelativeEntropy(dat[:i]),
                            STM.getRelativeEntropy(dat[:i])
                        ])
                    else:
                        p = p2
                else:
                    p = p1

                if long_term_only:
                    p = p1
                if short_term_only:
                    p = p2
                    if p is None:
                        p = 1 / 30

                probas[i] *= p

                # Leftover debugging check (the magic value never occurs in practice)
                if probas[i] == 563540:
                    print("LTM:", model.getLikelihood(dat[:i], dat[i]))
                    print("STM:", p2)
                    #print("ret:", self.mergeProbas([p, p2], [model.getEntropy(dat[:i]), STM.getEntropy(dat[:i])]))
                    print()

        return probas
Example #19
    def getLikelihoodfromFile(self,
                              file,
                              short_term_only=False,
                              long_term_only=False):
        """
		Return likelihood over a score
		
		:param file: file to compute likelihood on

		:type file: str

		:return: (probas, entropies), two np.array of the piece length

		"""

        D = data.data()
        D.addFile(file)

        probas = np.ones(D.getSizeofPiece(0))
        probas[0] = 1 / len(self.LTM[0].models[0].alphabet)

        entropies = np.zeros(D.getSizeofPiece(0))
        L = np.ones(len(self.LTM[0].models[0].alphabet)) / len(
            self.LTM[0].models[0].alphabet)
        entropies[0] = -np.sum(L * np.log2(L))

        for model in self.LTM:
            dat = D.getData(model.viewPoint)[0]
            if long_term_only is False:
                STM = longTermModel.longTermModel(model.viewPoint,
                                                  maxOrder=20,
                                                  STM=True,
                                                  init=dat)

            for i in range(1, len(dat)):
                # we instantiate a Short Term Model for the current viewpoint

                if long_term_only is False:
                    STM.train([dat[:i]], shortTerm=True)

                p1 = model.getLikelihood(dat[:i], dat[i])
                if p1 is None:
                    e1 = 4.9
                else:
                    e1 = model.getEntropy(dat[:i])

                flag = True

                # This happens when the state never happened in the training data
                if p1 is None:
                    p1 = 1 / 30
                    e1 = 4.9
                    flag = None
                if long_term_only is False:
                    p2 = STM.getLikelihood(dat[:i], dat[i])
                    if p2 is None:
                        e2 = 4.9
                    else:
                        e2 = STM.getEntropy(dat[:i])
                if long_term_only:
                    p = p1
                    e = e1
                elif short_term_only:
                    p = p2
                    e = e2
                    if p is None:
                        p = 1 / 30
                        e = 4.9
                elif self.stm and p2 is not None:
                    if flag is not None:
                        p = self.mergeProbas([p1, p2], [
                            model.getRelativeEntropy(dat[:i]),
                            STM.getRelativeEntropy(dat[:i])
                        ])
                        e = self.mergeProbas([e1, e2], [
                            model.getRelativeEntropy(dat[:i]),
                            STM.getRelativeEntropy(dat[:i])
                        ])
                    else:
                        p = p2
                        e = e2
                else:
                    p = p1
                    e = e1

                probas[i] *= p
                entropies[i] += e

        return probas, entropies
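The constants 1/30 and 4.9 above appear to be linked: 4.9 is roughly the surprise, in bits, of the 1/30 floor probability used when a context was never seen during training:

import numpy as np

floor_probability = 1 / 30
print(-np.log2(floor_probability))   # -> about 4.91 bits, matching the 4.9 entropy fallback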
Example #20
def compareJump(folder, k_fold=2):
    """
	Compare the likelihoods of the IDyOM model and the jump model.
	"""
    # if os.path.isfile(".IDyOM.save"):
    # 	likelihood1, files1 = pickle.load(open(".IDyOM.save", 'rb'))
    # 	print("We loaded idyom model from pickle.")
    # else:
    # 	print("We store idyom model for later.")
    # 	likelihood1, files1 = cross_validation(folder, k_fold=k_fold, jump=False)
    # 	pickle.dump((likelihood1, files1), open(".IDyOM.save", 'wb'))

    likelihood1, files1 = cross_validation(folder, k_fold=k_fold, jump=False)
    likelihood2, files2 = cross_validation(folder, k_fold=k_fold, jump=True)

    plt.ylabel("Likelihood")
    plt.bar([0, 1],
            [np.mean(likelihood1), np.mean(likelihood2)],
            color="b",
            yerr=[
                1.96 * np.std(likelihood1) / np.sqrt(len(likelihood1)),
                1.96 * np.std(likelihood2) / np.sqrt(len(likelihood2))
            ])

    if not SERVER:
        plt.show()
    else:
        plt.savefig("figs/server/JUMPCompare.eps")
        plt.close()

    print("IDyOM")
    print("Mean:", np.mean(likelihood1))
    print("Std:", np.std(likelihood1))

    print("JUMP")
    print("Mean:", np.mean(likelihood2))
    print("Std:", np.std(likelihood2))

    M = data.data()
    M.parse(folder)
    dat1, files3 = M.getScoresFeatures()

    dico = dict(zip(files1, likelihood1))

    dico2 = dict(zip(files2, likelihood2))

    x1 = []
    x2 = []

    for file in files1:
        if file in dico2 and dico[file] is not None and dico2[file] is not None:
            x1.append(dico[file])
            x2.append(dico2[file])

    compareLikelihoods(x1, x2)

    weights = []

    for file in files3:
        if file in dico and dico[file] is not None:
            weights.append(500 * dico[file]**2)
        else:
            weights.append(0)

    plt.subplot(2, 1, 1)

    plt.scatter(dat1[0][:len(dat1[1])], dat1[1], s=weights)

    plt.title('IDyOM')
    plt.xlabel('Average 1-note interval')
    plt.ylabel('Average note onset')

    dat2, files4 = M.getScoresFeatures()

    dico = dict(zip(files2, likelihood2))

    weights = []

    for file in files4:
        if file in dico and dico[file] is not None:
            weights.append(500 * dico[file]**2)
        else:
            weights.append(0)

    plt.subplot(2, 1, 2)

    plt.scatter(dat2[0][:len(dat2[1])], dat2[1], s=weights)

    plt.title('JUMP')
    plt.xlabel('Average 1-note interval')
    plt.ylabel('Average note onset')

    if not SERVER:
        plt.show()
    else:
        plt.savefig("figs/server/scoreSpace.eps")
        plt.close()
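The error bars above are 95% confidence intervals on the mean (1.96 standard errors); a minimal check of the half-width on hypothetical data:

import numpy as np

likelihood1 = np.random.rand(100)    # hypothetical per-piece likelihoods
half_width = 1.96 * np.std(likelihood1) / np.sqrt(len(likelihood1))
print(np.mean(likelihood1), "+/-", half_width)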