sys.stderr = open(os.path.join(scoresPath, 'gridsearch_err.txt'),
                  'w')  # redirect stderr to a log file

# variable initialization
nFolds = 1
C_range = 2.0**np.arange(-5, 15 + 2, 2)  # libsvm range
gamma_range = 2.0**np.arange(-15, 3 + 2, 2)  # libsvm range
mixtures = 2**np.arange(0, 7, 1)  # GMM sizes: 1, 2, 4, ..., 64

scores = np.zeros((mixtures.shape[0], nFolds))
cBestValues = np.zeros((mixtures.shape[0], nFolds))
gBestValues = np.zeros((mixtures.shape[0], nFolds))
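# The ranges above feed a per-mixture SVM grid search over (C, gamma). A
# minimal sketch of how scores / cBestValues / gBestValues might be filled
# (an assumption, not the original search code; X_m / y_m stand for
# hypothetical per-mixture supervector data; on sklearn < 0.18 the import
# path is sklearn.grid_search):
#
#   from sklearn.svm import SVC
#   from sklearn.model_selection import GridSearchCV
#   for i, m in enumerate(mixtures):
#       grid = GridSearchCV(SVC(kernel='rbf'),
#                           {'C': C_range, 'gamma': gamma_range}, cv=5)
#       grid.fit(X_m, y_m)
#       scores[i, 0] = grid.best_score_
#       cBestValues[i, 0] = grid.best_params_['C']
#       gBestValues[i, 0] = grid.best_params_['gamma']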
mIdx = 0

#LOAD DATASET
snoring_dataset = dm.load_ComParE2017(featPath, filetype)  # load dataset
trainset, develset, testset = dm.split_ComParE2017_simple(
    snoring_dataset
)  # build the train split used to compute mean/variance for normalization
labels = dm.label_loading(
    os.path.join(root_dir, 'lab', 'ComParE2017_Snore.tsv'))
trainset_l, develset_l, _ = dm.split_ComParE2017_simple(labels)
del snoring_dataset

y = [seq[0] for seq in trainset]
yd = [seq[0] for seq in develset]
def process_subfold(sf, fold):
    print("Fold " + str(fold))
    t0 = time.time()

    snoring_dataset = dm.load_ComParE2017(featPath, filetype)  # load dataset
    trainset, develset, testset = dm.split_ComParE2017_simple(snoring_dataset)
    # build the train split used to compute mean/variance for normalization
    del snoring_dataset

    # Read the feature dimensionality: 'npy' files are stored as
    # (nfeat, nframes) and transposed below; other formats as (nframes, nfeat)
    a = trainset[0][1].shape
    if filetype == 'npy':
        nfeat = a[0]
    else:
        nfeat = a[1]

    # Read the features
    trainFeat = np.empty([1, nfeat])
    for seq in trainset:
        if filetype == 'npy':
            feat = seq[1].transpose()
        else:
            feat = seq[1]
        # stack all features into a single matrix, later passed to gmm.fit
        # to train the UBM
        trainFeat = np.vstack((trainFeat, feat))
    trainFeat = np.delete(trainFeat, 0, 0)
    print("DONE!")

    # trainFeat = trainFeat.astype(dtype='float32')

    for m in mixtures:
        # Train the UBM
        print("Fold " + str(fold) + "-->Mixture: " + str(m) + " ")
        sys.stdout.flush()
        gmm = mixture.GMM(n_components=m, n_iter=1000, random_state=1)
        gmm.fit(trainFeat)
        ubmPath = os.path.join(curUbmsPath, str(m))
        if not os.path.exists(ubmPath):
            try:  # handle the simultaneous creation of folders from multiple processes
                os.makedirs(ubmPath)
            except OSError as e:
                if e.errno != 17:  # 17 == EEXIST; re-raise anything else
                    raise
                print("OSError.errno 17 ignored")
        if not gmm.converged_:
            print("Fold " + str(fold) + "-->Convergence not reached with " +
                  str(m) + " mixtures")
        joblib.dump(gmm, os.path.join(ubmPath, "ubm_" + str(sf)))
        # save the UBM; copies like ubm_1_02 etc. are kept so they can be
        # reused later for debugging

        # Extract trainset supervectors
        curSupervecSubPath = os.path.join(curSupervecPath, str(m))
        if not os.path.exists(curSupervecSubPath):
            try:  # handle the simultaneous creation of folders from multiple processes
                os.makedirs(curSupervecSubPath)
            except OSError as e:
                if e.errno != 17:  # 17 == EEXIST; re-raise anything else
                    raise
                print("OSError.errno 17 ignored")

        for seq in trainset:
            gmmMap = GmmMap(n_components=m, n_iter=5, params="m")
            # initialize with the parameters of the UBM trained above
            gmmMap.weights_ = copy.deepcopy(gmm.weights_)
            gmmMap.means_ = copy.deepcopy(gmm.means_)
            gmmMap.covars_ = copy.deepcopy(gmm.covars_)
            # read one feature sequence
            if filetype == 'npy':
                feat = seq[1].transpose()
            else:
                feat = seq[1]
            gmmMap.map_adapt(feat)
            # MAP-adapt the UBM to this feature sequence
            # means_and_covars = np.hstack((gmmMap.means_, gmmMap.covars_))
            svFilePath = os.path.join(
                curSupervecSubPath,
                os.path.splitext(os.path.basename(seq[0]))[0])
            np.save(svFilePath, gmmMap.means_)  # the adapted means form the supervector
            # np.save(svFilePath, means_and_covars,)

        # Extract devset supervectors
        for seq in develset:
            gmmMap = GmmMap(n_components=m, n_iter=5, params="m")
            gmmMap.weights_ = copy.deepcopy(gmm.weights_)
            gmmMap.means_ = copy.deepcopy(gmm.means_)
            gmmMap.covars_ = copy.deepcopy(gmm.covars_)
            if filetype == 'npy':
                feat = seq[1].transpose()
            else:
                feat = seq[1]
            gmmMap.map_adapt(feat)
            # MAP-adapt the UBM to this feature sequence
            # means_and_covars = np.hstack((gmmMap.means_, gmmMap.covars_))
            svFilePath = os.path.join(
                curSupervecSubPath,
                os.path.splitext(os.path.basename(seq[0]))[0])
            np.save(svFilePath, gmmMap.means_)
            # np.save(svFilePath, means_and_covars,)

        for seq in testset:
            gmmMap = GmmMap(n_components=m, n_iter=5, params="m")
            gmmMap.weights_ = copy.deepcopy(gmm.weights_)
            gmmMap.means_ = copy.deepcopy(gmm.means_)
            gmmMap.covars_ = copy.deepcopy(gmm.covars_)
            if filetype == 'npy':
                feat = seq[1].transpose()
            else:
                feat = seq[1]
            gmmMap.map_adapt(feat)
            # MAP-adapt the UBM to this feature sequence
            # means_and_covars = np.hstack((gmmMap.means_, gmmMap.covars_))
            svFilePath = os.path.join(
                curSupervecSubPath,
                os.path.splitext(os.path.basename(seq[0]))[0])
            np.save(svFilePath, gmmMap.means_)
            # np.save(svFilePath, means_and_covars,)

    t1 = time.time()

    print("Fold " + str(fold) + "--Time: " + str(t1 - t0))
# Variant of process_subfold that fits the UBM on the develset features.
def process_subfold(sf, fold):
    print("Fold " + str(fold))
    t0 = time.time()

    snoring_dataset = dm.load_ComParE2017(featPath, filetype)  # load dataset
    trainset, develset, testset = dm.split_ComParE2017_simple(
        snoring_dataset
    )  # build the train split used to compute mean/variance for normalization
    del snoring_dataset

    # Read dataset size and preallocate
    a = trainset[0][1].shape
    if filetype == 'npy':
        nfeat = a[0]
    else:
        nfeat = a[1]

    # Read the features
    trainFeat = np.empty([1, nfeat])
    # for seq in trainset:
    for seq in develset:
        if filetype == 'npy':
            feat = seq[1].transpose()
        else:
            feat = seq[1]
        # stack all features into a single matrix, later passed to gmm.fit
        # to train the UBM
        trainFeat = np.vstack((trainFeat, feat))
    trainFeat = np.delete(trainFeat, 0, 0)
    print("DONE!")

    # trainFeat = trainFeat.astype(dtype='float32')

    for m in mixtures:
        # Train the UBM
        print("Fold " + str(fold) + "-->Mixture: " + str(m) + " ")
        sys.stdout.flush()
        gmm = mixture.GMM(n_components=m, n_iter=1000, random_state=1)
        gmm.fit(trainFeat)
        ubmPath = os.path.join(curUbmsPath, str(m))
        if not os.path.exists(ubmPath):
            try:  # handle the simultaneous creation of folders from multiple processes
                os.makedirs(ubmPath)
            except OSError as e:
                if e.errno != 17:  # 17 == EEXIST; re-raise anything else
                    raise
                print("OSError.errno 17 ignored")
        if not gmm.converged_:
            print("Fold " + str(fold) + "-->Convergence not reached with " +
                  str(m) + " mixtures")
        joblib.dump(gmm, os.path.join(ubmPath, "ubm_" + str(sf)))
        # save the UBM; copies like ubm_1_02 etc. are kept so they can be
        # reused later for debugging

        # Extract trainset supervectors
        curSupervecSubPath = os.path.join(curSupervecPath, str(m))
        if not os.path.exists(curSupervecSubPath):
            try:  # handle the simultaneous creation of folders from multiple processes
                os.makedirs(curSupervecSubPath)
            except OSError as e:
                if e.errno != 17:  # 17 == EEXIST; re-raise anything else
                    raise
                print("OSError.errno 17 ignored")
with open(experiments_db, 'a+') as f:
    f.write(line)
EXPERIMENT_TAG = fold_name
### Create directories ###
FOLDER_PREFIX = path.join(root_dir, RESULTS_DIR, EXPERIMENT_TAG)
if not path.exists(FOLDER_PREFIX):
    makedirs(FOLDER_PREFIX)
MODEL_PATH = path.join(FOLDER_PREFIX, 'model')
if not path.exists(MODEL_PATH):
    makedirs(MODEL_PATH)
SCORES_PATH = path.join(FOLDER_PREFIX, 'scores')
if not path.exists(SCORES_PATH):
    makedirs(SCORES_PATH)
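# Note: on Python 3.2+ each exists-check/makedirs pair above collapses into a
# single call, e.g. makedirs(SCORES_PATH, exist_ok=True).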

# DATASET HANDLING
snoring_dataset = dm.load_ComParE2017(
    path.join(root_dir, 'dataset', args.featureset))  # load dataset
labels = dm.label_loading(path.join(root_dir, 'lab', 'ComParE2017_Snore.tsv'))

trainset, develset, testset = dm.split_ComParE2017_simple(
    snoring_dataset
)  # build the train split used to compute mean/variance for normalization
trainset_l, develset_l, _ = dm.split_ComParE2017_simple(labels)

del snoring_dataset

trainset, mean, std = dm.normalize_data(
    trainset)  # compute mean and std of the trainset and normalize it

#normalize the dataset with the mean and std of the trainset
develset, _, _ = dm.normalize_data(develset, mean, std)
#testset, _, _ = dm.normalize_data(testset, mean, std)
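# For reference, a minimal sketch of the z-score normalization that
# dm.normalize_data presumably applies (an assumption; the helper is
# project-specific):
#   mean = np.mean(np.vstack([s[1] for s in trainset]), axis=0)
#   std = np.std(np.vstack([s[1] for s in trainset]), axis=0)
#   normalized = [(name, (feat - mean) / std) for name, feat in trainset]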