# Redirect stderr to a log file so grid-search warnings/errors are captured on disk.
# NOTE(review): the file handle is intentionally left open for the process lifetime.
sys.stderr = open(os.path.join(scoresPath, 'gridsearch_err.txt'), 'w')  # log to a file

# --- Hyper-parameter grid initialization ---
nFolds = 1
C_range = 2.0**np.arange(-5, 15 + 2, 2)      # libsvm-recommended C grid: 2^-5 .. 2^15
gamma_range = 2.0**np.arange(-15, 3 + 2, 2)  # libsvm-recommended gamma grid: 2^-15 .. 2^3
mixtures = 2**np.arange(0, 7, 1)             # GMM mixture counts: 1, 2, 4, ..., 64

# Per-(mixture, fold) holders for scores and the best C/gamma found.
scores = np.zeros((mixtures.shape[0], nFolds))
cBestValues = np.zeros((mixtures.shape[0], nFolds))
gBestValues = np.zeros((mixtures.shape[0], nFolds))
mIdx = 0

# --- Load dataset ---
snoring_dataset = dm.load_ComParE2017(featPath, filetype)  # load dataset
# Split so train-set statistics (mean/variance) can later be used for normalization.
trainset, develset, testset = dm.split_ComParE2017_simple(snoring_dataset)
labels = dm.label_loading(
    os.path.join(root_dir, 'lab', 'ComParE2017_Snore.tsv'))
trainset_l, develset_l, _ = dm.split_ComParE2017_simple(labels)
del snoring_dataset  # free memory; only the splits are needed from here on

# First element of each sequence tuple is its identifier/label key.
y = [seq[0] for seq in trainset]
yd = [seq[0] for seq in develset]
def process_subfold(sf, fold):
    """Train per-mixture UBM-GMMs on one sub-fold and dump MAP-adapted supervectors.

    For every mixture count in the module-level ``mixtures`` array this:
      1. fits a GMM (the UBM) on all stacked training features,
      2. saves the UBM under ``curUbmsPath/<m>/ubm_<sf>`` (one copy per
         sub-fold, so they can be reused for debugging),
      3. MAP-adapts the UBM to each train/devel/test sequence and saves the
         adapted means as a supervector ``.npy`` under ``curSupervecPath/<m>``.

    Parameters
    ----------
    sf : sub-fold identifier, used only to name the dumped UBM file.
    fold : fold index, used only in log messages.
    """
    print("Fold " + str(fold))
    t0 = time.time()
    snoring_dataset = dm.load_ComParE2017(featPath, filetype)  # load dataset
    # Split so train-set statistics can be used for normalization.
    trainset, develset, testset = dm.split_ComParE2017_simple(snoring_dataset)
    del snoring_dataset

    # Stack every training sequence's frames into one (totalFrames, nfeat)
    # matrix for the UBM fit.  Collect the parts in a list and vstack ONCE:
    # the previous np.vstack-inside-the-loop was O(n^2) in copied data.
    trainFeat = np.vstack([_frames(seq) for seq in trainset])
    print("DONE!")
    # trainFeat = trainFeat.astype(dtype='float32')

    for m in mixtures:
        # --- Train the UBM ---
        print("Fold " + str(fold) + "-->Mixture: " + str(m) + " ")
        sys.stdout.flush()
        gmm = mixture.GMM(n_components=m, n_iter=1000, random_state=1)
        gmm.fit(trainFeat)
        ubmPath = os.path.join(curUbmsPath, str(m))
        _makedirs_tolerant(ubmPath)
        if not gmm.converged_:
            print("Fold " + str(fold) + "-->Convergence not reached with "
                  + str(m) + " mixtures")
        # Persist the UBM so it can be reused later (e.g. for debugging).
        joblib.dump(gmm, os.path.join(ubmPath, "ubm_" + str(sf)))

        # --- Extract supervectors for train / devel / test partitions ---
        curSupervecSubPath = os.path.join(curSupervecPath, str(m))
        _makedirs_tolerant(curSupervecSubPath)
        for partition in (trainset, develset, testset):
            _extract_supervectors(gmm, m, partition, curSupervecSubPath)

    t1 = time.time()
    print("Fold " + str(fold) + "--Time: " + str(t1 - t0))


def _frames(seq):
    """Return a sequence's feature matrix as (nframes, nfeat).

    .npy features are stored transposed (nfeat, nframes); other file types
    are already frame-major.
    """
    return seq[1].transpose() if filetype == 'npy' else seq[1]


def _makedirs_tolerant(dirpath):
    """Create dirpath, tolerating the race where another process creates it first.

    Only errno 17 (EEXIST) is swallowed; any other OSError (permissions,
    disk full, ...) propagates.  The previous version printed
    "OSError.errno 17 ignored" for EVERY OSError and silently continued,
    hiding real failures.
    """
    if not os.path.exists(dirpath):
        try:
            os.makedirs(dirpath)
        except OSError as e:
            if e.errno != 17:
                raise
            print("OSError.errno 17 ignored")


def _extract_supervectors(gmm, m, seqs, outdir):
    """MAP-adapt the UBM to each sequence in seqs and save the adapted means.

    Each supervector file is named after the sequence's base filename
    (extension stripped) and contains only the adapted means — params="m"
    adapts means only, so weights/covars stay at the UBM values.
    """
    for seq in seqs:
        # Start the MAP adaptation from a deep copy of the UBM parameters.
        gmmMap = GmmMap(n_components=m, n_iter=5, params="m")
        gmmMap.weights_ = copy.deepcopy(gmm.weights_)
        gmmMap.means_ = copy.deepcopy(gmm.means_)
        gmmMap.covars_ = copy.deepcopy(gmm.covars_)
        gmmMap.map_adapt(_frames(seq))  # adapt the UBM to this sequence
        svFilePath = os.path.join(
            outdir, os.path.splitext(os.path.basename(seq[0]))[0])
        np.save(svFilePath, gmmMap.means_)
def process_subfold(sf,fold): print("Fold "+str(fold)); t0 = time.time(); snoring_dataset = dm.load_ComParE2017(featPath, filetype) # load dataset trainset, develset, testset = dm.split_ComParE2017_simple(snoring_dataset) # creo i trainset per calcolare media e varianza per poter normalizzare del snoring_dataset # Read dataset size and preallocate a=trainset[0][1].shape if (filetype == 'npy'): nfeat = a[0] else: nfeat = a[1] # Read the features trainFeat=np.empty([1,nfeat]) #for seq in trainset: for seq in develset: if (filetype == 'npy'): feat = seq[1].transpose() else: feat = seq[1] # metto tutte le features in una matrice che poi passero al gmm.fit per adattaare l'UBM trainFeat = np.vstack((trainFeat, feat)) trainFeat = np.delete(trainFeat, 0, 0) print("DONE!") #trainFeat = trainFeat.astype(dtype='float32') for m in mixtures: # Train the UBM print("Fold "+str(fold)+"-->Mixture: "+str(m)+" "); sys.stdout.flush(); gmm = mixture.GMM(n_components=m, n_iter=1000, random_state=1); gmm.fit(trainFeat); ubmPath = os.path.join(curUbmsPath, str(m)); if (not os.path.exists(ubmPath)): try:#handle the simultaneous creation of folders from multiple processes os.makedirs(ubmPath); except OSError, e: if e.errno != 17: raise else: print "OSError.errno 17 ignored" pass if (not gmm.converged_): print("Fold "+str(fold)+"-->Convergence not reached with " + str(m) +" mixtures"); joblib.dump(gmm, os.path.join(ubmPath, "ubm_" + str(sf))); #salvo l'ubm. mi crea le varie compie tipo ubm_1_02 ecc... per poterle magari riutilizzare per il debug # Extract trainset supervectors curSupervecSubPath = os.path.join(curSupervecPath, str(m)); if (not os.path.exists(curSupervecSubPath)): try:#handle the simultaneous creation of folders from multiple processes os.makedirs(curSupervecSubPath); except OSError, e: if e.errno != 17: raise else: print "OSError.errno 17 ignored" pass
# Append this experiment's record line to the experiments database file.
with open(experiments_db, 'a+') as f:
    f.write(line)

EXPERIMENT_TAG = fold_name

### Create directories ###
FOLDER_PREFIX = path.join(root_dir, RESULTS_DIR, EXPERIMENT_TAG)
MODEL_PATH = path.join(FOLDER_PREFIX, 'model')
SCORES_PATH = path.join(FOLDER_PREFIX, 'scores')
# One loop instead of three copy-pasted exists/makedirs stanzas.
for _dir in (FOLDER_PREFIX, MODEL_PATH, SCORES_PATH):
    if not path.exists(_dir):
        makedirs(_dir)

# --- Dataset handling ---
snoring_dataset = dm.load_ComParE2017(
    path.join(root_dir, 'dataset', args.featureset))  # load dataset
labels = dm.label_loading(path.join(root_dir, 'lab', 'ComParE2017_Snore.tsv'))
# Split so train-set statistics (mean/std) can be used for normalization.
trainset, develset, testset = dm.split_ComParE2017_simple(snoring_dataset)
trainset_l, develset_l, _ = dm.split_ComParE2017_simple(labels)
del snoring_dataset  # free memory; only the splits are needed

# Compute mean and std of the trainset and normalize it.
trainset, mean, std = dm.normalize_data(trainset)
# Normalize the devel set with the trainset's mean and std.
develset, _, _ = dm.normalize_data(develset, mean, std)
#testset, _, _ = dm.normalize_data(testset, mean, std)