def get_phoneme_mapping(iphn, phn2int, n_taken=0):
    """
    Number the picked phonemes as new classes, starting at 'n_taken'.
    ----
    Args:
        - iphn: Sequence of integer phoneme codes, in the order they were picked
        - phn2int: Codebook dictionary mapping phoneme names to integer codes
        - n_taken: Offset added to every new class number (count of classes
                   already assigned elsewhere)

    Outputs:
        - class2phn: Dictionary {new class number: phoneme name}
        - class2int: Dictionary {new class number: original integer code}
    """
    # Invert the codebook so a phoneme can be looked up by its integer code
    code2phn = flip(phn2int)

    class2phn = {}
    class2int = {}
    for position, code in enumerate(iphn):
        class_id = position + n_taken
        class2phn[class_id] = code2phn[code]
        class2int[class_id] = code

    return class2phn, class2int
def prepare_data(fname_dtest=None, classmap_existing=None, fname_dtrain=None,
                 n_phn=None, totclasses=None, verbose=False):
    """
    Load the train/test pickles and extract the data of newly picked phonemes.

    Phonemes whose names already appear as values of 'classmap_existing' are
    excluded; up to 'n_phn' of the remaining ones are drawn at random.
    ----
    Args:
        - fname_dtest: Path of the test .pkl file. It must unpickle to a
                       5-tuple whose first item is the data and whose fourth
                       item is the 61-phoneme codebook (name -> integer)
        - classmap_existing: Dictionary whose values are the phoneme names
                             that are already taken. None means nothing is
                             taken yet
        - fname_dtrain: Path of the training .pkl file. It must unpickle to a
                        4-tuple whose first item is the data
        - n_phn: Maximum number of new phonemes to pick (None picks all the
                 available ones)
        - totclasses: When equal to 39, labels are converted with
                      'phn61_to_phn39' before the selection
        - verbose: Print a message once the files are loaded

    Outputs:
        - xtrain, ytrain: Training data and labels of the picked phonemes
        - xtest, ytest: Test data and labels of the picked phonemes
        - class2phn: Dictionary {new class number: phoneme name}
        - class2int: Dictionary {new class number: original integer code}
    """
    # Read the datafiles. Context managers make sure the handles are closed.
    # NOTE: pickle can execute arbitrary code; only load trusted data files.
    with open(fname_dtest, "rb") as test_file:
        te_DATA, _, _, phn2int_61, _ = pkl.load(test_file)
    with open(fname_dtrain, "rb") as train_file:
        tr_DATA, _, _, _ = pkl.load(train_file)

    if verbose:
        print("Data loaded from files.")

    # Take labels down to phoneme level
    data_tr, label_tr = to_phoneme_level(tr_DATA)
    data_te, label_te = to_phoneme_level(te_DATA)

    # Checkout table 3 at
    # https://www.intechopen.com/books/speech-technologies/phoneme-recognition-on-the-timit-database
    # Or in the html file
    # for details
    phn2int = phn2int_61
    if totclasses == 39:
        to_39 = partial(phn61_to_phn39, int2phn_61=flip(phn2int_61),
                        data_folder=os.path.dirname(fname_dtest))
        label_tr, phn2int_39 = to_39(label_tr)
        # Reuse the codebook built on the training labels for the test labels
        label_te, _ = to_39(label_te, phn2int_39=phn2int_39)

        data_tr, label_tr = remove_label(data_tr, label_tr, phn2int_39)
        data_te, label_te = remove_label(data_te, label_te, phn2int_39)

        # Drop '-' from the codebook so that it can never be picked below
        phn2int_39.pop('-', None)
        phn2int = phn2int_39

    # List the phoneme names already in the data folder.
    taken = [] if classmap_existing is None else list(classmap_existing.values())

    # Deduce the available phonemes (integer codes of the names not taken)
    available_phn = [code for name, code in phn2int.items()
                     if name not in taken]

    # Pick new random phonemes
    iphn = np.random.permutation(available_phn)[:n_phn]

    # Find the phonemes in the dataset
    xtrain, ytrain = getsubset(data_tr, label_tr, iphn)
    xtest, ytest = getsubset(data_te, label_te, iphn)

    # New class numbers continue after the ones that are already taken
    class2phn, class2int = get_phoneme_mapping(iphn, phn2int,
                                               n_taken=len(taken))

    return xtrain, ytrain, xtest, ytest, class2phn, class2int
def get_phoneme_mapping(iphn, phn2int, n_taken=0):
    """
    Build the lookup tables of the newly created classes from the picked
    phoneme codes 'iphn' and the codebook 'phn2int'
    ----
    Args:
        - iphn: List of integers, each one being the code of a picked phoneme
                in the codebook

        - phn2int: Dictionary describing the mapping from phones (phn) to
                   integers (int) (sort of a codebook)

        - n_taken: Number of classes already processed; new class numbers
                   start from this value

    Outputs:
        - class2phn: A subdictionary that maps each new class number to the
                     phoneme associated with that class

        - class2int: A subdictionary that maps each new class number to the
                     original integer code associated with that class
    """
    # Reverse the codebook for easier manipulation
    int2phn = flip(phn2int)

    # Number the picked codes first, then resolve each code to its phoneme
    class2int = {rank + n_taken: code for rank, code in enumerate(iphn)}
    class2phn = {new_class: int2phn[code]
                 for new_class, code in class2int.items()}

    return class2phn, class2int
os.path.basename(fname_dtest)) fname_dtrain = os.path.join(os.path.dirname(fname_dtest), "train.{}.pkl".format(nfeats)) cmap = read_classmap(os.path.dirname(args.classmap)) te_DATA, te_keys, te_lengths, phn2int_61, te_PHN = pkl.load( open(fname_dtest, "rb")) tr_DATA, tr_keys, tr_lengths, tr_PHN = pkl.load(open(fname_dtrain, "rb")) data_te, label_te = to_phoneme_level(te_DATA) data_tr, label_tr = to_phoneme_level(tr_DATA) phn2int = phn2int_61 if args.totclass == 39: f = partial(phn61_to_phn39, int2phn_61=flip(phn2int_61), data_folder=os.path.dirname(fname_dtest)) label_tr, phn2int_39 = f(label_tr) label_te, _ = f(label_te, phn2int_39=phn2int_39) data_te, label_te = remove_label(data_te, label_te, phn2int_39) data_te, label_te = remove_label(data_te, label_te, phn2int_39) phn2int_39.pop('-', None) phn2int = phn2int_39 iphn = [phn2int[v] for k, v in cmap.items()] xtrain, ytrain = getsubset(data_tr, label_tr, iphn) xtest, ytest = getsubset(data_te, label_te, iphn) xtrain, xtest = normalize(xtrain, xtest)
args = parser.parse_args()
fname_dtest = args.input

# The test file name is parsed with the pattern below; the feature tag that it
# yields also names the training file stored in the same folder.
mode, nfeats, ntype, snr = parse("{}.{}.{}.{:d}db.pkl",
                                 os.path.basename(fname_dtest))
fname_dtrain = os.path.join(os.path.dirname(fname_dtest),
                            "train.{}.pkl".format(nfeats))

cmap = read_classmap(os.path.dirname(args.classmap))

# Read the datafiles. Context managers make sure the handles are closed.
# NOTE: pickle can execute arbitrary code; only load trusted data files.
with open(fname_dtest, "rb") as fh:
    te_DATA, te_keys, te_lengths, phn2int_61, te_PHN = pkl.load(fh)
with open(fname_dtrain, "rb") as fh:
    tr_DATA, tr_keys, tr_lengths, tr_PHN = pkl.load(fh)

# Take labels down to phoneme level
data_te, label_te = to_phoneme_level(te_DATA)
data_tr, label_tr = to_phoneme_level(tr_DATA)

phn2int = phn2int_61
if args.totclass == 39:
    f = partial(phn61_to_phn39, int2phn_61=flip(phn2int_61),
                data_folder=os.path.dirname(fname_dtest))
    label_tr, phn2int_39 = f(label_tr)
    # Reuse the codebook built on the training labels for the test labels
    label_te, _ = f(label_te, phn2int_39=phn2int_39)

    # Filter BOTH splits. The previous code filtered the test split twice and
    # left the training split untouched.
    data_tr, label_tr = remove_label(data_tr, label_tr, phn2int_39)
    data_te, label_te = remove_label(data_te, label_te, phn2int_39)

    # Drop '-' from the codebook once the labels have been filtered
    phn2int_39.pop('-', None)
    phn2int = phn2int_39

# Integer codes of the phonemes listed in the class map (its values are the
# phoneme names used as keys of the codebook)
iphn = [phn2int[v] for v in cmap.values()]

xtrain, ytrain = getsubset(data_tr, label_tr, iphn)
xtest, ytest = getsubset(data_te, label_te, iphn)
xtrain, xtest = normalize(xtrain, xtest)
def prepare_data(fname_dtest=None, classmap_existing=None, fname_dtrain=None,
                 n_phn=None, totclasses=None, verbose=False):
    """
    This is the function that is responsible for reading the data files
    (both training and test data in .pkl format) and for extracting from them
    the data of a randomly picked set of phonemes that are not taken yet
    (because the task is Phone Recognition)
    ----
    Args:
        - fname_dtest: full path of the test data file containing data and
                       labels in .pkl format. It must unpickle to a 5-tuple
                       whose first item is the data and whose fourth item is
                       the 61-phoneme codebook (name -> integer)

        - fname_dtrain: full path of the training data file containing data
                        and labels in .pkl format. It must unpickle to a
                        4-tuple whose first item is the data

        - classmap_existing: existing classmap dictionary; its values are the
                             phoneme names that are already taken. None means
                             that no phoneme is taken yet

        - n_phn: Maximum no. of new phonemes to pick among the available ones
                 (None picks all of them)

        - totclasses: Total number of phonemes considered in the list. When
                      it is 39, labels are converted with 'phn61_to_phn39'
                      before the selection

        - verbose: Flag for controlling verbose output

    Outputs:
        - xtrain: Training data for the subclassed dataset
        - ytrain: Training data label for the subclassed dataset
        - xtest: Test data for the subclassed dataset
        - ytest: Test data label for the subclassed dataset

        - class2phn: A subdictionary that denotes the new subclass number and
                     the corresponding phoneme associated for that subclass

        - class2int: A subdictionary that denotes the new subclass number and
                     the corresponding original class no. associated for that
                     subclass
    """
    # Read the datafiles. Context managers make sure the handles are closed.
    # NOTE: pickle can execute arbitrary code; only load trusted data files.
    with open(fname_dtest, "rb") as test_file:
        te_DATA, _, _, phn2int_61, _ = pkl.load(test_file)
    with open(fname_dtrain, "rb") as train_file:
        tr_DATA, _, _, _ = pkl.load(train_file)

    if verbose:
        print("Data loaded from files.")

    # Take labels down to phoneme level
    data_tr, label_tr = to_phoneme_level(tr_DATA)
    data_te, label_te = to_phoneme_level(te_DATA)

    # Checkout table 3 at
    # https://www.intechopen.com/books/speech-technologies/phoneme-recognition-on-the-timit-database
    # Or in the html file
    # for details
    phn2int = phn2int_61
    if totclasses == 39:
        to_39 = partial(phn61_to_phn39, int2phn_61=flip(phn2int_61),
                        data_folder=os.path.dirname(fname_dtest))
        label_tr, phn2int_39 = to_39(label_tr)
        # Reuse the codebook built on the training labels for the test labels
        label_te, _ = to_39(label_te, phn2int_39=phn2int_39)

        data_tr, label_tr = remove_label(data_tr, label_tr, phn2int_39)
        data_te, label_te = remove_label(data_te, label_te, phn2int_39)

        # Drop '-' from the codebook so that it can never be picked below
        phn2int_39.pop('-', None)
        phn2int = phn2int_39

    # List the phoneme names already in the data folder.
    taken = [] if classmap_existing is None else list(classmap_existing.values())

    # Deduce the available phonemes (integer codes of the names not taken)
    available_phn = [code for name, code in phn2int.items()
                     if name not in taken]

    # Pick new random phonemes
    iphn = np.random.permutation(available_phn)[:n_phn]

    # Find the phonemes in the dataset
    xtrain, ytrain = getsubset(data_tr, label_tr, iphn)
    xtest, ytest = getsubset(data_te, label_te, iphn)

    # New class numbers continue after the ones that are already taken
    class2phn, class2int = get_phoneme_mapping(iphn, phn2int,
                                               n_taken=len(taken))

    return xtrain, ytrain, xtest, ytest, class2phn, class2int