Esempio n. 1
0
def get_phoneme_mapping(iphn, phn2int, n_taken=0):
    """
    Build the two lookup tables that describe the newly picked phonemes.
    ----
    Args:

    - iphn: Iterable of integer codes (values of 'phn2int') of the picked phonemes
    - phn2int: Dictionary mapping phoneme names to integer codes
    - n_taken: Offset added to every new class index (0 by default)

    Outputs:

    - class2phn: Dictionary from new class index to phoneme name
    - class2int: Dictionary from new class index to the original integer code
    """
    # Invert the codebook so that a phoneme name can be found from its code
    code_to_name = flip(phn2int)

    # New class index -> phoneme name
    class2phn = {}
    for position, code in enumerate(iphn):
        class2phn[position + n_taken] = code_to_name[code]

    # New class index -> original integer code
    class2int = {}
    for position, code in enumerate(iphn):
        class2int[position + n_taken] = code

    return class2phn, class2int
Esempio n. 2
0
def prepare_data(fname_dtest=None,
                 classmap_existing=None,
                 fname_dtrain=None,
                 n_phn=None,
                 totclasses=None,
                 verbose=False):
    """
    Load the pickled train/test sets and extract a random subset of the
    phonemes that are not already listed in an existing class map.
    ----
    Args:

    - fname_dtest: Path of the pickled test file; it must unpack into five items,
                   the fourth one being the 61-phoneme codebook (name -> int)
    - classmap_existing: Dictionary whose values are the phoneme names already taken
                         (None is treated as an empty dictionary)
    - fname_dtrain: Path of the pickled training file; it must unpack into four items
    - n_phn: Number of new phonemes to pick (None keeps every available phoneme)
    - totclasses: When equal to 39 the labels are converted with phn61_to_phn39,
                  any other value keeps the 61-phoneme codebook
    - verbose: Print a message once the files are loaded

    Outputs:

    - xtrain, ytrain: Training subset returned by getsubset for the picked phonemes
    - xtest, ytest: Test subset returned by getsubset for the picked phonemes
    - class2phn: Dictionary from new class index to phoneme name
    - class2int: Dictionary from new class index to the original integer code
    """
    # Read the datafiles. The context managers close the handles even when
    # unpickling fails.
    # NOTE: pickle can execute arbitrary code, only load trusted files.
    with open(fname_dtest, "rb") as f_te:
        te_DATA, _, _, phn2int_61, _ = pkl.load(f_te)
    with open(fname_dtrain, "rb") as f_tr:
        tr_DATA, _, _, _ = pkl.load(f_tr)

    if verbose:
        print("Data loaded from files.")

    # Take labels down to phoneme level
    data_tr, label_tr = to_phoneme_level(tr_DATA)
    data_te, label_te = to_phoneme_level(te_DATA)

    # Checkout table 3 at
    # https://www.intechopen.com/books/speech-technologies/phoneme-recognition-on-the-timit-database
    # Or in the html file
    # for details
    phn2int = phn2int_61
    if totclasses == 39:
        f = partial(phn61_to_phn39,
                    int2phn_61=flip(phn2int_61),
                    data_folder=os.path.dirname(fname_dtest))
        # The codebook produced for the training labels is reused for the
        # test labels so that both splits share the same integer codes.
        label_tr, phn2int_39 = f(label_tr)
        label_te, _ = f(label_te, phn2int_39=phn2int_39)

        # NOTE(review): remove_label presumably drops the samples labelled
        # '-' -- confirm against its definition. The '-' entry is then
        # retired from the codebook so it can never be picked below.
        data_tr, label_tr = remove_label(data_tr, label_tr, phn2int_39)
        data_te, label_te = remove_label(data_te, label_te, phn2int_39)
        phn2int_39.pop('-', None)
        phn2int = phn2int_39

    # List the phoneme names already in the data folder.
    # A missing class map simply means that nothing is taken yet.
    if classmap_existing is None:
        taken = []
    else:
        taken = list(classmap_existing.values())

    # Deduce the available phonemes (integer codes of the names not taken)
    available_phn = [v for k, v in phn2int.items() if k not in taken]

    # Pick new random phonemes
    iphn = np.random.permutation(available_phn)[:n_phn]

    # Find the phonemes in the dataset
    xtrain, ytrain = getsubset(data_tr, label_tr, iphn)
    xtest, ytest = getsubset(data_te, label_te, iphn)

    # New class indices start right after the ones already taken
    class2phn, class2int = get_phoneme_mapping(iphn,
                                               phn2int,
                                               n_taken=len(taken))

    return xtrain, ytrain, xtest, ytest, class2phn, class2int
Esempio n. 3
0
def get_phoneme_mapping(iphn, phn2int, n_taken=0):
    """
    Derive the class-index dictionaries for the phoneme codes listed in 'iphn'.
    ----
    Args:

    - iphn: Iterable of integer codes (values of 'phn2int') of the picked phonemes
    - phn2int: Dictionary describing the mapping from phones (phn) to integers (int) (sort of a codebook)
    - n_taken: Offset added to every new class index, i.e. the number of classes already processed

    Outputs:

    - class2phn: A subdictionary mapping each new class index to the phoneme name of that class
    - class2int: A subdictionary mapping each new class index to the original integer code of that class
    """
    # Invert the codebook: integer code -> phoneme name
    code_to_name = flip(phn2int)

    # Both dictionaries share the same keys: position in 'iphn' shifted by n_taken
    class2phn = dict(
        (idx + n_taken, code_to_name[code]) for idx, code in enumerate(iphn)
    )
    class2int = dict(
        (idx + n_taken, code) for idx, code in enumerate(iphn)
    )
    return class2phn, class2int
Esempio n. 4
0
                                     os.path.basename(fname_dtest))
    # The training file lives next to the test file and is named after the
    # feature count parsed from the test file name.
    fname_dtrain = os.path.join(os.path.dirname(fname_dtest),
                                "train.{}.pkl".format(nfeats))

    # The class map is read from the directory that contains args.classmap
    cmap = read_classmap(os.path.dirname(args.classmap))

    # Load the pickled sets; the context managers close the handles even
    # when unpickling fails.
    # NOTE: pickle can execute arbitrary code, only load trusted files.
    with open(fname_dtest, "rb") as f_te:
        te_DATA, te_keys, te_lengths, phn2int_61, te_PHN = pkl.load(f_te)

    with open(fname_dtrain, "rb") as f_tr:
        tr_DATA, tr_keys, tr_lengths, tr_PHN = pkl.load(f_tr)

    # Take labels down to phoneme level
    data_te, label_te = to_phoneme_level(te_DATA)
    data_tr, label_tr = to_phoneme_level(tr_DATA)
    phn2int = phn2int_61
    if args.totclass == 39:
        f = partial(phn61_to_phn39,
                    int2phn_61=flip(phn2int_61),
                    data_folder=os.path.dirname(fname_dtest))
        # The codebook produced for the training labels is reused for the
        # test labels so that both splits share the same integer codes.
        label_tr, phn2int_39 = f(label_tr)
        label_te, _ = f(label_te, phn2int_39=phn2int_39)

        # Apply remove_label to BOTH splits: the training split first, then
        # the test split.
        # NOTE(review): remove_label presumably drops the samples labelled
        # '-' -- confirm against its definition.
        data_tr, label_tr = remove_label(data_tr, label_tr, phn2int_39)
        data_te, label_te = remove_label(data_te, label_te, phn2int_39)

        phn2int_39.pop('-', None)
        phn2int = phn2int_39

    # Integer codes of the phoneme names stored as values of the class map
    iphn = [phn2int[name] for name in cmap.values()]

    xtrain, ytrain = getsubset(data_tr, label_tr, iphn)
    xtest, ytest = getsubset(data_te, label_te, iphn)
    xtrain, xtest = normalize(xtrain, xtest)
Esempio n. 5
0
    args = parser.parse_args()

    # The test file name encodes four fields; only the feature count is
    # needed below, to locate the matching training file in the same folder.
    fname_dtest = args.input
    mode, nfeats, ntype, snr = parse("{}.{}.{}.{:d}db.pkl",
                                     os.path.basename(fname_dtest))
    fname_dtrain = os.path.join(os.path.dirname(fname_dtest),
                                "train.{}.pkl".format(nfeats))

    # The class map is read from the directory that contains args.classmap
    cmap = read_classmap(os.path.dirname(args.classmap))

    # Load the pickled sets; the context managers close the handles even
    # when unpickling fails.
    # NOTE: pickle can execute arbitrary code, only load trusted files.
    with open(fname_dtest, "rb") as f_te:
        te_DATA, te_keys, te_lengths, phn2int_61, te_PHN = pkl.load(f_te)

    with open(fname_dtrain, "rb") as f_tr:
        tr_DATA, tr_keys, tr_lengths, tr_PHN = pkl.load(f_tr)

    # Take labels down to phoneme level
    data_te, label_te = to_phoneme_level(te_DATA)
    data_tr, label_tr = to_phoneme_level(tr_DATA)
    phn2int = phn2int_61
    if args.totclass == 39:
        f = partial(phn61_to_phn39,
                    int2phn_61=flip(phn2int_61),
                    data_folder=os.path.dirname(fname_dtest))
        # The codebook produced for the training labels is reused for the
        # test labels so that both splits share the same integer codes.
        label_tr, phn2int_39 = f(label_tr)
        label_te, _ = f(label_te, phn2int_39=phn2int_39)

        # Apply remove_label to BOTH splits: the training split first, then
        # the test split.
        # NOTE(review): remove_label presumably drops the samples labelled
        # '-' -- confirm against its definition.
        data_tr, label_tr = remove_label(data_tr, label_tr, phn2int_39)
        data_te, label_te = remove_label(data_te, label_te, phn2int_39)

        phn2int_39.pop('-', None)
        phn2int = phn2int_39

    # Integer codes of the phoneme names stored as values of the class map
    iphn = [phn2int[name] for name in cmap.values()]

    xtrain, ytrain = getsubset(data_tr, label_tr, iphn)
    xtest, ytest = getsubset(data_te, label_te, iphn)
    xtrain, xtest = normalize(xtrain, xtest)
Esempio n. 6
0
def prepare_data(fname_dtest=None,
                 classmap_existing=None,
                 fname_dtrain=None,
                 n_phn=None,
                 totclasses=None,
                 verbose=False):
    """
    This is the function that is responsible for reading the data files (both training and test data in .pkl format)
    and for extracting from them a random subset of the phonemes that are not listed in an existing classmap
    ----
    Args:

    - fname_dtest: full path of the test data file in .pkl format; it must unpack into five items, the fourth one
                being the 61-phoneme codebook (phoneme name -> integer)
    - fname_dtrain: full path of the training data file in .pkl format; it must unpack into four items
    - classmap_existing: existing classmap dictionary whose values are the phoneme names already taken
                (None is treated as an empty dictionary)
    - n_phn: No. of new phonemes to pick among the ones left (None keeps every available phoneme)
    - totclasses: Total number of phonemes considered; when equal to 39 the labels are converted with
                phn61_to_phn39, any other value keeps the 61-phoneme codebook
    - verbose: Flag for controlling verbose output

    Output:

    - xtrain: Training data for the subclassed dataset
    - ytrain: Training data label for the subclassed dataset
    - xtest: Test data for the subclassed dataset
    - ytest: Test data label for the subclassed dataset
    - class2phn: A subdictionary that denotes the new subclass number and the corresponding phoneme associated for 
                that subclass
    - class2int: A subdictionary that denotes the new subclass number and the corresponding original class no. associated 
                for that subclass
    """
    # Read the datafiles. The context managers close the handles even when
    # unpickling fails.
    # NOTE: pickle can execute arbitrary code, only load trusted files.
    with open(fname_dtest, "rb") as test_file:
        te_DATA, _, _, phn2int_61, _ = pkl.load(test_file)
    with open(fname_dtrain, "rb") as train_file:
        tr_DATA, _, _, _ = pkl.load(train_file)

    if verbose:
        print("Data loaded from files.")

    # Take labels down to phoneme level
    data_tr, label_tr = to_phoneme_level(tr_DATA)
    data_te, label_te = to_phoneme_level(te_DATA)

    # Checkout table 3 at
    # https://www.intechopen.com/books/speech-technologies/phoneme-recognition-on-the-timit-database
    # Or in the html file
    # for details
    phn2int = phn2int_61
    if totclasses == 39:
        f = partial(phn61_to_phn39,
                    int2phn_61=flip(phn2int_61),
                    data_folder=os.path.dirname(fname_dtest))
        # The codebook produced for the training labels is reused for the
        # test labels so that both splits share the same integer codes.
        label_tr, phn2int_39 = f(label_tr)
        label_te, _ = f(label_te, phn2int_39=phn2int_39)

        # NOTE(review): remove_label presumably drops the samples labelled
        # '-' -- confirm against its definition. The '-' entry is then
        # retired from the codebook so it can never be picked below.
        data_tr, label_tr = remove_label(data_tr, label_tr, phn2int_39)
        data_te, label_te = remove_label(data_te, label_te, phn2int_39)
        phn2int_39.pop('-', None)
        phn2int = phn2int_39

    # List the phoneme names already in the data folder.
    # A missing classmap simply means that nothing is taken yet.
    if classmap_existing is None:
        taken = []
    else:
        taken = list(classmap_existing.values())

    # Deduce the available phonemes (integer codes of the names not taken)
    available_phn = [v for k, v in phn2int.items() if k not in taken]

    # Pick new random phonemes
    iphn = np.random.permutation(available_phn)[:n_phn]

    # Find the phonemes in the dataset
    xtrain, ytrain = getsubset(data_tr, label_tr, iphn)
    xtest, ytest = getsubset(data_te, label_te, iphn)

    # New class indices start right after the ones already taken
    class2phn, class2int = get_phoneme_mapping(iphn,
                                               phn2int,
                                               n_taken=len(taken))

    return xtrain, ytrain, xtest, ytest, class2phn, class2int