Ejemplo n.º 1
0
def _read_feature_list(list_path):
    """Parse a '<feature-name> <integer-label>' list file.

    Returns a ``(names, labels)`` pair of parallel lists. Blank lines are
    skipped; any other line must have at least two whitespace-separated
    fields, the second of which is parsed with ``int``.
    """
    names = []
    labels = []
    # The context manager closes the handle even if a line fails to parse.
    with open(list_path) as list_file:
        for line in list_file:
            fields = line.split()
            if not fields:
                continue
            names.append(fields[0])
            labels.append(int(fields[1]))
    return names, labels


def _load_padded_features(names, feat_dir, maxlen, feat_dim):
    """Read one HTK feature file per name into zero-padded arrays.

    Returns ``(data, mask)``: ``data`` is float32 with shape
    ``(len(names), maxlen, feat_dim)`` and ``mask`` is float32 with shape
    ``(len(names), maxlen)``, holding 1.0 on real frames and 0.0 on padding.

    Raises:
        ValueError: if a file holds more than ``maxlen`` frames or fewer
            than ``feat_dim`` coefficients per frame.
    """
    data = np.zeros((len(names), maxlen, feat_dim), dtype='float32')
    mask = np.zeros((len(names), maxlen), dtype='float32')

    for ind, name in enumerate(names):
        fname = os.path.join(feat_dir, name + '.fea')
        # Only the first feat_dim coefficients of every frame are kept.
        frames = htkmfc.HTKFeat_read(fname).getall()[:, :feat_dim]
        nframes, ncoeffs = frames.shape
        if nframes > maxlen:
            # Fail with the offending file name instead of the opaque
            # "negative dimensions" error a negative pad length produces.
            raise ValueError('%s has %d frames, more than maxlen=%d' %
                             (fname, nframes, maxlen))
        if ncoeffs != feat_dim:
            raise ValueError('%s has %d coefficients per frame, expected %d'
                             % (fname, ncoeffs, feat_dim))
        # Rows past nframes keep their initial zeros, which is the padding.
        data[ind, :nframes, :] = frames
        mask[ind, :nframes] = 1.0

    return data, mask


def load_dataset():
    """Load the training and validation sets as zero-padded arrays.

    Reads the training and validation list files, loads the HTK feature file
    named on each line from the feature directory, keeps the first 20
    coefficients of every frame and pads each utterance with zeros to 800
    frames.

    Returns:
        A 6-tuple ``(Data, Mask, labels, val_Data, val_Mask, val_labels)``.
        ``Data`` / ``val_Data`` are float32 arrays of shape (n, 800, 20),
        ``Mask`` / ``val_Mask`` are float32 arrays of shape (n, 800) with 1.0
        on real frames, and ``labels`` / ``val_labels`` are int32 arrays.

    Raises:
        ValueError: if an utterance has more than 800 frames or fewer than
            20 coefficients per frame.
    """
    maxlen = 800  # pad all utterances to this length
    feat_dim = 20
    dpth = '/misc/data15/reco/bhattgau/Rnn/Data/mfcc/Nobackup/VQ_VAD_HO_EPD/'

    features, labelz = _read_feature_list(
        '/misc/data15/reco/bhattgau/Rnn/Lists/spk_softmax/Train_feats_labs.plst'
    )
    val_features, val_labelz = _read_feature_list(
        '/misc/data15/reco/bhattgau/Rnn/Lists/spk_softmax/Valid_feats_labs.plst'
    )

    Data, Mask = _load_padded_features(features, dpth, maxlen, feat_dim)
    val_Data, val_Mask = _load_padded_features(val_features, dpth, maxlen,
                                               feat_dim)

    return (Data, Mask, np.asarray(labelz, dtype='int32'),
            val_Data, val_Mask, np.asarray(val_labelz, dtype='int32'))
Ejemplo n.º 2
0
def main(argv):
    """Dump the frames of an HTK feature file to a text file.

    Parses two positional command-line arguments, ``input`` and ``output``,
    reads every frame of ``input`` with ``htkmfc.HTKFeat_read``, prints the
    reader's attributes, and writes the frames to ``output`` with
    ``np.savetxt`` using the ``'%6.2f'`` format.

    Args:
        argv: Not used. ``parse_args()`` is called without arguments, so
            argparse reads ``sys.argv[1:]`` itself; the parameter is kept so
            existing callers continue to work.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="input wav file")
    parser.add_argument("output", help="output mfcc file")
    args = parser.parse_args()

    r = htkmfc.HTKFeat_read(args.input)
    # Parenthesised single-argument form: same output as the Python 2 print
    # statement, and also valid syntax on Python 3.
    print(vars(r))
    data = r.getall()
    np.savetxt(args.output, data, fmt='%6.2f')
def find_speech(scpname):
    """Load the feature segments listed in an SCP file.

    Every non-blank line is parsed as ``<name>=<path>[<beg>,<end>]``. For
    each entry the HTK feature file at ``<path>`` is read and the rows
    ``[beg:end]`` are copied out.

    Args:
        scpname: Path of the SCP file to read.

    Returns:
        A dict mapping ``"<label>_seg<i>"`` to ``[frames, beg, dur, end]``.
        ``label`` is the last ``/``-separated component of ``<path>`` up to
        its first ``.``, ``i`` is the 0-based position of the entry among
        the non-blank lines, ``frames`` is a NumPy array holding the selected
        rows, and ``dur`` equals ``end - beg``. The dict is empty when the
        file contains no entries.
    """
    # reading scp
    scplines = []
    with open(scpname) as f:
        for line in f:
            if not line.strip():
                # A blank line holds no entry; skip it instead of failing
                # with IndexError on the splits below.
                continue
            fname = line.split("=")[1].split("[")[0]
            # Base name of the path, truncated at its first dot.
            label = fname.split("/")[-1].split(".")[0]
            beg = int(line.split("[")[1].split(",")[0])
            end = int(line.split(",")[1].split("]")[0])
            scplines.append((fname, label, beg, end))

    # reading htk
    array_dict = {}
    # Consecutive entries often point at the same file; keep the most
    # recently decoded one so it is not re-read for every segment.
    cached_fname = None
    cached_frames = None
    for count, (fname, label, beg, end) in enumerate(scplines):
        if fname != cached_fname:
            cached_frames = htkmfc.HTKFeat_read(fname).getall()
            cached_fname = fname
        # np.array copies the slice, so results never alias the cache.
        array_dict[label + "_seg" + str(count)] = [
            np.array(cached_frames[beg:end]), beg, end - beg, end
        ]
    return array_dict
Ejemplo n.º 4
0
for l in all_feats:

    fbfeat = os.path.join(dpth, l)

    spk_id = re.split('[/_.]', l)[2] + '_' + re.split('[/_.]', l)[4]
    lab = tspk_phr.index(spk_id)
    fname = re.split('[/.]', l)[2]

    if os.path.exists(fbfeat):

        LABELS.append(lab)
        NAMES.append(fname)
        mask = np.zeros((382, ), dtype='float32')

        ff = htkmfc.HTKFeat_read(fbfeat)
        data = ff.getall()

        nframes = data.shape[0]

        mask[:nframes] = 1.0
        MASKS.append(mask)

        padl = maxl - nframes
        pad = np.zeros((padl, 40), dtype='float32')

        datapad = np.vstack((data, pad))
        nframes = datapad.shape[0]

        ptr = 0
        #give each frame a forward-backward context of 5 frames