コード例 #1
0
def get_data_files(egs_dir, egs_type):
    """Return the paths of all data files of the given type under egs_dir.

    Recursively searches ``egs_dir`` (via ``find``) for files matching
    ``data.*.egs`` or ``data.*.post`` (case-insensitive).

    Args:
        egs_dir: Directory to search recursively.
        egs_type: Either ``'egs'`` or ``'post'``; any other value logs an
            error and exits the script.

    Returns:
        List of matching file paths, one per line of ``find`` output.
    """
    if egs_type not in ('egs', 'post'):
        print_log(
            'The egs type {egs_type} is not supported, use "egs" or "post", exiting script!'
            .format(egs_type=egs_type))
        sys.exit()

    # Pass the command as an argv list (shell=False) so a hostile or
    # space-containing egs_dir cannot inject shell syntax; the pattern is
    # derived from the already-validated egs_type.
    proc = subprocess.run(['find', egs_dir, '-iname', 'data.*.' + egs_type],
                          stdout=subprocess.PIPE)
    out = proc.stdout.decode('utf-8')
    return [line.strip() for line in out.splitlines()]
コード例 #2
0
def _write_split_egs(data_dir, egs_dir, subset, split_num, dim, nnet_type):
    """Write pickled data/label example files for one subset ('test' or 'train')."""
    split_dir = join(data_dir, 'split' + str(split_num))
    for batch in range(1, split_num + 1):
        feats, keys = fetch_feats(join(split_dir, str(batch)))
        labels = fetch_labels(data_dir, keys)

        if nnet_type == 'cnn':
            # CNN input: reshape flat feature vectors into square
            # single-channel images (dim is assumed to be a perfect square
            # here -- TODO confirm upstream guarantees this).
            pic_dim = int(np.sqrt(dim))
            feats = np.reshape(feats, (-1, pic_dim, pic_dim))
            feats = feats[:, np.newaxis, :, :]

        with open(join(egs_dir, subset, 'data.' + str(batch) + '.egs'),
                  'wb') as fid:
            pickle.dump(feats, fid)

        with open(join(egs_dir, subset, 'labels.' + str(batch) + '.egs'),
                  'wb') as fid:
            pickle.dump(labels, fid)


def get_egs(data_train, data_test, egs_dir, split_num, nnet_type):
    """Generate pickled nnet train/test examples under egs_dir.

    Creates ``egs_dir/test`` and ``egs_dir/train``, verifies that the
    feature dimensions of the two data sets match, records the dimension in
    ``egs_dir/dim``, then pickles one data + one label file per batch for
    each subset.

    Args:
        data_train: Kaldi-style training data directory (with feats.scp).
        data_test: Kaldi-style test data directory (with feats.scp).
        egs_dir: Output directory (must exist; subdirs must not).
        split_num: Number of per-subset batches (split directories 1..N).
        nnet_type: 'cnn' reshapes features to images; anything else keeps
            them flat.

    Raises:
        SystemExit: if the train/test feature dimensions disagree.
    """
    os.mkdir(join(egs_dir, 'test'))
    os.mkdir(join(egs_dir, 'train'))

    print_log('Getting data dimension')
    # BUG FIX: read dimensions from the function parameters, not from the
    # global `args` -- the original silently ignored data_test/data_train.
    dim = int(get_dim(join(data_test, 'feats.scp')))
    dim_2 = int(get_dim(join(data_train, 'feats.scp')))

    if dim_2 != dim:
        sys.exit(
            '%s: Data dimensions of training and test data do not match, something is wrong, exiting script!'
            % sys.argv[0])
    with open(join(egs_dir, 'dim'), 'w') as fid:
        fid.write('%d' % dim)

    # The test and train loops were verbatim duplicates; one helper now
    # serves both subsets.
    _write_split_egs(data_test, egs_dir, 'test', split_num, dim, nnet_type)
    print_log('Finished generating test examples')

    _write_split_egs(data_train, egs_dir, 'train', split_num, dim, nnet_type)
    print_log('Finished generating nnet training examples')
コード例 #3
0
    proc = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE)
    x = proc.stdout.decode('utf-8')
    data_files = []
    for line in x.splitlines():
        line = line.strip()
        data_files.append(line)

    return data_files


if __name__ == '__main__':

    args = get_args()

    print_log('Combining data files of type {egs_type} from {dir1} and {dir2}'.
              format(egs_type=args.egs_type,
                     dir1=args.egs_dir_1,
                     dir2=args.egs_dir_2))

    files_1 = get_data_files(args.egs_dir_1, args.egs_type)
    files_2 = get_data_files(args.egs_dir_2, args.egs_type)

    if len(files_1) != len(files_2):
        print_log(
            'Number of data files of type {egs_type} in {dir1} and {dir2} are not equal, exiting script!'
            .format(egs_type=args.egs_type,
                    dir1=args.egs_dir_1,
                    dir2=args.egs_dir_2))
        sys.exit()

    for i in range(len(files_1)):
        comb_dat = combine(files_1[i], files_2[i])
コード例 #4
0
        print('%s: The final test performance is: %.2f %%' %
              (sys.argv[0], re_er * 100))

        # Save result
        res_file = join(dirname(outmodel), 'result')
        with open(res_file, 'w') as f:
            f.write('Test set Frame Error Rate: %.2f %%' % (re_er * 100))

        # Save model
        with open(outmodel, 'wb') as fid:
            pickle.dump(model, fid)


if __name__ == '__main__':

    print_log('# BEGIN CNN TRAINING')

    args = get_args()
    gpu_id = get_device_id()

    if gpu_id != -1:
        print('%s: Using GPU device %d for nnet' % (sys.argv[0], gpu_id))
    else:
        print_log('Training nnet on single CPU, this will take some time!')

    print_log('Defining nnet model')

    with open(join(args.egs_dir, 'dim'), 'r') as fid:
        insize = int(np.sqrt(int(fid.readline())))

    model = cnn_model(args.nlayers, args.ndepth, args.ksize, args.ntargets,
コード例 #5
0
ファイル: egs_2_post.py プロジェクト: sadhusamik/pyspeech
    soft_out=np.zeros((frame_num,feat_dim))
    for i in range(frame_num):
        soft_out[i,:]=np.exp(x[i,:])/(np.sum(np.exp(x[i,:])))
    return soft_out

def get_post(model, egs):
    """Compute softmax posteriors for one pickled egs file using a pickled nnet.

    Args:
        model: Path to a pickled torch model.
        egs: Path to a pickled numpy feature matrix (frames x dims).

    Returns:
        Posterior matrix as produced by ``softmax`` on the nnet output.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file --
    # only load models/egs produced by this pipeline, never untrusted input.
    # Files are now closed deterministically; the original open() handles
    # were never closed.
    with open(model, 'rb') as fid:
        nnet = pickle.load(fid)
    with open(egs, 'rb') as fid:
        data = pickle.load(fid)

    x = Variable(torch.from_numpy(data).float())
    x = nnet(x)
    return softmax(x.data.numpy())

if __name__ == '__main__':

    args = get_args()

    print_log('Obtaining the data files from {egs_dir}'.format(egs_dir=args.egs_dir))
    files = get_data_files(args.egs_dir)

    print_log('Obtaining posteriors')
    # enumerate replaces the manual counter; `with` closes each output file
    # deterministically (the original open() handle was never closed).
    for count, egs_file in enumerate(files, start=1):
        post = get_post(args.model, egs_file)
        with open(join(args.post_dir, 'data.' + str(count) + '.post'), 'wb') as fid:
            pickle.dump(post, fid)

    print_log('Finished obtaining posteriors from {egs_dir}'.format(egs_dir=args.egs_dir))
コード例 #6
0
        train_labels = fetch_labels(data_train, keys)

        if nnet_type == 'cnn':
            pic_dim = int(np.sqrt(dim))
            train_data = np.reshape(train_data, (-1, pic_dim, pic_dim))
            train_data = train_data[:, np.newaxis, :, :]

        with open(join(egs_dir, 'train', 'data.' + str(batch) + '.egs'),
                  'wb') as fid:
            pickle.dump(train_data, fid)

        with open(join(egs_dir, 'train', 'labels.' + str(batch) + '.egs'),
                  'wb') as fid:
            pickle.dump(train_labels, fid)

    print_log('Finished generating nnet training examples')


if __name__ == '__main__':

    args = get_args()

    # Guard clause: bail out early unless one of the two supported
    # architectures was requested.
    if args.nnet_type not in ('vanilla', 'cnn'):
        sys.exit(
            '%s: Nnet type %s is not supported by pyspeech, exiting script!' %
            (sys.argv[0], args.nnet_type))
    print_log('Generating examples for nnet training')

    get_egs(args.data_train, args.data_test, args.egs_dir, args.split_num,
            args.nnet_type)
コード例 #7
0
        with open(res_file, 'w') as f:
            f.write('Test set Frame Error Rate: %.2f %%' % (cv_er * 100))

        with open(outmodel, 'wb') as fid:
            pickle.dump(model, fid)


if __name__ == '__main__':

    args = get_args()
    gpu_id = get_device_id()

    if gpu_id != -1:
        print('%s: Using GPU device %d for nnet' % (sys.argv[0], gpu_id))
    else:
        print_log('Training nnet   on sinlge CPU, this will take some time!')
    # Activation Function

    if args.activation == 'sigmoid':
        activ = nn.Sigmoid()
    elif args.activation == 'tanh':
        activ = nn.Tanh()
    elif args.activation == 'relu':
        activ = nn.ReLU()
    else:
        sys.exit('%s: The activation function %s is invalid, exiting script!' %
                 (sys.argv[0], args.activation))

    print_log('Defining nnet model')

    with open(join(args.egs_dir, 'dim'), 'r') as fid: