コード例 #1
0
def calculate_scalar(args):
    '''Calculate the scalar (mean and std) of the logmel features and write
    it out as an HDF5 file.

    The 'feature' dataset is read from
      <workspace>/features/<prefix>logmel_<fps>frames_<mel>melbins/<name>.h5
    and the 'mean' and 'std' datasets are written to the same relative
    location under <workspace>/scalars/. The scalars directory is created
    through create_folder before writing.

    Args:
      args: object providing the attributes
        workspace: string
        data_type: 'train_weak'
        mini_data: bool, set True for debugging on a small part of data

    Raises:
      AssertionError: if args.data_type is not 'train_weak'.
    '''

    # Arguments & parameters
    workspace = args.workspace
    mini_data = args.mini_data
    data_type = args.data_type

    # Raised explicitly rather than through an `assert` statement so that the
    # check is still enforced when Python runs with -O. The exception type
    # and message are the same as the assert produced.
    if data_type != 'train_weak':
        raise AssertionError(
            'We only support using train_weak data to calculate scalar. ')

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''
    relative_name = get_relative_path_no_extension(data_type)

    # The feature file and the scalar file share the same sub-directory and
    # file name; only the top-level folder differs.
    sub_dir_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    file_name = '{}.h5'.format(relative_name)

    feature_path = os.path.join(
        workspace, 'features', sub_dir_name, file_name)
    scalar_path = os.path.join(
        workspace, 'scalars', sub_dir_name, file_name)

    create_folder(os.path.dirname(scalar_path))

    # Load data (the whole dataset is read into memory)
    with h5py.File(feature_path, 'r') as hf:
        features = hf['feature'][:]

    # Calculate scalar
    # np.concatenate treats the loaded array as a sequence of sub-arrays
    # (one per index of its first axis) and joins them along axis 0.
    # NOTE(review): presumably this turns (clips, frames, mel_bins) into
    # (clips * frames, mel_bins) - confirm against the feature extractor.
    features = np.concatenate(features, axis=0)
    (mean, std) = calculate_scalar_of_tensor(features)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)

    print('All features: {}'.format(features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
コード例 #2
0
def calculate_scalar(args):
    '''Calculate and write out scalar (mean and std) of the side features.

    The 'feature_side' dataset is read from
      <workspace>/features_side/<prefix>logmel_<fps>frames_<mel>melbins/
        <sub_dir>.h5
    where <sub_dir> is get_subdir(subtask, data_type). The 'mean' and 'std'
    datasets are written to the same relative location under
    <workspace>/scalars_side/, whose directory is created through
    create_folder before writing.

    Args:
      args: object providing the attributes
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: 'train'
        mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''
    sub_dir = get_subdir(subtask, data_type)

    # The feature file and the scalar file share the same sub-directory and
    # file name; only the top-level folder differs.
    logmel_dir_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    file_name = '{}.h5'.format(sub_dir)

    feature_path = os.path.join(
        workspace, 'features_side', logmel_dir_name, file_name)
    scalar_path = os.path.join(
        workspace, 'scalars_side', logmel_dir_name, file_name)

    create_folder(os.path.dirname(scalar_path))

    # Load data (the whole dataset is read into memory)
    with h5py.File(feature_path, 'r') as hf:
        features = hf['feature_side'][:]

    # Calculate scalar
    # np.concatenate treats the loaded array as a sequence of sub-arrays
    # (one per index of its first axis) and joins them along axis 0.
    # NOTE(review): presumably this merges the clip axis into the frame
    # axis - confirm against the feature extractor.
    features = np.concatenate(features, axis=0)
    (mean, std) = calculate_scalar_of_tensor(features)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)

    print('All features: {}'.format(features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
コード例 #3
0
def calculate_scalar(args):
    '''Calculate and write out scalar (mean and std) of development data.

    Every file found in
      <workspace>/features/<prefix><audio_type>_dev_logmel_<fps>frames_
        <mel>melbins/
    is opened as an HDF5 file and its 'feature' dataset is loaded. The
    loaded arrays are concatenated along axis 1 and the resulting 'mean' and
    'std' datasets are written to 'scalar.h5' in the matching directory
    under <workspace>/scalars/, which is created through create_folder.

    Args:
      args: object providing the attributes
        workspace: string
        audio_type: 'foa' | 'mic'
        mini_data: bool, set True for debugging on a small part of data

    Raises:
      ValueError: if the features directory contains no files.
    '''

    # Arguments & parameters
    workspace = args.workspace
    audio_type = args.audio_type
    mini_data = args.mini_data
    data_type = 'dev'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''

    # Features and scalars live in identically named sub-directories.
    sub_dir_name = '{}{}_{}_logmel_{}frames_{}melbins'.format(
        prefix, audio_type, data_type, frames_per_second, mel_bins)

    features_dir = os.path.join(workspace, 'features', sub_dir_name)
    scalar_path = os.path.join(
        workspace, 'scalars', sub_dir_name, 'scalar.h5')

    create_folder(os.path.dirname(scalar_path))

    # Load data
    load_time = time.time()

    # os.listdir returns entries in arbitrary order; sorting makes the
    # concatenation order (and thus the floating point result) reproducible
    # from run to run.
    feature_names = sorted(os.listdir(features_dir))
    all_features = []

    for feature_name in feature_names:
        feature_path = os.path.join(features_dir, feature_name)

        with h5py.File(feature_path, 'r') as hf:
            all_features.append(hf['feature'][:])

    print('Load feature time: {:.3f} s'.format(time.time() - load_time))

    # np.concatenate would raise a ValueError on an empty list anyway; raise
    # the same exception type with a message that names the directory.
    if not all_features:
        raise ValueError(
            'No feature files found in {}'.format(features_dir))

    # Calculate scalar
    # NOTE(review): axis 1 is presumably the frame axis of each per-file
    # array (e.g. (channels, frames, mel_bins)) - confirm against the
    # feature extractor.
    all_features = np.concatenate(all_features, axis=1)
    (mean, std) = calculate_scalar_of_tensor(all_features)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)

    print('All features: {}'.format(all_features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
コード例 #4
0
def calculate_scalar(args):
    '''Calculate and write out scalar (mean and std) of four feature sets.

    The datasets 'feature', 'feature_gamm', 'feature_mfcc' and
    'feature_panns' are read from
      <workspace>/features/<prefix>logmel_<fps>frames_<mel>melbins/
        <sub_dir>.h5
    where <sub_dir> is get_subdir(subtask, data_type). For each of them a
    mean and a std dataset ('mean'/'std', 'mean_gamm'/'std_gamm',
    'mean_mfcc'/'std_mfcc', 'mean_panns'/'std_panns') is written to the same
    relative location under <workspace>/scalars/, whose directory is created
    through create_folder before writing.

    Args:
      args: object that may provide the attributes below. An attribute that
        is missing (or args being None) falls back to the value that used to
        be hard-coded in this function, shown as the default.
        workspace: string, default 'D:/Project/DCASE_test'
        subtask: 'a' | 'b' | 'c', default 'a'
        data_type: string passed to get_subdir, default 'development'
        mini_data: bool, set True for debugging on a small part of data,
          default False
    '''

    # Arguments & parameters
    # Values are taken from args; the previous hard-coded debugging values
    # are kept only as fallbacks so calls that relied on them still work.
    workspace = getattr(args, 'workspace', 'D:/Project/DCASE_test')
    subtask = getattr(args, 'subtask', 'a')
    data_type = getattr(args, 'data_type', 'development')
    mini_data = getattr(args, 'mini_data', False)

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''
    sub_dir = get_subdir(subtask, data_type)

    # The feature file and the scalar file share the same sub-directory and
    # file name; only the top-level folder differs.
    logmel_dir_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    file_name = '{}.h5'.format(sub_dir)

    feature_path = os.path.join(
        workspace, 'features', logmel_dir_name, file_name)
    scalar_path = os.path.join(
        workspace, 'scalars', logmel_dir_name, file_name)

    create_folder(os.path.dirname(scalar_path))

    # Load data (all four datasets are read fully into memory)
    with h5py.File(feature_path, 'r') as hf:
        features = hf['feature'][:]
        features_gamm = hf['feature_gamm'][:]
        features_mfcc = hf['feature_mfcc'][:]
        features_panns = hf['feature_panns'][:]

    # Calculate scalar
    # 'feature' and 'feature_panns' get a leading length-1 axis before the
    # concatenate, so np.concatenate sees a single array and returns a copy
    # with the original shape. 'feature_gamm' and 'feature_mfcc' are instead
    # split along their first axis and re-joined along axis 0, which merges
    # their first two axes.
    # NOTE(review): the different treatment looks deliberate (different
    # array layouts per feature type?) - confirm against the extractor.
    features = np.concatenate(features[None, :], axis=0)
    (mean, std) = calculate_scalar_of_tensor(features)

    features_gamm = np.concatenate(features_gamm, axis=0)
    (mean_gamm, std_gamm) = calculate_scalar_of_tensor(features_gamm)

    features_mfcc = np.concatenate(features_mfcc, axis=0)
    (mean_mfcc, std_mfcc) = calculate_scalar_of_tensor(features_mfcc)

    features_panns = np.concatenate(features_panns[None, :], axis=0)
    (mean_panns, std_panns) = calculate_scalar_of_tensor(features_panns)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)
        hf.create_dataset('mean_gamm', data=mean_gamm, dtype=np.float32)
        hf.create_dataset('std_gamm', data=std_gamm, dtype=np.float32)
        hf.create_dataset('mean_mfcc', data=mean_mfcc, dtype=np.float32)
        hf.create_dataset('std_mfcc', data=std_mfcc, dtype=np.float32)
        hf.create_dataset('mean_panns', data=mean_panns, dtype=np.float32)
        hf.create_dataset('std_panns', data=std_panns, dtype=np.float32)

    print('Write out scalar to {}'.format(scalar_path))