コード例 #1
0
ファイル: build_dataset.py プロジェクト: tae-jun/sampleaudio
def process_dataset(dataset_dir, config, df):
    """Write every split of ``df`` to sharded TFRecord files.

    For each of the 'train', 'val' and 'test' splits, the rows of ``df`` are
    grouped by their ``shard`` value and one file per shard is written under
    the ``tfrecord`` sub-directory of ``dataset_dir``.

    Args:
        dataset_dir: Root directory of the dataset; outputs go to its
            ``tfrecord`` sub-directory, which is created if missing.
        config: Dataset configuration, passed through untouched to
            ``process_audio``.
        df: DataFrame with at least a ``split`` and a ``shard`` column.
    """
    # Make sure the output directory exists before any writer is opened.
    os.makedirs(mkpath(dataset_dir, 'tfrecord'), exist_ok=True)

    # processes=None leaves the worker count to multiprocessing's default
    # (the number of CPUs).
    with Pool(processes=None) as pool:
        for split in ['train', 'val', 'test']:
            print(f'=> Processing split "{split}".')
            split_rows = df[df['split'] == split]
            shards = split_rows.shard.unique()
            for shard in sorted(shards):
                shard_rows = split_rows[split_rows['shard'] == shard]
                # Shard ids are 0-based in the frame but 1-based in file names.
                filename = f'{split}-{shard + 1:04d}-{len(shards):04d}.tfrecord'
                filepath = mkpath(dataset_dir, 'tfrecord', filename)
                with tf.python_io.TFRecordWriter(filepath) as writer:
                    # One task per audio row; each task yields several examples.
                    tasks = [(row, config, split)
                             for _, row in shard_rows.iterrows()]
                    num_tasks = len(tasks)
                    num_done = 0
                    # imap keeps the results in the same order as the tasks.
                    for examples in pool.imap(process_audio, tasks):
                        num_done += 1
                        for example in examples:
                            writer.write(example.SerializeToString())
                        progress = int(round(num_done / num_tasks * 100))
                        # '\r' rewrites the same console line as progress grows.
                        print(
                            f'\rShard ({shard+1:04d}/{len(shards):04d}): {progress:3d}%',
                            end='',
                            flush=True)
                # Finish the progress line of this shard.
                print()
コード例 #2
0
ファイル: eval.py プロジェクト: tae-jun/sampleaudio
def main(args):
  """Evaluate a saved model on the test set of its dataset.

  Args:
    args: Parsed arguments. Reads ``model_path``, ``dataset``, ``data_dir``,
      ``batch_size`` and ``num_readers``; ``model_path`` and ``dataset`` are
      overwritten with their resolved values.

  Raises:
    Exception: If the resolved dataset name is not 'mtt', 'scd' or 'dcs'.
  """
  args.model_path = mkpath(args.model_path)
  if not args.dataset:
    # No dataset given: take it from the name of the directory holding the
    # model file, which is expected to look like '<prefix>-<dataset>-...'.
    parent_dir_name = args.model_path.split('/')[-2]
    args.dataset = parent_dir_name.split('-')[1]

  name = args.dataset
  if name not in ('mtt', 'scd', 'dcs'):
    raise Exception('Not implemented.')

  if name == 'mtt':
    config, classes = MTT_CONFIG, data.mtt.CLASSES
  elif name == 'scd':
    config, classes = SCD_CONFIG, data.scd.CLASSES
  else:
    config, classes = DCS_CONFIG, data.dcs.CLASSES

  # Only the test dataset is needed for evaluation.
  tfrecord_dir = mkpath(args.data_dir, args.dataset, 'tfrecord')
  dataset_test = create_datasets(tfrecord_dir, args.batch_size, args.num_readers, config, only_test=True)

  # Restore the trained model; the custom objects are required to deserialize it.
  custom_objects = {'AudioVarianceScaling': AudioVarianceScaling, 'tf': tf}
  model = tf.keras.models.load_model(args.model_path, custom_objects=custom_objects)

  # Run the evaluation.
  evaluate(model, dataset_test, config, classes=classes)
コード例 #3
0
ファイル: train.py プロジェクト: tae-jun/sampleaudio
def main(args):
  """Train a SampleCNN on the selected dataset over several learning-rate stages.

  Each stage trains with ``args.lr * args.lr_decay ** stage``, reloads
  ``best.h5`` from the training directory and evaluates it on the test set.
  After the last stage, ``best.h5`` is renamed so that its file name carries
  the final scores.

  Args:
    args: Parsed arguments. Reads ``dataset``, ``num_stages``, ``data_dir``,
      ``log_dir``, ``name``, ``block``, ``amplifying_ratio``, ``multi``,
      ``dropout``, ``batch_size``, ``num_readers``, ``lr`` and ``lr_decay``.
      ``args.train_dir`` is set here, and ``args.name`` is filled with the
      model signature when it is None.

  Returns:
    The tuple ``(rocauc, prauc, acc, f1)`` from the evaluation of the last stage.

  Raises:
    ValueError: If ``args.num_stages`` is smaller than 1.
    Exception: If ``args.dataset`` is not 'mtt', 'scd' or 'dcs'.
    FileExistsError: If the training directory already exists.
  """
  print(f'=> Dataset: {args.dataset}')

  # Fail fast: without at least one stage there are no scores to report, and
  # the code below would otherwise crash on unbound variables only after the
  # training directory, datasets and model have already been created.
  if args.num_stages < 1:
    raise ValueError(f'num_stages must be at least 1, got {args.num_stages}')

  if args.dataset == 'mtt':
    config = MTT_CONFIG
  elif args.dataset == 'scd':
    config = SCD_CONFIG
  elif args.dataset == 'dcs':
    config = DCS_CONFIG
  else:
    raise Exception(f'Not implemented dataset: {args.dataset}')

  dataset_path = mkpath(args.data_dir, args.dataset)
  tfrecord_path = f'{dataset_path}/tfrecord'

  # Configure the model.
  model_config = ModelConfig(block=args.block, amplifying_ratio=args.amplifying_ratio, multi=args.multi,
                             num_blocks=config.num_blocks, dropout=args.dropout, activation=config.activation,
                             num_classes=config.num_classes)

  # Set the training directory: '<log_dir>/<timestamp>-<dataset>-<name>'.
  args.train_dir = mkpath(args.log_dir, datetime.now().strftime('%Y%m%d_%H%M%S') + f'-{args.dataset}')
  if args.name is None:
    args.name = model_config.get_signature()
  args.train_dir += '-' + args.name
  # exist_ok=False: refuse to reuse (and overwrite) an existing run directory.
  os.makedirs(args.train_dir, exist_ok=False)
  print('=> Training directory: ' + args.train_dir)

  # Create training, validation, and test datasets.
  dataset_train, dataset_val, dataset_test = create_datasets(tfrecord_path, args.batch_size, args.num_readers, config)

  model = SampleCNN(model_config)
  model_config.print_summary()

  # set() removes weights that are listed more than once before counting.
  num_params = int(sum([K.count_params(p) for p in set(model.trainable_weights)]))
  print(f'=> #params: {num_params:,}')

  for stage in range(args.num_stages):
    print(f'=> Stage {stage}')
    # Set the learning rate of current stage
    lr = args.lr * (args.lr_decay ** stage)
    # Train the network.
    train(model, lr, dataset_train, dataset_val, config, args)
    # Load the best model (presumably saved as best.h5 by train() -- confirm),
    # so the next stage continues from the best checkpoint.
    model = tf.keras.models.load_model(f'{args.train_dir}/best.h5',
                                       custom_objects={'AudioVarianceScaling': AudioVarianceScaling, 'tf': tf})
    # Evaluate.
    rocauc, prauc, acc, f1 = evaluate(model, dataset_test, config)

  # Change the file name of the best checkpoint with the scores.
  os.rename(f'{args.train_dir}/best.h5', f'{args.train_dir}/final-auc_{rocauc:.6f}-acc_{acc:.6f}-f1_{f1:.6f}.h5')
  # Report the final scores.
  print(f'=> FINAL SCORES [{args.dataset}] {args.name}: '
        f'rocauc={rocauc:.6f}, acc={acc:.6f}, f1={f1:.6f}, prauc={prauc:.6f}')

  model_config.print_summary()

  return rocauc, prauc, acc, f1
コード例 #4
0
def make_dataset_info(dataset_dir, num_audios_per_shard):
    """Build the per-file table of paths, splits, labels and shard ids.

    The training and testing ground-truth CSVs are merged into one pool, from
    which a validation set is held out; the evaluation CSV becomes the 'test'
    split. Labels of the same file are gathered into a list and passed through
    ``encode``, leaving one row per file.

    Args:
        dataset_dir: Root directory of the dataset; CSVs and audio are looked
            up under its ``raw`` sub-directory.
        num_audios_per_shard: Target number of audio files per shard.

    Returns:
        A DataFrame with one row per file, including the columns ``file``,
        ``path``, ``split`` ('train', 'val' or 'test'), ``label`` and an
        integer ``shard``.
    """
    df_train = read_csv(
        mkpath(dataset_dir, 'raw/groundtruth_weak_label_training_set.csv'))
    df_test = read_csv(
        mkpath(dataset_dir, 'raw/groundtruth_weak_label_testing_set.csv'))
    df_eval = read_csv(
        mkpath(dataset_dir, 'raw/groundtruth_weak_label_evaluation_set.csv'))

    # Audio file names on disk are the CSV file names prefixed with 'Y'.
    df_train['path'] = [
        mkpath(dataset_dir, f'raw/{DIR_TRAIN}/Y{f}') for f in df_train['file']
    ]
    df_test['path'] = [
        mkpath(dataset_dir, f'raw/{DIR_TEST}/Y{f}') for f in df_test['file']
    ]
    df_eval['path'] = [
        mkpath(dataset_dir, f'raw/{DIR_EVAL}/Y{f}') for f in df_eval['file']
    ]

    # The original testing set is used as additional training data.
    df_train = pd.concat([df_train, df_test])

    # Split validation set: sample 10% of the rows of every class. A file
    # goes to validation as soon as any of its rows is sampled.
    val_files = []
    for c in CLASSES:
        df_class = df_train[df_train['label'] == c]
        val_files += df_class.sample(frac=0.1,
                                     random_state=123)['file'].tolist()
    val_files = list(set(val_files))

    is_val = df_train['file'].isin(val_files)
    df_val = df_train[is_val].assign(split='val')
    df_train = df_train[~is_val].assign(split='train')
    df_eval = df_eval.assign(split='test')

    df = pd.concat([df_train, df_val, df_eval])

    # Encode labels: collect all labels of a file, encode them together, then
    # keep a single row per file carrying the encoded label.
    label = df.groupby('file')['label'].apply(list)
    label.iloc[:] = [encode(l) for l in label]
    label = label.to_frame().reset_index()
    df = df.drop_duplicates('file').drop('label', axis=1).merge(label,
                                                                on='file')

    # Shuffle and shard.
    df = shuffle(df, random_state=123)
    # Seeded like the other random steps above, so that rebuilding the
    # dataset yields the same shard assignment.
    rng = np.random.RandomState(123)
    for split in ['train', 'val', 'test']:
        in_split = df['split'] == split
        num_audios = int(in_split.sum())
        # A split smaller than one full shard still needs one shard; clamping
        # also avoids a modulo by zero below.
        num_shards = max(num_audios // num_audios_per_shard, 1)

        # Round-robin shard ids: every shard gets `num_audios // num_shards`
        # files and the remainder is spread over the first shards.
        shards = np.arange(num_audios) % num_shards
        shards = rng.permutation(shards)

        df.loc[in_split, 'shard'] = shards

    # The column is created by the partial assignments above, so cast it back
    # to an integer type.
    df['shard'] = df['shard'].astype(int)

    return df
コード例 #5
0
ファイル: build_dataset.py プロジェクト: tae-jun/sampleaudio
def main(args):
    """Build the TFRecord dataset selected by ``args.dataset``.

    Args:
        args: Parsed arguments. Reads ``data_dir`` and ``dataset``
            ('mtt', 'scd' or 'dcs').

    Raises:
        Exception: If ``args.dataset`` is not one of the supported names.
    """
    dataset_dir = mkpath(args.data_dir, args.dataset)

    # Pick the configuration and the matching dataset-info builder.
    if args.dataset == 'mtt':
        config, make_info = data.config.MTT_CONFIG, data.mtt.make_dataset_info
    elif args.dataset == 'scd':
        config, make_info = data.config.SCD_CONFIG, data.scd.make_dataset_info
    elif args.dataset == 'dcs':
        config, make_info = data.config.DCS_CONFIG, data.dcs.make_dataset_info
    else:
        raise Exception('Not implemented dataset: ' + args.dataset)

    # Describe the dataset first, then convert it shard by shard.
    df = make_info(dataset_dir, config.num_audios_per_shard)
    process_dataset(dataset_dir, config, df)
コード例 #6
0
ファイル: scd.py プロジェクト: tae-jun/sampleaudio
def load_audio_paths(dataset_dir):
    """Collect the sorted audio paths of the train, validation and test sets.

    Validation and test paths come from ``raw/validation_list.txt`` and
    ``raw/testing_list.txt``. The training set is every ``.wav`` file found
    one directory below ``raw`` that is in neither list and is not located in
    ``raw/_background_noise_``.

    Args:
        dataset_dir: Root directory of the dataset.

    Returns:
        A tuple ``(train_paths, val_paths, test_paths)`` of sorted lists.
    """

    def read_path_list(list_file):
        # Each line of the list file is a path relative to the 'raw' directory.
        with open(mkpath(dataset_dir, list_file)) as f:
            return [mkpath(dataset_dir, 'raw', line)
                    for line in f.read().splitlines()]

    all_wavs = set(glob(mkpath(dataset_dir, 'raw/*/*.wav')))
    noise_wavs = set(glob(mkpath(dataset_dir, 'raw/_background_noise_/*.wav')))

    val_paths = read_path_list('raw/validation_list.txt')
    test_paths = read_path_list('raw/testing_list.txt')

    # Whatever is not held out for validation/test and is not noise is training data.
    excluded = set(val_paths) | set(test_paths) | noise_wavs
    train_paths = sorted(all_wavs - excluded)

    # Sort the remaining lists in place.
    val_paths.sort()
    test_paths.sort()

    return train_paths, val_paths, test_paths