Code example #1
0
def _check_vggish_ckpt_exists():
    """Ensure the VGGish checkpoint and PCA params exist locally.

    Creates the checkpoint directory if needed, then downloads whichever
    of the two VGGish artifacts (model checkpoint, PCA parameters) is
    missing.
    """
    util.maybe_create_directory(FLAGS.vggish_ckpt_dir)
    # NOTE(review): the directory created above is FLAGS.vggish_ckpt_dir
    # while downloads target params.VGGISH_CHECKPOINT_DIR — confirm these
    # two settings point at the same location.
    artifacts = (
        (vggish_ckpt_path,
         'https://storage.googleapis.com/audioset/vggish_model.ckpt'),
        (vggish_pca_path,
         'https://storage.googleapis.com/audioset/vggish_pca_params.npz'),
    )
    for path, url in artifacts:
        if not util.is_exists(path):
            util.maybe_download(url, params.VGGISH_CHECKPOINT_DIR)
def arange_urban_sound_file_by_class():
    """Arrange UrbanSound8K wav files into one directory per class.

    Walks the converted 16-bit audio tree, derives each file's integer
    class label via `urban_labels`, and copies the file into
    ``<dst_dir>/<class_name>/`` where the class name has spaces replaced
    by underscores.
    """
    src_root = '/data1/data/UrbanSound8K-16bit/audio'
    dst_dir = '/data1/data/UrbanSound8K-16bit/audio-classfied'
    CLASSES = [
        'air conditioner', 'car horn', 'children playing', 'dog bark',
        'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren',
        'street music'
    ]
    CLASSES_STRIPED = [c.replace(' ', '_') for c in CLASSES]
    # BUG FIX: the original bound `src_paths` to the directory *string*
    # and iterated it character by character; collect the actual wav file
    # paths beneath the root instead.
    src_paths = [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(src_root)
        for name in files
        if name.endswith('.wav')
    ]
    for src in src_paths:
        # urban_labels returns one integer label per input path.
        lbl = urban_labels([src])[0]
        dst = '{dir}/{label}'.format(dir=dst_dir, label=CLASSES_STRIPED[lbl])
        maybe_create_directory(dst)
        maybe_copy_file(
            src, '{dst}/{name}'.format(dst=dst, name=os.path.split(src)[-1]))
def convert_urban_pcm24_to_pcm16():
    """Convert UrbanSound8K wavs from PCM_24 to PCM_16 codec.

    Mirrors ``fold1`` .. ``fold10`` from the 24-bit source tree into the
    16-bit destination tree, converting every ``.wav`` file, printing a
    running count after each file and the full path list at the end.
    """
    src_dirs = [
        '/data1/data/UrbanSound8K/audio/fold{:d}'.format(i + 1)
        for i in range(10)
    ]
    dst_dirs = [
        '/data1/data/UrbanSound8K-16bit/audio/fold{:d}'.format(i + 1)
        for i in range(10)
    ]
    converted_wav_paths = []
    for dsrc, ddst in zip(src_dirs, dst_dirs):
        maybe_create_directory(ddst)
        # Idiom fix: plain comprehension instead of filter() with a lambda
        # returning the name or None wrapped around a redundant list comp.
        wav_files = [fn for fn in os.listdir(dsrc) if fn.endswith('.wav')]
        for wav_file in wav_files:
            src_wav = os.path.join(dsrc, wav_file)
            dst_wav = os.path.join(ddst, wav_file)
            convert_wav(src_wav, dst_wav, subtype='PCM_16')
            converted_wav_paths.append(dst_wav)
            print('converted count:', len(converted_wav_paths))
    print(converted_wav_paths, len(converted_wav_paths))
Code example #4
0
def _create_records():
    """Create audio `train`, `test` and `val` records file."""
    tf.logging.info("Create records..")
    util.maybe_create_directory(FLAGS.records_dir)
    _check_vggish_ckpt_exists()
    wav_files, wav_labels = _wav_files_and_labels()
    tf.logging.info('Possible labels: {}'.format(set(wav_labels)))
    train, test, val = util.train_test_val_split(wav_files, wav_labels)
    with VGGishExtractor(vggish_ckpt_path, vggish_pca_path,
                         params.VGGISH_INPUT_TENSOR_NAME,
                         params.VGGISH_OUTPUT_TENSOR_NAME) as extractor:
        # Write each split through the one shared extractor session, in
        # the fixed order train -> test -> val.
        split_table = (
            (train_records_path, train),
            (test_records_path, test),
            (val_records_path, val),
        )
        for record_path, (examples, labels) in split_table:
            extractor.create_records(record_path, examples, labels)
        tf.logging.info('Dataset size: Train-{} Test-{} Val-{}'.format(
            len(train[1]), len(test[1]), len(val[1])))
Code example #5
0
 def create_records(self, record_path, wav_files, wav_labels):
     """Create TF Records from wav files and corresponding labels.

     Extracts VGGish features for each wav file (one feature row per
     second of audio), replicates the file's label per feature row, and
     serializes every (feature, label) pair into `record_path`. Files
     yielding zero features are logged and skipped.
     """
     record_dir = os.path.dirname(record_path)
     maybe_create_directory(record_dir)
     total = len(wav_labels)
     # Robustness fix: context manager guarantees the writer is closed
     # even if feature extraction raises part-way through (the original
     # leaked the writer on error).
     with tf.python_io.TFRecordWriter(record_path) as writer:
         # Idiom fix: enumerate(..., 1) replaces the manual `n` counter.
         for n, (wav_file, wav_label) in enumerate(
                 zip(wav_files, wav_labels), 1):
             tf.logging.info('[{}/{}] Extracting VGGish feature:'
                 ' label: {} - {}'.format(n, total, wav_label, wav_file))
             features = self.wavfile_to_features(wav_file)
             num_features = features.shape[0]  # one feature for one second
             if num_features == 0:
                 tf.logging.warning('No vggish features:'
                     ' label: {} - {}'.format(wav_label, wav_file))
                 continue
             cur_wav_labels = [wav_label] * num_features
             for (f, l) in zip(features, cur_wav_labels):
                 example = encodes_example(np.float64(f), np.int64(l))
                 writer.write(example.SerializeToString())
Code example #6
0
# Output record files for the test and val splits; paths combine the CLI
# flag `records_dir` with fixed names from `params`.
test_records_path = os.path.join(FLAGS.records_dir,
                                 params.TF_RECORDS_TEST_NAME)

val_records_path = os.path.join(FLAGS.records_dir, params.TF_RECORDS_VAL_NAME)

# Locations of the pretrained VGGish model checkpoint and its PCA
# post-processing parameters inside the user-supplied checkpoint dir.
vggish_ckpt_path = os.path.join(FLAGS.vggish_ckpt_dir,
                                params.VGGISH_CHECKPOINT_NAME)

vggish_pca_path = os.path.join(FLAGS.vggish_ckpt_dir,
                               params.VGGISH_PCA_PARAMS_NAME)

# Per-run output directories, namespaced by the training run name.
tensorboard_dir = os.path.join(params.TENSORBOARD_DIR, FLAGS.train_name)

audio_ckpt_dir = os.path.join(FLAGS.audio_ckpt_dir, FLAGS.train_name)

# NOTE: importing this module has the side effect of creating both
# directories on disk.
util.maybe_create_directory(tensorboard_dir)
util.maybe_create_directory(audio_ckpt_dir)


def _add_triaining_graph():
    with tf.Graph().as_default() as graph:
        logits = define_audio_slim(training=True)
        tf.summary.histogram('logits', logits)
        # define training subgraph
        with tf.variable_scope('train'):
            labels = tf.placeholder(tf.float32,
                                    shape=[None, params.NUM_CLASSES],
                                    name='labels')
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=labels, name='cross_entropy')
            loss = tf.reduce_mean(cross_entropy, name='loss_op')