Ejemplo n.º 1
0
def calculate_scalar(args):
    '''Calculate the scalar (mean and std) of the side features and write it
    out to an hdf5 file.

    The features are read from the 'feature_side' dataset of
    <workspace>/features_side/<config>/<sub_dir>.h5 and the result is written
    to the 'mean' and 'std' datasets of
    <workspace>/scalars_side/<config>/<sub_dir>.h5.

    Args:
      args: object exposing the following attributes:
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: 'train'
        mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''
    sub_dir = get_subdir(subtask, data_type)

    # The feature file and the scalar file share the same per-configuration
    # folder name and the same file name; only the top-level folder differs.
    config_dir = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    hdf5_name = '{}.h5'.format(sub_dir)

    feature_path = os.path.join(
        workspace, 'features_side', config_dir, hdf5_name)
    scalar_path = os.path.join(
        workspace, 'scalars_side', config_dir, hdf5_name)
    create_folder(os.path.dirname(scalar_path))

    # Load data
    with h5py.File(feature_path, 'r') as hf:
        features = hf['feature_side'][:]

    # Calculate scalar. Concatenating the array with itself as the sequence
    # joins all of its first-axis items along their own first axis.
    features = np.concatenate(features, axis=0)
    (mean, std) = calculate_scalar_of_tensor(features)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)

    print('All features: {}'.format(features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
Ejemplo n.º 2
0
    def _load_stat(model_type, subtask, source):
        '''Load the pickled validation statistics of one model and summarise
        its accuracy.

        Relies on names from the enclosing scope: data_type, workspace,
        filename, prefix, frames_per_second, mel_bins and holdout_fold.

        Args:
          model_type: string, used as a path component and as the legend
          subtask: 'a' | 'b' | 'c'
          source: key selecting a list of per-evaluation statistics in the
              pickled dictionary

        Returns:
          dict with keys 'accuracy' and 'legend'; for subtask 'c' also
          'unknown_accuracy'.

        Raises:
          ValueError: if subtask is not 'a', 'b' or 'c'.
        '''
        sub_dir = get_subdir(subtask, data_type)

        validate_statistics_path = os.path.join(
            workspace, 'statistics', filename,
            '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                                 mel_bins),
            '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold),
            model_type, 'validate_statistics.pickle')

        # Context manager so the file handle is closed deterministically.
        # NOTE: unpickling executes arbitrary code; only load statistics
        # files from a trusted workspace.
        with open(validate_statistics_path, 'rb') as pickle_file:
            validate_statistics_dict = cPickle.load(pickle_file)

        accuracy_matrix = np.array(
            [stat['accuracy'] for stat in validate_statistics_dict[source]])

        legend = '{}'.format(model_type)

        if subtask in ['a', 'b']:
            # Average over the last axis of every stored accuracy entry
            accuracy = np.mean(accuracy_matrix, axis=-1)
            results = {'accuracy': accuracy, 'legend': legend}
            print('Subtask: {}, Source: {}, Model: {} accuracy: {:.3f}'.format(
                subtask, source, model_type, accuracy[-1]))

        elif subtask == 'c':
            # The last column is reported separately as the 'unknown'
            # accuracy; the remaining columns are averaged.
            accuracy = np.mean(accuracy_matrix[:, 0:-1], axis=-1)
            unknown_accuracy = accuracy_matrix[:, -1]
            results = {
                'accuracy': accuracy,
                'unknown_accuracy': unknown_accuracy,
                'legend': legend
            }

            print('Subtask: {}, Source: {}, Model: {}, accuracy: {:.3f}, '
                  'Unknown accuracy: {:.3f}'.format(subtask, source,
                                                    model_type, accuracy[-1],
                                                    unknown_accuracy[-1]))

        else:
            # Previously fell through to `return results` with the name unbound
            raise ValueError('Unknown subtask: {}'.format(subtask))

        return results
Ejemplo n.º 3
0
    def load_imgsrc_local(self, img_dir):
        """Record one local image path per sub-directory of ``img_dir``.

        For every directory returned by ``get_subdir(img_dir)``, the first
        regular file whose text after the last '.' is listed in
        ``config.image_format`` is stored in ``self.img_path``. The slot index
        is the second-to-last path component of that directory, parsed as an
        integer, minus one.

        Returns None in all cases; prints a message and returns early when
        ``img_dir`` is not an existing directory.
        """
        print("为timecard加载图片超链接")
        if not os.path.isdir(img_dir):
            print("文件夹不存在 %s" % img_dir)
            return

        # Project-local helpers are imported lazily, only once they are needed
        from utilities import get_subdir
        from config import image_format

        for sub_dir in get_subdir(img_dir):
            images = []
            for entry in os.listdir(sub_dir):
                full_path = os.path.join(sub_dir, entry)
                if not os.path.isfile(full_path):
                    continue
                # Extension test is done on the joined path, not the bare name
                if full_path.split('.')[-1] in image_format:
                    images.append(full_path)

            if not images:
                continue

            weibo_title = int(sub_dir.split(os.path.sep)[-2])
            self.img_path[weibo_title - 1] = images[0]
Ejemplo n.º 4
0
def calculate_feature_for_all_audio_files(args):
    '''Calculate feature of audio files and write out features to a hdf5 file.

    The output file <workspace>/features_side/<config>/<sub_dir>.h5 holds the
    datasets 'audio_name', optionally 'scene_label', 'identifier' and
    'source_label' (each only when present in the metadata), and
    'feature_side' with shape (audios_num, frames_num, mel_bins).

    Args:
      args: object exposing the following attributes:
        dataset_dir: string
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: 'development' | 'evaluation'
        mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    frames_per_second = config.frames_per_second
    frames_num = config.frames_num
    total_samples = config.total_samples

    # Optional metadata columns and the fixed-width byte-string dtype each
    # one is stored with.
    optional_columns = (
        ('scene_label', 'S24'),
        ('identifier', 'S24'),
        ('source_label', 'S8'),
    )

    # Paths
    prefix = 'minidata_' if mini_data else ''

    sub_dir = get_subdir(subtask, data_type)
    metadata_path = os.path.join(dataset_dir, sub_dir, 'meta.csv')
    audios_dir = os.path.join(dataset_dir, sub_dir, 'audio')

    feature_path = os.path.join(
        workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(feature_path))

    # Feature extractor
    feature_extractor = LogMelExtractor(sample_rate=sample_rate,
                                        window_size=window_size,
                                        hop_size=hop_size,
                                        mel_bins=mel_bins,
                                        fmin=fmin,
                                        fmax=fmax)

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # Keep only a small, reproducible random subset when debugging
    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['audio_name'])
        random_state = np.random.RandomState(1234)
        # Sampling without replacement cannot draw more items than exist
        indexes = random_state.choice(
            total_num, size=min(mini_num, total_num), replace=False)
        meta_dict['audio_name'] = meta_dict['audio_name'][indexes]
        # The label columns are optional (see the dataset creation below),
        # so only subset the ones that are actually present.
        for (column, _) in optional_columns:
            if column in meta_dict:
                meta_dict[column] = meta_dict[column][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()

    # Hdf5 file for storing features and targets. The context manager closes
    # the file even if reading or transforming an audio file raises.
    with h5py.File(feature_path, 'w') as hf:

        hf.create_dataset(
            name='audio_name',
            data=[name.encode() for name in meta_dict['audio_name']],
            dtype='S80')

        for (column, dtype) in optional_columns:
            if column in meta_dict:
                hf.create_dataset(
                    name=column,
                    data=[value.encode() for value in meta_dict[column]],
                    dtype=dtype)

        # Starts empty and is grown by one item per audio file
        feature_dataset = hf.create_dataset(
            name='feature_side',
            shape=(0, frames_num, mel_bins),
            maxshape=(None, frames_num, mel_bins),
            dtype=np.float32)

        for (n, audio_name) in enumerate(meta_dict['audio_name']):
            audio_path = os.path.join(audios_dir, audio_name)
            print(n, audio_path)

            # Read audio
            (audio, _) = read_side_audio(audio_path=audio_path,
                                         target_fs=sample_rate)

            # Pad or truncate audio recording to the same length
            audio = pad_truncate_sequence(audio, total_samples)

            # Extract feature
            feature = feature_extractor.transform(audio)

            # Remove the extra log mel spectrogram frames caused by padding
            # zero
            feature = feature[0:frames_num]

            feature_dataset.resize((n + 1, frames_num, mel_bins))
            feature_dataset[n] = feature

    print('Write hdf5 file to {} using {:.3f} s'.format(
        feature_path,
        time.time() - extract_time))
Ejemplo n.º 5
0
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file.

    The checkpoint and the scalar are taken from the 'development' sub
    directory of the subtask with holdout_fold fixed to 'none'; features are
    read from, and the submission written under, the sub directory of the
    requested data_type.

    Args:
      args: object exposing the attributes listed below, plus `filename` and
          `mode`, which are used as components of the checkpoint and log
          paths.
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data

    Raises:
      ValueError: if subtask is not 'a', 'b' or 'c'.
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    in_domain_classes_num = len(config.labels) - 1

    # Paths
    prefix = 'minidata_' if mini_data else ''

    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')

    # Path components shared by every location below
    config_dir = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_dir = 'holdout_fold={}'.format(holdout_fold)

    feature_hdf5_path = os.path.join(
        workspace, 'features', config_dir, '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars', config_dir, '{}.h5'.format(trained_sub_dir))

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, config_dir,
        '{}'.format(trained_sub_dir), fold_dir, model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', config_dir, sub_dir, fold_dir, model_type,
        '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, config_dir,
        '{}'.format(sub_dir), fold_dir, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(security): eval() resolves the model class from a user-supplied
    # string; model_type must only come from a trusted command line.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')

    else:
        # Previously `model` was simply left unbound for other values
        raise ValueError('Unknown subtask: {}'.format(subtask))

    # Map the stored tensors to the CPU so that a checkpoint saved on a GPU
    # can also be loaded on a machine without CUDA; the model is moved to the
    # GPU afterwards when requested.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False,
        return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
Ejemplo n.º 6
0
def inference_validation(args):
    '''Inference and calculate metrics on validation data.

    Loads the checkpoint saved at the requested iteration and evaluates it on
    the 'validate' split: source 'a' for subtasks 'a' and 'c', sources 'a',
    'b' and 'c' for subtask 'b'.

    Args:
      args: object exposing the attributes listed below, plus `holdout_fold`,
          `filename` and `mode`, which are used as components of the
          checkpoint and log paths.
      dataset_dir: string, directory of dataset
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool

    Raises:
      ValueError: if subtask is not 'a', 'b' or 'c'.
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    in_domain_classes_num = len(config.labels) - 1

    # Paths
    prefix = 'minidata_' if mini_data else ''

    sub_dir = get_subdir(subtask, data_type)

    # Path components shared by every workspace location below
    config_dir = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_dir = 'holdout_fold={}'.format(holdout_fold)

    train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
        'fold1_train.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
        'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features', config_dir, '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars', config_dir, '{}.h5'.format(sub_dir))

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename, config_dir,
        '{}'.format(sub_dir), fold_dir, model_type,
        '{}_iterations.pth'.format(iteration))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, config_dir,
        '{}'.format(sub_dir), fold_dir, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    # NOTE(security): eval() resolves the model class from a user-supplied
    # string; model_type must only come from a trusted command line.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')

    else:
        # Previously `model` was simply left unbound for other values
        raise ValueError('Unknown subtask: {}'.format(subtask))

    # Restore the trained weights. This step had been commented out, which
    # made the function report metrics of a randomly initialised model.
    # Tensors are mapped to the CPU so a GPU-saved checkpoint also loads on a
    # machine without CUDA; the model is moved to the GPU afterwards.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        subtask=subtask,
        cuda=cuda)

    if subtask in ['a', 'c']:
        evaluator.evaluate(data_type='validate', source='a', verbose=True)

    elif subtask == 'b':
        evaluator.evaluate(data_type='validate', source='a', verbose=True)
        evaluator.evaluate(data_type='validate', source='b', verbose=True)
        evaluator.evaluate(data_type='validate', source='c', verbose=True)

    # Visualize log mel spectrogram
    if visualize:
        evaluator.visualize(data_type='validate', source='a')
Ejemplo n.º 7
0
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      args: object exposing the attributes listed below, plus `filename` and
          `mode`, which are used as components of the checkpoint, statistics
          and log paths.
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development' | 'evaluation'
      holdout_fold: '1' | 'none', set 1 for development and none for training
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      fixed: string, compared against 'True'; when equal the model is built
          with fixed=True (subtasks 'a'/'b') and only parameters with
          requires_grad set are optimized
      finetune: string, compared against 'True'; when equal, weights are
          loaded from a pretrained checkpoint before training
      ite_train: int, training stops after the iteration with this index
      ite_eva: int, evaluation only runs at iterations greater than this
      ite_store: int, checkpoints are only saved at iterations greater than
          this
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    # The flags arrive as strings; anything other than 'True' counts as off
    freeze = (args.fixed == 'True')
    finetune = (args.finetune == 'True')
    ite_train = args.ite_train
    ite_eva = args.ite_eva
    ite_store = args.ite_store

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None      # Number of mini-batches to evaluate on training data
    reduce_lr = True
    period = 200              # Iterations between evaluate / save / lr decay

    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    prefix = 'minidata_' if mini_data else ''

    sub_dir = get_subdir(subtask, data_type)

    # Path components shared by every workspace location below
    config_dir = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_dir = 'holdout_fold={}'.format(holdout_fold)

    train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
        'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features', config_dir, '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars', config_dir, '{}.h5'.format(sub_dir))

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename, config_dir,
        '{}'.format(sub_dir), fold_dir, model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename, config_dir,
        '{}'.format(sub_dir), fold_dir, model_type,
        'validate_statistics.pickle')

    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode, config_dir,
        '{}'.format(sub_dir), fold_dir, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(security): eval() resolves the model class from a user-supplied
    # string; model_type must only come from a trusted command line.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax',
            fixed=freeze)
        loss_func = nll_loss

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()

    # Optimizer. With frozen layers only the parameters that still require
    # gradients are handed to the optimizer.
    if freeze:
        trainable_params = filter(
            lambda p: p.requires_grad, model.parameters())
    else:
        trainable_params = model.parameters()

    optimizer = optim.Adam(trainable_params, lr=1e-3, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0., amsgrad=True)

    if finetune:
        # NOTE(review): the pretrained checkpoint location is a hard-coded
        # absolute path that only varies with model_type; consider deriving
        # it from `workspace` or exposing it as an argument.
        model_path = ('/home/cdd/code2/dcase2020_task1/workspace/checkpoints/'
            'main/logmel_86frames_40melbins/'
            'TAU-urban-acoustic-scenes-2020-mobile-development/'
            'holdout_fold=1/' + model_type + '/2000_iterations.pth')
        # Follow the resolved `cuda` flag instead of always mapping to CUDA,
        # which failed on machines without a GPU.
        device = torch.device('cuda' if cuda else 'cpu')
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model'])

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        subtask=subtask,
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Keys of a batch whose values are moved to the training device
    tensor_keys = ['feature', 'feature_gamm', 'feature_mfcc',
        'feature_panns', 'target']

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        at_period = (iteration % period == 0)

        # Evaluate
        if at_period and iteration > ite_eva:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                evaluator.evaluate(
                    data_type='train',
                    source=source,
                    max_iteration=max_iteration,
                    verbose=False)

            # Validation only exists when a fold is held out
            if holdout_fold != 'none':
                for source in sources_to_evaluate:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate',
                        source=source,
                        max_iteration=max_iteration,
                        verbose=False)

                    validate_statistics_container.append_and_dump(
                        iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if at_period and iteration > ite_store:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and at_period and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.93

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in tensor_keys:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train using Mixup
        model.train()
        mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(
            x1=batch_data_dict['feature'],
            x2=batch_data_dict['feature_gamm'],
            x3=batch_data_dict['feature_mfcc'],
            x4=batch_data_dict['feature_panns'],
            y=batch_data_dict['target'],
            alpha=0.2)
        batch_output, batch_loss = model(
            mixed_x1, mixed_x2, mixed_x3, mixed_x4)

        # NOTE(review): the literal 10 presumably stands for the number of
        # output classes - confirm against in_domain_classes_num.
        if batch_output.shape[1] == 10: # single scale models
            loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam)
        else:                  # multi scale models
            # Sum the mixup loss over the entries along axis 1; the last
            # entry of that axis is not included.
            loss = sum(
                mixup_criterion(
                    loss_func, batch_output[:, ite, :], y_a, y_b, lam)
                for ite in range(batch_output.shape[1] - 1))

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        # 12000 for scratch
        if iteration == ite_train:
            break

        iteration += 1
Ejemplo n.º 8
0
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development' | 'evaluation'
      holdout_fold: '1' | 'none', set 1 for development and none for training
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    sources_to_evaluate = get_sources(subtask)
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    train_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
                             'fold1_train.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
                                'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    feature_hdf5_path_left = os.path.join(
        workspace, 'features_left',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    feature_hdf5_path_right = os.path.join(
        workspace, 'features_right',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    feature_hdf5_path_side = os.path.join(
        workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    scalar_path_left = os.path.join(
        workspace, 'scalars_left',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    scalar_path_right = os.path.join(
        workspace, 'scalars_right',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    scalar_path_side = os.path.join(
        workspace, 'scalars_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')

    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)
    scalar_left = load_scalar(scalar_path_left)
    scalar_right = load_scalar(scalar_path_right)
    scalar_side = load_scalar(scalar_path_side)
    # Model
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
        loss_func = nll_loss

    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        feature_hdf5_path_left=feature_hdf5_path_left,
        feature_hdf5_path_right=feature_hdf5_path_right,
        feature_hdf5_path_side=feature_hdf5_path_side,
        train_csv=train_csv,
        validate_csv=validate_csv,
        scalar=scalar,
        scalar_left=scalar_left,
        scalar_right=scalar_right,
        scalar_side=scalar_side,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          subtask=subtask,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict, batch_data_dict_left, batch_data_dict_right, batch_data_dict_side in data_generator.generate_train(
    ):

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                train_statistics = evaluator.evaluate(data_type='train',
                                                      source=source,
                                                      max_iteration=None,
                                                      verbose=False)

            for source in sources_to_evaluate:
                validate_statistics = evaluator.evaluate(data_type='validate',
                                                         source=source,
                                                         max_iteration=None,
                                                         verbose=False)

                validate_statistics_container.append_and_dump(
                    iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        for key in batch_data_dict_left.keys():
            if key in ['feature_left', 'target']:
                batch_data_dict_left[key] = move_data_to_gpu(
                    batch_data_dict_left[key], cuda)

        for key in batch_data_dict_right.keys():
            if key in ['feature_right', 'target']:
                batch_data_dict_right[key] = move_data_to_gpu(
                    batch_data_dict_right[key], cuda)

        for key in batch_data_dict_side.keys():
            if key in ['feature_side', 'target']:
                batch_data_dict_side[key] = move_data_to_gpu(
                    batch_data_dict_side[key], cuda)

        # Train
        model.train()
        batch_output = model(data=batch_data_dict['feature'],
                             data_left=batch_data_dict_left['feature_left'],
                             data_right=batch_data_dict_right['feature_right'],
                             data_side=batch_data_dict_side['feature_side'])

        # loss
        loss = loss_func(batch_output, batch_data_dict['target'])

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 15000:
            break

        iteration += 1
Ejemplo n.º 9
0
def calculate_feature_for_all_audio_files(args):
    '''Extract per-clip features of all audio files and write them to hdf5.

    For every clip listed in the metadata, four datasets are filled in the
    output file: 'feature' (the padded / truncated waveform), 'feature_gamm',
    'feature_mfcc' and 'feature_panns' (the waveform loaded at 32 kHz).
    The metadata columns that are present ('audio_name', 'scene_label',
    'identifier', 'source_label') are stored alongside as byte strings.

    NOTE(review): `args` is currently NOT read. The settings it is documented
    to carry are hard-coded below (the attribute reads are commented out), so
    the parameter is kept only for compatibility with existing callers.

    Args:
      args: object documented to carry the following fields (ignored for now)
        dataset_dir: string
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: 'development' | 'leaderboard'
        mini_data: bool, set True for debugging on a small part of data

    Raises:
      Exception: if data_type is neither 'development' nor 'leaderboard'.
    '''

    # Arguments & parameters
    # dataset_dir = args.dataset_dir
    # workspace = args.workspace
    # subtask = args.subtask
    # data_type = args.data_type
    # mini_data = args.mini_data

    dataset_dir = 'D:/Project/DCASE_test/Data'
    workspace = 'D:/Project/DCASE_test'
    subtask = 'a'
    data_type = 'development'
    mini_data = False

    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    frames_per_second = config.frames_per_second
    frames_num = config.frames_num
    total_samples = config.total_samples
    mfcc_frames = config.mfcc_frames
    n_mfcc = config.n_mfcc
    mfcc_hop_size = config.mfcc_hop_size
    gamm_frames = config.gamm_frames
    n_gamm = config.n_gamm

    # Sample rate and per-clip length of the waveform kept in 'feature_panns'
    panns_sample_rate = 32000
    panns_samples = 320000

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    audios_dir = os.path.join(dataset_dir, sub_dir, 'audio')

    if data_type == 'development':
        metadata_path = os.path.join(dataset_dir, sub_dir, 'meta.csv')
    elif data_type == 'leaderboard':
        metadata_path = os.path.join(
            dataset_dir, sub_dir, 'evaluation_setup', 'test.csv')
    else:
        raise Exception('Incorrect data_type!')

    feature_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(
            prefix, frames_per_second, mel_bins),
        '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(feature_path))

    # Feature extractor
    feature_extractor = LogMelExtractor(
        sample_rate=sample_rate,
        window_size=window_size,
        hop_size=hop_size,
        mel_bins=mel_bins,
        fmin=fmin,
        fmax=fmax)

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # Keep only a fixed random subset of the clips when debugging
    if mini_data:
        mini_num = 300
        total_num = len(meta_dict['audio_name'])
        random_state = np.random.RandomState(1234)
        indexes = random_state.choice(total_num, size=mini_num, replace=False)
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()

    # Hdf5 file for storing features and targets. The context manager closes
    # the file even when reading or extracting one of the clips fails.
    with h5py.File(feature_path, 'w') as hf:

        hf.create_dataset(
            name='audio_name',
            data=[audio_name.encode()
                  for audio_name in meta_dict['audio_name']],
            dtype='S80')

        if 'scene_label' in meta_dict.keys():
            hf.create_dataset(
                name='scene_label',
                data=[scene_label.encode()
                      for scene_label in meta_dict['scene_label']],
                dtype='S24')

        if 'identifier' in meta_dict.keys():
            hf.create_dataset(
                name='identifier',
                data=[identifier.encode()
                      for identifier in meta_dict['identifier']],
                dtype='S24')

        if 'source_label' in meta_dict.keys():
            hf.create_dataset(
                name='source_label',
                data=[source_label.encode()
                      for source_label in meta_dict['source_label']],
                dtype='S8')

        # Resizable datasets: created empty and grown by one row per clip
        hf.create_dataset(
            name='feature',
            shape=(0, total_samples),
            maxshape=(None, total_samples),
            dtype=np.float32)
        hf.create_dataset(
            name='feature_gamm',
            shape=(0, gamm_frames, n_gamm),
            maxshape=(None, gamm_frames, n_gamm),
            dtype=np.float32)
        hf.create_dataset(
            name='feature_mfcc',
            shape=(0, mfcc_frames, n_mfcc),
            maxshape=(None, mfcc_frames, n_mfcc),
            dtype=np.float32)
        hf.create_dataset(
            name='feature_panns',
            shape=(0, panns_samples),
            maxshape=(None, panns_samples),
            dtype=np.float32)

        for (n, audio_name) in enumerate(meta_dict['audio_name']):
            audio_path = os.path.join(audios_dir, audio_name)
            print(n, audio_path)

            # Read audio
            (audio, _) = read_audio(
                audio_path=audio_path,
                target_fs=sample_rate)

            audio = pad_truncate_sequence(audio, total_samples)

            # Gammatone-based feature, transposed before storing
            (audio_gamm, _) = read_audio_gamm(
                audio_path=audio_path,
                target_fs=sample_rate)
            fea_gamm, _ = gtg_in_dB(audio_gamm, sample_rate)
            fea_gamm = fea_gamm.transpose(1, 0)

            # MFCC feature, computed at librosa's default loading rate and
            # transposed before storing
            sound, fs = librosa.load(audio_path)
            fea_mfcc = librosa.feature.mfcc(
                y=sound, sr=fs, hop_length=mfcc_hop_size, n_mfcc=n_mfcc)
            fea_mfcc = fea_mfcc.transpose(1, 0)

            # Mono waveform stored as 'feature_panns'
            (waveform, _) = librosa.core.load(
                audio_path, sr=panns_sample_rate, mono=True)

            # NOTE(review): this log-mel result is never written to the file
            # ('feature' below stores the waveform) - confirm whether it is
            # still needed before removing it.
            feature = feature_extractor.transform(audio)
            feature = feature[0 : frames_num]

            hf['feature'].resize((n + 1, total_samples))
            hf['feature'][n] = audio
            hf['feature_gamm'].resize((n + 1, gamm_frames, n_gamm))
            hf['feature_gamm'][n] = fea_gamm
            hf['feature_mfcc'].resize((n + 1, mfcc_frames, n_mfcc))
            hf['feature_mfcc'][n] = fea_mfcc
            hf['feature_panns'].resize((n + 1, panns_samples))
            hf['feature_panns'][n] = waveform

    print('Write hdf5 file to {} using {:.3f} s'.format(
        feature_path, time.time() - extract_time))
Ejemplo n.º 10
0
def calculate_scalar(args):
    '''Calculate and write out the scalars (mean, std) of the stored features.

    Reads the datasets 'feature', 'feature_gamm', 'feature_mfcc' and
    'feature_panns' from the feature hdf5 file, passes each one to
    calculate_scalar_of_tensor and writes the returned pairs to the scalar
    hdf5 file as 'mean' / 'std', 'mean_gamm' / 'std_gamm',
    'mean_mfcc' / 'std_mfcc' and 'mean_panns' / 'std_panns'.

    NOTE(review): `args` is currently NOT read. The settings it is documented
    to carry are hard-coded below (the attribute reads are commented out), so
    the parameter is kept only for compatibility with existing callers.

    Args:
      args: object documented to carry the following fields (ignored for now)
        workspace: string
        subtask: 'a' | 'b' | 'c'
        data_type: hard-coded to 'development' below
        mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    # workspace = args.workspace
    # subtask = args.subtask
    # data_type = args.data_type
    # mini_data = args.mini_data

    workspace = 'D:/Project/DCASE_test'
    subtask = 'a'
    data_type = 'development'
    mini_data = False

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    feature_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(
            prefix, frames_per_second, mel_bins),
        '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(
            prefix, frames_per_second, mel_bins),
        '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(scalar_path))

    # (dataset to read, suffix of the output names, join the per-clip arrays?)
    dataset_specs = [
        ('feature', '', False),
        ('feature_gamm', '_gamm', True),
        ('feature_mfcc', '_mfcc', True),
        ('feature_panns', '_panns', False)]

    # Load and reduce one dataset at a time so that only a single feature
    # array has to be held in memory.
    scalars = []
    with h5py.File(feature_path, 'r') as hf:
        for (dataset_name, suffix, join_clips) in dataset_specs:
            features = hf[dataset_name][:]

            if join_clips:
                # Join the per-clip arrays along their first axis
                # (assumes a 3-D dataset, as written by
                # calculate_feature_for_all_audio_files - confirm).
                features = np.concatenate(features, axis=0)

            (mean, std) = calculate_scalar_of_tensor(features)
            scalars.append(('mean{}'.format(suffix), mean))
            scalars.append(('std{}'.format(suffix), std))

    # Write out the scalars in the same order as they were computed
    with h5py.File(scalar_path, 'w') as hf:
        for (scalar_name, value) in scalars:
            hf.create_dataset(scalar_name, data=value, dtype=np.float32)

    print('Write out scalar to {}'.format(scalar_path))