def write_strong_meta_to_weak_meta(meta_csv, formated_csv):
    '''Aggregate strong (event-level) labels into weak (clip-level) labels.

    Reads a tab-separated csv with columns 'filename' and 'event_label',
    collects the unique event labels per audio file (preserving first-seen
    order), and writes a tab-separated csv with columns 'filename' and
    'event_labels' (comma-joined).

    Args:
      meta_csv: string, path of strong-label csv to read
      formated_csv: string, path of weak-label csv to write
    '''
    create_folder(os.path.dirname(formated_csv))

    # Read meta csv (read_csv already returns a DataFrame; no re-wrap needed)
    df = pd.read_csv(meta_csv, sep='\t')

    # Map audio_name -> ordered list of unique event labels.
    # Renamed from 'dict' to avoid shadowing the builtin.
    labels_dict = {}

    for _, row in df.iterrows():
        audio_name = row['filename']
        event_label = row['event_label']

        if audio_name not in labels_dict:
            labels_dict[audio_name] = [event_label]
        elif event_label not in labels_dict[audio_name]:
            labels_dict[audio_name].append(event_label)

    # Write weak labels to csv; 'with' guarantees the handle is closed
    # even if a write fails (the original left it open on error).
    with open(formated_csv, 'w') as f:
        f.write('{}\t{}\n'.format('filename', 'event_labels'))
        for audio_name, event_labels in labels_dict.items():
            f.write('{}\t{}\n'.format(audio_name, ','.join(event_labels)))

    print('Write formated_csv to {}'.format(formated_csv))
# Example #2
def create_cross_validation_file(args):
    '''Create and write out cross validation file.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      data_type: 'train_curated' | 'train_noisy'
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    data_type = args.data_type
    folds_num = config.folds_num

    # Paths
    metadata_path = os.path.join(dataset_dir, '{}.csv'.format(data_type))
    cross_validation_path = os.path.join(
        workspace, 'cross_validation_metadata',
        '{}_cross_validation.csv'.format(data_type))
    create_folder(os.path.dirname(cross_validation_path))

    # Read meta data
    meta_df = pd.read_csv(metadata_path, sep=',')

    # Assign each clip to a fold in round-robin order: 1, 2, ..., folds_num
    fold_df = pd.DataFrame()
    fold_df['fname'] = meta_df['fname']
    fold_df['fold'] = np.arange(len(meta_df)) % folds_num + 1

    fold_df.to_csv(cross_validation_path)

    print('Write cross validation file to {}'.format(cross_validation_path))
# Example #3
    def save(self, savename, dirname=None, exc=None):
        """Saves all SAM attributes to a Pickle file.

        Saves all SAM attributes to a Pickle file which can be later loaded
        into an empty SAM object.

        Parameters
        ----------
        savename - string
            The name of the pickle file (not including the file extension) to
            write to.

        dirname - string, optional, default None
            The path/name of the directory in which the Pickle file will be
            saved. If None, the file will be saved to the current working
            directory.

        exc - array-like of strings, optional, default None
            A vector of SAM attributes to exclude from the saved file.
        """
        # Populate self.pickle_dict (minus the excluded attributes).
        self._create_dict(exc)

        if (dirname is not None):
            ut.create_folder(dirname + "/")
            path = dirname + "/" + savename + ".p"
        else:
            path = savename + ".p"

        # 'with' guarantees the file handle is closed even if pickle.dump
        # raises (the original leaked the handle in that case).
        with open(path, 'wb') as f:
            pickle.dump(self.pickle_dict, f)
    def optimize_second_stage(self, z_hat, alpha_hat, max_iteration):
        '''Stage 2: Use the initalization obtained from stage 1 and do the 
        optimization on source and filter

        Inputs:
          z_hat: estimated seed, (samples_num, seed_num)
          alpha_hat: estimated filters, (samples_num, filter_len, filter_len)
          max_iteration: int

        Returns:
          z_hat: estimated seed, (samples_num, seed_num)
          alpha_hat: estimated filters, (samples_num, filter_len, filter_len)
          s_hat: estimated source, (samples_num, 1, 28, 28)
          x_hat: estimated mixture
        '''
        # Figures produced during this stage go in a dedicated sub-directory
        figures_dir = os.path.join(self.figures_dir, 'second_stage')
        create_folder(figures_dir)

        # Jointly optimize seed and filter, starting from the stage-1
        # estimates passed in as z_hat / alpha_hat
        result = self.optimize(self.x, self.s, z_hat, alpha_hat,
                               max_iteration, figures_dir)
        (z_hat, alpha_hat, s_hat, x_hat) = result

        return z_hat, alpha_hat, s_hat, x_hat
def calculate_scalar(args):
    '''Calculate and write out scalar (mean/std) of features. 
    
    Args:
      dataset_dir: string
      workspace: string
      data_type: 'train_weak'
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    workspace = args.workspace
    mini_data = args.mini_data
    data_type = args.data_type
    assert data_type == 'train_weak', 'We only support using train_weak data ' \
        'to calculate scalar. '

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    relative_name = get_relative_path_no_extension(data_type)

    feature_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(relative_name))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(relative_name))

    create_folder(os.path.dirname(scalar_path))

    # Load features. (The original recorded time.time() into an unused
    # 'load_time' local; removed.)
    with h5py.File(feature_path, 'r') as hf:
        features = hf['feature'][:]

    # Concatenate clips along the first axis, then compute statistics of the
    # resulting tensor
    features = np.concatenate(features, axis=0)
    (mean, std) = calculate_scalar_of_tensor(features)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)

    print('All features: {}'.format(features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
def split_unbalanced_csv_to_partial_csvs(args):
    """Split unbalanced csv and write out to to part csvs. Each part csv 
    contains up to 50000 ids. 
    """
    unbalanced_csv_path = args.unbalanced_csv
    unbalanced_partial_csvs_dir = args.unbalanced_partial_csvs_dir

    create_folder(unbalanced_partial_csvs_dir)

    # Read all entries, dropping the 3-line header
    with open(unbalanced_csv_path, 'r') as f:
        lines = f.readlines()[3:]

    audios_num_per_file = 50000
    files_num = int(np.ceil(len(lines) / float(audios_num_per_file)))

    for part_index in range(files_num):
        start = part_index * audios_num_per_file
        chunk = lines[start:start + audios_num_per_file]

        out_csv_path = os.path.join(
            unbalanced_partial_csvs_dir,
            'unbalanced_train_segments_part{:02d}.csv'.format(part_index))

        with open(out_csv_path, 'w') as f:
            # Reproduce the 3 placeholder header lines of the format
            f.write('empty\n' * 3)
            f.writelines(chunk)

        print('Write out csv to {}'.format(out_csv_path))
# Example #7
def create_indexes(args):
    """Create indexes for a dataloader to read for training. When users have 
    a new task and their own data, they need to create similar indexes. The 
    indexes contain meta information of "where to find the data for training".
    """
    # Arguments & parameters
    waveforms_hdf5_path = args.waveforms_hdf5_path
    indexes_hdf5_path = args.indexes_hdf5_path

    # Paths
    create_folder(os.path.dirname(indexes_hdf5_path))

    with h5py.File(waveforms_hdf5_path, 'r') as hr:
        with h5py.File(indexes_hdf5_path, 'w') as hw:
            audios_num = len(hr['audio_name'])
            hw.create_dataset('audio_name',
                              data=hr['audio_name'][:],
                              dtype='S20')
            # np.bool was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin bool is the supported equivalent and maps to the
            # same h5py boolean dtype.
            hw.create_dataset('target', data=hr['target'][:], dtype=bool)
            # Every index row points back at the source waveforms hdf5
            hw.create_dataset('hdf5_path',
                              data=[waveforms_hdf5_path.encode()] * audios_num,
                              dtype='S200')
            hw.create_dataset('index_in_hdf5',
                              data=np.arange(audios_num),
                              dtype=np.int32)

    print('Write to {}'.format(indexes_hdf5_path))
# Example #8
def inference_data_to_truncation(args):
    '''Extract penultimate-layer ("truncation") features of train/validate
    data with a trained Keras model and write them into HDF5 files.

    NOTE(review): ``train_file``, ``evaluate_file`` and ``batch_size`` are
    not defined in this function — presumably module-level globals; verify
    before running. The checkpoint filename below embeds a hard-coded
    timestamp.
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subdir = args.subdir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    filename = args.filename
    # data_type = args.data_type?

    # Paths
    hdf5_path = os.path.join(workspace, 'features', 'logmel', subdir,
                             'development_hpss_lrad.h5')

    dev_train_csv = os.path.join(dataset_dir, subdir, 'evaluation_setup',
                                 train_file)

    dev_validate_csv = os.path.join(dataset_dir, subdir, 'evaluation_setup',
                                    evaluate_file)

    # Save truncated (penultimate-layer) features here
    truncation_dir = os.path.join(workspace, 'features', 'truncation',
                                  'holdout_fold={}'.format(holdout_fold))
    create_folder(truncation_dir)

    model_path = os.path.join(
        workspace, 'models', subdir, filename,
        'holdout_fold={}'.format(holdout_fold),
        'md_{}_iters_max_attention2_2019-05-31 00:48:09.h5'.format(iteration))

    # model_path = os.path.join(workspace, 'appendixes',
    #                           'md_{}_iters_max_76.2_Vggish_two_attention.h5'.format(iteration))

    hdf5_train_path = os.path.join(truncation_dir, 'train_hpss_l+r_6900.h5')
    hdf5_validate_path = os.path.join(truncation_dir,
                                      'validate_hpss_l+r_6900.h5')
    train_hf = h5py.File(hdf5_train_path, 'w')
    validate_hf = h5py.File(hdf5_validate_path, 'w')

    # load model
    model = keras.models.load_model(model_path)

    # Backend function mapping (model input, learning phase) to the output
    # of the second-to-last layer
    layer_output = K.function(
        [model.layers[0].input, K.learning_phase()], [model.layers[-2].output])

    # Data generator
    generator = DataGenerator(hdf5_path=hdf5_path,
                              batch_size=batch_size,
                              dev_train_csv=dev_train_csv,
                              dev_validate_csv=dev_validate_csv)

    # Run the truncated model over both splits and write features out
    create_feature_in_h5py(generator,
                           layer_output,
                           train_hf,
                           data_type='train')
    create_feature_in_h5py(generator,
                           layer_output,
                           validate_hf,
                           data_type='validate')
# Example #9
def calculate_scalar(args):
    '''Calculate and write out scalar of features. 
    
    Args:
      workspace: string
      subtask: 'a' | 'b' | 'c'
      data_type: 'train'
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    feature_path = os.path.join(
        workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(scalar_path))

    # Load features. (The original recorded time.time() into an unused
    # 'load_time' local; removed.)
    with h5py.File(feature_path, 'r') as hf:
        features = hf['feature_side'][:]

    # Concatenate clips along the first axis, then compute statistics of the
    # resulting tensor
    features = np.concatenate(features, axis=0)
    (mean, std) = calculate_scalar_of_tensor(features)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)

    print('All features: {}'.format(features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
# Example #10
def fuse_sed_results(args):
    '''Fuse two SED result csvs: take some event classes from the first
    system's output and the remaining classes from the second, then write
    the combined rows to a single submission csv.

    Args:
      workspace: string, directory of workspace
      sed1_path: string, tab-separated csv of SED system 1 (label in col 3)
      sed2_path: string, tab-separated csv of SED system 2 (label in col 3)
    '''
    workspace = args.workspace
    sed1_path = args.sed1_path
    sed2_path = args.sed2_path

    out_path = os.path.join(workspace, 'submissions', 'fuse_sed_results',
                            'fused_sed.csv')
    create_folder(os.path.dirname(out_path))

    sed1_events = [
        'Frying', 'Blender', 'Running_water', 'Vacuum_cleaner',
        'Electric_shaver_toothbrush'
    ]
    sed2_events = ['Speech', 'Dog', 'Cat', 'Alarm_bell_ringing', 'Dishes']

    def _select_rows(csv_path, keep_events, skip_events):
        '''Return rows whose label (column 3) is in keep_events; rows with a
        label in skip_events are dropped; any other label is an error.'''
        selected = []
        with open(csv_path, 'r') as f:
            for li in csv.reader(f, delimiter='\t'):
                label = li[3]
                if label in keep_events:
                    selected.append(li)
                elif label in skip_events:
                    pass
                else:
                    raise Exception('Error!')
        return selected

    # The two passes are symmetric: keep sed1's classes from file 1 and
    # sed2's classes from file 2 (the original duplicated this loop inline).
    new_list = _select_rows(sed1_path, sed1_events, sed2_events)
    new_list += _select_rows(sed2_path, sed2_events, sed1_events)

    # 'with' guarantees the output file is closed even on error (the
    # original used a bare open/close pair).
    with open(out_path, 'w') as f:
        for li in new_list:
            f.write('\t'.join(li))
            f.write('\n')

    print('Write out to {}'.format(out_path))
# Example #11
def inference_development_data_bottleneck_features(args):
    '''Extract bottleneck features of development data with a trained
    PyTorch model and write them to an HDF5 file.
    '''
    # Arguments & parameters
    workspace = args.workspace
    validate = args.validate
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    cuda = args.cuda

    batch_size = 64
    filename = 'main_pytorch'

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel',
                                 'development.h5')

    # Validation mode uses the per-fold model; otherwise the full-train one
    if validate:
        model_sub_dir = 'holdout_fold={}'.format(holdout_fold)
        bottleneck_sub_dir = 'dev_holdout_fold={}'.format(holdout_fold)
    else:
        model_sub_dir = 'full_train'
        bottleneck_sub_dir = 'dev_full_train'

    model_path = os.path.join(workspace, 'models', filename, model_sub_dir,
                              'md_{}_iters.tar'.format(iteration))

    bottleneck_hdf5_path = os.path.join(
        workspace, 'bottlenecks', filename, bottleneck_sub_dir,
        '{}_iters'.format(iteration), 'bottleneck.h5')

    create_folder(os.path.dirname(bottleneck_hdf5_path))

    # Load model from checkpoint
    model = Model()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator over the full training split, un-shuffled
    generator = DataGenerator(hdf5_path=dev_hdf5_path,
                              batch_size=batch_size,
                              validation_csv=None,
                              holdout_fold=None)

    generate_func = generator.generate_validate(
        data_type='train', shuffle=False, max_iteration=None)

    # Write bottleneck features
    write_bottleneck_features_to_hdf5(
        model, generate_func, bottleneck_hdf5_path, cuda, return_target=True)
# Example #12
def inference_leaderboard_data(args):
    '''Run a trained Keras model on leaderboard data and write the predicted
    labels to a submission csv.

    NOTE(review): ``batch_size`` is not defined in this function —
    presumably a module-level global; verify.
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    dev_subdir = args.dev_subdir
    leaderboard_subdir = args.leaderboard_subdir
    workspace = args.workspace
    iteration = args.iteration
    filename = args.filename

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel', dev_subdir,
                                 'development.h5')

    test_hdf5_path = os.path.join(workspace, 'features', 'logmel',
                                  leaderboard_subdir, 'leaderboard_hpss.h5')

    model_path = os.path.join(workspace, 'models', dev_subdir, filename,
                              'full_train', 'md_{}_iters.h5'.format(iteration))
    print(model_path)

    submission_path = os.path.join(workspace, 'submissions',
                                   leaderboard_subdir, filename,
                                   'iteration={}'.format(iteration),
                                   'submission1.csv')

    create_folder(os.path.dirname(submission_path))

    # Load model
    model = keras.models.load_model(model_path)

    # Data generator
    generator = TestDataGenerator(dev_hdf5_path=dev_hdf5_path,
                                  test_hdf5_path=test_hdf5_path,
                                  batch_size=batch_size)

    generate_func = generator.generate_test()

    # Predict. (Renamed from 'dict', which shadowed the builtin; also
    # removed the unused 'labels'/'ix_to_lb'/'classes_num' locals.)
    results = forward(model=model,
                      generate_func=generate_func,
                      return_target=False)

    audio_names = results['audio_name']  # (audios_num,)
    outputs = results['output']  # (audios_num, classes_num)

    # Hard prediction: most probable class per clip
    predictions = np.argmax(outputs, axis=-1)  # (audios_num,)

    # Write result to submission csv
    write_leaderboard_submission(submission_path, audio_names, predictions)
# Example #13
def inference_testing_data(args):
    '''Run a trained PyTorch model on testing data and write out a
    submission csv.

    NOTE(review): ``batch_size`` is not defined in this function —
    presumably a module-level global; verify.
    '''
    # Arguments & parameters
    workspace = args.workspace
    iteration = args.iteration
    filename = args.filename
    cuda = args.cuda

    # Paths. (The unused 'validate = True' local was removed.)
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel',
                                 'development.h5')

    test_hdf5_path = os.path.join(workspace, 'features', 'logmel', 'test.h5')

    model_path = os.path.join(workspace, 'models', filename, 'full_train',
                              'md_{}_iters.tar'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions', filename,
                                   'iteration={}'.format(iteration),
                                   'submission.csv')

    create_folder(os.path.dirname(submission_path))

    # Load model from checkpoint
    model = Model()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator
    generator = TestDataGenerator(dev_hdf5_path=dev_hdf5_path,
                                  test_hdf5_path=test_hdf5_path,
                                  batch_size=batch_size)

    generate_func = generator.generate_test()

    # Inference. (Renamed from 'dict', which shadowed the builtin.)
    results = forward(model=model,
                      generate_func=generate_func,
                      cuda=cuda,
                      return_target=False,
                      return_bottleneck=False)

    outputs = results['output']
    itemids = results['itemid']

    # Write out submission file
    write_testing_data_submission_csv(submission_path, itemids, outputs)
    def optimize_first_stage(self, repeats_num, max_iteration):
        '''Stage 1: Set several initialization and select the best 
        initialization. 

        Inputs:
          repeats_num: int, number of initializations
          max_iteration: int

        Returns:
          z_hat: estimated seed, (samples_num, seed_num)
          alpha_hat: estimated filters, (samples_num, filter_len, filter_len)
          s_hat: estimated source, (samples_num, 1, 28, 28)
          x_hat: estimated mixture
        '''

        # Paths
        first_stage_figures_dir = os.path.join(self.figures_dir, 'first_stage')
        create_folder(first_stage_figures_dir)

        # Repeat mixture and target for applying different initializations for
        # a single mixture; each mixture occupies a contiguous block of
        # repeats_num rows in the repeated batch.
        repeated_x = np.repeat(self.x, repeats=repeats_num, axis=0)
        repeated_s = np.repeat(self.s, repeats=repeats_num, axis=0)
        samples_num = repeated_x.shape[0]

        # Initialize seed and filter
        z_hat = np.random.normal(loc=0.,
                                 scale=1,
                                 size=(samples_num, self.seed_num))

        # NOTE(review): initialized as a 1-D array of ones, although the
        # docstring describes alpha_hat as (samples_num, filter_len,
        # filter_len) — presumably self.optimize reshapes or broadcasts it;
        # confirm.
        alpha_hat = np.ones(samples_num)

        # Optimize on seed and filter
        (z_hat, alpha_hat, s_hat,
         x_hat) = self.optimize(repeated_x, repeated_s, z_hat, alpha_hat,
                                max_iteration, first_stage_figures_dir)

        # Find the indice of the best initialization for each input mixture
        indices = self.find_best_initialize_indice(x_hat, repeated_x,
                                                   repeats_num)

        # Convert per-mixture relative indices into absolute row indices of
        # the repeated batch (block n starts at row n * repeats_num)
        for n in range(len(indices)):
            indices[n] = indices[n] + n * repeats_num

        # Keep only the best-initialization estimate of each mixture
        z_hat = z_hat[indices]
        alpha_hat = alpha_hat[indices]
        s_hat = s_hat[indices]
        x_hat = x_hat[indices]

        return z_hat, alpha_hat, s_hat, x_hat
# Example #15
def dcase2017task4(args):
    """Create black list. Black list is a list of audio ids that will be 
    skipped in training. 
    """
    # Arguments & parameters
    workspace = args.workspace

    # Paths
    dcase2017task4_dataset_dir = '/vol/vssp/msos/qk/datasets/dcase2017/task4/dataset_root'

    test_weak_csv = os.path.join(
        dcase2017task4_dataset_dir,
        'metadata/groundtruth_weak_label_testing_set.csv')
    evaluation_weak_csv = os.path.join(
        dcase2017task4_dataset_dir,
        'metadata/groundtruth_weak_label_evaluation_set.csv')

    black_list_csv = os.path.join(workspace, 'black_list',
                                  'dcase2017task4.csv')
    create_folder(os.path.dirname(black_list_csv))

    def get_id_sets(csv_path):
        '''Return the unique audio ids (first 11 characters of column 0)
        found in a tab-separated csv.'''
        with open(csv_path, 'r') as fr:
            reader = csv.reader(fr, delimiter='\t')
            lines = list(reader)

        ids_set = [line[0][0:11] for line in lines]
        return list(set(ids_set))

    test_ids_set = get_id_sets(test_weak_csv)
    evaluation_ids_set = get_id_sets(evaluation_weak_csv)

    full_ids_set = test_ids_set + evaluation_ids_set

    # Write black list. 'with' guarantees the file is flushed and closed
    # (the original never closed it); also renamed the loop variable from
    # 'id', which shadowed the builtin.
    with open(black_list_csv, 'w') as fw:
        for audio_id in full_ids_set:
            fw.write('{}\n'.format(audio_id))

    print('Write black list to {}'.format(black_list_csv))
def plot_complexity_map(args):
    '''Plot model computational complexity (multiply-adds) against mAP for a
    set of architectures and save the figure as a PDF.
    '''
    # Paths
    save_out_path = 'results/complexity_mAP.pdf'
    create_folder(os.path.dirname(save_out_path))

    # The original called plt.figure(figsize=(5, 5)) and then plt.subplots(),
    # which created (and leaked) an empty figure whose figsize was never
    # used; apply the size to the plotted figure directly instead.
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    model_types = np.array(['Cnn6', 'Cnn10', 'Cnn14', 'ResNet22', 'ResNet38', 'ResNet54', 
        'MobileNetV1', 'MobileNetV2', 'DaiNet', 'LeeNet', 'LeeNet18', 
        'Res1dNet30', 'Res1dNet44', 'Wavegram-CNN', 'Wavegram-\nLogmel-CNN'])
    flops = np.array([21.986, 28.166, 42.220, 30.081, 48.962, 54.563, 3.614, 2.810, 
        30.395, 4.741, 26.369, 32.688, 61.833, 44.234, 53.510])
    mAPs = np.array([0.343, 0.380, 0.431, 0.430, 0.434, 0.429, 0.389, 0.383, 0.295, 
        0.266, 0.336, 0.365, 0.355, 0.389, 0.439])

    # (An unused 'sorted_indexes = np.sort(flops)' local was removed.)
    ax.scatter(flops, mAPs)

    # Per-model (x, y) offsets that keep the annotations readable
    shift = [[-5.5, -0.004], [1, -0.004], [-1, -0.014], [-2, 0.006], [-7, 0.006], 
        [1, -0.01], [0.5, 0.004], [-1, -0.014], [1, -0.007], [0.8, -0.008], 
        [1, -0.007], [1, 0.002], [-6, -0.015], [1, -0.008], [0.8, 0]]

    for i, model_type in enumerate(model_types):
        ax.annotate(model_type, (flops[i] + shift[i][0], mAPs[i] + shift[i][1]))

    # Connect members of the same architecture family
    ax.plot(flops[[0, 1, 2]], mAPs[[0, 1, 2]])
    ax.plot(flops[[3, 4, 5]], mAPs[[3, 4, 5]])
    ax.plot(flops[[6, 7]], mAPs[[6, 7]])
    ax.plot(flops[[9, 10]], mAPs[[9, 10]])
    ax.plot(flops[[11, 12]], mAPs[[11, 12]])
    ax.plot(flops[[13, 14]], mAPs[[13, 14]])

    ax.set_xlim(0, 70)
    ax.set_ylim(0.2, 0.5)
    # NOTE(review): this label looks garbled (likely originally
    # 'Multi-adds (million)'); kept byte-identical pending confirmation.
    ax.set_xlabel('Multi-load_statisticss (million)', fontsize=15)
    ax.set_ylabel('mAP', fontsize=15)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

    # Positional arguments to tight_layout were removed in Matplotlib 3.6;
    # use keywords (same values as the original's tight_layout(0, 0, 0)).
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)

    plt.savefig(save_out_path)
    print('Write out figure to {}'.format(save_out_path))
# Example #17
    def _start_service(self):
        """Prepare output folders for the configured storage provider, then
        run the task handler's event loop until all tasks complete.

        On any unhandled exception the error is logged and the liveness
        probe is set to False (so an orchestrator can restart the service).
        """
        storage_provider = self.__config['storage_provider'].upper()
        try:
            create_folder(constants.VRT_OUTPUT_FOLDER_NAME)

            # File-system storage needs a local tiles output folder;
            # S3 storage instead configures GDAL's S3 access.
            # NOTE(review): comparing an upper-cased string against
            # StorageProvider members — assumes the enum values compare
            # equal to plain strings; confirm.
            if (storage_provider == StorageProvider.FS):
                tiles_output_folder = self.__config['fs'][
                    'internal_outputs_path']
                create_folder(tiles_output_folder)

            elif (storage_provider == StorageProvider.S3):
                set_gdal_s3()

            self.loop.run_until_complete(self.__task_handler.handle_tasks())
        except Exception as e:
            self.log.error(
                'Error occurred during running service: {0}'.format(e))
            probe.liveness = False
# Example #18
def inference_testing_data_bottleneck_features(args):
    '''Extract bottleneck features of testing data with a trained PyTorch
    model (full-train checkpoint) and write them to an HDF5 file.
    '''
    # Arguments & parameters
    workspace = args.workspace
    iteration = args.iteration
    cuda = args.cuda

    # (The unused 'validate = True' local was removed.)
    batch_size = 64
    filename = 'main_pytorch'

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel',
                                 'development.h5')

    test_hdf5_path = os.path.join(workspace, 'features', 'logmel', 'test.h5')

    model_path = os.path.join(workspace, 'models', filename, 'full_train',
                              'md_{}_iters.tar'.format(iteration))

    bottleneck_hdf5_path = os.path.join(
        workspace, 'bottlenecks', filename, 'test_full_train',
        '{}_iters'.format(iteration), 'bottleneck.h5')

    create_folder(os.path.dirname(bottleneck_hdf5_path))

    # Load model from checkpoint
    model = Model()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator
    generator = TestDataGenerator(dev_hdf5_path=dev_hdf5_path,
                                  test_hdf5_path=test_hdf5_path,
                                  batch_size=batch_size)

    generate_func = generator.generate_test()

    # Write bottleneck features
    write_bottleneck_features_to_hdf5(
        model, generate_func, bottleneck_hdf5_path, cuda, return_target=False)
def plot_classwise_iteration_map(args):
    '''Plot per-class average precision (AP) against training iteration for
    three groups of classes (head / middle / tail of the class-frequency
    distribution) and save the figure as a PDF.

    NOTE(review): reads a hard-coded statistics pickle path; verify it
    exists in the working directory before running.
    '''
    # Paths
    save_out_path = 'results/classwise_iteration_map.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Load statistics
    statistics_dict = pickle.load(open('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_WavegramLogmelCnn_balanced_mixup_bs32.pkl', 'rb'))

    # One row of per-class AP per evaluation checkpoint
    mAP_mat = np.array([e['average_precision'] for e in statistics_dict['test']])
    mAP_mat = mAP_mat[0 : 300, :]   # 300 * 2000 = 600k iterations
    # Class indices ordered from most to least training samples
    sorted_indexes = np.argsort(config.full_samples_per_class)[::-1]

    fig, axs = plt.subplots(1, 3, figsize=(20, 5))
    # Head (0-9), middle (250-259) and tail (517-526) classes by frequency
    ranges = [np.arange(0, 10), np.arange(250, 260), np.arange(517, 527)]
    axs[0].set_ylabel('AP')

    for col in range(0, 3):
        axs[col].set_ylim(0, 1.)
        axs[col].set_xlim(0, 301)
        axs[col].set_xlabel('Iterations')
        axs[col].set_ylabel('AP')
        # Checkpoints are 2k iterations apart: ticks at 0/100/200/300
        # checkpoints correspond to 0/200k/400k/600k iterations
        axs[col].xaxis.set_ticks(np.arange(0, 301, 100))
        axs[col].xaxis.set_ticklabels(['0', '200k', '400k', '600k'])
        lines = []
        for _ix in ranges[col]:
            # Legend entry: class label plus its training sample count
            _label = crop_label(config.labels[sorted_indexes[_ix]]) + \
                ' ({})'.format(add_comma(config.full_samples_per_class[sorted_indexes[_ix]]))
            line, = axs[col].plot(mAP_mat[:, sorted_indexes[_ix]], label=_label)
            lines.append(line)
        box = axs[col].get_position()
        axs[col].set_position([box.x0, box.y0, box.width * 1., box.height])
        axs[col].legend(handles=lines, bbox_to_anchor=(1., 1.))
        axs[col].yaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)
 
    plt.tight_layout(pad=4, w_pad=1, h_pad=1)
    plt.savefig(save_out_path)
    print(save_out_path)
def rename_annotation_files(work_dir):
    '''Copy images (and their LabelMe-style .json annotations) from a source
    folder into a "Renamed ..." folder, renaming them to sequential names
    and updating each json's 'imagePath' to the new image name.

    NOTE(review): ``src_folder_name``, ``item_name``, ``allowed_image_types``
    and the ``utilities`` module are not defined in this function —
    presumably module-level globals/imports; verify.
    '''
    src_folder_dir = os.path.join(work_dir, src_folder_name)
    # Trailing-separator join so dirname/basename below behave consistently
    src_folder_dir = os.path.join(src_folder_dir, '')
    if not os.path.exists(src_folder_dir):
        print(src_folder_dir + " does not exist")
        return

    print(os.path.basename(os.path.dirname(src_folder_dir)))
    dest_folder_dir = utilities.create_folder(work_dir, "Renamed " + os.path.basename(os.path.dirname(src_folder_dir)))
    
    img_file_count = 0
    for filename in os.listdir(src_folder_dir):
        # NOTE(review): 'name' is the lower-cased FULL path stem (splitext of
        # the joined path), so 'json_filename' below is an absolute path;
        # os.path.join(src_folder_dir, json_filename) then returns
        # json_filename unchanged — works, but relies on that behavior and
        # on a case-insensitive filesystem for the .lower().
        name, ext = os.path.splitext(os.path.join(src_folder_dir, filename))
        name, ext = name.lower(), ext.lower()
        if ext.endswith(allowed_image_types):
            img_file_count += 1
            
            # Copy image #
            new_name = item_name+"_"+str(img_file_count)
            new_img_name = new_name+ext
            
            utilities.copy_and_rename_file(src_folder_dir, dest_folder_dir, filename, new_img_name)
            
            # Copy corresponding json #
            
            json_filename = name + ".json"
            new_json_filename = new_name + ".json"
            
            # Images without an annotation file are copied but not renamed
            # in json form
            if not os.path.exists(os.path.join(src_folder_dir, json_filename)):
                continue
            
            new_json_dir = utilities.copy_and_rename_file(src_folder_dir, dest_folder_dir, json_filename, new_json_filename)
            
            # Read and write to json file #
            
            # Point the copied annotation at the renamed image
            with open(new_json_dir, 'r') as f:
                data = json.load(f)
                data['imagePath'] = new_img_name
                with open(new_json_dir, 'w') as f2:
                    json.dump(data, f2, indent=1)
            
    print("Finished Renaming " + str(img_file_count) + " images and corresponding json files.")
# Example #21
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and 
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    # Only use CUDA when requested AND actually available.
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    seq_len = 640
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    # NOTE(review): max_iteration is never used below — both evaluate() calls
    # pass max_iteration=None. Possibly a leftover setting; confirm.
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths. The 'minidata_' prefix selects the small debugging feature files.
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    # NOTE(review): the scalar file shares the 'train.h5' name with the
    # features file but lives under 'scalars/' — confirm this is intended and
    # not a copy-paste of the train features path.
    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    # Scratch submission file the evaluator overwrites on every validation run.
    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar (normalization statistics computed on the training set)
    scalar = load_scalar(scalar_path)

    # Model. eval() maps the CLI string to a model class defined in the
    # models module — safe only for trusted command-line input.
    Model = eval(model_type)
    model = Model(classes_num, seq_len, mel_bins, cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)
    print('cliqueNet parameters:',
          sum(param.numel() for param in model.parameters()))
    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics container: appends validation metrics and dumps to pickle.
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 200 iterations (including iteration 0)
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)

            # Evaluate on validation data.
            # NOTE(review): compares against lowercase 'none' while the
            # docstring documents holdout_fold as '1' | 'None' — if the CLI
            # passes 'None', validation runs even in full-train mode; confirm
            # the argparse choices.
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 1000 iterations (skip iteration 0)
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate by 10% every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss against the target of the selected taxonomy level
        batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning after a fixed number of iterations
        if iteration == 3000:
            break

        iteration += 1
def plot_six_figures(args):
    """Plot the six mAP-vs-iterations comparison panels of the PANNs paper
    (architectures, training data/augmentation, embedding size, amount of
    training data, sampling rate, mel-bin count) and save the figure to
    results/six_figures.pdf.
    """

    # Arguments & parameters. Statistics were dumped every 2000 iterations up
    # to 540k, so len(iterations) is the x-axis extent of every panel.
    iterations = np.arange(0, 540000, 2000)

    # Paths
    save_out_path = 'results/six_figures.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Plot
    fig, ax = plt.subplots(2, 3, figsize=(14, 7))
    bal_alpha = 0.3    # faint curve: mAP on the balanced training subset
    test_alpha = 1.0   # solid curve: mAP on the evaluation set
    linewidth = 1.

    def _pkl(system, mel=64):
        # Build a statistics pickle path; only the mel-bin count and the
        # training-configuration suffix vary between systems.
        return ('paper_statistics/statistics_sr32000_window1024_hop320_'
                'mel{}_fmin50_fmax14000_{}.pkl'.format(mel, system))

    def _curves(axis, pkl_path, label, color):
        # Draw the balanced-subset curve (faint) and the eval curve (solid).
        # Returns the eval line for the legend. Consistency fix: the original
        # omitted `linewidth` on two balanced curves in panel (f).
        (bal_map, test_map) = load_statistics(pkl_path)
        axis.plot(bal_map, color=color, alpha=bal_alpha, linewidth=linewidth)
        line, = axis.plot(test_map, label=label, color=color,
                          alpha=test_alpha, linewidth=linewidth)
        return line

    # (axis, title, legend fontsize or None, [(pickle path, label, color)])
    panels = [
        (ax[0, 0], '(a) Comparison of architectures', None, [
            (_pkl('full_train_WavegramLogmelCnn_balanced_mixup_bs32'), 'Wavegram-Logmel-CNN', 'g'),
            (_pkl('full_train_Cnn14_balanced_mixup_bs32'), 'CNN14', 'r'),
            (_pkl('full_train_MobileNetV1_balanced_mixup_bs32'), 'MobileNetV1', 'b')]),
        (ax[0, 1], '(b) Comparison of training data and augmentation', 8, [
            (_pkl('full_train_Cnn14_balanced_mixup_bs32'), 'CNN14,bal,mixup (1.9m)', 'r'),
            (_pkl('full_train_Cnn14_balanced_mixup_timedomain_bs32'), 'CNN14,bal,mixup-wav (1.9m)', 'y'),
            (_pkl('full_train_Cnn14_balanced_nomixup_bs32'), 'CNN14,bal,no-mixup (1.9m)', 'g'),
            (_pkl('full_train_Cnn14_nobalanced_nomixup_bs32'), 'CNN14,no-bal,no-mixup (1.9m)', 'b'),
            (_pkl('balanced_train_Cnn14_balanced_mixup_bs32'), 'CNN14,bal,mixup (20k)', 'm'),
            (_pkl('balanced_train_Cnn14_balanced_nomixup_bs32'), 'CNN14,bal,no-mixup (20k)', 'k')]),
        (ax[0, 2], '(c) Comparison of embedding size', None, [
            (_pkl('full_train_Cnn14_balanced_mixup_bs32'), 'CNN14,emb=2048', 'r'),
            (_pkl('full_train_Cnn14_emb128_balanced_mixup_bs32'), 'CNN14,emb=128', 'g'),
            (_pkl('full_train_Cnn14_emb32_balanced_mixup_bs32'), 'CNN14,emb=32', 'b')]),
        (ax[1, 0], '(d) Comparison of amount of training data', None, [
            (_pkl('full_train_Cnn14_balanced_mixup_bs32'), 'CNN14 (100% full)', 'r'),
            (_pkl('0.8full_train_Cnn14_balanced_mixup_bs32'), 'CNN14 (80% full)', 'b'),
            (_pkl('0.5full_train_Cnn14_balanced_mixup_bs32'), 'cnn14 (50% full)', 'g')]),
        (ax[1, 1], '(e) Comparison of sampling rate', None, [
            (_pkl('full_train_Cnn14_balanced_mixup_bs32'), 'CNN14,32kHz', 'r'),
            (_pkl('full_train_Cnn14_16k_balanced_mixup_bs32'), 'CNN14,16kHz', 'b'),
            (_pkl('full_train_Cnn14_8k_balanced_mixup_bs32'), 'CNN14,8kHz', 'g')]),
        (ax[1, 2], '(f) Comparison of mel bins number', None, [
            (_pkl('full_train_Cnn14_balanced_mixup_bs32', mel=128), 'CNN14,128-melbins', 'g'),
            (_pkl('full_train_Cnn14_balanced_mixup_bs32'), 'CNN14,64-melbins', 'r'),
            (_pkl('full_train_Cnn14_balanced_mixup_bs32', mel=32), 'CNN14,32-melbins', 'b')]),
    ]

    for axis, title, legend_fontsize, systems in panels:
        lines = [_curves(axis, path, label, color)
                 for path, label, color in systems]
        if legend_fontsize is None:
            axis.legend(handles=lines, loc=2)
        else:
            axis.legend(handles=lines, loc=2, fontsize=legend_fontsize)
        axis.set_title(title)

    # Shared axis cosmetics for all six panels
    for i in range(2):
        for j in range(3):
            ax[i, j].set_ylim(0, 0.8)
            ax[i, j].set_xlim(0, len(iterations))
            ax[i, j].set_xlabel('Iterations')
            ax[i, j].set_ylabel('mAP')
            ax[i, j].xaxis.set_ticks(np.arange(0, len(iterations), 50))
            ax[i, j].xaxis.set_ticklabels(['0', '100k', '200k', '300k', '400k', '500k'])
            ax[i, j].yaxis.set_ticks(np.arange(0, 0.81, 0.05))
            ax[i, j].yaxis.set_ticklabels(['0', '', '0.1', '', '0.2', '', '0.3',
                '', '0.4', '', '0.5', '', '0.6', '', '0.7', '', '0.8'])
            ax[i, j].yaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)
            ax[i, j].xaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)

    # Bug fix: tight_layout no longer accepts positional arguments (deprecated
    # in matplotlib 3.3, removed in 3.5); (0, 1, 0) meant pad=0, h_pad=1,
    # w_pad=0.
    plt.tight_layout(pad=0, h_pad=1, w_pad=0)
    plt.savefig(save_out_path)
    print('Save figure to {}'.format(save_out_path))
def plot_long_fig(args):
    """Plot the per-class AP of four AudioSet tagging systems over all classes
    (sorted, spread across four stacked rows), together with the per-class
    number of training clips and the human label-quality estimates, and save
    the figure to results/long_fig.pdf.
    """

    # Paths. Close the stats file deterministically (the original leaked the
    # handle from pickle.load(open(...))).
    with open('paper_statistics/stats_for_long_fig.pkl', 'rb') as f:
        stats = pickle.load(f)

    save_out_path = 'results/long_fig.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Load meta
    N = len(config.labels)
    sorted_indexes = stats['sorted_indexes_for_plot']
    sorted_labels = np.array(config.labels)[sorted_indexes]
    audio_clips_per_class = stats['official_balanced_training_samples'] + stats['official_unbalanced_training_samples']
    audio_clips_per_class = audio_clips_per_class[sorted_indexes]

    # Prepare axes for plot: four rows, each with a clip-count axis (*a) and
    # an AP axis (*b).
    (ax1a, ax2a, ax3a, ax4a, ax1b, ax2b, ax3b, ax4b) = prepare_plot_long_4_rows(sorted_labels)

    # Plot the number of training samples as faint background bars on all rows
    for axis in (ax1a, ax2a, ax3a, ax4a):
        axis.bar(np.arange(N), audio_clips_per_class, alpha=0.3)

    def _sorted_ap(key):
        # Per-class AP of one system, reordered to the plotting order.
        return stats[key]['eval']['average_precision'][sorted_indexes]

    # Load mAP of different systems
    """Average instance system of [1] with an mAP of 0.317.
    [1] Kong, Qiuqiang, Changsong Yu, Yong Xu, Turab Iqbal, Wenwu Wang, and 
    Mark D. Plumbley. "Weakly labelled audioset tagging with attention neural 
    networks." IEEE/ACM Transactions on Audio, Speech, and Language Processing 
    27, no. 11 (2019): 1791-1802."""
    maps_avg_instances = _sorted_ap('averaging_instance_system_avg_9_probs_from_10000_to_50000_iterations')
    maps_panns_cnn14 = _sorted_ap('panns_cnn14')
    maps_panns_mobilenetv1 = _sorted_ap('panns_mobilenetv1')
    maps_panns_wavegram_logmel_cnn14 = _sorted_ap('panns_wavegram_logmel_cnn14')

    # Plot APs as both scatter points and connecting lines
    _scatter_4_rows(maps_panns_wavegram_logmel_cnn14, ax1b, ax2b, ax3b, ax4b, s=5, c='g')
    _scatter_4_rows(maps_panns_cnn14, ax1b, ax2b, ax3b, ax4b, s=5, c='r')
    _scatter_4_rows(maps_panns_mobilenetv1, ax1b, ax2b, ax3b, ax4b, s=5, c='b')
    _scatter_4_rows(maps_avg_instances, ax1b, ax2b, ax3b, ax4b, s=5, c='k')

    linewidth = 0.7
    line0te = _plot_4_rows(maps_panns_wavegram_logmel_cnn14, ax1b, ax2b, ax3b, ax4b,
        c='g', linewidth=linewidth, label='AP with Wavegram-Logmel-CNN')
    line1te = _plot_4_rows(maps_panns_cnn14, ax1b, ax2b, ax3b, ax4b, c='r',
        linewidth=linewidth, label='AP with CNN14')
    line2te = _plot_4_rows(maps_panns_mobilenetv1, ax1b, ax2b, ax3b, ax4b, c='b',
        linewidth=linewidth, label='AP with MobileNetV1')
    line3te = _plot_4_rows(maps_avg_instances, ax1b, ax2b, ax3b, ax4b, c='k',
        linewidth=linewidth, label='AP with averaging instances (baseline)')

    # Plot label quality. Quality of exactly 1 is clipped to 0.99 so the
    # marker stays inside the axis.
    label_quality = stats['label_quality']
    sorted_label_quality = np.array(label_quality)[sorted_indexes]
    for k in range(len(sorted_label_quality)):
        if sorted_label_quality[k] and sorted_label_quality[k] == 1:
            sorted_label_quality[k] = 0.99

    # Element-wise comparison against None on an object array: marks the
    # classes that have (or lack) a measured label quality.
    has_quality = sorted_label_quality != None
    for axis in (ax1b, ax2b, ax3b):
        axis.scatter(np.arange(N)[has_quality],
            sorted_label_quality[has_quality], s=12, c='r', linewidth=0.8, marker='+')
    line_label_quality = ax4b.scatter(np.arange(N)[has_quality],
        sorted_label_quality[has_quality], s=12, c='r', linewidth=0.8,
        marker='+', label='Label quality')

    # Classes without a quality estimate get a horizontal dash at 0.5
    no_quality_x = np.arange(N)[sorted_label_quality == None]
    for axis in (ax1b, ax2b, ax3b, ax4b):
        axis.scatter(no_quality_x, 0.5 * np.ones(len(no_quality_x)),
            s=12, c='r', linewidth=0.8, marker='_')

    plt.legend(handles=[line0te, line1te, line2te, line3te, line_label_quality], fontsize=6, loc=1)
    # Bug fix: tight_layout positional args were removed in matplotlib 3.5;
    # (0, 0, 0) meant pad=0, h_pad=0, w_pad=0.
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.savefig(save_out_path)
    print('Save fig to {}'.format(save_out_path))
def calculate_logmel_features(args):
    """Extract log-mel and STFT features for every mixture listed in
    mixture.yaml and write them, together with targets and fold indices, to a
    single development hdf5 file.

    Args:
      workspace: string, directory of workspace
      scene_type: string, acoustic scene type of the mixtures
      snr: int, signal-to-noise ratio of the mixtures
    """

    # Arguments & parameters
    workspace = args.workspace
    scene_type = args.scene_type
    snr = args.snr

    sample_rate = config.sample_rate
    window_size = config.window_size
    overlap = config.overlap
    seq_len = config.seq_len
    mel_bins = config.mel_bins
    stft_bins = window_size // 2 + 1
    classes_num = len(config.labels)
    lb_to_ix = config.lb_to_ix

    # Paths
    audio_dir = os.path.join(workspace, 'mixed_audios',
                             'scene_type={},snr={}'.format(scene_type, snr))

    yaml_path = os.path.join(workspace, 'mixture.yaml')

    hdf5_path = os.path.join(workspace, 'features', 'logmel',
                             'scene_type={},snr={}'.format(scene_type, snr),
                             'development.h5')

    create_folder(os.path.dirname(hdf5_path))

    # Load mixture yaml
    load_time = time.time()

    # Bug fix: yaml.load() without an explicit Loader is unsafe on untrusted
    # input and a TypeError on PyYAML >= 6. The mixture list contains only
    # plain mappings, so safe_load suffices.
    with open(yaml_path, 'r') as f:
        data_list = yaml.safe_load(f)

    logging.info('Loading mixture yaml time: {} s'
                 ''.format(time.time() - load_time))

    # Feature extractor
    feature_extractor = LogMelExtractor(sample_rate=sample_rate,
                                        window_size=window_size,
                                        overlap=overlap,
                                        mel_bins=mel_bins)

    # Create hdf5 file with resizable datasets; rows are appended per mixture.
    write_hdf5_time = time.time()

    hf = h5py.File(hdf5_path, 'w')

    hf.create_dataset(name='mixture_logmel',
                      shape=(0, seq_len, mel_bins),
                      maxshape=(None, seq_len, mel_bins),
                      dtype=np.float32)

    hf.create_dataset(name='mixture_stft',
                      shape=(0, seq_len, stft_bins),
                      maxshape=(None, seq_len, stft_bins),
                      dtype=np.float32)

    hf.create_dataset(name='events_stft',
                      shape=(0, seq_len, stft_bins),
                      maxshape=(None, seq_len, stft_bins),
                      dtype=np.float32)

    hf.create_dataset(name='scene_stft',
                      shape=(0, seq_len, stft_bins),
                      maxshape=(None, seq_len, stft_bins),
                      dtype=np.float32)

    hf.create_dataset(name='target',
                      shape=(0, classes_num),
                      maxshape=(None, classes_num),
                      dtype=np.int32)

    mixture_names = []

    folds = []

    for n, data in enumerate(data_list):

        if n % 10 == 0:
            logging.info('{} / {} audio features calculated'
                         ''.format(n, len(data_list)))

        mixed_audio_name = data['mixture_name']
        mixed_audio_path = os.path.join(audio_dir, mixed_audio_name)

        mixture_names.append(data['mixture_name'])
        folds.append(data['fold'])

        # Extract feature
        features_dict = calculate_logmel(audio_path=mixed_audio_path,
                                         sample_rate=sample_rate,
                                         feature_extractor=feature_extractor)

        # Write out features
        hf['mixture_logmel'].resize((n + 1, seq_len, mel_bins))
        hf['mixture_logmel'][n] = features_dict['mixture_logmel']

        hf['mixture_stft'].resize((n + 1, seq_len, stft_bins))
        hf['mixture_stft'][n] = features_dict['mixture_stft']

        hf['events_stft'].resize((n + 1, seq_len, stft_bins))
        hf['events_stft'][n] = features_dict['events_stft']

        hf['scene_stft'].resize((n + 1, seq_len, stft_bins))
        hf['scene_stft'][n] = features_dict['scene_stft']

        # Write out target (multi-hot vector over classes)
        target = get_target_from_events(data['events'], lb_to_ix)
        hf['target'].resize((n + 1, classes_num))
        hf['target'][n] = target

    # NOTE(review): 'S20' truncates audio names longer than 20 bytes — confirm
    # mixture names never exceed that.
    hf.create_dataset(name='audio_name',
                      data=[s.encode() for s in mixture_names],
                      dtype='S20')

    hf.create_dataset(name='fold', data=folds, dtype=np.int32)

    hf.close()

    # Bug fix: the original duplicated the create_dataset / close / logging
    # tail here, calling create_dataset on an already-closed file (raises).
    logging.info('Write out hdf5 file to {}'.format(hdf5_path))
    logging.info('Time spent: {} s'.format(time.time() - write_hdf5_time))


if __name__ == '__main__':
    # Command-line entry point: dispatches to feature extraction by sub-command.
    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    # 'logmel' sub-command: extract log-mel features for one scene/SNR setting
    parser_logmel = subparsers.add_parser('logmel')
    parser_logmel.add_argument('--workspace', type=str, required=True)
    parser_logmel.add_argument('--scene_type', type=str, required=True)
    parser_logmel.add_argument('--snr', type=int, required=True)

    args = parser.parse_args()

    logs_dir = os.path.join(args.workspace, 'logs', get_filename(__file__))
    create_folder(logs_dir)
    # NOTE(review): this rebinds the name 'logging', shadowing the logging
    # module — presumably create_logging returns the configured module/logger;
    # verify against its definition.
    logging = create_logging(logs_dir, filemode='w')

    logging.info(args)

    if args.mode == 'logmel':
        calculate_logmel_features(args)

    else:
        # Guard against an unrecognized / missing sub-command
        raise Exception('Incorrect arguments!')
def inference_evaluation_data(args):
    """Run a model trained on the full development set over the evaluation
    set and write the predicted labels to a submission csv.

    Args:
      dev_subdir / eval_subdir: strings, dataset sub-directories
      workspace: string, directory of workspace
      iteration: int, checkpoint iteration to load
      filename: string, name of the training script (checkpoint sub-folder)
      cuda: bool
    """

    # Arugments & parameters
    dev_subdir = args.dev_subdir
    eval_subdir = args.eval_subdir
    workspace = args.workspace
    iteration = args.iteration
    filename = args.filename
    cuda = args.cuda
    labels = config.labels
    classes_num = len(labels)

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel', dev_subdir,
                                 'development.h5')

    test_hdf5_path = os.path.join(workspace, 'features', 'logmel', eval_subdir,
                                  'evaluation.h5')

    model_path = os.path.join(workspace, 'models', dev_subdir, filename,
                              'full_train',
                              'md_{}_iters.tar'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions', eval_subdir,
                                   filename, 'iteration={}'.format(iteration),
                                   'submission.csv')

    create_folder(os.path.dirname(submission_path))

    # Load model
    model = Model(classes_num)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator (batch_size is a module-level setting)
    generator = TestDataGenerator(dev_hdf5_path=dev_hdf5_path,
                                  test_hdf5_path=test_hdf5_path,
                                  batch_size=batch_size)

    generate_func = generator.generate_test()

    # Predict. Renamed from `dict`, which shadowed the builtin.
    output_dict = forward(model=model,
                          generate_func=generate_func,
                          cuda=cuda,
                          return_target=False)

    audio_names = output_dict['audio_name']  # (audios_num,)
    outputs = output_dict['output']  # (audios_num, classes_num)

    predictions = np.argmax(outputs, axis=-1)  # (audios_num,)

    # Write result to submission csv. Bug fix: the original also opened
    # submission_path here without closing or using the handle — a leaked
    # file object; write_evaluation_submission manages the file itself.
    write_evaluation_submission(submission_path, audio_names, predictions)
def train(args):
    '''Train a scene-classification model on log-mel features and save
    checkpoints every 1000 iterations.

    Args:
      args: argparse namespace with attributes:
        dataset_dir: string, directory of dataset
        subdir: string, data sub-directory; devices a/b/c are evaluated
            when 'mobile' appears in its name
        workspace: string, directory of workspace
        filename: string, folder name used for saved models
        validate: bool, hold out a fold for validation when True
        holdout_fold: fold id used when validate is True
        mini_data: bool, train on the small debug subset when True
        cuda: bool, move model/data to GPU when True
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subdir = args.subdir
    workspace = args.workspace
    filename = args.filename
    validate = args.validate
    holdout_fold = args.holdout_fold
    mini_data = args.mini_data
    cuda = args.cuda

    labels = config.labels

    # Mobile subsets were recorded with three devices; others with one
    if 'mobile' in subdir:
        devices = ['a', 'b', 'c']
    else:
        devices = ['a']

    classes_num = len(labels)

    # Paths
    if mini_data:
        hdf5_path = os.path.join(workspace, 'features', 'logmel', subdir,
                                 'mini_development.h5')
    else:
        hdf5_path = os.path.join(workspace, 'features', 'logmel', subdir,
                                 'development.h5')

    if validate:

        dev_train_csv = os.path.join(dataset_dir, subdir, 'evaluation_setup',
                                     'fold{}_train.txt'.format(holdout_fold))

        dev_validate_csv = os.path.join(
            dataset_dir, subdir, 'evaluation_setup',
            'fold{}_evaluate.txt'.format(holdout_fold))

        models_dir = os.path.join(workspace, 'models', subdir, filename,
                                  'holdout_fold={}'.format(holdout_fold))

    else:
        dev_train_csv = None
        dev_validate_csv = None

        models_dir = os.path.join(workspace, 'models', subdir, filename,
                                  'full_train')

    create_folder(models_dir)

    # Model
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Data generator.
    # NOTE(review): batch_size is resolved from an enclosing scope
    # (module level / config) -- confirm it is defined at import time.
    generator = DataGenerator(hdf5_path=hdf5_path,
                              batch_size=batch_size,
                              dev_train_csv=dev_train_csv,
                              dev_validate_csv=dev_validate_csv)

    # Optimizer
    lr = 1e-3
    optimizer = optim.Adam(model.parameters(),
                           lr=lr,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.)

    train_bgn_time = time.time()

    # Train on mini batches
    for (iteration, (batch_x,
                     batch_y)) in enumerate(generator.generate_train()):

        # Evaluate every 100 iterations
        if iteration % 100 == 0:

            train_fin_time = time.time()

            (tr_acc, tr_loss) = evaluate(model=model,
                                         generator=generator,
                                         data_type='train',
                                         devices=devices,
                                         max_iteration=None,
                                         cuda=cuda)

            logging.info('tr_acc: {:.3f}, tr_loss: {:.3f}'.format(
                tr_acc, tr_loss))

            if validate:

                (va_acc, va_loss) = evaluate(model=model,
                                             generator=generator,
                                             data_type='validate',
                                             devices=devices,
                                             max_iteration=None,
                                             cuda=cuda)

                logging.info('va_acc: {:.3f}, va_loss: {:.3f}'.format(
                    va_acc, va_loss))

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model every 1000 iterations
        if iteration % 1000 == 0 and iteration > 0:

            save_out_dict = {
                'iteration': iteration,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            save_out_path = os.path.join(models_dir,
                                         'md_{}_iters.tar'.format(iteration))
            torch.save(save_out_dict, save_out_path)
            logging.info('Model saved to {}'.format(save_out_path))

        # Reduce learning rate every 200 iterations.
        # BUG FIX: the original `if iteration % 200 == 0 > 0:` is a chained
        # comparison `(iteration % 200 == 0) and (0 > 0)`, which is always
        # False, so the learning rate was never reduced.
        if iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Train
        batch_x = move_data_to_gpu(batch_x, cuda)
        batch_y = move_data_to_gpu(batch_y, cuda)

        model.train()
        batch_output = model(batch_x)

        loss = F.nll_loss(batch_output, batch_y)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 15000:
            break
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file. 
    
    Args: 
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    # Inference always uses the model trained on all data (no holdout)
    holdout_fold = 'none'

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Last label is out-of-domain and excluded from the classifier head
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    # Features come from the inference split; scalar/checkpoint from the
    # development split the model was trained on
    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')

    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(trained_sub_dir))

    checkpoint_path = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(trained_sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        sub_dir, 'holdout_fold={}'.format(holdout_fold), model_type, 
        '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar (feature normalization statistics)
    scalar = load_scalar(scalar_path)

    # Load model. NOTE: eval() on a command-line string executes arbitrary
    # code -- model_type must come from a trusted source only.
    Model = eval(model_type)

    # Subtasks a/b are single-label (logsoftmax head); subtask c is
    # multi-label (sigmoid head). The loss is irrelevant at inference time,
    # so the unused loss_func assignments from the original were removed.
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        scalar=scalar, 
        batch_size=batch_size)

    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False, 
        return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
Exemple #29
0
def calculate_feature_for_all_audio_files(args):
    '''Calculate feature of audio files and write out features to a hdf5 file. 
    
    Args:
      dataset_dir: string
      workspace: string
      subtask: 'a' | 'b' | 'c'
      data_type: 'development' | 'evaluation'
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    frames_per_second = config.frames_per_second
    frames_num = config.frames_num
    total_samples = config.total_samples

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    metadata_path = os.path.join(dataset_dir, sub_dir, 'meta.csv')
    audios_dir = os.path.join(dataset_dir, sub_dir, 'audio')

    feature_path = os.path.join(
        workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(feature_path))

    # Feature extractor
    feature_extractor = LogMelExtractor(sample_rate=sample_rate,
                                        window_size=window_size,
                                        hop_size=hop_size,
                                        mel_bins=mel_bins,
                                        fmin=fmin,
                                        fmax=fmax)

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # In mini-data mode, sub-sample a fixed random subset for debugging
    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['audio_name'])
        random_state = np.random.RandomState(1234)
        indexes = random_state.choice(total_num, size=mini_num, replace=False)
        meta_dict['audio_name'] = meta_dict['audio_name'][indexes]
        meta_dict['scene_label'] = meta_dict['scene_label'][indexes]
        meta_dict['identifier'] = meta_dict['identifier'][indexes]
        meta_dict['source_label'] = meta_dict['source_label'][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()

    # Hdf5 file for storing features and targets. Using a context manager
    # (instead of the original bare open/close) guarantees the file is
    # closed even if feature extraction raises mid-loop.
    with h5py.File(feature_path, 'w') as hf:

        hf.create_dataset(
            name='audio_name',
            data=[audio_name.encode() for audio_name in meta_dict['audio_name']],
            dtype='S80')

        # Metadata fields are optional; evaluation data may lack labels
        if 'scene_label' in meta_dict.keys():
            hf.create_dataset(name='scene_label',
                              data=[
                                  scene_label.encode()
                                  for scene_label in meta_dict['scene_label']
                              ],
                              dtype='S24')

        if 'identifier' in meta_dict.keys():
            hf.create_dataset(name='identifier',
                              data=[
                                  identifier.encode()
                                  for identifier in meta_dict['identifier']
                              ],
                              dtype='S24')

        if 'source_label' in meta_dict.keys():
            hf.create_dataset(name='source_label',
                              data=[
                                  source_label.encode()
                                  for source_label in meta_dict['source_label']
                              ],
                              dtype='S8')

        # Resizable dataset, grown one clip at a time below
        hf.create_dataset(name='feature_side',
                          shape=(0, frames_num, mel_bins),
                          maxshape=(None, frames_num, mel_bins),
                          dtype=np.float32)

        for (n, audio_name) in enumerate(meta_dict['audio_name']):
            audio_path = os.path.join(audios_dir, audio_name)
            print(n, audio_path)

            # Read audio
            (audio, _) = read_side_audio(audio_path=audio_path,
                                         target_fs=sample_rate)

            # Pad or truncate audio recording to the same length
            audio = pad_truncate_sequence(audio, total_samples)

            # Extract feature
            feature = feature_extractor.transform(audio)

            # Remove the extra log mel spectrogram frames caused by padding zero
            feature = feature[0:frames_num]

            hf['feature_side'].resize((n + 1, frames_num, mel_bins))
            hf['feature_side'][n] = feature

    print('Write hdf5 file to {} using {:.3f} s'.format(
        feature_path,
        time.time() - extract_time))
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'development' | 'evaluation'
      holdout_fold: '1' | 'none', set 1 for development and none for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''
    
    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    # NOTE(review): fixed and finetune are compared against the STRING
    # 'True' below, so they are expected to arrive as strings from argparse
    fixed = args.fixed
    finetune = args.finetune
    ite_train = args.ite_train   # iteration at which training stops
    ite_eva = args.ite_eva       # evaluation starts after this iteration
    ite_store = args.ite_store   # checkpointing starts after this iteration
    
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None      # Number of mini-batches to evaluate on training data
    reduce_lr = True
    
    sources_to_evaluate = get_sources(subtask)
    # Last label is out-of-domain and excluded from the classifier head
    in_domain_classes_num = len(config.labels) - 1
    
    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''
    
    sub_dir = get_subdir(subtask, data_type)
    
    train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv')
        
    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup', 
        'fold1_evaluate.csv')
                
    feature_hdf5_path = os.path.join(workspace, 'features', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    scalar_path = os.path.join(workspace, 'scalars', 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}.h5'.format(sub_dir))
        
    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(workspace, 'statistics', filename, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), 
        model_type, 'validate_statistics.pickle')
    
    create_folder(os.path.dirname(validate_statistics_path))
    
    logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
        '{}'.format(sub_dir), 'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar (feature normalization statistics)
    scalar = load_scalar(scalar_path)
    
    # Model. NOTE: eval() on a command-line string executes arbitrary code;
    # model_type must come from a trusted source only.
    Model = eval(model_type)
    
    # Subtasks a/b are single-label (logsoftmax + NLL); subtask c is
    # multi-label (sigmoid + BCE). `fixed` freezes part of the network.
    if subtask in ['a', 'b']:
        if fixed=='True':
            model = Model(in_domain_classes_num, activation='logsoftmax', fixed=True)
        else :
            model = Model(in_domain_classes_num, activation='logsoftmax', fixed=False)
        loss_func = nll_loss
        
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()
    
    # Optimizer: when layers are frozen, only optimize trainable parameters
    if fixed=='True':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3, betas=(0.9, 0.999),
                         eps=1e-08, weight_decay=0., amsgrad=True)
    else :
        optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                               eps=1e-08, weight_decay=0., amsgrad=True)

    # Optionally warm-start from a pretrained checkpoint.
    # HACK: absolute machine-specific path; commented alternatives select
    # other pretrained backbones. Should be made configurable.
    if finetune=='True':
        model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/'+model_type+'/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Res38/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Cnn14/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Cnn10/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_MobileNetV2/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_MobileNetV1/2000_iterations.pth'
        #model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/Logmel_Wavegram_Cnn14/2000_iterations.pth'
        device = torch.device('cuda')
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model'])
        
    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path, 
        train_csv=train_csv, 
        validate_csv=validate_csv, 
        holdout_fold=holdout_fold, 
        scalar=scalar, 
        batch_size=batch_size)
    
    # Evaluator
    evaluator = Evaluator(
        model=model, 
        data_generator=data_generator, 
        subtask=subtask, 
        cuda=cuda)
    
    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    
    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():
        
        # Evaluate every 200 iterations, once past the warm-up (ite_eva)
        #1800
        if iteration % 200 == 0 and iteration > ite_eva:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                train_statistics = evaluator.evaluate(
                    data_type='train', 
                    source=source, 
                    max_iteration=None, 
                    verbose=False)
            
            # Validation only runs when a fold is actually held out
            if holdout_fold != 'none':
                for source in sources_to_evaluate:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate', 
                        source=source, 
                        max_iteration=None, 
                        verbose=False)

                    validate_statistics_container.append_and_dump(
                        iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 200 iterations, once past ite_store
        if iteration % 200 == 0 and iteration > ite_store:
            checkpoint = {
                'iteration': iteration, 
                'model': model.state_dict(), 
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))
                
            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))
            
        # Reduce learning rate by 7% every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.93
        
        # Move data to GPU (only tensor-valued entries; string metadata stays on CPU)
        for key in batch_data_dict.keys():
            if key in ['feature', 'feature_gamm', 'feature_mfcc', 'feature_panns', 'target']:
                batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)
        
        # Train
#         batch_output,batch_loss = model(batch_data_dict['feature'], batch_data_dict['feature_gamm'], batch_data_dict['feature_mfcc'], batch_data_dict['feature_panns'])
#         loss = loss_func(batch_output, batch_data_dict['target'])
    
        # Using Mixup: blend all four feature views with one shared lambda,
        # then combine the losses for the two mixed targets
        model.train()
        mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(x1=batch_data_dict['feature'], x2=batch_data_dict['feature_gamm'], x3=batch_data_dict['feature_mfcc'], x4=batch_data_dict['feature_panns'], y=batch_data_dict['target'], alpha=0.2)
        batch_output,batch_loss = model(mixed_x1, mixed_x2, mixed_x3, mixed_x4)

        # Dimension 1 of size 10 identifies a single-scale model output;
        # otherwise each scale (last slot excluded) contributes a loss term
        if batch_output.shape[1] == 10: # single scale models
            loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam)
        else:                  # multi scale models
            losses = []
            for ite in range(batch_output.shape[1]-1):
                loss = mixup_criterion(loss_func, batch_output[:,ite,:], y_a, y_b, lam)
                losses.append(loss)
            loss = sum(losses)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        # 12000 for scratch
        if iteration == ite_train:
            break
            
        iteration += 1