def write_strong_meta_to_weak_meta(meta_csv, formated_csv):
    """Aggregate strong (per-event) labels into weak (per-clip) labels.

    Reads a tab-separated metadata csv with 'filename' and 'event_label'
    columns, and writes a csv mapping each filename to the comma-joined
    list of its unique event labels (first-seen order preserved).

    Args:
      meta_csv: str, path of input tab-separated strong-label csv
      formated_csv: str, path of output weak-label csv
    """
    create_folder(os.path.dirname(formated_csv))

    # Read meta csv (the redundant pd.DataFrame(df) wrapper is dropped;
    # read_csv already returns a DataFrame).
    df = pd.read_csv(meta_csv, sep='\t')

    # Collect the unique event labels of each audio clip, keeping the
    # order in which they first appear. Renamed from `dict`, which
    # shadowed the builtin.
    labels_dict = {}
    for _, row in df.iterrows():
        audio_name = row['filename']
        event_label = row['event_label']
        if audio_name not in labels_dict:
            labels_dict[audio_name] = [event_label]
        elif event_label not in labels_dict[audio_name]:
            labels_dict[audio_name].append(event_label)

    # Write weak labels to csv; 'with' guarantees the file is closed.
    with open(formated_csv, 'w') as f:
        f.write('{}\t{}\n'.format('filename', 'event_labels'))
        for audio_name, event_labels in labels_dict.items():
            f.write('{}\t{}\n'.format(audio_name, ','.join(event_labels)))

    print('Write formated_csv to {}'.format(formated_csv))
def create_cross_validation_file(args):
    '''Create and write out cross validation file.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      data_type: 'train_curated' | 'train_noisy'
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    data_type = args.data_type
    folds_num = config.folds_num

    # Paths
    metadata_path = os.path.join(dataset_dir, '{}.csv'.format(data_type))

    cross_validation_path = os.path.join(workspace,
        'cross_validation_metadata',
        '{}_cross_validation.csv'.format(data_type))
    create_folder(os.path.dirname(cross_validation_path))

    # Read metadata
    meta_df = pd.read_csv(metadata_path, sep=',')

    # Assign folds cyclically: 1, 2, ..., folds_num, 1, 2, ...
    fold_df = pd.DataFrame({
        'fname': meta_df['fname'],
        'fold': np.arange(len(meta_df)) % folds_num + 1})

    fold_df.to_csv(cross_validation_path)
    print('Write cross validation file to {}'.format(cross_validation_path))
def save(self, savename, dirname=None, exc=None):
    """Saves all SAM attributes to a Pickle file.

    Saves all SAM attributes to a Pickle file which can be later loaded
    into an empty SAM object.

    Parameters
    ----------
    savename - string
        The name of the pickle file (not including the file extension) to
        write to.

    dirname - string, optional, default None
        The path/name of the directory in which the Pickle file will be
        saved. If None, the file will be saved to the current working
        directory.

    exc - array-like of strings, optional, default None
        A vector of SAM attributes to exclude from the saved file.
    """
    # Build self.pickle_dict from the current attributes (minus `exc`).
    self._create_dict(exc)

    if dirname is not None:
        ut.create_folder(dirname + "/")
        path = dirname + "/" + savename + ".p"
    else:
        path = savename + ".p"

    # 'with' guarantees the handle is closed even if pickling fails.
    with open(path, 'wb') as f:
        pickle.dump(self.pickle_dict, f)
def optimize_second_stage(self, z_hat, alpha_hat, max_iteration):
    '''Stage 2: Use the initalization obtained from stage 1 and do the
    optimization on source and filter.

    Inputs:
      z_hat: estimated seed, (samples_num, seed_num)
      alpha_hat: estimated filters, (samples_num, filter_len, filter_len)
      max_iteration: int

    Returns:
      z_hat: estimated seed, (samples_num, seed_num)
      alpha_hat: estimated filters, (samples_num, filter_len, filter_len)
      s_hat: estimated source, (samples_num, 1, 28, 28)
      x_hat: estimated mixture
    '''
    # Figures of this stage go to their own sub-directory.
    stage_figures_dir = os.path.join(self.figures_dir, 'second_stage')
    create_folder(stage_figures_dir)

    # Run the shared optimization loop on the real mixture and target;
    # it already returns the (z_hat, alpha_hat, s_hat, x_hat) tuple.
    return self.optimize(self.x, self.s, z_hat, alpha_hat, max_iteration,
                         stage_figures_dir)
def calculate_scalar(args):
    '''Calculate and write out scalar.

    Args:
      dataset_dir: string
      workspace: string
      data_type: 'train_weak'
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    workspace = args.workspace
    mini_data = args.mini_data
    data_type = args.data_type

    assert data_type == 'train_weak', 'We only support using train_weak data ' \
        'to calculate scalar. '

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''

    relative_name = get_relative_path_no_extension(data_type)

    feature_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(relative_name))

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(relative_name))
    create_folder(os.path.dirname(scalar_path))

    # Load data (the unused `load_time = time.time()` was removed).
    with h5py.File(feature_path, 'r') as hf:
        features = hf['feature'][:]

    # Pool all clips along the time axis, then compute per-bin mean/std.
    features = np.concatenate(features, axis=0)
    (mean, std) = calculate_scalar_of_tensor(features)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)

    print('All features: {}'.format(features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
def split_unbalanced_csv_to_partial_csvs(args):
    """Split the unbalanced training csv into part csvs holding up to
    50000 ids each.
    """
    unbalanced_csv_path = args.unbalanced_csv
    unbalanced_partial_csvs_dir = args.unbalanced_partial_csvs_dir

    create_folder(unbalanced_partial_csvs_dir)

    with open(unbalanced_csv_path, 'r') as f:
        lines = f.readlines()

    # Drop the three-line header of the source csv.
    lines = lines[3:]

    audios_num_per_file = 50000
    files_num = int(np.ceil(len(lines) / float(audios_num_per_file)))

    for r in range(files_num):
        begin = r * audios_num_per_file
        chunk = lines[begin : begin + audios_num_per_file]

        out_csv_path = os.path.join(unbalanced_partial_csvs_dir,
            'unbalanced_train_segments_part{:02d}.csv'.format(r))

        with open(out_csv_path, 'w') as f:
            # Re-emit a three-line placeholder header so every part csv
            # keeps the same layout as the original file.
            for _ in range(3):
                f.write('empty\n')
            f.writelines(chunk)

        print('Write out csv to {}'.format(out_csv_path))
def create_indexes(args):
    """Create indexes a for dataloader to read for training. When users have
    a new task and their own data, they need to create similar indexes. The
    indexes contain meta information of "where to find the data for training".
    """

    # Arguments & parameters
    waveforms_hdf5_path = args.waveforms_hdf5_path
    indexes_hdf5_path = args.indexes_hdf5_path

    # Paths
    create_folder(os.path.dirname(indexes_hdf5_path))

    with h5py.File(waveforms_hdf5_path, 'r') as hr:
        with h5py.File(indexes_hdf5_path, 'w') as hw:
            audios_num = len(hr['audio_name'])
            hw.create_dataset('audio_name', data=hr['audio_name'][:],
                dtype='S20')
            # `np.bool` was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin `bool` is the documented replacement and maps to
            # the same HDF5 storage type.
            hw.create_dataset('target', data=hr['target'][:], dtype=bool)
            # Every index row points back at the source waveform hdf5.
            hw.create_dataset('hdf5_path',
                data=[waveforms_hdf5_path.encode()] * audios_num,
                dtype='S200')
            hw.create_dataset('index_in_hdf5', data=np.arange(audios_num),
                dtype=np.int32)

    print('Write to {}'.format(indexes_hdf5_path))
def inference_data_to_truncation(args):
    """Extract penultimate-layer ("truncated") features from a trained Keras
    model and write them to train/validate HDF5 files.
    """
    # Arugments & parameters
    dataset_dir = args.dataset_dir
    subdir = args.subdir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    filename = args.filename
    # data_type = args.data_type?

    # Paths
    hdf5_path = os.path.join(workspace, 'features', 'logmel', subdir,
                             'development_hpss_lrad.h5')
    # NOTE(review): train_file, evaluate_file and batch_size are not defined
    # in this function — presumably module-level globals; confirm.
    dev_train_csv = os.path.join(dataset_dir, subdir, 'evaluation_setup',
                                 train_file)
    dev_validate_csv = os.path.join(dataset_dir, subdir, 'evaluation_setup',
                                    evaluate_file)

    # Save truncated features
    truncation_dir = os.path.join(workspace, 'features', 'truncation',
                                  'holdout_fold={}'.format(holdout_fold))
    create_folder(truncation_dir)

    # NOTE(review): the model path embeds a hard-coded training timestamp;
    # this breaks for any other training run — consider parameterizing.
    model_path = os.path.join(
        workspace, 'models', subdir, filename,
        'holdout_fold={}'.format(holdout_fold),
        'md_{}_iters_max_attention2_2019-05-31 00:48:09.h5'.format(iteration))
    # model_path = os.path.join(workspace, 'appendixes',
    #     'md_{}_iters_max_76.2_Vggish_two_attention.h5'.format(iteration))

    hdf5_train_path = os.path.join(truncation_dir, 'train_hpss_l+r_6900.h5')
    hdf5_validate_path = os.path.join(truncation_dir,
                                      'validate_hpss_l+r_6900.h5')
    train_hf = h5py.File(hdf5_train_path, 'w')
    validate_hf = h5py.File(hdf5_validate_path, 'w')

    # load model
    model = keras.models.load_model(model_path)
    # Backend function mapping (input, learning-phase flag) to the output of
    # the second-to-last layer, i.e. the truncated features.
    layer_output = K.function(
        [model.layers[0].input, K.learning_phase()],
        [model.layers[-2].output])

    # Data generator
    generator = DataGenerator(hdf5_path=hdf5_path,
                              batch_size=batch_size,
                              dev_train_csv=dev_train_csv,
                              dev_validate_csv=dev_validate_csv)

    create_feature_in_h5py(generator, layer_output, train_hf,
                           data_type='train')
    create_feature_in_h5py(generator, layer_output, validate_hf,
                           data_type='validate')
def calculate_scalar(args):
    '''Calculate and write out scalar of features.

    Args:
      workspace: string
      subtask: 'a' | 'b' | 'c'
      data_type: 'train'
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    # Paths
    prefix = 'minidata_' if mini_data else ''

    sub_dir = get_subdir(subtask, data_type)

    feature_path = os.path.join(
        workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(
        workspace, 'scalars_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(scalar_path))

    # Load data (the unused `load_time = time.time()` was removed).
    with h5py.File(feature_path, 'r') as hf:
        features = hf['feature_side'][:]

    # Pool all clips along the time axis, then compute per-bin mean/std.
    features = np.concatenate(features, axis=0)
    (mean, std) = calculate_scalar_of_tensor(features)

    with h5py.File(scalar_path, 'w') as hf:
        hf.create_dataset('mean', data=mean, dtype=np.float32)
        hf.create_dataset('std', data=std, dtype=np.float32)

    print('All features: {}'.format(features.shape))
    print('mean: {}'.format(mean))
    print('std: {}'.format(std))
    print('Write out scalar to {}'.format(scalar_path))
def fuse_sed_results(args):
    """Fuse two sound-event-detection result csvs into one, each source csv
    contributing only its own set of event classes.

    Rows labeled with an event outside both class lists raise an Exception.
    """
    workspace = args.workspace
    sed1_path = args.sed1_path
    sed2_path = args.sed2_path

    out_path = os.path.join(workspace, 'submissions', 'fuse_sed_results',
        'fused_sed.csv')
    create_folder(os.path.dirname(out_path))

    sed1_events = ['Frying', 'Blender', 'Running_water', 'Vacuum_cleaner',
        'Electric_shaver_toothbrush']
    sed2_events = ['Speech', 'Dog', 'Cat', 'Alarm_bell_ringing', 'Dishes']

    def _collect(path, keep_events, drop_events):
        # Keep rows whose label (4th column) is in keep_events; silently
        # skip labels owned by the other system; anything else is an error.
        kept = []
        with open(path, 'r') as f:
            for li in csv.reader(f, delimiter='\t'):
                label = li[3]
                if label in keep_events:
                    kept.append(li)
                elif label in drop_events:
                    pass
                else:
                    raise Exception('Error!')
        return kept

    new_list = _collect(sed1_path, sed1_events, sed2_events)
    new_list += _collect(sed2_path, sed2_events, sed1_events)

    # 'with' guarantees the output file is closed (original leaked f on
    # exceptions); the dead gzip comment was removed.
    with open(out_path, 'w') as f:
        for li in new_list:
            f.write('\t'.join(li))
            f.write('\n')

    print('Write out to {}'.format(out_path))
def inference_development_data_bottleneck_features(args):
    """Extract bottleneck features of the development data with a trained
    PyTorch model and write them to an HDF5 file.
    """
    # Arugments & parameters
    workspace = args.workspace
    validate = args.validate
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    cuda = args.cuda

    batch_size = 64
    filename = 'main_pytorch'

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel',
                                 'development.h5')

    # Choose between the held-out-fold model and the full-train model; the
    # output path mirrors the choice.
    if validate:
        model_path = os.path.join(workspace, 'models', filename,
                                  'holdout_fold={}'.format(holdout_fold),
                                  'md_{}_iters.tar'.format(iteration))

        bottleneck_hdf5_path = os.path.join(
            workspace, 'bottlenecks', filename,
            'dev_holdout_fold={}'.format(holdout_fold),
            '{}_iters'.format(iteration), 'bottleneck.h5')
    else:
        model_path = os.path.join(workspace, 'models', filename,
                                  'full_train',
                                  'md_{}_iters.tar'.format(iteration))

        bottleneck_hdf5_path = os.path.join(
            workspace, 'bottlenecks', filename, 'dev_full_train',
            '{}_iters'.format(iteration), 'bottleneck.h5')

    create_folder(os.path.dirname(bottleneck_hdf5_path))

    # Load model from the checkpoint's 'state_dict' entry.
    model = Model()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator over the whole development set (no validation split).
    generator = DataGenerator(hdf5_path=dev_hdf5_path,
                              batch_size=batch_size,
                              validation_csv=None,
                              holdout_fold=None)

    # Deterministic order (shuffle=False) so features align with targets.
    generate_func = generator.generate_validate(
        data_type='train', shuffle=False, max_iteration=None)

    # Write bottleneck features
    write_bottleneck_features_to_hdf5(
        model, generate_func, bottleneck_hdf5_path, cuda, return_target=True)
def inference_leaderboard_data(args):
    """Run a fully-trained Keras model on the leaderboard data and write a
    submission csv of the argmax class predictions.
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    dev_subdir = args.dev_subdir
    leaderboard_subdir = args.leaderboard_subdir
    workspace = args.workspace
    iteration = args.iteration
    filename = args.filename

    labels = config.labels
    ix_to_lb = config.ix_to_lb

    classes_num = len(labels)

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel', dev_subdir,
        'development.h5')
    test_hdf5_path = os.path.join(workspace, 'features', 'logmel',
        leaderboard_subdir, 'leaderboard_hpss.h5')

    model_path = os.path.join(workspace, 'models', dev_subdir, filename,
        'full_train', 'md_{}_iters.h5'.format(iteration))
    print(model_path)

    submission_path = os.path.join(workspace, 'submissions',
        leaderboard_subdir, filename, 'iteration={}'.format(iteration),
        'submission1.csv')
    create_folder(os.path.dirname(submission_path))

    # Load model
    model = keras.models.load_model(model_path)

    # Data generator
    generator = TestDataGenerator(dev_hdf5_path=dev_hdf5_path,
                                  test_hdf5_path=test_hdf5_path,
                                  batch_size=batch_size)

    generate_func = generator.generate_test()

    # Predict. Renamed from `dict`, which shadowed the builtin.
    results = forward(model=model,
                      generate_func=generate_func,
                      return_target=False)

    audio_names = results['audio_name']    # (audios_num,)
    outputs = results['output']    # (audios_num, classes_num)

    predictions = np.argmax(outputs, axis=-1)    # (audios_num,)

    # Write result to submission csv
    write_leaderboard_submission(submission_path, audio_names, predictions)
def inference_testing_data(args):
    """Run a fully-trained PyTorch model on the testing data and write the
    predictions to a submission csv.

    Args:
      workspace: string, directory of workspace
      iteration: int, which saved checkpoint iteration to load
      filename: string, sub-directory name of the saved models
      cuda: bool
    """
    # Arguments & parameters (the unused `validate = True` was removed)
    workspace = args.workspace
    iteration = args.iteration
    filename = args.filename
    cuda = args.cuda

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel',
        'development.h5')
    test_hdf5_path = os.path.join(workspace, 'features', 'logmel', 'test.h5')

    model_path = os.path.join(workspace, 'models', filename, 'full_train',
        'md_{}_iters.tar'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions', filename,
        'iteration={}'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    # Load model
    model = Model()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator
    generator = TestDataGenerator(dev_hdf5_path=dev_hdf5_path,
                                  test_hdf5_path=test_hdf5_path,
                                  batch_size=batch_size)

    generate_func = generator.generate_test()

    # Inference. Renamed from `dict`, which shadowed the builtin.
    results = forward(model=model,
                      generate_func=generate_func,
                      cuda=cuda,
                      return_target=False,
                      return_bottleneck=False)

    outputs = results['output']
    itemids = results['itemid']

    # Write out submission file
    write_testing_data_submission_csv(submission_path, itemids, outputs)
def optimize_first_stage(self, repeats_num, max_iteration): '''Stage 1: Set several initialization and select the best initialization. Inputs: repeats_num: int, number of initializations max_iteration: int Returns: z_hat: estimated seed, (samples_num, seed_num) alpha_hat: estimated filters, (samples_num, filter_len, filter_len) s_hat: estimated source, (samples_num, 1, 28, 28) x_hat: estimated mixture ''' # Paths first_stage_figures_dir = os.path.join(self.figures_dir, 'first_stage') create_folder(first_stage_figures_dir) # Repeat mixture and target for applying different initializations for # a single mixture repeated_x = np.repeat(self.x, repeats=repeats_num, axis=0) repeated_s = np.repeat(self.s, repeats=repeats_num, axis=0) samples_num = repeated_x.shape[0] # Initialize seed and filter z_hat = np.random.normal(loc=0., scale=1, size=(samples_num, self.seed_num)) alpha_hat = np.ones(samples_num) # Optimize on seed and filter (z_hat, alpha_hat, s_hat, x_hat) = self.optimize(repeated_x, repeated_s, z_hat, alpha_hat, max_iteration, first_stage_figures_dir) # Find the indice of the best initialization for each input mixture indices = self.find_best_initialize_indice(x_hat, repeated_x, repeats_num) for n in range(len(indices)): indices[n] = indices[n] + n * repeats_num z_hat = z_hat[indices] alpha_hat = alpha_hat[indices] s_hat = s_hat[indices] x_hat = x_hat[indices] return z_hat, alpha_hat, s_hat, x_hat
def dcase2017task4(args):
    """Create black list. Black list is a list of audio ids that will be
    skipped in training.
    """

    # Augments & parameters
    workspace = args.workspace

    # Paths
    dcase2017task4_dataset_dir = '/vol/vssp/msos/qk/datasets/dcase2017/task4/dataset_root'

    test_weak_csv = os.path.join(dcase2017task4_dataset_dir,
        'metadata/groundtruth_weak_label_testing_set.csv')
    evaluation_weak_csv = os.path.join(dcase2017task4_dataset_dir,
        'metadata/groundtruth_weak_label_evaluation_set.csv')

    black_list_csv = os.path.join(workspace, 'black_list',
        'dcase2017task4.csv')
    create_folder(os.path.dirname(black_list_csv))

    def get_id_sets(csv_path):
        # The first 11 characters of the first column form the audio id.
        with open(csv_path, 'r') as fr:
            reader = csv.reader(fr, delimiter='\t')
            lines = list(reader)

        ids_set = []
        for line in lines:
            ids_set.append(line[0][0:11])

        ids_set = list(set(ids_set))
        return ids_set

    test_ids_set = get_id_sets(test_weak_csv)
    evaluation_ids_set = get_id_sets(evaluation_weak_csv)
    full_ids_set = test_ids_set + evaluation_ids_set

    # Write black list; 'with' guarantees the file is flushed and closed
    # (the original never closed fw). `audio_id` also no longer shadows
    # the builtin `id`.
    with open(black_list_csv, 'w') as fw:
        for audio_id in full_ids_set:
            fw.write('{}\n'.format(audio_id))

    print('Write black list to {}'.format(black_list_csv))
def plot_complexity_map(args):
    """Scatter-plot mAP against multiply-add complexity for all evaluated
    architectures and save the figure to results/complexity_mAP.pdf.
    """
    # Paths
    save_out_path = 'results/complexity_mAP.pdf'
    create_folder(os.path.dirname(save_out_path))

    # The original created a stray `plt.figure(figsize=(5, 5))` that was
    # never drawn on; `plt.subplots` below creates the actual figure, so
    # the orphan was removed. The unused `sorted_indexes = np.sort(flops)`
    # was also removed.
    fig, ax = plt.subplots(1, 1)

    model_types = np.array(['Cnn6', 'Cnn10', 'Cnn14', 'ResNet22', 'ResNet38',
        'ResNet54', 'MobileNetV1', 'MobileNetV2', 'DaiNet', 'LeeNet',
        'LeeNet18', 'Res1dNet30', 'Res1dNet44', 'Wavegram-CNN',
        'Wavegram-\nLogmel-CNN'])
    flops = np.array([21.986, 28.166, 42.220, 30.081, 48.962, 54.563, 3.614,
        2.810, 30.395, 4.741, 26.369, 32.688, 61.833, 44.234, 53.510])
    mAPs = np.array([0.343, 0.380, 0.431, 0.430, 0.434, 0.429, 0.389, 0.383,
        0.295, 0.266, 0.336, 0.365, 0.355, 0.389, 0.439])

    ax.scatter(flops, mAPs)

    # Manual per-label offsets so the annotations avoid the markers.
    shift = [[-5.5, -0.004], [1, -0.004], [-1, -0.014], [-2, 0.006],
        [-7, 0.006], [1, -0.01], [0.5, 0.004], [-1, -0.014], [1, -0.007],
        [0.8, -0.008], [1, -0.007], [1, 0.002], [-6, -0.015], [1, -0.008],
        [0.8, 0]]

    for i, model_type in enumerate(model_types):
        ax.annotate(model_type,
            (flops[i] + shift[i][0], mAPs[i] + shift[i][1]))

    # Connect members of the same architecture family.
    ax.plot(flops[[0, 1, 2]], mAPs[[0, 1, 2]])
    ax.plot(flops[[3, 4, 5]], mAPs[[3, 4, 5]])
    ax.plot(flops[[6, 7]], mAPs[[6, 7]])
    ax.plot(flops[[9, 10]], mAPs[[9, 10]])
    ax.plot(flops[[11, 12]], mAPs[[11, 12]])
    ax.plot(flops[[13, 14]], mAPs[[13, 14]])

    ax.set_xlim(0, 70)
    ax.set_ylim(0.2, 0.5)
    # NOTE(review): the x-label text looks garbled ('Multi-load_statisticss')
    # — probably meant 'Multi-adds'; confirm before changing the output.
    ax.set_xlabel('Multi-load_statisticss (million)', fontsize=15)
    ax.set_ylabel('mAP', fontsize=15)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

    # Keyword form of tight_layout; positional pad/h_pad/w_pad args are
    # deprecated in recent Matplotlib.
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.savefig(save_out_path)
    print('Write out figure to {}'.format(save_out_path))
def _start_service(self):
    """Start the service: prepare output folders for the configured storage
    provider, then run the task handler loop; on failure, mark the liveness
    probe unhealthy.
    """
    storage_provider = self.__config['storage_provider'].upper()
    try:
        create_folder(constants.VRT_OUTPUT_FOLDER_NAME)
        # NOTE(review): storage_provider is an upper-cased *string*, while
        # StorageProvider.FS / .S3 look like enum members — this equality
        # only holds if StorageProvider's values are plain strings (e.g. a
        # str-valued enum); confirm against its definition.
        if (storage_provider == StorageProvider.FS):
            tiles_output_folder = self.__config['fs'][
                'internal_outputs_path']
            create_folder(tiles_output_folder)
        elif (storage_provider == StorageProvider.S3):
            # S3 output needs GDAL's S3 driver configured instead of a
            # local folder.
            set_gdal_s3()
        self.loop.run_until_complete(self.__task_handler.handle_tasks())
    except Exception as e:
        self.log.error(
            'Error occurred during running service: {0}'.format(e))
        # Signal the orchestrator (via the liveness probe) to restart us.
        probe.liveness = False
def inference_testing_data_bottleneck_features(args):
    """Extract bottleneck features of the testing data with a fully-trained
    PyTorch model and write them to an HDF5 file.
    """
    # Arugments & parameters
    workspace = args.workspace
    iteration = args.iteration
    cuda = args.cuda

    # NOTE(review): `validate` is assigned but never used in this function.
    validate = True
    batch_size = 64
    filename = 'main_pytorch'

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel',
                                 'development.h5')

    test_hdf5_path = os.path.join(workspace, 'features', 'logmel', 'test.h5')

    # Always the full-train model (no holdout fold for test inference).
    model_path = os.path.join(workspace, 'models', filename, 'full_train',
                              'md_{}_iters.tar'.format(iteration))

    bottleneck_hdf5_path = os.path.join(
        workspace, 'bottlenecks', filename, 'test_full_train',
        '{}_iters'.format(iteration), 'bottleneck.h5')

    create_folder(os.path.dirname(bottleneck_hdf5_path))

    # Load model from the checkpoint's 'state_dict' entry.
    model = Model()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator
    generator = TestDataGenerator(dev_hdf5_path=dev_hdf5_path,
                                  test_hdf5_path=test_hdf5_path,
                                  batch_size=batch_size)

    generate_func = generator.generate_test()

    # Write bottleneck features; no targets exist for the test set.
    write_bottleneck_features_to_hdf5(
        model, generate_func, bottleneck_hdf5_path, cuda, return_target=False)
def plot_classwise_iteration_map(args):
    """Plot class-wise AP versus training iteration for three groups of
    classes (head, middle and tail of the per-class sample-count ranking)
    and save the figure.
    """
    # Paths
    save_out_path = 'results/classwise_iteration_map.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Load statistics; 'with' closes the file handle, which the original
    # pickle.load(open(...)) form leaked.
    stats_path = ('paper_statistics/statistics_sr32000_window1024_hop320_'
                  'mel64_fmin50_fmax14000_full_train_WavegramLogmelCnn_'
                  'balanced_mixup_bs32.pkl')
    with open(stats_path, 'rb') as f:
        statistics_dict = pickle.load(f)

    mAP_mat = np.array([e['average_precision'] for e in statistics_dict['test']])
    mAP_mat = mAP_mat[0 : 300, :]   # 300 * 2000 = 600k iterations
    # Classes ordered from most to fewest training samples.
    sorted_indexes = np.argsort(config.full_samples_per_class)[::-1]

    fig, axs = plt.subplots(1, 3, figsize=(20, 5))
    # Head, middle and tail slices of the sample-count ranking.
    ranges = [np.arange(0, 10), np.arange(250, 260), np.arange(517, 527)]
    axs[0].set_ylabel('AP')

    for col in range(0, 3):
        axs[col].set_ylim(0, 1.)
        axs[col].set_xlim(0, 301)
        axs[col].set_xlabel('Iterations')
        axs[col].set_ylabel('AP')
        axs[col].xaxis.set_ticks(np.arange(0, 301, 100))
        axs[col].xaxis.set_ticklabels(['0', '200k', '400k', '600k'])
        lines = []
        for _ix in ranges[col]:
            # Legend entry: cropped label plus its training sample count.
            _label = crop_label(config.labels[sorted_indexes[_ix]]) + \
                ' ({})'.format(add_comma(config.full_samples_per_class[sorted_indexes[_ix]]))
            line, = axs[col].plot(mAP_mat[:, sorted_indexes[_ix]],
                                  label=_label)
            lines.append(line)
        box = axs[col].get_position()
        axs[col].set_position([box.x0, box.y0, box.width * 1., box.height])
        axs[col].legend(handles=lines, bbox_to_anchor=(1., 1.))
        axs[col].yaxis.grid(color='k', linestyle='solid', alpha=0.3,
                            linewidth=0.3)

    plt.tight_layout(pad=4, w_pad=1, h_pad=1)
    plt.savefig(save_out_path)
    print(save_out_path)
def rename_annotation_files(work_dir):
    """Copy images and their json annotations from `src_folder_name` under
    work_dir into a 'Renamed ...' folder, renaming both files and updating
    each json's 'imagePath' field to the new image name.
    """
    # NOTE(review): src_folder_name, allowed_image_types and new_name are
    # not defined in this function — presumably module-level globals. In
    # particular the assignment to new_name is commented out below, so
    # new_name must come from module scope for this to run; confirm.
    src_folder_dir = os.path.join(work_dir, src_folder_name)
    # Joining with '' guarantees a trailing separator for the checks below.
    src_folder_dir = os.path.join(src_folder_dir, '')
    if not os.path.exists(src_folder_dir):
        print(src_folder_dir + " does not exist")
        return
    print(os.path.basename(os.path.dirname(src_folder_dir)))
    dest_folder_dir = utilities.create_folder(work_dir,
        "Renamed " + os.path.basename(os.path.dirname(src_folder_dir)))
    img_file_count = 0
    for filename in os.listdir(src_folder_dir):
        name, ext = os.path.splitext(os.path.join(src_folder_dir, filename))
        name, ext = name.lower(), ext.lower()
        if ext.endswith(allowed_image_types):
            img_file_count += 1
            # Copy image #
            # new_name = item_name+"_"+str(img_file_count)
            new_img_name = new_name+ext
            utilities.copy_and_rename_file(src_folder_dir, dest_folder_dir,
                filename, new_img_name)
            # Copy corresponding json #
            json_filename = name + ".json"
            new_json_filename = new_name + ".json"
            # Images without a matching json are copied without annotation.
            if not os.path.exists(os.path.join(src_folder_dir,
                                               json_filename)):
                continue
            new_json_dir = utilities.copy_and_rename_file(src_folder_dir,
                dest_folder_dir, json_filename, new_json_filename)
            # Read and write to json file #
            with open(new_json_dir, 'r') as f:
                data = json.load(f)
                data['imagePath'] = new_img_name
                with open(new_json_dir, 'w') as f2:
                    json.dump(data, f2, indent=1)
    print("Finished Renaming " + str(img_file_count) +
          " images and corresponding json files.")
def train(args):
    '''Training. Model will be saved after several iterations.

    Args:
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'None', where '1' indicates using validation and
          'None' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    seq_len = 640
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    # NOTE(review): max_iteration is never used below — evaluation is
    # always called with max_iteration=None; confirm before removing.
    max_iteration = 10      # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    # Temporary submission csv used by the official evaluation tool during
    # validation.
    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model: model_type names a class available in this module's scope.
    Model = eval(model_type)
    model = Model(classes_num, seq_len, mel_bins, cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0., amsgrad=True)
    print('cliqueNet parameters:',
          sum(param.numel() for param in model.parameters()))

    # Data generator
    data_generator = DataGenerator(
        train_hdf5_path=train_hdf5_path,
        validate_hdf5_path=validate_hdf5_path,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        taxonomy_level=taxonomy_level,
        cuda=cuda,
        verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 200 iterations (including iteration 0).
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            # NOTE(review): train_statistics is computed but never used or
            # dumped; confirm whether it should be logged.
            train_statistics = evaluator.evaluate(
                data_type='train', max_iteration=None)

            # Evaluate on validation data
            # NOTE(review): compares against lowercase 'none' while the
            # docstring documents holdout_fold as '1' | 'None'; confirm the
            # actual CLI value.
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 1000 iterations (skipping iteration 0).
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate by 10% every 200 iterations.
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss: target key is chosen by the taxonomy level being trained.
        batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 3000:
            break

        iteration += 1
def plot_six_figures(args):
    """Plot six panels comparing AudioSet tagging training curves and save
    the figure to results/six_figures.pdf.

    Each panel shows, per system, the mAP on the balanced training subset
    (faint line) and on the evaluation set (solid, labelled line). Curves
    are loaded from pre-computed pickles under paper_statistics/.

    Args:
      args: argparse namespace; not used, kept for a uniform CLI interface.
    """
    # Statistics were dumped every 2000 iterations up to 540k iterations.
    max_plot_iteration = 540000
    iterations = np.arange(0, max_plot_iteration, 2000)

    save_out_path = 'results/six_figures.pdf'
    create_folder(os.path.dirname(save_out_path))

    fig, ax = plt.subplots(2, 3, figsize=(14, 7))
    bal_alpha = 0.3     # faint: balanced training subset mAP
    test_alpha = 1.0    # solid: evaluation mAP
    linewidth = 1.

    def _plot_panel(axis, systems, title, legend_fontsize=None):
        # Draw one comparison panel. `systems` is a list of
        # (statistics_pickle_path, legend_label, color) tuples.
        lines = []
        for stats_path, label, color in systems:
            (bal_map, test_map) = load_statistics(stats_path)
            axis.plot(bal_map, color=color, alpha=bal_alpha,
                linewidth=linewidth)
            line, = axis.plot(test_map, label=label, color=color,
                alpha=test_alpha, linewidth=linewidth)
            lines.append(line)
        if legend_fontsize is None:
            axis.legend(handles=lines, loc=2)
        else:
            axis.legend(handles=lines, loc=2, fontsize=legend_fontsize)
        axis.set_title(title)

    _plot_panel(ax[0, 0], [
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_WavegramLogmelCnn_balanced_mixup_bs32.pkl',
            'Wavegram-Logmel-CNN', 'g'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14', 'r'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_MobileNetV1_balanced_mixup_bs32.pkl',
            'MobileNetV1', 'b'),
        ], '(a) Comparison of architectures')

    _plot_panel(ax[0, 1], [
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14,bal,mixup (1.9m)', 'r'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_timedomain_bs32.pkl',
            'CNN14,bal,mixup-wav (1.9m)', 'y'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_balanced_nomixup_bs32.pkl',
            'CNN14,bal,no-mixup (1.9m)', 'g'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_nobalanced_nomixup_bs32.pkl',
            'CNN14,no-bal,no-mixup (1.9m)', 'b'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_balanced_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14,bal,mixup (20k)', 'm'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_balanced_train_Cnn14_balanced_nomixup_bs32.pkl',
            'CNN14,bal,no-mixup (20k)', 'k'),
        ], '(b) Comparison of training data and augmentation',
        legend_fontsize=8)

    _plot_panel(ax[0, 2], [
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14,emb=2048', 'r'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_emb128_balanced_mixup_bs32.pkl',
            'CNN14,emb=128', 'g'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_emb32_balanced_mixup_bs32.pkl',
            'CNN14,emb=32', 'b'),
        ], '(c) Comparison of embedding size')

    # Label fixed from 'cnn14 (50% full)' for consistency with siblings.
    _plot_panel(ax[1, 0], [
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14 (100% full)', 'r'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_0.8full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14 (80% full)', 'b'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_0.5full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14 (50% full)', 'g'),
        ], '(d) Comparison of amount of training data')

    _plot_panel(ax[1, 1], [
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14,32kHz', 'r'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_16k_balanced_mixup_bs32.pkl',
            'CNN14,16kHz', 'b'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_8k_balanced_mixup_bs32.pkl',
            'CNN14,8kHz', 'g'),
        ], '(e) Comparison of sampling rate')

    # The 128/32 mel-bin balanced curves originally omitted linewidth=;
    # all curves now use the shared linewidth for consistency.
    _plot_panel(ax[1, 2], [
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel128_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14,128-melbins', 'g'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14,64-melbins', 'r'),
        ('paper_statistics/statistics_sr32000_window1024_hop320_mel32_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32.pkl',
            'CNN14,32-melbins', 'b'),
        ], '(f) Comparison of mel bins number')

    # Shared cosmetics for all six panels
    for i in range(2):
        for j in range(3):
            ax[i, j].set_ylim(0, 0.8)
            ax[i, j].set_xlim(0, len(iterations))
            ax[i, j].set_xlabel('Iterations')
            ax[i, j].set_ylabel('mAP')
            ax[i, j].xaxis.set_ticks(np.arange(0, len(iterations), 50))
            ax[i, j].xaxis.set_ticklabels(['0', '100k', '200k', '300k',
                '400k', '500k'])
            ax[i, j].yaxis.set_ticks(np.arange(0, 0.81, 0.05))
            ax[i, j].yaxis.set_ticklabels(['0', '', '0.1', '', '0.2', '',
                '0.3', '', '0.4', '', '0.5', '', '0.6', '', '0.7', '', '0.8'])
            ax[i, j].yaxis.grid(color='k', linestyle='solid', alpha=0.3,
                linewidth=0.3)
            ax[i, j].xaxis.grid(color='k', linestyle='solid', alpha=0.3,
                linewidth=0.3)

    # Keyword arguments: positional tight_layout() arguments were deprecated
    # in matplotlib 3.3 and removed in 3.6 (originally: tight_layout(0, 1, 0)).
    plt.tight_layout(pad=0, h_pad=1, w_pad=0)
    plt.savefig(save_out_path)
    print('Save figure to {}'.format(save_out_path))
def plot_long_fig(args):
    """Plot per-class AP of several AudioSet tagging systems on a long
    4-row figure and save it to results/long_fig.pdf.

    Top axes (`*a`) show the number of training clips per class; bottom
    axes (`*b`) show per-class AP of four systems plus the label-quality
    markers.

    Args:
      args: argparse namespace; not used, kept for a uniform CLI interface.
    """
    # Use a context manager so the pickle file handle is closed
    # (the original left `open(...)` dangling).
    with open('paper_statistics/stats_for_long_fig.pkl', 'rb') as fin:
        stats = pickle.load(fin)

    save_out_path = 'results/long_fig.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Load meta
    N = len(config.labels)
    sorted_indexes = stats['sorted_indexes_for_plot']
    sorted_labels = np.array(config.labels)[sorted_indexes]
    audio_clips_per_class = stats['official_balanced_training_samples'] \
        + stats['official_unbalanced_training_samples']
    audio_clips_per_class = audio_clips_per_class[sorted_indexes]

    # Prepare axes for plot
    (ax1a, ax2a, ax3a, ax4a, ax1b, ax2b, ax3b, ax4b) = \
        prepare_plot_long_4_rows(sorted_labels)

    # Plot the number of training samples on every row
    for axis in (ax1a, ax2a, ax3a, ax4a):
        axis.bar(np.arange(N), audio_clips_per_class, alpha=0.3)

    # Load per-class AP of different systems.
    # The averaging-instance baseline (mAP 0.317) is from:
    # [1] Kong, Qiuqiang, Changsong Yu, Yong Xu, Turab Iqbal, Wenwu Wang,
    # and Mark D. Plumbley. "Weakly labelled audioset tagging with attention
    # neural networks." IEEE/ACM Transactions on Audio, Speech, and Language
    # Processing 27, no. 11 (2019): 1791-1802.
    maps_avg_instances = stats['averaging_instance_system_avg_9_probs_from_10000_to_50000_iterations']['eval']['average_precision']
    maps_avg_instances = maps_avg_instances[sorted_indexes]

    maps_panns_cnn14 = stats['panns_cnn14']['eval']['average_precision']
    maps_panns_cnn14 = maps_panns_cnn14[sorted_indexes]

    maps_panns_mobilenetv1 = stats['panns_mobilenetv1']['eval']['average_precision']
    maps_panns_mobilenetv1 = maps_panns_mobilenetv1[sorted_indexes]

    maps_panns_wavegram_logmel_cnn14 = stats['panns_wavegram_logmel_cnn14']['eval']['average_precision']
    maps_panns_wavegram_logmel_cnn14 = maps_panns_wavegram_logmel_cnn14[sorted_indexes]

    # Scatter markers for each system on all four rows
    _scatter_4_rows(maps_panns_wavegram_logmel_cnn14, ax1b, ax2b, ax3b, ax4b, s=5, c='g')
    _scatter_4_rows(maps_panns_cnn14, ax1b, ax2b, ax3b, ax4b, s=5, c='r')
    _scatter_4_rows(maps_panns_mobilenetv1, ax1b, ax2b, ax3b, ax4b, s=5, c='b')
    _scatter_4_rows(maps_avg_instances, ax1b, ax2b, ax3b, ax4b, s=5, c='k')

    linewidth = 0.7
    line0te = _plot_4_rows(maps_panns_wavegram_logmel_cnn14, ax1b, ax2b, ax3b, ax4b,
        c='g', linewidth=linewidth, label='AP with Wavegram-Logmel-CNN')
    line1te = _plot_4_rows(maps_panns_cnn14, ax1b, ax2b, ax3b, ax4b,
        c='r', linewidth=linewidth, label='AP with CNN14')
    line2te = _plot_4_rows(maps_panns_mobilenetv1, ax1b, ax2b, ax3b, ax4b,
        c='b', linewidth=linewidth, label='AP with MobileNetV1')
    line3te = _plot_4_rows(maps_avg_instances, ax1b, ax2b, ax3b, ax4b,
        c='k', linewidth=linewidth, label='AP with averaging instances (baseline)')

    # Plot label quality.  Entries may be None (quality unknown), so the
    # array has dtype=object and the `!= None` / `== None` comparisons
    # below are deliberate elementwise masks (`is not None` would not
    # broadcast).
    label_quality = stats['label_quality']
    sorted_label_quality = np.array(label_quality)[sorted_indexes]
    for k in range(len(sorted_label_quality)):
        # Nudge a perfect 1.0 down so its marker stays inside the axis
        if sorted_label_quality[k] and sorted_label_quality[k] == 1:
            sorted_label_quality[k] = 0.99

    known = sorted_label_quality != None    # noqa: E711 -- elementwise mask
    for axis in (ax1b, ax2b, ax3b):
        axis.scatter(np.arange(N)[known], sorted_label_quality[known],
            s=12, c='r', linewidth=0.8, marker='+')
    line_label_quality = ax4b.scatter(np.arange(N)[known],
        sorted_label_quality[known], s=12, c='r', linewidth=0.8, marker='+',
        label='Label quality')

    # Classes with unknown quality get a dash marker at 0.5
    unknown = sorted_label_quality == None  # noqa: E711 -- elementwise mask
    for axis in (ax1b, ax2b, ax3b, ax4b):
        axis.scatter(np.arange(N)[unknown],
            0.5 * np.ones(len(np.arange(N)[unknown])),
            s=12, c='r', linewidth=0.8, marker='_')

    plt.legend(handles=[line0te, line1te, line2te, line3te,
        line_label_quality], fontsize=6, loc=1)
    # Keyword arguments: positional tight_layout() arguments were deprecated
    # in matplotlib 3.3 and removed in 3.6 (originally: tight_layout(0, 0, 0)).
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.savefig(save_out_path)
    print('Save fig to {}'.format(save_out_path))
def calculate_logmel_features(args):
    """Extract log-mel and STFT features for all mixture audio files and
    write them to a single development HDF5 file.

    Args:
      args: argparse namespace with workspace, scene_type and snr attributes.

    Side effects:
      Writes <workspace>/features/logmel/scene_type=<t>,snr=<s>/development.h5
    """
    # Arguments & parameters
    workspace = args.workspace
    scene_type = args.scene_type
    snr = args.snr

    sample_rate = config.sample_rate
    window_size = config.window_size
    overlap = config.overlap
    seq_len = config.seq_len
    mel_bins = config.mel_bins
    stft_bins = window_size // 2 + 1    # one-sided spectrum size
    classes_num = len(config.labels)
    lb_to_ix = config.lb_to_ix

    # Paths
    audio_dir = os.path.join(workspace, 'mixed_audios',
        'scene_type={},snr={}'.format(scene_type, snr))

    yaml_path = os.path.join(workspace, 'mixture.yaml')

    hdf5_path = os.path.join(workspace, 'features', 'logmel',
        'scene_type={},snr={}'.format(scene_type, snr), 'development.h5')
    create_folder(os.path.dirname(hdf5_path))

    # Load mixture yaml.  safe_load: yaml.load() without a Loader is
    # deprecated since PyYAML 5.1 and a TypeError since PyYAML 6, and the
    # full loader can execute arbitrary constructors.
    load_time = time.time()
    with open(yaml_path, 'r') as f:
        data_list = yaml.safe_load(f)
    logging.info('Loading mixture yaml time: {} s'
        ''.format(time.time() - load_time))

    # Feature extractor
    feature_extractor = LogMelExtractor(sample_rate=sample_rate,
        window_size=window_size, overlap=overlap, mel_bins=mel_bins)

    # Create hdf5 file with zero-length resizable datasets; rows are
    # appended one clip at a time in the loop below.
    write_hdf5_time = time.time()
    hf = h5py.File(hdf5_path, 'w')

    hf.create_dataset(name='mixture_logmel', shape=(0, seq_len, mel_bins),
        maxshape=(None, seq_len, mel_bins), dtype=np.float32)
    hf.create_dataset(name='mixture_stft', shape=(0, seq_len, stft_bins),
        maxshape=(None, seq_len, stft_bins), dtype=np.float32)
    hf.create_dataset(name='events_stft', shape=(0, seq_len, stft_bins),
        maxshape=(None, seq_len, stft_bins), dtype=np.float32)
    hf.create_dataset(name='scene_stft', shape=(0, seq_len, stft_bins),
        maxshape=(None, seq_len, stft_bins), dtype=np.float32)
    hf.create_dataset(name='target', shape=(0, classes_num),
        maxshape=(None, classes_num), dtype=np.int32)

    mixture_names = []
    folds = []

    for n, data in enumerate(data_list):
        if n % 10 == 0:
            logging.info('{} / {} audio features calculated'
                ''.format(n, len(data_list)))

        mixed_audio_name = data['mixture_name']
        mixed_audio_path = os.path.join(audio_dir, mixed_audio_name)
        mixture_names.append(data['mixture_name'])
        folds.append(data['fold'])

        # Extract feature
        features_dict = calculate_logmel(audio_path=mixed_audio_path,
            sample_rate=sample_rate, feature_extractor=feature_extractor)

        # Write out features
        hf['mixture_logmel'].resize((n + 1, seq_len, mel_bins))
        hf['mixture_logmel'][n] = features_dict['mixture_logmel']
        hf['mixture_stft'].resize((n + 1, seq_len, stft_bins))
        hf['mixture_stft'][n] = features_dict['mixture_stft']
        hf['events_stft'].resize((n + 1, seq_len, stft_bins))
        hf['events_stft'][n] = features_dict['events_stft']
        hf['scene_stft'].resize((n + 1, seq_len, stft_bins))
        hf['scene_stft'][n] = features_dict['scene_stft']

        # Write out target
        target = get_target_from_events(data['events'], lb_to_ix)
        hf['target'].resize((n + 1, classes_num))
        hf['target'][n] = target

    # NOTE(review): 'S20' silently truncates names longer than 20 bytes --
    # confirm mixture names fit.
    hf.create_dataset(name='audio_name',
        data=[s.encode() for s in mixture_names], dtype='S20')
    hf.create_dataset(name='fold', data=folds, dtype=np.int32)

    hf.close()
    logging.info('Write out hdf5 file to {}'.format(hdf5_path))
    logging.info('Time spent: {} s'.format(time.time() - write_hdf5_time))
# NOTE(review): the next four statements duplicate the tail of
# calculate_logmel_features() above and reference `hf`, `hdf5_path` and
# `write_hdf5_time`, which do not exist at module level -- this looks like a
# stray copy/paste remnant that would raise NameError on import; confirm and
# remove.
hf.create_dataset(name='fold', data=folds, dtype=np.int32)
hf.close()
logging.info('Write out hdf5 file to {}'.format(hdf5_path))
logging.info('Time spent: {} s'.format(time.time() - write_hdf5_time))


if __name__ == '__main__':
    # Command-line entry point: only the 'logmel' sub-command is supported.
    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    parser_logmel = subparsers.add_parser('logmel')
    parser_logmel.add_argument('--workspace', type=str, required=True)
    parser_logmel.add_argument('--scene_type', type=str, required=True)
    parser_logmel.add_argument('--snr', type=int, required=True)

    args = parser.parse_args()

    # `logging` is rebound to the logger object returned by create_logging;
    # log files go under <workspace>/logs/<script name>.
    logs_dir = os.path.join(args.workspace, 'logs', get_filename(__file__))
    create_folder(logs_dir)
    logging = create_logging(logs_dir, filemode='w')
    logging.info(args)

    if args.mode == 'logmel':
        calculate_logmel_features(args)
    else:
        raise Exception('Incorrect arguments!')
def inference_evaluation_data(args):
    """Run a trained model on evaluation data and write a submission csv.

    Args:
      args: argparse namespace with dataset_dir, dev_subdir, eval_subdir,
        workspace, iteration, filename and cuda attributes.

    Side effects:
      Writes predictions to <workspace>/submissions/<eval_subdir>/
      <filename>/iteration=<iteration>/submission.csv
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    dev_subdir = args.dev_subdir
    eval_subdir = args.eval_subdir
    workspace = args.workspace
    iteration = args.iteration
    filename = args.filename
    cuda = args.cuda

    labels = config.labels
    ix_to_lb = config.ix_to_lb
    classes_num = len(labels)

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel',
        dev_subdir, 'development.h5')
    test_hdf5_path = os.path.join(workspace, 'features', 'logmel',
        eval_subdir, 'evaluation.h5')
    model_path = os.path.join(workspace, 'models', dev_subdir, filename,
        'full_train', 'md_{}_iters.tar'.format(iteration))
    submission_path = os.path.join(workspace, 'submissions', eval_subdir,
        filename, 'iteration={}'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    # Load model from the checkpoint saved at the requested iteration
    model = Model(classes_num)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator.  NOTE(review): `batch_size` is read from module
    # scope; confirm it is defined at file level.
    generator = TestDataGenerator(dev_hdf5_path=dev_hdf5_path,
        test_hdf5_path=test_hdf5_path, batch_size=batch_size)
    generate_func = generator.generate_test()

    # Predict.  (Renamed from `dict`, which shadowed the builtin.)
    output_dict = forward(model=model, generate_func=generate_func,
        cuda=cuda, return_target=False)

    audio_names = output_dict['audio_name']    # (audios_num,)
    outputs = output_dict['output']            # (audios_num, classes_num)
    predictions = np.argmax(outputs, axis=-1)  # (audios_num,)

    # Write result to submission csv.  The original's dangling
    # `f = open(submission_path, 'w')` was removed: the handle leaked and
    # was never used -- write_evaluation_submission() receives the path and
    # does its own writing.
    write_evaluation_submission(submission_path, audio_names, predictions)
def train(args):
    """Train an acoustic scene classification model on mini-batches.

    Evaluates every 100 iterations, checkpoints every 1000 iterations,
    decays the learning rate by 0.9 every 200 iterations, and stops at
    iteration 15000.

    Args:
      args: argparse namespace with dataset_dir, subdir, workspace,
        filename, validate, holdout_fold, mini_data and cuda attributes.
    """
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    subdir = args.subdir
    workspace = args.workspace
    filename = args.filename
    validate = args.validate
    holdout_fold = args.holdout_fold
    mini_data = args.mini_data
    cuda = args.cuda

    labels = config.labels

    # The 'mobile' subset was recorded with three devices a/b/c
    if 'mobile' in subdir:
        devices = ['a', 'b', 'c']
    else:
        devices = ['a']

    classes_num = len(labels)

    # Paths
    if mini_data:
        hdf5_path = os.path.join(workspace, 'features', 'logmel', subdir,
            'mini_development.h5')
    else:
        hdf5_path = os.path.join(workspace, 'features', 'logmel', subdir,
            'development.h5')

    if validate:
        dev_train_csv = os.path.join(dataset_dir, subdir,
            'evaluation_setup', 'fold{}_train.txt'.format(holdout_fold))
        dev_validate_csv = os.path.join(dataset_dir, subdir,
            'evaluation_setup', 'fold{}_evaluate.txt'.format(holdout_fold))
        models_dir = os.path.join(workspace, 'models', subdir, filename,
            'holdout_fold={}'.format(holdout_fold))
    else:
        dev_train_csv = None
        dev_validate_csv = None
        models_dir = os.path.join(workspace, 'models', subdir, filename,
            'full_train')

    create_folder(models_dir)

    # Model
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Data generator.  NOTE(review): `batch_size` is read from module
    # scope; confirm it is defined at file level.
    generator = DataGenerator(hdf5_path=hdf5_path,
        batch_size=batch_size,
        dev_train_csv=dev_train_csv,
        dev_validate_csv=dev_validate_csv)

    # Optimizer
    lr = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0.)

    train_bgn_time = time.time()

    # Train on mini batches
    for (iteration, (batch_x, batch_y)) in enumerate(
            generator.generate_train()):

        # Evaluate
        if iteration % 100 == 0:
            train_fin_time = time.time()

            (tr_acc, tr_loss) = evaluate(model=model,
                generator=generator,
                data_type='train',
                devices=devices,
                max_iteration=None,
                cuda=cuda)
            logging.info('tr_acc: {:.3f}, tr_loss: {:.3f}'.format(
                tr_acc, tr_loss))

            if validate:
                (va_acc, va_loss) = evaluate(model=model,
                    generator=generator,
                    data_type='validate',
                    devices=devices,
                    max_iteration=None,
                    cuda=cuda)
                logging.info('va_acc: {:.3f}, va_loss: {:.3f}'.format(
                    va_acc, va_loss))

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))
            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            save_out_dict = {
                'iteration': iteration,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            save_out_path = os.path.join(models_dir,
                'md_{}_iters.tar'.format(iteration))
            torch.save(save_out_dict, save_out_path)
            logging.info('Model saved to {}'.format(save_out_path))

        # Reduce learning rate every 200 iterations (skip iteration 0).
        # BUGFIX: the original `iteration % 200 == 0 > 0` chains to
        # `(iteration % 200 == 0) and (0 > 0)`, which is always False, so
        # the learning rate was never decayed.
        if iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Train
        batch_x = move_data_to_gpu(batch_x, cuda)
        batch_y = move_data_to_gpu(batch_y, cuda)
        model.train()
        batch_output = model(batch_x)
        loss = F.nll_loss(batch_output, batch_y)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 15000:
            break
def inference_evaluation(args):
    '''Inference on evaluation data and write out submission file.

    Args:
      subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
      data_type: 'leaderboard' | 'evaluation'
      workspace: string, directory of workspace
      model_type: string, e.g. 'Cnn_9layers'
      iteration: int
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool

    Raises:
      ValueError: if subtask is not 'a', 'b' or 'c'.
    '''
    # Arguments & parameters
    subtask = args.subtask
    data_type = args.data_type
    workspace = args.workspace
    model_type = args.model_type
    iteration = args.iteration
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    holdout_fold = 'none'   # inference always uses the full-train checkpoint

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    in_domain_classes_num = len(config.labels) - 1  # last label is 'unknown'

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    trained_sub_dir = get_subdir(subtask, 'development')

    feature_hdf5_path = os.path.join(workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(trained_sub_dir))

    checkpoint_path = os.path.join(workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(trained_sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(workspace, 'submissions',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), sub_dir, 'holdout_fold={}'.format(holdout_fold),
        model_type, '{}_iterations'.format(iteration), 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model.  SECURITY NOTE(review): eval() on the --model_type CLI
    # argument executes arbitrary code; restrict it to a whitelist of model
    # classes if this script can ever receive untrusted input.
    Model = eval(model_type)
    if subtask in ['a', 'b']:
        model = Model(in_domain_classes_num, activation='logsoftmax')
    elif subtask == 'c':
        model = Model(in_domain_classes_num, activation='sigmoid')
    else:
        # Explicit failure instead of a NameError on `model` below
        raise ValueError('Unknown subtask: {}'.format(subtask))

    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = EvaluationDataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        scalar=scalar,
        batch_size=batch_size)

    generate_func = data_generator.generate_evaluation(data_type)

    # Inference
    output_dict = forward(model, generate_func, cuda, return_input=False,
        return_target=False)

    # Write submission
    write_submission(output_dict, subtask, data_type, submission_path)
def calculate_feature_for_all_audio_files(args):
    '''Calculate feature of audio files and write out features to a
    hdf5 file.

    Args:
      dataset_dir: string
      workspace: string
      subtask: 'a' | 'b' | 'c'
      data_type: 'development' | 'evaluation'
      mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    frames_per_second = config.frames_per_second
    frames_num = config.frames_num
    total_samples = config.total_samples

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)
    metadata_path = os.path.join(dataset_dir, sub_dir, 'meta.csv')
    audios_dir = os.path.join(dataset_dir, sub_dir, 'audio')

    feature_path = os.path.join(workspace, 'features_side',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(sub_dir))
    create_folder(os.path.dirname(feature_path))

    # Feature extractor
    feature_extractor = LogMelExtractor(
        sample_rate=sample_rate,
        window_size=window_size,
        hop_size=hop_size,
        mel_bins=mel_bins,
        fmin=fmin,
        fmax=fmax)

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # Subsample metadata for debugging.  BUGFIX: only slice the keys that
    # are present -- evaluation metadata may lack scene_label/identifier/
    # source_label (the dataset-writing code below already guards on them),
    # and the original unconditional indexing raised KeyError there.
    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['audio_name'])
        random_state = np.random.RandomState(1234)
        indexes = random_state.choice(total_num, size=mini_num,
            replace=False)
        for key in ['audio_name', 'scene_label', 'identifier',
                'source_label']:
            if key in meta_dict:
                meta_dict[key] = meta_dict[key][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()

    # Hdf5 file for storing features and targets.
    # NOTE(review): fixed-width dtypes ('S80', 'S24', 'S8') silently
    # truncate longer strings -- confirm the metadata fits.
    hf = h5py.File(feature_path, 'w')

    hf.create_dataset(
        name='audio_name',
        data=[audio_name.encode() for audio_name in
            meta_dict['audio_name']],
        dtype='S80')

    if 'scene_label' in meta_dict.keys():
        hf.create_dataset(
            name='scene_label',
            data=[scene_label.encode() for scene_label in
                meta_dict['scene_label']],
            dtype='S24')

    if 'identifier' in meta_dict.keys():
        hf.create_dataset(
            name='identifier',
            data=[identifier.encode() for identifier in
                meta_dict['identifier']],
            dtype='S24')

    if 'source_label' in meta_dict.keys():
        hf.create_dataset(
            name='source_label',
            data=[source_label.encode() for source_label in
                meta_dict['source_label']],
            dtype='S8')

    # Resizable feature dataset; rows are appended clip by clip
    hf.create_dataset(
        name='feature_side',
        shape=(0, frames_num, mel_bins),
        maxshape=(None, frames_num, mel_bins),
        dtype=np.float32)

    for (n, audio_name) in enumerate(meta_dict['audio_name']):
        audio_path = os.path.join(audios_dir, audio_name)
        print(n, audio_path)

        # Read audio
        (audio, _) = read_side_audio(audio_path=audio_path,
            target_fs=sample_rate)

        # Pad or truncate audio recording to the same length
        audio = pad_truncate_sequence(audio, total_samples)

        # Extract feature
        feature = feature_extractor.transform(audio)

        # Remove the extra log mel spectrogram frames caused by padding zero
        feature = feature[0:frames_num]

        hf['feature_side'].resize((n + 1, frames_num, mel_bins))
        hf['feature_side'][n] = feature

    hf.close()
    print('Write hdf5 file to {} using {:.3f} s'.format(
        feature_path, time.time() - extract_time))
def train(args):
    '''Train a model; checkpoints are saved every few hundred iterations.

    Args:
      args: argparse.Namespace with at least the following attributes:
        dataset_dir: string, directory of dataset
        workspace: string, directory of workspace
        subtask: 'a' | 'b' | 'c', corresponds to 3 subtasks in DCASE2019 Task1
        data_type: 'development' | 'evaluation'
        holdout_fold: '1' | 'none', set 1 for development and none for
          training on all data without validation
        model_type: string, e.g. 'Cnn_9layers_AvgPooling'; must name a model
          class visible in this module's namespace (resolved via eval below)
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        filename: string, used as a sub-directory name for checkpoints/logs
        fixed: 'True' | other string; 'True' freezes part of the model
          (only parameters with requires_grad are optimized)
        finetune: 'True' | other string; 'True' warm-starts from a saved
          checkpoint (hard-coded path, see NOTE below)
        ite_train: int, iteration at which training stops
        ite_eva: int, evaluation starts only after this many iterations
        ite_store: int, checkpointing starts only after this many iterations
        mode: string, used in the log directory path
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    subtask = args.subtask
    data_type = args.data_type
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    fixed = args.fixed
    finetune = args.finetune
    ite_train = args.ite_train
    ite_eva = args.ite_eva
    ite_store = args.ite_store

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    # Number of mini-batches to evaluate on training data (None = no cap)
    max_iteration = None
    reduce_lr = True
    sources_to_evaluate = get_sources(subtask)
    # NOTE(review): assumes the last entry of config.labels is an
    # out-of-domain / "unknown" label excluded from training — confirm.
    in_domain_classes_num = len(config.labels) - 1

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    sub_dir = get_subdir(subtask, data_type)

    train_csv = os.path.join(dataset_dir, sub_dir, 'meta.csv')

    validate_csv = os.path.join(dataset_dir, sub_dir, 'evaluation_setup',
        'fold1_evaluate.csv')

    feature_hdf5_path = os.path.join(workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(sub_dir))

    scalar_path = os.path.join(workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}.h5'.format(sub_dir))

    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
        mel_bins), '{}'.format(sub_dir),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar (per-feature normalization statistics)
    scalar = load_scalar(scalar_path)

    # Model
    # NOTE(review): eval() resolves the class named by --model_type; this
    # executes arbitrary code, so model_type must only ever come from a
    # trusted command line, never from untrusted input.
    Model = eval(model_type)

    if subtask in ['a', 'b']:
        # Subtasks a/b: multi-class classification, log-softmax output + NLL.
        # fixed is a *string* flag ('True'/'False'), not a bool.
        if fixed=='True':
            model = Model(in_domain_classes_num, activation='logsoftmax', fixed=True)
        else :
            model = Model(in_domain_classes_num, activation='logsoftmax', fixed=False)
        loss_func = nll_loss
    elif subtask == 'c':
        # Subtask c: multi-label classification, sigmoid output + BCE.
        # NOTE(review): for any other subtask value, model/loss_func are
        # never bound and the code below raises NameError.
        model = Model(in_domain_classes_num, activation='sigmoid')
        loss_func = F.binary_cross_entropy

    if cuda:
        model.cuda()

    # Optimizer
    if fixed=='True':
        # Only optimize trainable parameters when part of the model is frozen
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3, betas=(0.9, 0.999),
            eps=1e-08, weight_decay=0., amsgrad=True)
    else :
        optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
            eps=1e-08, weight_decay=0., amsgrad=True)

    if finetune=='True':
        # NOTE(review): absolute path specific to the original author's
        # machine; the checkpoint for other model types was selected by
        # editing this path (alternative hard-coded paths removed).
        # Should be made a command-line argument.
        model_path='/home/cdd/code2/dcase2020_task1/workspace/checkpoints/main/logmel_86frames_40melbins/TAU-urban-acoustic-scenes-2020-mobile-development/holdout_fold=1/'+model_type+'/2000_iterations.pth'
        # NOTE(review): forces CUDA even when the --cuda flag is off or no
        # GPU is available — confirm intended.
        device = torch.device('cuda')
        checkpoint = torch.load(model_path, map_location=device)
        # Only model weights are restored; optimizer state is not.
        model.load_state_dict(checkpoint['model'])

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        scalar=scalar,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        subtask=subtask,
        cuda=cuda)

    # Statistics container: accumulates validation metrics across iterations
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches (generate_train is an endless generator;
    # the loop exits via the iteration == ite_train break below)
    for batch_data_dict in data_generator.generate_train():

        # Evaluate every 200 iterations once past the warm-up (ite_eva,
        # e.g. 1800 in the original runs)
        if iteration % 200 == 0 and iteration > ite_eva:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            for source in sources_to_evaluate:
                # Evaluation on training data; statistics are logged inside
                # evaluate() — the returned value is not stored.
                train_statistics = evaluator.evaluate(
                    data_type='train',
                    source=source,
                    max_iteration=None,
                    verbose=False)

            if holdout_fold != 'none':
                for source in sources_to_evaluate:
                    validate_statistics = evaluator.evaluate(
                        data_type='validate',
                        source=source,
                        max_iteration=None,
                        verbose=False)

                    validate_statistics_container.append_and_dump(
                        iteration, source, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model every 200 iterations once past ite_store
        if iteration % 200 == 0 and iteration > ite_store:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Exponentially decay the learning rate every 200 iterations
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.93

        # Move the four feature streams and the target to GPU (no-op on CPU)
        for key in batch_data_dict.keys():
            if key in ['feature', 'feature_gamm', 'feature_mfcc', 'feature_panns', 'target']:
                batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train step using mixup on all four feature streams with a shared
        # mixing coefficient lam (plain, non-mixup forward pass removed —
        # it called model(...) directly and loss_func on the raw target).
        model.train()
        mixed_x1, mixed_x2, mixed_x3, mixed_x4, y_a, y_b, lam = mixup_data(x1=batch_data_dict['feature'], x2=batch_data_dict['feature_gamm'],
            x3=batch_data_dict['feature_mfcc'], x4=batch_data_dict['feature_panns'], y=batch_data_dict['target'], alpha=0.2)
        batch_output,batch_loss = model(mixed_x1, mixed_x2, mixed_x3, mixed_x4)

        # NOTE(review): 10 is presumably the class count for single-scale
        # models (output shape [batch, classes]); multi-scale models seem to
        # return [batch, scales, classes] and the last scale is excluded
        # from the summed loss — confirm against the model definitions.
        if batch_output.shape[1] == 10: # single scale models
            loss = mixup_criterion(loss_func, batch_output, y_a, y_b, lam)
        else: # multi scale models
            losses = []
            for ite in range(batch_output.shape[1]-1):
                loss = mixup_criterion(loss_func, batch_output[:,ite,:], y_a, y_b, lam)
                losses.append(loss)
            loss = sum(losses)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning (e.g. 12000 iterations when training from scratch)
        if iteration == ite_train:
            break

        iteration += 1