    subparsers = parser.add_subparsers(dest='mode')
    parser_train = subparsers.add_parser('train')
    parser_get_avg_stats = subparsers.add_parser('get_avg_stats')

    args = parser.parse_args()

    args.filename = utilities.get_filename(__file__)

    # Logs
    sub_dir = os.path.join(args.filename,
                           'balance_type={}'.format(args.balance_type),
                           'model_type={}'.format(args.model_type))

    logs_dir = os.path.join(args.workspace, 'logs', sub_dir)
    utilities.create_folder(logs_dir)
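    # create_logging is assumed to configure and return the logger used below;
    # filemode='w' starts a fresh log file on every run.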
    logging = utilities.create_logging(logs_dir, filemode='w')

    logging.info(os.path.abspath(__file__))
    logging.info(args)
    
    totest = 0

    if totest == 0 and args.mode != 'get_avg_stats':
        test(args)

    else:
        if args.mode == "train":
            train(args)

        elif args.mode == 'get_avg_stats':
            inference_all_fold(args)
            sys.exit()

    # Get reproducible results by manually seeding the random number generators
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
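    # cudnn.deterministic trades some speed for reproducible convolution results across runs.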
    cudnn.deterministic = True

    # logs directory
    logs_dir = os.path.join(
        args.workspace, 'logs', args.task_type, args.mode,
        'model_{}_{}_fold_{}_seed_{}'.format(args.model, args.audio_type, args.fold, args.seed))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # appendixes directory
    global appendixes_dir
    appendixes_dir = os.path.join(args.workspace, 'appendixes')
    os.makedirs(appendixes_dir, exist_ok=True)

    # submissions directory
    global submissions_dir
    submissions_dir = os.path.join(appendixes_dir, 'submissions')
    os.makedirs(submissions_dir, exist_ok=True)

    # pretrained path
    global pretrained_path
    pretrained_path = os.path.join(appendixes_dir, 'models_saved', 'sed_only',
def train(args):
    """ Arguments & parameters"""
    # from main.py
    workspace = args.workspace  # store experiments results in the workspace
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    # resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    filename = args.filename
    # for fine-tune models
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base_num = args.freeze_base_num

    pretrain = bool(pretrained_checkpoint_path)

    # Define Saving Paths
    best_model_path = os.path.join(workspace, 'best_model', filename,
                                   'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
                                   .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), model_type,
                                   'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
                                   'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
                                   )
    create_folder(os.path.dirname(best_model_path))

    statistics_path = os.path.join(workspace, 'statistics', filename,
                                   'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
                                   .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), model_type,
                                   'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
                                   'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
                                   'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename,
                            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'
                            .format(sample_rate, window_size, hop_size, mel_bins, fmin, fmax), model_type,
                            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
                            'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size))
    create_logging(logs_dir, filemode='w')

    # Dataset
    # Each item is a waveform and its one-hot encoded target.
    # The training csv has minority classes filtered out with a dropping threshold of 10.
    train_csv = pd.read_csv("German_Birdcall_Dataset_Preparation/Germany_Birdcall_resampled_filtered.csv")
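    # One class per unique entry of the "gen" (genus) column.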
    classes_num = len(train_csv["gen"].unique())
    audio_path = "German_Birdcall_Dataset_Preparation/Germany_Birdcall_resampled"
    # Split csv file training and test
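    # Stratify on "gen" so the 60/40 train/test split keeps the per-class proportions.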
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.4, random_state=42)
    for train_idx, test_idx in splitter.split(X=train_csv, y=train_csv["gen"]):
        train_df = train_csv.loc[train_idx, :].reset_index(drop=True)
        test_df = train_csv.loc[test_idx, :].reset_index(drop=True)
    # dataset = WaveformDataset(df: pd.DataFrame, datadir: str)
    train_dataset = WaveformDataset(df=train_df, datadir=audio_path)
    test_dataset = WaveformDataset(df=test_df, datadir=audio_path)

    # Train sampler and train loader
    num_workers = 10
    sampler_batch_size = batch_size * 2 if 'mixup' in augmentation else batch_size
    if balanced == 'balanced':
        train_sampler = BalancedSampler(df=train_df, batch_size=sampler_batch_size)
    else:
        train_sampler = RandomSampler(df=train_df, batch_size=sampler_batch_size)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=num_workers,
        pin_memory=True)
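    # Note: with mixup enabled the sampler yields 2 * batch_size clips per step so that clips can be mixed in pairs.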

    eval_test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset,
        batch_size=batch_size,
        collate_fn=collate_fn,
        num_workers=num_workers)

    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=1.)
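        # Standard mixup draws mixing weights from Beta(alpha, alpha); with mixup_alpha=1. that is Beta(1, 1), i.e. uniform on [0, 1].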

    # Model Initialization
    transfer_model = eval(model_type)  # resolve the model class from its name, e.g. model_type = "Transfer_Cnn14"
    model = transfer_model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax,
                  classes_num, freeze_base_num)
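    # freeze_base_num is assumed to control how many layers of the pretrained base stay frozen during fine-tuning.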

    logging.info(args)

    # Load pretrained model
    # Pretrained PANNs checkpoints, e.g. "Cnn14_mAP=0.431.pth", "Cnn10_mAP=0.380.pth" or "Cnn6_mAP=0.343.pth"
    if pretrain:
        logging.info('Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)
        print('Loaded pretrained model successfully!')
    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
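    # DataParallel replicates the model and splits each input batch across the visible GPUs.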
    model = torch.nn.DataParallel(model)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if 'cuda' in device:
        model.to(device)
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Loss
    loss_func = get_loss_func(loss_type)

    # Evaluator: returns mAP and AUC values
    evaluator = Evaluator(model=model)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
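    # Adam with the AMSGrad variant enabled and weight decay disabled.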
    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                           betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)

    # Training Loop
    time_initial = time.time()
    train_bgn_time = time.time()
    time1 = time.time()
    iteration = 0
    loss_sum = 0
    loss_average = 0
    best_mAP = 0
    # store validation results in a pd.DataFrame
    validation_results = pd.DataFrame(columns=["iteration", "mAP", "Auc"])
    # store training losses in a pd.DataFrame
    training_results = pd.DataFrame(columns=["iteration", "average loss"])
    i = 0
    j = 0
    for batch_data_dict in train_loader:
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],), 
            'waveform': (batch_size [*2 if mixup], clip_samples), 
            'target': (batch_size [*2 if mixup], classes_num), 
            (ifexist) 'mixup_lambda': (batch_size * 2,)}
        """

        # Evaluate
        if iteration % 200 == 0:
            train_fin_time = time.time()

            test_statistics = evaluator.evaluate(eval_test_loader)
            current_mAP = np.mean(test_statistics['average_precision'])
            current_auc = np.mean(test_statistics['auc'])
            logging.info('Validate test mAP: {:.3f}'.format(current_mAP))
            logging.info('Validate test Auc: {:.3f}'.format(current_auc))
            validation_results.loc[i] = [iteration, current_mAP, current_auc]
            i += 1

            statistics_container.append(iteration, test_statistics, data_type='test')
            statistics_container.dump()
            
            # keep a deep copy of the best weights so later updates do not overwrite them
            if current_mAP > best_mAP:
                best_mAP = current_mAP
                best_model = copy.deepcopy(model.state_dict())

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))
            logging.info('------------------------------------')

            train_bgn_time = time.time()  # reset after evaluation

        # Mixup lambda
        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                batch_size=len(batch_data_dict['waveform']))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device)

        # Forward
        model.train()
        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                                      batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': do_mixup(batch_data_dict['target'],
                                                    batch_data_dict['mixup_lambda'])}
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # Loss
        loss = loss_func(batch_output_dict, batch_target_dict)

        # Backward
        loss.backward()
#         print(loss)
        loss_sum += loss.item()

        optimizer.step()
        optimizer.zero_grad()
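        # zero_grad() right after step() clears this batch's gradients so they do not accumulate into the next iteration.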

        if iteration % 200 == 0 and iteration > 0:
            print('--- Iteration: {}, train time: {:.3f} s / 200 iterations ---'
                  .format(iteration, time.time() - time1))
            time1 = time.time()
            loss_average = loss_sum / 200
            print("Average loss over the last 200 batches: {:.5f}".format(loss_average))
            loss_sum = 0
            training_results.loc[j] = [iteration, loss_average]
            j += 1
        
        # Stop learning
        if iteration == early_stop:
            break

        iteration += 1

    # Save the best model weights
    best_model_path = "best_{}{}{}freeze{}_mAP={:.3f}".format(
        model_type, balanced, augmentation, freeze_base_num, best_mAP)
    torch.save(best_model, best_model_path + ".pth")
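    # best_model is a state_dict taken from the DataParallel-wrapped model, so its keys carry a
    # "module." prefix; reload it later with model.load_state_dict(torch.load(best_model_path + ".pth")).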

    # Save validation results
    validation_results_path = "validation_results{}{}{}freeze{}_mAP={:.3f}".format(
        model_type, balanced, augmentation, freeze_base_num, best_mAP)
    validation_results.to_csv(validation_results_path + '.csv', index=False)
    
    # Save training results
    training_results_path = "training_results{}{}{}freeze{}_mAP={:.3f}".format(
        model_type, balanced, augmentation, freeze_base_num, best_mAP)
    training_results.to_csv(training_results_path + '.csv', index=False)

    time_end = time.time()
    time_cost = time_end - time_initial
    print("The whole training process takes: {:.3f} s".format(time_cost))