def adversarial_discovery(full_train_path, full_test_path, model_0, save_path):
    """
        Collect the adversarial - false positives based on model_0
        for the train and validation set.
    """
    print('++================================================++')
    print("++ Beginning False Positive Adversarial Discovery ++")
    print('++================================================++')
    # Do not include boundary uncertainty in the full train loader. We only need the model
    # predictions; we do not calculate the loss! Use the HIERARCHICAL_SHIFT_WINDOWS flag to
    # decide whether the hierarchical model will use randomly shifted windows. Note, we flag
    # that this is the full dataset to make sure that during adversarial discovery we always
    # sample the middle of oversized windows.
    full_train_loader = get_loader_fuzzy(
        full_train_path,
        parameters.BATCH_SIZE,
        random_seed=parameters.DATA_LOADER_SEED,
        norm=parameters.NORM,
        scale=parameters.SCALE,
        include_boundaries=False,
        shift_windows=parameters.HIERARCHICAL_SHIFT_WINDOWS,
        is_full_dataset=True,
        full_window_predict=True)
    full_test_loader = get_loader_fuzzy(
        full_test_path,
        parameters.BATCH_SIZE,
        random_seed=parameters.DATA_LOADER_SEED,
        norm=parameters.NORM,
        scale=parameters.SCALE,
        include_boundaries=False,
        full_window_predict=True)

    # For now let us try including all of the false positives!
    train_adversarial_file = "model_0-False_Pos_Train.txt"
    if parameters.HIERARCHICAL_SHIFT_WINDOWS:
        train_adversarial_file = "model_0-False_Pos_Train_Shift.txt"
    adversarial_train_files = adversarial_discovery_helper(
        full_train_loader, model_0)
    adversarial_train_save_path = os.path.join(save_path,
                                               train_adversarial_file)
    with open(adversarial_train_save_path, 'w') as f:
        for file in adversarial_train_files:
            f.write('{}\n'.format(file))

    adversarial_test_files = adversarial_discovery_helper(
        full_test_loader, model_0)
    adversarial_test_save_path = os.path.join(save_path,
                                              "model_0-False_Pos_Test.txt")
    with open(adversarial_test_save_path, 'w') as f:
        for file in adversarial_test_files:
            f.write('{}\n'.format(file))

    return adversarial_train_files, adversarial_test_files
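
# adversarial_discovery_helper is defined elsewhere. For orientation, here is a
# minimal sketch of the mining loop it presumably wraps; the batch structure
# (inputs, labels, file_names) and the 0.5 decision threshold are assumptions,
# not the project's actual code.
def adversarial_discovery_helper_sketch(loader, model_0, threshold=0.5):
    """Collect windows that model_0 predicts positive but that contain no true call."""
    adversarial_files = []
    model_0.eval()
    with torch.no_grad():
        for inputs, labels, file_names in loader:  # assumed batch structure
            inputs = inputs.to(parameters.device)
            preds = torch.sigmoid(model_0(inputs)).squeeze(-1) > threshold
            for i, name in enumerate(file_names):
                # A predicted call inside a window with an all-zero label is a false positive.
                if preds[i].any() and labels[i].sum() == 0:
                    adversarial_files.append(name)
    return adversarial_files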
Example 2
def main(args):
    """
    Example runs:

    """
    # Load Model
    model, model_id = loadModel(args.model)
    # Put in eval mode!
    model.eval()
    print(model_id)

    # For the window classification setting we just test on the full_spect chunks
    if args.local_files:
        test_data_path = parameters.LOCAL_FULL_TEST
    else:
        test_data_path = parameters.REMOTE_FULL_TEST

    eval_loader = get_loader_fuzzy(test_data_path,
                                   parameters.BATCH_SIZE,
                                   random_seed=parameters.DATA_LOADER_SEED,
                                   norm=parameters.NORM,
                                   scale=parameters.SCALE,
                                   full_window_predict=True)

    eval_model(eval_loader, model, args.model)
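
# eval_model is not shown in this snippet. A minimal sketch of the window-level
# evaluation loop it might wrap, assuming each batch yields (inputs, labels, ...)
# and a 0.5 decision threshold; both are assumptions.
def eval_model_sketch(loader, model, model_name):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels, *rest in loader:  # assumed batch structure
            inputs = inputs.to(parameters.device)
            labels = labels.to(parameters.device)
            preds = (torch.sigmoid(model(inputs)) > 0.5).squeeze(-1).float()
            correct += (preds == labels.float()).sum().item()
            total += labels.numel()
    print('{}: window accuracy {:.4f}'.format(model_name, correct / total))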
Example 3
def main():
    args = parser.parse_args()

    # Use the 24hr datasets!

    if args.local_files:
        if parameters.LOSS.upper() == "FOCAL":
            train_data_path = parameters.LOCAL_FULL_TRAIN
            test_data_path = parameters.LOCAL_FULL_TEST
        else:
            train_data_path = parameters.LOCAL_TRAIN_FILES
            test_data_path = parameters.LOCAL_TEST_FILES
    else:
        if parameters.LOSS.upper() == "FOCAL":
            train_data_path = parameters.REMOTE_FULL_TRAIN
            test_data_path = parameters.REMOTE_FULL_TEST
        else:
            train_data_path = parameters.REMOTE_TRAIN_FILES
            test_data_path = parameters.REMOTE_TEST_FILES

    if parameters.LOSS.upper() != "FOCAL":
        train_data_path, _ = create_dataset_path(
            train_data_path,
            neg_samples=parameters.NEG_SAMPLES,
            call_repeats=parameters.CALL_REPEATS,
            shift_windows=parameters.SHIFT_WINDOWS)
        test_data_path, _ = create_dataset_path(
            test_data_path,
            neg_samples=parameters.TEST_NEG_SAMPLES,
            call_repeats=1)
    else:
        if parameters.SHIFT_WINDOWS:
            train_data_path += '_OversizeCalls'

    train_loader = get_loader_fuzzy(train_data_path,
                                    parameters.BATCH_SIZE,
                                    random_seed=parameters.DATA_LOADER_SEED,
                                    norm=parameters.NORM,
                                    scale=parameters.SCALE,
                                    shift_windows=parameters.SHIFT_WINDOWS,
                                    full_window_predict=True)
    test_loader = get_loader_fuzzy(test_data_path,
                                   parameters.BATCH_SIZE,
                                   random_seed=parameters.DATA_LOADER_SEED,
                                   norm=parameters.NORM,
                                   scale=parameters.SCALE,
                                   full_window_predict=True)

    # For now model 18 signifies this!!!
    save_path = create_save_path(
        time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()), args.save_local)

    dloaders = {'train': train_loader, 'valid': test_loader}

    ## Training
    model = get_model(parameters.MODEL_ID)
    model.to(parameters.device)

    print(model)

    writer = SummaryWriter(save_path)
    writer.add_scalar('batch_size', parameters.BATCH_SIZE)
    writer.add_scalar(
        'weight_decay',
        parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])

    loss_func, include_boundaries = get_loss()

    # Honestly probably do not need to have hyper-parameters per model, but leave it for now.
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr'],
        weight_decay=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr_decay_step'],
        gamma=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr_decay'])

    start_time = time.time()
    model_wts = None

    model_wts = train(dloaders,
                      model,
                      loss_func,
                      optimizer,
                      scheduler,
                      writer,
                      parameters.NUM_EPOCHS,
                      include_boundaries=include_boundaries)

    if model_wts:
        model.load_state_dict(model_wts)
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        save_path = save_path + "/" + "model.pt"
        torch.save(model, save_path)
        print('Saved best model based on {} to path {}'.format(
            parameters.TRAIN_MODEL_SAVE_CRITERIA.upper(), save_path))
    else:
        print('Training did not return model weights; nothing to save')

    print('Training time: {:10f} minutes'.format(
        (time.time() - start_time) / 60))

    writer.close()
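
# The Adam + StepLR pairing above decays the learning rate by 'lr_decay' every
# 'lr_decay_step' epochs. A self-contained toy check of that schedule (the
# numbers are illustrative, not the project's hyper-parameters):
import torch

net = torch.nn.Linear(4, 1)
opt = torch.optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-5)
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=4, gamma=0.95)
for epoch in range(8):
    opt.step()    # stands in for one epoch of training
    sched.step()  # the decay fires after epochs 4 and 8
print(opt.param_groups[0]['lr'])  # 1e-3 * 0.95**2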
Example 4
def main():
    # What do we need to do across all of the settings?
    # Get the data loaders!
    args = parser.parse_args()

    if args.local_files:
        train_data_path = parameters.LOCAL_TRAIN_FILES
        test_data_path = parameters.LOCAL_TEST_FILES
        full_train_path = parameters.LOCAL_FULL_TRAIN
        full_test_path = parameters.LOCAL_FULL_TEST
    else:
        train_data_path = parameters.REMOTE_TRAIN_FILES
        test_data_path = parameters.REMOTE_TEST_FILES
        full_train_path = parameters.REMOTE_FULL_TRAIN
        full_test_path = parameters.REMOTE_FULL_TEST

    if parameters.HIERARCHICAL_SHIFT_WINDOWS:
        full_train_path += '_OversizeCalls'

    model_0_train_data_path, include_boundaries = create_dataset_path(
        train_data_path,
        neg_samples=parameters.NEG_SAMPLES,
        call_repeats=parameters.CALL_REPEATS,
        shift_windows=parameters.SHIFT_WINDOWS)
    model_0_test_data_path, _ = create_dataset_path(
        test_data_path,
        neg_samples=parameters.TEST_NEG_SAMPLES,
        call_repeats=1)

    # Check if a different dataset is being used for Model_1
    model_1_train_data_path = model_0_train_data_path
    model_1_test_data_path = model_0_test_data_path
    if str(parameters.HIERARCHICAL_REPEATS).lower() != "same":
        # Should probably just use neg_samples=1 since it does not matter!
        model_1_train_data_path, _ = create_dataset_path(
            train_data_path,
            neg_samples=parameters.NEG_SAMPLES,
            call_repeats=parameters.HIERARCHICAL_REPEATS,
            shift_windows=parameters.HIERARCHICAL_SHIFT_WINDOWS)

    # Model 0 Loaders
    model_0_train_loader = get_loader_fuzzy(
        model_0_train_data_path,
        parameters.BATCH_SIZE,
        random_seed=parameters.DATA_LOADER_SEED,
        norm=parameters.NORM,
        scale=parameters.SCALE,
        include_boundaries=include_boundaries,
        shift_windows=parameters.SHIFT_WINDOWS,
        full_window_predict=True)
    model_0_test_loader = get_loader_fuzzy(
        model_0_test_data_path,
        parameters.BATCH_SIZE,
        random_seed=parameters.DATA_LOADER_SEED,
        norm=parameters.NORM,
        scale=parameters.SCALE,
        include_boundaries=include_boundaries,
        full_window_predict=True)

    # Model 1 Loaders
    model_1_train_loader = get_loader_fuzzy(
        model_1_train_data_path,
        parameters.BATCH_SIZE,
        random_seed=parameters.DATA_LOADER_SEED,
        norm=parameters.NORM,
        scale=parameters.SCALE,
        include_boundaries=include_boundaries,
        shift_windows=parameters.HIERARCHICAL_SHIFT_WINDOWS,
        full_window_predict=True)
    model_1_test_loader = get_loader_fuzzy(
        model_1_test_data_path,
        parameters.BATCH_SIZE,
        random_seed=parameters.DATA_LOADER_SEED,
        norm=parameters.NORM,
        scale=parameters.SCALE,
        include_boundaries=include_boundaries,
        full_window_predict=True)

    if args.models_path is None:
        save_path = create_save_path(time.strftime("%Y-%m-%d_%H:%M:%S",
                                                   time.localtime()),
                                     args.save_local,
                                     save_prefix='Hierarchical_')
    else:
        save_path = args.models_path

    # Case 1) Do the entire pipeline! We can now break the pipeline into 3 helper functions!
    if args.full_pipeline:
        # Train and save model_0
        #model_0 = train_model_0(dloaders, save_path)
        model_0 = train_model_0(model_0_train_loader, model_0_test_loader,
                                save_path)
        # Do the adversarial discovery
        adversarial_train_files, adversarial_test_files = adversarial_discovery(
            full_train_path, full_test_path, model_0, save_path)
        # Train and save model 1
        train_model_1(adversarial_train_files, adversarial_test_files,
                      model_1_train_loader, model_1_test_loader, save_path)

    # Just generate new adversarial examples
    elif args.adversarial:
        # Load model_0
        model_0_path = os.path.join(save_path, "Model_0/model.pt")
        model_0 = torch.load(model_0_path, map_location=parameters.device)
        adversarial_discovery(full_train_path, full_test_path, model_0,
                              save_path)

    # Train just model_1
    elif args.model1:
        # Read in the adversarial files
        train_adversarial_file = "model_0-False_Pos_Train.txt"
        if parameters.HIERARCHICAL_SHIFT_WINDOWS:
            train_adversarial_file = "model_0-False_Pos_Train_Shift.txt"
        adversarial_train_save_path = os.path.join(save_path,
                                                   train_adversarial_file)
        adversarial_train_files = []
        with open(adversarial_train_save_path, 'r') as f:
            files = f.readlines()
            for file in files:
                adversarial_train_files.append(file.strip())

        adversarial_test_save_path = os.path.join(
            save_path, "model_0-False_Pos_Test.txt")
        adversarial_test_files = []
        with open(adversarial_test_save_path, 'r') as f:
            files = f.readlines()
            for file in files:
                adversarial_test_files.append(file.strip())

        train_model_1(adversarial_train_files, adversarial_test_files,
                      model_1_train_loader, model_1_test_loader, save_path)

    else:
        print("Invalid running mode!")
Example 5
def main():
    # What do we need to do across all of the settings!
    # Get the data loaders!
    args = parser.parse_args()

    if args.local_files:
        train_data_path = parameters.LOCAL_TRAIN_FILES
        test_data_path = parameters.LOCAL_TEST_FILES
        full_train_path = parameters.LOCAL_FULL_TRAIN
        full_test_path = parameters.LOCAL_FULL_TEST
    else:
        if parameters.DATASET.lower() == "noab":
            train_data_path = parameters.REMOTE_TRAIN_FILES
            test_data_path = parameters.REMOTE_TEST_FILES
            full_train_path = parameters.REMOTE_FULL_TRAIN
            full_test_path = parameters.REMOTE_FULL_TEST
        else:
            train_data_path = parameters.REMOTE_BAI_TRAIN_FILES
            test_data_path = parameters.REMOTE_BAI_TEST_FILES
            full_train_path = parameters.REMOTE_FULL_TRAIN_BAI
            full_test_path = parameters.REMOTE_FULL_TEST_BAI


    # Get oversized calls if shifting windows or repeating for model 2.
    # We should try to remove both of these; this is an issue too!
    if parameters.HIERARCHICAL_SHIFT_WINDOWS: # or parameters.HIERARCHICAL_REPEATS > 1:
        full_train_path += '_OversizeCalls'

    # For model 2 we need to have oversized calls to generate the randomly located repeats
    if parameters.HIERARCHICAL_REPEATS_POS > 1 or parameters.HIERARCHICAL_REPEATS_NEG > 1:
        full_train_path += '_OversizeCalls'

    model_0_train_data_path, include_boundaries = create_dataset_path(train_data_path, neg_samples=parameters.NEG_SAMPLES, 
                                                                    call_repeats=parameters.CALL_REPEATS, 
                                                                    shift_windows=parameters.SHIFT_WINDOWS)
    model_0_test_data_path, _ = create_dataset_path(test_data_path, neg_samples=parameters.TEST_NEG_SAMPLES, 
                                                                call_repeats=1)
    

    # Check if a different dataset is being used for Model_1
    model_1_train_data_path = model_0_train_data_path
    model_1_test_data_path = model_0_test_data_path
    # Remove this same thing!
    #if str(parameters.HIERARCHICAL_REPEATS).lower() != "same" or parameters.HIERARCHICAL_REPEATS_POS > 1  or parameters.HIERARCHICAL_REPEATS_NEG > 1:
    if parameters.HIERARCHICAL_REPEATS_POS > 1  or parameters.HIERARCHICAL_REPEATS_NEG > 1:
        # Should probably just use neg_samples=1 since it does not matter!
        # For now set call_repeats to 1, but get shifting windows so we can do call repeats later!
        #shift_windows = parameters.HIERARCHICAL_REPEATS > 1 or parameters.HIERARCHICAL_SHIFT_WINDOWS
        # For now shift_windows should just be True, because it does not make much sense to do
        # repeats without shifting windows since we can only repeat the positive examples.
        shift_windows = True
        # Set this to 1 because we take care of the repeats later!
        call_repeats = 1
        model_1_train_data_path, _ = create_dataset_path(train_data_path, neg_samples=parameters.NEG_SAMPLES, 
                                                        call_repeats=call_repeats,
                                                        shift_windows=shift_windows)
    
    
    # Model 0 Loaders
    model_0_train_loader = get_loader_fuzzy(model_0_train_data_path, parameters.BATCH_SIZE, random_seed=parameters.DATA_LOADER_SEED, 
                                        norm=parameters.NORM, scale=parameters.SCALE, 
                                        include_boundaries=include_boundaries, shift_windows=parameters.SHIFT_WINDOWS)
    model_0_test_loader = get_loader_fuzzy(model_0_test_data_path, parameters.BATCH_SIZE, random_seed=parameters.DATA_LOADER_SEED, 
                                        norm=parameters.NORM, scale=parameters.SCALE, include_boundaries=include_boundaries)
    
    # Model 1 Loaders
    model_1_train_loader = get_loader_fuzzy(model_1_train_data_path, parameters.BATCH_SIZE, random_seed=parameters.DATA_LOADER_SEED, 
                                        norm=parameters.NORM, scale=parameters.SCALE, 
                                        include_boundaries=include_boundaries, shift_windows=parameters.HIERARCHICAL_SHIFT_WINDOWS)
    model_1_test_loader = get_loader_fuzzy(model_1_test_data_path, parameters.BATCH_SIZE, random_seed=parameters.DATA_LOADER_SEED, 
                                        norm=parameters.NORM, scale=parameters.SCALE, include_boundaries=include_boundaries)
    

    if args.path is None:
        save_path = create_save_path(time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()), args.save_local, save_prefix='Hierarchical_')
    else:
        save_path = args.path

    # Case 1) Do the entire pipeline! Can break now the pipeline into 3 helper functions!
    if args.full_pipeline:
        # Train and save model_0
        if args.model_0 is None:
            model_0 = train_model_0(model_0_train_loader, model_0_test_loader , save_path, args.pre_train_0)
        else: # Load and save model_0
            model_0 = torch.load(args.model_0, map_location=parameters.device)
            first_model_save_path = os.path.join(save_path, "Model_0")
            if not os.path.exists(first_model_save_path):
                os.makedirs(first_model_save_path)

            model_save_path = os.path.join(first_model_save_path, "model.pt")
            torch.save(model_0, model_save_path)

        # Do the adversarial discovery
        adversarial_train_files, adversarial_test_files = adversarial_discovery(full_train_path, full_test_path, 
                                                                model_0_train_loader, model_0_test_loader, model_0, save_path)
        # Train and save model 1
        train_model_1(adversarial_train_files, adversarial_test_files, model_1_train_loader, model_1_test_loader, 
                                                save_path, args.pre_train_1)
    
    # Just generate new adversarial examples 
    elif args.adversarial:
        # Load model_0
        model_0_path = os.path.join(save_path, "Model_0/model.pt")
        model_0 = torch.load(model_0_path, map_location=parameters.device)
        adversarial_discovery(full_train_path, full_test_path, model_0_train_loader, model_0_test_loader, model_0, save_path)

    # Train just model_1
    elif args.model1:
        # Read in the adversarial files
        train_adversarial_file = "model_0-False_Pos_Train.txt"
        #if parameters.HIERARCHICAL_SHIFT_WINDOWS or parameters.HIERARCHICAL_REPEATS > 1:
        if parameters.HIERARCHICAL_REPEATS_POS > 1  or parameters.HIERARCHICAL_REPEATS_NEG > 1:
            train_adversarial_file = "model_0-False_Pos_Train_Shift.txt"

        adversarial_train_save_path = os.path.join(save_path, train_adversarial_file)
        adversarial_train_files = []
        with open(adversarial_train_save_path, 'r') as f:
            files = f.readlines()
            for file in files:
                adversarial_train_files.append(file.strip())

        adversarial_test_save_path = os.path.join(save_path, "model_0-False_Pos_Test.txt")
        adversarial_test_files = []
        with open(adversarial_test_save_path, 'r') as f:
            files = f.readlines()
            for file in files:
                adversarial_test_files.append(file.strip())

        train_model_1(adversarial_train_files, adversarial_test_files, model_1_train_loader, 
                        model_1_test_loader, save_path, args.pre_train_1)

    elif args.visualize:
        model_0_path = os.path.join(save_path, "Model_0/model.pt")
        model_0 = torch.load(model_0_path, map_location=parameters.device)
        model_1_name = hierarchical_model_1_path()
        model_1_path = os.path.join(save_path, model_1_name+'/model.pt')
        model_1 = torch.load(model_1_path, map_location=parameters.device)

        # Read in the adversarial files
        train_adversarial_file = "model_0-False_Pos_Train.txt"
        #if parameters.HIERARCHICAL_SHIFT_WINDOWS or parameters.HIERARCHICAL_REPEATS > 1:
        if parameters.HIERARCHICAL_REPEATS_POS > 1  or parameters.HIERARCHICAL_REPEATS_NEG > 1:
            train_adversarial_file = "model_0-False_Pos_Train_Shift.txt"

        adversarial_train_save_path = os.path.join(save_path, train_adversarial_file)
        adversarial_train_files = []
        with open(adversarial_train_save_path, 'r') as f:
            files = f.readlines()
            for file in files:
                adversarial_train_files.append(file.strip())

        visualize_adversarial(adversarial_train_files, model_1_train_loader, model_0, model_1)

    else:
        print ("Invalid running mode!")
Example 6
def adversarial_discovery(full_train_path, full_test_path, model_1_train_loader, model_1_test_loader, model_0, save_path):
    """
        Collect the adversarial - false positives based on model_0
        for the train and validation set.
    """
    # Test our little function
    print ('++================================================++')
    print ("++ Beginning False Positive Adversarial Discovery ++")
    print ('++================================================++')
    # Do not include boundary uncertainty in the full train loader. We only need the model
    # predictions; we do not calculate the loss! Use the shift_windows flag below to decide
    # whether the hierarchical model will use randomly shifted windows. Note, we flag that
    # this is the full dataset to make sure that during adversarial discovery we always
    # sample the middle of oversized windows.
    #shift_windows = parameters.HIERARCHICAL_SHIFT_WINDOWS # or parameters.HIERARCHICAL_REPEATS > 1
    shift_windows = parameters.HIERARCHICAL_REPEATS_POS > 1 or parameters.HIERARCHICAL_REPEATS_NEG > 1
    full_train_loader = get_loader_fuzzy(full_train_path, parameters.BATCH_SIZE, random_seed=parameters.DATA_LOADER_SEED, 
                                        norm=parameters.NORM, scale=parameters.SCALE, 
                                        include_boundaries=False, shift_windows=shift_windows,
                                        is_full_dataset=True)
    full_test_loader = get_loader_fuzzy(full_test_path, parameters.BATCH_SIZE, random_seed=parameters.DATA_LOADER_SEED, 
                                        norm=parameters.NORM, scale=parameters.SCALE, include_boundaries=False)


    # For now let us try including all of the false positives!
    train_adversarial_file = "model_0-False_Pos_Train.txt"
    if shift_windows:
        train_adversarial_file = "model_0-False_Pos_Train_Shift.txt"

    adversarial_train_files, model_0_fp_train_logits, model_0_fp_train_preds, model_0_fp_binary_train_preds  = adversarial_discovery_helper(full_train_loader,
                                                             model_0, min_length=parameters.FALSE_POSITIVE_THRESHOLD)
    # Save the adversarial feature file paths
    adversarial_train_save_path = os.path.join(save_path, train_adversarial_file)
    with open(adversarial_train_save_path, 'w') as f:
        for file in adversarial_train_files:
            f.write('{}\n'.format(file))

    # Save model_0 FP train predictions both raw and transformed with additional label
    print ("Saving model_0 FP predictions on the train data")
    save_model_0_predictions(save_path, adversarial_train_files, model_0_fp_train_logits, "model_0_fp_train_logits")
    save_model_0_predictions(save_path, adversarial_train_files, model_0_fp_train_preds, "model_0_fp_train_preds")
    save_model_0_predictions(save_path, adversarial_train_files, model_0_fp_binary_train_preds, "model_0_fp_binary_train_preds")
    transformed_model_0_fp_binary_train_preds = transform_model_0_predictions(model_0_fp_binary_train_preds)
    save_model_0_predictions(save_path, adversarial_train_files, transformed_model_0_fp_binary_train_preds, "transformed_model_0_fp_binary_train_preds")

    # Save model_0 TP train predictions both raw and transformed with additional label
    print ("Saving model_0 TP predictions on the train data")
    elephant_train_files, model_0_tp_train_logits, model_0_tp_train_preds, model_0_tp_binary_train_preds, gt_train_labels = model_0_Elephant_Predictions(model_1_train_loader, model_0)
    save_model_0_predictions(save_path, elephant_train_files, model_0_tp_train_logits, "model_0_tp_train_logits")
    save_model_0_predictions(save_path, elephant_train_files, model_0_tp_train_preds, "model_0_tp_train_preds")
    save_model_0_predictions(save_path, elephant_train_files, model_0_tp_binary_train_preds, "model_0_tp_binary_train_preds")
    transformed_model_0_tp_binary_train_preds = transform_model_0_predictions(model_0_tp_binary_train_preds, gt_train_labels)
    save_model_0_predictions(save_path, elephant_train_files, transformed_model_0_tp_binary_train_preds, "transformed_model_0_tp_binary_train_preds")

    test_adversarial_file = "model_0-False_Pos_Test.txt"
    adversarial_test_files, model_0_fp_test_logits, model_0_fp_test_preds, model_0_fp_binary_test_preds = adversarial_discovery_helper(full_test_loader, 
                                                             model_0, min_length=parameters.FALSE_POSITIVE_THRESHOLD)
    adversarial_test_save_path = os.path.join(save_path, test_adversarial_file)
    with open(adversarial_test_save_path, 'w') as f:
        for file in adversarial_test_files:
            f.write('{}\n'.format(file))

    # Save model_0 FP test predictions both raw and transformed with additional label
    print ("Saving model_0 FP predictions on the test data")
    save_model_0_predictions(save_path, adversarial_test_files, model_0_fp_test_logits, "model_0_fp_test_logits")
    save_model_0_predictions(save_path, adversarial_test_files, model_0_fp_test_preds, "model_0_fp_test_preds")
    save_model_0_predictions(save_path, adversarial_test_files, model_0_fp_binary_test_preds, "model_0_fp_binary_test_preds")
    transformed_model_0_fp_binary_test_preds = transform_model_0_predictions(model_0_fp_binary_test_preds)
    save_model_0_predictions(save_path, adversarial_test_files, transformed_model_0_fp_binary_test_preds, "transformed_model_0_fp_binary_test_preds")

    # Save model_0 TP test predictions both raw and transformed with additional label
    print ("Saving model_0 TP predictions on the test data")
    elephant_test_files, model_0_tp_test_logits, model_0_tp_test_preds, model_0_tp_binary_test_preds, gt_test_labels = model_0_Elephant_Predictions(model_1_test_loader, model_0)
    save_model_0_predictions(save_path, elephant_test_files, model_0_tp_test_logits, "model_0_tp_test_logits")
    save_model_0_predictions(save_path, elephant_test_files, model_0_tp_test_preds, "model_0_tp_test_preds")
    save_model_0_predictions(save_path, elephant_test_files, model_0_tp_binary_test_preds, "model_0_tp_binary_test_preds")
    transformed_model_0_tp_binary_test_preds = transform_model_0_predictions(model_0_tp_binary_test_preds, gt_test_labels)
    save_model_0_predictions(save_path, elephant_test_files, transformed_model_0_tp_binary_test_preds, "transformed_model_0_tp_binary_test_preds")

    return adversarial_train_files, adversarial_test_files
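
# save_model_0_predictions is not shown. A minimal sketch consistent with how it
# is called here (save dir, per-window file names, one prediction array per file,
# and a tag); the on-disk layout is an assumption.
import os
import numpy as np

def save_model_0_predictions_sketch(save_path, file_names, predictions, tag):
    out_dir = os.path.join(save_path, tag)
    os.makedirs(out_dir, exist_ok=True)
    for name, pred in zip(file_names, predictions):
        # One .npy per window, named after its source feature file.
        np.save(os.path.join(out_dir, os.path.basename(str(name)) + '.npy'),
                np.asarray(pred))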
Example 7
def main():
    args = parser.parse_args()


    if args.local_files:
        train_data_path = parameters.LOCAL_TRAIN_FILES
        test_data_path = parameters.LOCAL_TEST_FILES
        full_train_path = parameters.LOCAL_FULL_TRAIN
        full_test_path = parameters.LOCAL_FULL_TEST
    else:
        if parameters.DATASET.lower() == "noab":
            train_data_path = parameters.REMOTE_TRAIN_FILES
            test_data_path = parameters.REMOTE_TEST_FILES
            full_train_path = parameters.REMOTE_FULL_TRAIN
            full_test_path = parameters.REMOTE_FULL_TEST
        else:
            train_data_path = parameters.REMOTE_BAI_TRAIN_FILES
            test_data_path = parameters.REMOTE_BAI_TEST_FILES
            full_train_path = parameters.REMOTE_FULL_TRAIN_BAI
            full_test_path = parameters.REMOTE_FULL_TEST_BAI

    
    
    train_data_path, include_boundaries = create_dataset_path(train_data_path, neg_samples=parameters.NEG_SAMPLES, 
                                                                    call_repeats=parameters.CALL_REPEATS, 
                                                                    shift_windows=parameters.SHIFT_WINDOWS)
    test_data_path, _ = create_dataset_path(test_data_path, neg_samples=parameters.TEST_NEG_SAMPLES, 
                                                                call_repeats=1)
    
    train_loader = get_loader_fuzzy(train_data_path, parameters.BATCH_SIZE, random_seed=parameters.DATA_LOADER_SEED, 
                                        norm=parameters.NORM, scale=parameters.SCALE, 
                                        include_boundaries=include_boundaries, shift_windows=parameters.SHIFT_WINDOWS)
    test_loader = get_loader_fuzzy(test_data_path, parameters.BATCH_SIZE, random_seed=parameters.DATA_LOADER_SEED, 
                                        norm=parameters.NORM, scale=parameters.SCALE, include_boundaries=include_boundaries)

    # For now we don't need to save the model
    save_path = create_save_path(time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()), args.save_local)

    train_dataloaders = {'train':train_loader, 'valid':test_loader}

    # Load the full data sets - SET SHUFFLE = False
    full_train_loader = get_loader_fuzzy(full_train_path, parameters.BATCH_SIZE, shuffle=False, 
                                        norm=parameters.NORM, scale=parameters.SCALE, 
                                        include_boundaries=False, shift_windows=False,
                                        is_full_dataset=True)
    full_test_loader = get_loader_fuzzy(full_test_path, parameters.BATCH_SIZE, shuffle=False, 
                                        norm=parameters.NORM, scale=parameters.SCALE, include_boundaries=False)
    full_dataloaders = {'train':full_train_loader, 'valid': full_test_loader}

    
    model = get_model(parameters.MODEL_ID)
    model.to(parameters.device)

    print(model)

    writer = SummaryWriter(save_path)
    writer.add_scalar('batch_size', parameters.BATCH_SIZE)
    writer.add_scalar('weight_decay', parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])

    # Want to use focal loss! Next thing to check on!
    loss_func, include_boundaries = get_loss()

    # Honestly probably do not need to have hyper-parameters per model, but leave it for now.
    optimizer = torch.optim.Adam(model.parameters(), lr=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr'],
                                 weight_decay=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr_decay_step'], 
                                            gamma=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr_decay'])

    start_time = time.time()

    curriculum_profiling(model, train_dataloaders, full_dataloaders, loss_func, optimizer, scheduler, writer)

    print('Training time: {:10f} minutes'.format((time.time()-start_time)/60))

    writer.close()
Example 8
def main():
    args = parser.parse_args()

    use_focal = parameters.LOSS.lower() in ("focal", "focal_chunk")

    if args.local_files:
        if use_focal:
            train_data_path = parameters.LOCAL_FULL_TRAIN
            test_data_path = parameters.LOCAL_FULL_TEST
        else:
            train_data_path = parameters.LOCAL_TRAIN_FILES
            test_data_path = parameters.LOCAL_TEST_FILES
    else:
        if parameters.DATASET.lower() == "noab":
            if use_focal:
                train_data_path = parameters.REMOTE_FULL_TRAIN
                test_data_path = parameters.REMOTE_FULL_TEST
            else:
                train_data_path = parameters.REMOTE_TRAIN_FILES
                test_data_path = parameters.REMOTE_TEST_FILES
        else:
            if use_focal:
                train_data_path = parameters.REMOTE_FULL_TRAIN_BAI
                test_data_path = parameters.REMOTE_FULL_TEST_BAI
            else:
                train_data_path = parameters.REMOTE_BAI_TRAIN_FILES
                test_data_path = parameters.REMOTE_BAI_TEST_FILES

    if use_focal:
        include_boundaries = False
    else:
        train_data_path, include_boundaries = create_dataset_path(
            train_data_path,
            neg_samples=parameters.NEG_SAMPLES,
            call_repeats=parameters.CALL_REPEATS,
            shift_windows=parameters.SHIFT_WINDOWS)
        test_data_path, _ = create_dataset_path(
            test_data_path,
            neg_samples=parameters.TEST_NEG_SAMPLES,
            call_repeats=1)

    train_loader = get_loader_fuzzy(train_data_path,
                                    parameters.BATCH_SIZE,
                                    random_seed=parameters.DATA_LOADER_SEED,
                                    norm=parameters.NORM,
                                    scale=parameters.SCALE,
                                    include_boundaries=include_boundaries,
                                    shift_windows=parameters.SHIFT_WINDOWS)
    test_loader = get_loader_fuzzy(test_data_path,
                                   parameters.BATCH_SIZE,
                                   random_seed=parameters.DATA_LOADER_SEED,
                                   norm=parameters.NORM,
                                   scale=parameters.SCALE,
                                   include_boundaries=include_boundaries)

    save_path = create_save_path(
        time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()), args.save_local)

    dloaders = {'train': train_loader, 'valid': test_loader}

    ## Training
    # Load a pre-trained model
    if parameters.PRE_TRAIN:
        model = torch.load(args.pre_train, map_location=parameters.device)
    else:
        model = get_model(parameters.MODEL_ID)
        model.to(parameters.device)

    print(model)

    writer = SummaryWriter(save_path)
    writer.add_scalar('batch_size', parameters.BATCH_SIZE)
    writer.add_scalar(
        'weight_decay',
        parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])

    # Want to use focal loss! Next thing to check on!
    loss_func, include_boundaries = get_loss()

    # Honestly probably do not need to have hyper-parameters per model, but leave it for now.
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr'],
        weight_decay=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr_decay_step'],
        gamma=parameters.HYPERPARAMETERS[parameters.MODEL_ID]['lr_decay'])

    start_time = time.time()
    model_wts = None

    model_wts = train(dloaders,
                      model,
                      loss_func,
                      optimizer,
                      scheduler,
                      writer,
                      parameters.NUM_EPOCHS,
                      include_boundaries=include_boundaries)

    if model_wts:
        model.load_state_dict(model_wts)
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        save_path = save_path + "/" + "model.pt"
        torch.save(model, save_path)
        print('Saved best model based on {} to path {}'.format(
            parameters.TRAIN_MODEL_SAVE_CRITERIA.upper(), save_path))
    else:
        print('Training did not return model weights; nothing to save')

    print('Training time: {:10f} minutes'.format(
        (time.time() - start_time) / 60))

    writer.close()
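
# get_loss() is defined elsewhere; when parameters.LOSS selects "focal", something
# like the standard binary focal loss is presumably returned. A self-contained
# sketch of that loss (gamma and the mean reduction are assumptions):
import torch
import torch.nn.functional as F

def binary_focal_loss_sketch(logits, targets, gamma=2.0):
    # Focal loss down-weights easy examples: FL = (1 - p_t)^gamma * BCE.
    bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
    p_t = torch.exp(-bce)  # model probability assigned to the true class
    return ((1.0 - p_t) ** gamma * bce).mean()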