예제 #1
0
def attack_analysis(parameters, n, a_start, a_type):
    """Evaluate the predictor on temporarily perturbed test inputs.

    One random offset is drawn and applied to every column of rows
    ``a_index`` .. ``a_index + n`` (inclusive) of ``parameter.X_test``,
    where ``a_index`` is randomly 0 or 1.  The offset is added when
    ``a_type`` is ``"additive"`` and subtracted for any other value.

    The perturbation is applied to a copy, so the array object stored in
    ``parameter.X_test`` is never modified; the attribute is only pointed at
    the perturbed copy for the duration of the prediction and is restored
    afterwards, even if the prediction raises.

    Args:
        parameters: Passed through unchanged as the third argument of
            ``predict_function.predict``.
        n: Number of rows following ``a_index`` that are perturbed as well
            (``n + 1`` rows in total, fewer if the array is shorter).
        a_start: One bound of the uniform offset range; the other bound is
            ``a_start / 100``.
        a_type: ``"additive"`` to add the offset, anything else to
            subtract it.

    Returns:
        The ``(rmse_error, average_error)`` pair returned by
        ``predict_function.predict``.
    """
    # Keep the random draws in the same order (randint, then uniform) so
    # seeded runs produce the same perturbation as before.
    a_index = random.randint(0, 1)
    a_end = a_start / 100
    offset = random.uniform(a_start, a_end)

    original = parameter.X_test
    attacked = np.copy(original)
    rows = slice(a_index, a_index + n + 1)
    # Assign through the slice so the result is cast to the array's dtype.
    if a_type == "additive":
        attacked[rows, :] = attacked[rows, :] + offset
    else:
        attacked[rows, :] = attacked[rows, :] - offset

    # Expose the perturbed data through the shared attribute as well, in
    # case the predictor reads it from there, and always put the clean data
    # back afterwards.
    parameter.X_test = attacked
    try:
        rmse_error, average_error = predict_function.predict(
            attacked, parameter.Y_test, parameters)
    finally:
        parameter.X_test = original

    return rmse_error, average_error
예제 #2
0
def diff_attack_analysis(parameters, n, a_start, a_type):
    """Evaluate the predictor on test inputs hit by an alternating attack.

    Starting at a random ``a_index`` (0 or 1), every second row up to and
    including ``a_index + n`` has one random offset added to all columns,
    and the rows in between (starting at ``a_index + 1``) have a second,
    independently drawn offset subtracted.  Both offsets are drawn
    uniformly between ``a_start`` and ``a_start + a_start / 100``.

    The perturbation is applied to a copy, so the array object stored in
    ``parameter.X_test`` is never modified; the attribute is only pointed at
    the perturbed copy for the duration of the prediction and is restored
    afterwards, even if the prediction raises.

    Args:
        parameters: Passed through unchanged as the third argument of
            ``predict_function.predict``.
        n: Extent of the attacked window: rows ``a_index`` ..
            ``a_index + n`` are affected (fewer if the array is shorter).
        a_start: Lower bound of the offset range (for positive values).
        a_type: Unused; kept so the signature matches ``attack_analysis``
            style callers.

    Returns:
        The ``(rmse_error, average_error)`` pair returned by
        ``predict_function.predict``.
    """
    # NOTE: the start row is limited to 0 or 1; an earlier variant drew it
    # from the whole range, i.e. random.randint(0, N + 1 - n) with N rows.
    a_index = random.randint(0, 1)
    a_end = a_start + a_start / 100

    original = parameter.X_test
    attacked = np.copy(original)
    raised_rows = slice(a_index, a_index + n + 1, 2)
    lowered_rows = slice(a_index + 1, a_index + n + 1, 2)
    # Two separate draws, in this order, to keep seeded runs reproducible.
    attacked[raised_rows, :] = (attacked[raised_rows, :] +
                                random.uniform(a_start, a_end))
    attacked[lowered_rows, :] = (attacked[lowered_rows, :] -
                                 random.uniform(a_start, a_end))

    # Expose the perturbed data through the shared attribute as well, in
    # case the predictor reads it from there, and always put the clean data
    # back afterwards.
    parameter.X_test = attacked
    try:
        rmse_error, average_error = predict_function.predict(
            attacked, parameter.Y_test, parameters)
    finally:
        parameter.X_test = original

    return rmse_error, average_error
예제 #3
0
from predict_function import pred_args, read_json, load_checkpoint, predict
import matplotlib.pyplot as plt

# Get the prediction parameters; the returned object supplies the
# checkpoint, gpu, image_path and top_k attributes used below.
pred_arg = pred_args()
# Load a trained model from the given checkpoint.
model = load_checkpoint(pred_arg.checkpoint, pred_arg.gpu)
# Predict the flower for the given image with the loaded model.
predict(pred_arg.image_path, model, pred_arg.gpu, pred_arg.top_k)
예제 #4
0
def main():
    """Run multi-teacher distillation training and/or prediction on a GLUE task.

    Steps, in order:

    1. Parse the configuration, log every argument and seed torch, numpy and
       ``random`` with ``args.random_seed``.
    2. Check the arguments (yielding the device and GPU count), create the
       output directory and store the per-forward batch size on ``args``.
    3. Load the teacher/student description from ``args.model_config_json``
       and set up the task processor and label list.
    4. Load the training set (if ``args.do_train``) and the evaluation sets
       (if ``args.do_predict``); for ``mnli`` both ``mnli`` and ``mnli-mm``
       are evaluated.
    5. Build the teacher models (training only) and the student model, load
       their checkpoints and move them to the device.
    6. When training, distill the teachers into the student with
       ``MultiTeacherDistiller``, using ``predict`` as the callback.
    7. When only predicting, evaluate the student once and print the result.

    Raises:
        NotImplementedError: If ``args.local_rank`` is not ``-1`` (distributed
            training is not supported here).
    """
    # Parse arguments and log each one.
    config.parse()
    args = config.args
    for k, v in vars(args).items():
        logger.info(f"{k}:{v}")
    # Seed every random number generator in use with the same seed.
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)

    # Check arguments; this yields the device and the number of GPUs.
    device, n_gpu = args_check(args)
    os.makedirs(args.output_dir, exist_ok=True)
    # The batch size of a single forward pass is the training batch size
    # divided by the number of gradient accumulation steps.
    forward_batch_size = int(args.train_batch_size /
                             args.gradient_accumulation_steps)
    args.forward_batch_size = forward_batch_size

    # Load the model configuration; the result is indexed below with the
    # 'student' and 'teachers' keys.
    teachers_and_student = parse_model_config(args.model_config_json)

    # Prepare the GLUE task: processor, output mode and labels.
    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    # Read data; these stay None unless the matching mode is enabled.
    train_dataset = None
    eval_datasets = None
    num_train_steps = None

    # All datasets are built with the student's tokenizer and prefix.
    tokenizer_S = teachers_and_student['student']['tokenizer']
    prefix_S = teachers_and_student['student']['prefix']

    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                args.task_name,
                                                tokenizer_S,
                                                prefix=prefix_S,
                                                evaluate=False)
    if args.do_predict:
        eval_datasets = []
        # For "mnli" two evaluation sets are loaded: "mnli" and "mnli-mm".
        eval_task_names = ("mnli",
                           "mnli-mm") if args.task_name == "mnli" else (
                               args.task_name, )
        for eval_task in eval_task_names:
            eval_datasets.append(
                load_and_cache_examples(args,
                                        eval_task,
                                        tokenizer_S,
                                        prefix=prefix_S,
                                        evaluate=True))
    logger.info("Data loaded")

    # Build the models and load their checkpoints.
    # Teachers are only needed (and only created) when training.
    if args.do_train:
        model_Ts = []
        for teacher in teachers_and_student['teachers']:
            model_type_T = teacher['model_type']
            model_config_T = teacher['config']
            checkpoint_T = teacher['checkpoint']

            _, _, model_class_T = MODEL_CLASSES[model_type_T]
            model_T = model_class_T(model_config_T, num_labels=num_labels)
            state_dict_T = torch.load(checkpoint_T, map_location='cpu')
            # strict=True: the teacher checkpoint must match the model exactly.
            missing_keys, un_keys = model_T.load_state_dict(state_dict_T,
                                                            strict=True)
            logger.info(f"Teacher Model {model_type_T} loaded")
            model_T.to(device)
            # Teachers are put in evaluation mode.
            model_T.eval()
            model_Ts.append(model_T)

    student = teachers_and_student['student']
    model_type_S = student['model_type']
    model_config_S = student['config']
    checkpoint_S = student['checkpoint']
    _, _, model_class_S = MODEL_CLASSES[model_type_S]
    model_S = model_class_S(model_config_S, num_labels=num_labels)
    if checkpoint_S is not None:
        state_dict_S = torch.load(checkpoint_S, map_location='cpu')
        # strict=False: mismatched keys are tolerated and only logged.
        missing_keys, un_keys = model_S.load_state_dict(state_dict_S,
                                                        strict=False)
        logger.info(f"missing keys:{missing_keys}")
        logger.info(f"unexpected keys:{un_keys}")
    else:
        # No checkpoint given: the student keeps its initial weights.
        logger.warning("Initializing student randomly")
    logger.info("Student Model loaded")
    model_S.to(device)

    # Multi-GPU: wrap the models in DataParallel. Distributed runs
    # (local_rank != -1) are not supported.
    if args.local_rank != -1 or n_gpu > 1:
        if args.local_rank != -1:
            raise NotImplementedError
        elif n_gpu > 1:
            if args.do_train:
                model_Ts = [
                    torch.nn.DataParallel(model_T) for model_T in model_Ts
                ]
            model_S = torch.nn.DataParallel(model_S)  #,output_device=n_gpu-1)

    if args.do_train:
        # Collect the student's named parameters for the optimizer.
        params = list(model_S.named_parameters())
        all_trainable_params = divide_parameters(params, lr=args.learning_rate)
        logger.info("Length of all_trainable_params: %d",
                    len(all_trainable_params))

        if args.local_rank == -1:
            train_sampler = RandomSampler(train_dataset)
        else:
            raise NotImplementedError
        # drop_last=True discards the final incomplete batch.
        train_dataloader = DataLoader(train_dataset,
                                      sampler=train_sampler,
                                      batch_size=args.forward_batch_size,
                                      drop_last=True)
        # Number of optimizer steps: batches per epoch divided by the
        # accumulation steps, times the number of epochs.
        num_train_steps = int(
            len(train_dataloader) // args.gradient_accumulation_steps *
            args.num_train_epochs)

        # ---------- Distillation ----------
        # Logs and outputs both go to args.output_dir.
        train_config = TrainingConfig(
            gradient_accumulation_steps=args.gradient_accumulation_steps,
            ckpt_frequency=args.ckpt_frequency,
            log_dir=args.output_dir,
            output_dir=args.output_dir,
            fp16=args.fp16,
            device=args.device)

        distill_config = DistillationConfig(temperature=args.temperature,
                                            kd_loss_type='ce')

        logger.info(f"{train_config}")
        logger.info(f"{distill_config}")
        # Teachers and student share the same adaptor.
        adaptor_T = BertForGLUESimpleAdaptor
        adaptor_S = BertForGLUESimpleAdaptor

        distiller = MultiTeacherDistiller(train_config=train_config,
                                          distill_config=distill_config,
                                          model_T=model_Ts,
                                          model_S=model_S,
                                          adaptor_T=adaptor_T,
                                          adaptor_S=adaptor_S)

        optimizer = AdamW(all_trainable_params, lr=args.learning_rate)
        # The scheduler class and its arguments are handed to the distiller,
        # which is expected to instantiate it.
        scheduler_class = get_linear_schedule_with_warmup
        scheduler_args = {
            'num_warmup_steps': int(args.warmup_proportion * num_train_steps),
            'num_training_steps': num_train_steps
        }

        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_dataset))
        logger.info("  Forward batch size = %d", forward_batch_size)
        logger.info("  Num backward steps = %d", num_train_steps)

        # Callback passed to the distiller: predict() with the evaluation
        # datasets and args pre-bound (eval_datasets is None when
        # args.do_predict is off).
        callback_func = partial(predict,
                                eval_datasets=eval_datasets,
                                args=args)
        with distiller:
            distiller.train(optimizer,
                            scheduler_class=scheduler_class,
                            scheduler_args=scheduler_args,
                            dataloader=train_dataloader,
                            num_epochs=args.num_train_epochs,
                            callback=callback_func,
                            max_grad_norm=1)

    # Prediction-only mode: evaluate the student once and print the result.
    if not args.do_train and args.do_predict:
        res = predict(model_S, eval_datasets, step=0, args=args)
        print(res)
예제 #5
0
# Report the cost and gradient computed earlier for the initial theta.
print('Cost at initial theta (zeros): ', cost)
print('Gradient at initial theta (zeros): ')
print(grad)

# Minimise costfunction starting from initial_theta, with `gradient` supplied
# as its derivative and (X2, Y) as extra arguments.
# NOTE(review): `opt` is presumably scipy.optimize — confirm against the imports.
result = opt.fmin_tnc(func=costfunction,
                      x0=initial_theta,
                      fprime=gradient,
                      args=(X2, Y))
# The first element of the result is used as the fitted theta.
theta = result[0]
cost = costfunction(theta, X2, Y)
print('Cost at theta found by fmin_tnc: ', cost)
print('Theta: ')
print(theta)

# Predict on the training inputs with the fitted theta.
p = predict(theta, X2)
print('Prediction: ')
print(p)

# Training accuracy: an entry counts as correct (1) when prediction and label
# are both 1 or both 0, otherwise it is 0.
correct = [
    1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0
    for (a, b) in zip(p, Y)
]
# Fraction of correct entries, printed as a percentage.
accuracy = (sum(map(int, correct)) / len(correct))
print('Train Accuracy: {0}%'.format(accuracy * 100))

# Confusion matrix of the predictions on the test inputs.
# NOTE(review): `metrics` is presumably sklearn.metrics — confirm against the imports.
Y_predict = predict(theta, X2_test)
conf_metrics = metrics.confusion_matrix(Y_test, Y_predict)
print('Confusion Metrics: ')