Beispiel #1
0
def model_update_add(args, method, lr_lists):
    """Update a previously trained model after extra training samples are added.

    Artifacts are read from and written to the folder ``args.repo``.  File
    paths are built by plain string concatenation (``args.repo + name``), so
    ``args.repo`` must already end with a path separator.

    The model is rebuilt from the saved ``init_para``, ``X_to_add`` /
    ``Y_to_add`` are appended to the training set, and one of two update
    strategies is run:

    * ``method == baseline_method``: derive and save
      ``added_random_ids_multi_epochs``, run ``model_update_standard_lib_add``,
      compare the result with the saved ``origin_model`` and save
      ``model_base_line``, ``exp_para_list`` and ``exp_grad_list``.
    * ``method == deltagrad_method``: load the artifacts saved by the
      baseline run, run ``model_update_delta_grad_add``, compare the result
      with ``model_base_line`` and save ``model_deltagrad``.

    For any other ``method`` value the shared setup is performed and the
    function returns without updating or saving anything.

    Args:
        args: Namespace providing ``model``, ``repo``, ``dataset``,
            ``epochs``, ``bz``, ``GPU`` and ``wd``; ``GID`` is read only when
            ``GPU`` is truthy; ``period``, ``init``, ``m`` and
            ``cached_size`` are read only for ``deltagrad_method``.
        method: Strategy selector, compared against ``baseline_method`` and
            ``deltagrad_method``.
        lr_lists: Only ``lr_lists[0]`` is used; it is forwarded to the
            dataset's ``get_hyperparameters_<dataset>`` function.

    Returns:
        None. Results are printed and persisted with ``torch.save``.
    """
    model_name = args.model
    git_ignore_folder = args.repo
    dataset_name = args.dataset
    num_epochs = args.epochs
    batch_size = args.bz
    is_GPU = args.GPU
    regularization_coeff = args.wd

    if is_GPU:
        gpu_id = int(args.GID)
        # Fall back to the CPU when a GPU was requested but CUDA is missing.
        device = torch.device("cuda:" + str(gpu_id) if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    def load_artifact(name):
        # Same path scheme as every other reader/writer of this folder.
        return torch.load(git_ignore_folder + name)

    def save_artifact(obj, name):
        torch.save(obj, git_ignore_folder + name)

    model_class = getattr(sys.modules[__name__], model_name)
    data_preparer = Data_preparer()

    # Only the artifacts that are actually consumed below are loaded; the
    # test set and the full gradient/parameter history are not needed here.
    dataset_train = load_artifact("dataset_train")
    X_to_add = load_artifact('X_to_add')
    Y_to_add = load_artifact('Y_to_add')
    learning_rate_all_epochs = load_artifact('learning_rate_all_epochs')
    random_ids_all_epochs = load_artifact('random_ids_multi_epochs')

    # Number of mini-batches per epoch over the ORIGINAL training set.
    mini_batch_num = int((len(dataset_train) - 1)/batch_size) + 1

    # Captured before the new samples are appended, so dim[0] is the
    # original training-set size for the rest of the function.
    dim = [len(dataset_train), len(dataset_train[0][0])]

    num_class = get_data_class_num_by_name(data_preparer, dataset_name)
    hyper_para_function = getattr(Data_preparer, "get_hyperparameters_" + dataset_name)

    # NOTE(review): main_add builds models other than 'Logistic_regression'
    # with no constructor arguments; here (input_dim, num_class) is always
    # passed - confirm whether other model classes must be supported.
    model = model_class(dim[1], num_class)
    init_model(model, list(load_artifact('init_para')))

    print('data dimension::',dim)

    if is_GPU:
        model.to(device)

    criterion, optimizer = hyper_para_function(data_preparer, model.parameters(), lr_lists[0], regularization_coeff)

    def append_added_samples():
        # Extend the training set in place with the samples to add.
        dataset_train.data = torch.cat([dataset_train.data, X_to_add], 0)
        dataset_train.labels = torch.cat([dataset_train.labels, Y_to_add], 0)

    def report_cost(time_label, elapsed):
        # Resident memory of this process followed by the elapsed time.
        process = psutil.Process(os.getpid())
        print('memory usage::', process.memory_info().rss)
        print(time_label, elapsed)

    if method == baseline_method:
        # Must run before the samples are appended: it takes the size of the
        # original training set as its last argument.
        added_random_ids_multi_epochs = get_sampling_each_iteration0(random_ids_all_epochs, X_to_add.shape[0], mini_batch_num, len(dataset_train))

        print("delta data size::", X_to_add.shape[0])

        # Persisted so that the deltagrad run can load the same ids.
        save_artifact(added_random_ids_multi_epochs, 'added_random_ids_multi_epochs')

        append_added_samples()

        t1 = time.time()
        updated_model, exp_para_list, exp_grad_list = model_update_standard_lib_add(num_epochs, dataset_train, dim, model, random_ids_all_epochs, batch_size, learning_rate_all_epochs, added_random_ids_multi_epochs, criterion, optimizer, is_GPU, device, regularization_coeff)
        t2 = time.time()

        report_cost('time_baseline::', t2 - t1)

        origin_model = load_artifact('origin_model')
        compute_model_para_diff(list(origin_model.parameters()), list(updated_model.parameters()))

        save_artifact(updated_model, 'model_base_line')
        save_artifact(exp_para_list, 'exp_para_list')
        save_artifact(exp_grad_list, 'exp_grad_list')

    elif method == deltagrad_method:
        # These artifacts are written by the baseline branch above.
        added_random_ids_multi_epochs = load_artifact('added_random_ids_multi_epochs')

        append_added_samples()

        exp_para_list = load_artifact('exp_para_list')
        exp_grad_list = load_artifact('exp_grad_list')

        period = args.period
        init_epochs = args.init
        m = args.m
        cached_size = args.cached_size

        grad_list_all_epochs_tensor, para_list_all_epoch_tensor, grad_list_GPU_tensor, para_list_GPU_tensor = cache_grad_para_history(git_ignore_folder, cached_size, is_GPU, device)

        t1 = time.time()
        updated_model = model_update_delta_grad_add(exp_para_list, exp_grad_list, period, 1, init_epochs, dataset_train, model, grad_list_all_epochs_tensor, para_list_all_epoch_tensor, grad_list_GPU_tensor, para_list_GPU_tensor, cached_size, m, learning_rate_all_epochs, random_ids_all_epochs, batch_size, dim, added_random_ids_multi_epochs, criterion, optimizer, regularization_coeff, is_GPU, device)
        t2 = time.time()

        report_cost('time_deltagrad::', t2 - t1)

        model_base_line = load_artifact('model_base_line')
        compute_model_para_diff(list(model_base_line.parameters()), list(updated_model.parameters()))

        save_artifact(updated_model, 'model_deltagrad')
Beispiel #2
0
    
    is_GPU = bool(int(sys_argv[9]))
    
    if not is_GPU:
        device = torch.device("cpu")
    else:    
        GPU_ID = int(sys_argv[10])
        device = torch.device("cuda:"+str(GPU_ID) if torch.cuda.is_available() else "cpu")


    print(device)

    model_class = getattr(sys.modules[__name__], model_name)
    
    
    data_preparer = Data_preparer()
    
    dataset_train = torch.load(git_ignore_folder + "dataset_train")
    
    dataset_test = torch.load(git_ignore_folder + "dataset_test")
    
    data_train_loader = torch.load(git_ignore_folder + "data_train_loader")
    
    data_test_loader = torch.load(git_ignore_folder + "data_test_loader")
    
#     dataset_train, dataset_test, data_train_loader, data_test_loader = get_train_test_data_loader_by_name_lr(data_preparer, model_class, dataset_name, batch_size)
    
    dim = [len(dataset_train), len(dataset_train[0][0])]
    
    num_class = get_data_class_num_by_name(data_preparer, dataset_name)
    
Beispiel #3
0
def main_add(args, lr_lists):
    """Train the initial model and persist the artifacts of that run.

    Artifacts are read from and written to the folder ``args.repo``.  File
    paths are built by plain string concatenation (``args.repo + name``), so
    ``args.repo`` must already end with a path separator.

    Steps:

    1. Load ``dataset_train`` / ``dataset_test`` and call
       ``generate_random_id_add``, then load ``random_ids_multi_epochs``
       (presumably written by that call - TODO confirm).
    2. Build the model: ``'Logistic_regression'`` is constructed with
       ``(input_dim, num_class)``, any other model class with no arguments.
    3. Train with ``model_training_lr_test`` and print the elapsed time.
    4. Save the training set, gradient/parameter history, learning rates,
       hyper-parameters, the first entry of the parameter history
       (``init_para``), the trained model (``origin_model``) and the run
       settings.
    5. Evaluate the trained model with ``test``.

    Args:
        args: Namespace providing ``model``, ``repo``, ``dataset``,
            ``epochs``, ``bz``, ``GPU`` and ``wd``; ``GID`` is read only when
            ``GPU`` is truthy.
        lr_lists: Forwarded as a whole to ``model_training_lr_test``;
            ``lr_lists[0]`` is additionally forwarded to the dataset's
            ``get_hyperparameters_<dataset>`` function.

    Returns:
        None. Results are printed and persisted with ``torch.save``.
    """
    model_name = args.model
    git_ignore_folder = args.repo
    dataset_name = args.dataset
    num_epochs = args.epochs
    batch_size = args.bz
    is_GPU = args.GPU
    regularization_coeff = args.wd

    if is_GPU:
        gpu_id = int(args.GID)
        # Fall back to the CPU when a GPU was requested but CUDA is missing.
        device = torch.device("cuda:" + str(gpu_id) if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    model_class = getattr(sys.modules[__name__], model_name)
    data_preparer = Data_preparer()

    dataset_train = torch.load(git_ignore_folder + "dataset_train")
    dataset_test = torch.load(git_ignore_folder + "dataset_test")

    generate_random_id_add(git_ignore_folder, dataset_train, num_epochs)
    random_ids_all_epochs = torch.load(git_ignore_folder + 'random_ids_multi_epochs')

    # [number of training samples, length of the first sample's features]
    dim = [len(dataset_train), len(dataset_train[0][0])]

    num_class = get_data_class_num_by_name(data_preparer, dataset_name)
    hyper_para_function = getattr(Data_preparer, "get_hyperparameters_" + dataset_name)

    if model_name == 'Logistic_regression':
        model = model_class(dim[1], num_class)
    else:
        model = model_class()

    print('data dimension::',dim)

    if is_GPU:
        model.to(device)

    criterion, optimizer = hyper_para_function(data_preparer, model.parameters(), lr_lists[0], regularization_coeff)
    hyper_params = [criterion, optimizer]

    t1 = time.time()
    model, gradient_list_all_epochs, para_list_all_epochs, learning_rate_all_epochs = model_training_lr_test(random_ids_all_epochs, num_epochs, model, dataset_train, len(dataset_train), optimizer, criterion, batch_size, is_GPU, device, lr_lists)
    t2 = time.time()

    # The provenance-capture steps that used to run between these two
    # timestamps are disabled, so the reported duration is effectively zero.
    # The line is still printed to keep the output format unchanged.
    t3 = time.time()
    t4 = time.time()

    print("training time full::", t2 - t1)
    print("provenance prepare time::", t4 - t3)

    # (object, file name) pairs, written in this order.
    training_artifacts = [
        (dataset_train, "dataset_train"),
        (gradient_list_all_epochs, 'gradient_list_all_epochs'),
        (para_list_all_epochs, 'para_list_all_epochs'),
        (learning_rate_all_epochs, 'learning_rate_all_epochs'),
        (num_epochs, 'epoch'),
        (hyper_params, 'hyper_params'),
    ]
    for artifact, file_name in training_artifacts:
        torch.save(artifact, git_ignore_folder + file_name)

    save_random_id_orders(git_ignore_folder, random_ids_all_epochs)

    run_artifacts = [
        (para_list_all_epochs[0], 'init_para'),
        (model, 'origin_model'),
        (model_class, 'model_class'),
        (regularization_coeff, 'beta'),
        (dataset_name, 'dataset_name'),
        (batch_size, 'batch_size'),
        (device, 'device'),
        (is_GPU, 'is_GPU'),
    ]
    for artifact, file_name in run_artifacts:
        torch.save(artifact, git_ignore_folder + file_name)

    test(model, dataset_test, batch_size, criterion, len(dataset_test), is_GPU, device)