Esempio n. 1
0
 def __init__(self, model, all_JS_data, all_JS_label, all_JS_length,
              all_JS_index, device):
     """Wrap a trained model plus the full JS dataset for evaluation.

     The model is moved onto *device* and switched to eval mode; the data
     is exposed through a batch-size-1, non-shuffled DataLoader.
     """
     self.device = device
     self.data_len = len(all_JS_data)
     # Prepare the model for inference on the requested device.
     self.model = model
     self.model.to(device)
     self.model.eval()
     dataset = JSDataset(X=all_JS_data, y=all_JS_label,
                         length=all_JS_length, ID=all_JS_index)
     self.dataset = dataset
     self.loader = DataLoader(dataset, batch_size=1, shuffle=False,
                              num_workers=1)
     print("dataloader", len(self.loader))
Esempio n. 2
0
def train_defense(train_data_list, test_data_list, batch_size=512, n_epoch=50,
                  lr=0.01, L1_regularization=0, device="cuda:0", save_name=None):
    """Train CountsNet on JS count features and evaluate it every epoch.

    Parameters
    ----------
    train_data_list / test_data_list : [X, y, length, ID] lists as accepted
        by JSDataset.
    batch_size, n_epoch, lr : SGD hyper-parameters (momentum fixed at 0.9).
    L1_regularization : weight of an L1 penalty over all model parameters
        (0 disables the penalty).
    device : torch device string.
    save_name : when not None, the final weights are saved under
        ./Model/<save_name> and the Acc/Loss/F1 curves are plotted.

    Returns
    -------
    The trained model (so the result is not lost when save_name is None).
    """
    model = CountsNet().to(device)
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    train_dataset = JSDataset(X=train_data_list[0], y=train_data_list[1],
                              length=train_data_list[2], ID=train_data_list[3])
    test_dataset = JSDataset(X=test_data_list[0], y=test_data_list[1],
                             length=test_data_list[2], ID=test_data_list[3])
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=16)
    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=16)

    eps = 1e-10  # guards the precision/recall denominators

    def _compute_loss(outputs, labels):
        # BCE loss, optionally augmented with an L1 penalty on every parameter.
        loss = criterion(outputs, labels)
        if L1_regularization > 0:
            l1 = sum(torch.sum(torch.abs(p)) for p in model.parameters())
            loss = loss + L1_regularization * l1
        return loss

    def _binary_metrics(pred_list, true_list):
        # Accuracy and F1 from 0/1 prediction and ground-truth sequences.
        pred = np.array(pred_list)
        true = np.array(true_list)
        tp = np.sum((pred == 1) & (true == 1))
        tn = np.sum((pred == 0) & (true == 0))
        fp = np.sum((pred == 1) & (true == 0))
        fn = np.sum((pred == 0) & (true == 1))
        acc = (tp + tn) / (tp + tn + fp + fn)
        precision = tp / (tp + fp + eps)
        recall = tp / (tp + fn + eps)
        # Harmonic mean written so tp == 0 gives F1 = 0 instead of the
        # division-by-zero/inf the old 2/(1/p + 1/r) form produced.
        f1 = 2 * precision * recall / (precision + recall + eps)
        return acc, f1

    train_acc_list, train_loss_list, train_F1_list = [], [], []
    test_acc_list, test_loss_list, test_F1_list = [], [], []

    for epoch in range(n_epoch):
        # ---- training pass ----
        model.train()
        train_total_loss = 0
        train_pred_epoch, train_true_epoch = [], []
        for i, (inputs, labels, _, _) in enumerate(train_loader):
            print(f"process {i+1} / {len(train_loader)}", end="\r")
            inputs = inputs.to(device, dtype=torch.float)
            labels = labels.to(device, dtype=torch.float)
            optimizer.zero_grad()
            outputs = model(inputs).squeeze()
            loss = _compute_loss(outputs, labels)
            loss.backward()
            optimizer.step()
            train_pred_epoch += list(evalution_outputs_transform(outputs))
            train_true_epoch += list(labels)
            train_total_loss += loss.item()

        train_ACC, train_F1 = _binary_metrics(train_pred_epoch, train_true_epoch)
        train_acc_list.append(train_ACC)
        train_loss_list.append(train_total_loss / len(train_loader))
        train_F1_list.append(train_F1)

        # ---- evaluation pass ----
        model.eval()
        with torch.no_grad():
            test_total_loss = 0
            test_pred_epoch, test_true_epoch = [], []
            for inputs, labels, _, _ in test_loader:
                inputs = inputs.to(device, dtype=torch.float)
                labels = labels.to(device, dtype=torch.float)
                outputs = model(inputs)
                # Zero out NaN outputs so BCELoss does not propagate them.
                outputs[outputs != outputs] = 0
                outputs = outputs.squeeze()
                loss = _compute_loss(outputs, labels)
                test_pred_epoch += list(evalution_outputs_transform(outputs))
                test_true_epoch += list(labels)
                test_total_loss += loss.item()

            test_ACC, test_F1 = _binary_metrics(test_pred_epoch, test_true_epoch)
            test_acc_list.append(test_ACC)
            test_loss_list.append(test_total_loss / len(test_loader))
            test_F1_list.append(test_F1)
            print(f'Epoch {epoch+1} || Train | Loss:{train_total_loss/len(train_loader):.5f} Acc: {train_ACC*100:.3f} F1: {train_F1:.3f} || Valid | Loss:{test_total_loss/len(test_loader):.5f} Acc: {test_ACC*100:.3f} F1: {test_F1:.3f}')

    if save_name is not None:
        torch.save(model.state_dict(), "./Model/" + save_name)
        # Plotting previously ran unconditionally and crashed on
        # save_name=None (None + "_ACC.png"); only plot when a name is given.
        train_test_plot([train_acc_list, test_acc_list], mode="Acc",
                        saving_name=save_name + "_ACC.png")
        train_test_plot([train_loss_list, test_loss_list], mode="Loss",
                        saving_name=save_name + "_LOSS.png")
        train_test_plot([train_F1_list, test_F1_list], mode="F1",
                        saving_name=save_name + "_F1.png")
    return model
Esempio n. 3
0
                clf_idx, self._get_acc(self.clf2score[clf_idx]))


from tqdm import tqdm
# Evaluation-only branch: when training is disabled, load the 10 bagged
# classifiers and score every test sample with each of them.
# NOTE(review): Python 2 print statements below -- this snippet is py2-only.
if not flag_train:
    num_clf = 10
    # presumably -1 means "no cap on sample count" -- confirm in JSDataset
    num_sample = -1
    clf_results = ClassificationResult()
    ctx = mx.gpu()
    clf_dict = {}
    # Load each bagged classifier's saved parameters from bagging_<k>.params.
    for k in range(num_clf):
        clf_dict[k] = CIFARNET_QUICK(class_num=config.class_num, ctx=ctx)
        net_path = 'bagging_%d.params' % k
        clf_dict[k].load_parameters(net_path, ctx=ctx)
    ds_test = JSDataset(config.test_root,
                        fortrain=False,
                        crop_hw=(config.height, config.width),
                        max_sample_num=num_sample)
    data_iter = gluon.data.DataLoader(ds_test,
                                      batch_size=1,
                                      shuffle=False,
                                      last_batch="rollover")
    # Score every sample with every classifier, accumulating per-classifier
    # results keyed by clf_idx.
    for batch in tqdm(data_iter):
        data, label = batch
        data = data.as_in_context(ctx)
        for clf_idx in clf_dict.keys():
            pred_score = clf_dict[clf_idx](data)
            clf_results.update(clf_idx, pred_score, label)
    # Report each classifier's result, then the aggregate (index -1) result.
    for clf_idx in clf_dict.keys():
        print clf_results.get(clf_idx)
    print clf_results.get(-1)
Esempio n. 4
0
    # NOTE(review): fragment of a larger training script -- the enclosing
    # definition and the variables X_train/y_train/L_train/ID_train (and
    # their test counterparts) are defined outside this excerpt.
    print(f"Training data: {X_train.shape}")
    print(f"Testing data: {X_test.shape}")
    # NOTE(review): ScaleData.fit is called on both splits; if fit
    # re-estimates statistics from its argument, the test split is scaled
    # with its own stats (leakage) -- confirm ScaleData semantics.
    scaledata = ScaleData(X_train)
    X_train_scale = scaledata.fit(X_train, mode="standardization")
    X_test_scale = scaledata.fit(X_test, mode="standardization")
    device = torch.device("cpu")  #"cuda:0" if torch.cuda.is_available() else
    batch_size = 512
    n_epoch = 50
    lr = 0.01

    model = CountsNet().to(device)
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    train_dataset = JSDataset(X=X_train_scale,
                              y=y_train,
                              length=L_train,
                              ID=ID_train)
    test_dataset = JSDataset(X=X_test_scale,
                             y=y_test,
                             length=L_test,
                             ID=ID_test)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=16)
    # NOTE(review): shuffle=True on the *test* loader is unusual --
    # evaluation order normally should not matter; confirm it is intended.
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=16)

    total_loss, total_acc, best_acc = 0, 0, 0
Esempio n. 5
0
File: train.py Progetto: zfxu/tests
def get_classifer(max_sample_num):
    """Train CIFARNET_QUICK on JSDataset; return (best_model_path, best_acc).

    NOTE(review): Python 2 only -- the tuple-unpacking lambdas
    (``lambda (pred,label): ...``) and the reliance on ``map`` returning a
    reusable list are invalid or incorrect under Python 3.

    :param max_sample_num: cap on samples loaded per dataset (forwarded to
        JSDataset; presumably -1 means "no limit" -- confirm in JSDataset).
    """
    ctx = mx.gpu()
    crop_hw = (config.height, config.width)

    ds_train = JSDataset(config.train_root, fortrain=True, crop_hw = crop_hw,max_sample_num = max_sample_num)
    ds_test = JSDataset(config.test_root, fortrain=False, crop_hw=crop_hw, max_sample_num = max_sample_num)

    # "rollover" carries leftover samples into the next epoch instead of
    # dropping or padding the last incomplete batch.
    trainiter = gluon.data.DataLoader(ds_train,batch_size=config.batch_size, shuffle=True,last_batch="rollover")
    testiter = gluon.data.DataLoader(ds_test,batch_size=config.batch_size, shuffle=False,last_batch="rollover")

    logging.info("train num: {} test num: {}".format(len(ds_train), len(ds_test)))

    # Linear decay (pwr=1) of the learning rate over the whole run.
    max_update = config.max_epoch * len(ds_train) // config.batch_size
    lr_sch = mx.lr_scheduler.PolyScheduler(max_update=max_update,base_lr=config.base_lr,pwr=1)

    net = CIFARNET_QUICK(class_num = config.class_num, ctx=ctx)

    # Optionally warm-start from a pretrained checkpoint.
    if not (config.pretrained_model is None) and not (config.pretrained_model == ""):
        net.load_params(config.pretrained_model,ctx = ctx)
        logging.info("loading model {}".format(config.pretrained_model))

    trainer = mx.gluon.Trainer(net.collect_params(),optimizer=config.optimizer,optimizer_params={"learning_rate":config.base_lr})


    loss_ce = mx.gluon.loss.SoftmaxCrossEntropyLoss()

    class ACC_SHOW(object):
        """Accumulates overall and per-class hit/total counts for accuracy."""
        def __init__(self,label_num):
            self.label_num = label_num
            self.axis = 1  # class axis of the prediction tensor
            self.acc = {'total':0,'hit':0}
            self.acc_per_class = OrderedDict()
            for key in range(label_num):
                self.acc_per_class[key] = {'total':0,'hit':0}
            return
        def reset(self):
            # Zero all counters (called at every epoch boundary).
            self.acc = {'total':0,'hit':0}
            self.acc_per_class = OrderedDict()
            for key in range(self.label_num):
                self.acc_per_class[key] = {'total':0,'hit':0}
            return

        def update(self,preds, labels):
            """Fold one batch of (per-device) predictions/labels into the counters."""
            # Accepts lists of NDArray (converted here) or numpy arrays.
            if isinstance(preds[0],mx.nd.NDArray):
                preds = map(lambda pred: pred.asnumpy(),preds)
                labels = map(lambda label: label.asnumpy(),labels)
            for pred, label in zip(preds,labels):
                pred_label = np.argmax(pred,axis=self.axis)
                label = label.flatten().astype('int32')
                pred_label = pred_label.flatten().astype('int32')
                for p,l in zip(pred_label,label):
                    self.acc_per_class[l]['total'] += 1
                    self.acc['total'] += 1
                    if l == p:
                        self.acc_per_class[l]['hit'] += 1
                        self.acc['hit'] += 1
            return

        def _calc_acc(self,md):
            # hit/total with a guard against empty counters.
            total = md['total']
            hit = md['hit']
            if total < 1:
                return 0
            return float(hit) / total

        def get_acc(self):
            """Overall accuracy as a float."""
            return self._calc_acc(self.acc)

        def get(self):
            """One-line report: overall accuracy followed by per-class values."""
            infos = ['acc {:.5} acc_per_class'.format( self._calc_acc(self.acc) )]
            for key in self.acc_per_class.keys():
                #print self.acc_per_class[key]
                infos.append('{:.3}'.format(self._calc_acc(self.acc_per_class[key])))
            return ' '.join(infos)

    class LOSS_SHOW(object):
        """Accumulates per-sample losses and reports their mean."""
        def __init__(self):
            self.loss_list = []

        def reset(self):
            self.loss_list = []

        def update(self, loss_list):
            # Accepts a list of NDArray losses (converted) or numpy arrays.
            if isinstance(loss_list[0],mx.nd.NDArray):
                loss_list = map(lambda loss: loss.asnumpy(), loss_list)
            loss = np.vstack(loss_list)
            #print loss.tolist()[0]
            self.loss_list.extend(loss.tolist()[0])

        def get(self):
            return "loss {:.5}".format( np.asarray(self.loss_list).mean()  )
    import pdb
    def show_gradient(net):
        # NOTE(review): the unconditional return below disables this whole
        # debugging helper; remove it to log conv-layer gradient statistics.
        return
        grads_list = []
        for block in net.layers:
            if not isinstance(block, CIFARNET_BLOCK) and not isinstance(block, CIFARNET_BLOCK_A):
                continue
            for layer in block.layers:
                if not isinstance(layer, gluon.nn.Conv2D):
                    continue
                grads = layer.weight.grad().as_in_context(mx.cpu()).asnumpy()
                grads_list.append(grads.mean())
                grads_list.append(grads.max())
                grads_list.append(grads.min())
        line = ['grads: ']
        for grads in grads_list:
            line.append( '%.6f'%grads )
        logging.info(','.join(line))
        return


    class TopAcc:
        """Tracks the checkpoint path with the best accuracy seen so far."""
        def __init__(self):
            self.path = ""
            self.score = 0
        def update(self, path, score):
            if self.score < score:
                self.score = score
                self.path = path
            return
        def get_top(self):
            return self.path,self.score

    top_acc = TopAcc()

    loss_show = LOSS_SHOW()
    acc = ACC_SHOW( config.class_num )
    # Log roughly twice per epoch; guard against tiny datasets.
    display_iter = len(ds_train) // (2 * config.batch_size )
    if display_iter < 1:
        display_iter = 1
    update = 0
    for epoch in range(config.max_epoch):
        acc.reset()
        loss_show.reset()
        # ---- training pass ----
        for batch in trainiter:
            update += 1
            data, label = batch
            data_list, label_list = utils.split_and_load(data,ctx_list=[ctx]), utils.split_and_load(label,ctx_list=[ctx])
            with mx.autograd.record():
                pred_list = map(lambda data: net(data), data_list)
                loss_list = map(lambda (pred,label): loss_ce(pred,label), zip(pred_list,label_list))
            for loss in loss_list:
                loss.backward()
            trainer.step(config.batch_size)
            mx.nd.waitall()
            acc.update(labels = label_list,preds = pred_list)
            loss_show.update(loss_list)
            if 0 == (update % display_iter):
                logging.info("train update {} lr {} {} {}".format(update,trainer.learning_rate,loss_show.get(), acc.get()))
            trainer.set_learning_rate(lr_sch(update))
        acc.reset()
        show_gradient(net)
        loss_show.reset()
        # ---- validation pass (no autograd recording -> no gradients) ----
        for (data,label) in testiter:
            data_list,label_list = utils.split_and_load(data,[ctx]),utils.split_and_load(label, [ctx])
            pred_list = map(lambda data : net(data), data_list)
            loss_list = map(lambda (pred,label): loss_ce(pred,label), zip(pred_list,label_list))
            mx.nd.waitall()
            acc.update(labels = label_list, preds = pred_list)
            loss_show.update(loss_list)
        logging.info("test update {} epoch {} {} {}".format(update,epoch,loss_show.get(), acc.get()))
        # Checkpoint periodically; track the best-scoring checkpoint path.
        if epoch % config.save_epoch_step == 0:
            net.save_params(config.model_path(epoch))
            top_acc.update( config.model_path(epoch), acc.get_acc() )
    net.save_params(config.model_path("last"))
    return top_acc.get_top()
Esempio n. 6
0
def testing_model(model_path, device="cpu", mode="good"):
    """Evaluate a saved CountsNet checkpoint on pickled JS count features.

    Parameters
    ----------
    model_path : path to a state_dict produced by torch.save.
    device : torch device string.
    mode : "good" (benign set, labels 0), "mal" (malicious set, labels 1)
        or "all" (both sets stacked); any other value returns None.

    Returns
    -------
    (accuracy, F1) over the selected set, or None for an unknown mode.
    """
    with open('./good_counts_data.pkl', 'rb') as f:
        good_counts, good_data_idx, good_data_length = pickle.load(f)
    with open('./mal_counts_data.pkl', 'rb') as f:
        mal_counts, mal_data_idx, mal_data_length = pickle.load(f)
    if mode == "good":
        all_JS_data = good_counts
        all_JS_label = np.zeros(len(good_counts))
        all_JS_length = good_data_length
        all_JS_index = good_data_idx
    elif mode == "mal":
        all_JS_data = mal_counts
        all_JS_label = np.ones(len(mal_counts))
        all_JS_length = mal_data_length
        all_JS_index = mal_data_idx
    elif mode == "all":
        all_JS_data = np.vstack((good_counts, mal_counts))
        all_JS_label = np.hstack(
            (np.zeros(len(good_counts)), np.ones(len(mal_counts))))
        all_JS_length = np.hstack((good_data_length, mal_data_length))
        all_JS_index = np.hstack((good_data_idx, mal_data_idx))
    else:
        return None

    test_dataset = JSDataset(X=all_JS_data,
                             y=all_JS_label,
                             length=all_JS_length,
                             ID=all_JS_index)
    test_loader = DataLoader(test_dataset,
                             batch_size=64,
                             shuffle=False,
                             num_workers=16)

    model = CountsNet().to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    eps = 1e-10  # guards the precision/recall denominators
    with torch.no_grad():
        test_pred_epoch = []
        test_true_epoch = []
        for inputs, labels, _, _ in test_loader:
            inputs = inputs.to(device, dtype=torch.float)
            labels = labels.to(device, dtype=torch.float)
            outputs = model(inputs).squeeze()
            test_pred_epoch += list(evalution_outputs_transform(outputs))
            test_true_epoch += list(labels)
        pred = np.array(test_pred_epoch)
        true = np.array(test_true_epoch)
        test_TP = np.sum((pred == 1) & (true == 1))
        test_TN = np.sum((pred == 0) & (true == 0))
        test_FP = np.sum((pred == 1) & (true == 0))
        test_FN = np.sum((pred == 0) & (true == 1))
        print(test_TP, test_TN, test_FP, test_FN)
        test_ACC = (test_TP + test_TN) / (test_TP + test_TN + test_FP +
                                          test_FN)
        test_precision = test_TP / (test_TP + test_FP + eps)
        test_recall = test_TP / (test_TP + test_FN + eps)
        # Harmonic mean written so a zero precision/recall yields F1 = 0
        # instead of dividing by zero (the old 2/(1/p + 1/r) form).
        test_F1 = 2 * test_precision * test_recall / (
            test_precision + test_recall + eps)

        print(f'{mode} Acc: {test_ACC*100:.3f} F1: {test_F1:.3f}')
    return test_ACC, test_F1
Esempio n. 7
0
def attack_model(model_path, attacker_path, device="cpu", adv_training=False):
    """Measure a saved CountsNet on original vs. adversarial JS samples.

    Parameters
    ----------
    model_path : path to a state_dict produced by torch.save.
    attacker_path : pickle file with attack records; each record carries
        "status", "data", "adv_data", "label", "length" and "ID".
    device : torch device string.
    adv_training : when True, skip the first 8000 records (presumably the
        ones consumed by adversarial training -- confirm with the caller).

    Returns
    -------
    (ori_ACC, ori_F1, test_ACC, test_F1): metrics on the original samples
    and on the (adversarial where successful) samples.
    """
    with open(attacker_path, 'rb') as f:
        adv_examples_data = pickle.load(f)
    if adv_training:
        adv_examples_data = adv_examples_data[8000:]

    adv_status_all, adv_ori_data_all, adv_adv_data_all = [], [], []
    adv_label_all, adv_length_all, adv_ID_all, adv_mal_all = [], [], [], []
    for data in adv_examples_data:
        adv_status_all.append(data["status"])
        adv_ori_data_all.append(data["data"])
        adv_adv_data_all.append(data["adv_data"])
        adv_label_all.append(int(data["label"][0]))
        adv_length_all.append(int(data["length"][0]))
        adv_ID_all.append(data["ID"][0])
        # Successful attacks are evaluated on the adversarial sample;
        # failed attacks fall back to the original sample.
        adv_mal_all.append(data["adv_data"] if data["status"] == "success"
                           else data["data"])

    def _make_loader(X):
        # Loaders share labels/lengths/IDs; only the feature source differs.
        ds = JSDataset(X=X, y=adv_label_all,
                       length=adv_length_all, ID=adv_ID_all)
        return DataLoader(ds, batch_size=64, shuffle=False, num_workers=16)

    test_loader = _make_loader(adv_mal_all)
    ori_loader = _make_loader(adv_ori_data_all)

    model = CountsNet().to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    eps = 1e-10  # guards the precision/recall denominators

    def _evaluate(loader):
        # Run the model over a loader and return (accuracy, F1).
        preds, trues = [], []
        for inputs, labels, _, _ in loader:
            inputs = inputs.to(device, dtype=torch.float)
            labels = labels.to(device, dtype=torch.float)
            outputs = model(inputs)
            # Zero out NaN outputs before thresholding.
            outputs[outputs != outputs] = 0
            outputs = outputs.squeeze()
            preds += list(evalution_outputs_transform(outputs))
            trues += list(labels)
        preds = np.array(preds)
        trues = np.array(trues)
        tp = np.sum((preds == 1) & (trues == 1))
        tn = np.sum((preds == 0) & (trues == 0))
        fp = np.sum((preds == 1) & (trues == 0))
        fn = np.sum((preds == 0) & (trues == 1))
        acc = (tp + tn) / (tp + tn + fp + fn)
        precision = tp / (tp + fp + eps)
        recall = tp / (tp + fn + eps)
        # Harmonic mean written so tp == 0 gives F1 = 0 instead of the
        # division-by-zero/inf the old 2/(1/p + 1/r) form produced.
        f1 = 2 * precision * recall / (precision + recall + eps)
        return acc, f1

    with torch.no_grad():
        test_ACC, test_F1 = _evaluate(test_loader)
        ori_ACC, ori_F1 = _evaluate(ori_loader)
        print(
            f'Ori | Acc: {ori_ACC*100:.3f} F1: {ori_F1:.3f}|| Adv Acc: {test_ACC*100:.3f} F1: {test_F1:.3f}'
        )
    return ori_ACC, ori_F1, test_ACC, test_F1