def roni(currentModel, sgdUpdate, previousValError, iterationNum, val_dataset):
    """Reject On Negative Influence (RONI) check for a candidate update.

    Builds a throwaway EmoGRU, loads ``currentModel + sgdUpdate`` into it,
    measures accuracy on ``val_dataset`` and compares the change against a
    threshold.

    Args:
        currentModel: per-layer parameter values of the current global model.
        sgdUpdate: per-layer update to evaluate (added to ``currentModel``).
        previousValError: validation accuracy measured before the update.
        iterationNum: training iteration; negative selects the fixed
            ``RONI_THRESH``, otherwise ``RONIThresh[iterationNum]`` is used.
        val_dataset: iterable of ``(inp, targ, lens)`` batches.

    Returns:
        bool: True if the update is accepted, False otherwise.
    """
    updatedModel = currentModel + sgdUpdate
    tempModel = emotionModel.EmoGRU(vocab_inp_size, embedding_dim, units,
                                    BATCH_SIZE, target_size)
    tempModel.to(device)
    # Copy the candidate weights into the throwaway model.
    # Fix: move each tensor to `device`; the original assigned plain CPU
    # tensors into a model that was just moved to `device`, which breaks
    # the forward pass when `device` is CUDA.
    layer = 0
    for name, param in tempModel.named_parameters():
        if param.requires_grad:
            param.data = torch.tensor(updatedModel[layer]).to(device)
            layer += 1
    val_accuracy = 0
    batchNum = 0
    for (batch, (inp, targ, lens)) in enumerate(val_dataset):
        predictions, _ = tempModel(inp.permute(1, 0).to(device), lens, device)
        batch_accuracy = emotionModel.accuracy(targ.to(device), predictions)
        val_accuracy += batch_accuracy
        batchNum += 1
    # Robustness fix: an empty validation loader previously divided by zero.
    if batchNum == 0:
        return False
    val_accuracy = val_accuracy / batchNum
    if iterationNum < 0:
        # Accept unless accuracy dropped by more than the fixed threshold.
        return bool((previousValError - val_accuracy) <= RONI_THRESH)
    # Accept only if accuracy improved by at least this iteration's threshold.
    return bool((val_accuracy - previousValError) >= RONIThresh[iterationNum])
def main():
    """Load data, restore encoder/classifier checkpoints, assemble the
    decoder model and launch training.

    Relies on module-level ``opt`` (parsed CLI options), ``onmt``,
    ``emoModel``, ``nn`` and ``trainModel``.
    """
    print("Loading data from '%s'" % opt.data)
    dataset = torch.load(opt.data)

    dict_checkpoint = opt.train_from if opt.train_from else opt.train_from_state_dict
    if dict_checkpoint:
        print('Loading dicts from checkpoint at %s' % dict_checkpoint)
        checkpoint = torch.load(dict_checkpoint)
        dataset['dicts'] = checkpoint['dicts']

    trainData = onmt.Dataset(dataset['train']['src'], dataset['train']['tgt'],
                             opt.batch_size, opt.gpus)
    validData = onmt.Dataset(dataset['valid']['src'], dataset['valid']['tgt'],
                             opt.batch_size, opt.gpus, volatile=True)

    dicts = dataset['dicts']
    print(' * vocabulary size. source = %d; target = %d' %
          (dicts['src'].size(), dicts['tgt'].size()))
    print(' * number of training sentences. %d' % len(dataset['train']['src']))
    print(' * maximum batch size. %d' % opt.batch_size)

    print('Loading Encoder Model ...')
    enc_check = torch.load(opt.encoder_model,
                           map_location=lambda storage, loc: storage)
    m_opt = enc_check['opt']
    src_dict = enc_check['dicts']['src']
    encoder = onmt.Models.Encoder(m_opt, src_dict)
    encoder.load_state_dict(enc_check['encoder'])

    print('Loading CNN Classifier Model ...')
    class_check = torch.load(opt.classifier_model,
                             map_location=lambda storage, loc: storage)
    class_opt = class_check['opt']
    class_dict = class_check['vocabulary']
    # The classifier must be rebuilt with the hyperparameters saved in its
    # own checkpoint so the state dict shapes match.
    class_model = emoModel.EmoGRU(class_opt["vocab_inp_size"],
                                  class_opt["embedding_dim"],
                                  class_opt["units"], opt.batch_size,
                                  class_opt["target_size"])
    class_model.load_state_dict(class_check['model'])

    print('Building model...')
    decoder = onmt.Models_decoder.Decoder(opt, dicts['tgt'])
    generator = nn.Sequential(nn.Linear(opt.rnn_size, dicts['tgt'].size()),
                              nn.LogSoftmax())
    class_input = nn.Sequential(nn.Linear(opt.rnn_size, class_dict.size()))

    # Fix: build the decoder wrapper *before* the checkpoint-restore
    # branches; the original referenced `model` in the `opt.train_from`
    # branch before it was ever assigned (NameError).
    model = onmt.Models_decoder.DecoderModel(decoder)

    if opt.train_from:
        print('Loading model from checkpoint at %s' % opt.train_from)
        chk_model = checkpoint['model']
        generator_state_dict = chk_model.generator.state_dict()
        # Strip the generator's parameters; they are restored separately.
        model_state_dict = {k: v for k, v in chk_model.state_dict().items()
                            if 'generator' not in k}
        model.load_state_dict(model_state_dict)
        generator.load_state_dict(generator_state_dict)
        opt.start_epoch = checkpoint['epoch'] + 1

    if opt.train_from_state_dict:
        print('Loading model from checkpoint at %s' % opt.train_from_state_dict)
        decoder.load_state_dict(checkpoint['decoder'])
        generator.load_state_dict(checkpoint['generator'])
        opt.start_epoch = checkpoint['epoch'] + 1

    if len(opt.gpus) >= 1:
        encoder.cuda()
        model.cuda()
        class_model.cuda()
        generator.cuda()
        class_input.cuda()
    else:
        encoder.cpu()
        model.cpu()
        class_model.cpu()
        generator.cpu()
        class_input.cpu()

    if len(opt.gpus) > 1:
        encoder = nn.DataParallel(encoder, device_ids=opt.gpus, dim=1)
        model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
        generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)
        class_input = nn.DataParallel(class_input, device_ids=opt.gpus, dim=0)

    if not opt.train_from_state_dict and not opt.train_from:
        # Fresh run: random init plus optional pretrained embeddings.
        for p in model.parameters():
            p.data.uniform_(-opt.param_init, opt.param_init)
        decoder.load_pretrained_vectors(opt)
        optim = onmt.Optim(opt.optim, opt.learning_rate, opt.max_grad_norm,
                           lr_decay=opt.learning_rate_decay,
                           start_decay_at=opt.start_decay_at)
    else:
        print('Loading optimizer from checkpoint:')
        optim = checkpoint['optim']
        print(optim)

    optim.set_parameters(model.parameters())

    # Attach the frozen/auxiliary components to the trainable model.
    model.encoder = encoder
    model.generator = generator
    model.class_input = class_input
    model.class_model = class_model

    if opt.train_from or opt.train_from_state_dict:
        optim.optimizer.load_state_dict(
            checkpoint['optim'].optimizer.state_dict())

    nParams = sum(p.nelement() for p in model.parameters())
    print('* number of parameters: %d' % nParams)

    trainModel(model, trainData, validData, dataset, optim)
# Non-shuffling, non-dropping loader: evaluation must see every example.
test_dataset = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                          drop_last=False, shuffle=False)

# Restore the trained emotion classifier from its checkpoint (always
# mapped to CPU storage; moving to a device is done by the caller).
class_check = torch.load(classifier_model,
                         map_location=lambda storage, loc: storage)
class_opt = class_check['opt']
class_dict = class_check['vocabulary']

# Fix: the original built a model from module-level hyperparameters and
# immediately overwrote it; the checkpoint's own hyperparameters are the
# ones that must match the saved state dict, so only that construction
# is kept.
model = emoModel.EmoGRU(class_opt["vocab_inp_size"],
                        class_opt["embedding_dim"], class_opt["units"],
                        BATCH_SIZE, class_opt["target_size"])
model.load_state_dict(class_check['model'])
model.eval()
def main():
    """Calibration experiment for deriving per-iteration RONI thresholds.

    For each (train, quiz) pair: measure quiz accuracy, apply one
    calibration batch at a time (rolling back after each), record the
    accuracy deltas, then take a real training step.  Afterwards each
    iteration's threshold is mean - 3*std of the averaged deltas.
    """
    globalVocab = constructGlobalVocab()

    validationFileName = 'val.tsv'
    input_tensor_val, target_tensor_val = preprocessData(validationFileName,
                                                         globalVocab, iid=iid)

    # Split off a calibration set; the remainder becomes train/quiz pairs.
    beforeInputSet, calibrateInputSet, beforeTargetSet, calibrateTargetSet = \
        train_test_split(input_tensor_val, target_tensor_val,
                         test_size=calibrationSetSize)
    trainSets, quizSets = getTrainQuizSets(beforeInputSet, beforeTargetSet,
                                           TRAIN_QUIZ_PAIRS)

    calibrateDataset = dataLoadUtils.MyData(calibrateInputSet,
                                            calibrateTargetSet)
    calibrateDataset = DataLoader(calibrateDataset, batch_size=BATCH_SIZE,
                                  drop_last=True, shuffle=True)

    trainingIterations = 200
    vocab_inp_size = len(globalVocab.word2idx)
    embedding_dim = 256
    units = 1024
    target_size = num_emotions

    # Fix: `xrange` is Python 2 only (the file already uses `range`
    # elsewhere).  Device selection is loop-invariant, so it is hoisted
    # out of the model-construction loop.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    models = []
    optimizers = []
    for idx in range(0, TRAIN_QUIZ_PAIRS):
        model = emotionModel.EmoGRU(vocab_inp_size, embedding_dim, units,
                                    BATCH_SIZE, target_size)
        model.to(device)
        models.append(model)
        optimizers.append(torch.optim.Adam(models[idx].parameters()))

    VAL_BUFFER_SIZE = len(input_tensor_val)
    VAL_N_BATCH = VAL_BUFFER_SIZE // BATCH_SIZE
    print(len(calibrateDataset))
    # [iteration, pair, calibration-batch] -> accuracy delta.
    diffAccuracies = np.zeros([trainingIterations, TRAIN_QUIZ_PAIRS,
                               len(calibrateDataset)])
    print(diffAccuracies.shape)

    trainIterator = [iter(dataset) for dataset in trainSets]
    for iteration in range(0, trainingIterations):
        for setIdx in range(0, TRAIN_QUIZ_PAIRS):
            prev_val_accuracy = measureAccuracy(models[setIdx],
                                                quizSets[setIdx], device)
            print("Model %d accuracy at iteration %d: %d"
                  % (setIdx, iteration, prev_val_accuracy))
            batchCount = 0
            # Snapshot the weights so every calibration batch is measured
            # from the same starting point.
            currentState = deepcopy(models[setIdx].state_dict())
            for (batch, (inp, targ, lens)) in enumerate(calibrateDataset):
                loss = 0
                predictions, _ = models[setIdx](inp.permute(1, 0).to(device),
                                                lens, device)
                loss += emotionModel.loss_function(targ.to(device),
                                                   predictions)
                optimizers[setIdx].zero_grad()
                loss.backward()
                optimizers[setIdx].step()
                # Fix: the original measured `model` (whichever model was
                # constructed last) instead of the model just updated.
                newAccuracy = measureAccuracy(models[setIdx],
                                              quizSets[setIdx], device)
                print("Accuracy after calibration batch %d: %d"
                      % (batchCount, newAccuracy))
                diffAccuracies[iteration, setIdx, batchCount] = \
                    newAccuracy - prev_val_accuracy
                # Roll back so the next calibration batch starts clean.
                models[setIdx].load_state_dict(currentState)
                batchCount = batchCount + 1

            # One real training step on this pair's own training stream.
            nextBatch = next(trainIterator[setIdx])
            inp, targ, lens = nextBatch[0], nextBatch[1], nextBatch[2]
            loss = 0
            predictions, _ = models[setIdx](inp.permute(1, 0).to(device),
                                            lens, device)
            loss += emotionModel.loss_function(targ.to(device), predictions)
            optimizers[setIdx].zero_grad()
            loss.backward()
            optimizers[setIdx].step()

    print(diffAccuracies)
    iterThreshs = []
    for iteration in range(0, trainingIterations):
        iterationDiffs = diffAccuracies[iteration, :, :]
        print(iterationDiffs.shape)
        # Average across pairs, then threshold = mean - 3 * std.
        iterationDiffsAvg = np.mean(iterationDiffs, axis=0)
        iterThresh = iterationDiffsAvg.mean() - 3 * iterationDiffsAvg.std()
        iterThreshs.append(iterThresh)
    print(iterThreshs)
# Wrap the prepared datasets in shuffling, fixed-size batch loaders.
train_dataset = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                           drop_last=True, shuffle=True)
test_dataset = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                          drop_last=True, shuffle=True)
# print(val_dataset.batch_size)
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.set_device(0)
model = emoModel.EmoGRU(vocab_inp_size, embedding_dim, units, BATCH_SIZE,
                        target_size)
# model.to(device)
# obtain one sample from the data iterator
it = iter(train_dataset)
x, y, x_len = next(it)
# sort the batch first to be able to use with pac_pack sequence
xs, ys, lens = util.sort_batch(x, y, x_len)
print("Input size: ", xs.size())
# NOTE(review): elsewhere in this file the model is invoked as
# model(xs.to(device), lens, device); this bare call looks inconsistent
# with the EmoGRU forward signature — confirm.
output, _ = model(xs)
print(output.size())
# NOTE(review): this chunk is truncated mid-statement in the source; the
# call below is incomplete and left exactly as found.
model = emoModel.EmoGRU(vocab_inp_size, embedding_dim, units, BATCH_SIZE,
def main():
    """Federated training loop with RONI update filtering.

    Builds per-client training loaders (the last ``maliciousClients``
    clients use poisoned files), trains a shared EmoGRU by aggregating
    only client gradients that pass the ``roni`` check, and reports
    validation accuracy and attack success rate each iteration.
    """
    globalVocab = constructGlobalVocab()

    input_tensor_clients = []
    target_tensor_clients = []
    for x in range(0, numClients):
        # The last `maliciousClients` clients read corrupted training data.
        if x < (numClients - maliciousClients):
            trainFileName = 'train_' + str(x) + '.tsv'
        else:
            trainFileName = 'train_' + str(x) + '_corrupted.tsv'
        input_tensor_train, target_tensor_train = preprocessData(
            trainFileName, globalVocab, iid=iid)
        input_tensor_clients.append(input_tensor_train)
        target_tensor_clients.append(target_tensor_train)

    validationFileName = 'val.tsv'
    input_tensor_val, target_tensor_val = preprocessData(
        validationFileName, globalVocab, iid=iid)
    # Same file, preprocessed with the attack flag set, to measure how
    # often the model produces the attacker's target labels.
    input_tensor_attackRate, target_tensor_attackRate = preprocessData(
        validationFileName, globalVocab, True, iid=iid)
    testFileName = 'test.tsv'
    input_tensor_test, target_tensor_test = preprocessData(
        testFileName, globalVocab, iid=iid)

    TRAIN_BUFFER_SIZE = len(input_tensor_train)
    VAL_BUFFER_SIZE = len(input_tensor_val)
    ATTACKRATE_BUFFER_SIZE = len(input_tensor_attackRate)
    TEST_BUFFER_SIZE = len(input_tensor_test)
    BATCH_SIZE = 60
    TRAIN_N_BATCH = TRAIN_BUFFER_SIZE // BATCH_SIZE
    VAL_N_BATCH = VAL_BUFFER_SIZE // BATCH_SIZE
    ATTACKRATE_N_BATCH = ATTACKRATE_BUFFER_SIZE // BATCH_SIZE
    TEST_N_BATCH = TEST_BUFFER_SIZE // BATCH_SIZE
    embedding_dim = 256
    units = 1024
    vocab_inp_size = len(globalVocab.word2idx)
    target_size = num_emotions

    train_dataset_clients = []
    for client in range(0, numClients):
        train_dataset = dataLoadUtils.MyData(input_tensor_clients[client],
                                             target_tensor_clients[client])
        train_dataset = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                                   drop_last=True, shuffle=True)
        train_dataset_clients.append(train_dataset)

    val_dataset = dataLoadUtils.MyData(input_tensor_val, target_tensor_val)
    val_dataset = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                             drop_last=True, shuffle=True)
    attackRate_dataset = dataLoadUtils.MyData(input_tensor_attackRate,
                                              target_tensor_attackRate)
    attackRate_dataset = DataLoader(attackRate_dataset, batch_size=BATCH_SIZE,
                                    drop_last=True, shuffle=True)
    test_dataset = dataLoadUtils.MyData(input_tensor_test, target_tensor_test)
    test_dataset = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                              drop_last=True, shuffle=True)

    # Fix: the original built the device and model twice back-to-back and
    # discarded the first pair; one construction suffices.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = emotionModel.EmoGRU(vocab_inp_size, embedding_dim, units,
                                BATCH_SIZE, target_size)
    model.to(device)

    train_dataset = train_dataset_clients

    criterion = nn.CrossEntropyLoss()  # the same as log_softmax + NLLLoss
    optimizer = torch.optim.Adam(model.parameters())

    total_loss = 0
    train_accuracy, val_accuracy = 0, 0
    numIterations = 200
    datasetIter = [iter(dataset) for dataset in train_dataset]
    for iteration in range(0, numIterations):
        # One batch per client per iteration.
        nextBatches = [next(iterator) for iterator in datasetIter]
        aggregatedGradients = np.zeros(0)
        total_loss = 0
        prev_validation_accuracy = val_accuracy
        train_accuracy, val_accuracy, attackRate_accuracy = 0, 0, 0

        # Fix: random.randint is inclusive at both ends, so the original
        # could draw `numClients` itself — an out-of-range client index.
        peerList = []
        for verifier in range(numVerifiers):
            peerList.append(random.randint(0, numClients - 1))

        for nextBatch in nextBatches:
            inp = nextBatch[0]
            targ = nextBatch[1]
            lens = nextBatch[2]
            loss = 0
            predictions, _ = model(inp.permute(1, 0).to(device), lens, device)
            loss += emotionModel.loss_function(targ.to(device), predictions)
            batch_loss = (loss / int(targ.shape[1]))
            total_loss += batch_loss
            optimizer.zero_grad()
            loss.backward()

            # Capture this client's gradient, layer by layer.
            modelLayers = getCurrentModel(model)
            layers = []
            for name, param in model.named_parameters():
                if param.requires_grad:
                    layers.append(param.grad)

            goodUpdate = roni(modelLayers, layers, prev_validation_accuracy,
                              iteration, val_dataset)
            print(goodUpdate)
            # Fix: the original tested `goodUpdate and roni`; `roni` is the
            # function object and always truthy, so the extra term was a
            # confusing no-op.
            if goodUpdate:
                if len(aggregatedGradients) == 0:
                    aggregatedGradients = layers
                else:
                    for layerIdx in range(0, len(aggregatedGradients)):
                        aggregatedGradients[layerIdx] = (
                            aggregatedGradients[layerIdx] + layers[layerIdx])

        # No client passed the RONI check this round: skip the step.
        if len(aggregatedGradients) == 0:
            continue

        # Install the summed, approved gradients and take one step.
        layer = 0
        for name, param in model.named_parameters():
            if param.requires_grad:
                param.grad = aggregatedGradients[layer]
                layer += 1
        optimizer.step()

        for (batch, (inp, targ, lens)) in enumerate(val_dataset):
            predictions, _ = model(inp.permute(1, 0).to(device), lens, device)
            batch_accuracy = emotionModel.accuracy(targ.to(device),
                                                   predictions)
            val_accuracy += batch_accuracy
            # Early exit once the running accuracy clears 90%.
            if (val_accuracy / VAL_N_BATCH) > 90:
                print(iteration)
                break
        val_accuracy = val_accuracy / VAL_N_BATCH

        for (batch, (inp, targ, lens)) in enumerate(attackRate_dataset):
            predictions, _ = model(inp.permute(1, 0).to(device), lens, device)
            batch_accuracy = emotionModel.accuracy(targ.to(device),
                                                   predictions)
            attackRate_accuracy += batch_accuracy
        attackRate_accuracy = attackRate_accuracy / ATTACKRATE_N_BATCH

        print('{},{:.4f},{:.4f}'.format(iteration, val_accuracy,
                                        attackRate_accuracy))
def roniPeerToPeer(currentModel, sgdUpdate, peerIndices):
    """Peer-to-peer RONI vote on a candidate update.

    A scratch model is loaded first with ``currentModel`` and then with
    ``currentModel + sgdUpdate``; each peer in ``peerIndices`` compares
    its accuracy before and after.  The update passes when at least two
    peers see an accuracy drop smaller than ``RONI_THRESH``.
    """
    scratch = emotionModel.EmoGRU(vocab_inp_size, embedding_dim, units,
                                  BATCH_SIZE, target_size)
    scratch.to(device)

    def _install(weights):
        # Write per-layer weights into the scratch model's trainable params.
        idx = 0
        for _, p in scratch.named_parameters():
            if p.requires_grad:
                p.data = weights[idx]
                idx += 1

    def _peer_accuracy(peer, first_batch_only):
        # Mean accuracy of the scratch model on this peer's own loader
        # (optionally sampling just its first batch).
        total = 0
        count = 0
        for _, (inp, targ, lens) in enumerate(train_dataset_clients[peer]):
            preds, _ = scratch(inp.permute(1, 0).to(device), lens, device)
            total += emotionModel.accuracy(targ.to(device), preds)
            count += 1
            if first_batch_only:
                break
        return total / count

    _install(currentModel)
    # NOTE(review): the "before" accuracy samples one batch per peer while
    # the "after" pass uses the full loader — preserved as-is; confirm
    # whether the asymmetry is intentional.
    prev_accuracies = [_peer_accuracy(peer, True) for peer in peerIndices]
    print(prev_accuracies)

    _install(currentModel + sgdUpdate)
    new_accuracies = [_peer_accuracy(peer, False) for peer in peerIndices]
    print(new_accuracies)

    votes = sum(before - after < RONI_THRESH
                for before, after in zip(prev_accuracies, new_accuracies))
    return votes >= 2