def ensemble(state, X_test, y_test, g):
    """Build an ensemble model by averaging the parameters of four checkpoints.

    Loads the four checkpoints named by state['path1'..'path4'], copies the
    uniform (0.25-weighted) average of their parameters into a fresh model,
    evaluates it with MC-dropout, saves it, and records the save path in
    state['rep'].

    Args:
        state: dict with keys 'device', 'cuda', 'path1'..'path4', 'itr', 'acq';
            'rep' is written as a side effect.
        X_test, y_test: test split forwarded to test_with_dropout.
        g: path prefix for the saved ensemble checkpoint.

    Returns:
        (mod, acc): the averaged model and its test accuracy.
    """
    device = state['device']
    # Load the four ensemble members (was four copy-pasted load blocks).
    members = []
    for key in ('path1', 'path2', 'path3', 'path4'):
        member = Model().to(device)
        member.load_state_dict(torch.load(state[key])['model_state_dict'])
        members.append(member)
    mod = Model().to(device)
    # Uniform parameter average; equivalent to the old chain of .mul(0.25).add(...).
    # NOTE(review): only parameters are averaged, not buffers — matches the
    # original behavior.
    with torch.no_grad():
        for p, *ps in zip(mod.parameters(),
                          *(m.parameters() for m in members)):
            p.copy_(sum(q.data for q in ps) / len(ps))
    # (removed: a bare `mod.state_dict()` call whose result was discarded)
    acc = test_with_dropout(X_test, y_test, mod, device, state['cuda'])
    path = g + str(state['itr']) + 'epoch.' + str(state['acq']) + 'acq.pth.tar'
    state['rep'] = path
    torch.save({'model_state_dict': mod.state_dict()}, state['rep'])
    return mod, acc
def ini_model_train(opt):
    """Train the initial model on the seed set and save the final checkpoint.

    Trains for opt.ini_epoch epochs with plain SGD, printing training loss
    and test accuracy each epoch.  On the last epoch the checkpoint is saved
    once per device directory plus once at opt.ini_model_path.

    Args:
        opt: options namespace providing ini_lr, ini_batch_size, ini_epoch,
            num_dev and ini_model_path.

    Returns:
        (X_test, y_test, X_train_All, y_train_All) as produced by ini_model.
    """
    X_ini, y_ini, X_test, y_test, X_train_All, y_train_All = ini_model(opt)
    mod = Model().to(device)
    optimizer = optim.SGD(mod.parameters(), lr=opt.ini_lr)
    criterion = nn.CrossEntropyLoss()
    num_batches_train = X_ini.shape[0] // opt.ini_batch_size
    for i in range(opt.ini_epoch):
        # BUG FIX: train() must be re-entered every epoch — the eval() call
        # below otherwise left the model in eval mode (dropout/BN frozen)
        # from epoch 2 onward.
        mod.train()
        loss = 0.0
        for j in range(num_batches_train):
            slce = get_slice(j, opt.ini_batch_size)
            X_tra = torch.from_numpy(X_ini[slce]).float().to(device)
            Y_tra = torch.from_numpy(y_ini[slce]).long().to(device)
            optimizer.zero_grad()
            out = mod(X_tra)
            batch_loss = criterion(out, Y_tra)
            batch_loss.backward()
            optimizer.step()
            # BUG FIX: accumulate a float, not the live tensor, so each
            # batch's autograd graph is released immediately.
            loss += batch_loss.item()
        mod.eval()
        acc = test_without_dropout(X_test, y_test, mod, device)
        print('\n[{}/{} epoch], training loss:{:.4f}, test accuracy is:{} \n'.
              format(i, opt.ini_epoch, loss / num_batches_train, acc))
        if i + 1 == opt.ini_epoch:
            # Save one copy of the final checkpoint per device, plus one at
            # the root path.
            for d in range(opt.num_dev):
                torch.save(
                    {
                        'epoch': i,
                        'model_state_dict': mod.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': loss
                    },
                    os.path.join(opt.ini_model_path, 'device' + str(d),
                                 "ini.model.pth.tar"))
            torch.save(
                {
                    'epoch': i,
                    'model_state_dict': mod.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss
                }, opt.ini_model_path)
    return X_test, y_test, X_train_All, y_train_All
def en_ave(mod1, mod2, mod3, mod4, X_test, y_test, state):
    """Average four models' parameters into one, evaluate and checkpoint it.

    Each parameter of the new model is the 0.25-weighted sum of the
    corresponding parameters of mod1..mod4.  The averaged model is evaluated
    on (X_test, y_test), saved under exp/, and the save path is recorded in
    state['rep'].

    Returns:
        (averaged_model, accuracy).
    """
    print('=> load average ensemble')
    averaged = Model().to(device)
    sources = [mod1, mod2, mod3, mod4]
    param_groups = zip(averaged.parameters(),
                       *[m.parameters() for m in sources])
    for dst, *srcs in param_groups:
        blended = srcs[0].data.mul(0.25)
        for s in srcs[1:]:
            blended = blended.add(s.data.mul(0.25))
        dst.data.copy_(blended)
    acc = test_without_dropout(X_test, y_test, averaged, device)
    checkpoint_path = (os.path.join('exp', 'ensemble.') + str(state['itr']) +
                       'epoch.' + str(state['acq']) + 'acq.pth.tar')
    state['rep'] = checkpoint_path
    torch.save({'model_state_dict': averaged.state_dict()}, state['rep'])
    return averaged, acc
def ini_train(X_ini, y_ini, X_te, y_te, epochs, paths, device, batch_size, lr,
              momentum, arr_drop):
    """Train an initial model with SGD+momentum and save the final checkpoint.

    Trains for `epochs` epochs over (X_ini, y_ini), evaluating full-batch
    accuracy on (X_te, y_te) after every epoch.  On the final epoch the
    checkpoint is written to every directory in `paths`.

    Args:
        X_ini, y_ini: training arrays (numpy).
        X_te, y_te: evaluation arrays (numpy).
        epochs, batch_size, lr, momentum: usual hyperparameters.
        paths: iterable of directories to write "ini.model.pth.tar" into.
        device: torch device.
        arr_drop: dropout configuration forwarded to Model.

    Returns:
        The trained model.
    """
    mod = Model(arr_drop).to(device)
    optimizer = optim.SGD(mod.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    num_batches_train = X_ini.shape[0] // batch_size
    print("number of batch ", num_batches_train)
    for i in range(epochs):
        # BUG FIX: re-enter train mode every epoch; eval() below otherwise
        # left dropout disabled for all epochs after the first.
        mod.train()
        loss = 0.0
        for j in range(num_batches_train):
            slce = get_slice(j, batch_size)
            X_tra = torch.from_numpy(X_ini[slce]).float().to(device)
            Y_tra = torch.from_numpy(y_ini[slce]).long().to(device)
            optimizer.zero_grad()
            out = mod(X_tra)
            batch_loss = criterion(out, Y_tra)
            batch_loss.backward()
            optimizer.step()
            # BUG FIX: accumulate a float so batch graphs are freed.
            loss += batch_loss.item()
        mod.eval()
        with torch.no_grad():
            X_va = torch.from_numpy(X_te).float().to(device)
            Y_va = torch.from_numpy(y_te).long().to(device)
            output = mod(X_va)
            preds = torch.max(output, 1)[1]
            # BUG FIX: sklearn cannot consume CUDA tensors — move both
            # arrays to CPU numpy before scoring.
            acc = accuracy_score(Y_va.cpu().numpy(), preds.cpu().numpy())
        print('\n[{}/{} epoch], training loss:{:.4f}, test accuracy is:{} \n'.
              format(i, epochs, loss / num_batches_train, acc))
        if i + 1 == epochs:
            for path in paths:
                torch.save(
                    {
                        'epoch': i,
                        'model_state_dict': mod.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': loss
                    }, os.path.join(path, "ini.model.pth.tar"))
    return mod
def train():
    """Run a fixed 10000-iteration SGD loop, logging and checkpointing every 10 steps.

    Builds a Model on cuda:0, optimizes only parameters with requires_grad,
    and saves {'model', 'Opt', 'itr'} to ./model.pth every 10 iterations.
    """
    model = Model()
    model.to("cuda:0")
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
    # Optional resume-from-checkpoint (left disabled, as in the original):
    # checkpoint = torch.load("./model.pth")
    # model.load_state_dict(checkpoint["model"])
    # optimizer.load_state_dict(checkpoint["Opt"])
    for i in range(10000):
        optimizer.zero_grad()
        imgs, targets = read_batch()
        imgs = torch.tensor(imgs, dtype=torch.float32).to("cuda:0")
        targets = torch.tensor(targets, dtype=torch.float32).to("cuda:0")
        preds = model(imgs)
        loss = make_loss(preds, targets)
        loss.backward()
        optimizer.step()
        # FIX: the original tested `i % 10 == 0` twice in a row — merged into
        # one branch; also use loss.item() rather than %-formatting a tensor.
        if i % 10 == 0:
            print("Iteration: %d, Loss: %f" % (i, loss.item()))
            state = {
                'model': model.state_dict(),
                'Opt': optimizer.state_dict(),
                'itr': i
            }
            torch.save(state, "./model.pth")
def model(dataset, model_name=None, device=None, train=True):
    """Build the encoder-decoder network and optionally load saved weights.

    Args:
        dataset: provides vocab_size, target_vocab_size and vector_weights.
        model_name: checkpoint filename under config.model_path; when given,
            its weights are overlaid onto a freshly built network's state
            dict (so partial checkpoints are tolerated).
        device: torch device; defaults to CUDA when available, else CPU.
        train: leave the network in train mode when True, else eval mode.

    Returns:
        The constructed (and possibly pre-loaded) network.
    """
    device = device or torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    net = Model(vocab_size=dataset.vocab_size,
                embedding_dim=config.embedding_dim,
                output_size=dataset.target_vocab_size,
                encoder_hidden_size=config.encoder_hidden_size,
                decoder_hidden_size=config.decoder_hidden_size,
                encoder_layers=config.encoder_layers,
                decoder_layers=config.decoder_layers,
                dropout=config.dropout,
                embedding_weights=dataset.vector_weights,
                device=device)
    if model_name:
        # A model name was given: merge its saved weights into the fresh
        # state dict, then load the merged dict.
        pre_trained_state_dict = torch.load(
            FILE_PATH + config.model_path + model_name, map_location=device)
        state_dict = net.state_dict()
        state_dict.update(pre_trained_state_dict)
        net.load_state_dict(state_dict)
    # Idiom fix: plain statement instead of a conditional expression
    # evaluated only for its side effect.
    if train:
        net.train()
    else:
        net.eval()
    return net
# NOTE(review): fragment of a validation loop — the enclosing def / epoch /
# batch loops are not visible in this chunk, so the original nesting of these
# statements cannot be recovered.  Names `data`, `img`, `model`, `device`,
# `loss_position`, `epoch_loss_val`, `loss_val`, `val_indices`, `epoch`,
# `latent`, `loss_train`, `batch_size`, `validation_split`, `dataset_size`
# and `random_seed` are defined outside this fragment.
position_target = data['point_map']
img = img.to(device=device)
position_target = position_target.to(device=device, dtype=torch.float32)
pred = model(img)
logits = pred['logits']
# loss calculation
loss_pos_val = loss_position(logits, position_target)
# Running sum weighted by batch size; presumably averaged per sample below
# via len(val_indices) — TODO confirm val_indices is the full validation set.
epoch_loss_val += loss_pos_val.item() * img.size(0)
loss_val.append(epoch_loss_val / len(val_indices))
# print statistics
# NOTE(review): the `f` prefix is redundant — the string contains no braces
# and is %-formatted; "%.d" behaves like "%d".  Left untouched (runtime string).
print(f"epoch:[%.d] Validation loss: %.5f" % (epoch + 1, loss_val[-1]))
# Save the model
torch.save(model.state_dict(), 'stats/model_saved.pth')
# Save latent space feature maps along with some log statistics
latent = latent[1:, ...]  # removes first, which was an torch.empty
loss_train = np.array(loss_train)
loss_val = np.array(loss_val)
mdic = {
    'latent': latent,
    'loss_train': loss_train,
    'loss_val': loss_val,
    'batch_size': batch_size,
    'validation_split': validation_split,
    'dataset_size': dataset_size,
    'random_seed': random_seed
}
savemat("stats/log.mat", mdic)
def random_run(acquisition_iterations, X_Pool, y_Pool, pool_subset,
               dropout_iterations, nb_classes, Queries, X_test, y_test, rep,
               X_old, y_old, device, itr, cuda, g):
    """Random-acquisition active-learning baseline.

    Each round samples a random pool subset, picks `Queries` points uniformly
    at random, removes them from the pool, fine-tunes the model on the old
    data plus the acquired points for `itr` epochs, and records test accuracy.

    Args:
        acquisition_iterations: number of acquisition rounds.
        X_Pool, y_Pool: unlabeled pool (consumed as points are acquired).
        pool_subset: size of the random subset drawn each round.
        dropout_iterations, nb_classes: unused here; kept for signature
            compatibility with the other acquisition runners.
        Queries: points acquired per round.
        rep: path of the starting checkpoint to restore.
        X_old, y_old: base training data each fine-tune starts from.
        itr: fine-tuning epochs per round.
        cuda: whether the checkpoint was saved from GPU.
        g: path to save the running checkpoint to after each round.

    Returns:
        (AA, mod, X_train, y_train, losses_train, optimizer) where AA is the
        accuracy trace and X_train/y_train are all acquired points.
    """
    mod = Model().to(device)
    # Restore the starting checkpoint; remap to CPU when CUDA is unavailable.
    if cuda:
        cp = torch.load(rep)
        print("\n ********load gpu version******* \n")
    else:
        cp = torch.load(rep, map_location='cpu')
    mod.load_state_dict(cp['model_state_dict'])
    # NOTE(review): weight_decay=0.5 is unusually large — kept as written.
    optimizer = optim.Adam(mod.parameters(), lr=0.001, weight_decay=0.5)
    optimizer.load_state_dict(cp['optimizer_state_dict'])
    criterion = nn.CrossEntropyLoss()
    X_train = np.empty([0, 1, 28, 28])  # acquired images (28x28, 1 channel)
    y_train = np.empty([0, ])
    AA = []             # test accuracy after each round (plus the initial one)
    losses_train = []   # mean training loss per fine-tuning epoch
    batch_size = 100    # hoisted: was re-assigned inside the loop every round
    acc = test(X_test, y_test, mod, device, cuda)
    AA.append(acc)
    print('initial test accuracy: ', acc)
    for i in range(acquisition_iterations):
        # Draw a random subset of the pool, then `Queries` points from it.
        pool_subset_dropout = np.asarray(
            random.sample(range(0, X_Pool.shape[0]), pool_subset))
        X_Pool_Dropout = X_Pool[pool_subset_dropout, :, :, :]
        y_Pool_Dropout = y_Pool[pool_subset_dropout]
        x_pool_index = np.random.choice(X_Pool_Dropout.shape[0], Queries,
                                        replace=False)
        Pooled_X = X_Pool_Dropout[x_pool_index, :, :, :]
        Pooled_Y = y_Pool_Dropout[x_pool_index]
        # Rebuild the pool without the acquired points.
        delete_Pool_X = np.delete(X_Pool, (pool_subset_dropout), axis=0)
        delete_Pool_Y = np.delete(y_Pool, (pool_subset_dropout), axis=0)
        delete_Pool_X_Dropout = np.delete(X_Pool_Dropout, (x_pool_index),
                                          axis=0)
        delete_Pool_Y_Dropout = np.delete(y_Pool_Dropout, (x_pool_index),
                                          axis=0)
        X_Pool = np.concatenate((delete_Pool_X, delete_Pool_X_Dropout),
                                axis=0)
        y_Pool = np.concatenate((delete_Pool_Y, delete_Pool_Y_Dropout),
                                axis=0)
        print('updated pool size is ', X_Pool.shape[0])
        X_train = np.concatenate((X_train, Pooled_X), axis=0)
        y_train = np.concatenate((y_train, Pooled_Y), axis=0)
        print('number of data points from pool', X_train.shape[0])
        # Fine-tune on the base data plus the newly acquired points.
        X = np.vstack((X_old, Pooled_X))
        y = np.hstack((y_old, Pooled_Y))
        X, y = shuffle(X, y)
        num_batch = X.shape[0] // batch_size
        print("number of batch: ", num_batch)
        mod.train()
        for h in range(itr):
            losses = 0.0
            for j in range(num_batch):
                slce = get_slice(j, batch_size)
                X_fog_ = torch.from_numpy(X[slce]).float().to(device)
                y_fog_ = torch.from_numpy(y[slce]).long().to(device)
                optimizer.zero_grad()
                out = mod(X_fog_)
                train_loss = criterion(out, y_fog_)
                # BUG FIX: accumulate a float, not the live tensor — the old
                # `losses += train_loss` kept every batch's autograd graph
                # alive for the whole epoch.
                losses += train_loss.item()
                train_loss.backward()
                optimizer.step()
            losses_train.append(losses / num_batch)
        acc = test(X_test, y_test, mod, device, cuda)
        print('test accuracy: ', acc)
        AA.append(acc)
        torch.save(
            {
                'model_state_dict': mod.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, g)
    return AA, mod, X_train, y_train, losses_train, optimizer
def main(args: argparse.Namespace):
    """Train and evaluate an intent/semiotic/contextual post classifier.

    Loads metadata and label maps, builds data loaders for the requested
    input modality (image_only / image_text / text_only), trains for
    args.epochs epochs with Adam + StepLR, optionally logs to TensorBoard,
    and saves the final checkpoint to args.output_dir/args.name.pt.
    """
    # Load input data
    with open(args.train_metadata, 'r') as f:
        train_posts = json.load(f)
    with open(args.val_metadata, 'r') as f:
        val_posts = json.load(f)

    # Load labels: map each label name to a contiguous integer id.
    labels = {}
    with open(args.label_intent, 'r') as f:
        intent_labels = json.load(f)
    labels['intent'] = {}
    for label in intent_labels:
        labels['intent'][label] = len(labels['intent'])
    with open(args.label_semiotic, 'r') as f:
        semiotic_labels = json.load(f)
    labels['semiotic'] = {}
    for label in semiotic_labels:
        labels['semiotic'][label] = len(labels['semiotic'])
    with open(args.label_contextual, 'r') as f:
        contextual_labels = json.load(f)
    labels['contextual'] = {}
    for label in contextual_labels:
        labels['contextual'][label] = len(labels['contextual'])

    # Build dictionary from training set
    train_captions = []
    for post in train_posts:
        train_captions.append(post['orig_caption'])
    dictionary = Dictionary(tokenizer_method="TreebankWordTokenizer")
    dictionary.build_dictionary_from_captions(train_captions)

    # Set up torch device
    if 'cuda' in args.device and torch.cuda.is_available():
        device = torch.device(args.device)
        kwargs = {'pin_memory': True}
    else:
        device = torch.device('cpu')
        kwargs = {}

    # Set up number of workers
    num_workers = min(multiprocessing.cpu_count(), args.num_workers)

    # Set up data loaders differently based on the task
    # TODO: Extend to ELMo + word2vec etc.
    if args.type == 'image_only':
        train_dataset = ImageOnlyDataset(train_posts, labels)
        val_dataset = ImageOnlyDataset(val_posts, labels)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=args.shuffle,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_image_only,
            **kwargs)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=1,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_image_only,
            **kwargs)
    elif args.type == 'image_text':
        train_dataset = ImageTextDataset(train_posts, labels, dictionary)
        val_dataset = ImageTextDataset(val_posts, labels, dictionary)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=args.shuffle,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_image_text,
            **kwargs)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=1,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_image_text,
            **kwargs)
    elif args.type == 'text_only':
        train_dataset = TextOnlyDataset(train_posts, labels, dictionary)
        val_dataset = TextOnlyDataset(val_posts, labels, dictionary)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=args.shuffle,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_text_only,
            **kwargs)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=1,
            num_workers=num_workers,
            collate_fn=collate_fn_pad_text_only,
            **kwargs)

    # Set up the model
    model = Model(vocab_size=dictionary.size()).to(device)

    # Set up an optimizer; LR decays by gamma every step_size epochs.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=args.lr_scheduler_step_size,
        gamma=args.lr_scheduler_gamma)

    # Set up loss function
    loss_fn = torch.nn.CrossEntropyLoss()

    # Setup tensorboard
    if args.tensorboard:
        writer = tensorboard.SummaryWriter(
            log_dir=args.log_dir + "/" + args.name, flush_secs=1)
    else:
        writer = None

    # Training loop
    if args.classification == 'intent':
        keys = ['intent']
    elif args.classification == 'semiotic':
        keys = ['semiotic']
    elif args.classification == 'contextual':
        keys = ['contextual']
    elif args.classification == 'all':
        keys = ['intent', 'semiotic', 'contextual']
    else:
        raise ValueError("args.classification doesn't exist.")
    best_auc_ovr = 0.0
    best_auc_ovo = 0.0
    best_acc = 0.0
    best_model = None
    best_optimizer = None
    best_scheduler = None
    for epoch in range(args.epochs):
        for mode in ["train", "eval"]:
            # Set up a progress bar
            if mode == "train":
                pbar = tqdm.tqdm(enumerate(train_data_loader),
                                 total=len(train_data_loader))
                model.train()
            else:
                pbar = tqdm.tqdm(enumerate(val_data_loader),
                                 total=len(val_data_loader))
                model.eval()
            total_loss = 0
            # BUG FIX: np.int was removed in NumPy 1.24 — use builtin int.
            label = dict.fromkeys(keys, np.array([], dtype=int))
            pred = dict.fromkeys(keys, None)
            for _, batch in pbar:
                if 'caption' not in batch:
                    caption_data = None
                else:
                    caption_data = batch['caption'].to(device)
                if 'image' not in batch:
                    image_data = None
                else:
                    image_data = batch['image'].to(device)
                label_batch = {}
                for key in keys:
                    label_batch[key] = batch['label'][key].to(device)
                if mode == "train":
                    model.zero_grad()
                pred_batch = model(image_data, caption_data)
                for key in keys:
                    label[key] = np.concatenate(
                        (label[key], batch['label'][key].cpu().numpy()))
                    # Numerically-stable softmax over the logits.
                    x = pred_batch[key].detach().cpu().numpy()
                    x_max = np.max(x, axis=1).reshape(-1, 1)
                    z = np.exp(x - x_max)
                    prediction_scores = z / np.sum(z, axis=1).reshape(-1, 1)
                    if pred[key] is not None:
                        pred[key] = np.vstack((pred[key], prediction_scores))
                    else:
                        pred[key] = prediction_scores
                loss_batch = {}
                loss = None
                for key in keys:
                    loss_batch[key] = loss_fn(pred_batch[key],
                                              label_batch[key])
                    if loss is None:
                        loss = loss_batch[key]
                    else:
                        # BUG FIX: was `loss_bath[key]` — a NameError that
                        # fired whenever more than one key is trained
                        # (args.classification == 'all').
                        loss += loss_batch[key]
                total_loss += loss.item()
                if mode == "train":
                    loss.backward()
                    optimizer.step()
            # Terminate the progress bar
            pbar.close()
            # Update lr scheduler
            if mode == "train":
                scheduler.step()
            for key in keys:
                auc_score_ovr = roc_auc_score(
                    label[key], pred[key],
                    multi_class='ovr')  # pylint: disable-all
                auc_score_ovo = roc_auc_score(
                    label[key], pred[key],
                    multi_class='ovo')  # pylint: disable-all
                accuracy = accuracy_score(label[key],
                                          np.argmax(pred[key], axis=1))
                print("[{} - {}] [AUC-OVR={:.3f}, AUC-OVO={:.3f}, ACC={:.3f}]".
                      format(mode, key, auc_score_ovr, auc_score_ovo,
                             accuracy))
                if mode == "eval":
                    best_auc_ovr = max(best_auc_ovr, auc_score_ovr)
                    best_auc_ovo = max(best_auc_ovo, auc_score_ovo)
                    best_acc = max(best_acc, accuracy)
                    best_model = model
                    best_optimizer = optimizer
                    best_scheduler = scheduler
                if writer:
                    writer.add_scalar('AUC-OVR/{}-{}'.format(mode, key),
                                      auc_score_ovr, epoch)
                    writer.add_scalar('AUC-OVO/{}-{}'.format(mode, key),
                                      auc_score_ovo, epoch)
                    writer.add_scalar('ACC/{}-{}'.format(mode, key), accuracy,
                                      epoch)
                    writer.flush()
            if writer:
                writer.add_scalar('Loss/{}'.format(mode), total_loss, epoch)
                writer.flush()
            print("[{}] Epoch {}: Loss = {}".format(mode, epoch, total_loss))
    hparam_dict = {
        'train_split': args.train_metadata,
        'val_split': args.val_metadata,
        'lr': args.lr,
        'epochs': args.epochs,
        'batch_size': args.batch_size,
        'num_workers': args.num_workers,
        'shuffle': args.shuffle,
        'lr_scheduler_gamma': args.lr_scheduler_gamma,
        'lr_scheduler_step_size': args.lr_scheduler_step_size,
    }
    metric_dict = {
        'AUC-OVR': best_auc_ovr,
        'AUC-OVO': best_auc_ovo,
        'ACC': best_acc
    }
    if writer:
        writer.add_hparams(hparam_dict=hparam_dict, metric_dict=metric_dict)
        writer.flush()
    Path(args.output_dir).mkdir(exist_ok=True)
    torch.save(
        {
            'hparam_dict': hparam_dict,
            'metric_dict': metric_dict,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
        },
        Path(args.output_dir) / '{}.pt'.format(args.name))