def test(model, device, test_loader, images, texts, lengths, converter, prediction_dir):
    model.to(device)
    images = images.to(device)
    model.eval()
    pred_json = {}
    pred_list = []
    make_folder(prediction_dir)
    for i, datas in enumerate(test_loader):
        datas, targets = datas
        batch_size = datas.size(0)
        dataloader.loadData(images, datas)
        t, l = converter.encode(targets)
        dataloader.loadData(texts, t)
        dataloader.loadData(lengths, l)
        preds = model(images)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        pred_string = converter.decode(preds.data, preds_size.data, raw=False)
        pred_dict = {
            'image_path': test_loader.dataset.get_img_path(i),
            'prediction': pred_string
        }
        pred_list.append(pred_dict)
    pred_json = {'predict': pred_list}
    with open(os.path.join(prediction_dir, 'predict.json'), 'w') as save_json:
        json.dump(pred_json, save_json, indent=2, ensure_ascii=False)
def get_model_results_for_folder(model: torch.nn.Module, folder: str,
                                 device: Union[torch.device, str] = None,
                                 pbar_desc: str = None) -> pd.DataFrame:
    """Collect model float predictions for all image files in folder.

    The model must return a 2D tensor of size (batch_size, binary predictions).
    All non-directory files ending with '.png' in folder are assumed to be valid
    image files loadable by PIL.Image.open.

    :param model: the model to use
    :param folder: the folder to search for image files in
    :param device: if given, the device to move the model onto before evaluation
    :param pbar_desc: description for the progress bar
    :return: pd.DataFrame with columns 'img' (the file name of the image relative
        to the folder) and 'pred' (the float sigmoid of the model's prediction).
    """
    with torch.no_grad():
        model.eval()
        if device is not None:
            model.to(device)
        img_fns = [
            fn for fn in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, fn)) and fn.endswith('.png')
        ]
        row_list = []
        for img_fn in tqdm(img_fns, desc=pbar_desc):
            # TODO: batch-processing
            img = PIL.Image.open(os.path.join(folder, img_fn))
            img_t = to_tens(img).to(device)
            pred_t = torch.sigmoid(model(img_t.unsqueeze(0)).squeeze(0))
            row_list.append({'img': img_fn, 'pred': float(pred_t)})
    return pd.DataFrame(row_list)
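# Hedged usage sketch for get_model_results_for_folder above. The tiny
# single-logit network and the folder name 'val_pngs' are illustrative
# assumptions only; they are not part of the original project, and the call
# still relies on this module's own imports (to_tens, PIL, pandas, tqdm).
toy_binary_model = torch.nn.Sequential(
    torch.nn.AdaptiveAvgPool2d(1),  # collapse spatial dims of any input size
    torch.nn.Flatten(),
    torch.nn.LazyLinear(1),         # one logit per image, matching torch.sigmoid above
)
preds_df = get_model_results_for_folder(toy_binary_model, 'val_pngs',
                                        device='cpu', pbar_desc='toy eval')
print(preds_df.head())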
def __init__(self, loader_test, loader_train, dataprep, file_R, cuda, device, model, crop=True):
    self.loader_test = loader_test
    self.loader_train = loader_train
    self.root_M = file_R
    self.dataprep = dataprep
    self.crop = crop
    self.fileC = torch.load(self.root_M, map_location='cpu')
    model.load_state_dict(self.fileC['model_state_dict'])
    self.losses_epoc = self.fileC['m_los']
    self.psnr = self.fileC['psnr']
    self.ssim = self.fileC['ssim']
    self.best_psnr = 0
    self.best_t = []
    self.best_d = []
    self.best_s = []
    if cuda:
        model.to(device)
        # from torchsummary import summary
        # summary(model, input_size=(1, 256, 64, 64))
    self.model = model
    self.device = device
    self.loss = nn.MSELoss()
    self.cuda = cuda
    self.test_all()
    if self.crop:
        self.reconstruct()
def test(device, testset, testloader, model):
    model.to(device)
    model.eval()
    acc = []
    with tqdm(testloader, total=config.num_batches) as pbar:
        for batch_idx, batch in enumerate(pbar):
            train_inputs, train_targets = batch['train']
            train_inputs = train_inputs.to(device=device)
            train_targets = train_targets.to(device=device)
            train_embeddings = model(train_inputs)

            test_inputs, test_targets = batch['test']
            test_inputs = test_inputs.to(device=device)
            test_targets = test_targets.to(device=device)
            test_embeddings = model(test_inputs)

            prototypes = get_prototypes(train_embeddings, train_targets,
                                        testset.num_classes_per_task)
            with torch.no_grad():
                accuracy = get_accuracy(prototypes, test_embeddings, test_targets)
                pbar.set_postfix(accuracy='{0:.4f}'.format(accuracy.item()))
                acc.append(accuracy)

            if batch_idx >= config.num_batches:
                break
    return acc
def run(args, trainset, testset, action):
    if not torch.cuda.is_available():
        args.device = 'cpu'
    args.device = torch.device(args.device)

    model = action.create_model()
    if args.store and os.path.isfile(args.store):
        model.load_state_dict(torch.load(args.store, map_location='cpu'))
    if args.pretrained:
        assert os.path.isfile(args.pretrained)
        model.load_state_dict(torch.load(args.pretrained, map_location='cpu'))
    model.to(args.device)

    checkpoint = None
    if args.resume:
        assert os.path.isfile(args.resume)
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model'])

    # dataloader
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers)

    # optimizer
    min_loss = float('inf')
    learnable_params = filter(lambda p: p.requires_grad, model.parameters())
    if args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(learnable_params)
    else:
        optimizer = torch.optim.SGD(learnable_params, lr=0.1)

    if checkpoint is not None:
        min_loss = checkpoint['min_loss']
        optimizer.load_state_dict(checkpoint['optimizer'])

    # training
    LOGGER.debug('train, begin')
    for epoch in range(args.start_epoch, args.epochs):
        running_loss = action.train(model, trainloader, optimizer, args.device)
        val_loss = action.validate(model, testloader, args.device)

        is_best = val_loss < min_loss
        min_loss = min(val_loss, min_loss)

        LOGGER.info('epoch, %04d, %f, %f', epoch + 1, running_loss, val_loss)
        print('epoch, %04d, floss_train=%f, floss_val=%f' % (epoch + 1, running_loss, val_loss))

        if is_best:
            save_checkpoint(model.state_dict(), args.outfile, 'model')
    LOGGER.debug('train, end')
def evaluate(data_source, ngramProb=None):
    # Turn on evaluation mode which disables dropout.
    model.to(device)
    model.eval()
    model.set_mode('eval')
    total_loss = 0.
    stout = []
    ntokens = len(dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            # import pdb; pdb.set_trace()
            if ngramProb is not None:
                _, batch_ngramProb = get_batch(ngramProb, i)
            # gs534 add sentence resetting
            eosidx = dictionary.get_eos()
            output, hidden = model(data, hidden, separate=args.reset, eosidx=eosidx)
            output_flat = output.view(-1, ntokens)
            logProb = interpCrit(output.view(-1, ntokens), targets)
            rnnProbs = torch.exp(-logProb)
            if args.interp and args.evalmode:
                final_prob = args.factor * rnnProbs + (1 - args.factor) * batch_ngramProb
            else:
                final_prob = rnnProbs
            if args.stream_out:
                stout += final_prob.tolist()
            total_loss += (-torch.log(final_prob).sum()) / data.size(1)
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source), stout
def train(num_epochs, model, device, train_loader, val_loader, images, texts, lengths,
          converter, optimizer, lr_scheduler, prediction_dir, print_iter, opt):
    # criterion = CTCLoss()
    # criterion.to(device)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)
    images = images.to(device)
    model.to(device)
    for epoch in range(num_epochs):
        count = 0
        model.train()
        for i, datas in enumerate(train_loader):
            datas, targets = datas
            batch_size = datas.size(0)
            count += batch_size
            dataloader.loadData(images, datas)
            t, l = converter.encode(targets, opt.batch_max_length)
            dataloader.loadData(texts, t)
            dataloader.loadData(lengths, l)
            preds = model(images, t[:, :-1])
            # preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds.view(-1, preds.shape[-1]), t[:, 1:].contiguous().view(-1))
            model.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)
            optimizer.step()
            if count % print_iter < train_loader.batch_size:
                print('epoch {} [{}/{}] loss : {}'.format(epoch, count,
                                                          len(train_loader.dataset), cost))
        if epoch % 3 == 0 and epoch != 0:
            res = validation(model, device, val_loader, images, texts, lengths,
                             converter, prediction_dir, opt)
            save_model(opt.save_dir, f'{epoch}_{round(float(res), 3)}', model,
                       optimizer, lr_scheduler, opt)
def test(model, data_loader_test, device, predict_path):
    class_name = {
        1: 'sidewalk_blocks', 2: 'alley_damaged', 3: 'sidewalk_damaged',
        4: 'caution_zone_manhole', 5: 'braille_guide_blocks_damaged',
        6: 'alley_speed_bump', 7: 'roadway_crosswalk', 8: 'sidewalk_urethane',
        9: 'caution_zone_repair_zone', 10: 'sidewalk_asphalt', 11: 'sidewalk_other',
        12: 'alley_crosswalk', 13: 'caution_zone_tree_zone', 14: 'caution_zone_grating',
        15: 'roadway_normal', 16: 'bike_lane', 17: 'caution_zone_stairs',
        18: 'alley_normal', 19: 'sidewalk_cement', 20: 'braille_guide_blocks_normal',
        21: 'sidewalk_soil_stone'
    }
    model.to(device)
    model.eval()
    try:
        os.mkdir(os.path.join('./predictions'))
    except OSError:
        pass

    # iterate over all images
    pred_xml = elemTree.Element('predictions')
    pred_xml.text = '\n '
    for idx, data in enumerate(data_loader_test):
        print('{} / {}'.format(idx + 1, len(data_loader_test)))
        images, target = data
        images = list(image.to(device) for image in images)
        outputs = model(images)
        output = outputs[0]
        masks, labels, scores = output['masks'], output['labels'], output['scores']
        texts = []

        # for a single image
        xml_image = elemTree.SubElement(pred_xml, 'image')
        xml_image.attrib['name'] = target[0]['image_id'].split('.')[0]
        xml_image.text = '\n '
        for index in range(len(masks)):
            mask, label, score = masks[index], int(labels[index]), scores[index]
            # class, score, x1, y1, x2, y2
            mask_arr = mask[0].cpu().detach().numpy()
            mask_bin = np.where(mask_arr > 0.3, True, False)
            polygons = Mask(mask_bin).polygons()
            points = polygons.points
            point = ''
            for p in points[0]:
                point += str(p[0]) + ',' + str(p[1]) + ';'
            xml_predict = elemTree.SubElement(xml_image, 'predict')
            xml_predict.tail = '\n '
            xml_predict.attrib['class_name'] = class_name[label]
            xml_predict.attrib['score'] = str(float(score))
            xml_predict.attrib['polygon'] = point
            if index == len(masks) - 1:
                xml_predict.tail = '\n '
        xml_image.tail = '\n '
        if idx == len(data_loader_test) - 1:
            xml_image.tail = '\n'
    pred_xml = elemTree.ElementTree(pred_xml)
    pred_xml.write('./predictions/' + predict_path + '.xml')
def evaluate(model, testset, device):
    # eval on valid set
    loss_val_, acc_val_ = 0., 0.
    model.eval()
    model.to(device)
    for j, sample in enumerate(testset):
        img = sample['img'].to(device)
        score = sample['score'].to(device)
        score_pred = model(img)
        loss_val_ += bce_loss(score_pred, score).item()
        acc_val_ += accuracy(score_pred, score)
    avg_loss, avg_acc = loss_val_ / (j + 1), acc_val_ / (j + 1)
    return avg_loss, avg_acc
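# A minimal, self-contained sketch of the device-placement pattern the
# evaluation helpers above share (move the model once, move each batch inside
# the loop, and disable autograd during validation). The toy linear model and
# random batch are assumptions for illustration only.
import torch
import torch.nn as nn

_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
_toy_model = nn.Linear(8, 2).to(_device)
_toy_batch = torch.randn(4, 8)

_toy_model.eval()
with torch.no_grad():
    _logits = _toy_model(_toy_batch.to(_device))
print(_logits.shape)  # torch.Size([4, 2])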
def train(model, trainloader, testloader, criterion, optimizer, epochs=10,
          print_every=40, print_accuracy=False, device='cpu'):
    steps = 0
    # turn on drop out in the network (default)
    model.train()
    # change to device
    model.to(device)
    for e in range(epochs):
        running_loss = 0
        for ii, (inputs, labels) in enumerate(trainloader):
            steps += 1
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward and backward passes
            outputs = model.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                # print("Epoch: {}/{}... ".format(e+1, epochs),
                #       "Loss: {:.4f}".format(running_loss/print_every))
                # running_loss = 0
                if print_accuracy:
                    train_accuracy = accuracy_score(model, trainloader, device=device, print_score=False)
                    test_accuracy = accuracy_score(model, testloader, device=device, print_score=False)
                    print("Epoch: {}/{}... ".format(e + 1, epochs),
                          "Loss: {:.4f}".format(running_loss / print_every),
                          "Train accuracy: {} %".format(f'{train_accuracy:.1f}'),
                          "Test accuracy: {} %".format(f'{test_accuracy:.1f}'))
                else:
                    print("Epoch: {}/{}... ".format(e + 1, epochs),
                          "Loss: {:.4f}".format(running_loss / print_every))
                running_loss = 0
def load_checkpoint(opts, model, label):
    """ Load a model from a file. """
    cur_dir = os.getcwd()
    model_dir = os.path.join(cur_dir, opts.results_dir, opts.experiment_name, 'checkpoints')
    model_file = os.path.join(model_dir, '{}_net.pth.tar'.format(label))
    print("Loading model from {}".format(model_file))
    model_dict = torch.load(model_file, map_location=opts.device)
    device = torch.device(opts.device)
    model.to(opts.device)
    model.load_state_dict(model_dict['state_dict'])
    return model
def __init__(self, dataset, model, model_state_file, save_dir, device, shuffle,
             num_epochs, batch_size, learning_rate, early_stopping_criteria):
    self.dataset = dataset
    self.class_weights = dataset.class_weights.to(device)
    self.model = model.to(device)
    self.save_dir = save_dir
    self.device = device
    self.shuffle = shuffle
    self.num_epochs = num_epochs
    self.batch_size = batch_size
    self.loss_func = nn.CrossEntropyLoss(self.class_weights)
    self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
    self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=self.optimizer, mode='min', factor=0.5, patience=1)
    self.train_state = {
        'done_training': False,
        'stop_early': False,
        'early_stopping_step': 0,
        'early_stopping_best_val': 1e8,
        'early_stopping_criteria': early_stopping_criteria,
        'learning_rate': learning_rate,
        'epoch_index': 0,
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'test_loss': -1,
        'test_acc': -1,
        'model_filename': model_state_file
    }
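# Hedged instantiation sketch for the trainer __init__ above. The class name
# Trainer and every argument value below are placeholders; the snippet only
# shows the constructor, so the real class name and dataset API are assumptions.
# trainer = Trainer(dataset=news_dataset, model=classifier,
#                   model_state_file='model.pth', save_dir='checkpoints/',
#                   device='cuda', shuffle=True, num_epochs=20, batch_size=64,
#                   learning_rate=1e-3, early_stopping_criteria=5)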
def load_model(model, checkpoint, device):
    ckpt = torch.load(os.path.join('checkpoint', checkpoint))

    if 'args' in ckpt:
        args = ckpt['args']

    if model == 'vqvae':
        model = VQVAE()
    elif model == 'pixelsnail_bottom':
        model = PixelSNAIL(
            [64, 64],
            512,
            args.channel,
            5,
            4,
            args.n_res_block,
            args.n_res_channel,
            attention=False,
            dropout=args.dropout,
            n_cond_res_block=args.n_cond_res_block,
            cond_res_channel=args.n_res_channel,
        )

    if 'model' in ckpt:
        ckpt = ckpt['model']

    model.load_state_dict(ckpt)
    model = model.to(device)
    model.eval()
    return model
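# Hedged usage sketch for load_model above; the checkpoint file names are
# placeholders, not files shipped with the snippet, and they are expected to
# live under the hard-coded 'checkpoint/' directory.
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# vqvae = load_model('vqvae', 'vqvae_latest.pt', device)
# bottom = load_model('pixelsnail_bottom', 'pixelsnail_bottom_latest.pt', device)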
def train(model, optimizer, epochs, print_every=1000, plot_every=20):
    scheduler = ReduceLROnPlateau(optimizer=optimizer, mode='max', factor=0.1,
                                  patience=10, verbose=True)
    model = model.to(device=device)
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            _, preds = scores.max(1)
            loss = F.cross_entropy(scores, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if t % print_every == 0:
                print('Epoch %d, Iteration %d, loss = %.4f' % (e, t, loss))
                val_acc = check_accuracy(model)
                scheduler.step(val_acc)
                print()

        if e % plot_every == 0 and e > 0:
            # a and b are assumed to be module-level history lists defined elsewhere
            plt.plot(np.arange(len(a)), a, 'b', np.arange(len(b)), b, 'y')
            plt.ylim(0, 1)
            # plt.show()
            plt.savefig("./logs/Epoch %d.png" % e)
def train(device, dataset, dataloader, model):
    print("in train")
    model = model.to(device)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    # Training loop
    images_per_batch = {}
    batch_count, images_per_batch['train'], images_per_batch['test'] = 0, [], []
    with tqdm(dataloader, total=config.num_batches) as pbar:
        for batch_idx, batch in enumerate(pbar):
            model.zero_grad()

            train_inputs, train_targets = batch['train']
            train_inputs = train_inputs.to(device=device)
            train_targets = train_targets.to(device=device)
            train_embeddings = model(train_inputs)

            test_inputs, test_targets = batch['test']
            test_inputs = test_inputs.to(device=device)
            test_targets = test_targets.to(device=device)
            test_embeddings = model(test_inputs)

            prototypes = get_prototypes(train_embeddings, train_targets,
                                        dataset.num_classes_per_task)
            loss = prototypical_loss(prototypes, test_embeddings, test_targets)

            loss.backward()
            optimizer.step()

            # Just keeping the count here
            batch_count += 1
            images_per_batch['train'].append(train_inputs.shape[1])
            images_per_batch['test'].append(test_inputs.shape[1])

            with torch.no_grad():
                accuracy = get_accuracy(prototypes, test_embeddings, test_targets)
                pbar.set_postfix(accuracy='{0:.4f}'.format(accuracy.item()))

            if batch_idx >= config.num_batches:
                break

    print("Number of batches in the dataloader: ", batch_count)

    # Save model
    if check_dir() is not None:
        filename = os.path.join(
            'saved_models',
            'protonet_cifar_fs_{0}shot_{1}way.pt'.format(config.k, config.n))
        with open(filename, 'wb') as f:
            state_dict = model.state_dict()
            torch.save(state_dict, f)
        print("Model saved")

    return batch_count, images_per_batch
def __init__(self, model, optimizer):
    self.model = model.to(DEVICE)
    self.optimizer = optimizer
    self.epoch = 0
    self.iterations = 0
    self.accuracies = np.array([])
    self.losses = np.array([])
    self.eval_accuracies = np.array([])
    self.log_softmax = nn.LogSoftmax(dim=1).to(DEVICE)
def inference(model, data_loader, **kwargs):
    dev = kwargs['device']
    if dev == 'cuda':
        args.cpu = False
        args.ngraph = ''
        # from trainer import Trainer
        model = model.to(torch.device('cpu' if args.cpu else 'cuda'))
        t = Trainer(args, data_loader, model, loss, checkpoint)
        metric = t.test()
    if dev == 'cpu':
        args.cpu = True
        # args.ngraph = ''
        args.ngraph = './edsr.model'
        # from trainer import Trainer
        model = model.to(torch.device('cpu' if args.cpu else 'cuda'))
        t = Trainer(args, data_loader, model, loss, checkpoint)
        metric = t.test()
    if metric == 'nan':
        metric = 0
    return metric
def run(args, testset, action):
    if not torch.cuda.is_available():
        args.device = 'cpu'
    args.device = torch.device(args.device)

    model = action.create_model()
    if args.pretrained:
        assert os.path.isfile(args.pretrained)
        model.load_state_dict(torch.load(args.pretrained, map_location='cpu'))
    model.to(args.device)

    # dataloader
    testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False,
                                             num_workers=args.workers)

    # testing
    LOGGER.debug('tests, begin')
    action.evaluate(model, testloader, args.device)
    LOGGER.debug('tests, end')
def print_examples(model, device, dataset):
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    model.to(device)
    model.eval()

    test_img1 = transform(
        cv2.imread("../Data/flickr8k/test_examples/dog.jpg")).unsqueeze(0)
    print("Example 1 CORRECT: Dog on a beach by the ocean")
    print("Example 1 OUTPUT: " +
          " ".join(model.caption_image(test_img1.to(device), dataset.vocab)))
    print('=' * 50)

    test_img2 = transform(
        cv2.imread("../Data/flickr8k/test_examples/child.jpg")).unsqueeze(0)
    print("Example 2 CORRECT: Child holding red frisbee outdoors")
    print("Example 2 OUTPUT: " +
          " ".join(model.caption_image(test_img2.to(device), dataset.vocab)))
    print('=' * 50)

    test_img3 = transform(
        cv2.imread("../Data/flickr8k/test_examples/bus.png")).unsqueeze(0)
    print("Example 3 CORRECT: Bus driving by parked cars")
    print("Example 3 OUTPUT: " +
          " ".join(model.caption_image(test_img3.to(device), dataset.vocab)))
    print('=' * 50)

    test_img4 = transform(
        cv2.imread("../Data/flickr8k/test_examples/boat.png")).unsqueeze(0)
    print("Example 4 CORRECT: A small boat in the ocean")
    print("Example 4 OUTPUT: " +
          " ".join(model.caption_image(test_img4.to(device), dataset.vocab)))
    print('=' * 50)

    test_img5 = transform(
        cv2.imread("../Data/flickr8k/test_examples/horse.png")).unsqueeze(0)
    print("Example 5 CORRECT: A cowboy riding a horse in the desert")
    print("Example 5 OUTPUT: " +
          " ".join(model.caption_image(test_img5.to(device), dataset.vocab)))
    print('=' * 50)

    model.train()
def train(num_epochs, model, device, train_loader, val_loader, images, texts, lengths,
          converter, optimizer, lr_scheduler, prediction_dir, print_iter):
    criterion = CTCLoss()
    criterion.to(device)
    images = images.to(device)
    model.to(device)
    for epoch in range(num_epochs):
        print(epoch)
        count = 0
        model.train()
        for i, datas in enumerate(train_loader):
            datas, targets = datas
            batch_size = datas.size(0)
            count += batch_size
            dataloader.loadData(images, datas)
            t, l = converter.encode(targets)
            dataloader.loadData(texts, t)
            dataloader.loadData(lengths, l)
            preds = model(images)
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, texts, preds_size, lengths) / batch_size
            model.zero_grad()
            cost.backward()
            optimizer.step()
            if count % print_iter < train_loader.batch_size:
                print('epoch {} [{}/{}] loss : {}'.format(
                    epoch, count, len(train_loader.dataset), cost))
        validation(model, device, val_loader, images, texts, lengths, converter,
                   prediction_dir)
        save_model('{}'.format(epoch), model, optimizer, lr_scheduler)
        lr_scheduler.step()
ntokens = len(corpus.dictionary)
if args.load is None:
    model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                           args.dropout, args.tied, corpus=corpus, embeddings=embeddings)
else:
    with open(args.load, 'rb') as f:
        model = torch.load(f)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        # Recurse into tuples of hidden states (e.g. an LSTM's (h, c) pair);
        # this completion follows the standard word-language-model helper.
        return tuple(repackage_hidden(v) for v in h)
# Preprocess internally uses the Triage class.
# It splits the data and encodes it with the tokenizer that is passed in.
# Creating an instance of the class
Preprocess = prepare_data.Preprocess(dataframe=df_new_reduced,
                                     tokenizer=bert_tokenizer,
                                     max_len=config.MAX_LEN,
                                     train_batch_size=config.TRAIN_BATCH_SIZE,
                                     valid_batch_size=config.VALID_BATCH_SIZE,
                                     test_batch_size=config.TEST_BATCH_SIZE)

# Accessing the process_data_for_test method of the Preprocess class
testing_loader = Preprocess.process_data_for_test()

#################################################################################

model = model.DistillBERTClass()  # Creating the model shape
model.to(device)

# Loading back the model from checkpoint
# checkpoint = torch.load(config.checkpoint_path, map_location=device)
# model.load_state_dict(checkpoint['model_state_dict'])
model = torch.load(config.checkpoint_path, map_location=device)
model.eval()
model.to(device)  # Loading model to GPU

# Validation on test data
# Creating the loss function
# An optimizer is not needed since this is prediction only
loss_function = torch.nn.CrossEntropyLoss()

test_loss, test_accu, y_test_actual, y_test_predicted, y_test_predicted_prob_list = valid(
    model=model, testing_loader=testing_loader, loss_fn=loss_function)
optimizer.param_groups[0]['lr'] = args.lr
model.dropouti, model.dropouth, model.dropout, args.dropoute = (
    args.dropouti, args.dropouth, args.dropout, args.dropoute)
if args.wdrop:
    from weight_drop import WeightDrop
    for rnn in model.rnns:
        if type(rnn) == WeightDrop:
            rnn.dropout = args.wdrop
        elif rnn.zoneout > 0:
            rnn.zoneout = args.wdrop
###
if not criterion:
    criterion = torch.nn.CrossEntropyLoss()
###
if args.cuda:
    model = model.to('cuda')
###
params = list(model.parameters()) + list(criterion.parameters())
total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0]
                   for x in params if x.size())
print('Args:', args)
print('Model total parameters:', total_params)

if args.torchscript:
    print("Scripting the module...")
    model = torch.jit.script(model)

if args.trace:
    example_input = torch.randn(args.bptt, args.batch_size).to('cuda')
    print('data.size():{}\n'.format(example_input.size()))
def main(model, path):
    print(path)
    t1 = time.time()

    checkpoint_folder = "Model_Checkpoints"
    project_path = os.getcwd()
    save_path = os.path.join(project_path, checkpoint_folder)
    if not os.path.exists(checkpoint_folder):
        os.makedirs(checkpoint_folder)
    else:
        shutil.rmtree(save_path)
        os.makedirs(checkpoint_folder)

    in_features = 300
    hidden_size = 256
    layer_num = 2

    print("\n")
    print(" Loading Data ... ")
    print("=" * 30)
    print("\n")
    train_dl, valid_dl, trn, vld = dataloader.train_val_loader(path)
    print(" Got train_dataloader and validation_dataloader ")
    print("=" * 30)
    print("\n")

    print(" Loading LSTM Model ...")
    print("=" * 30)
    print("\n")
    model = model.Rnn_Lstm(in_features, hidden_size, layer_num, 391)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    criterion = nn.BCEWithLogitsLoss()
    epochs = 10

    print(" Training started ... ")
    print("=" * 30)
    print("\n")
    for epoch in range(1, epochs + 1):
        checkpoint_name = "checkpoint_" + str(epoch) + ".pth"
        checkpoint_save_path = os.path.join(save_path, checkpoint_name)

        running_loss = 0.0
        model.train()  # turn on training mode
        for x, y in tqdm.tqdm(train_dl):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            preds = model(x)
            loss = criterion(preds, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * x.size(0)
        epoch_loss = running_loss / len(trn)

        # calculate the validation loss for this epoch
        val_loss = 0.0
        model.eval()  # turn on evaluation mode
        for x, y in valid_dl:
            x, y = x.to(device), y.to(device)
            preds = model(x)
            loss = criterion(preds, y)
            val_loss += loss.item() * x.size(0)
        val_loss /= len(vld)

        print('Epoch: {}, Training Loss: {:.4f}, Validation Loss: {:.4f} \n'.format(
            epoch, epoch_loss, val_loss))
        print("Checkpoint saved after {} epoch\n".format(epoch))
        torch.save(model.state_dict(), checkpoint_save_path)

    print("Training completed -> Finished -- {} \n".format(time.time() - t1))
    print("=" * 30)
    print("\n")
def train_model(model, trainds, testds, config, device, writer=None):
    batch_size = config['data']['batch_size']
    status = config['training']['status']
    epochs = config['training']['epochs']
    balanced_loss = config['loss']['balanced']
    # nval = config['nval']
    nval_tests = config['nval_tests']
    nsave = config['nsave']
    model_save = config['model_save']
    rank = config['rank']
    nranks = config['nranks']
    hvd = config['hvd']
    num_classes = config['data']['num_classes']

    ## create samplers for these datasets
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainds, nranks, rank, shuffle=True, drop_last=True)
    test_sampler = torch.utils.data.distributed.DistributedSampler(
        testds, nranks, rank, shuffle=True, drop_last=True)

    ## create data loaders
    train_loader = torch.utils.data.DataLoader(
        trainds,
        shuffle=False,
        sampler=train_sampler,
        num_workers=config['data']['num_parallel_readers'],
        batch_size=batch_size,
        persistent_workers=True)
    test_loader = torch.utils.data.DataLoader(
        testds,
        shuffle=False,
        sampler=test_sampler,
        num_workers=config['data']['num_parallel_readers'],
        batch_size=batch_size,
        persistent_workers=True)

    loss_func = loss.get_loss(config)
    ave_loss = CalcMean.CalcMean()
    acc_func = accuracy.get_accuracy(config)
    ave_acc = CalcMean.CalcMean()

    opt_func = optimizer.get_optimizer(config)
    opt = opt_func(model.parameters(), **config['optimizer']['args'])

    lrsched_func = optimizer.get_learning_rate_scheduler(config)
    lrsched = lrsched_func(opt, **config['lr_schedule']['args'])

    # Add Horovod Distributed Optimizer
    if hvd:
        opt = hvd.DistributedOptimizer(opt, named_parameters=model.named_parameters())
        # Broadcast parameters from rank 0 to all other processes.
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    model.to(device)

    for epoch in range(epochs):
        logger.info(' epoch %s of %s', epoch, epochs)

        train_sampler.set_epoch(epoch)
        test_sampler.set_epoch(epoch)

        model.to(device)
        for batch_counter, (inputs, targets, class_weights, nonzero_mask) in enumerate(train_loader):

            # move data to device
            inputs = inputs.to(device)
            targets = targets.to(device)
            class_weights = class_weights.to(device)
            nonzero_mask = nonzero_mask.to(device)

            # zero grads
            opt.zero_grad()
            outputs, endpoints = model(inputs)

            # set the weights
            if balanced_loss:
                weights = class_weights
                nonzero_to_class_scaler = torch.sum(
                    nonzero_mask.type(torch.float32)) / torch.sum(
                        class_weights.type(torch.float32))
            else:
                weights = nonzero_mask
                nonzero_to_class_scaler = torch.ones(1, device=device)

            loss_value = loss_func(outputs, targets.long())
            loss_value = torch.mean(loss_value * weights) * nonzero_to_class_scaler

            # backward calc grads
            loss_value.backward()

            # apply grads
            opt.step()

            ave_loss.add_value(float(loss_value.to('cpu')))

            # calc acc
            ave_acc.add_value(float(acc_func(outputs, targets, weights).to('cpu')))

            # print statistics
            if batch_counter % status == 0:
                logger.info('<[%3d of %3d, %5d of %5d]> train loss: %6.4f acc: %6.4f',
                            epoch + 1, epochs, batch_counter,
                            len(trainds) / nranks / batch_size,
                            ave_loss.mean(), ave_acc.mean())

                if writer and rank == 0:
                    global_batch = epoch * len(trainds) / nranks / batch_size + batch_counter
                    writer.add_scalars('loss', {'train': ave_loss.mean()}, global_batch)
                    writer.add_scalars('accuracy', {'train': ave_acc.mean()}, global_batch)
                    # writer.add_histogram('input_trans', endpoints['input_trans'].view(-1), global_batch)

                ave_loss = CalcMean.CalcMean()
                ave_acc = CalcMean.CalcMean()

            # release tensors for memory
            del inputs, targets, weights, endpoints, loss_value

            if config['batch_limiter'] and batch_counter > config['batch_limiter']:
                logger.info('batch limiter enabled, stop training early')
                break

        # save at end of epoch
        torch.save(model.state_dict(), model_save + '_%05d.torch_model_state_dict' % epoch)

        if nval_tests == -1:
            nval_tests = len(testds) / nranks / batch_size
        logger.info('epoch %s complete, running validation on %s batches', epoch, nval_tests)

        model.to(device)
        # every epoch, evaluate validation data set
        with torch.no_grad():
            vloss = CalcMean.CalcMean()
            vacc = CalcMean.CalcMean()
            vious = [CalcMean.CalcMean() for i in range(num_classes)]

            for valid_batch_counter, (inputs, targets, class_weights, nonzero_mask) in enumerate(test_loader):

                inputs = inputs.to(device)
                targets = targets.to(device)
                class_weights = class_weights.to(device)
                nonzero_mask = nonzero_mask.to(device)

                # set the weights
                if balanced_loss:
                    weights = class_weights
                    nonzero_to_class_scaler = torch.sum(
                        nonzero_mask.type(torch.float32)) / torch.sum(
                            class_weights.type(torch.float32))
                else:
                    weights = nonzero_mask
                    nonzero_to_class_scaler = torch.ones(1, device=device)

                outputs, endpoints = model(inputs)

                loss_value = loss_func(outputs, targets.long())
                loss_value = torch.mean(loss_value * weights) * nonzero_to_class_scaler
                vloss.add_value(float(loss_value.to('cpu')))

                # calc acc
                vacc.add_value(float(acc_func(outputs, targets, weights).to('cpu')))

                # calc ious
                ious = get_ious(outputs, targets, weights, num_classes)
                for i in range(num_classes):
                    vious[i].add_value(float(ious[i]))

                if valid_batch_counter > nval_tests:
                    break

            mean_acc = vacc.mean()
            mean_loss = vloss.mean()
            # if config['hvd'] is not None:
            #     mean_acc = config['hvd'].allreduce(torch.tensor([mean_acc]))
            #     mean_loss = config['hvd'].allreduce(torch.tensor([mean_loss]))

            mious = float(torch.sum(torch.FloatTensor([x.mean() for x in vious]))) / num_classes
            ious_out = {
                'jet': vious[0].mean(),
                'electron': vious[1].mean(),
                'bkgd': vious[2].mean(),
                'all': mious
            }

            # add validation to tensorboard
            if writer and rank == 0:
                global_batch = epoch * len(trainds) / nranks / batch_size + batch_counter
                writer.add_scalars('loss', {'valid': mean_loss}, global_batch)
                writer.add_scalars('accuracy', {'valid': mean_acc}, global_batch)
                writer.add_scalars('IoU', ious_out, global_batch)

            logger.warning('>[%3d of %3d, %5d of %5d]<<< ave valid loss: %6.4f ave valid acc: %6.4f on %s batches >>>',
                           epoch + 1, epochs, batch_counter,
                           len(trainds) / nranks / batch_size,
                           mean_loss, mean_acc, valid_batch_counter + 1)
            logger.warning(' >> ious: %s', ious_out)

        # update learning rate
        lrsched.step()
model = model.MCCNNNet()

# decide whether to run on the GPU
device = torch.device(args.gpu_device if args.cuda else "cpu")

# load a previously trained model
if args.loadmodel is not None:
    pretrain_dict = torch.load(args.loadmodel)
    model.load_state_dict(pretrain_dict)

train_dataset = MyDataLoader(args.datapath)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True)

model.to(device)
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)


def main():
    model.train()
    for epoch in range(1, args.epochs):
        i = 0
        for left_image, right_image, label in train_loader:
            left_image = left_image.to(device)
            right_image = right_image.to(device)
            label = label.to(device)
            label = label.float()
            optimizer.zero_grad()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', '-e', default=300, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--d_hidden', '-d', default=650, type=int,
                        help='number of units in hidden layers')
    parser.add_argument('--seq_len', '-b', type=int, default=20,
                        help='learning minibatch size')
    parser.add_argument('--betapoint', '-c', type=int, default=10,
                        help='betapoint for decrease beta')
    parser.add_argument('--seed', type=int, default=1111)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--batch_size', type=int, default=160)
    parser.add_argument('--d_emb', type=int, default=512)
    parser.add_argument('--n', type=int, default=2)
    parser.add_argument('--h', type=int, default=8)
    parser.add_argument('--num', type=int, default=3)
    parser.add_argument('--beta', type=float, default=1.0)
    parser.add_argument('--training_capacity', type=int, default=3e5)

    global device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    args = parser.parse_args()
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    lr = args.lr

    base_path = os.path.dirname(os.path.realpath(__file__))
    text_data_dir = os.path.join(base_path, '../misspelling/data/')
    output_dir = os.path.join(base_path, 'output/')

    ######################################################
    ###########         DATA PREPARE          ###########
    ######################################################
    train_file = 'train_correct.txt'
    val_file = 'dev_correct.txt'
    test_file = 'test_correct.txt'

    train_noise_filename = text_data_dir + 'train.txt'
    train_filename = text_data_dir + train_file
    valid_noise_filename = text_data_dir + 'dev.txt'
    valid_filename = text_data_dir + val_file
    test_noise_filename = text_data_dir + 'test.txt'
    test_filename = text_data_dir + test_file

    vocab, id2vocab = {'<eos>': 0}, {0: '<eos>'}
    vocab, id2vocab, train_len = update_vocab(text_data_dir + train_file, vocab, id2vocab)
    # train_noise_tokens = open(train_noise_filename).read().replace('\n', ' <eos> ').strip().split()
    # train_tokens = open(train_filename).read().replace('\n', ' <eos> ').strip().split()

    vocab, id2vocab, _ = update_vocab(text_data_dir + val_file, vocab, id2vocab)
    valid_noise_tokens = open(valid_noise_filename).read().replace(
        '\n', ' <eos> ').strip().split()
    valid_tokens = open(valid_filename).read().replace(
        '\n', ' <eos> ').strip().split()

    vocab, id2vocab, _ = update_vocab(text_data_dir + test_file, vocab, id2vocab)
    test_noise_tokens = open(test_noise_filename).read().replace(
        '\n', ' <eos> ').strip().split()
    test_tokens = open(test_filename).read().replace(
        '\n', ' <eos> ').strip().split()

    alph = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,:;'*!?`$%&(){}[]-/\@_#"
    seq_len = args.seq_len

    # X_train, mask_train, Y_train = make_input_data(train_noise_tokens, train_tokens, seq_len, alph, vocab)
    X_valid, mask_valid, Y_valid = make_input_data(valid_noise_tokens, valid_tokens,
                                                   seq_len, alph, vocab)
    X_test, mask_test, Y_test = make_input_data(test_noise_tokens, test_tokens,
                                                seq_len, alph, vocab)

    # X_train, mask_train, Y_train = X_train.to(device), mask_train.to(device), Y_train.to(device)
    X_valid, mask_valid, Y_valid = X_valid.to(device), mask_valid.to(device), Y_valid.to(device)
    X_test, mask_test, Y_test = X_test.to(device), mask_test.to(device), Y_test.to(device)

    ######################################################
    ###########   MODEL AND TRAINING CONFIG   ###########
    ######################################################
    model_name = "beta_{}_emb_{}_h_{}_hidden_{}_n_{}_lr_{}_bs_{}_check_{}".format(
        args.beta, args.d_emb, args.h, args.d_hidden, args.n, args.lr,
        args.batch_size, args.betapoint)

    global message_filename
    message_filename = output_dir + 'r_' + model_name + '.txt'
    model_filename = output_dir + 'm_' + model_name + '.pt'
    with open(message_filename, 'w') as out:
        out.write('start\n')

    char_vocab_size = len(alph) + 5

    global model
    if args.num == 3:
        model = model.MUDE(char_vocab_size, d_emb=args.d_emb, h=args.h, n=args.n,
                           d_hidden=args.d_hidden, vocab_size=len(vocab), dropout=0.01)
    # model = nn.DataParallel(model)
    model.to(device)

    global criterion
    criterion = nn.NLLLoss()
    global seq_criterion
    seq_criterion = nn.NLLLoss(ignore_index=char_vocab_size - 1)
    global optimizer
    optimizer = getattr(optim, 'RMSprop')(model.parameters(), lr=lr)

    ######################################################
    ###########        START TRAINING         ###########
    ######################################################
    print(args)
    print(message_filename)

    best_acc = 0
    for epoch in range(1, args.epochs + 1):
        with open(train_noise_filename) as f_input:
            with open(train_filename) as f_label:
                sentence_count = 0
                epoch_start_time = time.time()
                input_text = ""
                label_text = ""
                for input_line, label_line in zip(f_input, f_label):
                    input_text += input_line
                    label_text += label_line
                    sentence_count += 1
                    if sentence_count % args.training_capacity == 0:
                        # or train_len <= args.training_capacity and sentence_count == train_len:
                        train_noise_tokens = input_text.replace('\n', ' <eos> ').strip().split()
                        train_tokens = label_text.replace('\n', ' <eos> ').strip().split()
                        input_text = ""
                        label_text = ""
                        X_train, mask_train, Y_train = make_input_data(
                            train_noise_tokens, train_tokens, seq_len, alph, vocab)
                        train(epoch, X_train, mask_train, Y_train, args.batch_size,
                              args.seq_len, len(vocab), char_vocab_size, args)

                        # val_acc = evaluate(X_valid, mask_valid, Y_valid, args.batch_size, args.seq_len, len(vocab), args)
                        # test_acc = evaluate(X_test, mask_test, Y_test, args.batch_size, args.seq_len, len(vocab), args)
                        val_precision, val_recall, val_acc, val_f05, \
                            val_real_precision, val_real_recall, val_real_acc, val_real_f05, \
                            val_non_precision, val_non_recall, val_non_acc, val_non_f05 = check_performance(
                                X_valid, mask_valid, Y_valid, valid_noise_tokens, valid_tokens,
                                id2vocab, len(vocab), args.seq_len, args)
                        test_precision, test_recall, test_acc, test_f05, \
                            test_real_precision, test_real_recall, test_real_acc, test_real_f05, \
                            test_non_precision, test_non_recall, test_non_acc, test_non_f05 = check_performance(
                                X_test, mask_test, Y_test, test_noise_tokens, test_tokens,
                                id2vocab, len(vocab), args.seq_len, args)

                        message = (
                            '-' * 89 +
                            '\n| end of epoch {:3d} | time: {:5.4f}s | valid precision {:5.4f} | valid recall {:5.4f} | valid accuracy {:5.4f} | valid F0.5 {:5.4f} | '
                            .format(epoch, (time.time() - epoch_start_time),
                                    val_precision, val_recall, val_acc, val_f05) +
                            '\n| end of epoch {:3d} | time: {:5.4f}s | valid real-word precision {:5.4f} | valid real-word recall {:5.4f} | valid real-word accuracy {:5.4f} | valid real-word F0.5 {:5.4f} | '
                            .format(epoch, (time.time() - epoch_start_time),
                                    val_real_precision, val_real_recall, val_real_acc, val_real_f05) +
                            '\n| end of epoch {:3d} | time: {:5.4f}s | valid non-word precision {:5.4f} | valid non-word recall {:5.4f} | valid non-word accuracy {:5.4f} | valid non-word F0.5 {:5.4f} | '
                            .format(epoch, (time.time() - epoch_start_time),
                                    val_non_precision, val_non_recall, val_non_acc, val_non_f05) +
                            '\n| end of epoch {:3d} | time: {:5.4f}s | test precision {:5.4f} | test recall {:5.4f} | test accuracy {:5.4f} | test F0.5 {:5.4f} | '
                            .format(epoch, (time.time() - epoch_start_time),
                                    test_precision, test_recall, test_acc, test_f05) +
                            '\n| end of epoch {:3d} | time: {:5.4f}s | test real-word precision {:5.4f} | test real-word recall {:5.4f} | test real-word accuracy {:5.4f} | test real-word F0.5 {:5.4f} | '
                            .format(epoch, (time.time() - epoch_start_time),
                                    test_real_precision, test_real_recall, test_real_acc, test_real_f05) +
                            '\n| end of epoch {:3d} | time: {:5.4f}s | test non-word precision {:5.4f} | test non-word recall {:5.4f} | test non-word accuracy {:5.4f} | test non-word F0.5 {:5.4f} | '
                            .format(epoch, (time.time() - epoch_start_time),
                                    test_non_precision, test_non_recall, test_non_acc, test_non_f05) +
                            '-' * 89)
                        output_s(message, message_filename)

                        # Save the model if the validation F0.5 is the best we've seen so far.
                        if val_f05 > best_acc:
                            save(model, model_filename)
                            best_acc = val_f05
OUTPUT_DIM = INPUT_DIM
HIDDEN_DIM = 15
BATCH_SIZE = 1
NUM_LAYERS = 1
NUM_EPOCHS = 1
SEQ_SIZE = 60
SKIP_SIZE = 1

model = model.LSTMModel(
    input_dim=INPUT_DIM,
    output_dim=OUTPUT_DIM,
    hidden_dim=HIDDEN_DIM,
    batch_size=BATCH_SIZE,
    num_layers=NUM_LAYERS,
)
model.to(device=DEVICE)

loss_fn = torch.nn.MSELoss(reduction="sum")
optimiser = torch.optim.Adam(model.parameters(), lr=1e-1)

training_data = make_training_data()
print(training_data[0][0].shape[0])
real = csv_data.iloc[:, -1]

"""
loss = 0
for epoch in range(NUM_EPOCHS):
    print("Epoch", epoch, "Loss", loss)
    loss = train(model, training_data, loss_fn, optimiser, batch_size=BATCH_SIZE)

predictions = predict()
def main():
    dataframe = pd.read_csv('../input/imdb.csv')  # load dataframe
    # sentiment is a categorical target variable, so we label-encode it;
    # we can do it by hand like this, or with sklearn's LabelEncoder
    dataframe.sentiment = dataframe.sentiment.apply(lambda x: 1 if x == 'positive' else 0)

    # now split data into validation and training
    df_train, df_valid = model_selection.train_test_split(
        dataframe,
        test_size=0.1,  # 10 percent of the dataframe is used for validation
        random_state=42,  # a fixed random state gives the same split every time this script runs
        shuffle=True,  # shuffle indices
        stratify=dataframe.sentiment.values  # same class distribution in train and valid
    )
    df_train = df_train.reset_index(drop=True)  # reset indices from 0 to len(df_train)
    df_valid = df_valid.reset_index(drop=True)  # reset indices from 0 to len(df_valid)

    # make datasets with our class in order to make data loaders
    training_dataset = dataset.BERTdataset(review=df_train.review.values,
                                           sentiment=df_train.sentiment.values)
    # from dataset to dataloader
    training_data_loader = torch.utils.data.DataLoader(
        dataset=training_dataset,
        batch_size=config.TRAINING_BATCH_SIZE,
        shuffle=True,
        num_workers=4)

    validation_dataset = dataset.BERTdataset(
        review=df_valid.review.values,
        sentiment=df_valid.sentiment.values,
    )
    # from dataset to dataloader
    validation_data_loader = torch.utils.data.DataLoader(
        dataset=validation_dataset,
        batch_size=config.VALIDATION_BATCH_SIZE,
        shuffle=False,
        num_workers=4)

    device = torch.device('cuda')
    model = model.BERTsentiment()
    model.to(device)  # move model to cuda device

    # params to optimize
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_parameters = [
        {
            'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.001
        },
        {
            'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay': 0.00
        },
    ]

    number_of_training_steps = int(
        len(df_train) / config.TRAINING_BATCH_SIZE * config.EPOCHS)

    # AdamW focuses on regularization, and the model generalizes better with it
    optimizer = AdamW(params=optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=number_of_training_steps,
    )

    best_accuracy = 0
    for epoch in range(config.EPOCHS):
        print('EPOCH:', epoch + 1)
        engine.training_loop(training_data_loader, model, optimizer, scheduler, device)
        outputs, sentiments = engine.validation_loop(validation_data_loader, model, device)
        # distribution is 50/50 so we can use the accuracy score
        outputs = np.array(outputs) >= 0.5  # positive class
        accuracy = metrics.accuracy_score(sentiments, outputs)
        print('ACCURACY SCORE', accuracy)
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), config.MODEL_PATH)  # save model in working dir
            best_accuracy = accuracy
def main():
    """
    Training and validation.
    """
    global epochs_since_improvement, start_epoch, label_map, best_loss, epoch, checkpoint

    # Initialize model or load checkpoint
    if checkpoint is None:
        model = SSD300(n_classes=n_classes)
        # Initialize the optimizer, with twice the default learning rate for biases,
        # as in the original Caffe repo
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)
        optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': 2 * lr},
                                            {'params': not_biases}],
                                    lr=lr, momentum=momentum, weight_decay=weight_decay)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_loss = checkpoint['best_loss']
        print('\nLoaded checkpoint from epoch %d. Best loss so far is %.3f.\n' %
              (start_epoch, best_loss))
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to default device
    model = model.to(device)
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)

    # Custom dataloaders
    train_dataset = PascalVOCDataset(data_folder, split='train')
    val_dataset = PascalVOCDataset(data_folder, split='test')
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=train_dataset.collate_fn,  # note that we're passing the collate function here
        num_workers=workers,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=val_dataset.collate_fn,
        num_workers=workers,
        pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # The paper describes decaying the learning rate at the 80000th, 100000th and 120000th
        # 'iteration', i.e. model update or batch.
        # The paper uses a batch size of 32, which means there were about 517 iterations in an epoch.
        # Therefore, to find the epochs to decay at, you could do:
        #   if epoch in {80000 // 517, 100000 // 517, 120000 // 517}:
        #       adjust_learning_rate(optimizer, 0.1)
        # In practice, I just decayed the learning rate when the loss stopped improving for long periods,
        # and I would resume from the last best checkpoint with the new learning rate,
        # since there's no point in resuming at the most recent and significantly worse checkpoint.
        # So, when you're ready to decay the learning rate, just set
        # checkpoint = 'BEST_checkpoint_ssd300.pth.tar' above and have
        # adjust_learning_rate(optimizer, 0.1) BEFORE this 'for' loop.

        # One epoch's training
        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch)

        # One epoch's validation
        val_loss = validate(val_loader=val_loader, model=model, criterion=criterion)

        # Did validation loss improve?
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer, val_loss,
                        best_loss, is_best)