import torch
from PIL import Image
from torchvision.transforms import Compose, ToTensor

# get_dict (the label-id <-> character mapping) is a project-local helper and is
# assumed to be imported alongside this function.


def predict(captcha, model_dir='./model/model-latest.pkl', use_gpu=True, mode='captcha'):
    """Predict the text of a single captcha image.

    :param captcha: path to (or file object of) the captcha image
    :param model_dir: path to the saved model checkpoint
    :param use_gpu: run inference on the GPU when one is available
    :param mode: 'captcha' or 'kaptcha', selects which model definition to load
    :return: the predicted character string, or None for an unknown mode
    """
    gpu_available = torch.cuda.is_available()

    # Import the model definition that matches the requested dataset.
    if mode == 'captcha':
        from model import CaptchaModel
    elif mode == 'kaptcha':
        from kaptcha_model import CaptchaModel
    else:
        return None

    model = CaptchaModel()
    # Load the checkpoint onto the GPU if requested, otherwise map it to the CPU.
    if use_gpu and gpu_available:
        model_state = torch.load(model_dir)
    else:
        model_state = torch.load(model_dir, map_location=lambda storage, loc: storage)
    model.load_state_dict(model_state['network'])
    model = model.cuda() if use_gpu and gpu_available else model.cpu()

    # Convert the image to a tensor and add a batch dimension.
    transformer = Compose([ToTensor()])
    img_pil = Image.open(captcha)
    img_tensor = transformer(img_pil)

    model.eval()
    x = torch.stack([img_tensor])
    if use_gpu and gpu_available:
        x = x.cuda()

    # The model emits one prediction head per character position.
    with torch.no_grad():
        pred1, pred2, pred3, pred4 = model(x)
    pred_seq = [
        torch.argmax(pred1).item(),
        torch.argmax(pred2).item(),
        torch.argmax(pred3).item(),
        torch.argmax(pred4).item()
    ]
    # Shift by one because label id 0 is reserved for "unknown".
    pred_seq = [item + 1 for item in pred_seq]
    _, id2label = get_dict()
    res = ''.join([id2label[i] for i in pred_seq])
    return res
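# A minimal usage sketch for predict(); the checkpoint and image paths below are
# illustrative placeholders, not part of the project's actual layout.
if __name__ == '__main__':
    text = predict('./samples/captcha.png', model_dir='./model/model-latest.pkl',
                   use_gpu=True, mode='captcha')
    print(f'Predicted captcha text: {text}')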
import sys
from pathlib import Path
from pprint import pprint

import numpy as np
import torch
from sklearn import model_selection, preprocessing

# config, dataset, CaptchaModel and the train / eval / decode_predictions helpers
# are project-local modules and are assumed to be imported alongside this function.


def run_training():
    # Collect the captcha images; each filename stem is the image's label.
    data_path = Path(config.data_dir)
    image_files = list(data_path.glob("*.png"))

    targets = []
    targets_orig = []
    targets_unique = set()
    # Build the target lists from the same file list as the images so that
    # image_files[i] and targets[i] stay aligned.
    for file in image_files:
        targets_orig.append(file.stem)          # the full label string
        targets.append(list(file.stem))         # the label as a list of chars
        targets_unique.update(list(file.stem))  # track the unique characters

    msg = "Number of target data-points: {},\nUnique chars: {}\n"
    print(msg.format(len(targets), sorted(targets_unique)))

    # Label-encode the characters.
    le = preprocessing.LabelEncoder()
    le.fit(sorted(targets_unique))
    targets_encoded = [le.transform(x) for x in targets]
    targets_encoded = np.array(targets_encoded) + 1  # add 1 because 0 represents "unknown"
    print("Encoded targets:\n{}".format(targets_encoded))

    # Split the dataset.
    train_images, test_images, train_targets, test_targets, train_orig_targets, test_orig_targets = \
        model_selection.train_test_split(
            image_files, targets_encoded, targets_orig, test_size=0.1, random_state=42
        )

    train_dataset = dataset.ClassificationDataset(
        image_paths=train_images, targets=train_targets,
        resize=(config.image_height, config.image_width)
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.batch_size,
        num_workers=config.num_workers, shuffle=True
    )
    test_dataset = dataset.ClassificationDataset(
        image_paths=test_images, targets=test_targets,
        resize=(config.image_height, config.image_width)
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config.batch_size,
        num_workers=config.num_workers, shuffle=False
    )

    # Create an instance of the model and move it to the GPU.
    model = CaptchaModel(num_chars=len(le.classes_))
    model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.8, patience=5, verbose=True
    )

    prev_val_loss = sys.maxsize
    for epoch in range(config.epochs):
        # Train the model over all training batches.
        train_loss = train(model, train_loader, optimizer)
        # Evaluate the model over the test batches.
        val_preds, val_loss = eval(model, test_loader)
        scheduler.step(val_loss)

        # Decode each batch of raw predictions back into label strings.
        val_preds_tmp = []
        for vp in val_preds:
            val_preds_tmp.extend(decode_predictions(vp, le))
        val_preds = val_preds_tmp

        # Print the losses and the first five (target, prediction) pairs.
        print(f"Epoch: {epoch + 1}, Train loss: {train_loss}, Val loss: {val_loss}")
        pprint(list(zip(test_orig_targets, val_preds))[:5])

        # Save the model whenever the validation loss improves.
        if val_loss <= prev_val_loss:
            print(f"Val loss decreased from {prev_val_loss} to {val_loss}. Saving model.")
            torch.save(model.state_dict(), Path(config.output_dir) / 'captcha_model.pkl')
            prev_val_loss = val_loss
        print("\n\n")
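# A minimal sketch of the settings run_training() expects the project-local
# `config` module to provide; the concrete values here are illustrative
# placeholders, not the project's actual configuration.
from types import SimpleNamespace

config_example = SimpleNamespace(
    data_dir="./data/captcha_images",  # directory of <label>.png files
    image_height=75,                   # resize target passed to ClassificationDataset
    image_width=300,
    batch_size=8,
    num_workers=4,
    epochs=200,
    output_dir="./output",             # where captcha_model.pkl is written
)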
import json
import os

import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

# get_data_split, CaptchaLoader and the acc / multi_acc metric helpers are
# project-local utilities and are assumed to be imported alongside this function.


def eval(model_dir, data_dir, batch_size=64, log_dir='./logs', use_gpu=True, mode='captcha'):
    """Evaluate a saved model on the test split and log the accuracies.

    :param model_dir: path to the saved model checkpoint
    :param data_dir: root directory of the dataset
    :param batch_size: evaluation batch size
    :param log_dir: directory where eval.json is written
    :param use_gpu: run evaluation on the GPU when one is available
    :param mode: 'captcha' or 'kaptcha', selects which model definition to load
    :return: None; per-batch accuracies are written to <log_dir>/eval.json
    """
    x_test, y_test = get_data_split(data_dir, modes=['test'])

    if mode == 'captcha':
        from model import CaptchaModel
    elif mode == 'kaptcha':
        from kaptcha_model import CaptchaModel
    else:
        raise ValueError(f"Unknown mode: {mode}")

    model = CaptchaModel()
    gpu_available = torch.cuda.is_available()
    # Load the checkpoint onto the GPU if requested, otherwise map it to the CPU.
    if use_gpu and gpu_available:
        model = model.cuda()
        model_state = torch.load(model_dir)
    else:
        model_state = torch.load(model_dir, map_location=lambda storage, loc: storage)
    model.load_state_dict(model_state['network'])

    test_ds = CaptchaLoader((x_test, y_test), shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=True)

    model.eval()
    acc_history = []
    with tqdm(total=len(test_loader), desc='Eval') as eval_bar:
        with torch.no_grad():
            for x, y in test_loader:
                if use_gpu and gpu_available:
                    x = x.cuda()
                    y = y.cuda()

                # One prediction head per character position.
                pred1, pred2, pred3, pred4 = model(x)
                # Per-position accuracy, averaged over the four characters.
                acc_mean = np.mean([
                    acc(pred1, y[:, 0]),
                    acc(pred2, y[:, 1]),
                    acc(pred3, y[:, 2]),
                    acc(pred4, y[:, 3])
                ])
                # Whole-captcha accuracy: all four positions must be correct.
                pred = torch.stack((pred1, pred2, pred3, pred4), dim=-1)
                multi_acc_mean = multi_acc(torch.argmax(pred, dim=1), y)

                # Cast to plain floats so the history stays JSON-serialisable.
                acc_history.append([float(acc_mean), float(multi_acc_mean)])
                eval_bar.update()
                eval_bar.set_postfix(acc=acc_mean, multi_acc=multi_acc_mean)

    os.makedirs(log_dir, exist_ok=True)
    with open(os.path.join(log_dir, 'eval.json'), mode='w') as out_fp:
        json.dump(acc_history, out_fp)
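# A minimal usage sketch for eval(); the checkpoint and data paths below are
# illustrative placeholders, not the project's actual layout.
if __name__ == '__main__':
    eval(model_dir='./model/model-latest.pkl', data_dir='./data',
         batch_size=64, log_dir='./logs', use_gpu=True, mode='captcha')
    # The per-batch [per-character accuracy, whole-captcha accuracy] pairs can
    # then be read back from ./logs/eval.json.
    with open('./logs/eval.json') as fp:
        print(json.load(fp)[:3])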