def main():
    """Download captcha images and file each one under its predicted text.

    Loads the versioned ``CaptchaNN`` checkpoint, then repeatedly fetches an
    image from the captcha endpoint into a staging file inside ``./fetched``,
    runs the network on it and renames the staging file to
    ``<prediction>.jpg``.

    The ``./fetched`` directory is expected to exist already.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_path = "captcha-breaker-v%d.pth" % CaptchaNN.version()
    data_path = "./fetched"
    image_url = "http://captcha.qq.com/getimage"
    download_count = 500

    net = CaptchaNN()
    net = net.to(device)
    # map_location lets a checkpoint saved on a GPU machine load on the CPU
    # fallback selected above instead of failing during deserialization.
    net.load_state_dict(torch.load(model_path, map_location=device))
    net.eval()

    transform = CaptchaDataset.get_transform(224, 224)
    # Every download lands in the same staging file before being renamed.
    staging_path = os.path.join(data_path, "!unclassified.jpg")

    # Pure inference: no autograd bookkeeping is needed.
    with torch.no_grad():
        for _ in range(download_count):
            urllib.request.urlretrieve(image_url, staging_path)

            # Close the image file before renaming it; an open handle leaks a
            # descriptor per iteration and blocks the rename on Windows.
            with Image.open(staging_path) as pil_img:
                img = transform(pil_img)

            img = CaptchaDataset.to_var(img)
            X = img.to(device)
            pred = CaptchaDataset.decode_label(net.predict(X))

            os.rename(staging_path, os.path.join(data_path, pred + ".jpg"))
            print("Downloaded and recognized as ", pred)
def main():
    """Evaluate the saved captcha model on the test set, printing running accuracy.

    Loads the versioned ``CaptchaNN`` checkpoint, iterates ``./datav2/test``
    one sample at a time, prints the decoded ground truth next to the decoded
    prediction, and prints the running count of fully correct captchas.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_path = "captcha-breaker-v%d.pth" % CaptchaNN.version()
    data_path = "./datav2/test"

    net = CaptchaNN()
    net = net.to(device)
    # map_location lets a checkpoint saved on a GPU machine load on the CPU
    # fallback selected above.
    net.load_state_dict(torch.load(model_path, map_location=device))
    net.eval()

    eval_dataset = CaptchaDataset(data_path, 224, 224)
    eval_iter = DataLoader(eval_dataset,
                           batch_size=1,
                           num_workers=0,
                           shuffle=True,
                           drop_last=False)

    right_num = 0
    # Pure inference: no autograd bookkeeping is needed.
    with torch.no_grad():
        # Count from 1 so `total` is the number of samples seen so far. The
        # 0-based index divided by zero on the first sample and under-counted
        # the total by one afterwards.
        for total, (X, label) in enumerate(eval_iter, start=1):
            X = X.to(device)
            label = label.to(device).long()

            # One target column and one network output per character position.
            targets = tuple(label[:, k] for k in range(4))
            y1, y2, y3, y4 = net(X)
            # batch_size is 1, so each argmax is a single element and
            # .item() is valid.
            preds = tuple(
                torch.max(y, dim=1)[1].item() for y in (y1, y2, y3, y4))

            print(CaptchaDataset.decode_label(targets),
                  CaptchaDataset.decode_label(preds))

            # A captcha only counts when all four characters match.
            if all(t == p for t, p in zip(targets, preds)):
                right_num += 1

            print("RIGHT: %d/%d %f" % (right_num, total, right_num / total))
def train(model: Model, init_epoch):
    """Fit ``model`` on the captcha samples, checkpointing weights each epoch.

    :param model: Model to train; it is fitted in place and returned.
    :param init_epoch: Epoch index to resume from. Training is configured
        with ``epochs=init_epoch + 100``.
    :return: The same ``model`` instance after fitting.
    """
    # Weights-only checkpoint written to a single "latest" file, and only
    # when the callback considers the epoch an improvement
    # (save_best_only=True).
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_dir + '/latest.ckpt',
        save_best_only=True,
        verbose=1,
        save_weights_only=True,
        save_freq='epoch',
    )

    train_data = CaptchaDataset("samples/train", batch_size=batch_size)
    validation_data = CaptchaDataset("samples/test", batch_size=batch_size)

    fit_options = {
        'validation_data': validation_data,
        'epochs': init_epoch + 100,
        'callbacks': [checkpoint_callback],
        'initial_epoch': init_epoch,
        'verbose': 2,
    }
    model.fit(train_data, **fit_options)
    return model
def display_batch(X_batch, y_batch):
    '''
    Display a batch of images in a window, each titled with its label text.

    The images are laid out on a near-square grid with ceil(sqrt(n)) columns;
    grid cells beyond the last image are hidden.

    :param X_batch: Must be the batch image samples (indexed as
        X_batch[index, :, :, 0])
    :param y_batch: Must be the batch categorical labels (argmax is taken
        over axis 2)

    Returns the figure and the axes of the graph. The axes are always a 2D
    array of size rows x cols.
    Raises ZeroDivisionError if the batch is empty (the grid would have zero
    columns).
    '''
    from dataset import CaptchaDataset
    dataset = CaptchaDataset()
    texts = dataset.labels_to_text(y_batch.argmax(axis=2))

    n = X_batch.shape[0]

    # Number of column subplots per row
    cols = ceil(sqrt(n))

    # Number of rows (ceiling of n / cols)
    rows = n // cols
    if n % cols > 0:
        rows += 1

    # squeeze=False keeps `ax` two-dimensional even for a 1x1 or 1x2 grid.
    # With the default squeezing, batches of 1 or 2 images yield a bare Axes
    # or a 1D array, and ax[i, j] fails.
    fig, ax = plt.subplots(rows, cols, figsize=(8, 8), squeeze=False)

    for i in range(rows):
        for j in range(cols):
            index = i * cols + j
            if index >= n:
                # Trailing cell of the last row with no image to show
                ax[i, j].set_visible(False)
                continue

            plt.sca(ax[i, j])
            plt.imshow(X_batch[index, :, :, 0] * 255, cmap='gray')
            plt.xticks([])
            plt.yticks([])

            # Each label is a sequence of byte chars; join and decode them
            title = b''.join(texts[index]).decode()
            plt.title(title)

    plt.tight_layout()
    plt.show()

    return fig, ax
def __iter__(self):
    '''
    Yield individual character samples cut out of the captcha images produced
    by the parent iterator.

    For every batch taken from the parent, the characters of its first image
    are located with find_chars() and yielded one by one.

    :return A generator of pairs (char image, label); the label is
    y_batch[0, k, :] for the kth character of the image
    '''
    text_size = CaptchaDataset().text_size

    # A for-loop ends this generator cleanly if the parent iterator is ever
    # exhausted. A bare next() inside a generator would surface that as
    # RuntimeError (PEP 479).
    for X_batch, y_batch in super().__iter__():
        chars = find_chars(X_batch[0, :, :, 0],
                           char_size=IMAGE_SIZE,
                           num_chars=text_size)
        for k in range(text_size):
            yield chars[k], y_batch[0, k, :]
def __init__(self):
    '''
    Set up the parent generator over the captcha dataset, asking it for
    enough extra samples to cover NUM_SAMPLES characters.

    The extra sample count grows in whole multiples of the dataset size until
    (dataset size + extra samples) x text size reaches NUM_SAMPLES.
    '''
    source = CaptchaDataset()
    base_count, chars_per_image = source.num_samples, source.text_size

    # Start with one dataset's worth of extra samples and keep adding more
    # until the character budget is met
    generated = base_count
    while True:
        if (base_count + generated) * chars_per_image >= NUM_SAMPLES:
            break
        generated += base_count

    super().__init__(source.X,
                     source.y,
                     batch_size=1,
                     shuffle=True,
                     generate_samples=generated)
def __init__(self, char_size=(40, 40)):
    '''
    Build and compile the CNN that classifies single character images.

    :param char_size: Tuple (height, width) of the input character images;
        a single channel is appended to form the input shape
    '''
    num_classes = CaptchaDataset().num_char_classes

    # (filters, kernel size, activation) of each convolutional stage; every
    # stage is followed by a 2x2 max pooling. The first stage has no
    # activation.
    conv_stages = (
        (32, (5, 5), None),
        (64, (3, 3), 'relu'),
        (32, (3, 3), 'relu'),
    )

    t_in = Input(shape=char_size + (1, ), dtype=np.float32)

    features = t_in
    for filters, kernel, activation in conv_stages:
        features = Conv2D(filters,
                          kernel_size=kernel,
                          kernel_initializer='he_normal',
                          activation=activation,
                          padding='same')(features)
        features = MaxPool2D((2, 2))(features)

    # Classification head: one softmax output per character class
    features = Flatten()(features)
    features = Dense(64, activation='relu')(features)
    t_out = Dense(num_classes, activation='softmax')(features)

    # Initialize super instance (a keras model)
    super().__init__([t_in], [t_out])

    # Compile the model
    self.compile(optimizer='rmsprop',
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
def predict(self, X):
    '''
    Predict the text inside each of the given captcha images.

    Every image is split into its characters with find_chars() and the
    characters are scored by the char classifier.

    :param X: Must be a 4D array of size:
        n.samples x img.height x img.width x 1

    :return A 3D float32 array 'y' of size
        n.samples x text-size x alphabet-size
    Where the element y[i, j, k] indicates the probability of the jth
    character on the ith image to be the label k
    '''
    dataset = CaptchaDataset()
    num_classes, text_size = dataset.num_char_classes, dataset.text_size

    # Character size is read from the classifier's first layer
    char_size = self.char_classifier.layers[0].input_shape[1:3]

    n_images = X.shape[0]
    probs = np.zeros([n_images, text_size, num_classes], dtype=np.float32)

    for idx in range(n_images):
        glyphs = find_chars(X[idx, :, :, 0], char_size, num_chars=text_size)
        # Add the trailing channel axis the classifier input has
        glyph_batch = glyphs.reshape((-1, ) + char_size + (1, ))
        probs[idx] = self.char_classifier.predict(glyph_batch)

    return probs
normalized = True if normalized: from dataset import mean, std else: mean = [0. for _ in range(3)] std = [1. for _ in range(3)] def collate_fn(batch): imgs = torch.stack([x[0] for x in batch], dim=0) labels = [x[1] for x in batch] return imgs, labels training_dataset = CaptchaDataset(os.path.join(root_dir, 'train'), mean=mean, std=std) testing_dataset = CaptchaDataset(os.path.join(root_dir, 'test'), mean=mean, std=std) print('data has been loaded.') traing_bsz = 64 testing_bsz = 64 num_workers = 4 training_loader = DataLoader(training_dataset, batch_size=traing_bsz, shuffle=True, collate_fn=collate_fn,
axis=0) if __name__ == '__main__': # Unitary test import matplotlib.pyplot as plt from utils import waitKey def wait(): if waitKey() == 'q': raise KeyboardInterrupt() try: while True: # Get 1 sample from the captcha dataset dataset = CaptchaDataset() input = iter(InputFlow(dataset.X, dataset.y, batch_size=1)) X_batch, y_batch = next(input) # Find characters in the image frames = find_chars(X_batch[0, :, :, 0], char_size=(40, 40), num_chars=dataset.text_size) # Show all the extracted characters n = len(frames) fig, ax = plt.subplots(1, n, figsize=(10, 2)) for i in range(0, n): plt.sca(ax[i])
def predict_text(self, X): ''' Its the same as predict_abels() but this returns directly a char values instead of integer labels :return A 2D array of size n.samples x text-size of char values ''' return CaptchaDataset().labels_to_text(self.predict_labels(X)) if __name__ == '__main__': from input import InputFlow import matplotlib.pyplot as plt from metrics import summary # Get dataset images dataset = CaptchaDataset() X, y = dataset.X, dataset.y # Build the model model = OCRModel() # The next lines will show a bunch of captcha images & the predictions made by # the model indices = np.random.choice(np.arange(0, dataset.num_samples), size=9) X_batch, y_batch = next(iter(InputFlow(X, y, batch_size=9))) # Predict texts inside images texts = [ ''.join([char.item().decode() for char in text]) for text in dataset.labels_to_text(y_batch.argmax(axis=2))
from warpctc_pytorch import CTCLoss
from model import CRNNCTC
from dataset import CaptchaDataset
from utils import get_accuracy, AverageMeter, collate_fn, train_result, test_result
from tqdm import tqdm

# Log file opened at import time; mode "w" truncates any previous log.
# NOTE(review): no matching close() is visible in this part of the file.
log_file = open("log_cmd.txt", "w")

# When `normalized` is on, the statistics come from the dataset module.
# Otherwise mean is all zeros and std all ones (three entries each),
# presumably an identity normalization for 3 channels (TODO confirm).
normalized = True
if normalized:
    from dataset import mean, std
else:
    mean = [0. for _ in range(3)]
    std = [1. for _ in range(3)]

# Both splits share the same normalization statistics.
training_dataset = CaptchaDataset('train', mean=mean, std=std)
testing_dataset = CaptchaDataset('test', mean=mean, std=std)
print('data has been loaded.')

# Only the training loader shuffles; both use the project's collate_fn and
# the batch size from Config.
training_loader = DataLoader(training_dataset, batch_size=Config.batch_size, shuffle=True, collate_fn=collate_fn, pin_memory=True)
testing_loader = DataLoader(testing_dataset, batch_size=Config.batch_size, shuffle=False, collate_fn=collate_fn, pin_memory=True)
plt.subplots_adjust(top=0.85) elif eval: # Load previously computed weights if train is off and eval enabled model.load_weights() if eval: if verbose: print('Testing the model...') # Evaluate the model on test y_test_pred = model.predict(X_test, verbose=verbose) # Show accuracy score y_test_labels = y_test.argmax(axis=1) y_test_labels_pred = y_test_pred.argmax(axis=1) print('Accuracy on test set: {}'.format( np.round(accuracy_score(y_test_labels, y_test_labels_pred), 4))) # Show evaluation confusion matrx plt.figure(figsize=(6, 5)) alphabet = CaptchaDataset().alphabet sns.heatmap(confusion_matrix(y_test_labels, y_test_labels_pred), annot=True, fmt='d', xticklabels=alphabet, yticklabels=alphabet) plt.title('Confusion matrix of eval predictions') plt.show()
def predict_text(self, X):
    '''
    It is the same as predict_labels(), but the integer labels are converted
    to char values before being returned.

    :param X: Passed unchanged to predict_labels()

    :return A 2D array of size n.samples x text-size of char values
    '''
    to_text = CaptchaDataset().labels_to_text
    return to_text(self.predict_labels(X))
transforms.RandomGrayscale(0.3), transforms.ColorJitter(0.1, 0.1, 0.1), #transforms.RandomAffine((-2,2), (0.1, 0.1), (0.9, 1.) ), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) test_transform = transforms.Compose([ #transforms.Resize((165, 75)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) train_dataset = CaptchaDataset(root_dir=os.path.join(args.data, 'train'), transform=train_transform) test_dataset = CaptchaDataset(root_dir=os.path.join(args.data, 'test'), transform=test_transform) model = models.DeepCNN(length=train_dataset.label_length, n_classes=len(train_dataset.alphabet)) if args.model: print('Loading model') model.load_state_dict(torch.load(args.model)) model.to(device) train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
char_size=IMAGE_SIZE, num_chars=text_size) for k in range(0, text_size): yield chars[k], y_batch[0, k, :] if __name__ == '__main__': import matplotlib.pyplot as plt import pandas as pd # Generate char samples generator = iter(CharImageGenerator()) X = np.zeros([NUM_SAMPLES] + list(IMAGE_SIZE) + [1]).astype(np.float32) y = np.zeros([NUM_SAMPLES, CaptchaDataset().num_char_classes]).astype(np.uint8) epochs_per_tick = 10 print() for k in range(0, NUM_SAMPLES): if k % epochs_per_tick == 0: print('{}/{}, {}%'.format(k, NUM_SAMPLES, floor(k / NUM_SAMPLES * 100)).rjust(18), end='\r') X[k, :, :, 0], y[k, :] = next(generator) print('', end='\r') print() print('Done') # Print info df = pd.DataFrame.from_dict({
parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') args = parser.parse_args() device = torch.device("cpu" if args.no_cuda else "cuda") data_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) dataset = CaptchaDataset(root_dir=args.data, transform=data_transform) model = models.DeepCNN(length=dataset.label_length, n_classes=len(dataset.alphabet)) model.load_state_dict(torch.load(args.model)) model.to(device) data_loader = DataLoader(dataset, batch_size=1, shuffle=False) result = [] total_pred, total_given = [], [] for i, (images, labels) in enumerate(data_loader): t0 = time.time()