def GenerateTrainingData(cls,
                         captchas_dir,
                         training_data_dir,
                         max_size=500,
                         max_captcha_length=8):
    """Convert the captcha images under a directory into saved training batches.

    Every path yielded by `utils.GetFilePathsUnderDir(captchas_dir)` is loaded
    with `ImagePreprocessor.GetImageData`, transformed with
    `ImagePreprocessor.ProcessImage` and stored in a float32 buffer holding
    `max_size` examples. The matching label row is filled from
    `_GetCaptchaIdsFromImageFilename` and zero-padded on the right up to
    `max_captcha_length`. Each time `max_size` examples have accumulated, both
    buffers are handed to `cls.Save` together with the path
    `<training_data_dir>/training_images_<k>.npy`, where `k` counts batches
    starting at 1.

    Args:
        captchas_dir: Directory containing the captcha image files.
        training_data_dir: Directory the batch files are written to.
        max_size: Number of examples per saved batch.
        max_captcha_length: Width of each label row.

    Returns:
        None. Results are only produced through `cls.Save`.

    Notes:
        * Files whose image data cannot be loaded are reported on stdout and
          skipped; they do not count towards a batch.
        * A failure inside `cls.Save` is reported on stdout and generation
          continues with the next batch.
        * The example counter is incremented before the slot is computed, so
          within a batch the first example lands in slot 1 and the last one
          in slot 0. Images and labels stay aligned with each other.
        * Examples left over after the last full batch are never saved.
        * Assumes `_GetCaptchaIdsFromImageFilename` returns a 1-D array with
          at most `max_captcha_length` entries; a longer one would make the
          label assignment fail -- TODO confirm against that helper.
    """
    image_shape = _GetShapeOfImagesUnderDir(captchas_dir)
    if image_shape is None:
        # Without a sample image there is nothing to size the buffers from,
        # and no example could be generated anyway.
        print('No captcha images found under {0}.'.format(captchas_dir))
        return

    processed_shape = ImagePreprocessor.GetProcessedImageShape(image_shape)
    training_image_data = numpy.zeros(
        (max_size,) + tuple(processed_shape), dtype=numpy.float32)
    training_labels = numpy.zeros(
        (max_size, max_captcha_length), dtype=numpy.int32)

    # NOTE: print() is always called with exactly one argument so the output
    # is identical under Python 2's print statement and Python 3's function.
    i = 0
    for captcha_filepath in utils.GetFilePathsUnderDir(captchas_dir):
        try:
            image_data = ImagePreprocessor.GetImageData(captcha_filepath)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print('%s %s' % (e, captcha_filepath))
            continue

        i += 1
        index = i % max_size
        training_image_data[index] = ImagePreprocessor.ProcessImage(image_data)

        captcha_ids = _GetCaptchaIdsFromImageFilename(captcha_filepath)
        # The buffers are reused between batches, so clear the row first to
        # avoid leaking trailing ids from a longer label of an earlier batch.
        training_labels[index, :] = 0
        training_labels[index, :captcha_ids.shape[0]] = captcha_ids

        if i % 1000 == 0:
            print('Generated {0} examples.'.format(i))

        if i % max_size == 0:
            print(i)
            # Floor division keeps the batch number an integer regardless of
            # the interpreter's division semantics.
            file_path = os.path.join(
                training_data_dir,
                "training_images_{0}.npy".format(i // max_size))
            try:
                cls.Save(file_path, training_image_data, training_labels)
            except Exception as e:
                print(e)
def _GetShapeOfImagesUnderDir(captchas_dir):
    """Return the shape of the first loadable image under `captchas_dir`.

    Only one image is inspected; the name suggests all images in the
    directory are expected to share that shape, but this is not verified.

    Args:
        captchas_dir: Directory passed to `utils.GetFilePathsUnderDir`.

    Returns:
        The `.shape` attribute of the first image that
        `ImagePreprocessor.GetImageData` loads successfully, or None when the
        directory yields no paths or none of them can be loaded.
    """
    for captcha_filepath in utils.GetFilePathsUnderDir(captchas_dir):
        try:
            image_data = ImagePreprocessor.GetImageData(captcha_filepath)
        except Exception as e:
            # GenerateTrainingData skips unreadable files while building
            # batches; apply the same best-effort policy here so a single bad
            # file at the front of the listing cannot abort the whole run.
            # Single-argument print() behaves the same on Python 2 and 3.
            print('%s %s' % (e, captcha_filepath))
            continue
        return image_data.shape
    return None