Beispiel #1
0
    def __init__(self):
        """Build the SegLink detector and the CRNN recognizer and load their weights.

        Stores the settings used for each model in ``self.det_info`` and
        ``self.reg_info``, creates both models from the configured weight
        files, and fills ``self.lexicons`` with the known lexicon strings.
        """
        # Detector settings: input shape, per-channel mean and weight file.
        self.det_info = dict(
            input_shape=(512, 1024, 3),
            channel_mean=np.array([123, 117, 104]),
            weight_path='/home/eugene/storystream/ai-p-fonts/log/seglink/seglink_epoch-29_loss-0.91_val_loss-0.36.h5',
        )
        # Recognizer settings: input shape and weight file.
        self.reg_info = dict(
            input_shape=(32, 480, 1),
            weight_path='/home/eugene/storystream/ai-p-fonts/log/crnn/gru/CRNN_epoch-29_loss-0.00_val_loss-0.00.h5',
        )

        det_shape = self.det_info['input_shape']
        reg_shape = self.reg_info['input_shape']
        self.det = SegLink(input_shape=det_shape)
        self.reg = CRNN(input_shape=reg_shape)

        # Weights are loaded for the detector first, then the recognizer.
        det_weights = self.det_info['weight_path']
        self.det.create_model(det_weights)
        reg_weights = self.reg_info['weight_path']
        self.reg.create_model(reg_weights, gru=True, train=False)

        self.lexicons = [
            'gt2',
            'gt2rs',
            'targa',
            'turbo',
            '91150',
            'carrera 4',
            'carrera 4 gts',
            'carrera 4s',
            'carrera gts',
            'carrera s',
            'targa 4',
            'targa 4 gts',
            'targa 4s',
            'turbo s',
            '911 turbo',
            '911 turbo s',
            '911 carrera 4',
            '911 carrera 4 gts',
            '911 carrera 4s',
            '911 carrera gts',
            '911 carrera s',
            '911 targa 4',
            '911 targa 4 gts',
            '911 targa 4s',
        ]
Beispiel #2
0
    def __init__(self, batch_size=None):
        """Build the CRNN inference graph and restore weights from ``./model/ckpt``.

        Args:
            batch_size: Batch size for the input placeholder. When ``None``,
                ``test_batch_size`` from ``./net.cfg`` is used instead.
        """
        net_params, train_params = parser_cfg_file('./net.cfg')
        self._model_save_path = str(train_params['model_save_path'])
        self.input_img_height = int(net_params['input_height'])
        self.input_img_width = int(net_params['input_width'])
        if batch_size is None:
            self.test_batch_size = int(net_params['test_batch_size'])
        else:
            self.test_batch_size = batch_size

        # Load the label one-hot table. The context manager guarantees the
        # file handle is closed even if reading fails.
        with open('./data/word_onehot.txt', 'r') as f:
            data = f.read()
        # SECURITY NOTE(review): eval() executes whatever the file contains.
        # This is only acceptable while the file is trusted; if it holds a
        # plain literal, ast.literal_eval would be the safer choice.
        words_onehot_dict = eval(data)
        self.words_list = list(words_onehot_dict.keys())
        self.words_onehot_list = [words_onehot_dict[word] for word in self.words_list]

        # Build the network.
        self.inputs_tensor = tf.placeholder(tf.float32, [self.test_batch_size, self.input_img_height, self.input_img_width, 1])
        self.seq_len_tensor = tf.placeholder(tf.int32, [None], name='seq_len')

        crnn_net = CRNN(net_params, self.inputs_tensor, self.seq_len_tensor, self.test_batch_size, True)
        net_output, decoded, self.max_char_count = crnn_net.construct_graph()
        # Unfilled positions of the dense decode result are marked with -1.
        self.dense_decoded = tf.sparse_tensor_to_dense(decoded[0], default_value=-1)

        self.sess = tf.Session()
        saver = tf.train.Saver()
        saver.restore(self.sess, "./model/ckpt")
Beispiel #3
0
def main():
    """
        Entry point when using CRNN from the commandline

        Builds a CRNN from the parsed arguments, runs ``test()`` and writes
        each returned ``name:text`` entry to ``args.output_path`` with the
        leading/trailing underscores removed from the text part.
    """

    args = parse_arguments()

    crnn = CRNN(
        args.iteration_count,
        args.batch_size,
        args.model_path,
        args.examples_path,
        args.max_image_width,
        0,  # train/test ratio: the train share is 0 here
        args.restore,
        1)

    predict_result = crnn.test()
    # The context manager closes the output file even if an entry is malformed.
    with open(args.output_path, 'w') as f:
        for entry in predict_result:
            fields = entry.split(':')
            name = fields[0]
            # Only the second ':'-separated field is kept, as before.
            text = fields[1].strip('_')
            f.write(name + ':' + text)
Beispiel #4
0
def load_model(abc, seq_proj=None, backend='resnet18', snapshot=None, cuda=False):
    """Create a CRNN wrapped in ``nn.DataParallel`` and optionally load weights.

    Args:
        abc: Passed to ``CRNN`` as ``abc``.
        seq_proj: Passed to ``CRNN`` as ``seq_proj``. Defaults to a fresh
            ``[0, 0]`` list per call, so the default is never shared (and
            never accidentally mutated) across calls.
        backend: Passed to ``CRNN`` as ``backend``.
        snapshot: Path handed to ``torch.load``; skipped when ``None``.
        cuda: When true, the network is moved to CUDA before returning.

    Returns:
        The ``nn.DataParallel``-wrapped network.
    """
    if seq_proj is None:
        seq_proj = [0, 0]
    net = CRNN(abc=abc, seq_proj=seq_proj, backend=backend)
    net = nn.DataParallel(net)
    if snapshot is not None:
        load_weights(net, torch.load(snapshot))
    if cuda:
        net = net.cuda()
    return net
Beispiel #5
0
def load_model_from_checkpoint(checkpoint_file_name, use_gpu=False):
    """Restore a CRNN from a checkpoint file and return it in eval mode.

    The checkpoint is mapped to the CPU unless ``use_gpu`` is set; the
    returned model lives on CUDA when ``use_gpu`` is true, otherwise on the
    CPU.
    """
    net = CRNN(line_size, 1, len(vocab), 256)

    target = None if use_gpu else 'cpu'
    saved = torch.load(checkpoint_file_name, map_location=target)
    net.load_state_dict(saved['state_dict'])

    net.float()
    net.eval()

    if use_gpu:
        return net.cuda()
    return net.cpu()
Beispiel #6
0
def main():
    """
        Command-line entry point: train and/or test a CRNN as requested.
    """

    args = parse_arguments()

    if not (args.train or args.test):
        print("If we are not training, and not testing, what is the point?")

    def build(train_test_ratio, last_arg):
        # Both call sites share every argument except the ratio and the
        # final positional value (0 for training, 1 for test-only).
        return CRNN(args.iteration_count, args.batch_size, args.model_path,
                    args.examples_path, args.max_image_width,
                    train_test_ratio, args.restore, last_arg)

    crnn = None

    if args.train:
        crnn = build(args.train_test_ratio, 0)
        crnn.train(args.iteration_count)

    if args.test:
        # Reuse the freshly trained model when there is one.
        if crnn is None:
            crnn = build(0, 1)
        crnn.test()
Beispiel #7
0
def main():
    """Command-line entry point: train and/or test a CRNN as requested.

    Both the training and the test-only model are built from
    ``args.examples_path``. The training branch previously read
    ``args.example_path``, which disagreed with the test branch.
    """
    args = parse_arguments()

    if not args.train and not args.test:
        print("If we are not training,and not testing,what is the point?")

    crnn = None

    if args.train:
        crnn = CRNN(
            args.batch_size,
            args.model_path,
            args.examples_path,
            args.max_image_width,
            args.train_test_ratio,
            args.restore
        )
        crnn.train(args.iteration_count)

    if args.test:
        # Reuse the model that was just trained; otherwise build one with a
        # train/test ratio of 0.
        if crnn is None:
            crnn = CRNN(
                args.batch_size,
                args.model_path,
                args.examples_path,
                args.max_image_width,
                0,
                args.restore
            )
        crnn.test()
Beispiel #8
0
def main():
    """
        Command-line entry point: resolve the character set, then train
        and/or test a CRNN as requested.
    """

    args = parse_arguments()

    if not (args.train or args.test):
        print("If we are not training, and not testing, what is the point?")

    crnn = None

    if os.path.isfile(args.char_set_string):
        # The argument names a file: concatenate its lines without their
        # newlines, then add a single line break at the very end.
        with open(args.char_set_string, "r") as f:
            charset = "".join(line.strip("\n") for line in f) + "\n"
    else:
        # Otherwise the argument itself is the character set.
        charset = args.char_set_string

    def build(train_test_ratio):
        # Only the train/test ratio differs between the two call sites.
        return CRNN(
            args.batch_size,
            args.model_path,
            args.examples_path,
            args.max_image_width,
            train_test_ratio,
            args.restore,
            charset,
            args.use_trdg,
            args.language,
            args.learning_rate
        )

    if args.train:
        crnn = build(args.train_test_ratio)
        crnn.train(args.iteration_count)

    if args.test:
        if crnn is None:
            crnn = build(0)
        crnn.test()
def eval(path="checkpoint3.pt"):
    """Restore a checkpoint and print decoded predictions batch after batch.

    Loads model and optimizer state from ``path``, reports the stored epoch
    and loss, then loops forever pulling batches from ``dataset`` and printing
    each decoded text with its probability and the image size. The loop only
    ends when ``next(dataset)`` raises.
    """
    net = CRNN(nclass=100).double()
    optimizer = optim.Adam(net.parameters())

    state = torch.load(path)
    net.load_state_dict(state["model_state_dict"])
    optimizer.load_state_dict(state["optimizer_state_dict"])
    epoch, loss = state["epoch"], state["loss"]
    print(f"model current epoch: {epoch} with loss: {loss}")

    net.eval()

    batch_keys = ("the_inputs", "the_labels", "input_length", "label_length")
    while True:
        batch = next(dataset)
        # All four entries are read even though only the images are used below.
        images, labels, input_length, label_length = (batch[key] for key in batch_keys)

        outputs = net(images).detach()
        pred_texts, probs = decode_batch2(outputs, string.printable)
        for idx, text in enumerate(pred_texts):
            print(text, probs[idx])
            print(images[idx].size())
Beispiel #10
0
 def __init__(self, model_path):
     """Build the CRNN recognizer, load weights from ``model_path``, enter eval mode.

     The alphabet is assembled from the code points in ``config.alphabet_v2``;
     the class count is the alphabet length plus one. On CUDA machines the
     ``module.`` prefix is removed from checkpoint keys before loading.
     """
     self.alphabet = ''.join(map(chr, config.alphabet_v2))
     # print(len(self.alphabet))
     self.nclass = len(self.alphabet) + 1
     self.model = CRNN(config.imgH, 1, self.nclass, 256)

     self.cuda = torch.cuda.is_available()
     if self.cuda:
         self.model.cuda()
         renamed = {}
         for key, value in torch.load(model_path).items():
             renamed[key.replace('module.', '')] = value
         self.model.load_state_dict(renamed)
     else:
         # self.model = nn.DataParallel(self.model)
         cpu_state = torch.load(model_path, map_location='cpu')
         self.model.load_state_dict(cpu_state)

     self.model.eval()
     self.converter = strLabelConverter(self.alphabet)
Beispiel #11
0
def ocr(orig_img, lines, checkpoint_file_name, use_gpu=False):
    """OCR on segmented lines.

    Args:
        orig_img: Image array that is cropped as ``orig_img[y1:y2, x1:x2]``.
        lines: Iterable of ``((x1, y1), (x2, y2))`` boxes, one per text line.
        checkpoint_file_name: Checkpoint containing a ``'state_dict'`` entry.
        use_gpu: Run the model on CUDA. The input tensors are now moved to
            CUDA as well; previously they stayed on the CPU while the model
            was on the GPU, so the forward pass could not run.

    Returns:
        A list of ``(line_img, predicted_text)`` tuples, one per box.
    """
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    # NOTE(review): this disables autograd globally and never re-enables it;
    # kept because callers may already rely on that side effect.
    torch.set_grad_enabled(False)

    def to_text(tensor, max_length=None, remove_repetitions=False):
        # Greedy decode: map indices to characters, drop the blank symbol
        # 'B' and, optionally, characters equal to their predecessor.
        sequence = tensor.cpu().detach().numpy()
        chars = []
        for i in range(len(sequence)):
            if max_length is not None and i >= max_length:
                break
            char = idx2char[sequence[i]]
            if char == 'B':  # ignore blank
                continue
            if (remove_repetitions and i != 0
                    and char == idx2char[sequence[i - 1]]):
                continue
            chars.append(char)
        return ''.join(chars)

    result = []
    for line in lines:
        (x1, y1), (x2, y2) = line
        line_img = image_resize(np.array(np.rot90(orig_img[y1:y2, x1:x2])),
                                height=line_size)

        # Scale by 1/255 and add two leading singleton dimensions.
        inputs = torch.from_numpy(line_img /
                                  255).float().unsqueeze(0).unsqueeze(0)
        if use_gpu:
            # The input must live on the same device as the model.
            inputs = inputs.cuda()
        outputs = model(inputs)
        prediction = outputs.softmax(2).max(2)[1]

        predicted_text = to_text(prediction[:, 0], remove_repetitions=True)
        result.append((line_img, predicted_text))

    return result
    def __init__(self, pre_train=False):
        """Read ``./net.cfg``, resolve the start step and build the CRNN graph.

        Args:
            pre_train: When true, training resumes from an existing
                checkpoint: the checkpoint at ``model_save_path`` must exist
                and the start step is read from ``./model/train_step.txt``.
                Otherwise training starts at step 0.

        Raises:
            AssertionError: If ``pre_train`` is set but no checkpoint exists.
        """
        net_params, train_params = parser_cfg_file('./net.cfg')

        self.input_height = int(net_params['input_height'])
        self.input_width = int(net_params['input_width'])
        self.batch_size = int(train_params['batch_size'])
        self._learning_rate = float(train_params['learning_rate'])
        self._max_iterators = int(train_params['max_iterators'])
        self._train_logger_init()
        self._pre_train = pre_train
        self._model_save_path = str(train_params['model_save_path'])

        if self._pre_train:
            if not tf.train.checkpoint_exists(self._model_save_path):
                # Raised explicitly instead of via `assert`, which is removed
                # under `python -O` and would leave `_start_step` unset.
                print('Checkpoint is invalid...')
                raise AssertionError('Checkpoint is invalid...')
            print('Checkpoint is valid...')
            # The context manager closes the file even if parsing fails.
            with open('./model/train_step.txt', 'r') as f:
                self._start_step = int(f.readline())
        else:
            self._start_step = 0

        # NOTE(review): the placeholder height is the literal 32, not
        # self.input_height; confirm the two always agree.
        self._inputs = tf.placeholder(
            tf.float32, [self.batch_size, 32, self.input_width, 1])

        # label
        self._label = tf.sparse_placeholder(tf.int32, name='label')

        # Per-sample sequence lengths.
        self._seq_len = tf.placeholder(tf.int32, [None], name='seq_len')

        crnn_net = CRNN(net_params, self._inputs, self._seq_len,
                        self.batch_size, True)
        self._net_output, self._decoded, self._max_char_count = crnn_net.construct_graph(
        )
        # Unfilled positions of the dense decode result are marked with -1.
        self.dense_decoded = tf.sparse_tensor_to_dense(self._decoded[0],
                                                       default_value=-1)
Beispiel #13
0
def recognition2(examples_path, output_path):
    """
        Run CRNN recognition on ``examples_path`` and write the results.

        Builds a CRNN restoring from ``./save/``, runs ``test()`` and writes
        each returned ``name:text`` entry to ``output_path`` with the
        leading/trailing underscores removed from the text part.
    """
    crnn = CRNN(
        10,
        1,
        "./save/",
        examples_path,
        230,
        0,  # train/test ratio: the train share is 0 here
        True,
        1)

    predict_result = crnn.test()
    # The context manager closes the output file even if an entry is malformed.
    with open(output_path, 'w') as f:
        for entry in predict_result:
            fields = entry.split(':')
            name = fields[0]
            # Only the second ':'-separated field is kept, as before.
            text = fields[1].strip('_')
            f.write(name + ':' + text)
Beispiel #14
0
class PytorchOcr():
    """CRNN-based text recognizer for single text-line images."""

    def __init__(self, model_path):
        """Build the CRNN, load weights from ``model_path`` and enter eval mode.

        The ``module.`` prefix is removed from checkpoint keys on both the
        CUDA and the CPU path, so the same checkpoint loads on either kind
        of machine.
        """
        alphabet_unicode = config.alphabet_v2
        self.alphabet = ''.join([chr(uni) for uni in alphabet_unicode])
        # One extra class on top of the alphabet characters.
        self.nclass = len(self.alphabet) + 1
        self.model = CRNN(config.imgH, 1, self.nclass, 256)
        self.cuda = torch.cuda.is_available()
        if self.cuda:
            self.model.cuda()
            state_dict = torch.load(model_path)
        else:
            state_dict = torch.load(model_path, map_location='cpu')
        self.model.load_state_dict({
            k.replace('module.', ''): v
            for k, v in state_dict.items()
        })
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def recognize(self, img):
        """Return the text decoded from ``img``.

        Colour (3-dimensional) input is converted from BGR to grayscale. The
        image is resized to height 32 with the width scaled by the original
        aspect ratio before being fed to the model.
        """
        h, w = img.shape[:2]
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image = Image.fromarray(img)
        transformer = resizeNormalize((int(w / h * 32), 32))
        image = transformer(image)
        # Add a leading batch dimension of size 1.
        image = image.view(1, *image.size())
        image = Variable(image)

        if self.cuda:
            image = image.cuda()

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            preds = self.model(image)

        # Take the arg-max class per step, then flatten to one sequence.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)

        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        txt = self.converter.decode(preds.data, preds_size.data, raw=False)

        return txt
Beispiel #15
0
def main(epoch_num, lr=0.1, training=True, fix_width=True):
    """
    Train and then test the CRNN, or only test it.

    Args:
        epoch_num: Forwarded to ``train`` as its epoch count argument.
        lr (float, optional): Forwarded to ``train`` as ``lr`` (default: 0.1)
        training (bool, optional): If True, train the model, otherwise test it (default: True)
        fix_width (bool, optional): Scale images to fixed size (default: True)
    """

    prefix = 'fix_width_' if fix_width else ''
    model_path = prefix + 'crnn.pth'
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    root = 'data/IIIT5K/'

    net = CRNN(1, len(letters) + 1)

    # Start from the saved weights whenever a model file is present; the
    # announcement is only printed in training mode.
    if os.path.exists(model_path):
        if training:
            print('Pre-trained model detected.\nLoading model...')
        net.load_state_dict(torch.load(model_path))

    if training:
        if torch.cuda.is_available():
            print('GPU detected.')
        start_epoch = 0
        net = train(root,
                    start_epoch,
                    epoch_num,
                    letters,
                    net=net,
                    lr=lr,
                    fix_width=fix_width)
        # Persist the trained weights so a later run can continue from them.
        torch.save(net.state_dict(), model_path)

    # Both modes finish with an evaluation pass.
    test(root, net, letters, fix_width=fix_width)
Beispiel #16
0
def main():
    """ Run a short CRNN training session on an optical-flow video dataset. """
    print(__doc__)

    # Optical flow object that serves as the per-video processor.
    opt = OpticalFlow(pyr_scale=0.5,
                      levels=3,
                      winsize=15,
                      iterations=3,
                      poly_n=5,
                      poly_sigma=1.2)

    # Video dataset: label file, frame size and processor.
    labels_path = '../labels_gary.txt'
    width, height = 100, 100
    vids = VideoDataset(labels_path)
    vids.set_video_params(width, height, opt)

    # Read video paths and labels, then partition them.
    X, y = vids.read_data()
    X_tr, y_tr, X_te, y_te = vids.partition_data(X, y, ratio=0.8)

    # Keep only the first ten entries of every split.
    X_tr, y_tr = X_tr[:10].copy(), y_tr[:10].copy()
    X_te, y_te = X_te[:10].copy(), y_te[:10].copy()
    print(X_tr)
    print(y_tr)

    # Create the model and train it.
    crnn = CRNN()
    print(crnn)
    trainer = Trainer(crnn, vids)
    trainer.train(X_tr, y_tr, X_te, y_te, epochs=2, batch_size=10)
    def __init__(self, config=TextRecognitionModelConfig()):
        """Assemble the ResNet/CRNN encoder, optional STN and attention decoder.

        After the sub-modules are created, parameters are initialised by
        name: anything containing ``fc2`` is skipped, biases are zeroed,
        weights with two or more dimensions get Kaiming-normal values and
        the remaining weights are filled with ones.
        """
        super().__init__()

        self.config = config

        self.cnn = ResNet()

        if self.config.with_STN:
            # config_stn = TransformationConfig(self.cnn)
            # config_stn.outputsize = 256*2*16
            stn_config = TransformationConfig()
            self.stn = Transformation(stn_config)

        self.encoder = CRNN(self.cnn)

        decoder_kwargs = dict(
            num_classes=config.num_classes,
            in_planes=self.encoder.out_planes,
            sDim=config.decoder_s_dim,
            attDim=config.attention_dim,
            max_len_labels=config.max_len_labels,
            use_bidecoder=config.use_bidecoder,
            device=config.device,
        )
        self.decoder = DecoderWithAttention(**decoder_kwargs)

        for name, param in self.named_parameters():
            if 'fc2' in name:
                print(f'Skip {name} as it is already initialized')
                continue
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            if 'weight' not in name:
                continue
            if len(param.shape) < 2:
                param.data.fill_(1)
            else:
                nn.init.kaiming_normal_(param)
Beispiel #18
0
def infer(files, save_static_path=None):
    """Run the CRNN on ``files`` and return tab-separated ``name\\ttext`` rows.

    Args:
        files: Iterable of image paths.
        save_static_path: When given, the model is traced on the input and
            saved as a static inference model at this path.

    Returns:
        A list of ``'<basename>\\t<decoded text>'`` strings.
    """
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    print('train with {}'.format(place))

    result_list = []
    with fluid.dygraph.guard(place):
        params, _ = fluid.load_dygraph('{}/crnn_best'.format('output/baidu_model'))#train_parameters['save_model_dir']))
        # crnn = CRNN(train_parameters["class_dim"] + 1, 1)
        crnn = CRNN(3828, 1)
        crnn.load_dict(params)
        crnn.eval()

        for path in tqdm(files):
            img = fluid.dygraph.to_variable(precess_img(path)).astype('float32')

            if save_static_path is not None:
                out_dygraph, static_layer = TracedLayer.trace(crnn, inputs=[img])
                # Save the converted model.
                static_layer.save_inference_model(save_static_path, feed=[0], fetch=[0])

            pred = crnn(img)
            output = utils.greedy_decode(pred.numpy(), blank=train_parameters["class_dim"])
            label_map = train_parameters['r_label_dict']
            p_s = "".join([label_map[c] for c in output[0]])
            result_list.append('{0}\t{1}'.format(os.path.basename(path), p_s))
            # NOTE(review): the loop stops after the first file, so at most
            # one row is returned; confirm this is intended.
            break
    return result_list
Beispiel #19
0
def main():
    """
        Command-line entry point: train and/or test a CRNN as requested.
    """

    args = parse_arguments()

    if not (args.train or args.test):
        print("If we are not training, and not testing, what is the point?")

    def build(train_test_ratio):
        # Only the train/test ratio differs between the two call sites.
        return CRNN(
            args.batch_size,
            args.model_path,
            args.examples_path,
            args.max_image_width,
            train_test_ratio,
            args.restore
        )

    crnn = None

    if args.train:
        crnn = build(args.train_test_ratio)
        crnn.train(args.iteration_count)

    if args.test:
        # Reuse the freshly trained model when there is one.
        if crnn is None:
            crnn = build(0)
        crnn.test()
Beispiel #20
0
                                size=(384, 48),
                                max_length=None)
# Test loader over the 384-wide dataset; order is kept (no shuffling).
dloader_test384 = torch.utils.data.DataLoader(dset_test384,
                                              shuffle=False,
                                              batch_size=opt.test_batchsize,
                                              num_workers=int(opt.workers))

# Read the character set; the context manager closes the file right away
# instead of leaving the handle open for the lifetime of the script.
with open(opt.char_dir, 'r') as _char_file:
    character_str = _char_file.read()
print('character  ', character_str[13])

# Networks are collected in a list; the first two differ only in the
# activation flag passed to CRNN.
net_t_list = []

net_t_list.append(
    CRNN(48,
         1,
         len(character_str) - 1,
         256,
         opt.nrnn,
         0.5,
         opt.variational_dropout,
         leakyRelu=True))
net_t_list.append(
    CRNN(48,
         1,
         len(character_str) - 1,
         256,
         opt.nrnn,
         0.5,
         opt.variational_dropout,
         RRelu=True))
net_t_list.append(
    CRNN(48,
         1,
def train():
	"""Train the CRNN text classifier and checkpoint it by dev-set macro F1.

	Loads the labelled text from ``FLAGS.train_dir``, maps it to word ids,
	shuffles it with a fixed seed, holds out the last
	``FLAGS.dev_sample_percentage`` share as the dev set, optionally seeds
	the embedding matrix from a binary word2vec file, and then runs the
	training loop with periodic logging, evaluation and checkpointing.

	Raises:
		EOFError: If the word2vec file ends in the middle of a word.
	"""
	with tf.device('/cpu:0'):
		# x_text, pos1, pos2, y = data_helpers.load_data_and_labels(FLAGS.train_dir)
		x_text, y = data_helpers.load_data_and_labels(FLAGS.train_dir)

	# Build vocabulary
	# Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
	# ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
	# =>
	# [27 39 40 41 42  1 43  0  0 ... 0]
	# dimension = FLAGS.max_sentence_length
	text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(FLAGS.max_sentence_length)
	text_vec = np.array(list(text_vocab_processor.fit_transform(x_text)))
	print("Text Vocabulary Size: {:d}".format(len(text_vocab_processor.vocabulary_)))


	# Example: pos1[3] = [-2 -1  0  1  2   3   4 999 999 999 ... 999]
	# [95 96 97 98 99 100 101 999 999 999 ... 999]
	# =>
	# [11 12 13 14 15  16  21  17  17  17 ...  17]
	# dimension = MAX_SENTENCE_LENGTH
	# pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(FLAGS.max_sentence_length)
	# pos_vocab_processor.fit(pos1 + pos2)
	# pos1_vec = np.array(list(pos_vocab_processor.transform(pos1)))
	# pos2_vec = np.array(list(pos_vocab_processor.transform(pos2)))
	# print("Position Vocabulary Size: {:d}".format(len(pos_vocab_processor.vocabulary_)))

	# x = np.array([list(i) for i in zip(text_vec, pos1_vec, pos2_vec)])
	x = np.array([list(i) for i in text_vec])

	print("x = {0}".format(x.shape))
	print("y = {0}".format(y.shape))

	# Randomly shuffle data (fixed seed so the split is reproducible)
	np.random.seed(10)
	shuffle_indices = np.random.permutation(np.arange(len(y)))
	x_shuffled = x[shuffle_indices]
	y_shuffled = y[shuffle_indices]

	# Split train/test set
	# TODO: This is very crude, should use cross-validation
	# The negative index makes the dev set the tail of the shuffled data.
	dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
	x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
	# x_dev = np.array(x_dev).transpose((1, 0, 2))
	y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
	print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

	gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)

	with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
		model = CRNN(layers=FLAGS.layers, max_length=FLAGS.max_sentence_length,
					 n_classes=y.shape[1], pooling_type=FLAGS.pooling_type,
					 vocab_size=len(text_vocab_processor.vocabulary_),
					 embedding_size=FLAGS.text_embedding_dim,
					 f1=FLAGS.f1, f2=FLAGS.f2, n_channels=FLAGS.n_channels)

		# Output directory for models and summaries
		timestamp = str(int(time.time()))
		out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
		print("Writing to {}\n".format(out_dir))

		# Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
		checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
		checkpoint_prefix = os.path.join(checkpoint_dir, "model")
		if not os.path.exists(checkpoint_dir):
			os.makedirs(checkpoint_dir)
		saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

		# Write vocabulary
		text_vocab_processor.save(os.path.join(out_dir, "text_vocab"))
		# pos_vocab_processor.save(os.path.join(out_dir, "position_vocab"))

		sess.run(tf.global_variables_initializer())

		# Pre-trained word2vec
		if FLAGS.word2vec:
			# initial matrix with random uniform
			initW = np.random.uniform(-0.25, 0.25, (len(text_vocab_processor.vocabulary_), FLAGS.text_embedding_dim))
			# load any vectors from the word2vec
			print("Load word2vec file {0}".format(FLAGS.word2vec))
			with open(FLAGS.word2vec, "rb") as f:
				# Header line: "<vocab_size> <vector_size>".
				header = f.readline()
				vocab_size, layer1_size = map(int, header.split())
				binary_len = np.dtype('float32').itemsize * layer1_size
				for line in range(vocab_size):
					# Each record is "<word> " followed by the raw float32 vector.
					word = []
					while True:
						ch = f.read(1).decode('latin-1')
						if not ch:
							# A truncated file would otherwise loop forever here.
							raise EOFError("Unexpected end of word2vec file {0}".format(FLAGS.word2vec))
						if ch == ' ':
							word = ''.join(word)
							break
						if ch != '\n':
							word.append(ch)
					idx = text_vocab_processor.vocabulary_.get(word)
					if idx != 0:
						# np.frombuffer replaces the deprecated binary mode of
						# np.fromstring; the assignment copies into initW.
						initW[idx] = np.frombuffer(f.read(binary_len), dtype='float32')
					else:
						# Skip the vector of a word that maps to id 0.
						f.read(binary_len)
			sess.run(model.W_emb.assign(initW))
			print("Success to load pre-trained word2vec model!\n")

		batches = data_helpers.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

		max_f1 = -1

		for step, batch in enumerate(batches):
			x_batch, y_batch = zip(*batch)

			feed_dict = {model.input_text: x_batch, model.dropout_keep_prob: FLAGS.dropout_keep_prob, model.labels: y_batch}
			# max_pooling, convs = sess.run([model.max_pooing, model.conv], feed_dict=feed_dict)
			_, loss, accuracy = sess.run([model.train, model.cost, model.accuracy], feed_dict=feed_dict)

			# Training log display
			if step % FLAGS.display_every == 0:
				print("step {}:, loss {}, acc {}".format(step, loss, accuracy))

			# Evaluation
			if step % FLAGS.evaluate_every == 0:
				print("\nEvaluation:")
				# Dropout is disabled (keep probability 1.0) for evaluation.
				feed_dict = {
					model.input_text: x_dev,
					model.labels: y_dev,
					model.dropout_keep_prob: 1.0
				}
				loss, accuracy, predictions = sess.run(
					[model.cost, model.accuracy, model.predictions], feed_dict)

				f1 = f1_score(np.argmax(y_dev, axis=1), predictions, average="macro")
				print("step {}:, loss {}, acc {}, f1 {}\n".format(step, loss, accuracy, f1))

				# Model checkpoint: saved whenever F1 exceeds 99% of the value
				# recorded at the previous save.
				if f1 > max_f1 * 0.99:
					path = saver.save(sess, checkpoint_prefix, global_step=step)
					print("Saved model checkpoint to {}\n".format(path))
					max_f1 = f1
Beispiel #22
0
from crnn import CRNN

# Training configuration passed to the CRNN constructor below.
batch_size = 10
model_path = 'MyModel'
examples_picture_path = 'restore/'
examples_label_path = 'target_label.txt'
dictionary_path = 'dictionary.txt'
max_image_width = 256
train_test_ratio = 0.9
restore = False
NUM_CLASSES = 52

# Number of iterations handed to crnn.train().
iteration_count = 4000

# The model is constructed at import time; only training is guarded by the
# __main__ check.
crnn = CRNN(
    batch_size,
    model_path,
    examples_picture_path,
    examples_label_path,
    dictionary_path,
    max_image_width,
    train_test_ratio,
    restore,
    NUM_CLASSES,
)

if __name__ == '__main__':
    crnn.train(iteration_count)
def main():
    """Train a CRNN with CTC loss using the settings in ``conf/train.yml``.

    Reads the configuration, builds the model, loss and RMSprop optimizer on
    CUDA, then iterates over the training set ``n_iter`` times, running
    validation every ``val_interval`` steps and saving the whole model every
    ``save_interval`` steps.
    """
    conf_file = "conf/train.yml"
    with open(conf_file, 'r') as f:
        # safe_load only builds plain Python objects; yaml.load without an
        # explicit Loader is unsafe and is rejected by PyYAML 6 and later.
        args = edict(yaml.safe_load(f))

    train_root = args.train_root
    test_root = args.test_root
    batch_size = args.batch_size
    max_len = args.max_len
    img_h = args.img_h
    img_w = args.img_w
    n_hidden = args.n_hidden
    n_iter = args.n_iter
    lr = args.lr
    cuda = args.cuda
    val_interval = args.val_interval
    save_interval = args.save_interval
    model_dir = args.model_dir
    debug_level = args.debug_level
    experiment = args.experiment
    n_channel = args.n_channel
    n_class = args.n_class
    beta = args.beta

    # Buffers that loadData() fills for every batch.
    # NOTE(review): the last dimension is img_h, not img_w; confirm whether
    # loadData resizes the buffer or whether img_w was intended.
    image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)

    # Logging levels: 50 - critical, 40 - error, 30 - warning,
    # 20 - info, 10 - debug.
    logging.getLogger().setLevel(debug_level)

    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)

    criterion = CTCLoss().cuda()

    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr,
    #                    betas=(beta, 0.999))

    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)

    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label \
                in iter(trainset):
            # Re-enable gradients and training mode on every step.
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            # A None image ends the current pass over the training set.
            if train_img is None:
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            # print(preds.shape)
            # Every sample in the batch uses the full output length.
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            # print(batch_label, text, length, len(text), len(length), length.sum(),
            #     preds.shape, preds_size.shape)
            cost = criterion(preds, text, preds_size, length)\
                    / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            print("training-iter {} cost {}".format(
                ITER,
                cost.cpu().detach().numpy()[0]))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                # NOTE(review): the file name uses the outer ITER counter, so
                # saves within the same pass overwrite each other.
                model_file = os.path.join(model_dir,
                                          "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)
Beispiel #24
0
def train():
    """Train the CRNN with CTC loss on one GPU, validating after every epoch.

    Everything is driven by the module-level ``config`` object: output
    directory, train/test list files, image size, alphabet, batch sizes,
    learning-rate schedule and the epoch range. Scalars are written through
    ``SummaryWriter`` into ``config.output_dir`` and one ``.params`` file is
    saved there at the end of each epoch, named after the epoch number and
    the train/validation accuracies.
    """
    if config.restart_training:
        # NOTE(review): this runs before the None default below is applied,
        # so with output_dir=None nothing under 'output' is removed -- confirm
        # whether the two checks should be swapped.
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if config.output_dir is None:
        config.output_dir = 'output'
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)
    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info('train with gpu %s and mxnet %s' %
                (config.gpu_id, mx.__version__))

    ctx = mx.gpu(config.gpu_id)
    # Set the random seeds (global and for the selected GPU context)
    mx.random.seed(2)
    mx.random.seed(2, ctx=ctx)

    # Training images get random brightness jitter; test images are only
    # converted to tensors.
    train_transfroms = transforms.Compose(
        [transforms.RandomBrightness(0.5),
         transforms.ToTensor()])
    # NOTE(review): the positional 3 and 80 are presumably the channel count
    # and the maximum label length -- confirm against ImageDataset.
    train_dataset = ImageDataset(config.trainfile,
                                 (config.img_h, config.img_w),
                                 3,
                                 80,
                                 config.alphabet,
                                 phase='train')
    train_data_loader = DataLoader(
        train_dataset.transform_first(train_transfroms),
        config.train_batch_size,
        shuffle=True,
        last_batch='keep',
        num_workers=config.workers)
    test_dataset = ImageDataset(config.testfile, (config.img_h, config.img_w),
                                3,
                                80,
                                config.alphabet,
                                phase='test')
    test_data_loader = DataLoader(test_dataset.transform_first(
        transforms.ToTensor()),
                                  config.eval_batch_size,
                                  shuffle=True,
                                  last_batch='keep',
                                  num_workers=config.workers)
    net = CRNN(len(config.alphabet), hidden_size=config.nh)
    net.hybridize()
    # Resume from a checkpoint unless a restart was requested or no
    # checkpoint path is configured; otherwise start from fresh weights.
    if not config.restart_training and config.checkpoint != '':
        logger.info('load pretrained net from {}'.format(config.checkpoint))
        net.load_parameters(config.checkpoint, ctx=ctx)
    else:
        net.initialize(ctx=ctx)

    criterion = gluon.loss.CTCLoss()

    # Number of batches per epoch; the LR decay step is expressed in epochs
    # in the config and converted to optimizer steps here.
    all_step = len(train_data_loader)
    logger.info('each epoch contains {} steps'.format(all_step))
    schedule = mx.lr_scheduler.FactorScheduler(step=config.lr_decay_step *
                                               all_step,
                                               factor=config.lr_decay,
                                               stop_factor_lr=config.end_lr)
    # schedule = mx.lr_scheduler.MultiFactorScheduler(step=[15 * all_step, 30 * all_step, 60 * all_step,80 * all_step],
    #                                                 factor=0.1)
    adam_optimizer = mx.optimizer.Adam(learning_rate=config.lr,
                                       lr_scheduler=schedule)
    trainer = gluon.Trainer(net.collect_params(), optimizer=adam_optimizer)

    sw = SummaryWriter(logdir=config.output_dir)
    for epoch in range(config.start_epoch, config.end_epoch):
        # `loss` accumulates the mean CTC loss between two log lines;
        # `train_acc` accumulates accuracy() results over the whole epoch.
        loss = .0
        train_acc = .0
        tick = time.time()
        cur_step = 0
        for i, (data, label) in enumerate(train_data_loader):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)

            with autograd.record():
                output = net(data)
                loss_ctc = criterion(output, label)
            loss_ctc.backward()
            # The size of this batch is passed because the last batch of an
            # epoch may be smaller (last_batch='keep').
            trainer.step(data.shape[0])

            loss_c = loss_ctc.mean()
            cur_step = epoch * all_step + i
            # NOTE(review): global_step is halved for 'ctc_loss' and 'lr' but
            # not for 'train_acc' / 'val_acc' below -- confirm this is intended.
            sw.add_scalar(tag='ctc_loss',
                          value=loss_c.asscalar(),
                          global_step=cur_step // 2)
            sw.add_scalar(tag='lr',
                          value=trainer.learning_rate,
                          global_step=cur_step // 2)
            loss += loss_c
            acc = accuracy(output, label, config.alphabet)
            train_acc += acc
            if (i + 1) % config.display_interval == 0:
                # Only the value logged here is divided by the batch size;
                # train_acc above accumulated the undivided value and is
                # divided by the dataset length after the epoch.
                acc /= len(label)
                sw.add_scalar(tag='train_acc', value=acc, global_step=cur_step)
                batch_time = time.time() - tick
                logger.info(
                    '[{}/{}], [{}/{}],step: {}, Speed: {:.3f} samples/sec, ctc loss: {:.4f},acc: {:.4f}, lr:{},'
                    ' time:{:.4f} s'.format(
                        epoch, config.end_epoch, i, all_step, cur_step,
                        config.display_interval * config.train_batch_size /
                        batch_time,
                        loss.asscalar() / config.display_interval, acc,
                        trainer.learning_rate, batch_time))
                # Reset the running loss and the timer for the next interval.
                loss = .0
                tick = time.time()
                nd.waitall()
        # The graph is written once, after the first epoch (epoch index 0).
        if epoch == 0:
            sw.add_graph(net)
        logger.info('start val ....')
        train_acc /= train_dataset.__len__()
        validation_accuracy = evaluate_accuracy(
            net, test_data_loader, ctx,
            config.alphabet) / test_dataset.__len__()
        sw.add_scalar(tag='val_acc',
                      value=validation_accuracy,
                      global_step=cur_step)
        logger.info("Epoch {},train_acc {:.4f}, val_acc {:.4f}".format(
            epoch, train_acc, validation_accuracy))
        net.save_parameters("{}/{}_{:.4f}_{:.4f}.params".format(
            config.output_dir, epoch, train_acc, validation_accuracy))
    sw.close()
import torch
from crnn import CRNN

# Dimensions for a quick forward-pass smoke run of the CRNN.
batch_size = 1
embedding_size = 8
hidden_state = 4
output_size = 2

# Clock periods are the powers of two 1, 2, 4, ..., 256.
clock_periods = [2 ** exponent for exponent in range(9)]

model = CRNN(embedding_size, hidden_state, output_size, clock_periods)

# Random input with 16 entries along the leading axis.
inputs = torch.randn(16, batch_size, embedding_size)
y_predicted = model.forward(inputs)
print(y_predicted)
Beispiel #26
0
        # m.weight.data.normal_(1.0, 0.02)
        m.weight.data.uniform_(1.0, 5)
        m.bias.data.fill_(0)
    elif isinstance(m, nn.GRU):
        nn.init.xavier_uniform_(m.weight.data,
                                gain=nn.init.calculate_gain('leaky_relu'))


#    elif isinstance(m, nn.Linear):
#        m.weight.data.normal_(0.0, 0.02)
#        m.bias.data.fill_(0)

# Build the recognition network from the command-line options.
net = CRNN(48,
           1,
           len(char2index),
           256,
           opt.nrnn,
           opt.dropout,
           opt.variational_dropout,
           leakyRelu=True)
print(net)

# Count the elements of every tensor in the state dict. numel() is used
# instead of reduce(mul, v.numpy().shape) because the latter raises TypeError
# on 0-dim entries (empty shape tuple) and .numpy() fails for tensors that do
# not live on the CPU.
params = net.state_dict()
params_shape = [v.numel() for v in params.values()]
params_total = sum(params_shape)
print('params_total:', params_total)

# Optionally start from previously saved weights.
if opt.finetune:
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
Beispiel #27
0
    return reader


if __name__ == '__main__':
    # Visual check of the piecewise learning-rate schedule: run the optimizer
    # for `total_step` dummy steps and plot the learning rate it reports.
    from paddle import fluid

    total_step = 30
    LR = 1e-3
    with fluid.dygraph.guard():
        # The rate drops by 10x at one third and at two thirds of the run.
        boundaries = [total_step // 3, total_step * 2 // 3]
        rates = [LR, LR * 0.1, LR * 0.01]
        lr_schedule = fluid.layers.piecewise_decay(boundaries, rates)
        # lr_schedule = fluid.layers.polynomial_decay(LR,total_step,1e-7,power=0.9)
        from crnn import CRNN

        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=16)
        optimizer = fluid.optimizer.Adam(learning_rate=lr_schedule,
                                         parameter_list=crnn.parameters())
        steps = []
        lr_history = []
        for step_idx in range(total_step):
            steps.append(step_idx)
            # A constant stands in for the loss; only the step counter matters.
            dummy_loss = fluid.dygraph.to_variable(np.array([1]))
            optimizer.minimize(dummy_loss)
            lr_history.append(optimizer.current_step_lr())
            print(step_idx, optimizer.current_step_lr())

        from matplotlib import pyplot as plt

        plt.plot(steps, lr_history)
        plt.show()
Beispiel #28
0
class PorscheFonts:
    """Read model lettering from a photo and compare it with the file's label.

    A SegLink detector proposes text quadrilaterals, a CRNN recogniser reads
    them, and the best-scoring reading found in ``lexicons`` is normalised
    and compared with the label encoded in the image file name.
    """

    def __init__(self):
        """Create the detector and recogniser and load their weights."""
        detector_weights = '/home/eugene/storystream/ai-p-fonts/log/seglink/seglink_epoch-29_loss-0.91_val_loss-0.36.h5'
        recogniser_weights = '/home/eugene/storystream/ai-p-fonts/log/crnn/gru/CRNN_epoch-29_loss-0.00_val_loss-0.00.h5'

        # Settings of the text detector.
        self.det_info = {
            'input_shape': (512, 1024, 3),
            'channel_mean': np.array([123, 117, 104]),
            'weight_path': detector_weights,
        }
        # Settings of the text recogniser.
        self.reg_info = {
            'input_shape': (32, 480, 1),
            'weight_path': recogniser_weights,
        }

        self.det = SegLink(input_shape=self.det_info['input_shape'])
        self.reg = CRNN(input_shape=self.reg_info['input_shape'])

        self.det.create_model(self.det_info['weight_path'])
        self.reg.create_model(self.reg_info['weight_path'],
                              gru=True,
                              train=False)

        # Readings accepted as a model name; anything else is ignored.
        self.lexicons = [
            'gt2',
            'gt2rs',
            'targa',
            'turbo',
            '91150',
            'carrera 4',
            'carrera 4 gts',
            'carrera 4s',
            'carrera gts',
            'carrera s',
            'targa 4',
            'targa 4 gts',
            'targa 4s',
            'turbo s',
            '911 turbo',
            '911 turbo s',
            '911 carrera 4',
            '911 carrera 4 gts',
            '911 carrera 4s',
            '911 carrera gts',
            '911 carrera s',
            '911 targa 4',
            '911 targa 4 gts',
            '911 targa 4s',
        ]

    def word_postprocess(self, word):
        """Rewrite a lexicon word into the dashed form used for comparison.

        The substitutions are applied in order; a result that still contains
        no dash gets one appended.
        """
        substitutions = (
            (" ", "-"),
            ("4-gts", "4gts"),
            ("91150", "carrera-50"),
            ("911-", ""),
            ("gt2rs", "gt2-rs"),
        )
        for old, new in substitutions:
            word = word.replace(old, new)
        if "-" in word:
            return word
        return word + "-"

    def predict(self, img_path, save=False):
        """Check whether the recognised lettering matches the file's label.

        The label is the part of the file name before the first underscore,
        with a dash appended when it contains none.

        Args:
            img_path: Path of the image to analyse.
            save: When true, the annotated image is written to the
                ``output`` directory as ``<file stem>.jpg``.

        Returns:
            ``True`` or ``False`` depending on whether the highest-scoring
            lexicon word equals the label, or ``None`` when nothing was
            detected, no reading scored above 0.5, or no reading is in the
            lexicon.
        """
        ori_img = io.imread(img_path)[..., :3]
        file_name = os.path.basename(img_path)
        gt_label = file_name.split('_')[0]
        if "-" not in gt_label:
            gt_label = gt_label + "-"

        quads = self.det.test_one(ori_img)
        if not len(quads):
            return None

        words, scores = self.reg.test_one(ori_img, quads)
        words = np.array(words)
        scores = np.array(scores)
        keep_bool = scores > 0.5
        if not np.any(keep_bool):
            return None

        # Drop every reading at or below the confidence threshold.
        quads = quads[keep_bool]
        words = list(words[keep_bool])
        scores = list(scores[keep_bool])

        # Annotated image, kept for debugging.
        pil_image = draw_annotated_box(ori_img, quads, words, scores)
        if save:
            stem = os.path.splitext(file_name)[0]
            pil_image.save(os.path.join('output', stem + '.jpg'))

        # Highest score first; only the first lexicon hit is evaluated.
        ranked = sorted(zip(quads, words, scores),
                        key=lambda entry: entry[2],
                        reverse=True)
        for _quad, candidate, _score in ranked:
            if candidate not in self.lexicons:
                continue
            word = self.word_postprocess(candidate)
            print(gt_label, word)
            return gt_label == word
        return None
Beispiel #29
0
batch_size = 90

# Seed every RNG before anything random is created. Seeding after the model
# has been constructed leaves its weight initialisation unseeded, which
# defeats the deterministic=True setting of the Trainer below.
seed_everything(42)

# Training data: shuffled, incomplete final batch dropped.
video_dataset = TorchVideoTrainDataset('torch_video_3/', 'qia2020/train/', df,
                                       y_df, 40000)
train_loader = DataLoader(video_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          drop_last=True,
                          num_workers=8,
                          pin_memory=True)

# Validation data: fixed order.
# NOTE(review): drop_last=True also discards the incomplete final validation
# batch -- confirm this is intended.
val_video_dataset = TorchVideoTrainDataset('torch_video_3_val/',
                                           'qia2020/val/', val_df, val_y_df,
                                           5000)
val_loader = DataLoader(val_video_dataset,
                        batch_size=batch_size,
                        shuffle=False,
                        drop_last=True,
                        num_workers=8,
                        pin_memory=True)

# checkpoint_dir = 'lightning_logs/version_39/checkpoints/epoch=1.ckpt'
system = CRNN()

# trainer = Trainer(gpus=[0], accelerator='ddp', resume_from_checkpoint=checkpoint_dir, deterministic=True, max_epochs=100)
trainer = Trainer(gpus=[0], max_epochs=100, deterministic=True)
trainer.fit(system, train_loader, val_loader)
Beispiel #30
0
def start_train():
    """Build a CRNN from the MODEL_HYPER settings, train it and save it."""
    hyper = MODEL_HYPER
    model = CRNN(
        hyper.batch_size,
        hyper.epoches,
        hyper.data_path,
        hyper.text_path,
        hyper.log_path,
        hyper.model_path,
    )
    model.train()
    model.save()
Beispiel #31
0
import os
import torch
import cv2
from crnn import CRNN
from tqdm import tqdm

import csv
import numpy as np

# Restore the trained weights and put the model into evaluation mode on the GPU.
model = CRNN()
state_dict = torch.load('55acc.pt')
model.load_state_dict(state_dict)
model.eval()
model.to('cuda')

data_dir = "qia2020/test/"
# Integer keys 0-6 mapped to three-letter codes
# (presumably class index -> emotion label; confirm against the training labels).
emo = dict(enumerate(['hap', 'sur', 'neu', 'fea', 'dis', 'ang', 'sad']))

with open('test_confirm.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['FileID', 'Emotion'])
    for filename in tqdm(sorted(os.listdir(data_dir))):
        # Only .mp4 entries are processed.
        if filename.endswith(".mp4"):
            # The first five characters of the file name identify the clip.
            clip_id = filename[:5]

            # Pre-extracted tensor for the clip, with a leading axis of size 1 added.
            feature_path = 'torch_video_3_test/' + clip_id + '.pt'
            X = torch.load(feature_path).unsqueeze(0).to('cuda:0')

            with np.load(data_dir + clip_id + '.npz') as data:
                T = torch.Tensor(data['word_embed'])
Beispiel #32
0
def test_CRNN():
    """Check CRNN's backpropagated gradients against finite differences.

    A random linear functional of the output, ``sum(Y * wrand)``, serves as
    the loss, so the gradient with respect to the output is ``wrand``. Every
    entry of the input ``X`` and of the weights ``W`` is perturbed by
    ``+/- delta`` and the central-difference estimate is compared with the
    value returned by ``backward`` / ``get_grads``. Mismatches are printed
    and counted; a summary line is printed at the end.
    """
    T = 50
    batch_size = 2
    nstates = 3
    input_size = 4
    output_size = 6
    clock_rates = [1, 2, 4]

    unit = CRNN(input_size, nstates, output_size, clock_rates)

    W = unit.get_weights()

    X = np.random.randn(T, input_size, batch_size)

    # Analytic gradients from one forward/backward pass.
    unit.forget()
    acc_Y = unit.forward(X)
    wrand = np.random.randn(*acc_Y.shape)
    dY = wrand  # d/dY of sum(Y * wrand)
    dX = unit.backward(dY)
    dW = unit.get_grads()
    unit.forget()

    def fwd():
        # Re-evaluate the loss with the current (possibly perturbed) X and W.
        unit.set_weights(W)
        h = unit.forward(X)
        unit.forget()
        return np.sum(h * wrand)

    delta = 1e-4
    error_threshold = 1e-3
    checks = [('X', X, dX), ('W', W, dW)]

    error_count = 0
    for name, values, dvalues in checks:
        for i in range(values.size):
            actual = values.flat[i]
            values.flat[i] = actual + delta
            loss_plus = fwd()
            values.flat[i] = actual - delta
            loss_minus = fwd()
            values.flat[i] = actual  # restore the unperturbed entry
            backpropagated_gradient = dvalues.flat[i]
            numerical_gradient = (loss_plus - loss_minus) / (2 * delta)

            if (abs(numerical_gradient) < 1e-7
                    and abs(backpropagated_gradient) < 1e-7):
                # Both gradients are numerically zero: treat as a match.
                error = 0
            else:
                # Relative error. |n| + |b| equals |n + b| whenever the signs
                # agree, but cannot be zero when the two values cancel.
                error = (abs(backpropagated_gradient - numerical_gradient) /
                         (abs(numerical_gradient) +
                          abs(backpropagated_gradient)))

            if error > error_threshold:
                print('FAILURE!!!\n')
                print('\tparameter:  {} \tindex:  {}'.format(
                    name, np.unravel_index(i, values.shape)))
                print('\tvalues:  {}'.format(actual))
                print('\tbackpropagated_gradient:  {}'.format(
                    backpropagated_gradient))
                print('\tnumerical_gradient {}'.format(numerical_gradient))
                print('\terror:  {}'.format(error))
                print('\n\n')

                error_count += 1

    if error_count == 0:
        print('CRNN Gradient Check Passed')
    else:
        print('Failed for {} parameters'.format(error_count))