def fit(self, dataset, batch_size: int = 32, epochs: int = 10, augment_val: bool = True, callbacks: list = None):
    if callbacks is None:
        callbacks = []

    self.network.compile(loss=self.loss(), optimizer=self.optimizer(), metrics=self.metrics())

    train_sequence = DatasetSequence(
        dataset.x_train,
        dataset.y_train,
        batch_size,
        augment_fn=self.batch_augment_fn,
        format_fn=self.batch_format_fn,
    )
    test_sequence = DatasetSequence(
        dataset.x_test,
        dataset.y_test,
        batch_size,
        augment_fn=self.batch_augment_fn if augment_val else None,
        format_fn=self.batch_format_fn,
    )

    self.network.fit_generator(
        generator=train_sequence,
        epochs=epochs,
        callbacks=callbacks,
        validation_data=test_sequence,
        use_multiprocessing=True,
        workers=2,
        shuffle=True,
    )
def fit(
    self,
    dataset,
    batch_size: int = 32,
    epochs: int = 10,
    augment_val: bool = True,
    callbacks: list = None,
    initial_epoch: int = 0,
):
    if callbacks is None:
        callbacks = []

    self.network.compile(loss=self.loss(), optimizer=self.optimizer(), metrics=self.metrics())

    train_sequence = DatasetSequence(
        dataset.x_train,
        dataset.y_train,
        batch_size,
        augment_fn=self.batch_augment_fn,
        format_fn=self.batch_format_fn,
    )
    test_sequence = DatasetSequence(
        dataset.x_test,
        dataset.y_test,
        batch_size,
        augment_fn=self.batch_augment_fn if augment_val else None,
        format_fn=self.batch_format_fn,
    )

    self.network.fit(
        train_sequence,
        epochs=epochs,
        callbacks=callbacks,
        validation_data=test_sequence,
        use_multiprocessing=False,
        workers=1,
        shuffle=True,
        initial_epoch=initial_epoch,
    )
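# A minimal usage sketch for the Keras `fit` above (my own illustration, not
# from the source). `EmnistLinesDataset` and `LineModel` are hypothetical
# placeholder names for a dataset object exposing x_train/y_train/x_test/y_test
# and a wrapper class exposing this `fit`; only the signature shown above is
# assumed.
from tensorflow.keras.callbacks import EarlyStopping

dataset = EmnistLinesDataset()  # hypothetical
model = LineModel()             # hypothetical
model.fit(
    dataset,
    batch_size=64,
    epochs=20,
    callbacks=[EarlyStopping(monitor="val_loss", patience=3)],
)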
def evaluate(self, x, y, batch_size=16, verbose=True):
    """Evaluate model."""
    sequence = DatasetSequence(x, y, batch_size=batch_size)
    preds_raw = self.network.predict(sequence)
    trues = np.argmax(y, -1)
    preds = np.argmax(preds_raw, -1)
    pred_strings = ["".join(self.data.mapping.get(label, "") for label in pred).strip(" |_") for pred in preds]
    true_strings = ["".join(self.data.mapping.get(label, "") for label in true).strip(" |_") for true in trues]
    char_accuracies = [
        1 - editdistance.eval(true_string, pred_string) / len(true_string)
        for pred_string, true_string in zip(pred_strings, true_strings)
    ]
    if verbose:
        sorted_ind = np.argsort(char_accuracies)
        print("\nLeast accurate predictions:")
        for ind in sorted_ind[:5]:
            print(f"True: {true_strings[ind]}")
            print(f"Pred: {pred_strings[ind]}")
        print("\nMost accurate predictions:")
        for ind in sorted_ind[-5:]:
            print(f"True: {true_strings[ind]}")
            print(f"Pred: {pred_strings[ind]}")
        print("\nRandom predictions:")
        random_ind = np.random.randint(0, len(char_accuracies), 5)
        for ind in random_ind:  # pylint: disable=not-an-iterable
            print(f"True: {true_strings[ind]}")
            print(f"Pred: {pred_strings[ind]}")
    mean_accuracy = np.mean(char_accuracies)
    return mean_accuracy
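# A quick standalone illustration (mine, not from the source) of the
# edit-distance-based character accuracy used in the evaluate() methods in
# this file: 1 minus the Levenshtein distance normalized by the true string's
# length.
import editdistance

true_string, pred_string = "hello world", "helo w0rld"
accuracy = 1 - editdistance.eval(true_string, pred_string) / len(true_string)
print(f"{accuracy:.3f}")  # distance is 2 (one deletion, one substitution): 1 - 2/11 ≈ 0.818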
def evaluate(self, x, y, batch_size=16, verbose=False):
    # NOTE: no augmentation/format transformation here
    val_dl = DatasetSequence(x, y, batch_size=batch_size)

    was_training = self.network.training
    self.network.eval()
    preds = []
    labels = []
    with torch.no_grad():
        for batch in val_dl:
            batch_inputs, batch_labels = batch
            batch_inputs = batch_inputs.to(device)  # no need to move labels to the GPU
            batch_preds = self.network(batch_inputs)
            preds.append(batch_preds.cpu())
            labels.append(batch_labels)
    preds = torch.cat(preds).numpy()
    labels = torch.cat(labels).numpy()
    if was_training:
        self.network.train()

    # preds: (batch, num_classes, h, w); labels: (batch, h, w)
    n, h, w = labels.shape
    corrects = np.argmax(preds, axis=1) == labels
    mean_corrects = np.sum(corrects, axis=(1, 2)) / (h * w)
    return np.mean(mean_corrects)
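# Tiny self-contained check (my own illustration, not from the original file)
# of the per-pixel accuracy computed above: argmax over the class axis, compare
# against integer labels, and average over the spatial dimensions.
import numpy as np

example_preds = np.zeros((1, 3, 2, 2))         # (batch, num_classes, h, w)
example_preds[:, 1] = 1.0                      # class 1 wins at every pixel
example_labels = np.array([[[1, 1], [1, 0]]])  # (batch, h, w); one pixel disagrees

corrects = np.argmax(example_preds, axis=1) == example_labels
print(np.sum(corrects, axis=(1, 2)) / (2 * 2))  # -> [0.75]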
def evaluate(self, x, y, batch_size: int = 16, verbose: bool = True) -> float:
    test_sequence = DatasetSequence(x, y, batch_size, format_fn=self.batch_format_fn)

    # We can use the `ctc_decoded` layer that is part of our model here.
    decoding_model = KerasModel(inputs=self.network.input, outputs=self.network.get_layer('ctc_decoded').output)
    preds = decoding_model.predict_generator(test_sequence)

    trues = np.argmax(y, -1)
    pred_strings = [''.join(self.data.mapping.get(label, '') for label in pred).strip(' |_') for pred in preds]
    true_strings = [''.join(self.data.mapping.get(label, '') for label in true).strip(' |_') for true in trues]

    char_accuracies = [
        1 - editdistance.eval(true_string, pred_string) / len(true_string)
        for pred_string, true_string in zip(pred_strings, true_strings)
    ]
    if verbose:
        sorted_ind = np.argsort(char_accuracies)
        print("\nLeast accurate predictions:")
        for ind in sorted_ind[:5]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nMost accurate predictions:")
        for ind in sorted_ind[-5:]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nRandom predictions:")
        random_ind = np.random.randint(0, len(char_accuracies), 5)
        for ind in random_ind:  # pylint: disable=not-an-iterable
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
    mean_accuracy = np.mean(char_accuracies)
    return mean_accuracy
def fit(self, dataset, batch_size: int = 32, epochs: int = 10, augment_val: bool = True, callbacks: list = None, **train_args):
    if callbacks is None:
        callbacks = []

    if train_args.get('pretrained', False):
        self.load_weights()
        print('loaded pretrained network')

    train_sequence = DatasetSequence(
        dataset.x_train,
        dataset.y_train,
        batch_size=batch_size,
        augment_fn=self.batch_augment_fn,
        format_fn=self.batch_format_fn,
    )
    print(f"Total #training: {len(train_sequence.dataset)}")
    print(f"Total #params: {sum(param.nelement() for param in self.network.parameters())}")

    self.network.to(device)
    self.network.train()
    optimizer = self.optimizer()(self.network.parameters(), lr=3e-4)  # RMSprop works better than Adam in this case
    blank_idx = self.data.num_classes - 1
    loss_fn = self.loss()(blank=blank_idx, reduction='mean')
    validation_interval = 5

    score = self.evaluate(dataset.x_test, dataset.y_test)
    print(f"Validation score: {score:.4f}")

    for epoch in range(epochs):
        running_loss = 0.0
        for i, batch in enumerate(train_sequence, 0):
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            # NOTE: this assumes blank_idx occurs only as contiguous padding at
            # the end of each label sequence. First compute each label's true
            # (unpadded) length, then concatenate the unpadded labels for the
            # CTC loss.
            output_lengths = torch.sum(labels != blank_idx, dim=1).to(torch.long).to(device)
            labels_concat = torch.cat([labels[i, :l] for i, l in enumerate(output_lengths)])

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            log_soft_max, input_lengths = self.network(inputs)
            loss = loss_fn(log_soft_max.cpu(), labels_concat.cpu(), input_lengths.cpu(), output_lengths.cpu())
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            print(f"[{epoch+1}, {i+1}] loss: {running_loss/(i+1):.5f}")

        if epoch % validation_interval == validation_interval - 1:
            score = self.evaluate(dataset.x_test, dataset.y_test)
            print(f"Validation score: {score:.4f}")

    print('Finished Training')
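# Illustration (mine, not from the original) of the padded-label handling in
# the CTC training loop above: labels padded with blank_idx are reduced to
# per-sample lengths and a single concatenated 1-D target tensor, which is the
# format torch.nn.CTCLoss accepts when targets are given as a concatenated
# tensor.
import torch

blank_idx = 9
labels = torch.tensor([[3, 5, 9, 9],    # true length 2
                       [7, 2, 4, 9]])   # true length 3

output_lengths = torch.sum(labels != blank_idx, dim=1).to(torch.long)
labels_concat = torch.cat([labels[i, :l] for i, l in enumerate(output_lengths)])
print(output_lengths)  # tensor([2, 3])
print(labels_concat)   # tensor([3, 5, 7, 2, 4])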
def evaluate(self, x: np.ndarray, y: np.ndarray, batch_size: int = 16, _verbose: bool = False):  # pylint: disable=unused-argument
    # Use a small batch size to use less memory
    sequence = DatasetSequence(x, y, batch_size=batch_size)
    preds = self.network.predict(sequence)
    return np.mean(np.argmax(preds, -1) == np.argmax(y, -1))
def evaluate(self, x, y, batch_size=64, verbose=False):
    val_dl = DatasetSequence(x, y, batch_size=batch_size)

    was_training = self.network.training
    self.network.eval()
    preds = []
    labels = []
    with torch.no_grad():
        for batch in val_dl:
            batch_inputs, batch_labels = batch
            batch_inputs = batch_inputs.to(device)  # no need to move labels to the GPU
            batch_labels = torch.squeeze(batch_labels, dim=1)
            batch_preds = self.network(batch_inputs)
            preds.append(batch_preds.cpu())
            labels.append(batch_labels)
    preds = torch.cat(preds).numpy()
    labels = torch.cat(labels).numpy()  # (B,)
    if was_training:
        self.network.train()

    print(f"Evaluated: preds: {preds.shape}, labels: {labels.shape}")
    return np.mean(np.argmax(preds, -1) == labels)
def evaluate(self, x, y, batch_size: int = 16, verbose: bool = True) -> float:
    blank_idx = self.data.num_classes - 1
    output_length = self.data.output_shape[0]
    test_sequence = DatasetSequence(x, y, batch_size, format_fn=self.batch_format_fn)

    with torch.no_grad():
        was_training = self.network.training
        self.network.eval()
        preds_raw = []
        input_lengths = []
        labels_raw = []
        running_loss = 0
        for i, batch in enumerate(test_sequence):
            batch_x, batch_y = map(lambda out: out.to(device), batch)

            # log_soft_max: (T, B, num_classes)
            log_soft_max, batch_input_lengths = map(lambda out: out.to("cpu"), self.network(batch_x))
            preds_raw.append(log_soft_max.permute(1, 0, 2))
            input_lengths.append(batch_input_lengths)
            labels_raw.append(batch_y.to("cpu"))

            output_lengths = torch.sum(batch_y != blank_idx, dim=1).to(torch.long).cpu()
            loss = self.loss()(blank=blank_idx, reduction='mean')(
                log_soft_max, batch_y.cpu(), batch_input_lengths, output_lengths)
            running_loss += loss.item()

    # preds_raw: (B, T, C)
    preds_raw, input_lengths = torch.cat(preds_raw), torch.cat(input_lengths)
    labels_raw = torch.cat(labels_raw).numpy()  # (B, output_length)
    print(f"Validation loss: {running_loss/(i+1):.4f}")

    preds = ctc_decode(preds_raw, input_lengths, output_length)
    trues = labels_raw
    pred_strings = [''.join(self.data.mapping.get(label, '') for label in pred).strip(' |_') for pred in preds]
    true_strings = [''.join(self.data.mapping.get(label, '') for label in true).strip(' |_') for true in trues]

    char_accuracies = [
        1 - editdistance.eval(true_string, pred_string) / len(true_string)
        for pred_string, true_string in zip(pred_strings, true_strings)
    ]
    if verbose:
        sorted_ind = np.argsort(char_accuracies)
        print("\nLeast accurate predictions:")
        for ind in sorted_ind[:5]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nMost accurate predictions:")
        for ind in sorted_ind[-5:]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nRandom predictions:")
        random_ind = np.random.randint(0, len(char_accuracies), 5)
        for ind in random_ind:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
    mean_accuracy = np.mean(char_accuracies)
    if was_training:
        self.network.train()
    return mean_accuracy
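# `ctc_decode` is called above but not defined in this section. A minimal
# greedy (best-path) decoder consistent with how it is called might look like
# the sketch below: argmax at each timestep, collapse consecutive repeats, drop
# blanks, and pad the result to `output_length` with the blank index. This is
# my assumption about its behavior, not the original implementation.
import torch

def ctc_decode(preds_raw, input_lengths, output_length, blank_idx=None):
    # preds_raw: (B, T, C) log-probabilities; input_lengths: (B,)
    num_classes = preds_raw.shape[-1]
    if blank_idx is None:
        blank_idx = num_classes - 1  # assumed convention, matching the code above
    decoded = []
    for probs, length in zip(preds_raw, input_lengths):
        best_path = torch.argmax(probs[:int(length)], dim=-1).tolist()
        out = []
        prev = None
        for label in best_path:
            if label != blank_idx and label != prev:
                out.append(label)
            prev = label
        out = out[:output_length] + [blank_idx] * (output_length - len(out))
        decoded.append(out)
    return decoded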
def evaluate(self, x, y, batch_size=16, verbose=False):  # pylint: disable=unused-argument
    # Use a small batch size to use less memory
    sequence = DatasetSequence(x, y, batch_size=batch_size)
    preds = self.network.predict_generator(sequence)
    return np.mean(np.argmax(preds, -1) == np.argmax(y, -1))
def fit(self, dataset, batch_size: int = 32, epochs: int = 10, augment_val: bool = True, callbacks: list = None, **train_args):
    if callbacks is None:
        callbacks = []

    if train_args.get('pretrained', False):
        self.load_weights()
        print('loaded pretrained network')

    train_sequence = DatasetSequence(
        dataset.x_train,
        dataset.y_train,
        batch_size=batch_size,
        augment_fn=self.batch_augment_fn,
        format_fn=self.batch_format_fn,
    )
    print(f"Total #training: {len(train_sequence.dataset)}")
    print(f"Total #params: {sum(param.nelement() for param in self.network.parameters())}")

    self.network.to(device)
    self.network.train()

    optimizer_class = self.optimizer()
    optimizer = optimizer_class(self.network.parameters(), lr=3e-4)  # magic Adam lr
    loss_fn_class = self.loss()
    loss_fn = loss_fn_class()
    validation_interval = 1

    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, batch in enumerate(train_sequence, 0):
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            labels = torch.squeeze(labels)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = self.network(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            print(f"[{epoch+1}, {i+1}] loss: {running_loss/(i+1):.5f}")

        if epoch % validation_interval == validation_interval - 1:
            score = self.evaluate(dataset.x_test, dataset.y_test)
            print(f"Validation score: {score:.4f}")

    print('Finished Training')
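# For context (an assumption on my part; these definitions are not shown in
# this section): the PyTorch `fit`/`evaluate` methods above call
# `self.optimizer()` and `self.loss()` for classes rather than instances, and
# instantiate them afterwards. A sketch of what that contract might look like:
import torch

class PytorchModelBase:  # hypothetical base class
    def optimizer(self):
        return torch.optim.Adam  # instantiated later: optimizer_class(params, lr=3e-4)

    def loss(self):
        return torch.nn.CrossEntropyLoss  # instantiated later: loss_fn_class()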
def evaluate(self, x, y, batch_size=16, verbose=False):
    sequence = DatasetSequence(x, y, batch_size=batch_size)
    preds = self.network.predict_generator(sequence)
    return np.mean(np.argmax(preds, -1) == np.argmax(y, -1))
def evaluate(self, x, y, batch_size=16, verbose=True):
    # x: (n, h, w); y: (n, output_length, num_classes)
    num_data, output_length, num_classes = y.shape
    loss_fn = self.loss()()
    sequence = DatasetSequence(x, y, batch_size=batch_size)

    running_loss = 0
    with torch.no_grad():
        was_training = self.network.training
        self.network.eval()
        preds_raw = []
        labels_raw = []
        for i, batch in enumerate(sequence):
            batch_x, batch_y = batch
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            batch_pred = self.network(batch_x)
            loss = loss_fn(batch_pred, batch_y)
            running_loss += loss.item()
            preds_raw.append(batch_pred.cpu())
            labels_raw.append(batch_y.cpu())

    preds_raw = torch.cat(preds_raw).numpy()
    # transform labels from scalar to original one-hot-encoding shape
    # labels_raw = to_categorical(torch.cat(labels_raw).numpy(), num_classes)
    print(f"Evaluation loss: {running_loss/(i+1)}")
    if was_training:
        self.network.train()

    # trues: (batch, output_length, num_classes); preds_raw: (batch, num_classes, output_length)
    trues = torch.cat(labels_raw).numpy()
    preds = np.argmax(preds_raw, 1)

    pred_strings = [
        ''.join(self.data.mapping.get(label, '') for label in pred).strip(' |_')
        for pred in preds
    ]
    true_strings = [
        ''.join(self.data.mapping.get(label, '') for label in true).strip(' |_')
        for true in trues
    ]

    char_accuracies = [
        1 - editdistance.eval(true_string, pred_string) / len(true_string)
        for pred_string, true_string in zip(pred_strings, true_strings)
    ]
    if verbose:
        sorted_ind = np.argsort(char_accuracies)
        print("\nLeast accurate predictions:")
        for ind in sorted_ind[:5]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nMost accurate predictions:")
        for ind in sorted_ind[-5:]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nRandom predictions:")
        random_ind = np.random.randint(0, len(char_accuracies), 5)
        for ind in random_ind:  # pylint: disable=not-an-iterable
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
    mean_accuracy = np.mean(char_accuracies)
    return mean_accuracy