Ejemplo n.º 1
0
 def _update_details(self):
     """Refresh the details label for the sample under the cursor.

     Predictions are looked up in ``self.data_set`` under
     ``pedia.predictions``. If they cannot be obtained, the label shows
     'No details' in grey. Otherwise the prediction at ``self._cursor`` is
     displayed. When both ``pedia.top_k_prob`` and ``pedia.top_k_label`` are
     keys of the data set's properties, the top-k classes are listed
     together with their probabilities multiplied by 100.

     The text color is black when ``self.labels`` is None; otherwise it is
     green if the label at the cursor equals the prediction and red if not.
     """
     assert isinstance(self.data_set, DataSet)
     # Best-effort lookup: any ordinary failure means "no predictions".
     # `Exception` (instead of a bare except) lets KeyboardInterrupt and
     # SystemExit propagate.
     try:
         predictions = self.data_set[pedia.predictions]
     except Exception:
         predictions = None
     if predictions is None:
         self.details.config(text='No details', fg='grey')
         return

     predictions = misc.convert_to_dense_labels(predictions)
     cursor = self._cursor
     prediction = predictions[cursor]
     # Unwrap the entry when it is itself an array
     if isinstance(prediction, np.ndarray): prediction = prediction[0]

     # Decide color
     color = 'black'
     if self.labels is not None:
         color = 'green' if self.labels[cursor] == prediction else 'red'

     # Decide text
     properties = self.data_set.properties
     if pedia.top_k_prob in properties and pedia.top_k_label in properties:
         probs = self.data_set[pedia.top_k_prob][cursor]
         preds = self.data_set[pedia.top_k_label][cursor]
         # The first top-k label must agree with the dense prediction
         assert preds[0] == prediction
         info = ' '.join(
             '{}({:.1f})'.format(self._get_class_string(pred), prob * 100)
             for pred, prob in zip(preds, probs))
     else:
         info = 'Prediction: {}'.format(
             self._get_class_string(prediction))
     self.details.config(text=info, fg=color)
Ejemplo n.º 2
0
    def load_as_tframe_data(cls, data_dir):
        """Load the data set stored in `data_dir`, converting raw data if
        no converted file exists yet.

        If the file `cls.TFD_FILE_NAME` exists inside `data_dir`, it is
        loaded with `DataSet.load` and returned. Otherwise the raw data is
        read with `cls.load_as_numpy_arrays`, wrapped into a `DataSet`
        (named `cls.DATA_NAME`, with `cls.PROPERTIES` as extra keyword
        arguments), grouped by class when the number of classes is known,
        saved to that file path and returned.

        Args:
            data_dir: directory containing the .tfd file or the raw data.

        Returns:
            The loaded or newly created DataSet.
        """
        from .dataset import DataSet
        file_path = os.path.join(data_dir, cls.TFD_FILE_NAME)
        if os.path.exists(file_path): return DataSet.load(file_path)

        # If .tfd file does not exist, try to convert from raw data
        console.show_status('Trying to convert raw data to tframe DataSet ...')
        images, labels = cls.load_as_numpy_arrays(data_dir)
        data_set = DataSet(images,
                           labels,
                           name=cls.DATA_NAME,
                           **cls.PROPERTIES)

        # Generate groups if necessary
        if data_set.num_classes is not None:
            # np.asarray is a no-op for arrays and keeps the element-wise
            # comparison below valid should a plain sequence be returned
            dense_labels = np.asarray(misc.convert_to_dense_labels(labels))
            # groups[i] holds the indices of all samples of class i
            data_set.properties[data_set.GROUPS] = [
                list(np.argwhere(dense_labels == i).ravel())
                for i in range(data_set.num_classes)
            ]

        # Show status
        console.show_status('Successfully converted {} samples'.format(
            data_set.size))
        # Save DataSet
        console.show_status('Saving data set ...')
        data_set.save(file_path)
        console.show_status('Data set saved to {}'.format(file_path))
        return data_set
Ejemplo n.º 3
0
    def evaluate_model(self,
                       data,
                       batch_size=None,
                       extractor=None,
                       export_false=False,
                       **kwargs):
        """Report classification accuracy on `data` and optionally display
        the misclassified samples.

        Args:
            data: data to evaluate; it is split into batches by
                `self.get_data_batches`. Every batch must be a DataSet with
                targets.
            batch_size: forwarded to `self.get_data_batches`.
            extractor: forwarded to `self._classify_batch`.
            export_false: if True and the accuracy is below 100%, the
                misclassified samples are shown in an ImageViewer together
                with their true labels and predictions.
            **kwargs: accepted for interface compatibility; not used here.
        """
        # Feed data set into model and get results
        false_sample_list = []
        false_label_list = []
        true_label_list = []
        num_samples = 0

        console.show_status('Evaluating classifier ...')
        for batch in self.get_data_batches(data, batch_size):
            assert isinstance(batch, DataSet) and batch.targets is not None
            # Get predictions
            preds = self._classify_batch(batch, extractor)
            # Get true labels in dense format
            if batch.targets.shape[-1] > 1:
                targets = batch.targets.reshape(-1, batch.targets.shape[-1])
            else:
                targets = batch.targets
            num_samples += len(targets)
            true_labels = misc.convert_to_dense_labels(targets)
            # A single label for several predictions is repeated so that
            # the element-wise comparison below lines up
            if len(true_labels) < len(preds):
                assert len(true_labels) == 1
                true_labels = np.concatenate((true_labels, ) * len(preds))
            # Select false samples
            false_indices = np.argwhere(preds != true_labels)
            if false_indices.size == 0: continue
            features = batch.features
            if self.input_type is InputTypes.RNN_BATCH:
                # Merge the two leading axes so features can be indexed
                # with the flat false indices
                features = np.reshape(features, [-1, *features.shape[2:]])
            false_indices = np.reshape(false_indices, false_indices.size)
            false_sample_list.append(features[false_indices])
            false_label_list.append(preds[false_indices])
            true_label_list.append(true_labels[false_indices])

        # Without any sample the accuracy is undefined; bail out instead of
        # dividing by zero below
        if num_samples == 0:
            console.show_status('No samples to evaluate')
            return

        # Concatenate
        if len(false_sample_list) > 0:
            false_sample_list = np.concatenate(false_sample_list)
            false_label_list = np.concatenate(false_label_list)
            true_label_list = np.concatenate(true_label_list)

        # Show accuracy
        accuracy = (num_samples - len(false_sample_list)) / num_samples * 100
        console.supplement('Accuracy on {} is {:.2f}%'.format(
            data.name, accuracy))

        # Try to export false samples
        if export_false and accuracy < 100:
            false_set = DataSet(features=false_sample_list,
                                targets=true_label_list)
            if hasattr(data, 'properties'):
                false_set.properties = data.properties
            false_set.data_dict[pedia.predictions] = false_label_list
            from tframe.data.images.image_viewer import ImageViewer
            vr = ImageViewer(false_set)
            vr.show()
Ejemplo n.º 4
0
 def init_groups(self):
     """Store, for every class, the indices of its samples.

     Does nothing when `self.num_classes` is None. Otherwise the targets
     are converted to dense labels and `self.properties[self.GROUPS]` is
     set to a list whose i-th entry lists the indices of the samples whose
     dense label equals i.
     """
     if self.num_classes is None:
         return
     labels = misc.convert_to_dense_labels(self.targets)
     self.properties[self.GROUPS] = [
         list(np.argwhere(labels == class_id).ravel())
         for class_id in range(self.num_classes)
     ]
Ejemplo n.º 5
0
 def _classify_batch(self, batch, extractor):
     """Run the probability tensor on `batch` and turn the output into
     predictions.

     `batch` must be a DataSet with features. For RNN batch input the
     3-D output is flattened to 2-D, keeping the last axis. Predictions
     come from `extractor` when one is given, otherwise from
     `misc.convert_to_dense_labels`.
     """
     assert isinstance(batch, DataSet) and batch.features is not None
     checked_batch = self._sanity_check_before_use(batch)
     probabilities = self._probabilities.run(
         self._get_default_feed_dict(checked_batch, is_training=False))
     if self.input_type is InputTypes.RNN_BATCH:
         assert len(probabilities.shape) == 3
         last_dim = probabilities.shape[2]
         probabilities = np.reshape(probabilities, (-1, last_dim))
     if extractor is not None:
         return extractor(probabilities)
     return misc.convert_to_dense_labels(probabilities)
Ejemplo n.º 6
0
 def dense_labels(self):
     """Return the dense labels, computing and storing them on first use.

     If `self.data_dict` already holds an entry under `self.DENSE_LABELS`
     it is returned as is. Otherwise the targets are converted (list or
     tuple targets are concatenated along axis 0 first), the result is
     assigned to `self.dense_labels` and returned.

     Raises:
         AssertionError: if the number of classes is unknown.
     """
     key = self.DENSE_LABELS
     if key in self.data_dict:
         return self.data_dict[key]
     if self.num_classes is None:
         raise AssertionError(
             '!! # classes should be known for getting dense labels')
     # Convert targets; a list/tuple (sequence summary situation) is
     # merged into one array first
     raw_targets = self.targets
     if isinstance(raw_targets, (list, tuple)):
         raw_targets = np.concatenate(raw_targets, axis=0)
     converted = misc.convert_to_dense_labels(raw_targets)
     self.dense_labels = converted
     return converted
Ejemplo n.º 7
0
    def evaluate_model(self,
                       data,
                       batch_size=None,
                       extractor=None,
                       export_false=False,
                       **kwargs):
        """Classify `data`, print misclassification statistics and return
        the indices of correctly and wrongly classified samples.

        Args:
            data: a DataSet providing `targets` and `lengths`.
            batch_size: forwarded to `self.classify`.
            extractor: not used; `GPAT.raw_extractor` is always passed to
                `self.classify`.
            export_false: not used.
            **kwargs: not used.

        Returns:
            A tuple `(correct_indices, false_indices)` of two lists of
            sample indices, each in ascending order.
        """
        console.show_status('Evaluating classifier ...')
        assert isinstance(data, DataSet)

        preds = self.classify(data,
                              batch_size=batch_size,
                              extractor=GPAT.raw_extractor)
        preds = np.argmax(preds, axis=-1)
        labels = convert_to_dense_labels(data.targets)

        # Partition the sample indices in a single pass (avoids testing
        # membership in a list for every index)
        false_indices, correct_indices = [], []
        for ind in range(len(preds)):
            if preds[ind] != labels[ind]:
                false_indices.append(ind)
            else:
                correct_indices.append(ind)

        # Number of misclassified samples per true label, most frequent first
        false_labels = labels[false_indices]
        cou = Counter(false_labels).most_common()

        # Count the misclassified samples whose length is below 32000
        false_samples_lengths = [data.lengths[i] for i in false_indices]
        num_short = sum(
            1 for length in false_samples_lengths if length < 32000)

        console.show_status('total_num :')
        console.pprint(len(labels))
        console.show_status('False_labels_num:')
        console.pprint(len(false_labels))
        console.show_status('The false num of each label:')
        console.pprint(cou)
        console.show_status('Short samples num:')
        console.pprint(num_short)

        return correct_indices, false_indices
Ejemplo n.º 8
0
 def refresh_groups(self, target_key='targets'):
     """Rebuild the per-class sample index lists from the given targets.

     The targets are read via `self[target_key]`, converted to dense
     labels, and `self.properties[self.GROUPS]` is set to a list whose
     i-th entry lists the indices of the samples of class i.

     Args:
         target_key: key under which the targets are looked up.

     Raises:
         AssertionError: if the number of classes is unknown or no targets
             are found under `target_key`.
     """
     # Sanity check
     if self.num_classes is None:
         raise AssertionError('!! DataSet should have known # classes')
     targets = self[target_key]
     if targets is None:
         raise AssertionError(
             '!! Can not find targets with key `{}`'.format(target_key))
     # Handle sequence summary situation
     if isinstance(targets, (list, tuple)):
         targets = np.concatenate(targets, axis=0)
     # np.asarray is a no-op for arrays and keeps the element-wise
     # comparison below valid should a plain sequence be returned
     dense_labels = np.asarray(misc.convert_to_dense_labels(targets))
     # One vectorized comparison per class instead of a Python-level scan
     self.properties[self.GROUPS] = [
         list(np.argwhere(dense_labels == i).ravel())
         for i in range(self.num_classes)
     ]
Ejemplo n.º 9
0
    def set_data(self, data_set):
        """Attach a data set to the viewer and refresh the display.

        `data_set` may be None (nothing happens), a path string (loaded
        with `DataSet.load`) or a DataSet. A data set that is not a regular
        array is replaced by its `stack`. The cursor is reset to 0 and, if
        targets exist, `self.labels` is set to their dense form.

        Raises:
            TypeError: if the resolved object is not a DataSet.
        """
        if data_set is None:
            return

        # If a path is given
        if isinstance(data_set, six.string_types):
            data_set = DataSet.load(data_set)
        if not isinstance(data_set, DataSet):
            raise TypeError(
                '!! Data set must be an instance of tframe DataSet')

        self.data_set = (data_set
                         if data_set.is_regular_array else data_set.stack)
        self._set_cursor(0)

        targets = self.data_set.targets
        if targets is not None:
            self.labels = misc.convert_to_dense_labels(targets)
        console.show_status('Data set set to ImageViewer')

        # Refresh image viewer
        self.refresh()
Ejemplo n.º 10
0
 def _update_details(self):
     """Refresh the details label for the sample under the cursor.

     Predictions are looked up in ``self.data_set`` under
     ``pedia.predictions``. If they cannot be obtained, the label shows
     'No details' in grey. Otherwise the prediction at ``self._cursor`` is
     displayed: in black when ``self.labels`` is None, else in green if the
     label at the cursor equals the prediction and in red if it does not.
     """
     assert isinstance(self.data_set, DataSet)
     # Best-effort lookup: any ordinary failure means "no predictions".
     # `Exception` (instead of a bare except) lets KeyboardInterrupt and
     # SystemExit propagate.
     try:
         predictions = self.data_set[pedia.predictions]
     except Exception:
         predictions = None
     if predictions is None:
         self.details.config(text='No details', fg='grey')
         return

     predictions = misc.convert_to_dense_labels(predictions)
     cursor = self._cursor
     prediction = predictions[cursor]
     # Unwrap the entry when it is itself an array
     if isinstance(prediction, np.ndarray): prediction = prediction[0]
     info = 'Prediction: {}'.format(self._get_class_string(prediction))
     color = 'black'
     if self.labels is not None:
         color = 'green' if self.labels[cursor] == prediction else 'red'
     self.details.config(text=info, fg=color)
Ejemplo n.º 11
0
    def result_analyze(prods, dataset, audio_length=32000):
        """Analyze the classified results.

        Prints the number of misclassified samples and how many of them are
        shorter than `audio_length`, then plots the number of misclassified
        samples per true class.

        Args:
            prods: dense predictions; compared element-wise with the dense
                labels of `dataset` and must have the same length.
            dataset: a DataSet. Its features should be the raw data (list,
                unequal length) and it should have the NUM_CLASSES property.
            audio_length: samples shorter than this count as short.
        """
        assert isinstance(dataset, DataSet)
        dense_labels = misc.convert_to_dense_labels(dataset.targets)
        assert len(prods) == len(dense_labels)

        false_indices = list(np.argwhere(prods != dense_labels).ravel())
        false_samples = [dataset.features[i] for i in false_indices]
        false_labels = dense_labels[false_indices]

        # analysis in aspects:
        # the false samples distribution via classes
        # the distribution of the length of the false samples
        false_class_num = [
            int(np.count_nonzero(false_labels == i))
            for i in range(dataset.num_classes)
        ]
        num_short = sum(
            1 for sample in false_samples if len(sample) < audio_length)
        console.supplement(
            '.. Total Num: {}, Less than audio length num {}'.format(
                len(false_samples), num_short))

        plot_bar_diagram(false_class_num,
                         title='False class num of the categories')
Ejemplo n.º 12
0
    def set_data(self, data_set):
        """Attach a data set to the viewer and refresh the display.

        `data_set` may be None (nothing happens), a path string (loaded
        with `DataSet.load`) or a DataSet. A data set that is not a regular
        array is replaced by its `stack`, and the cursor is reset to 0.

        Labels: 1-D targets are used directly as `self.labels`; 2-D targets
        are converted to dense labels and flattened.

        Interleaving: if `self.kwargs` contains 'interleave_key', the array
        stored under that key must have the same shape as the features. The
        features are then replaced by the sequence feature 0, shadow 0,
        feature 1, shadow 1, and so on.

        Raises:
            TypeError: if the resolved object is not a DataSet.
            KeyError: if the interleave key is not in the data dict.
        """
        if data_set is None:
            return

        # If a path is given
        if isinstance(data_set, six.string_types):
            data_set = DataSet.load(data_set)
        if not isinstance(data_set, DataSet):
            raise TypeError(
                '!! Data set must be an instance of tframe DataSet')

        if not data_set.is_regular_array:
            data_set = data_set.stack
        self.data_set = data_set
        self._set_cursor(0)

        # For DataSet like MNIST and CIFAR-XXX
        targets = self.data_set.targets
        if targets is not None:
            rank = len(targets.shape)
            if rank == 1:
                self.labels = targets
            elif rank == 2:
                self.labels = misc.convert_to_dense_labels(targets).flatten()

        # Consider DataSets in image segmentation tasks
        interleave_key = self.kwargs.get('interleave_key', None)
        if interleave_key is not None:
            if interleave_key not in data_set.data_dict.keys():
                raise KeyError('!! Can not find `{}` in DataSet'.format(
                    interleave_key))
            shadows = getattr(data_set, interleave_key)
            features = data_set.features
            assert shadows.shape == features.shape
            # Give every image a leading axis of size 1 so that the
            # alternating sequence can be concatenated along axis 0
            images = []
            for feature, shadow in zip(features, shadows):
                images.extend(
                    np.reshape(img, (1, ) + img.shape)
                    for img in (feature, shadow))
            data_set.features = np.concatenate(images, axis=0)

        console.show_status('Data set set to ImageViewer')

        # Refresh image viewer
        self.refresh()
Ejemplo n.º 13
0
def _brutal_chop(signal_set, size):
    """Chop every signal of `signal_set` into rows of length `size`.

    `signal_set` must be a SignalSet of size 1 and `size` a positive
    integer. A signal shorter than `size` is zero-padded at its end; for
    longer signals the trailing samples that do not fill a whole row are
    dropped. The resulting list of (num_steps, size) arrays is assigned to
    `signal_set.features`.

    If labels are found under `pedia.labels` in the data dict, each label
    is tiled once per row and the list is assigned to `signal_set.targets`;
    the dense form of each label is stored under 'dense_labels'.
    """
    assert isinstance(signal_set, SignalSet) and signal_set.size == 1
    checker.check_positive_integer(size)

    labels = signal_set.data_dict.get(pedia.labels, None)
    has_labels = labels is not None
    chunks, tiled_labels, dense = [], [], []
    for index, signal in enumerate(signal_set.signals):
        assert isinstance(signal, Signal)
        shortfall = size - len(signal)
        if shortfall > 0:
            # Right-pad with zeros up to exactly one row
            signal = np.pad(signal, (0, shortfall),
                            mode='constant',
                            constant_values=0)
        steps = len(signal) // size
        chunks.append(np.reshape(signal[:steps * size], (steps, size)))
        if has_labels:
            tiled_labels.append(np.tile(labels[index], (steps, 1)))
            dense.append(misc.convert_to_dense_labels(labels[index]))

    # Set features and targets to signal set
    signal_set.features = chunks
    if has_labels:
        signal_set.targets = tiled_labels
        signal_set.data_dict['dense_labels'] = dense
Ejemplo n.º 14
0
def get_scores(prediction, labels):
    """Return the `mapk` score of `prediction` against the dense form of
    `labels`."""
    return mapk(convert_to_dense_labels(labels), prediction)