Example #1
0
    def execute(self, slot, subindex, roi, result):
        """Train an edge classifier from the labeled edges of every lane.

        For each lane, the labels held by the ``EdgeLabelsDict`` slot are
        turned into a ``(sp1, sp2, label)`` table, rows labeled 0 are dropped,
        and the table is right-joined with the lane's ``EdgeFeaturesDataFrame``
        on ``['sp1', 'sp2']``.  The per-lane tables are stacked and handed to
        ``ParallelVigraRfLazyflowClassifierFactory`` for training.

        Args:
            slot: Not used by this method.
            subindex: Not used by this method.
            roi: Not used by this method.
            result: Indexable container.  ``result[0]`` is set to the trained
                classifier, or to ``None`` when no lane has any labels.

        Raises:
            AssertionError: If a feature table does not start with the
                ``sp1``/``sp2`` columns, if the merged table does not end with
                ``label``, or if the classifier reports classes other than
                1 and 2.
        """
        lane_dfs = []

        for lane_index, (labels_dict_slot, features_slot) in \
                enumerate(zip(self.EdgeLabelsDict, self.EdgeFeaturesDataFrame)):
            logger.info(
                "Retrieving features for lane {}...".format(lane_index))

            # Copy now to avoid threading issues.
            labels_dict = labels_dict_slot.value.copy()
            if not labels_dict:
                continue

            # On Python 3, dict.keys() is a view; np.array() would wrap it in
            # a 0-d object array instead of an (N, 2) array, so materialize it.
            sp_columns = np.array(list(labels_dict.keys()))
            edge_features_df = features_slot.value
            assert list(edge_features_df.columns[0:2]) == ['sp1', 'sp2']

            labels_df = pd.DataFrame(sp_columns, columns=['sp1', 'sp2'])
            # Same reasoning as above: hand pandas a real list, not a view.
            # Keys and values come from the same unmodified copy, so their
            # iteration orders line up.
            labels_df['label'] = list(labels_dict.values())

            # Drop zero labels
            labels_df = labels_df[labels_df['label'] != 0]

            # Merge in features.  how='right' keeps every labeled edge, even
            # if it has no row in the feature table.
            lane_dfs.append(pd.merge(edge_features_df,
                                     labels_df,
                                     how='right',
                                     on=['sp1', 'sp2']))

        if not lane_dfs:
            # No labels yet.
            result[0] = None
            return

        # DataFrame.append() was removed in pandas 2.0; a single concat stacks
        # the lanes in the same order and avoids repeated copying.
        all_features_and_labels_df = pd.concat(lane_dfs)

        assert list(all_features_and_labels_df.columns[0:2]) == ['sp1', 'sp2']
        assert all_features_and_labels_df.columns[-1] == 'label'

        # Omit 'sp1', 'sp2', and 'label'
        feature_matrix = all_features_and_labels_df.iloc[:, 2:-1].values
        labels = all_features_and_labels_df.iloc[:, -1].values

        logger.info("Training classifier with {} labels...".format(
            len(labels)))
        # TODO: Allow factory to be configured via an input slot
        classifier_factory = ParallelVigraRfLazyflowClassifierFactory()
        classifier = classifier_factory.create_and_train(
            feature_matrix,
            labels,
            feature_names=all_features_and_labels_df.columns[2:-1].values)
        assert set(classifier.known_classes).issubset({1, 2})
        result[0] = classifier
    def test_basic(self):
        """Train on the fixture data and sanity-check the predicted probabilities."""
        # The positional 10 is passed to the factory as-is (presumably the
        # number of trees -- confirm against the factory signature).
        rf_factory = ParallelVigraRfLazyflowClassifierFactory(10)

        # Training must produce the parallel vigra RF classifier, and it must
        # know exactly the classes 1 and 2.
        trained = rf_factory.create_and_train(
            self.training_feature_matrix, self.training_labels
        )
        assert isinstance(trained, ParallelVigraRfLazyflowClassifier)
        assert list(trained.known_classes) == [1, 2]

        # Prediction must return a 4x2 float32 array.
        probs = trained.predict_probabilities(self.prediction_data)
        assert probs.shape == (4, 2)
        assert probs.dtype == numpy.float32

        # Every entry must lie in [0, 1].
        assert (0 <= probs).all()
        assert (probs <= 1.0).all()

        # argmax yields a 0-based column index; +1 maps it onto the class
        # labels before comparing with the expected classes.
        predicted_classes = numpy.argmax(probs, axis=-1) + 1
        assert (predicted_classes == self.expected_classes).all()
    def execute(self, slot, subindex, roi, result):
        """Build the training table from all lanes and train the edge classifier.

        Each lane contributes the edges found in its ``EdgeLabelsDict`` value
        (keys are unpacked into the ``sp1``/``sp2`` columns, values become the
        ``label`` column).  Edges labeled 0 are discarded, and the remaining
        ones are right-joined with that lane's ``EdgeFeaturesDataFrame`` on
        ``['sp1', 'sp2']``.  All lanes are then stacked into one table whose
        middle columns form the feature matrix and whose last column holds
        the labels.

        Args:
            slot: Not used by this method.
            subindex: Not used by this method.
            roi: Not used by this method.
            result: Indexable container.  ``result[0]`` receives the trained
                classifier, or ``None`` when every lane's label dict is empty.

        Raises:
            AssertionError: If a feature table does not begin with the
                ``sp1``/``sp2`` columns, if the stacked table does not end
                with ``label``, or if the trained classifier knows classes
                other than 1 and 2.
        """
        collected = []

        lanes = zip(self.EdgeLabelsDict, self.EdgeFeaturesDataFrame)
        for lane_index, (labels_dict_slot, features_slot) in enumerate(lanes):
            logger.info("Retrieving features for lane {}...".format(lane_index))

            labels_dict = labels_dict_slot.value.copy() # Copy now to avoid threading issues.
            if not labels_dict:
                continue

            # Python 3 returns a view from dict.keys(); np.array() of a view
            # is a 0-d object array, which cannot populate two columns.
            # Converting to a list first yields the intended (N, 2) array.
            sp_columns = np.array(list(labels_dict.keys()))
            edge_features_df = features_slot.value
            assert list(edge_features_df.columns[0:2]) == ['sp1', 'sp2']

            labels_df = pd.DataFrame(sp_columns, columns=['sp1', 'sp2'])
            # Likewise materialize the values view; it iterates in the same
            # order as the keys because the copy is not modified in between.
            labels_df['label'] = list(labels_dict.values())

            # Drop zero labels
            labels_df = labels_df[labels_df['label'] != 0]

            # Merge in features (right join: one output row per labeled edge).
            features_and_labels_df = pd.merge(edge_features_df, labels_df, how='right', on=['sp1', 'sp2'])
            collected.append(features_and_labels_df)

        if not collected:
            # No labels yet.
            result[0] = None
            return

        # pd.concat replaces the chained DataFrame.append() calls, which no
        # longer exist in pandas >= 2.0.  Lane order is preserved.
        all_features_and_labels_df = pd.concat(collected)

        assert list(all_features_and_labels_df.columns[0:2]) == ['sp1', 'sp2']
        assert all_features_and_labels_df.columns[-1] == 'label'

        feature_matrix = all_features_and_labels_df.iloc[:, 2:-1].values # Omit 'sp1', 'sp2', and 'label'
        labels = all_features_and_labels_df.iloc[:, -1].values

        logger.info("Training classifier with {} labels...".format( len(labels) ))
        # TODO: Allow factory to be configured via an input slot
        classifier_factory = ParallelVigraRfLazyflowClassifierFactory()
        classifier = classifier_factory.create_and_train( feature_matrix,
                                                          labels,
                                                          feature_names=all_features_and_labels_df.columns[2:-1].values )
        assert set(classifier.known_classes).issubset({1, 2})
        result[0] = classifier
    def test_basic(self):
        """Exercise the train/predict round trip of the parallel vigra RF classifier."""
        # Build the factory; the meaning of the positional 10 is not visible
        # here (presumably a tree count -- verify against the factory).
        make_classifier = ParallelVigraRfLazyflowClassifierFactory(10).create_and_train

        # Train on the fixture's feature matrix and labels.
        model = make_classifier(self.training_feature_matrix, self.training_labels)
        assert isinstance(model, ParallelVigraRfLazyflowClassifier)
        assert list(model.known_classes) == [1, 2]

        # Predict on the fixture's prediction data.
        predicted = model.predict_probabilities(self.prediction_data)
        assert predicted.shape == (4, 2)
        assert predicted.dtype == numpy.float32

        # All values are probabilities, i.e. within [0, 1].
        not_negative = (0 <= predicted).all()
        at_most_one = (predicted <= 1.0).all()
        assert not_negative and at_most_one

        # The most probable column (0-based) shifted by one must equal the
        # expected class label for every sample.
        winning_class = numpy.argmax(predicted, axis=-1) + 1
        assert (winning_class == self.expected_classes).all()