def execute(self, slot, subindex, roi, result):
    """Train an edge classifier from the labeled edges of all lanes.

    For every lane, the label dict (keyed by ``(sp1, sp2)`` pairs) is
    converted to a DataFrame, rows with label 0 are dropped, and the lane's
    edge features are merged in on ``['sp1', 'sp2']``.  The per-lane tables
    are stacked and passed to ``ParallelVigraRfLazyflowClassifierFactory``.

    ``result[0]`` receives the trained classifier, or ``None`` when every
    lane's label dict is empty.  ``slot``, ``subindex`` and ``roi`` are not
    read by this method.
    """
    lane_frames = []
    lanes = zip(self.EdgeLabelsDict, self.EdgeFeaturesDataFrame)
    for lane_index, (labels_dict_slot, features_slot) in enumerate(lanes):
        logger.info("Retrieving features for lane {}...".format(lane_index))
        labels_dict = labels_dict_slot.value.copy()  # Copy now to avoid threading issues.
        if not labels_dict:
            continue

        # On Python 3, dict.keys() is a view; np.array() would wrap it in a
        # 0-d object array instead of building an (N, 2) array of ids.
        sp_columns = np.array(list(labels_dict.keys()))

        edge_features_df = features_slot.value
        assert list(edge_features_df.columns[0:2]) == ['sp1', 'sp2']

        labels_df = pd.DataFrame(sp_columns, columns=['sp1', 'sp2'])
        # Materialize the values view as well, so the column assignment
        # receives a plain sequence in the same order as the keys above.
        labels_df['label'] = list(labels_dict.values())

        # Drop zero labels
        labels_df = labels_df[labels_df['label'] != 0]

        # Merge in features ('right' keeps one row per labeled edge)
        lane_frames.append(
            pd.merge(edge_features_df, labels_df, how='right', on=['sp1', 'sp2'])
        )

    if not lane_frames:
        # No labels yet.
        result[0] = None
        return

    # DataFrame.append() was removed in pandas 2.0; concatenate once instead.
    all_features_and_labels_df = pd.concat(lane_frames)

    assert list(all_features_and_labels_df.columns[0:2]) == ['sp1', 'sp2']
    assert all_features_and_labels_df.columns[-1] == 'label'

    # Omit 'sp1', 'sp2', and 'label'
    feature_matrix = all_features_and_labels_df.iloc[:, 2:-1].values
    labels = all_features_and_labels_df.iloc[:, -1].values

    logger.info("Training classifier with {} labels...".format(len(labels)))
    # TODO: Allow factory to be configured via an input slot
    classifier_factory = ParallelVigraRfLazyflowClassifierFactory()
    classifier = classifier_factory.create_and_train(
        feature_matrix,
        labels,
        feature_names=all_features_and_labels_df.columns[2:-1].values,
    )
    assert set(classifier.known_classes).issubset({1, 2})
    result[0] = classifier
def test_basic(self):
    """Train a classifier through the factory and sanity-check its predictions."""
    # Build the factory; 10 is its only constructor argument here.
    rf_factory = ParallelVigraRfLazyflowClassifierFactory(10)

    # Fit on the fixture's training data.
    trained = rf_factory.create_and_train(
        self.training_feature_matrix,
        self.training_labels,
    )
    assert isinstance(trained, ParallelVigraRfLazyflowClassifier)
    assert list(trained.known_classes) == [1, 2]

    # Predict on the fixture's prediction data and check the output layout.
    probs = trained.predict_probabilities(self.prediction_data)
    assert probs.shape == (4, 2)
    assert probs.dtype == numpy.float32

    # Every value must lie within [0, 1].
    within_unit_interval = (0 <= probs).all() and (probs <= 1.0).all()
    assert within_unit_interval

    # argmax is 0-based while the known classes are [1, 2], hence the +1.
    predicted_classes = numpy.argmax(probs, axis=-1) + 1
    assert (predicted_classes == self.expected_classes).all()
def execute(self, slot, subindex, roi, result):
    """Gather labeled edge features from each lane and train a classifier.

    Each lane contributes one table: its label dict (keys are ``(sp1, sp2)``
    pairs) becomes a DataFrame, zero-valued labels are removed, and the
    lane's feature DataFrame is joined on ``['sp1', 'sp2']``.  All tables are
    concatenated and a ``ParallelVigraRfLazyflowClassifierFactory`` trains on
    the feature columns against the ``'label'`` column.

    The outcome is written to ``result[0]``: the trained classifier, or
    ``None`` if every lane's label dict is empty.  ``slot``, ``subindex``
    and ``roi`` are not read here.
    """
    per_lane_tables = []
    slot_pairs = zip(self.EdgeLabelsDict, self.EdgeFeaturesDataFrame)
    for lane_index, (labels_dict_slot, features_slot) in enumerate(slot_pairs):
        logger.info("Retrieving features for lane {}...".format(lane_index))
        labels_dict = labels_dict_slot.value.copy()  # Copy now to avoid threading issues.
        if not labels_dict:
            continue

        # dict.keys() is a view on Python 3, which np.array() would wrap as
        # a 0-d object array; convert to a list to get an (N, 2) id array.
        sp_columns = np.array(list(labels_dict.keys()))

        edge_features_df = features_slot.value
        assert list(edge_features_df.columns[0:2]) == ['sp1', 'sp2']

        labels_df = pd.DataFrame(sp_columns, columns=['sp1', 'sp2'])
        # Same treatment for the values view; order matches the keys above.
        labels_df['label'] = list(labels_dict.values())

        # Drop zero labels
        labels_df = labels_df[labels_df['label'] != 0]

        # Merge in features; a right join keeps one row per labeled edge.
        features_and_labels_df = pd.merge(
            edge_features_df, labels_df, how='right', on=['sp1', 'sp2']
        )
        per_lane_tables.append(features_and_labels_df)

    if not per_lane_tables:
        # No labels yet.
        result[0] = None
        return

    # pd.concat replaces DataFrame.append(), which pandas 2.0 removed.
    all_features_and_labels_df = pd.concat(per_lane_tables)

    assert list(all_features_and_labels_df.columns[0:2]) == ['sp1', 'sp2']
    assert all_features_and_labels_df.columns[-1] == 'label'

    # Omit 'sp1', 'sp2', and 'label'
    feature_columns = all_features_and_labels_df.iloc[:, 2:-1]
    feature_matrix = feature_columns.values
    labels = all_features_and_labels_df.iloc[:, -1].values

    logger.info("Training classifier with {} labels...".format(len(labels)))
    # TODO: Allow factory to be configured via an input slot
    classifier_factory = ParallelVigraRfLazyflowClassifierFactory()
    classifier = classifier_factory.create_and_train(
        feature_matrix,
        labels,
        feature_names=all_features_and_labels_df.columns[2:-1].values,
    )
    assert set(classifier.known_classes).issubset({1, 2})
    result[0] = classifier
def test_basic(self):
    """Exercise factory creation, training and probability prediction end to end."""
    # Initialize the factory with 10 as its single constructor argument.
    factory_under_test = ParallelVigraRfLazyflowClassifierFactory(10)

    # Train on the fixture data and check what came back.
    features, targets = self.training_feature_matrix, self.training_labels
    model = factory_under_test.create_and_train(features, targets)
    assert isinstance(model, ParallelVigraRfLazyflowClassifier)
    assert list(model.known_classes) == [1, 2]

    # Predict and verify shape, dtype and value range of the output.
    predicted = model.predict_probabilities(self.prediction_data)
    assert predicted.shape == (4, 2)
    assert predicted.dtype == numpy.float32
    assert (0 <= predicted).all() and (predicted <= 1.0).all()

    # The most probable column index, shifted by one, is compared against
    # the expected class labels from the fixture.
    most_likely = numpy.argmax(predicted, axis=-1) + 1
    matches = most_likely == self.expected_classes
    assert matches.all()