Exemple #1
0
 def get_agg_clusters(index: TensorList, n_cluster: int) -> AgglomerativeClustering:
     '''
     Fits an average-linkage, cosine-affinity agglomerative clustering model
     on the numpy form of `index` and returns the fitted model.

     `n_cluster` is the number of clusters requested from the model.
     '''
     # NOTE(review): newer scikit-learn releases renamed `affinity` to
     # `metric` -- confirm against the pinned scikit-learn version.
     clusterer = AgglomerativeClustering(
         n_clusters=n_cluster,
         linkage="average",
         affinity="cosine",
         compute_full_tree=True,
     )
     embeddings: np.ndarray = index.numpy()
     clusterer.fit(embeddings)
     return clusterer
Exemple #2
0
 def get_kmeans_clusters(cls, index: TensorList, n_cluster: int) -> KMeans:
     '''
     Fits a KMeans model with `n_cluster` clusters on the numpy form of
     `index` and returns the fitted model.

     `random_state` is fixed at 0; `cls` is not used by the body.
     '''
     clusterer = KMeans(n_clusters=n_cluster, random_state=0)
     embeddings: np.ndarray = index.numpy()
     clusterer.fit(embeddings)
     return clusterer
Exemple #3
0
    def test_numpy(self):
        '''
        `TensorList.numpy()` should return a numpy array when the list is
        built from three zero tensors of shape (1, TENSOR_EMBEDDING_DIM)
        '''
        tensors = [torch.zeros(1, TENSOR_EMBEDDING_DIM) for _ in range(3)]
        tl = TensorList(tensor_list=tensors)
        # isinstance is the idiomatic type check (an exact `type(...) ==`
        # comparison would reject ndarray subclasses)
        assert isinstance(tl.numpy(), np.ndarray)
Exemple #4
0
class LinearWindowFunction(WindowFunction):
    '''
    Window function whose decisions come from a linear classifier.

    Each window of extracted features is collapsed into one summary by
    `feature_summarizer`; the summaries are used both to fit the classifier
    built by `construct_linear_classifier` and to score unseen windows.
    '''

    def __init__(
        self,
        positive_label: str,
        context_window: int,
        feature_extractor: FeatureExtractor,
        feature_summarizer: Callable[[List[Any]], torch.Tensor] = FeatureCollator.sum,
        linear_type: LinearType = LinearType.SVM_LINEAR,
        use_batch: bool = True,
        threshold: Optional[float] = 0.7,
        **kwargs,
    ):
        '''
        Stores the configuration, initialises the base class and builds the
        (unfitted) linear model.

        `positive_label`, `feature_extractor`, `context_window`, `use_batch`,
        `threshold` and any extra `kwargs` are forwarded to the base class.
        `feature_summarizer` turns a window of features into one tensor and
        `linear_type` selects what `construct_linear_classifier` builds.
        '''
        self.positive_label = positive_label
        self.feature_extractor = feature_extractor
        self.context_window = context_window
        super().__init__(
            positive_label,
            feature_extractor,
            context_window,
            use_batch=use_batch,
            threshold=threshold,
            **kwargs,
        )

        # Accumulated training inputs (window summaries) and their labels.
        self.dictionary = TensorList()
        self.labels = TensorList()
        self.feature_summarizer = feature_summarizer
        self.linear_model = construct_linear_classifier(linear_type=linear_type)

    @log_time(function_prefix='linear_window_train')
    def _train_model(self, training_data: List[Tuple[List[str], List[Any], str]]):
        '''
        Appends one summary and one label per training triple, then fits the
        linear model on the balanced dataset.

        `self.dictionary` and `self.labels` are not cleared here, so examples
        from earlier calls remain part of the data the model is fitted on.
        '''
        for _sentence_window, feature_window, label in training_data:
            self.dictionary.append(self.feature_summarizer(feature_window))
            self.labels.append(torch.Tensor([label_index(label)]))

        balanced_x, balanced_y = balance_dataset(
            self.dictionary.numpy(),
            self.labels.numpy(),
        )
        self.linear_model.fit(balanced_x, balanced_y)

    def _predict(self, features: List[torch.Tensor]) -> int:
        '''
        Summarises a single window and returns the model's predicted label
        as a Python scalar
        '''
        summary_np = self.feature_summarizer(features).numpy()
        prediction: np.ndarray = self.linear_model.predict(summary_np)
        return prediction.item()

    def _predict_probabilities(self, features: List[torch.Tensor]) -> float:
        '''
        Summarises a single window and returns the model's
        `decision_function` output for it as a Python scalar
        '''
        summary_np = self.feature_summarizer(features).numpy()
        score: np.ndarray = self.linear_model.decision_function(summary_np)
        return score.item()

    def _stack_feature_summaries(self, features: List[List[torch.Tensor]]) -> np.ndarray:
        '''
        Summarises every window in `features` and packs the numpy summaries
        into a single array via `TensorList(...).numpy()`
        '''
        summaries: List[np.ndarray] = [
            self.feature_summarizer(window).numpy() for window in features
        ]
        return TensorList(summaries).numpy()

    @log_time(function_prefix='linear_window_snorkel_predict')
    def _batch_probabilities(self, features: List[List[torch.Tensor]]) -> List[float]:
        '''
        Returns one `decision_function` value per window in `features`
        '''
        stacked = self._stack_feature_summaries(features)
        scores: np.ndarray = self.linear_model.decision_function(stacked)
        return [score.item() for score in TensorList([scores]).to_list()]

    @log_time(function_prefix='linear_window_predict')
    def _batch_predict(self, features: List[List[torch.Tensor]]) -> List[int]:
        '''
        Returns one predicted label per window in `features`
        '''
        stacked = self._stack_feature_summaries(features)
        predictions: np.ndarray = self.linear_model.predict(stacked)
        return [prediction.item() for prediction in TensorList([predictions]).to_list()]

    @overrides
    def __str__(self):
        # Identifies the function by its context window and feature extractor.
        return f'LinearWindowFunction({self.context_window})({self.feature_extractor})'