Beispiel #1
0
    def fit(self, graphs):
        """Fit a one-class model on the graphs and calibrate its scores.

        The graphs are vectorized and a sign-flipped copy of the matrix is
        stacked below it, giving a two-class problem on which
        ``self.program`` is trained.  The estimator intercept is then shifted
        by the training score found at the ``self.nu`` quantile, and
        ``self.program`` is replaced by a sigmoid ``CalibratedClassifierCV``
        fitted on the labels obtained by thresholding at that score.

        Args:
            graphs: iterable of graphs accepted by ``vectorize``.

        Returns:
            ``self`` on success.  If any step raises, the exception is logged
            at debug level and ``None`` is returned implicitly.
        """
        try:
            # make matrix
            data_matrix = vectorize(graphs,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            data_matrix_neg = data_matrix.multiply(-1)
            data_matrix_both = vstack([data_matrix, data_matrix_neg],
                                      format="csr")

            # make labels
            # NOTE(review): the real instances receive label -1 and their
            # mirrored copies +1 -- confirm this orientation is intended.
            length = data_matrix.shape[0]
            y = np.ravel([-1] * length + [1] * length)

            # fit:
            estimator = self.program.fit(data_matrix_both, y)

            # moving intercept:
            # score every row in a single call rather than one call per
            # sparse row
            scores = np.ravel(estimator.decision_function(data_matrix))
            scores_sorted = np.sort(scores)
            # clamp the index so that nu == 1.0 picks the largest score
            # instead of indexing one past the end
            pivot_index = min(int(length * self.nu), length - 1)
            pivot = scores_sorted[pivot_index]
            estimator.intercept_ -= pivot

            # calibration: labels follow the (pre-shift) scores vs. the pivot
            data_y = np.where(scores >= pivot, 1, -1)
            self.program = CalibratedClassifierCV(estimator, method='sigmoid')
            self.program.fit(data_matrix, data_y)
            return self

        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)
Beispiel #2
0
 def fit(self, graphs):
     """Train the wrapped program on the vectorized graphs.

     Targets come from ``self._extract_targets``.  When only one distinct
     target value is present, the feature matrix is extended with its
     sign-flipped copy, labelled -1, before fitting.

     Returns ``self`` on success; on any exception the error is logged at
     debug level and ``None`` is returned implicitly.
     """
     try:
         target_source, feature_source = tee(graphs)
         features = vectorize(feature_source,
                              vectorizer=self.vectorizer,
                              **self.params_vectorize)
         targets = self._extract_targets(target_source)
         single_class = len(set(targets)) == 1
         if single_class:
             # mirror the data to obtain an artificial second class
             mirrored = features.multiply(-1)
             targets = list(targets)
             mirrored_targets = [-1] * len(targets)
             features = vstack([features, mirrored], format="csr")
             targets = np.ravel(targets + mirrored_targets)
         self.program = self.program.fit(features, targets)
         return self
     except Exception as exc:
         logger.debug('Failed iteration. Reason: %s', exc)
         logger.debug('Exception', exc_info=True)
Beispiel #3
0
 def setup(self, known_graphs=None, candidate_graphs=None):
     """Collect neighbor distances and encodings for the candidate graphs.

     A ``KNNWrapper`` around ``NearestNeighbors`` is built from
     ``known_graphs`` via ``model``; ``predict`` is then run on
     ``candidate_graphs`` and the ``'distances'`` entry of every resulting
     graph is stored in ``self.distances_to_known_graphs``.  Finally the
     candidates are vectorized into ``self.candidate_graphs_data_matrix``.

     NOTE(review): ``candidate_graphs`` is handed to both ``predict`` and
     ``vectorize``; a one-shot iterator would presumably be exhausted by
     the first pass -- confirm callers pass a re-iterable collection.
     """
     # priors for the nearest neighbor model built on the known graphs
     knn_priors = {
         'n_neighbors': self.n_neighbors,
         'vectorizer__complexity': self.complexity,
         'vectorize__n_jobs': -1,
         'vectorize__fit_flag': False,
         'vectorize__n_blocks': 5,
         'vectorize__block_size': 100,
     }
     knn_on_known = model(known_graphs,
                          program=KNNWrapper(program=NearestNeighbors()),
                          parameters_priors=knn_priors)

     # distances of every candidate graph to the known graphs
     annotated_candidates = list(
         predict(candidate_graphs, program=knn_on_known))
     collected = self.distances_to_known_graphs = []
     for annotated in annotated_candidates:
         collected.append(annotated.graph['distances'])

     # encodings of the candidate graphs
     candidate_vectorizer = Vectorizer(complexity=self.complexity)
     self.candidate_graphs_data_matrix = vectorize(
         candidate_graphs,
         vectorizer=candidate_vectorizer,
         block_size=400,
         n_jobs=-1)
Beispiel #4
0
 def fit(self, graphs):
     """Fit the wrapped program on graph features and extracted targets.

     If ``self._extract_targets`` yields a single distinct value, a
     negated copy of the feature matrix is appended with target -1 so the
     program sees two classes.

     Returns ``self`` on success; on any exception the error is logged at
     debug level and ``None`` is returned implicitly.
     """
     try:
         for_targets, for_features = tee(graphs)
         matrix = vectorize(for_features,
                            vectorizer=self.vectorizer,
                            **self.params_vectorize)
         labels = self._extract_targets(for_targets)
         if len(set(labels)) == 1:
             # single class learning: synthesize the opposite class
             negated_matrix = matrix.multiply(-1)
             labels = list(labels)
             negated_labels = [-1] * len(labels)
             matrix = vstack([matrix, negated_matrix], format="csr")
             labels = np.ravel(labels + negated_labels)
         self.program = self.program.fit(matrix, labels)
         return self
     except Exception as exc:
         logger.debug('Failed iteration. Reason: %s', exc)
         logger.debug('Exception', exc_info=True)
Beispiel #5
0
 def setup(self, known_graphs=None, candidate_graphs=None):
     """Precompute candidate-to-known distances and candidate encodings.

     Builds a ``KNNWrapper``/``NearestNeighbors`` model from
     ``known_graphs`` with ``model``, runs ``predict`` over
     ``candidate_graphs`` and stores each result's ``'distances'`` entry
     in ``self.distances_to_known_graphs``.  The candidates are then
     vectorized into ``self.candidate_graphs_data_matrix``.

     NOTE(review): ``candidate_graphs`` is consumed by ``predict`` and
     then passed to ``vectorize`` again; presumably it must be a
     re-iterable collection rather than a generator -- confirm.
     """
     # nearest neighbor model over the known graphs
     priors = {
         'n_neighbors': self.n_neighbors,
         'vectorizer__complexity': self.complexity,
         'vectorize__n_jobs': -1,
         'vectorize__fit_flag': False,
         'vectorize__n_blocks': 5,
         'vectorize__block_size': 100,
     }
     wrapped_knn = KNNWrapper(program=NearestNeighbors())
     fitted_knn = model(known_graphs,
                        program=wrapped_knn,
                        parameters_priors=priors)

     # one distances entry per candidate graph
     candidates_with_knn = list(
         predict(candidate_graphs, program=fitted_knn))
     per_candidate = self.distances_to_known_graphs = []
     for candidate in candidates_with_knn:
         per_candidate.append(candidate.graph['distances'])

     # candidate encodings
     self.candidate_graphs_data_matrix = vectorize(
         candidate_graphs,
         vectorizer=Vectorizer(complexity=self.complexity),
         block_size=400,
         n_jobs=-1)
 def _cluster(self, seqs, clustering_algorithm=None):
     """Record, per cluster id, the positions of the sequences assigned to it.

     The sequences are vectorized with ``self.seq_vectorizer`` and passed
     to ``clustering_algorithm.fit_predict``; each position is then added
     to ``self.clusters`` under its predicted cluster id.
     """
     vectorize_options = dict(vectorizer=self.seq_vectorizer,
                              n_blocks=self.n_blocks,
                              block_size=self.block_size,
                              n_jobs=self.n_jobs)
     features = vectorize(seqs, **vectorize_options)
     cluster_ids = clustering_algorithm.fit_predict(features)
     # collect instance positions per cluster id
     for position, cluster_id in enumerate(cluster_ids):
         self.clusters[cluster_id] += [position]
Beispiel #7
0
 def _cluster(self, seqs, clustering_algorithm=None):
     """Fill ``self.clusters`` with sequence positions keyed by cluster id.

     ``seqs`` is vectorized using ``self.seq_vectorizer`` and the block/job
     settings stored on the instance; ``clustering_algorithm.fit_predict``
     supplies one cluster id per row.
     """
     encoded = vectorize(
         seqs,
         vectorizer=self.seq_vectorizer,
         n_blocks=self.n_blocks,
         block_size=self.block_size,
         n_jobs=self.n_jobs)
     assignments = clustering_algorithm.fit_predict(encoded)
     # group instance positions under the cluster they were assigned to
     for index, assigned_cluster in enumerate(assignments):
         self.clusters[assigned_cluster] += [index]
Beispiel #8
0
 def fit(self, graphs):
     """Keep the graphs and fit the wrapped program on their encoding.

     The input is materialized into ``self.graphs`` before vectorization.
     Returns ``self`` on success; on any exception the error is logged at
     debug level and ``None`` is returned implicitly.
     """
     try:
         self.graphs = list(graphs)
         features = vectorize(self.graphs,
                              vectorizer=self.vectorizer,
                              **self.params_vectorize)
         fitted_program = self.program.fit(features)
         self.program = fitted_program
         return self
     except Exception as exc:
         logger.debug("Failed iteration. Reason: %s", exc)
         logger.debug("Exception", exc_info=True)
Beispiel #9
0
 def fit(self, graphs):
     """Fit the wrapped program on vectorized graphs and their targets.

     The input stream is duplicated: one copy is vectorized, the other is
     given to ``self._extract_targets``.

     Returns ``self`` on success; on any exception the error is logged at
     debug level and ``None`` is returned implicitly.
     """
     try:
         target_stream, feature_stream = tee(graphs)
         features = vectorize(feature_stream,
                              vectorizer=self.vectorizer,
                              **self.params_vectorize)
         targets = self._extract_targets(target_stream)
         self.program = self.program.fit(features, targets)
         return self
     except Exception as exc:
         logger.debug("Failed iteration. Reason: %s", exc)
         logger.debug("Exception", exc_info=True)
Beispiel #10
0
 def fit_predict(self, graphs):
     """Vectorize the graphs and return ``self.program.fit_predict`` on them.

     On any exception the error is logged at debug level and ``None`` is
     returned implicitly.
     """
     try:
         features = vectorize(
             graphs, vectorizer=self.vectorizer, **self.params_vectorize)
         return self.program.fit_predict(features)
     except Exception as exc:
         logger.debug('Failed iteration. Reason: %s', exc)
         logger.debug('Exception', exc_info=True)
Beispiel #11
0
 def fit(self, graphs):
     """Remember the graphs and fit the wrapped program on their features.

     ``self.graphs`` receives a list copy of the input, which is then
     vectorized and passed to ``self.program.fit``.

     Returns ``self`` on success; on any exception the error is logged at
     debug level and ``None`` is returned implicitly.
     """
     try:
         self.graphs = list(graphs)
         encoded = vectorize(
             self.graphs, vectorizer=self.vectorizer, **self.params_vectorize)
         self.program = self.program.fit(encoded)
         return self
     except Exception as exc:
         logger.debug('Failed iteration. Reason: %s', exc)
         logger.debug('Exception', exc_info=True)
Beispiel #12
0
 def decision_function(self, graphs):
     """Return the wrapped program's decision scores for the graphs.

     The graphs are vectorized with ``self.vectorizer`` and
     ``self.params_vectorize`` and handed to
     ``self.program.decision_function``.

     Returns the scores on success; on any exception the error is logged
     at debug level and ``None`` is returned implicitly.
     """
     try:
         # The graphs are traversed only once here, so no tee() is needed;
         # an unread tee twin would keep every item buffered.
         data_matrix = vectorize(graphs,
                                 vectorizer=self.vectorizer,
                                 **self.params_vectorize)
         return self.program.decision_function(data_matrix)
     except Exception as e:
         logger.debug('Failed iteration. Reason: %s', e)
         logger.debug('Exception', exc_info=True)
Beispiel #13
0
 def decision_function(self, graphs):
     """Compute decision scores of the wrapped program for the graphs.

     Vectorizes ``graphs`` using ``self.vectorizer`` and
     ``self.params_vectorize`` and returns the output of
     ``self.program.decision_function`` on the resulting matrix.

     On any exception the error is logged at debug level and ``None`` is
     returned implicitly.
     """
     try:
         # Single pass over the graphs: duplicating the stream with tee()
         # would only retain every item in the unread copy.
         data_matrix = vectorize(graphs,
                                 vectorizer=self.vectorizer,
                                 **self.params_vectorize)
         return self.program.decision_function(data_matrix)
     except Exception as e:
         logger.debug('Failed iteration. Reason: %s', e)
         logger.debug('Exception', exc_info=True)
Beispiel #14
0
 def predict(self, graphs):
     """Yield each graph annotated with the wrapped program's prediction.

     This is a generator: nothing runs until it is iterated.  Both
     ``graph.graph["prediction"]`` and ``graph.graph["score"]`` are set to
     the predicted value.  If an exception occurs it is logged at debug
     level and the generator simply ends.
     """
     try:
         originals, to_encode = tee(graphs)
         features = vectorize(to_encode,
                              vectorizer=self.vectorizer,
                              **self.params_vectorize)
         predicted = self.program.predict(features)
         for label, graph in izip(predicted, originals):
             # the prediction doubles as the score here
             graph.graph["prediction"] = graph.graph["score"] = label
             yield graph
     except Exception as exc:
         logger.debug("Failed iteration. Reason: %s", exc)
         logger.debug("Exception", exc_info=True)
Beispiel #15
0
 def fit(self, graphs):
     """Fit the wrapped program using graph features and extracted targets.

     One copy of the input stream is vectorized while the other is passed
     to ``self._extract_targets``.

     Returns ``self`` on success; on any exception the error is logged at
     debug level and ``None`` is returned implicitly.
     """
     try:
         for_targets, for_features = tee(graphs)
         matrix = vectorize(
             for_features,
             vectorizer=self.vectorizer,
             **self.params_vectorize)
         labels = self._extract_targets(for_targets)
         self.program = self.program.fit(matrix, labels)
         return self
     except Exception as exc:
         logger.debug('Failed iteration. Reason: %s', exc)
         logger.debug('Exception', exc_info=True)
Beispiel #16
0
 def _data_matrix(self, iterable, fit_vectorizer=False):
     """Pre-process the input and return its vectorized data matrix.

     The input goes through ``mp_pre_process`` with the pre-processor
     settings stored on the instance, ``self.vectorizer`` is configured
     from ``self.vectorizer_args`` and, when ``fit_vectorizer`` is true,
     fitted on the pre-processed graphs before they are vectorized.

     Args:
         iterable: input accepted by ``mp_pre_process``; must satisfy
             ``is_iterable``.
         fit_vectorizer: fit ``self.vectorizer`` on the graphs first.

     Returns:
         The result of ``vectorize`` on the pre-processed graphs.

     Raises:
         AssertionError: if ``is_iterable(iterable)`` is false (kept as an
             assert so callers see the same exception type as before).
     """
     assert(is_iterable(iterable)), 'Not iterable'
     graphs = mp_pre_process(iterable,
                             pre_processor=self.pre_processor,
                             pre_processor_args=self.pre_processor_args,
                             n_blocks=self.pre_processor_n_blocks,
                             block_size=self.pre_processor_block_size,
                             n_jobs=self.pre_processor_n_jobs)
     self.vectorizer.set_params(**self.vectorizer_args)
     if fit_vectorizer:
         # Duplicate the stream only when a second pass is really needed;
         # an unread tee twin would otherwise buffer every graph.
         graphs, graphs_for_fit = tee(graphs)
         self.vectorizer.fit(graphs_for_fit)
     return vectorize(graphs,
                      vectorizer=self.vectorizer,
                      n_jobs=self.n_jobs,
                      n_blocks=self.n_blocks)
Beispiel #17
0
 def predict(self, graphs):
     """Yield each graph annotated with its nearest stored graphs.

     This is a generator: nothing runs until it is iterated.  The graphs
     are vectorized and queried with ``self.program.kneighbors``.  For
     every input graph, ``graph.graph["neighbors"]`` receives the entries
     of ``self.graphs`` selected by the returned indices and
     ``graph.graph["distances"]`` receives the matching row of distances
     (previously computed but discarded).

     If an exception occurs it is logged at debug level and the generator
     simply ends.
     """
     try:
         graphs, graphs_ = tee(graphs)
         data_matrix = vectorize(graphs_,
                                 vectorizer=self.vectorizer,
                                 **self.params_vectorize)
         distances, indices = self.program.kneighbors(data_matrix)
         for knn_distances, knn_ids, graph in izip(distances, indices, graphs):
             graph.graph["neighbors"] = [self.graphs[knn_id]
                                         for knn_id in knn_ids]
             graph.graph["distances"] = knn_distances
             yield graph
     except Exception as e:
         logger.debug("Failed iteration. Reason: %s", e)
         logger.debug("Exception", exc_info=True)
Beispiel #18
0
 def predict(self, graphs):
     """Yield the graphs with the wrapped program's prediction attached.

     This is a generator: nothing runs until it is iterated.  Each graph
     gets the predicted value under both ``'prediction'`` and ``'score'``
     in ``graph.graph``.  If an exception occurs it is logged at debug
     level and the generator simply ends.
     """
     try:
         to_annotate, to_vectorize = tee(graphs)
         matrix = vectorize(
             to_vectorize,
             vectorizer=self.vectorizer,
             **self.params_vectorize)
         outcomes = self.program.predict(matrix)
         for outcome, graph in izip(outcomes, to_annotate):
             # no separate score is available, so the prediction is reused
             graph.graph['prediction'] = graph.graph['score'] = outcome
             yield graph
     except Exception as exc:
         logger.debug('Failed iteration. Reason: %s', exc)
         logger.debug('Exception', exc_info=True)
    def seq_to_data_matrix(self, sequences=None):
        """Return a dense, scaled feature matrix for the sequences.

        The sequences are pre-processed into graphs with
        ``self.pre_processor``, vectorized with ``self.vectorizer``,
        converted to a dense array, and passed through ``self.scale``,
        which is fitted on this same array right before transforming it.
        """
        # sequences -> graphs -> sparse feature matrix
        graphs = mp_pre_process(sequences,
                                pre_processor=self.pre_processor,
                                pre_processor_args={},
                                n_jobs=-1)
        sparse_features = vectorize(graphs,
                                    vectorizer=self.vectorizer,
                                    n_jobs=-1)

        # densify
        dense_features = sparse_features.toarray()

        # standardize: fit the scaler on this matrix, then apply it
        self.scale.fit(dense_features)
        return self.scale.transform(dense_features)
Beispiel #20
0
 def predict(self, graphs):
     """Yield the graphs annotated with prediction and decision score.

     This is a generator: nothing runs until it is iterated.  For every
     graph, ``graph.graph['prediction']`` holds the output of
     ``self.program.predict`` and ``graph.graph['score']`` the output of
     ``self.program.decision_function``.  If an exception occurs it is
     logged at debug level and the generator simply ends.
     """
     try:
         to_annotate, to_vectorize = tee(graphs)
         features = vectorize(
             to_vectorize,
             vectorizer=self.vectorizer,
             **self.params_vectorize)
         labels = self.program.predict(features)
         margins = self.program.decision_function(features)
         for margin, label, graph in izip(margins, labels, to_annotate):
             annotations = graph.graph
             annotations['prediction'] = label
             annotations['score'] = margin
             yield graph
     except Exception as exc:
         logger.debug('Failed iteration. Reason: %s', exc)
         logger.debug('Exception', exc_info=True)
Beispiel #21
0
 def predict(self, graphs):
     """Yield each graph annotated with its nearest stored graphs.

     This is a generator: nothing runs until it is iterated.  The graphs
     are vectorized and queried with ``self.program.kneighbors``.  For
     every input graph, ``graph.graph['neighbors']`` receives the entries
     of ``self.graphs`` selected by the returned indices and
     ``graph.graph['distances']`` receives the matching row of distances
     (previously computed but discarded).

     If an exception occurs it is logged at debug level and the generator
     simply ends.
     """
     try:
         graphs, graphs_ = tee(graphs)
         data_matrix = vectorize(graphs_,
                                 vectorizer=self.vectorizer,
                                 **self.params_vectorize)
         distances, indices = self.program.kneighbors(data_matrix)
         for knn_distances, knn_ids, graph in izip(distances, indices, graphs):
             graph.graph['neighbors'] = [self.graphs[knn_id]
                                         for knn_id in knn_ids]
             graph.graph['distances'] = knn_distances
             yield graph
     except Exception as e:
         logger.debug('Failed iteration. Reason: %s', e)
         logger.debug('Exception', exc_info=True)
Beispiel #22
0
    def predict(self, graphs):
        """Yield the graphs annotated with prediction and class probabilities.

        This is a generator: nothing runs until it is iterated.  Scores
        come from ``self.program.predict_proba`` rather than
        ``decision_function``, so ``graph.graph['score']`` is that graph's
        row of the probability output (presumably of length 2 for a
        two-class problem).  If an exception occurs it is logged at debug
        level and the generator simply ends.
        """
        try:
            to_annotate, to_vectorize = tee(graphs)
            features = vectorize(
                to_vectorize,
                vectorizer=self.vectorizer,
                **self.params_vectorize)
            labels = self.program.predict(features)
            probabilities = self.program.predict_proba(features)
            for probs, label, graph in izip(probabilities, labels,
                                            to_annotate):
                annotations = graph.graph
                annotations['prediction'] = label
                annotations['score'] = probs
                yield graph
        except Exception as exc:
            logger.debug('Failed iteration. Reason: %s', exc)
            logger.debug('Exception', exc_info=True)
Beispiel #23
0
    def fit(self, graphs):
        """Fit a one-class model on the graphs and calibrate its scores.

        The graphs are vectorized and a sign-flipped copy of the matrix is
        stacked below it, giving a two-class problem on which
        ``self.program`` is trained.  The estimator intercept is then shifted
        by the training score found at the ``self.nu`` quantile, and
        ``self.program`` is replaced by a sigmoid ``CalibratedClassifierCV``
        fitted on the labels obtained by thresholding at that score.

        Args:
            graphs: iterable of graphs accepted by ``vectorize``.

        Returns:
            ``self`` on success.  If any step raises, the exception is logged
            at debug level and ``None`` is returned implicitly.
        """
        try:
            # make matrix
            data_matrix = vectorize(graphs,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            data_matrix_neg = data_matrix.multiply(-1)
            data_matrix_both = vstack([data_matrix, data_matrix_neg],
                                      format="csr")

            # make labels
            # NOTE(review): the real instances receive label -1 and their
            # mirrored copies +1 -- confirm this orientation is intended.
            length = data_matrix.shape[0]
            y = np.ravel([-1] * length + [1] * length)

            # fit:
            estimator = self.program.fit(data_matrix_both, y)

            # moving intercept:
            # score every row in a single call rather than one call per
            # sparse row
            scores = np.ravel(estimator.decision_function(data_matrix))
            scores_sorted = np.sort(scores)
            # clamp the index so that nu == 1.0 picks the largest score
            # instead of indexing one past the end
            pivot_index = min(int(length * self.nu), length - 1)
            pivot = scores_sorted[pivot_index]
            estimator.intercept_ -= pivot

            # calibration: labels follow the (pre-shift) scores vs. the pivot
            data_y = np.where(scores >= pivot, 1, -1)
            self.program = CalibratedClassifierCV(estimator, method='sigmoid')
            self.program.fit(data_matrix, data_y)
            return self

        except Exception as e:
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)