def fit(self, graphs):
    """Fit a one-class model on the graphs and calibrate it.

    The vectorized graphs (labelled -1) are stacked with their sign-flipped
    copy (labelled 1) and ``self.program`` is fitted on the result. The
    fitted estimator's intercept is then shifted by the training score
    found at the ``self.nu`` fraction of the sorted scores, and
    ``self.program`` is replaced by a sigmoid ``CalibratedClassifierCV``
    fitted on the original rows.

    Args:
        graphs: iterable of graphs handed to ``vectorize`` together with
            ``self.vectorizer`` and ``self.params_vectorize``.

    Returns:
        ``self`` on success. If any step raises, the exception is logged
        at debug level and ``None`` is returned implicitly.
    """
    try:
        # make matrix
        data_matrix = vectorize(graphs,
                                vectorizer=self.vectorizer,
                                **self.params_vectorize)
        data_matrix_neg = data_matrix.multiply(-1)
        data_matrix_both = vstack([data_matrix, data_matrix_neg],
                                  format="csr")
        # make labels
        length = data_matrix.shape[0]
        y = np.ravel([-1] * length + [1] * length)
        # fit:
        estimator = self.program.fit(data_matrix_both, y)
        # moving intercept: score every row in a single call instead of
        # one decision_function call per sparse row
        scores = np.ravel(estimator.decision_function(data_matrix))
        scores_sorted = sorted(scores)
        # clamp the index so that nu == 1 (or an out-of-range nu) still
        # selects an existing score instead of raising IndexError
        last_index = len(scores_sorted) - 1
        pivot_index = max(0, min(int(len(scores_sorted) * self.nu),
                                 last_index))
        pivot = scores_sorted[pivot_index]
        estimator.intercept_ -= pivot
        # calibration: the scores predate the intercept shift, so they are
        # compared against the pivot itself
        data_y = np.where(scores >= pivot, 1, -1)
        self.program = CalibratedClassifierCV(estimator, method='sigmoid')
        self.program.fit(data_matrix, data_y)
        return self
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def fit(self, graphs):
    """Fit the wrapped program on the vectorized graphs and their targets.

    When ``self._extract_targets`` yields a single distinct value, the data
    matrix is extended with its sign-flipped copy and the added rows get
    the target -1 before fitting.

    Returns ``self`` on success. If anything raises, the exception is
    logged at debug level and ``None`` is returned implicitly.
    """
    try:
        target_source, feature_source = tee(graphs)
        features = vectorize(feature_source,
                             vectorizer=self.vectorizer,
                             **self.params_vectorize)
        targets = self._extract_targets(target_source)
        has_single_class = len(set(targets)) == 1
        if has_single_class:
            # single class learning: mirror the instances ...
            mirrored = features.multiply(-1)
            # ... and give every mirrored row the target -1
            targets = list(targets)
            mirrored_targets = [-1] * len(targets)
            features = vstack([features, mirrored], format="csr")
            targets = np.ravel(targets + mirrored_targets)
        fitted_program = self.program.fit(features, targets)
        self.program = fitted_program
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
    else:
        return self
def setup(self, known_graphs=None, candidate_graphs=None):
    """Precompute neighbor distances and encodings for the candidates.

    A ``KNNWrapper`` around ``NearestNeighbors`` is fitted on
    ``known_graphs`` via ``model``; ``predict`` is then run on
    ``candidate_graphs`` and the ``'distances'`` entry of each returned
    graph is collected. Finally the candidates are vectorized.

    Args:
        known_graphs: graphs used to fit the nearest neighbor predictor.
        candidate_graphs: graphs whose distances to the known graphs and
            whose encodings are computed. May be a one-shot iterator.

    Side effects:
        Sets ``self.distances_to_known_graphs`` (list, one entry per
        candidate) and ``self.candidate_graphs_data_matrix``.
    """
    # compute the nearest neighbors for the candidate graphs w.r.t. the
    # known graphs in the list 'known_graphs'
    parameters_priors = dict(n_neighbors=self.n_neighbors,
                             vectorizer__complexity=self.complexity,
                             vectorize__n_jobs=-1,
                             vectorize__fit_flag=False,
                             vectorize__n_blocks=5,
                             vectorize__block_size=100)
    fit_wrapped_knn_predictor_known = model(
        known_graphs,
        program=KNNWrapper(program=NearestNeighbors()),
        parameters_priors=parameters_priors)

    # candidate_graphs is traversed twice below (by predict, then by
    # vectorize); materialize it once so that a one-shot iterator is not
    # already exhausted when it reaches vectorize
    if candidate_graphs is not None:
        candidate_graphs = list(candidate_graphs)

    # compute distances of candidate_graphs to known_graphs
    knn_candidate_graphs = predict(
        candidate_graphs, program=fit_wrapped_knn_predictor_known)
    self.distances_to_known_graphs = [
        knn_candidate_graph.graph['distances']
        for knn_candidate_graph in knn_candidate_graphs
    ]

    # compute candidate_graphs encodings
    self.candidate_graphs_data_matrix = vectorize(
        candidate_graphs,
        vectorizer=Vectorizer(complexity=self.complexity),
        block_size=400,
        n_jobs=-1)
def fit(self, graphs):
    """Vectorize the graphs, extract targets and fit ``self.program``.

    If only one distinct target value is present, negated copies of all
    rows are appended to the data matrix with target -1 so that the
    program is fitted on two groups of rows.

    Returns ``self``; when an exception occurs it is logged at debug level
    and the method falls through, returning ``None``.
    """
    try:
        graphs_for_targets, graphs_for_matrix = tee(graphs)
        matrix = vectorize(graphs_for_matrix,
                           vectorizer=self.vectorizer,
                           **self.params_vectorize)
        labels = self._extract_targets(graphs_for_targets)
        # manage case for single class learning
        if len(set(labels)) == 1:
            negated_matrix = matrix.multiply(-1)
            labels = list(labels)
            negated_labels = [-1] * len(labels)
            matrix = vstack([matrix, negated_matrix], format="csr")
            labels = np.ravel(labels + negated_labels)
        self.program = self.program.fit(matrix, labels)
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
    else:
        return self
def _cluster(self, seqs, clustering_algorithm=None):
    """Record, per predicted cluster id, the indices of its instances.

    ``seqs`` is vectorized with ``self.seq_vectorizer`` and passed to
    ``clustering_algorithm.fit_predict``. The position of every instance
    is then added to ``self.clusters`` under its predicted cluster id.

    NOTE(review): ``self.clusters`` must already provide a list-like value
    for every cluster id (e.g. a ``defaultdict(list)``) -- confirm against
    the initializer, which is not visible here.
    """
    encoded_seqs = vectorize(seqs,
                             vectorizer=self.seq_vectorizer,
                             n_blocks=self.n_blocks,
                             block_size=self.block_size,
                             n_jobs=self.n_jobs)
    cluster_ids = clustering_algorithm.fit_predict(encoded_seqs)
    # collect instance ids per cluster id
    for instance_id, cluster_id in enumerate(cluster_ids):
        self.clusters[cluster_id] += [instance_id]
def fit(self, graphs):
    """Store the graphs and fit ``self.program`` on their encoding.

    The input is materialized into ``self.graphs`` before being
    vectorized, so the stored list and the fitted rows share one order.

    Returns ``self`` on success; on any exception the error is logged at
    debug level and ``None`` is returned implicitly.
    """
    try:
        self.graphs = list(graphs)
        encoded = vectorize(self.graphs,
                            vectorizer=self.vectorizer,
                            **self.params_vectorize)
        fitted_program = self.program.fit(encoded)
        self.program = fitted_program
    except Exception as e:
        logger.debug("Failed iteration. Reason: %s" % e)
        logger.debug("Exception", exc_info=True)
    else:
        return self
def fit(self, graphs):
    """Fit ``self.program`` on the vectorized graphs and their targets.

    The input is split with ``tee``: one branch is vectorized, the other
    is handed to ``self._extract_targets``.

    Returns ``self`` on success; on any exception the error is logged at
    debug level and ``None`` is returned implicitly.
    """
    try:
        target_source, feature_source = tee(graphs)
        features = vectorize(feature_source,
                             vectorizer=self.vectorizer,
                             **self.params_vectorize)
        targets = self._extract_targets(target_source)
        fitted_program = self.program.fit(features, targets)
        self.program = fitted_program
    except Exception as e:
        logger.debug("Failed iteration. Reason: %s" % e)
        logger.debug("Exception", exc_info=True)
    else:
        return self
def fit_predict(self, graphs):
    """Vectorize the graphs and return ``self.program.fit_predict`` on them.

    On any exception the error is logged at debug level and ``None`` is
    returned implicitly.
    """
    try:
        encoded = vectorize(graphs,
                            vectorizer=self.vectorizer,
                            **self.params_vectorize)
        return self.program.fit_predict(encoded)
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def fit(self, graphs):
    """Keep a list of the graphs and fit the program on their vectors.

    ``self.graphs`` holds the materialized input; the same list is what
    gets vectorized and passed to ``self.program.fit``.

    Returns ``self``; if a step raises, the exception is logged at debug
    level and the method returns ``None`` implicitly.
    """
    try:
        self.graphs = list(graphs)
        vectors = vectorize(self.graphs,
                            vectorizer=self.vectorizer,
                            **self.params_vectorize)
        self.program = self.program.fit(vectors)
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
    else:
        return self
def decision_function(self, graphs):
    """Return ``self.program.decision_function`` for the vectorized graphs.

    Args:
        graphs: iterable of graphs handed to ``vectorize`` together with
            ``self.vectorizer`` and ``self.params_vectorize``.

    Returns:
        The scores returned by the wrapped program. If any step raises,
        the exception is logged at debug level and ``None`` is returned
        implicitly.
    """
    try:
        # The graphs are only read once here, so they are passed straight
        # to vectorize; splitting them with tee would just keep a second,
        # never-read iterator (and its buffer) alive.
        data_matrix = vectorize(graphs,
                                vectorizer=self.vectorizer,
                                **self.params_vectorize)
        return self.program.decision_function(data_matrix)
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def predict(self, graphs):
    """Yield each graph annotated with the program's prediction.

    The prediction is stored in ``graph.graph`` under both the
    ``"prediction"`` and the ``"score"`` key. This is a generator: work
    starts on first iteration. Any exception is logged at debug level and
    ends the iteration.
    """
    try:
        annotation_source, feature_source = tee(graphs)
        features = vectorize(feature_source,
                             vectorizer=self.vectorizer,
                             **self.params_vectorize)
        labels = self.program.predict(features)
        for label, current_graph in izip(labels, annotation_source):
            attributes = current_graph.graph
            attributes["prediction"] = label
            attributes["score"] = label
            yield current_graph
    except Exception as e:
        logger.debug("Failed iteration. Reason: %s" % e)
        logger.debug("Exception", exc_info=True)
def fit(self, graphs):
    """Train ``self.program`` on graph vectors and extracted targets.

    One ``tee`` branch of the input feeds ``vectorize``; the other feeds
    ``self._extract_targets``.

    Returns ``self``; if a step raises, the exception is logged at debug
    level and the method returns ``None`` implicitly.
    """
    try:
        graphs_for_targets, graphs_for_matrix = tee(graphs)
        matrix = vectorize(graphs_for_matrix,
                           vectorizer=self.vectorizer,
                           **self.params_vectorize)
        labels = self._extract_targets(graphs_for_targets)
        self.program = self.program.fit(matrix, labels)
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
    else:
        return self
def _data_matrix(self, iterable, fit_vectorizer=False):
    """Pre-process ``iterable`` into graphs and vectorize them.

    Args:
        iterable: input handed to ``mp_pre_process`` with the configured
            pre-processor settings.
        fit_vectorizer: when true, ``self.vectorizer.fit`` is called on the
            graphs before they are vectorized.

    Returns:
        The value returned by ``vectorize`` for the pre-processed graphs.

    Raises:
        AssertionError: if ``is_iterable(iterable)`` is false (note that
            assertions are skipped when Python runs with ``-O``).
    """
    assert(is_iterable(iterable)), 'Not iterable'
    graphs = mp_pre_process(iterable,
                            pre_processor=self.pre_processor,
                            pre_processor_args=self.pre_processor_args,
                            n_blocks=self.pre_processor_n_blocks,
                            block_size=self.pre_processor_block_size,
                            n_jobs=self.pre_processor_n_jobs)
    self.vectorizer.set_params(**self.vectorizer_args)
    if fit_vectorizer:
        # Two passes are needed only when fitting; splitting the stream
        # unconditionally would make tee buffer every graph for a second
        # iterator that is never read.
        graphs, graphs_ = tee(graphs)
        self.vectorizer.fit(graphs_)
    return vectorize(graphs,
                     vectorizer=self.vectorizer,
                     n_jobs=self.n_jobs,
                     n_blocks=self.n_blocks)
def predict(self, graphs):
    """Yield each graph annotated with its nearest stored graphs.

    ``self.program.kneighbors`` is queried with the vectorized input; the
    returned indices are looked up in ``self.graphs`` and the resulting
    list is stored under ``graph.graph["neighbors"]``. This is a
    generator. Any exception is logged at debug level and ends the
    iteration.
    """
    try:
        annotation_source, feature_source = tee(graphs)
        features = vectorize(feature_source,
                             vectorizer=self.vectorizer,
                             **self.params_vectorize)
        # only the neighbor indices are used; the distances are discarded
        _, neighbor_indices = self.program.kneighbors(features)
        for row_ids, current_graph in izip(neighbor_indices,
                                           annotation_source):
            current_graph.graph["neighbors"] = [
                self.graphs[row_id] for row_id in row_ids
            ]
            yield current_graph
    except Exception as e:
        logger.debug("Failed iteration. Reason: %s" % e)
        logger.debug("Exception", exc_info=True)
def predict(self, graphs):
    """Annotate and yield the graphs with the wrapped program's output.

    For every graph the predicted value is written to ``graph.graph``
    twice: as ``'prediction'`` and as ``'score'``. Being a generator,
    nothing runs until iteration starts. An exception is logged at debug
    level and terminates the iteration.
    """
    try:
        graphs_to_annotate, graphs_to_encode = tee(graphs)
        matrix = vectorize(graphs_to_encode,
                           vectorizer=self.vectorizer,
                           **self.params_vectorize)
        predicted = self.program.predict(matrix)
        for value, item in izip(predicted, graphs_to_annotate):
            item.graph['prediction'] = value
            item.graph['score'] = value
            yield item
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def seq_to_data_matrix(self, sequences=None):
    """Turn sequences into a dense matrix transformed by ``self.scale``.

    The sequences are pre-processed into graphs, vectorized, converted to
    a dense array, and then ``self.scale`` is fitted on that array and
    used to transform it.

    NOTE(review): ``self.scale`` is refitted on every call; it is
    presumably a standardizing scaler -- confirm where it is created.
    """
    # Transform sequences to matrix
    graphs = mp_pre_process(sequences,
                            pre_processor=self.pre_processor,
                            pre_processor_args={},
                            n_jobs=-1)
    sparse_encoding = vectorize(graphs,
                                vectorizer=self.vectorizer,
                                n_jobs=-1)
    # Densify the matrix
    dense_encoding = sparse_encoding.toarray()
    # Standardize the matrix
    self.scale.fit(dense_encoding)
    return self.scale.transform(dense_encoding)
def predict(self, graphs):
    """Yield each graph annotated with its prediction and decision score.

    ``graph.graph['prediction']`` receives the output of
    ``self.program.predict`` and ``graph.graph['score']`` the matching
    entry of ``self.program.decision_function``. This is a generator. Any
    exception is logged at debug level and ends the iteration.
    """
    try:
        annotation_source, feature_source = tee(graphs)
        features = vectorize(feature_source,
                             vectorizer=self.vectorizer,
                             **self.params_vectorize)
        labels = self.program.predict(features)
        margins = self.program.decision_function(features)
        for margin, label, current_graph in izip(margins, labels,
                                                 annotation_source):
            attributes = current_graph.graph
            attributes['prediction'] = label
            attributes['score'] = margin
            yield current_graph
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def predict(self, graphs):
    """Attach the neighboring stored graphs to each input graph.

    The vectorized input is passed to ``self.program.kneighbors``; for
    every input graph the returned indices select entries of
    ``self.graphs``, which are stored as a list under
    ``graph.graph['neighbors']`` before the graph is yielded. An exception
    is logged at debug level and terminates the iteration.
    """
    try:
        graphs_to_annotate, graphs_to_encode = tee(graphs)
        matrix = vectorize(graphs_to_encode,
                           vectorizer=self.vectorizer,
                           **self.params_vectorize)
        # the distances returned alongside the indices are not used
        _, all_indices = self.program.kneighbors(matrix)
        for ids, item in izip(all_indices, graphs_to_annotate):
            item.graph['neighbors'] = [self.graphs[i] for i in ids]
            yield item
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def predict(self, graphs):
    """Yield each graph annotated with its prediction and probabilities.

    Differs from the ``decision_function``-based variant only in the score
    source: ``graph.graph['score']`` is the matching row of
    ``self.program.predict_proba`` (the original note describes it as a
    length-2 list), while ``graph.graph['prediction']`` comes from
    ``self.program.predict``. This is a generator. Any exception is logged
    at debug level and ends the iteration.
    """
    try:
        annotation_source, feature_source = tee(graphs)
        features = vectorize(feature_source,
                             vectorizer=self.vectorizer,
                             **self.params_vectorize)
        labels = self.program.predict(features)
        probabilities = self.program.predict_proba(features)
        for probability, label, current_graph in izip(probabilities,
                                                      labels,
                                                      annotation_source):
            attributes = current_graph.graph
            attributes['prediction'] = label
            attributes['score'] = probability
            yield current_graph
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def fit(self, graphs):
    """Fit a one-class estimator on the graphs, then calibrate it.

    Steps:
      1. Vectorize the graphs and stack the matrix on top of its negated
         copy; the original rows get label -1, the negated rows label 1.
      2. Fit ``self.program`` on the stacked data.
      3. Score the original rows, take the score at the ``self.nu``
         fraction of the sorted scores as pivot, and subtract it from the
         estimator's intercept.
      4. Replace ``self.program`` with a sigmoid
         ``CalibratedClassifierCV`` fitted on the original rows, labelled
         1 where the score is at least the pivot and -1 otherwise.

    Args:
        graphs: iterable of graphs handed to ``vectorize`` together with
            ``self.vectorizer`` and ``self.params_vectorize``.

    Returns:
        ``self`` on success. If any step raises, the exception is logged
        at debug level and ``None`` is returned implicitly.
    """
    try:
        # make matrix
        data_matrix = vectorize(graphs,
                                vectorizer=self.vectorizer,
                                **self.params_vectorize)
        data_matrix_neg = data_matrix.multiply(-1)
        data_matrix_both = vstack(
            [data_matrix, data_matrix_neg], format="csr")
        # make labels
        length = data_matrix.shape[0]
        y = np.ravel([-1] * length + [1] * length)
        # fit:
        estimator = self.program.fit(data_matrix_both, y)
        # moving intercept: one batched decision_function call replaces
        # the per-row calls over the sparse matrix
        scores = np.ravel(estimator.decision_function(data_matrix))
        scores_sorted = sorted(scores)
        # keep the pivot index inside the list: int(n * nu) equals n when
        # nu == 1, which previously raised an IndexError that was swallowed
        pivot_index = int(len(scores_sorted) * self.nu)
        pivot_index = min(pivot_index, len(scores_sorted) - 1)
        pivot_index = max(pivot_index, 0)
        pivot = scores_sorted[pivot_index]
        estimator.intercept_ -= pivot
        # calibration: scores were taken before the intercept moved, so
        # the threshold is the pivot itself
        data_y = np.where(scores >= pivot, 1, -1)
        self.program = CalibratedClassifierCV(estimator, method='sigmoid')
        self.program.fit(data_matrix, data_y)
        return self
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)