Ejemplo n.º 1
0
 def setup(self, known_graphs=None, candidate_graphs=None):
     """Fit a k-NN model on the known graphs and query it with candidates.

     Two attributes are populated on the instance:

     * ``distances_to_known_graphs`` -- for every candidate graph, the
       ``'distances'`` entry found in its ``graph`` dict after running
       ``predict`` with the fitted k-NN program.
     * ``candidate_graphs_data_matrix`` -- the output of ``vectorize`` on
       the candidate graphs.

     NOTE(review): ``candidate_graphs`` is traversed twice (once by
     ``predict`` and once by ``vectorize``), so callers presumably pass a
     re-iterable container such as a list -- confirm.
     """
     # Settings handed to the wrapped k-NN program. The prefixed keys are
     # presumably routed by KNNWrapper to its vectorizer / vectorize step;
     # verify against KNNWrapper.set_params.
     knn_settings = {
         'n_neighbors': self.n_neighbors,
         'vectorizer__complexity': self.complexity,
         'vectorize__n_jobs': -1,
         'vectorize__fit_flag': False,
         'vectorize__n_blocks': 5,
         'vectorize__block_size': 100,
     }

     # Fit the wrapped nearest-neighbour program on the known graphs.
     wrapped_knn = KNNWrapper(program=NearestNeighbors())
     fitted_knn = model(known_graphs,
                        program=wrapped_knn,
                        parameters_priors=knn_settings)

     # Run the candidates through the fitted program and keep, per
     # candidate, the distances it attached to the graph.
     annotated_candidates = list(
         predict(candidate_graphs, program=fitted_knn))
     self.distances_to_known_graphs = [
         annotated.graph['distances'] for annotated in annotated_candidates]

     # Encode the candidate graphs.
     encoder = Vectorizer(complexity=self.complexity)
     self.candidate_graphs_data_matrix = vectorize(
         candidate_graphs, vectorizer=encoder, block_size=400, n_jobs=-1)
Ejemplo n.º 2
0
 def setup(self, known_graphs=None, candidate_graphs=None):
     """Prepare neighbour distances and encodings for the candidate graphs.

     A ``KNNWrapper`` around ``NearestNeighbors`` is fitted on
     ``known_graphs``; the candidates are then passed through it and the
     ``'distances'`` value attached to each resulting graph is collected
     in ``self.distances_to_known_graphs``. Finally the candidates are
     vectorized into ``self.candidate_graphs_data_matrix``.

     NOTE(review): ``candidate_graphs`` is consumed by both ``predict``
     and ``vectorize``; this presumably requires a list rather than a
     one-shot iterator -- confirm with callers.
     """
     # Build the priors in one literal; key order matches the original
     # dict(...)/update(...) sequence.
     priors = dict(
         n_neighbors=self.n_neighbors,
         vectorizer__complexity=self.complexity,
         vectorize__n_jobs=-1,
         vectorize__fit_flag=False,
         vectorize__n_blocks=5,
         vectorize__block_size=100,
     )
     knn_on_known = model(
         known_graphs,
         program=KNNWrapper(program=NearestNeighbors()),
         parameters_priors=priors,
     )

     # Distances of each candidate graph to the known graphs.
     predicted = predict(candidate_graphs, program=knn_on_known)
     self.distances_to_known_graphs = [
         graph.graph['distances'] for graph in list(predicted)]

     # Encodings of the candidate graphs.
     self.candidate_graphs_data_matrix = vectorize(
         candidate_graphs,
         vectorizer=Vectorizer(complexity=self.complexity),
         block_size=400,
         n_jobs=-1,
     )
Ejemplo n.º 3
0
 def setup(self, known_graphs=None, candidate_graphs=None):
     """Fit a k-NN model on the known graphs and index the candidates.

     Sets two attributes:

     * ``distances_to_known_graphs`` -- the ``'distances'`` entry of each
       candidate graph after ``predict`` with the fitted k-NN program.
     * ``candidate_graphs_data_matrix`` -- ``Vectorizer.transform`` applied
       to the candidate graphs.

     NOTE(review): the k-NN vectorizer is configured with
     ``discrete=True`` while the encoding ``Vectorizer`` below only
     receives ``complexity`` -- confirm this difference is intended.
     """
     # Settings for the wrapped k-NN program (insertion order preserved).
     knn_settings = {
         'n_neighbors': self.n_neighbors,
         'vectorizer__complexity': self.complexity,
         'vectorizer__discrete': True,
     }
     wrapped_knn = KNNWrapper(program=NearestNeighbors())
     fitted_knn = model(known_graphs,
                        program=wrapped_knn,
                        parameters_priors=knn_settings)

     # Collect, per candidate, the distances attached by the k-NN program.
     neighbour_graphs = list(predict(candidate_graphs, program=fitted_knn))
     self.distances_to_known_graphs = [
         neighbour_graph.graph['distances']
         for neighbour_graph in neighbour_graphs]

     # Encode the candidate graphs.
     self.candidate_graphs_data_matrix = Vectorizer(
         complexity=self.complexity).transform(candidate_graphs)
Ejemplo n.º 4
0
 def setup(self, known_graphs=None, candidate_graphs=None):
     """Compute candidate-to-known distances and candidate encodings.

     The known graphs are used to fit a ``KNNWrapper`` around
     ``NearestNeighbors``. Each candidate graph is then run through the
     fitted program, and the ``'distances'`` value found in its ``graph``
     dict is stored in ``self.distances_to_known_graphs``. The candidates
     are also transformed by a ``Vectorizer`` and the result is stored in
     ``self.candidate_graphs_data_matrix``.

     NOTE(review): ``candidate_graphs`` is used by ``predict`` and again
     by ``Vectorizer.transform``; presumably it must be re-iterable --
     confirm with callers.
     """
     priors = dict(
         n_neighbors=self.n_neighbors,
         vectorizer__complexity=self.complexity,
         vectorizer__discrete=True,
     )
     knn_on_known = model(
         known_graphs,
         program=KNNWrapper(program=NearestNeighbors()),
         parameters_priors=priors,
     )

     # Distances of every candidate graph to the known graphs.
     predicted = list(predict(candidate_graphs, program=knn_on_known))
     self.distances_to_known_graphs = [
         graph.graph['distances'] for graph in predicted]

     # Encodings of the candidate graphs.
     encoder = Vectorizer(complexity=self.complexity)
     encoded_candidates = encoder.transform(candidate_graphs)
     self.candidate_graphs_data_matrix = encoded_candidates
Ejemplo n.º 5
0
    def efficient_selection(self,
                            candidate_graphs,
                            known_graphs=None):
        """Pick ``self.n_proposals`` candidate ids via the acquisition function.

        Both graph collections are first annotated with the importance
        given by the fitted predictor's program. A ``KNNManager`` is set up
        on the annotated graphs, the candidates are scored with the fitted
        predictor, and the acquisition function is then run
        ``self.n_proposals`` times. After every pick the selected id is
        registered with the manager so that the next round of average
        distances takes it into account.

        Parameters
        ----------
        candidate_graphs : networkx graphs
            The iterator over the graphs to choose from.

        known_graphs : networkx graphs
            The iterator over the already known graphs. These are used to bias
            the exploration towards less similar proposals.

        Returns
        -------
        selection_ids : list
            The ids returned by ``self._acquisition``, one per iteration,
            in selection order. Each id is used as an index into the
            annotated candidate list.
        """
        start = time.time()

        # transform graphs according to importance
        # this allows similarity notion to be task dependent
        candidate_graphs = list(transform(
            candidate_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program)))
        known_graphs = list(transform(
            known_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program)))

        # store the nearest neighbors in knn_manager
        # compute the k nearest neighbors distances of each proposal graph
        knn_manager = KNNManager(n_neighbors=self.n_neighbors, complexity=3)
        knn_manager.setup(known_graphs=known_graphs,
                          candidate_graphs=candidate_graphs)
        delta_time = datetime.timedelta(seconds=(time.time() - start))
        # pass the argument lazily: formatting only happens if the record
        # is actually emitted
        logger.info('Knn computation took: %s', delta_time)

        # compute predictions; scores become a column vector (n, 1)
        predicted_graphs = list(predict(candidate_graphs,
                                        program=self.fit_wrapped_predictor))
        scores = np.array([graph.graph['score']
                           for graph in predicted_graphs]).reshape(-1, 1)

        # iterations
        tradeoff = self.exploration_vs_exploitation_tradeoff
        selection_ids = []
        for _ in range(self.n_proposals):
            # uncertainties are recomputed every round because add_element
            # below updates the manager with the latest selection
            uncertainties = knn_manager.average_distances()
            # run the acquisition function (n_proposals times)
            # and return best_id
            maximal_id = self._acquisition(
                scores,
                uncertainties,
                exploration_vs_exploitation_tradeoff=tradeoff)
            # update distances with new selection
            knn_manager.add_element(maximal_id)
            # store id
            selection_ids.append(maximal_id)
            graph = candidate_graphs[maximal_id]
            logger.debug('>%s', graph.graph['header'])
            logger.debug(graph.graph['sequence'])
        return selection_ids
Ejemplo n.º 6
0
    def efficient_selection(self,
                            candidate_graphs,
                            known_graphs=None):
        """Select ``self.n_proposals`` candidates, returning their ids.

        The candidates and the known graphs are annotated with the
        importance derived from the fitted predictor's program, a
        ``KNNManager`` is set up on them, and the candidates are scored by
        the fitted predictor. The acquisition function is then evaluated
        once per proposal; each chosen id is added to the manager before
        the next evaluation.

        Parameters
        ----------
        candidate_graphs : networkx graphs
            The iterator over the graphs to choose from.

        known_graphs : networkx graphs
            The iterator over the already known graphs. These are used to bias
            the exploration towards less similar proposals.

        Returns
        -------
        selection_ids : list
            The ids produced by ``self._acquisition`` in the order they
            were selected; each one indexes the annotated candidate list.
        """
        start = time.time()

        candidate_graphs = transform(
            candidate_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program))
        candidate_graphs = list(candidate_graphs)

        # transform graphs according to importance
        # this allows similarity notion to be task dependent
        known_graphs = transform(
            known_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program))
        known_graphs = list(known_graphs)

        # store the nearest neighbors in knn_manager
        # compute the k nearest neighbors distances of each proposal graph
        knn_manager = KNNManager(n_neighbors=self.n_neighbors, complexity=3)
        knn_manager.setup(known_graphs=known_graphs,
                          candidate_graphs=candidate_graphs)
        elapsed = datetime.timedelta(seconds=(time.time() - start))
        # lazy logging argument: the message is only formatted when the
        # INFO record is actually handled
        logger.info('Knn computation took: %s', elapsed)

        # compute predictions and arrange the scores as an (n, 1) column
        predicted_graphs = list(predict(candidate_graphs,
                                        program=self.fit_wrapped_predictor))
        scores = np.array([predicted.graph['score']
                           for predicted in predicted_graphs]).reshape(-1, 1)

        # iterations
        tradeoff = self.exploration_vs_exploitation_tradeoff
        selection_ids = []
        for _ in range(self.n_proposals):
            # refreshed each round: the manager was updated with the
            # previous selection via add_element
            uncertainties = knn_manager.average_distances()
            # run the acquisition function (n_proposals times)
            # and return best_id
            maximal_id = self._acquisition(
                scores,
                uncertainties,
                exploration_vs_exploitation_tradeoff=tradeoff)
            # update distances with new selection
            knn_manager.add_element(maximal_id)
            # store id
            selection_ids.append(maximal_id)
            selected = candidate_graphs[maximal_id]
            logger.debug('>%s', selected.graph['header'])
            logger.debug(selected.graph['sequence'])
        return selection_ids