Example #1
0
    def candidate_generator(self, seed_graphs):
        """Generate candidate graphs from the seed graphs.

        The seeds are pushed through a pipeline of graph programs:
        importance annotation, exclusion marking, selection of the
        ``self.n_substitutions`` nodes with the largest 'importance',
        replacement of the selected nodes with all combinations of the
        symbols in ``self.label_list`` and finally
        ``self.seq_to_structure_prog``.

        Parameters
        ----------
        seed_graphs : networkx graphs
            The iterator over the seed graphs, i.e. the graphs that are used as
            a starting point for the proposal.

        Returns
        -------
        list
            The candidate graphs produced by the pipeline.
        """
        start = time.time()

        # annotate the graphs with the importance information that is
        # consumed below through the 'importance' attribute; the result is
        # materialized so that the number of graphs can be logged
        graphs = list(transform(
            seed_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program)))
        logger.debug('Working on %d graphs', len(graphs))

        # mark the position of nodes with the attribute 'exclude' to remove
        # the influence of primers
        graphs = transform(
            graphs,
            program=MarkWithIntervals(quadruples=self.exclusion_quadruples))

        # find the ktop largest (reverse=True) values for the
        # attribute='importance' in the vertices of a graph
        # and add an attribute to each vertex that is 'selected'=True
        # if the node is among the ktop
        graphs = transform(
            graphs,
            program=MarkKTop(attribute='importance',
                             exclude_attribute='exclude',
                             ktop=self.n_substitutions,
                             reverse=True,
                             mark_attribute='selected'))

        # generate graphs that have all possible combinations of symbols in
        # the nodes marked by MarkKTop
        graphs = transform(
            graphs,
            program=ReplaceWithAllCombinations(attribute='selected',
                                               label_list=self.label_list))

        # refold the sequences to account for structural changes
        graphs = transform(graphs, program=self.seq_to_structure_prog)

        # materialize the candidates so that they can be counted and returned
        candidate_graphs = list(graphs)
        delta_time = datetime.timedelta(seconds=(time.time() - start))
        # arguments are handed to the logger so that the message is only
        # formatted when the record is actually emitted
        logger.info('Candidate generation took: %s', delta_time)
        logger.info('Number of candidates: %d', len(candidate_graphs))

        return candidate_graphs
Example #2
0
    def candidate_generator(self, seed_graphs):
        """Generate candidate graphs from the seed graphs.

        The seeds are pushed through a pipeline of graph programs:
        importance annotation, exclusion marking, selection of the
        ``self.n_substitutions`` nodes with the largest 'importance',
        replacement of the selected nodes with all combinations of the
        symbols in ``self.label_list`` and finally
        ``self.seq_to_structure_prog``.

        Parameters
        ----------
        seed_graphs : networkx graphs
            The iterator over the seed graphs, i.e. the graphs that are used as
            a starting point for the proposal.

        Returns
        -------
        list
            The candidate graphs produced by the pipeline.
        """
        start = time.time()

        # annotate the graphs with the importance information that is
        # consumed below through the 'importance' attribute; the result is
        # materialized so that the number of graphs can be logged
        graphs = list(transform(
            seed_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program)))
        logger.debug('Working on %d graphs', len(graphs))

        # mark the position of nodes with the attribute 'exclude' to remove
        # the influence of primers
        graphs = transform(
            graphs,
            program=MarkWithIntervals(quadruples=self.exclusion_quadruples))

        # find the ktop largest (reverse=True) values for the
        # attribute='importance' in the vertices of a graph
        # and add an attribute to each vertex that is 'selected'=True
        # if the node is among the ktop
        graphs = transform(
            graphs,
            program=MarkKTop(attribute='importance',
                             exclude_attribute='exclude',
                             ktop=self.n_substitutions,
                             reverse=True,
                             mark_attribute='selected'))

        # generate graphs that have all possible combinations of symbols in
        # the nodes marked by MarkKTop
        graphs = transform(
            graphs,
            program=ReplaceWithAllCombinations(attribute='selected',
                                               label_list=self.label_list))

        # refold the sequences to account for structural changes
        graphs = transform(graphs, program=self.seq_to_structure_prog)

        # materialize the candidates so that they can be counted and returned
        candidate_graphs = list(graphs)
        delta_time = datetime.timedelta(seconds=(time.time() - start))
        # arguments are handed to the logger so that the message is only
        # formatted when the record is actually emitted
        logger.info('Candidate generation took: %s', delta_time)
        logger.info('Number of candidates: %d', len(candidate_graphs))

        return candidate_graphs
Example #3
0
    def transform(self, orig_graphs=None):
        """Transform the graphs and contract the result.

        The graphs returned by ``self._transform`` are processed by the
        ``Contract`` program, using 'max_clique_hash' as the contraction
        attribute. The 'label' attributes of contracted nodes are reduced
        with the 'categorical' reduction and the 'weight' attributes with
        the 'sum' reduction.

        Parameters
        ----------
        orig_graphs : networkx graphs, optional
            The graphs to transform; forwarded as is to ``self._transform``.

        Returns
        -------
        graphs or None
            The result of the contraction. If an exception is raised inside
            this method it is logged at debug level and ``None`` is returned.
        """
        try:
            graphs = self._transform(orig_graphs)
            # reduce all 'label' attributes of contracted nodes to a
            # histogram to be written in the 'label' attribute of the
            # resulting graph
            label_modifier = contraction_modifier(attribute_in='label',
                                                  attribute_out='label',
                                                  reduction='categorical')
            # reduce all 'weight' attributes of contracted nodes using
            # a sum to be written in the 'weight' attribute of the
            # resulting graph
            weight_modifier = contraction_modifier(attribute_in='weight',
                                                   attribute_out='weight',
                                                   reduction='sum')
            modifiers = [label_modifier, weight_modifier]
            original_edges = self.original_edges_to_nesting
            priors = dict(nesting=self.nesting,
                          weight_scaling_factor=1,
                          original_edges_to_nesting=original_edges)
            contraction_attribute = 'max_clique_hash'
            # NOTE(review): if the module-level transform is evaluated
            # lazily, errors raised while the result is consumed are not
            # caught by this try block -- confirm
            return transform(
                graphs,
                program=Contract(
                    modifiers=modifiers,
                    contraction_attribute=contraction_attribute),
                parameters_priors=priors)

        except Exception as e:
            # deliberate best effort: the failure is only reported at debug
            # level (with traceback) and signalled to the caller via None
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)
            return None
Example #4
0
    def transform(self, orig_graphs=None):
        """Transform the graphs and contract the result.

        The graphs returned by ``self._transform`` are processed by the
        ``Contract`` program, using 'max_clique_hash' as the contraction
        attribute. The 'label' attributes of contracted nodes are reduced
        with the 'categorical' reduction and the 'weight' attributes with
        the 'sum' reduction.

        Parameters
        ----------
        orig_graphs : networkx graphs, optional
            The graphs to transform; forwarded as is to ``self._transform``.

        Returns
        -------
        graphs or None
            The result of the contraction. If an exception is raised inside
            this method it is logged at debug level and ``None`` is returned.
        """
        try:
            graphs = self._transform(orig_graphs)
            # reduce all 'label' attributes of contracted nodes to a
            # histogram to be written in the 'label' attribute of the
            # resulting graph
            label_modifier = contraction_modifier(attribute_in='label',
                                                  attribute_out='label',
                                                  reduction='categorical')
            # reduce all 'weight' attributes of contracted nodes using
            # a sum to be written in the 'weight' attribute of the
            # resulting graph
            weight_modifier = contraction_modifier(attribute_in='weight',
                                                   attribute_out='weight',
                                                   reduction='sum')
            modifiers = [label_modifier, weight_modifier]
            original_edges = self.original_edges_to_nesting
            priors = dict(nesting=self.nesting,
                          weight_scaling_factor=1,
                          original_edges_to_nesting=original_edges)
            contraction_attribute = 'max_clique_hash'
            # NOTE(review): if the module-level transform is evaluated
            # lazily, errors raised while the result is consumed are not
            # caught by this try block -- confirm
            return transform(
                graphs,
                program=Contract(
                    modifiers=modifiers,
                    contraction_attribute=contraction_attribute),
                parameters_priors=priors)

        except Exception as e:
            # deliberate best effort: the failure is only reported at debug
            # level (with traceback) and signalled to the caller via None
            logger.debug('Failed iteration. Reason: %s', e)
            logger.debug('Exception', exc_info=True)
            return None
Example #5
0
    def efficient_selection(self,
                            candidate_graphs,
                            known_graphs=None):
        """Select the ids of the candidate graphs to propose.

        The candidates receive a score from the fitted predictor and an
        uncertainty given by the average distances reported by the
        ``KNNManager``. ``self._acquisition`` is then run
        ``self.n_proposals`` times; after each run the selected id is
        registered in the ``KNNManager`` before the next run.

        Parameters
        ----------
        candidate_graphs : networkx graphs
            The iterator over the candidate graphs among which the proposals
            are selected.

        known_graphs : networkx graphs
            The iterator over the already known graphs. These are used to bias
            the exploration towards less similar proposals.
            NOTE(review): the default ``None`` is passed to ``transform``
            as is -- confirm that ``transform`` accepts it.

        Returns
        -------
        list
            The ids returned by the acquisition function, in selection
            order. Each id indexes the list of annotated candidate graphs.
        """
        start = time.time()

        candidate_graphs = transform(
            candidate_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program))
        candidate_graphs = list(candidate_graphs)

        # transform graphs according to importance
        # this allows similarity notion to be task dependent
        known_graphs = transform(
            known_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program))
        known_graphs = list(known_graphs)

        # store the nearest neighbors in knn_manager
        # compute the k nearest neighbors distances of each proposal graph
        knn_manager = KNNManager(n_neighbors=self.n_neighbors, complexity=3)
        knn_manager.setup(known_graphs=known_graphs,
                          candidate_graphs=candidate_graphs)
        # the reported time is measured from the start of the method, so it
        # also covers the importance annotation above
        delta_time = datetime.timedelta(seconds=(time.time() - start))
        logger.info('Knn computation took: %s', delta_time)

        # compute predictions; the scores are arranged as a single column
        predicted_graphs = list(predict(candidate_graphs,
                                        program=self.fit_wrapped_predictor))
        scores = np.array([graph.graph['score']
                           for graph in predicted_graphs]).reshape(-1, 1)

        # iterations
        tradeoff = self.exploration_vs_exploitation_tradeoff
        selection_ids = []
        for _ in range(self.n_proposals):
            # the uncertainties are recomputed at every iteration because
            # add_element updates the distances with the previous selection
            uncertainties = knn_manager.average_distances()
            # run the acquisition function (n_proposals times)
            # and return best_id
            maximal_id = self._acquisition(
                scores,
                uncertainties,
                exploration_vs_exploitation_tradeoff=tradeoff)
            # update distances with new selection
            knn_manager.add_element(maximal_id)
            # store id
            selection_ids.append(maximal_id)
            graph = candidate_graphs[maximal_id]
            logger.debug('>%s', graph.graph['header'])
            logger.debug(graph.graph['sequence'])
        return selection_ids
Example #6
0
    def efficient_selection(self,
                            candidate_graphs,
                            known_graphs=None):
        """Select the ids of the candidate graphs to propose.

        The candidates receive a score from the fitted predictor and an
        uncertainty given by the average distances reported by the
        ``KNNManager``. ``self._acquisition`` is then run
        ``self.n_proposals`` times; after each run the selected id is
        registered in the ``KNNManager`` before the next run.

        Parameters
        ----------
        candidate_graphs : networkx graphs
            The iterator over the candidate graphs among which the proposals
            are selected.

        known_graphs : networkx graphs
            The iterator over the already known graphs. These are used to bias
            the exploration towards less similar proposals.
            NOTE(review): the default ``None`` is passed to ``transform``
            as is -- confirm that ``transform`` accepts it.

        Returns
        -------
        list
            The ids returned by the acquisition function, in selection
            order. Each id indexes the list of annotated candidate graphs.
        """
        start = time.time()

        candidate_graphs = transform(
            candidate_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program))
        candidate_graphs = list(candidate_graphs)

        # transform graphs according to importance
        # this allows similarity notion to be task dependent
        known_graphs = transform(
            known_graphs,
            program=AnnotateImportance(
                program=self.fit_wrapped_predictor.program))
        known_graphs = list(known_graphs)

        # store the nearest neighbors in knn_manager
        # compute the k nearest neighbors distances of each proposal graph
        knn_manager = KNNManager(n_neighbors=self.n_neighbors, complexity=3)
        knn_manager.setup(known_graphs=known_graphs,
                          candidate_graphs=candidate_graphs)
        # the reported time is measured from the start of the method, so it
        # also covers the importance annotation above
        delta_time = datetime.timedelta(seconds=(time.time() - start))
        logger.info('Knn computation took: %s', delta_time)

        # compute predictions; the scores are arranged as a single column
        predicted_graphs = list(predict(candidate_graphs,
                                        program=self.fit_wrapped_predictor))
        scores = np.array([graph.graph['score']
                           for graph in predicted_graphs]).reshape(-1, 1)

        # iterations
        tradeoff = self.exploration_vs_exploitation_tradeoff
        selection_ids = []
        for _ in range(self.n_proposals):
            # the uncertainties are recomputed at every iteration because
            # add_element updates the distances with the previous selection
            uncertainties = knn_manager.average_distances()
            # run the acquisition function (n_proposals times)
            # and return best_id
            maximal_id = self._acquisition(
                scores,
                uncertainties,
                exploration_vs_exploitation_tradeoff=tradeoff)
            # update distances with new selection
            knn_manager.add_element(maximal_id)
            # store id
            selection_ids.append(maximal_id)
            graph = candidate_graphs[maximal_id]
            logger.debug('>%s', graph.graph['header'])
            logger.debug(graph.graph['sequence'])
        return selection_ids