import itertools
import random
import traceback
import logging

import dill
import networkx as nx

from eden.graph import Vectorizer

# The remaining collaborators (estimator_wrapper, FeasibilityChecker,
# processing.PostProcessor, LocalSubstitutableGraphGrammar, core_substitution,
# graph_clean, extract_core_and_interface) are imported from the surrounding
# GraphLearn package; their exact module paths depend on the checkout and are
# omitted here.

logger = logging.getLogger(__name__)


class GraphLearnSampler(object):

    '''
    HERE PREPARATIONS FOR SAMPLING ARE TAKEN CARE OF

    init/save/load are not too surprising.

    fit will tell the grammar object to learn from the graphs you provide.
        it also trains an SVM-based estimator on all the input graphs that
        can decide how much a given graph is like the ones in the input.
    '''

    def __init__(self,
                 radius_list=[0, 1],
                 thickness_list=[1, 2],
                 grammar=None,
                 core_interface_pair_remove_threshold=2,
                 interface_remove_threshold=2,
                 complexity=3,
                 vectorizer=Vectorizer(complexity=3),
                 estimator=estimator_wrapper.estimator_wrapper()):

        self.complexity = complexity
        self.feasibility_checker = FeasibilityChecker()
        self.postprocessor = processing.PostProcessor()
        self.vectorizer = vectorizer
        # lists of int
        self.radius_list = [int(2 * r) for r in radius_list]
        self.thickness_list = [int(2 * t) for t in thickness_list]
        # scikit classifier (wrapped)
        self.estimatorobject = estimator
        # cip hashes will be masked with this; this is unrelated to the vectorizer
        self.hash_bitmask = pow(2, 20) - 1
        # we will save the current graph at every sampling_interval-th step and
        # attach the snapshots to the result graph's 'sampling_info'
        self.sampling_interval = None
        # how many sampling steps are done
        self.n_steps = None
        # current step in the sampling process of a single graph
        self.step = None
        # how often we try to get a cip from the current graph during sampling
        self.select_cip_max_tries = None
        # sample path
        self.sample_path = None

        # grammar object: use the one passed in, otherwise build a fresh one
        if grammar is not None:
            self.local_substitutable_graph_grammar = grammar
        else:
            self.local_substitutable_graph_grammar = LocalSubstitutableGraphGrammar(
                self.radius_list,
                self.thickness_list,
                complexity=self.complexity,
                cip_remove_threshold=core_interface_pair_remove_threshold,
                interface_remove_threshold=interface_remove_threshold,
                nbit=20)
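
        # Note: radius_list and thickness_list were doubled above, presumably
        # because sampling operates on the edge-to-vertex transformed graph
        # (see _sample_init) where every edge becomes an extra node, so all
        # distances double; e.g. radius_list=[0, 1] is stored as [0, 2].
        # The hash_bitmask (2 ** 20 - 1) appears to correspond to the
        # grammar's nbit=20 setting.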
        
    def save(self, file_name):
        self.local_substitutable_graph_grammar._revert_multicore_transform()
        with open(file_name, "wb") as out_file:
            dill.dump(self.__dict__, out_file, protocol=dill.HIGHEST_PROTOCOL)
        # joblib.dump(self.__dict__, file_name, compress=1)
        logger.debug('Saved model: %s' % file_name)

    def load(self, file_name):
        # self.__dict__ = joblib.load(file_name)
        with open(file_name, "rb") as in_file:
            self.__dict__ = dill.load(in_file)
        logger.debug('Loaded model: %s' % file_name)

    def fit(self, graphs,
            core_interface_pair_remove_threshold=2,
            interface_remove_threshold=2,
            nu=.5):
        """
          use input to fit the grammar and fit the estimator
        """
        graphs, graphs_ = itertools.tee(graphs)
        self.estimator = self.estimatorobject.fit(graphs_, vectorizer=self.vectorizer, nu=nu)
        self.local_substitutable_graph_grammar.fit(graphs)
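
    # A sketch of what fit() provides, assuming estimator_wrapper wraps a
    # one-class SVM (the `nu` parameter suggests this): nu roughly bounds the
    # fraction of training graphs treated as outliers, e.g.
    #     sampler.fit(graphs, nu=.3)
    # trains an estimator whose predict_proba() scores how similar a graph is
    # to the training set; _score() below relies on exactly that.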




    '''
    ENTRY POINT FOR SAMPLING. THE ACTUAL WORK WILL BE DONE BY _sample
    '''
    def sample(self, graph_iter,
               n_samples=10,
               n_steps=50,
               select_cip_max_tries=20):
        """
            input: graph iterator
            output: yield (sampled_graph,{dictionary of info about sampling process}
        """
        self.sampling_interval= 99999
        if n_samples:
            self.sampling_interval = int(n_steps / n_samples) + 1
        self.n_steps = n_steps
        self.select_cip_max_tries = select_cip_max_tries
   
        # sampling
        for graph in graph_iter:
            sampled_graph = self._sample(graph)
            # yield sampled_graph
            for new_graph in self.return_formatter(sampled_graph):
                yield new_graph
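
    # Worked example of the snapshot bookkeeping above: with the defaults
    # n_steps=50 and n_samples=10, sampling_interval = int(50 / 10) + 1 = 6,
    # so _sample_path_append() keeps a snapshot at steps 0, 6, 12, ..., 48
    # (n_samples is an approximate target, not an exact count).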

    def return_formatter(self, sample_product):
        # after _sample we need to decide what to yield...
        yield sample_product


    '''
    HERE ALL THE SAMPLING HAPPENS, EXCEPT THE GRAPH PROPOSAL WHICH IS DESCRIBED BELOW
    '''
    def _sample(self, graph):
        '''
            we sample a single graph.

            input: a graph
            output: (sampled_graph,{info dictionary})
        '''
        # prepare variables and graph
        graph = self._sample_init(graph)
        self._score_list = [graph._score]
        self.sample_path = []

        try:
            for self.step in xrange(self.n_steps):
                # check similarity - stop condition..
                self._stop_condition(graph)
                # get a proposal for a new graph
                # keep it if we like it
                candidate_graph = self._propose(graph)
                if self._accept(graph, candidate_graph):
                    graph = candidate_graph

                # save score
                # take snapshot
                self._score_list_append(graph)
                self._sample_path_append(graph)

        except Exception as exc:
            logger.debug(exc)
            logger.debug(traceback.format_exc(10))

        self._score_list += [self._score_list[-1]] * (self.n_steps + 1 - len(self._score_list))
        # we put the result in the sample_path
        # and we return a nice graph as well as a dictionary of additional information
        self._sample_path_append(graph)
        sampled_graph = self.vectorizer._revert_edge_to_vertex_transform(graph)
        sampled_graph.graph['sampling_info'] = {'graphs_history': self.sample_path, 'score_history': self._score_list}
        return sampled_graph
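
    # Note on the padding above: exceptions in the sampling loop are only
    # logged, so a run that aborts early simply repeats its last score until
    # _score_list has n_steps + 1 entries; every returned graph therefore
    # carries a score history of identical length.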

    def _score_list_append(self, graph):
        self._score_list.append(graph._score)

    def _sample_path_append(self, graph):
        if self.step % self.sampling_interval == 0:
            graph.graph['score'] = graph._score
            self.sample_path.append(self.vectorizer._revert_edge_to_vertex_transform(graph))

    def _sample_init(self, graph):
        '''
        we prepare the sampling process
        '''
        graph = self.vectorizer._edge_to_vertex_transform(graph)
        self._score(graph)
        return graph

    def _stop_condition(self, graph):
        pass
        
    def _score(self, graph):
        """
        :param graph: a graph
        :return: score of graph
        we also set graph.score_nonlog and graph.score
        """
        if '_score' not in graph.__dict__:
            transformed_graph = self.vectorizer.transform_single(nx.Graph(graph))
            # slow so dont do it..
            # graph.score_nonlog = self.estimator.base_estimator.decision_function(transformed_graph)[0]
            graph._score = self.estimator.predict_proba(transformed_graph)[0,1]
        return graph._score
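
    # Assumption about the estimator interface: predict_proba() returns an
    # array of shape (1, n_classes) for the single transformed graph, so
    # [0, 1] reads the probability of the positive ("looks like the training
    # data") class, i.e. a score in (0, 1).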

    def _accept(self, graph_old, graph_new):
        '''
            we took the old graph and generated a new graph by conducting a replacement step.
            now we want to know if this new graph is good enough to take the old one's place.
            in this implementation we use the score of the graph to judge the new graph.
        '''

        # first calculate the score ratio between old and new graph.
        score_graph_old = self._score(graph_old)
        score_graph_new = self._score(graph_new)
        score_ratio = score_graph_new / score_graph_old
        # if the new graph scores higher, the ratio is > 1 and we accept
        if score_ratio > 1.0:
            return True
        return score_ratio > random.random()
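
    # Worked example of the acceptance rule: score_old=0.5, score_new=0.6
    # gives score_ratio=1.2 > 1.0, so the proposal is always accepted;
    # score_old=0.5, score_new=0.4 gives score_ratio=0.8, so it is accepted
    # with probability 0.8 (a Metropolis-like criterion on the raw scores).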




    '''
        FIRST WE PICK A CIP FROM THE ORIGINAL GRAPH (SEE BELOW),
        THEN WE DECIDE ON A CIP TO REPLACE IT WITH
    '''



    def _propose(self, graph):
        '''
        we wrap _propose_graph so that the proposal step may be overwritten some day
        '''
        graph = self._propose_graph(graph)
        if graph is not None:
            return graph
        raise Exception("propose failed.")

    def _propose_graph(self, graph):
        """
        we choose ONE core in the graph and return a valid grpah with a changed core
        note that when we chose the core, we made sure that there would be possible replacements..
        """
        # finding a legit candidate..
        original_cip = self.select_original_cip(graph)

        # see which substitution to make
        candidate_cips = self._select_cips(original_cip)
        for candidate_cip in candidate_cips:
            # substitute and return
            graph_new = core_substitution(graph, original_cip.graph, candidate_cip.graph)
            if self.feasibility_checker.check(graph_new):
                graph_clean(graph_new)
                return self.postprocessor.postprocess(graph_new)


    def _select_cips(self, cip):
        """
        :param cip: the cip we selected from the graph
        :yields: cips found in the grammar that can replace the input cip

        log to debug on fail
        """
        core_hashes = self._get_valid_core_hashes(cip)
        for core_hash in core_hashes:
            yield self.local_substitutable_graph_grammar.grammar[cip.interface_hash][core_hash]


    def _get_valid_core_hashes(self, cip):
        '''
        :param cip: the cip to be replaced
        :return: list of core_hashes of acceptable replacement cips
        '''
        result_list = list(self.local_substitutable_graph_grammar.grammar[cip.interface_hash].keys())
        random.shuffle(result_list)
        return result_list
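
    # The grammar is used above as a two-level dictionary,
    #     grammar[interface_hash][core_hash] -> candidate cip,
    # so shuffling the core hashes merely randomizes the order in which
    # _select_cips yields replacement candidates for a fixed interface.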

    '''
        PICK A CIP FROM THE ORIGINAL GRAPH
    '''

    def select_original_cip(self, graph):
        """
        selects a cip from the original graph.
        (we try maxtries times to make sure we get something nice)

        - original_cip_extraction  takes care of extracting a cip
        - accept_original_cip makes sure that the cip we got is indeed in the grammar
        """
        for x in xrange(self.select_cip_max_tries):

            #  get a cip
            cip = self._original_cip_extraction(graph)
            if not cip:
                continue
            cip = cip[0]
            
            # return if the cip is good.
            if self._accept_original_cip(cip):
                return cip

        raise Exception('select_original_cip failed')


    def _original_cip_extraction(self, graph):
        '''
        selects the next candidate.
        '''
        # choose random node
        node = random.choice(graph.nodes())
        if 'edge' in graph.node[node]:
            node = random.choice(graph.neighbors(node))
        # random radius and thickness
        radius = random.choice(self.local_substitutable_graph_grammar.radius_list)
        thickness = random.choice(self.local_substitutable_graph_grammar.thickness_list)
        return extract_core_and_interface(node, graph, [radius], [thickness], vectorizer=self.vectorizer)
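
    # Note: nodes carrying an 'edge' attribute are presumably the artificial
    # nodes created by the edge-to-vertex transform, so we hop to a random
    # neighbor to make sure the extracted cip is rooted on an original vertex.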


    def _accept_original_cip(self, cip):
        '''
        :param cip: the cip we need to judge
        :return: good or nogood (bool)
        '''
        # if the cip is in the grammar we are ok.
        if cip.interface_hash in self.local_substitutable_graph_grammar.grammar:
            return True
        return False
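

# ---------------------------------------------------------------------------
# Usage sketch (assumptions: the GraphLearn/EDeN dependencies noted at the top
# are importable; `_toy_graphs` below is a hypothetical helper, not part of
# the original module). It only exercises the fit() -> sample() flow defined
# in this file.
# ---------------------------------------------------------------------------
def _toy_graphs(n=20):
    # small labeled path graphs; EDeN's Vectorizer expects 'label' attributes
    for i in range(3, 3 + n):
        g = nx.path_graph(i)
        for node in g.nodes():
            g.node[node]['label'] = 'C'
        for a, b in g.edges():
            g[a][b]['label'] = '1'
        yield g


if __name__ == '__main__':
    sampler = GraphLearnSampler(radius_list=[0, 1], thickness_list=[1, 2])
    # fit consumes the graphs twice internally (estimator + grammar),
    # so hand it its own iterator
    sampler.fit(_toy_graphs(), nu=.5)
    for sampled in sampler.sample(_toy_graphs(), n_samples=5, n_steps=20):
        info = sampled.graph['sampling_info']
        print len(info['score_history']), info['score_history'][-1]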