def fit(self,
         G_pos,
         core_interface_pair_remove_threshold=3,
         interface_remove_threshold=2,
         grammar_n_jobs=-1):
     """
       use input to fit the grammar and fit the estimator
     """
     # get grammar
     self.local_substitutable_graph_grammar = LocalSubstitutableGraphGrammar(
         self.radius_list,
         self.thickness_list,
         core_interface_pair_remove_threshold,
         interface_remove_threshold,
         nbit=self.nbit,
         node_entity_check=self.node_entity_check)
     self.local_substitutable_graph_grammar.fit(G_pos, grammar_n_jobs)
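
A hypothetical call sketch for the method above (the enclosing sampler class is
not shown on this page, so the constructor arguments are assumed from the
attributes the method uses, e.g. self.radius_list and self.thickness_list):

# sketch only: GraphLearnSampler's constructor signature is an assumption
sampler = GraphLearnSampler(radius_list=[0, 1], thickness_list=[1, 2])
sampler.fit(positive_graphs,                         # iterable of networkx graphs
            core_interface_pair_remove_threshold=2,  # drop CIPs seen fewer than 2 times
            interface_remove_threshold=2,            # drop rare interfaces
            grammar_n_jobs=4)                        # parallel grammar fitting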
Example #3
# imports assumed for this snippet; GraphLearnSampler's module path is taken
# from the last example on this page
import copy
import itertools

import networkx as nx
from sklearn.neighbors import LSHForest  # deprecated in 0.19, removed in 0.21

from graphlearn.graphlearn import GraphLearnSampler


class directedSampler(GraphLearnSampler):

    '''
    ok, here is the plan (a usage sketch follows the class definition):

    1. train the grammar as usual; no estimator is needed.

    2. find nearest neighbors (NN).

    3. the actual action will hijack the sample function:
        instead of a graph_iterator
        we supply a graph-pair iterator with a start and a goal graph.

    4. then we overwrite _sample to separate start and goal again
        and use the original _sample with a changed scoring scheme.

    5. draw the result to see what actually happens.

    6. ???

    7. we are done with the clustering process.

    '''

    def fit(self, G_pos,
            core_interface_pair_remove_threshold=3,
            interface_remove_threshold=2,
            grammar_n_jobs=-1):
        """
          use input to fit the grammar and fit the estimator
        """
        # get grammar
        self.local_substitutable_graph_grammar = LocalSubstitutableGraphGrammar(self.radius_list, self.thickness_list,
                                                                                core_interface_pair_remove_threshold,
                                                                                interface_remove_threshold,
                                                                                nbit=self.nbit,
                                                                                node_entity_check=self.node_entity_check)
        self.local_substitutable_graph_grammar.fit(G_pos, grammar_n_jobs)

    def get_nearest_neighbor_iterable(self, graphlist, start_graphs, start_is_subset=True):
        # vectorize all
        graphlist = list(graphlist)
        graphlist_ = copy.deepcopy(graphlist)
        X = self.vectorizer.transform_single(graphlist_)

        start_graphs = list(start_graphs)
        graphlist_ = copy.deepcopy(start_graphs)
        Y = self.vectorizer.transform_single(graphlist_)
        
        
        # LSHForest (approximate nearest neighbors) was deprecated in
        # scikit-learn 0.19 and removed in 0.21; see the exact-NN replacement
        # sketch after this class.
        # http://scikit-learn.org/stable/modules/neighbors.html
        forest = LSHForest()
        forest.fit(X)
        distances, indices = forest.kneighbors(Y, n_neighbors=2)

        # if the start graphs are a subset of the background set, the nearest
        # neighbor of a start graph is the graph itself, so skip to the
        # second-nearest neighbor
        index = 0
        if start_is_subset:
            index += 1
        
        # matches = (X_index, Y_index, distance)
        matches = [(indices[i, index], i, distances[i, index]) for i in range(len(indices))]
        matches.sort()

        # yield (start graph, its nearest background graph, that graph's vector)
        for Xi, Yi, dist in matches:
            yield (start_graphs[Yi], graphlist[Xi], X[Xi])
    '''
    # brute-force alternative: compare every graph against every other graph
    # and keep the most similar one (kept for reference, not used):
    graphlist, graphlist_ = itertools.tee(graphlist)
    for i, g in enumerate(graphlist):
        gl, graphlist_ = itertools.tee(graphlist_)
        best_sim = 0.0
        best_i = 0
        NN = None
        for i2, g2 in enumerate(gl):
            sim = X[i].dot(X[i2].T).todense()[0, 0]
            if sim > best_sim and i != i2:
                best_sim = sim
                best_i = i2
                NN = g2
        yield (g, NN, X[best_i])
    '''

    def _stop_condition(self, graph):
        #if len(self.sample_path) == 1 and self.goal_graph:
        #    self.sample_path.append(self.goal_graph)
        if '_score' in graph.__dict__:
            if graph._score > 0.99:
                self._sample_notes += ';edge %d %d;' % (self.starthash, self.finhash)
                # abort sampling once the goal is (almost) reached
                raise Exception('goal reached')

    def get_average_vector(self, graphiter):
        # mean of the vectorized graphs ('vecs' avoids shadowing builtin all())
        vecs = self.vectorizer.transform_single(graphiter)
        return vecs.mean(axis=0)

    def sample(self, graph_iter, target_graph=None, start_graphs=None,
               target_vector=None, start_gr_in_graph_iter=True, **kwargs):
        '''
        graph_iter are the background graphs that are always there.
        if we set a target_graph, all graphs move toward that one.
        if we set start_graphs, each start graph will try to reach its
        closest neighbor in the background.
        '''

        if start_graphs:
            # start_gr_in_graph_iter mirrors start_is_subset (True if the
            # start graphs also appear in graph_iter)
            graphiter = self.get_nearest_neighbor_iterable(graph_iter, start_graphs, start_gr_in_graph_iter)

        elif target_graph:
            target_copy = nx.Graph(target_graph)
            target_vector = self.vectorizer.transform_single(target_copy)
            # itertools.izip: this code targets Python 2
            graphiter = itertools.izip(graph_iter,
                                       itertools.repeat(target_graph),
                                       itertools.repeat(target_vector))

        elif target_vector is not None:
            graphiter = itertools.izip(graph_iter,
                                       itertools.repeat(None),
                                       itertools.repeat(target_vector))

        else:
            raise ValueError('need start_graphs, target_graph or target_vector')

        for e in super(directedSampler, self).sample(graphiter, **kwargs):
            yield e

    def _sample(self, g_pair):
        # g_pair = (start graph, goal graph, goal vector)
        self.starthash = hash(g_pair[0])
        self.finhash = hash(g_pair[2])
        self.startgraph = g_pair[0]
        self.goal = g_pair[2]
        self.goal_graph = g_pair[1]  # may be None
        #self.goal_size = len(self.vectorizer._edge_to_vertex_transform(self.goal_graph))
        return super(directedSampler, self)._sample(g_pair[0])

    def _score(self, graphmanager):
        if '_score' not in graphmanager.__dict__:
            transformed_graph = self.vectorizer.transform_single(nx.Graph(graphmanager))
            # the estimator's decision_function is slow, so it is skipped:
            # graph.score_nonlog = self.estimator.base_estimator.decision_function(transformed_graph)[0]

            # score = dot-product similarity to the goal vector
            graphmanager._score = transformed_graph.dot(self.goal.T)[0, 0]
            # alternative: graphmanager._score = (1 - distance(transformed_graph, self.goal))[0, 0]
            # optional size penalty:
            # graphmanager.score -= .007 * abs(self.goal_size - len(graphmanager))
        return graphmanager._score
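
A minimal usage sketch of directedSampler, tying the plan in the docstring to
the API (background_graphs and start_graphs are hypothetical placeholders, and
the constructor arguments are assumptions):

# sketch only: the data loaders are placeholders
sampler = directedSampler(radius_list=[0, 1], thickness_list=[1, 2])
sampler.fit(background_graphs)  # step 1 of the plan: grammar only
for graph in sampler.sample(background_graphs,
                            start_graphs=start_graphs,
                            start_gr_in_graph_iter=True):
    pass  # each start graph walks toward its nearest background neighbor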
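
Since LSHForest is gone from current scikit-learn, here is a sketch of an exact
nearest-neighbor replacement for the lookup inside get_nearest_neighbor_iterable;
NearestNeighbors.kneighbors returns (distances, indices) with the same shape:

# sketch: exact neighbors instead of approximate LSH
from sklearn.neighbors import NearestNeighbors
nn = NearestNeighbors(n_neighbors=2).fit(X)
distances, indices = nn.kneighbors(Y, n_neighbors=2)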
Example #5
# imports assumed for this snippet: islice from the stdlib, gspan_to_eden from
# EDeN; etvt is an edge-to-vertex transform helper defined elsewhere on this page
from itertools import islice
from eden.converter.graph.gspan import gspan_to_eden


def get_graphs(dataset_fname='../toolsdata/bursi.pos.gspan', size=100):
    return map(etvt, list(islice(gspan_to_eden(dataset_fname), size)))


# the fastest way to get a grammar is by using the sampler class
# (see the sketch at the end), but here is how to do it manually

from graphlearn.graphlearn import decompose
from graphlearn.graphlearn import LocalSubstitutableGraphGrammar as LSGG
from graphlearn.utils.ascii import nx_to_ascii
from graphlearn.utils.neighbors import getallneighbors
# make graphs decomposable
deco = decompose.Decomposer()
decomps = [deco.make_new_decomposer(g) for g in get_graphs()]

# train a grammar
grammar = LSGG()
grammar.fit(decomps)

# here is where the magic happens: enumerate all grammar neighbors of one graph

graphs = get_graphs(size=10)
graph = graphs[1]
graphs = getallneighbors(deco.make_new_decomposer(graph), grammar)

# render the generated neighbors as ASCII art:

for e in graphs:
    print(nx_to_ascii(e, xmax=40, ymax=20))
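
For comparison, the "fastest way" mentioned above: let the sampler build the
grammar internally (a sketch assuming the fit method from the first example on
this page; the default constructor is an assumption):

from graphlearn.graphlearn import GraphLearnSampler

sampler = GraphLearnSampler()
sampler.fit(get_graphs())  # builds the grammar internally
grammar = sampler.local_substitutable_graph_grammar  # the same LSGG as above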