def sample(graphs):
    """Fit a GraphLearnSampler on one copy of *graphs* and sample from another.

    The incoming iterator is duplicated with itertools.tee so the same
    graph stream can feed both the fitting and the sampling pass.
    """
    fit_stream, sample_stream = itertools.tee(graphs)
    sampler = GraphLearnSampler()
    sampler.fit(fit_stream)
    sampled = sampler.sample(
        sample_stream,
        same_radius=False,
        max_size_diff=False,
        sampling_interval=9999,
        select_cip_max_tries=100,
        batch_size=30,
        n_steps=100,
        n_jobs=-1,
        improving_threshold=0.9,
    )
    return unpack(sampled)
Example #2
0
def sample(graphs):
    """Train a GraphLearnSampler on the graph stream, then transform a copy of it.

    itertools.tee duplicates the iterator so that fitting does not
    consume the graphs that are transformed afterwards.
    """
    train_graphs, work_graphs = itertools.tee(graphs)
    sampler = GraphLearnSampler()
    sampler.fit(train_graphs)
    transformed = sampler.transform(
        work_graphs,
        same_radius=False,
        size_constrained_core_choice=False,
        sampling_interval=9999,
        select_cip_max_tries=100,
        batch_size=30,
        n_steps=100,
        n_jobs=-1,
        improving_threshold=0.9,
    )
    return unpack(transformed)
Example #3
0
 def _sample(self, graph):
     """Delegate sampling to the parent class, refitting the cip model periodically.

     After each sample, once enough scores have accumulated and enough new
     scores have arrived since the last fit, rebuild features and refit.
     """
     result = GraphLearnSampler._sample(self, graph)
     have_enough_scores = self.num_scores > self.MIN_SCORES_2_PREDICT
     new_since_fit = self.num_scores - self.num_scores_fitted
     if have_enough_scores and new_since_fit > self.MIN_BATCH_SIZE:
         self.create_features()
         self.cip_fit()
         # num_scores_fitted is presumably updated by cip_fit -- TODO confirm
         print("FITTED ", self.num_scores_fitted)
     return result
Example #4
0
    def fit(self, input, grammar_n_jobs=-1, grammar_batch_size=10, train_min_size=None):
        """
          use input to fit the grammar and fit the estimator
        """
        self.preprocessor.set_param(self.vectorizer)

        graphmanagers = self.preprocessor.fit_transform(input,self.vectorizer)

        self.estimatorobject.fit(graphmanagers,
                                 vectorizer=self.vectorizer,
                                 nu=nu,
                                 grammar_n_jobs=grammar_n_jobs,
                                 random_state=self.random_state)

        self.lsgg.fit(graphmanagers, grammar_n_jobs, grammar_batch_size=grammar_batch_size)


        tempest= EstiWrap(nu=.5,  grammar_n_jobs=grammar_n_jobs)
        tempest.fit(graphmanagers,
                    vectorizer=self.vectorizer,

                    random_state=self.random_state)




        '''
        HOW TO TRAIN NEW CORES?
        make a sampler
        with: estimator as estimator, interface-groups as input, dat filter for cip choosing
        '''


        def entitycheck(g,nodes):
            if type(nodes) is not list:
                nodes=[nodes]
            for e in nodes:
                if 'interface' in g.node[e]:
                    return False
            return True

        prod=self.lsgg.productions

        for i, interface_hash in enumerate(prod.keys()):

            if prod[interface_hash] < train_min_size:
                continue
            print "################################# new ihash"
            # for all the interface buckets
            cips=prod[interface_hash].values()
            sampler=GraphLearnSampler(estimator=tempest,node_entity_check=entitycheck)
            graphs_wrapped=[ GraphWrap(cip.graph, self.vectorizer) for cip in cips ]
            graphs=[ cip.graph for cip in cips ]

            sampler.lsgg.fit(graphs_wrapped)
            sampler.preprocessor.fit(0,self.vectorizer)
            sampler.postprocessor.fit(sampler.preprocessor)
            r=sampler.sample(graphs, max_size_diff=0, select_cip_max_tries=100, quick_skip_orig_cip=False,
                             improving_linear_start=.2, improving_threshold=.6)

            # get graphs and sample them
            r= list(r)

            for j, raw_graph in enumerate(r):
                # for each resulting graph
                raw_graph.graph.pop('graph',None)
                score= tempest.score(raw_graph)
                if score > tempest.score(cips[j].graph):
                    # check if the score is good enough, then add to grammar
                    self.lsgg.productions[interface_hash][score]=CoreInterfacePair(
                         interface_hash=cips[j].interface_hash,
                         core_hash=score,
                         graph=raw_graph,
                         radius=cips[j].radius,
                         thickness=cips[j].thickness,
                         core_nodes_count=len(raw_graph),
                         count=1,
                         distance_dict=cips[j].distance_dict)
                    print 'new graph:',score
                    draw.graphlearn(raw_graph)
Example #5
0

# ok we create an iterator over the graphs we want to work with...
# `picker`, `gspan_to_eden`, `path` and `sample` are defined elsewhere in the
# surrounding script -- presumably picker selects/filters graphs from the
# gspan stream. TODO confirm against the full file.
graphs_pos= picker( gspan_to_eden(path+'bursi.pos.gspan') , sample)


# save results here:
originals=[]
improved=[]



# we want to use an increasing part of the test set..
percentages=[.1, .2, .4, .6, .8 ,1 ]

sampler = GraphLearnSampler()

for perc in percentages:

    # we work with count many graphs
    # NOTE(review): `lenpo` (total positive-set size) is defined outside this
    # excerpt -- verify it exists before this loop runs.
    count = int(lenpo*perc)
    # make copy of graphiterator
    # select count random elements
    # triplicate  the count long iterator
    graphs_pos, graphs_pos_ = itertools.tee(graphs_pos)
    # NOTE(review): random.shuffle(range(...)) only works on Python 2, where
    # range returns a list; on Python 3 use list(range(count)).
    x=range(count)
    random.shuffle(x)
    graphs_pos_ = picker(graphs_pos_, x )
    graphs_pos_,graphs_pos__,graphs_pos___ = itertools.tee(graphs_pos_,3)

Example #6
0
 def __init__(self, *args, **kwargs):
     """Initialize both parent classes and set the sampler's learning flags."""
     # all constructor arguments are forwarded to the sampler base class
     GraphLearnSampler.__init__(self, *args, **kwargs)
     CipPredictor.__init__(self)

     # learn online; do not use intelligent cip selection
     self.intelligent_cip_selection = False
     self.online_learning = True
Example #7
0
    def fit(self, input, grammar_n_jobs=-1, grammar_batch_size=10, train_min_size=None):
        """
          use input to fit the grammar and fit the estimator
        """
        self.preprocessor.set_param(self.vectorizer)

        graphmanagers = self.preprocessor.fit_transform(input,self.vectorizer)

        self.estimatorobject.fit(graphmanagers,
                                 vectorizer=self.vectorizer,
                                 nu=nu,
                                 grammar_n_jobs=grammar_n_jobs,
                                 random_state=self.random_state)

        self.lsgg.fit(graphmanagers, grammar_n_jobs, grammar_batch_size=grammar_batch_size)


        tempest= EstiWrap(nu=.5,  grammar_n_jobs=grammar_n_jobs)
        tempest.fit(graphmanagers,
                    vectorizer=self.vectorizer,

                    random_state=self.random_state)




        '''
        HOW TO TRAIN NEW CORES?
        make a sampler
        with: estimator as estimator, interface-groups as input, dat filter for cip choosing
        '''


        def entitycheck(g,nodes):
            if type(nodes) is not list:
                nodes=[nodes]
            for e in nodes:
                if 'interface' in g.node[e]:
                    return False
            return True

        prod=self.lsgg.productions

        for i, interface_hash in enumerate(prod.keys()):

            if prod[interface_hash] < train_min_size:
                continue
            print "################################# new ihash"
            # for all the interface buckets
            cips=prod[interface_hash].values()
            sampler=GraphLearnSampler(estimator=tempest,node_entity_check=entitycheck)
            graphs_wrapped=[ GraphWrap(cip.graph, self.vectorizer) for cip in cips ]
            graphs=[ cip.graph for cip in cips ]

            sampler.lsgg.fit(graphs_wrapped)
            sampler.preprocessor.fit(0,self.vectorizer)
            sampler.postprocessor.fit(sampler.preprocessor)
            r=sampler.transform(graphs, size_constrained_core_choice=0, select_cip_max_tries=100, quick_skip_orig_cip=False,
                                improving_linear_start=.2, improving_threshold=.6)

            # get graphs and sample them
            r= list(r)

            for j, raw_graph in enumerate(r):
                # for each resulting graph
                raw_graph.graph.pop('graph',None)
                score= tempest.predict(raw_graph)
                if score > tempest.predict(cips[j].graph):
                    # check if the score is good enough, then add to grammar
                    self.lsgg.productions[interface_hash][score]=CoreInterfacePair(
                         interface_hash=cips[j].interface_hash,
                         core_hash=score,
                         graph=raw_graph,
                         radius=cips[j].radius,
                         thickness=cips[j].thickness,
                         core_nodes_count=len(raw_graph),
                         count=1,
                         distance_dict=cips[j].distance_dict)
                    print 'new graph:',score
                    draw.graphlearn(raw_graph)
Example #8
0

# ok we create an iterator over the graphs we want to work with...
# `picker`, `gspan_to_eden`, `path` and `sample` are defined elsewhere in the
# surrounding script -- presumably picker selects/filters graphs from the
# gspan stream. TODO confirm against the full file.
graphs_pos= picker( gspan_to_eden(path+'bursi.pos.gspan') , sample)


# save results here:
originals=[]
improved=[]



# we want to use an increasing part of the test set..
percentages=[.1, .2, .4, .6, .8 ,1 ]

sampler = GraphLearnSampler()

for perc in percentages:

    # we work with count many graphs
    # NOTE(review): `lenpo` (total positive-set size) is defined outside this
    # excerpt -- verify it exists before this loop runs.
    count = int(lenpo*perc)
    # make copy of graphiterator
    # select count random elements
    # triplicate  the count long iterator
    graphs_pos, graphs_pos_ = itertools.tee(graphs_pos)
    # NOTE(review): random.shuffle(range(...)) only works on Python 2, where
    # range returns a list; on Python 3 use list(range(count)).
    x=range(count)
    random.shuffle(x)
    graphs_pos_ = picker(graphs_pos_, x )
    graphs_pos_,graphs_pos__,graphs_pos___ = itertools.tee(graphs_pos_,3)