Beispiel #1
0
    def fit(self, graphs_pos, graphs_neg=[]):

        if self.trained:
            return self
        self.trained=True
        map(utils.remove_eden_annotation,graphs_pos+graphs_neg)
        map(lambda x: utils.node_operation(x, lambda n,d: d.pop('importance',None)), graphs_pos+graphs_neg)
        map( lambda graph: graph.graph.pop('mass_annotate_mp_was_here',None) ,graphs_pos+graphs_neg)

        if graphs_neg:
            #print 'choosing to train binary esti'
            self.estimator = SGDClassifier()
            classes= [1]*len(graphs_pos)+[-1]*len(graphs_neg)
            self.estimator.fit(self.vectorizer.transform(graphs_pos+graphs_neg),classes)
        else:
            self.estimator = ExperimentalOneClassEstimator()
            self.estimator.fit(self.vectorizer.transform(graphs_pos))
        return self
Beispiel #2
0
class Annotator():

    def __init__(self, multiprocess=True, score_attribute='importance'):
        self.score_attribute=score_attribute
        self.vectorizer=Vectorizer()
        self.multi_process=multiprocess
        self.trained=False

    def fit(self, graphs_pos, graphs_neg=[]):

        if self.trained:
            return self
        self.trained=True
        map(utils.remove_eden_annotation,graphs_pos+graphs_neg)
        map(lambda x: utils.node_operation(x, lambda n,d: d.pop('importance',None)), graphs_pos+graphs_neg)
        map( lambda graph: graph.graph.pop('mass_annotate_mp_was_here',None) ,graphs_pos+graphs_neg)

        if graphs_neg:
            #print 'choosing to train binary esti'
            self.estimator = SGDClassifier()
            classes= [1]*len(graphs_pos)+[-1]*len(graphs_neg)
            self.estimator.fit(self.vectorizer.transform(graphs_pos+graphs_neg),classes)
        else:
            self.estimator = ExperimentalOneClassEstimator()
            self.estimator.fit(self.vectorizer.transform(graphs_pos))
        return self


    def fit_transform(self,graphs_p, graphs_n=[]):
        self.fit(graphs_p,graphs_n)
        return self.transform(graphs_p),self.transform(graphs_n)

    def transform(self,graphs):
        return  self.annotate(graphs)

    def annotate(self,graphs,neg=False):
        if not graphs:
            return []
        return mass_annotate_mp(graphs,self.vectorizer,score_attribute=self.score_attribute,estimator=self.estimator,
                                multi_process=self.multi_process, invert_score=neg)