def fit(self, graphs_pos, graphs_neg=None):
    """Train ``self.estimator`` on the given graphs; do nothing if already trained.

    Before training, every graph is cleaned in place: it is passed to
    ``utils.remove_eden_annotation``, the ``'importance'`` key is popped from
    each node's data dict (via ``utils.node_operation``) and the
    ``'mass_annotate_mp_was_here'`` marker is popped from ``graph.graph``.

    Args:
        graphs_pos: list of positive example graphs.
        graphs_neg: optional list of negative example graphs. When it is
            non-empty an ``SGDClassifier`` is trained with labels ``+1``
            (positive) and ``-1`` (negative); otherwise an
            ``ExperimentalOneClassEstimator`` is trained on the positives only.

    Returns:
        ``self``.
    """
    if self.trained:
        return self

    # ``None`` replaces the former shared mutable ``[]`` default.
    if graphs_neg is None:
        graphs_neg = []
    all_graphs = graphs_pos + graphs_neg

    # Explicit loop: ``map`` is lazy on Python 3, so using it purely for side
    # effects (as this code previously did) silently skips the cleanup.
    for graph in all_graphs:
        utils.remove_eden_annotation(graph)
        utils.node_operation(graph, lambda n, d: d.pop('importance', None))
        graph.graph.pop('mass_annotate_mp_was_here', None)

    if graphs_neg:
        labels = [1] * len(graphs_pos) + [-1] * len(graphs_neg)
        estimator = SGDClassifier()
        estimator.fit(self.vectorizer.transform(all_graphs), labels)
    else:
        estimator = ExperimentalOneClassEstimator()
        estimator.fit(self.vectorizer.transform(graphs_pos))

    # Publish the estimator and flip the flag only after training succeeded,
    # so a failed fit can be retried instead of leaving a half-built object.
    self.estimator = estimator
    self.trained = True
    return self
class Annotator:
    """Fits an estimator on vectorized graphs and annotates graphs with it.

    Training happens in :meth:`fit`; annotation is delegated to
    ``mass_annotate_mp`` in :meth:`annotate`. ``self.estimator`` only exists
    after a successful :meth:`fit`, so annotating a non-empty list of graphs
    before fitting raises ``AttributeError``.
    """

    def __init__(self, multiprocess=True, score_attribute='importance'):
        """Store the configuration and create a fresh ``Vectorizer``.

        Args:
            multiprocess: forwarded to ``mass_annotate_mp`` as ``multi_process``.
            score_attribute: forwarded to ``mass_annotate_mp`` as
                ``score_attribute``.
        """
        self.score_attribute = score_attribute
        self.vectorizer = Vectorizer()
        self.multi_process = multiprocess
        self.trained = False

    def _clear_annotations(self, graphs):
        """Remove annotations left by earlier runs from each graph, in place."""
        # Explicit loop: ``map`` is lazy on Python 3, so using it purely for
        # side effects (as this code previously did) silently skips the work.
        for graph in graphs:
            utils.remove_eden_annotation(graph)
            # NOTE(review): the key is hard-coded to 'importance' rather than
            # self.score_attribute -- confirm this is intended.
            utils.node_operation(graph, lambda n, d: d.pop('importance', None))
            graph.graph.pop('mass_annotate_mp_was_here', None)

    def fit(self, graphs_pos, graphs_neg=None):
        """Train ``self.estimator``; do nothing if already trained.

        All graphs are cleaned in place first (see ``_clear_annotations``).

        Args:
            graphs_pos: list of positive example graphs.
            graphs_neg: optional list of negative example graphs. When it is
                non-empty an ``SGDClassifier`` is trained with labels ``+1``
                (positive) and ``-1`` (negative); otherwise an
                ``ExperimentalOneClassEstimator`` is trained on the positives.

        Returns:
            ``self``.
        """
        if self.trained:
            return self

        # ``None`` replaces the former shared mutable ``[]`` default.
        if graphs_neg is None:
            graphs_neg = []
        all_graphs = graphs_pos + graphs_neg
        self._clear_annotations(all_graphs)

        if graphs_neg:
            labels = [1] * len(graphs_pos) + [-1] * len(graphs_neg)
            estimator = SGDClassifier()
            estimator.fit(self.vectorizer.transform(all_graphs), labels)
        else:
            estimator = ExperimentalOneClassEstimator()
            estimator.fit(self.vectorizer.transform(graphs_pos))

        # Publish the estimator and flip the flag only after training
        # succeeded, so a failed fit can be retried.
        self.estimator = estimator
        self.trained = True
        return self

    def fit_transform(self, graphs_p, graphs_n=None):
        """Fit on both graph lists, then annotate each of them.

        Args:
            graphs_p: list of positive example graphs.
            graphs_n: optional list of negative example graphs.

        Returns:
            A 2-tuple ``(annotated_positives, annotated_negatives)``; an empty
            input list yields ``[]`` in the corresponding position.
        """
        if graphs_n is None:
            graphs_n = []
        self.fit(graphs_p, graphs_n)
        return self.transform(graphs_p), self.transform(graphs_n)

    def transform(self, graphs):
        """Annotate ``graphs``; equivalent to ``annotate(graphs)``."""
        return self.annotate(graphs)

    def annotate(self, graphs, neg=False):
        """Annotate ``graphs`` through ``mass_annotate_mp``.

        Args:
            graphs: graphs to annotate; a falsy value short-circuits to ``[]``.
            neg: forwarded to ``mass_annotate_mp`` as ``invert_score``.

        Returns:
            ``[]`` for empty input, otherwise whatever ``mass_annotate_mp``
            returns.
        """
        if not graphs:
            return []
        return mass_annotate_mp(
            graphs,
            self.vectorizer,
            score_attribute=self.score_attribute,
            estimator=self.estimator,
            multi_process=self.multi_process,
            invert_score=neg,
        )