def fit(self):
    """Train the classifier on the collected positive and negative rules.

    Builds a ``UnaryFeatureFunction`` from every collected rule, maps each
    rule to a feature vector (positives labelled 1, negatives labelled 0)
    and fits a class-balanced ``LogisticRegression`` on the result.  The
    fitted objects are stored on ``self.feature_function`` and
    ``self.classifier``.

    Progress information, including the full rule lists, is written to
    stdout.
    """
    positives = self.positive_unary_rules
    negatives = self.negative_unary_rules

    # Every print() receives a single pre-formatted string, so the output is
    # the same whether the call is parsed as a Python 2 print statement or a
    # Python 3 function call.
    print("Fitting %s:" % self.__class__.__name__)
    print("positive_unary_rules %s" % (positives,))
    print("negative_unary_rules %s" % (negatives,))
    print("# of positive examples: %d" % len(positives))
    print("# of negative examples: %d" % len(negatives))

    self.feature_function = UnaryFeatureFunction(positives + negatives)
    featurize = self.feature_function.map

    # Positives come first, then negatives; y is built in the same order.
    X = [featurize(rule) for rule in positives]
    X.extend(featurize(rule) for rule in negatives)
    y = [1] * len(positives) + [0] * len(negatives)

    # np.shape(X)[1] is the per-example feature count; it raises IndexError
    # when X is empty or not two-dimensional.
    print("length of feature vector: %d" % np.shape(X)[1])

    # Earlier experiments (previously kept here as commented-out code) used
    # RandomForestClassifier(n_estimators=30) and SVC(probability=True).
    self.classifier = LogisticRegression(class_weight='balanced')
    self.classifier.fit(X, y)
class RFUnaryModel(UnaryModel):
    """Unary-rule scorer backed by a binary classifier.

    Positive and negative unary rules are accumulated through ``update``,
    a classifier is trained on them by ``fit``, and ``get_score`` returns
    the probability the classifier assigns to label 1 for a given rule.

    NOTE: despite the "RF" prefix in the class name, the active classifier
    is ``LogisticRegression``; a random-forest variant only ever appeared
    as commented-out code.
    """

    def __init__(self):
        self.positive_unary_rules = []  # training examples labelled 1
        self.negative_unary_rules = []  # training examples labelled 0
        self.feature_function = None    # set by fit()
        self.classifier = None          # set by fit()
        self.scores = {}                # get_score() cache: rule -> score

    @staticmethod
    def val_func(p, c):
        """Return False when ``p.signature.id`` is 'Is' or 'CC'.

        Every other pair is delegated to ``UnaryRule.val_func``.
        ``p`` and ``c`` are presumably the parent and child of a candidate
        rule -- confirm against ``UnaryRule.val_func``.
        """
        if p.signature.id in ('Is', 'CC'):
            return False
        return UnaryRule.val_func(p, c)

    def update(self, tag_rules, positive_unary_rules):
        """Add training examples derived from ``tag_rules``.

        All rules produced by ``self.generate_unary_rules(tag_rules)`` that
        are not in ``positive_unary_rules`` are recorded as negatives; the
        given positives are recorded as positives.  The ``-`` operator is
        applied to the two collections, so both presumably need to be sets
        -- verify against ``generate_unary_rules`` and the callers.
        """
        all_unary_rules = self.generate_unary_rules(tag_rules)
        negative_unary_rules = all_unary_rules - positive_unary_rules
        self.positive_unary_rules.extend(positive_unary_rules)
        self.negative_unary_rules.extend(negative_unary_rules)

    def fit(self):
        """Train the classifier on the collected positive and negative rules.

        Builds a ``UnaryFeatureFunction`` from every collected rule, maps
        each rule to a feature vector (positives labelled 1, negatives
        labelled 0) and fits a class-balanced ``LogisticRegression``.
        Cached scores from any earlier fit are discarded.

        Progress information, including the full rule lists, is written to
        stdout.
        """
        positives = self.positive_unary_rules
        negatives = self.negative_unary_rules

        # Every print() receives a single pre-formatted string, so the output
        # is the same whether the call is parsed as a Python 2 print
        # statement or a Python 3 function call.
        print("Fitting %s:" % self.__class__.__name__)
        print("positive_unary_rules %s" % (positives,))
        print("negative_unary_rules %s" % (negatives,))
        print("# of positive examples: %d" % len(positives))
        print("# of negative examples: %d" % len(negatives))

        # Scores cached by get_score() were produced by the previous feature
        # function and classifier; drop them so they are recomputed against
        # the model trained below.
        self.scores = {}

        self.feature_function = UnaryFeatureFunction(positives + negatives)
        featurize = self.feature_function.map

        # Positives come first, then negatives; y is built in the same order.
        X = [featurize(rule) for rule in positives]
        X.extend(featurize(rule) for rule in negatives)
        y = [1] * len(positives) + [0] * len(negatives)

        # np.shape(X)[1] is the per-example feature count; it raises
        # IndexError when X is empty or not two-dimensional.
        print("length of feature vector: %d" % np.shape(X)[1])

        # Earlier experiments (previously kept here as commented-out code)
        # used RandomForestClassifier(n_estimators=30) and
        # SVC(probability=True).
        self.classifier = LogisticRegression(class_weight='balanced')
        self.classifier.fit(X, y)

    def get_score(self, ur):
        """Return the classifier's probability of label 1 for rule ``ur``.

        Results are memoised in ``self.scores``.  ``fit`` must have been
        called first, because ``self.feature_function`` and
        ``self.classifier`` are ``None`` until then.
        """
        try:
            return self.scores[ur]
        except KeyError:
            pass

        x = self.feature_function.map(ur)
        probas = self.classifier.predict_proba([x])
        # Row 0 is the single sample; column 1 is the second class, which is
        # label 1 for the 0/1 labels used in fit().
        score = probas[0][1]
        self.scores[ur] = score
        return score
class RFUnaryModel(UnaryModel):
    """Unary-rule scorer backed by a binary classifier.

    Positive and negative unary rules are accumulated through ``update``,
    a classifier is trained on them by ``fit``, and ``get_score`` returns
    the probability the classifier assigns to label 1 for a given rule.

    NOTE: despite the "RF" prefix in the class name, the active classifier
    is ``LogisticRegression``; a random-forest variant only ever appeared
    as commented-out code.
    """

    def __init__(self):
        self.positive_unary_rules = []  # training examples labelled 1
        self.negative_unary_rules = []  # training examples labelled 0
        self.feature_function = None    # set by fit()
        self.classifier = None          # set by fit()
        self.scores = {}                # get_score() cache: rule -> score

    @staticmethod
    def val_func(p, c):
        """Return False when ``p.signature.id`` is 'Is' or 'CC'.

        Every other pair is delegated to ``UnaryRule.val_func``.
        ``p`` and ``c`` are presumably the parent and child of a candidate
        rule -- confirm against ``UnaryRule.val_func``.
        """
        if p.signature.id in ('Is', 'CC'):
            return False
        return UnaryRule.val_func(p, c)

    def update(self, tag_rules, positive_unary_rules):
        """Add training examples derived from ``tag_rules``.

        All rules produced by ``self.generate_unary_rules(tag_rules)`` that
        are not in ``positive_unary_rules`` are recorded as negatives; the
        given positives are recorded as positives.  The ``-`` operator is
        applied to the two collections, so both presumably need to be sets
        -- verify against ``generate_unary_rules`` and the callers.
        """
        all_unary_rules = self.generate_unary_rules(tag_rules)
        negative_unary_rules = all_unary_rules - positive_unary_rules
        self.positive_unary_rules.extend(positive_unary_rules)
        self.negative_unary_rules.extend(negative_unary_rules)

    def fit(self):
        """Train the classifier on the collected positive and negative rules.

        Builds a ``UnaryFeatureFunction`` from every collected rule, maps
        each rule to a feature vector (positives labelled 1, negatives
        labelled 0) and fits a class-balanced ``LogisticRegression``.
        Cached scores from any earlier fit are discarded.

        Example counts and the feature-vector length are written to stdout.
        """
        positives = self.positive_unary_rules
        negatives = self.negative_unary_rules

        # Every print() receives a single pre-formatted string, so the output
        # is the same whether the call is parsed as a Python 2 print
        # statement or a Python 3 function call.
        print("Fitting %s:" % self.__class__.__name__)
        print("# of positive examples: %d" % len(positives))
        print("# of negative examples: %d" % len(negatives))

        # Scores cached by get_score() were produced by the previous feature
        # function and classifier; drop them so they are recomputed against
        # the model trained below.
        self.scores = {}

        self.feature_function = UnaryFeatureFunction(positives + negatives)
        featurize = self.feature_function.map

        # Positives come first, then negatives; y is built in the same order.
        X = [featurize(rule) for rule in positives]
        X.extend(featurize(rule) for rule in negatives)
        y = [1] * len(positives) + [0] * len(negatives)

        # np.shape(X)[1] is the per-example feature count; it raises
        # IndexError when X is empty or not two-dimensional.
        print("length of feature vector: %d" % np.shape(X)[1])

        # class_weight='auto' was deprecated in scikit-learn 0.17 and removed
        # in 0.19; 'balanced' is its documented replacement.
        # Earlier experiments (previously kept here as commented-out code)
        # used RandomForestClassifier(n_estimators=30) and
        # SVC(probability=True).
        self.classifier = LogisticRegression(class_weight='balanced')
        self.classifier.fit(X, y)

    def get_score(self, ur):
        """Return the classifier's probability of label 1 for rule ``ur``.

        Results are memoised in ``self.scores``.  ``fit`` must have been
        called first, because ``self.feature_function`` and
        ``self.classifier`` are ``None`` until then.
        """
        try:
            return self.scores[ur]
        except KeyError:
            pass

        x = self.feature_function.map(ur)
        probas = self.classifier.predict_proba([x])
        # Row 0 is the single sample; column 1 is the second class, which is
        # label 1 for the 0/1 labels used in fit().
        score = probas[0][1]
        self.scores[ur] = score
        return score
def fit(self):
    """Train the classifier on the collected positive and negative rules.

    Builds a ``UnaryFeatureFunction`` from every collected rule, maps each
    rule to a feature vector (positives labelled 1, negatives labelled 0)
    and fits a class-balanced ``LogisticRegression`` on the result.  The
    fitted objects are stored on ``self.feature_function`` and
    ``self.classifier``.

    Example counts and the feature-vector length are written to stdout.
    """
    positives = self.positive_unary_rules
    negatives = self.negative_unary_rules

    # Every print() receives a single pre-formatted string, so the output is
    # the same whether the call is parsed as a Python 2 print statement or a
    # Python 3 function call.
    print("Fitting %s:" % self.__class__.__name__)
    print("# of positive examples: %d" % len(positives))
    print("# of negative examples: %d" % len(negatives))

    self.feature_function = UnaryFeatureFunction(positives + negatives)
    featurize = self.feature_function.map

    # Positives come first, then negatives; y is built in the same order.
    X = [featurize(rule) for rule in positives]
    X.extend(featurize(rule) for rule in negatives)
    y = [1] * len(positives) + [0] * len(negatives)

    # np.shape(X)[1] is the per-example feature count; it raises IndexError
    # when X is empty or not two-dimensional.
    print("length of feature vector: %d" % np.shape(X)[1])

    # class_weight='auto' was deprecated in scikit-learn 0.17 and removed in
    # 0.19; 'balanced' is its documented replacement.
    # Earlier experiments (previously kept here as commented-out code) used
    # RandomForestClassifier(n_estimators=30) and SVC(probability=True).
    self.classifier = LogisticRegression(class_weight='balanced')
    self.classifier.fit(X, y)