def train(labeled_featuresets):
    """Train a multi-class perceptron classifier on (featureset, label) pairs.

    Args:
        labeled_featuresets: sequence of ``(featureset, label)`` tuples, where
            ``featureset`` maps feature names to string or numeric values.

    Returns:
        A trained ``Perceptron`` instance with ``learning_rate`` set to
        ``100.0 / len(labeled_featuresets)``.

    Raises:
        ValueError: if ``labeled_featuresets`` is empty (previously this
            surfaced as a ZeroDivisionError on the learning-rate division).
    """
    if not labeled_featuresets:
        raise ValueError("labeled_featuresets must not be empty")
    # Python 2/3 compatible string-type tuple (the file otherwise targets
    # Python 2: `unicode` does not exist on Python 3).
    try:
        string_types = (unicode, str)  # noqa: F821 — Python 2
    except NameError:
        string_types = (str,)  # Python 3
    # Zero-initialised weight vector over all observed features; string-valued
    # features are one-hot encoded under the key "<feature>_<value>".
    # (The original dead `weights = {}` assignment was removed.)
    weights = {"bias": 0.0}
    labels_weights = {}
    # For numerical values the perceptron algorithm should be straight forward
    for featureset, label in labeled_featuresets:
        labels_weights[label] = None
        # .items() works on both Python 2 and 3 (iteritems() is py2-only).
        for feature, value in featureset.items():
            if isinstance(value, string_types):
                weights['%s_%s' % (feature, value)] = 0.0
            else:
                weights[feature] = 0.0
    # Every label gets its own independent copy of the weight vector.
    for label in labels_weights:
        labels_weights[label] = copy.deepcopy(weights)
    classifier = Perceptron(labels_weights)
    classifier.learning_rate = 100.0 / len(labeled_featuresets)
    logging.info("The perceptron algorithm will be trained %s times over the set "
                 "with learning rate %f" % (EPOCHS, classifier.learning_rate))
    # Shuffle a copy so the caller's list is not mutated as a side effect.
    training_data = list(labeled_featuresets)
    for epoch in range(EPOCHS):
        random.shuffle(training_data)
        for featureset, label in training_data:
            # Standard perceptron update: only adjust weights on a mistake.
            if classifier.classify(featureset) != label:
                classifier.update(featureset, label)
    logging.info("Perceptron classifier is trained")
    if util.log_level() == logging.DEBUG:
        classifier.dump_json()
    return classifier
def train(labeled_featuresets):
    # NOTE(review): this is a near-identical duplicate of the `train` defined
    # earlier in this file; the later definition shadows the earlier one.
    # Consider deleting one of them.
    """Train a multi-class perceptron classifier on (featureset, label) pairs.

    Args:
        labeled_featuresets: sequence of ``(featureset, label)`` tuples, where
            ``featureset`` maps feature names to string or numeric values.

    Returns:
        A trained ``Perceptron`` instance with ``learning_rate`` set to
        ``100.0 / len(labeled_featuresets)``.

    Raises:
        ValueError: if ``labeled_featuresets`` is empty (previously a
            ZeroDivisionError on the learning-rate division).
    """
    if not labeled_featuresets:
        raise ValueError("labeled_featuresets must not be empty")
    # Python 2/3 compatible string-type tuple (`unicode` is py2-only).
    try:
        string_types = (unicode, str)  # noqa: F821 — Python 2
    except NameError:
        string_types = (str,)  # Python 3
    # Build the zero-initialised weight vector; string-valued features are
    # one-hot encoded as "<feature>_<value>". The dead `weights = {}`
    # assignment that was immediately overwritten has been removed.
    weights = {"bias": 0.0}
    labels_weights = {}
    # For numerical values the perceptron algorithm should be straight forward
    for featureset, label in labeled_featuresets:
        labels_weights[label] = None
        # .items() works on both Python 2 and 3 (iteritems() is py2-only).
        for feature, value in featureset.items():
            if isinstance(value, string_types):
                weights['%s_%s' % (feature, value)] = 0.0
            else:
                weights[feature] = 0.0
    # One independent weight vector per label.
    for label in labels_weights:
        labels_weights[label] = copy.deepcopy(weights)
    classifier = Perceptron(labels_weights)
    classifier.learning_rate = 100.0 / len(labeled_featuresets)
    logging.info(
        "The perceptron algorithm will be trained %s times over the set "
        "with learning rate %f" % (EPOCHS, classifier.learning_rate))
    # Shuffle a copy so the caller's list is not mutated in place.
    training_data = list(labeled_featuresets)
    for epoch in range(EPOCHS):
        random.shuffle(training_data)
        for featureset, label in training_data:
            # Perceptron rule: update weights only on misclassification.
            if classifier.classify(featureset) != label:
                classifier.update(featureset, label)
    logging.info("Perceptron classifier is trained")
    if util.log_level() == logging.DEBUG:
        classifier.dump_json()
    return classifier
def problem_set(self):
    """Gather every problem derived from the POS-tagged statements.

    NOTE(review): other code accesses this as ``self.problem_set`` without
    calling it, which suggests an ``@property`` decorator lost in
    formatting — confirm against the original file.

    Returns:
        list: the concatenation of ``self._get_problems(stmt)`` for each
        statement in ``self.pos_tagged_tokens``.
    """
    problems = [problem
                for stmt in self.pos_tagged_tokens
                for problem in self._get_problems(stmt)]
    logging.info("%d Problems calculated for %s." % (len(problems), self.name))
    if util.log_level() == logging.DEBUG:
        self.dump_json(problems, "pset")
    return problems
def get_labeled_featureset(self):
    """Convert each (problem, label) pair of the problem set into a
    (featureset, label) pair.

    Returns:
        list: ``(self.get_featureset(problem), label)`` tuples, one per
        entry in ``self.problem_set``.
    """
    labeled = [(self.get_featureset(problem), label)
               for problem, label in self.problem_set]
    logging.info("%d labeled featureset is calculated for %s." % (len(labeled), self.name))
    if util.log_level() == logging.DEBUG:
        self.dump_json(labeled, "lfs")
    return labeled