Beispiel #1
0
def train(labeled_featuresets):
  """Train a multi-class perceptron classifier.

  Args:
    labeled_featuresets: list of (featureset, label) pairs, where featureset
      maps feature names to values (string values are treated as categorical,
      everything else as numeric). The list is shuffled in place each epoch.

  Returns:
    A trained Perceptron instance.

  Raises:
    ValueError: if labeled_featuresets is empty (the learning rate would
      otherwise divide by zero).
  """
  if not labeled_featuresets:
    raise ValueError("labeled_featuresets must not be empty")

  # Weight template shared by all labels; "bias" is always present.
  # (The original code first assigned an empty dict that was immediately
  # overwritten -- that dead assignment is removed.)
  weights = {"bias": 0.0}
  labels_weights = {}
  # For numerical values the perceptron algorithm should be straight forward
  for featureset, label in labeled_featuresets:
    labels_weights[label] = None
    for feature, value in featureset.iteritems():
      if isinstance(value, (unicode, str)):
        # Categorical features get one weight per (feature, value) pair.
        weights['%s_%s' % (feature, value)] = 0.0
      else:
        weights[feature] = 0.0

  # Every label gets its own independent copy of the weight template.
  for label in labels_weights:
    labels_weights[label] = copy.deepcopy(weights)

  classifier = Perceptron(labels_weights)
  classifier.learning_rate = 100.0 / len(labeled_featuresets)

  logging.info("The perceptron algorithm will be trained %s times over the set "
               "with learning rate %f" % (EPOCHS, classifier.learning_rate))
  for epoch in range(EPOCHS):
    # Re-shuffle each epoch so updates are not biased by input order.
    random.shuffle(labeled_featuresets)
    for featureset, label in labeled_featuresets:
      if classifier.classify(featureset) != label:
        classifier.update(featureset, label)

  logging.info("Perceptron classifier is trained")
  if util.log_level() == logging.DEBUG:
    classifier.dump_json()

  return classifier
Beispiel #2
0
def train(labeled_featuresets):
    """Train a multi-class perceptron classifier.

    Args:
        labeled_featuresets: list of (featureset, label) pairs, where
            featureset maps feature names to values (string values are
            treated as categorical, everything else as numeric). The list
            is shuffled in place each epoch.

    Returns:
        A trained Perceptron instance.

    Raises:
        ValueError: if labeled_featuresets is empty (the learning rate
            would otherwise divide by zero).
    """
    if not labeled_featuresets:
        raise ValueError("labeled_featuresets must not be empty")

    # Weight template shared by all labels; "bias" is always present.
    # (The original code first assigned an empty dict that was immediately
    # overwritten -- that dead assignment is removed.)
    weights = {"bias": 0.0}
    labels_weights = {}
    # For numerical values the perceptron algorithm should be straight forward
    for featureset, label in labeled_featuresets:
        labels_weights[label] = None
        for feature, value in featureset.iteritems():
            if isinstance(value, (unicode, str)):
                # Categorical features get one weight per (feature, value).
                weights['%s_%s' % (feature, value)] = 0.0
            else:
                weights[feature] = 0.0

    # Every label gets its own independent copy of the weight template.
    for label in labels_weights:
        labels_weights[label] = copy.deepcopy(weights)

    classifier = Perceptron(labels_weights)
    classifier.learning_rate = 100.0 / len(labeled_featuresets)

    logging.info(
        "The perceptron algorithm will be trained %s times over the set "
        "with learning rate %f" % (EPOCHS, classifier.learning_rate))
    for epoch in range(EPOCHS):
        # Re-shuffle each epoch so updates are not biased by input order.
        random.shuffle(labeled_featuresets)
        for featureset, label in labeled_featuresets:
            if classifier.classify(featureset) != label:
                classifier.update(featureset, label)

    logging.info("Perceptron classifier is trained")
    if util.log_level() == logging.DEBUG:
        classifier.dump_json()

    return classifier
Beispiel #3
0
 def problem_set(self):
     """Return the list of problems extracted from every POS-tagged statement."""
     collected = [problem
                  for stmt in self.pos_tagged_tokens
                  for problem in self._get_problems(stmt)]
     logging.info("%d Problems calculated for %s." % (len(collected), self.name))
     # Dump the full problem set only when running at DEBUG verbosity.
     if util.log_level() == logging.DEBUG:
         self.dump_json(collected, "pset")
     return collected
Beispiel #4
0
 def get_labeled_featureset(self):
     """Build (featureset, label) pairs from the problem set."""
     pairs = [(self.get_featureset(problem), label)
              for problem, label in self.problem_set]
     logging.info("%d labeled featureset is calculated for %s." % (len(pairs), self.name))
     # Dump the labeled featureset only when running at DEBUG verbosity.
     if util.log_level() == logging.DEBUG:
         self.dump_json(pairs, "lfs")
     return pairs
Beispiel #5
0
 def problem_set(self):
     """Return the list of problems extracted from every POS-tagged statement."""
     problems = []
     for tagged_stmt in self.pos_tagged_tokens:
         problems += self._get_problems(tagged_stmt)
     logging.info("%d Problems calculated for %s." %
                  (len(problems), self.name))
     # Dump the full problem set only when running at DEBUG verbosity.
     if util.log_level() == logging.DEBUG:
         self.dump_json(problems, "pset")
     return problems
Beispiel #6
0
 def get_labeled_featureset(self):
     """Build (featureset, label) pairs from the problem set."""
     result = []
     # Unpack each labeled problem directly in the loop header.
     for problem, label in self.problem_set:
         result.append((self.get_featureset(problem), label))
     logging.info("%d labeled featureset is calculated for %s." %
                  (len(result), self.name))
     # Dump the labeled featureset only when running at DEBUG verbosity.
     if util.log_level() == logging.DEBUG:
         self.dump_json(result, "lfs")
     return result