Example no. 1
 def train(self, fvects, fids):
     """
     Train using SGD. Supports crossValidation. This function
     is called by outside programs.
     """
     # You cannot set both heldout and cross-validation.
     if self.folds and self.heldoutVects:
         sys.stderr.write(
             "Cannot perform heldout and cross-validation simultaneously\n")
         sys.exit(-1)
     if self.folds == 0:
         return self.train_client(fvects, fids)
     else:
         # store the training data folds.
         print "Performing %d-fold cross-validation" % self.folds
         trainVects = {}
         fold = 0
         for fv in fvects:
             trainVects.setdefault(fold, []).append(fv)
             fold += 1
             if fold == self.folds:
                 fold = 0
         # train for each fold. Accumulate statistics.
         stats = {}
         statKeys = ["macro", "micro"]
         statKeys.extend(self.labels)
         for sk in statKeys:
             stats[sk] = {}
             for metric in ["precision", "recall", "F", "accuracy"]:
                 stats[sk][metric] = 0
         for i in trainVects:
             print "Fold number = %d" % (i + 1)
             traindata = []
             for j in trainVects:
                 if j != i:
                     traindata.extend(trainVects[j])
             self.train_client(traindata, fids)
             e = self.get_performance(trainVects[i])
             results = e.getMetrics()
             e.show(results)
             # add the current metrics to stats.
             for sk in statKeys:
                 for metric in ["precision", "recall", "F", "accuracy"]:
                     stats[sk][metric] += results[sk][metric]
         # print the overall results and averages.
         print "Average Results over %d-fold cross validation" % self.folds
         for sk in statKeys:
             for metric in ["precision", "recall", "F", "accuracy"]:
                 stats[sk][metric] /= float(self.folds)
         E = EVALUATOR(self.n)
         E.show(stats)
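The round-robin assignment of vectors to folds is the heart of the cross-validation setup above. The standalone sketch below isolates that logic; round_robin_folds is a hypothetical helper written for illustration, not part of the original class:

def round_robin_folds(items, k):
    # Deal items into k folds in round-robin order, mirroring the
    # fold-assignment loop in train().
    folds = {}
    f = 0
    for item in items:
        folds.setdefault(f, []).append(item)
        f = (f + 1) % k
    return folds

# Leave-one-fold-out iteration: hold out fold i, train on the rest.
folds = round_robin_folds(list(range(10)), 3)
for i in folds:
    heldout = folds[i]
    traindata = [x for j in folds if j != i for x in folds[j]]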
Example no. 2
 def get_performance(self, fvects):
     """
     Compute precision, recall and F-scores with the current
     weight vector for the fvects using the EVALUATOR.. 
     """
     E = EVALUATOR(self.n)
     pred = PREDICTOR()
     pred.loadWeights(self.w, self.bias, self.n)
     for v in fvects:
         (lbl, prob) = pred.predictVect(v)
         E.add(v.label, lbl)
     return E
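The EVALUATOR class is not shown in these examples; the code only relies on an add(gold, predicted) accumulator from which metrics are later derived. A minimal stand-in with that shape might look like the following sketch (SimpleEvaluator and its method names are assumptions for illustration, not the project's actual class):

from collections import defaultdict

class SimpleEvaluator(object):
    """Accumulate (gold, predicted) label pairs and derive per-label
    precision, recall and F1 from the confusion counts."""

    def __init__(self):
        self.tp = defaultdict(int)  # gold == predicted
        self.fp = defaultdict(int)  # predicted this label, gold differs
        self.fn = defaultdict(int)  # gold is this label, predicted differs
        self.labels = set()

    def add(self, gold, predicted):
        self.labels.update([gold, predicted])
        if gold == predicted:
            self.tp[gold] += 1
        else:
            self.fp[predicted] += 1
            self.fn[gold] += 1

    def metrics(self, label):
        p_den = self.tp[label] + self.fp[label]
        r_den = self.tp[label] + self.fn[label]
        precision = self.tp[label] / float(p_den) if p_den else 0.0
        recall = self.tp[label] / float(r_den) if r_den else 0.0
        f = (2.0 * precision * recall / (precision + recall)
             if precision + recall else 0.0)
        return {"precision": precision, "recall": recall, "F": f}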
Example no. 3
def test_logreg(model_fname, test_fname, output_fname=None, prob=True,
                acc=True):
    """
    Predict labels for the test instances using the trained
    model. If prob is set to True, also write class probabilities.
    If acc is set to True and the test instances have labels,
    compute and show evaluation metrics for the predictions.
    If output_fname is specified, write the predictions to that
    file instead of to the terminal.
    """
    pred = PREDICTOR()
    pred.loadModel(model_fname)
    testFile = SEQUENTIAL_FILE_READER(test_fname)
    E = EVALUATOR(pred.n)
    if output_fname:
        output = open(output_fname,"w")
    else:
        output = sys.stdout
    for mv in testFile:
        v = mv["vect"]
        (lbl, p) = pred.predictVect(v)
        # Honour the prob flag: write the class probability only when
        # it was requested.
        if prob:
            output.write("%d\t%s\n" % (lbl, str(p)))
        else:
            output.write("%d\n" % lbl)
        # Map the negative class to 0 for binary problems so that gold
        # and predicted labels use the same label set.
        if pred.n == 2 and v.label == -1:
            trueLabel = 0
        else:
            trueLabel = v.label
        if v.label is not None:
            E.add(trueLabel, lbl)
    testFile.close()
    if output_fname:
        output.close()
    if acc:
        result = E.getMetrics()
        E.show(result)
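A typical invocation of test_logreg might look as follows; the file names are placeholders, and the actual model and test file formats are whatever PREDICTOR.loadModel and SEQUENTIAL_FILE_READER expect:

# Write predicted labels and probabilities to predictions.txt and
# show evaluation metrics for the labelled test instances.
test_logreg("model.dat", "test.dat", output_fname="predictions.txt",
            prob=True, acc=True)

# Print bare labels to stdout and skip the evaluation step.
test_logreg("model.dat", "test.dat", prob=False, acc=False)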