def train(self, fvects, fids):
    """
    Train using SGD. Supports cross-validation.
    This function is called by outside programs.
    """
    # You cannot set both heldout and cross-validation.
    if self.folds and self.heldoutVects:
        sys.stderr.write(
            "Cannot perform heldout and cross-validation simultaneously\n")
        sys.exit(-1)
    if self.folds == 0:
        return self.train_client(fvects, fids)
    else:
        # Distribute the training instances round-robin over the folds.
        print "Performing %d-fold cross-validation" % self.folds
        trainVects = {}
        fold = 0
        for fv in fvects:
            trainVects.setdefault(fold, []).append(fv)
            fold += 1
            if fold == self.folds:
                fold = 0
        # Train on each fold. Accumulate statistics.
        stats = {}
        statKeys = ["macro", "micro"]
        statKeys.extend(self.labels)
        for sk in statKeys:
            stats[sk] = {}
            for metric in ["precision", "recall", "F", "accuracy"]:
                stats[sk][metric] = 0
        for i in trainVects:
            print "Fold number = %d" % (i + 1)
            traindata = []
            for j in trainVects:
                if j != i:
                    traindata.extend(trainVects[j])
            self.train_client(traindata, fids)
            e = self.get_performance(trainVects[i])
            results = e.getMetrics()
            e.show(results)
            # Add the current fold's metrics to the running totals.
            for sk in statKeys:
                for metric in ["precision", "recall", "F", "accuracy"]:
                    stats[sk][metric] += results[sk][metric]
        # Print the overall results, averaged over all folds.
        print "Average Results over %d-fold cross validation" % self.folds
        for sk in statKeys:
            for metric in ["precision", "recall", "F", "accuracy"]:
                stats[sk][metric] /= float(self.folds)
        E = EVALUATOR(self.n)
        E.show(stats)
def get_performance(self, fvects):
    """
    Compute precision, recall and F-scores for fvects with the current
    weight vector, using the EVALUATOR.
    """
    E = EVALUATOR(self.n)
    pred = PREDICTOR()
    pred.loadWeights(self.w, self.bias, self.n)
    for v in fvects:
        (lbl, prob) = pred.predictVect(v)
        E.add(v.label, lbl)
    return E
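# Illustration only (not part of the original module): a minimal sketch of
# the round-robin fold split that train() uses for cross-validation.
# Instance k is assigned to fold k % folds; for each fold i the model is
# retrained on all remaining folds and scored on fold i via get_performance().
# The helper name _toy_fold_split and the toy data are hypothetical, used
# only for this example.
def _toy_fold_split(data, folds):
    split = {}
    fold = 0
    for item in data:
        # Same assignment scheme as in train() above.
        split.setdefault(fold, []).append(item)
        fold += 1
        if fold == folds:
            fold = 0
    return split

# _toy_fold_split(range(7), 3) -> {0: [0, 3, 6], 1: [1, 4], 2: [2, 5]};
# the training set for fold 0 is then folds 1 and 2 combined, and fold 0
# is held out for evaluation.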
def test_logreg(model_fname, test_fname, output_fname=None,
                prob=True, acc=True):
    """
    Predict labels for the test instances using the trained model.
    If prob is set to True, then show class probabilities.
    If acc is set to True and the test instances have labels, then we will
    report evaluation metrics for the test instances.
    If an output_fname is specified we will write the predictions to that
    file instead of writing to the terminal.
    """
    pred = PREDICTOR()
    pred.loadModel(model_fname)
    testFile = SEQUENTIAL_FILE_READER(test_fname)
    count = 0
    E = EVALUATOR(pred.n)
    if output_fname:
        output = open(output_fname, "w")
    else:
        output = sys.stdout
    for mv in testFile:
        v = mv["vect"]
        # Use a local name distinct from the prob flag to avoid shadowing it.
        (lbl, p) = pred.predictVect(v)
        if prob:
            output.write("%d\t%s\n" % (lbl, str(p)))
        else:
            output.write("%d\n" % lbl)
        # In the binary case the negative class may be labelled -1 in the
        # test file; map it to 0 to match the predictor's label set.
        if pred.n == 2 and v.label == -1:
            trueLabel = 0
        else:
            trueLabel = v.label
        if v.label is not None:
            E.add(trueLabel, lbl)
        count += 1
    testFile.close()
    if output_fname:
        output.close()
    if acc:
        result = E.getMetrics()
        E.show(result)
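# Usage sketch for test_logreg (the file names below are placeholders for
# this example, not files shipped with the project): load a trained model,
# write one "label<TAB>probability" line per test instance to
# predictions.txt, and print evaluation metrics for any labelled instances.
def demo_test_logreg():
    test_logreg("model.dat", "test.dat",
                output_fname="predictions.txt", prob=True, acc=True)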