def getModel(X, y):
    """Return a MaxEnt model, loading it from the cache or training it.

    If a file named 'model' already exists under ``cachedir`` the model is
    loaded from it and returned without looking at ``X`` or ``y``.
    Otherwise a new model is trained on every row of ``X`` and saved to that
    path before being returned.

    Args:
        X: feature matrix; must expose ``shape[0]`` and ``getrow(i).tocoo()``
            (presumably a scipy sparse matrix -- confirm against the caller).
        y: labels, indexable by row number; each one is passed to the model
            as ``str(y[i])``.

    Returns:
        The loaded or freshly trained ``cmaxent.MaxentModel``.
    """
    model_file = os.path.join(cachedir, 'model')

    # Cache hit: reuse the previously saved model and skip training.
    if os.path.exists(model_file):
        cached_model = cmaxent.MaxentModel()
        cached_model.load(model_file)
        return cached_model

    lmaxent.set_verbose(1)
    model = cmaxent.MaxentModel()
    model.begin_add_event()

    n_rows = X.shape[0]
    for row_idx in range(n_rows):
        coo_row = X.getrow(row_idx).tocoo()
        # One (feature-name, value) pair per stored entry of the row; the
        # column index, as a string, serves as the feature name.
        features = [(str(col), val)
                    for col, val in zip(coo_row.col, coo_row.data)]
        # Every event is added with the same weight of 1.
        model.add_event(features, str(y[row_idx]), 1)

        # Progress display: the running count every 1000 events, a dot on
        # the other multiples of 100.
        n_added = row_idx + 1
        if n_added % 1000 == 0:
            print('%d' % n_added)
        elif n_added % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

    model.end_add_event(PRUNE_COUNT)
    model.train(LBFGS_ITERATION, 'lbfgs', PRIOR_WEIGHT, TOLERANCE)
    model.save(model_file)
    return model
if context is None: continue weight = 1 predictions = model.eval_all(context) predicted_target = predictions[0][0] target = int(target) predicted_target = int(predicted_target) confusion_matrix[target][predicted_target] += 1 accuracy = (np.trace(confusion_matrix) / float(confusion_matrix.sum())) print 'Confusion Matrix:\n%s' % str(confusion_matrix) print 'Accuracy: %f\n' % accuracy return (confusion_matrix, accuracy) if __name__ == "__main__": test_file = os.path.join(DATA, 'twitter/annotated/ver2.8-econLabel.csv') assert(os.path.exists(test_file)) lmaxent.set_verbose(1) distant_model = getModel() doTest(test_file, distant_model) print MARK