def run_LR(self, rundir=None):
    """Evaluate supervised prediction performance of logistic regression
    trained on the per-document topic mixtures p(t|d) (stored in theta.npy)
    of the LDA model in `rundir`.

    :param rundir: directory of an LDA run; defaults to ``self.rundir``.

    Side effects: may run ``self.grid_search`` (which is expected to set
    ``self.LRparams``) and calls ``evaluate`` to report results.
    """
    if not rundir:
        rundir = self.rundir
    ((trainX, trainY, train_ids),
     (testX, testY, test_ids)) = load_data(rundir, dataset=NIPSDIR)
    if not self.LRparams:
        # Only stack the full dataset when we actually need it for the
        # hyperparameter grid search; the original built allX/allY
        # unconditionally even though they are unused otherwise.
        allX = np.vstack((trainX, testX))
        allY = np.concatenate((trainY, testY))
        self.grid_search(allX, allY)
    # Train the logistic-regression pipeline on the training split only.
    lr = logistic_regression.train_lrpipe(trainX, trainY, self.LRparams)
    allLabels = load_labels(NIPSDIR)
    allTitles = load_titles(NIPSDIR)
    # NOTE(review): indexing allTitles/allLabels with test_ids assumes they
    # support fancy indexing (numpy arrays) -- confirm with load_titles/labels.
    evaluate(lr, testX, testY,
             testTitles=allTitles[test_ids],
             testLabels=allLabels[test_ids])
def run_SVM(self, rundir=None):
    """Evaluate supervised prediction performance of a support vector
    machine trained on the per-document topic mixtures p(t|d) (stored in
    theta.npy) of the LDA model in `rundir`.

    :param rundir: directory of an LDA run; defaults to ``self.rundir``.

    Side effects: may run ``self.grid_search`` (which is expected to set
    ``self.SVMparams``) and calls ``evaluate`` to report results.
    """
    if not rundir:
        rundir = self.rundir
    ((trainX, trainY, train_ids),
     (testX, testY, test_ids)) = load_data(rundir, dataset=NIPSDIR)
    if not self.SVMparams:
        # Only stack the full dataset when we actually need it for the
        # hyperparameter grid search; the original built allX/allY
        # unconditionally even though they are unused otherwise.
        allX = np.vstack((trainX, testX))
        allY = np.concatenate((trainY, testY))
        self.grid_search(allX, allY)
    # Train the SVM pipeline on the training split only.
    sv = support_vector_machines.train_svpipe(trainX, trainY, self.SVMparams)
    allLabels = load_labels(NIPSDIR)
    allTitles = load_titles(NIPSDIR)
    # NOTE(review): indexing allTitles/allLabels with test_ids assumes they
    # support fancy indexing (numpy arrays) -- confirm with load_titles/labels.
    evaluate(sv, testX, testY,
             testTitles=allTitles[test_ids],
             testLabels=allLabels[test_ids])
# --- SVM demo-script header: imports, logging, data loading ---------------
# NOTE(review): `scikits.learn` is the pre-2010 package name of scikit-learn;
# these imports only resolve against that legacy distribution.
from scikits.learn.grid_search import GridSearchCV
from scikits.learn import metrics
from scikits.learn.pipeline import Pipeline
from liblda.supervised.load_data import load_data

import logging
logger = logging.getLogger('SVM')
logger.setLevel(logging.INFO)

# Hard-coded locations of an LDA run and the NIPS 1-17 dataset.
# NOTE(review): "CurrentPorjects" looks like a typo in the path, but it is a
# runtime string -- left untouched; confirm against the actual filesystem.
rundir = '/Users/ivan/Homes/master/Documents/Projects/runs/reduced1/'
NIPSDIR = "/CurrentPorjects/LatentDirichletAllocation/data/NIPS1-17/"

# Load train/test splits of topic features X and labels Y at import time.
((trainX, trainY, train_ids), (testX, testY, test_ids)) = load_data(rundir, dataset=NIPSDIR, normalize_cols=False)
# NOTE(review): `np` is not imported in this chunk -- presumably
# `import numpy as np` appears elsewhere in the file; verify.
allX = np.vstack((trainX, testX))
allY = np.concatenate((trainY, testY))

# Python 2 print statements -- this module is Python 2 only.
print "called:"
print "( (trainX,trainY,train_ids), (testX, testY,test_ids) ) = load_data(rundir, dataset=NIPSDIR, normalize_cols=False)"

def do_grid_search(X, Y, gs_params=None):
    """ Given data (X,Y) will perform a grid search on g_params for a LogisticRegression called logreg """
    # NOTE(review): despite the docstring, the pipeline built here is an
    # RBF-kernel SVM, not logistic regression; also `SVC` is not imported in
    # this chunk -- confirm both against the original file.
    svpipe = Pipeline([('rbfsvm', SVC())])
    if not gs_params:
        # NOTE(review): SOURCE is truncated here -- the default grid dict and
        # the rest of this function are outside the visible chunk.
        gs_params = {
# --- LogReg demo-script header: imports, logging, data, parameter grids ----
from liblda.supervised.load_data import load_data

import logging
logging.basicConfig(format = '%(asctime)s : %(levelname)s : %(message)s', level = logging.INFO)
logger = logging.getLogger('LogReg')
logger.setLevel(logging.INFO)

# Hard-coded locations of an LDA run and the NIPS 1-17 dataset.
# NOTE(review): "CurrentPorjects" looks like a typo in the path, but it is a
# runtime string -- left untouched; confirm against the actual filesystem.
rundir = '/Users/ivan/Homes/master/Documents/Projects/runs/reduced1/'
NIPSDIR = "/CurrentPorjects/LatentDirichletAllocation/data/NIPS1-17/"

# Load train/test splits of topic features X and labels Y at import time.
( (trainX,trainY,train_ids), (testX, testY,test_ids) ) = load_data(rundir, dataset=NIPSDIR, normalize_cols=False)
# NOTE(review): `np` is not imported in this chunk -- presumably
# `import numpy as np` appears elsewhere in the file; verify.
allX = np.vstack((trainX,testX))
allY = np.concatenate((trainY,testY))

# Python 2 print statements -- this module is Python 2 only.
print "called:"
print "( (trainX,trainY,train_ids), (testX, testY,test_ids) ) = load_data(rundir, dataset=NIPSDIR, normalize_cols=False)"

# Grid-search parameter grids for a Pipeline step named 'logreg':
# L1-penalized runs sweep larger C (weaker regularization tolerated),
# L2-penalized runs sweep smaller C.
LRparametersL1 = {
    'logreg__C': (0.1, 1, 2, 5, 10, 50),
    'logreg__penalty': ('l1',) ,
}
LRparametersL2 = {
    'logreg__C': (0.001, 0.01, 0.1, 1),
    'logreg__penalty': ('l2',) ,
}