Beispiel #1
0
 def load_model(self, model_fname, train_fname):
     """
     Restore a trained model from disk.

     Three inputs are read:

     * ``<model_fname>.matrix`` is loaded with ``genfromtxt`` into
       ``self.Cinv``.
     * ``train_fname`` is read with SEQUENTIAL_FILE_READER and passed
       through ``get_train_data`` to fill ``self.D`` and ``self.t``.
     * ``model_fname`` is the parameter file. Its first line carries beta
       as the second whitespace-separated token; its second line carries
       the kernel type as the second tab-separated field. For a
       GAUSSIAN_QUADRATIC_KERNEL the next four lines are
       ``<name> <value>`` pairs naming theta_0 .. theta_3 (any order;
       unknown names are ignored).

     If the kernel type is not GAUSSIAN_QUADRATIC_KERNEL, ``self.kernel``
     is left untouched.
     """
     self.Cinv = genfromtxt("%s.matrix" % model_fname)

     train_file = SEQUENTIAL_FILE_READER(train_fname)
     try:
         train_vects = train_file.read()
     finally:
         # Release the reader even if reading fails.
         train_file.close()
     (self.D, self.t) = get_train_data(train_vects)

     # ``with`` closes the parameter file even when a line is malformed
     # and float()/split()/unpacking raises part-way through.
     with open(model_fname) as para_file:
         self.beta = float(para_file.readline().split()[1])
         kernel_type = para_file.readline().strip().split("\t")[1]
         if kernel_type == "GAUSSIAN_QUADRATIC_KERNEL":
             self.kernel = GAUSSIAN_QUADRATIC_KERNEL()
             for _ in range(4):
                 (para, val) = para_file.readline().strip().split()
                 if para in ("theta_0", "theta_1", "theta_2", "theta_3"):
                     setattr(self.kernel, para, float(val))
Beispiel #2
0
def train_GPR(train_fname,
              model_fname,
              verbose=True,
              beta=1,
              theta_0=None,
              theta_1=None,
              theta_2=None,
              theta_3=None):
    """
    Train a Gaussian Process regression model and save it.

    The training vectors are read from ``train_fname``, a GPR learner
    with a GAUSSIAN_QUADRATIC_KERNEL is trained on them, and the result
    is written with ``learner.save_model(model_fname)``.

    Parameters:
        train_fname: path handed to SEQUENTIAL_FILE_READER.
        model_fname: path handed to ``learner.save_model``.
        verbose: stored on the learner as ``learner.verbose``.
        beta: stored on the learner as ``learner.beta``.
        theta_0 .. theta_3: kernel hyper-parameters. ``None`` (the
            default) keeps whatever value the kernel object starts with;
            any other value, including 0, overrides it.
    """
    train_file = SEQUENTIAL_FILE_READER(train_fname)
    try:
        train_vects = train_file.read()
    finally:
        # Release the reader even if reading fails.
        train_file.close()

    learner = GPR()
    learner.verbose = verbose
    learner.beta = beta

    kernel = GAUSSIAN_QUADRATIC_KERNEL()
    # Compare against None rather than relying on truthiness: an explicit
    # 0 is a legitimate value and must not be silently replaced by the
    # kernel's own default.
    overrides = (("theta_0", theta_0), ("theta_1", theta_1),
                 ("theta_2", theta_2), ("theta_3", theta_3))
    for (name, value) in overrides:
        if value is not None:
            setattr(kernel, name, value)

    learner.set_kernel(kernel)
    learner.train(train_vects)
    learner.save_model(model_fname)
Beispiel #3
0
def predict_GPR(test_fname,
                train_fname,
                model_fname,
                output_fname=None,
                accuracy=False):
    """
    Predict the outputs for the test instances.

    One line ``"<mean>\\t<variance>"`` is written per test vector. If
    ``output_fname`` is not given the predictions go to the standard
    output (a SEQUENTIAL_FILE_WRITER opened in "STDOUT" mode).

    Parameters:
        test_fname: file with the test vectors.
        train_fname: training file, passed on to ``learner.load_model``.
        model_fname: model file, passed on to ``learner.load_model``.
        output_fname: optional destination file for the predictions.
        accuracy: when true, each vector's ``label`` is compared with the
            predicted mean and a final ``RMSE = ...`` line is written.
            The line is omitted when there are no test vectors.
    """
    test_file = SEQUENTIAL_FILE_READER(test_fname)
    try:
        test_vects = test_file.read()
    finally:
        test_file.close()

    learner = GPR()
    learner.load_model(model_fname, train_fname)

    if output_fname:
        output_file = SEQUENTIAL_FILE_WRITER(output_fname)
    else:
        output_file = SEQUENTIAL_FILE_WRITER(None, "STDOUT")

    count = 0
    squared_error = 0
    try:
        for v in test_vects["vects"]:
            (mean, variance) = learner.predict(v)
            output_file.writeLine("%f\t%f\n" % (mean, variance))
            if accuracy:
                squared_error += (v.label - mean) ** 2
            count += 1
        # Root-mean-square error: take the mean of the squared errors
        # first, then the square root. The count guard avoids a division
        # by zero on an empty test set.
        if accuracy and count > 0:
            rmse = sqrt(squared_error / float(count))
            output_file.writeLine("RMSE = %f\n" % rmse)
    finally:
        # Close the writer even if a prediction fails.
        output_file.close()
Beispiel #4
0
 def load_model(self, model_fname, train_fname):
     """
     Load a previously saved model.

     * ``self.Cinv`` is read with ``genfromtxt`` from
       ``<model_fname>.matrix``.
     * ``self.D`` and ``self.t`` come from ``get_train_data`` applied to
       the vectors read from ``train_fname``.
     * ``self.beta`` and the kernel are parsed from ``model_fname``:
       line 1 holds beta as its second whitespace-separated token and
       line 2 holds the kernel type as its second tab-separated field.
       When the type is GAUSSIAN_QUADRATIC_KERNEL, four further
       ``<name> <value>`` lines supply theta_0 .. theta_3; lines with
       any other name are skipped.

     For any other kernel type ``self.kernel`` is not assigned.
     """
     self.Cinv = genfromtxt("%s.matrix" % model_fname)

     train_file = SEQUENTIAL_FILE_READER(train_fname)
     try:
         train_vects = train_file.read()
     finally:
         # Always release the reader, including on a failed read.
         train_file.close()
     (self.D, self.t) = get_train_data(train_vects)

     theta_names = ("theta_0", "theta_1", "theta_2", "theta_3")
     # The context manager closes the file even if parsing raises.
     with open(model_fname) as para_file:
         self.beta = float(para_file.readline().split()[1])
         kernel_type = para_file.readline().strip().split("\t")[1]
         if kernel_type == "GAUSSIAN_QUADRATIC_KERNEL":
             self.kernel = GAUSSIAN_QUADRATIC_KERNEL()
             for _ in range(len(theta_names)):
                 (para, val) = para_file.readline().strip().split()
                 if para in theta_names:
                     setattr(self.kernel, para, float(val))
Beispiel #5
0
def predict_GPR(test_fname,
                train_fname,
                model_fname,
                output_fname=None,
                accuracy=False):
    """
    Predict the outputs for the test instances.

    For every test vector the learner's predicted mean and variance are
    written as ``"<mean>\\t<variance>"``. Without ``output_fname`` the
    writer is opened in "STDOUT" mode instead of on a file.

    Parameters:
        test_fname: file with the test vectors.
        train_fname: training file, forwarded to ``learner.load_model``.
        model_fname: model file, forwarded to ``learner.load_model``.
        output_fname: optional file to receive the predictions.
        accuracy: when true, a trailing ``RMSE = ...`` line reports the
            root-mean-square error between each vector's ``label`` and
            the predicted mean. Nothing is reported for an empty test
            set.
    """
    test_file = SEQUENTIAL_FILE_READER(test_fname)
    try:
        test_vects = test_file.read()
    finally:
        test_file.close()

    learner = GPR()
    learner.load_model(model_fname, train_fname)

    if output_fname:
        output_file = SEQUENTIAL_FILE_WRITER(output_fname)
    else:
        output_file = SEQUENTIAL_FILE_WRITER(None, "STDOUT")

    n_instances = 0
    sum_sq_error = 0
    try:
        for v in test_vects["vects"]:
            (mean, variance) = learner.predict(v)
            output_file.writeLine("%f\t%f\n" % (mean, variance))
            if accuracy:
                sum_sq_error += (v.label - mean) ** 2
            n_instances += 1
        # RMSE is sqrt(mean of squared errors); dividing after the square
        # root would under-report it. Skip it when there is nothing to
        # average, which also avoids a division by zero.
        if accuracy and n_instances > 0:
            rmse = sqrt(sum_sq_error / float(n_instances))
            output_file.writeLine("RMSE = %f\n" % rmse)
    finally:
        # The writer must be closed even when prediction raises.
        output_file.close()
Beispiel #6
0
def train_GPR(train_fname, model_fname,
              verbose=True,
              beta=1,
              theta_0=None, theta_1=None,
              theta_2=None, theta_3=None):
    """
    Utility function that trains a regression model using a Gaussian
    Process and saves it to ``model_fname``.

    Parameters:
        train_fname: training file, read with SEQUENTIAL_FILE_READER.
        model_fname: destination passed to ``learner.save_model``.
        verbose: copied to ``learner.verbose``.
        beta: copied to ``learner.beta``.
        theta_0 .. theta_3: optional kernel hyper-parameters. Leaving one
            as ``None`` keeps the value the GAUSSIAN_QUADRATIC_KERNEL
            object already has; every other value (0 included) is
            assigned to the kernel.
    """
    train_file = SEQUENTIAL_FILE_READER(train_fname)
    try:
        train_vects = train_file.read()
    finally:
        # Close the reader whether or not the read succeeded.
        train_file.close()

    learner = GPR()
    learner.verbose = verbose
    learner.beta = beta

    kernel = GAUSSIAN_QUADRATIC_KERNEL()
    # ``is not None`` instead of a truthiness test, so that an explicit
    # zero is honoured rather than mistaken for "not supplied".
    if theta_0 is not None:
        kernel.theta_0 = theta_0
    if theta_1 is not None:
        kernel.theta_1 = theta_1
    if theta_2 is not None:
        kernel.theta_2 = theta_2
    if theta_3 is not None:
        kernel.theta_3 = theta_3

    learner.set_kernel(kernel)
    learner.train(train_vects)
    learner.save_model(model_fname)
Beispiel #7
0
def test_logreg(model_fname,test_fname,output_fname=None, prob=True,acc=True):
    """
    Predict labels for the test instances using the trained model.

    For every instance one line ``"<label>\\t<second predictVect value>"``
    is written, to ``output_fname`` when given and to the terminal
    otherwise.

    Parameters:
        model_fname: model file loaded through ``PREDICTOR.loadModel``.
        test_fname: test file iterated with SEQUENTIAL_FILE_READER.
        output_fname: optional file that receives the predictions.
        prob: accepted for interface compatibility. NOTE(review): the
            flag is not consulted; the second value returned by
            ``predictVect`` (presumably the class probability) is always
            written.
        acc: when true, the metrics collected from the labelled test
            instances are computed with ``E.getMetrics()`` and displayed
            with ``E.show``.
    """
    pred = PREDICTOR()
    pred.loadModel(model_fname)
    testFile = SEQUENTIAL_FILE_READER(test_fname)
    E = EVALUATOR(pred.n)
    output = None
    try:
        if output_fname:
            output = open(output_fname, "w")
        else:
            output = sys.stdout
        for mv in testFile:
            v = mv["vect"]
            # A separate local name keeps the ``prob`` argument intact.
            (lbl, lbl_prob) = pred.predictVect(v)
            output.write("%d\t%s\n" % (lbl, str(lbl_prob)))
            # With two classes a true label of -1 is recorded as 0
            # (presumably to match the predictor's 0/1 labels).
            if pred.n == 2 and v.label == -1:
                trueLabel = 0
            else:
                trueLabel = v.label
            # Unlabelled instances cannot contribute to the evaluation.
            if v.label is not None:
                E.add(trueLabel, lbl)
    finally:
        testFile.close()
        # Close only a file opened here; sys.stdout belongs to the caller.
        if output_fname and output is not None:
            output.close()
    if acc:
        result = E.getMetrics()
        E.show(result)
Beispiel #8
0
def train_TG(
    classes, train_fname, model_fname, iterations=2, L1=0, heldout_fname=None, crossValidation=None, verbose=False
):
    """
    Train a TruncatedGradient learner and write the model to disk.

    The training vectors are read from train_fname. When heldout_fname
    is given, its vectors are read as well and handed to the learner as
    ``heldoutVects``. When crossValidation is set to a number
    (e.g. 5 for five-fold cross-validation) it is stored as the
    learner's ``folds``. L1 is stored as the learner's ``c`` and
    iterations as its ``total_iterations``.

    Cross-validation and held-out evaluation are not meant to be
    combined; this function does not check for that itself (presumably
    the learner reports the error -- TODO confirm).
    """
    train_reader = SEQUENTIAL_FILE_READER(train_fname)
    train_data = train_reader.read()
    train_reader.close()

    heldout_data = None
    if heldout_fname:
        heldout_reader = SEQUENTIAL_FILE_READER(heldout_fname)
        heldout_data = heldout_reader.read()
        heldout_reader.close()

    tg_learner = TruncatedGradient(classes)
    tg_learner.total_iterations = iterations
    tg_learner.c = L1
    tg_learner.verbose = verbose
    if crossValidation:
        tg_learner.folds = crossValidation
    if heldout_fname:
        tg_learner.heldoutVects = heldout_data["vects"]

    # One weight per (class, feature ID) pair.
    feature_total = classes * len(train_data["featIDs"])
    tg_learner.train(train_data["vects"], feature_total)
    tg_learner.writeModel(feature_total, model_fname)
Beispiel #9
0
def train_SGD(classes,
              train_fname,
              model_fname,
              iterations=2,
              L2=0,
              heldout_fname=None,
              crossValidation=None,
              verbose=False):
    """
    Train an SGD learner (logistic regression trained with stochastic
    gradient descent) and write the model to ``model_fname``.

    Parameters:
        classes: number of classes; passed to ``SGD`` and used to size
            the feature space.
        train_fname: training file, read with SEQUENTIAL_FILE_READER.
        model_fname: destination passed to ``Learner.writeModel``.
        iterations: stored as ``Learner.total_iterations``.
        L2: stored as ``Learner.c``.
        heldout_fname: optional held-out file; its vectors are given to
            the learner as ``heldoutVects``.
        crossValidation: optional number of folds (e.g. 5 for five-fold
            cross-validation), stored as ``Learner.folds``.
        verbose: stored as ``Learner.verbose``.

    Cross-validation and held-out evaluation are not meant to be
    combined; this function does not check for that itself (presumably
    the learner reports the error -- TODO confirm).
    """
    TrainSeqFileReader = SEQUENTIAL_FILE_READER(train_fname)
    try:
        trainVects = TrainSeqFileReader.read()
    finally:
        # Release the reader even if reading fails.
        TrainSeqFileReader.close()

    heldoutVects = None
    if heldout_fname:
        HeldoutSeqFileReader = SEQUENTIAL_FILE_READER(heldout_fname)
        try:
            heldoutVects = HeldoutSeqFileReader.read()
        finally:
            HeldoutSeqFileReader.close()

    Learner = SGD(classes)
    Learner.total_iterations = iterations
    Learner.c = L2
    Learner.verbose = verbose
    if crossValidation:
        Learner.folds = crossValidation
    if heldout_fname:
        Learner.heldoutVects = heldoutVects["vects"]

    # One weight per (class, feature ID) pair.
    no_features = classes * len(trainVects["featIDs"])
    Learner.train(trainVects["vects"], no_features)
    # A single parenthesised argument prints identically whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("Writing the model... %s" % model_fname)
    Learner.writeModel(no_features, model_fname)
Beispiel #10
0
def test_logreg(model_fname,
                test_fname,
                output_fname=None,
                prob=True,
                acc=True):
    """
    Predict labels for the test instances using the trained model.

    Each instance produces one output line holding the predicted label
    and, after a tab, the second value returned by ``predictVect``. The
    lines go to ``output_fname`` when it is specified and to the
    terminal otherwise.

    Parameters:
        model_fname: model file loaded through ``PREDICTOR.loadModel``.
        test_fname: test file iterated with SEQUENTIAL_FILE_READER.
        output_fname: optional file that receives the predictions.
        prob: kept so existing callers continue to work.
            NOTE(review): the flag is never read; the second
            ``predictVect`` value (presumably the class probability) is
            written regardless.
        acc: when true, metrics over the labelled test instances are
            obtained from ``E.getMetrics()`` and shown with ``E.show``.
    """
    pred = PREDICTOR()
    pred.loadModel(model_fname)
    testFile = SEQUENTIAL_FILE_READER(test_fname)
    E = EVALUATOR(pred.n)
    output = None
    try:
        if output_fname:
            output = open(output_fname, "w")
        else:
            output = sys.stdout
        for mv in testFile:
            v = mv["vect"]
            # Use a distinct local so the ``prob`` parameter is not
            # overwritten by the per-instance result.
            (lbl, lbl_prob) = pred.predictVect(v)
            output.write("%d\t%s\n" % (lbl, str(lbl_prob)))
            # In the two-class case a true label of -1 is recorded as 0
            # (presumably to line up with the predictor's 0/1 output).
            if pred.n == 2 and v.label == -1:
                trueLabel = 0
            else:
                trueLabel = v.label
            # Only labelled instances are added to the evaluator.
            if v.label is not None:
                E.add(trueLabel, lbl)
    finally:
        testFile.close()
        # Never close sys.stdout; only a file this function opened.
        if output_fname and output is not None:
            output.close()
    if acc:
        result = E.getMetrics()
        E.show(result)