Esempio n. 1
0
def train_TG(
    classes, train_fname, model_fname, iterations=2, L1=0, heldout_fname=None, crossValidation=None, verbose=False
):
    """
    Fit a TruncatedGradient learner on the instances stored in
    train_fname and write the resulting model to model_fname.

    classes is passed to the TruncatedGradient constructor and is also
    multiplied by the number of feature IDs in the training data to
    obtain the feature count handed to the learner.
    iterations is stored as the learner's total_iterations and L1 as
    its c attribute (presumably the L1 regularisation coefficient).
    If heldout_fname is given, its vectors are attached to the learner
    as heldoutVects; if crossValidation is given, it is stored as the
    learner's folds.

    NOTE: combining cross-validation with held-out evaluation is
    described as an error by the original documentation; this function
    does not check for it itself.
    """

    def _read_all(fname):
        # Read the whole sequential file and release the reader.
        reader = SEQUENTIAL_FILE_READER(fname)
        contents = reader.read()
        reader.close()
        return contents

    train_data = _read_all(train_fname)
    heldout_data = _read_all(heldout_fname) if heldout_fname else None

    model = TruncatedGradient(classes)
    model.total_iterations = iterations
    model.c = L1
    model.verbose = verbose
    if crossValidation:
        model.folds = crossValidation
    if heldout_fname:
        model.heldoutVects = heldout_data["vects"]

    feature_count = classes * len(train_data["featIDs"])
    model.train(train_data["vects"], feature_count)
    model.writeModel(feature_count, model_fname)
Esempio n. 2
0
 def load_model(self, model_fname, train_fname):
     """
     Restore a trained model from disk.

     Reads the matrix stored in "<model_fname>.matrix" into self.Cinv,
     re-reads the training data from train_fname into (self.D, self.t),
     and parses the parameter file model_fname for beta and the kernel
     settings.

     The parameter file is read as follows: the second
     whitespace-separated field of the first line is beta, the second
     tab-separated field of the second line is the kernel type, and,
     for GAUSSIAN_QUADRATIC_KERNEL, the next four lines are
     "<name> <value>" pairs for theta_0 .. theta_3. No kernel is set
     for any other kernel type.
     """
     self.Cinv = genfromtxt("%s.matrix" % model_fname)
     train_file = SEQUENTIAL_FILE_READER(train_fname)
     try:
         train_vects = train_file.read()
     finally:
         # Release the reader even if reading fails.
         train_file.close()
     (self.D, self.t) = get_train_data(train_vects)
     # The with-block closes the parameter file even when a malformed
     # line makes the parsing below raise.
     with open(model_fname) as para_file:
         self.beta = float(para_file.readline().split()[1])
         kernel_type = para_file.readline().strip().split("\t")[1]
         if kernel_type == "GAUSSIAN_QUADRATIC_KERNEL":
             self.kernel = GAUSSIAN_QUADRATIC_KERNEL()
             for _ in range(4):
                 (para, val) = para_file.readline().strip().split()
                 # Unknown parameter names are ignored.
                 if para in ("theta_0", "theta_1", "theta_2", "theta_3"):
                     setattr(self.kernel, para, float(val))
Esempio n. 3
0
def predict_GPR(test_fname,
                train_fname,
                model_fname,
                output_fname=None,
                accuracy=False):
    """
    Predict the outputs for the test instances.

    A GPR learner is restored from model_fname (together with the
    training data in train_fname) and applied to every vector read
    from test_fname. For each instance one line "<mean>\\t<variance>"
    is written to output_fname, or to the standard output when
    output_fname is not given.

    If accuracy is true and at least one instance was predicted, a
    final line "RMSE = <value>" is written, where the value is the
    root mean squared error between each instance's label and its
    predicted mean.
    """
    test_file = SEQUENTIAL_FILE_READER(test_fname)
    test_vects = test_file.read()
    test_file.close()
    learner = GPR()
    learner.load_model(model_fname, train_fname)
    if output_fname:
        output_file = SEQUENTIAL_FILE_WRITER(output_fname)
    else:
        output_file = SEQUENTIAL_FILE_WRITER(None, "STDOUT")
    count = 0
    squared_error = 0
    try:
        for v in test_vects["vects"]:
            (mean, variance) = learner.predict(v)
            output_file.writeLine("%f\t%f\n" % (mean, variance))
            if accuracy:
                squared_error += (v.label - mean) ** 2
            count += 1
        # RMSE is the square root of the *mean* squared error, so the
        # division by the instance count belongs inside the root. It is
        # skipped for an empty test set to avoid dividing by zero.
        if accuracy and count > 0:
            rmse = sqrt(squared_error / float(count))
            output_file.writeLine("RMSE = %f\n" % rmse)
    finally:
        # Close the writer even if a prediction fails part-way.
        output_file.close()
Esempio n. 4
0
def train_GPR(train_fname,
              model_fname,
              verbose=True,
              beta=1,
              theta_0=None,
              theta_1=None,
              theta_2=None,
              theta_3=None):
    """
    This is the utility function used to train a regression
    model using Gaussian Process.

    The training instances are read from train_fname, a GPR learner
    with a GAUSSIAN_QUADRATIC_KERNEL is trained on them, and the model
    is saved to model_fname. verbose and beta are stored on the
    learner. Each theta_k that is not None overrides the corresponding
    kernel attribute; a theta_k left as None keeps whatever value the
    kernel was constructed with.
    """
    train_file = SEQUENTIAL_FILE_READER(train_fname)
    train_vects = train_file.read()
    train_file.close()
    learner = GPR()
    learner.verbose = verbose
    learner.beta = beta
    kernel = GAUSSIAN_QUADRATIC_KERNEL()
    # Compare against None rather than testing truthiness, so that an
    # explicit 0 is honoured instead of being silently dropped.
    if theta_0 is not None:
        kernel.theta_0 = theta_0
    if theta_1 is not None:
        kernel.theta_1 = theta_1
    if theta_2 is not None:
        kernel.theta_2 = theta_2
    if theta_3 is not None:
        kernel.theta_3 = theta_3
    learner.set_kernel(kernel)
    learner.train(train_vects)
    learner.save_model(model_fname)
Esempio n. 5
0
 def load_model(self, model_fname, train_fname):
     """
     Restore a trained model from disk.

     Reads the matrix stored in "<model_fname>.matrix" into self.Cinv,
     re-reads the training data from train_fname into (self.D, self.t),
     and parses the parameter file model_fname for beta and the kernel
     settings.

     The parameter file is read as follows: the second
     whitespace-separated field of the first line is beta, the second
     tab-separated field of the second line is the kernel type, and,
     for GAUSSIAN_QUADRATIC_KERNEL, the next four lines are
     "<name> <value>" pairs for theta_0 .. theta_3. No kernel is set
     for any other kernel type.
     """
     self.Cinv = genfromtxt("%s.matrix" % model_fname)
     train_file = SEQUENTIAL_FILE_READER(train_fname)
     try:
         train_vects = train_file.read()
     finally:
         # Release the reader even if reading fails.
         train_file.close()
     (self.D, self.t) = get_train_data(train_vects)
     # The with-block closes the parameter file even when a malformed
     # line makes the parsing below raise.
     with open(model_fname) as para_file:
         self.beta = float(para_file.readline().split()[1])
         kernel_type = para_file.readline().strip().split("\t")[1]
         if kernel_type == "GAUSSIAN_QUADRATIC_KERNEL":
             self.kernel = GAUSSIAN_QUADRATIC_KERNEL()
             for _ in range(4):
                 (para, val) = para_file.readline().strip().split()
                 # Unknown parameter names are ignored.
                 if para in ("theta_0", "theta_1", "theta_2", "theta_3"):
                     setattr(self.kernel, para, float(val))
Esempio n. 6
0
def predict_GPR(test_fname,
                train_fname,
                model_fname,
                output_fname=None,
                accuracy=False):
    """
    Predict the outputs for the test instances.

    A GPR learner is restored from model_fname (together with the
    training data in train_fname) and applied to every vector read
    from test_fname. For each instance one line "<mean>\\t<variance>"
    is written to output_fname, or to the standard output when
    output_fname is not given.

    If accuracy is true and at least one instance was predicted, a
    final line "RMSE = <value>" is written, where the value is the
    root mean squared error between each instance's label and its
    predicted mean.
    """
    test_file = SEQUENTIAL_FILE_READER(test_fname)
    test_vects = test_file.read()
    test_file.close()
    learner = GPR()
    learner.load_model(model_fname, train_fname)
    if output_fname:
        output_file = SEQUENTIAL_FILE_WRITER(output_fname)
    else:
        output_file = SEQUENTIAL_FILE_WRITER(None, "STDOUT")
    count = 0
    squared_error = 0
    try:
        for v in test_vects["vects"]:
            (mean, variance) = learner.predict(v)
            output_file.writeLine("%f\t%f\n" % (mean, variance))
            if accuracy:
                squared_error += (v.label - mean) ** 2
            count += 1
        # RMSE is the square root of the *mean* squared error, so the
        # division by the instance count belongs inside the root. It is
        # skipped for an empty test set to avoid dividing by zero.
        if accuracy and count > 0:
            rmse = sqrt(squared_error / float(count))
            output_file.writeLine("RMSE = %f\n" % rmse)
    finally:
        # Close the writer even if a prediction fails part-way.
        output_file.close()
Esempio n. 7
0
def train_GPR(train_fname, model_fname,
              verbose=True,
              beta=1,
              theta_0=None, theta_1=None,
              theta_2=None, theta_3=None):
    """
    This is the utility function used to train a regression
    model using Gaussian Process.

    The training instances are read from train_fname, a GPR learner
    with a GAUSSIAN_QUADRATIC_KERNEL is trained on them, and the model
    is saved to model_fname. verbose and beta are stored on the
    learner. Each theta_k that is not None overrides the corresponding
    kernel attribute; a theta_k left as None keeps whatever value the
    kernel was constructed with.
    """
    train_file = SEQUENTIAL_FILE_READER(train_fname)
    train_vects = train_file.read()
    train_file.close()
    learner = GPR()
    learner.verbose = verbose
    learner.beta = beta
    kernel = GAUSSIAN_QUADRATIC_KERNEL()
    # Compare against None rather than testing truthiness, so that an
    # explicit 0 is honoured instead of being silently dropped.
    if theta_0 is not None:
        kernel.theta_0 = theta_0
    if theta_1 is not None:
        kernel.theta_1 = theta_1
    if theta_2 is not None:
        kernel.theta_2 = theta_2
    if theta_3 is not None:
        kernel.theta_3 = theta_3
    learner.set_kernel(kernel)
    learner.train(train_vects)
    learner.save_model(model_fname)
Esempio n. 8
0
def train_SGD(classes,
              train_fname,
              model_fname,
              iterations=2,
              L2=0,
              heldout_fname=None,
              crossValidation=None,
              verbose=False):
    """
    Train an SGD learner (stochastic gradient descent) on the
    instances in train_fname and write the model to model_fname.

    classes is passed to the SGD constructor and is also multiplied by
    the number of feature IDs in the training data to obtain the
    feature count handed to the learner. iterations is stored as the
    learner's total_iterations and L2 as its c attribute (presumably
    the L2 regularisation coefficient).

    If heldout_fname is given, its vectors are attached to the learner
    as heldoutVects so it can evaluate on held-out data. If
    crossValidation is set to a number (e.g. 5 for five-fold
    cross-validation) it is stored as the learner's folds.

    NOTE: combining cross-validation with held-out evaluation is
    described as an error by the original documentation; this function
    does not check for it itself.
    """
    TrainSeqFileReader = SEQUENTIAL_FILE_READER(train_fname)
    trainVects = TrainSeqFileReader.read()
    TrainSeqFileReader.close()
    heldoutVects = None
    if heldout_fname:
        HeldoutSeqFileReader = SEQUENTIAL_FILE_READER(heldout_fname)
        heldoutVects = HeldoutSeqFileReader.read()
        HeldoutSeqFileReader.close()
    Learner = SGD(classes)
    Learner.total_iterations = iterations
    Learner.c = L2
    Learner.verbose = verbose
    if crossValidation:
        Learner.folds = crossValidation
    if heldout_fname:
        Learner.heldoutVects = heldoutVects["vects"]
    no_features = classes * len(trainVects["featIDs"])
    Learner.train(trainVects["vects"], no_features)
    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("Writing the model... %s" % model_fname)
    Learner.writeModel(no_features, model_fname)