def labels_io_modular():
	"""Load regression labels from a space-delimited CSV file and return them."""
	from modshogun import RegressionLabels, CSVFile

	# Open the training-label file and tell the reader how fields are split.
	csv = CSVFile("../data/label_train_regression.dat", "r")
	csv.set_delimiter(" ")
	labels = RegressionLabels()
	labels.load(csv)
	return labels
Exemple #2
0
def labels_io_modular():
    """Read regression labels from the space-separated training data file."""
    from modshogun import RegressionLabels, CSVFile

    source = CSVFile("../data/label_train_regression.dat", "r")
    source.set_delimiter(" ")
    result = RegressionLabels()
    result.load(source)
    return result
Exemple #3
0
def evaluation_meansquarederror_modular (ground_truth, predicted):
	"""Return the mean squared error between *predicted* and *ground_truth*."""
	from modshogun import RegressionLabels
	from modshogun import MeanSquaredError

	# Wrap the raw vectors into Shogun label objects.
	truth_labels = RegressionLabels(ground_truth)
	pred_labels = RegressionLabels(predicted)

	# MeanSquaredError.evaluate takes (predicted, ground_truth) in that order.
	return MeanSquaredError().evaluate(pred_labels, truth_labels)
def transfer_multitask_leastsquares_regression(fm_train=traindat,
                                               fm_test=testdat,
                                               label_train=label_traindat):
    """Train a two-task multitask least-squares regression and return the
    task-0 predictions on the training features.

    Prints a message and exits gracefully when the solver is unavailable.
    """
    from modshogun import RegressionLabels, RealFeatures, Task, TaskGroup
    try:
        from modshogun import MultitaskLeastSquaresRegression
    except ImportError:
        print("MultitaskLeastSquaresRegression not available")
        exit(0)

    # BUG FIX: use the fm_train argument (the original hard-coded the
    # module-level traindat, silently ignoring caller-supplied data).
    features = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # Split the vectors into two contiguous tasks.
    n_vectors = features.get_num_vectors()
    task_one = Task(0, n_vectors // 2)
    task_two = Task(n_vectors // 2, n_vectors)
    task_group = TaskGroup()
    task_group.append_task(task_one)
    task_group.append_task(task_two)

    mtlsr = MultitaskLeastSquaresRegression(0.1, features, labels, task_group)
    mtlsr.set_regularization(1)  # use regularization ratio
    mtlsr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlsr.train()
    mtlsr.set_current_task(0)
    out = mtlsr.apply_regression().get_labels()
    return out
Exemple #5
0
        def RunLASSOShogun(q):
            """Time LASSO training; put the elapsed time (or -1 on failure) on *q*."""
            totalTimer = Timer()

            # Load input dataset.  If the dataset contains two files then the
            # second file is the test set.
            try:
                Log.Info("Loading dataset", self.verbose)
                if len(self.dataset) == 2:
                    testSet = np.genfromtxt(self.dataset[1], delimiter=',')

                # BUG FIX: parse lambda1 unconditionally.  The original only
                # bound it inside the two-file branch, so single-file runs hit
                # a NameError (silently reported as failure).  The raw string
                # also avoids the invalid '\d' escape warning in Python 3.
                lambda1 = re.search(r"-l (\d+)", options)
                lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))

                # Use the last row of the training set as the responses.
                X, y = SplitTrainData(self.dataset)

                with totalTimer:
                    model = LeastAngleRegression(lasso=True)
                    model.set_max_l1_norm(lambda1)
                    model.set_labels(RegressionLabels(y))
                    model.train(RealFeatures(X.T))

            except Exception as e:
                print(e)
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Exemple #6
0
  def RunMetrics(self, options):
    """Run linear ridge regression and return runtime + Simple MSE metrics.

    Requires three datasets (train, test, true test labels); otherwise only
    the runtime is computed and Log.Fatal reports the misuse (returns None).
    """
    Log.Info("Perform Linear Ridge Regression.", self.verbose)

    results = self.LinearRidgeRegressionShogun(options)
    if results < 0:
      return results

    metrics = {'Runtime' : results}

    if len(self.dataset) >= 3:

      X, y = SplitTrainData(self.dataset)
      # Raw string avoids the invalid '\d' escape warning in Python 3.
      tau = re.search(r"-t (\d+)", options)
      tau = 1.0 if not tau else int(tau.group(1))
      model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
      model.train()

      testData = LoadDataset(self.dataset[1])
      truelabels = LoadDataset(self.dataset[2])

      predictedlabels = model.apply_regression(RealFeatures(testData.T)).get_labels()

      SimpleMSE = Metrics.SimpleMeanSquaredError(truelabels, predictedlabels)
      metrics['Simple MSE'] = SimpleMSE
      return metrics

    else:
      Log.Fatal("This method requires three datasets!")
Exemple #7
0
    def RunLinearRidgeRegressionShogun(q):
      """Time linear ridge regression; put the elapsed time (or -1) on *q*."""
      totalTimer = Timer()

      # Load input dataset.  If the dataset contains two files then the
      # second file is the test set.
      Log.Info("Loading dataset", self.verbose)
      if len(self.dataset) >= 2:
        testSet = np.genfromtxt(self.dataset[1], delimiter=',')

      # Use the last row of the training set as the responses.
      X, y = SplitTrainData(self.dataset)
      # Raw string avoids the invalid '\d' escape warning in Python 3.
      tau = re.search(r"-t (\d+)", options)
      tau = 1.0 if not tau else int(tau.group(1))

      try:
        with totalTimer:
          # Perform linear ridge regression.
          model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
          model.train()

          if len(self.dataset) >= 2:
            model.apply_regression(RealFeatures(testSet.T))

      except Exception:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Exemple #8
0
def regression_cartree_modular(num_train=500,num_test=50,x_range=15,noise_var=0.2,ft=feattypes):
	"""Train a CART regression tree on noisy y=x data and regress a test grid.

	Returns (trained tree, test predictions) or None if imports fail.
	"""
	try:
		from modshogun import RealFeatures, RegressionLabels, CSVFile, CARTree, PT_REGRESSION
		# BUG FIX: import 'array' explicitly; the original relied on it being
		# in the module namespace, risking a NameError when building X_test.
		from numpy import random, array
	except ImportError:
		print("Could not import Shogun and/or numpy modules")
		return

	random.seed(1)

	# form training dataset : y=x with noise
	X_train=random.rand(1,num_train)*x_range;
	Y_train=X_train+random.randn(num_train)*noise_var

	# form test dataset: evenly spaced grid across the same range
	X_test=array([[float(i)/num_test*x_range for i in range(num_test)]])

	# wrap features and labels into Shogun objects
	feats_train=RealFeatures(X_train)
	feats_test=RealFeatures(X_test)
	train_labels=RegressionLabels(Y_train[0])

	# CART Tree formation
	c=CARTree(ft,PT_REGRESSION,5,True)
	c.set_labels(train_labels)
	c.train(feats_train)

	# Regress the test data
	output=c.apply_regression(feats_test).get_labels()

	return c,output
Exemple #9
0
        def RunLARSShogun(q):
            """Time LARS training; put the elapsed time (or -1 on error) on *q*."""
            totalTimer = Timer()

            # Load input dataset: features in dataset[0], responses in dataset[1].
            try:
                Log.Info("Loading dataset", self.verbose)
                inputData = np.genfromtxt(self.dataset[0], delimiter=',')
                responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
                inputFeat = RealFeatures(inputData.T)
                responsesFeat = RegressionLabels(responsesData)

                # Raw string avoids the invalid '\d' escape warning in Python 3.
                lambda1 = re.search(r"-l (\d+)", options)
                lambda1 = 0.0 if not lambda1 else int(lambda1.group(1))

                with totalTimer:
                    # Perform LARS (non-lasso variant).
                    model = LeastAngleRegression(False)
                    model.set_max_l1_norm(lambda1)
                    model.set_labels(responsesFeat)
                    model.train(inputFeat)
                    model.get_w(model.get_path_size() - 1)
            except Exception:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Exemple #10
0
    def RunLinearRegressionShogun(q):
      """Time least-squares linear regression; put elapsed time (or -1) on *q*."""
      totalTimer = Timer()

      # Load input dataset.  If the dataset contains two files then the
      # second file is the test set.
      try:
        Log.Info("Loading dataset", self.verbose)
        if len(self.dataset) == 2:
          testSet = np.genfromtxt(self.dataset[1], delimiter=',')

        # Use the last row of the training set as the responses.
        X, y = SplitTrainData(self.dataset)

        with totalTimer:
          # Perform linear regression.
          model = LeastSquaresRegression(RealFeatures(X.T), RegressionLabels(y))
          model.train()
          b = model.get_w()

          if len(self.dataset) == 2:
            # BUG FIX: the original called the undefined name 'classifier';
            # the trained 'model' must produce the predictions.
            pred = model.apply(RealFeatures(testSet.T))
            self.predictions = pred.get_labels()

      except Exception:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Exemple #11
0
    def RunSVRShogun(q):
      """Time LibSVR training with a Gaussian kernel; put time (or -1) on *q*."""
      totalTimer = Timer()
      # Load input dataset; the last row of the training set holds responses.
      Log.Info("Loading dataset", self.verbose)
      X, y = SplitTrainData(self.dataset)

      # Parse SVR parameters.  Raw strings avoid the invalid '\d' escape
      # warning in Python 3.
      c = re.search(r"-c (\d+\.\d+)", options)
      e = re.search(r"-e (\d+\.\d+)", options)
      g = re.search(r"-g (\d+\.\d+)", options)

      self.C = 1.0 if not c else float(c.group(1))
      self.epsilon = 1.0 if not e else float(e.group(1))
      g = 10.0 if not g else float(g.group(1))
      # Kernel width is the reciprocal of gamma.
      self.width = np.true_divide(1, g)

      data = RealFeatures(X.T)
      labels_train = RegressionLabels(y)
      self.kernel = GaussianKernel(data, data, self.width)

      try:
        with totalTimer:
          # Perform SVR.
          model = LibSVR(self.C, self.epsilon, self.kernel, labels_train)
          model.train()
      except Exception:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Exemple #12
0
    def RunSVRShogun():
      """Train a Gaussian-kernel LibSVR model; return elapsed time or -1."""
      totalTimer = Timer()
      # Load input dataset; the last row of the training set holds responses.
      Log.Info("Loading dataset", self.verbose)
      X, y = SplitTrainData(self.dataset)

      # Start from defaults, then consume any user-supplied options.
      self.C = 1.0
      self.epsilon = 1.0
      self.width = 0.1
      _missing = object()
      c_opt = options.pop("c", _missing)
      if c_opt is not _missing:
        self.C = float(c_opt)
      eps_opt = options.pop("epsilon", _missing)
      if eps_opt is not _missing:
        self.epsilon = float(eps_opt)
      gamma_opt = options.pop("gamma", _missing)
      if gamma_opt is not _missing:
        self.width = np.true_divide(1, float(gamma_opt))

      # Anything left over is unrecognised.
      if len(options) > 0:
        Log.Fatal("Unknown parameters: " + str(options))
        raise Exception("unknown parameters")

      data = RealFeatures(X.T)
      labels_train = RegressionLabels(y)
      self.kernel = GaussianKernel(data, data, self.width)

      try:
        with totalTimer:
          # Perform SVR.
          model = LibSVR(self.C, self.epsilon, self.kernel, labels_train)
          model.train()
      except Exception:
        return -1

      return totalTimer.ElapsedTime()
Exemple #13
0
        def RunLASSOShogun():
            """Time LASSO (LeastAngleRegression) training; return time or -1."""
            totalTimer = Timer()

            # Load input dataset.  If the dataset contains two files then the
            # second file is the test set.
            try:
                Log.Info("Loading dataset", self.verbose)
                if len(self.dataset) >= 2:
                    testSet = np.genfromtxt(self.dataset[1], delimiter=',')

                # Get all the parameters.
                lambda1 = None
                if "lambda1" in options:
                    lambda1 = float(options.pop("lambda1"))

                if len(options) > 0:
                    Log.Fatal("Unknown parameters: " + str(options))
                    raise Exception("unknown parameters")

                # Use the last row of the training set as the responses.
                X, y = SplitTrainData(self.dataset)

                with totalTimer:
                    model = LeastAngleRegression(lasso=True)
                    # BUG FIX: compare against None explicitly so an explicit
                    # lambda1 of 0.0 (falsy) is still applied.
                    if lambda1 is not None:
                        model.set_max_l1_norm(lambda1)
                    model.set_labels(RegressionLabels(y))
                    model.train(RealFeatures(X.T))

            except Exception:
                return -1

            return totalTimer.ElapsedTime()
def regression_svrlight_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat, \
				    width=1.2,C=1,epsilon=1e-5,tube_epsilon=1e-2,num_threads=3):
	"""Train an SVRLight regressor with a Gaussian kernel and return
	(test predictions, kernel).  Prints a message and returns None when
	SVRLight support is not compiled in.
	"""
	from modshogun import RegressionLabels, RealFeatures
	from modshogun import GaussianKernel
	try:
		from modshogun import SVRLight
	except ImportError:
		print('No support for SVRLight available.')
		return

	train_feats = RealFeatures(fm_train)
	test_feats = RealFeatures(fm_test)
	gauss_kernel = GaussianKernel(train_feats, train_feats, width)
	train_labels = RegressionLabels(label_train)

	machine = SVRLight(C, epsilon, gauss_kernel, train_labels)
	machine.set_tube_epsilon(tube_epsilon)
	machine.parallel.set_num_threads(num_threads)
	machine.train()

	# Re-initialise the kernel against the test features before predicting.
	gauss_kernel.init(train_feats, test_feats)
	predictions = machine.apply().get_labels()

	return predictions, gauss_kernel
Exemple #15
0
    def RunLARSShogun(q):
      """Time LARS training; put the elapsed time (or -1 on error) on *q*."""
      totalTimer = Timer()

      # Load input dataset: features in dataset[0], responses in dataset[1].
      try:
        Log.Info("Loading dataset", self.verbose)
        inputData = np.genfromtxt(self.dataset[0], delimiter=',')
        responsesData = np.genfromtxt(self.dataset[1], delimiter=',')
        inputFeat = RealFeatures(inputData.T)
        responsesFeat = RegressionLabels(responsesData)

        # Get all the parameters.
        lambda1 = None
        if "lambda1" in options:
          lambda1 = float(options.pop("lambda1"))

        if len(options) > 0:
          Log.Fatal("Unknown parameters: " + str(options))
          raise Exception("unknown parameters")

        with totalTimer:
          # Perform LARS.
          model = LeastAngleRegression(False)
          # BUG FIX: compare against None explicitly so an explicit
          # lambda1 of 0.0 (falsy) is still applied.
          if lambda1 is not None:
            model.set_max_l1_norm(lambda1)
          model.set_labels(responsesFeat)
          model.train(inputFeat)
          model.get_w_for_var(model.get_path_size() - 1)
      except Exception:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
def runShogunSVRWDKernel(train_xt, train_lt, test_xt, svm_c=1, svr_param=0.1):
    """Train an epsilon-SVR on a combined weighted-degree-position + spectrum
    string kernel, serialize the model to FNEPSILON, and return
    (train predictions, test predictions, kernel).
    """

    ##################################################
    # set up svr features
    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(SIZE, 5)
    shifts_vector = np.ones(max_len, dtype=np.int32) * NUMSHIFTS
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum kernel
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(SIZE, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)

    # init labels
    labels = RegressionLabels(train_lt)

    # epsilon-SVR model
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    # BUG FIX: print() call syntax -- the original Python 2 print statements
    # are a SyntaxError under Python 3 (which the rest of the file targets).
    print("Ready to train!")
    svr_epsilon.train(feats_train)

    # predictions on training and test features
    print("Making predictions!")
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    ##################################################
    # serialize to file; 'with' guarantees the handle is closed even if
    # save() raises (the original leaked it on error)
    with open(FNEPSILON, 'w+') as fEpsilon:
        svr_epsilon.save(fEpsilon)

    ##################################################

    return out1_epsilon, out2_epsilon, kernel
Exemple #17
0
def _evaluate(indata):
    """Train the kernel regression machine described by *indata* and compare
    its bias/alphas/support vectors/classification against stored values.

    Returns the result of util.check_accuracy, or False when the requested
    machine or regression type is unavailable.
    """
    prefix = 'kernel_'
    feats = util.get_features(indata, prefix)
    kargs = util.get_args(indata, prefix)
    # NOTE: eval() resolves class names from the test description; only feed
    # this function trusted test-harness input.
    fun = eval(indata[prefix + 'name'] + 'Kernel')
    kernel = fun(feats['train'], feats['train'], *kargs)

    prefix = 'regression_'
    kernel.parallel.set_num_threads(indata[prefix + 'num_threads'])

    try:
        name = indata[prefix + 'name']
        if (name == 'KERNELRIDGEREGRESSION'):
            name = 'KernelRidgeRegression'

        rfun = eval(name)
    except NameError as e:
        print("%s is disabled/unavailable!" % indata[prefix + 'name'])
        return False

    labels = RegressionLabels(double(indata[prefix + 'labels']))
    if indata[prefix + 'type'] == 'svm':
        regression = rfun(indata[prefix + 'C'], indata[prefix + 'epsilon'],
                          kernel, labels)
    elif indata[prefix + 'type'] == 'kernelmachine':
        regression = rfun(indata[prefix + 'tau'], kernel, labels)
    else:
        return False

    regression.parallel.set_num_threads(indata[prefix + 'num_threads'])
    if prefix + 'tube_epsilon' in indata:
        regression.set_tube_epsilon(indata[prefix + 'tube_epsilon'])

    regression.train()

    # Accumulate absolute deviations from the stored reference values.
    alphas = 0
    bias = 0
    sv = 0
    if prefix + 'bias' in indata:
        bias = abs(regression.get_bias() - indata[prefix + 'bias'])
    if prefix + 'alphas' in indata:
        for item in regression.get_alphas().tolist():
            alphas += item
        alphas = abs(alphas - indata[prefix + 'alphas'])
    if prefix + 'support_vectors' in indata:
        # BUG FIX: the original referenced the undefined name 'inregression',
        # raising NameError whenever support vectors were checked.
        for item in regression.get_support_vectors().tolist():
            sv += item
        sv = abs(sv - indata[prefix + 'support_vectors'])

    kernel.init(feats['train'], feats['test'])
    classified = max(
        abs(regression.apply().get_labels() - indata[prefix + 'classified']))

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               alphas=alphas,
                               bias=bias,
                               support_vectors=sv,
                               classified=classified)
def regression_gaussian_process_modular (n=100,n_test=100, \
  x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):
    """Fit an exact-inference Gaussian Process to a noisy 1-D sine wave.

    Returns (alpha, diagonal, rounded variance, rounded mean, cholesky),
    or None when Eigen3-backed GP classes are unavailable.
    """

    from modshogun import RealFeatures, RegressionLabels, GaussianKernel, Math
    try:
        from modshogun import GaussianLikelihood, ZeroMean, \
          ExactInferenceMethod, GaussianProcessRegression
    except ImportError:
        print("Eigen3 needed for Gaussian Processes")
        return

    # reproducible results
    random.seed(seed)
    Math.init_random(17)

    # easy regression data: one dimensional noisy sine wave
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    # GP specification; Shogun's GaussianKernel width is 2*width^2
    shogun_width = width * width * 2
    kernel = GaussianKernel(10, shogun_width)
    zmean = ZeroMean()
    lik = GaussianLikelihood()
    lik.set_sigma(noise_var)
    inf = ExactInferenceMethod(kernel, feats_train, zmean, labels, lik)

    # train GP
    gp = GaussianProcessRegression(inf)
    gp.train()

    # some inference by-products we can inspect
    alpha = inf.get_alpha()
    diagonal = inf.get_diagonal_vector()
    cholesky = inf.get_cholesky()

    # get mean and variance vectors for the test inputs
    mean = gp.get_mean_vector(feats_test)
    variance = gp.get_variance_vector(feats_test)

    # plot results
    #plot(X[0],Y[0],'x') # training observations
    #plot(X_test[0],Y_test[0],'-') # ground truth of test
    #plot(X_test[0],mean, '-') # mean predictions of test
    #fill_between(X_test[0],mean-1.96*sqrt(variance),mean+1.96*sqrt(variance),color='grey')  # 95% confidence interval
    #legend(["training", "ground truth", "mean predictions"])

    #show()

    # NOTE(review): round() on the mean/variance vectors relies on the
    # returned type supporting rounding -- verify against the Shogun version.
    return alpha, diagonal, round(variance, 12), round(mean, 12), cholesky
def regression_least_squares_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,tau=1e-6):
	"""Train ordinary least-squares regression and return
	(test predictions, trained model).
	"""
	from modshogun import RegressionLabels, RealFeatures
	from modshogun import LeastSquaresRegression

	# BUG FIX: use the fm_train argument (the original hard-coded the
	# module-level traindat, silently ignoring caller-supplied data).
	ls=LeastSquaresRegression(RealFeatures(fm_train), RegressionLabels(label_train))
	ls.train()
	out = ls.apply(RealFeatures(fm_test)).get_labels()
	return out,ls
Exemple #20
0
def regression_linear_ridge_modular(fm_train=traindat,
                                    fm_test=testdat,
                                    label_train=label_traindat,
                                    tau=1e-6):
    """Train linear ridge regression with regularizer *tau* and return
    (test predictions, trained model).
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import LinearRidgeRegression

    # BUG FIX: use the fm_train argument (the original hard-coded the
    # module-level traindat, silently ignoring caller-supplied data).
    rr = LinearRidgeRegression(tau, RealFeatures(fm_train),
                               RegressionLabels(label_train))
    rr.train()
    out = rr.apply(RealFeatures(fm_test)).get_labels()
    return out, rr
Exemple #21
0
def get_labels(raw=False, type='binary'):
    """Build a label vector of NUM_EXAMPLES -1s followed by NUM_EXAMPLES +1s.

    Returns the raw array when *raw* is true; otherwise wraps it in
    BinaryLabels or RegressionLabels according to *type* (None for any
    other type string).
    """
    data = concatenate(
        array((-ones(NUM_EXAMPLES, dtype=double),
               ones(NUM_EXAMPLES, dtype=double))))
    if raw:
        return data
    if type == 'binary':
        return BinaryLabels(data)
    if type == 'regression':
        return RegressionLabels(data)
    return None
Exemple #22
0
def mkl_regression_modular(n=100,n_test=100, \
  x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):
    """Multiple-kernel-learning regression demo on a noisy 1-D sine wave.

    Trains an MKLRegression machine (SVRLight constraint generator) with a
    combined Gaussian/Gaussian/Poly kernel and returns
    (predictions, svr_mkl, kernel).

    NOTE(review): n, n_test, x_range_test and noise_var are re-assigned to
    fixed values below, so those arguments are effectively ignored --
    confirm whether that is intended.
    """

    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel, PolyKernel, CombinedKernel
    from modshogun import MKLRegression, SVRLight

    # reproducible results
    random.seed(seed)

    # easy regression data: one dimensional noisy sine wave
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    # combined kernel: two Gaussian widths plus a degree-2 polynomial
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 2))
    kernel.append_kernel(GaussianKernel(10, 3))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # constraint generator and MKLRegression
    svr_constraints = SVRLight()
    svr_mkl = MKLRegression(svr_constraints)
    svr_mkl.set_kernel(kernel)
    svr_mkl.set_labels(labels)
    svr_mkl.train()

    # predictions on the test grid
    kernel.init(feats_train, feats_test)
    out = svr_mkl.apply().get_labels()

    return out, svr_mkl, kernel
def regression_libsvr_modular (svm_c=1, svr_param=0.1, n=100,n_test=100, \
  x_range=6,x_range_test=10,noise_var=0.5,width=1, seed=1):
    """Train epsilon-SVR and nu-SVR LibSVR models on a noisy 1-D sine wave.

    Returns (out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel).

    NOTE(review): n, n_test, x_range_test and noise_var are re-assigned to
    fixed values below, so those arguments are effectively ignored --
    confirm whether that is intended.
    """

    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import LibSVR, LIBSVR_NU_SVR, LIBSVR_EPSILON_SVR

    # reproducible results
    random.seed(seed)

    # easy regression data: one dimensional noisy sine wave
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    # two svr models: epsilon and nu
    svr_epsilon = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.train()
    svr_nu = LibSVR(svm_c, svr_param, kernel, labels, LIBSVR_NU_SVR)
    svr_nu.train()

    # predictions
    kernel.init(feats_train, feats_test)
    out1_epsilon = svr_epsilon.apply().get_labels()
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()
    # BUG FIX: the nu outputs previously re-used svr_epsilon, so the trained
    # nu-SVR model's predictions were never actually returned.
    out1_nu = svr_nu.apply().get_labels()
    out2_nu = svr_nu.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, out1_nu, out2_nu, kernel
def regression_kernel_ridge_modular (n=100,n_test=100, \
  x_range=6,x_range_test=10,noise_var=0.5,width=1, tau=1e-6, seed=1):
    """Kernel ridge regression demo on a noisy 1-D sine wave.

    Returns (test predictions, kernel, trained KRR machine).

    NOTE(review): n, n_test, x_range_test and noise_var are re-assigned to
    fixed values below, so those arguments are effectively ignored --
    confirm whether that is intended.
    """

    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    from modshogun import KernelRidgeRegression

    # reproducible results
    random.seed(seed)

    # easy regression data: one dimensional noisy sine wave
    n = 15
    n_test = 100
    x_range_test = 10
    noise_var = 0.5
    X = random.rand(1, n) * x_range

    X_test = array([[float(i) / n_test * x_range_test for i in range(n_test)]])
    Y_test = sin(X_test)
    Y = sin(X) + random.randn(n) * noise_var

    # shogun representation
    labels = RegressionLabels(Y[0])
    feats_train = RealFeatures(X)
    feats_test = RealFeatures(X_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    # train KRR with regularizer tau
    krr = KernelRidgeRegression(tau, kernel, labels)
    krr.train(feats_train)

    # re-initialise the kernel against the test grid before predicting
    kernel.init(feats_train, feats_test)
    out = krr.apply().get_labels()

    # plot results
    #plot(X[0],Y[0],'x') # training observations
    #plot(X_test[0],Y_test[0],'-') # ground truth of test
    #plot(X_test[0],out, '-') # mean predictions of test
    #legend(["training", "ground truth", "mean predictions"])
    #show()

    return out, kernel, krr
def evaluation_cross_validation_regression(train_fname=traindat,
                                           label_fname=label_traindat,
                                           width=0.8,
                                           tau=1e-6):
    """Run 10x repeated 5-fold cross-validation of kernel ridge regression
    (mean-squared-error criterion) and return the CrossValidationResult.

    NOTE(review): the width argument is currently unused (the GaussianKernel
    is default-constructed) -- confirm whether it should be passed through.
    """
    from modshogun import CrossValidation, CrossValidationResult
    from modshogun import MeanSquaredError, CrossValidationSplitting
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel, KernelRidgeRegression, CSVFile

    # training data
    features = RealFeatures(CSVFile(train_fname))
    labels = RegressionLabels(CSVFile(label_fname))

    # kernel and predictor
    kernel = GaussianKernel()
    predictor = KernelRidgeRegression(tau, kernel, labels)

    # splitting strategy for 5 fold cross-validation (for classification its better
    # to use "StratifiedCrossValidation", but here, the std x-val is used
    splitting_strategy = CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = MeanSquaredError()

    # cross-validation instance
    cross_validation = CrossValidation(predictor, features, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # (optional) tell machine to precompute kernel matrix. speeds up. may not work
    predictor.data_lock(labels, features)

    # perform cross-validation and hand the result back to the caller
    # (BUG FIX: the original computed the result but never returned it)
    result = cross_validation.evaluate()
    return result
Exemple #26
0
def transfer_multitask_group_regression(fm_train=traindat,
                                        fm_test=testdat,
                                        label_train=label_traindat):
    """Train a two-task multitask least-squares regression and return the
    task-0 predictions on the training features.
    """
    from modshogun import RegressionLabels, RealFeatures, Task, TaskGroup, MultitaskLSRegression

    # BUG FIX: use the fm_train argument (the original hard-coded the
    # module-level traindat, silently ignoring caller-supplied data).
    features = RealFeatures(fm_train)
    labels = RegressionLabels(label_train)

    # Split the vectors into two contiguous tasks.  Floor division keeps the
    # boundaries integral under Python 3, matching the sibling
    # transfer_multitask_leastsquares_regression example.
    n_vectors = features.get_num_vectors()
    task_one = Task(0, n_vectors // 2)
    task_two = Task(n_vectors // 2, n_vectors)
    task_group = TaskGroup()
    task_group.add_task(task_one)
    task_group.add_task(task_two)

    mtlsr = MultitaskLSRegression(0.1, features, labels, task_group)
    mtlsr.train()
    mtlsr.set_current_task(0)
    out = mtlsr.apply_regression().get_labels()
    return out
Exemple #27
0
def runShogunSVRSpectrumKernel(train_xt, train_lt, test_xt, svm_c=1):
    """Train an epsilon-SVR on a sorted spectrum (CommWordString) kernel and
    return (train predictions, test predictions, kernel).
    """

    ##################################################
    # build sorted word features for the training strings
    charfeat_train = StringCharFeatures(train_xt, DNA)
    feats_train = StringWordFeatures(DNA)
    feats_train.obtain_from_char(charfeat_train, K - 1, K, GAP, False)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    # same preprocessing for the test strings (re-using the fitted sorter)
    charfeat_test = StringCharFeatures(test_xt, DNA)
    feats_test = StringWordFeatures(DNA)
    feats_test.obtain_from_char(charfeat_test, K - 1, K, GAP, False)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, False)
    kernel.io.set_loglevel(MSG_DEBUG)

    # init labels
    labels = RegressionLabels(train_lt)

    # epsilon-SVR model
    # BUG FIX: print() call syntax -- the original Python 2 print statements
    # are a SyntaxError under Python 3 (which the rest of the file targets).
    print("Ready to train!")
    svr_epsilon = LibSVR(svm_c, SVRPARAM, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.io.set_loglevel(MSG_DEBUG)
    svr_epsilon.train()

    # predictions
    print("Making predictions!")
    out1_epsilon = svr_epsilon.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, kernel
Exemple #28
0
        def RunLinearRidgeRegressionShogun(q):
            """Time linear ridge regression; put elapsed time (or -1) on *q*."""
            timer = Timer()

            # Load input dataset; a second file, when present, is the test set.
            Log.Info("Loading dataset", self.verbose)
            test_data = None
            if len(self.dataset) >= 2:
                test_data = np.genfromtxt(self.dataset[1], delimiter=',')

            # The last row of the training set holds the responses.
            X, y = SplitTrainData(self.dataset)
            if "alpha" in options:
                tau = float(options.pop("alpha"))
            else:
                Log.Fatal("Required parameter 'alpha' not specified!")
                raise Exception("missing parameter")

            if len(options) > 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            try:
                with timer:
                    # Perform linear ridge regression.
                    model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
                    model.train()

                    if test_data is not None:
                        model.apply_regression(RealFeatures(test_data.T))

            except Exception:
                q.put(-1)
                return -1

            elapsed = timer.ElapsedTime()
            q.put(elapsed)
            return elapsed
    def RunMetrics(self, options):
        """Compute the Simple MSE metric for linear ridge regression.

        Requires three datasets (train, test, true test labels); returns a
        metrics dict, or None (after Log.Fatal) otherwise.
        """
        if len(self.dataset) >= 3:

            X, y = SplitTrainData(self.dataset)
            # Raw string avoids the invalid '\d' escape warning in Python 3.
            tau = re.search(r"-t (\d+)", options)
            tau = 1.0 if not tau else int(tau.group(1))
            model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
            model.train()

            testData = LoadDataset(self.dataset[1])
            truelabels = LoadDataset(self.dataset[2])

            predictedlabels = model.apply_regression(RealFeatures(
                testData.T)).get_labels()

            SimpleMSE = Metrics.SimpleMeanSquaredError(truelabels,
                                                       predictedlabels)
            metrics_dict = {}
            metrics_dict['Simple MSE'] = SimpleMSE
            return metrics_dict

        else:
            Log.Fatal("This method requires three datasets!")
Exemple #30
0
    def RunMetrics(self, options):
        """Run linear ridge regression and return runtime + Simple MSE metrics.

        Requires three datasets (train, test, true test labels); otherwise
        Log.Fatal reports the misuse and None is returned.
        """
        Log.Info("Perform Linear Ridge Regression.", self.verbose)

        runtime = self.LinearRidgeRegressionShogun(options)
        if runtime < 0:
            return runtime

        metrics = {'Runtime': runtime}

        if len(self.dataset) < 3:
            Log.Fatal("This method requires three datasets!")
            return

        X, y = SplitTrainData(self.dataset)
        if "alpha" not in options:
            Log.Fatal("Required parameter 'alpha' not specified!")
            raise Exception("missing parameter")
        tau = float(options.pop("alpha"))

        if len(options) > 0:
            Log.Fatal("Unknown parameters: " + str(options))
            raise Exception("unknown parameters")

        model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
        model.train()

        test_data = LoadDataset(self.dataset[1])
        true_labels = LoadDataset(self.dataset[2])

        predicted = model.apply_regression(RealFeatures(test_data.T)).get_labels()

        metrics['Simple MSE'] = Metrics.SimpleMeanSquaredError(true_labels,
                                                               predicted)
        return metrics
def regression_randomforest_modular(num_train=500,
                                    num_test=50,
                                    x_range=15,
                                    noise_var=0.2,
                                    ft=feattypes):
    """Train a random forest regressor on noisy y=x data and regress a test
    grid.  Returns (forest, predictions) or None when Shogun is unavailable.
    """
    try:
        from modshogun import RealFeatures, RegressionLabels, CSVFile, RandomForest, MeanRule, PT_REGRESSION
    except ImportError:
        print("Could not import Shogun modules")
        return

    random.seed(1)

    # Training set: y = x plus Gaussian noise.
    train_x = random.rand(1, num_train) * x_range
    train_y = train_x + random.randn(num_train) * noise_var

    # Evenly spaced test grid across the same range.
    test_x = array([[float(i) / num_test * x_range for i in range(num_test)]])

    # Wrap features and labels into Shogun objects.
    feats_train = RealFeatures(train_x)
    feats_test = RealFeatures(test_x)
    labels_train = RegressionLabels(train_y[0])

    # Build and train the forest, averaging the trees' outputs.
    forest = RandomForest(feats_train, labels_train, 20, 1)
    forest.set_feature_types(ft)
    forest.set_machine_problem_type(PT_REGRESSION)
    forest.set_combination_rule(MeanRule())
    forest.train()

    preds = forest.apply_regression(feats_test).get_labels()

    return forest, preds
def stochasticgbmachine_modular(train=traindat,train_labels=label_traindat,ft=feat_types):
	"""Train a stochastic gradient-boosting machine (depth-1 CART base
	learners, squared loss) on 90% of the data and regress the held-out 10%.

	Returns (machine, predictions) or None when Shogun is unavailable.
	"""
	try:
		from modshogun import RealFeatures, RegressionLabels, CSVFile, CARTree, StochasticGBMachine, SquaredLoss
	except ImportError:
		print("Could not import Shogun modules")
		return

	# wrap features and labels into Shogun objects
	feats=RealFeatures(CSVFile(train))
	labels=RegressionLabels(CSVFile(train_labels))

	# divide into training (90%) and test dataset (10%)
	p=np.random.permutation(labels.get_num_labels())
	# BUG FIX: the split point must be an integer -- slicing a numpy array
	# with a float index is an error under Python 3.
	num=int(labels.get_num_labels()*0.9)

	cart=CARTree()
	cart.set_feature_types(ft)
	cart.set_max_depth(1)
	loss=SquaredLoss()
	s=StochasticGBMachine(cart,loss,500,0.01,0.6)

	# train on the first 90% of the permuted indices
	feats.add_subset(np.int32(p[0:num]))
	labels.add_subset(np.int32(p[0:num]))
	s.set_labels(labels)
	s.train(feats)
	feats.remove_subset()
	labels.remove_subset()

	# apply to the remaining 10%
	feats.add_subset(np.int32(p[num:len(p)]))
	labels.add_subset(np.int32(p[num:len(p)]))
	output=s.apply_regression(feats)

	feats.remove_subset()
	labels.remove_subset()

	return s,output