Exemple #1
0
 def __init__(self, data_dictionary, model_target, kernel=LINEAR, cv_segments=10, **args):
     """Build, tune and fit an SVM model from a dictionary of data columns.

     data_dictionary -- mapping of column name to column values; it is
                        deep-copied, so the caller's object is not modified.
     model_target    -- key of the response column inside data_dictionary.
     kernel          -- value stored as 'kernel_type' in the SVM parameters.
     cv_segments     -- stored as self.folds.
     args            -- may carry 'threshold'; stored as self.threshold,
                        falling back to 2.3711 when absent.
     """
     # Threshold: caller-supplied value, otherwise the default.
     self.threshold = args.get('threshold', 2.3711)

     # Bookkeeping attributes, plus a private copy of the data to work on.
     self.model_target = model_target
     self.folds = cv_segments
     columns = deepcopy(data_dictionary)

     # Take the response column out of the copy and turn it into labels.
     self.training_labels = self.Assign_Labels(columns.pop(model_target))

     # Everything left over is a covariate; transpose so each row is one observation.
     self.training_set = np.transpose(columns.values())
     self.headers = columns.keys()

     # Scale the covariates to [-1,1]
     self.Scale_Covariates()

     # First-pass model using the initial parameter set.
     self.svm_problem = svm.svm_problem(self.training_labels, self.training_set)
     self.svm_params = dict(kernel_type=kernel, weight_label=[0,1], weight=[10,1])
     first_pass = svm.svm_parameter(**self.svm_params)
     self.model = svm.svm_model(self.svm_problem, first_pass)

     # Parameter search; Select_Linear_Model is called with (-5, 10).
     self.Select_Linear_Model(-5, 10)

     # Final refit with class-membership probabilities switched on.
     self.svm_params['probability'] = 1
     final_pass = svm.svm_parameter(**self.svm_params)
     self.model = svm.svm_model(self.svm_problem, final_pass)
Exemple #2
0
	def constructSVMModels(self, db_250k, arrays_to_form_model, array_id2median_intensity,\
						minPercUnCoveredByLerContig=0.6, cnv_method_id=6, kernel_type=None, C=10, gamma=0., \
						eps=1e-2, deletedFractionType=1):
		"""
		Train one libsvm model per array and return them as a dict keyed by array id.

		Arrays that are absent from array_id2median_intensity are skipped.
		kernel_type falls back to RBF when None. All models share a single
		svm_parameter built from C, eps, gamma and probability=1.

		2010-7-25
			add argument deletedFractionType
				1: CNVCall.percUnCoveredByLerContig
				2: CNVCall.fractionDeletedInPECoverageData
		2010-7-1
		"""
		sys.stderr.write("Constructing SVM models for %s arrays ...\n"%(len(arrays_to_form_model)))
		from svm import svm_problem, svm_parameter, svm_model, cross_validation, LINEAR, POLY, RBF
		chosen_kernel = RBF if kernel_type is None else kernel_type
		shared_param = svm_parameter(C=C, eps=eps, probability=1, gamma=gamma, kernel_type=chosen_kernel)
		array_id2model = {}
		for array_id in arrays_to_form_model:
			# only arrays with a known median intensity get a model
			if array_id in array_id2median_intensity:
				featureData = self.getCNVFeatureData(db_250k, array_id=array_id, \
						minPercUnCoveredByLerContig=minPercUnCoveredByLerContig, cnv_method_id=cnv_method_id, \
						replaceAmpWithMedianIntensity=False, deletedFractionType=deletedFractionType)
				training_problem = svm_problem(featureData.class_label_ls, featureData.feature_data)
				array_id2model[array_id] = svm_model(training_problem, shared_param)
		sys.stderr.write("%s models.\n"%(len(array_id2model)))
		return array_id2model
Exemple #3
0
 def train(self, examples, parameters=None):
     """Train a libsvm model on ``examples`` with per-class weights.

     examples   -- iterable of records; index 1 of each record is used as the
                   label and index 2 as the sample.
     parameters -- optional dict of keyword arguments for svm.svm_parameter.
                   A lower-case "c" key is accepted as an alias for "C".
                   ``None`` (the default) means no extra parameters.

     The caller's ``parameters`` dict is never modified. Each class k is
     weighted by 1 - count(k) / total, using the counts in self.classes
     (presumably filled in by the preceding helper calls -- verify).
     The trained model is stored in self.model.
     """
     self.isBinary = self.isBinaryProblem(examples)
     examples = self.filterTrainingSet(examples)
     ExampleUtils.writeExamples(examples, self.tempDir + "/train.dat")
     # Work on a private copy: tolerates the None default and keeps the
     # "c" -> "C" rename from leaking into the caller's dict.
     parameters = dict(parameters) if parameters is not None else {}
     if "c" in parameters:
         assert "C" not in parameters
         parameters["C"] = parameters.pop("c")
     totalExamples = float(sum(self.classes.values()))
     weight_label = sorted(self.classes.keys())
     # Rarer classes receive larger weights.
     weight = [1.0 - self.classes[k] / totalExamples for k in weight_label]
     libSVMparam = svm.svm_parameter(nr_weight=len(self.classes),
                                     weight_label=weight_label,
                                     weight=weight,
                                     **parameters)
     labels = []
     samples = []
     for example in examples:
         labels.append(example[1])
         samples.append(example[2])
     problem = svm.svm_problem(labels, samples)
     self.model = svm.svm_model(problem, libSVMparam)
 def load_model(self, file_name=''):
     """
     Load the svm model stored in ``file_name``.

     When ``file_name`` is empty, ``name(self) + '.model'`` is used instead.
     The resolved name is also handed to the parent class's load_model.
     """
     if not file_name:
         file_name = name(self) + '.model'
     self.model = svm.svm_model(file_name)
     super(SvmLearner, self).load_model(file_name)
Exemple #5
0
    def train(self,labels,data):
        '''
        Fit an epsilon-SVR model with an RBF kernel.

        @param labels: A list of target values, one per feature vector.
        @param data: A 2D array or list of feature vectors.  One feature vector per row.
        '''
        # Lists and tuples are converted to double arrays; anything else is used as given
        if isinstance(data, (list, tuple)):
            data = np.array(data, dtype=np.double)
        labels = np.array(labels, dtype=np.double)

        # Two training-time transforms: the preprocessor first, then the label scaler
        labels, data = self._preprocessor.train(labels, data)
        labels, data = self._label_scale.train(labels, data)

        # libsvm settings and training problem (plain Python lists are passed in)
        svr_settings = svm.svm_parameter(
            svm_type=svm.EPSILON_SVR,
            kernel_type=svm.RBF,
            p=self._epsilon,
            gamma=self._gamma,
        )
        svr_problem = svm.svm_problem(labels.tolist(), data.tolist())

        # Fit and keep the model
        self._model = svm.svm_model(svr_problem, svr_settings)
Exemple #6
0
    def train(self, labels, data):
        '''
        Fit an epsilon-SVR model with an RBF kernel.

        @param labels: A list of target values, one per feature vector.
        @param data: A 2D array or list of feature vectors.  One feature vector per row.
        '''
        # Normalise input types: list/tuple data becomes a double array
        if isinstance(data, (list, tuple)):
            data = np.array(data, dtype=np.double)
        labels = np.array(labels, dtype=np.double)

        # Apply the preprocessor, then the label scaler
        labels, data = self._preprocessor.train(labels, data)
        labels, data = self._label_scale.train(labels, data)

        # Describe the regression settings and the training problem
        settings = dict(svm_type=svm.EPSILON_SVR, kernel_type=svm.RBF,
                        p=self._epsilon, gamma=self._gamma)
        svr_param = svm.svm_parameter(**settings)
        svr_problem = svm.svm_problem(labels.tolist(), data.tolist())

        # Train and store the model
        self._model = svm.svm_model(svr_problem, svr_param)
Exemple #7
0
    def _stop_training(self):
        """Fit one libsvm model per output dimension.

        For each column of self.Y the (max, min) pair is recorded in
        self.labels. A column whose max equals its min is flagged in
        self._dosim and gets ``None`` placeholders in self.problems and
        self.models instead of a trained model.

        Raises mdp.NodeException when a trained model reports more than
        two classes.
        """
        # start from a clean slate
        self.problems = []
        self.models = []
        self.labels = []
        self._dosim = numpy.zeros(self._output_dim, dtype='int')

        for dim in range(self._output_dim):
            targets = self.Y[:, dim]

            # note the order: (max, min)
            extremes = (targets.max(), targets.min())
            self.labels.append(extremes)

            if extremes[0] == extremes[1]:
                # only one distinct label: nothing to train for this dimension
                self._dosim[dim] = 1
                self.problems.append(None)
                self.models.append(None)
                continue

            # build the problem and train its model
            problem = svm.svm_problem(targets, self.X)
            self.problems.append(problem)
            model = svm.svm_model(problem, self.parameters)
            self.models.append(model)

            # anything beyond two classes is rejected
            if model.get_nr_class() > 2:
                raise mdp.NodeException(
                    "Only binary classification possible with libsvm for now !"
                )

        # reset data for training
        self.reset_model()
Exemple #8
0
    def load(self):
        #Check to see if learner already exists
        if '%s_learner'% self.className not in os.listdir('.'):
            print 'Note: %s learner does not exist yet' % self.className
            return 
        else:
            print '%s_learner' % self.className
            #Update the model
            self.models.append(svm.svm_model('%s_learner' % self.className))

            #Update the labeled data
            temp = pd.read_csv('%s_learner.csv' % self.className)
            temp.rename(columns={temp.columns[0]:'index'},inplace=True)
            temp.set_index('index',inplace=True)
            text_file = open('%s_learner.txt' % self.className, "r")

            #Print most recent accuracy
            details = eval(text_file.read())
            print 'Last accuracy: %s' % details['confusion_matrix']
            self.nbc = details['NBC']

            #Convert everything back to a dataframe
            for el in details.keys():
                try:
                    details[el] = pd.DataFrame(details[el])
                except:
                    continue

            #Update most recent test results
            self.test_results = details
            self.unlabeled_datasets.data = pd.concat([temp.ix[[el for el in temp.index if el not in self.unlabeled_datasets.data.index]],self.unlabeled_datasets.data])
Exemple #9
0
    def train(self, c, g, probability=True, compensation=True,
              path=None, filename=None, save=True):
        """Train an RBF-kernel libsvm model on the normalized data.

        c, g         -- passed to libsvm as C and gamma.
        probability  -- truthy to request probability estimates.
        compensation -- truthy to set the class weights returned by
                        self._calculateCompensation on the parameters.
        path         -- target directory; defaults to self.dctEnvPaths['data'].
        filename     -- model file name; defaults to the 'strArffFileName'
                        option with its extension replaced by '.model'.
        save         -- truthy to write the model to path/filename.

        Returns the tuple (problem, model).
        """
        if filename is None:
            arff_stem = os.path.splitext(self.getOption('strArffFileName'))[0]
            filename = arff_stem + '.model'
        if path is None:
            path = self.dctEnvPaths['data']

        param = svm.svm_parameter(kernel_type=svm.RBF, C=c, gamma=g,
                                  probability=int(bool(probability)))

        labels, samples = self.getData(normalize=True)

        # because we train the SVM with dict we need to redefine the zero-insert
        self.hasZeroInsert = False
        if self.oClassifier is not None:
            self.oClassifier.setOption('hasZeroInsert', True)

        if compensation:
            weight, weight_label = self._calculateCompensation(labels)
            param.weight = weight
            param.weight_label = weight_label
            param.nr_weight = len(weight)

        problem = svm.svm_problem(labels, samples)
        model = svm.svm_model(problem, param)
        if save:
            target_file = os.path.join(path, filename)
            model.save(target_file)
        return problem, model
Exemple #10
0
    def train(self, search=False, **kwargs):
        """ Fit the SVM to the dataset, either directly or through a meta-parameter grid search.
        @param search: optional name of grid search class to use for RBF kernels: 'GridSearch' or 'GridSearchDOE'
        @param log2g: base 2 log of the RBF width parameter
        @param log2C: base 2 log of the slack parameter
        @param searchlog: filename into which to dump the search log
        @param others: ...are passed through to the grid search and/or libsvm
        """

        self.setParams(**kwargs)
        problem = svm_problem(self.ds['target'].flatten(),
                              self.ds['input'].tolist())
        if not search:
            # plain training with the current parameter set
            param = svm_parameter(**self.params)
            model = svm_model(problem, param)
            logging.info("Training completed with parameters:")
            logging.info(repr(param))
        else:
            # The search class is looked up by name through eval(). The
            # argument string refers to the local names `problem` and `self`,
            # so those must keep their names.
            # NOTE(review): eval() runs arbitrary code; `search` must come
            # from a trusted caller.
            call_args = "(problem, self.svmtarget, cmin=[0,-7],cmax=[25,1], cstep=[0.5,0.2],plotflag=self.plot,searchlog=self.searchlog,**self.params)"
            model = eval(search + call_args)

        self.svm.setModel(model)
Exemple #11
0
    def train(self, c, g, probability=True, compensation=True,
              path=None, filename=None, save=True):
        """Train an RBF-kernel libsvm model on the normalized data.

        c, g         -- passed to libsvm as C and gamma.
        probability  -- truthy to request probability estimates.
        compensation -- truthy to set the class weights returned by
                        self._calculateCompensation on the parameters.
        path         -- target directory; defaults to self.data_dir.
        filename     -- model file name; defaults to self.arff_file with its
                        extension replaced by '.model'.
        save         -- truthy to write the model to path/filename.

        Returns the tuple (problem, model).
        """
        if filename is None:
            arff_stem = splitext(self.arff_file)[0]
            filename = arff_stem + '.model'
        if path is None:
            path = self.data_dir

        param = svm.svm_parameter(kernel_type=svm.RBF, C=c, gamma=g,
                                  probability=int(bool(probability)))

        labels, samples = self.getData(normalize=True)

        # because we train the SVM with dict we need to redefine the zero-insert
        self.has_zero_insert = False
        if self.classifier is not None:
            self.classifier.setOption('hasZeroInsert', True)

        if compensation:
            weight, weight_label = self._calculateCompensation(labels)
            param.weight = weight
            param.weight_label = weight_label
            param.nr_weight = len(weight)

        problem = svm.svm_problem(labels, samples)
        model = svm.svm_model(problem, param)
        if save:
            target_file = os.path.join(path, filename)
            model.save(target_file)
        return problem, model
def svm(y,K,**param_kw):
    """
    Solve the SVM problem. Return ``(alpha, b)``

    `y`
      labels
    `K`
      precomputed kernel matrix

    Any additional keyword arguments are forwarded to ``svm_parameter``.

    This wrapper does two jobs. It prepends a column of 1-based row
    numbers to `K` and converts everything to doubles before handing the
    data to libsvm with ``kernel_type=PRECOMPUTED``. It then passes the
    trained model to ``get_alpha_b`` and returns that result, rather
    than the high-level model object libsvm normally exposes.

    """
    # 1-based row numbers as a column vector, placed in front of the kernel
    serial = arange(1,len(K)+1).reshape((-1,1))
    stacked = hstack((serial, K))

    targets = asarray(y,dtype=double)
    kernel_rows = asarray(stacked,dtype=double)

    settings = svm_parameter(kernel_type=PRECOMPUTED,**param_kw)
    trained = svm_model(svm_problem(targets,kernel_rows), settings)
    return get_alpha_b(trained)
Exemple #13
0
def predict_post():
    """
    Prediction page

    @@@
    # args

    | args | nullable | type | remark |
    |--------|--------|--------|--------|
    |    strategy    |    false    |    string   |    prediction model: 'bayes' or 'svm'    |
    |    company    |    false    |    string   |    stock symbol    |
    |    length    |    false    |    int   |    the date length for prediction   |
    # return
    | return |  type | remark |
    |--------|--------|--------|
    |    pred   |    list   |    predicted results    |

    @@@
    """
    form = predictForm()
    strategy = request.form['strategy']
    company = request.form['company']
    length = request.form['length']

    # Map each supported strategy name to the function that runs it.
    predictors = {'bayes': bayes_model, 'svm': svm_model}
    if strategy not in predictors:
        # Previously an unknown strategy left `pred` unbound and failed later
        # with an UnboundLocalError; fail here with a clear message instead.
        raise ValueError("Unknown prediction strategy: %r" % (strategy,))

    pred = predictors[strategy](company, int(length))
    pred = pred.tolist()
    # `dynamic` is set to the current timestamp on every request.
    return render_template('predict.html', form=form, pred=pred,
                           dynamic=time.time(), strategy=strategy)
Exemple #14
0
 def train(self, session, doc):
     """Train on the raw data of ``doc``, save the model and set self.predicting to 1.

     The model is written to the path configured as 'modelPath'.
     """
     # doc here is [[class,...], [{vector},...]]
     labels, vectors = doc.get_raw(session)
     training_problem = svm.svm_problem(labels, vectors)
     self.model = svm.svm_model(training_problem, self.param)
     target = str(self.get_path(session, 'modelPath'))
     self.model.save(target)
     self.predicting = 1
Exemple #15
0
 def train(self, session, doc):
     """Train on the raw data of ``doc``, save the model and set self.predicting to 1.

     The model is written to the path configured as 'modelPath'.
     """
     # doc here is [[class,...], [{vector},...]]
     labels, vectors = doc.get_raw(session)
     training_problem = svm.svm_problem(labels, vectors)
     self.model = svm.svm_model(training_problem, self.param)
     target = str(self.get_path(session, 'modelPath'))
     self.model.save(target)
     self.predicting = 1
 def train(self, dataset):
     """
     Train the svm classifier on ``dataset``.

     The parent class's train() runs first; the model is then fitted on
     self.results (labels) and self.observations (samples) with a linear
     kernel, C=10 and probability estimates enabled.
     """
     super(SvmLearner, self).train(dataset)
     training_problem = svm.svm_problem(self.results, self.observations)
     settings = dict(kernel_type=svm.LINEAR, C=10, probability=1)
     self.model = svm.svm_model(training_problem, svm.svm_parameter(**settings))
Exemple #17
0
 def Select_Linear_Model(self, C_min=-10, C_steps=11):
     """Search for C in three successively finer passes, then refit the model.

     The first pass calls __Linear_Search__(C_min, C_steps, 1). Each later
     pass restarts slightly below log2 of the previous result with a
     smaller third argument. The final C is stored in self.svm_params
     and self.model is rebuilt with it.
     """
     # Coarse pass over the caller-supplied range
     best_C = self.__Linear_Search__(C_min, C_steps, 1)

     # Refinement passes: (offset below log2(best_C), second argument, third argument)
     for offset, steps, step_size in ((2, 50, 0.08), (0.5, 50, 0.02)):
         best_C = self.__Linear_Search__(np.log2(best_C)-offset, steps, step_size)

     self.svm_params['C'] = best_C

     tuned = svm.svm_parameter(**self.svm_params)
     self.model = svm.svm_model(self.svm_problem, tuned)
Exemple #18
0
    def train(self,trainset):
        """
        Fit a LIBSVM C-SVC model on ``trainset``.

        ``trainset`` is iterated as (input, target) pairs and must provide
        ``metadata['targets']`` (and ``metadata['class_to_id']`` when
        label weights are in use). Raises ValueError for an unknown
        kernel name. The trained model is stored in self.svm.
        """

        self.n_classes = len(trainset.metadata['targets'])

        # Translate the kernel name into the LIBSVM constant
        kernel_types = {'linear':libsvm.LINEAR,'polynomial':libsvm.POLY,
                        'rbf':libsvm.RBF,'sigmoid':libsvm.SIGMOID}
        if self.kernel not in kernel_types:
            raise ValueError('Invalid kernel: '+self.kernel+'. Should be either \'linear\', \'polynomial\', \'rbf\' or \'sigmoid\'')

        # Per-class weights: none by default, otherwise 1 for every class
        # except those overridden in self.label_weights
        nr_weight = 0
        weight_label = []
        weight = []
        if self.label_weights is not None:
            class_to_id = trainset.metadata['class_to_id']
            nr_weight = self.n_classes
            weight_label = range(self.n_classes)
            weight = [1]*self.n_classes
            for class_name,class_weight in self.label_weights.iteritems():
                weight[class_to_id[class_name]] = class_weight

        settings = dict(svm_type=libsvm.C_SVC,
                        kernel_type=kernel_types[self.kernel],
                        degree=self.degree,
                        gamma=self.gamma,
                        coef0=self.coef0,
                        C=self.C,
                        probability=int(self.output_probabilities),
                        cache_size=self.cache_size,
                        eps=self.tolerance,
                        shrinking=int(self.shrinking),
                        nr_weight=nr_weight,
                        weight_label=weight_label,
                        weight=weight)
        libsvm_params = libsvm.svm_parameter(**settings)

        # Put training set in the appropriate format:
        #  a tuple input is treated as sparse and becomes a dictionary
        #  mapping the elements of input[1] to those of input[0]
        #  anything else is assumed to be a sequence and is kept intact
        libsvm_inputs = []
        libsvm_targets = []
        for features,target in trainset:
            if type(features) == tuple:
                libsvm_inputs.append(dict(zip(features[1],features[0])))
            else:
                libsvm_inputs.append(features)
            libsvm_targets.append(float(target)) # LIBSVM requires double-valued targets

        libsvm_problem = libsvm.svm_problem(libsvm_targets,libsvm_inputs)

        # Train SVM
        self.svm = libsvm.svm_model(libsvm_problem,libsvm_params)
Exemple #19
0
 def Select_Model(self, C_min=-10, C_steps=11,  gamma_min=-15, gamma_steps=16):
     """Search for C and gamma in two passes, then refit the model.

     The first pass calls __Search__ over the caller-supplied ranges with
     both step arguments set to 1. The second restarts one unit below
     log2 of each first-pass result, with 100 steps of 0.02. The chosen
     values are stored in self.svm_params and self.model is rebuilt.
     """
     # Coarse pass
     C, gamma = self.__Search__(C_min, C_steps, gamma_min, gamma_steps, 1, 1)

     # Fine pass around the coarse result. Extra passes with step sizes
     # 0.1 and 0.01 were tried at some point and are currently disabled.
     fine_C_start = np.log2(C)-1
     fine_gamma_start = np.log2(gamma)-1
     C, gamma = self.__Search__(fine_C_start, 100, fine_gamma_start, 100, 0.02, 0.02)

     self.svm_params['C'] = C
     self.svm_params['gamma'] = gamma

     tuned = svm.svm_parameter(**self.svm_params)
     self.model = svm.svm_model(self.svm_problem, tuned)
Exemple #20
0
    def __setstate__(self,state):
        '''Restore the object from pickled state (necessary for pickling).

        Every entry of ``state`` is copied into the instance dictionary
        except 'svm'. That value is the serialized model text: it is written
        to a temporary file and loaded from there with svm.svm_model.
        '''
        for key,value in state.items():
            if key == 'svm':
                # mkstemp creates the file atomically; mktemp only returns a
                # name and is open to races.
                handle, filename = tempfile.mkstemp()
                try:
                    # Close (and flush) the file before libsvm reads it.
                    with os.fdopen(handle,'w') as model_file:
                        model_file.write(value)
                    self.svm = svm.svm_model(filename)
                finally:
                    # Clean up even when loading the model fails.
                    os.remove(filename)
                continue

            self.__dict__[key] = value
Exemple #21
0
    def __setstate__(self, state):
        '''Restore the object from pickled state (necessary for pickling).

        Every entry of ``state`` is copied into the instance dictionary
        except 'svm'. That value is the serialized model text: it is written
        to a temporary file and loaded from there with svm.svm_model.
        '''
        for key, value in state.items():
            if key == 'svm':
                # mkstemp creates the file atomically; mktemp only returns a
                # name and is open to races.
                handle, filename = tempfile.mkstemp()
                try:
                    # Close (and flush) the file before libsvm reads it.
                    with os.fdopen(handle, 'w') as model_file:
                        model_file.write(value)
                    self.svm = svm.svm_model(filename)
                finally:
                    # Clean up even when loading the model fails.
                    os.remove(filename)
                continue

            self.__dict__[key] = value
def readmodel(model):
    """Reads the model and parameters for the given model name.

    Loads '<model>.model' with libsvm and reads two lines from
    '<model>.params': the first is stored on the loaded model as
    ``scales``, the second is returned as ``simmeths``.

    Raises web.notfound if the name is not in MODELS.
    Returns (model, simmeths)"""
    if model not in MODELS:
        raise web.notfound('No model %s. Choices are: %s' % (model, ', '.join(MODELS)))
    # Keep the name separately: the log line below should report the name,
    # not the loaded model object.
    modelname = model
    modelfname = modelname+'.model'
    from svm import svm_model
    t1 = time.time()
    loaded = svm_model(modelfname)
    # The with-block closes the params file even if parsing fails.
    with open(modelfname.replace('.model', '.params')) as f:
        # NOTE(review): eval() executes the file's contents; the .params
        # files must come from a trusted source.
        loaded.scales = eval(f.readline().strip())
        simmeths = eval(f.readline().strip())
    log('Loaded verification model for %s from %s with %d dims and simmeths %s in %0.3f secs' % (modelname, modelfname, len(loaded.scales), simmeths, time.time()-t1))
    return (loaded, simmeths)
Exemple #23
0
    def __init__(self, data_dir, svm_prefix, has_zero_insert):
        """Load the libSVM model and range files for ``svm_prefix`` from ``data_dir``.

        Expects '<svm_prefix>.model' and '<svm_prefix>.range' inside
        ``data_dir``; raises IOError if either one is missing.
        self.probability reflects whether the loaded model's
        ``probability`` attribute equals 1.
        """
        super(LibSvmClassifier, self).__init__()
        self.data_dir = data_dir
        self.svm_prefix = svm_prefix
        self.has_zero_insert = has_zero_insert

        # Model file: fail early when it is not there
        model_path = join(data_dir, svm_prefix + '.model')
        if not os.path.isfile(model_path):
            raise IOError("libSVM model file '%s' not found!" % model_path)
        self.logger.info("Loading libSVM model file '%s'." % model_path)
        self.svm_model = svm_model(model_path)

        # Range file, handed to the Normalizer
        range_file = join(data_dir, svm_prefix + '.range')
        if not isfile(range_file):
            raise IOError("libSVM range file '%s' not found!" % range_file)
        self.logger.info("Loading libSVM range file '%s'." % range_file)
        self.normalizer = Normalizer(range_file)

        self.probability = bool(self.svm_model.probability == 1)
Exemple #24
0
    def __init__(self, data_dir, svm_prefix, has_zero_insert):
        """Load the libSVM model and range files for ``svm_prefix`` from ``data_dir``.

        Expects '<svm_prefix>.model' and '<svm_prefix>.range' inside
        ``data_dir``; raises IOError if either one is missing.
        self.probability reflects whether the loaded model's
        ``probability`` attribute equals 1.
        """
        super(LibSvmClassifier, self).__init__()
        self.data_dir = data_dir
        self.svm_prefix = svm_prefix
        self.has_zero_insert = has_zero_insert

        # Model file: fail early when it is not there
        model_path = join(data_dir, svm_prefix + ".model")
        if not os.path.isfile(model_path):
            raise IOError("libSVM model file '%s' not found!" % model_path)
        self.logger.info("Loading libSVM model file '%s'." % model_path)
        self.svm_model = svm_model(model_path)

        # Range file, handed to the Normalizer
        range_file = join(data_dir, svm_prefix + ".range")
        if not isfile(range_file):
            raise IOError("libSVM range file '%s' not found!" % range_file)
        self.logger.info("Loading libSVM range file '%s'." % range_file)
        self.normalizer = Normalizer(range_file)

        self.probability = bool(self.svm_model.probability == 1)
Exemple #25
0
    def train(self, search=False, **kwargs):
        """ Fit the SVM to the dataset, either directly or through a meta-parameter grid search.

        :key search: optional name of grid search class to use for RBF kernels: 'GridSearch' or 'GridSearchDOE'
        :key log2g: base 2 log of the RBF width parameter
        :key log2C: base 2 log of the slack parameter
        :key searchlog: filename into which to dump the search log
        :key others: ...are passed through to the grid search and/or libsvm
        """

        self.setParams(**kwargs)
        problem = svm_problem(self.ds['target'].flatten(), self.ds['input'].tolist())
        if not search:
            # plain training with the current parameter set
            param = svm_parameter(**self.params)
            model = svm_model(problem, param)
            logging.info("Training completed with parameters:")
            logging.info(repr(param))
        else:
            # The search class is looked up by name through eval(). The
            # argument string refers to the local names `problem` and `self`,
            # so those must keep their names.
            # NOTE(review): eval() runs arbitrary code; `search` must come
            # from a trusted caller.
            call_args = "(problem, self.svmtarget, cmin=[0,-7],cmax=[25,1], cstep=[0.5,0.2],plotflag=self.plot,searchlog=self.searchlog,**self.params)"
            model = eval(search + call_args)

        self.svm.setModel(model)
Exemple #26
0
 def train(self, examples, parameters=None):
     """Train a libsvm model on ``examples`` with per-class weights.

     examples   -- iterable of records; index 1 of each record is used as the
                   label and index 2 as the sample.
     parameters -- optional dict of keyword arguments for svm.svm_parameter.
                   A lower-case "c" key is accepted as an alias for "C".
                   ``None`` (the default) means no extra parameters.

     The caller's ``parameters`` dict is never modified. Each class k is
     weighted by 1 - count(k) / total, using the counts in self.classes
     (presumably filled in by the preceding helper calls -- verify).
     The trained model is stored in self.model.
     """
     self.isBinary = self.isBinaryProblem(examples)
     examples = self.filterTrainingSet(examples)
     ExampleUtils.writeExamples(examples, self.tempDir+"/train.dat")
     # Work on a private copy: tolerates the None default and keeps the
     # "c" -> "C" rename from leaking into the caller's dict.
     parameters = dict(parameters) if parameters is not None else {}
     if "c" in parameters:
         assert "C" not in parameters
         parameters["C"] = parameters.pop("c")
     totalExamples = float(sum(self.classes.values()))
     weight_label = sorted(self.classes.keys())
     # Rarer classes receive larger weights.
     weight = [1.0-self.classes[k]/totalExamples for k in weight_label]
     libSVMparam = svm.svm_parameter(nr_weight=len(self.classes),
                                     weight_label=weight_label,
                                     weight=weight,
                                     **parameters)
     labels = []
     samples = []
     for example in examples:
         labels.append(example[1])
         samples.append(example[2])
     problem = svm.svm_problem(labels, samples)
     self.model = svm.svm_model(problem, libSVMparam)
Exemple #27
0
def bench_svm(X, Y, T):
    """
    bench with swig-generated wrappers that come with libsvm

    Times training on (X, Y) plus one predict call per row of T, and
    appends the elapsed time to the module-level ``svm_results`` list.
    The array-to-list conversions all happen before the clock starts.
    """

    import svm

    X1 = X.tolist()
    Y1 = Y.tolist()
    T1 = T.tolist()

    gc.collect()

    # start time
    tstart = datetime.now()
    problem = svm.svm_problem(Y1, X1)
    param = svm.svm_parameter(svm_type=0, kernel_type=0)
    model = svm.svm_model(problem, param)
    # Use the pre-converted list so the conversion of T is not timed,
    # matching how X and Y are handled.
    for i in T1:
        model.predict(i)
    delta = (datetime.now() - tstart)
    # stop time
    svm_results.append(delta.seconds + delta.microseconds/mu_second)
Exemple #28
0
 def load_model(self, session, path):
     """Load the svm model stored at ``path`` and set self.predicting to 1.

     Raises ConfigFileException(path) if loading fails for any reason.
     """
     try:
         self.model = svm.svm_model(path.encode('utf-8'))
         self.predicting = 1
     except Exception:
         # Report load errors as a configuration problem; unlike a bare
         # except, this lets KeyboardInterrupt and SystemExit propagate.
         raise ConfigFileException(path)
Exemple #29
0
from numpy import *

# Demo script: train a libsvm classifier on four 2-D points and print its
# predictions, then start preparing the same data for the LIBLINEAR part.

# a two-class problem: the four corners of the unit square, where the
# points (0,1) and (1,0) are labelled +1 and (0,0) and (1,1) are labelled -1
#labels = array([0., 1., 1., 2.])
labels = array([-1, 1, 1, -1])
samples = array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])

# set the parameters of the SVM
# (created with a linear kernel, then switched to RBF before training)
param = svm.svm_parameter(kernel_type=svm.LINEAR, C=10)
param.kernel_type = svm.RBF

# svm_problem is used to hold the training data for the problem
prob = svm.svm_problem(labels, samples)

# now construct the model
model = svm.svm_model(prob, param)
print "Number of classes:", model.get_nr_class()

# predict each of the four training points with the model
# (the commented-out line below is the single-sample variant):
#testdata = array([1., 0.])
testdata = array([[1., 0.], [1., 1.], [0., 0.], [0., 1.]])
for data in testdata:
    print "One Prediction: ", model.predict(data)
    print "Desicion Values of the Prediction: ", model.predict_values(
        data)  #[(1,-1)]
#    print "Probability of the Prediction: ",model.predict_probability( data )

print "---------LIBLINEAR----------------"

# Split the samples into two groups of two and convert each vector with
# ll.vector2sparse (presumably a LIBLINEAR wrapper helper -- TODO confirm).
# NOTE(review): this grouping puts samples 0-1 in class1 and 2-3 in class2,
# which differs from the `labels` array above.
class1 = [ll.vector2sparse(samples[0]), ll.vector2sparse(samples[1])]
class2 = [ll.vector2sparse(samples[2]), ll.vector2sparse(samples[3])]
Exemple #30
0
    def train_SVR_Linear(self,
                         labels,
                         vectors,
                         verbose,
                         C_range,
                         callback=None):
        '''Private use only'''
        # combine the labels and vectors into one set.
        data = []
        for i in range(len(labels)):
            data.append([labels[i], vectors[i]])

        #shuffle the data
        rng = random.Random()
        if self.random_seed != None:
            rng.seed(self.random_seed)
        rng.shuffle(data)

        # partition into validation and training
        if type(
                self.validation_size
        ) == float and self.validation_size > 0.0 and self.validation_size < 1.0:
            training_cutoff = int(len(data) * (1.0 - self.validation_size))
        elif type(self.validation_size
                  ) == int and self.validation_size < len(labels):
            training_cutoff = len(labels) - self.validation_size
        else:
            raise NotImplementedError(
                "Cannot determine validation set from %s" %
                self.validation_size)

        if verbose: print "Training Cutoff:", len(labels), training_cutoff
        training_data = data[:training_cutoff]
        validation_data = data[training_cutoff:]

        tmp_labels = []
        tmp_vectors = []
        for each in training_data:
            tmp_labels.append(each[0])
            tmp_vectors.append(each[1])

        prob = svm.svm_problem(tmp_labels, tmp_vectors)

        training_info = []
        training_svm = []
        training_table = Table()
        self.training_table = training_table
        i = 0
        for C in C_range:

            param = svm.svm_parameter(svm_type=self.svm_type,
                                      kernel_type=svm.LINEAR,
                                      C=C,
                                      p=self.epsilon,
                                      nu=self.nu)

            test_svm = svm.svm_model(prob, param)

            mse = 0.0
            total = len(validation_data)
            for label, vector in validation_data:
                pred = test_svm.predict(vector)
                error = label - pred
                mse += error * error
            mse = mse / total

            training_svm.append(test_svm)
            training_info.append([C, mse])
            training_table.setElement(i, 'C', C)
            training_table.setElement(i, 'mse', mse)
            i += 1

            if callback != None:
                callback(int(100 * float(i) / len(C_range)))

        if verbose: print
        if verbose: print "------------------------------"
        if verbose: print " Tuning Information:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        best = training_info[0]
        best_svm = training_svm[0]
        for i in range(len(training_info)):
            each = training_info[i]
            if verbose: print " %8.3e  %0.8f" % (each[0], each[1])
            if best[-1] > each[-1]:
                best = each
                best_svm = training_svm[i]
        if verbose: print "------------------------------"
        if verbose: print
        if verbose: print "------------------------------"
        if verbose: print " Best Tuning:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        if verbose: print " %8.3e  %0.8f" % (best[0], best[1])
        if verbose: print "------------------------------"
        if verbose: print
        self.training_info = training_info
        self.C = best[0]
        self.error = best[1]

        self.svm = best_svm
    return classifier

# Inject the functionality into the vigra.learning.RandomForest class:
# dumpRF is attached as "dumpToH5G", and reconstructRF is wrapped in a
# MethodType tied to the class itself and attached as "reconstructFromH5G".
vigra.learning.RandomForest.dumpToH5G = dumpRF
vigra.learning.RandomForest.reconstructFromH5G = types.MethodType(reconstructRF, vigra.learning.RandomForest)



if __name__ == '__main__':

    at = vigra.VigraArray.defaultAxistags(4)

    at.dropChannelAxis()
    import svm

    svmmod = svm.svm_model()

    testObjects = [ numpy.zeros((100,20,7),numpy.uint8), at,[at,numpy.zeros((100,20,7),numpy.uint8)], {"pups" : at}, [at, "test", 42, 42.0, {"42" : 42,"test" : ["test"]}], svmmod]

    for o in testObjects:
        f = h5py.File("/tmp/test.h5","w")
        g = f.create_group("/testg")
        g.dumpObject(o)
        o2 = g.reconstructObject()

        print
        print "################"
        print "Original:", o
        print "------"
        print "Result  :", o2
        print o2.__class__
Exemple #32
0
    def train_SVR_Linear(self, labels, vectors, verbose, C_range, callback=None):
        '''Private use only.

        Tune the cost parameter C of a linear-kernel SVM on a held-out
        validation split and keep the model with the lowest mean squared
        error.

        The (label, vector) pairs are shuffled (seeded with
        self.random_seed when it is not None) and split according to
        self.validation_size: a float strictly between 0 and 1 is the
        fraction held out, an int strictly between 0 and len(labels) is the
        number of samples held out.  One model is trained per value in
        C_range using self.svm_type, self.epsilon and self.nu.

        Arguments:
            labels   -- target values, one per vector; they are subtracted
                        from the predictions, so they must be numeric.
            vectors  -- feature vectors, indexable in parallel with labels.
            verbose  -- when true, print the split size and tuning tables.
            C_range  -- non-empty sequence of C values to try; len() is
                        taken of it when reporting progress.
            callback -- optional callable; called after every trained model
                        with the integer percentage of C_range processed.

        Results are stored on self: training_table, training_info (a list
        of [C, mse] pairs), C, error and svm (the best model).  Nothing is
        returned.

        Raises NotImplementedError when self.validation_size does not
        describe a non-empty validation set.
        '''
        # Pair each label with its vector so that both are shuffled together.
        data = [[labels[i], vectors[i]] for i in range(len(labels))]

        # Shuffle the data.
        rng = random.Random()
        if self.random_seed is not None:
            rng.seed(self.random_seed)
        rng.shuffle(data)

        # Partition into training and validation.  The int branch needs a
        # lower bound just like the float branch: zero or a negative count
        # would leave the validation set empty and the error average below
        # would divide by zero after a model had already been trained.
        size = self.validation_size
        if type(size) == float and 0.0 < size < 1.0:
            training_cutoff = int(len(data) * (1.0 - size))
        elif type(size) == int and 0 < size < len(labels):
            training_cutoff = len(labels) - size
        else:
            raise NotImplementedError("Cannot determine validation set from %s" % (size,))

        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        if verbose: print("%s %s %s" % ("Training Cutoff:", len(labels), training_cutoff))
        training_data = data[:training_cutoff]
        validation_data = data[training_cutoff:]
        if not validation_data:
            # E.g. no samples at all, or a fraction too small to register.
            raise NotImplementedError("Cannot determine validation set from %s" % (size,))

        train_labels = [pair[0] for pair in training_data]
        train_vectors = [pair[1] for pair in training_data]

        prob = svm.svm_problem(train_labels, train_vectors)

        training_info = []
        training_svm = []
        training_table = Table()
        self.training_table = training_table
        total = len(validation_data)
        for i, C in enumerate(C_range):
            param = svm.svm_parameter(svm_type=self.svm_type, kernel_type=svm.LINEAR,
                                      C=C, p=self.epsilon, nu=self.nu)

            test_svm = svm.svm_model(prob, param)

            # Mean squared error of this model on the validation set.
            mse = 0.0
            for label, vector in validation_data:
                error = label - test_svm.predict(vector)
                mse += error * error
            mse = mse / total

            training_svm.append(test_svm)
            training_info.append([C, mse])
            training_table.setElement(i, 'C', C)
            training_table.setElement(i, 'mse', mse)

            if callback is not None:
                # i + 1 models are finished at this point.
                callback(int(100 * float(i + 1) / len(C_range)))

        rule = "------------------------------"
        if verbose:
            print("")
            print(rule)
            print(" Tuning Information:")
            print("         C   error")
            print(rule)

        # Keep the first entry with the strictly smallest error.
        best = training_info[0]
        best_svm = training_svm[0]
        for each, candidate in zip(training_info, training_svm):
            if verbose: print(" %8.3e  %0.8f" % (each[0], each[1]))
            if best[-1] > each[-1]:
                best = each
                best_svm = candidate

        if verbose:
            print(rule)
            print("")
            print(rule)
            print(" Best Tuning:")
            print("         C   error")
            print(rule)
            print(" %8.3e  %0.8f" % (best[0], best[1]))
            print(rule)
            print("")

        self.training_info = training_info
        self.C = best[0]
        self.error = best[1]

        self.svm = best_svm
 def _load_model_file(self, model_file_path):
     assert os.path.exists(model_file_path), model_file_path
     self.model = svm.svm_model(model_file_path)
Exemple #34
0
 def loadModel(self, filename):
     """ Build an svm_model from `filename` and keep it as self.model """
     restored = svm_model(filename)
     self.model = restored
Exemple #35
0
# Directory containing this file.  It is put at the front of sys.path,
# presumably so that modules living next to this file can be imported.
HOME_PATH = dirname(abspath(__file__))
sys.path.insert(0, HOME_PATH)

import redis
import simplejson
from svm import svm_model
from build_svm import url_re, seg

def _get_features():
    """Return the feature words stored under the redis key 'features'.

    The stored value is parsed as JSON (a missing or empty value counts as
    an empty list) and every entry is encoded to UTF-8, ignoring characters
    that cannot be encoded.
    """
    raw = redis.StrictRedis().get('features')
    if not raw:
        raw = '[]'
    encoded = []
    for word in simplejson.loads(raw):
        encoded.append(word.encode('utf-8', 'ignore'))
    return encoded

# Both are loaded once, when this module is imported: the feature words
# fetched by _get_features() and the SVM model read from the snap.svm file
# located under HOME_PATH.
words = _get_features()
snap_model = svm_model(HOME_PATH + '/snap.svm')

def predict(text):
    """Return True when snap_model predicts label 1 for ``text``, else False.

    The text is converted with _build_x() and the model's prediction is
    truncated to an int before the comparison.
    """
    predicted_class = int(snap_model.predict(_build_x(text)))
    return predicted_class == 1

def _build_x(text):
    """Start building the model input for ``text``.

    NOTE(review): this excerpt ends right after ``features`` is
    initialised; the rest of the body (and the return value) is not visible
    here, so only the preprocessing steps are described.
    """
    # Remove everything matched by url_re (imported from build_svm).
    text = url_re.sub('', text)
    # seg.cut is presumably a word segmenter -- confirm in build_svm.
    w_list = seg.cut(text.strip())
    w_list.reverse()
    # Encode each token to UTF-8.
    w_list = [w.encode('utf-8') for w in w_list]
    features = []
Exemple #36
0
 def load_model(self, session, path):
     """Load the SVM model at ``path`` and flag this object as predicting.

     ``path`` is encoded to UTF-8 before it is handed to svm.svm_model.
     ``session`` is accepted but not used in this method.

     Raises ConfigFileException(path) when the path cannot be encoded or
     the model cannot be loaded.
     """
     try:
         self.model = svm.svm_model(path.encode('utf-8'))
     except Exception:
         # Deliberately not a bare ``except:``: KeyboardInterrupt and
         # SystemExit must propagate instead of being reported as a
         # configuration problem.
         raise ConfigFileException(path)
     self.predicting = 1
 def train(self, features, labels):
     """Fit an SVM on ``features`` / ``labels`` and store it in ``self.model``.

     ``labels`` must be a numpy array (asserted); it is converted to a list
     for the SVM problem.  ``features`` are first passed through
     ``self._cleanse_features``.  The model is built with
     ``self._svm_parameter``.
     """
     assert isinstance(labels, np.ndarray), "labels should be numpy array"
     cleaned = self._cleanse_features(features)
     label_list = labels.tolist()
     self.model = svm.svm_model(svm.svm_problem(label_list, cleaned), self._svm_parameter)
Exemple #38
0
        print '新的特征表存入redis...'
        print 'len features', len(ls)
        db = redis.StrictRedis()
        db.set('features', simplejson.dumps(ls))
        f.close()

        f = open(sample_file, 'r')
        j = f.read()
        fy, fx, fd = simplejson.loads(j)
        f.close()
        print '训练新的model'
        prob = svm_problem(fy, fx)
        param = svm_parameter(kernel_type = LINEAR, C = 80)

        ## training  the model
        m = svm_model(prob, param)
        m.save('snap.svm')

        img = '<img src="%s"></img>'
        super_count = 0
        error_count = 0
        html_snap = ''
        html_trash = ''
        for i, x in enumerate(fx):
            label = m.predict(x)
            if label == 1:
                html_snap += img % fd[i][0]
            else:
                html_trash += img % fd[i][0]
            if label == fy[i]:
                super_count += 1