Example #1
 def __init__(self, data_dictionary, model_target, kernel=LINEAR, cv_segments=10, **args):
     #Create an SVM model object
 
     #Check to see if a threshold has been specified in the function's arguments
     try: self.threshold = args['threshold']
     except KeyError: self.threshold=2.3711   # if there is no 'threshold' key, then use the default (2.3711)
     
     #Store some object data
     model_dict = deepcopy(data_dictionary)
     self.model_target = model_target
     self.folds = cv_segments
            
     #Label the exceedances in the training set.
     model_dict[model_target] = self.Assign_Labels(model_dict[model_target])
     
     #Extract the training labels and training set
     self.training_labels = model_dict.pop(model_target)
     self.training_set = np.transpose(model_dict.values())
     self.headers = model_dict.keys()
             
     #Scale the covariates to [-1,1]
     self.Scale_Covariates()
     
     #Generate an SVM model.
     self.svm_problem = svm.svm_problem(self.training_labels, self.training_set)
     self.svm_params = {'kernel_type' : kernel, 'weight_label' : [0,1], 'weight' : [10,1]}
     self.model=svm.svm_model(self.svm_problem, svm.svm_parameter(**self.svm_params))
     
     #Use cross-validation to find the best number of components in the model.
     self.Select_Linear_Model(-5, 10)
     
     #Rebuild the model, calculating the probabilities of class membership
     self.svm_params['probability']=1
     self.model=svm.svm_model(self.svm_problem, svm.svm_parameter(**self.svm_params))
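A minimal end-to-end sketch of the old swig-style libsvm API this constructor targets, with made-up toy data (svm_parameter takes keyword arguments, and the trained svm_model predicts directly):

import svm

labels = [1, 0, 1, 0]
samples = [[0.1, 0.9], [0.8, 0.2], [0.2, 0.8], [0.9, 0.1]]

prob = svm.svm_problem(labels, samples)
param = svm.svm_parameter(kernel_type=svm.LINEAR, C=10)
model = svm.svm_model(prob, param)

prediction = model.predict([0.15, 0.85])   # expected: 1.0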
Example #2
    def lib_svm(self, train_file, test_file, digit0, digit1):
        features, labels = self.get_data(train_file, digit0, digit1)
        training_data = svm_problem(labels, features)

        if (self.kernel == 'gaussian'):
            params = svm_parameter('-s 0 -t 2 -c 1 -g 0.05')
        else:
            params = svm_parameter('-s 0 -t 2 -c 1 -g 0.001275')

        model = svm_train(training_data, params)

        test_features, test_labels = self.get_data(test_file, digit0, digit1)
        p_labels, p_acc, p_vals = svm_predict(test_labels, test_features,
                                              model)
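For reference, the option strings above decode as: -s 0 selects C-SVC, -t 2 the RBF kernel, -c the cost parameter C, and -g the RBF gamma. A self-contained sketch of the same train/predict cycle with made-up data:

from svm import svm_problem, svm_parameter
from svmutil import svm_train, svm_predict

labels = [1, 1, -1, -1]
features = [[0.0, 0.9], [0.1, 1.0], [1.0, 0.1], [0.9, 0.0]]

prob = svm_problem(labels, features)
param = svm_parameter('-s 0 -t 2 -c 1 -g 0.05')   # C-SVC with an RBF kernel
model = svm_train(prob, param)

# p_acc is (accuracy, mean squared error, squared correlation coefficient)
p_labels, p_acc, p_vals = svm_predict(labels, features, model)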
Example #3
    def train(self, c, g, probability=True, compensation=True,
              path=None, filename=None, save=True):
        if filename is None:
            filename = os.path.splitext(self.getOption('strArffFileName'))[0]
            filename += '.model'
        if path is None:
            path = self.dctEnvPaths['data']
        param = svm.svm_parameter(kernel_type=svm.RBF,
                                  C=c, gamma=g,
                                  probability=1 if probability else 0)

        labels, samples = self.getData(normalize=True)

        # because we train the SVM with dict we need to redefine the zero-insert
        self.hasZeroInsert = False
        if not self.oClassifier is None:
            self.oClassifier.setOption('hasZeroInsert', True)

        if compensation:
            weight, weight_label = self._calculateCompensation(labels)
            param.weight = weight
            param.weight_label = weight_label
            param.nr_weight = len(weight)

        problem = svm.svm_problem(labels, samples)
        model = svm.svm_model(problem, param)
        if save:
            model.save(os.path.join(path, filename))
        return problem, model
Example #4
    def train(self, search=False, **kwargs):
        """ Train the SVM on the dataset. For RBF kernels (the default), an optional meta-parameter search can be performed.
        @param search: optional name of grid search class to use for RBF kernels: 'GridSearch' or 'GridSearchDOE' 
        @param log2g: base 2 log of the RBF width parameter
        @param log2C: base 2 log of the slack parameter
        @param searchlog: filename into which to dump the search log
        @param others: ...are passed through to the grid search and/or libsvm 
        """

        self.setParams(**kwargs)
        problem = svm_problem(self.ds['target'].flatten(),
                              self.ds['input'].tolist())
        if search:
            # this is a bit of a hack...
            model = eval(
                search +
                "(problem, self.svmtarget, cmin=[0,-7],cmax=[25,1], cstep=[0.5,0.2],plotflag=self.plot,searchlog=self.searchlog,**self.params)"
            )
        else:
            param = svm_parameter(**self.params)
            model = svm_model(problem, param)
            logging.info("Training completed with parameters:")
            logging.info(repr(param))

        self.svm.setModel(model)
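The eval call above instantiates the grid-search class from its name string; a dictionary lookup is an eval-free equivalent (a sketch, assuming GridSearch and GridSearchDOE are importable in this module):

search_classes = {'GridSearch': GridSearch, 'GridSearchDOE': GridSearchDOE}
model = search_classes[search](problem, self.svmtarget,
                               cmin=[0, -7], cmax=[25, 1], cstep=[0.5, 0.2],
                               plotflag=self.plot, searchlog=self.searchlog,
                               **self.params)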
Example #5
 def train(self, examples, parameters=None):
     self.isBinary = self.isBinaryProblem(examples)
     examples = self.filterTrainingSet(examples)
     ExampleUtils.writeExamples(examples, self.tempDir + "/train.dat")
     #prepare parameters:
     if parameters.has_key("c"):
         assert (not parameters.has_key("C"))
         parameters["C"] = parameters["c"]
         del parameters["c"]
     totalExamples = float(sum(self.classes.values()))
     weight_label = self.classes.keys()
     weight_label.sort()
     weight = []
     for k in weight_label:
         weight.append(1.0 - self.classes[k] / totalExamples)
     libSVMparam = svm.svm_parameter(nr_weight=len(self.classes),
                                     weight_label=weight_label,
                                     weight=weight,
                                     **parameters)
     labels = []
     samples = []
     for example in examples:
         labels.append(example[1])
         samples.append(example[2])
     problem = svm.svm_problem(labels, samples)
     self.model = svm.svm_model(problem, libSVMparam)
Example #6
def svm(y,K,**param_kw):
    """
    Solve the SVM problem. Return ``(alpha, b)``

    `y`
      labels
    `K`
      precomputed kernel matrix

    Additional keyword arguments are passed on as svm parameters to
    the model.

    The wrapper is needed to precondition the precomputed matrix for
    use with libsvm, and to extract the model parameters and convert
    them into the canonical weight vector plus scalar offset. Normally
    libsvm hides these model parameters, preferring instead to provide
    a high-level model object that can be queried for results.

    """
    i = arange(1,len(K)+1).reshape((-1,1))
    X = hstack((i, K))
    y = asarray(y,dtype=double)
    X = asarray(X,dtype=double)
    prob = svm_problem(y,X)
    param = svm_parameter(kernel_type=PRECOMPUTED,**param_kw)
    model = svm_model(prob, param)
    return get_alpha_b(model)
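A usage sketch for this wrapper with made-up data; for a linear kernel the precomputed matrix is just the Gram matrix of the features:

import numpy as np

X = np.random.rand(20, 5)                # 20 samples, 5 features
y = np.sign(np.random.rand(20) - 0.5)    # labels in {-1, +1}

K = np.dot(X, X.T)                       # precomputed linear kernel
alpha, b = svm(y, K, C=10)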
Example #7
    def __init__(self,
                 input_dim=None,
                 output_dim=None,
                 params=None,
                 dtype='float64'):
        """ Initializes the SVM.
                
        params  --  class of type svm_parameter with all parameters
                    see libsvm documentation
        """
        super(BinarySVMNode, self).__init__(input_dim, output_dim, dtype)

        if not params:
            # make a linear SVM with C = 10
            params = svm.svm_parameter(kernel_type=svm.LINEAR, C=10)
        self.parameters = params

        # variables for training
        self.X = numpy.zeros((0, self._input_dim), dtype=self._dtype)
        self.Y = numpy.zeros((0, self._output_dim), dtype=self._dtype)

        # list with models, parameters and labels for each output
        self.problems = []
        self.models = []
        self.labels = []
Example #8
    def train(self, labels, data):
        '''
        Train the classifier.
        
        @param labels: A list of class labels.
        @param data: A 2D array or list of feature vectors.  One feature vector per row.
        '''

        # Check the types and convert to np arrays
        if isinstance(data, list) or isinstance(data, tuple):
            data = np.array(data, dtype=np.double)

        labels = np.array(labels, dtype=np.double)

        # Preprocess the data
        labels, data = self._preprocessor.train(labels, data)
        labels, data = self._label_scale.train(labels, data)

        # Create the svm parameter data and problem description
        param = svm.svm_parameter(svm_type=svm.EPSILON_SVR,
                                  kernel_type=svm.RBF,
                                  p=self._epsilon,
                                  gamma=self._gamma)
        prob = svm.svm_problem(labels.tolist(), data.tolist())

        # train the svm
        self._model = svm.svm_model(prob, param)
Example #9
    def _test_prob_model(self, param1, param2):
        probability_param = '-b 1'
        df = self.df

        param_str = ' '.join(
            [self.base_param, param1, param2, probability_param])
        param = svm_parameter(param_str)

        model = svm_train(self.prob, param)

        # Get predictions with probabilities as dictionaries
        (df['prediction'], _,
         probability_lists) = svm_predict(self.y, self.x, model,
                                          probability_param + ' -q')
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df['probabilities'] = probability_dicts

        spec = libsvm.convert(model, self.column_names, 'target',
                              'probabilities')

        if macos_version() >= (10, 13):
            metrics = evaluate_classifier_with_probabilities(spec,
                                                             df,
                                                             verbose=False)
            self.assertEquals(metrics['num_key_mismatch'], 0)
            self.assertLess(metrics['max_probability_error'], 0.00001)
Example #10
    def test_multi_class_without_probability(self):
        # Generate some random data.
        # This unit test should not rely on scikit learn for test data.
        x, y = [], []
        for _ in range(50):
            x.append([
                random.gauss(200, 30),
                random.gauss(-100, 22),
                random.gauss(100, 42)
            ])
            y.append(random.choice([1, 2, 10, 12]))
        y[0], y[1], y[2], y[3] = 1, 2, 10, 12
        column_names = ['x1', 'x2', 'x3']
        prob = svmutil.svm_problem(y, x)

        df = pd.DataFrame(x, columns=column_names)

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = ' '.join([self.base_param, param1, param2])
                param = svm_parameter(param_str)

                model = svm_train(prob, param)

                # Get predictions with probabilities as dictionaries
                (df['prediction'], _, _) = svm_predict(y, x, model, ' -q')

                spec = libsvm.convert(model, column_names, 'target')

                metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEquals(metrics['num_errors'], 0)
Example #11
	def constructSVMModels(self, db_250k, arrays_to_form_model, array_id2median_intensity,\
						minPercUnCoveredByLerContig=0.6, cnv_method_id=6, kernel_type=None, C=10, gamma=0., \
						eps=1e-2, deletedFractionType=1):
		"""
		2010-7-25
			add argument deletedFractionType
				1: CNVCall.percUnCoveredByLerContig
				2: CNVCall.fractionDeletedInPECoverageData
		2010-7-1
		"""
		sys.stderr.write("Constructing SVM models for %s arrays ...\n"%(len(arrays_to_form_model)))
		from svm import svm_problem, svm_parameter, svm_model, cross_validation, LINEAR, POLY, RBF
		if kernel_type is None:
			kernel_type = RBF
		param = svm_parameter(C = C, eps=eps, probability = 1, gamma=gamma, kernel_type = kernel_type)
		array_id2model = {}
		for array_id in arrays_to_form_model:
			if array_id not in array_id2median_intensity:	#model array has to be in array_id2median_intensity
				continue
			cnvFeatureData = self.getCNVFeatureData(db_250k, array_id=array_id, \
					minPercUnCoveredByLerContig=minPercUnCoveredByLerContig, cnv_method_id=cnv_method_id, \
					replaceAmpWithMedianIntensity=False, deletedFractionType=deletedFractionType)
			
			problem = svm_problem(cnvFeatureData.class_label_ls, cnvFeatureData.feature_data)
			model = svm_model(problem, param)
			array_id2model[array_id] = model
		sys.stderr.write("%s models.\n"%(len(array_id2model)))
		return array_id2model
Example #12
    def _evaluation_test_helper_with_probability(self, labels, allow_slow):
        import copy
        df = pd.DataFrame(self.x, columns=self.column_names)
        y = copy.copy(self.y)
        for i, val in enumerate(labels):
            y[i] = val
        probability_param = '-b 1'

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = ' '.join([self.base_param, param1, param2, probability_param])
                # print("PARAMS: ", param_str)
                param = svm_parameter(param_str)

                model = svm_train(self.prob, param)

                # Get predictions with probabilities as dictionaries
                (df['prediction'], _, probability_lists) = svm_predict(y, self.x, model, probability_param + ' -q')
                probability_dicts = [dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists]
                df['probabilities'] = probability_dicts

                spec = libsvm.convert(model, self.column_names, 'target', 'probabilities')

                if macos_version() >= (10, 13):
                    metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False)
                    self.assertEquals(metrics['num_key_mismatch'], 0)
                    self.assertLess(metrics['max_probability_error'], 0.00001)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #13
def iqr_model_train(matrix_kernel_train, labels_train, idx2clipid,
                    svm_para = '-w1 50 -t 4 -b 1 -c 1'):
    """
    Light-weighted SVM learning module for online IQR

    @param matrix_kernel_train: n-by-n square numpy array with kernel values
        between training data
    @param labels_train: row-wise labels of training data (1 or True indicates
        positive, 0 or False otherwise)
    @param idx2clipid: idx2clipid(row_idx) returns the clipid for the 0-base row
        in matrix
    @param svm_para: (optional) SVM learning parameter

    @rtype: dictionary with keys 'model' and 'clipids_SVs'
    @return: the trained model plus the list of clipids for its support vectors

    """
    log = logging.getLogger('iqr_model_train')

    # set training inputs
    matrix_kernel_train = np.vstack((np.arange(1, len(matrix_kernel_train)+1),
                                     matrix_kernel_train)).T
    log.debug("Done matrix_kernel_train")

    problem = svm.svm_problem(labels_train.tolist(), matrix_kernel_train.tolist(), isKernel=True)
    log.debug("Done problem")
    svm_param = svm.svm_parameter(svm_para)
    log.debug("Done svm_param")

    # train model
    model = svmutil.svm_train(problem, svm_param)
    log.debug("Done train model")

    # release memory
    del problem
    del svm_param
    log.debug("Done release memory")

    # check learning failure
    if model.l == 0:
        raise Exception('svm model learning failure')
    log.debug("Done checking learning failure (no failure)")

    n_SVs = model.l
    clipids_SVs = []
    idxs_train_SVs = svmtools.get_SV_idxs_nonlinear_svm(model)
    for i in range(n_SVs):
        _idx_1base = idxs_train_SVs[i]
        _idx_0base = _idx_1base - 1
        clipids_SVs.append(idx2clipid[_idx_0base])
        model.SV[i][0].value = i+1 # within SVM model, index needs to be 1-base
    log.debug("Done collecting support vector IDs")

    #svmutil.svm_save_model(filepath_model, model)

    output = dict()
    output['model'] = model
    output['clipids_SVs'] = clipids_SVs

    return output
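A hypothetical call of the trainer above: three clips with a precomputed linear kernel, the first labeled positive.

import numpy as np

feats = np.random.rand(3, 8)             # made-up clip features
K = np.dot(feats, feats.T)               # 3x3 kernel matrix
labels = np.array([1, 0, 0])
idx2clipid = {0: 'clip_a', 1: 'clip_b', 2: 'clip_c'}

out = iqr_model_train(K, labels, idx2clipid)
print(out['clipids_SVs'])                # clip ids of the support vectors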
Example #14
 def set_svm_params(self, param_dict):
     param_str = ' '.join(k for k in param_dict.keys())
     Params = namedtuple('Params', param_str)
     self.params = Params(**param_dict)
     self._svm_parameter = svm.svm_parameter(
         **{k: getattr(self.params, k)
            for k in self.params._fields})
Example #15
    def train(self, c, g, probability=True, compensation=True,
              path=None, filename=None, save=True):
        if filename is None:
            filename = splitext(self.arff_file)[0]
            filename += '.model'
        if path is None:
            path = self.data_dir
        param = svm.svm_parameter(kernel_type=svm.RBF,
                                  C=c, gamma=g,
                                  probability=1 if probability else 0)

        labels, samples = self.getData(normalize=True)

        # because we train the SVM with dict we need to redefine the zero-insert
        self.has_zero_insert = False
        if not self.classifier is None:
            self.classifier.setOption('hasZeroInsert', True)

        if compensation:
            weight, weight_label = self._calculateCompensation(labels)
            param.weight = weight
            param.weight_label = weight_label
            param.nr_weight = len(weight)

        problem = svm.svm_problem(labels, samples)
        model = svm.svm_model(problem, param)
        if save:
            model.save(os.path.join(path, filename))
        return problem, model
Example #16
    def train(self,labels,data):
        '''
        Train the classifier.
        
        @param labels: A list of class labels.
        @param data: A 2D array or list of feature vectors.  One feature vector per row.
        '''
        
        # Check the types and convert to np arrays
        if isinstance(data,list) or isinstance(data,tuple):
            data = np.array(data,dtype=np.double)
            

        labels = np.array(labels,dtype=np.double)
            
        # Preprocess the data    
        labels,data = self._preprocessor.train(labels,data)
        labels,data = self._label_scale.train(labels,data)
        
        
        # Create the svm parameter data and problem description
        param = svm.svm_parameter(svm_type=svm.EPSILON_SVR,kernel_type = svm.RBF, p = self._epsilon, gamma=self._gamma)
        prob = svm.svm_problem(labels.tolist(),data.tolist())
        
        # train the svm
        self._model = svm.svm_model(prob, param)
Example #17
def construct_svm_anytime(type="linear",C=2.,gamma=0.1,datatype='float64'):
    """ SVM anytime model.
    """
    inputs = 14
    outputs = 1
    
    # Parameters
    svm_C = C
    svm_gamma = gamma
    
    # construct model
    if(type=='linear'):
        # using a L2-loss primal SVM with eps = 0.01
        # (faster if we have many timesteps)
        model = BinaryLinearSVMNode(inputs,outputs,C=svm_C,solver_type=2,eps=0.01)
    else:
        params = svm.svm_parameter(kernel_type=svm.RBF, C=svm_C, gamma=svm_gamma)
        model = BinarySVMNode(inputs,outputs,params)
    
    # additional properties
    model.dtype = datatype
    model.randrange = 0
    model.type = 'SVM_any'
    
    return model
Example #18
def do_one_cv_classify_predeffolds_multi(theinput):
	c = theinput[0]
	gamma = theinput[1]
	nf = theinput[2]
	output = theinput[3]
	input = theinput[4]
	useprob = theinput[5]
	fold_start = theinput[6]
			
		
		
	param = svm.svm_parameter('-c %g -g %g -b %d' % (c,gamma,int(useprob)))
	
	prob = svm.svm_problem(output, input)
	target = (c_double * prob.l)()
	posclass = output[0]
	fold_start_p = (c_int *len(fold_start))()
	for i in xrange(len(fold_start)):
		fold_start_p[i] = fold_start[i]
	libsvm.svm_cross_validation_labeltargets(prob, fold_start_p,param, nf, target)

	acc = len([i for i in xrange(len(output)) if output[i] == target[i]])*1.0/prob.l
	del target
	del fold_start_p
	return acc
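Note that svm_cross_validation_labeltargets is a custom extension of the libsvm C library reached through ctypes. With stock libsvm, plain k-fold cross-validation accuracy is available from svm_train via the -v option (a sketch using the variables above):

from svm import svm_problem, svm_parameter
from svmutil import svm_train

prob = svm_problem(output, input)
param = svm_parameter('-c %g -g %g -v %d' % (c, gamma, nf))
acc = svm_train(prob, param)   # with -v set, svm_train returns the CV accuracy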
Example #19
def n_gram_svm(class_size, take_size):
    cost = np.array([2.0, 2.0, 2.0, 2.0, 2.0])
    gamma = np.array([0.0078125, 0.0078125, 0.0078125, 0.0078125, 0.0078125])

    tst = time()

    preset = np.load(get_feature_file('small'))
    vocab_size = int(np.max(preset[:, :-class_size])) + 1
    np.random.shuffle(preset)

    train_set = preset[take_size:]
    train_txt = train_set[:, :-class_size].astype(np.int64)
    train_cls = train_set[:, -class_size:].astype(np.float32)

    train_dict = [{
        gram: 1 if gram in txt else 0
        for gram in np.arange(1, vocab_size)
    } for txt in train_txt]
    train_major: List[Set[int]] = [set() for _ in np.arange(class_size)]

    for i, cls in enumerate(train_cls):
        for k in np.nonzero(np.abs(cls - np.max(cls)) < 1e-4)[0]:
            train_major[k].add(i)

    models = []

    for k in np.arange(class_size):
        problem = svm_problem([
            1 if i in train_major[k] else -1 for i in np.arange(len(train_cls))
        ], train_dict)
        param = svm_parameter('-t 0 -c %f -g %f -b 1 -q' % (cost[k], gamma[k]))
        models.append(svm_train(problem, param))

    train_time = time() - tst
    tst = time()

    test_set = preset[:take_size]
    test_txt = test_set[:, :-class_size].astype(np.int64)
    test_cls = test_set[:, -class_size:].astype(np.float32)

    res = np.array([])
    test_dict = [{
        gram: 1 if gram in txt else 0
        for gram in np.arange(1, vocab_size)
    } for txt in test_txt]

    for dic, cls in zip(test_dict, test_cls):
        prob = np.zeros(class_size)

        for k in np.arange(class_size):
            _, _, p = svm_predict([], [dic], models[k], '-b 1 -q')
            prob[k] = p[0][0]

        prob /= np.sum(prob)
        res = np.append(
            res, cls @ prob / (np.linalg.norm(cls) * np.linalg.norm(prob)))

    test_time = time() - tst
    test_acc = np.mean(res)
    return train_time, test_time, test_acc
Example #20
def do_one_cv_classify(theinput):
	c = theinput[0]
	gamma = theinput[1]
	nf = theinput[2]
	output = theinput[3]
	input = theinput[4]
	useprob = theinput[5]	
	perfmetric = theinput[6]

	param = svm.svm_parameter('-c %g -g %g -b %d' % (c,gamma,int(useprob)))

	prob = svm.svm_problem(output, input)
	target = (c_double * prob.l)()
	
	posclass = output[0]
	fold_start = (c_int *1)();
	fold_start[0] = -1;
	libsvm.svm_cross_validation(prob, fold_start, param, nf, target)
	ys = prob.y[:prob.l]
	db = array([[ys[i],target[i]] for i in range(prob.l)])
	
	del target
	
	neg = len([x for x in ys if x != posclass])
	pos = prob.l-neg
	
	
	
	[topacc,topphi,minfpfnratio,topf1,auc,optbias] = optimize_results(db,neg,pos,posclass,perfmetric)
		
	return topacc,topphi,minfpfnratio,topf1,auc,optbias
Example #21
def train_test():
	train_subdir = "data/train/"
	test_subdir = "data/test/"
	img_kinds = ["happy", "anger", "neutral", "surprise"]
	models = {}
	params = "-t 0 -c 3"
	svm_params = {	"happy": params,
					"anger": params,
					"neutral": params,
					"surprise": params}

	#train the models
	print 'BUILDING TRAIN MODELS'
	for img_kind in img_kinds:
		print "\t" + img_kind
		problem = build_problem(img_kind, train_subdir)
		param = svm.svm_parameter(svm_params[img_kind])
		models[img_kind] = svmutil.svm_train(problem, param)
	print '================================'

	#for each image in the test set, let's see what the answer is
	total_count = 0
	correct_count = 0
	wrong_count = 0

	print 'TESTING MODELS'
	for img_kind in img_kinds:
		images = glob.glob(test_subdir + "f_" + img_kind + "*.jpg")
		for image in images:
			print "\t" + image
			image_data = cv.LoadImage(image)
			
			# Let's see what are the results from the models
			results = {}
			for kind in img_kinds:
				test_data = get_image_features(image_data, True, kind)
				predict_input_data = []
				predict_input_data.append(test_data)

				# do svm query
				(val, val_2, label) = svmutil.svm_predict([1] ,predict_input_data, models[kind])
				results[kind] = label[0][0]
			
			sorted_results = sorted(results.iteritems(), key=operator.itemgetter(1))
			result = sorted_results[len(sorted_results)-1][0]

			total_count += 1
			if result == img_kind:
				print 'YES :' + result
				correct_count += 1
			else:
				print 'NO  :' + result
				print sorted_results
				wrong_count += 1
			print '-----------------------'
	print '================================'
	print "Total Pictures: " + str(total_count)
	print "Correct: " + str(correct_count)
	print "Wrong: " + str(wrong_count)
	print "Accuracy: " + str(correct_count/float(total_count) * 100)
Example #22
class svm_classifier:
    """Support Vector Machine Classifier"""
    m_name = "svm"
    feature_scaling = True
    m_params = svm.svm_parameter()
    m_prob = None

    def convert_raw_training_data(self, raw_data):
        t_data = ""
        for line in raw_data.split('\n'):
            if line == "":
                break
            x = [l.strip() for l in line.split(':')]
            t_data += x[0] + " "
            i = 1
            for feature in x[1].split():
                t_data += str(i) + ":" + feature + " "
                i += 1
            t_data += "\n"
        return t_data

    def scale_features(self, variant_name, models_folder):
        command = "svm-scale -s %s %s > %s" % (models_folder + variant_name + ".range",\
                                              variant_name + ".t",\
                                              variant_name + ".t.scale")
        try:
            result = subprocess.check_output(command, shell=True)
        except subprocess.CalledProcessError as e:
            print_error("Scaling Command Failed")
            quit()

    def generate_model(self, variant_name, models_folder):
        training_file = variant_name + ".t"
        if self.feature_scaling:
            self.scale_features(variant_name, models_folder)
            training_file += ".scale"
        (y, x) = svm_read_problem(training_file)
        self.m_prob = svm.svm_problem(y, x,
                                      self.m_params.kernel_type == PRECOMPUTED)

        libsvm_path = os.environ['LIBSVM_PATH']
        scaled_filename = os.path.abspath(training_file)
        cp = "python grid.py " + scaled_filename
        curdir = os.getcwd()
        os.chdir(libsvm_path + "/tools/")
        result = call_process(cp)
        os.chdir(curdir)
        C, g, rate = [float(l) for l in result.split("\n")[-2].split(" ")]

        print "C: %.8f, gamma: %.8f\n" % (C, g)

        self.m_params.C = C
        self.m_params.gamma = g

        print "\n-----------------------------"
        model = svm.svm_train(self.m_prob, self.m_params)
        print "-----------------------------\n"

        svm_save_model(models_folder + variant_name + ".model", model)
Example #23
    def _test_evaluation(self, allow_slow):
        """
        Test that the same predictions are made
        """
        from svm import svm_parameter, svm_problem
        from svmutil import svm_train, svm_predict

        # Generate some smallish (poly kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
            x.append([cur_x1, cur_x2])
            y.append(1 + 2 * cur_x1 + 3 * cur_x2)

        input_names = ["x1", "x2"]
        df = pd.DataFrame(x, columns=input_names)
        prob = svm_problem(y, x)

        # Parameters
        base_param = "-s 3"  # model type is epsilon SVR
        non_kernel_parameters = [
            "", "-c 1.5 -p 0.5 -h 1", "-c 0.5 -p 0.5 -h 0"
        ]
        kernel_parameters = [
            "",
            "-t 2 -g 1.2",  # rbf kernel
            "-t 0",  # linear kernel
            "-t 1",
            "-t 1 -d 2",
            "-t 1 -g 0.75",
            "-t 1 -d 0 -g 0.9 -r 2",  # poly kernel
            "-t 3",
            "-t 3 -g 1.3",
            "-t 3 -r 0.8",
            "-t 3 -r 0.8 -g 0.5",  # sigmoid kernel
        ]

        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                param_str = " ".join([base_param, param1, param2])
                print(param_str)
                param = svm_parameter(param_str)

                model = svm_train(prob, param)
                (df["prediction"], _, _) = svm_predict(y, x, model)

                spec = libsvm.convert(model,
                                      input_names=input_names,
                                      target_name="target")

                if _is_macos() and _macos_version() >= (10, 13):
                    metrics = evaluate_regressor(spec, df)
                    self.assertAlmostEqual(metrics["max_error"], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #24
def trainSVM(kernel, labels):
    #need to add an id number as the first column of the list
    svmKernel = column_stack((arange(1, len(kernel.tolist()) + 1), kernel))
    prob = svm_problem(labels.tolist(), svmKernel.tolist(), isKernel=True)
    param = svm_parameter('-t 4')   

    model = svm_train(prob, param)
    return model
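A usage sketch for trainSVM with made-up data; '-t 4' is libsvm's precomputed-kernel mode, which expects the 1-based index column the function prepends:

import numpy as np

feats = np.random.rand(10, 4)            # made-up feature vectors
kernel = np.dot(feats, feats.T)          # 10x10 precomputed linear kernel
labels = np.array([1., 1., 1., 1., 1., -1., -1., -1., -1., -1.])

model = trainSVM(kernel, labels)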
Example #25
def lib_svm(train_file, test_file, kernel):
    print("inside libsvm")
    features, labels = get_data_from_csv(train_file)
    print(features)

    training_data = svm_problem(labels, features)

    if (kernel == 'gaussian'):
        params = svm_parameter('-s 0 -t 2 -c 1 -g 0.05')
    else:
        params = svm_parameter('-s 0 -t 2 -c 1 -g 0.001275')

    model = svm_train(training_data, params)

    test_features, test_labels = get_data_from_csv(test_file)
    p_labels, p_acc, p_vals = svm_predict(test_labels, test_features, model)
    return p_labels, p_acc, p_vals
Example #26
    def _test_evaluation(self, allow_slow):
        """
        Test that the same predictions are made
        """
        from svm import svm_parameter, svm_problem
        from svmutil import svm_train, svm_predict

        # Generate some smallish (poly kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
            x.append([cur_x1, cur_x2])
            y.append(1 + 2 * cur_x1 + 3 * cur_x2)

        input_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=input_names)
        prob = svm_problem(y, x)

        # Parameters
        base_param = '-s 3'  # model type is epsilon SVR
        non_kernel_parameters = [
            '', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0'
        ]
        kernel_parameters = [
            '',
            '-t 2 -g 1.2',  # rbf kernel
            '-t 0',  # linear kernel
            '-t 1',
            '-t 1 -d 2',
            '-t 1 -g 0.75',
            '-t 1 -d 0 -g 0.9 -r 2',  # poly kernel
            '-t 3',
            '-t 3 -g 1.3',
            '-t 3 -r 0.8',
            '-t 3 -r 0.8 -g 0.5'  # sigmoid kernel
        ]

        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                param_str = ' '.join([base_param, param1, param2])
                print(param_str)
                param = svm_parameter(param_str)

                model = svm_train(prob, param)
                (df['prediction'], _, _) = svm_predict(y, x, model)

                spec = libsvm.convert(model,
                                      input_names=input_names,
                                      target_name='target')

                metrics = evaluate_regressor(spec, df)
                self.assertAlmostEquals(metrics['max_error'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #27
 def Select_Linear_Model(self, C_min=-10, C_steps=11):
     #Search for the model parameters that give the smallest CV error
     C = self.__Linear_Search__(C_min, C_steps, 1)
     C = self.__Linear_Search__(np.log2(C)-2, 50, 0.08)
     C = self.__Linear_Search__(np.log2(C)-0.5, 50, 0.02)
     
     self.svm_params['C'] = C
     
     self.model = svm.svm_model(self.svm_problem, svm.svm_parameter(**self.svm_params))
Example #28
 def train(self, dataset):
     """
     Trains the svm classifier. Converts words to real numbers for training
     as SVM expects only numbers.
     """
     super(SvmLearner, self).train(dataset)
     prob  = svm.svm_problem(self.results, self.observations)
     param = svm.svm_parameter(kernel_type=svm.LINEAR, C=10, probability=1)
     self.model = svm.svm_model(prob, param)
Example #29
    def train(self,trainset):
        """
        Trains the SVM.
        """

        self.n_classes = len(trainset.metadata['targets'])

        # Set LIBSVM parameters
        kernel_types = {'linear':libsvm.LINEAR,'polynomial':libsvm.POLY,
                        'rbf':libsvm.RBF,'sigmoid':libsvm.SIGMOID}
        if self.kernel not in kernel_types:
            raise ValueError('Invalid kernel: '+self.kernel+'. Should be either \'linear\', \'polynomial\', \'rbf\' or \'sigmoid\'')

        if self.label_weights != None:
            class_to_id = trainset.metadata['class_to_id']
            nr_weight = self.n_classes
            weight_label = range(self.n_classes)
            weight = [1]*self.n_classes
            for k,v in self.label_weights.iteritems():
                weight[class_to_id[k]] = v
        else:
            nr_weight = 0
            weight_label = []
            weight = []

        libsvm_params = libsvm.svm_parameter(svm_type = libsvm.C_SVC,
                                             kernel_type = kernel_types[self.kernel],
                                             degree=self.degree,
                                             gamma=self.gamma,
                                             coef0=self.coef0,
                                             C=self.C,
                                             probability=int(self.output_probabilities),
                                             cache_size=self.cache_size,
                                             eps=self.tolerance,
                                             shrinking=int(self.shrinking),
                                             nr_weight = nr_weight,
                                             weight_label = weight_label,
                                             weight = weight)
        

        # Put training set in the appropriate format:
        #  if is sparse (i.e. a pair), inputs are converted to dictionaries
        #  if not, inputs are assumed to be sequences and are kept intact
        libsvm_inputs = []
        libsvm_targets = []
        for input,target in trainset:
            if type(input) == tuple:
                libsvm_inputs += [dict(zip(input[1],input[0]))]
            else:
                libsvm_inputs += [input]
            libsvm_targets += [float(target)] # LIBSVM requires double-valued targets

        libsvm_problem = libsvm.svm_problem(libsvm_targets,libsvm_inputs)

        # Train SVM
        self.svm = libsvm.svm_model(libsvm_problem,libsvm_params)
Example #30
    def iterGridSearchSVM(self,
                          c_info=None,
                          g_info=None,
                          fold=5,
                          probability=False,
                          compensation=True):
        swap = lambda a, b: (b, a)
        if not c_info is None and len(c_info) >= 3:
            c_begin, c_end, c_step = c_info[:3]
        else:
            c_begin, c_end, c_step = -5, 15, 2
        if c_end < c_begin:
            c_begin, c_end = swap(c_begin, c_end)
        c_step = abs(c_step)

        if not g_info is None and len(g_info) >= 3:
            g_begin, g_end, g_step = g_info[:3]
        else:
            g_begin, g_end, g_step = -15, 3, 2
        if g_end < g_begin:
            g_begin, g_end = swap(g_begin, g_end)
        g_step = abs(g_step)

        labels, samples = self.getData(normalize=True)
        problem = svm.svm_problem(labels, samples)

        if compensation:
            weight, weight_label = self._calculateCompensation(labels)

        n = (c_end - c_begin) / c_step + 1
        n *= (g_end - g_begin) / g_step + 1

        l2c = c_begin
        while l2c <= c_end:
            l2g = g_begin
            while l2g <= g_end:

                param = svm.svm_parameter(kernel_type=svm.RBF,
                                          C=2.**l2c,
                                          gamma=2.**l2g,
                                          probability=1 if probability else 0)
                if compensation:
                    param.weight = weight
                    param.weight_label = weight_label
                    param.nr_weight = len(weight)

                predictions = svm.cross_validation(problem, param, fold)
                predictions = map(int, predictions)

                conf = ConfusionMatrix.from_lists(labels, predictions,
                                                  self.class_names.keys())
                yield n, l2c, l2g, conf

                l2g += g_step
            l2c += c_step
Example #31
 def Select_Model(self, C_min=-10, C_steps=11,  gamma_min=-15, gamma_steps=16):
     #Search for the model parameters that give the smallest CV error
     (C, gamma) = self.__Search__(C_min, C_steps, gamma_min, gamma_steps, 1, 1)
     #(C, gamma) = self.__Search__(np.log2(C)-5, 100, np.log2(gamma)-5, 100, 0.1, 0.1)
     (C, gamma) = self.__Search__(np.log2(C)-1, 100, np.log2(gamma)-1, 100, 0.02, 0.02)
     #(C, gamma) = self.__Search__(np.log2(C)-0.5, 100, np.log2(gamma)-0.5, 100, 0.01, 0.01)
     
     self.svm_params['C'] = C
     self.svm_params['gamma'] = gamma
     
     self.model = svm.svm_model(self.svm_problem, svm.svm_parameter(**self.svm_params))
Example #32
 def search(self):
     """ iterate successive parameter grid refinement and evaluation; adapted from LIBSVM grid search tool """
     jobs = self.calculate_jobs()
     scores = []
     for line in jobs:
         for (c, g) in line:
             # run cross-validation for this point
             self.setParams(C=2 ** c, gamma=2 ** g)
             param = svm_parameter(**self.params)
             cvresult = array(cross_validation(self.problem, param, self.crossval))
             corr, = where(cvresult == self.targets)
             res = (c, g, float(corr.size) / self.targets.size)                
             scores.append(res)
             self._save_points(res)
         self._redraw(scores)
     scores = array(scores)
     best = scores[scores[:, 2].argmax(), :2]  # row with the best CV accuracy -> (log2C, log2gamma)
     self.setParams(C=2 ** best[0], gamma=2 ** best[1])
     logging.info("best log2C=%12.7g, log2g=%11.7g " % (best[0], best[1]))
     param = svm_parameter(**self.params)
     return param
Example #33
 def go_train(self):
     # read the training parameters entered in the UI
     parameter = self.param.toPlainText()
     # load the pre-scaled data
     y, x = svm_read_problem("./1_scale.txt")
     prob = svm_problem(y, x)
     # pass the training parameters through
     param = svm.svm_parameter(parameter)
     # train, build the model, and save it
     model = svm_train(prob, param)
     svm_save_model("./mode.txt", model)
     self.label_5.setText("训练完成,创建分类器完成。")  # "Training finished; classifier created."
Example #34
    def testMultiClass(self, level=1):
        """ Multiclass classification test with BinarySVM Node.
        """
        params = svm.svm_parameter(kernel_type=svm.RBF, C=10)
        node = BinarySVMNode(2, 4, params)
        node.train(self.mc_samples, self.mc_labels)
        node.stop_training()

        testresult = node(self.mc_samples)

        # test if labels are the same as the test output
        assert_array_almost_equal(self.mc_labels, testresult, 2)
Example #35
	def trainmodel(self,train,cv,test,modelsavepath):
		y,x = svmutil.svm_read_problem(train)  # read the training data
		# ycv,xcv = svm_read_problem(cv)  # read the validation set
		# ytest,xtest=svm_read_problem(test)  # read the test set
		prob  = svm.svm_problem(y, x)
		param = svm.svm_parameter('-t 2 -c 0.5 -g 0.125 -b 1')
		model = svmutil.svm_train(prob, param)
		yt,xt = svmutil.svm_read_problem(train)  # re-read the training data to predict on it
		p_labs, p_acc, p_vals = svmutil.svm_predict(yt, xt, model,'-b 1')
		svmutil.svm_save_model(modelsavepath, model)  # save the model
		# model = svmutil.svm_load_model('model_file')  # load the model back
		pass
Example #37
    def iterGridSearchSVM(self, c_info=None, g_info=None, fold=5,
                          probability=False, compensation=True):
        swap = lambda a,b: (b,a)
        if not c_info is None and len(c_info) >= 3:
            c_begin, c_end, c_step = c_info[:3]
        else:
            c_begin, c_end, c_step = -5,  15, 2
        if c_end < c_begin:
            c_begin, c_end = swap(c_begin, c_end)
        c_step = abs(c_step)

        if not g_info is None and len(g_info) >= 3:
            g_begin, g_end, g_step = g_info[:3]
        else:
            g_begin, g_end, g_step = -15, 3, 2
        if g_end < g_begin:
            g_begin, g_end = swap(g_begin, g_end)
        g_step = abs(g_step)

        labels, samples = self.getData(normalize=True)
        #print len(labels), len(samples)
        problem = svm.svm_problem(labels, samples)

        if compensation:
            weight, weight_label = self._calculateCompensation(labels)

        n = (c_end - c_begin) / c_step + 1
        n *= (g_end - g_begin) / g_step + 1

        l2c = c_begin
        while l2c <= c_end:
            l2g = g_begin
            while l2g <= g_end:

                param = svm.svm_parameter(kernel_type=svm.RBF,
                                          C=2.**l2c, gamma=2.**l2g,
                                          probability=1 if probability else 0)
                if compensation:
                    param.weight = weight
                    param.weight_label = weight_label
                    param.nr_weight = len(weight)

                predictions = svm.cross_validation(problem, param, fold)
                predictions = map(int, predictions)

                #print n,c,g
                conf = ConfusionMatrix.from_lists(labels, predictions,
                                                  self.l2nl)
                yield n,l2c,l2g,conf

                l2g += g_step
            l2c += c_step
Example #38
def example_make_model(img_kind, svm_params):
	subdir = "data/"
	problem = build_problem(img_kind)
	print "Prob built"

	param = svm.svm_parameter(svm_params)
	print "Params Set"

	problem_model = svmutil.svm_train(problem, param)
	print "Model built"

	svmutil.svm_save_model(subdir + img_kind + '.model', problem_model)
	print "Done"
Example #39
 def learnModel(self, train_y, train_X):
     # scale train data
     svmScaler = preprocessing.MinMaxScaler(feature_range = (-1, 1))
     train_X_scaledArr = svmScaler.fit_transform(train_X)
     
     # learn and save svm model
     X = train_X_scaledArr.tolist()   
     problem = svm_problem(train_y, X)
     paramStr = '-c ' + str(self._param_c) + ' -g ' + str(self._param_g) + ' -q'
     param = svm_parameter(paramStr)
     
     self._model = svm_train(problem, param)
     self._scaler = svmScaler
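A hypothetical prediction counterpart for this class (the method name predictModel is an assumption): the stored scaler must transform test features exactly as it did the training features.

 def predictModel(self, test_X):
     X = self._scaler.transform(test_X).tolist()
     # svm_predict needs a label list; zeros suffice when truth is unknown
     p_labels, p_acc, p_vals = svm_predict([0] * len(X), X, self._model, '-q')
     return p_labels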
Example #40
    def __init__(self):
        super(LIBSVMRunner, self).__init__()
        self.c_range = -5, 15, 2
        self.gamma_range = 3, -15, -2
        self.kernel_type = 'LINEAR'

        self.svmparam = svm.svm_parameter('-b 1')
        self._optparser.add_option('-k', '--kernel', dest='kernel_type')

        self.cvfunc = svmfun.leave_one_out
        self.n_cv = None

        pass
Example #41
def do_one_cv_classify_predeffolds_valid(theinput):
	c = theinput[0]
	gamma = theinput[1]
	nf = theinput[2]
	output = theinput[3]
	input = theinput[4]
	output_valid = theinput[5]
	input_valid = theinput[6]
	useprob = theinput[7]
	fold_start = theinput[8]
	fold_start_valid = theinput[9]
	perfmetric = theinput[10]
	
	param = svm.svm_parameter('-c %g -g %g -b %d' % (c,gamma,int(useprob)))

	prob = svm.svm_problem(output, input)
	fold_start_p = (c_int *len(fold_start))()
	for i in xrange(len(fold_start)):
		fold_start_p[i] = fold_start[i]
		
	prob_valid = svm.svm_problem(output_valid, input_valid)
	fold_start_p_valid = (c_int *len(fold_start_valid))()
	for i in xrange(len(fold_start_valid)):
		fold_start_p_valid[i] = fold_start_valid[i]


	target = (c_double * prob_valid.l)()
	posclass = output[0]
	
#	print prob
	libsvm.svm_cross_validation_sepsets(prob, prob_valid,fold_start_p, fold_start_p_valid,param, nf, target)

	
	ys = prob.y[:prob_valid.l]
	db = array([[ys[i],target[i]] for i in range(prob_valid.l)])
#	print db
	del target
	del fold_start_p
	del fold_start_p_valid
	
	neg = len([x for x in ys if x != posclass])
#	print neg
	pos = prob_valid.l-neg
#	print pos
		
#	print fb,neg,pos,posclass,perfmetric
	
	[topacc,topphi,minfpfnratio,topf1,auc,optbias] = optimize_results(db,neg,pos,posclass,perfmetric)
		
	return topacc,topphi,minfpfnratio,topf1,auc,optbias
Example #42
    def testSingleClass(self, level=1):
        """ Single class classification test with BinarySVMNode.
        """
        params = svm.svm_parameter(kernel_type=svm.RBF, C=10)
        node = BinarySVMNode(2, 1, params)
        node.train(self.sc_samples, self.sc_labels)
        node.stop_training()

        testresult = node(self.sc_samples)

        # rescale from SVM output [-1,1] to [0,1]
        testresult = (testresult + 1) / 2.

        # test if labels are the same as the test output
        assert_array_almost_equal(self.sc_labels, testresult, 2)
Example #43
def test(word, documents):
    import svm, random
    docs = [d.copy() for d in documents if d[reverse_map[word]]]
    nondocs = [d.copy() for d in documents if not d[reverse_map[word]]]
    nondocs = random.sample(nondocs, min(5 * len(docs), len(nondocs)))
    print float(len(nondocs)) / (len(docs) + len(nondocs))
    cats = [1 for i in docs] + [0 for i in nondocs]
    obs = docs + nondocs
    for i in xrange(len(obs)):
        obs[i][reverse_map[word]] = 0.
    zobs = zip(obs, cats)
    random.shuffle(zobs)
    obs, cats = zip(*zobs)
    params = svm.svm_parameter(C=1, kernel_type=svm.LINEAR)
    problem = svm.svm_problem(cats, obs)
    target = svm.cross_validation(problem, params, 20)
    return sum(target[i] == cats[i] for i in xrange(len(cats))) / float(len(cats))
Example #45
    def __Linear_Search__(self, C_min, C_steps, C_step_by=1.):
        #Utility function used by Parameter_Search() to find the best parameters
        param_grid = np.array( [ C for C in 2**(np.arange(C_steps, dtype=float)*C_step_by+C_min) ] )
        error_grid = np.zeros( len(param_grid) )
        
        
        for i in range( len(param_grid) ):
            self.svm_params['C'] = float( param_grid[i] )
            
            CV_predictions = svm.cross_validation(self.svm_problem, svm.svm_parameter(**self.svm_params), self.folds)
            
            error = sum(abs(CV_predictions-self.training_labels))/len(self.training_labels)
            error_grid[i] = error

        best = mlab.find(error_grid == error_grid.flatten().min())

        C = param_grid[best][0]

        return C
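Note that mlab.find was removed from modern matplotlib; np.flatnonzero is a drop-in replacement for the lookup above (a sketch):

best = np.flatnonzero(error_grid == error_grid.flatten().min())
C = param_grid[best][0]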
Example #46
def do_one_cv(theinput):
	nu = theinput[0]
	c = theinput[1]
	gamma = theinput[2]
	nf = theinput[3]
	output = theinput[4]
	input = theinput[5]
	bins = theinput[6]
	
	param = svm.svm_parameter('-s %d -t %d -n %g -c %g -g %g' % (svm.NU_SVR,svm.RBF,nu,c,gamma))

	prob = svm.svm_problem(output, input)
	target = (c_double * prob.l)()
	fold_start = (c_int *1)();
	fold_start[0] = -1;
	
	libsvm.svm_cross_validation_labeltargets(prob, fold_start,param, nf, target)	
	MSE,SCC = evaluations(prob.y[:prob.l],target[:prob.l],bins)
	del target
	return MSE,SCC
Example #47
    def train(self, search=False, **kwargs):
        """ Train the SVM on the dataset. For RBF kernels (the default), an optional meta-parameter search can be performed.

        :key search: optional name of grid search class to use for RBF kernels: 'GridSearch' or 'GridSearchDOE' 
        :key log2g: base 2 log of the RBF width parameter
        :key log2C: base 2 log of the slack parameter
        :key searchlog: filename into which to dump the search log
        :key others: ...are passed through to the grid search and/or libsvm 
        """
        
        self.setParams(**kwargs)
        problem = svm_problem(self.ds['target'].flatten(), self.ds['input'].tolist())
        if search:
            # this is a bit of a hack...
            model = eval(search + "(problem, self.svmtarget, cmin=[0,-7],cmax=[25,1], cstep=[0.5,0.2],plotflag=self.plot,searchlog=self.searchlog,**self.params)")
        else:
            param = svm_parameter(**self.params)
            model = svm_model(problem, param)
            logging.info("Training completed with parameters:")
            logging.info(repr(param))

        self.svm.setModel(model)
Example #48
def bench_svm(X, Y, T):
    """
    bench with swig-generated wrappers that come with libsvm
    """

    import svm

    X1 = X.tolist()
    Y1 = Y.tolist()
    T1 = T.tolist()

    gc.collect()

    # start time
    tstart = datetime.now()
    problem = svm.svm_problem(Y1, X1)
    param = svm.svm_parameter(svm_type=0, kernel_type=0)
    model = svm.svm_model(problem, param)
    for i in T.tolist():
        model.predict(i)
    delta = (datetime.now() - tstart)
    # stop time
    svm_results.append(delta.seconds + delta.microseconds/mu_second)
Example #49
    def __Search__(self, C_min, C_steps,  gamma_min, gamma_steps, C_step_by=1., gamma_step_by=1.):
        #Utility function used by Parameter_Search() to find the best parameters
        param_grid = np.array( [[ (C,gamma) for C in 2**(np.arange(C_steps, dtype=float)*C_step_by+C_min)] for gamma in 2**(np.arange(gamma_steps, dtype=float)*gamma_step_by+gamma_min)] )
        error_grid = np.zeros( shape=param_grid.shape[0:2] )
        
        
        for row in range( param_grid.shape[0] ):
            for col in range( param_grid.shape[1] ):
                self.svm_params['C'] = float( param_grid[row,col,0] )
                self.svm_params['gamma'] = float( param_grid[row,col,1] )
                
                CV_predictions = svm.cross_validation(self.svm_problem, svm.svm_parameter(**self.svm_params), self.folds)
                
                error = sum(abs(CV_predictions-self.training_labels))/len(self.training_labels)
                error_grid[row,col] = error

        best = mlab.find(error_grid == error_grid.flatten().min())
        row = best // C_steps
        col = best % C_steps

        (C, gamma) = param_grid[row, col][0].flatten()

        return (C, gamma)
Example #51
def train(request):
    
    points = models.Point2d.objects.all()
    
    # Storing the information to be presented to SVM
    labels = []
    inputs = []
    
    # For each point, store the information into arrays
    for p in points:
        labels.append( p.label )
        inputs.append([p.x, p.y])
    
    prob = svm.svm_problem(labels, inputs)
    param = svm.svm_parameter('-t 2 -c 100')
    model = svmutil.svm_train(prob, param)
    
    try:
        svmutil.svm_save_model('libsvm.model', model)
    except Exception as e:
        print "error: ", e, "\n"
    
    data = {"status": "trained"}
    return json(data)
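A hypothetical companion view that loads the saved model back and classifies one point (the json helper and URL routing are assumed to match this project):

def classify(request, x, y):
    model = svmutil.svm_load_model('libsvm.model')
    p_labels, p_acc, p_vals = svmutil.svm_predict([0], [[float(x), float(y)]], model)
    return json({"label": p_labels[0]})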
Example #52
	for i, fileName in enumerate( ['./dataset2/avon.csv', './dataset2/brian_merge.csv', './dataset2/mon_merge.csv', './dataset2/nofar_merge.csv'] ):
		tmp = readDataset(fileName) # array of Instance
		dataSet = dataSet + tmp
		print 'size:', len(tmp)
		label = label + [i]*len(tmp)
		dataSet, label = shuffle(dataSet, label, random_state=0)
	cutIndex = int(TRAIN_SET_RATIO*len(dataSet))
	## use accel_abs and alpha_abs as input for encoding respectively
	print 'learning dictionary'
	data_accel = [I.accel_abs() for I in dataSet]
	data_alpha = [I.alpha_abs() for I in dataSet]
	RPDictionary_accel = Dictionary(PATCH_SIZE, data_accel[:cutIndex])
	RPDictionary_alpha = Dictionary(PATCH_SIZE, data_alpha[:cutIndex])
	aggregate_feature = [ f[0]+f[1] for f in zip( RPDictionary_accel.encoding(data_accel), RPDictionary_alpha.encoding(data_alpha) ) ]
	#aggregate_feature = preprocessing.scale(aggregate_feature) ## scale columns independently to have zero mean and unit variance

	writeFeature('./svm_train', aggregate_feature[:cutIndex], label[:cutIndex]) 
	writeFeature('./svm_test', aggregate_feature[cutIndex:], label[cutIndex:]) 

	## SVM training
	X_train, Y_train = readFeature('./svm_train',PATCH_SIZE*2)
	prob = svm_problem(Y_train, X_train)
	param = svm_parameter('-t 1 -q -d 2')
	model = svm_train(prob, param)

	## SVM predicting
	X_test, Y_test = readFeature('./svm_test',PATCH_SIZE*2)
	p_labels, p_acc, p_vals = svm_predict(Y_test, X_test, model)
	print p_acc	
	print confusion_matrix(Y_test, p_labels)
Example #53
	'meeting':2,
	'selfStudy':3,
}

if __name__=='__main__':
	argparser = argparse.ArgumentParser()
	argparser.add_argument('topicFilePath', type=str, help='hdp_topic file')
	argparser.add_argument('labelFilePath', type=str, help='label file')
	args = argparser.parse_args()
	args = vars(args)

	# with open(args.filePath, 'r') as fr:
	# for SVM (LDA) input format	
	#topicNum, X_train = svm_read_problem(args['topicFilePath'])
	with open(args['topicFilePath'],'r') as fr:
		X_train = fr.readlines()
		X_train = [[float(num) for num in e.split()] for e in X_train]
	X_train = X_train[1:] # abandon the first instance
	#print X_train
	Y_train = []
	with open(args['labelFilePath'], 'r') as fw:
		for line in fw:
			line = line.rstrip().split(';')
			Y_train.append(label[ line[1] ])
	#print Y_train
	prob = svm_problem(Y_train, X_train)
	param = svm_parameter('-v 5 -q')
	model = svm_train(prob, param)
	#print model
	#svm_save_model('{}/svm_model'.format(args['outputDir']),model)
Example #54
    def train_SVR_Linear(self,labels,vectors,verbose, C_range, callback=None):
        '''Private use only'''
        # combine the labels and vectors into one set.
        data = []
        for i in range(len(labels)):
            data.append([labels[i],vectors[i]])
            
        #shuffle the data
        rng = random.Random()
        if self.random_seed != None:
            rng.seed(self.random_seed)
        rng.shuffle(data)
                
        # partition into validation and training
        if type(self.validation_size) == float and self.validation_size > 0.0 and self.validation_size < 1.0:
            training_cutoff = int(len(data)*(1.0-self.validation_size))
        elif type(self.validation_size) == int and self.validation_size < len(labels):
            training_cutoff = len(labels)-self.validation_size
        else:
            raise NotImplementedError("Cannot determine validation set from %s"%self.validation_size)
            
        if verbose: print "Training Cutoff:",len(labels),training_cutoff
        training_data = data[:training_cutoff]
        validation_data = data[training_cutoff:]
        
        tmp_labels = []
        tmp_vectors = []
        for each in training_data:
            tmp_labels.append(each[0])
            tmp_vectors.append(each[1])
        
        prob = svm.svm_problem(tmp_labels,tmp_vectors)
        
        training_info = []
        training_svm = []
        training_table = Table()
        self.training_table = training_table
        i=0
        for C in C_range:
                
            param = svm.svm_parameter(svm_type=self.svm_type,kernel_type = svm.LINEAR, C = C, p=self.epsilon,nu=self.nu)
                
            test_svm = svm.svm_model(prob, param)
                
            mse = 0.0
            total = len(validation_data)
            for label,vector in validation_data:
                pred = test_svm.predict(vector)
                error = label - pred
                mse += error*error
            mse = mse/total
 
            training_svm.append(test_svm)
            training_info.append([C,mse])
            training_table.setElement(i,'C',C)
            training_table.setElement(i,'mse',mse)
            i+=1

            if callback != None:
                callback(int(100*float(i)/len(C_range)))
                
        if verbose: print 
        if verbose: print "------------------------------"
        if verbose: print " Tuning Information:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        best = training_info[0]
        best_svm = training_svm[0]
        for i in range(len(training_info)):
            each = training_info[i]
            if verbose: print " %8.3e  %0.8f"%(each[0],each[1])
            if best[-1] > each[-1]:
                best = each
                best_svm = training_svm[i]
        if verbose: print "------------------------------"
        if verbose: print 
        if verbose: print "------------------------------"
        if verbose: print " Best Tuning:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        if verbose: print " %8.3e  %0.8f"%(best[0],best[1])
        if verbose: print "------------------------------"
        if verbose: print
        self.training_info = training_info
        self.C     = best[0]
        self.error = best[1]

        self.svm = best_svm