def getOptCG(labels, values):
    """Grid-search for the optimal SVM cost (c) and gamma (g) parameters.

    Runs n-fold cross-validation over an exponential grid of candidate
    c and g exponents and returns the (c, g) pair with the highest
    cross-validation accuracy.

    Arguments:
        labels -- list of class labels for the training data
        values -- list of feature vectors for the training data

    Returns:
        (c, g) -- cost and gamma (as 2**exponent) giving the best
                  cross-validation accuracy
    """
    # Set up cross-validation settings: exponent grids for c and g.
    param = Param()
    param.cset = range(-5, 15, 2)
    param.gset = range(3, -15, -2)
    param.nfold = 10
    prob = svm_problem(labels, values)
    # rVal[i][j] holds the CV accuracy for (gset[i], cset[j]).
    rVal = [[0 for col in range(len(param.cset))]
            for row in range(len(param.gset))]
    # Cross-validate every (g, c) pair on the grid.
    for i in range(len(param.gset)):
        param.g = 2 ** param.gset[i]
        for j in range(len(param.cset)):
            param.c = 2 ** param.cset[j]
            # Train on learning data with x-validation; with "-v" libsvm
            # returns the CV accuracy rather than a model.
            # (The old code also built an unused svm_parameter here;
            # that dead allocation has been removed.)
            rVal[i][j] = svm_train(prob, param.libsvm + " -v " + str(param.nfold))
    # Select the parameters with the highest accuracy.  getMax is
    # presumed to return (max_value, (row, col)); the old name
    # 'min_val' was misleading.
    best_acc, loc = getMax(rVal)
    g = 2 ** param.gset[loc[0]]
    c = 2 ** param.cset[loc[1]]
    return c, g
def svmtrain(labels, values=None, c=None, g=None):
    """Train an SVM model on the given data.

    Arguments:
        labels -- either a list of class labels, or a dict of
                  {class: {id: feature_vector}}; in the dict case
                  `values` is derived from it and labels are rebuilt
                  as 1-based class indices
        values -- list of feature vectors (ignored when labels is a dict)
        c, g   -- SVM cost and gamma; when either is None the optimal
                  pair is found via getOptCG

    Returns:
        the trained libsvm model

    Raises:
        TypeError -- if no feature values are provided
    """
    # If a dictionary was passed, flatten it into parallel
    # label/value lists (one 1-based label per sample, in class order).
    if isinstance(labels, dict):
        values = [j for i in labels.itervalues() for j in i.itervalues()]
        labels = [i + 1 for i in range(len(labels.values()))
                  for j in range(len(labels[labels.keys()[i]]))]
    # Guard clause instead of wrapping the whole body in an if/else.
    if values is None:
        raise TypeError("Values not provided for the arguments")
    optParam = Param()
    optParam.c = c
    optParam.g = g
    # Retrieve optimal c and g when either is missing.
    if c is None or g is None:
        optParam.c, optParam.g = getOptCG(labels, values)
    # Train model with the chosen c and g.
    prob = svm_problem(labels, values)
    m = svm_train(prob, optParam.libsvm)
    # Return model
    return m
def xTrain(labels, values=None, k=10, rand=True): # Split data into k partitions partitions = split(labels, values, k, rand) best_model = None best_acc = -1 avg_acc = 0 count = 0 if isinstance(labels, dict): values = [j for i in labels.itervalues() for j in i.itervalues()] labels = [i + 1 for i in range(len(labels.values())) for j in range(len(labels[labels.keys()[i]]))] if values != None: optParam = Param() print "Searching for optimal parameters..." optParam.c, optParam.g = getOptCG(labels, values) print "c: " + str(optParam.c) + " g: " + str(optParam.g) print " " # For each partition, train a model and check the accuracy of the partition's test data against # the model. The highest accuracy model will be the model returned. The accuracy returned is the # average accuracy of all the partitions for i in partitions.iter: print "Training iteration " + str(count + 1) + ".." # Train the model using the partition training data model = svmtrain(i.train.labels, i.train.values, optParam.c, optParam.g) # Get a list of predictions for the testing data pred = svmtest(i.test.values, model) # Find the accuracy of the test data predicted by the model acc, x1, x2 = evaluations(i.test.labels, pred) # Store the model with the best accuracy if acc > best_acc: best_acc = acc best_model = model print "Iteration " + str(count + 1) + " accuracy: " + str(acc) avg_acc += acc count += 1 # Get the avg accuracy avg_acc /= count print " " print "xTrain completed." return model, avg_acc