Example no. 1
0
def getOptCG(labels, values):
    """Grid-search for the optimal SVM cost (c) and gamma (g) parameters.

    Runs n-fold cross-validation over an exponential grid of candidate
    c and g exponents and returns the pair with the highest
    cross-validation accuracy.

    Args:
        labels: sequence of class labels for the training samples.
        values: sequence of feature vectors, parallel to labels.

    Returns:
        (c, g) tuple: cost and gamma giving the best accuracy.
    """
    # Set up cross-validation settings
    param = Param()
    param.cset = range(-5, 15, 2)    # exponents tried for c = 2**e
    param.gset = range(3, -15, -2)   # exponents tried for g = 2**e
    param.nfold = 10
    prob = svm_problem(labels, values)
    # Accuracy grid: rows indexed by g exponents, columns by c exponents
    rVal = [[0 for col in range(len(param.cset))] for row in range(len(param.gset))]

    # Cross-validation to get optimal parameters
    for i in range(len(param.gset)):
        param.g = 2 ** param.gset[i]

        for j in range(len(param.cset)):
            param.c = 2 ** param.cset[j]
            # Train on learning data with x-validation and store the accuracy.
            # (Removed an unused `svm_parameter(param.libsvm)` that was
            # re-created on every iteration and never read.)
            rVal[i][j] = svm_train(prob, param.libsvm + " -v " + str(param.nfold))

    # Select the parameters with highest accuracy
    # (was misnamed `min_val`: getMax returns the maximum and its location)
    max_val, loc = getMax(rVal)
    g = 2 ** param.gset[loc[0]]
    c = 2 ** param.cset[loc[1]]

    return c, g
Example no. 2
0
def svmtrain(labels, values=None, c=None, g=None):
    """Train an SVM model, searching for optimal parameters if needed.

    Args:
        labels: list of class labels, or a dict of nested samples
            (then `values` is derived by flattening the dict and labels
            become 1-based class indices).
        values: list of feature vectors; required unless labels is a dict.
        c: SVM cost parameter; found via cross-validation when omitted.
        g: SVM gamma parameter; found via cross-validation when omitted.

    Returns:
        The trained SVM model.

    Raises:
        TypeError: if no values are provided.
    """
    # If given a dict, flatten it into parallel labels/values lists.
    # NOTE(review): relies on Python 2 dict methods (itervalues,
    # indexable keys()) -- would need porting for Python 3.
    if isinstance(labels, dict):
        values = [j for i in labels.itervalues() for j in i.itervalues()]
        labels = [i + 1 for i in range(len(labels.values())) for j in range(len(labels[labels.keys()[i]]))]

    # Guard clause (was a nested `if values != None`; use identity
    # comparison for None per PEP 8)
    if values is None:
        raise TypeError("Values not provided for the arguments")

    optParam = Param()
    optParam.c = c
    optParam.g = g
    if c is None or g is None:
        # Retrieve optimal c and g via cross-validation grid search
        optParam.c, optParam.g = getOptCG(labels, values)

    # Train model with optimal c and g
    prob = svm_problem(labels, values)
    m = svm_train(prob, optParam.libsvm)

    # Return model
    return m
Example no. 3
0
def xTrain(labels, values=None, k=10, rand=True):
    # Split data into k partitions
    partitions = split(labels, values, k, rand)
    best_model = None
    best_acc = -1
    avg_acc = 0
    count = 0
    

    if isinstance(labels, dict):
        values = [j for i in labels.itervalues() for j in i.itervalues()]
        labels = [i + 1 for i in range(len(labels.values())) for j in range(len(labels[labels.keys()[i]]))]

    if values != None:
        optParam = Param()
        print "Searching for optimal parameters..."
        optParam.c, optParam.g = getOptCG(labels, values)
        print "c: " + str(optParam.c) + " g: " + str(optParam.g)
        print " "

        # For each partition, train a model and check the accuracy of the partition's test data against
        # the model. The highest accuracy model will be the model returned. The accuracy returned is the
        # average accuracy of all the partitions
        for i in partitions.iter:
            print "Training iteration " + str(count + 1) + ".."
            # Train the model using the partition training data
            model = svmtrain(i.train.labels, i.train.values, optParam.c, optParam.g)
            # Get a list of predictions for the testing data
            pred = svmtest(i.test.values, model)
            # Find the accuracy of the test data predicted by the model
            acc, x1, x2 = evaluations(i.test.labels, pred)

            # Store the model with the best accuracy
            if acc > best_acc:
                best_acc = acc
                best_model = model

            print "Iteration " + str(count + 1) + " accuracy: " + str(acc)
            avg_acc += acc
            count += 1

        # Get the avg accuracy
        avg_acc /= count

        print " "
        print "xTrain completed."
        return model, avg_acc