def psi(x, y, sm, sparm): """Return a feature vector representing pattern x and label y. This is the combined feature function, which this returns either a svmapi.Sparse object, or sequence of svmapi.Sparse objects (useful during kernel evaluations, as all components undergo kernel evaluation separately). There is no default behavior.""" # In the case of binary classification, psi is just the class (+1 # or -1) times the feature vector for x, including that special # constant bias feature we pretend that we have. # 1^T.y is subgraph size # x*y/1^t*y is the average feature value with the subgraph # x*1^T/n is the average feature value in the whole graph # is the average feature value outside the subgraph # the last two are the nonlinear transformations of the average feature value within the subgraph import svmapi import numpy as np SubGraph_size = svmapi.Sparse(sum(y), kernel_id=1) avg_SubGraph = svmapi.Sparse(np.dot(x, y), kernel_id=2) avg_WholeGraph = svmapi.Sparse(sum(x) / len(x), kernel_id=3) avg_OutSubgraph = svmapi.Sparse(np.dot(x, np.subtract(1, y)) / sum(np.subtract(1, y)), kernel_id=4) avg_Quad_SubGraph = svmapi.Sparse(np.dot(x, y) * np.dot(x, y), kernel_id=5) avg_Log_SubGraph = svmapi.Sparse(np.log(np.dot(x, y)), kernel_id=6) pri = svmapi.Document([ SubGraph_size, avg_SubGraph, avg_WholeGraph, avg_OutSubgraph, avg_Quad_SubGraph, avg_Log_SubGraph ]) print "psi called..........................." print psi return
def psi(x, y, sm, sparm): """Returns the combined feature vector Psi(x,y).""" # Just increment the feature index to the appropriate stack position. #vecness = [(k,v) for k,v in x] pvec = svmapi.Sparse(x, kernel_id=y) #print list(sm.w) #print pveca #import pdb; pdb.set_trace() return svmapi.Document([pvec])
def init_constraints(sample, sm, sparm): """Initializes special constraints. Returns a sequence of initial constraints. Each constraint in the returned sequence is itself a sequence with two items (the intention is to be a tuple). The first item of the tuple is a document object. The second item is a number, indicating that the inner product of the feature vector of the document object with the linear weights must be greater than or equal to the number (or, in the nonlinear case, the evaluation of the kernel on the feature vector with the current model must be greater). This initializes the optimization problem by allowing the introduction of special constraints. Typically no special constraints are necessary. A typical constraint may be to ensure that all feature weights are positive. Note that the slack id must be set. The slack IDs 1 through len(sample) (or just 1 in the combined constraint option) are used by the training examples in the sample, so do not use these if you do not intend to share slack with the constraints inferred from the training data. The default behavior is equivalent to returning an empty list, i.e., no constraints.""" import svmapi if True: # Just some example cosntraints. c, d = svmapi.Sparse, svmapi.Document # Return some really goofy constraints! Normally, if the SVM # is allowed to converge normally, the second and fourth # features are 0 and -1 respectively for sufficiently high C. # Let's make them be greater than 1 and 0.2 respectively!! # Both forms of a feature vector (sparse and then full) are # shown. return [(d([c([(1, 1)])], slackid=len(sample) + 1), 1), (d([c([0, 0, 0, 1])], slackid=len(sample) + 1), .2)] # Encode positivity constraints. Note that this constraint is # satisfied subject to slack constraints. constraints = [] for i in xrange(sm.size_psi): # Create a sparse vector which selects out a single feature. sparse = svmapi.Sparse([(i, 1)]) # The left hand side of the inequality is a document. lhs = svmapi.Document([sparse], costfactor=1, slackid=i + 1 + len(sample)) # Append the lhs and the rhs (in this case 0). constraints.append((lhs, 0)) return constraints
def init_constraints(sample, sm, sparm): """Initializes special constraints. Returns a sequence of initial constraints. Each constraint in the returned sequence is itself a sequence with two items (the intention is to be a tuple). The first item of the tuple is a document object. The second item is a number, indicating that the inner product of the feature vector of the document object with the linear weights must be greater than or equal to the number (or, in the nonlinear case, the evaluation of the kernel on the feature vector with the current model must be greater). This initializes the optimization problem by allowing the introduction of special constraints. Typically no special constraints are necessary. A typical constraint may be to ensure that all feature weights are positive. Note that the slack id must be set. The slack IDs 1 through len(sample) (or just 1 in the combined constraint option) are used by the training examples in the sample, so do not use these if you do not intend to share slack with the constraints inferred from the training data. The default behavior is equivalent to returning an empty list, i.e., no constraints.""" # Encode positivity constraints for the last two items of the weight vector constraints = [] ftrlen = sm.size_psi for i in range(ftrlen - 2, ftrlen): # Create a sparse vector which selects out a single feature. v = (np.arange(ftrlen) == i).astype(float) sparse = svmapi.Sparse(tuple(v)) # The left hand side of the inequality is a document. # These ids leave a gap of size ftrlen, but I don't think this matters lhs = svmapi.Document([sparse], costfactor=1, slackid=len(sample) + i + 1) # Append the lhs and the rhs constraints.append((lhs, 0)) return constraints
def init_constraints(sample, sm, sparm): """Initializes special constraints, if necessary. The constraints "F(x, box ) - F(x,no box) > 1-xi" for all x with a 'real' box remain all through-out learning. We add them to the constraint set once in the beginning. This also fixes some degenerate behaviour of SVMstruct when starting with w==0. """ import svmapi constraints = [] curslack = len(sample) + 1 for feature, boxlist in sample: for box in boxlist: if box.score < 0: continue # use images which contain an object instance feature_hist = psi(feature, box, sm, sparm) sparse = svmapi.Sparse(feature_hist) lhs = svmapi.Document([sparse], costfactor=1, slackid=curslack + 100000) # Append the lhs and the rhs to the constraint set constraints.append((lhs, 1.)) return constraints