def read_examples(filename, sparm):
    """Parse a sequence-labelling example file into (x, y) sequence pairs.

    Each data line has the form ``target index:value index:value ...``.
    Everything from the first ``#`` to the end of a line is a comment.  A line
    that is blank once its comment is removed ends the current sequence.

    Args:
        filename: Path of the example file to read.
        sparm: Structured learning parameters (not used by this parser).

    Returns:
        A list of ``(x, y)`` tuples.  ``x`` is a list of ``svmapi.Sparse``
        feature vectors, one per data line of the sequence, and ``y`` is the
        parallel list of integer targets.

    Raises:
        ValueError: If a target, a feature index or a feature value cannot be
            parsed, or a feature token is not of the form ``index:value``.
    """
    examples = []
    x, y = [], []
    # A context manager closes the file even when parsing raises.
    with open(filename) as handle:
        for line in handle:
            # Strip the comment.  The previous ``if line.find('#')`` test was
            # wrong in both directions: find() returns -1 (truthy) when the
            # line has no '#', which chopped the last character off the line,
            # and 0 (falsy) when the line starts with '#', which left the
            # comment in place to be parsed as data.
            line = line.split('#', 1)[0]
            tokens = line.split()
            if not tokens:
                # An empty line terminates the sequence collected so far.
                if x:
                    examples.append((x, y))
                    x, y = [], []
                continue
            target = int(tokens[0])
            pairs = [t.split(':') for t in tokens[1:]]
            # Feature 0 is always present with value 1 (presumably a constant
            # bias feature -- confirm against the model definition).
            features = [(0, 1)] + [(int(k), float(v)) for k, v in pairs]
            x.append(svmapi.Sparse(features))
            y.append(target)
    if x:
        # The file did not end with a blank line: keep the trailing sequence.
        examples.append((x, y))
    # A single formatted string prints identically under Python 2 and 3.
    print('%d examples read in multiclass_seq.py' % len(examples))
    return examples
def psi(x, y, sm, sparm):
    """Return the position-discounted sum of document features for ranking y.

    ``x[0]`` is indexed by the first element of each entry of ``y`` and yields
    ``(feature, value)`` pairs.  The pairs of the entry at 0-based position
    ``pos`` are weighted by ``1 / log2(pos + 2)`` and accumulated into a dense
    list of length ``num_features`` (a module-level name), which is returned
    wrapped in ``svmapi.Sparse``.
    """
    weighted = [0] * num_features
    per_document = x[0]
    for rank, entry in enumerate(y):
        # The discount depends only on the rank, so compute it once per entry.
        discount = math.pow(math.log(rank + 2, 2), -1)
        for index, value in per_document[entry[0]]:
            weighted[index] += value * discount
    return svmapi.Sparse(weighted)
def psi(x, y, sm, sparm):
    """Return Psi(x, y) as a one-component document.

    The pattern ``x`` is wrapped in an ``svmapi.Sparse`` vector whose
    ``kernel_id`` is the label ``y``; that single vector is returned inside an
    ``svmapi.Document``.  ``sm`` and ``sparm`` are not used.
    """
    tagged_vector = svmapi.Sparse(x, kernel_id=y)
    components = [tagged_vector]
    return svmapi.Document(components)
def psi(x, y, model, sparm):
    """Return the combined feature vector for pattern x and label y.

    The features come from ``path.sum_path_features(x, y)`` and are returned
    wrapped in an ``svmapi.Sparse`` object.  ``model`` and ``sparm`` are not
    used.
    """
    summed_features = path.sum_path_features(x, y)
    return svmapi.Sparse(summed_features)
def find_most_violated_constraint(x, y, sm, sparm):
    """Return the entries of ``x[1]`` ordered by score minus relevance.

    ``x[0]`` holds one feature sequence per document and ``x[1]`` the parallel
    entries, of which the first two elements are used.  Every document is
    scored with ``classify`` and the second element of its entry is subtracted
    from that score.  The ``(first, second)`` pairs are returned in descending
    order of the difference.  ``y`` and ``sparm`` are not used.
    """
    doc_features = x[0]
    b = x[1]
    scored = [
        (b[i][0], b[i][1], classify(sm, svmapi.Sparse(features)) - b[i][1])
        for i, features in enumerate(doc_features)
    ]
    # list.sort is stable, like sorted(), so ties keep their input order.
    scored.sort(key=lambda entry: entry[2], reverse=True)
    return [entry[:2] for entry in scored]
def psi(instance, hyp, model, sparm):
    """Return the combined feature vector for a training instance and hypothesis.

    ``instance`` must be a ``py_indoor_context.TrainingInstance`` and ``hyp`` a
    ``py_indoor_context.ManhattanHypothesis``; both are checked with
    assertions.  The features are obtained from
    ``training_helpers.get_feature`` using the module-level ``FtrMgr`` and are
    returned wrapped in ``svmapi.Sparse``.  ``model`` and ``sparm`` are not
    used.
    """
    instance_type = py_indoor_context.TrainingInstance
    assert isinstance(instance, instance_type)
    hypothesis_type = py_indoor_context.ManhattanHypothesis
    assert isinstance(hyp, hypothesis_type), type(hyp)
    feature = training_helpers.get_feature(FtrMgr, instance, hyp)
    return svmapi.Sparse(feature)
def psi(problem, hyp, model, sparm):
    """Return the combined feature vector for a problem and a solution.

    ``problem`` must be a ``manhattan_utils.ManhattanProblem`` and ``hyp`` a
    ``manhattan_utils.ManhattanSolution``; both are checked with assertions.
    The features are computed by ``path.compute_path_features`` from
    ``problem.F`` and ``hyp.pair`` and returned wrapped in ``svmapi.Sparse``.
    ``model`` and ``sparm`` are not used.
    """
    problem_type = manhattan_utils.ManhattanProblem
    assert isinstance(problem, problem_type)
    solution_type = manhattan_utils.ManhattanSolution
    assert isinstance(hyp, solution_type)
    path_features = path.compute_path_features(problem.F, hyp.pair)
    return svmapi.Sparse(path_features)
def classify_example(x, sm, sparm):
    """Return the entries of ``x[1]`` ordered by descending document score.

    ``x[0]`` holds one feature sequence per document and ``x[1]`` the parallel
    entries.  Every document is scored with ``classify``; the entries are
    returned ordered from highest to lowest score.  ``sparm`` is not used.
    """
    doc_features = x[0]
    b = x[1]
    ranked = [
        (position, classify(sm, svmapi.Sparse(features)))
        for position, features in enumerate(doc_features)
    ]
    # list.sort is stable, like sorted(), so ties keep their input order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return [b[position] for position, _ in ranked]
def psi(F, y, model, sparm):
    """Return the combined feature vector for features F and label y.

    ``y`` must have exactly two elements; the error message describes them as
    ``(states, orients)``.  The features are computed by
    ``path.compute_path_features(F, y)`` and returned wrapped in
    ``svmapi.Sparse``.  ``model`` and ``sparm`` are not used.

    Raises:
        Exception: If ``len(y)`` is not 2.
    """
    if len(y) == 2:
        path_features = path.compute_path_features(F, y)
        return svmapi.Sparse(path_features)
    raise Exception('y should be a pair (states,orients)')
def psi(x, y, sm, sparm):
    """Return the combined feature document for pattern x and label y.

    Six quantities are computed from ``x`` and ``y``, each wrapped in its own
    ``svmapi.Sparse`` with a distinct ``kernel_id``, and bundled into a single
    ``svmapi.Document``:

        1. ``sum(y)``
        2. ``dot(x, y)``
        3. ``sum(x) / len(x)``
        4. ``dot(x, 1 - y) / sum(1 - y)``
        5. ``dot(x, y) ** 2``
        6. ``log(dot(x, y))``

    Args:
        x: Pattern values (assumed to be a numeric vector the same length as
            ``y`` -- TODO confirm against the caller).
        y: Label values (assumed to be a 0/1 membership vector -- TODO
            confirm).
        sm: Structure model (unused).
        sparm: Structured learning parameters (unused).

    Returns:
        The ``svmapi.Document`` holding the six components.  The previous
        version built this document but then fell through to a bare
        ``return``, so callers always received ``None``.
    """
    import svmapi
    import numpy as np

    inside = np.dot(x, y)
    complement = np.subtract(1, y)

    # NOTE(review): the original comments describe component 2 as an average
    # (x*y / 1^T*y), but the code never divides by sum(y) -- confirm which of
    # the two is intended before changing the value.
    # NOTE(review): components 4 and 6 are undefined when sum(1 - y) == 0 or
    # dot(x, y) <= 0; no guard is added here because the intended fallback is
    # not visible in this function.
    components = [
        svmapi.Sparse(sum(y), kernel_id=1),
        svmapi.Sparse(inside, kernel_id=2),
        svmapi.Sparse(sum(x) / len(x), kernel_id=3),
        svmapi.Sparse(np.dot(x, complement) / sum(complement), kernel_id=4),
        svmapi.Sparse(inside * inside, kernel_id=5),
        svmapi.Sparse(np.log(inside), kernel_id=6),
    ]
    # The leftover debug prints were removed: they ran on every call and the
    # second one printed the function object ``psi``, not the result.
    return svmapi.Document(components)
def init_constraints(sample, sm, sparm):
    """Return the initial constraints as ``(document, bound)`` tuples.

    With the hard-wired flag on (it always is), two example constraints are
    returned.  Both use slack id ``len(sample) + 1``: one vector is given as
    ``(index, value)`` pairs with bound 1, the other as a plain value list
    with bound 0.2.  The alternative branch, one constraint per feature with
    bound 0 and its own slack id, is kept for reference and is never reached
    while the flag is ``True``.
    """
    use_example_constraints = True
    if use_example_constraints:
        shared_slack = len(sample) + 1
        pair_form = svmapi.Document([svmapi.Sparse([(1, 1)])],
                                    slackid=shared_slack)
        list_form = svmapi.Document([svmapi.Sparse([0, 0, 0, 1])],
                                    slackid=shared_slack)
        return [(pair_form, 1), (list_form, .2)]

    # One constraint per feature, each selecting that single feature.
    return [
        (svmapi.Document([svmapi.Sparse([(index, 1)])], costfactor=1,
                         slackid=index + 1 + len(sample)), 0)
        for index in range(sm.size_psi)
    ]
def psi(x, y, sm, sparm):
    """Return the combined feature vector Psi(x, y) of a labelled sequence.

    The dense vector has length ``sm.size_psi``.  The ``(index, value)`` pairs
    of ``x[t]`` are added at offset ``sm.num_features * (y[t] - 1)``.  For every
    pair of consecutive labels, one count is added at
    ``num_features * num_classes + (y[t-1] - 1) * num_classes + (y[t] - 1)``.
    The result is returned wrapped in ``svmapi.Sparse``.
    """
    num_classes = sm.num_classes
    num_features = sm.num_features
    joint = [0] * sm.size_psi
    transition_base = num_features * num_classes
    previous_label = None
    for position, label in enumerate(y):
        emission_base = num_features * (label - 1)
        for index, value in x[position]:
            joint[index + emission_base] += value
        if position > 0:
            slot = transition_base + (previous_label - 1) * num_classes + label - 1
            joint[slot] += 1
        previous_label = label
    return svmapi.Sparse(joint)
def psi(x, y, sm, sparm):
    """Return the combined feature vector for pattern x and label y.

    Every component of ``x`` is multiplied by ``0.5 * y``, and one extra
    trailing component equal to ``0.5 * y`` is appended, as if ``x`` ended with
    a constant 1.  The list is returned wrapped in ``svmapi.Sparse``.  ``sm``
    and ``sparm`` are not used.
    """
    half_label = 0.5 * y
    # The trailing entry plays the role of a constant feature of value 1.
    scaled = [half_label * component for component in x] + [half_label]
    return svmapi.Sparse(scaled)
def init_constraints(sample, sm, sparm):
    """Build the special constraints that seed the optimization problem.

    The result is a sequence of constraints, each a two-item sequence (meant
    as a tuple) of a document object and a number.  A constraint requires the
    inner product of the document's feature vector with the linear weights to
    be at least that number; in the nonlinear case it requires the same of the
    kernel evaluation of the feature vector with the current model.

    Special constraints are usually unnecessary.  A typical use is forcing
    all feature weights to be positive.

    Every constraint needs a slack id.  Ids 1 through ``len(sample)`` (or just
    1 under the combined constraint option) belong to the training examples,
    so avoid them unless sharing slack with the data constraints is intended.

    Returning an empty list is equivalent to the default of no constraints.
    """
    import svmapi

    # Hard-wired switch: the example constraints below are always returned.
    use_example_constraints = True
    if use_example_constraints:
        # Deliberately odd example constraints.  When the SVM converges
        # normally with a sufficiently high C, the second and fourth features
        # end up at 0 and -1; these constraints ask for at least 1 and 0.2
        # instead.  The first vector is written in sparse form and the second
        # in full form, to show both.
        shared_slack = len(sample) + 1
        sparse_form = svmapi.Document([svmapi.Sparse([(1, 1)])],
                                      slackid=shared_slack)
        full_form = svmapi.Document([svmapi.Sparse([0, 0, 0, 1])],
                                    slackid=shared_slack)
        return [(sparse_form, 1), (full_form, .2)]

    # Positivity constraints, which hold only up to their slack.  Each one
    # selects a single feature on the left-hand side and has 0 on the right.
    return [
        (svmapi.Document([svmapi.Sparse([(index, 1)])], costfactor=1,
                         slackid=index + 1 + len(sample)), 0)
        for index in range(sm.size_psi)
    ]
def psi(x, y, sm, sparm):
    """Return the joint observation/transition vector of a labelled sequence.

    The dense vector has ``FEATURE_DIM * LABEL_NUM`` observation slots followed
    by ``LABEL_NUM * LABEL_NUM`` transition slots.  The first ``FEATURE_DIM``
    values of ``x[idx]`` are added at offset ``FEATURE_DIM * y[idx]``.  Each
    pair of consecutive labels adds one count at ``prev * LABEL_NUM + curr``
    inside the transition part.  The result is wrapped in ``svmapi.Sparse``.
    """
    emission_size = FEATURE_DIM * LABEL_NUM
    joint = [0] * (emission_size + LABEL_NUM * LABEL_NUM)
    previous_label = -1
    for position, frame in enumerate(x):
        label = y[position]
        base = FEATURE_DIM * label
        # Observation part: accumulate this frame under its label.
        for dim in range(FEATURE_DIM):
            joint[base + dim] += frame[dim]
        # Transition part: the first position has no predecessor.
        if position:
            joint[emission_size + previous_label * LABEL_NUM + label] += 1
        previous_label = label
    return svmapi.Sparse(joint)
def psi(ex, y, sm, sparm):
    """Return Psi(x, y): the histogram of image ``ex`` restricted to box ``y``.

    Args:
        ex: Example exposing parallel ``x``, ``y`` and ``c`` arrays (assumed to
            be point coordinates and per-point bin labels -- TODO confirm).
        y: A box with ``left``, ``right``, ``top`` and ``bottom`` attributes,
            or a list of such boxes.  Only the first box of a non-empty list
            is used.  An empty list selects the whole image, which is used in
            testing.
        sm: Structure model (unused).
        sparm: Structured learning parameters; ``sparm.numbins`` sets the
            histogram bins.

    Returns:
        ``svmapi.Sparse`` wrapping the histogram counts.
    """
    whole_image = False
    if isinstance(y, list):
        if y:
            # In theory psi is only called with single boxes; for a list of
            # boxes just the first one is used.
            y = y[0]
        else:
            # The documented contract is that an empty list means "no box";
            # the previous unconditional ``y[0]`` raised IndexError here.
            whole_image = True

    if whole_image:
        selected = ex.c
    else:
        # Keep only the points that fall inside the box, borders included.
        inside = ((ex.x >= y.left) & (ex.x <= y.right) &
                  (ex.y >= y.top) & (ex.y <= y.bottom))
        selected = ex.c[inside]

    clsthist = histogram(selected, range(sparm.numbins))[0]
    return svmapi.Sparse(clsthist)
def read_examples(filename, sparm):
    """Parse a multiclass example file into a list of (pattern, target) pairs.

    Each data line has the form ``target index:value index:value ...``.
    Everything from the first ``#`` to the end of a line is a comment.  Lines
    that are blank once the comment is removed are skipped.

    Args:
        filename: Path of the example file to read.
        sparm: Structured learning parameters (not used by this parser).

    Returns:
        A list of ``(svmapi.Sparse, int)`` tuples, one per data line.

    Raises:
        ValueError: If a target, a feature index or a feature value cannot be
            parsed, or a feature token is not of the form ``index:value``.
    """
    examples = []
    # A context manager closes the file even when parsing raises.
    with open(filename) as handle:
        for line in handle:
            # Strip the comment.  The previous ``if line.find('#')`` test was
            # wrong in both directions: find() returns -1 (truthy) when the
            # line has no '#', which chopped the last character off the line,
            # and 0 (falsy) when the line starts with '#', which left the
            # comment in place to be parsed as data.
            line = line.split('#', 1)[0]
            tokens = line.split()
            if not tokens:
                continue
            target = int(tokens[0])
            pairs = [tuple(t.split(':')) for t in tokens[1:]]
            # Feature 0 is always present with value 1 (presumably a constant
            # bias feature -- confirm against the model definition).
            features = [(0, 1)] + [(int(k), float(v)) for k, v in pairs]
            examples.append((svmapi.Sparse(features), target))
    # A single formatted string prints identically under Python 2 and 3.
    print('%d examples read' % len(examples))
    return examples
def init_constraints(sample, sm, sparm):
    """Build positivity constraints for the last two weights.

    The result is a sequence of constraints, each a two-item tuple of a
    document object and a number.  A constraint requires the inner product of
    the document's feature vector with the linear weights (or the kernel
    evaluation, in the nonlinear case) to be at least that number.

    Every constraint needs a slack id.  Ids 1 through ``len(sample)`` (or just
    1 under the combined constraint option) belong to the training examples,
    so they are avoided here.

    Two constraints are produced, one for each of the last two positions of
    the weight vector.  Each has a one-hot feature vector of length
    ``sm.size_psi`` and a right-hand side of 0.
    """
    size = sm.size_psi
    positions = numpy_positions = np.arange(size)
    result = []
    for selected in (size - 2, size - 1):
        # One-hot float vector that picks out a single weight.
        one_hot = (positions == selected).astype(float)
        # These slack ids leave a gap of about ``size`` after the sample ids;
        # the original author did not expect that to matter.
        document = svmapi.Document([svmapi.Sparse(tuple(one_hot))],
                                   costfactor=1,
                                   slackid=len(sample) + selected + 1)
        result.append((document, 0))
    return result
def psi(x, y, sm, sparm):
    """Return the joint feature vector Psi(x, y) of a labelled sequence.

    The returned vector has ``48 * 48 * 2`` entries:

        * ``[0, 48*48)`` holds per-label observation sums: ``x[j]`` is added to
          the 48-wide slot starting at ``y[j] * 48``.
        * ``[48*48, 2*48*48)`` holds transition counts: each pair of
          consecutive labels adds 1 at ``48*48 + prev * 48 + curr``.

    Args:
        x: Sequence of observation vectors, one per position (each assumed to
            have 48 entries, as the slot width requires -- TODO confirm).
        y: Sequence of integer labels, parallel to ``x`` (assumed to lie in
            ``range(48)`` -- TODO confirm).
        sm: Structure model (unused).
        sparm: Structured learning parameters (unused).

    Returns:
        ``svmapi.Sparse`` wrapping the dense feature list.
    """
    import svmapi

    num_labels = 48
    feature_dim = 48
    observation_size = num_labels * feature_dim

    feature = np.zeros(observation_size + num_labels * num_labels, float)
    for j in range(len(x)):
        start = y[j] * feature_dim
        feature[start:start + feature_dim] += x[j]
        if j > 0:
            # Transition counts live after the observation part.  The previous
            # code omitted this offset and added them on top of the
            # observation sums.
            feature[observation_size + y[j - 1] * num_labels + y[j]] += 1

    # The previous version discarded ``feature`` and returned leftover
    # binary-classification template code (``0.5 * y * i``), which does not
    # describe a label sequence.
    return svmapi.Sparse(feature.tolist())
def psi(x, y, sm, sparm):
    """Return the joint observation/transition vector of a labelled sequence.

    The returned vector is the concatenation of 48 observation rows of 69
    values each (row ``k`` is the element-wise sum of every ``x[i]`` with
    ``y[i] == k``), followed by 48 transition rows of 48 counts each (entry
    ``[p][q]`` counts the positions where label ``p`` is directly followed by
    label ``q``).

    Args:
        x: Sequence of observation vectors, one per position.
        y: Sequence of integer labels, parallel to ``x`` (assumed to lie in
            ``range(48)`` -- TODO confirm).
        sm: Structure model (unused).
        sparm: Structured learning parameters (unused).

    Returns:
        ``svmapi.Sparse`` wrapping the flattened list.  An empty ``y`` yields
        the all-zero vector; the previous version raised IndexError on it
        because it indexed ``y[-1]`` unconditionally.
    """
    num_labels = 48
    observation_dim = 69
    observation_values = [[0] * observation_dim for _ in range(num_labels)]
    transition_values = [[0] * num_labels for _ in range(num_labels)]

    for i, label in enumerate(y):
        # NOTE(review): zip() stops at the shorter input, so an x[i] with
        # fewer than 69 values would shorten this row and shift everything
        # after it -- assumes every x[i] has exactly 69 entries, confirm.
        observation_values[label] = [
            sum(pair) for pair in zip(observation_values[label], x[i])
        ]
        if i > 0:
            transition_values[y[i - 1]][label] += 1

    # Flatten to a 1-D vector: observation rows first, then transition rows.
    thePsi = []
    for row in observation_values:
        thePsi.extend(row)
    for row in transition_values:
        thePsi.extend(row)
    return svmapi.Sparse(thePsi)
def init_constraints(sample, sm, sparm):
    """Build the initial constraints from the boxes of the training sample.

    The original note describes these as the constraints
    "F(x, box) - F(x, no box) > 1 - xi" for every x with a 'real' box.  They
    hold throughout learning, so they are added once up front.  It also says
    this avoids degenerate behaviour of SVMstruct when starting from w == 0.

    For every box in ``sample`` whose ``score`` is not negative, ``psi`` is
    evaluated on the example and the box.  The result becomes the left-hand
    side of a constraint with right-hand side 1.
    """
    import svmapi

    # NOTE(review): the slack id is computed once and never advanced, so
    # every constraint shares it -- confirm whether that is intended.
    slack_id = len(sample) + 1 + 100000
    result = []
    for image, boxes in sample:
        for candidate in boxes:
            # Only boxes that contain an object instance are used.
            if not (candidate.score < 0):
                histogram_vector = svmapi.Sparse(
                    psi(image, candidate, sm, sparm))
                document = svmapi.Document([histogram_vector], costfactor=1,
                                           slackid=slack_id)
                result.append((document, 1.))
    return result
def psi(x, y, sm, sparm):
    """Return the combined feature vector Psi(x, y).

    Every ``(index, value)`` pair of ``x`` has its index shifted by
    ``sm.num_features * (y - 1)``.  The shifted pairs are wrapped in an
    ``svmapi.Sparse`` whose ``kernel_id`` is ``y``.  ``sparm`` is not used.
    """
    shift = sm.num_features * (y - 1)
    shifted = []
    for index, value in x:
        shifted.append((index + shift, value))
    return svmapi.Sparse(shifted, kernel_id=y)
def psi(x, y, sm, sparm):
    """Return the combined feature vector for pattern x and label y.

    Every component of ``x`` is multiplied by ``0.5 * y`` and one extra
    trailing component equal to ``0.5 * y`` is appended.  The list is returned
    wrapped in ``svmapi.Sparse``.  ``sm`` and ``sparm`` are not used.
    """
    import svmapi

    half_label = 0.5 * y
    scaled = [half_label * component for component in x] + [half_label]
    return svmapi.Sparse(scaled)
def psi(x, y, sm, sparm):
    """Return the combined feature vector for sequence x and labels y.

    The result of ``psiUtil.genObsMat(x, y)`` and the result of
    ``psiUtil.genTrsMat(y)`` are each flattened to a list.  The two lists are
    concatenated in that order and wrapped in ``svmapi.Sparse``.  ``sm`` and
    ``sparm`` are not used.
    """
    observation_part = psiUtil.genObsMat(x, y).reshape(-1, ).tolist()
    transition_part = psiUtil.genTrsMat(y).reshape(-1, ).tolist()
    return svmapi.Sparse(observation_part + transition_part)