def read_examples(filename, sparm):
    """Parses an input file into an example sequence."""
    # This reads example files of the type read by SVM^multiclass.
    examples = []
    x, y = [], []
    # Open the file and read each example.
    for line in file(filename):
        # Strip comments ('find' returns -1 when '#' is absent).
        if line.find('#') >= 0: line = line[:line.find('#')]
        tokens = line.split()
        # An empty line marks the end of a sequence.
        if not tokens:
            if x: examples.append((x, y))
            x, y = [], []
            continue
        # Get the target.
        target = int(tokens[0])
        # Get the features.
        tokens = [t.split(':') for t in tokens[1:]]
        features = [(0,1)]+[(int(k),float(v)) for k,v in tokens]
        # Add the example to the list
        x.append(svmapi.Sparse(features))
        y.append(target)
    if x:  # append the last sequence if the file doesn't end with a blank line
        examples.append((x, y))
    # Print out some very useful statistics.
    print len(examples), 'examples read in multiclass_seq.py'
    return examples
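For reference, a minimal sketch of the input this parser accepts: SVM^light-style lines ("target feature:value ...", with '#' starting a comment) and blank lines separating sequences. The file name and contents below are purely illustrative, and since the parser ignores sparm, None suffices for a quick test.

sample = """# a comment line
1 1:0.43 3:0.12 9284:0.2  # the first token is the target class
2 1:0.12 2:0.5

1 4:1.0
"""
open('train.dat', 'w').write(sample)
examples = read_examples('train.dat', None)  # prints: 2 examples read ...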
Example #2
def psi(x, y, sm, sparm):
    """Returns the combined feature vector Psi(x,y) for a ranking.

    Each ranked document's features are accumulated, weighted by the
    1/log2(pos+2) position discount."""
    import math
    psi_vec = [0] * num_features  # num_features: assumed module-level constant
    doc_features = x[0]
    for pos in range(len(y)):
        for feature, val in doc_features[y[pos][0]]:
            psi_vec[feature] += val / math.log(pos + 2, 2)
    return svmapi.Sparse(psi_vec)
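For intuition, 1/log2(pos+2) is the usual DCG-style position discount; a quick check of the first few ranks:

import math
for pos in range(3):
    print pos, 1.0 / math.log(pos + 2, 2)
# 0 1.0
# 1 0.630929753571
# 2 0.5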
Example #3
def psi(x, y, sm, sparm):
    """Returns the combined feature vector Psi(x,y)."""
    # Tag the whole feature vector with the class as its kernel id.
    pvec = svmapi.Sparse(x, kernel_id=y)
    return svmapi.Document([pvec])
Example #4
def psi(x, y, model, sparm):
    """Return a feature vector representing pattern x and label y.

    This is the combined feature function, which this returns either a
    svmapi.Sparse object, or sequence of svmapi.Sparse objects (useful
    during kernel evaluations, as all components undergo kernel
    evaluation separately).  There is no default behavior."""

    return svmapi.Sparse(path.sum_path_features(x, y))
Example #5
def find_most_violated_constraint(x, y, sm, sparm):
    """Returns the most violated ranking for the query x."""
    vals = []
    doc_features = x[0]
    b = x[1]
    # The most violated output is obtained by sorting documents by
    # doc_score minus weighted relevance.
    for i in range(len(doc_features)):
        vals.append((b[i][0], b[i][1],
                     classify(sm, svmapi.Sparse(doc_features[i])) - b[i][1]))
    vals = sorted(vals, key=lambda a: a[2], reverse=True)
    output_y = [(p, q) for p, q, r in vals]
    return output_y
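Several of these ranking examples call a classify helper that is presumably defined elsewhere in their module; the sketch below is an assumption about what it does (score a sparse vector against the current weights), not its actual implementation.

def classify(sm, sparse_example):
    # sm.w is the current weight vector; a svmapi.Sparse iterates as
    # (index, value) pairs.
    w = list(sm.w)
    return sum(w[k] * v for k, v in sparse_example)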
Example #6
def psi(instance, hyp, model, sparm):
    """Return a feature vector representing pattern x and label y.

    This is the combined feature function, which this returns either a
    svmapi.Sparse object, or sequence of svmapi.Sparse objects (useful
    during kernel evaluations, as all components undergo kernel
    evaluation separately).  There is no default behavior."""

    assert (isinstance(instance, py_indoor_context.TrainingInstance))
    assert (isinstance(hyp, py_indoor_context.ManhattanHypothesis)), type(hyp)

    return svmapi.Sparse(training_helpers.get_feature(FtrMgr, instance, hyp))
Example #7
def psi(problem, hyp, model, sparm):
    """Return a feature vector representing pattern x and label y.

    This is the combined feature function, which this returns either a
    svmapi.Sparse object, or sequence of svmapi.Sparse objects (useful
    during kernel evaluations, as all components undergo kernel
    evaluation separately).  There is no default behavior."""

    assert (isinstance(problem, manhattan_utils.ManhattanProblem))
    assert (isinstance(hyp, manhattan_utils.ManhattanSolution))

    return svmapi.Sparse(path.compute_path_features(problem.F, hyp.pair))
Example #8
def classify_example(x, sm, sparm):
    """Ranks the documents in x by their model scores."""
    doc_scores = []
    doc_features = x[0]
    b = x[1]
    for doc_index, features in enumerate(doc_features):
        doc_scores.append((doc_index, classify(sm, svmapi.Sparse(features))))
    # output_y is obtained by sorting documents by doc_score.
    doc_scores = sorted(doc_scores, key=lambda a: a[1], reverse=True)
    output_y = [b[index] for index, score in doc_scores]
    return output_y
Example #9
def psi(F, y, model, sparm):
    """Return a feature vector representing pattern x and label y.

    This is the combined feature function, which this returns either a
    svmapi.Sparse object, or sequence of svmapi.Sparse objects (useful
    during kernel evaluations, as all components undergo kernel
    evaluation separately).  There is no default behavior."""

    if len(y) != 2:
        raise Exception('y should be a pair (states,orients)')

    return svmapi.Sparse(path.compute_path_features(F, y))
Example #10
def psi(x, y, sm, sparm):
    """Return a feature vector representing pattern x and label y.

    This is the combined feature function, which this returns either a
    svmapi.Sparse object, or sequence of svmapi.Sparse objects (useful
    during kernel evaluations, as all components undergo kernel
    evaluation separately).  There is no default behavior."""
    # Psi here is built from several graph statistics, each tagged with
    # its own kernel id:
    #    1^T y is the subgraph size
    #    x.y / 1^T y is the average feature value within the subgraph
    #    x.1 / n is the average feature value over the whole graph
    #    x.(1-y) / 1^T (1-y) is the average feature value outside the subgraph
    #    the last two are nonlinear transformations of the average
    #    feature value within the subgraph
    import svmapi
    import numpy as np
    subgraph_avg = np.dot(x, y) / sum(y)
    SubGraph_size = svmapi.Sparse(sum(y), kernel_id=1)
    avg_SubGraph = svmapi.Sparse(subgraph_avg, kernel_id=2)
    avg_WholeGraph = svmapi.Sparse(sum(x) / len(x), kernel_id=3)
    avg_OutSubgraph = svmapi.Sparse(np.dot(x, np.subtract(1, y)) /
                                    sum(np.subtract(1, y)),
                                    kernel_id=4)
    avg_Quad_SubGraph = svmapi.Sparse(subgraph_avg * subgraph_avg, kernel_id=5)
    avg_Log_SubGraph = svmapi.Sparse(np.log(subgraph_avg), kernel_id=6)

    return svmapi.Document([
        SubGraph_size, avg_SubGraph, avg_WholeGraph, avg_OutSubgraph,
        avg_Quad_SubGraph, avg_Log_SubGraph
    ])
Example #11
def init_constraints(sample, sm, sparm):
    if True:
        # Return two example constraints; the positivity-constraint code
        # below is unreachable as written.
        c, d = svmapi.Sparse, svmapi.Document
        return [(d([c([(1, 1)])], slackid=len(sample) + 1), 1),
                (d([c([0, 0, 0, 1])], slackid=len(sample) + 1), .2)]
    constraints = []
    for i in xrange(sm.size_psi):
        sparse = svmapi.Sparse([(i, 1)])
        lhs = svmapi.Document([sparse],
                              costfactor=1,
                              slackid=i + 1 + len(sample))
        constraints.append((lhs, 0))
    return constraints
Example #12
def psi(x, y, sm, sparm):
    """Returns the combined feature vector Psi(x,y)."""
    T = len(y)
    n_class, n_feature = sm.num_classes, sm.num_features
    ps = [0] * sm.size_psi
    edge_offset = n_feature * n_class
    for t in range(T):
        # Emission features: stack x[t] into the block for class y[t].
        offset = n_feature * (y[t] - 1)
        for k, v in x[t]: ps[k + offset] += v
        # Transition features: count the (y[t-1], y[t]) label pair.
        if t > 0: ps[edge_offset + (y[t-1] - 1) * n_class + y[t] - 1] += 1

    return svmapi.Sparse(ps)
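The layout above implies a particular psi-vector length: one emission block per class plus a dense class-by-class transition matrix. A hedged sketch of the matching setup, presumably done in this module's init_model:

def init_model(sample, sm, sparm):
    # num_features and num_classes are assumed to be set from the data
    # elsewhere; only the total length matters for the layout above.
    sm.size_psi = sm.num_features * sm.num_classes + sm.num_classes ** 2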
Example #13
def psi(x, y, sm, sparm):
    """Return a feature vector representing pattern x and label y.

    This is the combined feature function, which this returns either a
    svmapi.Sparse object, or sequence of svmapi.Sparse objects (useful
    during kernel evaluations, as all components undergo kernel
    evaluation separately).  There is no default behavior."""
    # In the case of binary classification, psi is just the class (+1
    # or -1) times the feature vector for x, including that special
    # constant bias feature we pretend that we have.
    thePsi = [0.5 * y * i for i in x]
    thePsi.append(0.5 * y)  # Pretend as though x had a 1 at the end.
    return svmapi.Sparse(thePsi)
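A quick sanity check of this construction, with arbitrarily chosen values:

x, y = [2.0, -1.0], -1
thePsi = [0.5 * y * i for i in x] + [0.5 * y]
print thePsi  # [-1.0, 0.5, -0.5]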
Example #14
def init_constraints(sample, sm, sparm):
    """Initializes special constraints.

    Returns a sequence of initial constraints.  Each constraint in the
    returned sequence is itself a sequence with two items (the
    intention is to be a tuple).  The first item of the tuple is a
    document object.  The second item is a number, indicating that the
    inner product of the feature vector of the document object with
    the linear weights must be greater than or equal to the number
    (or, in the nonlinear case, the evaluation of the kernel on the
    feature vector with the current model must be greater).  This
    initializes the optimization problem by allowing the introduction
    of special constraints.  Typically no special constraints are
    necessary.  A typical constraint may be to ensure that all feature
    weights are positive.

    Note that the slack id must be set.  The slack IDs 1 through
    len(sample) (or just 1 in the combined constraint option) are used
    by the training examples in the sample, so do not use these if you
    do not intend to share slack with the constraints inferred from
    the training data.

    The default behavior is equivalent to returning an empty list,
    i.e., no constraints."""
    import svmapi

    if True:
        # Just some example constraints.
        c, d = svmapi.Sparse, svmapi.Document
        # Return some really goofy constraints!  Normally, if the SVM
        # is allowed to converge normally, the second and fourth
        # features are 0 and -1 respectively for sufficiently high C.
        # Let's make them be greater than 1 and 0.2 respectively!!
        # Both forms of a feature vector (sparse and then full) are
        # shown.
        return [(d([c([(1, 1)])], slackid=len(sample) + 1), 1),
                (d([c([0, 0, 0, 1])], slackid=len(sample) + 1), .2)]
    # Encode positivity constraints.  Note that this constraint is
    # satisfied subject to slack constraints.
    constraints = []
    for i in xrange(sm.size_psi):
        # Create a sparse vector which selects out a single feature.
        sparse = svmapi.Sparse([(i, 1)])
        # The left hand side of the inequality is a document.
        lhs = svmapi.Document([sparse],
                              costfactor=1,
                              slackid=i + 1 + len(sample))
        # Append the lhs and the rhs (in this case 0).
        constraints.append((lhs, 0))
    return constraints
Example #15
def psi(x, y, sm, sparm):
    # FEATURE_DIM and LABEL_NUM are assumed to be module-level constants.
    sentLen = len(x)
    observationLen = FEATURE_DIM * LABEL_NUM
    pvec = [0] * (observationLen + LABEL_NUM * LABEL_NUM)
    prevY = -1
    for idx in xrange(sentLen):
        offset = FEATURE_DIM * y[idx]
        # observation vector
        for i in xrange(FEATURE_DIM):
            pvec[i + offset] += x[idx][i]
        # transition vector
        if (idx != 0):
            pvec[observationLen + prevY * LABEL_NUM + y[idx]] += 1
        prevY = y[idx]

    pvec = svmapi.Sparse(pvec)
    return pvec
Example #16
def psi(ex, y, sm, sparm):
    """Returns the combined feature vector Psi(x,y).
       For us this is the histogram of the image x restricted to the box y.
       y should derive from 'find_most_violated_constraint' and be a real box.
       If it's a list, we use just the first box inside. If the list is empty,
       we use the whole image (used in testing).
    """

    # In theory, this should only be called with a single box as y; just
    # in case we get a list of boxes, use only the first one.
    if isinstance(y, list):
        y = y[0]

    # Crop the image ex to the region given by y.
    idx = (ex.x >= y.left) & (ex.x <= y.right) & \
          (ex.y >= y.top) & (ex.y <= y.bottom)
    clsthist = histogram(ex.c[idx], xrange(sparm.numbins))[0]
    return svmapi.Sparse(clsthist)
Example #17
def read_examples(filename, sparm):
    """Parses an input file into an example sequence."""
    # This reads example files of the type read by SVM^multiclass.
    examples = []
    # Open the file and read each example.
    for line in file(filename):
        # Strip comments ('find' returns -1 when '#' is absent).
        if line.find('#') >= 0: line = line[:line.find('#')]
        tokens = line.split()
        # If the line is empty, who cares?
        if not tokens: continue
        # Get the target.
        target = int(tokens[0])
        # Get the features.
        tokens = [tuple(t.split(':')) for t in tokens[1:]]
        features = [(0, 1)] + [(int(k), float(v)) for k, v in tokens]
        # Add the example to the list
        examples.append((svmapi.Sparse(features), target))
    # Print out some very useful statistics.
    print len(examples), 'examples read'
    return examples
Example #18
def init_constraints(sample, sm, sparm):
    """Initializes special constraints.

    Returns a sequence of initial constraints.  Each constraint in the
    returned sequence is itself a sequence with two items (the
    intention is to be a tuple).  The first item of the tuple is a
    document object.  The second item is a number, indicating that the
    inner product of the feature vector of the document object with
    the linear weights must be greater than or equal to the number
    (or, in the nonlinear case, the evaluation of the kernel on the
    feature vector with the current model must be greater).  This
    initializes the optimization problem by allowing the introduction
    of special constraints.  Typically no special constraints are
    necessary.  A typical constraint may be to ensure that all feature
    weights are positive.

    Note that the slack id must be set.  The slack IDs 1 through
    len(sample) (or just 1 in the combined constraint option) are used
    by the training examples in the sample, so do not use these if you
    do not intend to share slack with the constraints inferred from
    the training data.

    The default behavior is equivalent to returning an empty list,
    i.e., no constraints."""

    # Encode positivity constraints for the last two items of the weight vector
    constraints = []
    ftrlen = sm.size_psi
    for i in range(ftrlen - 2, ftrlen):
        # Create a sparse vector which selects out a single feature.
        v = (np.arange(ftrlen) == i).astype(float)
        sparse = svmapi.Sparse(tuple(v))
        # The left hand side of the inequality is a document.
        # These ids leave a gap of size ftrlen, but I don't think this matters
        lhs = svmapi.Document([sparse],
                              costfactor=1,
                              slackid=len(sample) + i + 1)
        # Append the lhs and the rhs
        constraints.append((lhs, 0))
    return constraints
Example #19
def psi(x, y, sm, sparm):
    """Return a feature vector representing pattern x and label y.

    This is the combined feature function, which this returns either a
    svmapi.Sparse object, or sequence of svmapi.Sparse objects (useful
    during kernel evaluations, as all components undergo kernel
    evaluation separately).  There is no default behavior."""
    # Sequence labeling: stack each frame's 48 features into the block
    # for its label, and count label-to-label transitions in the second
    # half of the vector.
    import svmapi
    sentence_len = len(x)
    feature = np.zeros((48 * 48 * 2), float)
    for j in range(sentence_len):
        feature[y[j] * 48:(y[j] + 1) * 48] += x[j]
        if j > 0:
            prev = y[j - 1]
            curr = y[j]
            # Transitions live after the 48*48 observation block.
            feature[48 * 48 + prev * 48 + curr] += 1

    return svmapi.Sparse(feature.tolist())
Example #20
def psi(x, y, sm, sparm):

    sequence_length = len(y)
    observation_values = [[0 for i in range(69)] for j in range(48)]
    transition_values = [[0 for i in range(48)] for j in range(48)]

    # Fill in the observation and transition counts.
    for i in range(sequence_length):
        observation_values[y[i]] = map(sum, zip(observation_values[y[i]],
                                                x[i]))
        if i + 1 < sequence_length:
            transition_values[y[i]][y[i + 1]] += 1

    # Arrange the values into a 1-D feature vector.
    thePsi = []
    for i in range(48):
        thePsi.extend(observation_values[i])
    for i in range(48):
        thePsi.extend(transition_values[i])

    return svmapi.Sparse(thePsi)
Example #21
def init_constraints(sample, sm, sparm):
    """Initializes special constraints, if necessary. 

    The constraints "F(x, box) - F(x, no box) > 1 - xi" for all x with a 'real' box remain
    throughout learning. We add them to the constraint set once in the beginning.
    This also fixes some degenerate behaviour of SVMstruct when starting with w == 0.
    """
    import svmapi
    constraints = []
    curslack = len(sample) + 1
    for feature, boxlist in sample:
        for box in boxlist:
            if box.score < 0:
                # Only use images which contain an object instance.
                continue

            feature_hist = psi(feature, box, sm, sparm)
            sparse = svmapi.Sparse(feature_hist)
            lhs = svmapi.Document([sparse],
                                  costfactor=1,
                                  slackid=curslack + 100000)

            # Append the lhs and the rhs to the constraint set
            constraints.append((lhs, 1.))
    return constraints
Example #22
def psi(x, y, sm, sparm):
    """Returns the combined feature vector Psi(x,y)."""
    # Just increment the feature index to the appropriate stack position.
    offset = sm.num_features * (y - 1)
    pvec = svmapi.Sparse([(k + offset, v) for k, v in x], kernel_id=y)
    return pvec
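A concrete illustration of the stacking, with hypothetical sizes: four features per class, so class 1 leaves indices unchanged while class 3 shifts them by 8.

num_features = 4                  # hypothetical model size
x = [(0, 1.0), (2, 0.7)]          # a sparse example
for y in (1, 3):
    offset = num_features * (y - 1)
    print y, [(k + offset, v) for k, v in x]
# 1 [(0, 1.0), (2, 0.7)]
# 3 [(8, 1.0), (10, 0.7)]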
Example #23
def psi(x, y, sm, sparm):
    """Returns Psi(x,y) for binary classification: the class (+1 or -1)
    times the feature vector, plus a constant bias feature."""
    import svmapi
    thePsi = [0.5 * y * i for i in x]
    thePsi.append(0.5 * y)  # the bias feature
    return svmapi.Sparse(thePsi)
Example #24
def psi(x, y, sm, sparm):
    # Concatenate the flattened observation and transition matrices.
    return svmapi.Sparse(
        psiUtil.genObsMat(x, y).reshape(-1).tolist() +
        psiUtil.genTrsMat(y).reshape(-1).tolist())