Example #1
import random

def train_sample(feature_str, label, pos_train=0.5, neg_train=1000):
    """Perform training and testing using disjoint samples from the full
    set of label_values. This is equivalent to doing one round of cross
    validation (see classipy.cross_validation) except that it keeps the
    values around for display.

    Args:
        feature_str: Feature name passed to cass.get_feature_values.
        label: String label whose rows in cf_labels give the positive hashes.
        pos_train: Positive training count; values in (0, 1] are treated as
            a fraction of the available positives.
        neg_train: Negative training count; values in (0, 1] are treated as
            a fraction of the available negatives.
    """
    all_hashes = list(cass.get_image_hashes())
    pos_hashes = [_[0] for _ in cass.buffered_get_row(cf_labels, label)]
    neg_hashes = list(set(all_hashes) - set(pos_hashes))

    if 0 < pos_train <= 1: pos_train = int(pos_train * len(pos_hashes))
    if 0 < neg_train <= 1: neg_train = int(neg_train * len(neg_hashes))

    # Choose a training sample and a testing sample
    if len(pos_hashes) < pos_train:
        raise ValueError('Not enough positive examples %s(%d)' %
                         (label, len(pos_hashes)))
    if len(neg_hashes) < neg_train:
        raise ValueError('Not enough negative examples %s(%d)' %
                         (label, len(neg_hashes)))

    pos_sample = random.sample(pos_hashes, pos_train)
    neg_sample = random.sample(neg_hashes, neg_train)

    labels = [-1 for _ in neg_sample] + [1 for _ in pos_sample]
    values = cass.get_feature_values(feature_str, neg_sample+pos_sample)

    global label_values
    label_values = zip(labels, values)

    print 'Training classifier with %d samples' % len(label_values)
    train_classifier(label_values)
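
The helpers `cass`, `cf_labels`, and `train_classifier` belong to the surrounding project, so the snippet above is not runnable on its own. Below is a minimal, self-contained sketch of just the pos_train/neg_train handling, with made-up hash lists standing in for the Cassandra lookups; `resolve_sample` is a hypothetical helper, not part of the original module.

import random

def resolve_sample(requested, pool):
    # Mirror of the pos_train/neg_train logic above: values in (0, 1] are a
    # fraction of the pool, larger values are an absolute count.
    if 0 < requested <= 1:
        requested = int(requested * len(pool))
    if len(pool) < requested:
        raise ValueError('Not enough examples (%d requested, %d available)' %
                         (requested, len(pool)))
    return random.sample(pool, requested)

pos_hashes = ['pos%04d' % i for i in range(200)]   # stand-in for the cf_labels rows
neg_hashes = ['neg%04d' % i for i in range(5000)]  # stand-in for the remaining hashes
pos_sample = resolve_sample(0.5, pos_hashes)       # fraction of the pool -> 100 positives
neg_sample = resolve_sample(1000, neg_hashes)      # absolute count -> 1000 negatives
labels = [-1 for _ in neg_sample] + [1 for _ in pos_sample]
print 'Sampled %d positives and %d negatives' % (len(pos_sample), len(neg_sample))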
Example #2
def run_train_test(feature_str, label, split_opts={}):
    """
    Args:
        feature_str: Feature name passed to cass.get_feature_values.
        label: String label to train and test on (e.g. 'airplane').
        split_opts: Keyword arguments passed to split_train_test.
    Returns:
        (label, [(conf, gt, hash), ...])
        where
            label: string label, e.g. 'airplane'
            conf: -inf to inf, prediction confidence
            gt: -1 or 1 for negative or positive annotation
            hash: image key for use with cass.get_image
    """

    train, test = split_train_test(label, **split_opts)
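    # train and test are sequences of (gt, hash) pairs; gt is -1 or 1 and
    # hash is an image key (see the unpacking below).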

    # Train
    labels = [L for L, k in train]
    hashes = [k for L, k in train]
    values = cass.get_feature_values(feature_str, hashes)
    label_values = zip(labels, values)

    print 'Training classifier with %d values' % len(label_values)
    classifier = train_classifier(label_values)

    # Test
    hashes = [k for L, k in test]
    values = list(cass.get_feature_values(feature_str, hashes))
    print 'Testing with %d(%d) values' % (len(hashes), len(values))
    predictions = (classifier.predict(value) for value in values)

    # Each prediction unpacks as ((pred, conf),); pred * conf gives a signed confidence.
    conf_gt_hash = [(pred * conf, L, k)
                    for ((pred, conf),), (L, k)
                    in zip(predictions, test)]
    conf_gt_hash = sorted(conf_gt_hash, key=lambda _: _[0])

    return (label, conf_gt_hash)
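
Since conf_gt_hash is sorted ascending by confidence, retrieval-style metrics can be computed directly from the return value. A minimal sketch using made-up scores in place of real classifier output; the (conf, gt, hash) shape follows the docstring above, and 'airplane' is the docstring's own example label.

# A real call would be: label, conf_gt_hash = run_train_test(feature_str, 'airplane'),
# which needs the cass backend; made-up scores are used here instead.
label = 'airplane'
conf_gt_hash = [(-2.1, -1, 'h0'), (-0.4, 1, 'h1'), (0.3, -1, 'h2'), (1.7, 1, 'h3')]

# Accuracy at the conf >= 0 decision threshold.
correct = sum(1 for conf, gt, _ in conf_gt_hash
              if (1 if conf >= 0 else -1) == gt)
print 'Accuracy for %s: %.2f' % (label, float(correct) / len(conf_gt_hash))

# The list is sorted ascending by confidence, so the most confident
# detections sit at the end.
top_hashes = [h for _, _, h in conf_gt_hash[-2:]]
print 'Top-2 hashes for %s: %s' % (label, top_hashes)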