Example #1
0
import numpy
import random
from sklearn.svm import LinearSVC
from .train import TrainMentionClassifier

from nel import logging
log = logging.getLogger()

def sample_by_magnitude(_, negatives, limit):
    """ Return up to `limit` negatives with the largest L1 magnitude.

    The first argument (the positive instance) is ignored. Ties keep their
    original relative order.
    """
    # Rationale from the original author: with feature vectors standardised
    # to zero mean and unit standard deviation, a large absolute sum should
    # correspond to the strongest feature activation.
    def magnitude(fv):
        return numpy.abs(fv).sum()

    ranked = sorted(negatives, key=magnitude, reverse=True)
    return ranked[:limit]

def sample_by_mag_difference(positive, negatives, limit):
    """ Return up to `limit` negatives furthest (L1 distance) from `positive`.

    Ties keep their original relative order; the input is not modified.
    """
    # sort a private copy so the caller's sequence is left untouched
    ranked = list(negatives)
    ranked.sort(key=lambda fv: numpy.abs(positive - fv).sum(), reverse=True)
    return ranked[:limit]

def sample_randomly(_, negatives, limit):
    """ Return up to `limit` negatives chosen uniformly at random.

    The first argument (the positive instance) is ignored. The result is
    always a new list; `negatives` itself is left untouched.
    """
    # Shuffle a private copy rather than the caller's sequence: shuffling in
    # place reorders the caller's data as a side effect, and random.shuffle on
    # a multi-dimensional numpy array can duplicate rows because the element
    # swap operates on views.
    shuffled = list(negatives)
    random.shuffle(shuffled)
    return shuffled[:limit]

def sample_by_std(_, negatives, limit):
    """ Return up to `limit` negatives with the highest standard deviation.

    The first argument (the positive instance) is ignored. The original
    author's intent is to favour diversity in feature activation.
    """
    by_spread = list(negatives)
    # stable descending sort: equal-std vectors keep their input order
    by_spread.sort(key=numpy.std, reverse=True)
    return by_spread[:limit]

class TrainLinearRanker(TrainMentionClassifier):
    """ Trainer for a linear candidate ranker over a corpus of documents.

    The 'mapping' keyword argument is always set to 'PolynomialMapper',
    replacing any value supplied by the caller, before the remaining keyword
    arguments are handed to the base class constructor.
    """
    def __init__(self, **kwargs):
        # pin the mapper, then delegate everything to the parent trainer
        kwargs.update(mapping='PolynomialMapper')
        super(TrainLinearRanker, self).__init__(**kwargs)
Example #2
0
from __future__ import absolute_import
import redis
import re
import ujson as json
from itertools import islice, izip

from ..data import Store, ObjectStore, FieldStore, SetStore

from nel import logging

log = logging.getLogger()


class RedisStore(Store):
    """ Abstract base class for stores built on redis.

    Keys are namespaced as '<namespace>:<oid>'. `_fetch_batch` is left
    unimplemented here.
    """
    def __init__(self, namespace, uri):
        """ Connect to the redis instance at `uri` and remember `namespace`. """
        self.kvs = redis.from_url(uri)
        self.ns = namespace

    @classmethod
    def get_protocol(cls):
        """ Return the protocol name for this store type. """
        protocol = 'redis'
        return protocol

    def to_key(self, oid):
        """ Build the namespaced redis key for an object id. """
        return ':'.join((self.ns, oid))

    def to_oid(self, key):
        """ Strip the namespace prefix from `key` and decode the rest as UTF-8. """
        # skip the namespace plus the ':' separator
        prefix_length = len(self.ns) + 1
        raw_oid = key[prefix_length:]
        return raw_oid.decode('utf-8')

    def _fetch_batch(self, keys_iter):
        """ Not implemented in this base class. """
        raise NotImplementedError()