import numpy
import random
from sklearn.svm import LinearSVC

from .train import TrainMentionClassifier

from nel import logging
log = logging.getLogger()


def sample_by_magnitude(_, negatives, limit):
    """Return up to `limit` negatives with the largest summed absolute value.

    Learn from the candidates with the highest feature vector magnitude.
    Given feature vectors are standardised to have 0 mean and unit standard
    deviation, this should be like selecting instances with the strongest
    feature activation.

    The first argument (the positive instance) is ignored; it is accepted so
    that every sampler shares the same (positive, negatives, limit) signature.
    """
    return sorted(negatives, key=lambda fv: numpy.abs(fv).sum(), reverse=True)[:limit]


def sample_by_mag_difference(positive, negatives, limit):
    """Return up to `limit` negatives that differ most from `positive`.

    Negatives are ranked by the sum of absolute element-wise differences
    between `positive` and the negative feature vector, largest first.
    """
    return sorted(negatives, key=lambda fv: numpy.abs(positive - fv).sum(), reverse=True)[:limit]


def sample_randomly(_, negatives, limit):
    """Return up to `limit` negatives chosen in random order.

    The first argument (the positive instance) is ignored. The result is
    always a new list.
    """
    # Shuffle a private copy: random.shuffle works in place, and reordering
    # the caller's sequence is a side effect none of the other samplers have.
    shuffled = list(negatives)
    random.shuffle(shuffled)
    return shuffled[:limit]


def sample_by_std(_, negatives, limit):
    """Return up to `limit` negatives with the highest standard deviation.

    Favours diversity in feature activation. The first argument (the
    positive instance) is ignored.
    """
    return sorted(negatives, key=numpy.std, reverse=True)[:limit]


class TrainLinearRanker(TrainMentionClassifier):
    """ Trains a linear candidate ranker over a corpus of documents. """
    def __init__(self, **kwargs):
        # The 'mapping' option is always forced to 'PolynomialMapper',
        # overriding any value supplied by the caller, before the keyword
        # arguments are handed to the parent constructor.
        kwargs['mapping'] = 'PolynomialMapper'
        super(TrainLinearRanker, self).__init__(**kwargs)
from __future__ import absolute_import
import redis
import re
import ujson as json

from itertools import islice, izip
from ..data import Store, ObjectStore, FieldStore, SetStore

from nel import logging
log = logging.getLogger()


class RedisStore(Store):
    """ Abstract base class for stores built on redis """

    def __init__(self, namespace, uri):
        """Remember the key namespace and open a redis client for `uri`."""
        self.ns = namespace
        self.kvs = redis.from_url(uri)

    @classmethod
    def get_protocol(cls):
        """Return the protocol name handled by this store: 'redis'."""
        return 'redis'

    def to_key(self, oid):
        """Build the redis key for `oid`: the namespace, a colon, then `oid`."""
        return ':'.join((self.ns, oid))

    def to_oid(self, key):
        """Recover the object id from a redis key.

        Drops the namespace and the one-character separator that follows it,
        then decodes the remainder as UTF-8.
        """
        prefix_length = len(self.ns) + 1
        suffix = key[prefix_length:]
        return suffix.decode('utf-8')

    def _fetch_batch(self, keys_iter):
        """Not implemented in this base class; always raises NotImplementedError."""
        raise NotImplementedError