Ejemplo n.º 1
0
 def __init__(self, feature_alphabet=None, bias=False):
     """Set up the feature alphabet, the weight vector and the bias flag.

     feature_alphabet -- alphabet to use; a fresh ``Alphabet()`` is
         created when this is None.
     bias -- flag stored on the instance as ``_bias``.
     """
     # Identity test on purpose: ``== None`` would go through any custom
     # ``__eq__`` defined by the alphabet object.
     if feature_alphabet is None:
         feature_alphabet = Alphabet()
     self._feature_alphabet = feature_alphabet
     self._weights = None
     # _init_model() is expected to replace the None placeholder above.
     self._init_model()
     self._bias = bias
Ejemplo n.º 2
0
	def __init__(self, feature_alphabet=None, bias=False):
		"""Set up the feature alphabet, the weight vector and the bias flag.

		feature_alphabet -- alphabet to use; a fresh ``Alphabet()`` is
			created when this is None.
		bias -- flag stored on the instance as ``_bias``.
		"""
		# Identity test on purpose: ``== None`` would go through any custom
		# ``__eq__`` defined by the alphabet object.
		if feature_alphabet is None:
			feature_alphabet = Alphabet()
		self._feature_alphabet = feature_alphabet
		self._weights = None
		# _init_model() is expected to replace the None placeholder above.
		self._init_model()
		self._bias = bias
Ejemplo n.º 3
0
class Binary(object):
    """Abstract linear classifier (in primal form) for binary problems.

    The model is a single weight vector with one entry per feature of the
    feature alphabet.  ``learn`` and ``update`` raise NotImplementedError
    here and are meant to be provided by subclasses.
    """

    def __init__(self, feature_alphabet=None, bias=False):
        """Set up the feature alphabet, a zero weight vector and the bias flag.

        feature_alphabet -- alphabet to use; a fresh ``Alphabet()`` is
            created when this is None.
        bias -- flag stored as ``_bias`` and forwarded to every ``Source``
            built by the stream helpers.
        """
        # Identity test on purpose: ``== None`` would go through any custom
        # ``__eq__`` defined by the alphabet object.
        if feature_alphabet is None:
            feature_alphabet = Alphabet()
        self._feature_alphabet = feature_alphabet
        self._weights = None
        self._init_model()
        self._bias = bias

    def _init_model(self):
        """Reset the weights to a zero vector sized to the alphabet."""
        self._weights = zeros(self._feature_alphabet.size(), 'd')

    def set_alphabet(self, feature_alphabet):
        """Install a new alphabet and (re-)initialize the weights to match.

        An empty alphabet is only accepted while it is still unlocked;
        otherwise the assertion below fails.
        """
        m = feature_alphabet.size()
        assert m >= 1 or not feature_alphabet.locked(), \
            "Feature alphabet has %s size." % m
        self._feature_alphabet = feature_alphabet
        self._init_model()

    def get_alphabet(self):
        """Return the feature alphabet currently in use."""
        return self._feature_alphabet

    def get_model(self):
        """Return the current model (the weight vector)."""
        return self._get_model()

    def _get_model(self):
        """Return the weight vector itself, not a copy."""
        return self._weights

    def set_model(self, weight_dict):
        """Set model weights from a ``{feature: weight}`` dictionary."""
        self._set_weights(weight_dict)

    def _set_weights(self, weight_dict):
        """Write every weight of ``weight_dict`` into the weight vector.

        Each feature is mapped to its index through the feature alphabet;
        entries for features not mentioned in the dictionary are untouched.
        """
        assert isinstance(weight_dict, dict)
        weights = self._weights
        feat_alpha = self._feature_alphabet
        for feat, value in weight_dict.items():
            weights[feat_alpha[feat]] = value

    def learn(self, train_sample, epochs):
        """Train the model; not implemented in this base class."""
        raise NotImplementedError

    def _get_train_stream(self, data):
        """Return a training instance stream for ``data``.

        ``data`` may be a file name (str), a callable, or an existing
        ``Source``.  A str or callable is wrapped in a new ``Source`` that
        shares this classifier's alphabet; an existing ``Source`` is used
        as is.  Afterwards the classifier adopts the stream's alphabet,
        which also resets the weights.

        Raises Exception for any other kind of ``data``.
        """
        if isinstance(data, Source):
            stream = data
        elif isinstance(data, str) or callable(data):
            stream = Source(data,
                            feature_alphabet=self._feature_alphabet,
                            alphabet_lock=False,
                            alphabet_pop=False,
                            bias=self._bias)
        else:
            raise Exception(
                "Error: data is either string for file name or ClassificationSource!"
            )
        # set alphabet from data
        self.set_alphabet(stream.get_alphabet())
        return stream

    def _get_test_stream(self, data):
        """Return a test instance stream for ``data``.

        ``data`` may be a file name (str) or an existing ``Source``.  In
        both cases the stream is switched to the model's alphabet.

        Raises Exception for any other kind of ``data``.
        """
        if isinstance(data, str):
            stream = Source(data,
                            alphabet_lock=True,
                            alphabet_pop=False,
                            bias=self._bias)
        elif isinstance(data, Source):
            stream = data
        else:
            raise Exception(
                "Error: data is either string for file name or ClassificationSource!"
            )
        # use model alphabet
        stream.set_alphabet(self.get_alphabet())
        return stream

    def resize_weights(self, instance):
        """Resize the weight vector in place to the current alphabet size.

        ``instance`` is accepted for interface compatibility but not used.
        """
        target = self._feature_alphabet.size()
        if len(self._weights) != target:
            self._weights.resize(target)

    def update(self, instance, prediction, rate=1.0):
        """Update the model for one instance; not implemented here."""
        raise NotImplementedError

    def _decode(self, instance, weights):
        """Score ``instance`` against ``weights``.

        The score is the dot product of ``weights`` with the instance's
        feature vector; it is returned wrapped in a ``Prediction``.
        """
        score = dot(weights, instance.get_fv())
        return Prediction(score)

    def decode(self, instance):
        """Return the prediction for ``instance`` given the current model.

        While the alphabet is unlocked the weight vector is first resized
        to the alphabet's current size.
        """
        if not self._feature_alphabet.locked():
            self.resize_weights(instance)
        return self._decode(instance, self.get_model())

    def predict(self, instance):
        """Return the ``Prediction`` for ``instance`` (no weight resizing)."""
        return self._decode(instance, self._get_model())

    def classify(self, instance):
        """Return ``get_pred()`` of the prediction for ``instance``.

        ``test`` unpacks this value as a ``(label, score)`` pair.
        """
        prediction = self._decode(instance, self._get_model())
        return prediction.get_pred()  # (label,score)

    def test(self, test_sample, sink):
        """Evaluate the classifier on a test sample.

        Every instance of the test stream is classified and the
        (true label, predicted label, score) triple is passed to
        ``sink.update``.  Progress and timing are written to stderr.
        """
        start_time = time.time()
        # NOTE(review): this rebinds the attribute ``set_labels`` on the sink
        # instead of calling it; kept as in the original -- confirm against
        # the sink class whether a call was intended.
        sink.set_labels = [POS_LAB, NEG_LAB]
        # read in data
        stream = self._get_test_stream(test_sample)
        # sys.stderr.write behaves the same on Python 2 and 3, unlike the
        # ``print >> sys.stderr`` statement form.
        sys.stderr.write("-" * 100 + "\n")
        sys.stderr.write("Testing...")
        sys.stderr.flush()
        try:
            # make predictions on test sample
            for inst in stream:
                true_label = inst.get_target_label()
                pred_label, score = self.classify(inst)
                # store label pair
                sink.update(true_label, pred_label, score)
        finally:
            # release the stream even when classification or the sink fails
            stream.close()
        elapsed_time = time.time() - start_time
        sys.stderr.write(" done in %s sec.\n" % round(elapsed_time, 3))
Ejemplo n.º 4
0
class Binary(object):
	"""Abstract linear classifier (in primal form) for binary problems.

	The model is a single weight vector with one entry per feature of the
	feature alphabet.  ``learn`` and ``update`` raise NotImplementedError
	here and are meant to be provided by subclasses.
	"""

	def __init__(self, feature_alphabet=None, bias=False):
		"""Set up the feature alphabet, a zero weight vector and the bias flag.

		feature_alphabet -- alphabet to use; a fresh ``Alphabet()`` is
			created when this is None.
		bias -- flag stored as ``_bias`` and forwarded to every ``Source``
			built by the stream helpers.
		"""
		# Identity test on purpose: ``== None`` would go through any custom
		# ``__eq__`` defined by the alphabet object.
		if feature_alphabet is None:
			feature_alphabet = Alphabet()
		self._feature_alphabet = feature_alphabet
		self._weights = None
		self._init_model()
		self._bias = bias

	def _init_model(self):
		"""Reset the weights to a zero vector sized to the alphabet."""
		self._weights = zeros(self._feature_alphabet.size(), 'd')

	def set_alphabet(self, feature_alphabet):
		"""Install a new alphabet and (re-)initialize the weights to match.

		An empty alphabet is only accepted while it is still unlocked;
		otherwise the assertion below fails.
		"""
		m = feature_alphabet.size()
		assert m >= 1 or not feature_alphabet.locked(), \
			"Feature alphabet has %s size." % m
		self._feature_alphabet = feature_alphabet
		self._init_model()

	def get_alphabet(self):
		"""Return the feature alphabet currently in use."""
		return self._feature_alphabet

	def get_model(self):
		"""Return the current model (the weight vector)."""
		return self._get_model()

	def _get_model(self):
		"""Return the weight vector itself, not a copy."""
		return self._weights

	def set_model(self, weight_dict):
		"""Set model weights from a ``{feature: weight}`` dictionary."""
		self._set_weights(weight_dict)

	def _set_weights(self, weight_dict):
		"""Write every weight of ``weight_dict`` into the weight vector.

		Each feature is mapped to its index through the feature alphabet;
		entries for features not mentioned in the dictionary are untouched.
		"""
		assert isinstance(weight_dict, dict)
		weights = self._weights
		feat_alpha = self._feature_alphabet
		for feat, value in weight_dict.items():
			weights[feat_alpha[feat]] = value

	def learn(self, train_sample, epochs):
		"""Train the model; not implemented in this base class."""
		raise NotImplementedError

	def _get_train_stream(self, data):
		"""Return a training instance stream for ``data``.

		``data`` may be a file name (str), a callable, or an existing
		``Source``.  A str or callable is wrapped in a new ``Source`` that
		shares this classifier's alphabet; an existing ``Source`` is used
		as is.  Afterwards the classifier adopts the stream's alphabet,
		which also resets the weights.

		Raises Exception for any other kind of ``data``.
		"""
		if isinstance(data, Source):
			stream = data
		elif isinstance(data, str) or callable(data):
			stream = Source(data,
				feature_alphabet=self._feature_alphabet,
				alphabet_lock=False,
				alphabet_pop=False,
				bias=self._bias)
		else:
			raise Exception("Error: data is either string for file name or ClassificationSource!")
		# set alphabet from data
		self.set_alphabet(stream.get_alphabet())
		return stream

	def _get_test_stream(self, data):
		"""Return a test instance stream for ``data``.

		``data`` may be a file name (str) or an existing ``Source``.  In
		both cases the stream is switched to the model's alphabet.

		Raises Exception for any other kind of ``data``.
		"""
		if isinstance(data, str):
			stream = Source(data,
				alphabet_lock=True,
				alphabet_pop=False,
				bias=self._bias)
		elif isinstance(data, Source):
			stream = data
		else:
			raise Exception("Error: data is either string for file name or ClassificationSource!")
		# use model alphabet
		stream.set_alphabet(self.get_alphabet())
		return stream

	def resize_weights(self, instance):
		"""Resize the weight vector in place to the current alphabet size.

		``instance`` is accepted for interface compatibility but not used.
		"""
		target = self._feature_alphabet.size()
		if len(self._weights) != target:
			self._weights.resize(target)

	def update(self, instance, prediction, rate=1.0):
		"""Update the model for one instance; not implemented here."""
		raise NotImplementedError

	def _decode(self, instance, weights):
		"""Score ``instance`` against ``weights``.

		The score is the dot product of ``weights`` with the instance's
		feature vector; it is returned wrapped in a ``Prediction``.
		"""
		score = dot(weights, instance.get_fv())
		return Prediction(score)

	def decode(self, instance):
		"""Return the prediction for ``instance`` given the current model.

		While the alphabet is unlocked the weight vector is first resized
		to the alphabet's current size.
		"""
		if not self._feature_alphabet.locked():
			self.resize_weights(instance)
		return self._decode(instance, self.get_model())

	def predict(self, instance):
		"""Return the ``Prediction`` for ``instance`` (no weight resizing)."""
		return self._decode(instance, self._get_model())

	def classify(self, instance):
		"""Return ``get_pred()`` of the prediction for ``instance``.

		``test`` unpacks this value as a ``(label, score)`` pair.
		"""
		prediction = self._decode(instance, self._get_model())
		return prediction.get_pred() # (label,score)

	def test(self, test_sample, sink):
		"""Evaluate the classifier on a test sample.

		Every instance of the test stream is classified and the
		(true label, predicted label, score) triple is passed to
		``sink.update``.  Progress and timing are written to stderr.
		"""
		start_time = time.time()
		# NOTE(review): this rebinds the attribute ``set_labels`` on the sink
		# instead of calling it; kept as in the original -- confirm against
		# the sink class whether a call was intended.
		sink.set_labels = [POS_LAB, NEG_LAB]
		# read in data
		stream = self._get_test_stream(test_sample)
		# sys.stderr.write behaves the same on Python 2 and 3, unlike the
		# ``print >> sys.stderr`` statement form.
		sys.stderr.write("-" * 100 + "\n")
		sys.stderr.write("Testing...")
		sys.stderr.flush()
		try:
			# make predictions on test sample
			for inst in stream:
				true_label = inst.get_target_label()
				pred_label, score = self.classify(inst)
				# store label pair
				sink.update(true_label, pred_label, score)
		finally:
			# release the stream even when classification or the sink fails
			stream.close()
		elapsed_time = time.time() - start_time
		sys.stderr.write(" done in %s sec.\n" % round(elapsed_time, 3))