def __init__(self, feature_alphabet=None, bias=False):
    """Initialise the classifier state.

    :param feature_alphabet: feature alphabet to use; when ``None`` a new
        ``Alphabet`` is created.
    :param bias: stored on the instance as ``_bias``.
    """
    # Identity test instead of ``== None``: equality would be dispatched to
    # the alphabet's own ``__eq__``, which need not cope with ``None``.
    if feature_alphabet is None:
        feature_alphabet = Alphabet()
    self._feature_alphabet = feature_alphabet
    self._weights = None
    # _init_model() runs before _bias is assigned, as in the original.
    self._init_model()
    self._bias = bias
def __init__(self, feature_alphabet=None, bias=False):
    """Set up the feature alphabet, the weight vector and the bias flag.

    :param feature_alphabet: alphabet to attach to the instance; a new
        ``Alphabet`` is built when the argument is ``None``.
    :param bias: value kept in ``self._bias``.
    """
    # ``is None`` rather than ``== None`` so that a custom ``__eq__`` on the
    # alphabet object is never invoked with ``None`` as the other operand.
    self._feature_alphabet = (
        Alphabet() if feature_alphabet is None else feature_alphabet
    )
    self._weights = None
    # The model is initialised before the bias flag is stored (original order).
    self._init_model()
    self._bias = bias
class Binary(object):
    """Abstract linear classifier (in primal form) for binary problems.

    The model is a single weight vector with one entry per feature of the
    feature alphabet.  ``learn`` and ``update`` raise ``NotImplementedError``
    in this class.
    """

    def __init__(self, feature_alphabet=None, bias=False):
        """Create a classifier with an all-zero weight vector.

        :param feature_alphabet: alphabet to use; a new ``Alphabet`` is
            created when this is ``None``.
        :param bias: stored as ``self._bias`` and passed on to every
            ``Source`` built by this classifier.
        """
        # Identity test: ``== None`` would call the alphabet's own
        # ``__eq__``, which need not accept ``None``.
        if feature_alphabet is None:
            feature_alphabet = Alphabet()
        self._feature_alphabet = feature_alphabet
        self._weights = None
        self._init_model()
        self._bias = bias

    def _init_model(self):
        """Initialise the weights to 0 (one double per alphabet entry)."""
        m = self._feature_alphabet.size()
        self._weights = zeros(m, 'd')

    def set_alphabet(self, feature_alphabet):
        """Set the alphabet and (re-)initialise the weights accordingly.

        :raises AssertionError: if the alphabet is locked and has size < 1.
        """
        m = feature_alphabet.size()
        assert m >= 1 or not feature_alphabet.locked(), \
            "Feature alphabet has %s size." % m
        self._feature_alphabet = feature_alphabet
        self._init_model()

    def get_alphabet(self):
        """Return the feature alphabet currently attached to the model."""
        return self._feature_alphabet

    def get_model(self):
        """Return the current model (see ``_get_model``)."""
        return self._get_model()

    def _get_model(self):
        """Return the weight vector itself (not a copy)."""
        return self._weights

    def set_model(self, weight_dict):
        """Set model weights from a ``{feature: weight}`` dictionary."""
        self._set_weights(weight_dict)

    def _set_weights(self, weight_dict):
        """Write each weight of ``weight_dict`` at its feature's index.

        The index of a feature is obtained by subscripting the feature
        alphabet with it.  Entries not mentioned in the dictionary keep
        their current value.
        """
        assert isinstance(weight_dict, dict)
        weights = self._weights
        feat_alpha = self._feature_alphabet
        for feature, value in weight_dict.items():
            weights[feat_alpha[feature]] = value

    def learn(self, train_sample, epochs):
        """Train the model; not implemented in this class."""
        raise NotImplementedError

    def _get_train_stream(self, data):
        """Return a training instance stream for ``data``.

        :param data: a ``Source`` (used as is), or a string / callable from
            which a new unlocked ``Source`` sharing this model's alphabet
            is built.
        :raises TypeError: for any other kind of ``data``.
        """
        if isinstance(data, Source):
            stream = data
        elif isinstance(data, str) or callable(data):
            # Strings and callables are handed to Source the same way.
            stream = Source(data,
                            feature_alphabet=self._feature_alphabet,
                            alphabet_lock=False,
                            alphabet_pop=False,
                            bias=self._bias)
        else:
            # TypeError is still an Exception, so existing handlers work.
            raise TypeError(
                "Error: data is either string for file name or ClassificationSource!"
            )
        # set alphabet from data (this also resets the weights)
        self.set_alphabet(stream.get_alphabet())
        return stream

    def _get_test_stream(self, data):
        """Return a test instance stream for ``data``.

        :param data: a ``Source`` (used as is) or a string from which a
            ``Source`` with a locked alphabet is built.
        :raises TypeError: for any other kind of ``data``.
        """
        if isinstance(data, str):
            stream = Source(data, alphabet_lock=True,
                            alphabet_pop=False, bias=self._bias)
        elif isinstance(data, Source):
            stream = data
        else:
            raise TypeError(
                "Error: data is either string for file name or ClassificationSource!"
            )
        # use model alphabet
        stream.set_alphabet(self.get_alphabet())
        return stream

    def resize_weights(self, instance):
        """Make the weight vector as long as the feature alphabet.

        Existing weights are kept, new entries are 0 and surplus entries are
        dropped.  ``instance`` is not used.

        A new array is allocated instead of resizing in place: in-place
        ``ndarray.resize`` raises ``ValueError`` as soon as another
        reference to the array exists (e.g. one returned by ``get_model``).
        """
        size = self._feature_alphabet.size()
        old = self._weights
        if len(old) == size:
            return
        resized = zeros(size, 'd')
        keep = min(len(old), size)
        resized[:keep] = old[:keep]
        self._weights = resized

    def update(self, instance, prediction, rate=1.0):
        """Update the model from one instance; not implemented here."""
        raise NotImplementedError

    def _decode(self, instance, weights):
        """Score ``instance`` against ``weights``.

        The score is the dot product of ``weights`` and the instance's
        feature vector; it is returned wrapped in a ``Prediction``.
        """
        fv = instance.get_fv()
        score = dot(weights, fv)
        return Prediction(score)

    def decode(self, instance):
        """Return the prediction for ``instance`` given the current model.

        While the alphabet is unlocked the weight vector is first resized
        to the alphabet's current size.
        """
        if not self._feature_alphabet.locked():
            self.resize_weights(instance)
        return self._decode(instance, self.get_model())

    def predict(self, instance):
        """Return the ``Prediction`` for ``instance`` (no weight resizing)."""
        return self._decode(instance, self._get_model())

    def classify(self, instance):
        """Return ``get_pred()`` of the prediction for ``instance``.

        ``test`` unpacks the result as a ``(label, score)`` pair.
        """
        prediction = self._decode(instance, self._get_model())
        return prediction.get_pred()  # (label, score)

    def test(self, test_sample, sink):
        """Evaluate the classifier on a test sample.

        Every instance of the stream is classified and
        ``sink.update(true_label, pred_label, score)`` is called for it.
        Progress and elapsed time are written to ``sys.stderr``.  The stream
        is closed even if classification or the sink raises.
        """
        start_time = time.time()
        # NOTE(review): this assigns an attribute named ``set_labels`` on the
        # sink instead of calling a method; left unchanged because the sink
        # type is not visible here -- confirm the intent.
        sink.set_labels = [POS_LAB, NEG_LAB]
        # read in data
        stream = self._get_test_stream(test_sample)
        # write() emits the same text as the former ``print >>`` statements
        # and works on both Python 2 and Python 3.
        sys.stderr.write("-" * 100 + "\n")
        sys.stderr.write("Testing...")
        try:
            # make predictions on test sample
            for inst in stream:
                true_label = inst.get_target_label()
                pred_label, score = self.classify(inst)
                # store label pair
                sink.update(true_label, pred_label, score)
        finally:
            stream.close()
        elapsed_time = time.time() - start_time
        sys.stderr.write(" done in %s sec.\n" % (round(elapsed_time, 3)))
class Binary(object):
    """Abstract linear classifier (in primal form) for binary problems.

    State kept on the instance: the feature alphabet, a weight vector with
    one entry per alphabet feature, and a bias flag forwarded to the data
    sources.  ``learn`` and ``update`` raise ``NotImplementedError`` here.
    """

    def __init__(self, feature_alphabet=None, bias=False):
        """Build the classifier and zero its weights.

        :param feature_alphabet: alphabet to attach; ``None`` means a new
            ``Alphabet`` is created.
        :param bias: kept as ``self._bias`` and given to each ``Source``
            constructed by this object.
        """
        # Compare by identity; ``== None`` goes through the alphabet's
        # ``__eq__`` and can fail or give a wrong answer.
        self._feature_alphabet = (
            Alphabet() if feature_alphabet is None else feature_alphabet
        )
        self._weights = None
        self._init_model()
        self._bias = bias

    def _init_model(self):
        """Initialise the weights to 0, sized after the alphabet."""
        self._weights = zeros(self._feature_alphabet.size(), 'd')

    def set_alphabet(self, feature_alphabet):
        """Install ``feature_alphabet`` and reset the weights to zeros.

        :raises AssertionError: when the alphabet is locked yet has fewer
            than one entry.
        """
        m = feature_alphabet.size()
        assert m >= 1 or not feature_alphabet.locked(), \
            "Feature alphabet has %s size." % m
        self._feature_alphabet = feature_alphabet
        self._init_model()

    def get_alphabet(self):
        """Return the feature alphabet in use."""
        return self._feature_alphabet

    def get_model(self):
        """Return the current model, as given by ``_get_model``."""
        return self._get_model()

    def _get_model(self):
        """Return the weight vector object held by the instance."""
        return self._weights

    def set_model(self, weight_dict):
        """Set model weights from a dictionary keyed by feature."""
        self._set_weights(weight_dict)

    def _set_weights(self, weight_dict):
        """Copy the dictionary's weights into the weight vector.

        Each key is looked up in the feature alphabet by subscription to
        get the position to write to; other positions are left untouched.
        """
        assert isinstance(weight_dict, dict)
        weights = self._weights
        feat_alpha = self._feature_alphabet
        for feature, value in weight_dict.items():
            fidx = feat_alpha[feature]
            weights[fidx] = value

    def learn(self, train_sample, epochs):
        """Training entry point; raises ``NotImplementedError`` here."""
        raise NotImplementedError

    def _get_train_stream(self, data):
        """Return the training instance stream for ``data``.

        A ``Source`` is returned unchanged.  A string or a callable is
        wrapped in a new ``Source`` that shares this model's alphabet and
        leaves it unlocked.  In every case the model then adopts the
        stream's alphabet, which resets the weights.

        :raises TypeError: if ``data`` is none of the above.
        """
        if isinstance(data, Source):
            stream = data
        elif isinstance(data, str) or callable(data):
            # Both kinds were built with identical arguments; one branch.
            stream = Source(data,
                            feature_alphabet=self._feature_alphabet,
                            alphabet_lock=False,
                            alphabet_pop=False,
                            bias=self._bias)
        else:
            # Subclass of Exception, so ``except Exception`` still matches.
            raise TypeError(
                "Error: data is either string for file name or ClassificationSource!"
            )
        # set alphabet from data
        self.set_alphabet(stream.get_alphabet())
        return stream

    def _get_test_stream(self, data):
        """Return the test instance stream for ``data``.

        A string is wrapped in a new ``Source`` with a locked alphabet; a
        ``Source`` is used unchanged.  The model's alphabet is then set on
        the stream.

        :raises TypeError: if ``data`` is neither a string nor a ``Source``.
        """
        if isinstance(data, str):
            stream = Source(data, alphabet_lock=True,
                            alphabet_pop=False, bias=self._bias)
        elif isinstance(data, Source):
            stream = data
        else:
            raise TypeError(
                "Error: data is either string for file name or ClassificationSource!"
            )
        # use model alphabet
        stream.set_alphabet(self.get_alphabet())
        return stream

    def resize_weights(self, instance):
        """Bring the weight vector to the alphabet's current size.

        Leading weights are preserved, added positions are 0, and positions
        beyond the new size are discarded.  ``instance`` is ignored.

        The vector is rebuilt rather than resized in place because in-place
        ``ndarray.resize`` refuses to run (``ValueError``) whenever the
        array is referenced from anywhere else.
        """
        target = self._feature_alphabet.size()
        current = self._weights
        if len(current) != target:
            grown = zeros(target, 'd')
            kept = min(len(current), target)
            grown[:kept] = current[:kept]
            self._weights = grown

    def update(self, instance, prediction, rate=1.0):
        """Model update hook; raises ``NotImplementedError`` here."""
        raise NotImplementedError

    def _decode(self, instance, weights):
        """Return a ``Prediction`` built from the linear score.

        The score is ``dot(weights, fv)`` where ``fv`` is the instance's
        feature vector.
        """
        fv = instance.get_fv()
        score = dot(weights, fv)
        return Prediction(score)

    def decode(self, instance):
        """Return the prediction for ``instance`` under the current model.

        If the alphabet is not locked, the weights are resized first so
        that they match the alphabet.
        """
        if not self._feature_alphabet.locked():
            self.resize_weights(instance)
        return self._decode(instance, self.get_model())

    def predict(self, instance):
        """Return the ``Prediction`` for ``instance`` without resizing."""
        ws = self._get_model()
        return self._decode(instance, ws)

    def classify(self, instance):
        """Return what ``get_pred()`` gives for the instance's prediction.

        The caller in ``test`` treats it as a ``(label, score)`` pair.
        """
        ws = self._get_model()
        return self._decode(instance, ws).get_pred()  # (label, score)

    def test(self, test_sample, sink):
        """Run the classifier over a test sample and feed the sink.

        For each instance, the true label, predicted label and score are
        passed to ``sink.update``.  A separator line, a progress message
        and the elapsed time go to ``sys.stderr``.  The stream is always
        closed, including when an error interrupts the loop.
        """
        start_time = time.time()
        # NOTE(review): attribute assignment, not a method call -- this
        # stores the label list under the name ``set_labels`` on the sink.
        # Kept as is since the sink class is not visible; please confirm.
        sink.set_labels = [POS_LAB, NEG_LAB]
        # read in data
        stream = self._get_test_stream(test_sample)
        # Same bytes as the old ``print >> sys.stderr`` statements, but
        # valid at runtime on Python 3 as well as Python 2.
        sys.stderr.write("-" * 100 + "\n")
        sys.stderr.write("Testing...")
        try:
            # make predictions on test sample
            for inst in stream:
                true_label = inst.get_target_label()
                pred_label, score = self.classify(inst)
                # store label pair
                sink.update(true_label, pred_label, score)
        finally:
            stream.close()
        elapsed_time = time.time() - start_time
        sys.stderr.write(" done in %s sec.\n" % (round(elapsed_time, 3)))