Esempio n. 1
0
 def __init__(self, moniker):
     """Store the moniker and build the logger and VW model for it.

     moniker -- identifier kept on the instance and handed to VW.
     """
     # Fixed options forwarded to VW as keyword arguments.
     vw_options = {
         'passes': 10,
         'learning_rate': 15,
         'power_t': 1.0,
     }
     self.moniker = moniker
     self.log = VPLogger()
     # The model shares this instance's logger.
     self.model = VW(
         vw="/usr/local/bin/vw",
         moniker=moniker,
         logger=self.log,
         **vw_options
     )
Esempio n. 2
0
class SimpleModel(object):
    """Train/predict front end for a single VW model named by a moniker.

    NOTE(review): VW is presumably a Vowpal Wabbit wrapper (the executable
    path points at ``vw``) -- confirm against its definition.
    """

    def __init__(self, moniker):
        """Store the moniker and build the logger and VW model for it.

        moniker -- identifier handed to VW and used as a log-message prefix.
        """
        self.moniker = moniker
        self.log = VPLogger()
        self.model = VW(vw="/usr/local/bin/vw",
                        moniker=moniker,
                        logger=self.log,
                        **{'passes': 10,
                           'learning_rate': 15,
                           'power_t': 1.0, })

    def train(self, instance_stream):
        """
        Trains the model on the given data stream.

        Every instance from ``instance_stream`` is pushed to the model inside
        its ``training()`` context. Returns ``self`` so calls can be chained.
        """
        self.log.info('%s: training' % (self.moniker))
        with self.model.training():
            seen = 0
            for instance in instance_stream:
                self.model.push_instance(instance)
                seen += 1
                # Periodic progress marker for long streams.
                if seen % 10000 == 0:
                    self.log.debug('streamed %d instances...' % seen)
            self.log.debug('done streaming.')
        self.log.info('%s: trained on %d data points' % (self.moniker, seen))
        return self

    def predict(self, instance_stream):
        """
        Pushes every instance to the model and pairs it with its prediction.

        The stream is consumed once; the instances are kept in memory so they
        can be matched with the predictions read back from the model.

        Returns an iterator of ``(instance, prediction)`` tuples in the order
        the instances were streamed.

        Raises ``Exception`` when the number of predictions read back differs
        from the number of instances pushed.
        """
        self.log.info('%s: predicting' % self.moniker)
        instances = []
        with self.model.predicting():
            for instance in instance_stream:
                self.model.push_instance(instance)
                instances.append(instance)
        # Every pushed instance was recorded, so the list length is the count.
        seen = len(instances)

        self.log.info('%s: predicted for %d data points' % (self.moniker, seen))
        predictions = list(self.model.read_predictions_())
        if seen != len(predictions):
            raise Exception("Number of labels and predictions do not match!  (%d vs %d)" % \
                (seen, len(predictions)))
        # itertools.izip only exists on Python 2. The builtin zip works on both
        # major versions; iter() keeps the result a one-shot iterator on
        # Python 2, where zip builds a list.
        return iter(zip(instances, predictions))