def refresh(self):
        """Rewind the data loader and clear all buffered and accumulated state.

        Besides the minibatch queue, the pending data and the round counter,
        this also resets the centering accumulators (``unnormed_mean`` and
        ``num_stored_data``) and the missing-row counter, so that data served
        after a refresh is not centered with statistics gathered before it.
        """

        self.dl.refresh()
        self.minibatch = FLQ(self.bs)
        self.data = None
        self.num_missing_rows = 0

        # Centering state actually read when new data is fetched.
        self.unnormed_mean = np.zeros((1, self.dl.cols()))
        self.num_stored_data = 0

        # Kept only for backward compatibility with code that may read it.
        self.mean = np.zeros((1, self.dl.cols()))
        self.num_rounds = 0
Beispiel #2
0
    def __init__(self, d, window=1, reg=10**(-5)):
        """Store the configuration and create the initial state.

        Args:
            d: Size of the square matrix held in ``self.gram``.
            window: Capacity passed to the ``FLQ`` queue.
            reg: Scalar multiplying the identity that seeds ``self.gram``.
        """

        # Configuration
        self.d, self.window, self.reg = d, window, reg

        # Counters
        self.num_rounds = 0
        self.num_examples = 0

        # Running state: reg-scaled identity and an empty bounded queue
        self.gram = np.identity(d) * reg
        self.q = FLQ(window)
Beispiel #3
0
class BoxcarGramServer:
    """Serve a normalized Gram matrix accumulated over a window of batches.

    Each call to ``get_gram`` adds ``batch.T @ batch`` to a running sum that
    was seeded with ``reg * I``. Once the queue reports it is full, the entry
    at ``get_items()[0]`` is subtracted from the sum before the new update is
    enqueued. The sum is returned divided by the number of rows (examples)
    contributed by the batches currently tracked.
    """

    def __init__(self, d, window=1, reg=10**(-5)):
        """Store the configuration and create the initial state.

        Args:
            d: Size of the square Gram matrix.
            window: Capacity passed to the ``FLQ`` queue of updates.
            reg: Scalar multiplying the identity that seeds the Gram sum.
        """

        self.d = d
        self.window = window
        self.reg = reg

        self.q = FLQ(self.window)
        self.gram = np.identity(self.d) * self.reg
        self.num_rounds = 0
        self.num_examples = 0
        # Row counts of the batches whose updates are still in the running
        # sum, oldest first. The queued updates are d x d, so the example
        # count cannot be recovered from them.
        self.batch_sizes = []

    def get_gram(self, batch):
        """Fold ``batch`` into the windowed Gram sum and return its average.

        Args:
            batch: 2-D array with one example per row.

        Returns:
            A new array: the running Gram sum divided by the number of
            examples currently tracked (at least 1, to avoid dividing by
            zero when every tracked batch is empty).
        """

        update = np.dot(batch.T, batch)

        self.gram += update

        if self.q.is_full():
            # Retire the update at the front of the queue together with its
            # row count (assumes get_items()[0] is the entry the queue drops
            # on the next enqueue - TODO confirm against FLQ).
            self.gram -= self.q.get_items()[0]

            if self.batch_sizes:
                del self.batch_sizes[0]

        self.num_rounds += 1
        self.q.enqueue(update)
        self.batch_sizes.append(batch.shape[0])

        # Normalization constant: rows of the batches in the window, not the
        # dimension of the queued d x d updates.
        self.num_examples = sum(self.batch_sizes)

        # Division allocates a fresh array, so callers never alias self.gram.
        return self.gram / max(self.num_examples, 1)

    def get_status(self):
        """Return a dict snapshot of the configuration and running state."""

        return {
            'window': self.window,
            'reg': self.reg,
            'queue': self.q,
            'gram': self.gram,
            'num_examples': self.num_examples,
            'num_rounds': self.num_rounds
        }
    def __init__(self, data_loader, batch_size, center=False, num_coords=None):
        """Store the configuration and create empty buffering state.

        Args:
            data_loader: Object kept as ``self.dl``; its ``cols()`` sizes the
                mean accumulator.
            batch_size: Kept as ``self.bs`` and used as the ``FLQ`` capacity.
            center: Flag kept as ``self.center``.
            num_coords: Optional value kept as ``self.num_coords``.
        """

        # Configuration
        self.dl, self.bs = data_loader, batch_size
        self.center, self.num_coords = center, num_coords

        # Counters
        self.num_rounds = 0
        self.num_stored_data = 0
        self.num_missing_rows = 0

        # Buffers: no pending data, zeroed column sums, empty row queue
        self.data = None
        self.unnormed_mean = np.zeros((1, data_loader.cols()))
        self.minibatch = FLQ(batch_size)
Beispiel #5
0
    def __init__(self, period, c, lower=None, verbose=False):
        """Store the configuration and size the parameter window.

        The window holds ``int(period)`` entries, plus one more when
        ``period`` has a fractional part; that fractional part is kept as
        ``self.weight``.

        Args:
            period: Possibly fractional period length.
            c: Coefficient kept as ``self.c``.
            lower: Optional value kept as ``self.lower``.
            verbose: Flag kept as ``self.verbose``.
        """

        whole = int(period)

        self.period = period
        self.weight = period - whole
        self.c = c
        self.lower = lower
        self.verbose = verbose

        # A fractional period needs one extra slot in the window.
        capacity = whole + 1 if self.weight > 0 else whole

        self.window = FLQ(capacity)
        self.num_rounds = 0
Beispiel #6
0
class PeriodicParameterProximalOptimizer:
    """Gradient step combined with a term built from earlier parameters.

    Past parameter vectors are kept in a bounded ``FLQ`` window. Once the
    window is full, each update adds ``c`` times a vector taken from the
    window to the plain gradient step before the final rescaling.
    """

    def __init__(self, period, c, lower=None, verbose=False):
        """Store the configuration and size the parameter window.

        The window holds ``int(period)`` entries, plus one more when
        ``period`` has a fractional part; that fractional part is kept as
        ``self.weight``.
        """

        whole = int(period)

        self.period = period
        self.weight = period - whole
        self.c = c
        self.lower = lower
        self.verbose = verbose

        # A fractional period needs one extra slot in the window.
        capacity = whole + 1 if self.weight > 0 else whole

        self.window = FLQ(capacity)
        self.num_rounds = 0

    def get_update(self, parameters, gradient, eta):
        """Return the next parameter vector.

        Args:
            parameters: Current parameter array; a copy is stored in the
                window.
            gradient: Gradient array at ``parameters``.
            eta: Step size.

        Returns:
            ``(1/eta + c/eta)**-1`` times the (optionally ``get_st``-processed)
            gradient step, to which ``c`` times the windowed term has been
            added once the window is full.
        """

        step = parameters - eta * gradient

        if self.lower is not None:
            step = get_st(step, lower=self.lower)

        if self.window.is_full():
            # The dequeue must happen before get_items() is consulted.
            removed = self.window.dequeue()

            if self.weight > 0:
                periodic = self.weight * removed + self.window.get_items()[-1]
            else:
                periodic = removed

            step += self.c * periodic

        self.window.enqueue(np.copy(parameters))
        self.num_rounds += 1

        inv_eta = eta**(-1)
        scale = (inv_eta + inv_eta * self.c)**(-1)

        return scale * step

    def get_status(self):
        """Return a dict snapshot of the configuration and running state."""

        status = {}
        status['period'] = self.period
        status['c'] = self.c
        status['verbose'] = self.verbose
        status['window'] = self.window
        status['num_rounds'] = self.num_rounds

        return status
class Minibatch2Minibatch:
    """Re-batch rows from a data loader into fixed-size minibatches.

    Rows fetched from ``data_loader.get_data()`` are pushed one at a time
    into a bounded ``FLQ`` of capacity ``batch_size``; a batch is emitted
    whenever that queue is full. Optionally the fetched data is centered with
    a running mean, and optionally the columns of each emitted batch are
    averaged down to ``num_coords`` columns.
    """

    def __init__(self, data_loader, batch_size, center=False, num_coords=None):
        """Store the configuration and create empty buffering state.

        Args:
            data_loader: Source exposing ``get_data``, ``cols``, ``finished``
                and ``refresh``.
            batch_size: Number of rows per emitted minibatch.
            center: If true, subtract the running column mean from each
                freshly fetched array.
            num_coords: If not None, number of columns each emitted batch is
                averaged down to.
        """

        self.dl = data_loader
        self.bs = batch_size
        self.center = center
        self.num_coords = num_coords

        self.num_rounds = 0
        self.num_stored_data = 0
        self.data = None
        self.unnormed_mean = np.zeros((1, self.dl.cols()))
        self.minibatch = FLQ(self.bs)
        self.num_missing_rows = 0

    def get_data(self):
        """Count one more round and return the next minibatch."""

        self.num_rounds += 1

        return self._get_minibatch()

    def _get_minibatch(self):
        """Fill the row queue from pending data and return a batch.

        Fetches from the loader when nothing is pending, recurses until the
        queue is full, and passes ``MissingData`` objects through once per
        reported missing row.
        """

        if self.data is None:
            self.data = self.dl.get_data()

            if isinstance(self.data, MissingData):
                info = self.data.get_status()
                self.num_missing_rows = info['num_missing_rows']
            elif self.center:
                # Center in place with the mean of everything seen so far.
                self.unnormed_mean += np.sum(self.data, axis=0)
                self.num_stored_data += self.data.shape[0]
                self.data -= self.unnormed_mean / self.num_stored_data

        batch = None

        if not isinstance(self.data, MissingData):
            n = self.data.shape[0]
            # Always take at least one row so a full queue still advances.
            need = max([self.bs - self.minibatch.get_length(), 1])

            for i in range(min([n, need])):
                self.minibatch.enqueue(np.copy(self.data[i, :]))

            self.data = None if n <= need else self.data[need:, :]

            if not self.minibatch.is_full():
                batch = self._get_minibatch()
            else:
                items = self.minibatch.get_items()
                data_array = np.array(items)
                batch = data_array \
                    if self.num_coords is None else \
                    self._get_avgd(data_array)
        elif self.num_missing_rows > 0:
            batch = self.data
            self.num_missing_rows -= 1
        else:
            self.data = None
            batch = self._get_minibatch()

        return batch

    # TODO: consider moving this to drrobert
    def _get_avgd(self, batch):
        """Average contiguous column groups of ``batch`` into ``num_coords`` columns.

        The group width is the raw column count of ``batch`` integer-divided
        by ``num_coords``; the final group also absorbs any leftover columns.

        Args:
            batch: 2-D array of raw rows.

        Returns:
            Array of shape ``(batch.shape[0], num_coords)``.

        Raises:
            ValueError: If ``batch`` has fewer columns than ``num_coords``.
        """

        total_cols = batch.shape[1]
        # Width comes from the raw data, not self.cols(), which already
        # reports num_coords. Integer division keeps slice indices integral.
        sample_size = total_cols // self.num_coords

        if sample_size == 0:
            raise ValueError(
                'Cannot average %d columns into %d coordinates.'
                % (total_cols, self.num_coords))

        new_batch = np.zeros((batch.shape[0], self.num_coords))
        last = self.num_coords - 1

        for i in range(self.num_coords):
            begin = i * sample_size
            end = total_cols if i == last else begin + sample_size
            new_batch[:, i] = np.mean(batch[:, begin:end], axis=1)

        return new_batch

    def rows(self):
        """Return the number of rounds served so far."""

        return self.num_rounds

    def cols(self):
        """Return the column count of emitted batches."""

        cols = self.dl.cols()

        if self.num_coords is not None:
            cols = self.num_coords

        return cols

    def finished(self):
        """Return whether the underlying data loader reports it is finished."""

        return self.dl.finished()

    def refresh(self):
        """Rewind the data loader and clear all buffered and accumulated state."""

        self.dl.refresh()
        self.minibatch = FLQ(self.bs)
        self.data = None
        self.num_missing_rows = 0

        # Reset the centering state that _get_minibatch actually reads.
        self.unnormed_mean = np.zeros((1, self.dl.cols()))
        self.num_stored_data = 0

        # Kept only for backward compatibility with code that may read it.
        self.mean = np.zeros((1, self.dl.cols()))
        self.num_rounds = 0

    def get_status(self):
        """Return a dict snapshot of the configuration and running state."""

        return {
            'data_loader': self.dl,
            'batch_size': self.bs,
            'minibatch': self.minibatch,
            'data': self.data,
            'online': True,
            'num_stored_data': self.num_stored_data,
            'num_rounds': self.num_rounds
        }