class GaussianTheta(PhaseSpace):
    """Longitudinal Gaussian particle phase space distribution."""

    def __init__(self, sigma_theta, sigma_dE, is_accepted=None, generator_seed=None):

        self.sigma_theta = sigma_theta
        self.sigma_dE = sigma_dE
        self.is_accepted = is_accepted

        self.random_state = RandomState()
        self.random_state.seed(generator_seed)

    def generate(self, beam):
        beam.theta = self.sigma_theta * self.random_state.randn(beam.n_macroparticles)
        beam.delta_E = self.sigma_dE * self.random_state.randn(beam.n_macroparticles)
        if self.is_accepted:
            self._redistribute(beam)

    def _redistribute(self, beam):
        n = beam.n_macroparticles
        theta = beam.theta.copy()
        delta_E = beam.delta_E.copy()
        for i in range(n):
            while not self.is_accepted(theta[i], delta_E[i]):
                theta[i]  = self.sigma_theta * self.random_state.randn()
                delta_E[i] = self.sigma_dE * self.random_state.randn()
        beam.theta = theta
        beam.delta_E = delta_E
class GaussianX(PhaseSpace):
    """Horizontal Gaussian particle phase space distribution."""

    def __init__(self, sigma_x, sigma_xp, generator_seed=None):
        """Initiates the horizontal beam coordinates
        to the given Gaussian shape.
        """
        self.sigma_x  = sigma_x
        self.sigma_xp = sigma_xp

        self.random_state = RandomState()
        self.random_state.seed(generator_seed)

    @classmethod
    def from_optics(cls, alpha_x, beta_x, epsn_x, betagamma, generator_seed=None):
        """Initialise GaussianX from the given optics functions.
        beta_x is given in meters and epsn_x in micrometers.
        """

        sigma_x  = np.sqrt(beta_x * epsn_x * 1e-6 / betagamma)
        sigma_xp = sigma_x / beta_x

        return cls(sigma_x, sigma_xp, generator_seed)

    def generate(self, beam):
        beam.x = self.sigma_x * self.random_state.randn(beam.n_macroparticles)
        beam.xp = self.sigma_xp * self.random_state.randn(beam.n_macroparticles)
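A hedged usage sketch (the Beam stand-in, the optics numbers and the imports are illustrative, not from the source; the PhaseSpace classes above assume numpy and numpy.random.RandomState are already imported):

import numpy as np
from numpy.random import RandomState  # assumed by the PhaseSpace classes above

class Beam:
    """Minimal stand-in for the project's beam object."""
    def __init__(self, n_macroparticles):
        self.n_macroparticles = n_macroparticles

beam = Beam(10000)
gaussian_x = GaussianX.from_optics(alpha_x=0.0, beta_x=100.0, epsn_x=2.5,
                                   betagamma=7000.0, generator_seed=42)
gaussian_x.generate(beam)
print(beam.x.std(), beam.xp.std())  # close to sigma_x and sigma_xp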
Example #3
class RandomGenerator(object):
    def __init__(self, seed=None):
        self._random = RandomState(seed=seed)

    def seed(self, seed):
        self._random.seed(seed)

    def random(self):
        return self._random.rand()

    def randint(self, a, b=None):
        if b is None:
            b = a
            a = 0
        r = self._random.randint(a, high=b, size=1)
        return r[0]

    def sample(self, population, k):
        if k == 0:
            return []
        return list(self._random.choice(population, size=k, replace=False))

    def __getattr__(self, attr):
        return getattr(self._random, attr)

    def __getstate__(self):
        return {'_random': self._random}

    def __setstate__(self, d):
        self._random = d['_random']

    def uniform(self, low=0.0, high=1.0, size=None):
        return self._random.uniform(low, high, size)
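A hedged usage sketch: RandomGenerator mirrors part of the stdlib random.Random API on top of a seeded RandomState. Note that randint keeps numpy's exclusive upper bound, unlike stdlib random.randint:

rg = RandomGenerator(seed=7)
print(rg.random())                    # float in [0.0, 1.0)
print(rg.randint(10))                 # int in [0, 10); high is exclusive
print(rg.sample(list(range(6)), 3))   # three distinct elements
print(rg.uniform(-1.0, 1.0, size=4))  # array of four floats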
def wrapper(n: int,
            trend: Literal["n", "c", "ct", "ctt"],
            b: int,
            rng_seed: int = 0) -> ndarray:
    """
    Wraps the main simulation and runs it in blocks so that peak memory use
    can be controlled on multi-processor systems when executing in parallel.
    """
    rng = RandomState()
    rng.seed(rng_seed)
    remaining = b
    res = zeros(b)
    finished = 0
    block_size = int(2**20.0 * MAX_MEMORY_SIZE / (8.0 * n))
    for _ in range(0, b, block_size):
        if block_size < remaining:
            count = block_size
        else:
            count = remaining
        st = finished
        en = finished + count
        res[st:en] = adf_simulation(n, trend, count, rng)
        finished += count
        remaining -= count

    return res
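For scale: each replication holds an n-vector of float64, so block_size is the number of replications that fit in MAX_MEMORY_SIZE MiB. With MAX_MEMORY_SIZE = 250 and n = 2000 (illustrative values; MAX_MEMORY_SIZE is an assumed module-level constant), block_size = int(2**20 * 250 / (8.0 * 2000)) = 16384 replications per block.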
Example #5
class BulkRandomGenerator(object):
    """Bulk generator of random integers for tournament seeding and
    reproducibility. Bulk generation of random values is more efficient.
    Use this class like a generator."""
    def __init__(self, seed=None, batch_size: int = 1000):
        self._random_generator = RandomState()
        self._random_generator.seed(seed)
        self._ints = None
        self._batch_size = batch_size
        self._index = 0
        self._fill_ints()

    def _fill_ints(self):
        # Generate more random values. Store as a list since generators
        # cannot be pickled.
        self._ints = self._random_generator.randint(low=0,
                                                    high=2**32 - 1,
                                                    size=self._batch_size,
                                                    dtype="uint64")
        self._index = 0

    def __next__(self):
        try:
            x = self._ints[self._index]
        except IndexError:
            self._fill_ints()
            x = self._ints[self._index]
        self._index += 1
        return x
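A hedged usage sketch: the batch refills transparently inside __next__, so any number of seeds can be drawn:

bulk = BulkRandomGenerator(seed=0, batch_size=4)
seeds = [next(bulk) for _ in range(10)]  # triggers a refill after every 4 draws
print(seeds)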
class GaussianY(PhaseSpace):
    """Vertical Gaussian particle phase space distribution."""

    def __init__(self, sigma_y, sigma_yp, generator_seed=None):
        """Initiates the vertical beam coordinates
        to the given Gaussian shape.
        """
        self.sigma_y  = sigma_y
        self.sigma_yp = sigma_yp

        self.random_state = RandomState()
        self.random_state.seed(generator_seed)

    @classmethod
    def from_optics(cls, alpha_y, beta_y, epsn_y, betagamma, generator_seed=None):
        """Initialise GaussianY from the given optics functions.
        beta_y is given in meters and epsn_y in micrometers.
        """

        sigma_y  = np.sqrt(beta_y * epsn_y * 1e-6 / betagamma)
        sigma_yp = sigma_y / beta_y

        return cls(sigma_y, sigma_yp, generator_seed)

    def generate(self, beam):
        beam.y = self.sigma_y * self.random_state.randn(beam.n_macroparticles)
        beam.yp = self.sigma_yp * self.random_state.randn(beam.n_macroparticles)
Example #7
def rws_test():
    size = 10000
    selection = 1000
    random_state = RandomState()
    probs = random_state.uniform(size=size)
    probs /= sum(probs)

    random_state.seed(5)

    def standard_method():
        t.tic()
        result = []
        cum_probs = np.cumsum(probs)
        for _ in range(selection):
            r = random_state.random()
            for i in range(size):
                if r <= cum_probs[i]:
                    result.append(i)
                    break
        return result

    def numpy_method():
        return random_state.choice(size, size=selection, replace=True, p=probs)

    t = TicToc()
    t.tic()
    result_standard_method = standard_method()
    elp_std = t.tocvalue(restart=True)
    result_numpy_method = numpy_method()
    elp_np = t.tocvalue()
    print('standard: {}'.format(elp_std))
    print('numpy: {}'.format(elp_np))
    print(result_numpy_method)
    print(result_standard_method)
Example #8
    def seed(self, seed=None):
        """ Seed the generator.

        seed can be an integer, an array (or other sequence) of integers of any
        length, or None. If seed is None, then RandomState will try to read data
        from /dev/urandom (or the Windows analogue) if available or seed from
        the clock otherwise.
        """
        RandomState.seed(self, seed)
        self.initial_seed = seed
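The method above belongs to a RandomState subclass; a minimal sketch of such an enclosing class (hypothetical, reconstructed from the method body alone) is:

from numpy.random import RandomState

class TrackedRandomState(RandomState):
    """RandomState that remembers the last seed it was given."""

    def __init__(self, seed=None):
        RandomState.__init__(self, seed)
        self.initial_seed = seed

    def seed(self, seed=None):
        RandomState.seed(self, seed)
        self.initial_seed = seed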
class GaussianZ(PhaseSpace):
    """Longitudinal Gaussian particle phase space distribution."""

    def __init__(self, sigma_z, sigma_dp, is_accepted=None, generator_seed=None):
        """Initiates the longitudinal beam coordinates to a given
        Gaussian shape. If the argument is_accepted is set to
        the is_in_separatrix(z, dp, beam) method of a RFSystems
        object (or similar), macroparticles will be initialised
        until is_accepted returns True.
        """
        self.sigma_z = sigma_z
        self.sigma_dp = sigma_dp
        self.is_accepted = is_accepted

        self.random_state = RandomState()
        self.random_state.seed(generator_seed)

    @classmethod
    def from_optics(cls, beta_z, epsn_z, p0, is_accepted=None,
                    generator_seed=None):
        """Initialise GaussianZ from the given optics functions.
        For the argument is_accepted see __init__.
        """

        sigma_z = np.sqrt(beta_z*epsn_z/(4*np.pi) * e/p0)
        sigma_dp = sigma_z / beta_z

        return cls(sigma_z, sigma_dp, is_accepted, generator_seed)

    def generate(self, beam):
        beam.z = self.sigma_z * self.random_state.randn(beam.n_macroparticles)
        beam.dp = self.sigma_dp * self.random_state.randn(beam.n_macroparticles)
        if self.is_accepted:
            self._redistribute(beam)

    def _redistribute(self, beam):
        n = beam.n_macroparticles
        z = beam.z.copy()
        dp = beam.dp.copy()

        mask_out = ~self.is_accepted(z, dp)
        while mask_out.any():
            n_gen = np.sum(mask_out)
            z[mask_out] = self.sigma_z * self.random_state.randn(n_gen)
            dp[mask_out] = self.sigma_dp * self.random_state.randn(n_gen)
            mask_out = ~self.is_accepted(z, dp)
            print('Reiterate on non-accepted particles')

        # for i in xrange(n):
        #     while not self.is_accepted(z[i], dp[i]):
        #         z[i]  = self.sigma_z * self.random_state.randn()
        #         dp[i] = self.sigma_dp * self.random_state.randn()

        beam.z = z
        beam.dp = dp
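A hedged sketch of an acceptance predicate for the vectorized rejection loop above, standing in for RFSystems.is_in_separatrix (it must take arrays and return a boolean mask):

def inside_ellipse(z, dp):
    # accept particles inside a toy longitudinal acceptance ellipse
    return (z / 0.3) ** 2 + (dp / 1e-3) ** 2 < 1.0

gaussian_z = GaussianZ(sigma_z=0.1, sigma_dp=3e-4,
                       is_accepted=inside_ellipse, generator_seed=12345)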
def test_segmentation():
    PRNG = RandomState()
    PRNG2 = RandomState()
    if args.seed > 0:
        PRNG.seed(args.seed)
        PRNG2.seed(args.seed)

    transform = Compose(
        [
            [ColorJitter(prob=0.75), None],
            Merge(),
            Expand((0.8, 1.5)),
            RandomCompose([
                # RandomResize(1, 1.5),
                RandomRotate(10),
                RandomShift(0.1)
            ]),
            Scale(300),
            # ElasticTransform(100),
            RandomCrop(300),
            HorizontalFlip(),
            Split([0, 3], [3, 6]),
            #[SubtractMean(mean=VOC.MEAN), None],
        ],
        PRNG,
        border='constant',
        fillval=VOC.MEAN,
        anchor_index=3)

    voc_dataset = VOCSegmentation(root=args.root,
                                  image_set=[('2007', 'trainval')],
                                  transform=transform,
                                  instance=False)
    viz = Viz()

    results = []
    count = 0
    i = PRNG2.choice(len(voc_dataset))
    for _ in range(1000):
        img, target = voc_dataset[i]
        img2 = viz.blend_segmentation(img, target)

        con = np.hstack([img, target, img2])
        results.append(con)
        cv2.imshow('result', con[..., ::-1])
        c = cv2.waitKey(500)

        if c == 27 or c == ord('q'):  # ESC / 'q'
            break
        elif c == ord('c') or count >= 3:
            count = 0
            i = PRNG2.choice(len(voc_dataset))
        count += 1
Example #11
def test_random_choice():
    """nestle.random_choice() is designed to mimic np.random.choice(),
    for numpy < v1.7.0. In cases where we have both, test that they agree.
    """
    rstate = RandomState(0)
    p = rstate.rand(10)
    p /= p.sum()
    for seed in range(10):
        rstate.seed(seed)
        i = rstate.choice(10, p=p)
        rstate.seed(seed)
        j = nestle.random_choice(10, p=p, rstate=rstate)
        assert i == j
Example #13
def pseudorandom(sequence, seed=None):
    '''
    Returns a randomly selected element from the sequence.
    '''
    # We need to create a stand-alone generator that cannot be affected by other
    # parts of the code that may require random data (e.g. noise).
    from numpy.random import RandomState
    state = RandomState()
    state.seed(seed)
    n = len(sequence)
    while True:
        i = state.randint(0, n)
        yield sequence[i]
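A hedged usage sketch: for a fixed seed the infinite generator yields the same picks on every run:

gen = pseudorandom(['a', 'b', 'c'], seed=1)
print([next(gen) for _ in range(5)])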
Example #15
def test_bboxes():
    PRNG = RandomState()
    PRNG2 = RandomState()
    if args.seed > 0:
        PRNG.seed(args.seed)
        PRNG2.seed(args.seed)

    transform = Compose(
        [
            [ColorJitter(prob=0.5)],  # or write [ColorJitter(), None]
            BoxesToCoords(),
            HorizontalFlip(),
            Expand((1, 4), prob=0.5),
            ObjectRandomCrop(),
            Resize(300),
            CoordsToBoxes(),
            #[SubtractMean(mean=VOC.MEAN)],
        ],
        PRNG,
        mode=None,
        fillval=VOC.MEAN,
        outside_points='clamp')

    viz = Viz()
    voc_dataset = VOCDetection(root=args.root,
                               image_set=[('2007', 'trainval')],
                               keep_difficult=True,
                               transform=transform)

    results = []
    count = 0
    i = PRNG2.choice(len(voc_dataset))
    for _ in range(100):
        img, boxes, labels = voc_dataset[i]
        if len(labels) == 0:
            continue

        img = viz.draw_bbox(img, boxes, labels, True)
        results.append(img)
        cv2.imshow('0', img[:, :, ::-1])
        c = cv2.waitKey(500)
        if c == 27 or c == ord('q'):  # ESC / 'q'
            break
        elif c == ord('c') or count >= 5:
            count = 0
            i = PRNG2.choice(len(voc_dataset))
        count += 1
Example #16
def wrapper(nobs: int,
            b: int,
            trend: str = "c",
            max_memory: int = 1024) -> np.ndarray:
    """
    A wrapper around the main simulation that runs it in blocks so that large
    simulations can be run without constructing very large arrays and running
    out of memory.
    """
    rng = RandomState()
    rng.seed(0)
    memory = max_memory * 2**20
    b_max_memory = memory // 8 // nobs
    b_max_memory = max(b_max_memory, 1)
    remaining = b
    results = np.zeros(b)
    now = dt.datetime.now()
    time_fmt = "{0:d}:{1:0>2d}:{2:0>2d}"
    msg = "trend {0}, {1} reps remaining, " + "elapsed {2}, remaining {3}"
    while remaining > 0:
        b_eff = min(remaining, b_max_memory)
        completed = b - remaining
        results[completed:completed + b_eff] = simulate_kpss(nobs,
                                                             b_eff,
                                                             trend=trend,
                                                             rng=rng)
        remaining -= b_max_memory
        elapsed = (dt.datetime.now() - now).total_seconds()
        expected_remaining = max(0, remaining) * (elapsed / (b - remaining))

        m, s = divmod(int(elapsed), 60)
        h, m = divmod(m, 60)
        elapsed_fmt = time_fmt.format(h, m, s)

        m, s = divmod(int(expected_remaining), 60)
        h, m = divmod(m, 60)
        expected_remaining_fmt = time_fmt.format(h, m, s)

        print(
            msg.format(trend, max(0, remaining), elapsed_fmt,
                       expected_remaining_fmt))

    return results
Example #17
def pseudorandom(sequence, c=np.inf, key=None, seed=None):
    '''
    Returns a randomly selected element from the sequence.

    Parameters
    ----------
    {common_docstring}
    seed : int
        Seed for random number generator.
    '''
    # We need to create a stand-alone generator that cannot be affected by
    # other parts of the code that may require random data (e.g. noise).
    state = RandomState()
    state.seed(seed)
    n = len(sequence)
    cycle = 0
    while cycle < c:
        i = state.randint(0, n)
        yield sequence[i]
        cycle += 1
Example #18
def shuffled_set(sequence, c=np.inf, key=None, seed=None):
    '''
    Returns a randomly selected element from the sequence and removes it from
    the sequence.  Once the sequence is exhausted, repopulate list with the
    original sequence.

    Parameters
    ----------
    {common_docstring}
    seed : int
        Seed for random number generator.
    '''
    cycle = 0
    state = RandomState()
    state.seed(seed)
    while cycle < c:
        indices = list(range(len(sequence)))
        state.shuffle(indices)
        for i in indices:
            yield sequence[i]
        cycle += 1
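A hedged usage sketch: each cycle is one seeded shuffle of the whole sequence, so c=2 over three items yields two back-to-back permutations:

items = list(shuffled_set(['a', 'b', 'c'], c=2, seed=0))
print(items)  # six picks; each cycle contains 'a', 'b' and 'c' exactly once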
def wrapper(nobs, b, trend='c', max_memory=250):
    """
    A wrapper around the main simulation that runs it in blocks so that large
    simulations can be run without constructing very large arrays and running
    out of memory.
    """
    rng = RandomState()
    rng.seed(0)
    memory = max_memory * 2 ** 20
    b_max_memory = memory // 8 // nobs
    b_max_memory = max(b_max_memory, 1)
    remaining = b
    results = np.zeros(b)
    while remaining > 0:
        b_eff = min(remaining, b_max_memory)
        completed = b - remaining
        results[completed:completed + b_eff] = \
            simulate_kpss(nobs, b_eff, trend=trend, rng=rng)
        remaining -= b_max_memory

    return results
def simulate_kpss(nobs, B, trend='c', rng=None):
    """
    Simulates the KPSS test statistic for nobs observations,
    performing B replications.
    """
    if rng is None:
        rng = RandomState()
        rng.seed(0)

    standard_normal = rng.standard_normal

    e = standard_normal((nobs, B))
    z = np.ones((nobs, 1))
    if trend == 'ct':
        z = add_trend(z, trend='t')
    zinv = np.linalg.pinv(z)
    trend_coef = zinv.dot(e)
    resid = e - z.dot(trend_coef)
    s = np.cumsum(resid, axis=0)
    lam = np.mean(resid ** 2.0, axis=0)
    kpss = 1 / (nobs ** 2.0) * np.sum(s ** 2.0, axis=0) / lam
    return kpss
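For reference, the statistic computed per replication column is the KPSS statistic

    \hat{\eta} = \frac{1}{T^2 \hat{\lambda}} \sum_{t=1}^{T} S_t^2, \qquad
    S_t = \sum_{i=1}^{t} \hat{e}_i, \qquad
    \hat{\lambda} = \frac{1}{T} \sum_{i=1}^{T} \hat{e}_i^2,

with T = nobs and \hat{e}_i the detrended residuals; these correspond to s, lam and kpss in the code above.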
def wrapper(n, trend, b, seed=0):
    """
    Wraps the main simulation and runs it in blocks so that peak memory use
    can be controlled on multi-processor systems when executing in parallel.
    """
    rng = RandomState()
    rng.seed(seed)
    remaining = b
    res = zeros(b)
    finished = 0
    block_size = int(2 ** 20.0 * MAX_MEMORY_SIZE / (8.0 * n))
    for j in range(0, b, block_size):
        if block_size < remaining:
            count = block_size
        else:
            count = remaining
        st = finished
        en = finished + count
        res[st:en] = adf_simulation(n, trend, count, rng)
        finished += count
        remaining -= count

    return res
Example #24
class Streams(object):
    def __init__(self, startscape_seed):
        self.startscape_rand = RandomState()
        self.startscape_rand.seed(startscape_seed)

    ##this function makes every delay zero
    # def generate_startscape_rand(self,members):
    #     if members['kids']==0 and members['olds']==0:
    #         stratscape_vals=np.arange(1)
    #         startscape_prob=[1]
    #     elif members['kids']>0 and members['olds']==0:
    #         stratscape_vals=np.arange(1)
    #         startscape_prob=[1]
    #     elif members['kids']==0 and members['olds']>0:
    #         stratscape_vals=np.arange(1)
    #         startscape_prob=[1]
    #     else:
    #         stratscape_vals=np.arange(1)
    #         startscape_prob=[1]
    #     return(self.startscape_rand.choice(stratscape_vals,p=startscape_prob))

    #this function draws every delay according to a distribution function
    def generate_startscape_rand(self, members):
        if members['kids'] == 0 and members['olds'] == 0:
            stratscape_vals = np.arange(2, 10)
            startscape_prob = (0.2, 0.3, 0.3, 0.15, 0.05, 0.0, 0.0, 0.0)
        elif members['kids'] > 0 and members['olds'] == 0:
            stratscape_vals = np.arange(2, 10)
            startscape_prob = (0.0, 0.1, 0.15, 0.30, 0.3, 0.15, 0.0, 0.0)
        elif members['kids'] == 0 and members['olds'] > 0:
            stratscape_vals = np.arange(2, 10)
            startscape_prob = (0.0, 0.0, 0.0, 0.1, 0.3, 0.3, 0.15, 0.15)
        else:
            stratscape_vals = np.arange(2, 10)
            startscape_prob = (0.0, 0.0, 0.0, 0.0, 0.2, 0.3, 0.3, 0.2)
        return (self.startscape_rand.choice(stratscape_vals,
                                            p=startscape_prob))
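A hedged usage sketch (the dict keys follow the branches above):

streams = Streams(startscape_seed=42)
delay = streams.generate_startscape_rand({'kids': 1, 'olds': 0})
print(delay)  # int between 3 and 7: the only values with nonzero probability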
def wrapper(nobs, b, trend='c', max_memory=1024):
    """
    A wrapper around the main simulation that runs it in blocks so that large
    simulations can be run without constructing very large arrays and running
    out of memory.
    """
    rng = RandomState()
    rng.seed(0)
    memory = max_memory * 2 ** 20
    b_max_memory = memory // 8 // nobs
    b_max_memory = max(b_max_memory, 1)
    remaining = b
    results = np.zeros(b)
    now = dt.datetime.now()
    time_fmt = '{0:d}:{1:0>2d}:{2:0>2d}'
    msg = 'trend {0}, {1} reps remaining, ' + \
          'elapsed {2}, remaining {3}'
    while remaining > 0:
        b_eff = min(remaining, b_max_memory)
        completed = b - remaining
        results[completed:completed + b_eff] = \
            simulate_kpss(nobs, b_eff, trend=trend, rng=rng)
        remaining -= b_max_memory
        elapsed = (dt.datetime.now() - now).total_seconds()
        expected_remaining = max(0, remaining) * (elapsed / (b - remaining))

        m, s = divmod(int(elapsed), 60)
        h, m = divmod(m, 60)
        elapsed = time_fmt.format(h, m, s)

        m, s = divmod(int(expected_remaining), 60)
        h, m = divmod(m, 60)
        expected_remaining = time_fmt.format(h, m, s)

        print(msg.format(trend, max(0, remaining), elapsed, expected_remaining))

    return results
Example #26
class IIDBootstrap(object):
    """
    Bootstrap using uniform resampling

    Parameters
    ----------
    args
        Positional arguments to bootstrap
    kwargs
        Keyword arguments to bootstrap

    Attributes
    ----------
    index : array
        The current index of the bootstrap
    data : tuple
        Two-element tuple with the pos_data in the first position and kw_data
        in the second (pos_data, kw_data)
    pos_data : tuple
        Tuple containing the positional arguments (in the order entered)
    kw_data : dict
        Dictionary containing the keyword arguments
    random_state : RandomState
        RandomState instance used by bootstrap

    Notes
    -----
    Supports numpy arrays and pandas Series and DataFrames.  Data returned has
    the same type as the input data.

    Data entered using keyword arguments is directly accessibly as an
    attribute.

    Examples
    --------
    Data can be accessed in a number of ways.  Positional data is retained in
    the same order as it was entered when the bootstrap was initialized.
    Keyword data is available both as an attribute or using a dictionary syntax
    on kw_data.

    >>> from arch.bootstrap import IIDBootstrap
    >>> from numpy.random import standard_normal
    >>> y = standard_normal((500, 1))
    >>> x = standard_normal((500,2))
    >>> z = standard_normal(500)
    >>> bs = IIDBootstrap(x, y=y, z=z)
    >>> for data in bs.bootstrap(100):
    ...     bs_x = data[0][0]
    ...     bs_y = data[1]['y']
    ...     bs_z = bs.z
    """
    def __init__(self, *args, **kwargs):
        self.random_state = RandomState()
        self._initial_state = self.random_state.get_state()
        self._args = args
        self._kwargs = kwargs
        if args:
            self._num_items = len(args[0])
        elif kwargs:
            key = list(kwargs.keys())[0]
            self._num_items = len(kwargs[key])

        all_args = list(args)
        all_args.extend([v for v in itervalues(kwargs)])

        for arg in all_args:
            if len(arg) != self._num_items:
                raise ValueError("All inputs must have the same number of "
                                 "elements in axis 0")
        self._index = np.arange(self._num_items)

        self._parameters = []
        self._seed = None
        self.pos_data = args
        self.kw_data = kwargs
        self.data = (args, kwargs)

        self._base = None
        self._results = None
        self._studentized_results = None
        self._last_func = None
        self._name = 'IID Bootstrap'
        for key, value in iteritems(kwargs):
            attr = getattr(self, key, None)
            if attr is None:
                self.__setattr__(key, value)
            else:
                raise ValueError(key + ' is a reserved name')

    def __str__(self):
        repr = self._name
        repr += '(no. pos. inputs: ' + str(len(self.pos_data))
        repr += ', no. keyword inputs: ' + str(len(self.kw_data)) + ')'
        return repr

    def __repr__(self):
        return self.__str__()[:-1] + ', ID: ' + hex(id(self)) + ')'

    def _repr_html(self):
        html = '<strong>' + self._name + '</strong>('
        html += '<strong>no. pos. inputs</strong>: ' + str(len(self.pos_data))
        html += ', <strong>no. keyword inputs</strong>: ' + \
                str(len(self.kw_data))
        html += ', <strong>ID</strong>: ' + hex(id(self)) + ')'
        return html

    @property
    def index(self):
        """
        Returns the current index of the bootstrap
        """
        return self._index

    def get_state(self):
        """
        Gets the state of the bootstrap's random number generator

        Returns
        -------
        state : RandomState state vector
            Array containing the state
        """
        return self.random_state.get_state()

    def set_state(self, state):
        """
        Sets the state of the bootstrap's random number generator

        Parameters
        ----------
        state : RandomState state vector
            Array containing the state
        """

        return self.random_state.set_state(state)

    def seed(self, value):
        """
        Seeds the bootstrap's random number generator

        Parameters
        ----------
        value : int
            Integer to use as the seed
        """
        self._seed = value
        self.random_state.seed(value)
        return None

    def reset(self, use_seed=True):
        """
        Resets the bootstrap to either its initial state or the last seed.

        Parameters
        ----------
        use_seed : bool, optional
            Flag indicating whether to use the last seed if provided.  If
            False or if no seed has been set, the bootstrap will be reset
            to the initial state.  Default is True
        """
        self._index = np.arange(self._num_items)
        self._resample()
        self.random_state.set_state(self._initial_state)
        if use_seed and self._seed is not None:
            self.seed(self._seed)
        return None

    def bootstrap(self, reps):
        """
        Iterator for use when bootstrapping

        Parameters
        ----------
        reps : int
            Number of bootstrap replications

        Example
        -------
        The key steps are problem dependent and so this example shows the use
        as an iterator that does not produce any output

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> bs = IIDBootstrap(np.arange(100), x=np.random.randn(100))
        >>> for posdata, kwdata in bs.bootstrap(1000):
        ...     # Do something with the positional data and/or keyword data
        ...     pass

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap

        Notes
        -----
        The iterator returns a tuple containing the data entered in positional
        arguments as a tuple and the data entered using keywords as a
        dictionary
        """
        for _ in range(reps):
            indices = np.asarray(self.update_indices())
            self._index = indices
            yield self._resample()

    def conf_int(self,
                 func,
                 reps=1000,
                 method='basic',
                 size=0.95,
                 tail='two',
                 extra_kwargs=None,
                 reuse=False,
                 sampling='nonparametric',
                 std_err_func=None,
                 studentize_reps=1000):
        """
        Parameters
        ----------
        func : callable
            Function the computes parameter values.  See Notes for requirements
        reps : int, optional
            Number of bootstrap replications
        method : string, optional
            One of 'basic', 'percentile', 'studentized', 'norm' (identical to
            'var', 'cov'), 'bc' (identical to 'debiased', 'bias-corrected'), or
            'bca'
        size : float, optional
            Coverage of confidence interval
        tail : string, optional
            One of 'two', 'upper' or 'lower'.
        reuse : bool, optional
            Flag indicating whether to reuse previously computed bootstrap
            results.  This allows alternative methods to be compared without
            rerunning the bootstrap simulation.  Reuse is ignored if reps is
            not the same across multiple runs, func changes across calls, or
            method is 'studentized'.
        sampling : string, optional
            Type of sampling to use: 'nonparametric', 'semi-parametric' (or
            'semi') or 'parametric'.  The default is 'nonparametric'.  See
            notes about the changes to func required when using 'semi' or
            'parametric'.
        extra_kwargs : dict, optional
            Extra keyword arguments to use when calling func and std_err_func,
            when appropriate
        std_err_func : callable, optional
            Function to use when standardizing estimated parameters when using
            the studentized bootstrap.  Providing an analytical function
            eliminates the need for a nested bootstrap
        studentize_reps : int, optional
            Number of bootstraps to use in the inner component when using the
            studentized bootstrap.  Ignored when ``std_err_func`` is provided

        Returns
        -------
        intervals : 2-d array
            Computed confidence interval.  Row 0 contains the lower bounds, and
            row 1 contains the upper bounds.  Each column corresponds to a
            parameter. When tail is 'lower', all upper bounds are inf.
            Similarly, 'upper' sets all lower bounds to -inf.

        Examples
        --------
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(0)
        >>> y = np.random.randn(1000, 2)
        >>> from arch.bootstrap import IIDBootstrap
        >>> bs = IIDBootstrap(y)
        >>> ci = bs.conf_int(func, 1000)

        Notes
        -----
        When there are no extra keyword arguments, the function is called

        .. code:: python

            func(*args, **kwargs)

        where args and kwargs are the bootstrap version of the data provided
        when setting up the bootstrap.  When extra keyword arguments are used,
        these are appended to kwargs before calling func.

        The standard error function, if provided, must return a vector of
        parameter standard errors and is called

        .. code:: python

            std_err_func(params, *args, **kwargs)

        where ``params`` is the vector of estimated parameters using the same
        bootstrap data as in args and kwargs.

        The bootstraps are:

        * 'basic' - Basic confidence using the estimated parameter and
          difference between the estimated parameter and the bootstrap
          parameters
        * 'percentile' - Direct use of bootstrap percentiles
        * 'norm' - Makes use of normal approximation and bootstrap covariance
          estimator
        * 'studentized' - Uses either a standard error function or a nested
          bootstrap to estimate percentiles and the bootstrap covariance for
          scale
        * 'bc' - Bias corrected using estimate bootstrap bias correction
        * 'bca' - Bias corrected and accelerated, adding acceleration parameter
          to 'bc' method

        """
        studentized = 'studentized'
        if not 0.0 < size < 1.0:
            raise ValueError('size must be strictly between 0 and 1')
        tail = tail.lower()
        if tail not in ('two', 'lower', 'upper'):
            raise ValueError("tail must be one of 'two', 'lower' or 'upper'")
        studentize_reps = studentize_reps if method == studentized else 0

        _reuse = False
        if reuse:
            # check conditions for reuse
            _reuse = (self._results is not None and len(self._results) == reps
                      and method != studentized and self._last_func is func)

        if not _reuse:
            if reuse:
                import warnings

                warn = 'The conditions to reuse the previous bootstrap have ' \
                       'not been satisfied. A new bootstrap will be used.'
                warnings.warn(warn, RuntimeWarning)
            self._construct_bootstrap_estimates(
                func,
                reps,
                extra_kwargs,
                std_err_func=std_err_func,
                studentize_reps=studentize_reps,  # noqa
                sampling=sampling)

        base, results = self._base, self._results
        studentized_results = self._studentized_results

        std_err = []
        if method in ('norm', 'var', 'cov', studentized):
            errors = results - results.mean(axis=0)
            std_err = np.sqrt(np.diag(errors.T.dot(errors) / reps))

        if tail == 'two':
            alpha = (1.0 - size) / 2
        else:
            alpha = (1.0 - size)

        percentiles = [alpha, 1.0 - alpha]
        norm_quantiles = stats.norm.ppf(percentiles)

        if method in ('norm', 'var', 'cov'):
            lower = base + norm_quantiles[0] * std_err
            upper = base + norm_quantiles[1] * std_err

        elif method in ('percentile', 'basic', studentized, 'debiased', 'bc',
                        'bias-corrected', 'bca'):
            values = results
            if method == studentized:
                # studentized uses studentized parameter estimates
                values = studentized_results

            if method in ('debiased', 'bc', 'bias-corrected', 'bca'):
                # bias corrected uses modified percentiles, but is
                # otherwise identical to the percentile method
                p = (results < base).mean(axis=0)
                b = stats.norm.ppf(p)
                b = b[:, None]
                if method == 'bca':
                    nobs = self._num_items
                    jk_params = _loo_jackknife(func, nobs, self._args,
                                               self._kwargs)
                    u = (nobs - 1) * (jk_params - base)
                    numer = np.sum(u**3, 0)
                    denom = 6 * (np.sum(u**2, 0)**(3.0 / 2.0))
                    small = denom < (np.abs(numer) * np.finfo(np.float64).eps)
                    if small.any():
                        message = 'Jackknife variance estimate {jk_var} is ' \
                                  'too small to use BCa'
                        raise RuntimeError(message.format(jk_var=denom))
                    a = numer / denom
                    a = a[:, None]
                else:
                    a = 0.0

                percentiles = stats.norm.cdf(b + (b + norm_quantiles) /
                                             (1.0 - a * (b + norm_quantiles)))
                percentiles = list(100 * percentiles)
            else:
                percentiles = [100 * p for p in percentiles]  # Rescale

            if method not in ('bc', 'debiased', 'bias-corrected', 'bca'):
                ci = np.asarray(np.percentile(values, percentiles, axis=0))
                lower = ci[0, :]
                upper = ci[1, :]
            else:
                k = values.shape[1]
                lower = np.zeros(k)
                upper = np.zeros(k)
                for i in range(k):
                    lower[i], upper[i] = np.percentile(values[:, i],
                                                       list(percentiles[i]))

            # Basic and studentized use the lower empirical quantile to
            # compute upper and vice versa.  Bias corrected and percentile use
            # upper to estimate the upper, and lower to estimate the lower
            if method == 'basic':
                lower_copy = lower + 0.0
                lower = 2.0 * base - upper
                upper = 2.0 * base - lower_copy
            elif method == studentized:
                lower_copy = lower + 0.0
                lower = base - upper * std_err
                upper = base - lower_copy * std_err

        else:
            raise ValueError('Unknown method')

        if tail == 'lower':
            upper = np.zeros_like(base)
            upper.fill(np.inf)
        elif tail == 'upper':
            lower = np.zeros_like(base)
            lower.fill(-1 * np.inf)

        return np.vstack((lower, upper))

    def clone(self, *args, **kwargs):
        """
        Clones the bootstrap using different data.

        Parameters
        ----------
        args
            Positional arguments to bootstrap
        kwargs
            Keyword arguments to bootstrap

        Returns
        -------
        bs
            Bootstrap instance
        """
        pos_arguments = copy.deepcopy(self._parameters)
        pos_arguments.extend(args)
        bs = self.__class__(*pos_arguments, **kwargs)
        if self._seed is not None:
            bs.seed(self._seed)
        return bs

    def apply(self, func, reps=1000, extra_kwargs=None):
        """
        Applies a function to bootstrap replicated data

        Parameters
        ----------
        func : callable
            Function the computes parameter values.  See Notes for requirements
        reps : int, optional
            Number of bootstrap replications
        extra_kwargs : dict, optional
            Extra keyword arguments to use when calling func.  Must not
            conflict with keyword arguments used to initialize bootstrap

        Returns
        -------
        results : array
            reps by nparam array of computed function values where each row
            corresponds to a bootstrap iteration

        Notes
        -----
        When there are no extra keyword arguments, the function is called

        .. code:: python

            func(params, *args, **kwargs)

        where args and kwargs are the bootstrap version of the data provided
        when setting up the bootstrap.  When extra keyword arguments are used,
        these are appended to kwargs before calling func

        Examples
        --------
        >>> import numpy as np
        >>> x = np.random.randn(1000,2)
        >>> from arch.bootstrap import IIDBootstrap
        >>> bs = IIDBootstrap(x)
        >>> def func(y):
        ...     return y.mean(0)
        >>> results = bs.apply(func, 100)
        """
        kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs)
        base = func(*self._args, **kwargs)
        try:
            num_params = base.shape[0]
        except (AttributeError, IndexError):
            num_params = 1
        results = np.zeros((reps, num_params))
        count = 0
        for pos_data, kw_data in self.bootstrap(reps):
            kwargs = _add_extra_kwargs(kw_data, extra_kwargs)
            results[count] = func(*pos_data, **kwargs)
            count += 1
        return results

    def _construct_bootstrap_estimates(self,
                                       func,
                                       reps,
                                       extra_kwargs=None,
                                       std_err_func=None,
                                       studentize_reps=0,
                                       sampling='nonparametric'):
        # Private, more complicated version of apply
        self._last_func = func
        semi = parametric = False
        if sampling == 'parametric':
            parametric = True
        elif sampling in ('semi', 'semi-parametric', 'semiparametric'):
            semi = True

        if extra_kwargs is not None:
            if any(k in self._kwargs for k in extra_kwargs):
                raise ValueError('extra_kwargs contains keys used for variable'
                                 ' names in the bootstrap')
        kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs)
        base = func(*self._args, **kwargs)

        num_params = 1 if np.isscalar(base) else base.shape[0]
        results = np.zeros((reps, num_params))
        studentized_results = np.zeros((reps, num_params))

        count = 0
        for pos_data, kw_data in self.bootstrap(reps):
            kwargs = _add_extra_kwargs(kw_data, extra_kwargs)
            if parametric:
                kwargs['state'] = self.random_state
                kwargs['params'] = base
            elif semi:
                kwargs['params'] = base
            results[count] = func(*pos_data, **kwargs)
            if std_err_func is not None:
                std_err = std_err_func(results[count], *pos_data, **kwargs)
                studentized_results[count] = (results[count] - base) / std_err
            elif studentize_reps > 0:
                # Need new bootstrap of same type
                nested_bs = self.clone(*pos_data, **kw_data)
                # Set the seed to ensure reproducibility
                seed = self.random_state.randint(2**31 - 1)
                nested_bs.seed(seed)
                cov = nested_bs.cov(func,
                                    studentize_reps,
                                    extra_kwargs=extra_kwargs)
                std_err = np.sqrt(np.diag(cov))
                studentized_results[count] = (results[count] - base) / std_err
            count += 1

        self._base = np.asarray(base)
        self._results = np.asarray(results)
        self._studentized_results = np.asarray(studentized_results)

    def cov(self, func, reps=1000, recenter=True, extra_kwargs=None):
        """
        Compute parameter covariance using bootstrap

        Parameters
        ----------
        func : callable
            Callable function that returns the statistic of interest as a
            1-d array
        reps : int, optional
            Number of bootstrap replications
        recenter : bool, optional
            Whether to center the bootstrap variance estimator on the average
            of the bootstrap samples (True) or to center on the original sample
            estimate (False).  Default is True.
        extra_kwargs: dict, optional
            Dictionary of extra keyword arguments to pass to func

        Returns
        -------
        cov: array
            Bootstrap covariance estimator

        Notes
        -----
        func must have the signature

        .. code:: python

            func(params, *args, **kwargs)

        where params are a 1-dimensional array, and `*args` and `**kwargs` are
        data used in the bootstrap.  The first argument, params, will be
        None when called using the original data, and will contain the estimate
        computed using the original data in bootstrap replications.  This
        parameter is passed to allow parametric bootstrap simulation.

        Example
        -------
        Bootstrap covariance of the mean

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(axis=0)
        >>> y = np.random.randn(1000, 3)
        >>> bs = IIDBootstrap(y)
        >>> cov = bs.cov(func, 1000)

        Bootstrap covariance using a function that takes additional input

        >>> def func(x, stat='mean'):
        ...     if stat=='mean':
        ...         return x.mean(axis=0)
        ...     elif stat=='var':
        ...         return x.var(axis=0)
        >>> cov = bs.cov(func, 1000, extra_kwargs={'stat':'var'})

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap

        """
        self._construct_bootstrap_estimates(func, reps, extra_kwargs)
        base, results = self._base, self._results

        if recenter:
            errors = results - np.mean(results, 0)
        else:
            errors = results - base

        return errors.T.dot(errors) / reps

    def var(self, func, reps=1000, recenter=True, extra_kwargs=None):
        """
        Compute parameter variance using bootstrap

        Parameters
        ----------
        func : callable
            Callable function that returns the statistic of interest as a
            1-d array
        reps : int, optional
            Number of bootstrap replications
        recenter : bool, optional
            Whether to center the bootstrap variance estimator on the average
            of the bootstrap samples (True) or to center on the original sample
            estimate (False).  Default is True.
        extra_kwargs: dict, optional
            Dictionary of extra keyword arguments to pass to func

        Returns
        -------
        var : 1-d array
            Bootstrap variance estimator

        Notes
        -----
        func must have the signature

        .. code:: python

            func(params, *args, **kwargs)

        where params are a 1-dimensional array, and `*args` and `**kwargs` are
        data used in the bootstrap.  The first argument, params, will be
        None when called using the original data, and will contain the estimate
        computed using the original data in bootstrap replications.  This
        parameter is passed to allow parametric bootstrap simulation.

        Example
        -------
        Bootstrap covariance of the mean

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(axis=0)
        >>> y = np.random.randn(1000, 3)
        >>> bs = IIDBootstrap(y)
        >>> variances = bs.var(func, 1000)

        Bootstrap covariance using a function that takes additional input

        >>> def func(x, stat='mean'):
        ...     if stat=='mean':
        ...         return x.mean(axis=0)
        ...     elif stat=='var':
        ...         return x.var(axis=0)
        >>> variances = bs.var(func, 1000, extra_kwargs={'stat': 'var'})

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap

        """
        self._construct_bootstrap_estimates(func, reps, extra_kwargs)
        base, results = self._base, self._results

        if recenter:
            errors = results - np.mean(results, 0)
        else:
            errors = results - base

        return (errors**2).sum(0) / reps

    def update_indices(self):
        """
        Update indices for the next iteration of the bootstrap.  This must
        be overridden when creating new bootstraps.
        """
        return self.random_state.randint(self._num_items, size=self._num_items)

    def _resample(self):
        """
        Resample all data using the values in _index
        """
        indices = self._index
        pos_data = []
        for values in self._args:
            if isinstance(values, (pd.Series, pd.DataFrame)):
                pos_data.append(values.iloc[indices])
            else:
                pos_data.append(values[indices])
        named_data = {}
        for key, values in iteritems(self._kwargs):
            if isinstance(values, (pd.Series, pd.DataFrame)):
                named_data[key] = values.iloc[indices]
            else:
                named_data[key] = values[indices]
            setattr(self, key, named_data[key])

        self.pos_data = pos_data
        self.kw_data = named_data
        self.data = (pos_data, named_data)
        return self.data
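A hedged reproducibility sketch: seeding the bootstrap's RandomState and resetting to that seed makes repeated confidence intervals identical:

import numpy as np

y = np.random.randn(500, 2)
bs = IIDBootstrap(y)
bs.seed(23)
ci_first = bs.conf_int(lambda x: x.mean(0), reps=250)
bs.reset()  # rewinds the RandomState to the last seed
ci_second = bs.conf_int(lambda x: x.mean(0), reps=250)
assert np.allclose(ci_first, ci_second)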
Example #27
File: rbm.py Project: nludwig/rbm
class RestrictedBoltzmannMachine:

    #
    #initialization methods
    #

    def __init__(self,
                 visibleLayer,
                 hiddenLayer,
                 temperature=1.,
                 sigma=0.01,
                 visibleProportionOn=None,
                 parameterFile=None,
                 rng=None,
                 rngState=None,
                 rngSeed=1337):
        self.visibleLayer = visibleLayer
        self.hiddenLayer = hiddenLayer
        self.temperature = temperature
        self.beta = 1. / self.temperature

        if rng is None:
            self.rng = RandomState(seed=rngSeed)
            if rngState is not None:
                self.rng.set_state(rngState)
        else:
            self.rng = rng

        if parameterFile is None:
            self.initializeVisibleBias(visibleProportionOn=visibleProportionOn)
            self.initializeHiddenBias()
            self.initializeWeights(sigma)
        else:
            self.loadParameterFile(parameterFile)
        self.visibleStep = np.zeros_like(self.visibleBias)
        self.hiddenStep = np.zeros_like(self.hiddenBias)
        self.weightStep = np.zeros_like(self.weights)

    def initializeVisibleBias(self, visibleProportionOn=None):
        if visibleProportionOn is None:
            self.visibleBias = np.zeros(self.visibleLayer.shape[-1])
        else:
            #find minimum non-zero value
            nonZeroMin = visibleProportionOn[visibleProportionOn > 0.].min()
            visibleProportionOn[np.isclose(
                visibleProportionOn, 0.)] = nonZeroMin + (0. - nonZeroMin) / 2.
            nonOneMax = visibleProportionOn[visibleProportionOn < 1.].max()
            print(f'nonZeroMin, nonOneMax: {nonZeroMin}, {nonOneMax}')
            visibleProportionOn[np.isclose(
                visibleProportionOn, 1.)] = nonOneMax + (1. - nonOneMax) / 2.
            self.visibleBias = np.log(visibleProportionOn /
                                      (1. - visibleProportionOn))
            #self.visibleBias = 1. / visibleProportionOn

    def initializeHiddenBias(self):
        self.hiddenBias = np.zeros(self.hiddenLayer.shape[-1])

    def initializeWeights(self, sigma=0.01):
        self.weights = self.rng.normal(scale=sigma,
                                       size=(self.visibleLayer.shape[-1],
                                             self.hiddenLayer.shape[-1]))

    def loadParameterFile(self, parameterFile):
        lv = self.visibleLayer.shape[-1]
        lh = self.hiddenLayer.shape[-1]
        visibleSlice = slice(0, lv)
        hiddenSlice = slice(lv, lv + lh)
        weightsSlice = slice(lv + lh, lv + lh + lv * lh)
        fileContents = [float(line.strip()) for line in parameterFile]
        self.visibleBias = np.array(fileContents[visibleSlice])
        self.hiddenBias = np.array(fileContents[hiddenSlice])
        self.weights = np.array(fileContents[weightsSlice]).reshape((lv, lh))

    def dumpParameterFile(self, parameterFile):
        #assert type(parameterFile) == file
        for theta in self.visibleBias:
            print(f'{theta}', file=parameterFile)
        for theta in self.hiddenBias:
            print(f'{theta}', file=parameterFile)
        for theta in self.weights.flatten():
            print(f'{theta}', file=parameterFile)

    #
    #prediction methods
    #

    def hiddenConditionalProbabilities(self):
        conditionalEnergies = self.hiddenBias + self.visibleLayer @ self.weights
        return logistic(self.beta * conditionalEnergies)

    def visibleConditionalProbabilities(self):
        conditionalEnergies = self.visibleBias + self.hiddenLayer @ self.weights.T
        return logistic(self.beta * conditionalEnergies)

    def rollBernoulliProbabilities(self, probabilities):
        rolls = self.rng.uniform(size=probabilities.shape)
        return (rolls < probabilities).astype(np.float_)

    def gibbsSample(self, hiddenUnitsStochastic=False):
        #compute hidden activation probabilities given visible
        hiddenLayerProbabilities = self.hiddenConditionalProbabilities()
        if hiddenUnitsStochastic:
            self.hiddenLayer = self.rollBernoulliProbabilities(
                hiddenLayerProbabilities)
        else:
            self.hiddenLayer = hiddenLayerProbabilities
        #compute visible activation probabilities given hidden
        self.visibleLayer = self.visibleConditionalProbabilities()
        return self.visibleLayer, hiddenLayerProbabilities

    #
    #training methods
    #

    def computePCDGradient(self,
                           miniBatch,
                           miniFantasyBatch,
                           nCDSteps=1,
                           l1Coefficient=None,
                           l2Coefficient=None):
        visibleDataMean, hiddenDataMean, weightDataMean = self.computePCDGradientPositiveHalf(
            miniBatch)
        visibleModelMean, hiddenModelMean, weightModelMean, newFantasy = \
            self.computePCDGradientNegativeHalf(miniFantasyBatch, nCDSteps=nCDSteps)

        #compute gradients & return
        visibleGradient = visibleDataMean - visibleModelMean
        hiddenGradient = hiddenDataMean - hiddenModelMean
        weightGradient = weightDataMean - weightModelMean
        if l1Coefficient is not None:
            weightGradient -= l1Coefficient * np.sign(self.weights)
        if l2Coefficient is not None:
            weightGradient -= l2Coefficient * self.weights
        return visibleGradient, hiddenGradient, weightGradient, newFantasy

    def computePCDGradientPositiveHalf(self, miniBatch):
        self.visibleLayer = miniBatch
        hiddenLayerProbabilities = self.hiddenConditionalProbabilities()
        return self.computeParameterMeans(miniBatch, hiddenLayerProbabilities)

    def computePCDGradientNegativeHalf(self, miniFantasyBatch, nCDSteps=1):
        self.visibleLayer = miniFantasyBatch
        for _ in range(nCDSteps):
            visibleOut, hiddenOut = self.gibbsSample()
        visibleModelMean, hiddenModelMean, weightModelMean = \
                self.computeParameterMeans(visibleOut, hiddenOut)
        #store for possible use by adversary
        self.visibleModel = visibleOut
        self.hiddenModel = hiddenOut
        self.visibleModelMean = visibleModelMean
        self.hiddenModelMean = hiddenModelMean
        self.weightModelMean = weightModelMean
        return visibleModelMean, hiddenModelMean, weightModelMean, visibleOut

    def computeParameterMeans(self, visible, hidden):
        visibleMean = visible.mean(axis=0)
        hiddenMean = hidden.mean(axis=0)
        weightMean = (visible[..., :, None] *
                      hidden[..., None, :]).mean(axis=0)
        #weightMean = visibleMean[..., :, None] * hiddenMean[..., None, :] * visible.shape[0]
        return visibleMean, hiddenMean, weightMean

    def updateParameters(self):
        self.visibleBias += self.visibleStep
        self.hiddenBias += self.hiddenStep
        self.weights += self.weightStep

    def updateParametersSGD(self,
                            miniBatch,
                            miniFantasyBatch,
                            learningRate,
                            nCDSteps=1,
                            l1Coefficient=None,
                            l2Coefficient=None,
                            verbose=False):
        visibleGradient, hiddenGradient, weightGradient, newFantasy = \
            self.computePCDGradient(miniBatch, miniFantasyBatch, nCDSteps=nCDSteps,
                                    l1Coefficient=l1Coefficient, l2Coefficient=l2Coefficient)
        #hack to stop changing the *Step pointer; req'd for
        # current implementation of histograms of *Steps
        self.visibleStep += learningRate * visibleGradient - self.visibleStep
        self.hiddenStep += learningRate * hiddenGradient - self.hiddenStep
        self.weightStep += learningRate * weightGradient - self.weightStep
        self.updateParameters()
        if verbose:
            print('{:.3f}\t{:.3f}\t{:.3f}'.format(self.visibleStep.mean(),
                                                  self.hiddenStep.mean(),
                                                  self.weightStep.mean()))
        return newFantasy

    def updateParametersAdam(self,
                             miniBatch,
                             miniFantasyBatch,
                             adams,
                             nCDSteps=1,
                             l1Coefficient=None,
                             l2Coefficient=None,
                             verbose=False):
        visibleGradient, hiddenGradient, weightGradient, newFantasy = \
            self.computePCDGradient(miniBatch, miniFantasyBatch, nCDSteps=nCDSteps,
                                    l1Coefficient=l1Coefficient, l2Coefficient=l2Coefficient)
        #hack to stop changing the *Step pointer; req'd for
        # current implementation of histograms of *Steps
        self.visibleStep += adams['visible'].computeAdamStep(
            visibleGradient) - self.visibleStep
        self.hiddenStep += adams['hidden'].computeAdamStep(
            hiddenGradient) - self.hiddenStep
        self.weightStep += adams['weights'].computeAdamStep(
            weightGradient) - self.weightStep
        self.updateParameters()
        if verbose:
            print('{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}'.format(
                visibleGradient.mean(), hiddenGradient.mean(),
                weightGradient.mean(), self.visibleStep.mean(),
                self.hiddenStep.mean(), self.weightStep.mean()))
        return newFantasy

    def updateParametersAdamAdversarial(self,
                                        miniBatch,
                                        miniFantasyBatch,
                                        adams,
                                        gamma,
                                        adversary,
                                        nCDSteps=1,
                                        l1Coefficient=None,
                                        l2Coefficient=None,
                                        verbose=False):
        visibleGradient, hiddenGradient, weightGradient, newFantasy = \
            self.computePCDGradient(miniBatch, miniFantasyBatch, nCDSteps=nCDSteps,
                                    l1Coefficient=l1Coefficient, l2Coefficient=l2Coefficient)
        visibleGradientAd, hiddenGradientAd, weightGradientAd = self.computeAdversaryGradient(
            adversary)
        #hack to stop changing the *Step pointer; req'd for
        # current implementation of histograms of *Steps
        self.visibleStep += adams['visible'].computeAdamStep(
            visibleGradient + visibleGradientAd) - self.visibleStep
        self.hiddenStep += adams['hidden'].computeAdamStep(
            hiddenGradient + hiddenGradientAd) - self.hiddenStep
        self.weightStep += adams['weights'].computeAdamStep(
            weightGradient + weightGradientAd) - self.weightStep
        self.updateParameters()
        if verbose:
            print('{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}'.format(
                visibleGradient.mean(), hiddenGradient.mean(),
                weightGradient.mean(), self.visibleStep.mean(),
                self.hiddenStep.mean(), self.weightStep.mean()))
        return newFantasy

    def computeAdversaryGradient(self, adversary):
        #the negative-phase statistics were cached by computePCDGradientNegativeHalf
        adversaryPredictions = adversary.predict(self.visibleModel)
        adversaryPredictionVariation = adversaryPredictions - adversaryPredictions.mean()
        visibleModelVariation = self.visibleModel - self.visibleModelMean
        hiddenModelVariation = self.hiddenModel - self.hiddenModelMean
        weightModelVariation = self.visibleModel[
            ..., :, None] * self.hiddenModel[...,
                                             None, :] - self.weightModelMean
        visibleGradient = (adversaryPredictionVariation[:, None] *
                           visibleModelVariation).mean(axis=0)
        hiddenGradient = (adversaryPredictionVariation[:, None] *
                          hiddenModelVariation).mean(axis=0)
        weightGradient = (adversaryPredictionVariation[:, None, None] *
                          weightModelVariation).mean(axis=0)
        return visibleGradient, hiddenGradient, weightGradient

    #
    #analysis methods
    #

    def computeReconstructionError(self, miniBatch, nCDSteps=1):
        self.visibleLayer = miniBatch
        for _ in range(nCDSteps):
            visibleOut, hiddenOut = self.gibbsSample()
        #visibleOut = self.rollBernoulliProbabilities(visibleOut)
        sampleError = miniBatch - visibleOut
        meanSquaredError = (sampleError * sampleError).mean()
        return meanSquaredError

    def computeFreeEnergy(self, miniBatch=None):
        if miniBatch is not None:
            self.visibleLayer = miniBatch
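        #free energy of each visible configuration v:
        #  F(v) = -v . visibleBias - sum_j log(1 + exp(hiddenBias_j + (v W)_j))
        # the second term marginalizes the hidden units out analytically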
        internalFE = -self.visibleLayer @ self.visibleBias
        externalConditionalE = self.hiddenBias + self.visibleLayer @ self.weights
        externalFE = -np.log(1. + np.exp(externalConditionalE)).sum(axis=1)
        return internalFE + externalFE

    def computeMeanFreeEnergy(self, miniBatch=None):
        return self.computeFreeEnergy(miniBatch).mean()

    #
    #miscellaneous methods
    #

    def copy(self):
        copyRBM = RestrictedBoltzmannMachine(np.copy(self.visibleLayer),
                                             np.copy(self.hiddenLayer),
                                             temperature=self.temperature,
                                             rngState=self.rng.get_state())
        copyRBM.visibleBias = np.copy(self.visibleBias)
        copyRBM.hiddenBias = np.copy(self.hiddenBias)
        copyRBM.weights = np.copy(self.weights)
        copyRBM.visibleStep = np.copy(self.visibleStep)
        copyRBM.hiddenStep = np.copy(self.hiddenStep)
        copyRBM.weightStep = np.copy(self.weightStep)
        return copyRBM

    def storeHiddenActivationsOnMiniBatch(self, miniBatch, hiddenUnits=None):
        self.visibleLayer = miniBatch
        self.hiddenConditionalProbabilities()
        return np.copy(self.hiddenLayer) if hiddenUnits is None \
          else np.copy(self.hiddenLayer[..., hiddenUnits])

    def setRngSeed(self, rngSeed):
        self.rng.seed(rngSeed)

    def shape(self):
        #__len__ must return a single int, so expose the two layer sizes
        # as a (visible, hidden) tuple via shape instead
        return self.visibleLayer.shape[-1], self.hiddenLayer.shape[-1]
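#a minimal usage sketch (an assumption, not part of the original source):
# drive the PCD updates above with a persistent fantasy batch. `data` is
# assumed to be a binary array of shape (nSamples, nVisible), and `rbm` an
# already-constructed RestrictedBoltzmannMachine.
def trainPCDSketch(rbm, data, nEpochs=10, batchSize=32, learningRate=1e-3):
    #the fantasy particles persist across updates; that persistence is
    # what distinguishes PCD from plain CD-k
    fantasy = data[:batchSize].copy()
    for _ in range(nEpochs):
        for start in range(0, len(data), batchSize):
            miniBatch = data[start:start + batchSize]
            fantasy = rbm.updateParametersSGD(miniBatch, fantasy,
                                              learningRate, nCDSteps=1)
    return rbm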
Example #28
 def __init__(self, properties, originalPatternDataSet, patternDataSet, patternDataSetProperties, seedAddition):
     self.properties = properties
     
     # Validating the configuration.
     seed = properties.get('seed')
     inputsPerPattern = self.properties.get('inputsPerPattern')
     target = properties.get('target')
     minDistance = properties.get('minDistance')
     
     Utils.assertInt('Random seed', seed)
     Utils.assertInt('Inputs per pattern', inputsPerPattern, 1)
     
     if target not in ('originalPatterns', 'transformedPatterns'):
         raise Exception('Target must be "originalPatterns" or "transformedPatterns"') 
     
     if minDistance is None:
         raise Exception('Minimum distance not defined.')
     
     mean, meanIsProportion = Utils.assertProportionOrFloat('Mean minimum distance', minDistance.get('mean'), 0, 0)
     stdev, stdevIsProportion = Utils.assertProportionOrFloat('Standard deviation of minimum distance', minDistance.get('stdev'), 0, 0)
     
     if (meanIsProportion or stdevIsProportion) and 'distance' not in patternDataSetProperties:
         raise Exception('Trying to create an input data set proportional to the distance of the pattern data set, but the pattern data set has no distance defined.')
     
     if meanIsProportion:
         mean *= patternDataSetProperties['distance']['mean']
     
     if stdevIsProportion:
         stdev *= patternDataSetProperties['distance']['stdev']
     
     # Initializing the random generator.
     randomGenerator = RandomState()
     randomGenerator.seed(seed + seedAddition)
     
     # Generating the inputs.
     patternSize = patternDataSetProperties.get('patternSize')
     self.originalInputs = []
     self.targetDataSet = originalPatternDataSet if target == 'originalPatterns' else patternDataSet
     
     if stdev == 0:
         allFlips = [min(patternSize, int(round(mean)))] * (len(self.targetDataSet) * inputsPerPattern)
     else:
         allFlips = map(lambda x: max(0, min(patternSize, int(round(x)))), randomGenerator.normal(mean, stdev, len(self.targetDataSet) * inputsPerPattern))
     
     for i in xrange(len(self.targetDataSet)):
         insertedInputs = set()
         j = 0
         
         while j < inputsPerPattern:
             inputVector = list(self.targetDataSet[i])
             #index into the flat list of flip counts: one entry per
             # (pattern, input) combination
             flips = allFlips[i * inputsPerPattern + j]
             componentsToFlip = range(patternSize)
             
             for k in xrange(flips):
                 componentIndex = randomGenerator.randint(0, patternSize - k)
                 component = componentsToFlip.pop(componentIndex)
                 inputVector[component] = int(not inputVector[component])
             
             inputVector = tuple(inputVector)
             
             if inputVector not in insertedInputs:
                 self.originalInputs.append(inputVector)
                 insertedInputs.add(inputVector)
                 j += 1
                     
     # Applying transformations.
     
     if target == 'originalPatterns':
         self.inputs = Utils.transformDataSet(randomGenerator, self.originalInputs, patternDataSetProperties)
     else:
         self.inputs = self.originalInputs
    def __init__(self, properties, seedAddition):
        self.properties = properties

        # Validating the configuration.
        seed = properties.get('seed')
        dataSetSize = self.properties.get('dataSetSize')
        patternSize = properties.get('patternSize')
        extraBits = properties.get('extraBits')
        distance = properties.get('distance')
        scale = properties.get('scale')

        Utils.assertInt('Random seed', seed)
        Utils.assertInt('Pattern data set size', dataSetSize, 1)
        Utils.assertInt('Pattern size', patternSize, 1)

        if extraBits is not None:
            Utils.assertInt('Number of extra bits', extraBits.get('number'), 1)

            if extraBits.get('values') not in (0, 1, 'random', 'randomFixed'):
                raise Exception(
                    'Extra bits values must be 0, 1, "random" or "randomFixed"'
                )

        if distance is not None:
            Utils.assertFloat('Mean distance', distance.get('mean'), 0)
            Utils.assertFloat('Standard deviation of distance',
                              distance.get('stdev'), 0)

        if scale is not None:
            if scale.get('type') == '1D':
                Utils.assertInt('Scale factor for 1D', scale.get('factor'), 1)
            elif scale.get('type') == '2D':
                Utils.assertInt('Scale pattern width',
                                scale.get('patternWidth'), 1)
                Utils.assertInt('Scale pattern height',
                                scale.get('patternHeight'), 1)
                Utils.assertInt('Scale width factor', scale.get('widthFactor'),
                                1)
                Utils.assertInt('Scale height factor',
                                scale.get('heightFactor'), 1)

                if scale.get('patternWidth') * scale.get(
                        'patternHeight') != patternSize:
                    raise Exception(
                        'Scale pattern width and pattern height do not fit with the given pattern size'
                    )
            else:
                raise Exception('Unknown scale type ' + scale.get('type'))

        # Initializing the random generator.
        randomGenerator = RandomState()
        randomGenerator.seed(seed + seedAddition)

        # Generating the patterns.
        self.originalPatterns = Utils.generateDataSet(
            randomGenerator, dataSetSize, patternSize,
            self.computeError if 'distance' in self.properties else None,
            MainPatternGenerator.MAX_TRIES)

        # Applying transformations.
        self.patterns = Utils.transformDataSet(randomGenerator,
                                               self.originalPatterns,
                                               self.properties)
Example #30
class PRNG(object):
    """A Pseudorandom Number Generator that yields samples
    from the set of source blocks using the RSD degree
    distribution described above.
    """

    def __init__(self, K, delta, c, np=None, enc_num=1, enc_key=(2**32, 1103, 12345)):
        """Provide RSD parameters on construction.

        K is the number of segments.
        delta and c are parameters that determine the distribution.
        np is a flag to use the numpy random number generator, which is faster.
        """

        self.K = float(K)
        self.K_int = int(K)
        self.delta = delta
        self.c = c

        S = self.calc_S()
        cdf, Z = gen_rsd_cdf(K, S, delta)
        self.cdf = cdf
        self.Z = Z

        #self.inter = inter.interp1d(np.concatenate(([0], cdf)), range(0,K+1))
        self.np_rand = RandomState(1)
        self.np = np

        self.state = 1
        
        self.enc_num=enc_num
        self.enc_key=enc_key

    def calc_S(self):
        """ A helper function to calculate S, the expected number of degree=1 nodes
        """
  
        K = self.K
        S = self.c * log(self.K/self.delta) * sqrt(self.K) 
        self.S = S
        return S


    def get_S(self):
        return self.S


    def set_seed(self, seed):
        """Reset the state of the PRNG to the 
        given seed
        """

        self.state = seed
    
    def get_state(self):
        """Returns current state of the linear PRNG
        """

        return self.state


    def get_src_blocks_wrap(self, seed=None):
        #a wrapper function to get source blocks.
        #if the np flag is on, it uses a numpy-based method;
        #otherwise it uses the native python random module.
        #np is faster but incompatible with the python-random-based
        # implementation used in previous versions.
        if self.enc_num:
            return self.get_src_blocks_enc(seed)
        elif self.np:
            return self.get_src_blocks_np(seed)
        else:
            return self.get_src_blocks(seed)
        
    def get_src_blocks_enc(self,seed=None):
        if seed:
            self.state = seed

        blockseed = self.state
        random.seed(self.state)
        
        d = self._sample_d()

        nums = LCG(blockseed,0,self.K_int,d,self.enc_key)
        return blockseed, d, nums
        
    def get_src_blocks(self, seed=None):
        """Returns the indices of a set of `d` source blocks
        sampled from indices i = 1, ..., K-1 uniformly, where
        `d` is sampled from the RSD described above.
        """

        if seed:
            self.state = seed

        blockseed = self.state
        random.seed(self.state)
        
        d = self._sample_d()

        nums = random.sample(range(self.K_int), d)
        return blockseed, d, nums


    def get_src_blocks_np(self, seed=None):
        """Returns the indices of a set of `d` source blocks
        sampled from indices i = 1, ..., K-1 uniformly, where
        `d` is sampled from the RSD described above.
        Uses numpy for speed.
        """

        if seed:
            self.state = seed


        blockseed = self.state
        self.np_rand.seed(self.state)
        
        d = self._sample_d_np()
        nums = self.np_rand.randint(0, self.K_int, d)
        return blockseed, d, nums

    def _sample_d_np(self):
        """Samples degree given the precomputed
        distributions above. Uses numpy for speed"""

        p = self.np_rand.rand()
        for ix, v in enumerate(self.cdf):
            if v > p:
                return ix + 1
        return ix + 1        


    def _sample_d_inter(self):
        """Samples degree given the precomputed
        distributions above using interpolation
        """

        p = random.random()
        return int(self.inter(p))+1 #faster than math.ceil albeit can return the wrong value...

    # Samples from the CDF of mu
    def _sample_d(self):
        """Samples degree given the precomputed
        distributions above"""

        p = random.random()

        for ix, v in enumerate(self.cdf):
            if v > p:
                return ix + 1
        
        return ix + 1
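#a minimal usage sketch (an assumption, not part of the original source;
# gen_rsd_cdf, LCG and the module-level imports live elsewhere in the file):
# seed the generator for one encoded block, then draw a degree d and the
# indices of d source blocks.
def sampleBlocksSketch(K=1000, delta=0.5, c=0.1, seed=42):
    prng = PRNG(K, delta, c, np=True, enc_num=0)  #take the numpy-backed path
    blockseed, d, nums = prng.get_src_blocks_wrap(seed)
    return blockseed, d, nums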
class RestrictedBoltzmannModel(Model):
    STOP_DELTA_WEIGHTS_EPOCHS = 1000
    DELTA_WEIGHTS_NORM_N = 1000
    
    def __init__(self, properties, seedAddition):
        # Validating the configuration.
        self.seed = properties.get('seed')
        self.hiddenNeurons = properties.get('hiddenNeurons')
        self.learningRate = properties.get('learningRate')
        self.weightDecay = properties.get('weightDecay')
        self.momentum = properties.get('momentum')
        self.batchSize = properties.get('batchSize')
        
        Utils.assertInt('Random seed', self.seed)
        Utils.assertInt('Hidden neurons', self.hiddenNeurons, 0)
        Utils.assertFloat('Learning rate', self.learningRate, 0)
        Utils.assertFloat('Weight decay', self.weightDecay, 0)
        Utils.assertBoolean('Momentum', self.momentum)
        Utils.assertInt('Batch size', self.batchSize, 0)
        self.batchSize = int(self.batchSize)
        
        # Preparing the random generator.
        self.randomGenerator = RandomState()
        self.seed += seedAddition
    
    # Public methods. A model must implement these methods in order to use it in Main.py

    def train(self, patternDataSet, patternDataSetProperties):
        # Initializing the random generator.
        self.randomGenerator.seed(self.seed)
        
        visibleNeurons = len(patternDataSet[0])
        
        # Initializing weights according to Hinton, G. (2010). A practical guide to training restricted Boltzmann machines. Momentum, 9(1), 926.
        self.weights = self.randomGenerator.normal(0, 0.01, (visibleNeurons, self.hiddenNeurons))
        self.visibleOffset = numpy.zeros((1, visibleNeurons))
        self.hiddenOffset = numpy.zeros((1, self.hiddenNeurons))
        
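        # initialize each visible offset to the log-odds log(p / (1 - p))
        # of that unit being on in the data; the +/-2.0 clamp caps the
        # offset when p is exactly 0 or 1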
        for i in xrange(visibleNeurons):
            p = sum(map(lambda x: x[i], patternDataSet)) / float(len(patternDataSet))
            
            if p == 0:
                self.visibleOffset[0, i] = -2.0
            elif p == 1:
                self.visibleOffset[0, i] = 2.0
            else:
                self.visibleOffset[0, i] = numpy.log(p / (1 - p))
        
        # Training.
        deltaWeights = numpy.zeros((visibleNeurons, self.hiddenNeurons))
        deltaVisibleOffset = numpy.zeros((1, visibleNeurons))
        deltaHiddenOffset = numpy.zeros((1, self.hiddenNeurons))
        
        epochs = 0
        
        lesserDeltaWeightsNormSummation = None
        deltaWeightsSummation = 0
        lastDeltaWeightsNorm = 0
        deltaWeightsStopCounter = 0
        
        bestWeights = None
        bestVisibleOffset = None
        bestHiddenOffset = None
        
        while True:
            for i in xrange(0, len(patternDataSet), self.batchSize):
                visibleBatch0 = numpy.asarray(patternDataSet[i:i + self.batchSize])
                
                # Positive phase: sample hiddenBatch0 from batch
                hiddenBatch0Probability = self.activation(visibleBatch0, self.weights, self.hiddenOffset)
                
                # Sample hiddenBatch0: we binarize the results of the activation function
                hiddenBatch0 = (hiddenBatch0Probability > self.randomGenerator.rand(*hiddenBatch0Probability.shape))
                
                # Negative phase: calculate visibleBatch1 from our hiddenBatch0 samples
                # Hinton, 2010 recommends not to binarize when updating visible units
                # No need to sample the last hidden states because they're not used
                visibleBatch1 = self.activation(hiddenBatch0, self.weights.T, self.visibleOffset)
                hiddenBatch1Probability = self.activation(visibleBatch1, self.weights, self.hiddenOffset)
                
                # Momentum is set as specified by Hinton's practical guide

                if self.momentum:
                    momentum = 0.5 if epochs <= 5 else 0.9      # Hinton's guide: 0.5 for the first ~5 epochs, then 0.9 (TODO: > 5 or >= 5?)
                else:
                    momentum = 1
                
                # Update increments of weights and offsets
                deltaWeights = deltaWeights * momentum + (self.learningRate / self.batchSize) * (numpy.dot(visibleBatch0.T, hiddenBatch0Probability) - numpy.dot(visibleBatch1.T, hiddenBatch1Probability)) - self.weightDecay * self.weights
                deltaVisibleOffset = deltaVisibleOffset * momentum + (self.learningRate / self.batchSize) * (numpy.sum(visibleBatch0, axis=0) - numpy.sum(visibleBatch1, axis=0))
                deltaHiddenOffset = deltaHiddenOffset * momentum + (self.learningRate / self.batchSize) * (numpy.sum(hiddenBatch0Probability, axis=0) - numpy.sum(hiddenBatch1Probability, axis=0))
                
                # Update weights and offsets
                self.weights += deltaWeights
                self.visibleOffset += deltaVisibleOffset
                self.hiddenOffset += deltaHiddenOffset
            
            # Stop if it looks like it is oscillating.
            deltaWeightsNorm = numpy.linalg.norm(deltaWeights)
            deltaWeightsSummation += deltaWeightsNorm
            
            if epochs > RestrictedBoltzmannModel.DELTA_WEIGHTS_NORM_N:
                deltaWeightsSummation -= lastDeltaWeightsNorm
                
                if lesserDeltaWeightsNormSummation is None or deltaWeightsSummation < lesserDeltaWeightsNormSummation:
                    lesserDeltaWeightsNormSummation = deltaWeightsSummation
                    deltaWeightsStopCounter = 0
                    bestWeights, bestVisibleOffset, bestHiddenOffset = self.copyState()
                else:
                    deltaWeightsStopCounter += 1
                    
                    if deltaWeightsStopCounter >= RestrictedBoltzmannModel.STOP_DELTA_WEIGHTS_EPOCHS:
                        break
            
            lastDeltaWeightsNorm = deltaWeightsNorm
            epochs += 1
        
        self.weights = bestWeights
        self.visibleOffset = bestVisibleOffset
        self.hiddenOffset = bestHiddenOffset
        return {'trainingEpochs': epochs + 1}
    
    def recall(self, visibleValues):
        # Initializing the random generator.
        self.randomGenerator.seed(self.seed)
        
        # Computing the output.
        hiddenProbability = self.activation(visibleValues, self.weights, self.hiddenOffset)
        hiddenValues = (hiddenProbability > self.randomGenerator.rand(1, self.hiddenNeurons))
        result = self.activation(hiddenValues, self.weights.T, self.visibleOffset)
        return tuple(map(lambda x: int(x), result[0] > 0.5)), 1
    
    # Private methods.
    
    def copyState(self):
        return numpy.copy(self.weights), numpy.copy(self.visibleOffset), numpy.copy(self.hiddenOffset)
    
    def activation(self, batch, weights, offset):
        return expit(numpy.dot(batch, weights) + offset)
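# a minimal usage sketch (an assumption, not part of the original source):
# patternDataSet is a list of equal-length binary tuples, and the property
# keys follow the validation performed in __init__ above.
def runModelSketch(patternDataSet, patternDataSetProperties):
    properties = {'seed': 1, 'hiddenNeurons': 16, 'learningRate': 0.05,
                  'weightDecay': 0.0002, 'momentum': True, 'batchSize': 10}
    model = RestrictedBoltzmannModel(properties, seedAddition=0)
    stats = model.train(patternDataSet, patternDataSetProperties)
    recalled, _ = model.recall(numpy.asarray([patternDataSet[0]]))
    return stats, recalled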
import networkx as nx
from numpy.random import RandomState

from pymoreg.structure.graph_generation import random_dag

seeds = list(range(101, 200))
rng = RandomState()
variables = list(range(15))

for i, s in enumerate(seeds):
    print('Test {0}/{1}'.format(i + 1, len(seeds)))

    rng.seed(s)
    g = random_dag(variables, rng=rng)
    nx_g = nx.from_scipy_sparse_matrix(g, create_using=nx.DiGraph())

    real_edges = g.edges()
    edges = real_edges.copy()

    if len(edges):

        if set(edges) != set(nx_g.edges()):
            raise ValueError('Error in graph created with seed {0}\n Expected edges: {1}\n got: {2}'
                             .format(s, edges, nx_g.edges()))
        for v in variables:
            real_parents = set(nx_g.predecessors(v))
            parents = set(g.parents(v))

            if parents != real_parents:
                raise ValueError('Error in graph created with seed {0}\n Expected parents for node {1}: {2}\n got: {3}'
                                 .format(s, v, real_parents, parents))
Example #33
class SchmitzWMSystem(HistBasedWMSystem):
    """
    This class implements Schmitz et al.'s watermarking method, which is
    adapted for the use with audio data.

    A watermark is embedded by forming the histogram of the samples'
    amplitudes and considering the relation of pairs
    of bins (a_i, b_i), which are pseudo-randomly selected. The embedding
    and detection key is the seed for the PRNG.
    The i-th bit is embedded by swapping the i-th bin pair if the
    relation does not already match the wanted proportion.
    """

    # Class constants specifying the minimal and maximal step size
    MIN_STEP = -9
    MAX_STEP = 9

    # Specifies the keys for the parameter dictionary
    PARAM_KEYS = ['step']

    def __init__(self, step=(-9, 9)):
        """Constructs and initializes a SchmitzWMSystem object.

        Keyword arguments:
        :param step: a tuple or scalar, which specifies the step size
        :return: None
        """
        self._min_step = None
        self._max_step = None
        self._prng = None
        self.set_params(step=step)

        self._is_init = True

    @property
    def min_step(self):
        return self._min_step

    @min_step.setter
    def min_step(self, value):
        if value < SchmitzWMSystem.MIN_STEP or value >= 0:
            raise ValueError('Step exceeds the suggested range')
        self._min_step = value

    @property
    def max_step(self):
        return self._max_step

    @max_step.setter
    def max_step(self, value):
        if value <= 0 or value > SchmitzWMSystem.MAX_STEP:
            raise ValueError('Step exceeds the suggested range')
        self._max_step = value

    def set_params(self, **kwargs):
        """Sets the parameters of the watermarking system. This is the
        implementation of an abstract convenience method,
        which is specified by a superclass and uses **kwargs, so that it is
        possible to set multiple parameters
        without changing the others to default (in this case somewhat redundant).

        Keyword arguments:
        :param step: a tuple or scalar, which specifies the step size
        :return: None
        """
        if 'step' in kwargs:
            step = kwargs['step']
            if not isinstance(step, tuple):
                self.min_step = abs(step) * -1
                self.max_step = abs(step)
            else:
                self.min_step, self.max_step = step

        # Init PRNG
        self._prng = RandomState()

    def get_params(self):
        """Returns all parameters as a dictionary

        :return: params: a dict containing all parameters
        """
        return dict(
            zip(SchmitzWMSystem.PARAM_KEYS, [(self.min_step, self.max_step)]))

    def embed_watermark(self, samples, w, **kwargs):
        """Embeds the specified mark in the samples.

        :param samples: the signal to be marked
        :param w: the watermark
        :param kwargs: the embedding key (in this case the seed)
        :return: None
        """

        assert self._is_init, 'WM system NOT initialized'

        if 'key' in kwargs:
            # Retrieve seed from kwargs
            seed = kwargs['key']
        else:
            raise TypeError('Required parameter \'key\'(seed) is missing')

        print('=============================================')
        print('Embedding ', w, ' via Schmitz\' method')
        print('---------------------------------------------')

        # Make a deep copy of the samples to mark
        samples_to_mark = np.empty_like(samples)
        samples_to_mark[:] = samples

        # Check, if multi channel audio has to be marked
        if samples.ndim > 1:
            length, num_channels = samples.shape
            # Check if watermark matches channel layout
            wmk = self.check_wmk_alignment(num_channels, w)
            seed = self.check_key_alignment(num_channels, seed)

            bin_pairs = np.array([])
            for i in range(0, num_channels):
                print('in channel #', i)
                print('---------------------------------------------')

                samples_to_mark[:, i], bp = self._embed_watermark_single_channel(
                    samples_to_mark[:, i], wmk[i], seed[i])  # returns copy
                if i == 0:
                    bin_pairs = bp
                else:
                    bin_pairs = np.stack((bin_pairs, bp), axis=0)

            return samples_to_mark, bin_pairs

        else:
            print('in channel #0')
            print('---------------------------------------------')
            return self._embed_watermark_single_channel(
                samples_to_mark, w, seed)

    def _embed_watermark_single_channel(self, samples_to_mark, w, seed):
        """Embeds the watermark in a mono signal.

        :param samples_to_mark: the samples to mark
        :param w: the watermark
        :param seed: the key - more precise: the seed for the PRNG
        :param mean: the original mean of the signal
        :return: marked_samples, bin_pairs: marked copy of the samples and
        the bin pairs (detection key)
        """
        hist, bins = self.generate_standard_histogram(samples_to_mark)

        # Construct a sequence of pseudo-randomly selected bin pairs
        self._prng.seed(seed)
        bin_pairs = self._generate_bin_pairs(hist, bins, len(w), seed)

        bins_to_swap = []
        for i, bit in enumerate(w):
            id1 = bin_pairs[i][0]
            id2 = bin_pairs[i][1]

            if bit == 1:
                if hist[id1] < hist[id2]:
                    # do nothing
                    continue
                else:
                    # Store the bins to swap
                    bins_to_swap.append((id1, id2))

            if bit == 0:
                if hist[id1] > hist[id2]:
                    continue
                else:
                    # Store the bins to swap
                    bins_to_swap.append((id1, id2))

        # Swap the bins
        marked_samples = self.swap_bins_at_once(samples_to_mark, bins,
                                                bins_to_swap)

        return marked_samples, bin_pairs

    @staticmethod
    def swap_bins_at_once(samples, bins, bins_to_swap):
        """Swaps the bin pairs, that are specified by bins_to_swap,
        which contains the bin indices.
        If a pair (id1, id2) has to be swapped, every sample x, which falls
        in bin[id1] is modified, so that it falls
        into bin[id2].

        :param samples: the samples to be modified
        :param bins: the bin edges
        :param bins_to_swap: a list of pairs to be swapped
        :return:samples_to_mark: the modified samples
        """
        bin_width = abs(bins[1] - bins[0])

        # make a deep copy of the samples to mark
        samples_to_mark = np.empty_like(samples)
        samples_to_mark[:] = samples

        for i, x in enumerate(samples):
            for j, ids in enumerate(bins_to_swap):
                if bins[ids[0]] <= x < bins[ids[0] + 1]:
                    samples_to_mark[i] += ((ids[1] - ids[0]) * bin_width)
                    break
                elif bins[ids[1]] <= x < bins[ids[1] + 1]:
                    samples_to_mark[i] -= ((ids[1] - ids[0]) * bin_width)
                    break

        return samples_to_mark

    def extract_watermark(self, samples, **kwargs):
        """Extracts the watermark from the given samples by means of the
        given key.

        :param samples: the marked samples
        :param kwargs: various parameters; 'key' and 'length' are required
        :return: w: the extracted mark (if samples is a multi-channel
        signal, then a list of marks is returned)
        """
        assert self._is_init, 'WM system NOT initialized'
        if 'key' in kwargs:
            key = kwargs['key']
        else:
            raise TypeError('Required parameter \'key\' is missing')

        if 'length' in kwargs:
            wmk_len = kwargs['length']
        else:
            raise TypeError('Required parameter \'length\' is missing')

        print('=============================================')
        print("Detecting watermark")
        print('---------------------------------------------')

        # Multi-channel signal
        if samples.ndim > 1:
            length, num_channels = samples.shape
            # Check that a seed exists for each channel (or use the same
            # one for all)
            key = self.check_key_alignment(num_channels, key)
            w = np.empty((num_channels, 1), dtype=int)

            for i in range(0, num_channels):
                print('in channel #', i)
                print('---------------------------------------------')

                # Extract watermark
                w_i = self.extract_watermark_single_channel(
                    samples[:, i], key[i], wmk_len)

                # Store extracted watermark in 2d output array
                if i == 0:
                    w = np.array(w_i)
                else:
                    w = np.vstack((w, w_i))

        # Mono signal
        else:
            w = self.extract_watermark_single_channel(samples, key, wmk_len)

        return w

    def extract_watermark_single_channel(self, samples, key, length):
        """Extracts watermark from a marked mono signal.

        :param samples: the marked single channel signal
        :param key: the extraction key
        :param length: the number of bits to extract
        :return: w: the extracted mark
        """
        hist, bins = self.generate_standard_histogram(samples)

        # Construct the same sequence of pseudo-randomly selected bin pairs
        # as on the embedder side
        self._prng.seed(key)
        bin_pairs = self._generate_bin_pairs(hist, bins, length, key)

        w = []
        for i, p in enumerate(bin_pairs):
            id1 = p[0]
            id2 = p[1]

            if hist[id1] < hist[id2]:
                w.append(1)

            elif hist[id1] > hist[id2]:
                w.append(0)

        return np.array(w)

    def _gen_a_i(self, hist, bins, used_bins):
        """Generates the initial bin a for the i_th bit to embed.

        :param hist: The histogram of the signal (necessary to check for bin
        equality or emptiness)
        :param bins: A list which specifies the edges of the histogram bins
        :param used_bins: A list of already used bins, that cannot be
        considered anymore
        :return:
        """
        a_i = self._prng.randint(0, len(bins) - 1)

        while a_i in used_bins or hist[a_i] == 0:
            if a_i < len(bins) - 2:
                a_i += 1
            else:
                return self._gen_a_i(hist, bins, used_bins)

        used_bins.append(a_i)
        return a_i

    def _gen_pair(self, hist, bins, used_bins):
        """ Generates a single bin pair.

        :param hist: The histogram of the signal (necessary to check for bin
        equality or emptiness)
        :param bins: A list which specifies the edges of the histogram bins
        :param used_bins: a list of already used bins, that cannot be
        considered anymore
        :return:
        """
        a_i = self._gen_a_i(hist, bins, used_bins)
        step = self._prng.randint(max(self.min_step, 0 - a_i),
                                  min(len(bins) - 1 - a_i, self.max_step) + 1)
        b_i = a_i + step

        if b_i not in used_bins and (hist[a_i] != hist[b_i]) and hist[b_i] > 0:
            used_bins.append(b_i)
            return a_i, b_i
        else:
            used_bins.remove(a_i)
            return self._gen_pair(hist, bins, used_bins)

    def _generate_bin_pairs(self, hist, bins, length, seed):
        """Constructs the bin pairs, whose relation is used to encode one
        watermark bit. This is done by using a seeded
        PRNG.

        :param hist: The histogram of the signal (necessary to check for bin
        equality or emptiness)
        :param bins: A list which specifies the edges of the histogram bins
        :param length: The length of the watermark to be embedded
        :param seed: The seed for the PRNG
        :return: bin_pairs: A 2d list, which contains the drawn bin pairs
        """
        self._prng = RandomState()
        self._prng.seed(seed)

        bin_pairs = np.empty((0, 2), dtype=int)
        ub = []

        for i in range(0, length):
            a_i, b_i = self._gen_pair(hist, bins, ub)
            bin_pairs = np.append(bin_pairs, [[a_i, b_i]], axis=0)

        return bin_pairs
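# a minimal round-trip sketch (an assumption, not part of the original
# source): embed a 4-bit mark into a mono signal under seed/key 7, then
# extract it again with the same key and length.
def watermarkRoundTripSketch(samples):
    wm = SchmitzWMSystem(step=(-9, 9))
    marked, bin_pairs = wm.embed_watermark(samples, np.array([1, 0, 1, 1]), key=7)
    extracted = wm.extract_watermark(marked, key=7, length=4)
    return marked, extracted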
Example #34
class StackedSAE(object):



    def __init__(self, INPUT=None, LABELS=None, verbose=False):

        self.OUTPUT_LAYER = None
        self.StackLayerInfo = {'LAYER'  :    [],
                               'OUTPUT' :    self.OUTPUT_LAYER}
        self.SYSTEM_PARAMS = {'sections' : ['STACK', 'LAYER',
                                            'SMAX_TUNE', 'MET_TUNE',
                                            'LOG_TUNE', 'CPP_LIBRARY',
                                            'FILEIO', 'SYSTEM'] }
        self.NUM_LAYS = {'WIN' : 0, 'WOUT': 0,
                         'BIN' : 0, 'BOUT': 0}
        self.DELETED_IDX = []
        self.LAYER_TRAINING = None
        self.TRNG_DATA = None
        self.DATA = None
        self.TIMER = time.clock()
        self.TRNG_LABS = LABELS
        self.MET = None
        self.COST = None
        self.TUNING_REGIMENT = {}
        self.RANDO = RandomState()
        self.BATCHER = None
        self.CP = ConfigParser.SafeConfigParser(allow_no_value=True)
        self.CHECKS = {'config_loaded'      :     False,
                       'labels_loaded'      :     False,
                       'data_loaded'        :     False,
                       'lee_wants_rand_off' :     False,
                       'verbose'            :     verbose }
        if INPUT is not None:
            self.DATA = INPUT
            self.IN_SHAPE = INPUT.shape


    def LoadStackConfig(self, PATH=None):
        '''
        DESCRPT:  A monster; thankfully this is the only place the config file
                    is read on behalf of all the other classes
        IN ARGS:  PATH ; strings    : config file location
        NOTES:
        '''

        if PATH is None:
            PATH = 'config.ini'
        try:
            self.CP.read(PATH)
        except:
            print 'Config file:', PATH
            raise IOError, 'Config file wasn\'t able to be read'

        GENERIC_DICT = {}
        for SECT in self.CP.sections():
            GENERIC_DICT[SECT] = {}
            for OPTS in self.CP.options(SECT):
                val = self.CP.get(SECT, OPTS)
                if val == 'True':
                    GENERIC_DICT[SECT][OPTS] = True
                elif val == 'False':
                    GENERIC_DICT[SECT][OPTS] = False
                else:
                    try:
                        GENERIC_DICT[SECT][OPTS] = int(val)
                    except:
                        try:
                            GENERIC_DICT[SECT][OPTS] = float(val)
                        except:
                            try:
                                GENERIC_DICT[SECT][OPTS] = val
                            except:
                                GENERIC_DICT[SECT][OPTS] = None

        for key in list(GENERIC_DICT):
            if key not in self.SYSTEM_PARAMS['sections']:
                self.SYSTEM_PARAMS['sections'].append(key)

        for key in self.SYSTEM_PARAMS['sections']:
            self.SYSTEM_PARAMS[key] = GENERIC_DICT[key]
        
        if self.TRNG_DATA is None:
            try:
                self.NewInput()
                self.CHECKS['data_loaded'] = True
            except IOError:
                print 'WARNING: NO DATA LOADED'
                

        self.SYSTEM_PARAMS['LAYER']['disp'] = self.CHECKS['verbose']
        self.CHECKS['config_loaded'] = True
        
        self.CHECKS['lee_wants_rand_off'] = self.SYSTEM_PARAMS['STACK']['lee_wants_rand_off']
        
        if self.SYSTEM_PARAMS['STACK']['lee_wants_rand_off']:
            CONFIG_SEED = self.SYSTEM_PARAMS['STACK']['rand_seed_32bit']
            self.RANDO.seed(seed=CONFIG_SEED)
            CD = 5
            print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!'
            print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!' 
            print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!'
            print '------------------------------------------------------'
            print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! '
            print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! '
            print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!'
                
            for i in range(0, CD):
                print 'RESUMING IN ', CD - i, 'seconds!'
                print 'The RNG STATE HAS BEEN SET!'
                print 'SEED SET TO CONFIG PARAM...'
                print '>>>>> rand_seed_32bit : value ', CONFIG_SEED, '<<<<<<<<<'
                print 'PLEASE SET DETERMINISM PARAMETER TO FALSE BEFORE NON TESTING USE'
                print '------------------------------------------------------'
                
                print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!' 
                print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! '
                print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!'
                print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! '
                print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! '
                print 'WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!'
        else:
            self.RANDO.seed(seed=np.int32(time.time()))
        
        
        self.UpdateStackConfig()


        

    def LoadLabels(self, LABEL_TYPE='default', is_training=False, **kwargs):
        #
        # Method for various different ways to create labels
        #    ARG     :    Options:
        #
        # LABEL_TYPE :    {'default' | 'generic' }     : returns generic labels
        #                 {'from_list'}                : returns labels indicated by a list
        #                 {'from_array'}               : returns array
        #                 {'from_file'}                : tries to load labels from file
        #------------------------------------------------------------------------------------
        
        if not self.CHECKS['data_loaded']:
            raise RuntimeError, 'Data is not loaded'
            
        if LABEL_TYPE == 'default' or LABEL_TYPE == 'generic':
            NUM_CLASSES = kwargs['num_classes']
            NUM_LABS = kwargs['num_labs']
            # one label per sample, assigned in NUM_CLASSES contiguous blocks
            BLOCK = NUM_LABS // NUM_CLASSES
            NU_LABS = np.zeros((NUM_LABS,), dtype=np.int32)
            for i in range(NUM_CLASSES):
                NU_LABS[i * BLOCK:(i + 1) * BLOCK] = i
        elif LABEL_TYPE == 'from_list':
            NU_LABS = np.asarray(kwargs['labels']).astype(np.int32)
        elif LABEL_TYPE == 'from_array':
            NU_LABS = kwargs['labels'].astype(np.int32)
        elif LABEL_TYPE == 'from_file':
            try:
                LAB_PATH = kwargs['lab_file_path']
                NU_LABS = pickle.load(open(LAB_PATH, 'rb'))
            except IOError:
                raise IOError, 'PATH NAME NOT VALID'
                    
        self.TRNG_LABS = NU_LABS
        if self.TRNG_LABS is not None:
            self.CHECKS['labels_loaded'] = True



    
    def ProduceFeatureVectors(self, DATA):
        #
        # DESCRPT: Returns the stack's final output layer
        #-----------------------------------------------------------------------------------
        SCALE = self.SYSTEM_PARAMS['STACK']['output_scalar']
        self.__EncodeInput(DATA, store_output=True)
        return np.asarray(self.OUTPUT_LAYER * SCALE, dtype=np.float32, order='c')



    def PreprocData(self, DATA, LABS):
        '''
            Helper method that sets and cleans data before training.
            Reduces the number of highly correlated vectors (they should be independent).
            If the data is non-binary, it is normalized.
             

        '''

        COLIN_ATTEMPTS = self.SYSTEM_PARAMS['STACK']['stoch_remove_attempts']
        
        self.DELETED_IDX = RemoveColinearity(DATA, COLIN_ATTEMPTS)
        for i in reversed(self.DELETED_IDX):
            DATA = np.vstack((DATA[:i], DATA[i+1:]))
            LABS = np.concatenate((LABS[:i], LABS[i+1:]))

        if np.all((DATA == 0) | (DATA == 1)):
            return DATA.astype(np.float32)
        else:
            return DATA / np.atleast_2d(np.sum(DATA, 1)).T

    def StoreLayer(self, LAY):
        '''
        DESCRPT: Updates self.StackLayerInfo with the next layer
        '''

        self.StackLayerInfo['LAYER'].append(LAY)




    def __EncodeInput(self, IN=None, store_output=False):
        '''
        DESCRPT:  Individual layers handle feeding forward data and they return
                    their hidden layer for the next successive layer

        IN_ARG: IN              : np.ndarray    : 0 <= IN[i,j] <=1
                store_output    : bool          : True | False
        '''

        if IN is None:
            IN = self.TRNG_DATA

        # feed forward through each layer in turn
        for LAY in self.StackLayerInfo['LAYER']:
            if LAY['TYPE'] == 'Logistic':
                AIN = IN.dot(LAY['WIN'])
                AIN += LAY['BIN']
                IN = Sigm(AIN)
                print 'LOG'
            elif LAY['TYPE'] == 'SoftMax':
                self.StackLayerInfo['OUTPUT'] = IN
                self.OUTPUT_LAYER = self.StackLayerInfo['OUTPUT']
                print 'SMAX'




    def __DecodeInput(self, IN):
        '''
        DESCRPT:  Individual layers handle reconstructing data and they return
                    their reconstructed layer for the next successive layer

        IN_ARG:  IN           : np.ndarray    : 0 <= IN[i,j] <= 1
        '''

        # get method parameters
        # same steps as in __EncodeInput()
        HIDD = IN
        for LAY in reversed(self.StackLayerInfo['LAYER']):
            HIDD = Sigm(HIDD.dot(LAY['WOUT']) + LAY['BOUT'])
        return HIDD



    def PreTrainLayers(self, INPUT=None):
        '''
        DESCRPT:   Supervises the layer-wise SSAE construction
        PRECOND:   None
        POSTCON:   Stack is built consisting of MAX_LAYER SAEs
        IN ARGS:   INPUT : np.npdarray : 0 <= INPUT[i,j] <= 1
        RETURNS:   None
        NOTES:
        '''

        NUM_HIDD = self.SYSTEM_PARAMS['STACK']['num_hidden']
        MIN_HIDD = self.SYSTEM_PARAMS['STACK']['min_hidden']
        MAX_LAYER = self.SYSTEM_PARAMS['STACK']['max_layer']
        DEC_HIDD_BY = self.SYSTEM_PARAMS['STACK']['decrement_num_hidden']
        BASE_NOISE = self.SYSTEM_PARAMS['STACK']['base_noise_level']

        DisplayItems('Pretraining Layers', self.SYSTEM_PARAMS['STACK'],
                      verbose=self.CHECKS['verbose'], NumHidden=NUM_HIDD,
                        Maximum_layers=MAX_LAYER, MinimumHidden=MIN_HIDD,
                        DecRange=DEC_HIDD_BY)
        
        # checks if config file is loaded
        if not self.CHECKS['config_loaded']:
            self.LoadStackConfig()

        # checks if input was passed and if so
        # the training data is updated
        if INPUT is None:
            INPUT = self.TRNG_DATA
            self.IN_SHAPE = INPUT.shape
        else:
            self.TRNG_DATA = INPUT
            self.IN_SHAPE = INPUT.shape
            
        # timer for record keeping
        startTime = time.clock()

        self.LAYER_TRAINING = Layer(self.SYSTEM_PARAMS['LAYER'])
        IN_SHAPE = INPUT.shape
        for L in range(0, MAX_LAYER):  
            OUT_SHAPE = (IN_SHAPE[0], NUM_HIDD)
            [WIN, WOUT, BIN, 
             BOUT, SHAPES] = self.LAYER_TRAINING.CreateLogLayer(IN_SHAPE,
                                                               NUM_HIDD, 
                                                               self.RANDO) 
            print "{0}  |   {1}    |    {2}     <<   {3}".format('COST',
                                                                 'AVGACT', 
                                                                 'KLD',
                                                                 'ERR')            
            THETA = self.LAYER_TRAINING.TrainSparseAE(WIN, WOUT, BIN,
                                                         BOUT, INPUT)
            [TWIN, TWOUT, 
             TBIN, TBOUT] = self.LAYER_TRAINING.FluffLayer(THETA.x, SHAPES)
            INPUT = Encode(INPUT, TWIN, TBIN)
            self.StackLayerInfo['LAYER'].append({'TYPE'     : "Logistic",
                                                 'WIN'      : TWIN,
                                                 'WOUT'     : TWOUT,
                                                 'BIN'      : TBIN,
                                                 'BOUT'     : TBOUT,
                                                 'SHAPES'   : SHAPES,
                                                 'IN_SHAPE' : IN_SHAPE,
                                                 'H_SHAPE'  : OUT_SHAPE})
            
            DisplayItems('Pretrain', self.SYSTEM_PARAMS['LAYER'],
                                     verbose=self.CHECKS['verbose'],
                                     Elapsed_epoch_Time=time.clock() - startTime,
                                     Elapsed_Global_Time=time.clock() - self.TIMER)

            IN_SHAPE = OUT_SHAPE                    
            if NUM_HIDD == MIN_HIDD:
                break
            else:
                NEW_NH = NUM_HIDD - DEC_HIDD_BY
                NUM_HIDD = NEW_NH if NEW_NH >= MIN_HIDD else MIN_HIDD                 
        
        if self.SYSTEM_PARAMS['STACK']['tuning_algo'] == 'softmax_classifier':            
            IN_SHAPE = INPUT.shape      
            NUM_CLASSES = self.SYSTEM_PARAMS['STACK']['num_classes']
            MAX_TUNE_EPS = self.SYSTEM_PARAMS['LAYER']['smax_epochs']
            W_DECAY = self.SYSTEM_PARAMS['SMAX_TUNE']['smax_weight_decay']
            NOISE_SIG = self.SYSTEM_PARAMS['LAYER']['epoch_noise_sigma']
            
            DisplayItems('Pretrain Softmax', 
                         self.SYSTEM_PARAMS['LAYER'],
                         verbose=self.CHECKS['verbose'],
                         Elapsed_epoch_Time=time.clock() - startTime,
                         Elapsed_Global_Time=time.clock() - self.TIMER)            
            
            if self.SYSTEM_PARAMS['STACK']['use_generic_labels'] or not self.CHECKS['labels_loaded']:
                self.LoadLabels('generic', 
                                num_classes=NUM_CLASSES, 
                                num_labs=INPUT.shape[0])
            LABS = self.TRNG_LABS
            SMAXW = self.LAYER_TRAINING.CreateSoftMaxLayer(IN_SHAPE, 
                                                           NUM_CLASSES,
                                                           self.RANDO)
            M, P = SMAXW.shape
            THETA = self.LAYER_TRAINING.TrainSoftMaxLayer(SMAXW, W_DECAY, 
                                                          NOISE_SIG, P, 
                                                          INPUT, LABS)                
            WIN = THETA.x.reshape(M,P)
            self.StackLayerInfo['LAYER'].append({'TYPE'  : 'SoftMax',
                                                 'WIN'   : WIN,
                                                 'WSHAPE': (M,P)})
            
                
    def TuneStack(self, DATA=None):
        '''
        DESCRPT:   Takes input data and trains the entire stack as if it were
                        a single layer
        PRECOND:   Layers must be pretrained
        POSTCON:   A trained stacked autoencoder
        IN ARGS:   DATA     : np.ndarray. :  0 <= DATA[i,j] <= 1
        RETURNS:   None
        NOTES:
        '''

        # timer for record keeping
        TSstart = time.clock()

        # checks if config is loaded
        if not self.CHECKS['config_loaded']:
            self.LoadStackConfig()

        # checks that there is data to train on
        if DATA is None:
            INPUT = self.TRNG_DATA
        else:
            INPUT = DATA
        
        self.IN_SHAPE = INPUT.shape
        if self.SYSTEM_PARAMS['STACK']['tuning_algo'] == 'softmax_classifier':
            self.SMaxTuning()

        DisplayItems('Finished Tuning', self.SYSTEM_PARAMS['STACK'],
                     verbose=self.CHECKS['verbose'],
                     Elapsed_Training_Time=time.clock() - TSstart,
                     Elapsed_Global_Time=time.clock() - self.TIMER)

        


    def SMaxTuning(self):
        #
        #-----------------------------------------------
                
        INPUT = self.TRNG_DATA
        NOISE_SIG = self.SYSTEM_PARAMS['SMAX_TUNE']['smax_noise_sigma'] 
        W_DECAY = self.SYSTEM_PARAMS['SMAX_TUNE']['smax_weight_decay']
        MAX_TUNE_EPS = self.SYSTEM_PARAMS['SMAX_TUNE']['smax_tune_epochs']
        NUM_CLASSES = self.SYSTEM_PARAMS['STACK']['num_classes']
        SYSPARM = []               
        SHAPES = []
        for LAY in self.GenLayers():
            W = LAY['WIN']
            SHAPES.append(W.shape)
            SYSPARM.append(W.flatten())
            if LAY.has_key('BIN'):
                B = LAY['BIN']
                SHAPES.append(B.shape)
                SYSPARM.append(B.flatten())
        SYSPARM = np.concatenate(SYSPARM)
        if self.SYSTEM_PARAMS['SMAX_TUNE']['use_batcher']:
            if self.BATCHER is None:
                self.BATCHER = Batcher()
            NUM_BATCHES = self.SYSTEM_PARAMS['BATCHER']['num_batches']
            BATCHES, BAT_LABS = self.BATCHER.GetBatches(INPUT, self.RANDO)
            
            DisplayItems('Finetune BATCH SMax', 
                          self.SYSTEM_PARAMS['SMAX_TUNE'],
                         num_classes=NUM_CLASSES,
                          verbose=self.CHECKS['verbose'],
                         Elapsed_Global_Time=time.clock() - self.TIMER)
            
            T = lambda x: TrainStackedSAEBatchAlgo(x, NUM_CLASSES, 
                                                   SHAPES, W_DECAY,
                                                   NOISE_SIG, NUM_BATCHES,
                                                   BATCHES, BAT_LABS) 
        else:
            if self.SYSTEM_PARAMS['STACK']['use_generic_labels'] or not self.CHECKS['labels_loaded']:
                self.LoadLabels('generic', 
                                num_classes=NUM_CLASSES, 
                                num_labs=INPUT.shape[0])
            LABS = self.TRNG_LABS
            DisplayItems('Finetune Online SMax', 
                         self.SYSTEM_PARAMS['SMAX_TUNE'],
                         num_classes=NUM_CLASSES,
                         Elapsed_Global_Time=time.clock() - self.TIMER,
                         verbose=self.CHECKS['verbose'])
            
            print "{0}  |   {1}    |    {2}     <<   {3}".format('COST','AVGACT', 'KLD','ERR')
            T = lambda x: TrainStackedSAEAlgo(x, NUM_CLASSES, 
                                              SHAPES, W_DECAY,
                                              NOISE_SIG, INPUT, LABS)
        options_ = {'maxiter' : MAX_TUNE_EPS,
                    'gtol'    : 1e-9,
                    'disp'    : self.CHECKS['verbose']}
        RES = mini(T, SYSPARM, method='L-BFGS-B', jac=True, options=options_)
        FLATWnB = RES.x
        NEW_SLI = []
        for wb, lay in zip(self.GenRebuilt(FLATWnB, SHAPES),self.StackLayerInfo['LAYER']):
            LAY_DICT = {} 
            LAY_DICT.update(lay)
            WIN,BIN = wb
            if len(BIN) > 0:
                LAY_DICT.update({'WIN':WIN, 'BIN':BIN})
            else:
                LAY_DICT.update({'WIN':WIN})
            NEW_SLI.append(LAY_DICT)
        self.StackLayerInfo['LAYER'] = list(NEW_SLI)
        
       
                            
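    # Contract sketch for the objective handed to scipy's minimizer above:
    # with jac=True, T(theta) must return a (cost, gradient) pair. The two
    # training functions are assumed to satisfy this, roughly:
    #
    #     def T(theta):
    #         cost, grad = forward_and_backward(theta)   # hypothetical helper
    #         return cost, grad.flatten()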
    def MetricTuning(self):
        #
        #-----------------------------------------------
        
        pass



    def CostTuning(self):
        #
        #-----------------------------------------------
        
        pass



    def UpdateStack(self, DATA, sli_binary_path=None):
        #
        #-----------------------------------------------
        
        pass


    def GenLayers(self): 
        #
        #-----------------------------------------------
        
        for LAY in self.StackLayerInfo['LAYER']:
            yield LAY
    
    
    
    def GenRebuilt(self, FLATWB, SHAPES):
        W, B = self.RebuildWandB(FLATWB, SHAPES)
    
        if len(W) > len(B):
            Ws_WITH_Bs = W[:len(B)]
            Ws_WITHOUT = W[len(B):]
        else:
            Ws_WITH_Bs = W
            Ws_WITHOUT = []

        for i in range(0, len(W)):
            if i < len(B):
                yield Ws_WITH_Bs[i], B[i]
            else:
                # yield the matching weight (no bias for this layer)
                yield Ws_WITHOUT[i - len(B)], []
    
    
    
    def RebuildWandB(self, FLAT_WandB, SHAPES):
        
        START = 0
        STOP = 0
        W_LAY = []
        B_LAY = []

        for shp in SHAPES:
            A, B = shp
            
            STOP += A*B
            if A==1:
                TB = FLAT_WandB[START:STOP]
                B_LAY.append(TB.reshape(A,B))
            else:
                TW = FLAT_WandB[START:STOP]
                W_LAY.append(TW.reshape(A,B))
            START = STOP
            
        return W_LAY, B_LAY
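    # Round-trip sketch for the two helpers above (illustrative values only;
    # shapes with a leading 1 are treated as biases by RebuildWandB):
    #
    #     SHAPES = [(3, 4), (1, 4)]
    #     FLAT = np.concatenate([np.ones((3, 4)).flatten(),
    #                            np.zeros((1, 4)).flatten()])
    #     W, B = self.RebuildWandB(FLAT, SHAPES)
    #     # W[0].shape == (3, 4), B[0].shape == (1, 4)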



    def NewInput(self, DATA=None, LABS=None, PATH=None):
        '''
        DESCRPT:
        PRECOND:
        POSTCON:
        IN ARGS:    DATA: np.ndarray | None : New data to load
        RETURNS:
        NOTES:    If called with defaults, the function will load a data file
                  with the name stored in the config file.
                  Otherwise, DATA is stored in the class parameter and the
                  number of hidden units is adjusted
        '''

        NUM_HIDD = self.SYSTEM_PARAMS['STACK']['num_hidden']

        if DATA is None:
            if PATH is None:
                raise IOError, 'invalid path of <type None>'
            else:
                self.TRNG_DATA = LoadText(PATH)
        else:
            self.TRNG_DATA = DATA

        if NUM_HIDD >= np.size(self.TRNG_DATA, 1):
            self.SYSTEM_PARAMS['STACK']['num_hidden'] = np.size(self.TRNG_DATA, 1) - 1

        if LABS is not None:
            self.TRNG_LABS = LABS

        self.CHECKS['data_loaded'] = True




    def UpdateStackConfig(self, SECTION=None, OPTION=None):
        '''
        DESCRPT:
                PREVIOUSLY:
                    Class parameters were loaded within the classes themselves
                    and were frustratingly difficult to interact with from outside.
                    Plus they took up a lot of space
                LATER:
                    I implemented parameter dictionaries for individual classes that
                    would be passed back and forth from StackSAE to an instantiating class.
                    Not bad overall, just tedious
                NOW:
                    ONE dictionary contains all the parameters and behaves exactly
                    like the old way but with way less tedium

        NOTE: This function backs up the latest version of the config file before
              writing the updated one. The backup is time-stamped
        '''

        import shutil
        import os

        # This keeps track of the last config file backed up and is a parameter in
        # FILEIO
        LAST_CONFIG_UPDATE_PATH = os.getcwd()
        LAST_CONFIG_UPDATE_PATH += self.SYSTEM_PARAMS['FILEIO']['config_backup_directory']
        LAST_CONFIG_UPDATE_PATH += SaveFileTimeStamp()
        self.SYSTEM_PARAMS['FILEIO']['last_config_backup'] = LAST_CONFIG_UPDATE_PATH + '_config_bu.ini'
        shutil.copy2(os.getcwd() + '/config.ini',
                     self.SYSTEM_PARAMS['FILEIO']['last_config_backup'])

        # The config parser instantiated with SSAE is updated
        if SECTION is not None:
            sect = [SECTION]
        else:
            sect = self.SYSTEM_PARAMS['sections']

        if OPTION is not None:
            opt = OPTION
            for s in sect:
                self.CP.set(s, opt, str(self.SYSTEM_PARAMS[s][opt]))
        else:
            for s in sect:
                for opt in list(self.SYSTEM_PARAMS[s]):
                    self.CP.set(s, opt, str(self.SYSTEM_PARAMS[s][opt]))

        # new config file is now in the cwd
        with open('config.ini', 'w') as write_config:
            self.CP.write(write_config)


    

    def CrossValidate(self, INPUT, CVLABEL=None):
        #
        #-----------------------------------------------
        
        NUM_FOLDS = self.SYSTEM_PARAMS['STACK']['num_folds']
        NUM_CLASSES = self.SYSTEM_PARAMS['STACK']['num_classes']
        START_TIME = time.clock()

        if not self.CHECKS['config_loaded']:
            self.LoadStackConfig()

        self.NewInput(INPUT)
        if CVLABEL is None:
            if self.TRNG_LABS is None:
                self.LoadLabels()
        else:
            self.TRNG_LABS = CVLABEL

        # labels are expected with an explicit second axis
        if np.ndim(self.TRNG_LABS) <= 1:
            self.TRNG_LABS = np.atleast_2d(self.TRNG_LABS).T
            
        ROWS = np.size(INPUT, 0)
        PART_SIZE = int(ROWS / NUM_FOLDS)
        FOLDS = {}
        LABS = {}
        print INPUT.shape
        for i in range(NUM_FOLDS):
            FOLDS[str(i)] = INPUT[PART_SIZE * i:PART_SIZE * (i + 1)]
            LABS[str(i)] = self.TRNG_LABS[PART_SIZE * i:PART_SIZE * (i + 1)]

        for  j in range(NUM_FOLDS):
            t1 = str(j)
            t2 = str(np.mod(j + 1, NUM_FOLDS))
            c1 = str(np.mod(j + 2, NUM_FOLDS))
            c2 = str(np.mod(j + 3, NUM_FOLDS))
            e1 = str(np.mod(j + 4, NUM_FOLDS))

            VECT2TRAIN1 = np.array(FOLDS[t1])
            VECT2TRAIN2 = np.array(FOLDS[t2])
            VECT2TRAIN = np.concatenate((VECT2TRAIN1, VECT2TRAIN2), 0)
            TRAINLABS1 = np.array(LABS[t1])
            TRAINLABS2 = np.array(LABS[t2])
            TRAINLABS = np.concatenate((TRAINLABS1, TRAINLABS2), 0)

            VECT2CLASS1 = np.array(FOLDS[c1])
            VECT2CLASS2 = np.array(FOLDS[c2])
            VECT2CLASS = np.concatenate((VECT2CLASS1, VECT2CLASS2), 0)
            CLASSLABS1 = np.array(LABS[c1])
            CLASSLABS2 = np.array(LABS[c2])
            CLASSLABS = np.concatenate((CLASSLABS1, CLASSLABS2), 0)

            VECT2EVAL = np.array(FOLDS[e1])
            EVALLABS = np.array(LABS[e1])

            self.NewInput(DATA=VECT2TRAIN, LABS=TRAINLABS)
            self.PreTrainLayers()
            self.TuneStack()

            TFV = self.ProduceFeatureVectors(VECT2CLASS)
            EFV = self.ProduceFeatureVectors(VECT2EVAL)

            SCORES = BulkClassify(j, VECT2CLASS,
                                  VECT2EVAL, TFV,
                                  EFV, EVALLABS,
                                  CLASSLABS,
                                  NUM_CLASSES)

            self.ClearStackLayerInfo()




    def ClearStackLayerInfo(self):
        #
        #-----------------------------------------------
        
        self.StackLayerInfo = {'LAYER'   :   [],
                               'OUTPUT'  : None}
        self.OUTPUT_LAYER = None

        DisplayItems('Stack Clearing Complete',
                     self.StackLayerInfo,
                     verbose=self.CHECKS['verbose'])


    def SaveStackLayerInfo(self, save_path=None):
        #
        #-----------------------------------------------
        
        import os

        if save_path is None:
            PATH = os.getcwd()
            PATH += self.SYSTEM_PARAMS['FILEIO']['binaries_dir']
            PATH += SaveFileTimeStamp() + '_sli.pkl'
        else:
            PATH = save_path
        
        print PATH
        
        SaveBinary(self.StackLayerInfo, PATH, EXTENSION='pkl')
        self.SYSTEM_PARAMS['FILEIO']['last_sli_save'] = PATH
        self.UpdateStackConfig()





    def UpdateStored(self):
        '''
        DESCRPT:   Redundant with EncodeInput(); will be removed
        PRECOND:   None
        POSTCON:   Layers within StackLayerInfo will be 'fresh'
        IN ARGS:   None
        RETURNS:   None
        NOTES:     STOP USING THIS. LET MY MISTAKES DIE

        '''

        import warnings
        warnings.warn('UpdateStored will be deprecated in future versions')
        self.__EncodeInput(store_output=True)


    def LoadStackLayerInfo(self, load_path=None):
        '''
        DESCRPT:    Loads the pickled stacklayerinfo.pkl, which is the main
                    data structure for the StackedSAE
        PRECOND:    stacklayerinfo.pkl has been created and saved
        POSTCON:    self.StackLayerInfo holds the value of stacklayerinfo.pkl
        IN ARGS:    load_path   :   path to the saved .pkl file   :   string
        RETURNS:    None
        NOTES:      None
        '''

        import os

        if not self.CHECKS['config_loaded']:
            self.LoadStackConfig()
        
        if load_path is None:
            if self.SYSTEM_PARAMS['FILEIO'].has_key('last_sli_save'):
                load_path = self.SYSTEM_PARAMS['FILEIO']['last_sli_save']
            else:
                raise IOError, 'No file path given or available'
                
        self.StackLayerInfo = LoadBinary(load_path)


    def __TidyUp(self, just_stack=False, just_layers=False):
        #
        #-----------------------------------------------
                
        if just_layers:
            LAYERS = self.StackLayerInfo['LAYER']
            for LAY in LAYERS:
                LAY.CleanLayer()


        gc.collect()
Example #35
class Generator():
    seed = None
    random = None
    def __init__(self, seed=1):
        super(Generator, self).__init__()
        self.random = RandomState(seed)
        self.seed = seed
        
    def reseed(self):
        self.random = RandomState(self.seed)
        
    def randSyllable(self):
        c1_dice = ( self.random.random_sample() < 0.91 ) #Chance that a regular consonant will start the syllable
        s1_dice = ( self.random.random_sample() < 0.05 ) #Chance that a special conjunction consonant is used
        v1_dice = ( self.random.random_sample() < 0.85 ) #Chance that a regular vowel will be used
        c2_add_dice = ( self.random.random_sample() < 0.28 ) #Chance that it has an ending consonant
        c2_dice = ( self.random.random_sample() < 0.91 ) #Chance that a regular consonant will end the syllable
        s2_dice = ( self.random.random_sample() < 0.03 ) #Chance that the ending has an addon consonant
        
        c1 = self.random.choice(REGULAR_CONSONANTS) if c1_dice else self.random.choice(COMPOSITE_CONSONANTS)
        s1 = self.random.choice(SPECIAL_CONSONANTS) if s1_dice else ''
        v1 = self.random.choice(REGULAR_VOWELS) if v1_dice else self.random.choice(COMPOSITE_VOWELS)
        c2 = ( self.random.choice(REGULAR_CONSONANTS) if c2_dice else self.random.choice(ENDING_CONSONANTS) ) if c2_add_dice else ''
        s2 = self.random.choice(ADDON_ENDING_CONSONANTS) if s2_dice else ''
        syllable = c1+s1+v1+c2+s2
#         print(syllable)
        return syllable
    
    def randWord(self, s=2):
        """ s = number of syllables in int """
        word = ''
        for syllable in range(0, s):
            word += self.randSyllable()
        return word
    
    def randSentence(self, meter=[2, 2, 1, 2, 3, 2, 1, 2, 2]):
        sentence = []
        for syllable in meter:
            sentence.append(self.randWord(syllable))
        return ' '.join(sentence)
    
    def randParagraph(self):
        paragraph = []
        rand_wordcount = [ self.random.randint(3, 6) for i in range(0, self.random.randint( 4, 5 )) ]
        for words in rand_wordcount:
            rand_meter = [ self.random.randint(1, 4) for i in range(0, words) ]
            sentence = self.randSentence(rand_meter)
            paragraph.append(sentence)
        return '. '.join(paragraph)
    
    def randDictionary(self, word_list=['apple', 'banana', 'cake', 'dog', 'elephant', 'fruit', 'guava', 'human', 'island', 'joke', 'king', 'love', 'mother', 'nature', 'ocean', 'pie', 'queen', 'random', 'start', 'tree', 'up', 'vine', 'wisdom', 'yellow', 'zoo' ]):
        rand_dict_e2r = { word: self.randWord() for word in word_list }
        rand_dict_r2e = { v: k for k, v in rand_dict_e2r.items() }
        ordered_e2r = OrderedDict()
        print("English to Random Language")
        for key in sorted(rand_dict_e2r.keys()):
            print(key+ ' : '+rand_dict_e2r[key])
            ordered_e2r[key] = rand_dict_e2r[key]
        ordered_r2e = OrderedDict()
        print("\n\nRandom Language to English")
        for key in sorted(rand_dict_r2e.keys()):
            print(key+ ' : '+rand_dict_r2e[key])
            ordered_r2e[key] = rand_dict_r2e[key]
        return ( ordered_e2r, ordered_r2e )
    
    def convertWord(self, word):
        word = word.lower()
        saved_state = self.random.get_state()
        
        # Word mapping method : md5
        # To make it more natural, this mapping should be updated
        # to reflect natural language patterns
        md5 = hashlib.md5(bytes(word, encoding='utf-8'))
        wordseed = ( self.seed + int.from_bytes(md5.digest(), 'little') ) % (2**31)
#         print(wordseed)
        self.random.seed( wordseed )
        randword = self.randWord( math.ceil( abs( self.random.normal(2, 1) ) ) )
        self.random.set_state(saved_state)
        return randword
     
    def convertSentence(self, sentence):
        words = sentence.split()
        converted = [self.convertWord(word) for word in words]
        return ' '.join(converted)
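# Usage sketch for Generator (assumes the module-level letter pools such as
# REGULAR_CONSONANTS and COMPOSITE_VOWELS are defined alongside the class):
#
#     g = Generator(seed=42)
#     g.randWord(3)             # deterministic for a fixed seed
#     g.convertWord('Apple')    # md5-keyed seeding: same output on every call
#     g.convertWord('apple')    # lower-cased first, so identical to the above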
Example #36
class Configurator:

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def __init__(self, NU_TRNG_DATA, NU_TRNG_LABS, verbose = False, NU_CFG_PATH = None):
        # DATA and LAB MEMBERS
        # Should be your training data and corresponding labels
        self.TRNG_DATA = NU_TRNG_DATA
        self.TRNG_LABS = NU_TRNG_LABS
        # Just in case you want to use a special Config file
        if NU_CFG_PATH is None:
            self.CFG_PATH = DEF_CFG_PATH
        else:
            self.CFG_PATH = NU_CFG_PATH
        # Different Containers for Data
        self.CFG = {'sections' : ['STACK', 'LAYER', 'SMAX_TUNE',
                                  'MET_TUNE', 'LOG_TUNE',
                                  'CPP_LIBRARY', 'FILEIO', 'SYSTEM']}
        self.CFG_STACK = []
        # RandomState Object
        self.RANDO = RS()
        # Config parser
        self.CP = CFPR(allow_no_value=True)
        self.TLAY = None
        self.CHECKS = {'cfg_loaded'    : False,
                       'lay_obj_initd' : False,
                       'verbose'       : verbose}
        self.SWARM_SIZE = 5
        self.ALL_CHECKS = self.CHECKS.keys()
        self.FLOAT_STEP = .00001
        self.INT_STEP = 1
        self.GIVE_UP_SCALE = np.linspace(-100,100,5000)
        self.PATIENT_LEVEL = 2500
        self.SCORE_STACK = [999999.]
        self.START_TIME = time.time()
        self.STOP_AT_HOUR = 1.0

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def CFGControl(self):
        for TC in [self.isPatient(), not self.isStopTime()]:
            print TC
            yield TC

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def GetPatiencesLevel(self):
        return self.GIVE_UP_SCALE[self.PATIENT_LEVEL]

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def isPatient(self):
        return self.GIVE_UP_SCALE[np.minimum(self.PATIENT_LEVEL,
               len(self.GIVE_UP_SCALE) - 1)] > -50.

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def TimeElapsed(self):
        # elapsed time in hours
        return (time.time() - self.START_TIME) / 3600.

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def isStopTime(self):
        return self.STOP_AT_HOUR < self.TimeElapsed()

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def isStackEmpty(self, STACK):
        return len(STACK) == 0

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def PatienceUp(self):
        self.PATIENT_LEVEL+=1

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def PatienceDown(self):
        self.PATIENT_LEVEL-=1

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def BuzzEm(self, VAL, BUZZ):
        if BUZZ in GenInts():
            NU_INT = self.BuzzInt(VAL)
            return NU_INT if NU_INT>0 else VAL
        else:
            NU_FLOAT = self.BuzzFloat(VAL)
            return NU_FLOAT if NU_FLOAT>0 else VAL
            

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def BuzzInt(self, VAL):
        RANGE = [VAL-self.INT_STEP, VAL+self.INT_STEP]
        SEQ = np.arange(RANGE[0], RANGE[1]+1,self.INT_STEP)
        NEW_INT, THROW_AWAY = SEQ[0],SEQ[1:]
        return NEW_INT

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def BuzzFloat(self, VAL):
        RANGE = [VAL - self.FLOAT_STEP, VAL + self.FLOAT_STEP]
        SEQ = np.arange(RANGE[0], RANGE[1] + self.FLOAT_STEP, self.FLOAT_STEP)
        NEW_FLOAT, THROW_AWAY = SEQ[0], SEQ[1:]
        return NEW_FLOAT

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def LoadAndConfigure(self, CFG_PATH=None):
        if CFG_PATH is not None:
            self.CFG_PATH = CFG_PATH
        self.LoadConfig()
        self.Configure()

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def Configure(self, CFG_PATH = None):
        self.CFG_STACK.insert(0, self.CFG)
        LAY = Layer(self.CFG['LAYER'])
        while all([C for C in self.CFGControl()]):
            LAY.ClearLayerParams()
            INT_IDX = [[i, j] for i, j in GenInts()]
            FLOAT_IDX = [[i, j] for i, j in GenFloats()]
            ALL_BUZZERS = self.RANDO.permutation(INT_IDX + FLOAT_IDX)
            SPLIT_AT = np.minimum(self.SWARM_SIZE, len(ALL_BUZZERS))
            BUZZERS = ALL_BUZZERS[:SPLIT_AT]
            REJECTS = ALL_BUZZERS[SPLIT_AT:]
            for [SECT, PARAM] in BUZZERS:
                OLD_VAL = self.CFG[SECT][PARAM]
                self.CFG[SECT][PARAM] = self.BuzzEm(OLD_VAL, [SECT, PARAM])
            # rejected parameters keep their current values untouched
            print self.CFG.keys()
            LAY.SetNewParams(self.CFG)
            RESULT = self.LogTrain(LAY)            
            if RESULT['fun'][0][-1] <= self.SCORE_STACK[0]:
                self.CFG_STACK.insert(0, self.CFG)
                self.PatienceUp()
            else:
                if self.isStackEmpty(self.CFG_STACK):
                    self.SWARM_SIZE += np.ceil(self.SWARM_SIZE/2.)
                    self.FLOAT_STEP = .00002
                    self.INT_STEP = 2
                    self.PatienceDown()
                else:
                    self.CFG_STACK.pop(0)
                    self.PatienceDown()
    
    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def GetCFG(self):
        return self.CFG
    

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def DeepCopy2CFG(self, NU_CFG):
        self.CFG = {SECT : {PARAM : NU_CFG[SECT][PARAM]
                                for PARAM in GenParams(NU_CFG)
                           } for SECT in GenSects(NU_CFG)}

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def DeepCopyCFG2NU(self):
        return    {SECT : {PARAM : self.CFG[SECT][PARAM]
                                for PARAM in GenParams(self.CFG)
                           } for SECT in GenSects(self.CFG)}

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def PushBehave(self):
        self.BHAVE_STACK.insert(0, {})
        self.BHAVE_STACK[0].update(self.CURR_BEHAVIOR)

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def PushConfig(self, LATEST_CFG):
        self.CFG_STACK.insert(0, {})
        for SECT, PARM in GenSectsAndParams(LATEST_CFG):
            self.CFG_STACK[0].setdefault(SECT, {})[PARM] = LATEST_CFG[SECT][PARM]
        self.NUM_CFG_STACKED+=1

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def PopConfig(self):
        if self.NUM_CFG_STACKED > 0:
            NU_CFG = {SECT : {PARM : self.CFG_STACK[0][SECT][PARM]
                                for PARM in GenParams(self.CFG_STACK[0][SECT])
                             }    for SECT in GenSects(self.CFG_STACK[0])}
            self.CFG_STACK.pop(0)
            self.SCORE_STACK.pop(0)
            self.NUM_CFG_STACKED -= 1
            return NU_CFG
        else:
            raise IndexError, "No more configurations to pop off the stack."

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def LoadConfig(self):
        '''
        DESCRPT:  A monster; thankfully this is the only place the config file
                    is read on behalf of all the other classes
        IN ARGS:  PATH : string : config file location
        NOTES:
        '''
        try:
            self.CP.read(self.CFG_PATH)
        except:
            print "Unable to read config at: " + str(self.CFG_PATH)
            raise IOError, 'Config file wasn\'t able to be read'
        GENERIC_DICT = {}
        for SECT in self.CP.sections():
            GENERIC_DICT[SECT] = {}
            for OPTS in self.CP.options(SECT):
                val = self.CP.get(SECT, OPTS)
                if val == 'True':
                    GENERIC_DICT[SECT][OPTS] = True
                elif val == 'False':
                    GENERIC_DICT[SECT][OPTS] = False
                else:
                    try:
                        GENERIC_DICT[SECT][OPTS] = int(val)
                    except:
                        try:
                            GENERIC_DICT[SECT][OPTS] = float(val)
                        except:
                            # unparseable values are kept as raw strings
                            GENERIC_DICT[SECT][OPTS] = val
        for key in GenSects(GENERIC_DICT):
            if key not in self.CFG['sections']:
                self.CFG['sections'].append(key)
        for key in self.CFG['sections']:
            self.CFG[key] = GENERIC_DICT[key]
        self.CFG['LAYER']['disp'] = self.CHECKS['verbose']
        self.CHECKS['lee_wants_rand_off'] = self.CFG['STACK']['lee_wants_rand_off']
        if self.CFG['STACK']['lee_wants_rand_off']:
            CONFIG_SEED = self.CFG['STACK']['rand_seed_32bit']
            self.RANDO.seed(seed=CONFIG_SEED)
        else:
            self.RANDO.seed(seed=np.int32(time.time()))

        self.CHECKS['cfg_loaded'] = True

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def GetChecks(self, CHECK_NAME):
        if self.CHECKS.has_key(CHECK_NAME):
            return self.CHECKS[CHECK_NAME]
        else:
            raise Warning, "That's not a valid Check"

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def UpdateConfig(self, SECTION=None, OPTION=None):
        '''
        DESCRPT:
                PREVIOUSLY:
                    Class parameters were loaded within the classes themselves
                    and were frustratingly difficult to interact with from outside.
                    Plus they took up a lot of space
                LATER:
                    I implemented parameter dictionaries for individual classes that
                    would be passed back and forth from StackSAE to an instantiating class.
                    Not bad overall, just tedious
                NOW:
                    ONE dictionary contains all the parameters and behaves exactly
                    like the old way but with way less tedium

        NOTE: This function backs up the latest version of the config file before
              writing the updated one. The backup is time-stamped
        '''
        import shutil
        # This keeps track of the last config file backed up and is a parameter in
        # FILEIO
        LAST_CONFIG_UPDATE_PATH = os.getcwd()
        self.CFG['FILEIO']['last_config_backup'] = LAST_CONFIG_UPDATE_PATH + '_config_bu.ini'
        shutil.copy2(os.getcwd() + '/config.ini',
                     self.CFG['FILEIO']['last_config_backup'])
         # The config parser instantiated with SSAE is updated
        if SECTION is not None:
            sect = [SECTION]
        else:
            sect = self.CFG['sections']
        if OPTION is not None:
            opt = OPTION
            for s in sect:
                self.CP.set(s, opt, str(self.CFG[s][opt]))
        else:
            for s in sect:
                for opt in list(self.CFG[s]):
                    self.CP.set(s, opt, str(self.CFG[s][opt]))
        # new config file is now in the cwd
        with open('config.ini', 'w') as write_config:
            self.CP.write(write_config)

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def ProcessResult(self, RES):
        self.IntakeNuResults()
        for ATT in RES.keys():
            if ATT in self.PREV_RESULT.keys():
                self.CURR_RESULT[ATT] = RES.get(ATT)
        self.TLAY.ClearLayerParams()

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def StopTrain(self):
        self.ChangePhase('stop')
        self.ProcessResult({'phase_name': self.CURR_PHASE})

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def LogTrain(self, LAY):
        IN_SHAPE = self.TRNG_DATA.shape
        NUM_HIDD = self.CFG['STACK']['num_hidden']
        MIN_HIDD = self.CFG['STACK']['min_hidden']
        MAX_LAYER = self.CFG['STACK']['max_layer']
        DEC_HIDD_BY = self.CFG['STACK']['decrement_num_hidden']
        BASE_NOISE = self.CFG['STACK']['base_noise_level']
        OUT_SHAPE = (IN_SHAPE[0], NUM_HIDD)
        [WIN, WOUT, BIN, BOUT, SHAPES] = LAY.CreateLogLayer(IN_SHAPE, NUM_HIDD, self.RANDO)
        THETA = LAY.TrainSparseAE(WIN, WOUT,
                                  BIN, BOUT,
                                  self.TRNG_DATA)
        return {'success': THETA.success,
                'message': THETA.message,
                'fun'    : THETA.fun,
                'nfev'   : THETA.nfev,
                'nit'    : THETA.nit }

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def FlattenData(self, DATA):
        N,M = DATA.shape
        self.TRNG_DATA = DATA.reshape(N*M)

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def SetTrainingData(self, DATA):
        if DATA.ndim > 1:
            self.FlattenData(DATA)
        else:
            self.TRNG_DATA = DATA

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def SetTrainingLabs(self, LABS):
        self.TRNG_LABS = LABS

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def ChangePhase(self, NU_PHASE):
        if NU_PHASE in self.ALLOWED_PHASE:
            self.CURR_PHASE = NU_PHASE
        else:
            raise ValueError, 'Non allowable phase passed'

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def BuildNewLay_CurrParam(self, PHASE = None):
        if PHASE is not None and self.CURR_PHASE != PHASE:
            self.ChangePhase(PHASE)
        self.TEST_LAY = Layer(self.CFG['LAYER'])

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def BuildNewLay_NuParam(self, PHASE = None, **NU_LAY_PARAM):
        if PHASE is not None and self.CURR_PHASE != PHASE:
            self.ChangePhase(PHASE)
        self.TEST_LAY = Layer(MergeNu2Old(self.CFG, NU_LAY_PARAM))

    '''@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'''
    def CFGStackIsEmpty(self):
        return len(self.CFG_STACK) == 0
Example #37
0
class RandomGenerator(object):
    """Container around a random number generator.
    Enables reproducibility of player behavior, matches,
    and tournaments."""
    def __init__(self, seed: Optional[int] = None):
        # _random is the internal object that generates random values
        self._random = RandomState()
        self.original_seed = seed
        self.seed(seed)

    def seed(self, seed_: Optional[int] = None):
        """Sets a seed"""
        self._random.seed(seed_)

    def random(self, *args, **kwargs):
        return self._random.rand(*args, **kwargs)

    def randint(self, *args, **kwargs):
        return self._random.randint(*args, **kwargs)

    def random_seed_int(self) -> int:
        return self.randint(low=0, high=2**32 - 1, dtype="uint64")

    def choice(self, *args, **kwargs):
        return self._random.choice(*args, **kwargs)

    def uniform(self, *args, **kwargs):
        return self._random.uniform(*args, **kwargs)

    def random_choice(self, p: float = 0.5) -> Action:
        """
        Return C with probability `p`, else return D

        No random sample is carried out if p is 0 or 1.

        Parameters
        ----------
        p : float
            The probability of picking C

        Returns
        -------
        axelrod.Action
        """
        if p == 0:
            return D

        if p == 1:
            return C

        r = self.random()
        if r < p:
            return C
        return D

    def random_flip(self, action: Action, threshold: float) -> Action:
        """
        Return flipped action with probability `threshold`

        No random sample is carried out if threshold is 0 or 1.

        Parameters
        ----------
        action:
            The action to flip or not
        threshold : float
            The probability of flipping action

        Returns
        -------
        axelrod.Action
        """
        if self.random_choice(threshold) == C:
            return action.flip()
        return action

    def randrange(self, a: int, b: int) -> int:
        """Returns a random integer uniformly between a and b: [a, b)."""
        c = b - a
        r = c * self.random()
        return a + int(r)

    def random_vector(self, size):
        """Create a random vector of values in [0, 1] that sums to 1."""
        vector = self.random(size)
        return np.array(vector) / np.sum(vector)
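# Usage sketch (assumes axelrod's Action constants C and D are in scope, as
# they are in the module this class comes from):
#
#     rng = RandomGenerator(seed=1)
#     rng.random_choice(0.75)   # C with probability 0.75, else D
#     rng.random_flip(C, 0.1)   # flip C with probability 0.1
#     rng.random_vector(4)      # four non-negative entries summing to 1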
Example #38
class IIDBootstrap(object):
    """
    Bootstrap using uniform resampling

    Parameters
    ----------
    args
        Positional arguments to bootstrap
    kwargs
        Keyword arguments to bootstrap

    Attributes
    ----------
    index : array
        The current index of the bootstrap
    data : tuple
        Two-element tuple with the pos_data in the first position and kw_data
        in the second (pos_data, kw_data)
    pos_data : tuple
        Tuple containing the positional arguments (in the order entered)
    kw_data : dict
        Dictionary containing the keyword arguments
    random_state : RandomState
        RandomState instance used by bootstrap

    Notes
    -----
    Supports numpy arrays and pandas Series and DataFrames.  Data returned has
    the same type as the input data.

    Data entered using keyword arguments is directly accessible as an attribute.

    Examples
    --------
    Data can be accessed in a number of ways.  Positional data is retained in
    the same order as it was entered when the bootstrap was initialized.
    Keyword data is available both as an attribute or using a dictionary syntax
    on kw_data.

    >>> from arch.bootstrap import IIDBootstrap
    >>> from numpy.random import standard_normal
    >>> y = standard_normal((500, 1))
    >>> x = standard_normal((500,2))
    >>> z = standard_normal(500)
    >>> bs = IIDBootstrap(x, y=y, z=z)
    >>> for data in bs.bootstrap(100):
    ...     bs_x = data[0][0]
    ...     bs_y = data[1]['y']
    ...     bs_z = bs.z
    """

    def __init__(self, *args, **kwargs):
        self.random_state = RandomState()
        self._initial_state = self.random_state.get_state()
        self._args = args
        self._kwargs = kwargs
        if args:
            self._num_items = len(args[0])
        elif kwargs:
            key = list(kwargs.keys())[0]
            self._num_items = len(kwargs[key])

        all_args = list(args)
        all_args.extend([v for v in itervalues(kwargs)])

        for arg in all_args:
            if len(arg) != self._num_items:
                raise ValueError("All inputs must have the same number of "
                                 "elements in axis 0")
        self._index = np.arange(self._num_items)

        self._parameters = []
        self._seed = None
        self.pos_data = args
        self.kw_data = kwargs
        self.data = (args, kwargs)

        self._base = None
        self._results = None
        self._studentized_results = None
        self._last_func = None
        self._name = 'IID Bootstrap'
        for key, value in iteritems(kwargs):
            attr = getattr(self, key, None)
            if attr is None:
                self.__setattr__(key, value)
            else:
                raise ValueError(key + ' is a reserved name')

    def __str__(self):
        repr = self._name
        repr += '(no. pos. inputs: ' + str(len(self.pos_data))
        repr += ', no. keyword inputs: ' + str(len(self.kw_data)) + ')'
        return repr

    def __repr__(self):
        return self.__str__()[:-1] + ', ID: ' + hex(id(self)) + ')'

    def _repr_html(self):
        html = '<strong>' + self._name + '</strong>('
        html += '<strong>no. pos. inputs</strong>: ' + str(len(self.pos_data))
        html += ', <strong>no. keyword inputs</strong>: ' + str(len(self.kw_data))
        html += ', <strong>ID</strong>: ' + hex(id(self)) + ')'
        return html

    @property
    def index(self):
        """
        Returns the current index of the bootstrap
        """
        return self._index

    def get_state(self):
        """
        Gets the state of the bootstrap's random number generator

        Returns
        -------
        state : RandomState state vector
            Array containing the state
        """
        return self.random_state.get_state()

    def set_state(self, state):
        """
        Sets the state of the bootstrap's random number generator

        Parameters
        ----------
        state : RandomState state vector
            Array containing the state
        """

        return self.random_state.set_state(state)

    def seed(self, value):
        """
        Seeds the bootstrap's random number generator

        Parameters
        ----------
        value : int
            Integer to use as the seed
        """
        self._seed = value
        self.random_state.seed(value)
        return None

    def reset(self, use_seed=True):
        """
        Resets the bootstrap to either its initial state or the last seed.

        Parameters
        ----------
        use_seed : bool, optional
            Flag indicating whether to use the last seed if provided.  If
            False or if no seed has been set, the bootstrap will be reset
            to the initial state.  Default is True
        """
        self._index = np.arange(self._num_items)
        self._resample()
        self.random_state.set_state(self._initial_state)
        if use_seed and self._seed is not None:
            self.seed(self._seed)
        return None

    def bootstrap(self, reps):
        """
        Iterator for use when bootstrapping

        Parameters
        ----------
        reps : int
            Number of bootstrap replications

        Example
        -------
        The key steps are problem dependent and so this example shows the use
        as an iterator that does not produce any output

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> bs = IIDBootstrap(np.arange(100), x=np.random.randn(100))
        >>> for posdata, kwdata in bs.bootstrap(1000):
        ...     # Do something with the positional data and/or keyword data
        ...     pass

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap

        Notes
        -----
        The iterator returns a tuple containing the data entered in positional
        arguments as a tuple and the data entered using keywords as a
        dictionary
        """
        for _ in range(reps):
            indices = np.asarray(self.update_indices())
            self._index = indices
            yield self._resample()

    def conf_int(self, func, reps=1000, method='basic', size=0.95, tail='two',
                 extra_kwargs=None, reuse=False, sampling='nonparametric',
                 std_err_func=None, studentize_reps=1000):
        """
        Parameters
        ----------
        func : callable
            Function the computes parameter values.  See Notes for requirements
        reps : int, optional
            Number of bootstrap replications
        method : string, optional
            One of 'basic', 'percentile', 'studentized', 'norm' (identical to
            'var', 'cov'), 'bc' (identical to 'debiased', 'bias-corrected'), or
            'bca'
        size : float, optional
            Coverage of confidence interval
        tail : string, optional
            One of 'two', 'upper' or 'lower'.
        reuse : bool, optional
            Flag indicating whether to reuse previously computed bootstrap
            results.  This allows alternative methods to be compared without
            rerunning the bootstrap simulation.  Reuse is ignored if reps is
            not the same across multiple runs, func changes across calls, or
            method is 'studentized'.
        sampling : string, optional
            Type of sampling to use: 'nonparametric', 'semi-parametric' (or
            'semi') or 'parametric'.  The default is 'nonparametric'.  See
            notes about the changes to func required when using 'semi' or
            'parametric'.
        extra_kwargs : dict, optional
            Extra keyword arguments to use when calling func and std_err_func,
            when appropriate
        std_err_func : callable, optional
            Function to use when standardizing estimated parameters when using
            the studentized bootstrap.  Providing an analytical function
            eliminates the need for a nested bootstrap
        studentize_reps : int, optional
            Number of bootstraps to use in the inner component when using the
            studentized bootstrap.  Ignored when ``std_err_func`` is provided

        Returns
        -------
        intervals : 2-d array
            Computed confidence interval.  Row 0 contains the lower bounds, and
            row 1 contains the upper bounds.  Each column corresponds to a
            parameter. When tail is 'lower', all upper bounds are inf.
            Similarly, 'upper' sets all lower bounds to -inf.

        Examples
        --------
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(0)
        >>> y = np.random.randn(1000, 2)
        >>> from arch.bootstrap import IIDBootstrap
        >>> bs = IIDBootstrap(y)
        >>> ci = bs.conf_int(func, 1000)

        Notes
        -----
        When there are no extra keyword arguments, the function is called

        .. code:: python

            func(*args, **kwargs)

        where args and kwargs are the bootstrap version of the data provided
        when setting up the bootstrap.  When extra keyword arguments are used,
        these are appended to kwargs before calling func.

        The standard error function, if provided, must return a vector of
        parameter standard errors and is called

        .. code:: python

            std_err_func(params, *args, **kwargs)

        where ``params`` is the vector of estimated parameters using the same
        bootstrap data as in args and kwargs.

        The bootstraps are:

        * 'basic' - Basic confidence using the estimated parameter and
          difference between the estimated parameter and the bootstrap
          parameters
        * 'percentile' - Direct use of bootstrap percentiles
        * 'norm' - Makes use of normal approximation and bootstrap covariance
          estimator
        * 'studentized' - Uses either a standard error function or a nested
          bootstrap to estimate percentiles and the bootstrap covariance for
          scale
        * 'bc' - Bias corrected using estimate bootstrap bias correction
        * 'bca' - Bias corrected and accelerated, adding acceleration parameter
          to 'bc' method

        """
        studentized = 'studentized'
        if not 0.0 < size < 1.0:
            raise ValueError('size must be strictly between 0 and 1')
        tail = tail.lower()
        if tail not in ('two', 'lower', 'upper'):
            raise ValueError('tail must be one of two-sided, lower or upper')
        studentize_reps = studentize_reps if method == studentized else 0

        _reuse = False
        if reuse:
            # check conditions for reuse
            _reuse = (self._results is not None and len(self._results) == reps
                      and method != studentized and self._last_func is func)

        if not _reuse:
            if reuse:
                import warnings

                warn = 'The conditions to reuse the previous bootstrap have ' \
                       'not been satisfied. A new bootstrap will be constructed'
                warnings.warn(warn, RuntimeWarning)
            self._construct_bootstrap_estimates(func, reps, extra_kwargs,
                                                std_err_func=std_err_func,
                                                studentize_reps=studentize_reps,
                                                sampling=sampling)

        base, results = self._base, self._results
        studentized_results = self._studentized_results

        std_err = []
        if method in ('norm', 'var', 'cov', studentized):
            errors = results - results.mean(axis=0)
            std_err = np.sqrt(np.diag(errors.T.dot(errors) / reps))

        if tail == 'two':
            alpha = (1.0 - size) / 2
        else:
            alpha = (1.0 - size)

        percentiles = [alpha, 1.0 - alpha]
        norm_quantiles = stats.norm.ppf(percentiles)

        if method in ('norm', 'var', 'cov'):
            lower = base + norm_quantiles[0] * std_err
            upper = base + norm_quantiles[1] * std_err

        elif method in ('percentile', 'basic', studentized,
                        'debiased', 'bc', 'bias-corrected', 'bca'):
            values = results
            if method == studentized:
                # studentized uses studentized parameter estimates
                values = studentized_results

            if method in ('debiased', 'bc', 'bias-corrected', 'bca'):
                # bias corrected uses modified percentiles, but is
                # otherwise identical to the percentile method
                p = (results < base).mean(axis=0)
                b = stats.norm.ppf(p)
                b = b[:, None]
                if method == 'bca':
                    nobs = self._num_items
                    jk_params = _loo_jackknife(func, nobs, self._args,
                                               self._kwargs)
                    u = (nobs - 1) * (jk_params - base)
                    numer = np.sum(u ** 3, 0)
                    denom = 6 * (np.sum(u ** 2, 0) ** (3.0 / 2.0))
                    small = denom < (np.abs(numer) * np.finfo(np.float64).eps)
                    if small.any():
                        message = 'Jackknife variance estimate {jk_var} is ' \
                                  'too small to use BCa'
                        raise RuntimeError(message.format(jk_var=denom))
                    a = numer / denom
                    a = a[:, None]
                else:
                    a = 0.0

                percentiles = stats.norm.cdf(b + (b + norm_quantiles) /
                                             (1.0 - a * (b + norm_quantiles)))
                percentiles = list(100 * percentiles)
            else:
                percentiles = [100 * p for p in percentiles]  # Rescale

            if method not in ('bc', 'debiased', 'bias-corrected', 'bca'):
                ci = np.asarray(np.percentile(values, percentiles, axis=0))
                lower = ci[0, :]
                upper = ci[1, :]
            else:
                k = values.shape[1]
                lower = np.zeros(k)
                upper = np.zeros(k)
                for i in range(k):
                    lower[i], upper[i] = np.percentile(values[:, i],
                                                       list(percentiles[i]))

            # Basic and studentized use the lower empirical quantile to
            # compute upper and vice versa.  Bias corrected and percentile use
            # upper to estimate the upper, and lower to estimate the lower
            if method == 'basic':
                lower_copy = lower + 0.0
                lower = 2.0 * base - upper
                upper = 2.0 * base - lower_copy
            elif method == studentized:
                lower_copy = lower + 0.0
                lower = base - upper * std_err
                upper = base - lower_copy * std_err

        else:
            raise ValueError('Unknown method')

        if tail == 'lower':
            upper = np.zeros_like(base)
            upper.fill(np.inf)
        elif tail == 'upper':
            lower = np.zeros_like(base)
            lower.fill(-1 * np.inf)

        return np.vstack((lower, upper))
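    # Usage sketch comparing interval methods on one simulation; reuse=True
    # skips re-running the bootstrap when only the method changes (same func,
    # same reps, method not 'studentized'):
    #
    #     def mean0(x):
    #         return x.mean(0)
    #     bs = IIDBootstrap(np.random.randn(500, 2))
    #     ci_basic = bs.conf_int(mean0, reps=1000, method='basic')
    #     ci_pct = bs.conf_int(mean0, reps=1000, method='percentile', reuse=True)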

    def clone(self, *args, **kwargs):
        """
        Clones the bootstrap using different data.

        Parameters
        ----------
        args
            Positional arguments to bootstrap
        kwargs
            Keyword arguments to bootstrap

        Returns
        -------
        bs
            Bootstrap instance
        """
        pos_arguments = copy.deepcopy(self._parameters)
        pos_arguments.extend(args)
        bs = self.__class__(*pos_arguments, **kwargs)
        if self._seed is not None:
            bs.seed(self._seed)
        return bs

    def apply(self, func, reps=1000, extra_kwargs=None):
        """
        Applies a function to bootstrap replicated data

        Parameters
        ----------
        func : callable
            Function the computes parameter values.  See Notes for requirements
        reps : int, optional
            Number of bootstrap replications
        extra_kwargs : dict, optional
            Extra keyword arguments to use when calling func.  Must not conflict
            with keyword arguments used to initialize bootstrap

        Returns
        -------
        results : array
            reps by nparam array of computed function values where each row
            corresponds to a bootstrap iteration

        Notes
        -----
        When there are no extra keyword arguments, the function is called

        .. code:: python

            func(params, *args, **kwargs)

        where args and kwargs are the bootstrap version of the data provided
        when setting up the bootstrap.  When extra keyword arguments are used,
        these are appended to kwargs before calling func

        Examples
        --------
        >>> import numpy as np
        >>> x = np.random.randn(1000,2)
        >>> from arch.bootstrap import IIDBootstrap
        >>> bs = IIDBootstrap(x)
        >>> def func(y):
        ...     return y.mean(0)
        >>> results = bs.apply(func, 100)
        """
        kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs)
        base = func(*self._args, **kwargs)
        try:
            num_params = base.shape[0]
        except (AttributeError, IndexError):
            num_params = 1
        results = np.zeros((reps, num_params))
        count = 0
        for pos_data, kw_data in self.bootstrap(reps):
            kwargs = _add_extra_kwargs(kw_data, extra_kwargs)
            results[count] = func(*pos_data, **kwargs)
            count += 1
        return results

    def _construct_bootstrap_estimates(self, func, reps, extra_kwargs=None,
                                       std_err_func=None, studentize_reps=0,
                                       sampling='nonparametric'):
        # Private, more complicated version of apply
        self._last_func = func
        semi = parametric = False
        if sampling == 'parametric':
            parametric = True
        elif sampling == 'semiparametric':
            semi = True

        if extra_kwargs is not None:
            if any(k in self._kwargs for k in extra_kwargs):
                raise ValueError('extra_kwargs contains keys used for variable'
                                 ' names in the bootstrap')
        kwargs = _add_extra_kwargs(self._kwargs, extra_kwargs)
        base = func(*self._args, **kwargs)

        num_params = 1 if np.isscalar(base) else base.shape[0]
        results = np.zeros((reps, num_params))
        studentized_results = np.zeros((reps, num_params))

        count = 0
        for pos_data, kw_data in self.bootstrap(reps):
            kwargs = _add_extra_kwargs(kw_data, extra_kwargs)
            if parametric:
                kwargs['state'] = self.random_state
                kwargs['params'] = base
            elif semi:
                kwargs['params'] = base
            results[count] = func(*pos_data, **kwargs)
            if std_err_func is not None:
                std_err = std_err_func(results[count], *pos_data, **kwargs)
                studentized_results[count] = (results[count] - base) / std_err
            elif studentize_reps > 0:
                # Need new bootstrap of same type
                nested_bs = self.clone(*pos_data, **kw_data)
                # Set the seed to ensure reproducibility
                seed = self.random_state.randint(2 ** 31 - 1)
                nested_bs.seed(seed)
                cov = nested_bs.cov(func, studentize_reps,
                                    extra_kwargs=extra_kwargs)
                std_err = np.sqrt(np.diag(cov))
                studentized_results[count] = (results[count] - base) / std_err
            count += 1

        self._base = np.asarray(base)
        self._results = np.asarray(results)
        self._studentized_results = np.asarray(studentized_results)

    def cov(self, func, reps=1000, recenter=True, extra_kwargs=None):
        """
        Compute parameter covariance using bootstrap

        Parameters
        ----------
        func : callable
            Callable function that returns the statistic of interest as a
            1-d array
        reps : int, optional
            Number of bootstrap replications
        recenter : bool, optional
            Whether to center the bootstrap variance estimator on the average
            of the bootstrap samples (True) or to center on the original sample
            estimate (False).  Default is True.
        extra_kwargs: dict, optional
            Dictionary of extra keyword arguments to pass to func

        Returns
        -------
        cov: array
            Bootstrap covariance estimator

        Notes
        -----
        func must have the signature

        .. code:: python

            func(params, *args, **kwargs)

        where params are a 1-dimensional array, and `*args` and `**kwargs` are
        data used in the bootstrap.  The first argument, params, will be
        none when called using the original data, and will contain the estimate
        computed using the original data in bootstrap replications.  This
        parameter is passed to allow parametric bootstrap simulation.

        Example
        -------
        Bootstrap covariance of the mean

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(axis=0)
        >>> y = np.random.randn(1000, 3)
        >>> bs = IIDBootstrap(y)
        >>> cov = bs.cov(func, 1000)

        Bootstrap covariance using a function that takes additional input

        >>> def func(x, stat='mean'):
        ...     if stat=='mean':
        ...         return x.mean(axis=0)
        ...     elif stat=='var':
        ...         return x.var(axis=0)
        >>> cov = bs.cov(func, 1000, extra_kwargs={'stat':'var'})

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap

        """
        self._construct_bootstrap_estimates(func, reps, extra_kwargs)
        base, results = self._base, self._results

        if recenter:
            errors = results - np.mean(results, 0)
        else:
            errors = results - base

        return errors.T.dot(errors) / reps

    def var(self, func, reps=1000, recenter=True, extra_kwargs=None):
        """
        Compute parameter variance using bootstrap

        Parameters
        ----------
        func : callable
            Callable function that returns the statistic of interest as a
            1-d array
        reps : int, optional
            Number of bootstrap replications
        recenter : bool, optional
            Whether to center the bootstrap variance estimator on the average
            of the bootstrap samples (True) or to center on the original sample
            estimate (False).  Default is True.
        extra_kwargs: dict, optional
            Dictionary of extra keyword arguments to pass to func

        Returns
        -------
        var : 1-d array
            Bootstrap variance estimator

        Notes
        -----
        func must have the signature

        .. code:: python

            func(params, *args, **kwargs)

        where params are a 1-dimensional array, and `*args` and `**kwargs` are
        data used in the bootstrap.  The first argument, params, will be
        none when called using the original data, and will contain the estimate
        computed using the original data in bootstrap replications.  This
        parameter is passed to allow parametric bootstrap simulation.

        Example
        -------
        Bootstrap covariance of the mean

        >>> from arch.bootstrap import IIDBootstrap
        >>> import numpy as np
        >>> def func(x):
        ...     return x.mean(axis=0)
        >>> y = np.random.randn(1000, 3)
        >>> bs = IIDBootstrap(y)
        >>> variances = bs.var(func, 1000)

        Bootstrap covariance using a function that takes additional input

        >>> def func(x, stat='mean'):
        ...     if stat=='mean':
        ...         return x.mean(axis=0)
        ...     elif stat=='var':
        ...         return x.var(axis=0)
        >>> variances = bs.var(func, 1000, extra_kwargs={'stat': 'var'})

        .. note::

            Note this is a generic example and so the class used should be the
            name of the required bootstrap

        """
        self._construct_bootstrap_estimates(func, reps, extra_kwargs)
        base, results = self._base, self._results

        if recenter:
            errors = results - np.mean(results, 0)
        else:
            errors = results - base

        return (errors ** 2).sum(0) / reps

    def update_indices(self):
        """
        Update indices for the next iteration of the bootstrap.  This must
        be overridden when creating new bootstraps.
        """
        return self.random_state.randint(self._num_items,
                                         size=self._num_items)
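    # A minimal sketch of the override contract: subclasses redefine
    # update_indices to change the resampling scheme. Hypothetical example,
    # not part of the library:
    #
    #     class HalfSampleBootstrap(IIDBootstrap):
    #         def update_indices(self):
    #             # draw n//2 indices with replacement instead of n
    #             return self.random_state.randint(self._num_items,
    #                                              size=self._num_items // 2)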

    def _resample(self):
        """
        Resample all data using the values in _index
        """
        indices = self._index
        pos_data = []
        # Resample positional data; pandas objects need iloc, while plain
        # arrays support fancy indexing directly
        for values in self._args:
            if isinstance(values, (pd.Series, pd.DataFrame)):
                pos_data.append(values.iloc[indices])
            else:
                pos_data.append(values[indices])
        named_data = {}
        # Keyword data is resampled the same way and also exposed as an
        # attribute of the bootstrap
        for key, values in iteritems(self._kwargs):
            if isinstance(values, (pd.Series, pd.DataFrame)):
                named_data[key] = values.iloc[indices]
            else:
                named_data[key] = values[indices]
            setattr(self, key, named_data[key])

        self.pos_data = pos_data
        self.kw_data = named_data
        self.data = (pos_data, named_data)
        return self.data
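
# A minimal sketch (not part of the library) of how update_indices could be
# overridden in a new bootstrap: this toy subclass draws fixed-length
# circular blocks instead of IID draws; the block length of 10 is an
# arbitrary illustrative choice.
import numpy as np
from arch.bootstrap import IIDBootstrap

class ToyCircularBlockBootstrap(IIDBootstrap):
    def update_indices(self):
        block = 10  # hypothetical fixed block length
        n = self._num_items
        starts = self.random_state.randint(n, size=n // block + 1)
        indices = np.concatenate([s + np.arange(block) for s in starts])
        return indices[:n] % n  # wrap blocks around the end of the sample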
Example #39
class Smearer(Analyzer):
    '''Applies numerical smearing to objects.

  Example:

  from heppy.analyzers.triggerrates.Smearer import Smearer
  jetToElectronTransformer = cfg.Analyzer(
    Smearer,
    input_collection = 'jets',
    output_collection = 'l1tEGamma',
    convolution_file = "convFile.root",
    convolution_histogram_prefix = "l1tObjectPtDistributionBinnedInGenJet",
    bins = [0, 10, 20, 30, 40, 50, 60],
    object_x_range = (0, 200),
    probability_file = "probFile.root",
    probability_histogram = "efficiencyPlot"
  )

  * input_collection : input collection containing the jets
  * output_collection : output collection in which the converted jets will be stored, i.e. the type of object the jets will be converted to
  * convolution_file : file containing the jet-to-object convolution curves
  * convolution_histogram_prefix : histogram name prefix in the convolution file; it is followed by _10_20 if 10 is the low bin edge and 20 is the high bin edge
  * bins : bin edges of the convolution file
  * object_x_range : range in which the momentum of the generated object will be located; this helps generate new objects faster if the TH1F is very big
  * probability_file : file containing the binned fraction of objects to smear (a sort of transformation probability). If omitted, the probability is assumed to be 1.
  * probability_histogram : name of the histogram containing the probabilities

  NOTE: A property 'match' is added to the input object to connect it to the smeared version.
  '''
    def beginLoop(self, setup):
        super(Smearer, self).beginLoop(setup)
        self.rng = RandomState()
        self.rng.seed()
        self.convolutionHistograms = []
        self.convolutionFile = TFile(self.cfg_ana.convolution_file)
        # Load one convolution histogram per pt bin; names follow the
        # pattern <prefix>_<lowEdge>_<highEdge>
        for x in xrange(0, len(self.cfg_ana.bins) - 1):
            self.convolutionHistograms.append(
                self.convolutionFile.Get(
                    self.cfg_ana.convolution_histogram_prefix + "_" +
                    str(self.cfg_ana.bins[x]) + "_" +
                    str(self.cfg_ana.bins[x + 1])))

        self.probabilityFileName = getattr(self.cfg_ana, "probability_file",
                                           "")
        if self.probabilityFileName != "":
            self.probabilityFile = TFile(self.probabilityFileName)
            self.probabilityHistogram = self.probabilityFile.Get(
                self.cfg_ana.probability_histogram)
        else:
            self.probabilityFile = None
            self.probabilityHistogram = None

    # End beginLoop

    def process(self, event):

        jets = getattr(event, self.cfg_ana.input_collection)
        output_collection = []

        for jet in jets:
            jetPt = jet.pt()
            jetEta = jet.eta()
            jetPhi = jet.phi()
            jetE = jet.e()
            # Find the pt bin of the jet; jets beyond the last bin edge are
            # left unmatched and skipped
            factorIndex = bisect_right(self.cfg_ana.bins, jetPt) - 1
            if factorIndex >= len(self.cfg_ana.bins) - 1:
                jet.match = None
                continue

            rndNumber = self.rng.uniform(0, 1)

            # Transform the jet only with the probability stored for its bin;
            # if no probability histogram was given, always transform
            if self.probabilityHistogram is None:
                isMisidentified = True
            else:
                isMisidentified = rndNumber < self.probabilityHistogram.GetBinContent(
                    factorIndex + 1)

            if not isMisidentified:
                jet.match = None
                continue

            # Create a new object with the same properties
            trgObject = deepcopy(jet)
            # Get the quantity to add to the jet pt in order to smear it.
            # ROOT's TH1::GetRandom samples via the cumulative distribution
            # function (see the numpy sketch after this class).
            # Reference: https://root.cern.ch/doc/master/TH1_8cxx_source.html#l04710
            convolutionHistogram = self.convolutionHistograms[factorIndex]
            rndX = convolutionHistogram.GetRandom()
            trgObject._tlv.SetPtEtaPhiE(jetPt + rndX, jetEta, jetPhi, jetE)
            jet.match = trgObject
            jet.matches = [trgObject]
            jet.dr = 0
            trgObject.matches = [jet]
            trgObject.match = jet
            output_collection.append(trgObject)

        setattr(event, self.cfg_ana.output_collection, output_collection)

    # End process
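
# A minimal numpy sketch (illustrative; not part of heppy or ROOT) of the
# inverse-CDF sampling performed by TH1::GetRandom: pick a bin from the
# cumulative distribution of the bin contents, then draw uniformly within it.
# The names get_random, bin_edges and counts are hypothetical.
import numpy as np

def get_random(bin_edges, counts, rng):
    cdf = np.cumsum(counts, dtype=float)
    cdf /= cdf[-1]                                       # normalise to a CDF
    i = np.searchsorted(cdf, rng.uniform())              # choose a bin
    return rng.uniform(bin_edges[i], bin_edges[i + 1])   # draw inside the bin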