Example #1
    def event_occurrence_probability(events: List[str],
                                     power: TYPE_FLOAT = TYPE_FLOAT(1.0)):
        """Calculate the probabilities of event occurrences
        Args:
            events: list of standardized events
            power:
                parameter to adjust the probability by p = p**power/sum(p**power)
                This is to balance the contributions of less frequent and more frequent events.
                Default 1.0. In word2vec negative sampling, power=0.75 is used.
        """
        assert (len(events) > 0)

        total: int = len(events)
        counts = collections.Counter(events)
        powered = {
            event: np.power((count / total), power)
            for (event, count) in counts.items()
        }

        integral = np.sum(list(powered.values()))
        probabilities = {
            event: TYPE_FLOAT(p / integral)
            for (event, p) in powered.items()
        }
        assert np.abs(
            TYPE_FLOAT(1.0) - np.sum(list(probabilities.values()), dtype=TYPE_FLOAT)
        ) < TYPE_FLOAT(1e-5)

        del total, counts, powered, integral
        return probabilities
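A minimal usage sketch of the power-dampened probabilities described above. It assumes the method is exposed as Function.event_occurrence_probability (as in a later example) and that TYPE_FLOAT is np.float32; the event list is hypothetical.

import numpy as np

TYPE_FLOAT = np.float32  # assumption

events = ["the", "cat", "sat", "on", "the", "mat", "the"]
p_raw = Function.event_occurrence_probability(events=events, power=TYPE_FLOAT(1.0))
p_damped = Function.event_occurrence_probability(events=events, power=TYPE_FLOAT(0.75))
# power=0.75 shrinks the share of the frequent event "the" relative to rarer events.
assert p_damped["the"] < p_raw["the"]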
Example #2
def expected_gradient_from_log_loss(P: np.ndarray, T: np.ndarray,
                                    N: int) -> np.ndarray:
    """Calculate expected back-propagation from a log loss layer.
    L: () = log_loss(P)
    P: (N,M) = activation(Y) where activation is softmax or sigmoid.

    Args:
        P: Probabilities in the log loss layer
        T: Labels in the index format, NOT in OHE format
        N: Batch size
    Returns:
        EDY: expected back-propagation dL/dY of shape (N,M)
    """
    assert T.shape == (N, )
    # --------------------------------------------------------------------------------
    # EDY: Expected back-propagation E[dL/dY] from the log-loss layer
    # EDY has shape (N,M) and equals (P - T_ohe)/N, where T_ohe is the one-hot encoding of T.
    # EDY should match the actual back-propagation dL/dY from log-loss layer.
    # --------------------------------------------------------------------------------
    # (P-T)/N, NOT P/N - T
    EDY = np.copy(P)
    EDY[np.arange(N), T] -= TYPE_FLOAT(1)
    EDY /= TYPE_FLOAT(N)

    return EDY
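A small worked sketch of the (P - T)/N computation for a batch of N=2 and M=3 classes; the probability values are hypothetical and TYPE_FLOAT is assumed to be np.float32.

import numpy as np

P = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.3, 0.6]], dtype=np.float32)  # softmax outputs (hypothetical)
T = np.array([0, 2])                               # index labels, NOT one-hot
EDY = expected_gradient_from_log_loss(P=P, T=T, N=2)
# Row 0: subtract 1 at column 0 -> [-0.3, 0.2, 0.1] / 2 = [-0.15, 0.10,  0.05]
# Row 1: subtract 1 at column 2 -> [ 0.1, 0.3,-0.4] / 2 = [ 0.05, 0.15, -0.20]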
Example #3
    def update(self, W, dW, out=None) -> np.ndarray:
        """Default method to update the weight matrix W
        Args:
            W: weight matrix to update
            dW: gradient of dL/dW, the impact on L by dW
            out: location into which the result is stored
        Return:
            W: A reference to out if specified or a np array allocated.
        """
        # --------------------------------------------------------------------------------
        # Gradient can be zero. e.g for a Batch Normalization layer, when a feature xi
        # in a batch has the same value, such as a specific pixel in images is all black
        # then the standardized value xi_std = 0 -> dL/dGamma = sum(dL/dY * xi_std) = 0.
        # --------------------------------------------------------------------------------
        if np.all(np.abs(dW) < np.abs(W / TYPE_FLOAT(100.0))):
            self.logger.warning(
                "SGD[%s].update(): Gradient descent potentially stalling with dW < W/100.",
                self.name
            )

        # --------------------------------------------------------------------------------
        # --------------------------------------------------------------------------------
        # Why exclude the bias weight from regularization?
        # TODO: Remove w0 from the regularization (do not include the bias weight).
        # --------------------------------------------------------------------------------
        # Overfitting occurs when the model is overly sensitive to changes in the input.
        # The bias input is fixed (x0=1), hence it does not change, hence there is no
        # point in including its weight in the regularization.
        # --------------------------------------------------------------------------------
        l2 = self.l2
        lr = self.lr
        scale = TYPE_FLOAT(1.0) + l2
        # return np.subtract(W, dW * scale, out=out)
        return ne.evaluate("W - lr * dW * scale", out=out)
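A quick sketch checking that the numexpr expression above matches the plain NumPy update; it assumes numexpr is installed and TYPE_FLOAT is np.float32, with hypothetical W and dW.

import numexpr as ne
import numpy as np

TYPE_FLOAT = np.float32  # assumption
lr, l2 = TYPE_FLOAT(0.01), TYPE_FLOAT(1e-3)
scale = TYPE_FLOAT(1.0) + l2
W = np.random.randn(3, 4).astype(TYPE_FLOAT)
dW = np.random.randn(3, 4).astype(TYPE_FLOAT)

# The numexpr evaluation should agree with W - lr * dW * (1 + l2).
assert np.allclose(W - lr * dW * scale, ne.evaluate("W - lr * dW * scale"), atol=1e-6)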
def test_020_fss_builder_to_succeed():
    """
    Objective:
        Verify FeatureScaleShift.build()
    Expected:
        build() parses the spec and succeeds
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        # ----------------------------------------------------------------------
        # Validate the correct specification.
        # NOTE: Invalidate one parameter at a time from the correct one.
        # Otherwise it is not clear what is being tested.
        # ----------------------------------------------------------------------
        valid_fss_parameters = FeatureScaleShift.specification_template(
        )[_PARAMETERS]
        lr = TYPE_FLOAT(valid_fss_parameters[_OPTIMIZER][_PARAMETERS]["lr"])
        l2 = TYPE_FLOAT(valid_fss_parameters[_OPTIMIZER][_PARAMETERS]["l2"])
        log_level = valid_fss_parameters[_LOG_LEVEL]
        try:
            fss: FeatureScaleShift = FeatureScaleShift.build(
                parameters=valid_fss_parameters)
            assert fss.gamma_optimizer.lr == lr
            assert fss.gamma_optimizer.l2 == l2
            assert fss.beta_optimizer.lr == lr
            assert fss.beta_optimizer.l2 == l2
            assert fss.logger.getEffectiveLevel() == log_level
        except Exception as e:
            raise RuntimeError("Matmul.build() must succeed with %s" %
                               valid_fss_parameters)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
Example #5
    def build(parameters: Dict):
        """Build an optimizer based on the specification.
        """
        if "lr" in parameters:
            parameters["lr"] = TYPE_FLOAT(parameters["lr"])
        if "l2" in parameters:
            parameters["l2"] = TYPE_FLOAT(parameters["l2"])
        return SGD(**parameters)
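A hypothetical usage sketch of SGD.build(); the spec keys and values are illustrative only.

sgd = SGD.build(parameters={"name": "sgd01", "lr": 0.05, "l2": 1e-3})
# lr and l2 are cast to TYPE_FLOAT before SGD is constructed.
assert sgd.lr == TYPE_FLOAT(0.05) and sgd.l2 == TYPE_FLOAT(1e-3)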
def test_020_std_builder_to_succeed():
    """
    Objective:
        Verify Standardization.build()
    Expected:
        build() parses the spec and succeeds
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        # ----------------------------------------------------------------------
        # Validate the correct specification.
        # NOTE: Invalidate one parameter at a time from the correct one.
        # Otherwise it is not clear what is being tested.
        # ----------------------------------------------------------------------
        valid_std_parameters = Standardization.specification_template()[_PARAMETERS]
        eps = TYPE_FLOAT(valid_std_parameters["eps"])
        momentum = TYPE_FLOAT(valid_std_parameters["momentum"])
        log_level = valid_std_parameters[_LOG_LEVEL]
        try:
            std: Standardization = Standardization.build(parameters=valid_std_parameters)
            assert std.logger.getEffectiveLevel() == log_level
            assert std.eps == eps
            assert std.momentum == momentum
        except Exception as e:
            raise RuntimeError(
                "Matmul.build() must succeed with %s" % valid_std_parameters
            ) from e

    profiler.disable()
    profiler.print_stats(sort="cumtime")
Example #7
def linear_separable(d: int = 2, n: int = 10000):
    """Generate a data set X to linearly separate.
    Args:
        d: number of dimension of the data
        n: number of data to generate
    Returns:
        X: d dimension data (x1, ..., xd) of shape (n, d)
        T: labels. Ti = 1 if Xi dot W > 0 else 0
        W: vector of shape (d+1,), orthogonal to the hyperplane that linearly separates the data
    """
    assert n >= 10, f"n {n} is too small"
    # Generate X:(n, d) and prepend the bias x0=1 to form _X:(n, d+1)
    X = np.random.randn(n, d)
    _X = np.c_[np.ones(n),  # Bias
               X].astype(TYPE_FLOAT)

    while True:
        # Unit vector W of dimension d+1 (+1 for the bias weight w0),
        # normalized by dividing by its magnitude.
        W = np.random.randn(d + 1).astype(TYPE_FLOAT)
        W = W / TYPE_FLOAT(np.linalg.norm(W))

        # Label t = 1 if X dot w > 0 else 0
        T = (np.einsum('ij,j', _X, W) > 0).astype(TYPE_LABEL)

        # Each label has at least 30% of the data
        if 0.3 < np.sum(T[T == 1]) / n < 0.7:
            break

    return X.astype(TYPE_FLOAT), T.astype(TYPE_LABEL), W.astype(TYPE_FLOAT)
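A minimal usage sketch; TYPE_FLOAT and TYPE_LABEL are assumed to be np.float32 and an integer label type.

X, T, W = linear_separable(d=2, n=1000)
assert X.shape == (1000, 2) and T.shape == (1000,) and W.shape == (3,)
# T[i] == 1 exactly when [1, X[i][0], X[i][1]] dot W > 0 (the leading 1 is the bias).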
Example #8
def test_010_standardize_eps(caplog):
    """
    Objective:
        Verify the standardize() function with eps
    Expected:
        standardize(X) = (X - np.mean(X)) / sqrt(variance + eps)
    """
    name = "test_010_standardize"
    keepdims = True

    # Test eps
    u = TYPE_FLOAT(1e-3)
    for i in range(NUM_MAX_TEST_TIMES):
        eps = np.random.uniform(1e-12, 1e-7)
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(2, NUM_MAX_NODES)
        X = np.random.uniform(-MAX_ACTIVATION_VALUE, MAX_ACTIVATION_VALUE,
                              (N, M)).astype(TYPE_FLOAT)
        Logger.debug("%s: X \n%s\n", name, X)

        # Constraint: standardize(X) == (X - np.mean(X)) / np.std(X)
        ddof = 1 if N > 1 else 0
        variance = np.var(X, axis=0, keepdims=keepdims, ddof=ddof)
        sd = np.sqrt(variance + eps)
        if np.all(sd > 0):
            # Expected
            mean = np.mean(X, axis=0)
            E = (X - mean) / sd
            # Actual

            if (i % 20) == 0:
                backup = eps
                eps = TYPE_FLOAT(0)  # Test eps == 0 at 5 % of times
                npsd = np.std(X, axis=0, keepdims=keepdims, ddof=ddof)
                if np.all(npsd > TYPE_FLOAT(0)):
                    # **********************************************************************
                    # Constraint: numpy sd matches __sd from standardize()
                    # **********************************************************************
                    A, __mean, __sd, _ = standardize(X,
                                                     keepdims=keepdims,
                                                     eps=eps)
                    assert np.allclose(a=__sd,
                                       b=npsd,
                                       atol=u,
                                       rtol=TYPE_FLOAT(0))
                else:
                    eps = backup
                    continue

            A, __mean, __sd, _ = standardize(X, keepdims=keepdims, eps=eps)

            # **********************************************************************
            # Constraint. mean/sd should be same.
            # **********************************************************************
            assert np.allclose(mean, __mean, atol=u)
            assert np.allclose(sd, __sd, atol=u), \
                "expected sd\n%s\nactual\n%s\ndiff=\n%s\n" % (sd, __sd, (sd - __sd))
            assert np.allclose(E, A, atol=u), \
                f"X\n{X}\nstandardized\n{E}\ndiff\n{A-E}\n"
Example #9
    def __init__(
            self,
            name=_SGD_NAME_DEFAULT,
            lr: TYPE_FLOAT = TYPE_FLOAT(0.01),
            l2: TYPE_FLOAT = TYPE_FLOAT(1e-3),
            log_level=logging.ERROR
    ):
        super().__init__(name=name, lr=lr, l2=l2, log_level=log_level)
def _validate_layer_running_statistics(layer: BatchNormalization, previous_ru,
                                       previous_rsd, X, eps):
    momentum = layer.momentum
    ddof = 1 if X.shape[0] > 1 else 0

    if layer.total_training_invocations == 1:
        assert np.all(layer.RU == layer.U)
        assert np.all(layer.RSD == layer.SD)
        assert layer.total_training_invocations * layer.N == layer.total_rows_processed
    else:
        # ----------------------------------------------------------------------
        # Currently in standardize(), sd[sd==0.0] = 1.0 is implemented.
        # ----------------------------------------------------------------------
        variance = X.var(axis=0, ddof=ddof)
        if eps > TYPE_FLOAT(0.0):
            sd = np.sqrt(variance + eps)
        else:
            sd = np.std(X, axis=0, ddof=ddof)
            sd[sd == TYPE_FLOAT(0.0)] = TYPE_FLOAT(1.0)

        expected_ru = momentum * previous_ru + (TYPE_FLOAT(1) -
                                                momentum) * X.mean(axis=0)
        expected_rsd = momentum * previous_rsd + (TYPE_FLOAT(1) -
                                                  momentum) * sd
        assert np.allclose(layer.RU,
                           expected_ru,
                           atol=TYPE_FLOAT(1e-6),
                           rtol=TYPE_FLOAT(0))
        assert \
            np.allclose(layer.RSD, expected_rsd, atol=TYPE_FLOAT(1e-6), rtol=TYPE_FLOAT(0)), \
            "X=\n%s\nX.sd()=\n%s\nlayer.SD=\n%s\nlayer.RSD=\n%s\n" \
            % (X, X.std(axis=0, ddof=ddof), layer.SD, layer.RSD)
Example #11
    def event_indexing(corpus: str, power: TYPE_FLOAT = TYPE_FLOAT(1)):
        """Generate event indices from a text corpus
        Add meta-events EVENT_NIL at 0 and EVENT_UNK at 1.
        events are all lower-cased.

        Assumptions:
            Meta event EVENT_NIL is NOT included in the corpus

        Args:
            corpus: A string including sentences to process.
            power: parameter to adjust the event probability

        Returns:
            event_to_index: event to index mapping
            index_to_event: index to event mapping
            vocabulary: unique events in the corpus
            event_to_probability: event to occurrence-probability mapping
        """
        events = Function.standardize(corpus).split()
        # --------------------------------------------------------------------------------
        # Preliminary event probabilities from the standardized event sequence.
        # --------------------------------------------------------------------------------
        _event_probabilities: Dict[str, TYPE_FLOAT] = \
            Function.event_occurrence_probability(events=events, power=power)
        assert _event_probabilities.get(EVENT_NIL.lower(), None) is None, \
            f"EVENT_NIL {EVENT_NIL.lower()} should not be included in the corpus. Change EVENT_NIL"
        del events

        # --------------------------------------------------------------------------------
        # Event probabilities with NIL and UNK at the top so that the vocabulary and
        # probabilities keep the same event order. UNK may already be in the corpus.
        # --------------------------------------------------------------------------------
        event_to_probability: Dict[str, TYPE_FLOAT] = {
            EVENT_NIL.lower(): TYPE_FLOAT(0),
            EVENT_UNK.lower(): _event_probabilities.get(EVENT_UNK.lower(), TYPE_FLOAT(0))
        }
        event_to_probability.update(_event_probabilities)
        del _event_probabilities

        # --------------------------------------------------------------------------------
        # Vocabulary from the keys of probabilities preserving the same event order
        # --------------------------------------------------------------------------------
        vocabulary: List[str] = list(event_to_probability.keys())

        # --------------------------------------------------------------------------------
        # mappings
        # --------------------------------------------------------------------------------
        index_to_event: Dict[TYPE_INT, str] = dict(enumerate(vocabulary))
        event_to_index: Dict[str, TYPE_INT] = dict(
            zip(index_to_event.values(), index_to_event.keys()))

        return event_to_index, index_to_event, vocabulary, event_to_probability
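A hypothetical usage sketch, assuming event_indexing is exposed as Function.event_indexing and that EVENT_NIL/EVENT_UNK are the library's meta-event strings; the corpus is illustrative.

corpus = "To be, or not to be, that is the question."
event_to_index, index_to_event, vocabulary, event_to_probability = \
    Function.event_indexing(corpus=corpus, power=TYPE_FLOAT(0.75))

# Meta-events occupy the first two slots, as asserted by the load() example later on.
assert vocabulary[0] == EVENT_NIL.lower() and vocabulary[1] == EVENT_UNK.lower()
assert event_to_index[vocabulary[2]] == 2
assert event_to_probability[EVENT_NIL.lower()] == TYPE_FLOAT(0)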
Example #12
def test_010_sigmoid():
    """Test Case for sigmoid
    """
    u = ACTIVATION_DIFF_ACCEPTANCE_VALUE
    assert sigmoid(np.array(TYPE_FLOAT(0),
                            dtype=TYPE_FLOAT)) == TYPE_FLOAT(0.5)
    x = np.array([0.0, 0.6, 0., -0.5]).reshape((2, 2)).astype(TYPE_FLOAT)
    t = np.array(
        [0.5, 0.6456563062257954529091, 0.5,
         0.3775406687981454353611]).reshape((2, 2)).astype(TYPE_FLOAT)
    assert np.all(np.abs(t - sigmoid(x)) < u), \
        f"delta (t - sigmoid(x)) is expected < {u} but got {t - sigmoid(x)}"
def _instance(
        name,
        num_nodes: int,
        momentum: TYPE_FLOAT,
        eps: TYPE_FLOAT = TYPE_FLOAT(0),
        log_level: int = logging.ERROR
):
    return Standardization(
        name=name,
        num_nodes=num_nodes,
        momentum=TYPE_FLOAT(momentum),
        eps=TYPE_FLOAT(eps),
        log_level=log_level
    )
def _validate_layer_values(_layer: Standardization, X, eps):
    ddof = 1 if X.shape[0] > 1 else 0

    # ----------------------------------------------------------------------
    # Currently in standardize(), sd[sd==0.0] = 1.0 is implemented.
    # ----------------------------------------------------------------------
    md = X - X.mean(axis=0)     # md = mean deviation
    variance = X.var(axis=0, ddof=ddof)
    if eps > TYPE_FLOAT(0.0):
        sd = np.sqrt(variance + eps).astype(TYPE_FLOAT)
    else:
        sd = np.std(X, axis=0, ddof=ddof).astype(TYPE_FLOAT)
        sd[sd == TYPE_FLOAT(0.0)] = TYPE_FLOAT(1.0)

    expected_standardized = md / sd
    diff = expected_standardized - _layer.Y

    assert np.allclose(_layer.U, X.mean(axis=0), atol=1e-6, rtol=TYPE_FLOAT(0.0))
    assert np.allclose(_layer.Xmd, md, atol=1e-6, rtol=TYPE_FLOAT(0.0))
    assert np.allclose(_layer.SD, sd, atol=1e-6, rtol=TYPE_FLOAT(0.0))
    assert np.allclose(
        _layer.Y,
        expected_standardized,
        atol=TYPE_FLOAT(1e-6),
        rtol=TYPE_FLOAT(0)
    ), "Xstd\n%s\nexpected_standardized=\n%s\ndiff=\n%s\n" \
       % (_layer.Y, expected_standardized, diff)
Example #15
def linear_separable_sectors(n: int = 10000,
                             d: int = 2,
                             m: int = 3,
                             r: float = 1.0,
                             rotation: float = 0.0):
    """Generate plots X in a circle to be linearly separated into m sectors.
    The sectors are rotated as per the "rotation" parameter.

    Args:
        n: number of coordinates
        d: number of dimension of the data
        m: number of classes (sectors)
        r: radius
        rotation: angle to rotate X
    Returns:
        X: d dimension data (x1, ..., xd) of shape (n, d)
        T: labels (0, 1, ..., m-1) of m classes
        B: list of the base vectors (cos(θ), sin(θ)) of each sector, of shape (m, 2)
    """
    assert m > 1, f"m {m} > 1 required to split."
    assert d == 2, "currently only d==2 for 2D (bias, x1, x2) is valid"
    assert n >= m, "At least m instance of coordinates required."

    # Generate plots in the circle
    Z = np.random.uniform(0, 2 * np.pi, n)
    radii = np.random.uniform(0.0, r, n)
    X = np.c_[radii * np.cos(Z), radii * np.sin(Z)].astype(TYPE_FLOAT)
    T = np.zeros(n, dtype=TYPE_LABEL)
    sector = TYPE_FLOAT(2 * np.pi) / TYPE_FLOAT(m)  # angle of a sector

    # The initial vector (1, 0) forms the start angle of the 0th sector.
    B = np.array([[1.0, 0.0]], dtype=TYPE_FLOAT)

    # The labels have already been set to 0 for the 0th sector. Hence splitting
    # the circle into m sectors starts with the 1st sector.
    for i in range(1, m):  # Start with 1st sector.
        base = (sector * i)  # Start angle of the i-th sector.
        T[is_point_inside_sector(
            X=X, base=base,
            coverage=sector)] = i  # Label i for plots in the i-th sector
        B = np.r_[B,
                  np.array([[np.cos(base), np.sin(base)]])].astype(TYPE_FLOAT)

    # Rotate the circle and the start angles of the sectors
    rotation = TYPE_FLOAT(rotation % (2 * np.pi))
    X = rotate(X, rotation)
    B = rotate(B, rotation)
    return X.astype(TYPE_FLOAT), T.astype(TYPE_LABEL), B.astype(TYPE_FLOAT)
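A minimal usage sketch; TYPE_FLOAT/TYPE_LABEL are assumed to be np.float32 and an integer label type, and the argument values are illustrative.

import numpy as np

X, T, B = linear_separable_sectors(n=900, d=2, m=3, r=1.0, rotation=np.pi / 6)
assert X.shape == (900, 2) and T.shape == (900,) and B.shape == (3, 2)
assert set(np.unique(T)) <= {0, 1, 2}  # one label per sector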
def test_categorical_classifier(
        M: int = 3,
        log_loss_function: Callable = softmax_cross_entropy_log_loss):
    """Test case for layer matmul class
    """
    N = 10
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.1))
    X, T, V = linear_separable_sectors(n=N, d=D, m=M)
    assert X.shape == (N, D)
    X, T = transform_X_T(X, T)

    def callback(W):
        """Dummy callback"""
        return W

    profiler = cProfile.Profile()
    profiler.enable()

    train_binary_classifier(N=N,
                            D=D,
                            M=M,
                            X=X,
                            T=T,
                            W=W,
                            log_loss_function=log_loss_function,
                            optimizer=optimizer,
                            test_numerical_gradient=True,
                            log_level=logging.WARNING,
                            callback=callback)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
def _test_binary_classifier(
        M: int = 2,
        log_loss_function: Callable = softmax_cross_entropy_log_loss,
        num_epochs: int = 100):
    """Test case for layer matmul class
    """
    N = 50
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.1))
    X, T, V = linear_separable(d=D, n=N)

    # X, T = transform_X_T(X, T)

    def callback(W):
        return W

    train_binary_classifier(N=N,
                            D=D,
                            M=M,
                            X=X,
                            T=T,
                            W=W,
                            log_loss_function=log_loss_function,
                            optimizer=optimizer,
                            num_epochs=num_epochs,
                            test_numerical_gradient=True,
                            callback=callback)
Example #18
    def X(self, X: Union[float, np.ndarray]):
        """Set layer input X
        1. Convert into 2D array if X is scalar or X.ndim < 2.
        2. Allocate _dX storage.
        3. DO NOT set/update _D as it can be set with the weight shape.
        """
        assert X is not None and \
               ((isinstance(X, np.ndarray) and X.dtype == TYPE_FLOAT) or isinstance(X, float))
        assert np.all(np.isfinite(X)), f"{X}"

        if np.all(np.abs(X) > TYPE_FLOAT(1.0)):
            self.logger.warning("Input data X has not been standardized.")

        if isinstance(X, float):
            # Convert the scalar to a (1,1) array so that shape[0] (batch size N) is valid.
            self._X = np.array(X, dtype=TYPE_FLOAT).reshape(1, 1)
        elif X.ndim == 1:
            self._X = np.array(X).reshape(1, -1)
        else:
            self._X = X

        assert self.X.size > 0
        self._N = self.X.shape[0]

        # Allocate the storage for np.func(out=dX).
        if self._dX.shape != self.X.shape:
            self._dX = np.empty(self.X.shape, dtype=TYPE_FLOAT)
def test_matmul_bn_relu_classifier(M: int = 3):
    """Test case for layer matmul class
    """
    N = 10
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.5))
    X, T, V = linear_separable_sectors(n=N, d=D, m=M)
    assert X.shape == (N, D)
    X, T = transform_X_T(X, T)

    def callback(W):
        """Dummy callback"""
        return W

    profiler = cProfile.Profile()
    profiler.enable()

    train_matmul_bn_relu_classifier(
        N=N,
        D=D,
        M=M,
        X=X,
        T=T,
        W=W,
        log_loss_function=softmax_cross_entropy_log_loss,
        optimizer=optimizer,
        test_numerical_gradient=True,
        callback=callback)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
def test_020_std_function_method_to_fail():
    """
    Objective:
        Verify that the _layer class instance methods reject invalid inputs
    Expected:
        Layer methods fail with an AssertionError.
    """
    for _ in range(NUM_MAX_TEST_TIMES):
        name = random_string(np.random.randint(1, 10))

        # For a layer which computes statistics on a per-feature basis,
        # M = 1 or N = 1 makes no sense.
        M: int = np.random.randint(2, NUM_MAX_NODES)
        momentum = TYPE_FLOAT(0.85)

        try:
            _layer = _instance(name=name, num_nodes=M, momentum=momentum)
            _layer.function(int(1))
            raise RuntimeError("Invoke _layer.function(int(1)) must fail.")
        except AssertionError:
            pass

        try:
            _layer = _instance(name=name, num_nodes=M, momentum=momentum)
            _layer.gradient(int(1))
            raise RuntimeError("Invoke _layer.gradient(int(1)) must fail.")
        except AssertionError:
            pass
Example #21
    def load(self, path: str) -> List:
        """Load and restore the layer state
        Args:
            path: state file path
        """
        state = super().load(path)
        del self._vocabulary, \
            self._probabilities, \
            self._event_to_index, \
            self._event_to_probability

        self._vocabulary = state[0]
        self._probabilities = state[1]
        self._event_to_index = state[2]
        self._event_to_probability = state[3]

        assert \
            isinstance(self.event_to_index, dict) and \
            isinstance(self.event_to_probability, dict) and \
            self.event_to_index[EVENT_NIL.lower()] == 0 and \
            self.event_to_index[EVENT_UNK.lower()] == 1 and \
            self.event_to_probability[EVENT_NIL.lower()] == 0 and \
            self.vocabulary[0] == EVENT_NIL.lower() and \
            self.probabilities[0] == TYPE_FLOAT(0) and \
            self.probabilities[1] == self.event_to_probability[EVENT_UNK.lower()]

        return self.S
Example #22
    def build(parameters: Dict):
        assert _NAME in parameters and _NUM_NODES in parameters

        return ReLU(
            name=parameters[_NAME],
            num_nodes=parameters[_NUM_NODES],
            slope=parameters.get("slope", TYPE_FLOAT(0)),
            log_level=parameters.get(_LOG_LEVEL, logging.ERROR)
        )
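A hypothetical usage sketch of ReLU.build(); it assumes _NAME and _NUM_NODES are the spec keys used above, and the values are illustrative.

relu = ReLU.build(parameters={_NAME: "relu01", _NUM_NODES: 8})
# slope defaults to TYPE_FLOAT(0) and log_level to logging.ERROR when omitted.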
Example #23
    def list_probabilities(self, events: Iterable[str]) -> Iterable[TYPE_FLOAT]:
        """Provide the probabilities of events
        Args:
            events: events for which to get the probabilities
        """
        return [
            self.event_to_probability.get(event, TYPE_FLOAT(0))
            for event in events
        ]
Example #24
    def __init__(
            self,
            name,
            lr=TYPE_FLOAT(0.01),
            l2: TYPE_FLOAT = TYPE_FLOAT(1e-3),
            log_level=logging.WARNING
    ):
        """
        Args:
            lr: learning rate of the gradient descent
            l2: L2 regularization hyper parameter, e.g. 1e-3, set to 0 not to use it
        """
        super().__init__(name=name, log_level=log_level)
        assert isinstance(lr, TYPE_FLOAT) and isinstance(l2, TYPE_FLOAT)
        self.lr = lr
        self.l2 = l2

        self._logger = logging.getLogger(name)
        self._logger.setLevel(log_level)
Example #25
def sets_in_circles(radius: TYPE_FLOAT,
                    ratio: TYPE_FLOAT = 1.0,
                    m: int = 3,
                    n: int = 10000):
    """Generate m set of coordinates where each set forms plots in a circle
    Args:
        radius: circle radius
        ratio: how far to locate a new centre of a circle
        m: number of circles
        n: Number of points in a circle
    Returns:
        circles: coordinates of circles. Shape (m, n, d). (n, d) per circle
        centre: coordinates of the centre of each circle. Shape (m, 2)
    """
    assert 2 <= m <= n and ratio > 0 and radius > 0.0

    radius = TYPE_FLOAT(radius)
    d = 2  # circle is in 2D
    circle = set_in_a_radius(radius=radius, d=d, n=n)

    # Generate a new circle by shifting the centre of the "circle" to a "centre".
    # The coordinate of the new centre = rotate(base, step * i).
    base = np.array([radius * ratio, TYPE_FLOAT(0)])
    step = (2 * np.pi) / (m - 1)
    step = step if step < (np.pi / 2) else np.pi / m

    def __rotate(angle):
        return rotate(X=base, radian=angle)

    centres = list(map(__rotate, [step * i for i in range(0, m - 1, 1)]))
    centres.insert(0, np.array([0.0, 0.0]))  # add the original circle
    mean = np.mean(np.array(centres), axis=0)
    centres = np.array([_centre - mean for _centre in centres])

    def __relocate(location):
        return shift(X=circle, offsets=location)

    circles = np.array(list(map(__relocate, centres)))
    assert circles.shape == (m, n, d), f"{circles.shape}\n{circles}"
    assert centres.shape == (m, 2)

    return circles, centres
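A minimal usage sketch; rotate, shift, and set_in_a_radius are assumed to be available as used above, and the argument values are illustrative.

import numpy as np

circles, centres = sets_in_circles(radius=TYPE_FLOAT(1.0), ratio=TYPE_FLOAT(2.0), m=3, n=500)
assert circles.shape == (3, 500, 2) and centres.shape == (3, 2)
# The centres are mean-shifted, so the whole configuration is centred at the origin.
assert np.allclose(np.mean(centres, axis=0), 0.0, atol=1e-6)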
def _must_fail(
        name: str,
        num_nodes: int,
        momentum: TYPE_FLOAT = 0.9,
        eps=TYPE_FLOAT(0),
        log_level: int = logging.ERROR,
        msg: str = ""
):
    assert msg
    try:
        Standardization(
            name=name,
            num_nodes=num_nodes,
            momentum=TYPE_FLOAT(momentum),
            eps=TYPE_FLOAT(eps),
            log_level=log_level
        )
        raise RuntimeError(msg)
    except AssertionError:
        pass
Example #27
    def gradient_numerical(
        self,
        h: TYPE_FLOAT = TYPE_FLOAT(1e-5)) -> List[Union[float, np.ndarray]]:
        GN: Union[float, np.ndarray] = super().gradient_numerical()[0]

        # Analytical gradient dL/dX is (P-T)/N whose range is [-1,1].
        # Numerical gradient GN should not be far away from the boundary.
        assert np.all(np.abs(GN) < LOG_LOSS_GRADIENT_ACCEPTANCE_VALUE), \
            "%s: numerical dL/dX needs between (-1.2, 1.2) but \n%s\n"\
            % (f"Layer[{self.name}].gradient_numerical()", GN)

        return [GN]
def test_020_fss_method_predict():
    """
    Objective:
        Verify the prediction function
    Expected:
        With a single invocation, predict(X) produces the same result as function(X).
    """

    # pylint: disable=not-callable
    def objective(X: np.ndarray) -> Union[float, np.ndarray]:
        """Dummy objective function"""
        return np.sum(X, dtype=TYPE_FLOAT)

    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        name = random_string(np.random.randint(1, 10))
        numexpr_enabled = bool(np.random.randint(0, 2))

        # For a layer which computes statistics on a per-feature basis,
        # M = 1 or N = 1 makes no sense.
        N: int = np.random.randint(2, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(2, NUM_MAX_NODES)

        X = np.random.randn(N, M).astype(TYPE_FLOAT)

        _layer = _instance(name=name, num_nodes=M, log_level=logging.DEBUG)
        _layer.objective = objective
        Y = _layer.function(
            X,
            numexpr_enabled=numexpr_enabled,
        )
        # ********************************************************************************
        # Constraint: With only 1 invocation, predict should be the same as Y.
        # ********************************************************************************
        assert np.allclose(Y,
                           _layer.predict(X),
                           atol=TYPE_FLOAT(1e-9),
                           rtol=TYPE_FLOAT(0))
Example #29
    def train(self,
              X: Union[TYPE_FLOAT, np.ndarray],
              T: Union[TYPE_FLOAT, np.ndarray],
              run_validations: bool = False):
        """Run the model training.
        1. Set labels T to both inference and objective layers although only the
           objective layers would need labels.
        2. Invoke function() on the layers to run forward path through layers.
        3. Invoke gradient() on the layers to calculate and back-propagate the
           gradients through layers.
        4. Invoke update() on the layers to run gradient descents.

        Args:
            X: batch training data in shape (N,M)
            T: label of shape (N,) in the index format
            run_validations: flag to run validations, e.g. the numerical gradient check
        Returns:
            dS: analytical gradients of the network layers
        """
        # self.X = X.astype(TYPE_FLOAT)
        self.X = X
        self.T = T.astype(TYPE_LABEL)  # Set T to the layers. See @T.setter

        # --------------------------------------------------------------------------------
        # Forward path
        # --------------------------------------------------------------------------------
        # pylint: disable=not-callable
        self._Y = self.function(self.X).astype(TYPE_FLOAT)
        self._L = self.objective(self.Y).astype(TYPE_FLOAT)
        self._history.append(self.L)

        # --------------------------------------------------------------------------------
        # Backward path
        # --------------------------------------------------------------------------------
        self._dX = self.gradient(TYPE_FLOAT(1)).astype(TYPE_FLOAT)

        # --------------------------------------------------------------------------------
        # Gradient descent
        # --------------------------------------------------------------------------------
        self.update()

        # --------------------------------------------------------------------------------
        # Info
        # --------------------------------------------------------------------------------
        self.logger.info("Network[%s]: Loss is %s", self.name, self.L)
        self.logger.info("Gradient dL/dX is %s", self.dX)
        self.logger.info("Analytical gradients dS are %s\n", self.dS)

        # TODO:
        #  Reconsider using dictionary with layer ids as keys instead of list.
        #  Think again why need to return dS and how it could be used.
        return self.dS
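A hypothetical driver loop over train(); network, X_batch and T_batch are placeholders for a concrete model and data, not part of the code above.

losses = []
for epoch in range(100):
    dS = network.train(X=X_batch, T=T_batch)  # forward, backward, and gradient descent
    losses.append(network.L)                  # train() also records the loss in its own history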
Example #30
def _instantiate(name: str, num_nodes: int, num_features: int, objective=None):
    category = TYPE_FLOAT(np.random.uniform())
    if category < 0.3:
        W = weights.he(num_nodes, num_features + 1)
    elif category < 0.7:
        W = weights.xavier(num_nodes, num_features + 1)
    else:
        W = weights.uniform(num_nodes, num_features + 1)

    matmul = Matmul(name=name, num_nodes=num_nodes, W=W)
    if objective is not None:
        matmul.objective = objective
    return matmul