Example #1
    def predictedPoint(self, x, y, model, coords, values, invg):
        """Prediction of the Big Kriging for a point \o/

        Parameters
        ----------
        x, y : floats
               coordinates of the desired predicted point
        model : Model
                what model to use (and not your favorite color!)
        coords : ndarray
                 original grid coordinates
        values : ndarray
                 original grid values, ordered like coords
        invg : the resulting inverse gamma matrix based on model and coords

        Returns
        ----------
        array(x,y,v,e)
            x, y : coordinates of the desired predicted point
            v    : the predicted value
            e    : the standard error

        """
        dist = spatial.distance_matrix(coords, [[x, y]])
        gg = np.matrix(np.vstack([model.func(dist), [1]]))
        weights = invg * gg
        v = np.sum(values[:, np.newaxis] * np.asarray(weights[:-1]))
        e = np.sqrt(abs(np.sum(gg.A1 * weights.A1)))
        return np.asarray([x, y, v, e])
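The `invg` argument is assumed to be precomputed elsewhere. Purely as a sketch (not the original project's code), an ordinary-kriging "inverse gamma matrix" could be built from `coords` and a `model` exposing `func(dist)` roughly like this; `build_invg` is a hypothetical helper name:

import numpy as np
from scipy import spatial

def build_invg(coords, model):
    n = len(coords)
    d = spatial.distance_matrix(coords, coords)   # pairwise distances between known points
    g = np.zeros((n + 1, n + 1))
    g[:n, :n] = model.func(d)                     # model applied to the distance matrix
    g[n, :n] = 1.0                                # Lagrange-multiplier row ...
    g[:n, n] = 1.0                                # ... and column (corner stays 0)
    return np.linalg.inv(g)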
Example #2
def biKmeans(dataSet, k, distMeas=distEuclid):
    m = np.shape(dataSet)[0]
    clusterAssment = np.mat(np.zeros((m, 2)))
    centroid0 = np.mean(dataSet, axis=0).tolist()[0]
    centList = [centroid0]
    for j in range(m):
        clusterAssment[j, 1] = distMeas(np.mat(centroid0), dataSet[j, :]) ** 2
    while len(centList) < k:
        lowestSSE = np.inf
        # try splitting every cluster
        for i in range(len(centList)):
            ptsInCurrCluster = dataSet[np.nonzero(clusterAssment[:, 0].A == i)[0], :]
            centroidMat, splitClustAss = KMeans(ptsInCurrCluster, 2, distMeas)
            sseSplit = np.sum(splitClustAss[:, 1])
            sseNotSplit = np.sum(clusterAssment[np.nonzero(clusterAssment[:, 0].A != i)[0], 1])
            print "sseSplit, and notSplit:", sseSplit, sseNotSplit
            if (sseSplit + sseNotSplit) < lowestSSE:
                bestCentToSplit = i
                bestNewCents = centroidMat
                bestClustAss = splitClustAss.copy()
                lowestSSE = sseSplit + sseNotSplit
        # update the cluster assignment
        bestClustAss[np.nonzero(bestClustAss[:, 0].A == 1)[0], 0] = len(centList)
        bestClustAss[np.nonzero(bestClustAss[:, 0].A == 0)[0], 0] = bestCentToSplit
        print "the bestCentToSplit is :", bestCentToSplit
        print "the len of bestClustAss is:", len(bestClustAss)
        # kMeans split the chosen cluster in two: the centroid of sub-cluster 0 replaces
        # the centroid being split, and the centroid of sub-cluster 1 is appended to centList
        centList[bestCentToSplit] = bestNewCents[0, :]
        centList.append(bestNewCents[1, :])
        clusterAssment[np.nonzero(clusterAssment[:, 0].A == bestCentToSplit)[0], :] = bestClustAss
    return centList, clusterAssment
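`biKmeans` relies on a `KMeans` routine and a `distEuclid` measure that are not shown in this excerpt. A minimal distance helper consistent with the squared-distance usage above might look like this (an assumption, not the original code):

import numpy as np

def distEuclid(vecA, vecB):
    # Euclidean distance between two row vectors
    return np.sqrt(np.sum(np.power(vecA - vecB, 2)))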
Example #3
def correlateTimeseries(A, B):

    # Convert the time series to relative time
    aDate = A["date"] - A["date"].iat[0]
    bDate = B["date"] - B["date"].iat[0]

    # Prepare indices for matched data points
    datesMatched = np.searchsorted(aDate, bDate)
    l = len(aDate) - 1
    datesMatched[datesMatched > l] = l
    c = dict()
    keyword = "price"
    # Select data according to matched indices
    a = np.array(A[keyword].values)
    aReduced = a[datesMatched]
    bReduced = np.array(B[keyword].values)
    # Correct to the baseline
    aReduced = aReduced - np.mean(aReduced)
    bReduced = bReduced - np.mean(bReduced)
    # Perform the z-transformation
    zA = aReduced / np.sqrt(np.sum(np.square(aReduced)) / l)
    zB = bReduced / np.sqrt(np.sum(np.square(bReduced)) / l)
    # Calculate the correlation
    r = pearsonr(zA, zB)
    return r[0]  # correlation coefficient; r[1] would be the p-value
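A hypothetical call, assuming `A` and `B` are pandas DataFrames with "date" and "price" columns (the sample values below are made up):

import pandas as pd
from scipy.stats import pearsonr

A = pd.DataFrame({"date": pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-04"]),
                  "price": [10.0, 11.0, 9.5]})
B = pd.DataFrame({"date": pd.to_datetime(["2020-01-01", "2020-01-03", "2020-01-04"]),
                  "price": [20.0, 21.5, 19.0]})
print(correlateTimeseries(A, B))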
Example #4
    def train(self, batchdata, nepochs, cdsteps=1, passthroughs=None):
        errors = N.empty(nepochs)
        numcases, numvis, numbatches = batchdata.shape
        for epoch in range(nepochs):
            vishidinc = N.zeros((self._numvis, self._numhid))
            hidbiasinc = N.zeros((self._numhid))
            visbiasinc = N.zeros((self._numvis))
            errsum = 0
            for batch in range(batchdata.shape[2]):
                data = batchdata[:, :, batch]
                if passthroughs:
                    for rbm in passthroughs:
                        data = rbm.passthrough(data)
                # Start of positive phase
                poshidprobs, poshidstates = self._hid_activate(self._vishid, self._hidbiases, data, meanfield=False)

                posprods = N.dot(data.T, poshidprobs)
                poshidact = N.sum(poshidprobs, axis=0)
                posvisact = N.sum(data, axis=0)

                # Start negative phase
                if cdsteps == 1:
                    negdata = self._vis_activate(self._vishid.T, self._visbiases, poshidstates, meanfield=True)
                    neghidprobs = self._hid_activate(self._vishid, self._hidbiases, negdata)[1]
                else:
                    neghidstates = poshidstates
                    for i in range(cdsteps):
                        negdata, negvisstates = self._vis_activate(self._vishid.T, self._visbiases, neghidstates)

                        neghidstates, neghidprobs = self._hid_activate(self._vishid, self._hidbiases, negvisstates)

                negprods = N.dot(negdata.T, neghidprobs)
                neghidact = N.sum(neghidprobs, axis=0)
                negvisact = N.sum(negdata, axis=0)

                # End of negative phase
                err = N.sum((data - negdata) ** 2)
                errsum = errsum + err
                if epoch + self._trainedfor > 5:
                    momentum = self._finalmomentum
                else:
                    momentum = self._initialmomentum

                # Updates

                vishidinc = momentum * vishidinc + self._epsilonw * (
                    (posprods - negprods) / numcases - self._weightcost * self._vishid
                )
                visbiasinc = momentum * visbiasinc + (self._epsilonvb / numcases) * (posvisact - negvisact)
                hidbiasinc = momentum * hidbiasinc + (self._epsilonhb / numcases) * (poshidact - neghidact)

                self._vishid += vishidinc
                self._visbiases += visbiasinc
                self._hidbiases += hidbiasinc
                if passthroughs:
                    del data
            print "epoch %5d, error = %10.5f" % (epoch + 1 + self._trainedfor, errsum)
            errors[epoch] = errsum
        self._trainedfor += nepochs
        return errors
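`_hid_activate` and `_vis_activate` are internal to the class and not shown. Purely to illustrate the contract the loop above expects (probabilities plus sampled states, using the `N` alias for numpy as in the snippet), a sigmoid-based hidden activation could look roughly like this; this is an assumption, not the original implementation:

    def _hid_activate(self, vishid, hidbiases, data, meanfield=True):
        # logistic activation of the hidden units given the visible data
        probs = 1.0 / (1.0 + N.exp(-N.dot(data, vishid) - hidbiases))
        # return mean-field probabilities, or sample binary states when meanfield=False
        states = probs if meanfield else (probs > N.random.rand(*probs.shape)).astype(float)
        return probs, states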
Example #5
    def predict(self, X):
        """
		This method makes a nearest neighbor prediction on test data.
		Refer to the predict_soft doc string for a description of X.
		"""
        tr_r, tr_c = np.asmatrix(self.X_train).shape  # get size of training data
        te_r, te_c = np.asmatrix(X).shape  # get size of test data
        assert te_c == tr_c, "Training and prediction data must have same number of features"

        num_classes = len(self.classes)
        Y_te = np.tile(self.Y_train[0], (te_r, 1))  # make Y_te same data type as Y_train
        K = min(self.K, tr_r)  # can't have more neighbors than there are training data points
        for i in range(te_r):  # for each test example...
            # ...compute sum of squared differences...
            dist = np.sum(np.power(self.X_train - np.asmatrix(X)[i, :], 2), axis=1)
            # ...find nearest neighbors over training data and keep nearest K data points
            sorted_dist = np.sort(dist, axis=0)[0:K]
            indices = np.argsort(dist, axis=0)[0:K]
            wts = np.exp(-self.alpha * sorted_dist)
            count = []
            for c in range(len(self.classes)):
                # total weight of instances of that class
                count.append(np.sum(wts[self.Y_train[indices] == self.classes[c]]))
            count = np.asarray(count)
            c_max = np.argmax(count)  # find largest count...
            Y_te[i] = self.classes[c_max]  # ...and save results
        return Y_te
Example #6
def compareImplementations2():
    (x, y) = DataModel.loadData("..\\train.csv")

    y = y.astype(int)

    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)

    x_sub = x_train[:500, :]
    y_sub = y_train[:500]

    s_my = SimpleNN2.NeuralNetConfig(784, 70, 10)
    s_t = NN_1HL.NN_1HL(reg_lambda=10, opti_method="CG")

    np.random.seed(123)

    thetas = [s_t.rand_init(784, 70), s_t.rand_init(70, 10)]

    # Check costs
    cost_t = s_t.function(s_t.pack_thetas(thetas[0].copy(), thetas[1].copy()), 784, 70, 10, x_sub, y_sub, 10)
    print("Cost test: ", cost_t)

    cost_my = SimpleNN2.computeCost(s_my, thetas[0], thetas[1], x_sub, y_sub, 10)
    print("Cost my: ", cost_my)

    # Check gradients
    grad_t = s_t.function_prime(s_t.pack_thetas(thetas[0].copy(), thetas[1].copy()), 784, 70, 10, x_sub, y_sub, 10)
    print("Grad sum test: ", np.sum(grad_t))

    grad_my1, grad_my2 = SimpleNN2.computeGrad(s_my, thetas[0], thetas[1], x_sub, y_sub, 10)
    print("Grad sum my: ", np.sum(grad_my1) + np.sum(grad_my2))
Example #7
def detect_nan(i, node, fn):
    for output in fn.outputs:
        if not isinstance(output[0], np.random.RandomState) and not (
            hasattr(node, "op")
            and isinstance(
                node.op, (theano.sandbox.rng_mrg.GPU_mrg_uniform, theano.sandbox.cuda.basic_ops.GpuAllocEmpty)
            )
        ):
            try:
                has_nans = np.isnan(output[0]).any() or np.isinf(output[0]).any()
            except TypeError:
                has_nans = False
            if not has_nans:
                continue
            print ("*** NaN detected ***")
            theano.printing.debugprint(node, depth=3)
            print (type(node), node.op, type(node.op))
            print ("Inputs : %s" % [input[0] for input in fn.inputs])
            print "Input shape", [input[0].shape for input in fn.inputs]
            print ("Outputs: %s" % [output[0] for output in fn.outputs])
            print "Output shape", [output[0].shape for output in fn.outputs]
            print "NaN # :", [np.sum(np.isnan(output[0])) for output in fn.outputs]
            print "Inf # :", [np.sum(np.isinf(output[0])) for output in fn.outputs]
            print "NaN location: ", np.argwhere(np.isnan(output[0])), ", Inf location: ", np.argwhere(
                np.isinf(output[0])
            )
            import pdb

            pdb.set_trace()
            raise ValueError
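For context, a hook with this signature is typically attached through Theano's MonitorMode when compiling a function, along the lines of the sketch below (based on the Theano monitoring docs; `x` and `y` are placeholder variables):

# f = theano.function([x], y,
#                     mode=theano.compile.MonitorMode(post_func=detect_nan))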
Example #8
def computeCost(theta, X, y, l=0):
    m = y.size
    term1 = np.dot(-np.array(y).T, np.log(h(theta, X)))
    term2 = np.dot((1 - np.array(y)).T, np.log(1 - h(theta, X)))
    regterm = (l / 2) * np.sum(np.dot(theta[1:].T, theta[1:]))
    J = float((1.0 / m) * (np.sum(term1 - term2) + regterm))
    return J
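The hypothesis function `h` is not defined in this excerpt; for logistic regression it would ordinarily be the sigmoid of the linear score. A minimal sketch, assuming `theta` is shaped to match the columns of `X`:

import numpy as np

def h(theta, X):
    # sigmoid hypothesis for logistic regression
    return 1.0 / (1.0 + np.exp(-np.dot(X, theta)))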
Example #9
    def test_bandpassZPHSHVsPitsa(self):
        """
        Test Butterworth zero-phase bandpass filter against Butterworth
        zero-phase bandpass filter of PITSA. Note that the corners value is
        twice the value of the filter sections in PITSA. The rms of the
        difference between ObsPy and PITSA tends to get bigger with higher
        order filtering.
        Note: The Zero-Phase filters deviate from PITSA's zero-phase filters
        at the end of the trace! The rms for the test is calculated omitting
        the last 200 samples, as this part of the trace is assumed to
        generally be of low interest/importance.
        """
        # load test file
        file = os.path.join(self.path, "rjob_20051006.gz")
        f = gzip.open(file)
        data = np.loadtxt(f)
        f.close()
        # parameters for the test
        samp_rate = 200.0
        freq1 = 5
        freq2 = 10
        corners = 2
        # filter trace
        datcorr = bandpass(data, freq1, freq2, df=samp_rate, corners=corners, zerophase=True)
        # load pitsa file
        file = os.path.join(self.path, "rjob_20051006_bandpassZPHSH.gz")
        f = gzip.open(file)
        data_pitsa = np.loadtxt(f)
        f.close()
        # calculate normalized rms
        rms = np.sqrt(np.sum((datcorr[:-200] - data_pitsa[:-200]) ** 2) / np.sum(data_pitsa[:-200] ** 2))
        self.assertEqual(rms < 1.0e-05, True)
Example #10
def loglike_func(hyper, local, dat_fixm, dat_varm):
    sigma_like_R0, sigma_like_R1 = split_group(hyper, local)

    # fix mass
    Rob0 = dat_fixm[:, 2]
    Rerr0 = dat_fixm[:, 3]
    Rth0 = local[0:n_fixm]

    L0 = (
        0.0
        - 0.5 * np.sum((Rob0 - Rth0) ** 2.0 / (Rerr0 ** 2.0 + sigma_like_R0 ** 2.0))
        - 0.5 * np.sum(np.log(Rerr0 ** 2.0 + sigma_like_R0 ** 2.0))
    )

    # variable mass
    Mob1 = dat_varm[:, 0]
    Merr1 = dat_varm[:, 1]
    Rob1 = dat_varm[:, 2]
    Rerr1 = dat_varm[:, 3]
    Mth1 = local[n_fixm : n_fixm + n_varm]
    Rth1 = local[n_fixm + n_varm :]

    L1 = (
        0.0
        - 0.5 * np.sum((Mob1 - Mth1) ** 2.0 / Merr1 ** 2.0)
        - 0.5 * np.sum((Rob1 - Rth1) ** 2.0 / (Rerr1 ** 2.0 + sigma_like_R1 ** 2.0))
        - 0.5 * np.sum(np.log(Rerr1 ** 2.0 + sigma_like_R1 ** 2.0))
    )

    L = L0 + L1
    return L / temperature
Example #11
def svm_loss(x, y):
    """
  Computes the loss and gradient using for multiclass SVM classification.

  Inputs:
  - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
    for the ith input.
  - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
    0 <= y[i] < C

  Returns a tuple of:
  - loss: Scalar giving the loss
  - dx: Gradient of the loss with respect to x
  """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx
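A quick, self-contained sanity check of `svm_loss` on made-up scores:

import numpy as np

x = np.array([[3.0, 1.0, 2.2],
              [0.5, 2.0, 1.0]])   # scores for 2 inputs, 3 classes
y = np.array([0, 1])              # correct classes
loss, dx = svm_loss(x, y)
print(loss, dx.shape)             # scalar hinge loss and a (2, 3) gradient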
Example #12
File: gmm.py Project: kslin/CS181
def gm_assign_to_cluster(X, center_list, cov_list, p_k):
    """Assigns each sample to one of the Gaussian clusters given.
    
    Returns an array with numbers, 0 corresponding to the first cluster in the
    cluster list.
    """
    # Reused code from E-step, should be unified somehow:
    samples = X.shape[0]
    K = len(center_list)
    log_p_Xn_mat = np.zeros((samples, K))
    for k in range(K):
        log_p_Xn_mat[:, k] = logmulnormpdf(X, center_list[k], cov_list[k]) + np.log(p_k[k])
    pmax = np.max(log_p_Xn_mat, axis=1)
    log_p_Xn = pmax + np.log(np.sum(np.exp(log_p_Xn_mat.T - pmax), axis=0).T)
    logL = np.sum(log_p_Xn)

    log_p_nk = np.zeros((samples, K))
    for k in range(K):
        # log_p_nk[:,k] = logmulnormpdf(X, center_list[k], cov_list[k]) + np.log(p_k[k]) - log_p_Xn
        log_p_nk[:, k] = log_p_Xn_mat[:, k] - log_p_Xn

    print(log_p_nk)
    # Assign to cluster:
    maxP_k = np.c_[np.max(log_p_nk, axis=1)] == log_p_nk
    # print np.max(log_p_nk, axis=1)
    maxP_k = maxP_k * (np.array(range(K)) + 1)
    return np.sum(maxP_k, axis=1) - 1
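As an aside, the mask-multiply-sum construction at the end is, except for how ties are broken, the same as taking a per-row argmax:

    # equivalent (up to ties) to the mask-based assignment above:
    # return np.argmax(log_p_nk, axis=1)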
Example #13
def cubic(c, d):
    """
    Solve x**3 + c * x + d = 0
    """

    c = c.astype(complex)
    d = d.astype(complex)

    q = c / 3.0
    r = -d / 2.0

    delta = q ** 3 + r ** 2

    pos = delta >= 0.0

    s = np.zeros(c.shape, dtype=complex)
    t = np.zeros(c.shape, dtype=complex)

    if np.sum(pos) > 0:
        s[pos], t[pos] = delta_pos(r[pos], delta[pos])

    if np.sum(~pos) > 0:
        s[~pos], t[~pos] = delta_neg(r[~pos], q[~pos])

    x1 = s + t
    x2 = -(s + t) / 2.0 + np.sqrt(3.0) / 2.0 * (s - t) * 1j
    x3 = -(s + t) / 2.0 - np.sqrt(3.0) / 2.0 * (s - t) * 1j

    return x1, x2, x3
Example #14
def reldiff(a, b):
    diff = np.sum(np.abs(a - b))
    norm = np.sum(np.abs(a))
    if diff == 0:
        return 0
    reldiff = diff / norm
    return reldiff
Example #15
def gradient_NL(state):  # first of the gradient.

    Nsteps = int(state.total_time / state.dt)
    Nb = np.shape(state.basisNL)[0]  # number of basis functions.
    dt = state.dt

    gr_NL = np.zeros((state.Ng * Nb, Nsteps), dtype="float")

    MP = MembPot(state)
    MP12 = subMembPot(state)  # contains membrane potential in group before NL

    for g in range(state.Ng):  # loop over compartments/groups

        u = MP12[g, :]

        for i in range(Nb - 1):  # for loop to create a stack of NB times MP12[g,:]

            u = np.vstack((MP12[g, :], u))

        gr_NL[g * Nb : (g + 1) * Nb, :] = applyNL_2d(state.basisNL, u, state)  # apply NL to stack.

    sptimes = np.around(np.array(state.output[0]) / state.dt)  # conversion to timestep
    sptimes = sptimes.astype("int")  # has to be int to be an array of indices.
    lambd = np.exp(MP)  # MP contains the threshold.

    gr_NL = np.sum(gr_NL[:, sptimes], axis=1) - dt * np.sum(gr_NL * lambd, axis=1)

    # Before summation, gr_NL is the gradient of the membrane potential.

    return gr_NL
Example #16
    def test_update_beliefs_disconnected(self):
        a = DiscreteFactor([(1, 2), (2, 2)], data=np.array([[1, 2], [3, 4]], dtype=np.float64))
        b = DiscreteFactor([(2, 2), (3, 2)], data=np.array([[1, 2], [3, 4]], dtype=np.float64))
        c = DiscreteFactor([(4, 2), (5, 2)], data=np.array([[5, 6], [8, 9]], dtype=np.float64))
        d = DiscreteFactor([(5, 2), (6, 2)], data=np.array([[1, 6], [2, 3]], dtype=np.float64))
        e = DiscreteFactor([(7, 2), (8, 2)], data=np.array([[2, 1], [2, 3]], dtype=np.float64))

        model = Model([a, b, c, d, e])
        for factor in model.factors:
            print "before", factor, np.sum(factor.data)

        update_order = DistributeCollectProtocol(model)
        inference = LoopyBeliefUpdateInference(model, update_order=update_order)

        exact_inference = ExhaustiveEnumeration(model)
        exhaustive_answer = exact_inference.calibrate().belief
        print "Exhaust", np.sum(exhaustive_answer.data)

        change = inference.calibrate()
        print change

        for factor in model.factors:
            print factor, np.sum(factor.data)

        for variable in model.variables:
            marginal_beliefs = inference.get_marginals(variable)
            true_marginal = exhaustive_answer.marginalize([variable])
            for marginal in marginal_beliefs:
                assert_array_almost_equal(true_marginal.normalized_data, marginal.normalized_data)

        expected_ln_Z = np.log(exhaustive_answer.data.sum())
        self.assertAlmostEqual(expected_ln_Z, inference.partition_approximation())
Example #17
    def area(self, mz=None, method="shoelace"):
        data = self.as_poly(mz)

        # filter out any points that have a nan
        fdata = data[~np.isnan(data).any(1)]

        if method == "shoelace":
            # up to 5e-10 diff from shoelace-slow
            csum = np.sum(np.fliplr(np.roll(fdata, 1, axis=0)) * fdata, axis=0)
            return 0.5 * np.abs(csum[0] - csum[1])
        elif method == "shoelace-slow":
            csum = 0
            x, y = fdata[-1, :]
            for i in fdata:
                csum += i[0] * y - i[1] * x
                x, y = i
            return abs(csum / 2.0)
        elif method == "trapezoid":
            # http://en.wikipedia.org/wiki/trapezoidal_rule#non-uniform_grid
            # todo: this essentially ignores baseline data?
            # fdata[:, 1][fdata[:, 1] < 0] = 0
            # y = convolve(fdata[:, 1], [0.5, 0.5], mode='valid')

            # y = convolve(np.abs(fdata[:, 1]), [0.5, 0.5], mode='valid')

            y = convolve(fdata[:, 1], [0.5, 0.5], mode="valid")
            if y.shape[0] != fdata.shape[0] - 1:
                return 0
            return np.sum(np.diff(fdata[:, 0]) * y)
        elif method == "sum":
            return np.sum(fdata[:, 1])
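A standalone check of the vectorized shoelace step used in the "shoelace" branch above, on a unit square (illustrative only):

import numpy as np

poly = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])  # unit square
csum = np.sum(np.fliplr(np.roll(poly, 1, axis=0)) * poly, axis=0)
print(0.5 * np.abs(csum[0] - csum[1]))  # 1.0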
Example #18
    def main(self):
        global weights, densities, weighted_densities
        plt.figure()

        cluster = clusters.SingleStation()
        self.station = cluster.stations[0]

        R = np.linspace(0, 100, 100)
        densities = []
        weights = []
        for E in np.linspace(1e13, 1e17, 10000):
            relative_flux = E ** -2.7
            Ne = 10 ** (np.log10(E) - 15 + 4.8)
            self.ldf = KascadeLdf(Ne)
            min_dens = self.calculate_minimum_density_for_station_at_R(R)

            weights.append(relative_flux)
            densities.append(min_dens)
        weights = np.array(weights)
        densities = np.array(densities).T

        weighted_densities = np.sum(weights * densities, axis=1) / np.sum(weights)
        plt.plot(R, weighted_densities)
        plt.yscale("log")
        plt.ylabel("Min. density [m^{-2}]")
        plt.xlabel("Core distance [m]")
        plt.axvline(5.77)
        plt.show()
Example #19
def compareImplementations():
    (x, y) = DataModel.loadData("..\\train.csv")

    y = y.astype(int)

    (x_train, x_cv, y_train, y_cv) = DataModel.splitData(x, y)

    x_sub = x_train[:500, :]
    y_sub = y_train[:500]

    s_my = SimpleNN.SimpleNN([784, 70, 10])
    s_t = NN_1HL.NN_1HL(reg_lambda=1, opti_method="CG")

    np.random.seed(123)

    thetas = [s_t.rand_init(784, 70), s_t.rand_init(70, 10)]

    cost_t = s_t.function(s_t.pack_thetas(thetas[0].copy(), thetas[1].copy()), 784, 70, 10, x_sub, y_sub, 10)
    grad_t = s_t.function_prime(s_t.pack_thetas(thetas[0], thetas[1]), 784, 70, 10, x_sub, y_sub, 10)
    print(cost_t, np.sum(grad_t))

    cost_my = s_my.computeCost(s_my.combineTheta(thetas.copy()), x_sub, y_sub, 10)
    grad_my = s_my.computeGrad(s_my.combineTheta(thetas), x_sub, y_sub, 10)

    print(cost_my, np.sum(grad_my))
Example #20
def check_class_weight_balanced_linear_classifier(name, Classifier):
    """Test class weights with non-contiguous class labels."""
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y = np.array([1, 1, 1, -1, -1])

    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    if hasattr(classifier, "n_iter"):
        # This is a very small dataset, default n_iter are likely to prevent
        # convergence
        classifier.set_params(n_iter=1000)
    set_random_state(classifier)

    # Let the model compute the class frequencies
    classifier.set_params(class_weight="balanced")
    coef_balanced = classifier.fit(X, y).coef_.copy()

    # Count each label occurrence to reweight manually
    n_samples = len(y)
    n_classes = float(len(np.unique(y)))

    class_weight = {1: n_samples / (np.sum(y == 1) * n_classes), -1: n_samples / (np.sum(y == -1) * n_classes)}
    classifier.set_params(class_weight=class_weight)
    coef_manual = classifier.fit(X, y).coef_.copy()

    assert_array_almost_equal(coef_balanced, coef_manual)
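For reference, the "balanced" heuristic that the manual dict reproduces is n_samples / (n_classes * count(class)); with the y above that gives roughly 0.83 for class 1 and 1.25 for class -1:

import numpy as np

y = np.array([1, 1, 1, -1, -1])
n_samples, n_classes = len(y), len(np.unique(y))
weights = {int(c): n_samples / (n_classes * np.sum(y == c)) for c in np.unique(y)}
print(weights)  # {-1: 1.25, 1: 0.833...}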
Example #21
    def test_lowpassVsPitsa(self):
        """
        Test Butterworth lowpass filter against Butterworth lowpass filter of
        PITSA. Note that the corners value is twice the value of the filter
        sections in PITSA. The rms of the difference between ObsPy and PITSA
        tends to get bigger with higher order filtering.
        """
        # load test file
        file = os.path.join(self.path, "rjob_20051006.gz")
        f = gzip.open(file)
        data = np.loadtxt(f)
        f.close()
        # parameters for the test
        samp_rate = 200.0
        freq = 5
        corners = 4
        # filter trace
        datcorr = lowpass(data, freq, df=samp_rate, corners=corners)
        # load pitsa file
        file = os.path.join(self.path, "rjob_20051006_lowpass.gz")
        f = gzip.open(file)
        data_pitsa = np.loadtxt(f)
        f.close()
        # calculate normalized rms
        rms = np.sqrt(np.sum((datcorr - data_pitsa) ** 2) / np.sum(data_pitsa ** 2))
        self.assertEqual(rms < 1.0e-05, True)
Example #22
        def function(x, kwargs):
            """Computes part of the likelihood function that has
            terms containing `tau`.
            """

            data = kwargs["data"]
            zeta = kwargs["zeta"]
            pi = kwargs["pi"]
            j = kwargs["j"]

            func = np.zeros(zeta.estim[:, 1].shape, dtype=float)
            # loop over replicates
            for r in range(data.R):
                F = (
                    gammaln(data.value[j][r] + pi.value[j] * x)
                    + gammaln(data.total[j][r] - data.value[j][r] + (1 - pi.value[j]) * x)
                    - gammaln(data.total[j][r] + x)
                    + gammaln(x)
                    - gammaln(pi.value[j] * x)
                    - gammaln((1 - pi.value[j]) * x)
                )
                func += np.sum(F, 1)

            F = -1.0 * np.sum(zeta.estim[:, 1] * func)
            return F
Example #23
def initialGauss(data):
    size = np.shape(data)

    xSlice = np.sum(data, 0)
    ySlice = np.sum(data, 1)
    x0 = np.argmax(xSlice)
    y0 = np.argmax(ySlice)
    offset = np.nanmin(data)
    peak = np.nanmax(data)
    amplitude = peak - offset

    a = 0
    xOff = np.nanmin(xSlice)
    maxX = np.nanmax(xSlice) - xOff
    for i in range(len(xSlice)):
        if xSlice[i] - xOff > 0.5 * maxX:
            a += 1
    b = 0
    yOff = np.nanmin(ySlice)
    maxY = np.nanmax(ySlice) - yOff
    for i in range(len(ySlice)):
        if ySlice[i] - yOff > 0.5 * maxY:
            b += 1

    return [x0, y0, a, b, amplitude, offset]
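The two counting loops (for `a` and `b`) can also be written as np.sum over a boolean mask, which is the pattern most of the examples on this page illustrate:

a = int(np.sum(xSlice - xOff > 0.5 * maxX))
b = int(np.sum(ySlice - yOff > 0.5 * maxY))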
Example #24
        def hessian(x, kwargs):
            """Computes hessian of the likelihood function with respect to `tau`.
            """

            data = kwargs["data"]
            zeta = kwargs["zeta"]
            pi = kwargs["pi"]
            j = kwargs["j"]

            # loop over replicates
            hess = np.empty((1,), dtype="float")
            hf = np.zeros(zeta.estim[:, 1].shape, dtype=float)
            for r in range(data.R):
                f = (
                    pi.value[j] ** 2 * polygamma(1, data.value[j][r] + pi.value[j] * x)
                    + (1 - pi.value[j]) ** 2 * polygamma(1, data.total[j][r] - data.value[j][r] + (1 - pi.value[j]) * x)
                    - polygamma(1, data.total[j][r] + x)
                    + polygamma(1, x)
                    - pi.value[j] ** 2 * polygamma(1, pi.value[j] * x)
                    - (1 - pi.value[j]) ** 2 * polygamma(1, (1 - pi.value[j]) * x)
                )
                hf += np.sum(f, 1)
            hess[0] = -1 * np.sum(zeta.estim[:, 1] * hf)

            Hf = np.diag(hess)
            return Hf
Example #25
    def predict_soft(self, X):
        """
		This method makes a "soft" nearest-neighbor prediction on test data.

		Parameters
		----------
		X : N x M numpy array 
			N = number of testing instances; M = number of features.  
		"""
        tr_r, tr_c = np.asmatrix(self.X_train).shape  # get size of training data
        te_r, te_c = np.asmatrix(X).shape  # get size of test data

        num_classes = len(self.classes)
        prob = np.zeros((te_r, num_classes))  # allocate memory for class probabilities
        K = min(self.K, tr_r)  # can't have more neighbors than there are training data points
        for i in range(te_r):  # for each test example...
            # ...compute sum of squared differences...
            dist = np.sum(np.power(self.X_train - np.asmatrix(X)[i, :], 2), axis=1)
            # ...find nearest neighbors over training data and keep nearest K data points
            sorted_dist = np.sort(dist, axis=0)[0:K]
            indices = np.argsort(dist, axis=0)[0:K]
            wts = np.exp(-self.alpha * sorted_dist)
            count = []
            for c in range(len(self.classes)):
                # total weight of instances of that class
                count.append(np.sum(wts[self.Y_train[indices] == self.classes[c]]))
            count = np.asarray(count)
            prob[i, :] = np.divide(count, np.sum(count))  # save (soft) results
        return prob
Example #26
def compute_cost_logistic(theta, X, y, regularise=False, num_classes=5):
    """Cost function for logistic regression.
    Returns the negated cost and gradient, because it
    is to be optimised by a minimisation algorithm.
    Faster than compute_cost_logistic_safe."""
    global LAMBDA
    data_count, features_count = X.shape
    # Need to reshape, because optimisation algorithms flatten theta.
    theta = theta.reshape((num_classes, features_count))
    grad = np.zeros(shape=theta.shape)
    dot_theta_x = np.dot(theta, X.T)
    try:
        hypothesis = np.exp(dot_theta_x)
    except FloatingPointError:
        print "FloatingPointError, using compute_cost_logistic_safe function instead"
        return compute_cost_logistic_safe(theta, X, y, regularise, num_classes)
    # Compute probabilities matrix.
    probabilities = np.transpose(hypothesis / np.sum(hypothesis, axis=0))
    # Vector of actual probabilities, or the probabilities to use to calculate the loss
    actual_prob = np.array([probabilities[i][y[i]] for i in range(data_count)])
    loss = np.sum(np.log(actual_prob))  # Logistic loss
    for i in range(0, data_count):
        for k in range(0, num_classes):  # Update gradient for every class.
            grad[k] += np.dot(X[i], (identity(y[i], k) - probabilities[i][k]))
    if regularise:  # Ridge regularisation
        loss = LAMBDA * loss - (1 - LAMBDA) * np.sum(np.square(theta))
        grad = LAMBDA * grad - 2 * (1 - LAMBDA) * theta
    return [-loss, -grad.flatten()]
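The `identity` helper used in the gradient update is not shown; it is presumably a simple indicator function, for example (an assumption):

def identity(a, b):
    # 1 if the label matches the class index, 0 otherwise
    return 1.0 if a == b else 0.0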
Example #27
    def compute_distances_no_loops(self, X):
        """
    Compute the distance between each test point in X and each training point
    in self.X_train using no explicit loops.

    Input / Output: Same as compute_distances_two_loops
    """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        #########################################################################
        # Compute the l2 distance between all test points and all training      #
        # points without using any explicit loops, and store the result in      #
        # dists.                                                                #
        # HINT: Try to formulate the l2 distance using matrix multiplication    #
        #       and two broadcast sums.                                         #
        #########################################################################

        a = np.sum(np.square(self.X_train), axis=1)
        b = -2 * X.dot(self.X_train.T)
        c = np.sum(np.square(X), axis=1)

        dists = a[np.newaxis, :] + b + c[:, np.newaxis]

        #########################################################################
        #                         END OF YOUR CODE                              #
        #########################################################################
        return dists
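A small standalone check of the broadcast trick above, outside the class (note the snippet returns squared L2 distances, without a final square root):

import numpy as np

X_train = np.random.randn(5, 3)
X = np.random.randn(2, 3)
a = np.sum(np.square(X_train), axis=1)
b = -2 * X.dot(X_train.T)
c = np.sum(np.square(X), axis=1)
dists = a[np.newaxis, :] + b + c[:, np.newaxis]   # shape (2, 5)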
Example #28
def test_discretenb_predict_proba():
    """Test discrete NB classes' probability scores"""

    # The 100s below distinguish Bernoulli from multinomial.
    # FIXME: write a test to show this.
    X_bernoulli = [[1, 100, 0], [0, 1, 0], [0, 100, 1]]
    X_multinomial = [[0, 1], [1, 3], [4, 0]]

    # test binary case (1-d output)
    y = [0, 0, 2]  # 2 is regression test for binary case, 02e673
    for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]):
        clf = cls().fit(X, y)
        assert_equal(clf.predict(X[-1:]), 2)
        assert_equal(clf.predict_proba(X[0:1]).shape, (1, 2))
        assert_almost_equal(clf.predict_proba(X[:2]).sum(axis=1), [1, 1])

    # test multiclass case (2-d output, must sum to one)
    y = [0, 1, 2]
    for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]):
        clf = cls().fit(X, y)
        assert_equal(clf.predict_proba(X[0:1]).shape, (1, 3))
        assert_equal(clf.predict_proba(X[:2]).shape, (2, 3))
        assert_almost_equal(np.sum(clf.predict_proba(X[1:2])), 1)
        assert_almost_equal(np.sum(clf.predict_proba(X[-1:])), 1)
        assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1)
        assert_almost_equal(np.sum(np.exp(clf.intercept_)), 1)
Example #29
def loss(y, ybar, sparm):
    """Return the loss of ybar relative to the true labeling y.
    
    Returns the loss for the correct label y and the predicted label
    ybar.  In the event that y and ybar are identical loss must be 0.
    Presumably as y and ybar grow more and more dissimilar the
    returned value will increase from that point.  sparm.loss_function
    holds the loss function option specified on the command line via
    the -l option.

    The default behavior is to perform 0/1 loss based on the truth of
    y==ybar."""
    # HAMMING LOSS
    number_eventsC = len(y)
    sigma = 0
    prod = 0
    class_weights = [0.96, 0.005, 0.005, 0.01, 0.02]
    for i in range(number_eventsC):
        sigma += np.sum(y[i])
        # FINAL CHOICE: class weights are not used here because, on average, they gave worse accuracy and precision

        # prod +=np.sum( np.array(y[i])*np.array(ybar[i])/float(class_weights[i]))
        prod += np.sum(np.array(y[i]) * np.array(ybar[i]))

    return 1 - prod / float(sigma)
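A tiny illustration with two events and one-hot labels (made-up values): sigma counts the true labels, prod counts the matches, so one match out of two gives a loss of 0.5.

import numpy as np

y    = [np.array([1, 0, 0, 0, 0]), np.array([0, 1, 0, 0, 0])]
ybar = [np.array([1, 0, 0, 0, 0]), np.array([0, 0, 1, 0, 0])]
print(loss(y, ybar, None))  # 1 - 1/2 = 0.5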
Example #30
def implicit_black_box(propensities, V, X, w, h, deter_vector, stoc_positions, positions, valid, deriv):

    # Adjustment for systems reaching steady state

    temp = derivative_G(propensities, V, X, w, deter_vector, stoc_positions, positions, valid)
    # pdb.set_trace()
    valid_adjust_pos = np.where(np.sum(np.abs(temp), axis=0) < 1e-10, True, False)

    valid_adjust = valid[:, :]
    valid_adjust[valid_adjust_pos, :] = False

    # print(" Reached Steady State %d"%(np.sum(valid_adjust_pos)))

    from scipy.integrate import ode

    # pdb.set_trace()
    deter_ode = ode(f).set_integrator("lsoda", method="adams", with_jacobian=False)
    deter_ode.set_initial_value(X[deter_vector, :].flatten(), 0).set_f_params(
        [propensities, V, X, deter_vector, stoc_positions, positions, valid_adjust, w]
    )

    # pdb.set_trace()
    while deter_ode.successful() and deter_ode.t < h:
        deter_ode.integrate(h)

        # print("Black Box: \n"+ str(deter_ode.y))

        # print("iterator : \n:"+str(next_X[deter_vector,:]))
    X[deter_vector, :] = deter_ode.y.reshape((np.sum(deter_vector), X.shape[1]))

    # Another adjustment to keep the state non-negative
    X = np.where(X < 0.0, 0.0, X)

    return X