def fit_gaussian(S, Cin, param_means, param_sigmas):
    '''
    Histogram sampled predictions and plot the counts of the first two outputs.

    Output samples are drawn with ``sample_output`` from the arguments passed
    to this function, scaled by 100 and sorted into 1000 unit-width bins
    (edges 0, 1, ..., 999). The Gaussian fit itself is currently disabled;
    only the histograms are plotted.

    S, Cin, param_means, param_sigmas: forwarded unchanged, in this order, to
        ``sample_output``.

    NOTE: this function does not return. After showing the plot it ends the
    process with ``sys.exit()``.
    '''

    # Use the arguments given to this function rather than module-level names,
    # so the result depends only on what the caller passes in.
    output_samples = sample_output(S, Cin, param_means, param_sigmas) * 100

    # generate approximate gaussian
    bins = np.arange(1000)

    # np.digitize returns 0 for samples below bins[0]; slot 0 is dropped below,
    # so such samples are not counted.
    indices = np.digitize(output_samples, bins)

    count1 = np.bincount(indices[:, 0], minlength=1001)[1:]
    print(bins.shape)
    print(count1.shape)
    print(count1)
    count2 = np.bincount(indices[:, 1], minlength=1001)[1:]

    # Gaussian fitting is disabled for now. The intended calls were:
    #     curve_fit(gaussian, bins, count1)
    #     curve_fit(gaussian, bins, count2)

    plt.plot(bins, count1)
    plt.plot(bins, count2)

    plt.show()
    sys.exit()
Example #2
0
    def _init_common_params(self):
        """Set up the state shared by all RFLVMs.

        Fills in, in this order: the latent variable `X`, the per-cluster
        means `mu` and covariances `Sigma`, the cluster assignments `Z` with
        their counts `Z_count`, the matrix `W`, and the empty buffer
        `X_samples` for stored samples.
        """
        # Latent variable `X`: PCA projection of `Y`, followed by stabilization.
        projector = PCA(n_components=self.D, random_state=self.rng)
        self.X = projector.fit_transform(self.Y)
        self._stabilize_x()

        # One mean vector and one covariance matrix per cluster. Each
        # covariance is drawn first; the mean is then drawn using that
        # covariance scaled by 1 / prior_obs.
        self.mu = np.empty((self.K, self.D))
        self.Sigma = np.empty((self.K, self.D, self.D))
        for cluster_idx in range(self.K):
            self.Sigma[cluster_idx] = self.iw0.rvs()
            scaled_cov = 1. / self.prior_obs * self.Sigma[cluster_idx]
            self.mu[cluster_idx] = self.rng.multivariate_normal(
                self.mu0, scaled_cov)

        # Random cluster assignment for each of the M/2 rows, plus the
        # resulting per-cluster counts.
        self.Z = self.rng.choice(self.K, size=self.M_div_2)
        self.Z_count = np.bincount(self.Z, minlength=self.K)

        # Standard-normal `W`, used to approximate an RBF kernel.
        w_shape = (self.M_div_2, self.D)
        self.W = self.rng.normal(0, 1, size=w_shape)

        # Storage for Gibbs samples of `X` kept after burn-in, for analysis
        # and visualization.
        self.X_samples = np.empty((self.n_samples, self.N, self.D))
Example #3
0
def average_path_length(tree, X):
    """Compute average path length: cost of simulating the average
    example; this is used in the objective function.

    The value returned is the mean, over the rows of X, of the node index
    that ``tree.apply`` assigns to each row.

    @param tree: DecisionTreeClassifier instance
    @param X: NumPy array with one example per row
              (the result is divided by X.shape[0])
    @return path_length: float
                         average path length
    """
    n_nodes = tree.tree_.node_count
    leaf_indices = tree.apply(X)
    # Pad the histogram to one slot per node. Without minlength, bincount
    # stops at the largest index actually seen, and the dot product below
    # fails whenever the highest-numbered node is not reached (e.g. when X
    # holds a single example).
    leaf_counts = np.bincount(leaf_indices, minlength=n_nodes)
    leaf_i = np.arange(n_nodes)
    path_length = np.dot(leaf_i, leaf_counts) / float(X.shape[0])
    return path_length
 def check(x):
     """Validate `grouped_sum` on `x` using the enclosing scope's `groups`.

     For 1-D input the result is compared against a weighted `np.bincount`.
     Gradients are then checked twice: once with the default arguments and
     once with `num_groups` set to `n_groups + 4`.
     """
     is_vector = x.ndim == 1
     if is_vector:
         actual = grouped_sum(x, groups)
         expected = np.bincount(groups, weights=x)
         assert_array_almost_equal(actual, expected)
     check_grads(grouped_sum)(x, groups)
     check_grads(grouped_sum)(x, groups, num_groups=n_groups + 4)
Example #5
0
 def sufficientStats(cls, x, constParams=None):
     """Return the sufficient statistic T(x) as a one-element tuple.

     `x` must be a 1-D NumPy array or a list. `constParams` is required and
     is used as the minimum length of the count vector. The statistic is
     the number of occurrences of each value in `x`.
     """
     is_flat_array = isinstance(x, np.ndarray) and x.ndim == 1
     assert is_flat_array or isinstance(x, list), x
     assert constParams is not None
     min_length = constParams
     counts = np.bincount(x, minlength=min_length)
     return (counts, )
Example #6
0
def average_path_length(tree, X):
    """Compute average path length: cost of simulating the average
    example; this is used in the objective function.

    The value returned is the mean, over the rows of X, of the node index
    that ``tree.apply`` assigns to each row.

    @param tree: DecisionTreeClassifier instance
    @param X: NumPy array with one example per row
              (the result is divided by X.shape[0])
    @return path_length: float
                         average path length
    """
    n_nodes = tree.tree_.node_count
    leaf_indices = tree.apply(X)
    # Pad the histogram to one slot per node. Without minlength, bincount
    # stops at the largest index actually seen, and the dot product below
    # fails whenever the highest-numbered node is not reached.
    leaf_counts = np.bincount(leaf_indices, minlength=n_nodes)
    leaf_i = np.arange(n_nodes)
    path_length = np.dot(leaf_i, leaf_counts) / float(X.shape[0])
    return path_length
Example #7
0
 def count_transitions(labels):
     """Return a matrix of label-to-label transition counts.

     Row ``i`` is a histogram of the labels that come immediately after an
     occurrence of label ``i`` in ``labels``. ``N`` is taken from the
     enclosing scope and sets both the number of rows and the minimum
     number of columns.
     """
     # NOTE(review): a label >= N would lengthen its row past N and make
     # np.vstack fail -- presumably all labels lie in [0, N); confirm.
     # `range` replaces the Python-2-only `xrange`.
     return np.vstack([
         np.bincount(labels[1:][labels[:-1] == i], minlength=N)
         for i in range(N)
     ])
Example #8
0
    def _init_params(self, data, lengths=None, params='stmpaw'):
        """Initialize model parameters before fitting.

        When ``self.n_lags == 0`` everything is delegated to the parent
        class. Otherwise the codes 's' and 't' are delegated to the parent,
        and the initial means ('m'), precisions ('p') and AR coefficients
        ('a') are derived from a k-means clustering of the observations plus
        AR fits on the samples assigned to each cluster.

        Parameters
        ----------
        data : mapping
            Observations are read from ``data['obs']``; they are indexed as
            a 2-D array with one column per feature.
        lengths : optional
            Passed through unchanged to the parent ``_init_params``.
        params : str
            Codes of the parameters to initialize. The codes 's', 't', 'm',
            'p' and 'a' are handled here.
        """
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                       random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_alpha = []
                ar_resid = []

                if not self.shared_alpha:
                    # One AR fit per (state, feature) pair, stored
                    # state-major: the fit for state u and feature f is
                    # entry u * n_features + f.
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            ar_fit = smapi.tsa.AR(
                                X[kmmod.labels_ == u, f]).fit(self.n_lags)
                            # params[0] is skipped; the remaining entries
                            # are used as the lag coefficients.
                            ar_alpha.append(ar_fit.params[1:])
                            ar_resid.append(ar_fit.resid)
                else:
                    # Run one AR model per feature, on the part of the time
                    # series that got the most points after clustering.
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    for f in range(self.n_features):
                        ar_fit = smapi.tsa.AR(
                            X[kmmod.labels_ == mf, f]).fit(self.n_lags)
                        ar_alpha.append(ar_fit.params[1:])
                        ar_resid.append(ar_fit.resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        # NOTE(review): in the non-shared case ar_alpha holds
                        # n_unique * n_features entries, so ar_alpha[u] is
                        # the fit for state u only when n_features == 1 --
                        # confirm the intended index.
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u, f] = kmeans[u, f] - np.dot(
                            np.repeat(kmeans[u, f], self.n_lags),
                            ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = np.zeros(
                    (self.n_unique, self.n_features, self.n_features))

                for u in range(self.n_unique):
                    members = X[kmmod.labels_ == u]
                    if self.n_features == 1:
                        precision_init[u] = 1.0 / np.var(members)
                    else:
                        # Inverse of the covariance of the cluster's samples.
                        precision_init[u] = np.linalg.inv(
                            np.cov(np.transpose(members)))

                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        # Use the first feature's fit for each state. The
                        # index advances by n_features per state; it used to
                        # be reset to 0 inside the loop, which gave every
                        # state the coefficients of state 0.
                        alpha_init[u] = ar_alpha[u * self.n_features]
                self.alpha_ = np.copy(alpha_init)
Example #9
0
    def _init_params(self, data, lengths=None, params='stmpaw'):
        """Initialize parameters from k-means clusters and AR fits.

        With ``self.n_lags == 0`` the parent class does all the work. With
        lags, 's' and 't' are still delegated to the parent, while the
        means ('m'), precisions ('p') and AR coefficients ('a') are seeded
        from a k-means clustering of ``data['obs']`` and AR models fitted on
        the clustered samples.
        """
        X = data['obs']

        # Without lags there is nothing AR-specific to set up.
        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
            return

        # Start and transition parameters come from the parent class.
        for code in ('s', 't'):
            if code in params:
                super(ARTHMM, self)._init_params(data, lengths, code)

        if any(code in params for code in ('m', 'a', 'p')):
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_alpha = []
            ar_resid = []

            if self.shared_alpha:
                # A single AR model, fitted on the cluster that received
                # the most points.
                mf = np.argmax(np.bincount(kmmod.labels_))
                fit_labels = [mf]
            else:
                # One AR model per cluster.
                fit_labels = range(self.n_unique)

            for label in fit_labels:
                fitted = smapi.tsa.AR(
                    X[kmmod.labels_ == label]).fit(self.n_lags)
                # The first fitted parameter is skipped; the rest are the
                # lag coefficients.
                ar_alpha.append(fitted.params[1:])
                ar_resid.append(fitted.resid)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                coeffs = ar_alpha[0 if self.shared_alpha else u]
                # Only the first coordinate of the cluster center is used.
                center = kmeans[u, 0]
                mu_init[u] = center - np.dot(
                    np.repeat(center, self.n_lags), coeffs)
            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                if self.shared_alpha:
                    maxVar = np.var(ar_resid[0])
                else:
                    # Largest residual variance over all per-cluster fits.
                    maxVar = np.max([np.var(ar_resid[i])
                                     for i in range(self.n_unique)])
                precision_init[u] = 1.0 / maxVar
            self.precision_ = np.copy(precision_init)

        if 'a' in params:
            alpha_init = np.zeros((self.n_unique, self.n_lags))
            for u in range(self.n_unique):
                alpha_init[u, :] = ar_alpha[0 if self.shared_alpha else u]
            self.alpha_ = alpha_init
Example #10
0
def atomsDistances(positions, cell, cutoff_radius=6.0, self_interaction=False):
    """ Compute the distance of every atom to its neighbors.

    Atoms are sorted into spatial bins, and for every central atom only the
    atoms in the surrounding bins are examined. Pairs whose distance is not
    smaller than the cutoff radius are left out of the result. Periodic
    boundary conditions are applied along every axis.

    Parameters:
    -----------
    positions: np.ndarray
        Atomic positions, of size (N_atoms, 3), where N_atoms is the number
        of atoms in the cluster.
    cell: np.ndarray
        Periodic cell, of size (3, 3).
    cutoff_radius: float
        Cutoff radius, a hyperparameter. The default is 6.0 Angstrom.
    self_interaction: boolean
        Default is False, which means an atom is never reported as its own
        neighbor. The check is by atom index, so periodic images of the
        central atom are excluded as well.
    Returns:
    ----------
    distances: np.ndarray
        Distance of every retained (first atom, second atom) pair.
    first_atoms: np.ndarray
        Index of the central atom of every pair.
    second_atoms: np.ndarray
        Index of the neighbor atom of every pair.
    cell_shift_vector: np.ndarray
        The cell shift applied to the neighbor atom of every pair, in units
        of cell vectors.
    """
    # Compute reciprocal lattice vectors.
    inverse_cell = np.linalg.pinv(cell).T

    # Compute distances of cell faces.
    face_dist_c = 1 / np.linalg.norm(inverse_cell, axis=0)

    # We use a minimum bin size of 3 A
    bin_size = max(cutoff_radius, 3)

    # Compute number of bins, the minimum bin size must be [1., 1., 1.].
    nbins_c = np.maximum(
        (face_dist_c / bin_size - (face_dist_c / bin_size) % 1), [1., 1., 1.])
    nbins = np.prod(nbins_c)

    # Compute the number of neighbor cell that need to be search
    neighbor_search_x, neighbor_search_y, neighbor_search_z =\
                np.ceil(bin_size * nbins_c / face_dist_c).astype(int)

    # Sort atoms into bins.
    scaled_positions_ic = np.dot(positions, inverse_cell) % 1
    scaled_bins_ic = scaled_positions_ic * nbins_c
    # Take the integer part, then wrap modulo the bin count. `% 1` can round
    # a tiny negative coordinate up to exactly 1.0, which would otherwise
    # give a bin index equal to nbins_c and an out-of-range bin below. For a
    # periodic cell that bin is the same as bin 0.
    bin_index_ic = (scaled_bins_ic - scaled_bins_ic % 1) % nbins_c

    # Convert Cartesian bin index to unique scalar bin index.
    bin_index_i = (bin_index_ic[:, 0] + nbins_c[0] *
                   (bin_index_ic[:, 1] + nbins_c[1] * bin_index_ic[:, 2]))

    # atom_i contains atom index in new sort order.
    atom_i = np.argsort(bin_index_i)
    bin_index_i = bin_index_i[atom_i]

    # Compute the maximum number of atoms in a bin
    max_natoms_per_bin = np.bincount(np.int_(bin_index_i)).max()

    # Sort atoms into bins. atoms_in_bin_ba[b, a] is the index of the a-th
    # atom in bin b, or -1 when the bin has fewer atoms.
    atoms_in_bin_ba = -np.ones([np.int_(nbins), max_natoms_per_bin], dtype=int)

    for i in range(max_natoms_per_bin):
        # Create a mask array that identifies the first atom of each bin.
        mask = np.append([True], bin_index_i[:-1] != bin_index_i[1:])
        # Assign all first atoms.
        atoms_in_bin_ba[np.int_(bin_index_i[mask]), i] = atom_i[mask]

        # Remove atoms that we just sorted into atoms_in_bin_ba. The next
        # "first" atom will be the second and so on.
        mask = np.logical_not(mask)
        atom_i = atom_i[mask]
        bin_index_i = bin_index_i[mask]

    # Create the shift list that indicates that where the cell might shift.
    shift = []
    for x in range(-neighbor_search_x, neighbor_search_x + 1):
        for y in range(-neighbor_search_y, neighbor_search_y + 1):
            for z in range(-neighbor_search_z, neighbor_search_z + 1):
                shift += [[x, y, z]]

    # Bin of every neighbor candidate: the atom's own bin plus each shift,
    # wrapped back into the cell. cell_shift records how many whole cells the
    # wrap moved the candidate bin.
    neighborbin = (bin_index_ic[:, None] + np.array(shift)[None, :]) % nbins_c
    cell_shift = ((bin_index_ic[:, None] + np.array(shift)[None, :]) -
                  neighborbin) / nbins_c
    neighborbin = neighborbin[:, :, 0] + nbins_c[0] * (
        neighborbin[:, :, 1] + nbins_c[1] * neighborbin[:, :, 2])

    distances = []
    first_atoms = []
    second_atoms = []
    cell_shift_vector = []
    for i in range(len(positions)):
        # Create a mask that indicates which neighborhood bin contains atoms.
        if self_interaction:
            mask = (atoms_in_bin_ba[np.int_(neighborbin[i])] != -1)
        else:
            mask = np.logical_and(
                atoms_in_bin_ba[np.int_(neighborbin[i])] != -1,
                atoms_in_bin_ba[np.int_(neighborbin[i])] != i)
        # Empty slots (-1) index the last atom here; they are removed by the
        # mask below.
        distances_vec = positions[atoms_in_bin_ba[np.int_(
            neighborbin[i])]] - positions[i]
        # the distance should consider the cell shift
        distances_vec = distances_vec + np.dot(cell_shift[i], cell)[:, None]
        # make the cell shift vector for every atom instead of every bin.
        _cell_shift_vector = np.repeat(cell_shift[i][:, None],
                                       max_natoms_per_bin,
                                       axis=1)[mask]
        distances_vec = distances_vec[mask]
        temp_distances = np.sum(distances_vec * distances_vec, axis=1)
        temp_distances = (temp_distances)**0.5
        # Keep only the pairs strictly inside the cutoff radius.
        cutoff_mask = (temp_distances < cutoff_radius)
        _second_atoms = atoms_in_bin_ba[np.int_(
            neighborbin[i])][mask][cutoff_mask]
        _first_atoms = [i] * len(_second_atoms)
        _cell_shift_vector = _cell_shift_vector[cutoff_mask]
        first_atoms.extend(_first_atoms)
        second_atoms.extend(_second_atoms)
        distances.extend(temp_distances[cutoff_mask])
        cell_shift_vector.extend(_cell_shift_vector)

    distances = np.array(distances)
    cell_shift_vector = np.array(cell_shift_vector)
    first_atoms = np.array(first_atoms)
    second_atoms = np.array(second_atoms)

    return distances, first_atoms, second_atoms, cell_shift_vector
Example #11
0
    def _init_params(self, data, lengths=None, params='stmpaw'):
        """Initialize model parameters before fitting.

        When ``self.n_lags == 0`` everything is delegated to the parent
        class. Otherwise the codes 's' and 't' are delegated to the parent,
        and the initial means ('m'), precisions ('p') and AR coefficients
        ('a') are derived from a k-means clustering of the observations plus
        AR fits on the samples assigned to each cluster.

        Parameters
        ----------
        data : mapping
            Observations are read from ``data['obs']``; they are indexed as
            a 2-D array with one column per feature.
        lengths : optional
            Passed through unchanged to the parent ``_init_params``.
        params : str
            Codes of the parameters to initialize. The codes 's', 't', 'm',
            'p' and 'a' are handled here.
        """
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(
                    n_clusters=self.n_unique,
                    random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_alpha = []
                ar_resid = []

                if not self.shared_alpha:
                    # One AR fit per (state, feature) pair, stored
                    # state-major: the fit for state u and feature f is
                    # entry u * n_features + f.
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            ar_fit = smapi.tsa.AR(
                                X[kmmod.labels_ == u, f]).fit(self.n_lags)
                            # params[0] is skipped; the remaining entries
                            # are used as the lag coefficients.
                            ar_alpha.append(ar_fit.params[1:])
                            ar_resid.append(ar_fit.resid)
                else:
                    # Run one AR model per feature, on the part of the time
                    # series that got the most points after clustering.
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    for f in range(self.n_features):
                        ar_fit = smapi.tsa.AR(
                            X[kmmod.labels_ == mf, f]).fit(self.n_lags)
                        ar_alpha.append(ar_fit.params[1:])
                        ar_resid.append(ar_fit.resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        # NOTE(review): in the non-shared case ar_alpha holds
                        # n_unique * n_features entries, so ar_alpha[u] is
                        # the fit for state u only when n_features == 1 --
                        # confirm the intended index.
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u, f] = kmeans[u, f] - np.dot(
                            np.repeat(kmeans[u, f], self.n_lags),
                            ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = np.zeros(
                    (self.n_unique, self.n_features, self.n_features))

                for u in range(self.n_unique):
                    members = X[kmmod.labels_ == u]
                    if self.n_features == 1:
                        precision_init[u] = 1.0 / np.var(members)
                    else:
                        # Inverse of the covariance of the cluster's samples.
                        precision_init[u] = np.linalg.inv(
                            np.cov(np.transpose(members)))

                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        # Use the first feature's fit for each state. The
                        # index advances by n_features per state; it used to
                        # be reset to 0 inside the loop, which gave every
                        # state the coefficients of state 0.
                        alpha_init[u] = ar_alpha[u * self.n_features]
                self.alpha_ = np.copy(alpha_init)