Example #1
 def moments(self):
     """Calculate covariance and correlation matrices,
     trait, genotypic and ontogenetic means"""
     zs = np.array([ind["z"] for ind in self.pop])
     xs = np.array([ind["x"] for ind in self.pop])
     ys = np.array([ind["y"] for ind in self.pop])
     bs = np.array([ind["b"] for ind in self.pop])
     zmean = zs.mean(axis=0)
     xmean = xs.mean(axis=0)
     ymean = ys.mean(axis=0)
     bmean = bs.mean(axis=0)
     phenotypic = np.cov(zs, rowvar=False, bias=True)
     genetic = np.cov(xs, rowvar=False, bias=True)
     heritability = genetic[np.diag_indices_from(genetic)] / phenotypic[np.diag_indices_from(phenotypic)]
     # np.corrcoef ignores the deprecated bias argument, so it is not passed here
     corr_phenotypic = np.corrcoef(zs, rowvar=False)
     corr_genetic = np.corrcoef(xs, rowvar=False)
     avgP = avg_ratio(corr_phenotypic, self.modules)
     avgG = avg_ratio(corr_genetic, self.modules)
     return {
         "y.mean": ymean,
         "b.mean": bmean,
         "z.mean": zmean,
         "x.mean": xmean,
         "P": phenotypic,
         "G": genetic,
         "h2": heritability,
         "avgP": avgP,
         "avgG": avgG,
         "corrP": corr_phenotypic,
         "corrG": corr_genetic,
     }
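For orientation, here is a minimal self-contained sketch of the same moment computation on synthetic data; it uses plain NumPy only and does not reproduce the population container or the avg_ratio helper from the example above.

import numpy as np

rng = np.random.default_rng(0)
n_ind, n_traits = 200, 4
xs = rng.normal(size=(n_ind, n_traits))              # additive genetic values
zs = xs + 0.5 * rng.normal(size=(n_ind, n_traits))   # phenotypes = genotype + environmental noise

P = np.cov(zs, rowvar=False, bias=True)   # phenotypic covariance matrix
G = np.cov(xs, rowvar=False, bias=True)   # genetic covariance matrix
h2 = np.diag(G) / np.diag(P)              # per-trait heritability, as in the example
corrP = np.corrcoef(zs, rowvar=False)     # phenotypic correlation matrix
print(h2)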
Example #2
def Voigt_6x6_to_cubic(C):
    """
    Convert the Voigt 6x6 representation into the cubic elastic constants
    C11, C12 and C44.
    """

    tol = 1e-6

    C_check = np.zeros_like(C)
    C_check[np.diag_indices_from(C_check)] = C[np.diag_indices_from(C)]
    C_check[0:3,0:3] = C[0:3,0:3]
    if np.any(np.abs(C-C_check) > tol):
        raise ValueError('"C" does not have cubic symmetry.')

    C11s = np.array([C[0,0], C[1,1], C[2,2]])
    C12s = np.array([C[1,2], C[0,2], C[0,1]])
    C44s = np.array([C[3,3], C[4,4], C[5,5]])

    C11 = np.mean(C11s)
    C12 = np.mean(C12s)
    C44 = np.mean(C44s)

    if np.any(np.abs(C11-C11s) > tol) or np.any(np.abs(C12-C12s) > tol) or \
            np.any(np.abs(C44-C44s) > tol):
        raise ValueError('"C" does not have cubic symmetry.')

    return np.array([C11, C12, C44])
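A quick round-trip check of the conversion above, assuming Voigt_6x6_to_cubic is defined as shown (the constants are merely illustrative):

import numpy as np

C11, C12, C44 = 110.0, 60.0, 30.0     # illustrative cubic constants
C = np.zeros((6, 6))
C[:3, :3] = C12                        # C12 everywhere in the upper-left block...
C[0, 0] = C[1, 1] = C[2, 2] = C11      # ...except C11 on its diagonal
C[3, 3] = C[4, 4] = C[5, 5] = C44      # shear constants
print(Voigt_6x6_to_cubic(C))           # -> [110.  60.  30.]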
Example #3
 def transform_covars_grad(self, internal_grad):
     grad = np.empty((self.num_latent, self.get_covar_size()), dtype=np.float32)
     for j in range(self.num_latent):
         tmp = self._theano_transform_covars_grad(internal_grad[0, j], self.covars_cholesky[j])
         tmp[np.diag_indices_from(tmp)] *= self.covars_cholesky[j][np.diag_indices_from(tmp)]
         grad[j] = tmp[np.tril_indices_from(self.covars_cholesky[j])]
     return grad.flatten()
Example #4
 def _get_raw_covars(self):
     flattened_covars = np.empty([self.num_latent, self.get_covar_size()], dtype=np.float32)
     for i in xrange(self.num_latent):
         raw_covars = self.covars_cholesky[i].copy()
         raw_covars[np.diag_indices_from(raw_covars)] = np.log(raw_covars[np.diag_indices_from(raw_covars)])
         flattened_covars[i] = raw_covars[np.tril_indices_from(raw_covars)]
     return flattened_covars.flatten()
Example #5
def test_cosine_distances():
    # Check the pairwise Cosine distances computation
    rng = np.random.RandomState(1337)
    x = np.abs(rng.rand(910))
    XA = np.vstack([x, x])
    D = cosine_distances(XA)
    assert_array_almost_equal(D, [[0., 0.], [0., 0.]])
    # check that all elements are in [0, 2]
    assert np.all(D >= 0.)
    assert np.all(D <= 2.)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0., 0.])

    XB = np.vstack([x, -x])
    D2 = cosine_distances(XB)
    # check that all elements are in [0, 2]
    assert np.all(D2 >= 0.)
    assert np.all(D2 <= 2.)
    # check that diagonal elements are equal to 0 and non diagonal to 2
    assert_array_almost_equal(D2, [[0., 2.], [2., 0.]])

    # check large random matrix
    X = np.abs(rng.rand(1000, 5000))
    D = cosine_distances(X)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0.] * D.shape[0])
    assert np.all(D >= 0.)
    assert np.all(D <= 2.)
Example #6
	def newCostFunction(self, xs, ys, test=False):
	    xs = np.array(xs)
	    ys = np.array(ys)
	    s1 = xs.dot(ys.T).T
	    s2 = ys.dot(xs.T).T

	    s1 = np.maximum(0, 1 - np.diag(s1) + s1).T
	    s2 = np.maximum(0, 1 - np.diag(s2) + s2).T

	    s1[np.diag_indices_from(s1)] = 0
	    s2[np.diag_indices_from(s2)] = 0
	    ns1 = s1
	    ns2 = s2
	    cost = np.sum(s1)+np.sum(s2)
	    if abs(cost - 2) < 1e-5:
	    	import pdb
	    	pdb.set_trace()
	    if test:
	    	return cost
	    s1t = s1 > 0
	    s2t = s2 > 0
	    tx1 = (ys[:,:,None].T - ys[:,:,None]).transpose([0,2,1])*s1t[:,:,None]
	    ty1 = (xs[:,:,None].T - xs[:,:,None]).transpose([0,2,1])*s2t[:,:,None]
	    tx2 = (ys * np.ones((len(xs),len(xs),xs[0].size))).transpose(1,0,2) * s2t[:,:,None]
	    ty2 = (xs * np.ones((len(xs),len(xs),xs[0].size))).transpose(1,0,2) * s1t[:,:,None]
	    tx3 = (s2t.T)[:,:,None]*ys
	    ty3 = (s1t.T)[:,:,None]*xs
	    xd = np.sum(tx1 - tx2 + tx3, 1)
	    yd = np.sum(ty1 - ty2 + ty3, 1)
	    #print 'xd norm: %.4f, yd norm: %.4f'%(np.linalg.norm(xd), np.linalg.norm(yd))
	    return cost, list(xd), list(yd)
Example #7
 def set_covars(self, raw_covars):
     raw_covars = raw_covars.reshape([self.num_latent, self.get_covar_size()])
     for j in xrange(self.num_latent):
         cholesky = np.zeros([self.num_dim, self.num_dim], dtype=np.float32)
         cholesky[np.tril_indices_from(cholesky)] = raw_covars[j]
         cholesky[np.diag_indices_from(cholesky)] = np.exp(cholesky[np.diag_indices_from(cholesky)])
         self.covars_cholesky[j] = cholesky
         self.covars[j] = mdot(self.covars_cholesky[j], self.covars_cholesky[j].T)
Example #8
 def _update(self):
     self.parameters = self.get_parameters()
     for k in range(self.num_comp):
         for j in range(self.num_process):
             temp = np.zeros((self.num_dim, self.num_dim))
             temp[np.tril_indices_from(temp)] = self.L_flatten[k,j,:].copy()
             temp[np.diag_indices_from(temp)] = np.exp(temp[np.diag_indices_from(temp)])
             # temp[np.diag_indices_from(temp)] = temp[np.diag_indices_from(temp)] ** 2
             self.L[k,j,:,:] = temp
             self.s[k,j] = mdot(self.L[k,j,:,:], self.L[k,j,:,:].T)
Example #9
 def update_covariance(self, j, Sj):
     Sj = Sj.copy()
     mm = min(Sj[np.diag_indices_from(Sj)])
     if mm < 0:
         Sj[np.diag_indices_from(Sj)] = Sj[np.diag_indices_from(Sj)] - 1.1 * mm
     for k in range(self.num_comp):
         self.s[k,j] = Sj.copy()
         self.L[k,j] = jitchol(Sj,10)
         tmp = self.L[k,j].copy()
         tmp[np.diag_indices_from(tmp)] = np.log(tmp[np.diag_indices_from(tmp)])
         self.L_flatten[k,j] = tmp[np.tril_indices_from(tmp)]
     self._update()
Example #10
File: gnm.py Project: sixpi/ProDy
    def getNormDistFluct(self, coords):
        """Normalized distance fluctuation
        """
            
        model = self.getModel()
        LOGGER.info('Number of chains: {0}, chains: {1}.'
                     .format(len(list(set(coords.getChids()))), \
                                 list(set(coords.getChids()))))

        try:
            #coords = coords.select('protein and name CA')
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                                'with `getCoords` method')
        
        if not isinstance(model, NMA):
            LOGGER.info('Calculating new model')
            model = GNM('prot analysis')
            model.buildKirchhoff(coords)
            model.calcModes() 
            
        linalg = importLA()
        n_atoms = model.numAtoms()
        n_modes = model.numModes()
        LOGGER.timeit('_ndf')
    
        from .analysis import calcCrossCorr
        from numpy import linalg as LA
        # <dRi, dRi>, <dRj, dRj> = 1
        crossC = 2-2*calcCrossCorr(model)
        r_ij = np.zeros((n_atoms,n_atoms,3))

        for i in range(n_atoms):
            for j in range(i+1, n_atoms):
                r_ij[i][j] = coords[j,:] - coords[i,:]
                r_ij[j][i] = r_ij[i][j]
        # compute the pairwise distance norms once, after the loop
        r_ij_n = LA.norm(r_ij, axis=2)

        #with np.errstate(divide='ignore'):
        r_ij_n[np.diag_indices_from(r_ij_n)] = 1e-5  # div by 0
        crossC=abs(crossC)
        normdistfluct = np.divide(np.sqrt(crossC),r_ij_n)
        LOGGER.report('NDF calculated in %.2lfs.', label='_ndf')
        normdistfluct[np.diag_indices_from(normdistfluct)] = 0  # div by 0
        return normdistfluct
Example #11
def ExpandNode(fringe,node):
    col_sum = np.sum(node.attacked_cells,0)
    dict_sum = {}
    for i in range(8):
        if col_sum[0,i] == 8:
            continue
        dict_sum[i] = col_sum[0,i]
    sorted_sum = sorted(dict_sum.items(),key=operator.\
                        itemgetter(1),reverse=True)
    for i in range(len(sorted_sum)):
        col = sorted_sum[i][0]
        for row in range(8):
            if node.attacked_cells[row,col]:
                continue
            attacked_cells = copy.deepcopy(node.attacked_cells)
            attacked_cells[:,col] = 1
            attacked_cells[row,:] = 1
            k = row-col
            rows, cols = np.diag_indices_from(attacked_cells)
            if k < 0:
                rows,cols = rows[:k],cols[-k:]
            elif k > 0:
                rows,cols = rows[k:],cols[:-k]
            attacked_cells[rows,cols] = 1

            attacked_cells = np.fliplr(attacked_cells)
            ncol = 7-col
            k = row-ncol
            rows, cols = np.diag_indices_from(attacked_cells)
            if k < 0:
                rows,cols = rows[:k],cols[-k:]
            elif k > 0:
                rows,cols = rows[k:],cols[:-k]
            attacked_cells[rows,cols] = 1
            attacked_cells = np.fliplr(attacked_cells)

            valid = True
            for i in range(node.depth+1,8):
                if np.sum(attacked_cells[i,:]) == 8:
                    valid = False
                    break
            if not valid:
                continue
            
            nstate = copy.deepcopy(node.state)
            nstate[row,col] = 1
            new_node = Node(parent=node,depth=node.depth\
                 +1,state=nstate,attacked_cells=attacked_cells)
            fringe.insert(0,new_node)
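The index-shifting trick used twice above (offsetting the indices from np.diag_indices_from so that the marked diagonal passes through a chosen cell) is easier to see in isolation; a small sketch with a hypothetical helper name:

import numpy as np

def mark_diagonal_through(board, row, col):
    # mark the top-left-to-bottom-right diagonal passing through (row, col)
    k = row - col
    rows, cols = np.diag_indices_from(board)
    if k < 0:
        rows, cols = rows[:k], cols[-k:]
    elif k > 0:
        rows, cols = rows[k:], cols[:-k]
    board[rows, cols] = 1
    return board

print(mark_diagonal_through(np.zeros((8, 8), dtype=int), 2, 5))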
Example #12
def problem8():
   "problem set 2.1, problem 8, page 56"
   import LUdecomp
   A = np.array([[-3,6,-4],[9,-8,24],[-12,24,-26]],dtype=float)
   A_orig = A.copy()
   LU = LUdecomp.LUdecomp(A)
   b = np.array([-3,65,-42],dtype=float)
   b_orig = b.copy()
   x = LUdecomp.LUsolve(LU,b)
   # extract L and U for verification
   U = np.triu(LU)  # 
   L = np.tril(LU)
   L[ np.diag_indices_from(L) ] = 1.0 
   print("""
Problem 8:
A = 
{}
LU decomposition A = LU, LU (in one matrix) = 
{}
Solving Ax=b, with b = {}
Solution x = {}
Verifying solution: 
     residual ||Ax-b||_2 = {}
     ||A - dot(L,U)||_inf = {}
""".format(A_orig,LU,b_orig,x, 
   la.norm(np.dot(A_orig,x)-b_orig,2), 
   la.norm(A_orig - np.dot(L,U),np.inf))
   )
Example #13
    def test_map_diag_and_offdiag(self):

        vars = ["x", "y", "z"]
        g = ag.PairGrid(self.df)
        g.map_offdiag(plt.scatter)
        g.map_diag(plt.hist)

        for ax in g.diag_axes:
            nt.assert_equal(len(ax.patches), 10)

        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.tril_indices_from(g.axes, -1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.diag_indices_from(g.axes)):
            ax = g.axes[i, j]
            nt.assert_equal(len(ax.collections), 0)
Example #14
def ddiag(a, nozero=False):
    """ Robust diagonalization : always put selected diagonal on a diagonal!

    This small function aims at getting a behaviour closer to the
    mathematical "hat", compared to what np.diag() delivers.

    If applied to a vector or a 2d-matrix with one dimension of size 1, put
    the coefficients on the diagonal of a matrix with off-diagonal elements
    equal to zero.

    If applied to a 2d-matrix (with all dimensions of size > 1), replace
    all off-diagonal elements by zeros.

    Parameters
    ----------
    a : numpy matrix or vector to be diagonalized

    Returns
    -------
    b : ndarray
        The diagonalized array.

    Raises
    ------
    ValueError
        If ``a`` is more than 2-dimensional.

    See Also
    --------
    diag
    """

    # If numpy vector
    if a.ndim == 1:
        b = np.diag(a)

    # If numpy 2d-array
    elif a.ndim == 2:

        #...but with dimension of magnitude 1
        if min(a.shape) == 1:
            b = np.diag(np.squeeze(a))

        # ... or a "true" 2-d matrix
        else:
            b = np.diag(np.diag(a))

    else:
        raise ValueError("Input must be 1- or 2-d")

    # Extreme case: a 1 element matrix/vector
    if b.ndim == 1 and b.size == 1:
        b = b.reshape((1, 1))

    if nozero:
        # Replace off-diagonal zeros by NaN if desired
        c = np.full(b.shape, np.nan)
        di = np.diag_indices_from(c)
        c[di] = b.diagonal()
        return c
    else:
        # A certainly diagonal vector is returned
        return b
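A brief usage sketch of ddiag as defined above:

import numpy as np

v = np.array([1.0, 2.0, 3.0])
A = np.arange(9.0).reshape(3, 3)

print(ddiag(v))                # 3x3 matrix with v on the diagonal
print(ddiag(A))                # A with its off-diagonal entries zeroed
print(ddiag(A, nozero=True))   # off-diagonal entries become NaN instead of 0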
Example #15
def report_clustering_dot_product(loci, thresholds_pack, method, feature_labels):

    thr_occ, thr_crisp, cluster_thresholds = thresholds_pack

    M = scores.generate_dot_product_score_matrix(feature_labels, method, loci=loci)
    M += np.transpose(M)
    M = -1 * np.log(M)
    M[np.diag_indices_from(M)] = 0
    M[np.where(M==np.inf)] = 100

    reports_dir_base = os.path.join(gv.project_data_path, 'cas4/reports/')

    cluster2summary_file_path = os.path.join(gv.project_data_path, 'cas4/reports/cluster_summary.tab')

    for threshold in cluster_thresholds:

        reports_dir = reports_dir_base + 'dot_%s_%d_%.2f_%.2f' % (method, thr_occ, thr_crisp, threshold)
        # print "Thresholds:", thr_occ, thr_crisp, threshold
        # print reports_dir
        # if os.path.exists(reports_dir):
        #     sh.rmtree(reports_dir)
        # os.mkdir(reports_dir)

        singles, cluster_packs, entropies = dendrogram.classify_by_scores_cas4(M, threshold, loci)

        _local_thresholds_pack = (thr_occ, thr_crisp, threshold)

        generate_cluster_reports_cas4(cluster_packs,
                                      loci,
                                      reports_dir,
                                      feature_labels,
                                      method,
                                      _local_thresholds_pack)

        generate_cas4_gi_summary_file(singles, cluster_packs, loci, reports_dir, cluster2summary_file_path)
Example #16
    def test_pairplot(self):

        vars = ["x", "y", "z"]
        g = pairplot(self.df)

        for ax in g.diag_axes:
            nt.assert_equal(len(ax.patches), 10)

        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.tril_indices_from(g.axes, -1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.diag_indices_from(g.axes)):
            ax = g.axes[i, j]
            nt.assert_equal(len(ax.collections), 0)

        plt.close("all")
Example #17
def _generate_noise(covar_matrix, time=1000, use_inverse=False):
    """
    Generate a multivariate normal distribution using correlated innovations.

    Parameters
    ----------
    covar_matrix : array
        Covariance matrix of the random variables

    time : int
        Sample size

    use_inverse : bool, optional
        Negate the off-diagonal elements and invert the covariance matrix
        before use

    Returns
    -------
    noise : array
        Random noise generated according to covar_matrix
    """
    # Pull out the number of nodes from the shape of the covar_matrix
    n_nodes = covar_matrix.shape[0]
    # Use the covariance matrix as given; a copy is made below only in the inverse case
    this_covar = covar_matrix
    # Take the negative inverse if needed
    if use_inverse:
        this_covar = copy.deepcopy(covar_matrix)
        this_covar *= -1
        this_covar[np.diag_indices_from(this_covar)] *= -1
        this_covar = np.linalg.inv(this_covar)
    # Return the noise distribution
    return np.random.multivariate_normal(mean=np.zeros(n_nodes),
                                            cov=this_covar,
                                            size=time)
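A short usage sketch, assuming _generate_noise is available as defined above; the sample covariance of the generated noise should be close to the input matrix.

import numpy as np

covar = np.array([[1.0, 0.5],
                  [0.5, 2.0]])
noise = _generate_noise(covar, time=5000)
print(noise.shape)                    # (5000, 2)
print(np.cov(noise, rowvar=False))    # approximately equal to covar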
Example #18
        def compute_distances(self, x1, x2):
            """
            The method imputes the missing values as means and calls
            safe_sparse_dot. Imputation simplifies computation at a cost of
            (theoretically) slightly wrong distance between pairs of missing
             values.
            """

            def prepare_data(x):
                if self.discrete.any():
                    data = Cosine.discrete_to_indicators(x, self.discrete)
                else:
                    data = x.copy()
                for col, mean in enumerate(self.means):
                    column = data[:, col]
                    column[np.isnan(column)] = mean
                if self.axis == 0:
                    data = data.T
                data /= row_norms(data)[:, np.newaxis]
                return data

            data1 = prepare_data(x1)
            data2 = data1 if x2 is None else prepare_data(x2)
            dist = safe_sparse_dot(data1, data2.T)
            np.clip(dist, 0, 1, out=dist)
            if x2 is None:
                diag = np.diag_indices_from(dist)
                dist[diag] = np.where(np.isnan(dist[diag]), np.nan, 1.0)
            return 1 - dist
Example #19
	def _setup_pop_meas_ana(self, start_dens, end_dens, O_meas):
		H0_vecs = self.H0_vecs # cache locally

		self.info("O_measure:")
		self.pprint(O_meas)
		O_meas = dot(dot(H0_vecs.conj().T, O_meas), H0_vecs)
		self.pprint(O_meas)
		self.info("Start_dens:")
		self.pprint(start_dens)
		start_dens = dot(dot(H0_vecs.conj().T, start_dens), H0_vecs)
		self.pprint(start_dens)
		self.info("End_dens ({0}):".format(end_dens.dtype))
		self.pprint(end_dens)
		end_dens = dot(dot(H0_vecs.conj().T, end_dens), H0_vecs)
		self.pprint(end_dens)

		self._pop_base = diag(end_dens)
		self._pop_diff = diag(start_dens) - self._pop_base
		self._pop_meas = diag(O_meas)

		m_start_d = dot(start_dens, O_meas).trace()
		m_end_d = dot(end_dens, O_meas).trace()
		# m_start_p = dot(pop_diff + pop_base, pop_meas)
		mod_start_dens = start_dens.copy()
		mod_start_dens[diag_indices_from(mod_start_dens)] = 0
		m_diff = dot(mod_start_dens, O_meas).trace()
		# m_err = (m_start_p - m_start_d) / (m_start_d - m_end_d)
		m_err = m_diff / (m_start_d - m_end_d)
Example #20
def cosine_distances(X, Y=None):
    """Compute cosine distance between samples in X and Y.

    Cosine distance is defined as 1.0 minus the cosine similarity.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array_like, sparse matrix
        with shape (n_samples_X, n_features).

    Y : array_like, sparse matrix (optional)
        with shape (n_samples_Y, n_features).

    Returns
    -------
    distance matrix : array
        An array with shape (n_samples_X, n_samples_Y).

    See also
    --------
    sklearn.metrics.pairwise.cosine_similarity
    scipy.spatial.distance.cosine (dense matrices only)
    """
    # 1.0 - cosine_similarity(X, Y) without copy
    S = cosine_similarity(X, Y)
    S *= -1
    S += 1
    np.clip(S, 0, 2, out=S)
    if X is Y or Y is None:
        # Ensure that distances between vectors and themselves are set to 0.0.
        # This may not be the case due to floating point rounding errors.
        S[np.diag_indices_from(S)] = 0.0
    return S
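The same function is available as sklearn.metrics.pairwise.cosine_distances; a minimal check of the behaviour implemented above (assuming scikit-learn is installed):

import numpy as np
from sklearn.metrics.pairwise import cosine_distances

X = np.array([[1.0, 0.0],
              [0.0, 1.0],
              [1.0, 1.0]])
D = cosine_distances(X)
print(np.allclose(np.diag(D), 0.0))   # self-distances forced to exactly 0
print(D[0, 1])                        # orthogonal vectors -> distance 1.0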
Example #21
	def _add_relaxation(self, f_set, J0, J1, J2):
		H0_vecs = self.H0_vecs # cache locally

		J0ab = J0(self.w_diff)
		J1ab = J1(self.w_diff)
		J2ab = J2(self.w_diff)
		# pprint(J1ab)

		f2 = []
		for A, Jq in zip(f_set, (J2ab, J1ab, J0ab, J1ab, J2ab)):
			A = dot(dot(H0_vecs.conj().T, A), H0_vecs)
			A *= A.conj()
			A *= Jq
			A = real_if_close(A)
			f2.append(A)

		f2 = array(f2)
		# pprint(f2)

		Rab = f2.sum(axis=0)
		diag_idx = diag_indices_from(Rab)
		Rab[diag_idx] = 0
		assert allclose(Rab, Rab.T)

		Rab[diag_idx] = -Rab.sum(axis=1)
		self.info("Redfield matrix:")
		self.pprint(Rab)
		self.Rab_list.append(Rab)
Example #22
    def test_pairplot_reg(self):

        vars = ["x", "y", "z"]
        g = ag.pairplot(self.df, diag_kind="hist", kind="reg")

        for ax in g.diag_axes:
            nt.assert_equal(len(ax.patches), 10)

        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

            nt.assert_equal(len(ax.lines), 1)
            nt.assert_equal(len(ax.collections), 2)

        for i, j in zip(*np.tril_indices_from(g.axes, -1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

            nt.assert_equal(len(ax.lines), 1)
            nt.assert_equal(len(ax.collections), 2)

        for i, j in zip(*np.diag_indices_from(g.axes)):
            ax = g.axes[i, j]
            nt.assert_equal(len(ax.collections), 0)
Example #23
def _sim_gp(tt, gp):
    """Simulates values from a sklearn GPR object
    """
    K = gp.kernel(tt[:, None])
    K[np.diag_indices_from(K)] += gp.alpha
    L = np.linalg.cholesky(K)
    return L.dot(np.random.normal(size=tt.size))
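A hedged usage sketch drawing one sample from the GP prior with a scikit-learn kernel; the gp object only needs the kernel and alpha attributes that _sim_gp reads.

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

gp = GaussianProcessRegressor(kernel=RBF(length_scale=0.5), alpha=1e-6)
tt = np.linspace(0.0, 1.0, 50)
sample = _sim_gp(tt, gp)    # one draw from the GP prior at the points tt
print(sample.shape)         # (50,)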
Example #24
def diagonal_indices(a, offset=0):
    """The indices to the diagonal of a 2D array ``a``

    The indices are those to the main diagonal (if ``offset`` is 0), or to a
    secondary diagonal, having the specified offset from the main one.

    The array ``a`` does not need to be square.

    **Parameters:**

    a : ndarray
        The 2D ndarray for which the diagonal indices should be calculated.
    offset : int, optional
        The diagonal offset from the main one. Note that the sup-diagonal is at
        offset +1, the sub-diagonal at offset -1, and so on. Defaults to 0,
        which corresponds to the main diagonal.

    **Returns:**

    xs, ys : tuples
        The indices in the two coordinates. Thanks to ``numpy``'s advanced
        slicing, the diagonal may be accessed with ``a[(xs, ys)]``.
    """
    di, dj = np.diag_indices_from(a[:min(a.shape), :min(a.shape)])
    if offset > 0:
        di, dj = zip(*[(i, j)
                     for i, j in zip(di, dj + offset) if 0 <= j < a.shape[1]])
    elif offset == 0:
        pass
    else:
        di, dj = zip(*[(i, j)
                     for i, j in zip(di - offset, dj) if 0 <= i < a.shape[0]])
    return di, dj
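Usage on a non-square array, assuming diagonal_indices as defined above:

import numpy as np

A = np.arange(12).reshape(3, 4)
print(A[diagonal_indices(A)])        # main diagonal: [0 5 10]
print(A[diagonal_indices(A, 1)])     # first super-diagonal: [1 6 11]
print(A[diagonal_indices(A, -1)])    # first sub-diagonal: [4 9]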
Example #25
def nwin1_bet_returns(w, odds):
    assert len(w) == len(odds)
    R = w.reshape(1, -1).repeat(len(w), 0)
    R *= eye(R.shape[0]) - 1.0
    ix = diag_indices_from(R)
    R[ix] = w * (odds - 1.0)
    return np.sum(R, 1)
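A small usage sketch, assuming nwin1_bet_returns and its NumPy imports (eye, diag_indices_from) are in scope: staking one unit on each of three mutually exclusive outcomes at the given decimal odds yields the net return for each possible winner.

import numpy as np

w = np.array([1.0, 1.0, 1.0])       # stakes on each outcome
odds = np.array([3.0, 4.0, 2.5])    # decimal odds
print(nwin1_bet_returns(w, odds))   # -> [ 0.   1.  -0.5]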
Example #26
def active_passive_collisions(active_tl, active_br, passive_tl, passive_br):
    '''
    Returns an NxN array, where element at [i, j] says if
    thing i's active hitbox crosses thing j's active hitbox.
    An active hitbox isn't considered if any of its dimensions is non-positive.

    active/passive_tl/br must be arrays of shape (N, 2) - the boxes' corners in
    global coordinates

    See comment for passive_passive_collisions for longer explanation.
    The main difference is that we can't cheat here and do half the checks,
    then transpose, we need to do all checks.
    '''
    passive_tl_3d = passive_tl.reshape(1, -1, 2)
    passive_br_3d = passive_br.reshape(1, -1, 2)

    active_tl_3d = active_tl.reshape(-1, 1, 2)
    active_br_3d = active_br.reshape(-1, 1, 2)

    negcheck = numpy.logical_or(numpy.any(active_tl_3d > passive_br_3d, axis=2),
                                numpy.any(active_br_3d < passive_tl_3d, axis=2))

    # active boxes with any non-positive dimension are not eligible
    eligible = numpy.all(active_tl < active_br, axis=1).reshape(-1, 1)

    result = numpy.logical_and(numpy.logical_not(negcheck), eligible)

    # Remove self collisions
    result[numpy.diag_indices_from(result)] = False
    return result
Example #27
def _test_solver(Solver, N=300, seed=1234, **kwargs):
    # Set up the solver.
    kernel = 1.0 * kernels.ExpSquaredKernel(1.0)
    solver = Solver(kernel, **kwargs)

    # Sample some data.
    np.random.seed(seed)
    x = np.atleast_2d(np.sort(10*np.random.randn(N))).T
    yerr = np.ones(N)
    solver.compute(x, yerr)

    # Build the matrix.
    K = kernel.get_value(x)
    K[np.diag_indices_from(K)] += yerr ** 2

    # Check the determinant.
    sgn, lndet = np.linalg.slogdet(K)
    assert sgn == 1.0, "Invalid determinant"
    assert np.allclose(solver.log_determinant, lndet), "Incorrect determinant"

    y = np.sin(x[:, 0])
    b0 = np.linalg.solve(K, y)
    b = solver.apply_inverse(y).flatten()
    assert np.allclose(b, b0)

    # Check the inverse.
    assert np.allclose(solver.apply_inverse(K), np.eye(N)), "Incorrect inverse"
Example #28
def _R_matrix(p, odds):
    assert len(p) == len(odds)
    R = p.reshape(1, -1).repeat(len(p), 0)
    R *= eye(R.shape[0]) - 1.0
    ix = diag_indices_from(R)
    R[ix] = p * (odds - 1.0)
    return R
Example #29
    def test_pairplot(self):

        vars = ["x", "y", "z"]
        g = ag.pairplot(self.df)

        for ax in g.diag_axes:
            assert len(ax.patches) > 1

        for i, j in zip(*np.triu_indices_from(g.axes, 1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.tril_indices_from(g.axes, -1)):
            ax = g.axes[i, j]
            x_in = self.df[vars[j]]
            y_in = self.df[vars[i]]
            x_out, y_out = ax.collections[0].get_offsets().T
            npt.assert_array_equal(x_in, x_out)
            npt.assert_array_equal(y_in, y_out)

        for i, j in zip(*np.diag_indices_from(g.axes)):
            ax = g.axes[i, j]
            nt.assert_equal(len(ax.collections), 0)

        g = ag.pairplot(self.df, hue="a")
        n = len(self.df.a.unique())

        for ax in g.diag_axes:
            assert len(ax.lines) == n
            assert len(ax.collections) == n
Example #30
    def _pipe_as_flow(self, signal_packet):
        # Get signal_packet details
        hkey = signal_packet.keys()[0]
        adj = signal_packet[hkey]['data']

        # Add 1s along the diagonal to make positive definite
        adj[np.diag_indices_from(adj)] = 1

        # Compute eigenvalues and eigenvectors, ensure they are real
        eigval, eigvec = np.linalg.eig(adj)
        eigval = np.real(eigval)
        eigvec = np.real(eigvec)

        # Sort largest to smallest eigenvalue
        sorted_idx = np.argsort(eigval)[::-1]
        largest_idx = sorted_idx[0]
        centrality = np.abs(eigvec[:, largest_idx])
        centrality = centrality.reshape(-1, 1)

        # Dump into signal_packet
        new_packet = {}
        new_packet[hkey] = {
            'data': centrality,
            'meta': {
                'ax_0': signal_packet[hkey]['meta']['ax_0'],
                'time': signal_packet[hkey]['meta']['time']
            }
        }

        return new_packet
Example #31
def compute_adjacency_matrix_images(coord, sigma=0.1):
    coord = coord.reshape(-1, 2)
    dist = cdist(coord, coord)
    A = np.exp(-dist / (sigma * np.pi)**2)
    A[np.diag_indices_from(A)] = 0
    return A
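Usage sketch, assuming the function above together with SciPy's cdist:

import numpy as np
from scipy.spatial.distance import cdist

coord = np.random.rand(10, 2)              # 10 points in the unit square
A = compute_adjacency_matrix_images(coord, sigma=0.1)
print(A.shape)                             # (10, 10)
print(np.allclose(np.diag(A), 0.0))        # self-loops removed
print(np.allclose(A, A.T))                 # symmetric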
Example #32
 def _calculateLk(self, G01, D):
     Bk = dot(G01.T, ddot(D, G01, left=True))
     Bk[NP.diag_indices_from(Bk)] += 1.0
     Lk = cholesky(Bk, lower=True, check_finite=False)
     return Lk
Example #33
 def lnlike(theta):
     m, b = theta[:2]
     K = user_kernel(np.exp(theta[2:]), dx)
     K[np.diag_indices_from(K)] += ye2
     return user_lnlike(y - (m * x + b), K)
Example #34
def divide_diagonal_by_2(CHI0, div_fact=2.):
    CHI = CHI0.copy()
    CHI[np.diag_indices_from(CHI)] /= div_fact
    return CHI
Example #35
    def fit(self, X, y, previous_K=None, newData=1, previous_mats=None):
        # previous_K stores the Gram matrix from the previous step, while previous_mats = [A, B],
        # where A holds the squared Euclidean distances ||x_i - x_j||^2 and B the dot products x_i . x_j
        """Fit Gaussian process regression model.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Training data

        y : array-like, shape = (n_samples, [n_output_dims])
            Target values
        newData controls how many new data points have been added since the last training (used for batch mode).
        Returns
        -------
        self : returns an instance of self.
        """
        self.newData = 1
        t1 = time.time()
        if self.kernel is None:  # Use an RBF kernel as default
            self.logger.info('Training with default rbf kernel')
            self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                * RBF(1.0, length_scale_bounds="fixed")
        else:
            self.logger.info('Training with customized kernel')
            #if previous_K is None:
            self.kernel_ = clone(self.kernel)

        self._rng = check_random_state(self.random_state)

        X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
        self.logger.info('shape of the x is m=%d , n=%d', X.shape[0],
                         X.shape[1])
        # Normalize target value

        self.logger.info('start to normalize y value...')
        t3 = time.time()
        if self.normalize_y:
            self._y_train_mean = np.mean(y, axis=0)
            # demean y
            y = y - self._y_train_mean
        else:
            self._y_train_mean = np.zeros(1)

        if np.iterable(self.alpha) \
           and self.alpha.shape[0] != y.shape[0]:
            if self.alpha.shape[0] == 1:
                self.alpha = self.alpha[0]
            else:
                raise ValueError(
                    "alpha must be a scalar or an array"
                    " with same number of entries as y.(%d != %d)" %
                    (self.alpha.shape[0], y.shape[0]))

        self.X_train_ = np.copy(X) if self.copy_X_train else X
        self.y_train_ = np.copy(y) if self.copy_X_train else y
        t4 = time.time()
        self.logger.info("finish normalizing Y in----------- %s seconds",
                         str(t4 - t3))

        if self.optimizer is not None and self.kernel_.n_dims > 0:
            self.logger.info('hyper parameter of the kernel will be optimized')
            self.logger.info('optimizing the hyper parameter of the kernel')

            # Choose hyperparameters based on maximizing the log-marginal
            # likelihood (potentially starting from several initial values)
            def obj_func(theta, eval_gradient=True):
                if eval_gradient:
                    lml, grad = self.log_marginal_likelihood(
                        theta, eval_gradient=True, pre_K=previous_K)
                    return -lml, -grad
                else:
                    return -self.log_marginal_likelihood(theta)

            # First optimize starting from theta specified in kernel
            optima = [(self._constrained_optimization(obj_func,
                                                      self.kernel_.theta,
                                                      self.kernel_.bounds))]

            # Additional runs are performed from log-uniform chosen initial
            # theta
            if self.n_restarts_optimizer > 0:
                if not np.isfinite(self.kernel_.bounds).all():
                    raise ValueError(
                        "Multiple optimizer restarts (n_restarts_optimizer>0) "
                        "requires that all bounds are finite.")
                bounds = self.kernel_.bounds
                for iteration in range(self.n_restarts_optimizer):
                    theta_initial = \
                        self._rng.uniform(bounds[:, 0], bounds[:, 1])
                    optima.append(
                        self._constrained_optimization(obj_func, theta_initial,
                                                       bounds))
            # Select result from run with minimal (negative) log-marginal
            # likelihood
            lml_values = list(map(itemgetter(1), optima))
            self.kernel_.theta = optima[np.argmin(lml_values)][0]
            self.log_marginal_likelihood_value_ = -np.min(lml_values)
            t2 = time.time()
            self.logger.info(
                "finish opt hyper-para of kernel in----------- %s seconds",
                str(t2 - t4))
        else:
            self.logger.info('hyper parameter of the kernel will be fixed')
            self.log_marginal_likelihood_value_ = \
                self.log_marginal_likelihood(self.kernel_.theta)

        # Precompute quantities required for predictions which are independent
        # of actual query points
        #This is the most time consuming part?
        t6 = time.time()
        if (previous_K is not None):
            self.logger.info('use previous K_n and x_new to compute K_n+1')
            col = self.kernel_(X[0:X.shape[0] - newData, :],
                               X[X.shape[0] - newData:X.shape[0], :])
            K = np.concatenate((previous_K, col), axis=1)
            row = np.concatenate(
                (col.T,
                 self.kernel_(X[X.shape[0] - newData:X.shape[0], :],
                              X[X.shape[0] - newData:X.shape[0], :])),
                axis=1)
            K = np.concatenate((K, row), axis=0)
            self.K_return = K
        elif (previous_mats is not None):
            #update A
            A = previous_mats[0]
            B = previous_mats[1]
            trainInd = range(int(X.shape[0]) - newData)
            sampleInd = range(int(X.shape[0]) - newData, int(X.shape[0]))

            Acol = cdist(X[trainInd, :], X[sampleInd, :], metric='sqeuclidean')
            Aone = cdist(X[sampleInd, :],
                         X[sampleInd, :],
                         metric='sqeuclidean')
            Arow = np.concatenate((Acol.transpose(), Aone), axis=1)
            #print('aaaa'+str(A.shape))
            #print('acol'+str(Acol.shape))
            A = np.concatenate((A, Acol), axis=1)
            A = np.concatenate((A, Arow), axis=0)
            #update B
            Bcol = np.inner(X[trainInd, :], X[sampleInd, :])
            Bone = np.inner(X[sampleInd, :], X[sampleInd, :])
            Brow = np.concatenate((Bcol.transpose(), Bone), axis=1)
            B = np.concatenate((B, Bcol), axis=1)
            B = np.concatenate((B, Brow), axis=0)
            #compute gram
            #note theta are in log format
            thetas = self.kernel_.theta
            thetas = np.exp(thetas)
            #rbf part
            krbf = np.exp(A * (-0.5) / (thetas[1]**2))
            np.fill_diagonal(krbf, 1)
            krbf = thetas[0] * krbf
            #dot product part
            kdot = B + thetas[2]**2
            kdot = kdot * thetas[3]
            #note that we changed custom kernel, thetas[4] no longer exist
            #self.K_return=krbf+kdot+np.ones(kdot.shape)*thetas[4]
            self.K_return = krbf + kdot
            K = self.K_return
            #also save [A,B]
            self.mats_return = [A, B]

        else:
            K = self.kernel_(self.X_train_)
            self.K_return = K
            A = pdist(self.X_train_, metric='sqeuclidean'
                      )  # flattened upper triangle of squared distances ||xi - xj||^2
            A = squareform(A)
            B = np.inner(self.X_train_, self.X_train_)
            self.mats_return = [A, B]

        K[np.diag_indices_from(K)] += self.alpha
        t7 = time.time()

        self.logger.info("compute matrix K takes----------- %s seconds",
                         str(t7 - t6))
        try:
            self.L_ = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'alpha' parameter of your "
                        "GaussianProcessRegressor estimator." %
                        self.kernel_, ) + exc.args
            raise
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3
        t5 = time.time()
        #print("retrain in (compute matrix K and invers and det) takes-----------  seconds:"+ str(t5-t6))
        self.logger.info("compute K.inv*t takes----------- %s seconds",
                         str(t5 - t7))
        self.logger.info('training ends in %s seconds-----------',
                         str(t5 - t1))
        return self
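The previous_K branch above grows the Gram matrix by appending one block column and one block row instead of recomputing it from scratch; a minimal standalone sketch of that idea with a plain RBF kernel (the names here are illustrative, not part of the class):

import numpy as np
from scipy.spatial.distance import cdist

def rbf_gram(A, B, length_scale=1.0):
    return np.exp(-0.5 * cdist(A, B, metric='sqeuclidean') / length_scale**2)

rng = np.random.default_rng(0)
X_old = rng.normal(size=(50, 3))
X_new = rng.normal(size=(5, 3))

K_old = rbf_gram(X_old, X_old)
col = rbf_gram(X_old, X_new)                       # cross-covariances with the new points
K = np.block([[K_old, col],
              [col.T, rbf_gram(X_new, X_new)]])    # updated (55, 55) Gram matrix

X_all = np.vstack([X_old, X_new])
assert np.allclose(K, rbf_gram(X_all, X_all))      # identical to recomputing from scratch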
Example #36
    def add_data_async(self,
                       fX,
                       fT,
                       istart=0,
                       icount=np.inf,
                       fHH=None,
                       fHT=None):
        """Version of `add_data()` with asynchronous I/O. See `add_data()` for reference.

        Spawns new processes using Python's `multiprocessing` module, and requires more memory than non-async version.
        """
        # initialize
        assert len(self.nnet.neurons) > 0, "Add neurons to ELM before using it"
        X, T = self._checkdata(fX, fT)
        N = X.shape[0]
        # TODO: adapt for GPU solver
        _prepare_fHH(fHH, fHT, self.nnet, self.precision)
        # custom range adjustments
        icount = min(icount, N - istart)
        nb = int(np.ceil(float(icount) / self.batch))

        # weighted classification initialization
        if self.classification == "wc" and self.wc is None:
            ns = np.zeros((self.nnet.outputs, ))
            for b in xrange(nb):  # batch sum is much faster
                start = b * self.batch + istart
                stop = min((b + 1) * self.batch + istart, icount + istart)
                ns += T[start:stop].sum(axis=0)
            ns = ns.astype(self.precision)
            self.wc = ns.sum(
            ) / ns  # class weights normalized to number of samples

        # close X and T files opened by _checkdata()
        h5 = self.opened_hdf5.pop()
        h5.close()
        h5 = self.opened_hdf5.pop()
        h5.close()

        # start async reader and writer for HDF5 files
        qX_in = mp.Queue()
        qX_out = mp.Queue(1)
        readerX = mp.Process(target=_ireader, args=(fX, qX_in, qX_out))
        readerX.daemon = True
        readerX.start()
        qT_in = mp.Queue()
        qT_out = mp.Queue(1)
        readerT = mp.Process(target=_ireader, args=(fT, qT_in, qT_out))
        readerT.daemon = True
        readerT.start()

        # main loop over all the data
        t = time()
        t0 = time()
        wc_vector = None
        for b in xrange(0, nb + 1):
            start_next = b * self.batch + istart
            stop_next = min((b + 1) * self.batch + istart, icount + istart)
            # prefetch data
            qX_in.put((start_next,
                       stop_next))  # asynchronous reading of next data batch
            qT_in.put((start_next, stop_next))

            if b > 0:  # first iteration only prefetches data
                Xb = qX_out.get()
                Tb = qT_out.get()
                if self.classification == "wc":
                    wc_vector = self.wc[np.where(
                        Tb == 1)[1]]  # weights for samples in the batch

                self.nnet.add_batch(Xb, Tb, wc_vector)

            # report time
            eta = int(((time() - t0) / (b + 1)) * (nb - b - 1))
            if time() - t > self.tprint:
                print("processing batch %d/%d, eta %d:%02d:%02d" %
                      (b + 1, nb, eta / 3600, (eta % 3600) / 60, eta % 60))
                t = time()

        # close async reader and writer
        readerX.join()
        readerT.join()

        # if storing output to disk
        if fHH is not None and fHT is not None:
            HH, HT = self.nnet.get_corr()
            HH[np.diag_indices_from(
                HH)] -= self.nnet.norm  # norm is already included
            _write_fHH(fHH, fHT, HH, HT)
Example #37
 def __init__(self, transition_models, transition_prob=None):
     # save attributes
     self.transition_models = transition_models
     self.transition_prob = transition_prob
     num_patterns = len(transition_models)
     # first stack all transition models
     first_states = []
     last_states = []
     for p, tm in enumerate(self.transition_models):
         # set/update the probabilities, states and pointers
         offset = 0
         if p == 0:
             # for the first pattern, just use the TM arrays
             states = tm.states
             pointers = tm.pointers
             probabilities = tm.probabilities
         else:
             # for all consecutive patterns, stack the TM arrays after
             # applying an offset
             # Note: len(pointers) = len(states) + 1, because of the CSR
             #       format of the TM (please see ml.hmm.TransitionModel)
             offset = len(pointers) - 1
             # states: offset = length of the pointers - 1
             states = np.hstack((states, tm.states + len(pointers) - 1))
             # pointers: offset = current maximum of the pointers
             #           start = tm.pointers[1:]
             pointers = np.hstack((pointers, tm.pointers[1:] +
                                   max(pointers)))
             # probabilities: just stack them
             probabilities = np.hstack((probabilities, tm.probabilities))
         # save the first/last states
         first_states.append(tm.state_space.first_states[0] + offset)
         last_states.append(tm.state_space.last_states[-1] + offset)
     # retrieve a dense representation in order to add transitions
     # TODO: operate directly on the sparse representation?
     states, prev_states, probabilities = self.make_dense(states, pointers,
                                                          probabilities)
     # translate float transition_prob value to transition_prob matrix
     if isinstance(transition_prob, float) and transition_prob:
         # create a pattern transition probability matrix
         self.transition_prob = np.ones((num_patterns, num_patterns))
         # transition to other patterns
         self.transition_prob *= transition_prob / (num_patterns - 1)
         # transition to same pattern
         diag = np.diag_indices_from(self.transition_prob)
         self.transition_prob[diag] = 1. - transition_prob
     else:
         self.transition_prob = transition_prob
     # update/add transitions between patterns
     if self.transition_prob is not None and num_patterns > 1:
         new_states = []
         new_prev_states = []
         new_probabilities = []
         for p in range(num_patterns):
             # indices of states/prev_states/probabilities
             idx = np.logical_and(np.in1d(prev_states, last_states[p]),
                                  np.in1d(states, first_states[p]))
             # transition probability
             prob = probabilities[idx]
             # update transitions to same pattern with new probability
             probabilities[idx] *= self.transition_prob[p, p]
             # distribute that part among all other patterns
             for p_ in np.setdiff1d(range(num_patterns), p):
                 idx_ = np.logical_and(
                     np.in1d(prev_states, last_states[p_]),
                     np.in1d(states, first_states[p_]))
                 # make sure idx and idx_ have same length
                 if len(np.nonzero(idx)[0]) != len(np.nonzero(idx_)[0]):
                     raise ValueError('Cannot add transition between '
                                      'patterns with different number of '
                                      'entering/exiting states.')
                 # use idx for the states and idx_ for prev_states
                 new_states.extend(states[idx])
                 new_prev_states.extend(prev_states[idx_])
                 new_probabilities.extend(prob *
                                          self.transition_prob[p, p_])
         # extend the arrays by these new transitions
         states = np.append(states, new_states)
         prev_states = np.append(prev_states, new_prev_states)
         probabilities = np.append(probabilities, new_probabilities)
     # make the transitions sparse
     transitions = self.make_sparse(states, prev_states, probabilities)
     # instantiate a TransitionModel
     super(MultiPatternTransitionModel, self).__init__(*transitions)
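A small sketch of how the scalar transition_prob is expanded into a pattern transition matrix in the constructor above (the numbers are illustrative):

import numpy as np

num_patterns = 3
transition_prob = 0.1    # total probability of switching to any other pattern

prob = np.ones((num_patterns, num_patterns))
prob *= transition_prob / (num_patterns - 1)              # spread over the other patterns
prob[np.diag_indices_from(prob)] = 1. - transition_prob   # probability of staying
print(prob)              # each row sums to 1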
Example #38
def plot_correlation_matrix(df, columns, figsize=(8,8), annot=True, errors=True, nbs=100,# fmt='.2g',
        mask_diagonal=True, mask_upper_triangle=True):
    '''
    Create a plot of the correlation matrix for (continuous) data columns
    (or features) of a dataframe (df)
    @params:
        df - Pandas data frame
        columns - Columns of data frame to include in matrix
        annot - Should the value of the correlation appear in the cell?
        errors - Calculate errors via bootstrap resampling
        nbs - Number of bootstrap realisations
        #fmt - Format for annotations
        mask_diagonal - Mask the matrix diagonal (all 1's)
        mask_upper_triangle - Mask the (copy) upper triangle
    '''
    # Calculate correlation coefficients
    corr = df[columns].corr()
    if annot and errors: # Calculate errors via bootstrap
        std = _bootstrap_correlation_errors(df, columns, n=nbs)
        notes = []
        for i in range(len(columns)): # Create annotations for heatmap
            note = []
            for j in range(len(columns)): 
                note.append('$%.2g \pm %.2g$'%(np.array(corr)[i, j], std[i, j]))
            notes.append(note)
        notes = pd.DataFrame(notes, index=corr.index, columns=corr.columns)

    # Apply mask
    if mask_diagonal and mask_upper_triangle:
        corr.drop(labels=columns[0], axis=0, inplace=True)  # Remove first row
        corr.drop(labels=columns[-1], axis=1, inplace=True) # Remove last column
        if annot and errors:
            notes.drop(labels=columns[0], axis=0, inplace=True)  # Remove first row
            notes.drop(labels=columns[-1], axis=1, inplace=True) # Remove last column

    # Create mask
    mask = np.zeros_like(corr, dtype=bool) 
    if mask_upper_triangle and mask_diagonal:
        # k=1 does diagonal offset from centre
        mask[np.triu_indices_from(mask, k=1)] = True 
    elif mask_upper_triangle:
        mask[np.triu_indices_from(mask, k=1)] = True
    elif mask_diagonal:
        mask[np.diag_indices_from(mask)] = True

    if annot and errors:
        fmt = ''
    else:
        fmt='.2g'
        notes = annot

    # Make the plot
    plt.style.use('seaborn-white') 
    plt.figure(figsize=figsize)
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    g = sns.heatmap(corr, vmin=-1., vmax=1., cmap=cmap, mask=mask, 
                    linewidths=.5,
                    annot=notes,
                    fmt=fmt,
                    square=True,
                    cbar=False,
                   )
    # Centre y-axis ticks
    g.set_yticklabels(labels=g.get_yticklabels(), va='center') 
Example #39
 def getK(self):
     x = self.X
     K = (self.kernelX_module(x) +
          self.linear_kernelY_1_module(x)).evaluate()
     K[np.diag_indices_from(K)] += self.getNoise()
     return K
Example #40
    def fit(self, X, y):
        """Fit Gaussian process regression model.
        
        Args:
            X: Training data
            y: Target values

        Returns:
            self : returns an instance of self.
        """
        self.kernel_ = clone(self.kernel)
        self._rng = check_random_state(self.random_state)

        #X, y = check_X_y(X, y, multi_output=True, y_numeric=True)

        # Normalize target value
        if self.normalize_y:
            self._y_train_mean = np.mean(y, axis=0)
            # demean y
            y = y - self._y_train_mean
        else:
            self._y_train_mean = np.zeros(1)

        if np.iterable(self.alpha) \
           and self.alpha.shape[0] != y.shape[0]:
            if self.alpha.shape[0] == 1:
                self.alpha = self.alpha[0]
            else:
                raise ValueError(
                    "alpha must be a scalar or an array"
                    " with same number of entries as y.(%d != %d)" %
                    (self.alpha.shape[0], y.shape[0]))

        self.X_train_ = np.copy(X) if self.copy_X_train else X
        self.y_train_ = np.copy(y) if self.copy_X_train else y

        if self.optimizer is not None and self.kernel_.n_dims > 0:
            # Choose hyperparameters based on maximizing the log-marginal
            # likelihood (potentially starting from several initial values)
            def obj_func(theta, eval_gradient=True):
                if eval_gradient:
                    lml, grad = self.log_marginal_likelihood(
                        theta, eval_gradient=True)
                    return -lml, -grad
                else:
                    return -self.log_marginal_likelihood(theta)

            # First optimize starting from theta specified in kernel
            optima = [(self._constrained_optimization(obj_func,
                                                      self.kernel_.theta,
                                                      self.kernel_.bounds))]

            # Additional runs are performed from log-uniform chosen initial
            # theta
            if self.n_restarts_optimizer > 0:
                if not np.isfinite(self.kernel_.bounds).all():
                    raise ValueError(
                        "Multiple optimizer restarts (n_restarts_optimizer>0) "
                        "requires that all bounds are finite.")
                bounds = self.kernel_.bounds
                for iteration in range(self.n_restarts_optimizer):
                    theta_initial = \
                        self._rng.uniform(bounds[:, 0], bounds[:, 1])
                    optima.append(
                        self._constrained_optimization(obj_func, theta_initial,
                                                       bounds))
            # Select result from run with minimal (negative) log-marginal
            # likelihood
            lml_values = list(map(itemgetter(1), optima))
            self.kernel_.theta = optima[np.argmin(lml_values)][0]
            self.log_marginal_likelihood_value_ = -np.min(lml_values)
        else:
            self.log_marginal_likelihood_value_ = \
                self.log_marginal_likelihood(self.kernel_.theta)

        # Precompute quantities required for predictions which are independent
        # of actual query points
        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha
        try:
            self.L_ = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'alpha' parameter of your "
                        "GaussianProcessRegressor estimator." %
                        self.kernel_, ) + exc.args
            raise
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3
        return self
Example #41
    def fit(self, X, y):
        """Fit Gaussian process regression model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or list of object
            Feature vectors or other representations of training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values

        Returns
        -------
        self : returns an instance of self.
        """
        if self.kernel is None:  # Use an RBF kernel as default
            self.kernel_ = C(1.0, constant_value_bounds="fixed") \
                * RBF(1.0, length_scale_bounds="fixed")
        else:
            self.kernel_ = clone(self.kernel)

        self._rng = check_random_state(self.random_state)

        if self.kernel_.requires_vector_input:
            X, y = self._validate_data(X,
                                       y,
                                       multi_output=True,
                                       y_numeric=True,
                                       ensure_2d=True,
                                       dtype="numeric")
        else:
            X, y = self._validate_data(X,
                                       y,
                                       multi_output=True,
                                       y_numeric=True,
                                       ensure_2d=False,
                                       dtype=None)

        # Normalize target value
        if self.normalize_y:
            self._y_train_mean = np.mean(y, axis=0)
            self._y_train_std = _handle_zeros_in_scale(np.std(y, axis=0),
                                                       copy=False)

            # Remove mean and make unit variance
            y = (y - self._y_train_mean) / self._y_train_std

        else:
            self._y_train_mean = np.zeros(1)
            self._y_train_std = 1

        if np.iterable(self.alpha) \
           and self.alpha.shape[0] != y.shape[0]:
            if self.alpha.shape[0] == 1:
                self.alpha = self.alpha[0]
            else:
                raise ValueError(
                    "alpha must be a scalar or an array "
                    "with same number of entries as y. (%d != %d)" %
                    (self.alpha.shape[0], y.shape[0]))

        self.X_train_ = np.copy(X) if self.copy_X_train else X
        self.y_train_ = np.copy(y) if self.copy_X_train else y

        if self.optimizer is not None and self.kernel_.n_dims > 0:
            # Choose hyperparameters based on maximizing the log-marginal
            # likelihood (potentially starting from several initial values)
            def obj_func(theta, eval_gradient=True):
                if eval_gradient:
                    lml, grad = self.log_marginal_likelihood(
                        theta, eval_gradient=True, clone_kernel=False)
                    return -lml, -grad
                else:
                    return -self.log_marginal_likelihood(theta,
                                                         clone_kernel=False)

            # First optimize starting from theta specified in kernel
            optima = [(self._constrained_optimization(obj_func,
                                                      self.kernel_.theta,
                                                      self.kernel_.bounds))]

            # Additional runs are performed from log-uniform chosen initial
            # theta
            if self.n_restarts_optimizer > 0:
                if not np.isfinite(self.kernel_.bounds).all():
                    raise ValueError(
                        "Multiple optimizer restarts (n_restarts_optimizer>0) "
                        "requires that all bounds are finite.")
                bounds = self.kernel_.bounds
                for iteration in range(self.n_restarts_optimizer):
                    theta_initial = \
                        self._rng.uniform(bounds[:, 0], bounds[:, 1])
                    optima.append(
                        self._constrained_optimization(obj_func, theta_initial,
                                                       bounds))
            # Select result from run with minimal (negative) log-marginal
            # likelihood
            lml_values = list(map(itemgetter(1), optima))
            self.kernel_.theta = optima[np.argmin(lml_values)][0]
            self.kernel_._check_bounds_params()

            self.log_marginal_likelihood_value_ = -np.min(lml_values)
        else:
            self.log_marginal_likelihood_value_ = \
                self.log_marginal_likelihood(self.kernel_.theta,
                                             clone_kernel=False)

        # Precompute quantities required for predictions which are independent
        # of actual query points
        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha
        try:
            self.L_ = cholesky(K, lower=True)  # Line 2
            # self.L_ changed, self._K_inv needs to be recomputed
            self._K_inv = None
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'alpha' parameter of your "
                        "GaussianProcessRegressor estimator." %
                        self.kernel_, ) + exc.args
            raise
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3
        return self
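For context, the `self.L_` and `self.alpha_` precomputed at the end of this fit are exactly what the prediction step reuses later. A minimal standalone sketch of that algebra, assuming a plain RBF kernel and toy data (none of the names below come from the class itself):

import numpy as np
from scipy.linalg import cholesky, cho_solve
from sklearn.gaussian_process.kernels import RBF

rng = np.random.RandomState(0)
X_train = rng.uniform(-3, 3, size=(20, 1))
y_train = np.sin(X_train).ravel()
kernel = RBF(length_scale=1.0)
noise = 1e-10

# Same precomputation as in fit(): L = cholesky(K + alpha*I), alpha_ = K^-1 y
K = kernel(X_train)
K[np.diag_indices_from(K)] += noise
L = cholesky(K, lower=True)                  # Line 2
alpha_ = cho_solve((L, True), y_train)       # Line 3

# Predictive mean at new inputs: K(X*, X) @ alpha_  (Alg. 2.1, line 4 in GPML)
X_test = np.linspace(-3, 3, 5)[:, None]
y_mean = kernel(X_test, X_train).dot(alpha_)
print(y_mean)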
Example #42
0
    def log_marginal_likelihood(self,
                                theta=None,
                                eval_gradient=False,
                                clone_kernel=True):
        """Returns log-marginal likelihood of theta for training data.

        Parameters
        ----------
        theta : array-like of shape (n_kernel_params,), default=None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.

        eval_gradient : bool, default=False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.

        clone_kernel : bool, default=True
            If True, the kernel attribute is copied. If False, the kernel
            attribute is modified, but may result in a performance improvement.

        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.

        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        if clone_kernel:
            kernel = self.kernel_.clone_with_theta(theta)
        else:
            kernel = self.kernel_
            kernel.theta = theta

        if eval_gradient:
            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
        else:
            K = kernel(self.X_train_)

        K[np.diag_indices_from(K)] += self.alpha
        try:
            L = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError:
            return (-np.inf, np.zeros_like(theta)) \
                if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        alpha = cho_solve((L, True), y_train)  # Line 3

        # Compute log-likelihood (compare line 7)
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions

        if eval_gradient:  # compare Equation 5.9 from GPML
            tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
            tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
            # Compute "0.5 * trace(tmp.dot(K_gradient))" without
            # constructing the full matrix tmp.dot(K_gradient) since only
            # its diagonal is required
            log_likelihood_gradient_dims = \
                0.5 * np.einsum("ijl,jik->kl", tmp, K_gradient)
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
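The einsum in the gradient branch above implements the trace identity described in the comment. A small numeric check of that equivalence (array names and sizes here are illustrative only):

import numpy as np

rng = np.random.default_rng(0)
n, n_outputs, n_params = 5, 2, 3
tmp = rng.standard_normal((n, n, n_outputs))
K_gradient = rng.standard_normal((n, n, n_params))

# Contracted form used above: sum over i and j, keeping (param, output)
fast = 0.5 * np.einsum("ijl,jik->kl", tmp, K_gradient)

# Naive form: build each product explicitly and take its trace
slow = np.empty((n_params, n_outputs))
for k in range(n_params):
    for l in range(n_outputs):
        slow[k, l] = 0.5 * np.trace(tmp[..., l].dot(K_gradient[..., k]))

assert np.allclose(fast, slow)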
Example #43
0
reveal_type(np.index_exp[0:1])  # E: Tuple[builtins.slice]
reveal_type(np.index_exp[0:1,
                         None:3])  # E: Tuple[builtins.slice, builtins.slice]
reveal_type(
    np.index_exp[0, 0:1, ..., [0, 1, 3]]
)  # E: Tuple[Literal[0]?, builtins.slice, builtins.ellipsis, builtins.list[builtins.int]]

reveal_type(np.s_[0:1])  # E: builtins.slice
reveal_type(np.s_[0:1, None:3])  # E: Tuple[builtins.slice, builtins.slice]
reveal_type(
    np.s_[0, 0:1, ..., [0, 1, 3]]
)  # E: Tuple[Literal[0]?, builtins.slice, builtins.ellipsis, builtins.list[builtins.int]]

reveal_type(np.ix_(
    AR_LIKE_b))  # E: tuple[numpy.ndarray[Any, numpy.dtype[numpy.bool_]]]
reveal_type(
    np.ix_(AR_LIKE_i,
           AR_LIKE_f))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{double}]]]
reveal_type(
    np.ix_(AR_i8))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{int64}]]]

reveal_type(np.fill_diagonal(AR_i8, 5))  # E: None

reveal_type(
    np.diag_indices(4))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{int_}]]]
reveal_type(np.diag_indices(
    2, 3))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{int_}]]]

reveal_type(np.diag_indices_from(
    AR_i8))  # E: tuple[numpy.ndarray[Any, numpy.dtype[{int_}]]]
Example #44
0
        Biajb -= np.einsum('ij,ab->iajb', moF[:ndocc, :ndocc], np.diag(np.ones(nvirt)))
        Biajb += 4 * MO[:, ndocc:, :ndocc, ndocc:]
        Biajb -= MO[:, ndocc:, :ndocc, ndocc:].swapaxes(0, 2)
        Biajb -= MO[:, :ndocc, ndocc:, ndocc:].swapaxes(1, 2)
        Biajb *= 4

        # Invert B, (o^3 v^3)
        Binv = np.linalg.inv(Biajb.reshape(ndocc * nvirt, -1)).reshape(ndocc, nvirt, ndocc, nvirt)

        # Build orbital rotation matrix
        x = np.einsum('iajb,ia->jb', Binv, gn)
        U = np.zeros_like(hf.Ca)
        U[:ndocc, ndocc:] = x
        U[ndocc:, :ndocc] = -x.T
        U += 0.5 * np.dot(U, U)
        U[np.diag_indices_from(hf.A)] += 1

        # Easy access to Schmidt orthogonalization
        U, r = np.linalg.qr(U.T)

        # Rotate and set orbitals
        C = hf.Ca.dot(U)
        hf.set_Cleft(C)
        iter_type = 'SOSCF'

print('Total time taken for SCF iterations: %.3f seconds \n' % (time.time() - t))

print('Final SCF energy:     %.8f hartree' % hf.scf_e)

# Compare to Psi4
SCF_E_psi = psi4.energy('SCF')
Example #45
0
    def log_marginal_likelihood(self,
                                theta=None,
                                eval_gradient=False,
                                pre_K=None):
        """Returns log-marginal likelihood of theta for training data.

        Parameters
        ----------
        theta : array-like, shape = (n_kernel_params,) or None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.

        eval_gradient : bool, default: False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.

        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.

        log_likelihood_gradient : array, shape = (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        t1 = time.time()
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        kernel = self.kernel_.clone_with_theta(theta)
        #kernel.dists_mat=self.kernel_.dists_mat

        if eval_gradient:
            #use incremental way of computing K
            #            if(previous_K is not None):
            #                self.logger.info('In computing log marginal likelihood:use previous K_n and x_new to compute K_n+1');
            #                #pass previous K to kernel function kernel.__call__ to compute new K
            #                K= np.concatenate((previous_K,self.kernel_(X[0:X.shape[0]-1,:],X[[X.shape[0]-1],:])),axis=1);
            #                row=np.concatenate((self.kernel_(X[0:X.shape[0]-1,:],X[[X.shape[0]-1],:]).T,self.kernel_(X[[X.shape[0]-1],:],X[[X.shape[0]-1],:])),axis=1)
            #                K=np.concatenate((K,row),axis=0)
            #                self.K_return=K;

            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
            #self.kernel_.dists_mat=kernel.dists_mat;
        else:

            if (pre_K is None):
                K = kernel(self.X_train_)
            else:  # fast path: rebuild K from the precomputed A (squared distances) and B (dot products).
                # After fit() has been called, A and B already have the correct size,
                # so we only need to recompute K here (no need to update A and B as in fit()).
                A = pre_K[0]
                B = pre_K[1]

                #compute gram
                #note theta are in log format
                thetas = np.exp(theta)
                #rbf part
                krbf = np.exp(A * (-0.5) / (thetas[1]**2))
                np.fill_diagonal(krbf, 1)
                krbf = thetas[0] * krbf
                #dot product part
                kdot = B + thetas[2]**2
                kdot = kdot * thetas[3]
                #note that we changed custom kernel, thetas[4] no longer exist
                #self.K_return=krbf+kdot+np.ones(kdot.shape)*thetas[4]
                K = krbf + kdot
                #also save [A,B]

        K[np.diag_indices_from(K)] += self.alpha
        t2 = time.time()
        self.logger.info('compute K in logLikelihood in %s sec', str(t2 - t1))
        try:
            L = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError:
            return (-np.inf, np.zeros_like(theta)) \
                if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        alpha = cho_solve((L, True), y_train)  # Line 3

        # Compute log-likelihood (compare line 7)
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions

        if eval_gradient:  # compare Equation 5.9 from GPML
            tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
            tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
            # Compute "0.5 * trace(tmp.dot(K_gradient))" without
            # constructing the full matrix tmp.dot(K_gradient) since only
            # its diagonal is required
            log_likelihood_gradient_dims = \
                0.5 * np.einsum("ijl,ijk->kl", tmp, K_gradient)
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

        t3 = time.time()
        self.logger.info('logLikelihood computation finished in %s sec',
                         str(t3 - t1))

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
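The fast path above rebuilds K from a cached pair pre_K = [A, B]. A hedged sketch of how such a cache could be built once for this RBF-plus-dot-product kernel, with A as squared Euclidean distances and B as pairwise dot products (these roles are inferred from the code, not taken from the original class):

import numpy as np
from scipy.spatial.distance import pdist, squareform

X_train = np.random.RandomState(0).rand(50, 3)
A = squareform(pdist(X_train, metric="sqeuclidean"))  # used in exp(-0.5 * A / length_scale**2)
B = X_train.dot(X_train.T)                            # used in the dot-product part
pre_K = [A, B]
# log_marginal_likelihood(theta, pre_K=pre_K) can then rebuild K for any theta
# without re-evaluating the kernel on X_train_.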
Example #46
0
import numpy as np
import Graphics as artist
import matplotlib.pyplot as plt

from mpl_toolkits.axes_grid1 import make_axes_locatable

plt.xkcd()
data = np.load(
    'interaction_matrix-2015-11-28-w-deduped-curated-drug-names.npy')
print(data[:10, 3])
data[np.diag_indices_from(data)] /= 2.  #Forgot to tell you this- MC
data = np.log(1. + data)  #log because decreasing marginal importance

data = (data - data.min(axis=1)) / (data.max(axis=1) - data.min(axis=1))

cutoff = 10
eig_vals, eig_vecs = np.linalg.eigh(data)  #Wrong function
idx = np.argsort(eig_vals)  # sorting the eigenvalues
idx = idx[::-1]  # reverse so they are in descending order

# sorting eigenvectors according to the sorted eigenvalues
eig_vecs = eig_vecs[:, idx]
eig_vecs = eig_vecs[:, range(cutoff)]
eig_vals = eig_vals[idx]  # sorting eigenvalues
score = np.dot(eig_vecs.T, data)  # projection of the data in the new space

eig_vals /= eig_vals.max()
print(eig_vals[:10])
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(14, 6.5))

#-- Raw data
Example #47
0
def test_krr_gaussian_local_cmat():

    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys())[:1000]:

        # Initialize the qml.data.Compound() objects
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # This is an atomic Coulomb matrix sorted by row norm
        mol.generate_atomic_coulomb_matrix(size=23, sorting="row-norm")

        mols.append(mol)

    # Shuffle molecules
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 100
    n_train = 200

    training = mols[:n_train]
    test = mols[-n_test:]

    X = np.concatenate([mol.representation for mol in training])
    Xs = np.concatenate([mol.representation for mol in test])

    N = np.array([mol.natoms for mol in training])
    Ns = np.array([mol.natoms for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 724.0
    llambda = 10**(-6.5)

    K = get_local_kernels_gaussian(X, X, N, N, [sigma])[0]
    assert np.allclose(K, K.T), "Error in local Gaussian kernel symmetry"

    K_test = np.loadtxt(test_dir + "/data/K_local_gaussian.txt")
    assert np.allclose(
        K, K_test), "Error in local Gaussian kernel (vs. reference)"

    K_test = get_atomic_kernels_gaussian(training, training, [sigma])[0]
    assert np.allclose(K,
                       K_test), "Error in local Gaussian kernel (vs. wrapper)"

    # Solve alpha
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = get_local_kernels_gaussian(Xs, X, Ns, N, [sigma])[0]

    Ks_test = np.loadtxt(test_dir + "/data/Ks_local_gaussian.txt")
    # Sometimes a few Coulomb matrices differ because of parallel sorting and numerical error
    # Allow up to 5 molecules to differ from the supplied reference.
    differences_count = len(set(np.where(Ks - Ks_test > 1e-7)[0]))
    assert differences_count < 5, "Error in local Gaussian kernel (vs. reference)"
    # assert np.allclose(Ks, Ks_test), "Error in local Gaussian kernel (vs. reference)"

    Ks_test = get_atomic_kernels_gaussian(test, training, [sigma])[0]
    assert np.allclose(Ks,
                       Ks_test), "Error in local Gaussian kernel (vs. wrapper)"

    Yss = np.dot(Ks, alpha)

    mae = np.mean(np.abs(Ys - Yss))
    print(mae)
    assert abs(19.0 -
               mae) < 1.0, "Error in local Gaussian kernel-ridge regression"
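The regression steps in this test (ridge-regularised kernel solve, then prediction) follow standard kernel ridge regression. A self-contained sketch of the same algebra on toy data, using scipy in place of qml's cho_solve:

import numpy as np
from scipy.spatial.distance import cdist
from scipy.linalg import cho_factor, cho_solve as scipy_cho_solve

rng = np.random.RandomState(0)
X_train, X_test = rng.rand(20, 3), rng.rand(5, 3)
y_train = rng.rand(20)
sigma, llambda = 1.0, 1e-6

K = np.exp(-cdist(X_train, X_train, "sqeuclidean") / (2 * sigma**2))
K[np.diag_indices_from(K)] += llambda            # regularisation, as in the test
alpha = scipy_cho_solve(cho_factor(K), y_train)  # solve (K + lambda*I) alpha = y

Ks = np.exp(-cdist(X_test, X_train, "sqeuclidean") / (2 * sigma**2))
y_pred = Ks.dot(alpha)                           # analogous to Yss = np.dot(Ks, alpha)
print(y_pred)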
Example #48
0
def run_simulation(sim_params: ParamDict, _current_results_dir: Path) -> SimResults:
    from matplotlib.gridspec import GridSpec
    from simulator.params import PlasticityTypeItoE, PlasticityTypeEtoI, IsPlastic
    import numpy as np
    from time import time
    from simulator.utils import allocate_aligned
    from simulator.rates import train_network, estimate_responses
    from simulator.setup import make_afferents, make_synapses
    from simulator.params import AllParameters
    from simulator.params import VectorE, VectorI, ArrayIE, ArrayEE, ArrayEI
    from simulator.plasticity import MomentEstimate
    from simulator.measure import orientation_selectivity_index, compute_syn_current_spearmanr, compute_response_similarity

    t0 = time()
    dtype = AllParameters.float_type

    params = AllParameters(**sim_params)
    inp = params.inp
    ni = params.numint
    ng = params.ng
    pl = params.pl

    n_e = ng.exc.n_per_axis ** ng.n_d
    n_i = n_e // 8
    dt_tau_e = ni.dt / ng.exc.tau_m
    dt_tau_i = ni.dt / ng.inh.tau_m
    max_dr_dt_exc = dt_tau_e * ni.max_dr
    max_dr_dt_inh = dt_tau_i * ni.max_dr

    if pl.is_plastic == IsPlastic.INH:
        pl.eta_e = dtype(0)
    elif pl.is_plastic == IsPlastic.EXC:
        pl.eta_i = dtype(0)
    elif pl.is_plastic == IsPlastic.NEITHER:
        pl.eta_e = pl.eta_i = dtype(0)
        ni.n_trials = 0

    aff_arrays = make_afferents(ng.n_d, ng.exc.n_per_axis, inp.n_stimuli, inp.exc.bg_input,
                                inp.exc.peak_stimulus, inp.vonmises_kappa, PLOT_AFFERENTS)
    if params.sy.e2e.w_total > 0:
        corr_kappa = inp.vonmises_kappa / 4
        tmp_arrays = make_afferents(ng.n_d, ng.exc.n_per_axis, inp.n_stimuli, inp.exc.bg_input,
                                    inp.exc.peak_stimulus, corr_kappa, PLOT_AFFERENTS)
        flattened = np.zeros((n_e, inp.n_stimuli ** ng.n_d), dtype=dtype)
        for n in range(n_e):
            flattened[n, :] = tmp_arrays.afferents[..., n].flatten()
        flattened = flattened.astype(dtype)
        target_correlations = np.corrcoef(flattened)
        target_correlations = ArrayEE(target_correlations.astype(dtype))
    else:
        target_correlations = None

    sya = make_synapses(params, n_e, n_i, target_correlations, plot_weight_hist=PLOT_WEIGHT_HIST)
    wei_init = sya.wei.copy()
    wie_init = sya.wie.copy()

    pop_in = aff_arrays.afferents.sum(axis=-1)
    print(f"Sharp: {inp.sharp_input} Avg in: {pop_in.mean():.1f}, std: {pop_in.std():.1f}")
    per_exc = aff_arrays.afferents.sum(axis=(0, 1, 2))
    print(f"Per neuron. Avg in: {per_exc.mean()}, std: {per_exc.std():.1f}")

    recording_re = allocate_aligned((n_e, ni.max_steps), dtype=dtype)
    recording_ri = allocate_aligned((n_i, ni.max_steps), dtype=dtype)

    m_phases = ni.n_trials // ni.every_n
    assert ni.n_trials % ni.every_n == 0

    if ni.n_trials:
        correlations_ee = np.empty((n_e, n_e, m_phases+1))
    else:
        correlations_ee = np.empty((n_e, n_e, 1))

    responses_exc, responses_inh, exc_in, inh_in = estimate_responses(
        inp.n_stimuli, aff_arrays.locations_idx, aff_arrays.afferents, inp.inh.bg_input,
        n_e, n_i, pl.rho0, sya,
        dt_tau_e, dt_tau_i,
        params.ng.r_max,
        max_dr_dt_exc, max_dr_dt_inh,
        ni.max_steps,
        recording_re, recording_ri
    )
    print(f"Maximum exc. rate {np.max(responses_exc):.2f}")
    flattened = np.zeros((n_e, inp.n_stimuli ** ng.n_d), dtype=dtype)
    for n in range(n_e):
        flattened[n, :] = responses_exc[..., n].flatten()
    corr = np.corrcoef(flattened)
    correlations_ee[..., 0] = corr

    if pl.compute_gradient_angles and pl.eta_e > 0:
        angles_ie = allocate_aligned((inp.n_stimuli ** params.ng.n_d * ni.n_trials), np.NaN, dtype=dtype)
    else:
        angles_ie = None

    if pl.compute_gradient_angles and pl.eta_i > 0:
        angles_ei = allocate_aligned((inp.n_stimuli**params.ng.n_d * ni.n_trials), np.NaN, dtype=dtype)
    else:
        angles_ei = None

    if pl.plasticity_type_ei == PlasticityTypeItoE.GRADIENT and ni.n_trials:
        adam_ei = MomentEstimate(
            ArrayEI(allocate_aligned(sya.wei.shape, dtype=sya.wei.dtype)),
            ArrayEI(allocate_aligned(sya.wei.shape, dtype=sya.wei.dtype)),
        )
    else:
        adam_ei = None
    if pl.plasticity_type_ie == PlasticityTypeEtoI.GRADIENT and ni.n_trials:
        adam_ie = MomentEstimate(
            ArrayIE(allocate_aligned(sya.wie.shape, dtype=sya.wie.dtype)),
            ArrayIE(allocate_aligned(sya.wie.shape, dtype=sya.wie.dtype)),
        )
    else:
        adam_ie = None

    converged = True
    if ni.n_trials:
        r_e = VectorE(allocate_aligned(n_e, dtype=dtype))
        r_i = VectorI(allocate_aligned(n_i, dtype=dtype))

        recording_mu = allocate_aligned((ni.n_trials * inp.n_stimuli**ng.n_d, 4, 2), np.NaN, dtype=dtype)
        all_t = allocate_aligned((ni.n_trials, inp.n_stimuli ** ng.n_d), -1, dtype=np.int32)
        all_di = allocate_aligned((1, 2), dtype=dtype)
        last_n = 10
        inh_in_buffer = allocate_aligned((inp.n_stimuli, inp.n_stimuli, inp.n_stimuli, n_e, last_n), dtype=dtype)

        for m in range(m_phases):
            print(f"Phase {m+1} of {m_phases}")
            mu_idx = ni.every_n * (inp.n_stimuli ** ng.n_d)

            if DIAGNOSTIC_RATE_PLOT:  # and not converged:
                import matplotlib.pyplot as plt
                fig = plt.figure()
                gs = GridSpec(2, 2)
                ax = fig.add_subplot(gs[0, 0])
                ax.plot(recording_re.T)
                ax = fig.add_subplot(gs[0, 1])
                last_idx = np.argwhere(np.isfinite(recording_re[0, :]))[-1]
                ax.hist(np.squeeze(recording_re[:, last_idx]), bins=100)
                ax = fig.add_subplot(gs[1, 0])
                ax.plot(recording_ri.T)
                ax = fig.add_subplot(gs[1, 1])
                ax.hist(np.squeeze(recording_ri[:, last_idx]), bins=100)
                plt.show()

            converged, n_run, ni.max_steps, all_di = train_network(
                n_trials=ni.every_n, rho0=pl.rho0,
                re=r_e, ri=r_i, sya=sya,
                eta_e=pl.eta_e, eta_i=pl.eta_i,
                wie_decay=pl.wie_decay, wei_decay=pl.wei_decay,
                plasticity_type_ie=pl.plasticity_type_ie, plasticity_type_ei=pl.plasticity_type_ei,
                bp_weights=pl.bp_weights,
                afferents=aff_arrays.afferents,  bg_input_inh=inp.inh.bg_input,
                inh_in=inh_in_buffer, trial_t=all_t[m * ni.every_n: (m + 1) * ni.every_n, :],
                dt_tau_e=dt_tau_e, dt_tau_i=dt_tau_i,
                dt_bcm_tau_inv=ni.dt * pl.bcm.tau_inv,
                r_max=params.ng.r_max,
                max_dr_dt_exc=max_dr_dt_exc, max_dr_dt_inh=max_dr_dt_inh,
                convergence_max=pl.convergence_max, convergence_mean=pl.convergence_mean,
                x_locations=aff_arrays.locations_idx[0],
                y_locations=aff_arrays.locations_idx[1],
                z_locations=aff_arrays.locations_idx[2],
                rec_mu=recording_mu[m * mu_idx:(m + 1) * mu_idx, ...],
                rec_re=recording_re, rec_ri=recording_ri,
                max_steps=ni.max_steps,
                do_abort=ni.do_abort,
                increment_steps_on_non_convergence=0,
                bcm_theta=pl.bcm.theta,
                adam_ie=adam_ie, adam_ei=adam_ei,
                angles_ie=angles_ie[m * mu_idx:(m + 1) * mu_idx] if angles_ie is not None else None,
                angles_ei=angles_ei[m * mu_idx:(m + 1) * mu_idx] if angles_ei is not None else None,
                compute_angles=pl.compute_gradient_angles,
            )

            responses_exc, responses_inh, exc_in, inh_in = estimate_responses(
                    inp.n_stimuli, aff_arrays.locations_idx,
                    aff_arrays.afferents, inp.inh.bg_input,
                    n_e, n_i, pl.rho0, sya,
                    dt_tau_e, dt_tau_i,
                    params.ng.r_max,
                    max_dr_dt_exc, max_dr_dt_inh,
                    ni.max_steps,
                    recording_re, recording_ri
            )
            flattened = np.zeros((n_e, inp.n_stimuli ** ng.n_d), dtype=dtype)
            for n in range(n_e):
                flattened[n, :] = responses_exc[..., n].flatten()
            corr = np.corrcoef(flattened)
            correlations_ee[..., m+1] = corr

    else:
        recording_mu = np.zeros((1, 4, 2), dtype=dtype)
        all_t = np.zeros((1, inp.n_stimuli ** ng.n_d), dtype=np.int32)
        all_di = np.zeros((1, 2), dtype=dtype)

    if not np.isfinite(sya.wie).all():
        print("wie had NaN or inf values")
    if not np.isfinite(sya.wei).all():
        print("wei had NaN or inf values")

    if DIAGNOSTIC_RATE_PLOT:  # and not converged:
        import matplotlib.pyplot as plt
        fig = plt.figure()
        gs = GridSpec(2, 2)
        ax = fig.add_subplot(gs[0, 0])
        ax.plot(recording_re.T)
        ax = fig.add_subplot(gs[0, 1])
        m = np.argwhere(np.isfinite(recording_re[0, :]))[-1]
        ax.hist(np.squeeze(recording_re[:, m]), bins=100)
        ax = fig.add_subplot(gs[1, 0])
        ax.plot(recording_ri.T)
        ax = fig.add_subplot(gs[1, 1])
        ax.hist(np.squeeze(recording_ri[:, m]), bins=100)
        plt.show()

    osi_e, osi_i = orientation_selectivity_index(inp, responses_exc, responses_inh, aff_arrays)
    response_sim_ee = compute_response_similarity(responses_exc, responses_exc)
    response_sim = compute_response_similarity(responses_exc, responses_inh)
    cc, cp = compute_syn_current_spearmanr(exc_in, inh_in)
    print(f"Avg correlation between synaptic currents: {np.nanmean(cc):.1f}")
    print(f"Percentage of cells without strong correlation: {100 * np.nanmean(cp > 1e-3, axis=0):.1f}")

    if PLOT_RESP_SIMILARITY:
        import matplotlib.pyplot as plt
        fig = plt.figure()
        gs = GridSpec(2, 2)
        response_sim_ee[np.diag_indices_from(response_sim_ee)] = 0

        ax = fig.add_subplot(gs[0, 0])
        ax.set_title("corr_ee")
        ax.imshow(correlations_ee[..., 0])
        ax.set_xticks([])
        ax.set_yticks([])

        ax = fig.add_subplot(gs[0, 1])
        ax.set_title("wee")
        ax.imshow(sya.wee)
        ax.set_xticks([])
        ax.set_yticks([])

        ax = fig.add_subplot(gs[1, 0])
        ax.set_title("response_sim")
        ax.imshow(response_sim_ee)
        ax.set_xticks([])
        ax.set_yticks([])

        if params.sy.e2e.w_total > dtype(0):
            ax = fig.add_subplot(gs[1, 1])
            ax.set_title("wee - corr_ee")
            _wee = sya.wee - np.min(sya.wee)
            _wee /= _wee.max()
            ax.set_xticks([])
            ax.set_yticks([])
            ax.imshow(_wee - correlations_ee[..., 0])
        plt.show()

    print("Finished computing responses")
    t1 = time()
    t_compute = (t1 - t0) / 60.0
    print(f"Compute time {t_compute:.1f} min.")

    raw_data = dict(
        converged=converged,
        steps_to_converge=all_t,
        max_inh_syn_change=all_di,
        recording_re=recording_re,
        recording_ri=recording_ri,
        recording_mu=recording_mu,
        angles_ie=angles_ie if angles_ie is not None else np.zeros(1),
        angles_ei=angles_ei if angles_ei is not None else np.zeros(1),
    )
    computed = dict(
        responses_exc=responses_exc,
        responses_inh=responses_inh,
        response_sim=response_sim,
        response_sim_ee=response_sim_ee,
        cc=cc,
        cp=cp,
        exc_in=exc_in,
        inh_in=inh_in,
        osi_e=osi_e,
        osi_i=osi_i,
        correlations_ee=correlations_ee,
    )
    sim_state = dict(
        wee=sya.wee,
        wei=sya.wei,
        wie=sya.wie,
        wii=sya.wii,
        wei_init=wei_init,
        wie_init=wie_init,
        zei=sya.zei,
        zie=sya.zie,
        stimulus_pref=aff_arrays.stimulus_pref,
        afferents=aff_arrays.afferents,
        ei_min=sya.ei_min,
        ie_min=sya.ie_min,
    )
    results: SimResults = dict(raw_data=raw_data, computed=computed, sim_state=sim_state)

    return results
Example #49
0
def test_brockwell_davis_ex533():
    # See Brockwell and Davis (2009) - Time Series Theory and Methods
    # Example 5.3.3: ARMA(1, 1) process, p. 177
    nobs = 10

    ar_params = np.array([0.2])
    ma_params = np.array([0.4])
    sigma2 = 8.92
    p = len(ar_params)
    q = len(ma_params)
    m = max(p, q)

    ar = np.r_[1, -ar_params]
    ma = np.r_[1, ma_params]

    # First, get the autocovariance of the process
    arma_process_acovf = arma_acovf(ar, ma, nobs=nobs, sigma2=sigma2)
    unconditional_variance = (
        sigma2 * (1 + 2 * ar_params[0] * ma_params[0] + ma_params[0]**2) /
        (1 - ar_params[0]**2))
    assert_allclose(arma_process_acovf[0], unconditional_variance)

    # Next, get the autocovariance of the transformed process
    # Note: as required by darma_transformed_acovf_fast, we first divide
    # through by sigma^2
    arma_process_acovf /= sigma2
    unconditional_variance /= sigma2
    out = np.array(
        _arma_innovations.darma_transformed_acovf_fast(ar, ma,
                                                       arma_process_acovf))
    acovf = np.array(out[0])
    acovf2 = np.array(out[1])

    # `acovf` is an m^2 x m^2 matrix, where m = max(p, q)
    # but it is only valid for the autocovariances of the first m observations
    # (this means in particular that the block `acovf[m:, m:]` should *not* be
    # used)
    # `acovf2` then contains the (time invariant) autocovariance terms for
    # the observations m + 1, ..., nobs - since the autocovariance is the same
    # for these terms, to save space we do not construct the autocovariance
    # matrix as we did for the first m terms. Thus `acovf2[0]` is the variance,
    # `acovf2[1]` is the first autocovariance, etc.

    # Test the autocovariance function for observations m + 1, ..., nobs
    # (it is time invariant here)
    assert_equal(acovf2.shape, (nobs - m, ))
    assert_allclose(acovf2[0], 1 + ma_params[0]**2)
    assert_allclose(acovf2[1], ma_params[0])
    assert_allclose(acovf2[2:], 0)

    # Test the autocovariance function for observations 1, ..., m
    # (it is time varying here)
    assert_equal(acovf.shape, (m * 2, m * 2))

    # (we need to check `acovf[:m * 2, :m]`, i.e. `acovf[:2, :1])`
    ix = np.diag_indices_from(acovf)
    ix_lower = (ix[0][:-1] + 1, ix[1][:-1])

    # acovf[ix] is the diagonal, and we want to check the first m
    # elements of the diagonal
    assert_allclose(acovf[ix][:m], unconditional_variance)

    # acovf[ix_lower] is the first lower off-diagonal
    assert_allclose(acovf[ix_lower][:m], ma_params[0])

    # Now, check that we compute the moving average coefficients and the
    # associated variances correctly
    out = _arma_innovations.darma_innovations_algo_fast(
        nobs, ar_params, ma_params, acovf, acovf2)
    theta = np.array(out[0])
    v = np.array(out[1])

    # Test v (see eq. 5.3.13)
    desired_v = np.zeros(nobs)
    desired_v[0] = unconditional_variance
    for i in range(1, nobs):
        desired_v[i] = 1 + (1 - 1 / desired_v[i - 1]) * ma_params[0]**2
    assert_allclose(v, desired_v)

    # Test theta (see eq. 5.3.13)
    # Note that they will have shape (nobs, m + 1) here, not (nobs, nobs - 1)
    # as in the original (non-fast) version
    assert_equal(theta.shape, (nobs, m + 1))
    desired_theta = np.zeros(nobs)
    for i in range(1, nobs):
        desired_theta[i] = ma_params[0] / desired_v[i - 1]
    assert_allclose(theta[:, 0], desired_theta)
    assert_allclose(theta[:, 1:], 0)

    # Test against Table 5.3.1
    endog = np.array([
        -1.1, 0.514, 0.116, -0.845, 0.872, -0.467, -0.977, -1.699, -1.228,
        -1.093
    ])
    u = _arma_innovations.darma_innovations_filter(endog, ar_params, ma_params,
                                                   theta)

    # Note: Table 5.3.1 has \hat X_n+1 = -0.5340 for n = 1, but this seems to
    # be a typo, since equation 5.3.12 gives the form of the prediction
    # equation as \hat X_n+1 = \phi X_n + \theta_n1 (X_n - \hat X_n)
    # Then for n = 1 we have:
    # \hat X_n+1 = 0.2 (-1.1) + (0.2909) (-1.1 - 0) = -0.5399
    # And for n = 2 if we use what we have computed, then we get:
    # \hat X_n+1 = 0.2 (0.514) + (0.3833) (0.514 - (-0.54)) = 0.5068
    # as desired, whereas if we used the book's number for n=1 we would get:
    # \hat X_n+1 = 0.2 (0.514) + (0.3833) (0.514 - (-0.534)) = 0.5045
    # which is not what Table 5.3.1 shows.
    desired_hat = np.array([
        0, -0.540, 0.5068, -0.1321, -0.4539, 0.7046, -0.5620, -0.3614, -0.8748,
        -0.3869
    ])
    desired_u = endog - desired_hat
    assert_allclose(u, desired_u, atol=1e-4)
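A quick numeric check of the hand calculation in the comment above (constants copied from Example 5.3.3; theta_11 is the first innovations coefficient):

phi, theta1 = 0.2, 0.4
v0 = (1 + 2 * phi * theta1 + theta1**2) / (1 - phi**2)  # unconditional variance / sigma^2
theta_11 = theta1 / v0                                   # ~0.2909
x1 = -1.1
xhat2 = phi * x1 + theta_11 * (x1 - 0.0)
print(round(xhat2, 4))  # -0.54, matching desired_hat[1] above rather than the book's -0.5340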
Example #50
0
filenames.sort()

fig, ax = plt.subplots(figsize=(12, 12))

for filename in filenames:
    lengths = io.load_lengths(filename.replace(".matrix", ".bed"))
    counts = io.load_counts(filename, lengths=lengths)

    if "25kb" in filename:
        resolution = 25000
    elif "20000" in filename:
        resolution = 20000
    else:
        resolution = 10000

    counts = counts.toarray()
    counts = counts.T + counts

    # Just making sure there is no interaction counted in the diag
    counts[np.diag_indices_from(counts)] = 0
    counts = filter.filter_low_counts(counts, percentage=0.03, sparsity=False)
    counts = normalization.ICE_normalization(counts)

    print("1. Compute count vs genomic distance relationship")
    mapping = get_mapping(counts, lengths, verbose=True, smoothed=False)
    ax.plot(mapping[0, 2:] * resolution, mapping[1, 2:])
    ax.axhline(mapping[1, 0])

ax.set_yscale("log")
ax.set_xscale("log")
Example #51
0
def WishartBartlett(name,
                    S,
                    nu,
                    is_cholesky=False,
                    return_cholesky=False,
                    testval=None):
    R"""
    Bartlett decomposition of the Wishart distribution. As the Wishart
    distribution requires the matrix to be symmetric positive semi-definite
    it is impossible for MCMC to ever propose acceptable matrices.

    Instead, we can use the Bartlett decomposition, which samples a
    lower-triangular matrix. Specifically:

    .. math::
        \text{If } A \sim \begin{pmatrix}
        \sqrt{c_1} & 0 & 0 \\
        z_{21} & \sqrt{c_2} & 0 \\
        z_{31} & z_{32} & \sqrt{c_3}
        \end{pmatrix}

        \text{with } c_i \sim \chi^2(n-i+1) \text{ and } z_{ij} \sim \mathcal{N}(0, 1), \text{ then} \\
        L \times A \times A^T \times L^T \sim \text{Wishart}(L \times L^T, \nu)

    See http://en.wikipedia.org/wiki/Wishart_distribution#Bartlett_decomposition
    for more information.

    Parameters
    ----------
    S : ndarray
        p x p positive definite matrix
        Or:
        p x p lower-triangular matrix that is the Cholesky factor
        of the covariance matrix.
    nu : int
        Degrees of freedom, > dim(S).
    is_cholesky : bool (default=False)
        Input matrix S is already Cholesky decomposed as S.T * S
    return_cholesky : bool (default=False)
        Only return the Cholesky decomposed matrix.
    testval : ndarray
        p x p positive definite matrix used to initialize

    Note
    ----
    This is not a standard Distribution class but follows a similar
    interface. Besides the Wishart distribution, it will add RVs
    c and z to your model which make up the matrix.
    """

    L = S if is_cholesky else scipy.linalg.cholesky(S)
    diag_idx = np.diag_indices_from(S)
    tril_idx = np.tril_indices_from(S, k=-1)
    n_diag = len(diag_idx[0])
    n_tril = len(tril_idx[0])

    if testval is not None:
        # Inverse transform
        testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T))
        testval = scipy.linalg.cholesky(testval, lower=True)
        diag_testval = testval[diag_idx]**2
        tril_testval = testval[tril_idx]
    else:
        diag_testval = None
        tril_testval = None

    c = tt.sqrt(
        ChiSquared('c',
                   nu - np.arange(2, 2 + n_diag),
                   shape=n_diag,
                   testval=diag_testval))
    pm._log.info('Added new variable c to model diagonal of Wishart.')
    z = Normal('z', 0, 1, shape=n_tril, testval=tril_testval)
    pm._log.info('Added new variable z to model off-diagonals of Wishart.')
    # Construct A matrix
    A = tt.zeros(S.shape, dtype=np.float32)
    A = tt.set_subtensor(A[diag_idx], c)
    A = tt.set_subtensor(A[tril_idx], z)

    # L * A * A.T * L.T ~ Wishart(L*L.T, nu)
    if return_cholesky:
        return Deterministic(name, tt.dot(L, A))
    else:
        return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T))
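A hedged usage sketch of the helper above inside a PyMC3 model, assuming the function and its imports (theano.tensor as tt, the pymc3 distributions, scipy) are in scope; the scale matrix and degrees of freedom are arbitrary toy values:

import numpy as np
import pymc3 as pm

S = np.eye(3)     # prior scale matrix
nu = 5            # degrees of freedom, > dim(S)
with pm.Model():
    # adds RVs 'c' and 'z' to the model and returns the Wishart-distributed matrix
    cov = WishartBartlett('cov', S=S, nu=nu, testval=np.eye(3))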
Example #52
0
    def log_marginal_likeli(self,
                            theta=None,
                            eval_gradient=False,
                            clone_kernel=True):
        """Returns log-marginal likelihood of theta for training data.
        Parameters
        ----------
        theta : array-like of shape (n_kernel_params,), default=None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.
        eval_gradient : bool, default=False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.
        clone_kernel : bool, default=True
            If True, the kernel attribute is copied. If False, the kernel
            attribute is modified, but may result in a performance improvement.
        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.
        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        if clone_kernel:
            kernel = self.kernel_.clone_with_theta(theta)
        else:
            kernel = self.kernel_
            kernel.theta = theta

        if eval_gradient:
            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
        else:
            K = kernel(self.X_train_)

        K[np.diag_indices_from(K)] += self.alpha
        try:
            L = cholesky(K, lower=True)  # Line 2
        except np.linalg.LinAlgError:
            return (-np.inf, np.zeros_like(theta)) \
                if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        alpha = cho_solve((L, True), y_train)  # Line 3

        # Compute log-likelihood (compare line 7)
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        log_likelihood = log_likelihood_dims.sum(-1)  # sum over dimensions

        # Precompute quantities required for predictions which are independent
        # of actual query points
        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha
        try:
            self.L_ = cholesky(K, lower=True)  # Line 2
            # self.L_ changed, self._K_inv needs to be recomputed
            self._K_inv = None
        except np.linalg.LinAlgError as exc:
            exc.args = ("The kernel, %s, is not returning a "
                        "positive definite matrix. Try gradually "
                        "increasing the 'alpha' parameter of your "
                        "GaussianProcessRegressor estimator." %
                        self.kernel_, ) + exc.args
            raise
        self.alpha_ = cho_solve((self.L_, True), self.y_train_)  # Line 3

        pred1 = self.predict(uX1)
        pred2 = self.predict(uX2)
        phyloss = density_diff(density(pred1), density(pred2))
        #print("phyLoss:", 500*phyloss)
        log_likelihood -= 500 * phyloss
        #print(log_likelihood)

        if eval_gradient:  # compare Equation 5.9 from GPML
            tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
            tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
            # Compute "0.5 * trace(tmp.dot(K_gradient))" without
            # constructing the full matrix tmp.dot(K_gradient) since only
            # its diagonal is required
            log_likelihood_gradient_dims = \
                0.5 * np.einsum("ijl,jik->kl", tmp, K_gradient)
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
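The penalty above relies on helpers (uX1, uX2, density, density_diff) that are not shown in this snippet. Purely as an illustration of what a density-mismatch penalty of that shape could look like, one hypothetical choice is a histogram L1 distance:

import numpy as np

def density(pred, bins=20, value_range=(0.0, 1.0)):
    # Hypothetical: normalised histogram of the predictions over a fixed range
    hist, _ = np.histogram(np.asarray(pred).ravel(), bins=bins,
                           range=value_range, density=True)
    return hist

def density_diff(d1, d2):
    # Hypothetical: L1 distance between two predicted densities
    return float(np.abs(d1 - d2).sum())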
Example #53
0
def custom_leastsq(obj_fn, jac_fn, x0, f_norm2_tol=1e-6, jac_norm_tol=1e-6,
                   rel_ftol=1e-6, rel_xtol=1e-6, max_iter=100, comm=None,
                   verbosity=0, profiler=None):
    msg = ""
    converged = False
    x = x0
    f = obj_fn(x)
    norm_f = _np.dot(f,f) # _np.linalg.norm(f)**2
    half_max_nu = 2**62 #what should this be??
    tau = 1e-3
    nu = 2
    mu = 0 #initialized on 1st iter
    my_cols_slice = None

    if comm is not None and comm.Get_rank() != 0:
        verbosity = 0 #Only print to stdout from root process

    if not _np.isfinite(norm_f):
        msg = "Infinite norm of objective function at initial point!"


    for k in range(max_iter): #outer loop
        # assume x, f, fnorm hold valid values

        if len(msg) > 0: 
            break #exit outer loop if an exit-message has been set

        if norm_f < f_norm2_tol:
            msg = "Sum of squares is at most %g" % f_norm2_tol
            converged = True; break

        if verbosity > 0:
            print("--- Outer Iter %d: norm_f = %g, mu=%g" % (k,norm_f,mu))
            
        if profiler: profiler.mem_check("custom_leastsq: begin outer iter *before de-alloc*")
        Jac = None; JTJ = None; JTf = None

        if profiler: profiler.mem_check("custom_leastsq: begin outer iter")
        Jac = jac_fn(x)
        if profiler: profiler.mem_check("custom_leastsq: after jacobian:" 
                                        + "shape=%s, GB=%.2f" % (str(Jac.shape),
                                                        Jac.nbytes/(1024.0**3)) )

        tm = _time.time()
        if my_cols_slice is None:
            my_cols_slice = _mpit.distribute_for_dot(Jac.shape[0], comm)
        JTJ = _mpit.mpidot(Jac.T,Jac,my_cols_slice,comm)   #_np.dot(Jac.T,Jac)
        JTf = _np.dot(Jac.T,f)
        if profiler: profiler.add_time("custom_leastsq: dotprods",tm)

        idiag = _np.diag_indices_from(JTJ)
        norm_JTf = _np.linalg.norm(JTf,ord=_np.inf)
        norm_x = _np.dot(x,x) # _np.linalg.norm(x)**2
        undampled_JTJ_diag = JTJ.diagonal().copy()

        if norm_JTf < jac_norm_tol:
            msg = "norm(jacobian) is at most %g" % jac_norm_tol
            converged = True; break

        if k == 0:
            #mu = tau # initial damping element
            mu = tau * _np.max(undampled_JTJ_diag) # initial damping element

        #determine the increment using adaptive damping
        while True:  #inner loop

            if profiler: profiler.mem_check("custom_leastsq: begin inner iter")
            JTJ[idiag] += mu # augment normal equations
            #JTJ[idiag] *= (1.0 + mu) # augment normal equations

            try:
                if profiler: profiler.mem_check("custom_leastsq: before linsolve")
                tm = _time.time()
                success = True
                dx = _np.linalg.solve(JTJ, -JTf) 
                if profiler: profiler.add_time("custom_leastsq: linsolve",tm)
            except _np.linalg.LinAlgError:
                success = False
            
            if profiler: profiler.mem_check("custom_leastsq: after linsolve")
            if success: #linear solve succeeded
                new_x = x + dx
                norm_dx = _np.dot(dx,dx) # _np.linalg.norm(dx)**2

                if verbosity > 1:
                    print("  - Inner Loop: mu=%g, norm_dx=%g" % (mu,norm_dx))

                if norm_dx < (rel_xtol**2)*norm_x:
                    msg = "Relative change in |x| is at most %g" % rel_xtol
                    converged = True; break

                if norm_dx > (norm_x+rel_xtol)/(MACH_PRECISION**2):
                    msg = "(near-)singular linear system"; break
                
                new_f = obj_fn(new_x)
                if profiler: profiler.mem_check("custom_leastsq: after obj_fn")
                norm_new_f = _np.dot(new_f,new_f) # _np.linalg.norm(new_f)**2
                if not _np.isfinite(norm_new_f): # avoid infinite loop...
                    msg = "Infinite norm of objective function!"; break

                dL = _np.dot(dx, mu*dx - JTf) # expected decrease in ||F||^2 from linear model
                dF = norm_f - norm_new_f      # actual decrease in ||F||^2

                if verbosity > 1:
                    print("      (cont): norm_new_f=%g, dL=%g, dF=%g, reldL=%g, reldF=%g" % 
                          (norm_new_f,dL,dF,dL/norm_f,dF/norm_f))

                if dL/norm_f < rel_ftol and dF/norm_f < rel_ftol and dF/dL < 2.0:
                    msg = "Both actual and predicted relative reductions in the" + \
                        " sum of squares are at most %g" % rel_ftol
                    converged = True; break

                if profiler: profiler.mem_check("custom_leastsq: before success")

                if dL > 0 and dF > 0:
                    # reduction in error: increment accepted!
                    t = 1.0 - (2*dF/dL-1.0)**3 # dF/dL == gain ratio
                    mu *= max(t,1.0/3.0)
                    nu = 2
                    x,f, norm_f = new_x, new_f, norm_new_f

                    if verbosity > 1:
                        print("      Accepted! gain ratio=%g  mu * %g => %g"
                              % (dF/dL,max(t,1.0/3.0),mu))

                    ##Check to see if we *would* switch to Q-N method in a hybrid algorithm
                    #new_Jac = jac_fn(new_x)
                    #new_JTf = _np.dot(new_Jac.T,new_f)
                    #print(" CHECK: %g < %g ?" % (_np.linalg.norm(new_JTf,
                    #    ord=_np.inf),0.02 * _np.linalg.norm(new_f)))

                    break # exit inner loop normally
            #else:
            #    print("LinSolve Failure!!")

            # if this point is reached, either the linear solve failed
            # or the error did not reduce.  In either case, reject increment.
                
            #Increase damping (mu), then increase damping factor to 
            # accelerate further damping increases.
            mu *= nu
            if nu > half_max_nu : #watch for nu getting too large (&overflow)
                msg = "Stopping after nu overflow!"; break
            nu = 2*nu
            if verbosity > 1:
                print("      Rejected!  mu => mu*nu = %g, nu => 2*nu = %g"
                      % (mu, nu))
            
            JTJ[idiag] = undampled_JTJ_diag #restore diagonal
        #end of inner loop
    #end of outer loop
    else:
        #if no break stmt hit, then we've exceeded maxIter
        msg = "Maximum iterations (%d) exceeded" % max_iter

    #JTJ[idiag] = undampled_JTJ_diag #restore diagonal
    return x, converged, msg
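A usage sketch for the routine above on a tiny curve-fitting problem. This assumes the module-level names it relies on (_np, _time, _mpit, MACH_PRECISION) are importable and that the MPI helpers fall back to serial behaviour when comm is None:

import numpy as np

t_data = np.linspace(0, 1, 20)
y_data = np.exp(0.5 * t_data) + 2.0           # synthetic, noise-free target

def obj_fn(x):
    # residuals of the model y = exp(a*t) + b
    return np.exp(x[0] * t_data) + x[1] - y_data

def jac_fn(x):
    J = np.empty((t_data.size, 2))
    J[:, 0] = t_data * np.exp(x[0] * t_data)  # d r / d a
    J[:, 1] = 1.0                             # d r / d b
    return J

x_opt, ok, msg = custom_leastsq(obj_fn, jac_fn, np.array([0.0, 0.0]),
                                max_iter=50, verbosity=1)
print(x_opt, ok, msg)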
Example #54
0
def test_brockwell_davis_ex534():
    # See Brockwell and Davis (2009) - Time Series Theory and Methods
    # Example 5.3.4: ARMA(1, 1) process, p. 178
    nobs = 10

    ar_params = np.array([1, -0.24])
    ma_params = np.array([0.4, 0.2, 0.1])
    sigma2 = 1
    p = len(ar_params)
    q = len(ma_params)
    m = max(p, q)

    ar = np.r_[1, -ar_params]
    ma = np.r_[1, ma_params]

    # First, get the autocovariance of the process
    arma_process_acovf = arma_acovf(ar, ma, nobs=nobs, sigma2=sigma2)
    assert_allclose(arma_process_acovf[:3], [7.17133, 6.44139, 5.06027],
                    atol=1e-5)

    # Next, get the autocovariance of the transformed process
    out = np.array(
        _arma_innovations.darma_transformed_acovf_fast(ar, ma,
                                                       arma_process_acovf))
    acovf = np.array(out[0])
    acovf2 = np.array(out[1])
    # See test_brockwell_davis_ex533 for details on acovf vs acovf2

    # Test acovf
    assert_equal(acovf.shape, (m * 2, m * 2))

    ix = np.diag_indices_from(acovf)
    ix_lower1 = (ix[0][:-1] + 1, ix[1][:-1])
    ix_lower2 = (ix[0][:-2] + 2, ix[1][:-2])
    ix_lower3 = (ix[0][:-3] + 3, ix[1][:-3])
    ix_lower4 = (ix[0][:-4] + 4, ix[1][:-4])

    assert_allclose(acovf[ix][:m], 7.17133, atol=1e-5)
    desired = [6.44139, 6.44139, 0.816]
    assert_allclose(acovf[ix_lower1][:m], desired, atol=1e-5)
    assert_allclose(acovf[ix_lower2][0], 5.06027, atol=1e-5)
    assert_allclose(acovf[ix_lower2][1:m], 0.34, atol=1e-5)
    assert_allclose(acovf[ix_lower3][:m], 0.1, atol=1e-5)
    assert_allclose(acovf[ix_lower4][:m], 0, atol=1e-5)

    # Test acovf2
    assert_equal(acovf2.shape, (nobs - m, ))
    assert_allclose(acovf2[:4], [1.21, 0.5, 0.24, 0.1])
    assert_allclose(acovf2[4:], 0)

    # Test innovations algorithm output
    out = _arma_innovations.darma_innovations_algo_fast(
        nobs, ar_params, ma_params, acovf, acovf2)
    theta = np.array(out[0])
    v = np.array(out[1])

    # Test v (see Table 5.3.2)
    desired_v = [
        7.1713, 1.3856, 1.0057, 1.0019, 1.0016, 1.0005, 1.0000, 1.0000, 1.0000,
        1.0000
    ]
    assert_allclose(v, desired_v, atol=1e-4)

    # Test theta (see Table 5.3.2)
    assert_equal(theta.shape, (nobs, m + 1))
    desired_theta = np.array([[
        0, 0.8982, 1.3685, 0.4008, 0.3998, 0.3992, 0.4000, 0.4000, 0.4000,
        0.4000
    ], [0, 0, 0.7056, 0.1806, 0.2020, 0.1995, 0.1997, 0.2000, 0.2000, 0.2000
        ], [0, 0, 0, 0.0139, 0.0722, 0.0994, 0.0998, 0.0998, 0.0999, 0.1]]).T
    assert_allclose(theta[:, :m], desired_theta, atol=1e-4)
    assert_allclose(theta[:, m:], 0)

    # Test innovations filter output
    endog = np.array([
        1.704, 0.527, 1.041, 0.942, 0.555, -1.002, -0.585, 0.010, -0.638, 0.525
    ])
    u = _arma_innovations.darma_innovations_filter(endog, ar_params, ma_params,
                                                   theta)

    desired_hat = np.array([
        0, 1.5305, -0.1710, 1.2428, 0.7443, 0.3138, -1.7293, -0.1688, 0.3193,
        -0.8731
    ])
    desired_u = endog - desired_hat
    assert_allclose(u, desired_u, atol=1e-4)
Example #55
0
    def fit(self, X, y):
        """Fit Gaussian process regression model.
        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Training data
        y : array-like, shape = (n_samples, [n_output_dims])
            Target values
        Returns
        -------
        self : returns an instance of self.
        """
        if self.kernel is None:  # Use an RBF kernel as default
            self.kernel_ = C() * RBF() + WhiteKernel()
        else:
            self.kernel_ = clone(self.kernel)

        # Fix the Covariance matrix
        if self.x_cov is None:
            self.x_cov = 0.0
            self.propagate_error = False
        if isinstance(self.x_cov, float):
            self.x_cov = np.array([self.x_cov])
        if np.ndim(self.x_cov) < 2:
            self.x_cov = np.diag(self.x_cov)
        self.x_cov = self.x_cov

        self._rng = check_random_state(self.random_state)

        X, y = check_X_y(X, y, multi_output=True, y_numeric=True)

        # Normalize target value
        if self.normalize_y:
            self._y_train_mean = np.mean(y, axis=0)
            # demean y
            y = y - self._y_train_mean
        else:
            self._y_train_mean = np.zeros(1)


        self.X_train_ =  X
        self.y_train_ =  y

        #======================================
        # Step I: Marginal Maximum Likelihood
        #         w/o Derivative of the kernel
        #======================================
        # Choose hyperparameters based on the log-marginal
        # likelihood 
        self.derivative_term = None

        optima = self._constrained_optimization(
            self._obj_func, self.kernel_.theta,
            self.kernel_.bounds)

        # extract optimum parameters
        self.kernel_.theta = optima[0]
        self.log_marginal_likelihood_value_ = - optima[1]

        #======================================
        # Step II: Solve for Weights
        #======================================
        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha 

        try:
            self.L_ = cholesky(K, lower=True)

        except np.linalg.LinAlgError as exc:
            exc.args(f"The kernel {self.kernel_}, is not returing a "
                     "positive definite matrix. Try gradually "
                     "increasing the 'alpha' parameter of your GPR.") + exc.args
            raise 

        self.alpha_ = cho_solve((self.L_, True), self.y_train_)

        #======================================
        # Step III: Take Derivative
        #======================================

        # Calculate the Derivative for RBF Kernel
        self.derivative = rbf_derivative(
            self.X_train_, self.X_train_, 
            self.kernel_(self.X_train_, self.X_train_),
            self.alpha_, self.kernel_.get_params()['k1__k2__length_scale']
        )

        # Calculate the derivative term
        self.derivative_term = np.dot(self.derivative, np.dot(self.x_cov, self.derivative.T))


        #======================================
        # Step IV: Maximum Marginal Likelihood
        #          w/ Derivative
        #======================================
        # Choose hyperparameters based on the log-marginal
        # likelihood 
        optima = self._constrained_optimization(
            self._obj_func, self.kernel_.theta,
            self.kernel_.bounds)

        # extract optimum parameters
        self.kernel_.theta = optima[0]
        self.log_marginal_likelihood_value_ = - optima[1]

        K = self.kernel_(self.X_train_)
        K[np.diag_indices_from(K)] += self.alpha 
        K += self.derivative_term

        try:
            self.L_ = cholesky(K, lower=True)
            self._K_inv = None
        except np.linalg.LinAlgError as exc:
            exc.args(f"The kernel {self.kernel_}, is not returing a "
                     "positive definite matrix. Try gradually "
                     "increasing the 'alpha' parameter of your GPR.") + exc.args
            raise 

        self.alpha_ = cho_solve((self.L_, True), self.y_train_)


        #======================================
        # Step V: Repeat Steps II-IV until 
        #         desired convergence
        #======================================

        # TODO: Complete convergence

        return self
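A small numeric illustration of the derivative term used above: linearised propagation of an input covariance x_cov adds J @ x_cov @ J.T to the output covariance before the Cholesky factorisation (the array sizes below are arbitrary stand-ins):

import numpy as np

J = np.random.RandomState(1).randn(6, 2)   # stand-in for self.derivative (n_samples x n_features)
x_cov = np.diag([0.05, 0.10])              # assumed input-noise covariance
derivative_term = J.dot(x_cov).dot(J.T)    # added to K, as in Steps III and IV above
print(derivative_term.shape)               # (6, 6): same shape as the kernel matrix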
Example #56
0
import cvxpy as cp
import numpy as np

I = np.random.randn(10, 10) > .4
I[np.diag_indices_from(I)] = 0
K = np.shape(I)[0]

X = cp.Variable((K, K), name='X')
const = []
for i in range(K):
    for j in range(K):
        if I[i, j] > 0:
            const.append(X[i, j] == 0)
const.append(cp.diag(X) == 1)

prob = cp.Problem(cp.Minimize(cp.normNuc(X)), const)
prob.solve(verbose=True)
print(X.value)
Example #57
0
#
# ## Dense mass matrices
#
# The main extra is the :func:`exoplanet.get_dense_nuts_step` function that extends the PyMC3 sampling procedure to include support for learning off-diagonal elements of the mass matrix.
# This is *very* important for any problems where there are covariances between the parameters (this is true for pretty much all exoplanet models).
# A thorough discussion of this [can be found elsewhere online](https://dfm.io/posts/pymc3-mass-matrix/), but here is a simple demo where we sample a covariant Gaussian using :func:`exoplanet.get_dense_nuts_step`.
#
# First, we generate a random positive definite covariance matrix for the Gaussian:

# %%
import numpy as np

ndim = 5
np.random.seed(42)
L = np.random.randn(ndim, ndim)
L[np.diag_indices_from(L)] = 0.1 * np.exp(L[np.diag_indices_from(L)])
L[np.triu_indices_from(L, 1)] = 0.0
cov = np.dot(L, L.T)

# %% [markdown]
# And then we can sample this using PyMC3 and :func:`exoplanet.get_dense_nuts_step`:

# %%
import pymc3 as pm
import exoplanet as xo

with pm.Model() as model:
    pm.MvNormal("x", mu=np.zeros(ndim), chol=L, shape=(ndim, ))
    trace = pm.sample(tune=2000,
                      draws=2000,
                      chains=2,
                      step=xo.get_dense_nuts_step())
Example #58
0
    def log_marginal_likelihood(self,
                                theta=None,
                                eval_gradient=False,
                                clone_kernel=True):
        """Return log-marginal likelihood of theta for training data.

        Parameters
        ----------
        theta : array-like of shape (n_kernel_params,), default=None
            Kernel hyperparameters for which the log-marginal likelihood is
            evaluated. If None, the precomputed log_marginal_likelihood
            of ``self.kernel_.theta`` is returned.

        eval_gradient : bool, default=False
            If True, the gradient of the log-marginal likelihood with respect
            to the kernel hyperparameters at position theta is returned
            additionally. If True, theta must not be None.

        clone_kernel : bool, default=True
            If True, the kernel attribute is copied. If False, the kernel
            attribute is modified, but may result in a performance improvement.

        Returns
        -------
        log_likelihood : float
            Log-marginal likelihood of theta for training data.

        log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional
            Gradient of the log-marginal likelihood with respect to the kernel
            hyperparameters at position theta.
            Only returned when eval_gradient is True.
        """
        if theta is None:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated for theta!=None")
            return self.log_marginal_likelihood_value_

        if clone_kernel:
            kernel = self.kernel_.clone_with_theta(theta)
        else:
            kernel = self.kernel_
            kernel.theta = theta

        if eval_gradient:
            K, K_gradient = kernel(self.X_train_, eval_gradient=True)
        else:
            K = kernel(self.X_train_)

        # Alg. 2.1, page 19, line 2 -> L = cholesky(K + sigma^2 I)
        K[np.diag_indices_from(K)] += self.alpha
        try:
            L = cholesky(K, lower=GPR_CHOLESKY_LOWER, check_finite=False)
        except np.linalg.LinAlgError:
            return (-np.inf,
                    np.zeros_like(theta)) if eval_gradient else -np.inf

        # Support multi-dimensional output of self.y_train_
        y_train = self.y_train_
        if y_train.ndim == 1:
            y_train = y_train[:, np.newaxis]

        # Alg 2.1, page 19, line 3 -> alpha = L^T \ (L \ y)
        alpha = cho_solve((L, GPR_CHOLESKY_LOWER), y_train, check_finite=False)

        # Alg 2.1, page 19, line 7
        # -0.5 . y^T . alpha - sum(log(diag(L))) - n_samples / 2 log(2*pi)
        # y is originally thought to be a (1, n_samples) row vector. However,
        # in multioutputs, y is of shape (n_samples, 2) and we need to compute
        # y^T . alpha for each output, independently using einsum. Thus, it
        # is equivalent to:
        # for output_idx in range(n_outputs):
        #     log_likelihood_dims[output_idx] = (
        #         y_train[:, [output_idx]] @ alpha[:, [output_idx]]
        #     )
        log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
        log_likelihood_dims -= np.log(np.diag(L)).sum()
        log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
        # the log likelihood is summed up across the outputs
        log_likelihood = log_likelihood_dims.sum(axis=-1)

        if eval_gradient:
            # Eq. 5.9, p. 114, and footnote 5 in p. 114
            # 0.5 * trace((alpha . alpha^T - K^-1) . K_gradient)
            # alpha is supposed to be a vector of (n_samples,) elements. With
            # multioutputs, alpha is a matrix of size (n_samples, n_outputs).
            # Therefore, we want to construct a matrix of
            # (n_samples, n_samples, n_outputs) equivalent to
            # for output_idx in range(n_outputs):
            #     output_alpha = alpha[:, [output_idx]]
            #     inner_term[..., output_idx] = output_alpha @ output_alpha.T
            inner_term = np.einsum("ik,jk->ijk", alpha, alpha)
            # compute K^-1 of shape (n_samples, n_samples)
            K_inv = cho_solve((L, GPR_CHOLESKY_LOWER),
                              np.eye(K.shape[0]),
                              check_finite=False)
            # create a new axis to use broadcasting between inner_term and
            # K_inv
            inner_term -= K_inv[..., np.newaxis]
            # Since we are interested about the trace of
            # inner_term @ K_gradient, we don't explicitly compute the
            # matrix-by-matrix operation and instead use an einsum. Therefore
            # it is equivalent to:
            # for param_idx in range(n_kernel_params):
            #     for output_idx in range(n_output):
            #         log_likelihood_gradient_dims[param_idx, output_idx] = (
            #             inner_term[..., output_idx] @
            #             K_gradient[..., param_idx]
            #         )
            log_likelihood_gradient_dims = 0.5 * np.einsum(
                "ijl,jik->kl", inner_term, K_gradient)
            # the log likelihood gradient is summed up across the outputs
            log_likelihood_gradient = log_likelihood_gradient_dims.sum(axis=-1)

        if eval_gradient:
            return log_likelihood, log_likelihood_gradient
        else:
            return log_likelihood
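For reference, the quantities assembled above are the standard results from Rasmussen & Williams (GPML) that the inline comments cite, written with $K_y = K + \sigma_n^2 I$, its lower Cholesky factor $L$, and $\alpha = K_y^{-1}\mathbf{y}$:

$$\log p(\mathbf{y}\mid X,\theta) = -\tfrac{1}{2}\,\mathbf{y}^{\top}\alpha \;-\; \sum_i \log L_{ii} \;-\; \tfrac{n}{2}\log 2\pi$$

$$\frac{\partial}{\partial\theta_j}\log p(\mathbf{y}\mid X,\theta) = \tfrac{1}{2}\,\operatorname{tr}\!\left[\left(\alpha\alpha^{\top} - K_y^{-1}\right)\frac{\partial K_y}{\partial\theta_j}\right]$$

For multi-output targets, the code evaluates these per output column via einsum and sums the results, which is what the final `sum(axis=-1)` calls do.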
Example #59
0
def train_KRR_qml(X, y, sigma=1e3, llambda=1e-8):
    """Train kernel ridge regression: solve (K + llambda*I) alpha = y."""
    K = compute_kernel_qml(X, X, sigma=sigma)
    K[np.diag_indices_from(K)] += llambda  # regularize the kernel diagonal
    # cho_solve is called as cho_solve(K, y), matching the qml.math.cho_solve
    # signature rather than scipy.linalg.cho_solve, which expects a (factor, lower) pair.
    alpha = cho_solve(K, y)
    return alpha
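A hedged usage sketch of the function above: prediction in kernel ridge regression is the cross-kernel between test and training points times the learned weights. `X_train`, `y_train`, and `X_test` are placeholder names, and it is assumed that `compute_kernel_qml(A, B, sigma)` returns the `(len(A), len(B))` kernel matrix.

alpha = train_KRR_qml(X_train, y_train, sigma=1e3, llambda=1e-8)
K_test = compute_kernel_qml(X_test, X_train, sigma=1e3)  # (n_test, n_train)
y_pred = K_test.dot(alpha)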
Example #60
0
    def add_data(self, fX, fT, istart=0, icount=np.inf, fHH=None, fHT=None):
        """Feed new training data (X,T) to HP-ELM model in batches: does not solve ELM itself.

        This method prepares an intermediate solution data, that takes the most time. After that, obtaining
        the solution is fast.

        The intermediate solution consists of two matrices: `HH` and `HT`. They can be in memory for a model computed
        at once, or stored on disk for a model computed in parts or in parallel.

        For iterative solution, provide file names for on-disk matrices in the input parameters `fHH` and `fHT`.
        They will be created if they don't exist, or new results will be merged with the existing ones. This method is
        multiprocess-safe for parallel writing into the files `fHH` and `fHT`, which allows you to easily compute an ELM
        in parallel. The multiprocess safety relies on the Python module 'fasteners' and lock files named
        fHH+'.lock' and fHT+'.lock'.

        Args:
            fX (hdf5): (part of) input training data size (N * `inputs`)
            fT (hdf5): (part of) output training data size (N * `outputs`)
            istart (int, optional): index of first data sample to use from `fX`, `istart` < N. If not given,
                all data from `fX` is used. Sample with index `istart` is used for training, indexing is 0-based.
            icount (int, optional): number of data samples to use from `fX`, starting from `istart`, automatically
                adjusted to `istart` + `icount` <= N. If not given, all data starting from `istart` is used.
                The last sample used for training is `istart`+`icount`-1, so you can index data as:
                istart_1=0, icount_1=1000; istart_2=1000, icount_2=1000; istart_3=2000, icount_3=1000, ...
            fHH, fHT (string, optional): file names for storing HH and HT matrices. Files are created if they don't
                exist, or new result is added to the existing files if they exist. Parallel writing to the same
                `fHH`, `fHT` files is multiprocess-safe, made specially for parallel training of HP-ELM. Another use
                is to split a very long training of huge ELM into smaller parts, so the training can be interrupted
                and resumed later.

        """
        # initialize
        assert len(self.nnet.neurons) > 0, "Add neurons to ELM before using it"
        X, T = self._checkdata(fX, fT)
        N = X.shape[0]
        _prepare_fHH(fHH, fHT, self.nnet, self.precision)
        # custom range adjustments
        icount = min(icount, N - istart)
        nb = int(np.ceil(float(icount) / self.batch))  # number of batches

        # weighted classification initialization
        if self.classification == "wc" and self.wc is None:
            ns = np.zeros((self.nnet.outputs, ))
            for b in xrange(nb):  # batch sum is much faster
                start = b * self.batch + istart
                stop = min((b + 1) * self.batch + istart, icount + istart)
                ns += T[start:stop].sum(axis=0)
            ns = ns.astype(self.precision)
            # class weights normalized to number of samples
            self.wc = ns.sum() / ns

        # main loop over all the data
        t = time()
        t0 = time()
        wc_vector = None
        for b in xrange(nb):
            start = b * self.batch + istart
            stop = min((b + 1) * self.batch + istart, icount + istart)
            Xb = X[start:stop]
            Tb = T[start:stop]
            if self.classification == "wc":
                # weights for samples in the batch
                wc_vector = self.wc[np.where(Tb == 1)[1]]

            self.nnet.add_batch(Xb, Tb, wc_vector)

            # report time
            eta = int(((time() - t0) / (b + 1)) * (nb - b - 1))
            if time() - t > self.tprint:
                print("processing batch %d/%d, eta %d:%02d:%02d" %
                      (b + 1, nb, eta / 3600, (eta % 3600) / 60, eta % 60))
                t = time()

        # if storing output to disk
        if fHH is not None and fHT is not None:
            HH, HT = self.nnet.get_corr()
            # norm is already included in HH
            HH[np.diag_indices_from(HH)] -= self.nnet.norm
            _write_fHH(fHH, fHT, HH, HT)
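A hypothetical sketch of the batched workflow the docstring describes; the file names, network size, and the final `solve_corr` call are assumptions about the hpelm API rather than part of the original snippet.

from hpelm import HPELM

model = HPELM(10, 3)            # 10 inputs, 3 outputs
model.add_neurons(100, "sigm")

# Accumulate HH/HT on disk in two chunks (these could also run as separate processes).
model.add_data("X.h5", "T.h5", istart=0, icount=1000, fHH="HH.h5", fHT="HT.h5")
model.add_data("X.h5", "T.h5", istart=1000, icount=1000, fHH="HH.h5", fHT="HT.h5")

# Obtain the actual ELM solution from the stored intermediate matrices.
model.solve_corr("HH.h5", "HT.h5")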