def csr_to_fm(self, X_csr, return_oh=True, indices=None):
        assert (X_csr.shape == (self.n_samples, self.n_features))

        if indices is None:
            y = check_array(X_csr.data, ensure_2d=False, copy=True)
        else:
            if isinstance(indices, tuple):
                indices_samples, indices_features = indices
            elif isinstance(indices, sp.csc_matrix):
                indices_samples, indices_features = self.fm_to_indices(indices)
            y = X_csr[indices_samples, indices_features].A[0].copy()
        if not return_oh:
            return y
        else:
            X = check_array(X_csr, accept_sparse='coo',
                            force_all_finite=False)
            n_rows, n_cols = X_csr.shape
            assert ((n_rows, n_cols) == (self.n_samples, self.n_features))
            # The encoder must exist for both branches below.
            encoder = OneHotEncoder(n_values=[self.n_samples,
                                              self.n_features])
            if indices is None:
                X_ix = np.column_stack([X.row, X.col])
            else:
                # Compare the index sets irrespective of order.
                assert np.array_equal(np.sort(indices_samples), np.sort(X.row))
                assert np.array_equal(np.sort(indices_features), np.sort(X.col))
                X_ix = np.column_stack([indices_samples, indices_features])
            X_oh = encoder.fit_transform(X_ix)
            return X_oh, y
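A minimal sketch of the conversion this method performs, using only numpy and scipy (the toy matrix here is illustrative, not part of the original code): every nonzero entry of the sparse matrix yields one training row, whose (row, col) pair is what gets one-hot encoded and whose value becomes the target y.

import numpy as np
import scipy.sparse as sp

X_csr = sp.random(5, 4, density=0.5, format='csr', random_state=0)
X_coo = X_csr.tocoo()                            # coo exposes .row and .col
X_ix = np.column_stack([X_coo.row, X_coo.col])   # index pairs to one-hot encode
y = X_coo.data                                   # nonzero entries as targets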
Example #2
def line_intersect_grid(pt, xGr, yGr, zGr, returnvertices=False):
    """
    Intersect a list of polyline vertices with a rectilinear MODFLOW
    grid. Vertices at the intersection of the polyline with the grid
    cell edges is returned. Optionally the original polyline vertices
    are returned.
    Points outside the grid are not returned unless returnvertices==True.

    Parameters
    ----------
    ptsin : list
        A list of x, y points defining the vertices of a polyline that will be
        intersected with the rectilinear MODFLOW grid
    xedge : numpy.ndarray
        x-coordinate of the edge of each MODFLOW column. xedge is dimensioned
        to NCOL + 1. If xedge is not a numpy.ndarray it is converted to a
        numpy.ndarray.
    yedge : numpy.ndarray
        y-coordinate of the edge of each MODFLOW row. yedge is dimensioned
        to NROW + 1. If yedge is not a numpy.ndarray it is converted to a
        numpy.ndarray.
    returnvertices: bool
        Return the original polyline vertices in the list of numpy.ndarray
        containing vertices resulting from intersection of the provided
        polygon and the MODFLOW model grid if returnvertices=True.
        (default is False).

    Returns
    -------
    (x, y, dlen) : numpy.ndarray of tuples
        numpy.ndarray of tuples containing the x, y, and segment length of the
        intersection of the provided polyline with the rectilinear MODFLOW
        grid.

    Examples
    --------
    >>> import flopy
    >>> ptsout = flopy.plotutil.line_intersect_grid(ptsin, xedge, yedge)

    """

    xGr = np.asarray(xGr)
    yGr = np.asarray(yGr)
    zGr = np.asarray(zGr)

    pt = np.asarray(pt, dtype=float)
    dp = np.diff(pt, axis=0)

    # Parametric position (0 < lam < 1) of every grid plane along each
    # polyline segment; zero-length direction components yield inf/nan,
    # which the 0 < lam < 1 filter below discards.
    with np.errstate(divide='ignore', invalid='ignore'):
        lam = np.hstack(((xGr[np.newaxis, :] - pt[:-1, 0:1]) / dp[:, 0:1],
                         (yGr[np.newaxis, :] - pt[:-1, 1:2]) / dp[:, 1:2],
                         (zGr[np.newaxis, :] - pt[:-1, 2:3]) / dp[:, 2:3]))
    p_out = []
    for i, la in enumerate(lam):
        la = np.sort(la[np.logical_and(la > 0.0, la < 1.0)])
        if returnvertices:
            la = np.hstack((0.0, la))
        p_out += list(pt[i] + la[:, np.newaxis] * dp[i])
    if returnvertices:
        p_out.append(pt[-1])
    return np.array(p_out)
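A hypothetical call (coordinates invented for illustration): a two-segment polyline crossing a small 3-D grid.

import numpy as np

pt = [(0.0, 0.0, 0.0), (3.0, 2.0, 1.0), (6.0, 0.0, 0.0)]
xGr = np.arange(0.0, 7.0)   # NCOL + 1 column-edge coordinates
yGr = np.arange(0.0, 4.0)   # NROW + 1 row-edge coordinates
zGr = np.arange(0.0, 2.0)   # NLAY + 1 layer-edge coordinates
pts_out = line_intersect_grid(pt, xGr, yGr, zGr, returnvertices=True)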
Example #3
def recommend(dataMat, user, N, simMeas=cos_sim, percentage=0.9):
    """Return the N items with the highest predicted ratings."""
    # Build a list of items this user has not rated yet.
    unratedItems = np.nonzero(dataMat[user, :].A == 0)[1]
    print("==========non-predicted items=========")
    print(unratedItems)
    print("user", user, "has", len(unratedItems), "movies without a rating")
    print("==========non-predicted items=========\n")
    if len(unratedItems) == 0:
        return 'you rated everything'  # nothing left to predict, so exit
    # First run the singular value decomposition on the whole matrix.
    xformedItems = svdExt(dataMat, percentage)
    itemScores = []
    # Compute a predicted rating for every unrated item.
    for item in unratedItems:
        estimatedScore = svdEst(xformedItems, dataMat, user, simMeas, item)
        itemScores.append((item, estimatedScore))

    # Sort only the predicted list and recommend the top N; items the user
    # has already rated are not reconsidered.
    itemScores = sorted(itemScores, key=lambda x: x[1],
                        reverse=True)  # sort by item score, descending
    print(itemScores)
    return itemScores[:N]  # the top-N item ids and their predicted ratings
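The helpers cos_sim, svdExt, and svdEst are defined elsewhere in the project. A minimal sketch of a cos_sim compatible with this signature (the 0.5 + 0.5*cos rescaling to [0, 1] is an assumption, a common convention in this kind of recommender code):

import numpy as np

def cos_sim(a, b):
    # Cosine similarity rescaled from [-1, 1] to [0, 1]; the rescaling
    # is assumed, not taken from the original project.
    num = float(np.dot(a.T, b))
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    return 0.5 + 0.5 * (num / denom)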
Example #4
def local_empirical_measure(tau, X, T, f, b=None):
    # The default bandwidth must see the f passed at call time, so it is
    # built inside the function rather than as a default argument.
    if b is None:
        b = lambda tau, t: 2 * np.sqrt(f(tau, t))
    b0 = b(tau, T)
    bb = np.sort(b0)
    bsum = 0.
    for j in range(T.shape[0]):
        bsum += bb[j]
        if bb[j] >= (0.5 + bsum) / (j + 1):
            bsum -= bb[j]
            mu = (0.5 + bsum) / j
            return 2 * np.maximum(mu - b0, 0.)
Example #5
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection


def plot_elements(tris, nodes):
    # Collect each triangle's edges once, with vertex ids in sorted order
    # so that edges shared between triangles are not drawn twice.
    edges = set()
    for t in range(tris.shape[0]):
        t_verts = np.sort(tris[t])
        edges.add((t_verts[0], t_verts[1]))
        edges.add((t_verts[0], t_verts[2]))
        edges.add((t_verts[1], t_verts[2]))

    edges = np.array(list(edges))

    # Look up the endpoint coordinates of every edge and draw them all
    # as a single LineCollection on the current axes.
    all_lines = nodes[edges]
    coll = LineCollection(all_lines)
    ax = plt.gca()
    ax.add_collection(coll)
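A quick hypothetical usage: two triangles sharing an edge, which yields five distinct line segments.

nodes = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
tris = np.array([[0, 1, 2], [1, 3, 2]])
plot_elements(tris, nodes)
plt.gca().autoscale()   # add_collection does not rescale the view itself
plt.show()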
Example #6
def distances_between_nodes(heat_print, mode, node1, node2, type_comp="auc",
                            mode_diff="agg", normalize="True", plot=False,
                            savefig=False, filefig="plots/nodes_dist.png"):
    ### Computes the distance between two nodes of the same graph,
    ### based on their heat profiles.
    if type_comp == "auc":
        d = compute_auc(heat_print[mode].iloc[:, node1],
                        heat_print[mode].iloc[:, node2],
                        normalize=normalize, mode_diff=mode_diff,
                        plot=plot, savefig=savefig, filefig=filefig)
    elif type_comp == "emd":
        ### Required params:
        ### P, Q - two histograms of size H
        ### D - the HxH matrix of the ground distance between bins of P and Q
        H = 30
        hist1, bins_arr = np.histogram(heat_print[mode].iloc[:, node1], H)
        #### Normalize the histograms
        w = [bins_arr[i + 1] - bins_arr[i] for i in range(len(bins_arr) - 1)]
        hist1 = hist1 * 1.0 / np.matrix(w).dot(hist1)
        hist2, _ = np.histogram(heat_print[mode].iloc[:, node2], bins_arr)
        hist2 = hist2 * 1.0 / np.matrix(w).dot(hist2)
        hist1 = np.reshape(np.matrix(hist1), [1, H])
        hist2 = np.reshape(np.matrix(hist2), [1, H])
        D = np.zeros((H, H))
        for i in range(H):
            for j in range(H):
                D[i, j] = np.abs(bins_arr[i + 1] - bins_arr[j + 1])
        d = emd(np.array(hist1.tolist()[0]), np.array(hist2.tolist()[0]), D)
    elif type_comp == "corr":
        v1 = heat_print[mode].iloc[:, node1]
        v2 = heat_print[mode].iloc[:, node2]
        d = 1 - v1.dot(v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    elif type_comp == "corr_sorted":
        v1 = np.sort(heat_print[mode].iloc[:, node1])
        v2 = np.sort(heat_print[mode].iloc[:, node2])
        d = 1 - v1.dot(v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    elif type_comp == "ks":
        test1 = heat_print[mode].iloc[:, node1]
        test2 = heat_print[mode].iloc[:, node2]
        d = np.max(np.abs(np.sort(test1) - np.sort(test2)))
    elif type_comp == "ks_p":
        test1 = heat_print[mode].iloc[:, node1]
        test2 = heat_print[mode].iloc[:, node2]
        stats = sc.stats.ks_2samp(test1, test2)
        d = 1 - stats[1]
    elif type_comp == "ks_r":
        sorted1 = np.sort(heat_print[mode].iloc[:, node1])
        sorted2 = np.sort(heat_print[mode].iloc[:, node2])
        sorted3 = np.sort(sorted1.tolist() + sorted2.tolist())
        ks = [None] * len(sorted3)
        for i in range(len(sorted3)):
            ks[i] = (len([e for e in sorted1 if e <= sorted3[i]])
                     - len([e for e in sorted2 if e <= sorted3[i]])) * 1.0 / len(sorted1)
        d = np.max(np.abs(ks))
    else:
        print("comparison type not recognized!!!")
        d = np.nan
    return d
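A hypothetical usage, assuming heat_print is a sequence of pandas DataFrames whose columns hold per-node heat profiles; the "ks" comparison needs only numpy:

import numpy as np
import pandas as pd

heat_print = [pd.DataFrame(np.random.rand(50, 3))]   # one mode, 3 nodes
d = distances_between_nodes(heat_print, mode=0, node1=0, node2=1,
                            type_comp="ks")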
Example #7
import numpy as np
import csv
import operator
import readcol

files = readcol.readcol('/home/shared/data/h148/testarraymainbh.orbit')
csv1 = csv.reader(files, delimiter=',')
# Sort the parsed rows on their first field.
sort = sorted(csv1, key=operator.itemgetter(0))
for eachline in sort:
    print(eachline)
print(files.ndim)
print(files[0:, 1])
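The same sort pattern on toy rows, for illustration: operator.itemgetter(0) keys the sort on each row's first field.

rows = [['2', 'b'], ['1', 'a'], ['3', 'c']]
print(sorted(rows, key=operator.itemgetter(0)))
# [['1', 'a'], ['2', 'b'], ['3', 'c']]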
Example #8
File: mmsb.py Project: dtrckd/ml
    def generate(self,
                 N=None,
                 K=None,
                 hyperparams=None,
                 mode='predictive',
                 symmetric=True,
                 **kwargs):
        if mode == 'generative':
            self.update_hyper(hyperparams)
            alpha, gmma, delta = self.get_hyper()
            N = int(N)
            _name = self.__module__.split('.')[-1]
            if _name == 'immsb_cgs':
                # @todo: compute the variance for random simulation
                # Number of table in the CRF
                if symmetric is True:
                    m = alpha * N * (digamma(N + alpha) - digamma(alpha))
                else:
                    m = alpha * N * (digamma(2 * N + alpha) - digamma(alpha))

                # Number of class in the CRF
                K = int(gmma * (digamma(m + gmma) - digamma(gmma)))
                alpha = gem(gmma, K)

            i = 0
            while i < 3:
                try:
                    # Probe draw: succeeds only if alpha is a valid parameter.
                    dirichlet(alpha, size=N)
                    i = 0
                    break
                except ZeroDivisionError:
                    # Sometimes improbable values!
                    alpha = gem(gmma, K)
                    i += 1

            # Generate Theta
            if i > 0:
                # Keep the largest third of the weights, zero out the rest.
                params, order = zip(
                    *sorted(zip(alpha, range(len(alpha))), reverse=True))
                order = np.array(order)
                _K = int(1 / 3. * len(alpha))
                alpha[order[:_K]] = 1
                alpha[order[_K:]] = 0
                theta = multinomial(1, alpha, size=N)
            else:
                theta = dirichlet(alpha, size=N)

            # Generate Phi
            phi = beta(delta[0], delta[1], size=(K, K))
            if symmetric is True:
                phi = np.triu(phi) + np.triu(phi, 1).T

            self._theta = theta
            self._phi = phi
        elif mode == 'predictive':
            try:
                theta, phi = self.get_params()
            except:
                return self.generate(N, K, hyperparams, 'generative',
                                     symmetric)
            K = theta.shape[1]

        pij = self.likelihood(theta, phi)

        # Threshold
        #pij[pij >= 0.5 ] = 1
        #pij[pij < 0.5 ] = 0
        #Y = pij

        # Sampling
        pij = np.clip(pij, 0, 1)
        Y = sp.stats.bernoulli.rvs(pij)

        #for j in xrange(N):
        #    print 'j %d' % j
        #    for i in xrange(N):
        #        zj = categorical(theta[j])
        #        zi = categorical(theta[i])
        #        Y[j, i] = sp.stats.bernoulli.rvs(B[zj, zi])
        return Y, theta, phi
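gem, dirichlet, beta, and multinomial come from numpy.random and the surrounding project. A minimal stick-breaking sketch of a gem helper compatible with the calls above (an assumption about its semantics, not the project's actual implementation):

import numpy as np

def gem(gmma, K):
    # Truncated GEM(gmma): K stick-breaking weights drawn as Beta(1, gmma)
    # breaks applied to the remaining stick. Assumed semantics.
    breaks = np.random.beta(1.0, gmma, size=K)
    sticks = np.concatenate(([1.0], np.cumprod(1.0 - breaks[:-1])))
    return breaks * sticks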
Example #9
def preprocess_dataset(Xtrain,
                       Ytrain,
                       Xtest=None,
                       Ytest=None,
                       dtype=None,
                       zscore=True,
                       denan=True,
                       delays=[1, 2, 3, 4],
                       order='C',
                       trim_random=False,
                       trim_regressors=None,
                       trim_regressands=None):
    """preprocess a dataset

    Parameters
    ----------
    - Xtrain: array
    - Ytrain: array
    - Xtest: array
    - Ytest: array
    - dtype: numpy dtype to use
    - zscore: bool of whether to zscore data
    - denan: bool of whether to denan arrays
    - delays: list of int delays passed to make_delayed for the X arrays
    - order: str of 'C' or 'F' for C-ordering or Fortran ordering
    - trim_random: bool of whether trimming keeps a random subset of columns
    - trim_regressors: float fraction of X columns to keep, or None
    - trim_regressands: float fraction of Y columns to keep, or None
    """
    data = {
        'Xtrain': Xtrain,
        'Ytrain': Ytrain,
        'Xtest': Xtest,
        'Ytest': Ytest,
    }
    data = {key: value for key, value in data.items() if value is not None}

    if dtype is not None:
        data = {key: value.astype(dtype) for key, value in data.items()}
    if zscore:
        data = {key: scipy.stats.zscore(value) for key, value in data.items()}
    if denan:
        data = {key: np.nan_to_num(value) for key, value in data.items()}
    if delays:
        for key in list(data.keys()):
            if key.startswith('X'):
                data[key] = make_delayed(data[key], delays)
    if order == 'F':
        data = {key: np.asfortranarray(value) for key, value in data.items()}
    elif order == 'C':
        data = {
            key: np.ascontiguousarray(value)
            for key, value in data.items()
        }

    # trim dimensions
    if trim_random:
        # Keep a random subset of the columns, returned in sorted order.
        f_keep = lambda before, after: np.sort(
            np.random.choice(
                np.arange(before),
                after,
                replace=False,
            ))
    else:
        f_keep = lambda before, after: slice(None, after)
    if trim_regressors is not None:
        n_regressors = data['Xtrain'].shape[1]
        new_n_regressors = int(n_regressors * trim_regressors)
        keep = f_keep(n_regressors, new_n_regressors)
        data['Xtrain'] = data['Xtrain'][:, keep]
        data['Xtest'] = data['Xtest'][:, keep]

    if trim_regressands is not None:
        n_regressands = data['Ytrain'].shape[1]
        new_n_regressands = int(n_regressands * trim_regressands)
        keep = f_keep(n_regressands, new_n_regressands)
        data['Ytrain'] = data['Ytrain'][:, keep]
        data['Ytest'] = data['Ytest'][:, keep]

    return data
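A hypothetical usage on random data. Test arrays are included because the trim branches index data['Xtest'] and data['Ytest'] unconditionally, and delays=[] skips the make_delayed step (make_delayed is defined elsewhere in the project):

import numpy as np

Xtrain, Xtest = np.random.randn(100, 20), np.random.randn(50, 20)
Ytrain, Ytest = np.random.randn(100, 8), np.random.randn(50, 8)
data = preprocess_dataset(Xtrain, Ytrain, Xtest, Ytest, delays=[],
                          trim_regressors=0.5, trim_regressands=0.5)
print({key: value.shape for key, value in data.items()})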