def loadDictFromMatfile(matfilepath):
    ''' Load dict of numpy arrays from a .mat-format file on disk.

    This is a wrapper around scipy.io.loadmat which converts the returned
    numpy arrays into a standard aligned format.

    Returns
    -------
    D : dict
        Each key/value pair is a parameter name and a numpy array
        loaded from the provided mat file.
        We ensure before returning that each array has properties:
        * C alignment
        * Original 2D shape has been squeezed as much as possible
            * (1,1) becomes a 0-dimensional (scalar) array
            * (1,N) or (N,1) become 1D arrays
        * flags.aligned is True
        * flags.owndata is True
        * dtype.byteorder is '='

    Examples
    --------
    >>> import scipy.io
    >>> Dorig = dict(scalar=5, scalar1DN1=np.asarray([3.14,]))
    >>> Dorig['arr1DN3'] = np.asarray([1,2,3])
    >>> scipy.io.savemat('Dorig.mat', Dorig, oned_as='row')
    >>> D = loadDictFromMatfile('Dorig.mat')
    >>> D['scalar']
    array(5)
    >>> D['scalar1DN1']
    array(3.14)
    >>> D['arr1DN3']
    array([1, 2, 3])
    '''
    Dtmp = scipy.io.loadmat(matfilepath)
    D = dict([x for x in Dtmp.items() if not x[0].startswith('__')])
    for key in D:
        if not isinstance(D[key], np.ndarray):
            continue
        x = D[key]
        # np.str_ replaces np.unicode_, which was removed in NumPy 2.0
        if isinstance(x[0], np.str_):
            if x.size == 1:
                D[key] = str(x[0])
            else:
                D[key] = tuple([str(s) for s in x])
            continue
        if x.ndim == 2:
            x = np.squeeze(x)
        if str(x.dtype).count('int'):
            arr = toCArray(x, dtype=np.int32)
        else:
            arr = toCArray(x, dtype=np.float64)
        assert arr.dtype.byteorder == '='
        assert arr.flags.aligned is True
        assert arr.flags.owndata is True
        D[key] = arr
    return D
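
# Minimal round-trip sketch for loadDictFromMatfile. Illustrative only:
# the file path and keys below are hypothetical, and this assumes np,
# scipy.io, and toCArray are importable at module level, as above.
def _demo_loadDictFromMatfile(tmpfilepath='DemoDict.mat'):
    Dorig = dict(weights=np.eye(3), bias=np.asarray([[0.5, 1.5]]))
    scipy.io.savemat(tmpfilepath, Dorig, oned_as='row')
    D = loadDictFromMatfile(tmpfilepath)
    assert D['weights'].flags.owndata and D['weights'].flags.aligned
    assert D['bias'].ndim == 1  # (1, N) row vector squeezed to 1D
    return D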
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Returns
    -------
    SS : SuffStatBag object, with K components.
    '''
    if not hasattr(Data, 'X_NE'):
        Data.X_NE = np.hstack([Data.X, np.ones(Data.nObs)[:, np.newaxis]])

    Y_N = Data.Y
    X_NE = Data.X_NE
    E = X_NE.shape[1]

    if 'resp' in LP:
        # Dense responsibility calculations
        resp = LP['resp']
        K = resp.shape[1]
        S_yy_K = dotATB(resp, np.square(Y_N)).flatten()
        S_yx_KE = dotATB(resp, Y_N * X_NE)
        # Expected outer product
        S_xxT_KEE = np.zeros((K, E, E))
        sqrtResp_k_N = np.sqrt(resp[:, 0])
        sqrtR_X_k_NE = sqrtResp_k_N[:, np.newaxis] * X_NE
        S_xxT_KEE[0] = dotATA(sqrtR_X_k_NE)
        for k in range(1, K):
            np.sqrt(resp[:, k], out=sqrtResp_k_N)
            np.multiply(sqrtResp_k_N[:, np.newaxis], X_NE, out=sqrtR_X_k_NE)
            S_xxT_KEE[k] = dotATA(sqrtR_X_k_NE)
    else:
        # Sparse responsibilities (LP['spR']) are not yet supported here.
        raise NotImplementedError(
            "Sparse responsibility summaries not implemented " +
            "for this observation model.")

    if SS is None:
        SS = SuffStatBag(K=K, D=Data.dim, E=E)
    elif not hasattr(SS, 'E'):
        SS._Fields.E = E
    SS.setField('xxT_KEE', S_xxT_KEE, dims=('K', 'E', 'E'))
    SS.setField('yx_KE', S_yx_KE, dims=('K', 'E'))
    SS.setField('yy_K', S_yy_K, dims='K')
    # Expected count for each k
    # Usually computed by allocmodel. But just in case...
    # Only the dense path reaches here, so sum the dense responsibilities.
    if not hasattr(SS, 'N'):
        SS.setField('N', LP['resp'].sum(axis=0), dims='K')
    return SS
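
# Usage sketch for the regression summary statistics defined directly
# above. Illustrative only: _ToyRegrData is a hypothetical stand-in for a
# bnpy XData-style object with X, Y, nObs, and dim attributes, and this
# assumes SuffStatBag, dotATB, and dotATA are importable as above.
def _demo_calcSummaryStats_regression():
    class _ToyRegrData(object):
        pass
    Data = _ToyRegrData()
    Data.X = np.linspace(0, 1, 8)[:, np.newaxis]   # N=8 inputs, D=1
    Data.Y = 2.0 * Data.X + 0.1                    # noiseless line outputs
    Data.nObs, Data.dim = Data.X.shape
    resp = np.full((8, 2), 0.5)                    # uniform K=2 soft resp.
    SS = calcSummaryStats(Data, None, dict(resp=resp))
    # E = D + 1 because a constant bias column is appended to X.
    assert SS.xxT_KEE.shape == (2, 2, 2)
    return SS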
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Returns
    -------
    SS : SuffStatBag object, with K components.
    '''
    X = Data.X
    D = Data.dim
    if 'resp' in LP:
        resp = LP['resp']
        K = resp.shape[1]
        # Compute expected outer-product statistic,
        # reusing per-component buffers to avoid K full-size temporaries
        S_xxT = np.zeros((K, D, D))
        sqrtResp_k = np.sqrt(resp[:, 0])
        sqrtRX_k = sqrtResp_k[:, np.newaxis] * X
        S_xxT[0] = dotATA(sqrtRX_k)
        for k in range(1, K):
            np.sqrt(resp[:, k], out=sqrtResp_k)
            np.multiply(sqrtResp_k[:, np.newaxis], X, out=sqrtRX_k)
            S_xxT[k] = dotATA(sqrtRX_k)
    else:
        spR = LP['spR']
        K = spR.shape[1]
        # Compute expected outer-product statistic
        S_xxT = calcSpRXXT(X=X, spR_csr=spR)

    if SS is None:
        SS = SuffStatBag(K=K, D=D)
    # Expected outer-product for each state k
    SS.setField('xxT', S_xxT, dims=('K', 'D', 'D'))
    # Expected count for each k
    # Usually computed by allocmodel. But sometimes not (eg TopicModel)
    if not hasattr(SS, 'N'):
        if 'resp' in LP:
            SS.setField('N', LP['resp'].sum(axis=0), dims='K')
        else:
            SS.setField('N', as1D(toCArray(LP['spR'].sum(axis=0))), dims='K')
    return SS
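
# Usage sketch for the zero-mean Gaussian summary statistics defined
# directly above. Illustrative only: _ToyData is a hypothetical stand-in
# for a bnpy XData object with X and dim attributes.
def _demo_calcSummaryStats_zeromean():
    class _ToyData(object):
        pass
    Data = _ToyData()
    Data.X = np.random.RandomState(0).randn(10, 3)  # N=10, D=3
    Data.dim = Data.X.shape[1]
    resp = np.ones((10, 1))                         # all mass on one state
    SS = calcSummaryStats(Data, None, dict(resp=resp))
    # With a single component holding all responsibility, xxT is X^T X.
    assert np.allclose(SS.xxT[0], np.dot(Data.X.T, Data.X))
    return SS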
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Returns
    -------
    SS : SuffStatBag object, with K components.
    '''
    X = Data.X
    if 'resp' in LP:
        resp = LP['resp']
        K = resp.shape[1]
        # 1/2: Compute mean statistic
        S_x = dotATB(resp, X)
        # 2/2: Compute expected outer-product statistic
        S_xx = calcRXX_withDenseResp(resp, X)
    else:
        spR = LP['spR']
        K = spR.shape[1]
        # 1/2: Compute mean statistic
        S_x = spR.T * X
        # 2/2: Compute expected outer-product statistic
        S_xx = calcSpRXX(X=X, spR_csr=spR)
    if SS is None:
        SS = SuffStatBag(K=K, D=Data.dim)
    # Expected mean for each state k
    SS.setField('x', S_x, dims=('K', 'D'))
    # Expected sum-of-squares for each state k
    SS.setField('xx', S_xx, dims=('K', 'D'))
    # Expected count for each k
    # Usually computed by allocmodel. But sometimes not (eg TopicModel)
    if not hasattr(SS, 'N'):
        if 'resp' in LP:
            SS.setField('N', LP['resp'].sum(axis=0), dims='K')
        else:
            SS.setField('N', as1D(toCArray(LP['spR'].sum(axis=0))), dims='K')
    return SS
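
# Usage sketch for the Gaussian summary statistics defined directly above.
# Illustrative only: _ToyData is a hypothetical stand-in for bnpy's XData,
# and this assumes dotATB and calcRXX_withDenseResp are importable as above.
def _demo_calcSummaryStats_gauss():
    class _ToyData(object):
        pass
    Data = _ToyData()
    Data.X = np.asarray([[1.0, 2.0], [3.0, 4.0]])  # N=2, D=2
    Data.dim = 2
    resp = np.asarray([[1.0, 0.0], [0.0, 1.0]])    # hard assignments, K=2
    SS = calcSummaryStats(Data, None, dict(resp=resp))
    # Each row of x is the responsibility-weighted sum of observations,
    # so hard one-to-one assignments recover the rows of X exactly.
    assert np.allclose(SS.x, Data.X)
    assert np.allclose(SS.N, [1.0, 1.0])
    return SS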
def loadTopicModelFromTxtFiles(snapshotPath, returnTPA=False,
                               returnWordCounts=False,
                               normalizeProbs=True,
                               normalizeTopics=True,
                               **kwargs):
    ''' Load from snapshot text files.

    Returns
    -------
    hmodel
    '''
    Mdict = dict()
    possibleKeys = [
        'K', 'probs', 'alpha', 'beta', 'lam', 'gamma',
        'nTopics', 'nTypes', 'vocab_size']
    keyMap = dict(beta='lam', nTopics='K', nTypes='vocab_size')
    for key in possibleKeys:
        try:
            arr = np.loadtxt(snapshotPath + "/%s.txt" % (key))
            if key in keyMap:
                Mdict[keyMap[key]] = arr
            else:
                Mdict[key] = arr
        except Exception:
            pass
    assert 'K' in Mdict
    assert 'lam' in Mdict
    K = int(Mdict['K'])
    V = int(Mdict['vocab_size'])
    if os.path.exists(snapshotPath + "/topics.txt"):
        Mdict['topics'] = np.loadtxt(snapshotPath + "/topics.txt")
        Mdict['topics'] = as2D(toCArray(Mdict['topics'], dtype=np.float64))
        assert Mdict['topics'].ndim == 2
        assert Mdict['topics'].shape == (K, V)
    else:
        TWC_data = np.loadtxt(snapshotPath + "/TopicWordCount_data.txt")
        TWC_inds = np.loadtxt(
            snapshotPath + "/TopicWordCount_indices.txt", dtype=np.int32)
        if os.path.exists(snapshotPath + "/TopicWordCount_cscindptr.txt"):
            TWC_cscindptr = np.loadtxt(
                snapshotPath + "/TopicWordCount_cscindptr.txt",
                dtype=np.int32)
            TWC = scipy.sparse.csc_matrix(
                (TWC_data, TWC_inds, TWC_cscindptr), shape=(K, V))
        else:
            TWC_csrindptr = np.loadtxt(
                snapshotPath + "/TopicWordCount_indptr.txt", dtype=np.int32)
            TWC = scipy.sparse.csr_matrix(
                (TWC_data, TWC_inds, TWC_csrindptr), shape=(K, V))
        Mdict['WordCounts'] = TWC.toarray()
    if returnTPA:
        # Load topics : 2D array, K x vocab_size
        if 'WordCounts' in Mdict:
            topics = Mdict['WordCounts'] + Mdict['lam']
        else:
            topics = Mdict['topics']
        topics = as2D(toCArray(topics, dtype=np.float64))
        assert topics.ndim == 2
        K = topics.shape[0]
        if normalizeTopics:
            topics /= topics.sum(axis=1)[:, np.newaxis]
        # Load probs : 1D array, size K
        try:
            probs = Mdict['probs']
        except KeyError:
            probs = (1.0 / K) * np.ones(K)
        probs = as1D(toCArray(probs, dtype=np.float64))
        assert probs.ndim == 1
        assert probs.size == K
        if normalizeProbs:
            probs = probs / np.sum(probs)
        # Load alpha : scalar float > 0
        try:
            alpha = float(Mdict['alpha'])
        except KeyError:
            if 'alpha' in os.environ:
                alpha = float(os.environ['alpha'])
            else:
                raise ValueError('Unknown parameter alpha')
        return topics, probs, alpha
    # BUILD HMODEL FROM LOADED TXT
    infAlg = 'VB'
    # avoids circular import
    from bnpy.HModel import HModel
    if 'gamma' in Mdict:
        aPriorDict = dict(alpha=Mdict['alpha'], gamma=Mdict['gamma'])
        HDPTopicModel = AllocModelConstructorsByName['HDPTopicModel']
        amodel = HDPTopicModel(infAlg, aPriorDict)
    else:
        FiniteTopicModel = AllocModelConstructorsByName['FiniteTopicModel']
        amodel = FiniteTopicModel(infAlg, dict(alpha=Mdict['alpha']))
    omodel = ObsModelConstructorsByName['Mult'](infAlg, **Mdict)
    hmodel = HModel(amodel, omodel)
    hmodel.set_global_params(**Mdict)
    if returnWordCounts:
        return hmodel, Mdict['WordCounts']
    return hmodel
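
# Usage sketch for loadTopicModelFromTxtFiles. Illustrative only: the
# snapshotPath argument is a hypothetical directory written by a bnpy
# training run, containing K.txt, lam.txt, vocab_size.txt, and either
# topics.txt or sparse TopicWordCount_*.txt files.
def _demo_loadTopicModelFromTxtFiles(snapshotPath):
    topics, probs, alpha = loadTopicModelFromTxtFiles(
        snapshotPath, returnTPA=True,
        normalizeTopics=True, normalizeProbs=True)
    assert np.allclose(topics.sum(axis=1), 1.0)  # rows are distributions
    assert np.allclose(probs.sum(), 1.0)
    return topics, probs, alpha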
def loadTopicModel(matfilepath, queryLap=None, prefix=None,
                   returnWordCounts=0, returnTPA=0,
                   normalizeTopics=0, normalizeProbs=0, **kwargs):
    ''' Load saved topic model

    Returns
    -------
    topics : 2D array, K x vocab_size (if returnTPA)
    probs : 1D array, size K (if returnTPA)
    alpha : scalar (if returnTPA)
    hmodel : HModel
    WordCounts : 2D array, size K x vocab_size (if returnWordCounts)
    '''
    if prefix is None:
        prefix, lapQuery = getPrefixForLapQuery(matfilepath, queryLap)
    # avoids circular import
    from bnpy.HModel import HModel
    if len(glob.glob(os.path.join(matfilepath, "*.log_prob_w"))) > 0:
        return loadTopicModelFromMEDLDA(matfilepath, prefix,
                                        returnTPA=returnTPA)
    snapshotList = glob.glob(os.path.join(matfilepath, 'Lap*TopicSnapshot'))
    matfileList = glob.glob(os.path.join(matfilepath, 'Lap*TopicModel.mat'))
    if len(snapshotList) > 0:
        if prefix is None:
            snapshotList.sort()
            snapshotPath = snapshotList[-1]
        else:
            snapshotPath = None
            for curPath in snapshotList:
                if curPath.count(prefix):
                    snapshotPath = curPath
        return loadTopicModelFromTxtFiles(
            snapshotPath,
            normalizeTopics=normalizeTopics,
            normalizeProbs=normalizeProbs,
            returnWordCounts=returnWordCounts,
            returnTPA=returnTPA)
    if prefix is not None:
        matfilepath = os.path.join(matfilepath, prefix + 'TopicModel.mat')
    Mdict = loadDictFromMatfile(matfilepath)
    if 'SparseWordCount_data' in Mdict:
        data = np.asarray(Mdict['SparseWordCount_data'], dtype=np.float64)
        K = int(Mdict['K'])
        vocab_size = int(Mdict['vocab_size'])
        try:
            indices = Mdict['SparseWordCount_indices']
            indptr = Mdict['SparseWordCount_indptr']
            WordCounts = scipy.sparse.csr_matrix(
                (data, indices, indptr), shape=(K, vocab_size))
        except KeyError:
            # Fall back to 1-indexed (i, j) coordinate format
            rowIDs = Mdict['SparseWordCount_i'] - 1
            colIDs = Mdict['SparseWordCount_j'] - 1
            WordCounts = scipy.sparse.csr_matrix(
                (data, (rowIDs, colIDs)), shape=(K, vocab_size))
        Mdict['WordCounts'] = WordCounts.toarray()
    if returnTPA:
        # Load topics : 2D array, K x vocab_size
        if 'WordCounts' in Mdict:
            topics = Mdict['WordCounts'] + Mdict['lam']
        else:
            topics = Mdict['topics']
        topics = as2D(toCArray(topics, dtype=np.float64))
        assert topics.ndim == 2
        K = topics.shape[0]
        if normalizeTopics:
            topics /= topics.sum(axis=1)[:, np.newaxis]
        # Load probs : 1D array, size K
        try:
            probs = Mdict['probs']
        except KeyError:
            probs = (1.0 / K) * np.ones(K)
        probs = as1D(toCArray(probs, dtype=np.float64))
        assert probs.ndim == 1
        assert probs.size == K
        if normalizeProbs:
            probs = probs / np.sum(probs)
        # Load alpha : scalar float > 0
        try:
            alpha = float(Mdict['alpha'])
        except KeyError:
            if 'alpha' in os.environ:
                alpha = float(os.environ['alpha'])
            else:
                raise ValueError('Unknown parameter alpha')
        if 'eta' in Mdict:
            return topics, probs, alpha, as1D(toCArray(Mdict['eta']))
        return topics, probs, alpha
    infAlg = 'VB'
    if 'gamma' in Mdict:
        aPriorDict = dict(alpha=Mdict['alpha'], gamma=Mdict['gamma'])
        HDPTopicModel = AllocModelConstructorsByName['HDPTopicModel']
        amodel = HDPTopicModel(infAlg, aPriorDict)
    else:
        FiniteTopicModel = AllocModelConstructorsByName['FiniteTopicModel']
        amodel = FiniteTopicModel(infAlg, dict(alpha=Mdict['alpha']))
    omodel = ObsModelConstructorsByName['Mult'](infAlg, **Mdict)
    hmodel = HModel(amodel, omodel)
    hmodel.set_global_params(**Mdict)
    if returnWordCounts:
        return hmodel, Mdict['WordCounts']
    return hmodel
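
# Usage sketch for loadTopicModel. Illustrative only: taskpath below is a
# hypothetical bnpy output directory containing Lap*TopicModel.mat or
# Lap*TopicSnapshot entries.
def _demo_loadTopicModel(taskpath, queryLap=None):
    hmodel = loadTopicModel(taskpath, queryLap=queryLap)
    # Or fetch the raw (topics, probs, alpha) parameterization instead.
    # Models saved with an 'eta' field return a 4-tuple, so slice the
    # first three entries to stay robust to either case.
    out = loadTopicModel(
        taskpath, queryLap=queryLap, returnTPA=1,
        normalizeTopics=1, normalizeProbs=1)
    topics, probs, alpha = out[0], out[1], out[2]
    assert topics.shape[0] == probs.size
    return hmodel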
def __init__(self, edges=None, X=None, AdjMat=None,
             nNodesTotal=None, nEdgesTotal=None, nNodes=None,
             TrueParams=None, nodeNames=None, nodeZ=None,
             **kwargs):
    ''' Construct a GraphXData object.

    Pass either a full adjacency matrix (nNodes x nNodes x D),
    or a list of edges and associated observations.

    Args
    ----
    edges : 2D array, shape nEdges x 2
    X : 2D array, shape nEdges x D
    AdjMat : 3D array, shape nNodes x nNodes x D
        Defines adjacency matrix of desired graph.
        Assumes D=1 if 2D array specified.

    Returns
    -------
    Data : GraphXData
    '''
    self.isSparse = False
    self.TrueParams = TrueParams

    if AdjMat is not None:
        AdjMat = np.asarray(AdjMat)
        if AdjMat.ndim == 2:
            AdjMat = AdjMat[:, :, np.newaxis]
        nNodes = AdjMat.shape[0]
        edges = makeEdgesForDenseGraphWithNNodes(nNodes)
        X = np.zeros((edges.shape[0], AdjMat.shape[-1]))
        for eid, (i, j) in enumerate(edges):
            X[eid] = AdjMat[i, j]

    if AdjMat is None and (X is None or edges is None):
        raise ValueError(
            'Must specify adjacency matrix AdjMat, or ' +
            'a list of edges and corresponding dense observations X')

    # Create core attributes
    self.edges = toCArray(as2D(edges), dtype=np.int32)
    self.X = toCArray(as2D(X), dtype=np.float64)

    # Verify all edges are unique (raise error otherwise)
    N = self.edges.max() + 1
    edgeAsBaseNInteger = self.edges[:, 0] * N + self.edges[:, 1]
    nUniqueEdges = np.unique(edgeAsBaseNInteger).size
    if nUniqueEdges < self.edges.shape[0]:
        raise ValueError("Provided edges must be unique.")

    # Discard self loops
    nonselfloopmask = self.edges[:, 0] != self.edges[:, 1]
    if np.sum(nonselfloopmask) < self.edges.shape[0]:
        self.edges = self.edges[nonselfloopmask].copy()
        self.X = self.X[nonselfloopmask].copy()

    self._set_size_attributes(nNodesTotal=nNodesTotal,
                              nEdgesTotal=nEdgesTotal)
    self._verify_attributes()

    if TrueParams is None:
        if nodeZ is not None:
            self.TrueParams = dict()
            self.TrueParams['nodeZ'] = nodeZ
    else:
        self.TrueParams = TrueParams
    if nodeNames is not None:
        self.nodeNames = nodeNames
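
# Usage sketch: build a small dense graph from a binary adjacency matrix.
# Illustrative only; assumes the GraphXData class this constructor belongs
# to is defined and importable.
def _demo_GraphXData():
    AdjMat = np.asarray([
        [0, 1, 0],
        [0, 0, 1],
        [1, 0, 0]])
    Data = GraphXData(AdjMat=AdjMat)
    # Self loops are discarded, so 3 nodes yield at most 6 directed edges,
    # each carrying a D=1 observation read off the adjacency matrix.
    assert Data.X.shape[1] == 1
    return Data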
def __init__(self, X=None, doc_range=None, nDocTotal=None,
             Xprev=None, TrueZ=None,
             TrueParams=None, fileNames=None, summary=None,
             **kwargs):
    ''' Create an instance of GroupXData for provided array X

    Post Condition
    --------------
    self.X : 2D array, size N x D
        with standardized dtype, alignment, byteorder.
    self.Xprev : 2D array, size N x D
        with standardized dtype, alignment, byteorder.
    self.doc_range : 1D array, size nDoc+1
    '''
    self.X = as2D(toCArray(X, dtype=np.float64))
    self.doc_range = as1D(toCArray(doc_range, dtype=np.int32))
    if summary is not None:
        self.summary = summary
    if Xprev is not None:
        self.Xprev = as2D(toCArray(Xprev, dtype=np.float64))

    # Verify attributes are consistent
    self._set_dependent_params(doc_range, nDocTotal)
    self._check_dims()

    # Add optional true parameters / true hard labels
    if TrueParams is not None:
        self.TrueParams = dict()
        for key, arr in TrueParams.items():
            self.TrueParams[key] = toCArray(arr)

    if TrueZ is not None:
        if not hasattr(self, 'TrueParams'):
            self.TrueParams = dict()
        self.TrueParams['Z'] = as1D(toCArray(TrueZ))
        self.TrueParams['K'] = np.unique(self.TrueParams['Z']).size

    # Add optional source files for each group/sequence
    if fileNames is not None:
        if hasattr(fileNames, 'shape') and fileNames.shape == (1, 1):
            fileNames = fileNames[0, 0]
        if len(fileNames) > 1:
            self.fileNames = [str(x).strip()
                              for x in np.squeeze(fileNames)]
        else:
            self.fileNames = [str(fileNames[0])]

    # Add extra data attributes custom for the dataset
    for key in kwargs:
        if hasattr(self, key):
            continue
        if not key.startswith("__"):
            arr = np.squeeze(as1D(kwargs[key]))
            if arr.shape == ():
                try:
                    arr = float(arr)
                except TypeError:
                    continue
            setattr(self, key, arr)
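
# Usage sketch: three documents of unequal length packed into one dataset.
# Illustrative only; assumes the GroupXData class this constructor belongs
# to is defined and importable.
def _demo_GroupXData():
    X = np.random.RandomState(0).randn(6, 2)  # N=6 total observations
    doc_range = np.asarray([0, 2, 3, 6])      # docs of size 2, 1, and 3
    Data = GroupXData(X=X, doc_range=doc_range)
    assert Data.doc_range.size == 3 + 1       # nDoc + 1 boundary entries
    return Data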
def __init__(self, X=None, nObsTotal=None, TrueZ=None,
             Xprev=None, Y=None,
             TrueParams=None, name=None, summary=None,
             dtype='auto',
             row_names=None,
             column_names=None,
             y_column_names=None,
             xprev_column_names=None,
             do_copy=True,
             **kwargs):
    ''' Constructor for XData instance given in-memory dense array X.

    Post Condition
    --------------
    self.X : 2D array, size N x D
        with standardized dtype, alignment, byteorder.
    '''
    if dtype == 'auto':
        dtype = X.dtype
    if not do_copy and X.dtype == dtype:
        self.X = as2D(X)
    else:
        self.X = as2D(toCArray(X, dtype=dtype))
    if Xprev is not None:
        self.Xprev = as2D(toCArray(Xprev, dtype=dtype))
    if Y is not None:
        self.Y = as2D(toCArray(Y, dtype=dtype))

    # Verify attributes are consistent
    self._set_dependent_params(nObsTotal=nObsTotal)
    self._check_dims(do_copy=do_copy)

    # Add optional true parameters / true hard labels
    if TrueParams is not None:
        self.TrueParams = TrueParams
    if TrueZ is not None:
        if not hasattr(self, 'TrueParams'):
            self.TrueParams = dict()
        self.TrueParams['Z'] = as1D(toCArray(TrueZ))
        self.TrueParams['K'] = np.unique(self.TrueParams['Z']).size
    if summary is not None:
        self.summary = summary
    if name is not None:
        self.name = str(name)

    # Add optional row names
    if row_names is None:
        self.row_names = np.arange(0, self.nObs, 1)
    else:
        assert len(list(row_names)) == self.nObs
        self.row_names = [str(x) for x in row_names]

    # Add optional column names
    if column_names is None:
        self.column_names = ["dim_%d" % n for n in range(self.dim)]
    else:
        assert len(column_names) == self.dim
        self.column_names = [str(c) for c in column_names]
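
# Usage sketch: wrap a dense array as XData with custom column names.
# Illustrative only; assumes the XData class this constructor belongs to
# is defined and importable, with nObs and dim set by
# _set_dependent_params as above.
def _demo_XData():
    X = np.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    Data = XData(X=X, column_names=['height', 'weight'])
    assert Data.nObs == 3 and Data.dim == 2
    assert Data.column_names == ['height', 'weight']
    return Data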