예제 #1
0
def get_data(relationName='esteem', phase='3', **kwargs):
    ''' Load one relation of the Sampson dataset as a GraphXData object.

    Reads the stacked adjacency matrices that follow the 'DATA:' marker
    in the file at datfilepath, crops out the 18 contiguous rows that
    belong to the requested relation, and attaches a label for every
    monk read from the companion 'sampson_labels.txt' file.

    Args
    -------
    relationName : str
        name of the relation; joined with phase to build the label
        '<relationName>_phase<phase>' that is searched in relationLabels
    phase : str or int
        phase identifier appended to the relation label

    Returns
    -------
    Data : GraphXData object
        with extra attributes summary, name and relationName

    Raises
    -------
    ValueError
        if the data file has no rows after the 'DATA:' marker, or
        if the requested relation label is not found in relationLabels
    '''
    # Every non-blank line after the 'DATA:' marker is one matrix row.
    # Any positive entry counts as an edge, so rows are stored as booleans.
    adjRows = list()
    with open(datfilepath, 'r') as f:
        doRecord = False
        for line in f:
            line = line.strip()
            if doRecord:
                # Blank lines carry no data; split() without a separator
                # also tolerates tabs and runs of several spaces.
                if line:
                    adjRows.append(
                        np.asarray(line.split(), dtype=np.int32) > 0)
            elif line.startswith('DATA:'):
                doRecord = True
    if not adjRows:
        raise ValueError(
            "No rows found after 'DATA:' marker in %s" % datfilepath)
    # AdjMatStack is 180 x 18, where each set of 18 rows
    # corresponds to one of the 10 relations.
    AdjMatStack = np.vstack(adjRows)

    # Crop out set of 18 contig rows
    # specified by the relation keyword
    matchrelLabel = relationName + '_phase' + str(phase)
    for relID, relLabel in enumerate(relationLabels):
        if relLabel == matchrelLabel:
            matchID = relID
            break
    else:
        raise ValueError(
            "Cannot find desired relation: %s" % matchrelLabel)
    AdjMat = AdjMatStack[matchID * 18:(matchID + 1) * 18]

    MonkNameToIDMap = dict(
        (name, uid) for uid, name in enumerate(monkNames))

    MonkIDToLabelIDMap = dict()
    labelfilepath = datfilepath.replace('sampson.dat', 'sampson_labels.txt')
    with open(labelfilepath, 'r') as f:
        # First line is a header; it is consumed but not used.
        f.readline()
        for line in f:
            line = line.strip()
            if not line:
                # First blank line ends the label table
                break
            # "John_Bosco 1" >> "John_Bosco", "1"
            keyval = line.split()
            name = keyval[0]
            labelID = int(keyval[1])
            MonkIDToLabelIDMap[MonkNameToIDMap[name]] = labelID

    # One label per monk, ordered like monkNames
    nodeZ = np.asarray([
        MonkIDToLabelIDMap[MonkNameToIDMap[mName]]
        for mName in monkNames], dtype=np.int32)

    Data = GraphXData(AdjMat=AdjMat,
                      nodeNames=monkNames, nodeZ=nodeZ)
    Data.summary = get_data_info()
    Data.name = get_short_name()
    Data.relationName = matchrelLabel
    return Data
예제 #2
0
def get_data(seed=123, nNodes=100, w_diag=.95, w_offdiag_eps=.01, **kwargs):
    ''' Build a toy block-structured network as a bnpy GraphXData object.

    Each node is assigned to one of K blocks, drawn uniformly at random.
    A directed edge i -> j (i != j) is then drawn as a Bernoulli variable
    whose probability is looked up in a K x K block matrix.

    Args
    -------
    seed : int
        seed for random number generator
    nNodes : int
        number of nodes in the generated network
    w_diag : float
        edge probability between two nodes of the same block
    w_offdiag_eps : float
        edge probability between nodes of different blocks

    Returns
    -------
    Data : bnpy GraphXData object
        TrueParams holds the assignments Z, block matrix w,
        and block frequencies pi
    '''
    rng = np.random.RandomState(seed)

    # Block relation matrix, shape K x K:
    # small epsilon everywhere, then overwrite the diagonal
    blockProbs = w_offdiag_eps * np.ones((K, K))
    np.fill_diagonal(blockProbs, w_diag)

    # Uniform block frequencies
    blockFreqs = np.full(K, 1.0 / K)

    # One block assignment per node
    assignments = rng.choice(range(K), p=blockFreqs, size=nNodes)

    # Draw every off-diagonal entry in row-major order;
    # self-loops stay at zero
    edgeMat = np.zeros((nNodes, nNodes))
    for src in range(nNodes):
        for dst in range(nNodes):
            if src == dst:
                continue
            edgeProb = blockProbs[assignments[src], assignments[dst]]
            edgeMat[src, dst] = rng.binomial(n=1, p=edgeProb)

    trueParams = {'Z': assignments, 'w': blockProbs, 'pi': blockFreqs}
    Data = GraphXData(
        AdjMat=edgeMat, nNodesTotal=nNodes, TrueParams=trueParams)
    Data.name = get_short_name()
    return Data
예제 #3
0
def get_data(seed=123,
             nNodes=100,
             alpha=0.05,
             epsilon=1e-4,
             delta=.1,
             **kwargs):
    ''' Create toy dataset as bnpy GraphXData object.

    Every ordered pair (i, j) with i != j gets a source community s[i, j]
    and a receiver community r[i, j]. When they agree, the edge weight is
    drawn from a normal with that community's entry of mus / sigmas.
    Otherwise the weight is uniform on [-delta, delta] with probability
    1 - epsilon, and uniform on [-M, -delta] or [delta, M] (each half
    equally likely) with probability epsilon, where M is the largest
    absolute within-community weight.

    Args
    -------
    seed : int
        seed for random number generator
    nNodes : int
        number of nodes in the generated network
    alpha : float or array-like
        Controls the Dirichlet prior on pi, pi ~ Dir(alpha).
        A scalar is expanded to a length-K vector.
    epsilon : float
        Probability that an edge representing an out of community
        interaction will have a value outside [-delta, delta]
    delta : float
        See above

    Returns
    -------
    Data : bnpy GraphXData object
    '''
    prng = np.random.RandomState(seed)
    # Edge weights below are drawn from the global numpy generator,
    # so it is seeded too. NOTE: this resets global random state.
    np.random.seed(seed)

    # Create membership probabilities at each node
    N = nNodes
    if not hasattr(alpha, '__len__'):
        # Only the scalar -> vector expansion is conditional;
        # everything after it must run for array-valued alpha as well.
        alpha = alpha * np.ones(K)
    pi = prng.dirichlet(alpha, size=nNodes)

    # Make source / receiver assignments and pack into TrueZ
    s = np.zeros((N, N), dtype=int)
    r = np.zeros((N, N), dtype=int)
    for i in range(N):
        s[i, :] = prng.choice(range(K), p=pi[i, :], size=nNodes)
        r[:, i] = prng.choice(range(K), p=pi[i, :], size=nNodes)
    TrueZ = np.zeros((N, N, 2), dtype=int)
    TrueZ[:, :, 0] = s
    TrueZ[:, :, 1] = r
    TrueParams = {'TrueZ': TrueZ, 'pi': pi, 'mu': mus, 'sigma': sigmas}

    # Generate graph: first pass fills within-community edges
    X = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            if i == j:
                continue
            if s[i, j] == r[i, j]:
                X[i, j] = np.random.normal(mus[s[i, j]], sigmas[s[i, j]])

    # Second pass needs M, the largest magnitude from the first pass,
    # to bound the rare out-of-interval weights.
    M = np.max(np.abs(X))
    for i in range(N):
        for j in range(N):
            if i == j:
                continue
            if s[i, j] != r[i, j]:
                inInterval = prng.binomial(n=1, p=1 - epsilon)
                if inInterval:
                    X[i, j] = np.random.uniform(low=-delta, high=delta)
                else:
                    negativeHalf = prng.binomial(n=1, p=.5)
                    if negativeHalf:
                        X[i, j] = np.random.uniform(low=-M, high=-delta)
                    else:
                        X[i, j] = np.random.uniform(low=delta, high=M)

    Data = GraphXData(AdjMat=X,
                      X=None,
                      edges=None,
                      nNodesTotal=nNodes,
                      nNodes=nNodes,
                      TrueParams=TrueParams,
                      isSparse=False)
    return Data
예제 #4
0
파일: ToyMMSBK6.py 프로젝트: jpfeil/hydra
def get_data(seed=123,
             nNodes=100,
             alpha=0.05,
             w_diag=.95,
             w_offdiag_eps=.01,
             **kwargs):
    ''' Create toy dataset as bnpy GraphXData object.

    Uses a simple mixed membership generative model.
    Assumes high within-block edge probability, small epsilon otherwise.

    Args
    -------
    seed : int
        seed for random number generator
    nNodes : int
        number of nodes in the generated network
    alpha : float or array-like
        Dirichlet parameter for each node's membership vector pi.
        A scalar is expanded to a length-K vector.
    w_diag : float
        edge probability when source and receiver communities match
    w_offdiag_eps : float
        edge probability when source and receiver communities differ

    Returns
    -------
    Data : bnpy GraphXData object
        TrueParams holds Z (source / receiver assignments for every
        ordered pair), the block matrix w, and memberships pi
    '''
    nNodes = int(nNodes)
    prng = np.random.RandomState(seed)

    # Create membership probabilities at each node
    if not hasattr(alpha, '__len__'):
        alpha = alpha * np.ones(K)
    pi = prng.dirichlet(alpha, size=nNodes)

    # Create block relation matrix W, shape K x K
    w = w_offdiag_eps * np.ones((K, K))
    # Index the diagonal by K so it always matches the shape of w
    w[np.diag_indices(K)] = w_diag

    # Generate community assignments, s, r, and pack into TrueZ
    s = np.zeros((nNodes, nNodes), dtype=int)
    r = np.zeros((nNodes, nNodes), dtype=int)
    for i in range(nNodes):
        s[i, :] = prng.choice(range(K), p=pi[i, :], size=nNodes)
        r[:, i] = prng.choice(range(K), p=pi[i, :], size=nNodes)
    TrueZ = np.zeros((nNodes, nNodes, 2), dtype=int)
    TrueZ[:, :, 0] = s
    TrueZ[:, :, 1] = r

    TrueParams = dict(Z=TrueZ, w=w, pi=pi)

    # Generate adjacency matrix; self-loops stay at zero
    AdjMat = np.zeros((nNodes, nNodes))
    for i in range(nNodes):
        for j in range(nNodes):
            if i == j:
                continue
            AdjMat[i, j] = prng.binomial(n=1, p=w[s[i, j], r[i, j]])

    Data = GraphXData(AdjMat=AdjMat,
                      nNodesTotal=nNodes,
                      nNodes=nNodes,
                      TrueParams=TrueParams,
                      isSparse=True)
    Data.name = get_short_name()
    return Data
예제 #5
0
def get_data(**kwargs):
    ''' Load the dataset stored at matfilepath as a GraphXData object.

    Keyword arguments are accepted but not used.

    Returns
    -------
    Data : GraphXData object
        read via GraphXData.read_from_mat, with summary and name
        attributes attached
    '''
    loaded = GraphXData.read_from_mat(matfilepath)
    # Attach descriptive metadata supplied by this module's helpers
    loaded.summary = get_data_info()
    loaded.name = get_short_name()
    return loaded