def get_data(relationName='esteem', phase='3', **kwargs):
    ''' Load one relation of the network stored at datfilepath as GraphXData.

    Reads every row that follows the 'DATA:' marker line in the data file,
    stacks the rows into one matrix, and crops out the 18 contiguous rows
    belonging to the requested relation. Per-node labels are read from the
    companion 'sampson_labels.txt' file.

    Args
    -------
    relationName : str
        name of the desired relation
    phase : str or int
        phase of the desired relation. The label searched for in
        relationLabels is relationName + '_phase' + str(phase).

    Returns
    -------
    Data : bnpy GraphXData object
        with boolean AdjMat, nodeNames, nodeZ, and the attributes
        summary, name and relationName filled in.

    Raises
    -------
    ValueError
        if the requested relation label is not found in relationLabels.
    '''
    DataLines = list()
    with open(datfilepath, 'r') as f:
        doRecord = False
        for line in f:
            line = line.strip()
            # Skip blank lines, and split on any run of whitespace,
            # so stray spaces or tabs do not break integer parsing.
            if doRecord and line:
                DataLines.append(
                    np.asarray(line.split(), dtype=np.int32) > 0)
            if line.startswith('DATA:'):
                doRecord = True
    # Rows are already boolean (thresholded at > 0 above),
    # so no further binarization is needed.
    AdjMatStack = np.vstack(DataLines)

    # AdjMatStack is 180 x 18, where each set of 18 rows
    # corresponds to one of the 10 relations.
    # Crop out set of 18 contig rows
    # specified by the relation keyword
    matchrelLabel = relationName + '_phase' + str(phase)
    matchID = next(
        (relID for relID, relLabel in enumerate(relationLabels)
         if relLabel == matchrelLabel),
        None)
    if matchID is None:
        raise ValueError(
            "Cannot find desired relation: %s" % matchrelLabel)
    AdjMat = AdjMatStack[matchID * 18:(matchID + 1) * 18]

    MonkNameToIDMap = {name: uid for uid, name in enumerate(monkNames)}

    MonkIDToLabelIDMap = dict()
    labelfilepath = datfilepath.replace('sampson.dat', 'sampson_labels.txt')
    with open(labelfilepath, 'r') as f:
        # First line is a header; its contents are not needed.
        f.readline()
        for line in f:
            line = line.strip()
            if not line:
                # First blank line marks the end of the label table
                break
            # "John_Bosco 1" >> "John_Bosco", "1"
            keyval = line.split()
            name = keyval[0]
            labelID = int(keyval[1])
            MonkIDToLabelIDMap[MonkNameToIDMap[name]] = labelID

    # Label of each node, in the same order as monkNames
    nodeZ = np.asarray(
        [MonkIDToLabelIDMap[MonkNameToIDMap[mName]] for mName in monkNames],
        dtype=np.int32)

    Data = GraphXData(AdjMat=AdjMat, nodeNames=monkNames, nodeZ=nodeZ)
    Data.summary = get_data_info()
    Data.name = get_short_name()
    Data.relationName = matchrelLabel
    return Data
def get_data(seed=123, nNodes=100, w_diag=.95, w_offdiag_eps=.01, **kwargs):
    ''' Create toy dataset as bnpy GraphXData object.

    Each node is assigned to exactly one of K blocks, drawn uniformly
    at random. A directed edge i -> j (i != j) is then drawn as a
    Bernoulli variable with probability w[Z[i], Z[j]]: w_diag when both
    nodes share a block, w_offdiag_eps otherwise.

    Args
    -------
    seed : int
        seed for random number generator
    nNodes : int
        number of nodes in the generated network
    w_diag : float
        edge probability between two nodes in the same block
    w_offdiag_eps : float
        edge probability between two nodes in different blocks

    Returns
    -------
    Data : bnpy GraphXData object
        with TrueParams holding the fields Z, w and pi.
    '''
    # Accept integer-valued floats or strings (e.g. from keyword parsing)
    nNodes = int(nNodes)
    prng = np.random.RandomState(seed)

    # Create membership probabilities (uniform over the K blocks)
    pi = 1.0 / K * np.ones(K)

    # Create block relation matrix W, shape K x K
    w = w_offdiag_eps * np.ones((K, K))
    w[np.diag_indices(K)] = w_diag

    # Generate node assignments
    Z = prng.choice(range(K), p=pi, size=nNodes)
    TrueParams = dict(Z=Z, w=w, pi=pi)

    # Generate edges one at a time, skipping self-loops.
    # The explicit loop fixes the order of random draws,
    # so a given seed always yields the same graph.
    AdjMat = np.zeros((nNodes, nNodes))
    for i in range(nNodes):
        for j in range(nNodes):
            if i != j:
                AdjMat[i, j] = prng.binomial(n=1, p=w[Z[i], Z[j]])

    Data = GraphXData(AdjMat=AdjMat,
                      nNodesTotal=nNodes, TrueParams=TrueParams)
    Data.name = get_short_name()
    return Data
def get_data(seed=123, nNodes=100, alpha=0.05, epsilon=1e-4, delta=.1,
             **kwargs):
    ''' Create toy dataset as bnpy GraphXData object.

    Every ordered pair (i, j) gets a source assignment s[i, j] and a
    receiver assignment r[i, j]. When the two agree, the edge weight is
    drawn from a normal distribution with that community's entry of mus
    and sigmas. When they disagree, the weight is drawn uniformly from
    [-delta, delta] with probability 1 - epsilon, and otherwise uniformly
    from [-M, -delta] or [delta, M] (each with probability 0.5), where M
    is the largest absolute within-community weight.

    NOTE: besides a private RandomState, this function also reseeds and
    draws from the global np.random generator, so it alters global
    random state as a side effect.

    Args
    -------
    seed : int
        seed for random number generator
    nNodes : int
        number of nodes in the generated network
    alpha : float
        Controls the Dirichlet prior on pi, pi ~ Dir(alpha)
    epsilon : float
        Probability that an edge representing an out of community
        interaction will have a value outside [-delta, delta]
    delta : float
        See above

    Returns
    -------
    Data : bnpy GraphXData object
        with TrueParams holding the fields TrueZ, pi, mu and sigma.
    '''
    # Accept integer-valued floats or strings (e.g. from keyword parsing)
    nNodes = int(nNodes)
    N = nNodes

    prng = np.random.RandomState(seed)
    np.random.seed(seed)

    # Create membership probabilities at each node
    if not hasattr(alpha, '__len__'):
        alpha = alpha * np.ones(K)
    pi = prng.dirichlet(alpha, size=N)

    # Make source / receiver assignments and pack into TrueZ
    s = np.zeros((N, N), dtype=int)
    r = np.zeros((N, N), dtype=int)
    for i in range(N):
        s[i, :] = prng.choice(range(K), p=pi[i, :], size=N)
        r[:, i] = prng.choice(range(K), p=pi[i, :], size=N)
    TrueZ = np.zeros((N, N, 2), dtype=int)
    TrueZ[:, :, 0] = s
    TrueZ[:, :, 1] = r

    TrueParams = {'TrueZ': TrueZ, 'pi': pi, 'mu': mus, 'sigma': sigmas}

    # Generate graph
    # First pass: within-community edges (source and receiver agree)
    X = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            if i == j:
                continue
            if s[i, j] == r[i, j]:
                X[i, j] = np.random.normal(mus[s[i, j]], sigmas[s[i, j]])

    # Largest within-community magnitude bounds the rare outlier weights
    M = np.max(np.abs(X))

    # Second pass: out-of-community edges (source and receiver disagree)
    for i in range(N):
        for j in range(N):
            if i == j:
                continue
            if s[i, j] != r[i, j]:
                inInterval = prng.binomial(n=1, p=1 - epsilon)
                if inInterval:
                    X[i, j] = np.random.uniform(low=-delta, high=delta)
                else:
                    negativeHalf = prng.binomial(n=1, p=.5)
                    if negativeHalf:
                        X[i, j] = np.random.uniform(low=-M, high=-delta)
                    else:
                        X[i, j] = np.random.uniform(low=delta, high=M)

    Data = GraphXData(AdjMat=X, X=None, edges=None,
                      nNodesTotal=nNodes, nNodes=nNodes,
                      TrueParams=TrueParams, isSparse=False)
    return Data
def get_data(seed=123, nNodes=100, alpha=0.05, w_diag=.95,
             w_offdiag_eps=.01, **kwargs):
    ''' Create toy dataset as bnpy GraphXData object.

    Uses a simple mixed membership generative model.
    Assumes high within-block edge probability, small epsilon otherwise.

    Every node i has its own membership vector pi[i] ~ Dir(alpha).
    Every ordered pair (i, j) gets a source assignment s[i, j] and a
    receiver assignment r[i, j], and the edge i -> j (i != j) is drawn
    as a Bernoulli variable with probability w[s[i, j], r[i, j]].

    Args
    -------
    seed : int
        seed for random number generator
    nNodes : int
        number of nodes in the generated network
    alpha : float or array-like of length K
        Dirichlet parameter for the membership vectors.
        A scalar is expanded to a symmetric vector of length K.
    w_diag : float
        edge probability when source and receiver assignments agree
    w_offdiag_eps : float
        edge probability when source and receiver assignments differ

    Returns
    -------
    Data : bnpy GraphXData object
        with TrueParams holding the fields Z, w and pi.
    '''
    nNodes = int(nNodes)
    prng = np.random.RandomState(seed)

    # Create membership probabilities at each node
    if not hasattr(alpha, '__len__'):
        alpha = alpha * np.ones(K)
    pi = prng.dirichlet(alpha, size=nNodes)

    # Create block relation matrix W, shape K x K
    w = w_offdiag_eps * np.ones((K, K))
    # Use K (not a hard-coded size) so the whole diagonal is always set
    w[np.diag_indices(K)] = w_diag

    # Generate community assignments, s, r, and pack into TrueZ
    s = np.zeros((nNodes, nNodes), dtype=int)
    r = np.zeros((nNodes, nNodes), dtype=int)
    for i in range(nNodes):
        s[i, :] = prng.choice(range(K), p=pi[i, :], size=nNodes)
        r[:, i] = prng.choice(range(K), p=pi[i, :], size=nNodes)
    TrueZ = np.zeros((nNodes, nNodes, 2), dtype=int)
    TrueZ[:, :, 0] = s
    TrueZ[:, :, 1] = r

    TrueParams = dict(Z=TrueZ, w=w, pi=pi)

    # Generate adjacency matrix one entry at a time, skipping self-loops.
    # The explicit loop fixes the order of random draws,
    # so a given seed always yields the same graph.
    AdjMat = np.zeros((nNodes, nNodes))
    for i in range(nNodes):
        for j in range(nNodes):
            if i == j:
                continue
            AdjMat[i, j] = prng.binomial(n=1, p=w[s[i, j], r[i, j]])

    Data = GraphXData(AdjMat=AdjMat,
                      nNodesTotal=nNodes, nNodes=nNodes,
                      TrueParams=TrueParams, isSparse=True)
    Data.name = get_short_name()
    return Data
def get_data(**kwargs):
    ''' Load the dataset stored at matfilepath as a GraphXData object.

    Keyword arguments are accepted but not used.

    Returns
    -------
    Data : bnpy GraphXData object
        read via GraphXData.read_from_mat, with its summary and name
        attributes filled in from get_data_info() and get_short_name().
    '''
    dataset = GraphXData.read_from_mat(matfilepath)
    dataset.summary = get_data_info()
    dataset.name = get_short_name()
    return dataset