예제 #1
0
def motifOrder(data,
               key,
               epsilon,
               orderSize=3,
               motifSize=3,
               degree=10,
               swap=False):
    """Bin graphs by the ranking of their most frequent motifs.

    Each graph is thresholded, written to result/OUTPUT.txt and handed to
    the external ./Kavosh program. The rows read back from
    result/MotifCount.txt are ranked by total (descending) and the ids of
    the top ``orderSize`` rows are grouped so that near-ties share a rank.

    Args:
        data: Container indexed with ``key`` to obtain the graphs; it is
            not read when ``key == "rand"``.
        key: Index into ``data``, or "rand" to use 100 random 88x88
            matrices with a zeroed diagonal instead.
        epsilon: Tolerance factor. A motif joins the current group while
            ``total * epsilon`` is greater than the total of the motif
            that opened the group.
        orderSize: Upper bound on the number of ranked motifs considered.
        motifSize: Passed to ./Kavosh as its command-line argument.
        degree: Entries strictly greater than the
            (len(G) * degree + 1)-th largest entry of G become edges.
        swap: If true, the graph is replaced by
            ``gh.randomize_graph(graph, 2000)`` before it is written out.

    Returns:
        A defaultdict(float) mapping each ordering (a tuple of tuples of
        motif ids, every inner tuple sorted) to the percentage of graphs
        that produced it.
    """
    if key == "rand":
        graphs = []
        for _ in range(100):
            noise = np.random.rand(88, 88)
            # Subtracting the diagonal from itself zeroes it.
            noise -= np.diag(np.diag(noise))
            graphs.append(noise)
    else:
        graphs = data[key]

    pattern = defaultdict(float)
    for G in graphs:
        # Cut-off weight: the (len(G) * degree + 1)-th largest entry of G.
        cutoff = np.sort(G, axis=None)[-len(G) * degree - 1]

        # Write the thresholded graph (nodes relabelled from 1) where the
        # motif finder is run: node count first, then the edge list.
        graph = nx.convert_node_labels_to_integers(nx.DiGraph(G > cutoff), 1)
        if swap:
            graph = gh.randomize_graph(graph, 2000)
        with open('result/OUTPUT.txt', 'wb') as out:
            out.write(str(len(graph)) + '\n')
            nx.write_edgelist(graph, out, data=False)

        # The C++ motif finder is driven through files on disk.
        subprocess.call(['./Kavosh', str(motifSize)])
        counts = np.loadtxt("result/MotifCount.txt", ndmin=2)

        # (id, total) pairs, most frequent first; the sort is stable so
        # equal totals keep their file order.
        ranked = sorted(((int(iD), total) for iD, total, _percent in counts),
                        key=lambda pair: -pair[1])

        size = min(orderSize, len(ranked))
        groups = []
        root = None
        for pos, (iD, total) in enumerate(ranked):
            if pos >= size:
                break
            if groups and total * epsilon > root:
                # Close enough to the motif that opened the group.
                groups[-1].append(iD)
            else:
                root = total
                groups.append([iD])

        ordering = tuple(tuple(sorted(members)) for members in groups)
        pattern[ordering] += 100 / float(len(graphs))

    return pattern
예제 #2
0
def motifOrder(data,key,epsilon,orderSize=3,motifSize=3,degree=10,swap=False):
	"""Bin graphs by the ranking of their most frequent motifs.

	Each graph is thresholded, written to result/OUTPUT.txt and handed to
	the external ./Kavosh program. The rows read back from
	result/MotifCount.txt are ranked by total (descending) and the ids of
	the top ``orderSize`` rows are grouped so that near-ties share a rank.

	Args:
		data: Container indexed with ``key`` to obtain the graphs; it is
			not read when ``key == "rand"``.
		key: Index into ``data``, or "rand" to use 100 random 88x88
			matrices with a zeroed diagonal instead.
		epsilon: Tolerance factor. A motif joins the current group while
			``total * epsilon`` is greater than the total of the motif
			that opened the group.
		orderSize: Upper bound on the number of ranked motifs considered.
		motifSize: Passed to ./Kavosh as its command-line argument.
		degree: Entries strictly greater than the
			(len(G) * degree + 1)-th largest entry of G become edges.
		swap: If true, the graph is replaced by
			``gh.randomize_graph(graph, 2000)`` before it is written out.

	Returns:
		A defaultdict(float) mapping each ordering (a tuple of tuples of
		motif ids, every inner tuple sorted) to the percentage of graphs
		that produced it.
	"""
	if key == "rand":
		graphs = []
		for _ in range(100):
			noise = np.random.rand(88,88)
			# Subtracting the diagonal from itself zeroes it.
			noise -= np.diag(np.diag(noise))
			graphs.append(noise)
	else:
		graphs = data[key]

	pattern = defaultdict(float)
	for G in graphs:
		# Cut-off weight: the (len(G) * degree + 1)-th largest entry of G.
		cutoff = np.sort(G,axis=None)[-len(G)*degree-1]

		# Write the thresholded graph (nodes relabelled from 1) where the
		# motif finder is run: node count first, then the edge list.
		graph = nx.convert_node_labels_to_integers(nx.DiGraph(G>cutoff),1)
		if swap:
			graph = gh.randomize_graph(graph, 2000)
		with open('result/OUTPUT.txt','wb') as out:
			out.write(str(len(graph)) + '\n')
			nx.write_edgelist(graph,out,data=False)

		# The C++ motif finder is driven through files on disk.
		subprocess.call(['./Kavosh', str(motifSize)])
		counts = np.loadtxt("result/MotifCount.txt",ndmin=2)

		# (id, total) pairs, most frequent first; the sort is stable so
		# equal totals keep their file order.
		ranked = sorted(((int(iD),total) for iD,total,_percent in counts),
			key=lambda pair:-pair[1])

		size = min(orderSize,len(ranked))
		groups = []
		root = None
		for pos,(iD,total) in enumerate(ranked):
			if pos >= size:
				break
			if groups and total*epsilon > root:
				# Close enough to the motif that opened the group.
				groups[-1].append(iD)
			else:
				root = total
				groups.append([iD])

		ordering = tuple(tuple(sorted(members)) for members in groups)
		pattern[ordering] += 100/float(len(graphs))

	return pattern
예제 #3
0
def makeSwapData(degree=10):
	"""Threshold every graph in aznorbert_corrsd_new.pkl and pickle the result.

	For each key of the loaded mapping, each graph is turned into a
	networkx DiGraph holding the entries strictly greater than the
	(len(G) * degree + 1)-th largest entry, passed to
	``gh.randomize_graph(graph, 2500)``, and collected. The mapping from
	key to list of graphs is written to "SwapData<degree>.pkl".

	Args:
		degree: Controls the threshold (see above) and is part of the
			output file name.
	"""
	with open("aznorbert_corrsd_new.pkl","rb") as source:
		data = pickle.load(source)

	swapData = {}
	for key, graphs in data.items():
		print(key)
		collected = []
		for i,G in enumerate(graphs):
			print(i)
			cutoff = np.sort(G,axis=None)[-len(G)*degree-1]
			graph = nx.DiGraph(G>cutoff)
			# NOTE(review): the return value of randomize_graph is discarded
			# and the object passed in is what gets stored. That yields
			# randomized graphs only if randomize_graph modifies its
			# argument in place -- confirm against gh.randomize_graph.
			gh.randomize_graph(graph, 2500)
			collected.append(graph)
		swapData[key] = collected

	with open("SwapData"+str(degree)+".pkl",'wb') as sink:
		pickle.dump(swapData,sink)
예제 #4
0
def findMotifs(data,key,motifSize=3,degree=10,randomize=False):
	"""Compute per-graph motif frequencies for one group of graphs.

	Each accepted graph is thresholded, written to result/OUTPUT.txt and
	handed to the external ./Kavosh program; result/MotifCount.txt is then
	read back (first line: subgraph count, remaining rows: id and total).
	When USECACHE is set, results are stored in and served from cache/.

	Args:
		data: Container indexed with ``key`` to obtain the graphs; it is
			not read when ``key == "rand"``.
		key: Index into ``data``, or "rand" to use 100 random 88x88
			matrices with a zeroed diagonal. Part of the cache file name.
		motifSize: Passed to ./Kavosh on its command line. Part of the
			cache file name.
		degree: Graphs with fewer than len(G) * degree non-zero entries
			are rejected; otherwise entries strictly greater than the
			(len(G) * degree + 1)-th largest entry become edges. Part of
			the cache file name.
		randomize: If true, the graph is replaced by
			``gh.randomize_graph(graph, 2000)`` before it is written out.
			Randomized runs use a separate cache file.

	Returns:
		A dict mapping int motif id to a numpy array of fractions
		(total / subgraph count) with one entry per non-rejected graph.
		Zeros for graphs that did not report the motif are appended at
		the end, so array positions do not identify graphs. On a cache
		hit, whatever object was pickled is returned.
	"""

	usetotal = False

	#Check cache. Randomized runs get their own entry so they are never
	#confused with the non-randomized result for the same key; the
	#non-randomized file name is unchanged, so existing caches stay valid.
	filename = str(key)+'s'+str(int(motifSize))+'d'+str(int(degree))+str(usetotal)
	if randomize:
		filename += '_randomized'
	filename += ".pkl"
	if os.path.exists('cache/'+filename) and USECACHE:
		print("in cache")
		with open('cache/'+filename,"rb") as f:
			return pickle.load(f)

	if key == "rand":
		graphs = []
		for _ in range(100):
			x = np.random.rand(88,88)
			# Subtracting the diagonal from itself zeroes it.
			x -= np.diag(np.diag(x))
			graphs.append(x)
	else:
		graphs = data[key]

	motifs = defaultdict(list)
	numstring ="/"+str(len(graphs))
	rejected = 0
	for index,G in enumerate(graphs):
		#Cull bad graphs: too few non-zero entries to reach the target degree
		if np.count_nonzero(G)<len(G)*degree:
			rejected += 1
			continue

		#calculate threshold
		sortedWeights = np.sort(G,axis=None)
		threshold = sortedWeights[-len(G)*degree-1]
		#Print progress
		sys.stdout.write("\rMotif Finding Progress: "+str(index)+numstring)
		sys.stdout.write(" Threshold: "+str(threshold))
		sys.stdout.flush()

		#Output graph to txt file: node count first, then the edge list
		graph = nx.DiGraph(G>threshold)
		graph = nx.convert_node_labels_to_integers(graph,1)
		if randomize:
			graph = gh.randomize_graph(graph, 2000)
		with open('result/OUTPUT.txt','wb') as f:
			f.write(str(len(graph)) + '\n')
			nx.write_edgelist(graph,f,data=False)
		#The C++ motif finder is driven through files on disk.
		#NOTE(review): the exit status is not checked, so MotifCount.txt is
		#read regardless of whether the command succeeded.
		os.system("./Kavosh "+str(motifSize))
		with open("result/MotifCount.txt","rb") as f:
			subgraphs = float(next(f))
			counts = np.loadtxt(f, ndmin=2)

		for iD,total in counts:
			motifs[int(iD)].append(total/subgraphs)

	print('\nMotifs Done! Graphs Rejected: '+str(rejected))

	#add zeros to graphs that didn't contain motifs
	accepted = len(graphs)-rejected
	padded = {}
	for iD,values in motifs.items():
		padded[int(iD)] = np.array(values+[0]*(accepted-len(values)))
	motifs = padded

	#add motifs to cache
	if USECACHE:
		with open('cache/'+filename,'wb') as f:
			pickle.dump(motifs,f)

	return motifs
예제 #5
0
			f.write(
			"\\end{tabular}\n"
			"\\end{table}\n"
			)
		
		f.write("\\end{document}\n")
		
	os.system("pdflatex -output-directory result " + filename)
	os.system("rm result/*.log result/*.aux")

if __name__ == '__main__':
	# Script entry point: load the pickled dataset, then threshold and
	# randomize every ('NL','corr') graph. The randomized graphs are not
	# stored; only the loop index is printed.
	with open("aznorbert_corrsd.pkl","rb") as f:
		data = pickle.load(f)

	#PDFOrder(data,"AllOrder",1.05)

	for i,G in enumerate(data[('NL','corr')]):
		print(i)
		# Entries strictly greater than the (len(G)*10 + 1)-th largest
		# entry become edges.
		threshold = np.sort(G,axis=None)[-len(G)*10-1]
		graph = gh.randomize_graph(nx.DiGraph(G>threshold), 1000)

	#PDFstats(data,"Motif_Statistics_Mats",False)
	#motifStats(data)

	#print 'Normal'
	#motifOrder(data,('NL','corr'),1.05)

예제 #6
0
def findMotifs(data, key, motifSize=3, degree=10, randomize=False):
    """Compute per-graph motif frequencies for one group of graphs.

    Each accepted graph is thresholded, written to result/OUTPUT.txt and
    handed to the external ./Kavosh program; result/MotifCount.txt is then
    read back (first line: subgraph count, remaining rows: id and total).
    When USECACHE is set, results are stored in and served from cache/.

    Args:
        data: Container indexed with ``key`` to obtain the graphs; it is
            not read when ``key == "rand"``.
        key: Index into ``data``, or "rand" to use 100 random 88x88
            matrices with a zeroed diagonal. Part of the cache file name.
        motifSize: Passed to ./Kavosh on its command line. Part of the
            cache file name.
        degree: Graphs with fewer than len(G) * degree non-zero entries
            are rejected; otherwise entries strictly greater than the
            (len(G) * degree + 1)-th largest entry become edges. Part of
            the cache file name.
        randomize: If true, the graph is replaced by
            ``gh.randomize_graph(graph, 2000)`` before it is written out.
            Randomized runs use a separate cache file.

    Returns:
        A dict mapping int motif id to a numpy array of fractions
        (total / subgraph count) with one entry per non-rejected graph.
        Zeros for graphs that did not report the motif are appended at
        the end, so array positions do not identify graphs. On a cache
        hit, whatever object was pickled is returned.
    """

    usetotal = False

    #Check cache. Randomized runs get their own entry so they are never
    #confused with the non-randomized result for the same key; the
    #non-randomized file name is unchanged, so existing caches stay valid.
    filename = str(key) + 's' + str(int(motifSize)) + 'd' + str(
        int(degree)) + str(usetotal)
    if randomize:
        filename += '_randomized'
    filename += ".pkl"
    if os.path.exists('cache/' + filename) and USECACHE:
        print("in cache")
        with open('cache/' + filename, "rb") as f:
            return pickle.load(f)

    if key == "rand":
        graphs = []
        for _ in range(100):
            x = np.random.rand(88, 88)
            # Subtracting the diagonal from itself zeroes it.
            x -= np.diag(np.diag(x))
            graphs.append(x)
    else:
        graphs = data[key]

    motifs = defaultdict(list)
    numstring = "/" + str(len(graphs))
    rejected = 0
    for index, G in enumerate(graphs):
        #Cull bad graphs: too few non-zero entries to reach the target degree
        if np.count_nonzero(G) < len(G) * degree:
            rejected += 1
            continue

        #calculate threshold
        sortedWeights = np.sort(G, axis=None)
        threshold = sortedWeights[-len(G) * degree - 1]
        #Print progress
        sys.stdout.write("\rMotif Finding Progress: " + str(index) + numstring)
        sys.stdout.write(" Threshold: " + str(threshold))
        sys.stdout.flush()

        #Output graph to txt file: node count first, then the edge list
        graph = nx.DiGraph(G > threshold)
        graph = nx.convert_node_labels_to_integers(graph, 1)
        if randomize:
            graph = gh.randomize_graph(graph, 2000)
        with open('result/OUTPUT.txt', 'wb') as f:
            f.write(str(len(graph)) + '\n')
            nx.write_edgelist(graph, f, data=False)
        #The C++ motif finder is driven through files on disk.
        #NOTE(review): the exit status is not checked, so MotifCount.txt is
        #read regardless of whether the command succeeded.
        os.system("./Kavosh " + str(motifSize))
        with open("result/MotifCount.txt", "rb") as f:
            subgraphs = float(next(f))
            counts = np.loadtxt(f, ndmin=2)

        for iD, total in counts:
            motifs[int(iD)].append(total / subgraphs)

    print('\nMotifs Done! Graphs Rejected: ' + str(rejected))

    #add zeros to graphs that didn't contain motifs
    accepted = len(graphs) - rejected
    padded = {}
    for iD, values in motifs.items():
        padded[int(iD)] = np.array(values + [0] * (accepted - len(values)))
    motifs = padded

    #add motifs to cache
    if USECACHE:
        with open('cache/' + filename, 'wb') as f:
            pickle.dump(motifs, f)

    return motifs
예제 #7
0
                f.write(line + " \\\\ \\hline\n")

            f.write("\\end{tabular}\n" "\\end{table}\n")

        f.write("\\end{document}\n")

    os.system("pdflatex -output-directory result " + filename)
    os.system("rm result/*.log result/*.aux")


if __name__ == '__main__':
    # Script entry point: load the pickled dataset, then threshold and
    # randomize every ('NL', 'corr') graph. The randomized graphs are not
    # stored; only the loop index is printed.
    with open("aznorbert_corrsd.pkl", "rb") as f:
        data = pickle.load(f)

    #PDFOrder(data,"AllOrder",1.05)

    for i, G in enumerate(data[('NL', 'corr')]):
        print(i)
        # Entries strictly greater than the (len(G) * 10 + 1)-th largest
        # entry become edges.
        threshold = np.sort(G, axis=None)[-len(G) * 10 - 1]
        graph = gh.randomize_graph(nx.DiGraph(G > threshold), 1000)

    #PDFstats(data,"Motif_Statistics_Mats",False)
    #motifStats(data)

    #print 'Normal'
    #motifOrder(data,('NL','corr'),1.05)