Beispiel #1
0
	def kMax_parser(self, inf):
		"""
		Parse a kMax result stream into a list of cluster objects.

		inf is a file-like object. Its first line holds the splat_id and its
		second line holds the parameter string; on both lines the first two
		characters and the final character are discarded. The rest of the
		stream is read into memory and split into patterns by
		splat_result_iterator.

		Each pattern is read as:
			line 1: number of edges
			line 2: recurrence pattern (ignored)
			line 3 onwards: edge lines, up to a line that is just a newline

		Returns a list with one cluster(splat_id, vertex_list, parameter,
		connectivity) per pattern, where vertex_list is the sorted list of
		distinct vertex ids and connectivity = 2*edges/(nodes*(nodes-1)).

		NOTE(review): a pattern yielding fewer than two distinct vertices makes
		the connectivity denominator zero and raises ZeroDivisionError.
		"""
		#header line 1: splat_id
		header = inf.readline()
		splat_id = int(header[2:-1])
		#header line 2: parameter (old kMax_batch_run.py doesn't output the parameter)
		header = inf.readline()
		parameter = header[2:-1]
		#buffer whatever is left so it can be iterated pattern by pattern
		remainder = cStringIO.StringIO(inf.read())
		clusters = []
		for pattern in splat_result_iterator(remainder):
			#line 1 of the pattern is the number of edges
			no_of_edges = int(pattern.readline()[:-1])
			#line 2 is the recurrence pattern, read it only to skip it
			pattern.readline()
			#every remaining line lists edges; a bare newline closes the pattern
			vertices = Set()
			row = pattern.readline()
			while row != '\n':
				#drop the first character and the last two, then split on ')('
				#presumably the row looks like '(a b)(c d)...' plus newline -- TODO confirm
				for pair in row[1:-2].split(')('):
					endpoints = pair.split()
					vertices.add(int(endpoints[0]))
					vertices.add(int(endpoints[1]))
				row = pattern.readline()
			#sorted list of the distinct vertices
			ordered_vertices = list(vertices)
			ordered_vertices.sort()
			#float keeps the ratio below from being an integer division
			node_count = float(len(ordered_vertices))
			ordered_pairs = node_count*(node_count-1)
			connectivity = 2*no_of_edges/ordered_pairs
			clusters.append(cluster(splat_id, ordered_vertices, parameter, connectivity))
		return clusters
Beispiel #2
0
	def parse_splat_results(self, splat_id, outfname, splat2db_instance, codense2db_instance, curs):
		"""
		03-19-05

		Read the splat output file outfname and build one MclResult per pattern.

		splat_id: stored on every returned MclResult.
		outfname: path of the file to read; it is opened here and is closed
			before returning, also when parsing raises.
		splat2db_instance: its parse(pattern) is called for every pattern and
			its edge_set attribute is read afterwards. Every edge in that
			edge_set is sorted in place, so the instance's data is modified.
		codense2db_instance: supplies get_combined_cor_vector(),
			parse_connectivity() and parse_recurrence().
		curs: passed through to get_combined_cor_vector().

		Returns the list of MclResult objects, each with splat_id, vertex_set
		(sorted list of distinct nodes), edge_set, connectivity and
		recurrence_array filled in.
		"""
		listOfMclResult = []
		from splat_to_db import splat_result_iterator
		#0 means no cut off for those edges.
		cor_cut_off = 0
		outf = open(outfname, 'r')
		#try/finally guarantees the file is closed if any pattern fails to parse
		try:
			for pattern in splat_result_iterator(outf):
				unit = MclResult()
				splat2db_instance.parse(pattern)
				edge_set = splat2db_instance.edge_set
				node_set = Set()
				for edge in edge_set:
					#in-situ sort, splat2db_instance.edge_set is also changed
					edge.sort()
					node_set.add(edge[0])
					node_set.add(edge[1])
				node_list = list(node_set)
				node_list.sort()
				no_of_nodes = len(node_list)
				no_of_edges = len(edge_set)
				#calculate the recurrence_array via codense2db_instance's functions
				combined_cor_vector, combined_sig_vector = codense2db_instance.get_combined_cor_vector(curs, edge_set)

				#all these are needed to submit to splat_result and mcl_result
				unit.splat_id = splat_id
				unit.vertex_set = node_list
				unit.edge_set = edge_set
				unit.connectivity = codense2db_instance.parse_connectivity(combined_sig_vector, no_of_edges, no_of_nodes)
				unit.recurrence_array = codense2db_instance.parse_recurrence(combined_sig_vector, \
					no_of_edges, cor_cut_off)
				listOfMclResult.append(unit)
		finally:
			outf.close()
		return listOfMclResult