def kMax_parser(self, inf):
    """
    Parse a kMax result file into a list of cluster objects.

    inf is an open file-like object. Its first line carries the splat_id
    and its second line the parameter string; in both cases the value is
    taken from line[2:-1] (presumably skipping a two-character prefix and
    the trailing newline -- confirm against the writer of the file).
    The rest of the stream is buffered into a cStringIO object and handed
    to splat_result_iterator, which yields one file-like 'pattern' per
    cluster.

    Within each pattern:
        line 1: number of edges
        line 2: recurrence pattern (ignored)
        line 3 onwards: edge lines, read until a blank line or the end
            of the pattern. Each edge line is split on ')(' after
            dropping its first character and last two characters; the
            first two whitespace-separated tokens of each edge are the
            integer vertex ids.

    Returns a list of cluster(splat_id, vertex_list, parameter,
    connectivity) objects, where vertex_list is sorted and
    connectivity = 2*no_of_edges / (no_of_nodes*(no_of_nodes-1)).

    Raises ZeroDivisionError if a pattern yields fewer than two distinct
    vertices (the denominator above is then zero).
    """
    cluster_set = []

    #first line contains the splat_id
    line = inf.readline()
    splat_id = int(line[2:-1])
    #second line contains the parameter, old kMax_batch_run.py doesn't output the parameter
    line = inf.readline()
    parameter = line[2:-1]

    #buffer the remainder so the iterator works on an in-memory stream
    inf2 = cStringIO.StringIO(inf.read())
    for pattern in splat_result_iterator(inf2):
        vertex_set = Set()
        #first line is no_of_edges
        line = pattern.readline()
        no_of_edges = int(line[:-1])
        #second line is the recurrence pattern, ignore.
        pattern.readline()
        #from the third line on, it's the edge_set
        line = pattern.readline()
        #readline() returns '' at EOF; stop there too, otherwise a pattern
        #without a terminating blank line dies with IndexError below.
        while line and line != '\n':
            for edge in line[1:-2].split(')('):
                endpoints = edge.split()
                vertex_set.add(int(endpoints[0]))
                vertex_set.add(int(endpoints[1]))
            line = pattern.readline()
        #convert the set to a sorted list
        vertex_list = list(vertex_set)
        vertex_list.sort()
        #float so that the division below is never integer division
        no_of_nodes = float(len(vertex_list))
        connectivity = 2*no_of_edges/(no_of_nodes*(no_of_nodes-1))
        cluster_set.append(cluster(splat_id, vertex_list, parameter, connectivity))
    return cluster_set
def parse_splat_results(self, splat_id, outfname, splat2db_instance, codense2db_instance, curs):
    """
    03-19-05

    Read the splat output file outfname and build one MclResult per
    pattern yielded by splat_result_iterator.

    splat_id: stored unchanged on every result.
    outfname: path of the splat output file to read.
    splat2db_instance: its parse(pattern) is called for each pattern and
        its edge_set attribute is read afterwards. Note that every edge
        in that edge_set is sorted in place here.
    codense2db_instance: supplies get_combined_cor_vector(),
        parse_connectivity() and parse_recurrence().
    curs: passed through to get_combined_cor_vector() (presumably a
        database cursor -- confirm with the caller).

    Returns the list of MclResult objects, each with splat_id,
    vertex_set (sorted list of the nodes seen in the edges), edge_set,
    connectivity and recurrence_array filled in.

    The input file is closed even if parsing raises.
    """
    from splat_to_db import splat_result_iterator

    listOfMclResult = []
    cor_cut_off = 0    #0 means no cut off for those edges.
    outf = open(outfname, 'r')
    try:
        for pattern in splat_result_iterator(outf):
            unit = MclResult()
            splat2db_instance.parse(pattern)
            edge_set = splat2db_instance.edge_set
            node_set = Set()
            for edge in edge_set:
                #in-situ sort, splat2db_instance.edge_set is also changed
                edge.sort()
                node_set.add(edge[0])
                node_set.add(edge[1])
            node_list = list(node_set)
            node_list.sort()
            no_of_nodes = len(node_list)
            no_of_edges = len(edge_set)
            #calculate the recurrence_array via codense2db_instance's functions;
            #only the significance vector is used below
            combined_cor_vector, combined_sig_vector = codense2db_instance.get_combined_cor_vector(curs, edge_set)
            #all these are needed to submit to splat_result and mcl_result
            unit.splat_id = splat_id
            unit.vertex_set = node_list
            unit.edge_set = edge_set
            unit.connectivity = codense2db_instance.parse_connectivity(combined_sig_vector, no_of_edges, no_of_nodes)
            unit.recurrence_array = codense2db_instance.parse_recurrence(combined_sig_vector, no_of_edges, cor_cut_off)
            listOfMclResult.append(unit)
    finally:
        #release the handle on the error path as well
        outf.close()
    return listOfMclResult