def haifeng_output_parser(self, row, argument=None, argument2=None): """ 05-31-06 parse haifeng's output, only first two columns based on fimbfs_parser() 2006-08-22 use ], [ as separator for edge_set 2006-08-29 cluster_no increments first(starting from 1) 2006-09-21 also parse the recurrence_array """ self.cluster_no += 1 self.cooccurrent_cluster_id += 1 cluster_list = [] gene_no2incidence_array = argument curs = argument2 cluster = cluster_dstructure() #initialize two sets cluster.vertex_set = row[0][1:-1].split(',') cluster.vertex_set = map(int, cluster.vertex_set) cluster.vertex_set.sort() cluster.cooccurrent_cluster_id = self.cooccurrent_cluster_id cluster.cluster_id = self.cluster_no cluster.edge_set = row[1][2:-2].split('], [') for i in range(len(cluster.edge_set)): cluster.edge_set[i] = cluster.edge_set[i].split(',') cluster.edge_set[i] = map(int, cluster.edge_set[i]) cluster.edge_set[i].sort() cluster.edge_set.sort() cluster.no_of_edges = len(cluster.edge_set) no_of_nodes = len(cluster.vertex_set) cluster.splat_connectivity = 2*float(cluster.no_of_edges)/(no_of_nodes*(no_of_nodes-1)) cluster.connectivity = cluster.splat_connectivity cluster.recurrence_array = row[2][1:-1].split(',') cluster.recurrence_array = map(float, cluster.recurrence_array) #05-31-06 fake the d_matrix cluster.d_matrix = [[0,0,0]] cluster.gim_array = get_vertex_set_gim_array(gene_no2incidence_array, cluster.vertex_set) if self.debug: print "cluster vertex_set: ", cluster.vertex_set print "cluster edge_set: ", cluster.edge_set print "cluster splat_connectivity: ", cluster.splat_connectivity print "cluster recurrence_array: ", cluster.recurrence_array print "cluster.d_matrix:", cluster.d_matrix print "cluster.gim_array:", cluster.gim_array raw_input("Continue?(Y/n)") cluster_list.append(cluster) return cluster_list
def fimbfs_parser(self, row, argument=None, argument2=None): """ 10-28-05 similar to fim_parser(), but no sort for vertex_set and edge_set. and has d_matrix. Output of MpiBFSCluster.py 12-06-05 add gene_no2incidence_array calculate cluster.gim_array """ cluster_list = [] gene_no2incidence_array = argument #12-06-05 curs = argument2 cluster = cluster_dstructure() #initialize two sets cluster.vertex_set = row[0][1:-1].split(',') cluster.vertex_set = map(int, cluster.vertex_set) if len(cluster.vertex_set)<self.min_cluster_size: #pre-stop return cluster_list cluster.cooccurrent_cluster_id = self.cooccurrent_cluster_id cluster.cluster_id = self.cluster_no cluster.edge_set = row[1][2:-2].split('], [') for i in range(len(cluster.edge_set)): cluster.edge_set[i] = cluster.edge_set[i].split(',') cluster.edge_set[i] = map(int, cluster.edge_set[i]) cluster.no_of_edges = len(cluster.edge_set) no_of_nodes = len(cluster.vertex_set) cluster.splat_connectivity = 2*float(cluster.no_of_edges)/(no_of_nodes*(no_of_nodes-1)) cluster.connectivity = cluster.splat_connectivity cluster.recurrence_array = row[2][1:-1].split(',') cluster.recurrence_array = map(float, cluster.recurrence_array) cluster.d_matrix = row[3] #10-28-05 string form cluster.gim_array = get_vertex_set_gim_array(gene_no2incidence_array, cluster.vertex_set) #12-06-05 if self.debug: print "cluster vertex_set: ", cluster.vertex_set print "cluster edge_set: ", cluster.edge_set print "cluster splat_connectivity: ", cluster.splat_connectivity print "cluster recurrence_array: ", cluster.recurrence_array print "cluster.d_matrix:", cluster.d_matrix print "cluster.gim_array:", cluster.gim_array #12-06-05 raw_input("Continue?(Y/n)") cluster_list.append(cluster) self.cluster_no += 1 self.cooccurrent_cluster_id += 1 return cluster_list