def cluster(*args):
    """Hierarchically cluster both axes of a data file and save the trees.

    Positional arguments:
        args[0]: path of the input data file readable by ``Cluster.read``
            (for example ``"cyano.txt"``).
        args[1]: job name handed to ``record.save`` for the output files.

    Returns:
        None. The result is written out by ``record.save``.
    """
    source_path = args[0]
    with open(source_path) as infile:
        record = Cluster.read(infile)

    # Row (gene) tree: method 's' selects pairwise single-linkage.
    # NOTE: this tree is deliberately left unscaled (no ``scale()`` call).
    gene_tree = record.treecluster(method='s')

    # Column (experiment) tree: transpose=1 clusters the columns, dist='u'
    # selects the uncentered-correlation distance.
    experiment_tree = record.treecluster(dist='u', transpose=1)

    record.save(args[1], gene_tree, experiment_tree)
def cluster_and_assign_all_cuts(file, dist):
    """Cluster the rows of a data file and tabulate every possible tree cut.

    The rows are clustered hierarchically (method 'a', pairwise
    average-linkage), the tree is scaled and saved, and the tree is then cut
    into n clusters for every n from 1 to the number of rows.

    Args:
        file: Path of the input data file readable by ``Cluster.read``. The
            part of this path before the first '.' is used as the job name
            for the saved output.
        dist: Distance-measure code forwarded to ``record.treecluster``.

    Returns:
        A ``pandas.DataFrame`` of integer cluster ids, indexed by
        ``record.geneid``, with columns ``1 .. n_elements``; column ``n``
        holds the assignment obtained by cutting the tree into ``n`` clusters.
    """
    with open(file) as handle:
        record = Cluster.read(handle)

    row_tree = record.treecluster(transpose=False, method='a', dist=dist)
    row_tree.scale()

    # The job name is derived from the input path. The original code read an
    # undefined ``name`` here, which raised NameError (or silently picked up
    # an unrelated global of that name).
    record.save(file.split('.')[0], geneclusters=row_tree)

    # Cut the tree into n clusters, for 1 <= n <= n_elements.
    n_elements = len(record.geneid)
    mat = np.zeros((n_elements, n_elements))
    for i in range(n_elements):
        # Column i holds the assignment for i + 1 clusters.
        mat[:, i] = row_tree.cut(i + 1)

    return pd.DataFrame(data=mat, dtype=int, index=record.geneid,
                        columns=range(1, n_elements + 1))
def cluster_dataset(filename, name=None, rows=True, cols=True, method='a',
                    dist='c', row_order=None, col_order=None):
    """Hierarchically cluster a data file along rows and/or columns and save it.

    Args:
        filename: Path of the input data file readable by ``Cluster.read``.
        name: Job name for the saved output. When falsy, the part of
            ``filename`` before the first '.' is used.
        rows: Whether to cluster the rows (mutants).
        cols: Whether to cluster the columns (library genes).
        method: Linkage-method code forwarded to ``record.treecluster``.
        dist: Distance-measure code forwarded to ``record.treecluster``.
        row_order: Optional sequence of row ids; each row's position in it is
            stored in ``record.gorder``.
        col_order: Optional sequence of column ids; each column's position in
            it is stored in ``record.eorder``.

    Returns:
        A tuple ``(record, row_tree, col_tree)``. A tree is ``None`` when the
        corresponding axis was not clustered.
    """
    with open(filename) as source:
        record = Cluster.read(source)

    if row_order:
        record.gorder = [row_order.index(gene) for gene in record.geneid]
    if col_order:
        record.eorder = [col_order.index(exp) for exp in record.expid]

    def _scaled_tree(transpose):
        # Cluster one axis and scale the tree to [0, 1] for ease of viewing
        # in Java TreeView.
        tree = record.treecluster(transpose=transpose, method=method, dist=dist)
        tree.scale()
        return tree

    # A tree stays None when its axis is not being clustered.
    row_tree = _scaled_tree(False) if rows else None
    col_tree = _scaled_tree(True) if cols else None

    jobname = name if name else filename.split('.')[0]
    record.save(jobname, row_tree, col_tree)
    return record, row_tree, col_tree
from Bio import Cluster

# Load the data file.
with open("/home/koreanraichu/cyano.txt") as handle:
    record = Cluster.read(handle)

# Compute the distance matrix.
matrix = record.distancematrix()

# Compute the cluster centroids.
cdata, cmask = record.clustercentroids()

# Compute the distance between clusters.
distance = record.clusterdistance()

# Hierarchical clustering.
# (Open question from the author: can this be plotted with matplotlib?)
tree = record.treecluster()

# k-means clustering.
#   method='a': k-means
#   method='m': k-medians
clusterid, error, nfound = record.kcluster()

# Compute a self-organizing map (SOM); note this rebinds ``clusterid``.
clusterid, celldata = record.somcluster()

# Save the hierarchical clustering result to local disk.
# General form: record.save(jobname, geneclusters, expclusters)
jobname = "cyano_clustering"
geneclusters = record.treecluster()
expclusters = record.treecluster(transpose=1)
record.save(jobname, geneclusters, expclusters)
def clust(source):
    """Hierarchically cluster the rows of ``source`` and save the scaled tree.

    Args:
        source: Path of the input data file readable by ``Cluster.read``.
            The output job name is this path with its extension removed.

    Returns:
        None. The result is written out by ``record.save``.
    """
    with open(source, 'r') as infile:
        record = Cluster.read(infile)

    gene_tree = record.treecluster()
    gene_tree.scale()

    jobname, _extension = path.splitext(source)
    record.save(jobname, gene_tree)
# Hierarchical clustering of Y with the 'average' linkage method.
# NOTE(review): ``linkage``, ``dendrogram`` and ``fcluster`` are presumably
# from scipy.cluster.hierarchy, and ``Y`` / ``water_list`` are defined earlier
# in the script -- confirm against the file's imports.
# The prints use the single-argument parenthesised form, which outputs the
# same text on Python 2 and also parses on Python 3.
Z = linkage(Y, 'average')
print(Z)
dendrogram(Z)

# Flat cluster labels obtained with threshold 2 under the 'distance' criterion.
fclust = fcluster(Z, 2, criterion='distance')

# Group the rows of water_list by their (stringified) cluster label.
clust_dict = defaultdict(list)
for i, row in enumerate(water_list):
    clust_dict[str(fclust[i])].append(row)

# Emit one selection + isosurface command per cluster.
# NOTE(review): the output looks like a Jmol script; w[12] and w[5] are
# presumably an identifier and a number for each water -- confirm.
for c in clust_dict:
    selection = ' or '.join(
        ['(~' + w[12] + ' and ' + str(w[5]) + ')' for w in clust_dict[c]]
    )
    print('select water and (' + selection + '); isosurface id "foo' + c + '" color lightblue center {selected} SPHERE @{ [ {selected}.x.stddev, {selected}.y.stddev, {selected}.z.stddev, 0.5 ].max *2 } translucent' + ';')

sys.exit(0)

# NOTE: everything below is unreachable because of the sys.exit(0) above.
# It is kept as-is; ``import StringIO`` is Python 2 only.
from Bio import Cluster

# Build a tab-separated table (header line plus one line per water) in memory.
lines = "Start\tX\tY\tZ\n" + "\n".join(
    [
        "\t".join(
            [row[12] + "|" + str(row[5]), str(row[6]), str(row[7]), str(row[8])]
        )
        for row in water_list
    ]
) + "\n"

import StringIO
handle = StringIO.StringIO(lines)
record = Cluster.read(handle)
tree = record.treecluster(method="c", dist="e")
record.save('../data/test_tree', tree)
print(tree)