def main():
	options = options_desc.parse_args(sys.argv)[0]
	zgf_cleanup.main()

	pool = Pool()
	active_nodes = pool.where("isa_partition")
	if(options.ignore_failed):
		active_nodes = pool.where("isa_partition and not state=='mdrun-failed'")

	assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

	if active_nodes.where("'weight_direct' not in obs"):
		active_nodes.unlock()
		sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")

	print "\n### Getting S matrix ..."
	s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat, fast=options.fast_mat)
	register_file_dependency(pool.s_mat_fn, pool.filename)

	node_weights = np.array([node.obs.weight_direct for node in active_nodes])

	print "\n### Symmetrizing S matrix ..."
	(corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))

	# store intermediate results
	register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)
	np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])

	if options.export_matlab:
		savemat(pool.analysis_dir+"node_weights.mat", {"node_weights":node_weights, "node_weights_corrected":corr_node_weights})
		savemat(pool.analysis_dir+"s_mats.mat", {"s_matrix":s_matrix, "s_matrix_corrected":corr_s_matrix})

	for (n, cw) in zip(active_nodes, corr_node_weights):
		n.obs.weight_corrected = cw

	print "\n### Node weights after symmetrization of S matrix:"
	for n in active_nodes:
		print "%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected))
		n.save()
	active_nodes.unlock()

	# calculate and sort eigenvalues in descending order
	(eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
	argsorted_eigvalues = np.argsort(-eigvalues)
	eigvalues = eigvalues[argsorted_eigvalues]
	eigvectors = eigvectors[:, argsorted_eigvalues]

	gaps = np.abs(eigvalues[1:]-eigvalues[:-1])
	gaps = np.append(gaps, 0.0)
	wgaps = gaps*eigvalues

	print "\n### Sorted eigenvalues of symmetrized S matrix:"
	for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues)+1), eigvalues, gaps, wgaps):
		print "EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap)

	n_clusters = np.argmax(wgaps)+1
	print "\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), np.argmax(gaps)+1)
	print "### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps)+1)
	sys.stdout.flush()

	if not options.auto_cluster:
		n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
	print "### Using %d clusters for PCCA+ ..."%n_clusters

	if options.export_matlab:
		savemat(pool.analysis_dir+"evs.mat", {"evs":eigvectors})

	# orthogonalize and normalize eigenvectors
	eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

	# perform PCCA+
	# First two return-values "c_f" and "indicator" are not needed
	(chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]

	if(options.optimize_chi):
		print "\n### Optimizing chi matrix ..."
		outliers = 5
		mean_weight = np.mean(corr_node_weights)
		threshold = mean_weight/100*outliers
		print "Light-weight node threshold (%d%% of mean corrected node weight): %.4f."%(outliers, threshold)

		# accumulate nodes for optimization
		edges = np.where(np.max(chi_matrix, axis=1) > 0.9999)[0] # edges of simplex
		heavies = np.where(corr_node_weights > threshold)[0] # heavy-weight nodes
		filtered_eigvectors = eigvectors[np.union1d(edges, heavies)]

		# perform the actual optimization
		rot_matrix = opt_soft(filtered_eigvectors, rot_matrix, n_clusters)
		chi_matrix = np.dot(eigvectors[:,:n_clusters], rot_matrix)

		# deal with light-weight nodes: shift and scale
		for i in np.where(corr_node_weights <= threshold)[0]:
			if(i in edges):
				print "Column %d belongs to (potentially dangerous) light-weight node, but its node is a simplex edge."%(i+1)
				continue
			print "Column %d is shifted and scaled."%(i+1)
			col_min = np.min(chi_matrix[i,:])
			chi_matrix[i,:] -= col_min
			chi_matrix[i,:] /= 1-(n_clusters*col_min)

	qc_matrix = np.dot( np.dot( np.linalg.inv(rot_matrix), np.diag(eigvalues[range(n_clusters)]) ), rot_matrix ) - np.eye(n_clusters)
	cluster_weights = rot_matrix[0]

	print "\n### Matrix numerics check"
	print "-- Q_c matrix row sums --"
	print np.sum(qc_matrix, axis=1)
	print "-- cluster weights: first row of rot_matrix --"
	print cluster_weights
	print "-- cluster weights: numpy.dot(node_weights, chi_matrix) --"
	print np.dot(corr_node_weights, chi_matrix)
	print "-- chi matrix column max values --"
	print np.max(chi_matrix, axis=0)
	print "-- chi matrix row sums --"
	print np.sum(chi_matrix, axis=1)

	# store final results
	np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
	np.savez(pool.qc_mat_fn, matrix=qc_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

	if options.export_matlab:
		savemat(pool.analysis_dir+"chi_mat.mat", {"chi_matrix":chi_matrix})
		savemat(pool.analysis_dir+"qc_mat.mat", {"qc_matrix":qc_matrix, "weights":cluster_weights})

	register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
	register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)

	for fn in (pool.s_mat_fn, pool.s_corr_mat_fn):
		register_file_dependency(pool.chi_mat_fn, fn)
		register_file_dependency(pool.qc_mat_fn, fn)

	# touch analysis directory (triggering update in zgf_browser)
	atime = mtime = time.time()
	os.utime(pool.analysis_dir, (atime, mtime))

	# show summary
	if(options.summary):
		print "\n### Preparing cluster summary ..."
		chi_threshold = 1E-3
		from pprint import pformat

		for i in range(n_clusters):
			involved_nodes = [active_nodes[ni] for ni in np.argwhere(chi_matrix[:,i] > chi_threshold)]
			max_chi_node = active_nodes[np.argmax(chi_matrix[:,i])]
			c_max = []

			for c in pool.converter:
				coord_range = pool.coord_range(c)
				scale = c.plot_scale
				edges = scale(np.linspace(np.min(coord_range), np.max(coord_range), num=50))
				hist_cluster = np.zeros(edges.size-1)

				# accumulate the chi- and weight-scaled node histograms for this coordinate
				for (n, chi) in zip(active_nodes, chi_matrix[:,i]):
					samples = scale(n.trajectory.getcoord(c))
					hist_node = np.histogram(samples, bins=edges, weights=n.frameweights, normed=True)[0]
					hist_cluster += n.obs.weight_corrected * hist_node * chi

				c_max.append(edges[np.argmax(hist_cluster)])

			msg = "### Cluster %d (weight=%.4f, #involved nodes=%d, representative='%s'):"%(i+1, cluster_weights[i], len(involved_nodes), max_chi_node.name)
			print "\n"+msg
			print "-- internal coordinates --"
			print "%s"%pformat(["%.2f"%cm for cm in c_max])
			print "-- involved nodes --"
			print "%s"%pformat([n.name for n in involved_nodes])
			print "-"*len(msg)

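# Editor's sketch (not part of the original zgf_analyze code): the "shift and scale" step inside
# main() above relies on a simple invariant. If the membership row chi_matrix[i,:] of a light-weight
# node sums to one but contains a slightly negative entry m = min(row), then
# (row - m) / (1 - n_clusters*m) is non-negative and still sums to one, because
# sum(row - m) = 1 - n_clusters*m. The helper below is a hypothetical, self-contained demonstration
# of that invariant; the name _demo_shift_and_scale is not part of the zgf code base and nothing in
# main() calls it.
def _demo_shift_and_scale():
	"""Minimal sketch: repair one chi row with a negative membership, keeping its unit row sum."""
	chi_row = np.array([-0.02, 0.40, 0.62]) # memberships of one node in three clusters, sum = 1.0
	row_min = np.min(chi_row)
	fixed = (chi_row - row_min) / (1.0 - len(chi_row)*row_min)
	assert np.all(fixed >= 0.0) # non-negativity restored
	assert abs(np.sum(fixed) - 1.0) < 1e-12 # unit row sum preserved
	print "shift-and-scale sketch:", fixed
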
def main():
	options = options_desc.parse_args(sys.argv)[0]
	zgf_cleanup.main()

	pool = Pool()
	active_nodes = pool.where("isa_partition")
	assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

	if active_nodes.where("'weight_direct' not in obs"):
		active_nodes.unlock()
		sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")

	print "\n### Getting S matrix ..."
	s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat)
	register_file_dependency(pool.s_mat_fn, pool.filename)

	print "\n### Getting K matrix ..."
	k_matrix = cache_matrix(pool.k_mat_fn, active_nodes, shift=options.lag_time, overwrite=options.overwrite_mat)
	register_file_dependency(pool.k_mat_fn, pool.filename)

	node_weights = np.array([node.obs.weight_direct for node in active_nodes])

	print "\n### Symmetrizing S matrix ..."
	(corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))
	print "\n### Symmetrizing K matrix ..."
	(corr_k_matrix, corr_node_weights) = symmetrize(k_matrix, corr_node_weights)

	# store intermediate results
	register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)
	register_file_dependency(pool.k_corr_mat_fn, pool.k_mat_fn)
	np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])
	np.savez(pool.k_corr_mat_fn, matrix=corr_k_matrix, node_names=[n.name for n in active_nodes])

	if options.export_matlab:
		savemat(pool.analysis_dir+"node_weights.mat", {"node_weights":node_weights, "node_weights_corrected":corr_node_weights})
		savemat(pool.analysis_dir+"s_mats.mat", {"s_matrix":s_matrix, "s_matrix_corrected":corr_s_matrix})
		savemat(pool.analysis_dir+"k_mats.mat", {"k_matrix":k_matrix, "k_matrix_corrected":corr_k_matrix})

	for (n, cw) in zip(active_nodes, corr_node_weights):
		n.obs.weight_corrected = cw

	print "\n### Node weights after symmetrization of S matrix:"
	for n in active_nodes:
		print "%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected))
		n.save()
	active_nodes.unlock()

	# calculate and sort eigenvalues in descending order
	(eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
	argsorted_eigvalues = np.argsort(-eigvalues)
	eigvalues = eigvalues[argsorted_eigvalues]
	eigvectors = eigvectors[:, argsorted_eigvalues]

	gaps = np.abs(eigvalues[1:]-eigvalues[:-1])
	gaps = np.append(gaps, 0.0)
	wgaps = gaps*eigvalues

	print "\n### Sorted eigenvalues of symmetrized S matrix:"
	for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues)+1), eigvalues, gaps, wgaps):
		print "EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap)

	n_clusters = np.argmax(wgaps)+1
	print "\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), np.argmax(gaps)+1)
	print "### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps)+1)
	sys.stdout.flush()

	if not options.auto_cluster:
		n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
	print "### Using %d clusters for PCCA+ ..."%n_clusters

	print "eigenvectors"
	print eigvectors[:, :n_clusters]

	if options.export_matlab:
		savemat(pool.analysis_dir+"evs.mat", {"evs":eigvectors})

	# orthogonalize and normalize eigenvectors
	eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

	# perform PCCA+
	# First two return-values "c_f" and "indicator" are not needed
	(chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]

	#TODO at the moment, K-matrix is not used
	#xi = [] # calculate eigenvalues of Q_c, xi
	#for eigvec in np.transpose(eigvectors)[:n_clusters]:
	#	num = np.dot( np.dot( np.transpose(eigvec), corr_k_matrix ), eigvec )
	#	denom = np.dot( np.dot( np.transpose(eigvec), corr_s_matrix ), eigvec )
	#	xi.append(num/denom-1)
	#print np.diag(xi) #TODO what does this tell us? Marcus-check

	qc_matrix = np.dot( np.dot( np.linalg.inv(rot_matrix), np.diag(eigvalues[range(n_clusters)]) ), rot_matrix ) - np.eye(n_clusters)
	cluster_weights = rot_matrix[0]

	print "Q_c matrix:"
	print qc_matrix
	print "Q_c matrix row sums:"
	print np.sum(qc_matrix, axis=1)
	print "cluster weights (calculated twice for checking):"
	print cluster_weights
	print np.dot(corr_node_weights, chi_matrix)
	print "chi matrix column sums:"
	print np.sum(chi_matrix, axis=0)
	print "chi matrix row sums:"
	print np.sum(chi_matrix, axis=1)

	# store final results
	np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
	np.savez(pool.qc_mat_fn, matrix=qc_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

	if options.export_matlab:
		savemat(pool.analysis_dir+"chi_mat.mat", {"chi_matrix":chi_matrix})
		savemat(pool.analysis_dir+"qc_mat.mat", {"qc_matrix":qc_matrix, "weights":cluster_weights})

	register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
	register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)

	for fn in (pool.s_mat_fn, pool.s_corr_mat_fn, pool.k_mat_fn, pool.k_corr_mat_fn):
		register_file_dependency(pool.chi_mat_fn, fn)
		register_file_dependency(pool.qc_mat_fn, fn)

	zgf_cleanup.main()
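
# Editor's sketch (not part of the original zgf_analyze code): the commented-out block in main()
# above computes, for each of the n_clusters leading eigenvectors v, the generalized Rayleigh
# quotient (v^T K_corr v) / (v^T S_corr v) - 1, which the original comment labels the Q_c
# eigenvalues xi. The helper below only restates that calculation as a standalone function so the
# intent is easier to inspect; the name _xi_from_rayleigh_quotients is hypothetical and nothing in
# main() calls it.
def _xi_from_rayleigh_quotients(corr_s_matrix, corr_k_matrix, eigvectors, n_clusters):
	"""Minimal sketch: xi_i = (v_i^T K v_i)/(v_i^T S v_i) - 1 for the n_clusters leading eigenvectors."""
	xi = []
	for eigvec in np.transpose(eigvectors)[:n_clusters]:
		num = np.dot(np.dot(np.transpose(eigvec), corr_k_matrix), eigvec)
		denom = np.dot(np.dot(np.transpose(eigvec), corr_s_matrix), eigvec)
		xi.append(num/denom - 1)
	return np.array(xi)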