def conformational_space_overlap(clustering, trajectoryHandler, matrixHandler):
    """Compute per-cluster relative populations of each trajectory and their JSDs.

    Every source in ``trajectoryHandler.sources`` gets the id ``"traj_<n>"``
    and an inclusive ``(first, last)`` index range, assigned consecutively in
    source order. ``Separator.decompose`` then splits every cluster by
    trajectory, and for each cluster the fraction of each trajectory's
    conformations that fall into it is calculated.

    Side effect: one curve per trajectory is drawn through ``plt`` and
    ``plt.show()`` is called before the function returns.

    :param clustering: object exposing the clusters as ``clustering.clusters``.
    :param trajectoryHandler: object exposing ``sources``; each source must
        provide ``get_info("number_of_conformations")`` and ``get_path()``.
    :param matrixHandler: not used by this function.
    :return: dict with the keys ``"id_to_path"`` (trajectory id -> path),
        ``"populations"`` (trajectory id -> list of relative populations,
        one per cluster, in sorted cluster order), ``"JSD"`` (nested dict
        with the value of ``JSD`` for every pair of trajectory ids) and
        ``"cluster_ids"`` (cluster ids in the same sorted order).
    """
    current = 0
    traj_ranges = {}
    traj_to_file = {}
    total_populations = {}
    for i, pdb_source in enumerate(trajectoryHandler.sources):
        num_confs = pdb_source.get_info("number_of_conformations")
        traj_id = "traj_%d" % i
        traj_ranges[traj_id] = (current, current + num_confs - 1)
        traj_to_file[traj_id] = pdb_source.get_path()
        total_populations[traj_id] = num_confs
        current = current + num_confs

    decomposed_clusters = Separator.decompose(clustering.clusters, traj_ranges)

    # The ids are used positionally below, so they must be a real list:
    # on Python 3 dict.keys() is a view and cannot be indexed.
    all_traj_ids = list(total_populations)

    # Get population percents for each cluster. Each row holds one value per
    # trajectory followed by the cluster id, so rows can be sorted as tuples.
    relative_populations = []
    for cluster_id in decomposed_clusters:
        dc = decomposed_clusters[cluster_id]
        row = [
            len(dc[traj_id]) / float(total_populations[traj_id])
            if traj_id in dc else 0.
            for traj_id in all_traj_ids
        ]
        row.append(cluster_id)
        relative_populations.append(tuple(row))

    # Tuple ordering makes the first trajectory's population the primary
    # sort key (to 'prettify' the plot a bit).
    relative_populations.sort()
    cluster_ids = [rp[-1] for rp in relative_populations]
    # Drop the trailing cluster id so only the numeric columns remain.
    relative_populations = numpy.array([rp[:-1] for rp in relative_populations])

    rel_pop_per_id = {}
    sm_rel_pop_per_id = {}
    for i, traj_id in enumerate(all_traj_ids):
        # Column i of the table == row i of its transpose.
        populations = relative_populations.T[i]
        rel_pop_per_id[traj_id] = list(populations)
        sm_rel_pop_per_id[traj_id] = smoothed(populations)
        plt.plot(populations, label=traj_to_file[traj_id])
    plt.legend()
    plt.show()

    # Calculate JSDs for every ordered pair of trajectories (self pairs included).
    jsds = {}
    for traj_a in all_traj_ids:
        jsds[traj_a] = {}
        for traj_b in all_traj_ids:
            jsds[traj_a][traj_b] = JSD(sm_rel_pop_per_id[traj_a],
                                       sm_rel_pop_per_id[traj_b])

    # Compile results
    results = {
        "id_to_path": traj_to_file,
        "populations": rel_pop_per_id,
        "JSD": jsds,
        "cluster_ids": cluster_ids
    }
    return results
def test_classify(self):
    """Separator.classify must group decomposed clusters under 'mixed' and 'pure'."""
    # Clusters whose elements come from both trajectories.
    expected_mixed = {
        '1': {'traj_A': [3], 'traj_B': [8, 10]},
        '2': {'traj_A': [4], 'traj_B': [14, 15]},
    }
    # Clusters whose elements come from a single trajectory.
    expected_pure = {
        '0': {'traj_A': [0, 1, 2]},
        '3': {'traj_A': [5, 6]},
        '4': {'traj_B': [9, 11, 12, 13, 7]},
    }
    expected = {'mixed': expected_mixed, 'pure': expected_pure}

    # The input is written as an independent literal so it shares no
    # objects with the expected value.
    decomposed_input = {
        '0': {'traj_A': [0, 1, 2]},
        '1': {'traj_A': [3], 'traj_B': [8, 10]},
        '2': {'traj_A': [4], 'traj_B': [14, 15]},
        '3': {'traj_A': [5, 6]},
        '4': {'traj_B': [9, 11, 12, 13, 7]},
    }

    classified = Separator.classify(decomposed_input)
    self.assertDictEqual(expected, classified)
def conformational_space_comparison(clustering, trajectoryHandler, matrixHandler,
                                    clustering_parameters, refinement_parameters):
    """Separate the clusters by trajectory of origin and analyze the result.

    Each source in ``trajectoryHandler.sources`` is labelled ``"traj_<n>"``
    and mapped to the inclusive ``(first, last)`` index range it occupies,
    with ranges assigned consecutively in source order. The clusters are
    passed through ``Separator.separate`` and the outcome is handed to
    ``Analyzer.run`` together with ``matrixHandler.distance_matrix``.

    ``clustering_parameters`` and ``refinement_parameters`` are currently
    unused: the Refiner pass that consumed them is disabled.

    :return: whatever ``Analyzer.run`` returns.
    """
    # NOTE: a refinement step used to run here and is kept for reference:
    #   clustering = Refiner(matrixHandler, trajectoryHandler,
    #                        clustering_parameters, refinement_parameters,
    #                        observer).run(clustering)

    next_free_index = 0
    traj_ranges = {}
    for source_number, pdb_source in enumerate(trajectoryHandler.sources):
        num_confs = pdb_source.get_info("number_of_conformations")
        range_end = next_free_index + num_confs - 1
        traj_ranges["traj_%d" % source_number] = (next_free_index, range_end)
        next_free_index += num_confs

    separated = Separator.separate(clustering.clusters, traj_ranges)
    return Analyzer.run(separated, matrixHandler.distance_matrix)
def test_decompose(self):
    """Separator.decompose must split each cluster's elements by trajectory range."""
    traj_ranges = {"traj_A": (0, 6), "traj_B": (7, 15)}

    # Build the clusters and give each one its list position as a string id.
    clusters = [
        Cluster(None, members)
        for members in ([0, 1, 2], [3, 8, 10], [14, 4, 15], [5, 6], [7, 9, 11, 12, 13])
    ]
    for position, cluster in enumerate(clusters):
        cluster.id = str(position)

    decomposed = Separator.decompose(clusters, traj_ranges)

    # Flatten everything that came back to check no element was lost.
    all_elements = [
        element
        for cluster_id in decomposed
        for element in getAllElements(decomposed[cluster_id])
    ]

    expected = {
        '0': {'traj_A': [0, 1, 2]},
        '1': {'traj_A': [3], 'traj_B': [8, 10]},
        '2': {'traj_A': [4], 'traj_B': [14, 15]},
        '3': {'traj_A': [5, 6]},
        '4': {'traj_B': [9, 11, 12, 13, 7]},
    }

    # NOTE: assertItemsEqual is the Python 2 unittest name
    # (assertCountEqual in Python 3).
    self.assertItemsEqual(range(16), sorted(all_elements))
    self.assertDictEqual(expected, decomposed)
def test_decompose(self):
    """Check that Separator.decompose assigns every element to its trajectory."""
    expected = {
        '0': {'traj_A': [0, 1, 2]},
        '1': {'traj_A': [3], 'traj_B': [8, 10]},
        '2': {'traj_A': [4], 'traj_B': [14, 15]},
        '3': {'traj_A': [5, 6]},
        '4': {'traj_B': [9, 11, 12, 13, 7]},
    }
    traj_ranges = {"traj_A": (0, 6), "traj_B": (7, 15)}

    clusters = [
        Cluster(None, [0, 1, 2]),
        Cluster(None, [3, 8, 10]),
        Cluster(None, [14, 4, 15]),
        Cluster(None, [5, 6]),
        Cluster(None, [7, 9, 11, 12, 13]),
    ]
    # Ids are the string form of each cluster's position in the list.
    for index, cluster in enumerate(clusters):
        cluster.id = str(index)

    decomposed = Separator.decompose(clusters, traj_ranges)

    # Gather every element of every decomposed cluster into one flat list.
    collected = []
    for cluster_id in decomposed:
        per_trajectory = decomposed[cluster_id]
        collected.extend(getAllElements(per_trajectory))

    # All 16 original elements must still be present after decomposing.
    # NOTE: assertItemsEqual is the Python 2 unittest name
    # (assertCountEqual in Python 3).
    self.assertItemsEqual(range(16), sorted(collected))
    self.assertDictEqual(expected, decomposed)
def conformational_space_comparison(clustering, trajectoryHandler, matrixHandler,
                                    clustering_parameters, refinement_parameters):
    """Run the Separator/Analyzer pipeline over a clustering.

    Builds a ``"traj_<n>" -> (first, last)`` map of inclusive index ranges,
    one entry per item of ``trajectoryHandler.sources`` and laid out back to
    back in source order. It then calls ``Separator.separate`` on
    ``clustering.clusters`` and feeds the result, plus
    ``matrixHandler.distance_matrix``, to ``Analyzer.run``.

    ``clustering_parameters`` and ``refinement_parameters`` are accepted but
    not read here; the Refiner call that used them is commented out.

    :return: the value produced by ``Analyzer.run``.
    """
    # Disabled refinement step, kept for reference:
    #   clustering = Refiner(matrixHandler, trajectoryHandler,
    #                        clustering_parameters, refinement_parameters,
    #                        observer).run(clustering)

    traj_ranges = {}
    offset = 0
    for idx, source in enumerate(trajectoryHandler.sources):
        conformations = source.get_info("number_of_conformations")
        label = "traj_%d" % idx
        traj_ranges[label] = (offset, offset + conformations - 1)
        offset = offset + conformations

    separated_clusters = Separator.separate(clustering.clusters, traj_ranges)
    analysis = Analyzer.run(separated_clusters, matrixHandler.distance_matrix)
    return analysis
def test_separate(self):
    """Separator.separate must return the clusters grouped as 'mixed' and 'pure'."""
    traj_ranges = {"traj_A": (0, 6), "traj_B": (7, 15)}

    # Build the clusters and give each one its list position as a string id.
    clusters = [
        Cluster(None, members)
        for members in ([0, 1, 2], [3, 8, 10], [14, 4, 15], [5, 6], [7, 9, 11, 12, 13])
    ]
    for position, cluster in enumerate(clusters):
        cluster.id = str(position)

    # Clusters 1 and 2 hold elements of both ranges; 0, 3 and 4 of only one.
    expected_mixed = {
        '1': {'traj_A': [3], 'traj_B': [8, 10]},
        '2': {'traj_A': [4], 'traj_B': [14, 15]},
    }
    expected_pure = {
        '0': {'traj_A': [0, 1, 2]},
        '3': {'traj_A': [5, 6]},
        '4': {'traj_B': [9, 11, 12, 13, 7]},
    }
    expected = {'mixed': expected_mixed, 'pure': expected_pure}

    separated = Separator.separate(clusters, traj_ranges)
    self.assertDictEqual(expected, separated)
def conformational_space_comparison(clustering, matrixHandler, trajectoryHandler,
                                    clustering_parameters, refinement_parameters,
                                    observer):
    """Separate the clusters by trajectory of origin and analyze the result.

    Each entry of ``trajectoryHandler.pdbs`` is labelled ``"traj_<n>"`` and
    mapped to an inclusive ``(first, last)`` index range whose length is the
    entry's ``"conformations"`` value; ranges are laid out consecutively.
    ``Separator.separate`` is applied to ``clustering.clusters`` and its
    output is given to ``Analyzer.run`` with ``matrixHandler.distance_matrix``.

    ``clustering_parameters``, ``refinement_parameters`` and ``observer`` are
    not used while the Refiner step below stays disabled.

    :return: whatever ``Analyzer.run`` returns.
    """
    # Disabled refinement step, kept for reference:
    #   clustering = Refiner(matrixHandler, trajectoryHandler,
    #                        clustering_parameters, refinement_parameters,
    #                        observer).run(clustering)

    # TODO: testing
    start = 0
    traj_ranges = {}
    for pdb_number, pdb in enumerate(trajectoryHandler.pdbs):
        num_confs = pdb["conformations"]
        traj_ranges["traj_%d" % pdb_number] = (start, start + num_confs - 1)
        start += num_confs

    separated = Separator.separate(clustering.clusters, traj_ranges)
    return Analyzer.run(separated, matrixHandler.distance_matrix)
def test_separate(self):
    """Check the 'mixed'/'pure' grouping produced by Separator.separate."""
    expected = {
        'mixed': {
            '1': {'traj_A': [3], 'traj_B': [8, 10]},
            '2': {'traj_A': [4], 'traj_B': [14, 15]},
        },
        'pure': {
            '0': {'traj_A': [0, 1, 2]},
            '3': {'traj_A': [5, 6]},
            '4': {'traj_B': [9, 11, 12, 13, 7]},
        },
    }

    clusters = [
        Cluster(None, [0, 1, 2]),
        Cluster(None, [3, 8, 10]),
        Cluster(None, [14, 4, 15]),
        Cluster(None, [5, 6]),
        Cluster(None, [7, 9, 11, 12, 13]),
    ]
    # Ids are the string form of each cluster's position in the list.
    for index, cluster in enumerate(clusters):
        cluster.id = str(index)

    traj_ranges = {"traj_A": (0, 6), "traj_B": (7, 15)}

    result = Separator.separate(clusters, traj_ranges)
    self.assertDictEqual(expected, result)
def conformational_space_overlap(clustering, trajectoryHandler, matrixHandler):
    """Quantify how the trajectories share the clusters of a clustering.

    Sources in ``trajectoryHandler.sources`` are labelled ``"traj_<n>"`` and
    given consecutive inclusive index ranges. After
    ``Separator.decompose`` splits the clusters by trajectory, the function
    computes, per cluster, the fraction of each trajectory's conformations
    found in that cluster, then evaluates ``JSD`` on the ``smoothed``
    population profiles of every pair of trajectories.

    Side effect: the raw population profiles are plotted through ``plt``
    and ``plt.show()`` is called before returning.

    :param clustering: object exposing the clusters as ``clustering.clusters``.
    :param trajectoryHandler: object exposing ``sources``; each source must
        provide ``get_info("number_of_conformations")`` and ``get_path()``.
    :param matrixHandler: not used by this function.
    :return: dict with the keys ``"id_to_path"``, ``"populations"``,
        ``"JSD"`` and ``"cluster_ids"``; population lists and
        ``"cluster_ids"`` share the same (sorted) cluster order.
    """
    current = 0
    traj_ranges = {}
    traj_to_file = {}
    total_populations = {}
    for i, pdb_source in enumerate(trajectoryHandler.sources):
        num_confs = pdb_source.get_info("number_of_conformations")
        traj_id = "traj_%d" % i
        traj_ranges[traj_id] = (current, current + num_confs - 1)
        traj_to_file[traj_id] = pdb_source.get_path()
        total_populations[traj_id] = num_confs
        current = current + num_confs

    decomposed_clusters = Separator.decompose(clustering.clusters, traj_ranges)

    # A list (not dict.keys()) is required because the ids are paired with
    # column positions below; Python 3 key views do not support indexing.
    all_traj_ids = list(total_populations)

    # Get population percents for each cluster; the cluster id is appended
    # as the last field so it travels with its row through the sort.
    relative_populations = []
    for cluster_id in decomposed_clusters:
        dc = decomposed_clusters[cluster_id]
        relative_population = []
        for traj_id in all_traj_ids:
            if traj_id in dc:
                relative_population.append(
                    len(dc[traj_id]) / float(total_populations[traj_id]))
            else:
                relative_population.append(0.)
        relative_population.append(cluster_id)
        relative_populations.append(tuple(relative_population))

    # Sort by first traj (to 'prettify' it a bit): tuples compare field by
    # field, so the first trajectory's population is the primary key.
    relative_populations.sort()
    cluster_ids = [rp[-1] for rp in relative_populations]
    relative_populations = numpy.array(
        [rp[:-1] for rp in relative_populations])

    rel_pop_per_id = {}
    sm_rel_pop_per_id = {}
    per_trajectory = relative_populations.T  # one row per trajectory
    for i, traj_id in enumerate(all_traj_ids):
        profile = per_trajectory[i]
        rel_pop_per_id[traj_id] = list(profile)
        sm_rel_pop_per_id[traj_id] = smoothed(profile)
        plt.plot(profile, label=traj_to_file[traj_id])
    plt.legend()
    plt.show()

    # Calculate JSDs for every ordered pair of trajectories (self pairs included).
    jsds = {}
    for traj_a in all_traj_ids:
        jsds[traj_a] = {}
        for traj_b in all_traj_ids:
            jsds[traj_a][traj_b] = JSD(sm_rel_pop_per_id[traj_a],
                                       sm_rel_pop_per_id[traj_b])

    # Compile results
    results = {
        "id_to_path": traj_to_file,
        "populations": rel_pop_per_id,
        "JSD": jsds,
        "cluster_ids": cluster_ids
    }
    return results