def test_clustering(d, clusters, vlmcs, out_directory,
                    cluster_class=MSTClustering, do_draw_graph=True):
    """Cluster the given VLMCs and print / plot the resulting metrics.

    Args:
        d: Passed unchanged as the second argument to ``cluster_class``.
        clusters: Value handed to ``clustering.cluster``; also used as the
            bounds of the (single-step) descending range below.
        vlmcs: Iterable of objects exposing a ``name`` attribute.
        out_directory: Forwarded to the plotting helpers.
        cluster_class: Callable invoked as ``cluster_class(vlmcs, d, metadata)``;
            the returned object must provide ``cluster(n)``.
        do_draw_graph: When true, the component plot and the per-key graphs
            are produced.
    """
    model_names = [vlmc.name for vlmc in vlmcs]
    metadata = get_metadata_for(model_names)
    clustering = cluster_class(vlmcs, d, metadata)

    # (title, metadata key) pairs, one graph is drawn per pair.
    graph_specs = (
        ('Family', 'family'),
        ('Genus', 'genus'),
        ('Host', 'hosts'),
        ('Baltimore', 'baltimore'),
    )

    # The range starts at `clusters` and stops before `clusters - 1`, so the
    # body executes once, with n_clusters == clusters.
    for n_clusters in range(clusters, clusters - 1, -1):
        print(n_clusters)
        result = clustering.cluster(n_clusters)

        # NOTE(review): nesting of the drawing code under this flag was
        # reconstructed from whitespace-collapsed source -- confirm.
        if do_draw_graph:
            plot_largest_components(result, n_clusters, out_directory)
            for title, key in graph_specs:
                draw_graph(result, title, key, n_clusters, out_directory)

        print_connected_components(result)
        print_cluster_metrics(result, n_clusters)
def test_clustering(d, vlmcs, cluster_class):
    """Collect clustering metrics for every cluster count below ``len(vlmcs)``.

    The clustering object is built once and then asked to cluster into
    ``len(vlmcs) - 1`` down to ``1`` clusters; each count is printed as it
    is processed.

    Args:
        d: Passed unchanged as the second argument to ``cluster_class``.
        vlmcs: Sized collection of objects exposing a ``name`` attribute.
        cluster_class: Callable invoked as ``cluster_class(vlmcs, d, metadata)``;
            the returned object must provide ``cluster(n)``.

    Returns:
        A ``float32`` array of shape ``(len(vlmcs), 7)``. Row ``i`` holds the
        metrics for ``cluster(i)``; row 0 is never written and stays zero.
        Columns:
            0: ``average_silhouette()``
            1-3: ``average_percent_same_taxonomy`` for 'organism', 'family',
                 'genus'
            4-5: the two values of ``sensitivity_specificity('family')``,
                 in the order returned
            6: ``get_latest_merge_distance()``
    """
    model_names = [vlmc.name for vlmc in vlmcs]
    metadata = get_metadata_for(model_names)

    n_models = len(vlmcs)
    metrics = np.zeros([n_models, 7], dtype=np.float32)
    clustering = cluster_class(vlmcs, d, metadata)

    # Column index -> taxonomy key for the "percent same taxonomy" metrics.
    taxonomy_columns = ((1, 'organism'), (2, 'family'), (3, 'genus'))

    # Walk the cluster counts from n_models - 1 down to 1.
    for n_clusters in reversed(range(1, n_models)):
        print(n_clusters)
        result = clustering.cluster(n_clusters)

        row = metrics[n_clusters]  # view: writes land in `metrics`
        row[0] = result.average_silhouette()
        for column, rank in taxonomy_columns:
            row[column] = result.average_percent_same_taxonomy(rank)
        row[4], row[5] = result.sensitivity_specificity('family')
        row[6] = result.get_latest_merge_distance()

    return metrics
def test_distance_function(d, tree_dir, out_dir, plot_distances=False, plot_boxes=False):
    """Load the VLMCs under ``tree_dir`` and evaluate the distance function ``d``.

    The trees in ``tree_dir`` are first converted with
    ``parse_trees_to_json.parse_trees`` and then loaded with
    ``VLMC.from_json_dir``. If a sibling directory named
    ``tree_dir + "_test"`` exists it is parsed and loaded the same way and
    used as the test set; otherwise the loaded models double as test set.

    Args:
        d: Forwarded as the first argument to ``test_distance_function_``.
        tree_dir: Directory path (string) holding the trees.
        out_dir: Output directory, or ``None``. When given and it cannot be
            stat'ed, it is created with ``os.mkdir``.
        plot_distances: Forwarded to ``test_distance_function_``.
        plot_boxes: Forwarded to ``test_distance_function_``.

    Returns:
        Whatever ``test_distance_function_`` returns.
    """
    parse_trees_to_json.parse_trees(tree_dir)
    vlmcs = VLMC.from_json_dir(tree_dir)
    metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

    test_dir = tree_dir + "_test"
    if os.path.isdir(test_dir):
        parse_trees_to_json.parse_trees(test_dir)
        test_vlmcs = VLMC.from_json_dir(test_dir)
    else:
        test_vlmcs = vlmcs

    if out_dir is not None:
        try:
            os.stat(out_dir)
        except OSError:
            # Only filesystem errors mean "not there yet"; a bare except
            # would also swallow KeyboardInterrupt / SystemExit.
            os.mkdir(out_dir)

    return test_distance_function_(d, vlmcs, test_vlmcs, metadata, out_dir,
                                   True, False, plot_distances, plot_boxes)
# Remaining command-line options; `parser` is created before this point.
parser.add_argument('--intersection', action='store_true')
parser.add_argument('--occurrence-probability-labels', action='store_true')
parser.add_argument(
    '--directory', type=str, default='../trees_pst_better',
    help='The directory which contains the vlmcs to be printed.')
parser.add_argument(
    '--out-directory', type=str, default='../images',
    help='The directory to where the images should be written.')
args = parser.parse_args()

# Create the output directory when it cannot be stat'ed. Only OSError is
# caught so that KeyboardInterrupt / SystemExit are not swallowed.
try:
    os.stat(args.out_directory)
except OSError:
    os.mkdir(args.out_directory)

# Convert the trees, then load them together with their metadata.
parse_trees(args.directory, args.deltas)
vlmcs = VLMC.from_json_dir(args.directory)
metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

# --intersection selects the intersection output; otherwise the regular
# save path is used.
if args.intersection:
    save_intersection(vlmcs, metadata, args.out_directory)
else:
    save(vlmcs, metadata, args.out_directory, args.deltas,
         args.occurrence_probability_labels)