def test_distance_function(d,
                           tree_dir,
                           out_dir,
                           plot_distances=False,
                           plot_boxes=False):
    """Load the VLMCs in *tree_dir* and evaluate the distance function *d*.

    The trees in ``tree_dir`` are converted to JSON and loaded as the
    reference models.  If a sibling directory named ``tree_dir + "_test"``
    exists, its trees are converted and loaded as the test models;
    otherwise the reference models are tested against themselves.

    Args:
        d: Distance function object, forwarded to
            ``test_distance_function_``.
        tree_dir: Directory containing the trees to load.
        out_dir: Output directory forwarded to ``test_distance_function_``.
            Created (including missing parents) when it is not ``None``
            and does not exist yet.
        plot_distances: Forwarded unchanged to ``test_distance_function_``.
        plot_boxes: Forwarded unchanged to ``test_distance_function_``.

    Returns:
        Whatever ``test_distance_function_`` returns.
    """
    parse_trees_to_json.parse_trees(tree_dir)
    vlmcs = VLMC.from_json_dir(tree_dir)

    metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

    test_dir = tree_dir + "_test"
    if os.path.isdir(test_dir):
        parse_trees_to_json.parse_trees(test_dir)
        test_vlmcs = VLMC.from_json_dir(test_dir)
    else:
        # No dedicated test set: compare the models against themselves.
        test_vlmcs = vlmcs

    if out_dir is not None:
        # exist_ok avoids the stat-then-mkdir race and does not hide
        # unrelated errors (permissions, interrupts) the way a bare
        # ``except:`` did.
        os.makedirs(out_dir, exist_ok=True)

    # NOTE(review): the meaning of the positional True/False flags is defined
    # by test_distance_function_, which is not visible here.
    return test_distance_function_(d, vlmcs, test_vlmcs, metadata, out_dir,
                                   True, False, plot_distances, plot_boxes)
def test():
    """Measure and plot distances for a range of sequence lengths.

    Loads the VLMCs from a fixed input directory, builds ten logarithmically
    spaced integer lengths via ``np.logspace(2, 6, 10)``, computes the
    distances for every length using a pool of four worker processes, and
    plots the results.
    """
    in_directory = "../test_trees_128"
    out_directory = "../test_128"
    image_directory = "../images/128"

    with multiprocessing.Pool(processes=4) as pool:
        parse_trees_to_json.parse_trees(in_directory)
        models = VLMC.from_json_dir(in_directory)

        sequence_lengths = list(map(int, np.logspace(2, 6, 10)))
        print(sequence_lengths)

        per_length_distances = calculate_distances_for_lengths(
            models, sequence_lengths, out_directory, image_directory, pool)

        plot_results(models, per_length_distances, sequence_lengths,
                     image_directory)
def distance_for_length(length, vlmcs, out_directory, image_directory, d):
    """Return the distance per VLMC, averaged over five retraining rounds.

    Each round retrains models from *vlmcs* with sequences of *length*,
    writes them to *out_directory*, reloads them, pairs them with the
    originals and measures the distance *d* for every pair.

    Args:
        length: Sequence length passed to ``train``.
        vlmcs: The original models.
        out_directory: Directory the retrained trees are written to and
            read back from.
        image_directory: Not used by this function.
        d: Distance function passed to ``distance_calculation``.

    Returns:
        Array with one averaged distance per entry of *vlmcs*.
    """
    repetitions = 5
    mean_distances = np.zeros(len(vlmcs))

    for _ in range(repetitions):
        train(vlmcs, length, out_directory)
        parse_trees_to_json.parse_trees(out_directory)
        retrained = VLMC.from_json_dir(out_directory)

        matched = pair_vlmcs(vlmcs, retrained)
        # Add this round's share of the mean straight away.
        mean_distances += distance_calculation(matched, d) / repetitions

    print("{} done".format(length))
    return mean_distances
def test(args):
    """Cluster the VLMCs in ``args.directory`` and plot the metrics.

    Args:
        args: Parsed command-line arguments.  This function reads
            ``directory``, ``out_directory`` and ``name``; the object is
            also handed to ``parse_clustering_method`` and
            ``parse_distance_method``.

    The plot name is ``args.name`` when that is truthy, otherwise
    ``"<cluster class name>, <distance class name>"``.  The output
    directory is created (including missing parents) if it does not exist.
    """
    tree_directory = args.directory
    out_directory = args.out_directory
    parse_trees_to_json.parse_trees(tree_directory)
    vlmcs = VLMC.from_json_dir(tree_directory)

    cluster_class = parse_clustering_method(args)
    d = parse_distance_method(args)

    if args.name:
        name = args.name
    else:
        name = cluster_class.__name__ + ", " + d.__class__.__name__

    metrics = test_clustering(d, vlmcs, cluster_class)

    # exist_ok avoids the stat-then-mkdir race and does not hide unrelated
    # errors (permissions, interrupts) the way a bare ``except:`` did.
    os.makedirs(out_directory, exist_ok=True)

    plot_metrics(metrics, out_directory, name)
Beispiel #5
0
    # NOTE(review): the enclosing definition and the construction of `parser`
    # are outside this excerpt; only the visible statements are described.

    # Boolean switches: both default to False and become True when given.
    parser.add_argument('--intersection', action='store_true')
    parser.add_argument('--occurrence-probability-labels', action='store_true')

    # Input directory holding the trees and output directory for the images.
    parser.add_argument(
        '--directory',
        type=str,
        default='../trees_pst_better',
        help='The directory which contains the vlmcs to be printed.')
    parser.add_argument(
        '--out-directory',
        type=str,
        default='../images',
        help='The directory to where the images should be written.')

    args = parser.parse_args()

    # Create the output directory if stat-ing it fails.
    # NOTE(review): the bare `except:` catches every exception, not only a
    # missing directory; `os.makedirs(..., exist_ok=True)` would be narrower.
    try:
        os.stat(args.out_directory)
    except:
        os.mkdir(args.out_directory)

    # `args.deltas` is not registered in the visible lines; presumably it is
    # added earlier in the function -- TODO confirm.
    parse_trees(args.directory, args.deltas)
    vlmcs = VLMC.from_json_dir(args.directory)
    metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

    # --intersection selects save_intersection; otherwise the regular save
    # path runs with the deltas and label options.
    if args.intersection:
        save_intersection(vlmcs, metadata, args.out_directory)
    else:
        save(vlmcs, metadata, args.out_directory, args.deltas,
             args.occurrence_probability_labels)
Beispiel #6
0
def number_in_rank(metadata, key):
    """Print how often each value of *key* occurs across *metadata*.

    Args:
        metadata: Mapping whose values are mappings containing *key*.
        key: The field to tally in every metadata entry.

    Prints the key, the number of distinct values and the full tally.
    """
    tally = Counter(entry[key] for entry in metadata.values())
    distinct = len(tally)
    print("{}: size {}\n{}".format(key, distinct, tally))


def order_analysis(vlmcs):
    """Print the distribution of the ``order`` attribute over *vlmcs*.

    Args:
        vlmcs: Iterable of objects exposing an ``order`` attribute.

    Prints the count per order together with the minimum, maximum and mean.
    """
    orders = [model.order for model in vlmcs]
    summary = (
        Counter(orders),
        np.min(orders),
        np.max(orders),
        np.mean(orders),
    )
    print("Orders: {}, min: {}, max: {}, average: {}".format(*summary))


if __name__ == '__main__':
    # Script entry point: load the VLMCs from a fixed tree directory, fetch
    # their metadata and report it via number_in_ranks.
    tree_dir = '../trees_virus_martin_all_96'
    # tree_dir = '../trees_more_192'
    parse_trees_to_json.parse_trees(tree_dir)
    vlmcs = VLMC.from_json_dir(tree_dir)
    metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])
    # Optional filtering steps, currently disabled:
    # vlmcs = [v for v in vlmcs if metadata[v.name]['genus'] == 'Ebolavirus']
    # metadata = {k: v for k, v in metadata.items() if v['genus'] == 'Ebolavirus'}
    # vlmcs = {metadata[v.name]['species']: v for v in vlmcs}
    # vlmcs = [v for _, v in vlmcs.items()]
    # NOTE(review): with the filters above commented out, this second lookup
    # repeats the call made before them with the same names.
    metadata = get_metadata_for([vlmc.name for vlmc in vlmcs])

    # NOTE(review): number_in_ranks (plural) is not defined in the visible
    # code; presumably it lives elsewhere in the file -- TODO confirm.
    number_in_ranks(metadata)
    # order_analysis(vlmcs)
Beispiel #7
0
def get_vlmcs(out_directory, number_of_parameters):
    """Load the VLMCs stored for one parameter count.

    The models are read from the sub-directory of *out_directory* named
    after ``str(number_of_parameters)``; its trees are parsed first.

    Returns:
        The result of ``VLMC.from_json_dir`` for that sub-directory.
    """
    parameter_dir = os.path.join(out_directory, str(number_of_parameters))
    parse_trees(parameter_dir)
    loaded = VLMC.from_json_dir(parameter_dir)
    return loaded
Beispiel #8
0
def example_distance(tree_dir, image_dir):
  """Run the distance test on *tree_dir* using the Frobenius norm.

  Every model is compared against the same set of models.  The metadata is
  synthetic: each model's own name is used as its species, family and genus.
  """
  rank_names = ['species', 'family', 'genus']

  parse_trees_to_json.parse_trees(tree_dir)
  models = VLMC.from_json_dir(tree_dir)
  d = FrobeniusNorm()

  metadata = {}
  for model in models:
    metadata[model.name] = dict.fromkeys(rank_names, model.name)

  test_distance_function_(d, models, models, metadata, image_dir)
Beispiel #9
0
def regenerate_example_vlmcs(tree_dir, gen_tree_dir):
  """Retrain the VLMCs found in *tree_dir* and write them to *gen_tree_dir*.

  ``train`` is called with a sequence length of 100000 and 24 parameters.
  """
  sequence_length = 100000
  number_of_parameters = 24

  source_models = VLMC.from_json_dir(tree_dir)
  train(source_models, sequence_length, gen_tree_dir, number_of_parameters)
def parse_trees(args):
    """Parse the trees in ``args.directory`` and return the loaded VLMCs.

    Args:
        args: Object exposing a ``directory`` attribute.

    Returns:
        The result of ``VLMC.from_json_dir`` for that directory.
    """
    source_dir = args.directory
    parse_trees_to_json.parse_trees(source_dir)
    loaded = VLMC.from_json_dir(source_dir)
    return loaded