matrix = matrix_pce


    #hierarchical clustering part starts here
    linkage = dendro.compute_linkage(matrix)

    #methods = ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']
    #for method in methods:
    #    linkage = sch.linkage(matrix, method=method)


    threshold = 0.7*linkage[:,2].max() # default threshold used in sch.dendogram is 0.7*linkage[:,2].max()


    #dendrogram = dendro.compute_dendrogram(linkage)
    dendrogram = dendro.plot_dendrogram_and_matrix(linkage, matrix, color_threshold=threshold)


    #compute flat clustering in the exact same way as sch.dendogram colors the clusters
    cluster = numpy.array(sch.fcluster(linkage, threshold, criterion='distance'), dtype=numpy.int)
    numpy.set_printoptions(threshold=numpy.nan) # make numpy print the full array
    print "flat clustering:\n", cluster

    #get the actual clustering
    filelist = numpy.loadtxt(directory + "/filelist.txt", dtype=numpy.string_)
    true_clustering = ["_".join(s.split("_")[:-1]) for s in filelist]
    true_clusters = sorted(set(true_clustering))
    true_labels = numpy.array([true_clusters.index(id) for id in true_clustering], dtype=numpy.int)
    print "true clustering:\n", true_labels

def get_ground_truth():
    """Build the ideal ("answer") distance matrix from the dataset's file list.

    Reads ``filelist.txt`` from the module-level ``directory``. The camera
    identifier of a file is its name with the last underscore-separated field
    removed (e.g. ``camA_12.jpg`` -> ``camA``).

    Returns:
        A square float array with one row/column per listed file, where
        entry ``[i][j]`` is ``0.0`` on the diagonal, ``1.0`` when files
        ``i`` and ``j`` come from the same camera and ``100.0`` otherwise.
    """
    # dtype=str yields text on Python 3, so splitting on "_" works; a
    # bytes dtype would not. atleast_1d keeps a one-line file list from
    # collapsing into a 0-d array that cannot be indexed.
    filelist = numpy.atleast_1d(
        numpy.loadtxt(directory + "/filelist.txt", dtype=str))

    # Derive each camera identifier once instead of once per matrix cell.
    cameras = numpy.array(["_".join(name.split("_")[:-1]) for name in filelist])

    # Pairwise comparison through broadcasting: True where cameras match.
    same_camera = cameras[:, None] == cameras[None, :]
    matrix_ans = numpy.where(same_camera, 1.0, 100.0)

    # A file is at distance zero from itself.
    numpy.fill_diagonal(matrix_ans, 0.0)
    return matrix_ans




if __name__ == "__main__":
    # Script entry point: load the pairwise score matrices of one dataset,
    # convert them to distances, cluster hierarchically and compare the
    # resulting flat clustering with the ground truth from the file names.
    import sys

    # Dataset name: optional first command-line argument, "pentax" by default.
    # Usage: ./camera_identification [<name-of-dataset>]
    dataset = sys.argv[1] if len(sys.argv) > 1 else "pentax"
    directory = directory + "/" + dataset

    #load the distance matrices from files (big-endian doubles)
    matrix_pce = numpy.fromfile(directory + "/matrix-" + dataset + "-pce.dat", dtype='>d')
    matrix_pce0 = numpy.fromfile(directory + "/matrix-" + dataset + "-pce0.dat", dtype='>d')
    matrix_ncc = numpy.fromfile(directory + "/matrix-" + dataset + "-ncc.dat", dtype='>d')

    matrix_pce, matrix_ncc = map_ncc_scores_to_pce_domain(matrix_pce, matrix_ncc)
    matrix_ncc = convert_similarity_to_distance(matrix_ncc)
    matrix_pce = convert_similarity_to_distance(matrix_pce)
    matrix_pce0 = convert_similarity_to_distance(matrix_pce0)

    matrix_ans = get_ground_truth()
    plot_distance_matrices(matrix_ncc, matrix_pce, matrix_pce0, matrix_ans)


    #set metric to use for clustering
    matrix = matrix_pce


    #hierarchical clustering part starts here
    linkage = dendro.compute_linkage(matrix)

    #alternative: try the other linkage methods
    #methods = ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']
    #for method in methods:
    #    linkage = sch.linkage(matrix, method=method)


    # default threshold used in sch.dendrogram is 0.7*linkage[:,2].max()
    threshold = 0.7*linkage[:,2].max()


    #dendrogram = dendro.compute_dendrogram(linkage)
    dendrogram = dendro.plot_dendrogram_and_matrix(linkage, matrix, color_threshold=threshold)


    #compute flat clustering in the exact same way as sch.dendrogram colors the clusters
    # (builtin int replaces the removed numpy.int alias; same dtype)
    cluster = numpy.array(sch.fcluster(linkage, threshold, criterion='distance'), dtype=int)
    # make numpy print the full array; NaN is rejected as a threshold by
    # current NumPy, sys.maxsize is the supported "never summarize" value
    numpy.set_printoptions(threshold=sys.maxsize)
    print("flat clustering:\n", cluster)

    #get the actual clustering
    # dtype=str gives text entries, so splitting on "_" works on Python 3;
    # atleast_1d keeps a one-line file list iterable.
    filelist = numpy.atleast_1d(numpy.loadtxt(directory + "/filelist.txt", dtype=str))
    # camera identifier = file name without its last "_"-separated field
    true_clustering = ["_".join(s.split("_")[:-1]) for s in filelist]
    true_clusters = sorted(set(true_clustering))
    # label = position of the camera in the sorted list of distinct cameras
    cluster_index = {name: idx for idx, name in enumerate(true_clusters)}
    true_labels = numpy.array([cluster_index[name] for name in true_clustering], dtype=int)
    print("true clustering:\n", true_labels)


    print_metrics(true_labels, cluster)