def buildEmbeddingsTree(indexMap, embeddings, comparator=None):
    """Cluster embeddings hierarchically and display the tree as a dendrogram.

    Every ordered pair of distinct embeddings is scored with ``comparator``
    (the diagonal is fixed at 0). The resulting square matrix is scaled by
    its largest entry, condensed with ``ssd.squareform`` and handed to
    ``linkage``. The dendrogram is drawn with matplotlib and the function
    blocks in ``plt.show()``; nothing is returned.

    Args:
        indexMap: mapping whose keys are '/'-separated names (only the last
            path component is used as the leaf label) and whose values are
            indices. NOTE(review): the values are assumed to be the
            positions of the corresponding vectors in ``embeddings`` --
            confirm against the caller.
        embeddings: sequence of embedding vectors indexable by integer
            position.
        comparator: optional callable ``(a, b) -> number`` giving the
            dissimilarity of two embeddings. It is expected to be symmetric
            because the matrix is condensed with ``squareform``. Defaults to
            the Euclidean distance plus ``1 / (2 + 2 * cosineSimilarity)``.

    Raises:
        ValueError: if fewer than two embeddings are supplied.
    """
    embeddingsCount = len(embeddings)
    if embeddingsCount < 2:
        raise ValueError('at least two embeddings are required to build a tree')

    # Only fall back to the default when the caller did not supply one.
    if comparator is None:
        comparator = lambda a, b: vectors.euclideanDistance(a, b) + 1 / (2 + 2*vectors.cosineSimilarity(a, b))

    # Fill a concrete array instead of a lazy ``map`` so the values can be
    # read more than once (max, then scaling) on Python 2 and Python 3 alike.
    comparisons = numpy.zeros((embeddingsCount, embeddingsCount))
    for x, y in itertools.product(range(embeddingsCount), repeat=2):
        if x != y:
            comparisons[x, y] = comparator(embeddings[x], embeddings[y])

    # The zero diagonal guarantees max >= 0; skip the scaling when every
    # comparison is 0 to avoid turning the matrix into NaNs.
    maxComparison = comparisons.max()
    if maxComparison > 0:
        comparisons = comparisons / maxComparison

    comparisons = ssd.squareform(comparisons)
    links = linkage(comparisons)

    fig, _ = plt.subplots()
    fig.subplots_adjust(right=0.8)

    # dendrogram() expects labels[i] to describe observation i, so order the
    # names by their index rather than alphabetically.
    orderedItems = sorted(indexMap.items(), key=lambda nameIndexPair: nameIndexPair[1])
    names = [name.split('/')[-1] for name, _ in orderedItems]

    dendrogram(
        links,
        leaf_rotation=90.,
        leaf_font_size=8.,
        orientation='right',
        labels=names,
        show_contracted=True,
        show_leaf_counts=True)

    plt.show()
def compareEmbeddings(indexMap, embeddingsList, comparator=None, annotate=False, axisLabels=True):
    """Plot a filled contour map of pairwise embedding comparisons.

    A square matrix is built in which cell ``[x, y]`` holds
    ``comparator(embeddingsList[x], embeddingsList[y])`` for ``x != y``.
    Diagonal cells start as NaN and are then replaced by the mean of the
    strictly positive values in their 3x3 neighbourhood, so the
    self-comparison does not leave holes in the plot. The figure is shown
    with ``plt.show()`` (blocking); nothing is returned.

    Args:
        indexMap: mapping from file path to index. Its length determines how
            many embeddings are compared. The values are used as tick
            positions and the file names (base name up to the first '.')
            as tick labels.
        embeddingsList: sequence of embedding vectors indexable by integer
            position.
        comparator: optional callable ``(a, b) -> number``. Defaults to the
            cosine similarity plus the inverse Euclidean distance.
        annotate: when true, write every cell value (multiplied by 100, one
            decimal) onto the plot.
        axisLabels: when true, label both axes with the file names.
    """
    embeddingsCount = len(indexMap)

    if comparator is None:
        comparator = lambda a, b: vectors.cosineSimilarity(a, b) + 1 / vectors.euclideanDistance(a, b)

    # Build the matrix eagerly; a lazy ``map`` object cannot be reshaped by
    # numpy on Python 3.
    comparisons = numpy.full((embeddingsCount, embeddingsCount), numpy.nan)
    for x, y in itertools.product(range(embeddingsCount), repeat=2):
        if x != y:
            comparisons[x, y] = comparator(embeddingsList[x], embeddingsList[y])

    # Replace NaN cells in row-major order. Cells are filled in place, so a
    # cell filled earlier contributes to the neighbourhoods of later ones.
    nanxx, nanyy = numpy.where(numpy.isnan(comparisons))
    for x, y in zip(nanxx, nanyy):
        # Only the lower bound needs clamping: a negative start would wrap
        # around, whereas an end past the edge is clipped by slicing itself.
        neighbours = comparisons[max(x - 1, 0):x + 2, max(y - 1, 0):y + 2]
        # NaN compares false, so this drops unfilled cells as well as
        # non-positive values.
        neighbours = neighbours[neighbours > 0]
        if neighbours.size:
            comparisons[x, y] = numpy.mean(neighbours)

    fig, _ = plt.subplots()
    fig.subplots_adjust(bottom=0.2)

    if axisLabels:
        filePaths = list(indexMap)
        fileNames = [os.path.basename(filePath).split('.')[0] for filePath in filePaths]
        indices = [indexMap[filePath] for filePath in filePaths]

        plt.xticks(indices, fileNames, size='small', rotation='vertical')
        plt.yticks(indices, fileNames, size='small')

    plt.contourf(comparisons)

    if annotate:
        # contourf draws comparisons[row, column] at x=column, y=row, so the
        # annotation point is (column, row).
        for (row, column), value in numpy.ndenumerate(comparisons):
            plt.annotate('{0:.1f}'.format(value*100), (column, row))

    plt.show()