Beispiel #1
0
def visualize(cnt, model, rundir, idxs, str):
    """
    Visualize a set of examples using t-SNE.

    The embedding rows selected by ``idxs`` are passed through ``tsne`` and
    the resulting 2-D points are handed to ``render`` together with the word
    form of each index and a ``.png`` output filename inside ``rundir``.

    Parameters:
        cnt: Integer formatted into the output filename (``%d``).
        model: Object exposing ``parameters.embeddings`` (indexable with a
            list of row indices and having a ``shape``) and ``modelname``.
        rundir: Directory used as the prefix of the output filename.
        idxs: Iterable of integer ids; each is wrapped modulo the number of
            embedding rows before lookup.
        str: Label formatted into the output filename. The name shadows the
            builtin ``str`` but is kept so existing callers keep working.

    An ``IOError`` raised while computing or rendering the projection is
    logged and swallowed so the caller can continue; other exceptions
    propagate.
    """
    # Imported locally, like the other dependencies of this function, so the
    # error handler below cannot itself fail on a missing module-level import.
    import logging
    import os.path

    from vocabulary import wordform

    PERPLEXITY = 30

    # Wrap every id into the valid row range of the embedding matrix.
    nrows = model.parameters.embeddings.shape[0]
    idxs = [idx % nrows for idx in idxs]
    x = model.parameters.embeddings[idxs]
    # Parenthesised form prints the same text on Python 2 and is valid on 3.
    print(x.shape)
    titles = [wordform(idx) for idx in idxs]

    filename = os.path.join(
        rundir, "embeddings.model-%s.-%s-%d.png" % (model.modelname, str, cnt))
    try:
        from textSNE.calc_tsne import tsne
        out = tsne(x, perplexity=PERPLEXITY)
        from textSNE.render import render
        render([(title, point[0], point[1])
                for title, point in zip(titles, out)], filename)
    except IOError:
        # The message needs a %s placeholder: logging formats `msg % args`,
        # and extra positional args without placeholders make the record
        # fail to format instead of being emitted.
        logging.info("ERROR visualizing %s. Continuing...", filename)
def visualize(cnt, model, rundir, idxs, str):
    """
    Visualize a set of examples using t-SNE.

    Looks up the embedding rows for ``idxs``, projects them with ``tsne`` and
    passes the labelled 2-D points to ``render`` along with a ``.png``
    filename located in ``rundir``.

    Parameters:
        cnt: Integer formatted into the output filename (``%d``).
        model: Object exposing ``parameters.embeddings`` (indexable with a
            list of row indices and having a ``shape``) and ``modelname``.
        rundir: Directory used as the prefix of the output filename.
        idxs: Iterable of integer ids; each is reduced modulo the number of
            embedding rows before lookup.
        str: Label formatted into the output filename. It shadows the builtin
            ``str``; the name is retained for backward compatibility.

    ``IOError`` from the projection or rendering step is logged and
    suppressed; any other exception propagates to the caller.
    """
    # NOTE(review): a function with this same name is defined earlier in the
    # file; this later definition is the one bound after import.

    # Local import keeps the except-handler independent of module-level
    # imports, matching how the other dependencies are pulled in here.
    import logging
    import os.path

    from vocabulary import wordform

    PERPLEXITY = 30

    # Keep every id inside the row range of the embedding matrix.
    nrows = model.parameters.embeddings.shape[0]
    idxs = [idx % nrows for idx in idxs]
    x = model.parameters.embeddings[idxs]
    # Same output as the Python 2 print statement, and valid on Python 3.
    print(x.shape)
    titles = [wordform(idx) for idx in idxs]

    filename = os.path.join(rundir, "embeddings.model-%s.-%s-%d.png" % (model.modelname, str, cnt))
    try:
        from textSNE.calc_tsne import tsne
        out = tsne(x, perplexity=PERPLEXITY)
        from textSNE.render import render
        render([(title, point[0], point[1]) for title, point in zip(titles, out)], filename)
    except IOError:
        # logging applies `msg % args`; without a placeholder the extra
        # arguments break formatting and the message is never written.
        logging.info("ERROR visualizing %s. Continuing...", filename)
    from targetvocabulary import targetmap

    for w1 in wordmap().all:
        w1 = wordmap().id(w1)
        # Actually, should assert W2W SKIP TRANSLATIONS FROM UNKNOWN WORD
        assert HYPERPARAMETERS["W2W SKIP TRANSLATIONS TO UNKNOWN WORD"]
        if language(w1) is None:
            print >> sys.stderr, "Skipping %s" % `wordmap().str(w1)`
            continue
        if w1 not in targetmap():
            print >> sys.stderr, "Skipping %s, not a source word in targetmap" % `wordmap().str(w1)`
            continue
        for l2 in targetmap()[w1]:
            totcnt = 0
            for cnt, w2 in dictsort(targetmap()[w1][l2]): totcnt += cnt
            print wordmap().str(w1), l2, [(percent(cnt, totcnt), wordform(w2)) for cnt, w2 in dictsort(targetmap()[w1][l2])]

    print >> sys.stderr, "REVERSE MAP NOW"

    for w1 in wordmap().all:
        w1 = wordmap().id(w1)
        # Actually, should assert W2W SKIP TRANSLATIONS FROM UNKNOWN WORD
        assert HYPERPARAMETERS["W2W SKIP TRANSLATIONS TO UNKNOWN WORD"]
        if language(w1) is None:
            print >> sys.stderr, "Skipping %s" % `wordmap().str(w1)`
            continue
        if w1 not in targetmap(name="reverse"):
            print >> sys.stderr, "Skipping %s, not a source word in targetmap" % `wordmap().str(w1)`
            continue
        for l2 in targetmap(name="reverse")[w1]:
            totcnt = 0
        w1 = wordmap().id(w1)
        # Actually, should assert W2W SKIP TRANSLATIONS FROM UNKNOWN WORD
        assert HYPERPARAMETERS["W2W SKIP TRANSLATIONS TO UNKNOWN WORD"]
        if language(w1) is None:
            print >> sys.stderr, "Skipping %s" % ` wordmap().str(w1) `
            continue
        if w1 not in targetmap():
            print >> sys.stderr, "Skipping %s, not a source word in targetmap" % ` wordmap(
            ).str(w1) `
            continue
        for l2 in targetmap()[w1]:
            totcnt = 0
            for cnt, w2 in dictsort(targetmap()[w1][l2]):
                totcnt += cnt
            print wordmap().str(w1), l2, [
                (percent(cnt, totcnt), wordform(w2))
                for cnt, w2 in dictsort(targetmap()[w1][l2])
            ]

    print >> sys.stderr, "REVERSE MAP NOW"

    for w1 in wordmap().all:
        w1 = wordmap().id(w1)
        # Actually, should assert W2W SKIP TRANSLATIONS FROM UNKNOWN WORD
        assert HYPERPARAMETERS["W2W SKIP TRANSLATIONS TO UNKNOWN WORD"]
        if language(w1) is None:
            print >> sys.stderr, "Skipping %s" % ` wordmap().str(w1) `
            continue
        if w1 not in targetmap(name="reverse"):
            print >> sys.stderr, "Skipping %s, not a source word in targetmap" % ` wordmap(
            ).str(w1) `