def main():
    """Load the "VisualGlove-Full.txt" embeddings, reduce them, and visualize them.

    Steps, in order:
      1. Read the word -> embedding mapping from "VisualGlove-Full.txt" in the
         folder reported by DatasetManager.
      2. Save that mapping under the name 'tempfullEmbeddings'.
      3. Pass the embedding vectors to getTsneEmbeddings and save the result
         under the name 'tempEmbeddings'.
      4. Hand the reduced vectors, together with their words as labels, to
         visualizeEmbeddings.
    """
    from Datasets import DatasetManager

    sourceFile = (DatasetManager.getVisualEmbeddingsFullSizeFolderPath()
                  + "VisualGlove-Full.txt")
    wordVectors = DatasetManager.getWordsAndEmbeddingsFromFile(sourceFile)
    FileProcessing.saveToFile(wordVectors, 'tempfullEmbeddings')

    # Vectors only, in the mapping's iteration order, so they stay aligned
    # with the labels built from the same mapping below.
    vectors = [wordVectors[word] for word in wordVectors]
    print("Getting TSNE embeddings")
    projected = getTsneEmbeddings(vectors)

    print("Saving to file...")
    FileProcessing.saveToFile(projected, 'tempEmbeddings')

    visualizeEmbeddings(projected, list(wordVectors))
# Example #2
# 0
def concatenateEmbeddingsFiles(folderPath, newFileName):
    """Merge the embeddings files directly inside ``folderPath`` into one file.

    Every non-directory entry of ``folderPath`` is loaded with
    ``DatasetManager.getWordsAndEmbeddingsFromFile(..., asStr=True)`` and one
    ``"<word> <embedding>"`` line is written to ``newFileName`` per word.
    A word found in several files is written only once; the first occurrence
    (files are visited in sorted name order) wins.

    Progress is reported on stdout: the name of each file as it is processed,
    followed by the running count of distinct words written so far.

    Args:
        folderPath: Directory whose files are concatenated. Sub-directories
            are skipped, not descended into.
        newFileName: Path of the UTF-8 text file to create. An existing file
            is overwritten. It may live inside ``folderPath``; it is then
            excluded from the inputs.
    """
    # Resolved once so the output file can be recognised among the inputs.
    outputPath = os.path.normcase(os.path.realpath(newFileName))
    seenWords = set()
    with open(newFileName, 'w', encoding='utf-8') as newFile:
        # os.listdir yields entries in arbitrary order; sorting makes the
        # "first occurrence wins" rule reproducible between runs.
        for fileName in sorted(os.listdir(folderPath)):
            sourcePath = os.path.join(folderPath, fileName)
            if os.path.isdir(sourcePath):
                continue
            # The output file already exists at this point (it was opened
            # above). Reading it back while it is being written could feed
            # truncated lines into the merge, so never treat it as an input.
            if os.path.normcase(os.path.realpath(sourcePath)) == outputPath:
                continue

            print(fileName)
            localEmbeddings = DatasetManager.getWordsAndEmbeddingsFromFile(
                sourcePath, asStr=True)
            for word in localEmbeddings:
                if word in seenWords:
                    continue
                seenWords.add(word)
                newFile.write("{} {}\n".format(
                    word,
                    _embeddingsToString(localEmbeddings[word],
                                        strEmbeddings=True)))
            print("Processed lines:", len(seenWords))