def main():
    """Load the full VisualGlove embeddings, project them and plot the result.

    Steps, in order:
      1. Read the word -> embedding mapping from "VisualGlove-Full.txt" in the
         folder reported by DatasetManager.
      2. Cache that mapping via FileProcessing under 'tempfullEmbeddings'.
      3. Pass the raw vectors through getTsneEmbeddings and cache the result
         under 'tempEmbeddings'.
      4. Hand the projected vectors and their word labels to
         visualizeEmbeddings.
    """
    from Datasets import DatasetManager

    sourceFolder = DatasetManager.getVisualEmbeddingsFullSizeFolderPath()
    # The folder path is concatenated as-is, so it presumably already ends
    # with a path separator -- verify against DatasetManager.
    wordToVector = DatasetManager.getWordsAndEmbeddingsFromFile(
        sourceFolder + "VisualGlove-Full.txt")
    FileProcessing.saveToFile(wordToVector, 'tempfullEmbeddings')

    # Collect the vectors in key order so they line up with the labels
    # gathered further down.
    vectors = []
    for word in wordToVector.keys():
        vectors.append(wordToVector[word])

    print("Getting TSNE embeddings")
    projected = getTsneEmbeddings(vectors)

    print("Saving to file...")
    FileProcessing.saveToFile(projected, 'tempEmbeddings')

    wordLabels = list(wordToVector.keys())
    visualizeEmbeddings(projected, wordLabels)
def concatenateEmbeddingsFiles(folderPath, newFileName):
    """Merge the embeddings files found directly in a folder into one file.

    Every non-directory entry of ``folderPath`` is loaded with
    ``DatasetManager.getWordsAndEmbeddingsFromFile(..., asStr=True)`` and each
    word is written to ``newFileName`` as ``"<word> <embedding>\\n"``. A word
    that occurs in several files is written only once; the first file that
    contains it wins.

    Args:
        folderPath: Folder whose files are merged. Sub-folders are skipped
            (the scan is not recursive).
        newFileName: Path of the merged file. It is created or overwritten and
            written as UTF-8.

    Raises:
        OSError: If ``folderPath`` cannot be listed or ``newFileName`` cannot
            be opened for writing.
    """
    outputPath = os.path.abspath(newFileName)

    # Collect the inputs *before* the output file is created. Otherwise an
    # output that lives inside folderPath would show up in the listing and be
    # read back as one of its own inputs.
    # os.listdir returns entries in arbitrary order; sorting makes the
    # "first file wins" rule (and therefore the merged file) reproducible.
    inputFiles = []
    for entryName in sorted(os.listdir(folderPath)):
        entryPath = os.path.join(folderPath, entryName)
        if os.path.isdir(entryPath):
            continue
        # Compared by absolute path because the output may not exist yet,
        # which rules out os.path.samefile.
        if os.path.abspath(entryPath) == outputPath:
            continue
        inputFiles.append((entryName, entryPath))

    seenWords = set()
    with open(newFileName, 'w', encoding='utf-8') as newFile:
        for entryName, entryPath in inputFiles:
            print(entryName)
            localEmbeddings = DatasetManager.getWordsAndEmbeddingsFromFile(
                entryPath, asStr=True)

            for w in localEmbeddings:
                if w in seenWords:
                    continue
                seenWords.add(w)
                newFile.write("{} {}\n".format(
                    w,
                    _embeddingsToString(localEmbeddings[w],
                                        strEmbeddings=True)))

            # Running total of distinct words written so far.
            print("Processed lines:", len(seenWords))