def _getCombinedGloveFolder(name, gloveSize, visualSize, version=1):
    """Return the folder path for one combined-embedding configuration.

    The path is rooted at ``DatasetManager._getDatasetsFolderPath()``.
    Version 1 uses ``/CombinedGlove-<name>/Glove<g>-Visual<v>/``; any other
    version nests the same layout under an extra ``/V<version>`` folder.

    Args:
        name: Label inserted after ``CombinedGlove-``.
        gloveSize: Value inserted after ``Glove``.
        visualSize: Value inserted after ``Visual``.
        version: Layout version; 1 selects the un-prefixed layout.

    Returns:
        The folder path as a string, ending with a trailing slash.
    """
    datasetsRoot = DatasetManager._getDatasetsFolderPath()

    # Non-default versions live under their own "V<version>" folder.
    if version != 1:
        versionedFolder = "/V{}/CombinedGlove-{}/Glove{}-Visual{}/".format(
            version, name, gloveSize, visualSize)
        return datasetsRoot + versionedFolder

    defaultFolder = "/CombinedGlove-{}/Glove{}-Visual{}/".format(
        name, gloveSize, visualSize)
    return datasetsRoot + defaultFolder
def getTopAndSkipCombined(top, skip, version=1):
    """List the keyed combined-embedding paths for a Top/Skip configuration.

    Args:
        top: Value inserted after ``Top`` in the folder name (followed by "K").
        skip: Value inserted after ``Skip`` in the folder name.
        version: Layout version; 1 uses the un-prefixed layout, any other
            value nests the folder under ``/V<version>``.

    Returns:
        A list of path strings, one per hard-coded (glove, visual) size
        pair, in the order the pairs are declared below.
    """
    datasetsRoot = DatasetManager._getDatasetsFolderPath()
    if version == 1:
        configFolder = "/CombinedGlove-Top{}K-Skip{}/".format(top, skip)
    else:
        configFolder = "/V{}/CombinedGlove-Top{}K-Skip{}/".format(version, top, skip)
    basePath = datasetsRoot + configFolder

    # Fixed (gloveSize, visualSize) combinations covered by this listing.
    sizePairs = [
        (50, 50), (50, 150), (100, 100), (200, 100), (100, 200),
        (50, 300), (100, 300), (300, 50), (300, 150), (300, 300),
    ]

    keyedPaths = []
    for gloveDim, visualDim in sizePairs:
        relativeFile = "Glove{}-Visual{}/Keyed-Glove{}-Visual{}".format(
            gloveDim, visualDim, gloveDim, visualDim)
        keyedPaths.append(basePath + relativeFile)
    return keyedPaths
def concatToStandardGlove(visualGloveBaseFolder, name, sizeCombinations=None, version=1):
    """Combine standard and visual embeddings for each requested size pair.

    For every (gloveSize, visualGloveSize) pair this builds the two input
    paths, makes sure the output folder exists, calls
    ``GloveFormatter.combineGloveFiles`` and then
    ``GloveFormatter.createKeyedVectorsFromGloveFile`` on the combined file.

    Args:
        visualGloveBaseFolder: Path prefix of the visual embedding folders;
            ``-<visualSize>/Keyed-VisualGlove-<visualSize>`` is appended.
        name: Label forwarded to the output folder/file name helpers.
        sizeCombinations: Iterable of (gloveSize, visualGloveSize) pairs.
            When None, ``STANDARD_COMBINATIONS`` is used.
        version: Layout version forwarded to the output path helpers.
    """
    # Identity check: "== None" dispatches to the argument's __eq__, which is
    # ambiguous (or raises) for array-like inputs.
    if sizeCombinations is None:
        sizeCombinations = STANDARD_COMBINATIONS

    # The dataset root is used unchanged for every pair, so look it up once.
    datasetsRoot = DatasetManager._getDatasetsFolderPath()

    for gloveSize, visualGloveSize in sizeCombinations:
        gloveFile = datasetsRoot + "/StandardGlove/Keyed-Glove{}-Visual0".format(gloveSize)
        visualGloveFile = visualGloveBaseFolder + "-{}/Keyed-VisualGlove-{}".format(
            visualGloveSize, visualGloveSize)

        saveDir = _getCombinedGloveFolder(name, gloveSize, visualGloveSize, version)
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(saveDir, exist_ok=True)

        newFileName = _getCombinedGlovedFilename(name, gloveSize, visualGloveSize, version)
        GloveFormatter.combineGloveFiles(visualGloveFile, gloveFile, newFileName)

        keyedFilename = _getCombinedKeyedFilename(name, gloveSize, visualGloveSize, version)
        GloveFormatter.createKeyedVectorsFromGloveFile(newFileName, keyedFilename)
def main():
    """Build the Top-100K combined embeddings for a fixed set of size pairs.

    For each (gloveSize, visualGloveSize) pair this combines the standard
    ``glove.6B.<size>d.txt`` file with the matching Top-100K visual embedding
    file via ``GloveFormatter.combineGloveFiles`` and then calls
    ``GloveFormatter.createKeyedVectorsFromGloveFile`` on the result.

    Outputs are written under ``<datasets folder>/CombinedGlove-Top100K/``,
    the same root that ``getTop100KCombinedPaths`` reads from.
    """
    datasetPath = DatasetManager._getDatasetsFolderPath()
    print(datasetPath)

    sizes = [(50, 50), (50, 150), (100, 100), (100, 200), (200, 100)]
    for gloveSize, visualGloveSize in sizes:
        gloveFile = datasetPath + "/StandardGlove/glove.6B.{}d.txt".format(gloveSize)
        visualGloveFile = datasetPath + "/VisualEmbeddings/Top-100K-{}/VisualGlove-{}.txt".format(
            visualGloveSize, visualGloveSize)

        # Derive the output folder from the dataset root instead of a
        # hard-coded home directory, so the files land where
        # getTop100KCombinedPaths looks for them on any machine.
        saveDir = datasetPath + "/CombinedGlove-Top100K/Glove{}-Visual{}/".format(
            gloveSize, visualGloveSize)
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(saveDir, exist_ok=True)

        newFileName = saveDir + "CombinedGlove-{}-{}.txt".format(gloveSize, visualGloveSize)
        GloveFormatter.combineGloveFiles(visualGloveFile, gloveFile, newFileName)

        keyedFilename = saveDir + "Keyed-Glove{}-Visual{}".format(gloveSize, visualGloveSize)
        GloveFormatter.createKeyedVectorsFromGloveFile(newFileName, keyedFilename)
def getTop100KCombinedPaths():
    """List the keyed combined-embedding paths of the Top-100K configuration.

    Returns:
        A list of path strings rooted at
        ``<datasets folder>/CombinedGlove-Top100K/``, one per hard-coded
        (glove, visual) size pair, in declaration order.
    """
    sizes = [(50, 50), (50, 150), (100, 100), (200, 100), (100, 200)]
    folderPath = DatasetManager._getDatasetsFolderPath() + "/CombinedGlove-Top100K/"
    # folderPath already ends with "/", so the per-size part must not start
    # with one; otherwise every returned path contains a "//".
    return [
        folderPath + "Glove{}-Visual{}/Keyed-Glove{}-Visual{}".format(g, v, g, v)
        for g, v in sizes
    ]