Example #1
0
def loadEmbeddings(embeddingsFilePath):
    """Load an embeddings matrix from a binary file.

    The file is read as two header values obtained with ``binary.readi``
    (the embeddings count, then the embedding size), followed by one
    ``binary.readf(file, embeddingSize)`` record per embedding.

    Args:
        embeddingsFilePath: Path of the binary embeddings file.

    Returns:
        A ``float32`` numpy array of shape ``(embeddingsCount, embeddingSize)``
        whose rows are in file order.
    """
    with open(embeddingsFilePath, 'rb') as embeddingsFile:
        embeddingsCount = binary.readi(embeddingsFile)
        embeddingSize = binary.readi(embeddingsFile)

        # Allocate directly as float32: going through the default float64 and
        # then .astype() would build a throwaway matrix twice the final size.
        embeddings = numpy.empty((embeddingsCount, embeddingSize), dtype='float32')

        for embeddingIndex in range(embeddingsCount):
            embeddings[embeddingIndex] = binary.readf(embeddingsFile, embeddingSize)

            log.progress('Loading embeddings: {0:.3f}%.', embeddingIndex + 1, embeddingsCount)

        log.info('Loading embeddings complete. {0} embeddings loaded.', embeddingsCount)

        return embeddings
Example #2
0
def _readW2VWord(w2vFile):
    """Read up to the next space and return the stripped word, or None at end of file."""
    chunks = []
    while True:
        char = w2vFile.read(1)

        if not char:
            # End of file; any partially read word is discarded.
            return None

        if char == b' ':
            break

        chunks.append(char)

    # Strip while still bytes so only ASCII whitespace (e.g. the newline left
    # over from the previous record) is removed.
    word = b''.join(chunks).strip()

    # The file is opened in binary mode, so on Python 3 the word is bytes and
    # must be decoded; on Python 2 bytes is already the native str type.
    if not isinstance(word, str):
        word = word.decode('utf-8')

    return word


def loadW2VParameters(filePath, loadEmbeddings=True):
    """Load the vocabulary, and optionally the vectors, from a W2V binary file.

    The file starts with a header line holding the embeddings count and the
    embedding size separated by whitespace. Every following record is a
    space-terminated word followed by the embedding values, which are either
    read with ``binary.readf`` or skipped as ``embeddingSize * 4`` bytes
    (i.e. values are assumed to be 4 bytes each).

    Args:
        filePath: Path of the file to read.
        loadEmbeddings: When true (default), embedding values are loaded too;
            when false they are skipped and no matrix is allocated.

    Returns:
        ``(wordIndexMap, embeddings)`` when ``loadEmbeddings`` is true,
        otherwise only ``wordIndexMap``. ``wordIndexMap`` is an
        ``OrderedDict`` from word to row index; ``embeddings`` is a numpy
        array of shape ``(embeddingsCount, embeddingSize)``.

    Raises:
        ValueError: If the header line does not hold exactly two integers.
    """
    with open(filePath, 'rb') as w2vFile:
        # split() without arguments works on bytes and str alike and drops
        # the trailing newline; int() accepts both types.
        embeddingsCount, embeddingSize = (int(value) for value in w2vFile.readline().split())

        wordIndexMap = collections.OrderedDict()
        # Only pay for the matrix when the caller actually wants it.
        embeddings = numpy.zeros((embeddingsCount, embeddingSize)) if loadEmbeddings else None

        embeddingIndex = 0
        while True:
            word = _readW2VWord(w2vFile)
            if word is None:
                break

            wordIndexMap[word] = len(wordIndexMap)
            if loadEmbeddings:
                embeddings[wordIndexMap[word]] = binary.readf(w2vFile, embeddingSize)
            else:
                # Jump over the values of this record without reading them.
                w2vFile.seek(embeddingSize * 4, io.SEEK_CUR)

            embeddingIndex += 1
            log.progress('Loading W2V embeddings: {0:.3f}%. {1} embeddings {2} features each.',
                         embeddingIndex,
                         embeddingsCount,
                         embeddingIndex,
                         embeddingSize)

        log.lineBreak()

        if loadEmbeddings:
            return wordIndexMap, embeddings

        return wordIndexMap