コード例 #1
0
    def update_vectors(self, sv: SentenceVectors, total_sentences: int):
        """Append ``total_sentences`` rows filled with ``EPS`` to ``sv.vectors``.

        If ``sv.mapfile_path`` is set, the file ``<mapfile_path>.vectors`` is
        opened as a memory map in ``"r+"`` mode with the enlarged shape and only
        the appended rows are written. Otherwise the new rows are stacked below
        the current in-memory array. In both cases ``sv.vectors`` is rebound to
        the enlarged array and ``sv.vectors_norm`` is reset to ``None``.

        Parameters
        ----------
        sv : SentenceVectors
            Object whose ``vectors``, ``vector_size``, ``mapfile_path`` and
            ``vectors_norm`` attributes are read and/or replaced.
        total_sentences : int
            Number of rows to append.
        """
        logger.info(
            f"appending sentence vectors for {total_sentences} sentences")
        sentences_before = len(sv.vectors)
        sentences_after = sentences_before + total_sentences

        if sv.mapfile_path:
            sv.vectors = np_memmap(
                str(sv.mapfile_path) + ".vectors",
                dtype=REAL,
                mode="r+",
                shape=(sentences_after, sv.vector_size),
            )
            # A single broadcast assignment fills every appended row; rows
            # below ``sentences_before`` are not written.
            sv.vectors[sentences_before:sentences_after] = EPS
        else:
            # Build all appended rows in one allocation instead of row by row.
            newvectors = full((total_sentences, sv.vector_size),
                              EPS,
                              dtype=REAL)
            sv.vectors = vstack([sv.vectors, newvectors])
        # Any previously stored value in vectors_norm is discarded.
        sv.vectors_norm = None
コード例 #2
0
    def reset_vectors(self, sv: SentenceVectors, total_sentences: int):
        """Replace ``sv.vectors`` with an all-zero ``(total_sentences, vector_size)`` array.

        If ``sv.mapfile_path`` is set, the file ``<mapfile_path>.vectors`` is
        opened as a memory map in ``'w+'`` mode (creating or overwriting it) and
        zeroed. Otherwise a zero-filled in-memory array is allocated.
        ``sv.vectors_norm`` is reset to ``None`` afterwards.

        Parameters
        ----------
        sv : SentenceVectors
            Object whose ``vectors`` and ``vectors_norm`` attributes are
            replaced; ``vector_size`` and ``mapfile_path`` are read.
        total_sentences : int
            Number of rows of the new array.
        """
        logger.info(
            f"initializing sentence vectors for {total_sentences} sentences")
        shape = (total_sentences, sv.vector_size)
        if sv.mapfile_path:
            sv.vectors = np_memmap(str(sv.mapfile_path) + '.vectors',
                                   dtype=REAL,
                                   mode='w+',
                                   shape=shape)
            # Zero the whole mapping in one pass rather than one row at a time.
            sv.vectors.fill(0)
        else:
            # Allocate already-zeroed memory instead of empty() plus a fill loop.
            sv.vectors = zeros(shape, dtype=REAL)
        # Any previously stored value in vectors_norm is discarded.
        sv.vectors_norm = None
コード例 #3
0
    def test_save_load_with_memmap(self):
        """Check save/load of a SentenceVectors instance configured with a mapfile path.

        A ``(1000, 1000)`` array of ones is written to the ``.vectors`` memmap
        file, the instance is saved, and the test asserts that the save file
        exists, that no ``.vectors.npy`` file was created, and that the loaded
        instance reports the original vector shape.
        """
        p = Path("fse/test/test_data/test_vectors")
        p_target = Path("fse/test/test_data/test_vectors.vectors")
        p_not_exists = Path("fse/test/test_data/test_vectors.vectors.npy")

        try:
            sv = SentenceVectors(2, mapfile_path=str(p))

            shape = (1000, 1000)
            sv.vectors = np.ones(shape, dtype=np.float32)

            memvecs = np.memmap(
                p_target, dtype=np.float32,
                mode='w+', shape=shape)
            memvecs[:] = sv.vectors[:]
            # Drop the only reference to the memmap before the file is used
            # by save/load below.
            del memvecs

            self.assertTrue(p_target.exists())
            sv.save(str(p.absolute()))
            self.assertTrue(p.exists())
            self.assertFalse(p_not_exists.exists())

            sv = SentenceVectors.load(str(p.absolute()))
            self.assertEqual(shape, sv.vectors.shape)
        finally:
            # Remove generated files even when an assertion above fails, so a
            # failed run does not leave artifacts in the test data directory.
            # The exists() guard keeps a missing file from masking the
            # original failure with FileNotFoundError.
            for t in [p, p_target]:
                if t.exists():
                    t.unlink()