def update_vectors(self, sv: SentenceVectors, total_sentences: int):
    """Given existing sentence vectors, append new ones.

    ``total_sentences`` rows are appended after the rows already held in
    ``sv.vectors``; every appended row is filled with ``EPS``. Afterwards
    ``sv.vectors_norm`` is reset to ``None``.

    Parameters
    ----------
    sv : SentenceVectors
        Container whose ``vectors`` attribute is replaced by the enlarged
        array. Its ``mapfile_path``, ``vector_size`` and ``vectors``
        attributes are read.
    total_sentences : int
        Number of rows to append.
    """
    logger.info(
        f"appending sentence vectors for {total_sentences} sentences")
    sentences_before = len(sv.vectors)
    sentences_after = sentences_before + total_sentences

    if sv.mapfile_path:
        # Re-open the backing file with the enlarged shape.
        # NOTE(review): relies on np.memmap growing the file when it is
        # opened in "r+" mode with a larger shape -- confirm for the numpy
        # version in use.
        sv.vectors = np_memmap(
            str(sv.mapfile_path) + ".vectors",
            dtype=REAL,
            mode="r+",
            shape=(sentences_after, sv.vector_size),
        )
        # One broadcast assignment instead of a per-row temporary array.
        sv.vectors[sentences_before:sentences_after] = EPS
    else:
        newvectors = full(
            (total_sentences, sv.vector_size), fill_value=EPS, dtype=REAL)
        sv.vectors = vstack([sv.vectors, newvectors])
    sv.vectors_norm = None
def reset_vectors(self, sv: SentenceVectors, total_sentences: int):
    """Initialize all sentence vectors to zero and overwrite existing files.

    ``sv.vectors`` is replaced by a zero-filled array of shape
    ``(total_sentences, sv.vector_size)``. When ``sv.mapfile_path`` is set
    the array is a memory map on ``<mapfile_path>.vectors`` opened in
    ``'w+'`` mode; otherwise it is an in-memory array. Afterwards
    ``sv.vectors_norm`` is reset to ``None``.

    Parameters
    ----------
    sv : SentenceVectors
        Container whose ``vectors`` attribute is replaced. Its
        ``mapfile_path`` and ``vector_size`` attributes are read.
    total_sentences : int
        Number of rows to allocate.
    """
    logger.info(
        f"initializing sentence vectors for {total_sentences} sentences")
    shape = (total_sentences, sv.vector_size)

    if sv.mapfile_path:
        sv.vectors = np_memmap(
            str(sv.mapfile_path) + '.vectors',
            dtype=REAL,
            mode='w+',
            shape=shape,
        )
        # Explicit zero fill, as before, but as a single assignment rather
        # than one temporary array per row.
        sv.vectors[:] = 0
    else:
        sv.vectors = zeros(shape, dtype=REAL)
    sv.vectors_norm = None
def test_save_load_with_memmap(self):
    """Save and reload a SentenceVectors object that has a mapfile path.

    Checks that saving creates the main file, that no separate
    ``.vectors.npy`` file is written next to the memmap file, and that the
    reloaded vectors have the original shape.
    """
    p = Path("fse/test/test_data/test_vectors")
    p_target = Path("fse/test/test_data/test_vectors.vectors")
    p_not_exists = Path("fse/test/test_data/test_vectors.vectors.npy")

    sv = SentenceVectors(2, mapfile_path=str(p))
    shape = (1000, 1000)
    sv.vectors = np.ones(shape, dtype=np.float32)

    try:
        # Write the vectors to the memmap file by hand, then drop the
        # handle before the file is checked.
        memvecs = np.memmap(
            p_target, dtype=np.float32, mode='w+', shape=shape)
        memvecs[:] = sv.vectors[:]
        del memvecs

        self.assertTrue(p_target.exists())

        sv.save(str(p.absolute()))
        self.assertTrue(p.exists())
        self.assertFalse(p_not_exists.exists())

        sv = SentenceVectors.load(str(p.absolute()))
        self.assertEqual(shape, sv.vectors.shape)
    finally:
        # Remove the artifacts even when an assertion above fails; the
        # exists() guard keeps cleanup from masking the original error.
        for t in [p, p_target]:
            if t.exists():
                t.unlink()