def test_link(self):
    """Linking a stored object to a non-existent destination must raise FileNotFoundError."""
    hfs = HashFS(self.tmp_dir)
    key = hfs.put(self.test_dir / 'data/think-hires.jpg')
    with self.assertRaises(FileNotFoundError):
        hfs.link(key, 'data/think.jpg', True)
def test_get_simple(self):
    """An object retrieved with get() must be byte-identical to the file put()."""
    src = self.test_dir / 'data/think-hires.jpg'
    dst = self.tmp_dir / 'think-hires.jpg'
    hfs = HashFS(self.tmp_dir, blocksize=1024 * 1024)
    key = hfs.put(src)
    hfs.get(key, dst)
    # Compare checksums rather than file contents directly.
    self.assertEqual(self.md5sum(src), self.md5sum(dst))
def test_update_log(self):
    """update_log() must append the given file name to store.log."""
    original_file = 'data/think-hires.jpg'
    hfs = HashFS(self.tmp_dir, blocksize=1024 * 1024)
    store_log = os.path.join(self.tmp_dir, 'hashfs', 'log', 'store.log')
    # Ensure the log file exists before updating it.
    with open(store_log, 'a'):
        pass
    hfs.update_log([original_file])
    with open(store_log) as log_file:
        contents = log_file.read()
    self.assertIn(original_file, contents)
def test_put1024K_pathexistence_level2(self):
    """With default levels, a stored file lives under h[:2]/h[2:4] of its name's MD5."""
    hfs = HashFS(self.tmp_dir)
    hfs.put(self.test_dir / 'data/think-hires.jpg')
    digest = hashlib.md5('think-hires.jpg'.encode()).hexdigest()
    expected = os.path.join(self.tmp_dir, 'hashfs', digest[:2], digest[2:4],
                            'think-hires.jpg')
    self.assertTrue(os.path.exists(expected))
def test_put1024K_toomany_levels(self):
    """Directory levels are capped by the digest length: a 32-char MD5 yields at
    most 16 two-char levels even when HashFS is created with levels=17.
    """
    hfs = HashFS(self.tmp_dir, levels=17)
    hfs.put(self.test_dir / 'data/think-hires.jpg')
    # Derive the expected directory levels from the file name's digest instead
    # of hard-coding them: the original computed the digest and then discarded
    # it, duplicating the same value as sixteen literal path segments.
    h = hashlib.md5('think-hires.jpg'.encode()).hexdigest()
    levels = [h[i:i + 2] for i in range(0, len(h), 2)]  # 16 two-char segments
    fullpath = os.path.join(self.tmp_dir, 'hashfs', *levels, 'think-hires.jpg')
    self.assertTrue(os.path.exists(fullpath))
def test_remove_hash(self):
    """remove_hash() must delete each hash's entry from the storage log."""
    idx = MultihashIndex('dataset-spec', self.tmp_dir, self.tmp_dir)
    data = str(self.test_dir / 'data')
    idx.add(data, '')
    idx.add(str(self.test_dir / 'data2'), '')
    hfs = HashFS(self.tmp_dir, blocksize=1024 * 1024)
    o = Objects('dataset-spec', self.tmp_dir)
    # Capture the hashes produced by the commit: the original referenced an
    # undefined 'hash_list' name, which raises NameError at runtime.
    # NOTE(review): assumes commit_index returns the committed hashes — confirm
    # against the Objects API.
    hash_list = o.commit_index(self.tmp_dir, data)
    log_path = os.path.join(self.tmp_dir, 'hashfs', 'log', STORAGE_LOG)
    # Read the log once per phase instead of re-opening it for every hash.
    with open(log_path) as f:
        log_contents = f.read()
    for h in hash_list:
        self.assertTrue(h in log_contents)
    for h in hash_list:
        hfs.remove_hash(h)
    with open(log_path) as f:
        log_contents = f.read()
    for h in hash_list:
        self.assertFalse(h in log_contents)
def test_reset_log(self):
    """reset_log() must remove the store.log file."""
    hfs = HashFS(self.tmp_dir, blocksize=1024 * 1024)
    store_log = os.path.join(self.tmp_dir, 'hashfs', 'log', 'store.log')
    # Make sure the log exists before resetting.
    with open(store_log, 'a'):
        pass
    hfs.reset_log()
    self.assertFalse(os.path.exists(store_log))
def test_put(self):
    """Every file reported by walk() after a single put() must be the stored file."""
    hfs = HashFS(self.tmp_dir)
    hfs.put(self.test_dir / 'data/think-hires.jpg')
    # The original compared against an undefined 'hfsfiles' name (NameError at
    # runtime). Only one object was stored, so walk() should yield exactly that
    # file name — TODO confirm walk() does not also report internal log files.
    hfsfiles = {'think-hires.jpg'}
    for files in hfs.walk():
        for file in files:
            self.assertTrue(file in hfsfiles)