def test_fs_detection_on_container_hdfs(self):
    """Open a zip container through an HDFS path and read a file from it.

    A zip archive is built locally from ``self.dir_name``, copied to the
    path that the HDFS connection reports for ``'.'``, and opened with
    ``chainerio.open_as_container``.  The content of
    ``self.tmpfile_path`` inside the container is compared with
    ``self.test_string_str``.

    The local archive and its HDFS copy are removed in ``finally``
    clauses so that a failing assertion does not leave them behind.
    """
    # Create a container for testing
    zip_file_name = "test"
    zip_file_path = zip_file_name + ".zip"

    # TODO(tianqi): add functionality to chainerio
    from pyarrow import hdfs
    conn = hdfs.connect()
    try:
        hdfs_home = conn.info('.')['path']
    finally:
        # Close the connection even when the lookup above raises.
        conn.close()
    hdfs_file_path = os.path.join(hdfs_home, zip_file_path)

    shutil.make_archive(zip_file_name, "zip", base_dir=self.dir_name)
    try:
        # Copy the local archive to HDFS byte for byte.
        with chainerio.open(hdfs_file_path, "wb") as hdfs_file:
            with chainerio.open(zip_file_path, "rb") as posix_file:
                hdfs_file.write(posix_file.read())

        try:
            with chainerio.open_as_container(hdfs_file_path) as container:
                with container.open(self.tmpfile_path, "r") as f:
                    self.assertEqual(f.read(), self.test_string_str)
        finally:
            chainerio.remove(hdfs_file_path)
    finally:
        chainerio.remove(zip_file_path)
def save(self, step, f_id, files):
    """Write a checkpoint file and drop the oldest tracked one if needed.

    The checkpoint is written to ``self.args.output_dir`` as
    ``ckpt_<n>.pt.<team>``.  ``<n>`` is ``step``, except when
    ``self.args.resume_step`` is non-negative and ``self.args.phase2``
    is truthy; then ``self.args.phase1_end_step`` is added to it.

    Args:
        step: Step number used to build the checkpoint file name.
        f_id: Stored as the first element of the ``'files'`` entry.
        files: List appended after ``f_id`` in the ``'files'`` entry.
    """
    # Only save the model itself: use ``.module`` when the attribute exists.
    model_to_save = self.model
    if hasattr(model_to_save, 'module'):
        model_to_save = model_to_save.module

    another_model_to_save = self.another_model
    if hasattr(another_model_to_save, 'module'):
        another_model_to_save = another_model_to_save.module

    args = self.args
    offset_by_phase1 = args.resume_step >= 0 and args.phase2
    ckpt_step = (step + args.phase1_end_step) if offset_by_phase1 else step
    output_save_file = os.path.join(
        args.output_dir, "ckpt_{}.pt.{}".format(ckpt_step, self.team))

    with chio.open(output_save_file, "wb") as f:
        checkpoint = {
            'model': model_to_save.state_dict(),
            'another_model': another_model_to_save.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'master params': list(amp.master_params(self.optimizer)),
            'files': [f_id] + files,
        }
        torch.save(checkpoint, f)

    # Track the new checkpoint; once more than three paths are tracked,
    # forget the oldest one and delete its file.
    recent_paths = self.most_recent_ckpts_paths
    recent_paths.append(output_save_file)
    if len(recent_paths) > 3:
        chio.remove("{}".format(recent_paths.pop(0)))
def test_open_as_container(self):
    """Exercise list/isdir/info/open on a zip container.

    The archive is built from ``self.dir_name`` and opened with
    ``chainerio.open_as_container``.  The archive file is removed in a
    ``finally`` clause so that a failing assertion does not leave
    ``test.zip`` behind.
    """
    # Create a container for testing
    chainerio.set_root("posix")
    zip_file_name = "test"
    zip_file_path = zip_file_name + ".zip"

    shutil.make_archive(zip_file_name, "zip", base_dir=self.dir_name)

    try:
        with chainerio.open_as_container(zip_file_path) as container:
            # Listing without a path: the directory entry is present,
            # the file stored below it is not.
            # NOTE(review): dir_name[:-1] drops the last character,
            # presumably a trailing slash -- confirm against setUp.
            file_list = list(container.list())
            self.assertIn(self.dir_name[:-1], file_list)
            self.assertNotIn(self.tmpfile_path, file_list)
            self.assertNotIn("", file_list)

            # Listing the directory: the file's base name is present,
            # the directory entry itself is not.
            file_list = list(container.list(self.dir_name))
            self.assertNotIn(self.dir_name[:-1], file_list)
            self.assertIn(os.path.basename(self.tmpfile_path), file_list)
            self.assertNotIn("", file_list)

            self.assertTrue(container.isdir(self.dir_name))
            self.assertFalse(container.isdir(self.tmpfile_path))

            self.assertIsInstance(container.info(), str)

            with container.open(self.tmpfile_path, "r") as f:
                self.assertEqual(f.read(), self.test_string_str)
    finally:
        chainerio.remove(zip_file_path)
def test_remove(self):
    """Check ``chainerio.remove`` on a single file and on a nested directory."""
    file_name = "test_remove.txt"
    top_dir = "test_dir/"
    inner_dir = os.path.join(top_dir, "nested_file/")
    inner_file = os.path.join(inner_dir, file_name)

    # test remove on one file
    with chainerio.open(file_name, 'w') as fp:
        fp.write('foobar')
    self.assertTrue(chainerio.exists(file_name))
    chainerio.remove(file_name)
    self.assertFalse(chainerio.exists(file_name))

    # test remove on directory
    chainerio.makedirs(inner_dir)
    with chainerio.open(inner_file, 'w') as fp:
        fp.write('foobar')

    created = (top_dir, inner_dir, inner_file)
    for path in created:
        self.assertTrue(chainerio.exists(path))

    # Second argument True: the directory is removed with its contents,
    # which the assertions below verify.
    chainerio.remove(top_dir, True)

    for path in created:
        self.assertFalse(chainerio.exists(path))
def test_makedirs(self):
    """Check that ``chainerio.makedirs`` creates a nested directory.

    The directory is requested through a ``file://`` path and verified
    with ``os.path.isdir`` on the plain path.
    """
    top_dir = "testmakedirs/"
    target_dir = top_dir + "test_nest_dir"
    target_uri = "file://" + target_dir

    chainerio.makedirs(target_uri)
    self.assertTrue(os.path.isdir(target_dir))

    # Clean up the top-level directory created above.
    chainerio.remove(top_dir, True)
def test_open_as_container(self):
    """Exercise list/isdir/info/open on a zip container.

    The archive is built from ``self.tmpdir.name`` and opened with
    ``chainerio.open_as_container``.  Paths used inside the container
    are the fixture paths with leading slashes stripped.  The archive
    file is removed in a ``finally`` clause so that a failing assertion
    does not leave ``test.zip`` behind.
    """
    # Create a container for testing
    chainerio.set_root("posix")
    zip_file_name = "test"
    zip_file_path = zip_file_name + ".zip"

    # in the zip, the leading slash will be removed
    # TODO(tianqi): related to issue #61
    dirname_zip = self.tmpdir.name.lstrip('/') + '/'
    file_name_zip = self.tmpfile_path.lstrip('/')
    first_level_dir = dirname_zip.split('/')[0]

    shutil.make_archive(zip_file_name, "zip", base_dir=self.tmpdir.name)

    try:
        with chainerio.open_as_container(zip_file_path) as container:
            # Listing without a path: the first path component is
            # present, the full file path is not.
            file_list = list(container.list())
            self.assertIn(first_level_dir, file_list)
            self.assertNotIn(file_name_zip, file_list)
            self.assertNotIn("", file_list)

            # Listing the directory: the file's base name is present,
            # the first path component is not.
            file_list = list(container.list(dirname_zip))
            self.assertNotIn(first_level_dir, file_list)
            self.assertIn(os.path.basename(file_name_zip), file_list)
            self.assertNotIn("", file_list)

            self.assertTrue(container.isdir(dirname_zip))
            self.assertFalse(container.isdir(file_name_zip))

            self.assertIsInstance(container.info(), str)

            with container.open(file_name_zip, "r") as f:
                self.assertEqual(f.read(), self.test_string_str)
    finally:
        chainerio.remove(zip_file_path)
def test_fs_detection_on_container_posix(self):
    """Open a zip container through a ``file://`` path and read from it.

    The archive is built from ``self.dir_name``; the content of
    ``self.tmpfile_path`` inside the container is compared with
    ``self.test_string_str``.  The archive file is removed in a
    ``finally`` clause so that a failing assertion does not leave
    ``test.zip`` behind.
    """
    # Create a container for testing
    zip_file_name = "test"
    zip_file_path = zip_file_name + ".zip"
    posix_file_path = "file://" + zip_file_path

    shutil.make_archive(zip_file_name, "zip", base_dir=self.dir_name)

    try:
        with chainerio.open_as_container(posix_file_path) as container:
            with container.open(self.tmpfile_path, "r") as f:
                self.assertEqual(f.read(), self.test_string_str)
    finally:
        chainerio.remove(zip_file_path)
def test_fs_detection_on_container_posix(self):
    """Open a zip container through a ``file://`` path and read from it.

    The archive is built from ``self.tmpdir.name``; the file is opened
    inside the container under ``self.tmpfile_path`` with leading
    slashes stripped, and its content is compared with
    ``self.test_string_str``.  The archive file is removed in a
    ``finally`` clause so that a failing assertion does not leave
    ``test.zip`` behind.
    """
    # Create a container for testing
    zip_file_name = "test"
    zip_file_path = zip_file_name + ".zip"
    posix_file_path = "file://" + zip_file_path

    # in the zip, the leading slash will be removed
    file_name_zip = self.tmpfile_path.lstrip('/')

    shutil.make_archive(zip_file_name, "zip", base_dir=self.tmpdir.name)

    try:
        with chainerio.open_as_container(posix_file_path) as container:
            with container.open(file_name_zip, "r") as f:
                self.assertEqual(f.read(), self.test_string_str)
    finally:
        chainerio.remove(zip_file_path)
def test_rename(self):
    """Check that ``chainerio.rename`` moves a file and keeps its content.

    A file is written under a scratch directory, renamed, read back from
    the new path, and the old path is checked to be gone.  The scratch
    directory is removed in a ``finally`` clause so that a failing
    rename or assertion does not leave it behind.
    """
    new_tmp_dir = "testmkdir/"
    chainerio.makedirs("file://" + new_tmp_dir)

    try:
        src = os.path.join("file://", new_tmp_dir, 'src')
        dst = os.path.join("file://", new_tmp_dir, 'dst')
        with chainerio.open(src, 'w') as fp:
            fp.write('foobar')

        chainerio.rename(src, dst)
        with chainerio.open(dst, 'r') as fp:
            data = fp.read()
            assert data == 'foobar'

        assert not chainerio.exists(src)
        assert chainerio.exists(dst)
    finally:
        chainerio.remove(new_tmp_dir, True)
def tearDown(self):
    """Clean up ``self.tmpdir`` and remove the file at ``self.zip_file_path``."""
    scratch_dir = self.tmpdir
    scratch_dir.cleanup()

    archive_path = self.zip_file_path
    chainerio.remove(archive_path)
def tearDown(self):
    """Remove ``self.dir_name`` through ``chainerio.remove``."""
    # NOTE(review): the second positional argument is presumably the
    # recursive flag -- confirm against the chainerio API.
    fixture_dir = self.dir_name
    chainerio.remove(fixture_dir, True)
def test_mkdir(self):
    """Check that ``chainerio.mkdir`` creates a directory.

    The directory is requested through a ``file://`` path and verified
    with ``os.path.isdir`` on the plain path.
    """
    dir_path = "testmkdir/"
    dir_uri = "file://" + dir_path

    chainerio.mkdir(dir_uri)
    self.assertTrue(os.path.isdir(dir_path))

    # Clean up the directory created above.
    chainerio.remove(dir_path, True)