def test_remove(self):
    test_file = "test_remove.txt"
    test_dir = "test_dir/"
    nested_dir = os.path.join(test_dir, "nested_file/")
    nested_file = os.path.join(nested_dir, test_file)

    with chainerio.open(test_file, 'w') as fp:
        fp.write('foobar')

    # test remove on one file
    self.assertTrue(chainerio.exists(test_file))
    chainerio.remove(test_file)
    self.assertFalse(chainerio.exists(test_file))

    # test remove on directory
    chainerio.makedirs(nested_dir)
    with chainerio.open(nested_file, 'w') as fp:
        fp.write('foobar')

    self.assertTrue(chainerio.exists(test_dir))
    self.assertTrue(chainerio.exists(nested_dir))
    self.assertTrue(chainerio.exists(nested_file))

    chainerio.remove(test_dir, True)

    self.assertFalse(chainerio.exists(test_dir))
    self.assertFalse(chainerio.exists(nested_dir))
    self.assertFalse(chainerio.exists(nested_file))
def test_makedirs(self):
    new_tmp_dir = "testmakedirs/"
    nested_dir = new_tmp_dir + "test_nest_dir"

    chainerio.makedirs("file://" + nested_dir)
    self.assertTrue(os.path.isdir(nested_dir))

    chainerio.remove(new_tmp_dir, True)
def test_rename(self):
    new_tmp_dir = "testmkdir/"
    chainerio.makedirs("file://" + new_tmp_dir)

    src = os.path.join("file://", new_tmp_dir, 'src')
    dst = os.path.join("file://", new_tmp_dir, 'dst')

    with chainerio.open(src, 'w') as fp:
        fp.write('foobar')

    chainerio.rename(src, dst)
    with chainerio.open(dst, 'r') as fp:
        data = fp.read()
        assert data == 'foobar'

    assert not chainerio.exists(src)
    assert chainerio.exists(dst)

    chainerio.remove(new_tmp_dir, True)
def test_list(self):
    nested_dir_name1 = "nested_dir1"
    nested_dir_name2 = "nested_dir2"
    nested_dir_path1 = os.path.join(self.tmpdir.name, nested_dir_name1)
    nested_dir_path2 = os.path.join(nested_dir_path1, nested_dir_name2)
    nested_dir_path2_relative = os.path.join(nested_dir_name1,
                                             nested_dir_name2)

    chainerio.makedirs(nested_dir_path1)
    chainerio.makedirs(nested_dir_path2)

    # a non-recursive list returns only the top-level entries
    file_list = list(chainerio.list(self.tmpdir.name))
    self.assertIn(nested_dir_name1, file_list)
    self.assertIn(self.tmpfile_name, file_list)
    self.assertNotIn(nested_dir_path2_relative, file_list)

    # a recursive list also returns nested entries as relative paths
    file_list = list(chainerio.list(self.tmpdir.name, recursive=True))
    self.assertIn(nested_dir_name1, file_list)
    self.assertIn(self.tmpfile_name, file_list)
    self.assertIn(nested_dir_path2_relative, file_list)
def setup_training(self):
    assert torch.cuda.is_available()
    torch.cuda.set_device(self.args.local_rank)
    self.device = torch.device("cuda", self.args.local_rank)

    # Initialize the distributed backend, which takes care of
    # synchronizing nodes/GPUs.
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://')
    self.rank = torch.distributed.get_rank()
    self.size = torch.distributed.get_world_size()

    if self.args.online_distillation == "none":
        # Single team: all ranks belong to team 0 and rank 0 is its master.
        self.team = 0
        self.team_masters = [0]
        self.team_master = 0
        self.is_team_master = (self.rank == 0)
        self.local_group = torch.distributed.new_group(
            ranks=list(range(0, self.size)))
        self.team_rank = torch.distributed.get_rank()
        self.team_size = torch.distributed.get_world_size()
    else:
        assert self.size % 2 == 0, \
            'with distillation, world size must be even'
        # Split the ranks into two equally sized teams; the first rank
        # of each team acts as its master.
        self.team = self.rank // (self.size // 2)
        self.team_masters = [0, self.size // 2]
        self.team_master = self.team_masters[self.team]
        self.is_team_master = (self.rank % (self.size // 2) == 0)
        local_group0 = torch.distributed.new_group(
            ranks=list(range(0, self.size // 2)))
        local_group1 = torch.distributed.new_group(
            ranks=list(range(self.size // 2, self.size)))
        self.local_groups = [local_group0, local_group1]
        self.local_group = self.local_groups[self.team]
        self.team_rank = self.rank % (self.size // 2)
        self.team_size = self.size // 2

        comm_model_group_rank0 = \
            [0] + list(range(self.team_size, self.team_size * 2))
        comm_model_group_rank1 = \
            [self.team_size] + list(range(0, self.team_size))
        self.comm_model_group_ranks = [
            comm_model_group_rank0,
            comm_model_group_rank1
        ]

        if self.args.online_distillation == "logit":
            # Pair up corresponding ranks across the two teams so they
            # can exchange logits computed on the same data.
            for i in range(0, self.size // 2):
                ranks = [i, i + self.size // 2]
                grp = torch.distributed.new_group(ranks=ranks)
                if self.rank in ranks:
                    self.equalize_data_group = grp
            # use the same data seed across teams but different
            # model seeds in different teams
            self.args.data_seed = 12345
            self.args.seed += self.team * 12345
        else:
            # use different seeds in different teams
            self.args.seed += self.team * 12345

    self.args.train_batch_size //= self.team_size

    if not self.args.resume_from_checkpoint:
        chio.makedirs(self.args.output_dir, exist_ok=True)
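
# Illustrative sketch (not part of the training code above): a hypothetical
# helper that prints the team layout produced by the rank arithmetic in
# setup_training() when online distillation is enabled. The helper name and
# the world size of 4 are assumptions for demonstration only.
def _team_layout_sketch(world_size=4):
    assert world_size % 2 == 0
    team_size = world_size // 2
    team_masters = [0, team_size]
    for rank in range(world_size):
        team = rank // team_size        # 0 for the first half, 1 for the second
        team_rank = rank % team_size    # rank within the team
        is_master = (team_rank == 0)    # first rank of each team is its master
        print(f"rank {rank}: team={team}, team_rank={team_rank}, "
              f"master={team_masters[team]}, is_master={is_master}")

# Expected output for world_size=4:
#   rank 0: team=0, team_rank=0, master=0, is_master=True
#   rank 1: team=0, team_rank=1, master=0, is_master=False
#   rank 2: team=1, team_rank=0, master=2, is_master=True
#   rank 3: team=1, team_rank=1, master=2, is_master=False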