Example #1
    def test_remove(self):
        test_file = "test_remove.txt"
        test_dir = "test_dir/"
        nested_dir = os.path.join(test_dir, "nested_dir/")
        nested_file = os.path.join(nested_dir, test_file)

        with chainerio.open(test_file, 'w') as fp:
            fp.write('foobar')

        # test remove on one file
        self.assertTrue(chainerio.exists(test_file))
        chainerio.remove(test_file)
        self.assertFalse(chainerio.exists(test_file))

        # test remove on directory
        chainerio.makedirs(nested_dir)
        with chainerio.open(nested_file, 'w') as fp:
            fp.write('foobar')

        self.assertTrue(chainerio.exists(test_dir))
        self.assertTrue(chainerio.exists(nested_dir))
        self.assertTrue(chainerio.exists(nested_file))

        chainerio.remove(test_dir, True)  # True removes the directory tree recursively

        self.assertFalse(chainerio.exists(test_dir))
        self.assertFalse(chainerio.exists(nested_dir))
        self.assertFalse(chainerio.exists(nested_file))
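The test methods in these examples come from a unittest.TestCase class, so they assume the usual scaffolding around them. A minimal sketch of that scaffold, assuming only the imports the snippets visibly use (the class name and smoke-test body are hypothetical):

    import unittest

    import chainerio


    class ChainerioLocalTest(unittest.TestCase):  # hypothetical name
        def test_round_trip(self):
            # Smoke test against the default (local) filesystem.
            with chainerio.open("smoke.txt", "w") as fp:
                fp.write("ok")
            self.assertTrue(chainerio.exists("smoke.txt"))
            chainerio.remove("smoke.txt")


    if __name__ == "__main__":
        unittest.main()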
Example #2
    def test_makedirs(self):
        new_tmp_dir = "testmakedirs/"
        nested_dir = new_tmp_dir + "test_nest_dir"

        chainerio.makedirs("file://" + nested_dir)
        self.assertTrue(os.path.isdir(nested_dir))
        chainerio.remove(new_tmp_dir, True)
Example #3
    def test_rename(self):
        new_tmp_dir = "testmkdir/"
        chainerio.makedirs("file://" + new_tmp_dir)

        src = os.path.join("file://", new_tmp_dir, 'src')
        dst = os.path.join("file://", new_tmp_dir, 'dst')
        with chainerio.open(src, 'w') as fp:
            fp.write('foobar')

        chainerio.rename(src, dst)
        with chainerio.open(dst, 'r') as fp:
            data = fp.read()
            assert data == 'foobar'

        assert not chainerio.exists(src)
        assert chainerio.exists(dst)
        chainerio.remove(new_tmp_dir, True)  # clean up recursively
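Examples #2 and #3 prepend "file://" to plain paths: chainerio routes each URI by its scheme, and "file://" selects the local filesystem handler, so a prefixed and an unprefixed path can address the same local file. A quick sketch of that equivalence (the path under /tmp is illustrative, and the scheme-routing behavior is an assumption drawn from these examples):

    import chainerio

    # Write through the explicit "file://" scheme...
    with chainerio.open("file:///tmp/chainerio_demo.txt", "w") as fp:
        fp.write("hello")

    # ...and read the same file back through a bare local path.
    with chainerio.open("/tmp/chainerio_demo.txt") as fp:
        assert fp.read() == "hello"

    chainerio.remove("/tmp/chainerio_demo.txt")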
Example #4
    def test_list(self):
        nested_dir_name1 = "nested_dir1"
        nested_dir_name2 = "nested_dir2"

        nested_dir_path1 = os.path.join(self.tmpdir.name, nested_dir_name1)
        nested_dir_path2 = os.path.join(nested_dir_path1, nested_dir_name2)
        nested_dir_path2_relative = os.path.join(nested_dir_name1,
                                                 nested_dir_name2)
        chainerio.makedirs(nested_dir_path1)
        chainerio.makedirs(nested_dir_path2)

        # By default, list() is non-recursive and yields only top-level names.
        file_list = list(chainerio.list(self.tmpdir.name))
        self.assertIn(nested_dir_name1, file_list)
        self.assertIn(self.tmpfile_name, file_list)
        self.assertNotIn(nested_dir_path2_relative, file_list)

        # With recursive=True, entries under subdirectories appear as
        # relative paths.
        file_list = list(chainerio.list(self.tmpdir.name, recursive=True))
        self.assertIn(nested_dir_name1, file_list)
        self.assertIn(self.tmpfile_name, file_list)
        self.assertIn(nested_dir_path2_relative, file_list)
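Example #4 reads self.tmpdir and self.tmpfile_name, which are created elsewhere in the test class. A plausible setUp/tearDown pair consistent with those names (the fixture file's name and contents are assumptions):

    import os
    import tempfile
    import unittest


    class ListTestBase(unittest.TestCase):  # hypothetical class
        def setUp(self):
            # Scratch directory that test_list enumerates.
            self.tmpdir = tempfile.TemporaryDirectory()
            self.tmpfile_name = "testfile.txt"  # assumed name
            tmpfile_path = os.path.join(self.tmpdir.name, self.tmpfile_name)
            with open(tmpfile_path, "w") as fp:
                fp.write("foobar")

        def tearDown(self):
            self.tmpdir.cleanup()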
Example #5
    def setup_training(self):
        assert torch.cuda.is_available()

        torch.cuda.set_device(self.args.local_rank)
        self.device = torch.device("cuda", self.args.local_rank)
        # Initialize the distributed backend, which takes care of
        # synchronizing nodes/GPUs.
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')

        self.rank = torch.distributed.get_rank()
        self.size = torch.distributed.get_world_size()
        if self.args.online_distillation == "none":
            # Single team covering every rank; rank 0 is its master.
            self.team = 0
            self.team_masters = [0]
            self.team_master = 0
            self.is_team_master = (self.rank == 0)
            self.local_group = torch.distributed.new_group(
                ranks=list(range(0, self.size)))
            self.team_rank = torch.distributed.get_rank()
            self.team_size = torch.distributed.get_world_size()
        else:
            assert self.size % 2 == 0, \
                'with distillation, world size must be a multiple of 2'
            self.team = self.rank // (self.size // 2)
            self.team_masters = [0, (self.size // 2)]
            self.team_master = self.team_masters[self.team]
            self.is_team_master = (self.rank % (self.size // 2) == 0)
            local_group0 = torch.distributed.new_group(
                ranks=list(range(0, self.size // 2)))
            local_group1 = torch.distributed.new_group(
                ranks=list(range(self.size // 2, self.size)))
            self.local_groups = [local_group0, local_group1]
            self.local_group = self.local_groups[self.team]

            self.team_rank = self.rank % (self.size // 2)
            self.team_size = self.size // 2

            comm_model_group_rank0 = \
                [0] + list(range(self.team_size, self.team_size * 2))
            comm_model_group_rank1 = \
                [self.team_size] + list(range(0, self.team_size))
            self.comm_model_group_ranks = [
                comm_model_group_rank0, comm_model_group_rank1
            ]

            if self.args.online_distillation == "logit":
                for i in range(0, self.size // 2):
                    ranks = [i, i + self.size // 2]
                    grp = torch.distributed.new_group(ranks=ranks)
                    if self.rank in ranks:
                        self.equalize_data_group = grp
                # share the data order across teams (fixed data_seed),
                # but use different model seeds in different teams
                self.args.data_seed = 12345
                self.args.seed += self.team * 12345
            else:
                # use different seeds in different teams
                self.args.seed += self.team * 12345

        # Split the configured batch size across the ranks within a team.
        self.args.train_batch_size //= self.team_size

        if not self.args.resume_from_checkpoint:
            chio.makedirs(self.args.output_dir, exist_ok=True)  # chio: chainerio alias
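The team bookkeeping above is plain integer arithmetic over the global rank, so it can be checked without torch.distributed. A standalone sketch for a hypothetical world size of 8 (the printout mirrors the fields set in the distillation branch):

    size = 8  # hypothetical even world size

    for rank in range(size):
        team = rank // (size // 2)          # 0 for ranks 0-3, 1 for ranks 4-7
        team_rank = rank % (size // 2)      # position within the team
        team_master = [0, size // 2][team]  # global rank of the team's master
        is_team_master = (team_rank == 0)
        print(rank, team, team_rank, team_master, is_team_master)

    # Rank 5, for example, lands in team 1 with team_rank 1,
    # and its team master is global rank 4.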