Example #1
 def hook(module, *unused):
     for name, p in module.named_parameters():
         full_name = parent_name + '.' + name
         if self._use_timeline:
             # End forward computation timeline
             bf.timeline_end_activity(full_name)
         if not module.training:
             continue
         if p.requires_grad:
             if self._pushsum_delay[p] <= 0:
                 raise AssertionError(
                     "Unexpected behavior: forward computation were computed "
                     "more than num_steps_per_communication times before call "
                     "to step(). Adjust num_steps_per_communication to "
                     "accumulate gradients locally.")
             self._pushsum_delay[p] -= 1
             if self._pushsum_delay[p] == 0:
                 ps_weights = self._named_ps_weights[full_name]
                 extended_parameter = torch.cat(
                     (p.data.view(-1), ps_weights), 0)
                 self._named_extension_parameters[
                     name] = extended_parameter
                 handle = bf.win_accumulate_nonblocking(
                     tensor=extended_parameter,
                     name=full_name,
                     dst_weights=self.dst_weights,
                     require_mutex=True)
                 self._handles[p] = handle
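The hook above closes over self and parent_name and fires after a submodule's forward pass. A minimal sketch of how such hooks might be attached, assuming one hook per named submodule through PyTorch's register_forward_hook (register_hooks and make_hook are hypothetical names, not the library's own):

    import torch.nn as nn

    def register_hooks(model: nn.Module, make_hook):
        # Hypothetical helper: make_hook(parent_name) builds a closure like the
        # hook above; one forward hook is attached per named submodule.
        handles = []
        for parent_name, module in model.named_modules():
            handles.append(module.register_forward_hook(make_hook(parent_name)))
        return handles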
Example #2
 def hook(module, *unused):
     for name, p in module.named_parameters():
         if not module.training:
             continue
         if self._use_timeline:
             # End forward computation timeline
             bf.timeline_end_activity(parent_name + '.' + name)
         if p.requires_grad:
             if self._reduce_delay[p] <= 0:
                 raise AssertionError(
                     "Unexpected behavior: forward computation were computed "
                     "more than num_steps_per_communication times before call "
                     "to step(). Adjust num_steps_per_communication to "
                     "accumulate gradients locally.")
             self._reduce_delay[p] -= 1
             if self._reduce_delay[p] == 0:
                 if self._reduce_method == 0:
                     handle = self._allreduce_data_async(p)
                 elif self._reduce_method == 1:
                     handle = self._neighbor_allreduce_data_async(p)
                 elif self._reduce_method == 2:
                     handle = self._hierarchical_neighbor_allreduce_data_async(
                         p)
                 elif self._reduce_method == -1:
                     handle = None
                 else:
                     raise ValueError(
                         "Unknown reduce method. Do not change _reduce_method manually."
                     )
                 self._handles[p] = handle
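The handles stashed in self._handles are consumed later, typically inside step(). A minimal sketch of that consumption, assuming bf.synchronize(handle) is the call that completes a nonblocking reduction and returns its result (the helper name is illustrative):

    import bluefog.torch as bf

    def synchronize_handles(handles):
        # Sketch only: finish each outstanding nonblocking reduction recorded
        # by the hook and copy the result back into its parameter. Assumes
        # bf.synchronize(handle) completes the operation and returns the tensor.
        for p, handle in handles.items():
            if handle is None:  # _reduce_method == -1 stores no handle
                continue
            p.data.copy_(bf.synchronize(handle))
        handles.clear()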
Example #3
    def test_timeline_with_python_interface(self):
        bf.timeline_start_activity("test_python_interface_x", "FAKE_ACTIVITY")
        time.sleep(0.1)
        bf.timeline_end_activity("test_python_interface_x")
        time.sleep(0.1)

        file_name = f"{self.temp_file}{bf.rank()}.json"
        with open(file_name, 'r') as tf:
            timeline_text = tf.read()
            assert 'FAKE_ACTIVITY' in timeline_text, timeline_text
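For this test to find a JSON trace on disk, the timeline has to be switched on before initialization. A minimal sketch of that setup, under the assumption that the BLUEFOG_TIMELINE environment variable sets the per-rank output file prefix (the prefix path is illustrative):

    import os
    import tempfile

    import bluefog.torch as bf

    # Assumption: each rank appends "<rank>.json" to this prefix, which is what
    # the test reconstructs as f"{self.temp_file}{bf.rank()}.json".
    temp_file = os.path.join(tempfile.gettempdir(), "bf_timeline_test")
    os.environ["BLUEFOG_TIMELINE"] = temp_file
    bf.init()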
Example #4
 def hook(module, *unused):
     for name, p in module.named_parameters():
         if self._use_timeline:
             # End forward computation timeline
             bf.timeline_end_activity(parent_name + '.' + name)
         if not module.training:
             continue
         if p.requires_grad:
             if self._bluefog_delay[p] <= 0:
                 raise AssertionError(
                     "Unexpected behavior: forward computation were computed "
                     "more than num_steps_per_communication times before call "
                     "to step(). Adjust num_steps_per_communication to "
                     "accumulate gradients locally.")
             self._bluefog_delay[p] -= 1
             if self._bluefog_delay[p] == 0:
                 handle = bf.win_get_nonblocking(
                     name=parent_name + '.' + name,
                     src_weights=self.src_weights,
                     require_mutex=True)
                 self._handles[p] = handle
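win_get_nonblocking pulls from a window registered under the same '<parent_name>.<name>' key, so windows must exist before this hook ever fires. A minimal sketch of that registration, assuming bf.win_create(tensor, name) is how a window is created (the helper name is hypothetical):

    import bluefog.torch as bf

    def create_parameter_windows(model, parent_name=""):
        # Hypothetical helper: register one window per parameter, keyed by the
        # same '<parent_name>.<name>' string the hook above passes to
        # bf.win_get_nonblocking.
        for name, p in model.named_parameters():
            full_name = parent_name + '.' + name if parent_name else name
            bf.win_create(p.data, name=full_name)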
Example #5
 def hook(*ignore):
     bf.timeline_end_activity(name)
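This two-line hook only closes the activity recorded under name. A minimal sketch of one way it could be attached, assuming it is meant to fire once autograd produces a gradient for a given tensor (PyTorch's Tensor.register_hook; the wrapper name is hypothetical):

    import bluefog.torch as bf

    def attach_timeline_end_hook(tensor, name):
        # Close the named timeline activity as soon as a gradient arrives for
        # this tensor; register_hook passes the gradient, which is ignored.
        def hook(*ignore):
            bf.timeline_end_activity(name)
        return tensor.register_hook(hook)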
Example #6
 def _timeline_forward_end_hook(module, *unused):
     for name, _ in module.named_parameters():
         full_name = parent_name + '.' + name if parent_name else name
         bf.timeline_end_activity(full_name)
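The end hook above has a natural counterpart that opens the activity before the forward pass runs. A minimal sketch pairing the two, assuming register_forward_pre_hook / register_forward_hook as the attachment points and "FORWARD" as an illustrative activity label (the factory name is hypothetical):

    import bluefog.torch as bf

    def make_timeline_hooks(parent_name=""):
        # Hypothetical factory: the pre-hook opens the activity that the
        # post-hook closes, mirroring _timeline_forward_end_hook above.
        def start_hook(module, *unused):
            for name, _ in module.named_parameters():
                full_name = parent_name + '.' + name if parent_name else name
                bf.timeline_start_activity(full_name, "FORWARD")

        def end_hook(module, *unused):
            for name, _ in module.named_parameters():
                full_name = parent_name + '.' + name if parent_name else name
                bf.timeline_end_activity(full_name)

        return start_hook, end_hook

    # Usage (illustrative):
    #   start, end = make_timeline_hooks("layer1")
    #   submodule.register_forward_pre_hook(start)
    #   submodule.register_forward_hook(end)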
Example #7
 def f():
     bf.timeline_start_activity("test_multi_thread", "THREAD")
     time.sleep(0.1)
     bf.timeline_end_activity("test_multi_thread")
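A minimal sketch of how the worker above might be driven in a multi-threaded timeline test; the thread count is illustrative, and it assumes bf.init() has already run with the timeline enabled:

    import threading

    # Run several copies of f concurrently so their activities overlap in the
    # timeline, then wait for all of them to finish.
    threads = [threading.Thread(target=f) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()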