コード例 #1
0
    def test_set_topology_fail_with_win_create(self):
        """Check that bf.set_topology is refused while a window object exists.

        After a window is created with bf.win_create, the test tries to set a
        different topology, expects bf.set_topology to return a falsy value,
        and expects bf.load_topology to still be equivalent to
        ExponentialGraph(size).  The test is skipped (with a warning) when
        bf.size() is 1 or less.
        """
        bf.init()
        size = bf.size()
        if size <= 1:
            fname = inspect.currentframe().f_code.co_name
            warnings.warn("Skip {} due to size 1".format(fname))
            return

        tensor = torch.FloatTensor([1])
        window_name = "win_create_test"
        is_created = bf.win_create(tensor, window_name)
        assert is_created, "bf.win_create do not create window object successfully."

        try:
            # size >= 2 is guaranteed here because smaller sizes returned
            # above, so no single-node topology needs to be built.
            if size == 2:
                new_topology = nx.from_numpy_array(np.array([[0, 0.2],
                                                             [0.2, 0]]),
                                                   create_using=nx.DiGraph)
            else:
                new_topology = RingGraph(size)

            is_set = bf.set_topology(new_topology)
            assert not is_set, "bf.set_topology do not fail due to win_create."

            topology = bf.load_topology()
            assert isinstance(topology, nx.DiGraph)
            assert IsTopologyEquivalent(topology, ExponentialGraph(size))
        finally:
            # Release the window even when one of the assertions above fails,
            # so a failure here does not leave a window behind for later tests.
            is_freed = bf.win_free()
        assert is_freed, "bf.win_free do not free window object successfully."
コード例 #2
0
 def test_bluefog_rank(self):
     """Check that bf.rank() equals the rank from mpi_env_rank_and_size()."""
     expected_rank, _unused_size = mpi_env_rank_and_size()
     bf.init()
     actual_rank = bf.rank()
     assert expected_rank == actual_rank
コード例 #3
0
 def test_bluefog_size(self):
     """Check that bf.size() equals the size from mpi_env_rank_and_size()."""
     _unused_rank, expected_size = mpi_env_rank_and_size()
     bf.init()
     actual_size = bf.size()
     assert expected_size == actual_size
コード例 #4
0
def hier_setup():
    """Initialize BlueFog for hierarchical tests and return rank/size info.

    Sets the BLUEFOG_NODES_PER_MACHINE environment variable to '2' before
    calling bf.init(), asserts that bf.size() is even, and installs
    bf.ExponentialGraph(bf.size() // 2) as the machine topology.

    Returns:
        A tuple (bf.rank(), bf.size(), bf.local_rank(), bf.local_size()).
    """
    os.environ['BLUEFOG_NODES_PER_MACHINE'] = '2'
    bf.init()
    world_size = bf.size()
    num_machines, leftover = divmod(world_size, 2)
    assert leftover == 0
    bf.set_machine_topology(bf.ExponentialGraph(int(num_machines)))
    return bf.rank(), world_size, bf.local_rank(), bf.local_size()
コード例 #5
0
    def test_in_out_neighbors_expo2(self):
        """Check in/out neighbor ranks after setting ExponentialGraph(size).

        With ceil(log2(size)) hops of length 2**i, the expected in-neighbors
        are (rank - 2**i) % size and the expected out-neighbors are
        (rank + 2**i) % size; both are compared in sorted order.
        """
        bf.init()
        rank = bf.rank()
        size = bf.size()
        assert bf.set_topology(ExponentialGraph(size))
        actual_in = sorted(bf.in_neighbor_ranks())
        actual_out = sorted(bf.out_neighbor_ranks())

        num_hops = int(np.ceil(np.log2(size)))
        hops = [2**exponent for exponent in range(num_hops)]
        expected_in = sorted((rank - hop) % size for hop in hops)
        expected_out = sorted((rank + hop) % size for hop in hops)

        assert actual_in == expected_in
        assert actual_out == expected_out
コード例 #6
0
 def test_set_and_load_topology(self):
     """Check the topology returned by bf.load_topology() right after init.

     No topology is set explicitly here; the loaded graph is compared with
     ExponentialGraph(size), or with a hand-written weight matrix when the
     size is 1 or 4.
     """
     bf.init()
     size = bf.size()
     if size == 1:
         expected_topology = nx.DiGraph(np.array([[1.0]]))
     elif size == 4:
         third = 1 / 3.
         weights = np.array([[third, third, third, 0.],
                             [0., third, third, third],
                             [third, 0., third, third],
                             [third, third, 0., third]])
         expected_topology = nx.DiGraph(weights)
     else:
         expected_topology = ExponentialGraph(size)

     loaded = bf.load_topology()
     assert isinstance(loaded, nx.DiGraph)
     assert IsTopologyEquivalent(expected_topology, loaded)
コード例 #7
0
def test_infer_source_from_destination_ranks(topo_func):
    """Check InferSourceFromDestinationRanks against the active topology.

    Args:
        topo_func: Callable that takes the world size and returns the
            topology passed to bf.set_topology.

    The ranks returned for dst_ranks=out_neighbors must, once sorted, equal
    bf.in_neighbor_ranks(), and the returned matrix must be close to the
    0/1 adjacency matrix of the loaded topology normalized by column sums.
    """
    bf.init()
    size = bf.size()
    bf.set_topology(topo_func(size))
    topo = bf.load_topology()
    in_neighbors = bf.in_neighbor_ranks()
    out_neighbors = bf.out_neighbor_ranks()

    # Build the averaging-rule matrix: every positive edge weight becomes 1,
    # then each column is divided by its sum.
    adjacency = nx.to_numpy_array(topo)
    indicator = np.where(adjacency > 0, 1.0, 0.0)
    expected_W = indicator / indicator.sum(axis=0)

    src_ranks, W = InferSourceFromDestinationRanks(
        dst_ranks=out_neighbors, construct_adjacency_matrix=True)
    assert sorted(src_ranks) == in_neighbors
    np.testing.assert_allclose(W, expected_W)
コード例 #8
0
    def test_in_out_neighbors_biring(self):
        """Check in/out neighbor ranks after setting RingGraph(size).

        Each rank is expected to have (rank - 1) % size and (rank + 1) % size
        as both its in-neighbors and its out-neighbors, with duplicates
        collapsed (e.g. when size is 2).  With size <= 1 no neighbors are
        expected.
        """
        bf.init()
        rank = bf.rank()
        size = bf.size()
        assert bf.set_topology(RingGraph(size))
        in_neighbors = bf.in_neighbor_ranks()
        out_neighbors = bf.out_neighbor_ranks()

        if size <= 1:
            expected_neighbors = []
        else:
            # Sort explicitly: set iteration order is not guaranteed to be
            # ascending (e.g. list({9, 1}) is [9, 1] in CPython), while the
            # actual values below are compared in sorted order.
            expected_neighbors = sorted({(rank - 1) % size, (rank + 1) % size})

        assert sorted(in_neighbors) == expected_neighbors
        assert sorted(out_neighbors) == expected_neighbors
コード例 #9
0
def problem_setup(net=LinearNet):
    """Initialize BlueFog and build the pieces of a small training problem.

    Args:
        net: Model class, called as net(input_dim, output_dim) with the
            dimensions taken from a LinearProblemBuilder.

    Returns:
        A tuple (problem_builder, train_dataloader, test_dataloader, model,
        optimizer, num_epochs).  The optimizer is Adam with learning rate
        0.01 * bf.size(); model parameters and optimizer state are broadcast
        from rank 0 before returning.
    """
    bf.init()

    # Hyper-parameters.
    num_epochs = 50
    batch_size = 128
    num_train_per_node = 1024
    num_test_per_node = 128
    lr = 0.01

    # Problem and data loaders (train data is generated before test data).
    builder = LinearProblemBuilder()

    def make_loader(num_samples):
        return DataLoader(builder.get_dataset(num_samples),
                          batch_size=batch_size)

    train_loader = make_loader(num_train_per_node)
    test_loader = make_loader(num_test_per_node)

    # Model.
    model = net(builder.input_dim, builder.output_dim)
    world_size = bf.size()
    total_train_samples = num_train_per_node * world_size
    assert total_train_samples >= model.num_parameters, (
        "The number of samples is too small making it an underdetermined system."
    )

    # Optimizer, with the learning rate scaled by the world size.
    optimizer = optim.Adam(model.parameters(), lr=lr * world_size)
    bf.broadcast_parameters(model.state_dict(), root_rank=0)
    bf.broadcast_optimizer_state(optimizer, root_rank=0)

    return builder, train_loader, test_loader, model, optimizer, num_epochs
コード例 #10
0
                          'per iteration dynamically.'))
# Boolean flag: present -> True, absent -> False.
parser.add_argument("--plot-interactive",
                    action='store_true',
                    help="Use plt.show() to present the plot.")
# File name used when the plot is saved.
parser.add_argument("--save-plot-file",
                    default='average_consensus_plot.png',
                    help="Saving the plot in the file.")
# Base integer seed; combined with the rank below.
parser.add_argument('--seed',
                    type=int,
                    default=2020,
                    help='Seed for randomness.')

args = parser.parse_args()
# CUDA is used only when args.no_cuda is falsy and torch reports CUDA as
# available.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# BlueFog is initialized before any bf.rank()/bf.local_rank() query below.
bf.init()

# Per-process seed derived from the rank.
# NOTE(review): whenever bf.rank() returns 0 the product is 0, so that
# process is seeded with 0 regardless of --seed -- confirm this is intended.
torch.random.manual_seed(args.seed * bf.rank())
if args.cuda:
    # Pick a CUDA device index from the local rank, wrapping around the number
    # of devices torch reports.
    device = bf.local_rank() % torch.cuda.device_count()
    x = torch.randn(args.data_size, device=device, dtype=torch.double)
else:
    # Same random double-precision tensor, without an explicit device.
    x = torch.randn(args.data_size, dtype=torch.double)
if args.virtual_topology == "expo2":
    pass
elif args.virtual_topology == "expo3":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=3))
elif args.virtual_topology == "expo4":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=4))
elif args.virtual_topology == "ring":
コード例 #11
0
 def setUp(self):
     """Initialize BlueFog and turn on skipping of the negotiate stage."""
     # Unfortunately, the MPICH implementation also has problems running
     # win ops with the negotiate stage.
     bf.init()
     bf.set_skip_negotiate_stage(True)
コード例 #12
0
    def setUpClass(cls):
        """Initialize BlueFog with BLUEFOG_TIMELINE pointing at a temp file.

        The path is stored on the class as ``temp_file`` and passed to the
        ``env`` context manager only for the duration of bf.init().
        """
        timeline_path = './timeline_temp'
        cls.temp_file = timeline_path

        with env(BLUEFOG_TIMELINE=timeline_path):
            bf.init()