# First Party
import smdistributed.modelparallel.tensorflow as smp

# Seed TensorFlow's global random generator with a fixed value.
tf.random.set_seed(1234)

# Switch on the TensorFlow optimizer's JIT setting before smp.init() runs.
tf.config.optimizer.set_jit(True)

# Settings handed to smp.init() below.
# NOTE(review): the meaning and allowed values of each key are defined by the
# smdistributed.modelparallel library, not by this script -- confirm against
# its configuration reference before changing any of them.
cfg = {
    "microbatches": 2,
    "partitions": 2,
    "placement_strategy": "spread",
    "pipeline": "interleaved",
    "optimize": "memory",
}
smp.init(cfg)

# Dataset cache directory: "<home>/.keras/datasets".
cache_dir = os.path.join(os.path.expanduser("~"), ".keras", "datasets")

# Create the cache directory together with a missing "<home>/.keras" parent,
# which a plain os.mkdir() cannot do. exist_ok=True tolerates another process
# creating the directory between the check and the call; a non-directory
# appearing at that path still raises, as it did before.
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir, exist_ok=True)

# Download and load MNIST dataset.
# The file name passed to load_data() embeds the value of smp.rank().
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(
    "MNIST-data-%d" % smp.rank()
)
# NOTE(review): the first example is cut off at this point in the listing; only
# the closing parenthesis was restored so that the call parses.

# --- Example no. 2 ---
# (listing residue that followed the heading: "0")
import os
import errno
import horovod.tensorflow as hvd
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense, Flatten

# Rubik: Import TF2.x API 
import smdistributed.modelparallel.tensorflow as smp

# Seed TensorFlow's global random generator with a fixed value.
tf.random.set_seed(1234)

# Rubik: Initialize
# Called without an explicit configuration dict in this script.
smp.init()

# Dataset cache directory: "<home>/.keras/datasets".
cache_dir = os.path.join(os.path.expanduser("~"), ".keras", "datasets")
# Create the cache directory together with a missing "<home>/.keras" parent,
# which a plain os.mkdir() cannot do. exist_ok=True tolerates another process
# creating the directory between the check and the call; a non-directory
# appearing at that path still raises, as it did before.
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir, exist_ok=True)

# Fetch MNIST through Keras; the cache file name embeds the value of smp.rank().
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(
    path="MNIST-data-%d" % smp.rank()
)
# Divide both image arrays by 255.0 (presumably rescaling 8-bit pixel values
# into the [0, 1] range -- confirm against the dataset's dtype).
x_train = x_train / 255.0
x_test = x_test / 255.0

# Add a channels dimension
# Standard Library
import filecmp
import os
import shutil
import time

# First Party
import smdistributed.modelparallel.tensorflow as smp
import smdistributed.modelparallel.tensorflow.utils as utils

# Initialize the library with "partitions" set to 2 and "fork" set to False.
# NOTE(review): what "fork" controls is defined by smdistributed.modelparallel,
# not by this script -- confirm in the library documentation.
smp.init({"partitions": 2, "fork": False})


def generate_big_random_bin_file(filename, size, chunk_size=1 << 20):
    """
    Create (or overwrite) a binary file filled with random bytes.

    The data is generated and written in pieces of at most ``chunk_size``
    bytes, so memory use stays bounded however large ``size`` is.

    :param filename: path of the file to write
    :param size: number of random bytes to write
    :param chunk_size: upper bound on the bytes generated per write
    :return: None
    :raises ValueError: if ``size`` is negative or ``chunk_size`` is not positive
    """
    if size < 0:
        # A single os.urandom(size) call rejects negative sizes with the same
        # exception type; keep that contract now that the write is chunked.
        raise ValueError("negative argument not allowed")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    remaining = size
    # Write-only access is sufficient: nothing is read back from the file here.
    with open(filename, "wb") as fout:
        while remaining > 0:
            step = min(chunk_size, remaining)
            fout.write(os.urandom(step))
            remaining -= step


# Wall-clock timestamp taken at this point of the script
# (presumably used later to report elapsed time -- confirm).
start_time = time.time()
# Relative directory names; judging by the names, the source side and the
# result side of a send/receive checkpoint test -- TODO confirm against the
# rest of the script.
src_root_dir = "./send_receive_checkpoint_test"
dst_root_dir = "./send_receive_checkpoint_result"
# Base name of the data file.
filename = "data.bin"

if smp.rank() != 0:
    file_path = os.path.join(src_root_dir, "mp_rank_" + str(smp.rank()))