def main(_):
    """Toy linear-regression example run under AutoDist's AllReduce strategy.

    Builds y = W*x + b on synthetic data inside ``autodist.scope()`` and trains
    for a fixed number of epochs through AutoDist's distributed session.
    Relies on module-level ``resource_spec_file``, ``tf`` and ``np``.
    """
    autodist = AutoDist(resource_spec_file, AllReduce(128))
    # Ground-truth parameters used to synthesize the training data.
    TRUE_W = 3.0
    TRUE_b = 2.0
    NUM_EXAMPLES = 1000
    EPOCHS = 10
    inputs = np.random.randn(NUM_EXAMPLES)
    noises = np.random.randn(NUM_EXAMPLES)
    outputs = inputs * TRUE_W + TRUE_b + noises

    class MyIterator:
        # Minimal stand-in for a dataset iterator: initialize() is a no-op
        # tensor and get_next() always yields the full input array.

        def initialize(self):
            return tf.zeros(1)

        def get_next(self):
            # a fake one
            return inputs

    inputs_iterator = MyIterator()
    print('I am going to a scope.')
    # All graph construction must happen inside autodist.scope() so AutoDist
    # can capture and transform it for distribution.
    with tf.Graph().as_default() as g, autodist.scope():
        # x = placeholder(shape=[NUM_EXAMPLES], dtype=tf.float32)
        W = tf.Variable(5.0, name='W', dtype=tf.float64)
        b = tf.Variable(0.0, name='b', dtype=tf.float64)

        def train_step(input):
            # Builds loss + apply-gradients op for one step on `input`.

            def y(x):
                return W * x + b

            def l(predicted_y, desired_y):
                # Mean squared error.
                return tf.reduce_mean(tf.square(predicted_y - desired_y))

            # Pick the optimizer API matching the installed TF major version.
            major_version, _, _ = tf.version.VERSION.split('.')
            if major_version == '1':
                optimizer = tf.train.GradientDescentOptimizer(0.01)
            else:
                optimizer = tf.optimizers.SGD(0.01)

            # NOTE(review): `tape` is never used — gradients come from the
            # graph-mode tf.gradients call below, not tape.gradient.
            with tf.GradientTape() as tape:
                loss = l(y(input), outputs)
                vs = [W, b]
                # gradients = tape.gradient(target=loss, sources=vs)
                gradients = tf.gradients(loss, vs)
                train_op = optimizer.apply_gradients(zip(gradients, vs))
            return loss, train_op, b

        fetches = train_step(inputs_iterator.get_next())
        # AutoDist session replaces tf.Session for distributed execution.
        session = autodist.create_distributed_session()
        for epoch in range(EPOCHS):
            l, t, b = session.run(fetches)
            print('node: {}, loss: {}\nb:{}'.format(
                autodist._cluster.get_local_address(), l, b))
    print('I am out of scope')
def train_main(args):
    """Train a graph model on partitioned node data under AutoDist/Parallaxx.

    Args:
        args: parsed CLI options; reads ``args.path`` (directory containing
            meta.yml), ``args.hidden_size``, ``args.num_epoch``,
            ``args.num_local_worker`` and ``args.batch_size``.

    Relies on module-level ``resource_spec_file``, ``ad`` (worker communicator),
    ``distributed``, ``prepare_data`` and ``model``.
    """
    autodist = AutoDist(resource_spec_file, Parallaxx())
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size  # NOTE(review): unused in this function
    num_epoch = args.num_epoch
    rank = ad.get_worker_communicate().rank()
    device_id = rank % args.num_local_worker  # NOTE(review): unused in this function
    nrank = ad.get_worker_communicate().nrank()
    distributed.ps_init(rank, nrank)
    # Number of mini-graphs assigned to this worker.
    ngraph = meta["partition"]["nodes"][rank] // args.batch_size
    graphs = prepare_data(ngraph)
    idx, epoch, nnodes = 0, 0, 0
    graph_len = graphs[0][0].y.shape[0]
    with tf.Graph().as_default() as g, autodist.scope():
        norm_adj = tf.compat.v1.sparse.placeholder(tf.float32, name="norm_adj")
        sparse_feature = tf.placeholder(tf.int32, [graph_len, meta["feature"] - 1])
        y_ = tf.placeholder(tf.int32, [graph_len], name="y_")
        train_mask = tf.placeholder(tf.float32, [graph_len], name="train_mask")
        loss, y, train_op = model(norm_adj, sparse_feature, y_, train_mask)
        sess = autodist.create_distributed_session()
        acc_stat = []
        start = time.time()
        while True:
            g_sample, mp_val, mask, mask_eval = graphs[idx]
            idx = (idx + 1) % ngraph
            feed_dict = {
                norm_adj: mp_val,
                # last feature column is held out (see meta["feature"] - 1 above)
                sparse_feature: g_sample.x[:, 0:-1],
                y_: g_sample.y,
                train_mask: mask
            }
            print("Before training")
            loss_val = sess.run([loss, y, y_, train_op], feed_dict=feed_dict)
            print(loss_val)
            pred_val = loss_val[1]
            true_val = loss_val[2]
            # FIX: `np.float` was deprecated in NumPy 1.20 and removed in 1.24;
            # np.float64 preserves the original dtype exactly.
            acc_val = np.equal(np.argmax(pred_val, 1), true_val).astype(np.float64)
            acc_stat.append(acc_val)
            nnodes += mask.sum() + mask_eval.sum()
            # Once a full partition's worth of nodes has been processed,
            # count it as one epoch and report accuracy.
            if nnodes > meta["partition"]["nodes"][rank]:
                nnodes = 0
                epoch += 1
                print("Acc : ", np.mean(acc_stat), "Time : ", time.time() - start)
                start = time.time()
                acc_stat = []
                if epoch >= num_epoch:
                    break
def train_and_save():
    """Train a sentiment model on the IMDB dataset under PartitionedPS and
    checkpoint it with the AutoDist saver.

    Relies on module-level ``vocab_size``, ``batch_size``, ``max_steps``,
    ``log_frequency``, ``checkpoint_dir``, ``SimpleModel`` and
    ``autodist_saver``.
    """
    resource_spec_file = os.path.join(os.path.dirname(__file__), 'resource_spec.yml')
    autodist = AutoDist(resource_spec_file, PartitionedPS())
    (train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.imdb.load_data(
        num_words=vocab_size)
    # Pad/truncate every review to a fixed length of 256 tokens.
    train_data = tf.keras.preprocessing.sequence.pad_sequences(train_data,
                                                               value=0,
                                                               padding='post',
                                                               maxlen=256)
    test_data = tf.keras.preprocessing.sequence.pad_sequences(test_data,
                                                              value=0,
                                                              padding='post',
                                                              maxlen=256)
    train_labels = train_labels.astype(np.float32)
    with tf.Graph().as_default(), autodist.scope():  # AutoDist code
        # One-shot iterator over an endlessly repeated, shuffled dataset.
        my_iterator = tf.compat.v1.data.Dataset.from_tensor_slices((train_data, train_labels)) \
            .shuffle(25000).batch(batch_size).repeat().make_one_shot_iterator().get_next()
        # my_iterator = MyIterator().get_next()
        model = SimpleModel()
        prev_time = time.time()
        # fetch train_op and loss
        loss_fn, train_op, gradients = model.train_fn(my_iterator)
        # Saver must be created before the distributed session is wrapped.
        saver = autodist_saver()
        sess = autodist.create_distributed_session()
        for local_step in range(max_steps):
            loss, _ = sess.run(fetches=[loss_fn, train_op],
                               options=config_pb2.RunOptions(
                                   trace_level=config_pb2.RunOptions.NO_TRACE))
            if local_step % log_frequency == 0:
                # Report words-per-second throughput over the last window.
                cur_time = time.time()
                elapsed_time = cur_time - prev_time
                num_sentences = batch_size * log_frequency
                wps = float(num_sentences) / elapsed_time
                print(
                    "Iteration %d, time = %.2fs, wps = %.0f, train loss = %.4f" %
                    (local_step, cur_time - prev_time, wps, loss))
                prev_time = cur_time
        saver.save(sess, checkpoint_dir + "sentiment", global_step=local_step)
        print(sess.run(model.emb))
        print(sess.run(gradients[0]))
        print('ending...')
gradients = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) #calcualte metrics train_loss(loss) train_accuracy(batch_y, predictions) #define test procedure @d.function def test_step(batch_x, batch_y): predictions = model(batch_x) t_loss = loss_object(y_true=batch_y, y_pred=predictions) test_loss(t_loss) test_accuracy(batch_y, predictions) session = autodist.create_distributed_session() #run train for epoch in range(max_epoch): batch_steps = int(len(partial_x_train) / batch_size) for i in range(batch_steps): batch_x = partial_x_train[i * batch_size:(i + 1) * batch_size] batch_y = partial_y_train[i * batch_size:(i + 1) * batch_size] fetches_train = train_step(batch_x, batch_y) session.run(fetches_train) #check validation accuracy fetches_test = test_step(x_val, y_val) session.run(fetches_test) template = 'Epoch {}, loss: {} - acc: {} - val_loss: {} - val_acc: {}'
def train_criteo(model, args):
    """Train a CTR model on Criteo data under AutoDist's Parallaxx strategy.

    Args:
        model: callable building the graph; given (dense_input, sparse_input,
            labels, embed_partitioner) it returns (loss, predictions, optimizer).
        args: parsed CLI options; reads ``args.all`` (use the full dataset and
            log to file) and ``args.model`` (name used in the log filename).
    """
    resource_spec_file = os.path.join(os.path.dirname(__file__), 'settings',
                                      'plx_dist_spec.yml')
    # resource_spec_file = os.path.join(os.path.dirname(__file__), 'settings', 'plx_local_spec.yml')
    autodist = AutoDist(resource_spec_file, Parallaxx())
    respec = ResourceSpec(resource_spec_file)
    if args.all:
        from models.load_data import process_all_criteo_data
        dense, sparse, all_labels = process_all_criteo_data()
        dense_feature, val_dense = dense
        sparse_feature, val_sparse = sparse
        labels, val_labels = all_labels
    else:
        from models.load_data import process_sampled_criteo_data
        dense_feature, sparse_feature, labels = process_sampled_criteo_data()
    # autodist will split the feeding data
    batch_size = 128
    with tf.Graph().as_default() as g, autodist.scope():
        dense_input = tf.compat.v1.placeholder(tf.float32, [batch_size, 13])
        sparse_input = tf.compat.v1.placeholder(tf.int32, [batch_size, 26])
        # FIX: original read `y_ = y_ = tf.compat.v1.placeholder(...)` — a
        # duplicated assignment target; single assignment is equivalent.
        y_ = tf.compat.v1.placeholder(tf.float32, [batch_size, 1])
        # Partition the embedding table only when more than one node exists.
        embed_partitioner = tf.fixed_size_partitioner(
            len(respec.nodes), 0) if len(respec.nodes) > 1 else None
        loss, y, opt = model(dense_input, sparse_input, y_, embed_partitioner)
        train_op = opt.minimize(loss)
        sess = autodist.create_distributed_session()
        # Reusable feed buffers; batch slices are copied in place each step.
        my_feed_dict = {
            dense_input: np.empty(shape=(batch_size, 13)),
            sparse_input: np.empty(shape=(batch_size, 26)),
            y_: np.empty(shape=(batch_size, 1)),
        }
        if args.all:
            raw_log_file = os.path.join(
                os.path.split(os.path.abspath(__file__))[0], 'logs',
                'tf_plx_%s.log' % (args.model))
            print('Processing all data, log to', raw_log_file)
            log_file = open(raw_log_file, 'w')
            iterations = dense_feature.shape[0] // batch_size
            total_epoch = 32
            start_index = 0
            for ep in range(total_epoch):
                # print("iters: %d" % (lp * 1000))
                print("epoch %d" % ep)
                st_time = time.time()
                train_loss, train_acc, train_auc = [], [], []
                # Each "epoch" covers ~1/10 of the data; every 10th epoch
                # also consumes the remainder.
                for it in tqdm(
                        range(iterations // 10 + (ep % 10 == 9) * (iterations % 10))):
                    my_feed_dict[dense_input][:] = dense_feature[
                        start_index:start_index + batch_size]
                    my_feed_dict[sparse_input][:] = sparse_feature[
                        start_index:start_index + batch_size]
                    my_feed_dict[y_][:] = labels[start_index:start_index + batch_size]
                    start_index += batch_size
                    if start_index + batch_size > dense_feature.shape[0]:
                        start_index = 0
                    loss_val = sess.run([loss, y, y_, train_op],
                                        feed_dict=my_feed_dict)
                    pred_val = loss_val[1]
                    true_val = loss_val[2]
                    # Binary accuracy at a 0.5 decision threshold.
                    acc_val = np.equal(true_val, pred_val > 0.5)
                    train_loss.append(loss_val[0])
                    train_acc.append(acc_val)
                    train_auc.append(metrics.roc_auc_score(true_val, pred_val))
                tra_accuracy = np.mean(train_acc)
                tra_loss = np.mean(train_loss)
                tra_auc = np.mean(train_auc)
                en_time = time.time()
                train_time = en_time - st_time
                printstr = "train_loss: %.4f, train_acc: %.4f, train_auc: %.4f, train_time: %.4f"\
                    % (tra_loss, tra_accuracy, tra_auc, train_time)
                print(printstr)
                log_file.write(printstr + '\n')
                log_file.flush()
            # FIX: the log file was never closed in the original.
            log_file.close()
        else:
            iteration = dense_feature.shape[0] // batch_size
            epoch = 50
            for ep in range(epoch):
                print('epoch', ep)
                if ep == 5:
                    # Timing baseline starts after 5 warm-up epochs.
                    start = time.time()
                ep_st = time.time()
                train_loss = []
                train_acc = []
                for idx in range(iteration):
                    start_index = idx * batch_size
                    my_feed_dict[dense_input][:] = dense_feature[
                        start_index:start_index + batch_size]
                    my_feed_dict[sparse_input][:] = sparse_feature[
                        start_index:start_index + batch_size]
                    my_feed_dict[y_][:] = labels[start_index:start_index + batch_size]
                    loss_val = sess.run([loss, y, y_, train_op],
                                        feed_dict=my_feed_dict)
                    pred_val = loss_val[1]
                    true_val = loss_val[2]
                    if pred_val.shape[1] == 1:
                        # for criteo case: binary accuracy at 0.5 threshold
                        acc_val = np.equal(true_val, pred_val > 0.5)
                    else:
                        # FIX: `np.float` was removed in NumPy 1.24; np.float64
                        # preserves the original dtype exactly.
                        acc_val = np.equal(np.argmax(pred_val, 1),
                                           np.argmax(true_val, 1)).astype(np.float64)
                    train_loss.append(loss_val[0])
                    train_acc.append(acc_val)
                tra_accuracy = np.mean(train_acc)
                tra_loss = np.mean(train_loss)
                ep_en = time.time()
                print("train_loss: %.4f, train_acc: %.4f, train_time: %.4f" %
                      (tra_loss, tra_accuracy, ep_en - ep_st))
            print('all time:', (time.time() - start))
# update = optimizer.apply_gradients(zip(new_grads, all_vars)) return loss, optimizer.iterations, update def test_step(x, y): y_hat = model(x, training=True) return y, y_hat fetches = train_step(x, y) test_fetches = test_step(x, y) e_losses = [] e_train_accuracy = [] e_test_accuracy = [] with autodist.create_distributed_session() as sess: for epoch in range(EPOCHS): j = 0 for _ in range(train_steps_per_epoch): loss, i, _ = sess.run( fetches, { x: train_images[j:j + BATCH_SIZE], y: train_labels[j:j + BATCH_SIZE] }) #print(f"step: {i}, train_loss: {loss}") j += BATCH_SIZE e_losses.append(loss) yy, prediction = sess.run(test_fetches, { x: train_images[0:400], y: train_labels[0:400]
def train_and_save():
    """ Train the model and save the serialized model and its weights. """
    autodist = AutoDist(resource_spec_file, AllReduce(128))
    print('I am going to a scope.')
    # Graph construction must happen inside autodist.scope() so AutoDist can
    # rewrite it for distributed execution.
    with tf.Graph().as_default() as g, autodist.scope():
        x = tf.compat.v1.placeholder(shape=[NUM_EXAMPLES], dtype=tf.float64)
        W = tf.Variable(5.0, name='W', dtype=tf.float64)
        b = tf.Variable(0.0, name='b', dtype=tf.float64)

        def y():
            # Linear model prediction over the placeholder input.
            return W * x + b

        def l(predicted_y, desired_y):
            # Mean squared error.
            return tf.reduce_mean(tf.square(predicted_y - desired_y))

        # Pick the optimizer API matching the installed TF major version.
        major_version, _, _ = tf.version.VERSION.split('.')
        if major_version == '1':
            optimizer = tf.train.GradientDescentOptimizer(0.01)
        else:
            optimizer = tf.optimizers.SGD(0.01)

        # NOTE(review): `tape` is unused — gradients come from the graph-mode
        # tf.gradients call, not tape.gradient.
        with tf.GradientTape() as tape:
            prediction = y()
            loss = l(prediction, outputs)
            vs = [W, b]
            gradients = tf.gradients(loss, vs)
            train_op = optimizer.apply_gradients(zip(gradients, vs))
        ops.add_to_collection(TRAIN_OP_KEY, train_op)
        fetches = [loss, train_op, b, prediction]
        feeds = [x]
        # NOTE: The AutoDist saver should be declared before the wrapped session.
        saver = autodist_saver()
        session = autodist.create_distributed_session()
        for _ in range(EPOCHS):
            l, _, b, _ = session.run(fetches, feed_dict={feeds[0]: inputs})
            print('node: {}, loss: {}\nb:{}'.format(autodist._cluster.get_local_address(), l, b))
        print('I am out of scope')

        # Build the SavedModel serving signature: one named input tensor and
        # two named outputs (loss and prediction).
        inputs_info = {
            "input_data": saved_model.utils.build_tensor_info(feeds[0])
        }
        outputs_info = {
            "loss": saved_model.utils.build_tensor_info(fetches[0]),
            "prediction": saved_model.utils.build_tensor_info(fetches[3])
        }
        serving_signature = saved_model.signature_def_utils.build_signature_def(
            inputs=inputs_info,
            outputs=outputs_info,
            method_name=saved_model.signature_constants.PREDICT_METHOD_NAME
        )
        signature_map = {
            saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                serving_signature,
        }
        # Start from a clean export directory, then write the SavedModel with
        # the AutoDist-aware saver so variable values come from this session.
        if os.path.exists(EXPORT_DIR):
            shutil.rmtree(EXPORT_DIR)
        builder = SavedModelBuilder(EXPORT_DIR)
        builder.add_meta_graph_and_variables(
            sess=session,
            tags=[TAG_NAME],
            saver=saver,
            signature_def_map=signature_map)
        builder.save()
def train_main(args):
    """Train a graph model on partitioned node data, pinned to one local GPU,
    under AutoDist's Parallaxx strategy.

    Args:
        args: parsed CLI options; reads ``args.path`` (directory containing
            meta.yml), ``args.hidden_size``, ``args.num_epoch``,
            ``args.num_local_worker`` and ``args.batch_size``.

    Relies on module-level ``ad`` (worker communicator), ``distributed``,
    ``prepare_data`` and ``model``.
    """
    resource_spec_file = os.path.join(os.path.dirname(__file__),
                                      '../../examples/ctr/settings',
                                      'plx_dist_spec.yml')
    autodist = AutoDist(resource_spec_file, Parallaxx())
    with open(os.path.join(args.path, "meta.yml"), 'rb') as f:
        meta = yaml.load(f.read(), Loader=yaml.FullLoader)
    hidden_layer_size = args.hidden_size  # NOTE(review): unused in this function
    num_epoch = args.num_epoch
    rank = ad.get_worker_communicate().rank()
    device_id = rank % args.num_local_worker  # NOTE(review): unused in this function
    nrank = ad.get_worker_communicate().nrank()
    distributed.ps_init(rank, nrank)
    # Number of mini-graphs assigned to this worker.
    ngraph = meta["partition"]["nodes"][rank] // args.batch_size
    graphs = prepare_data(ngraph)
    idx, epoch, nnodes = 0, 0, 0
    worker_device = "gpu:0"
    graph_len = graphs[0][0].y.shape[0]
    with tf.Graph().as_default() as g, autodist.scope():
        with tf.device(worker_device):
            norm_adj = tf.compat.v1.sparse.placeholder(tf.float32, name="norm_adj")
            sparse_feature = tf.placeholder(tf.int32, [graph_len, meta["feature"] - 1])
            y_ = tf.placeholder(tf.int32, [graph_len], name="y_")
            train_mask = tf.placeholder(tf.float32, [graph_len], name="train_mask")
            loss, y, train_op = model(norm_adj, sparse_feature, y_, train_mask)
        # init=tf.global_variables_initializer()
        # gpu_options = tf.GPUOptions(allow_growth=True)
        # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess = autodist.create_distributed_session()
        # sess.run(init)
        acc_cnt, total_cnt = 0, 0
        train_acc, train_cnt = 0, 0
        start = time.time()
        while True:
            g_sample, mp_val, mask, mask_eval = graphs[idx]
            idx = (idx + 1) % ngraph
            feed_dict = {
                norm_adj: mp_val,
                # last feature column is held out (see meta["feature"] - 1 above)
                sparse_feature: g_sample.x[:, 0:-1],
                y_: g_sample.y,
                train_mask: mask
            }
            loss_val = sess.run([loss, y, train_op], feed_dict=feed_dict)
            pred_val = loss_val[1]
            # FIX: `np.float` was deprecated in NumPy 1.20 and removed in 1.24;
            # np.float64 preserves the original dtype exactly.
            acc_val = np.equal(np.argmax(pred_val, 1), g_sample.y).astype(np.float64)
            # Separate accuracy tallies for eval-masked and train-masked nodes.
            acc_cnt += (acc_val * mask_eval).sum()
            total_cnt += mask_eval.sum()
            nnodes += mask.sum() + mask_eval.sum()
            train_acc += (acc_val * mask).sum()
            train_cnt += mask.sum()
            # Report every ~1/10 of this worker's partition.
            if nnodes > meta["partition"]["nodes"][rank] // 10:
                nnodes = 0
                epoch += 1
                print("Acc : ", acc_cnt / total_cnt, train_acc / train_cnt,
                      "Time : ", time.time() - start)
                # print(pred_val)
                start = time.time()
                acc_cnt, total_cnt = 0, 0
                train_acc, train_cnt = 0, 0
                if epoch >= num_epoch:
                    break
def main():
    """Train a neural matrix-factorization model on MovieLens-25m under
    AutoDist's Parallaxx strategy, logging per-epoch loss and timing.

    Relies on module-level ``neural_mf``, ``Logging``, ``getHitRatio`` and
    ``getNDCG``.
    """
    resource_spec_file = os.path.join(os.path.dirname(__file__),
                                      '../ctr/settings', 'plx_dist_spec.yml')
    autodist = AutoDist(resource_spec_file, Parallaxx())
    respec = ResourceSpec(resource_spec_file)  # NOTE(review): unused here

    def validate():
        # validate phase: each test user has 100 candidate items; rank them
        # by predicted score and compute hit-ratio / NDCG of the top-K list.
        hits, ndcgs = [], []
        for idx in range(num_users):
            start_index = idx * 100
            my_feed_dict = {
                user_input: testUserInput[start_index:start_index + 100],
                item_input: testItemInput[start_index:start_index + 100],
            }
            predictions = sess.run([y], feed_dict=my_feed_dict)
            map_item_score = {
                testItemInput[start_index + i]: predictions[0][i]
                for i in range(100)
            }
            # Evaluate top rank list
            ranklist = heapq.nlargest(topK, map_item_score,
                                      key=map_item_score.get)
            hr = getHitRatio(ranklist, testItemInput[start_index])
            ndcg = getNDCG(ranklist, testItemInput[start_index])
            hits.append(hr)
            ndcgs.append(ndcg)
        hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
        return hr, ndcg

    from movielens import getdata
    trainData, testData = getdata('ml-25m', 'datasets')
    # 100 candidate items per test user (first one per group is the positive).
    testUserInput = np.repeat(np.arange(testData.shape[0], dtype=np.int32), 100)
    testItemInput = testData.reshape((-1, ))
    num_users, num_items = {
        'ml-1m': (6040, 3706),
        'ml-20m': (138493, 26744),
        'ml-25m': (162541, 59047),
    }['ml-25m']
    batch_size = 1024
    num_negatives = 4  # NOTE(review): unused here
    topK = 10
    with tf.Graph().as_default() as g, autodist.scope():
        user_input = tf.compat.v1.placeholder(tf.int32, [
            None,
        ])
        item_input = tf.compat.v1.placeholder(tf.int32, [
            None,
        ])
        y_ = tf.compat.v1.placeholder(tf.float32, [
            None,
        ])
        loss, y, opt = neural_mf(user_input, item_input, y_, num_users,
                                 num_items)
        train_op = opt.minimize(loss)
        # init = tf.compat.v1.global_variables_initializer()
        # gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
        sess = autodist.create_distributed_session()
        # sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
        # sess.run(init)
        log = Logging(
            path=os.path.join(os.path.dirname(__file__), 'logs', 'tfplx.txt'))
        epoch = 7
        iterations = trainData['user_input'].shape[0] // batch_size
        start = time.time()
        for ep in range(epoch):
            ep_st = time.time()
            log.write('epoch %d' % ep)
            train_loss = []
            for idx in range(iterations):
                start_index = idx * batch_size
                my_feed_dict = {
                    user_input:
                        trainData['user_input'][start_index:start_index + batch_size],
                    item_input:
                        trainData['item_input'][start_index:start_index + batch_size],
                    y_:
                        trainData['labels'][start_index:start_index + batch_size],
                }
                loss_val = sess.run([loss, train_op], feed_dict=my_feed_dict)
                train_loss.append(loss_val[0])
                # if idx % 10000 == 0:
                #     hr, ndcg = validate()
                #     printstr = "HR: %.4f, NDCF: %.4f" % (hr, ndcg)
                #     log.write(printstr)
            tra_loss = np.mean(train_loss)
            ep_en = time.time()
            # validate phase
            # hr, ndcg = validate()
            printstr = "train_loss: %.4f, train_time: %.4f" % (tra_loss,
                                                               ep_en - ep_st)
            log.write(printstr)
        # FIX: the original called log.write('all time:', elapsed) with two
        # positional arguments, unlike every other log.write(single_string)
        # call in this function — format into one string instead.
        log.write('all time: %f' % (time.time() - start))
def run(flags_obj):
    """ Run ResNet ImageNet training and eval loop using native Keras APIs.

    Raises:
        ValueError: If fp16 is passed as it is not currently supported.

    Returns:
        Dictionary of training and eval stats.
    """
    #########################################################################
    # Construct AutoDist with ResourceSpec for Different Strategies
    if flags_obj.autodist_patch_tf:
        os.environ['AUTODIST_PATCH_TF'] = '1'
    else:
        os.environ['AUTODIST_PATCH_TF'] = '0'
    # Per-model AllReduce chunk size (merging granularity for gradients).
    if flags_obj.cnn_model == 'vgg16':
        chunk = 25
    elif flags_obj.cnn_model == 'resnet101':
        chunk = 200
    elif flags_obj.cnn_model == 'inceptionv3':
        chunk = 30
    else:
        chunk = 512
    # Instantiate the requested AutoDist distribution strategy.
    if flags_obj.autodist_strategy == 'PS':
        autodist = AutoDist(resource_spec_file,
                            PS(local_proxy_variable=flags_obj.proxy))
    elif flags_obj.autodist_strategy == 'PSLoadBalancing':
        autodist = AutoDist(
            resource_spec_file,
            PSLoadBalancing(local_proxy_variable=flags_obj.proxy))
    elif flags_obj.autodist_strategy == 'PartitionedPS':
        autodist = AutoDist(
            resource_spec_file,
            PartitionedPS(local_proxy_variable=flags_obj.proxy))
    elif flags_obj.autodist_strategy == 'AllReduce':
        autodist = AutoDist(resource_spec_file, AllReduce(chunk_size=chunk))
    elif flags_obj.autodist_strategy == 'Parallax':
        autodist = AutoDist(
            resource_spec_file,
            Parallax(chunk_size=chunk, local_proxy_variable=flags_obj.proxy))
    else:
        raise ValueError(
            'the strategy can be only from PS, PSLoadBalancing, PartitionedPS, AllReduce, Parallax'
        )
    #########################################################################
    # Configure mixed precision (fp16 requires TF2; bfloat16 needs no loss scale).
    dtype = flags_core.get_tf_dtype(flags_obj)
    if dtype == tf.float16:
        loss_scale = flags_core.get_loss_scale(flags_obj, default_for_fp16=128)
        policy = tf.compat.v1.keras.mixed_precision.experimental.Policy(
            'mixed_float16', loss_scale=loss_scale)
        tf.compat.v1.keras.mixed_precision.experimental.set_policy(policy)
        if not keras_utils.is_v2_0():
            raise ValueError('--dtype=fp16 is not supported in TensorFlow 1.')
    elif dtype == tf.bfloat16:
        policy = tf.compat.v1.keras.mixed_precision.experimental.Policy(
            'mixed_bfloat16')
        tf.compat.v1.keras.mixed_precision.experimental.set_policy(policy)

    input_fn = imagenet_preprocessing.input_fn
    drop_remainder = flags_obj.enable_xla
    # Base learning rate; VGG uses a smaller one.
    if 'vgg' in flags_obj.cnn_model:
        lr_schedule = 0.01
    else:
        lr_schedule = 0.1
    if flags_obj.use_tensor_lr:
        lr_schedule = common.PiecewiseConstantDecayWithWarmup(
            batch_size=flags_obj.batch_size,
            epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
            warmup_epochs=common.LR_SCHEDULE[0][1],
            boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
            multipliers=list(p[0] for p in common.LR_SCHEDULE),
            compute_lr_on_cpu=True)
    #########################################################################
    # Build with Graph mode, and put all under AutoDist scope.
    with tf.Graph().as_default(), autodist.scope():
        ##########################################################################
        train_input_dataset = input_fn(
            is_training=True,
            data_dir=flags_obj.data_dir,
            batch_size=flags_obj.batch_size,
            num_epochs=flags_obj.train_epochs,
            parse_record_fn=imagenet_preprocessing.parse_record,
            datasets_num_private_threads=flags_obj.datasets_num_private_threads,
            dtype=dtype,
            drop_remainder=drop_remainder,
            tf_data_experimental_slack=flags_obj.tf_data_experimental_slack,
            training_dataset_cache=flags_obj.training_dataset_cache,
        )
        # Select the Keras application model; weights are trained from scratch.
        if flags_obj.cnn_model == 'resnet101':
            model = tf.keras.applications.ResNet101(
                weights=None, classes=imagenet_preprocessing.NUM_CLASSES)
        elif flags_obj.cnn_model == 'vgg16':
            model = tf.keras.applications.VGG16(
                weights=None, classes=imagenet_preprocessing.NUM_CLASSES)
        elif flags_obj.cnn_model == 'inceptionv3':
            model = tf.keras.applications.InceptionV3(
                weights=None, classes=imagenet_preprocessing.NUM_CLASSES)
        elif flags_obj.cnn_model == 'densenet121':
            model = tf.keras.applications.DenseNet121(
                weights=None, classes=imagenet_preprocessing.NUM_CLASSES)
        else:
            raise ValueError('Other Model Undeveloped')
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule,
                                             beta_1=0.9,
                                             beta_2=0.999,
                                             epsilon=1e-08)
        train_input_iterator = tf.compat.v1.data.make_one_shot_iterator(
            train_input_dataset)
        train_input, train_target = train_input_iterator.get_next()
        steps_per_epoch = (imagenet_preprocessing.NUM_IMAGES['train'] //
                           flags_obj.batch_size)
        train_epochs = flags_obj.train_epochs
        if flags_obj.enable_checkpoint_and_export:
            # NOTE(review): ckpt_full_path is assigned but never used below.
            ckpt_full_path = os.path.join(flags_obj.model_dir,
                                          'model.ckpt-{epoch:04d}')
        # Smoke-test mode: cap steps and run a single epoch.
        if train_epochs <= 1 and flags_obj.train_steps:
            steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
            train_epochs = 1
        num_eval_steps = (imagenet_preprocessing.NUM_IMAGES['validation'] //
                          flags_obj.batch_size)
        # Build the explicit graph-mode training step: forward pass, loss,
        # gradients over all trainable variables, apply-gradients op.
        train_output = model(train_input, training=True)
        scc_loss = tf.keras.losses.SparseCategoricalCrossentropy()
        loss = scc_loss(train_target, train_output)
        var_list = variables.trainable_variables() + \
            ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
        grad = optimizer.get_gradients(loss, var_list)
        train_op = optimizer.apply_gradients(zip(grad, var_list))
        #####################################################################
        # Create distributed session.
        # Instead of using the original TensorFlow session for graph execution,
        # let's use AutoDist's distributed session, in which a computational
        # graph for distributed training is constructed.
        #
        # [original line]
        # >>> sess = tf.compat.v1.Session()
        #
        sess = autodist.create_distributed_session()
        #####################################################################
        summary = TimeHistory(flags_obj.batch_size, steps_per_epoch)
        for epoch_id in range(train_epochs):
            summary.on_epoch_begin(epoch_id)
            for batch_id in range(steps_per_epoch):
                summary.on_batch_begin(batch_id)
                loss_v, _ = sess.run([loss, train_op])
                summary.on_batch_end(batch_id, loss_v)
            summary.on_epoch_end(epoch_id)
        summary.on_train_end()
    return