Example #1
def _dataset_fn(input_context):
    replica_batch_size = input_context.get_per_replica_batch_size(
        args.global_batch_size)
    dataset = utils.tf_dataset(*random_samples,
                               batchsize=replica_batch_size,
                               to_sparse_tensor=True,
                               repeat=1)
    # Each worker has its own data source, so there is no need to shard the dataset.
    return dataset
Example #2
def _dataset_fn(input_context):
    replica_batch_size = input_context.get_per_replica_batch_size(
        args.global_batch_size)
    dataset = utils.tf_dataset(*random_samples,
                               batchsize=replica_batch_size,
                               to_sparse_tensor=True,
                               repeat=1,
                               args=args)
    dataset = dataset.shard(input_context.num_input_pipelines,
                            input_context.input_pipeline_id)
    return dataset
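For context: a `_dataset_fn` with this signature is normally handed to a `tf.distribute` strategy rather than called directly. A minimal, self-contained sketch (not taken from the examples above; the toy data and the global batch size of 64 are placeholders):

import tensorflow as tf

strategy = tf.distribute.MultiWorkerMirroredStrategy()

def _dataset_fn(input_context):
    # Derive the per-replica batch size from the global batch size.
    replica_batch_size = input_context.get_per_replica_batch_size(64)
    features = tf.random.uniform([1024, 10])
    labels = tf.random.uniform([1024, 1])
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    # Shard only when every worker reads the same data source.
    dataset = dataset.shard(input_context.num_input_pipelines,
                            input_context.input_pipeline_id)
    return dataset.batch(replica_batch_size)

dist_dataset = strategy.distribute_datasets_from_function(_dataset_fn)

Example #1 skips the `shard` call because each worker already reads a distinct data source, while Example #2 shards a shared source; both choices map onto this pattern.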
Example #3
def test_tf_dense_model(args, init_tensors, *random_samples):
    dataset = utils.tf_dataset(*random_samples,
                               batchsize=args.global_batch_size,
                               to_sparse_tensor=False,
                               repeat=1)

    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    tf_dense_demo = TfDenseDemo(init_tensors, args.global_batch_size,
                                args.slot_num, args.nnz_per_slot,
                                args.embedding_vec_size)

    optimizer = utils.get_dense_optimizer(args.optimizer)(learning_rate=0.1)
    if args.mixed_precision:
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
            optimizer, initial_scale=1024)

    @tf.function
    def _train_step(inputs, labels):
        with tf.GradientTape() as tape:
            logit, embedding_vector = tf_dense_demo(inputs, training=True)
            loss = loss_fn(labels, logit)
            if args.mixed_precision:
                _loss = optimizer.get_scaled_loss(loss)
            else:
                _loss = loss
        grads = tape.gradient(_loss, tf_dense_demo.trainable_variables)
        if args.mixed_precision:
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(zip(grads,
                                      tf_dense_demo.trainable_variables))
        return loss, embedding_vector

    tf_results = list()

    for i, (input_tensors, labels) in enumerate(dataset):
        print("-" * 30, str(i), "-" * 30)
        loss, embedding_vector = _train_step(input_tensors, labels)
        print("[INFO]: iteration {}, loss {}".format(i, loss))
        tf_results.append(embedding_vector.numpy())

    if not hasattr(args, "task_id"):
        args.task_id = 0
    if 1 == args.save_params and args.task_id == 0:
        filepath = r"./embedding_variables/"
        utils.save_to_file(os.path.join(filepath, r"tf_variable.file"),
                           tf_dense_demo.params.numpy())

    return tf_results
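The `mixed_precision` branch above only handles loss scaling; the cast of the forward pass to float16 is normally enabled separately through the Keras global policy. A minimal sketch of that setup, with a placeholder optimizer (not part of the example):

import tensorflow as tf

# Compute in float16 while keeping variables in float32; set this before
# building the model.
tf.keras.mixed_precision.set_global_policy("mixed_float16")

optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
# Scale the loss so small float16 gradients do not underflow; the example's
# get_scaled_loss / get_unscaled_gradients calls undo this around the tape.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer,
                                                        initial_scale=1024)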
Example #4
def draw_adv(model, sk_func, num_examples_draw, margin, phase):
    batch_size_draw = 1000
    dataset = tf_dataset(num_examples_draw // batch_size_draw, batch_size_draw, sk_func)
    inf_x, sup_x, inf_y, sup_y = plotex.get_limits(sk_func(100, noise=0.1)[0], 0.6)
    plt.xlim(inf_x, sup_x)
    plt.ylim(inf_y, sup_y)
    for x, _ in dataset:
        w_adv, hinge_adv = generate_adversarial(model, x, margin, phase)
        plt.scatter(x[:,0], x[:,1], c='red', alpha=0.1, marker='.')
        plt.scatter(hinge_adv[:,0], hinge_adv[:,1], c='green', alpha=0.2, marker='x')
        plt.scatter(w_adv[:,0], w_adv[:,1], c='blue', alpha=0.2, marker='+')
    plt.xlabel('X')
    plt.ylabel('Y')
    patch_1 = mpatches.Patch(color='red', label='support')
    patch_2 = mpatches.Patch(color='green', label='x + delta')
    patch_3 = mpatches.Patch(color='blue', label='x - delta')
    plt.legend(handles=[patch_1, patch_2, patch_3])
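The `tf_dataset(num_batchs, batch_size, sk_func)` helper used by the plotting examples is not shown here. A hypothetical stand-in, consistent with how it is called and iterated (`for x, _ in dataset`), assuming `sk_func` is a scikit-learn style generator returning `(X, y)`:

import numpy as np
import tensorflow as tf
from sklearn.datasets import make_moons

def tf_dataset(num_batchs, batch_size, sk_func):
    # Hypothetical stand-in: sample num_batchs * batch_size points from the
    # toy distribution and batch them, yielding (x, y) pairs.
    x, y = sk_func(num_batchs * batch_size)
    dataset = tf.data.Dataset.from_tensor_slices(
        (x.astype(np.float32), y.astype(np.float32)))
    return dataset.batch(batch_size)

dataset = tf_dataset(10, 100, lambda n: make_moons(n, noise=0.05))
for x, _ in dataset:
    pass  # each x has shape (100, 2)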
Example #5
def draw_adv(model, sk_func, X, num_examples_draw, batch_size_draw, fig,
             index):
    scale = gin.query_parameter('one_class_wasserstein.scale')
    margin = gin.query_parameter('one_class_wasserstein.margin')
    if X.shape[1] == 2:
        fig.add_subplot(index)
    else:
        plt.twinx()
    dataset = tf_dataset(num_examples_draw // batch_size_draw, batch_size_draw,
                         sk_func)
    inf_x, sup_x, inf_y, sup_y = plotex.get_limits(X)
    xs, advs = [], []
    for x, _ in dataset:
        adv = complement_distribution(model, x, scale, margin)
        xs.append(x)
        advs.append(adv)
    xs = np.concatenate(xs)
    advs = np.concatenate(advs)
    if X.shape[1] == 2:
        plt.xlim(inf_x, sup_x)
        plt.ylim(inf_y, sup_y)
        plt.scatter(xs[:, 0], xs[:, 1], c='red', alpha=0.1, marker='.')
        plt.scatter(advs[:, 0], advs[:, 1], c='green', alpha=0.2, marker='x')
        plt.xlabel('X')
        plt.ylabel('Y')
    else:
        plt.hist(xs[:, 0],
                 bins=100,
                 fc=(1, 0, 0, 0.5),
                 histtype='stepfilled',
                 density=True)
        plt.hist(advs[:, 0],
                 bins=100,
                 fc=(0, 1, 0, 0.5),
                 histtype='stepfilled',
                 density=True)
    patch_1 = mpatches.Patch(color='red', label='support')
    patch_2 = mpatches.Patch(color='green', label='adv')
    plt.legend(handles=[patch_1, patch_2])
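Examples #5 and #8 read their hyperparameters through gin: `gin.REQUIRED` marks arguments that must be supplied by a config file, and `gin.query_parameter` reads back a bound value such as `one_class_wasserstein.scale`. A minimal self-contained sketch of that mechanism (the configurable and the bound values here are illustrative, not the project's actual config):

import gin

@gin.configurable
def one_class_wasserstein(scale=1.0, margin=0.2):
    return scale, margin

# Bindings normally come from a .gin file via gin.parse_config_file(...).
gin.parse_config("""
one_class_wasserstein.scale = 2.0
one_class_wasserstein.margin = 0.5
""")

print(gin.query_parameter('one_class_wasserstein.scale'))   # 2.0
print(gin.query_parameter('one_class_wasserstein.margin'))  # 0.5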
Example #6
def plot_levels_lines(sk_func):
    input_shape = (2,)
    seed_dispatcher(None)
    model = models.get_mlp_baseline(input_shape)

    num_batchs = 500
    batch_size = 100
    lbda       = 1.
    alpha      = 10.
    phase      = 'symmetric'
    margin     = 0.2
    dilatation = 1.
    skfunc     = dilated_func(sk_func, dilatation)

    dataset    = tf_dataset(num_batchs, batch_size, skfunc)

    X, Y       = sk_func(100, noise=0.1)
    num_examples_draw = 1000
    fig        = plt.figure(figsize=(20,14))
    plt.subplot(2, 3, 1)
    plotex.plot_levels(X, Y, model)
    plt.subplot(2, 3, 2)
    draw_adv(model, sk_func, num_examples_draw, margin, phase)

    plt.subplot(2, 3, 3)
    penalties = train_OOD_detector(model, dataset, num_batchs, lbda, alpha, margin, phase)
    iterations = np.arange(len(penalties))
    plt.plot(iterations, np.log10(tf.reduce_mean(penalties, axis=1).numpy()))
    plt.plot(iterations, np.log10(tf.reduce_min(penalties, axis=1).numpy()))
    plt.plot(iterations, np.log10(tf.reduce_max(penalties, axis=1).numpy()))
    plt.title(r'Log Gradient Norm $\|\nabla_x f\|_2$')

    plt.subplot(2, 3, 4)
    plotex.plot_levels(X, Y, model)
    plt.subplot(2, 3, 5)
    draw_adv(model, sk_func, num_examples_draw, margin, phase)

    plt.show()
Example #7
def _dataset_fn(input_context):
    replica_batch_size = input_context.get_per_replica_batch_size(args.global_batch_size)
    dataset = utils.tf_dataset(*random_samples, batchsize=replica_batch_size,
                               to_sparse_tensor=False, repeat=1)
    return dataset
Example #8
def plot_levels_lines(sk_func_name=gin.REQUIRED,
                      num_batchs=gin.REQUIRED,
                      batch_size=gin.REQUIRED,
                      k_lip=gin.REQUIRED,
                      num_examples_draw=gin.REQUIRED,
                      batch_size_draw=gin.REQUIRED,
                      proj1D=gin.REQUIRED,
                      init_landscape=gin.REQUIRED):
    seed_dispatcher(None)

    scale = gin.query_parameter('one_class_wasserstein.scale')
    if sk_func_name == 'make_moons':
        sk_func = lambda n: make_moons(n, shuffle=True, noise=0.05)
        if proj1D:
            sk_func = projected(sk_func, [1, 0])
    elif sk_func_name == 'make_circles':
        sk_func = lambda n: make_circles(n, shuffle=True, noise=0.05)
        if proj1D:
            sk_func = projected(sk_func, [1, 0])
    elif sk_func_name == 'make_blobs':
        dim = 1 if proj1D else 2
        seed = random.randint(1, 1000)
        sk_func = lambda n: make_blobs(n,
                                       centers=3,
                                       cluster_std=1. * scale,
                                       n_features=dim,
                                       shuffle=True,
                                       random_state=seed)
    else:
        raise ValueError(f"Unknown sk_func_name: {sk_func_name}")

    sk_func = dilated_func(sk_func, scale)
    X, _ = sk_func(num_examples_draw)
    dataset = tf_dataset(num_batchs, batch_size, sk_func)
    input_shape = X.shape[1:]
    model = models.get_mlp_baseline(input_shape, k_lip)

    fig = plt.figure(figsize=(22, 15))
    plotex.plot_levels(X, model, fig, 121 if proj1D else 231)
    draw_adv(model, sk_func, X, num_examples_draw, batch_size_draw, fig, 232)

    if not proj1D and init_landscape:
        plotex.plot3d(X, model, fig, 233)

    try:
        penalties = train_OOD_detector(model, dataset, num_batchs)
    except tf.errors.InvalidArgumentError as e:
        from deel.lip.normalizers import bjorck_normalization, spectral_normalization
        for layer in model.layers:
            W_bar, _u, sigma = spectral_normalization(
                layer.kernel, layer.u, niter=layer.niter_spectral)
            norm = tf.reduce_sum(W_bar**2.)
            W_bar = bjorck_normalization(W_bar, niter=layer.niter_bjorck)
            print('############################################')
            print(norm, sigma, _u, layer.bias, W_bar)
            print('\n\n\n')
        raise e

    if not proj1D and not init_landscape:
        fig.add_subplot(233)
        iterations = np.arange(len(penalties))
        plt.plot(iterations,
                 np.log10(tf.reduce_mean(penalties, axis=1).numpy()))
        plt.plot(iterations,
                 np.log10(tf.reduce_min(penalties, axis=1).numpy()))
        plt.plot(iterations,
                 np.log10(tf.reduce_max(penalties, axis=1).numpy()))
        plt.title(r'Log Gradient Norm $\log_{10}{\|\nabla_x f\|_2}$')

    plotex.plot_levels(X, model, fig, 122 if proj1D else 234)
    draw_adv(model, sk_func, X, num_examples_draw, batch_size_draw, fig, 235)
    if not proj1D:
        plotex.plot3d(X, model, fig, 236)
    plt.show()
Example #9
def test_tf_multi_dense_emb(args):
    dataset_filenames = [
        args.file_prefix + str(task_id) + ".file"
        for task_id in range(args.worker_num)
    ]

    samples_total = [list() for _ in range(args.dataset_iter_num)]
    labels_total = [list() for _ in range(args.dataset_iter_num)]
    replica_batch_size = args.global_batch_size // args.worker_num
    for worker_id in range(args.worker_num):
        samples, labels = utils.restore_from_file(dataset_filenames[worker_id])
        for i in range(args.dataset_iter_num):
            samples_total[i].extend(samples[i * replica_batch_size:(i + 1) *
                                            replica_batch_size])
            labels_total[i].extend(labels[i * replica_batch_size:(i + 1) *
                                          replica_batch_size])
    samples_total = np.concatenate(samples_total, axis=0)
    labels_total = np.concatenate(labels_total, axis=0)

    dataset = utils.tf_dataset(samples_total,
                               labels_total,
                               batchsize=args.global_batch_size,
                               to_sparse_tensor=False,
                               repeat=1)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    model = TFDenseModel(
        vocabulary_size=args.max_vocabulary_size_per_gpu * args.worker_num,
        embedding_vec_size_list=args.embedding_vec_size_list,
        slot_num_list=args.slot_num_list,
        nnz_per_slot_list=[
            args.nnz_per_slot for _ in range(len(args.slot_num_list))
        ],
        num_dense_layers=args.num_dense_layers)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
    if args.mixed_precision:
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
            optimizer, initial_scale=1024)

    # Assign initial values to the embedding variables.
    for i, param in enumerate(model.embedding_params):
        init_tensors = utils.get_ones_tensor(
            max_vocab_size_per_gpu=args.max_vocabulary_size_per_gpu *
            args.worker_num,
            embedding_vec_size=args.embedding_vec_size_list[i],
            num=1)
        param.assign(init_tensors[0])

    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    @tf.function
    def _train_step(inputs, labels):
        with tf.GradientTape() as tape:
            logit, all_vectors = model(inputs, training=True)
            loss = loss_fn(labels, logit)
            if args.mixed_precision:
                _loss = optimizer.get_scaled_loss(loss)
            else:
                _loss = loss
        grads = tape.gradient(_loss, model.trainable_variables)
        if args.mixed_precision:
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return loss, all_vectors

    # Collect the embedding vectors produced at each iteration.
    tf_results = list()
    for i, (inputs, labels) in enumerate(dataset):
        if args.stop_iter >= 0 and i >= args.stop_iter:
            break

        loss, all_vectors = _train_step(inputs, labels)
        print("[INFO]: Iteration: {}, loss={}".format(i, loss))

        with tf.device("CPU:0"):
            tf_results.append(all_vectors)
    return tf_results
Example #10
def get_sok_results(args, init_tensors, *random_samples):
    if args.distributed_tool == "onedevice":
        strategy = strategy_wrapper.OneDeviceStrategy()
    elif args.distributed_tool == "horovod":
        import horovod.tensorflow as hvd
        hvd.init()
        strategy = strategy_wrapper.HorovodStrategy()
    else:
        raise ValueError(f"{args.distributed_tool} is not supported.")

    with strategy.scope():
        sok_init_op = sok.Init(global_batch_size=args.global_batch_size)

        embedding_initializer = (tf.keras.initializers.Ones()
                                 if args.use_tf_initializer else None)

        sok_dense_demo = SOKDemo(
            max_vocabulary_size_per_gpu=args.max_vocabulary_size_per_gpu,
            embedding_vec_size=args.embedding_vec_size,
            slot_num=args.slot_num,
            nnz_per_slot=args.nnz_per_slot,
            use_hashtable=args.use_hashtable,
            dynamic_input=args.dynamic_input,
            num_of_dense_layers=0,
            key_dtype=args.key_dtype,
            embedding_initializer=embedding_initializer)

        emb_opt = utils.get_embedding_optimizer(
            args.optimizer)(learning_rate=0.1)
        dense_opt = utils.get_dense_optimizer(
            args.optimizer)(learning_rate=0.1)
        if args.mixed_precision:
            emb_opt = sok.tf.keras.mixed_precision.LossScaleOptimizer(
                emb_opt, 1024)

    sok_saver = sok.Saver()
    restore_op = list()
    for i, embedding_layer in enumerate(sok_dense_demo.embedding_layers):
        control_inputs = [restore_op[-1]] if restore_op else None
        with tf.control_dependencies(control_inputs):
            if args.restore_params:
                filepath = r"./embedding_variables"
                op = sok_saver.restore_from_file(
                    embedding_layer.embedding_variable, filepath)
            else:
                if not args.use_tf_initializer:
                    op = sok_saver.load_embedding_values(
                        embedding_layer.embedding_variable, init_tensors[i])
                else:
                    op = tf.constant(1.0)
            restore_op.append(op)

    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                                 reduction='none')

    def _replica_loss(labels, logits):
        loss = loss_fn(labels, logits)
        _dtype = loss.dtype
        loss = tf.cast(loss, tf.float32)
        loss = tf.nn.compute_average_loss(
            loss, global_batch_size=args.global_batch_size)
        return tf.cast(loss, _dtype)

    def _train_step(inputs, labels, training):
        def _step_fn(inputs, labels):
            logit, embedding_vector = sok_dense_demo(inputs, training=training)
            loss = _replica_loss(labels, logit)
            if args.mixed_precision:
                _loss = emb_opt.get_scaled_loss(loss)
            else:
                _loss = loss
            emb_var, other_var = sok.split_embedding_variable_from_others(
                sok_dense_demo.trainable_variables)
            grads = tf.gradients(
                _loss,
                emb_var + other_var,
                colocate_gradients_with_ops=True,
                unconnected_gradients=tf.UnconnectedGradients.NONE)
            emb_grads, other_grads = grads[:len(emb_var)], grads[len(emb_var):]
            if args.mixed_precision:
                other_grads = emb_opt.get_unscaled_gradients(other_grads)
                emb_grads = emb_opt.get_unscaled_gradients(emb_grads)

            if "plugin" in args.optimizer:
                emb_train_op = emb_opt.apply_gradients(zip(emb_grads, emb_var))
            else:
                with sok.OptimizerScope(emb_var):
                    emb_train_op = emb_opt.apply_gradients(
                        zip(emb_grads, emb_var))
            with tf.control_dependencies([*emb_grads]):
                # Ensure embedding gradients are ready first, so NCCL is not
                # used concurrently by SOK and Horovod.
                other_grads = strategy.reduce("sum", other_grads)
            other_train_op = dense_opt.apply_gradients(
                zip(other_grads, other_var))

            with tf.control_dependencies([emb_train_op, other_train_op]):
                total_loss = strategy.reduce("sum", loss)
                total_loss = tf.identity(total_loss)
                return total_loss, embedding_vector

        return strategy.run(_step_fn, inputs, labels)

    replica_batch_size = args.global_batch_size // args.gpu_num
    dataset = utils.tf_dataset(*random_samples,
                               batchsize=replica_batch_size,
                               to_sparse_tensor=False,
                               repeat=1,
                               args=args)
    train_iterator = dataset.make_initializable_iterator()
    iterator_init = train_iterator.initializer

    inputs, labels = train_iterator.get_next()
    graph_results = _train_step(inputs, labels, training=True)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    if "plugin" in args.optimizer:
        init_op = tf.group(init_op, emb_opt.initializer)

    save_op = list()
    for i, embedding_layer in enumerate(sok_dense_demo.embedding_layers):
        control_inputs = [save_op[-1]] if save_op else None
        with tf.control_dependencies(control_inputs):
            if args.save_params:
                filepath = r"./embedding_variables/"
                utils.try_make_dirs(filepath)
                op = sok_saver.dump_to_file(embedding_layer.embedding_variable,
                                            filepath)
            else:
                op = tf.constant(1.0)
        save_op.append(op)

    sok_results = list()

    config = tf.ConfigProto()
    config.log_device_placement = False
    with tf.Session(config=config) as sess:
        sess.run(sok_init_op)
        sess.run([init_op, iterator_init])
        sess.run(restore_op)
        sess.graph.finalize()

        for step in range(args.iter_num):
            loss_v, emb_vector_v = sess.run([*graph_results])
            print("*" * 80)
            print(f"Step: {step}, loss: {loss_v}"
                  )  #", embedding_vector:\n{emb_vector_v}")
            sok_results.append(emb_vector_v)

        sess.run(save_op)

    name = list()
    for embedding_layer in sok_dense_demo.embedding_layers:
        name.append(embedding_layer.embedding_variable.m_var_name)

    return sok_results, name
Example #11
def get_tf_results(args, init_tensors, *random_samples):
    graph = tf.Graph()
    with graph.as_default():
        tf_dense_demo = TFDemo(
            vocabulary_size=args.max_vocabulary_size_per_gpu * args.gpu_num,
            slot_num=args.slot_num,
            nnz_per_slot=args.nnz_per_slot,
            embedding_vec_size=args.embedding_vec_size,
            num_of_dense_layers=0,
            use_hashtable=False,
            dynamic_input=False)

        optimizer = utils.get_dense_optimizer(
            args.optimizer)(learning_rate=0.1)
        if args.mixed_precision:
            optimizer = sok.tf.keras.mixed_precision.LossScaleOptimizer(
                optimizer, 1024)

        loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

        def _train_step(inputs, labels, training):
            logit, embedding_vector = tf_dense_demo(inputs, training=training)
            loss = loss_fn(labels, logit)
            if args.mixed_precision:
                _loss = optimizer.get_scaled_loss(loss)
            else:
                _loss = loss
            grads = tf.gradients(
                _loss,
                tf_dense_demo.trainable_variables,
                colocate_gradients_with_ops=True,
                unconnected_gradients=tf.UnconnectedGradients.NONE)
            if args.mixed_precision:
                grads = optimizer.get_unscaled_gradients(grads)
            train_op = optimizer.apply_gradients(
                zip(grads, tf_dense_demo.trainable_variables))
            with tf.control_dependencies([train_op]):
                loss = tf.identity(loss)
                return loss, embedding_vector

        dataset = utils.tf_dataset(*random_samples,
                                   batchsize=args.global_batch_size,
                                   to_sparse_tensor=False,
                                   repeat=1)
        train_iterator = dataset.make_initializable_iterator()
        iterator_init = train_iterator.initializer

        inputs, labels = train_iterator.get_next()
        graph_results = _train_step(inputs, labels, training=True)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        restore_op = list()
        for i, embedding_layer in enumerate(tf_dense_demo.embedding_layers):
            restore_op.append(
                embedding_layer.embeddings.assign(
                    tf.concat(init_tensors[i], axis=0)))

        if args.save_params:
            filepath = r"./embedding_variables/"
            utils.try_make_dirs(filepath)
            emb_values = [
                embedding_layer.embeddings.read_value()
                for embedding_layer in tf_dense_demo.embedding_layers
            ]
        else:
            emb_values = tf.constant(1.0)

    tf_results = list()
    with tf.Session(graph=graph) as sess:
        sess.run([init_op, iterator_init])
        sess.run(restore_op)
        sess.graph.finalize()

        for step in range(args.iter_num):
            loss_v, embedding_vector_v = sess.run([*graph_results])
            print("*" * 80)
            print(f"step: {step}, loss: {loss_v}"
                  )  #", embedding_vector:\n{embedding_vector_v}")
            tf_results.append(embedding_vector_v)

        emb_values_v = sess.run(emb_values)
        if args.save_params:
            for i, value in enumerate(emb_values_v):
                utils.save_to_file(
                    os.path.join(filepath,
                                 r"tf_variable_" + str(i) + r".file"), value)
    name = list()
    for embedding_layer in tf_dense_demo.embedding_layers:
        name.append(embedding_layer.embeddings.name)

    return tf_results, name