예제 #1
0
파일: train.py 프로젝트: hubayirp/examples
def set_ipu_defaults(opts):
    """Populate IPU-related defaults in ``opts`` and seed the random sources.

    Extends ``opts['summary_str']`` with infeed, device and host template
    lines (placeholders such as ``{batches_per_step}`` are appended verbatim
    and are not formatted here), records the output of ``popc --version``,
    the hostname and the current time, maps ``select_ipu == 'AUTO'`` to
    ``-1`` and, when a truthy seed is present, seeds Python's ``random``,
    TensorFlow, NumPy and the IPU.

    Args:
        opts: Mutable options mapping; it is updated in place.
    """
    precision_line = ' Precision: {}{}\n'.format(
        opts['precision'],
        '_noSR' if opts['no_stochastic_rounding'] else '')
    opts['summary_str'] += (
        "Using Infeeds\n Max Batches Per Step: {batches_per_step}\n"
        'Device\n' + precision_line + ' IPU\n')

    opts['poplar_version'] = os.popen('popc --version').read()
    opts['summary_str'] += ' {poplar_version}'

    if opts['select_ipu'] == 'AUTO':
        opts['select_ipu'] = -1

    opts['hostname'] = gethostname()
    opts['datetime'] = str(datetime.datetime.now())

    requested_seed = opts['seed']
    if not requested_seed:
        # Falsy seeds (None, 0, '') leave every random source untouched.
        opts['seed_specified'] = False
    else:
        master_seed = int(requested_seed)
        opts['seed_specified'] = True
        random.seed(master_seed)
        # The remaining seeds are drawn from the freshly seeded generator so
        # that each source gets a different, but reproducible, value.
        tf.set_random_seed(random.randint(0, 2**32 - 1))
        np.random.seed(random.randint(0, 2**32 - 1))
        reset_ipu_seed(random.randint(-2**16, 2**16 - 1))
        opts['seed'] = master_seed

    opts['summary_str'] += ' {hostname}\n {datetime}\n'
예제 #2
0
파일: seed.py 프로젝트: graphcore/examples
def set_seed(seed):
    """Seed Python, TensorFlow, NumPy and the IPU from one master seed.

    Does nothing when ``seed`` is ``None``. Otherwise ``random`` is seeded
    with ``seed`` and every other source receives a value drawn from that
    seeded generator, so all seeds differ but are fully determined by
    ``seed``.

    Args:
        seed: Master seed, or ``None`` to leave all generators untouched.
    """
    if seed is None:
        return

    random.seed(seed)

    tf_seed = random.randint(0, 2**32 - 1)
    tf.random.set_seed(tf_seed)

    numpy_seed = random.randint(0, 2**32 - 1)
    np.random.seed(numpy_seed)

    ipu_seed = random.randint(-2**16, 2**16 - 1)
    reset_ipu_seed(ipu_seed, experimental_identical_replicas=True)
예제 #3
0
def generic_train_graph(opts, is_training):
    """Build the training graph, configure the IPU system and open a session.

    Args:
        opts: Options mapping. This function reads 'use_synthetic_data',
            'replicas', 'batches_per_step' and 'use_ipu_model' directly and
            forwards the whole mapping to the helpers it calls.
        is_training: Forwarded to ``id_embedding``. The dataset itself is
            always requested with ``is_training=True``.

    Returns:
        A tuple ``(graph_ops, uid_embedding, mid_embedding, cat_embedding)``
        where ``graph_ops`` is a ``GraphOps`` holding the session, the
        variable initializer, the averaged loss/aux-loss/accuracy ops, the
        placeholders, the training infeed, ``None`` for the outfeed and the
        saver.
    """
    data_type = 'float32'
    train_graph = tf.Graph()
    with train_graph.as_default():
        placeholders = {}
        placeholders["learning_rate"] = tf.compat.v1.placeholder(data_type, shape=[])
        # NOTE(review): `seed` is neither a parameter nor a local here; it is
        # looked up in the enclosing (module) scope when this function runs.
        uid_embedding, mid_embedding, cat_embedding = id_embedding(opts, is_training, seed)

        if opts['use_synthetic_data']:
            dataset_train = get_synthetic_dataset(opts)
        else:
            dataset_train = get_dataset_embed(opts, is_training=True)

        infeed_train = ipu_infeed_queue.IPUInfeedQueue(dataset_train, feed_name = 'DIN_dataset_infeed_train', replication_factor = (opts['replicas']))

        with ipu_scope('/device:IPU:0'):
            def comp_fn():
                # One loop iteration: the first three arguments are the
                # running totals, the rest come from the infeed queue.
                def body(total_loss, total_aux_loss, total_accuracy, uids, mids, cats, mid_his, cat_his, mid_mask, target, seqlen):
                    # `prob` is returned by graph_builder but not used here.
                    prob, loss, aux_loss, accuracy, grad_op = graph_builder(opts, uid_embedding, mid_embedding, cat_embedding, placeholders['learning_rate'], uids, mids, cats, mid_his, cat_his, mid_mask, target, seqlen, use_negsampling=False)

                    # Tie the returned totals to grad_op so the update op is
                    # executed as part of every loop iteration.
                    with tf.control_dependencies([grad_op]):
                        return total_loss + loss, total_aux_loss + aux_loss, total_accuracy + accuracy

                # Repeat `body` batches_per_step times, starting all three
                # accumulators at a float32 zero.
                return loops.repeat(opts['batches_per_step'], body, [tf.constant(0, getattr(np, 'float32'))] * 3, infeed_train)

            outputs_train = ipu_compiler.compile(comp_fn, [])
            # Turn the accumulated sums into per-batch averages.
            avg_loss, avg_aux_loss, avg_accuracy = [x / opts['batches_per_step'] for x in outputs_train]
            # Training does not use an outfeed queue.
            outfeed = None

        saver = tf.compat.v1.train.Saver()
        utils.move_variable_initialization_to_cpu()
        init = tf.compat.v1.global_variables_initializer()

    if opts['use_ipu_model']:
        # Overwrites any TF_POPLAR_FLAGS value already set in the environment.
        os.environ["TF_POPLAR_FLAGS"] = "--use_ipu_model"
    ipu_options = utils.create_ipu_config()
    ipu_options = utils.set_optimization_options(ipu_options,
                                                 combine_embedding_lookups=True)
    ipu_options = utils.set_recomputation_options(ipu_options, allow_recompute=True)
    ipu_options = utils.auto_select_ipus(ipu_options, [opts['replicas']])
    utils.configure_ipu_system(ipu_options)
    # Same free variable `seed` as above; the IPU is only seeded when it is set.
    if seed is not None:
        utils.reset_ipu_seed(seed)

    ops_train = [avg_loss, avg_aux_loss, avg_accuracy]
    sess = tf.compat.v1.Session(graph=train_graph)

    return GraphOps(sess,
                    init,
                    ops_train,
                    placeholders,
                    infeed_train,
                    outfeed,
                    saver), uid_embedding, mid_embedding, cat_embedding
예제 #4
0
def run_language_model(opts):
    """Configure the IPU system and run training and/or testing.

    Seeds the IPU when ``opts.random_seed`` is set, requests a power-of-two
    number of IPUs that covers ``opts.num_shards``, applies recomputation,
    floating-point and sparse system options, then builds a
    ``DynsparseTransformer`` and dispatches on ``opts.mode``.

    Args:
        opts: Parsed options object; ``opts.num_shards`` is forced to 1 when
            pipelining is disabled.

    Raises:
        AttributeError: If ``opts.compile_only`` is set without
            ``opts.compile_only_ipu_version``.
    """
    if opts.random_seed is not None:
        utils.reset_ipu_seed(opts.random_seed)

    # Setup and acquire an IPU device:
    logging.info("Acquiring devices")
    if not opts.pipeline:
        opts.num_shards = 1  # FIX-ME enable sparse models using multiple shards

    # Attachment fails unless a power-of-two number of IPUs is requested, so
    # round the shard count up by repeated doubling.
    num_ipus = 1
    while num_ipus < opts.num_shards:
        num_ipus *= 2
    logger.info(f"Need {opts.num_shards} IPUs, requesting {num_ipus}")

    config = utils.create_ipu_config()

    if opts.compile_only:
        if opts.compile_only_ipu_version is None:
            raise AttributeError(
                "Must provide --compile-only-ipu-version if --compile-only is set."
            )
        config = utils.set_ipu_connection_type(
            config,
            utils.DeviceConnectionType.NEVER,
            ipu_version=opts.compile_only_ipu_version,
            enable_remote_buffers=True)

    config = utils.auto_select_ipus(config, num_ipus)
    config = utils.set_recomputation_options(
        config, allow_recompute=opts.recompute)

    # Only stochastic rounding (esr) is switched on; every floating-point
    # exception flag stays off.
    fp_behaviour = dict(inv=False, div0=False, oflo=False, esr=True,
                        nanoo=False)
    config = utils.set_floating_point_behaviour_options(config, **fp_behaviour)
    config = sparse.set_system_config(
        config, custom_op_debug_printing=opts.debug_dense_grad)
    utils.configure_ipu_system(config)

    transformer = DynsparseTransformer(opts)
    if opts.mode in ("all", "train"):
        run_training(opts, transformer)

    if opts.mode in ("all", "test"):
        run_testing(opts, transformer)
예제 #5
0
def set_ipu_defaults(opts):
    """Record environment details in ``opts`` and seed the random sources.

    Stores the output of ``popc --version``, the hostname and the current
    time under 'poplar_version', 'hostname' and 'datetime'. When
    ``opts['seed']`` is truthy, Python's ``random`` is seeded with it and
    TensorFlow, NumPy and the IPU are seeded with values drawn from that
    generator.

    Args:
        opts: Mutable options mapping; it is updated in place.
    """
    opts['poplar_version'] = os.popen('popc --version').read()
    opts['hostname'] = gethostname()
    opts['datetime'] = str(datetime.datetime.now())

    if not opts['seed']:
        # Falsy seeds (None, 0, '') mean "do not seed anything".
        return

    random.seed(int(opts['seed']))

    tensorflow_seed = random.randint(0, 2**32 - 1)
    tf.set_random_seed(tensorflow_seed)

    numpy_seed = random.randint(0, 2**32 - 1)
    np.random.seed(numpy_seed)

    ipu_seed = random.randint(-2**16, 2**16 - 1)
    reset_ipu_seed(ipu_seed)
예제 #6
0
def train():
    """Build a small IPU graph, run it once and print the outfeed result.

    The graph feeds ``get_data_set()`` through an infeed queue, evaluates
    ``model2`` on a scalar time-steps placeholder inside a one-iteration
    repeat loop and pushes the result to an outfeed queue under key 'z3'.
    """
    graph = tf.Graph()
    with graph.as_default():
        # This pipeline is constructed in the graph but not consumed: the
        # infeed queue below is fed from get_data_set() instead.
        dataset = (tf.data.Dataset.from_tensors(tf.constant(1, shape=[]))
                   .map(lambda x: [x, x])
                   .batch(BS, drop_remainder=True)
                   .repeat())

        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            get_data_set(), feed_name="infeed")
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(feed_name='outfeed')
        time_steps_ph = tf.placeholder(tf.int32, shape=[])

        with ipu_scope('/device:IPU:0'):

            def compile_fn():
                # The infeed elements (x, y) are accepted but only the
                # placeholder drives the model.
                def body(x, y):
                    z3 = model2(time_steps_ph)
                    return outfeed_queue.enqueue({'z3': z3})

                return loops.repeat(1, body, [], infeed_queue)

        utils.move_variable_initialization_to_cpu()
        init = tf.global_variables_initializer()
        outputs = ipu_compiler.compile(compile_fn, [])

        dequeue_outfeed = outfeed_queue.dequeue()

    ipu_options = utils.create_ipu_config(
        profiling=False,
        profile_execution=False,
        max_cross_replica_sum_buffer_size=10000000,
        max_inter_ipu_copies_buffer_size=10000000)
    ipu_options = utils.auto_select_ipus(ipu_options, 1)
    utils.configure_ipu_system(ipu_options)
    utils.reset_ipu_seed(SEED)

    sess = tf.Session(graph=graph)
    sess.run(init)
    sess.run(infeed_queue.initializer)

    steps = 6
    for _ in range(steps):
        sess.run(outputs, feed_dict={time_steps_ph: 3})
        result = sess.run(dequeue_outfeed)
        print(result)
        # Only the first of the `steps` iterations is ever executed.
        break
def run_mnist(opts):
    """Load MNIST, convert it to the requested dtype and run train/test.

    Args:
        opts: Parsed options. Reads ``random_seed``, ``dtype`` (an object
            exposing ``as_numpy_dtype``) and ``mode``; the object is also
            passed on to the transformer and the run helpers.
    """
    if opts.random_seed is not None:
        utils.reset_ipu_seed(opts.random_seed)

    # MNIST
    numpy_dtype = opts.dtype.as_numpy_dtype
    train_split, test_split = tf.keras.datasets.mnist.load_data()

    def _prepare(split):
        # Scale pixel values by 1/255, cast images to the model dtype and
        # labels to int32.
        images, labels = split
        return (images / 255.0).astype(numpy_dtype), labels.astype(np.int32)

    x_train, y_train = _prepare(train_split)
    x_test, y_test = _prepare(test_split)

    # Create a transformer object (does not build a graph until called)
    if opts.mode in ("all", "train"):
        training_transformer = DynsparseTransformer(opts)
        run_training(opts, training_transformer, x_train, y_train)

    if opts.mode in ("all", "test"):
        testing_transformer = DynsparseTransformer(opts)
        run_testing(opts, testing_transformer, x_test, y_test)
예제 #8
0
파일: din_infer.py 프로젝트: inejc/examples
def generic_infer_graph(opts, is_training):
    """Build the validation graph, configure the IPU system and open a session.

    Args:
        opts: Options mapping. This function reads 'use_synthetic_data',
            'replicas', 'batches_per_step' and 'use_ipu_model' directly and
            forwards the whole mapping to the helpers it calls.
        is_training: Forwarded to ``id_embedding``. The dataset itself is
            always requested with ``is_training=False``.

    Returns:
        A tuple ``(graph_ops, uid_embedding, mid_embedding, cat_embedding)``
        where ``graph_ops`` is a ``GraphOps`` holding the session, the
        variable initializer, the compiled validation op list, the
        placeholders, the validation infeed, the dequeued outfeed and the
        saver.
    """
    data_type = 'float32'
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        placeholders = {
            "learning_rate": tf.compat.v1.placeholder(data_type, shape=[]),
        }
        # `seed` is resolved from the enclosing (module) scope at call time.
        uid_embedding, mid_embedding, cat_embedding = id_embedding(
            opts, is_training, seed)

        dataset_val = (get_synthetic_dataset(opts)
                       if opts['use_synthetic_data']
                       else get_dataset_embed(opts, is_training=False))

        infeed_val = ipu_infeed_queue.IPUInfeedQueue(
            dataset_val,
            feed_name='DIN_dataset_infeed_val',
            replication_factor=(opts['replicas']))
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name="DIN_validation_outfeed",
            replication_factor=opts['replicas'])

        with ipu_scope('/device:IPU:0'):

            def comp_fn_validate():
                # One validation step: run the model on a batch from the
                # infeed and enqueue the values needed for evaluation.
                def body(uids, mids, cats, mid_his, cat_his, mid_mask, target,
                         seqlen):
                    model_inputs = (uids, mids, cats, mid_his, cat_his,
                                    mid_mask, target, seqlen)
                    prob, _loss_total, _aux, accuracy, _grad = graph_builder(
                        opts, uid_embedding, mid_embedding, cat_embedding,
                        placeholders['learning_rate'], *model_inputs,
                        use_negsampling=False)
                    return outfeed_queue.enqueue((prob, target, accuracy))

                return loops.repeat(opts['batches_per_step'], body, [],
                                    infeed_val)

            outputs_val = ipu_compiler.compile(comp_fn_validate, [])
            outfeed = outfeed_queue.dequeue()

        saver = tf.compat.v1.train.Saver()
        utils.move_variable_initialization_to_cpu()
        init = tf.compat.v1.global_variables_initializer()

    if opts['use_ipu_model']:
        os.environ["TF_POPLAR_FLAGS"] = "--use_ipu_model"

    ipu_options = utils.create_ipu_config()
    ipu_options = utils.set_optimization_options(
        ipu_options, combine_embedding_lookups=True)
    ipu_options = utils.set_recomputation_options(
        ipu_options, allow_recompute=True)
    ipu_options = utils.auto_select_ipus(ipu_options, [opts['replicas']])
    utils.configure_ipu_system(ipu_options)
    if seed is not None:
        utils.reset_ipu_seed(seed)

    sess = tf.compat.v1.Session(graph=infer_graph)
    graph_ops = GraphOps(sess, init, [outputs_val], placeholders, infeed_val,
                         outfeed, saver)
    return graph_ops, uid_embedding, mid_embedding, cat_embedding
예제 #9
0
def run_mnist(opts):
    """Train and/or test the (optionally pipelined) sparse MNIST model on IPU.

    The function loads MNIST through Keras, scales and flattens the images,
    applies the pixel permutation returned by
    ``make_pixel_permutation_matrix``, builds the datasets and IPU
    infeed/outfeed queues, compiles the train and test loops, configures the
    IPU system and then runs training and/or testing depending on
    ``opts.test_mode``.

    Args:
        opts: Parsed options object. It is read throughout this function and
            forwarded to the helpers it calls.

    Returns:
        None. Returns early after the first training step when
        ``opts.single_train_step_only`` is set.

    Raises:
        ValueError: If pipelining is requested with fewer than 4 gradient
            accumulation steps, or if ``opts.steps_per_epoch`` does not
            divide the number of batches per epoch.
    """
    if opts.pipelining and opts.gradient_accumulation_count < 4:
        raise ValueError(
            "Pipelining requires at least 4 gradient accumulation steps.")
    if opts.seed is not None:
        utils.reset_ipu_seed(opts.seed)
    # Host-side generator, passed to layer creation and prune-and-grow below.
    random_gen = np.random.default_rng(seed=opts.seed)

    # Use Keras to get the dataset:
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Sizes/shapes for the dataset:
    image_shape = x_train.shape[1:]
    num_pixels = image_shape[0] * image_shape[1]
    # opts.batch_size is divided by the accumulation count to get the
    # micro-batch size used for the datasets and layers.
    batch_size = opts.batch_size // opts.gradient_accumulation_count
    batch_shape = [batch_size, num_pixels]
    num_train = y_train.shape[0]
    num_test = y_test.shape[0]
    dtype = tf.float16 if opts.data_type == 'fp16' else tf.float32

    # Flatten the images and cast the labels:
    permutation = make_pixel_permutation_matrix(opts, image_shape)

    x_train_flat = x_train.astype(dtype.as_numpy_dtype()).reshape(
        -1, num_pixels)
    x_test_flat = x_test.astype(dtype.as_numpy_dtype()).reshape(-1, num_pixels)

    # Reorder the pixel columns of every image in place.
    x_train_flat[:, ...] = x_train_flat[:, permutation]
    x_test_flat[:, ...] = x_test_flat[:, permutation]

    if opts.records_path:
        os.makedirs(opts.records_path, exist_ok=True)
        filename = os.path.join(opts.records_path, "pixel_permutation")
        np.save(filename, permutation)

    y_train = y_train.astype(np.int32)
    y_test = y_test.astype(np.int32)

    # Decide how to split epochs into loops up front:
    if opts.pipelining:
        logger.info(
            f"Pipelined: micro-batch-size: {batch_size} accumulation-count: {opts.gradient_accumulation_count}"
        )
    batches_per_epoch = num_train // (batch_size *
                                      opts.gradient_accumulation_count)
    test_batches = num_test // (batch_size * opts.gradient_accumulation_count)

    batches_per_step = opts.batches_per_step_override
    if batches_per_step is None:
        batches_per_step = batches_per_epoch // opts.steps_per_epoch

    # This divisibility check runs even when batches_per_step was overridden.
    if not (batches_per_epoch % opts.steps_per_epoch) == 0:
        raise ValueError(
            f"IPU steps per epoch {opts.steps_per_epoch} must divide batches per epoch {batches_per_epoch} exactly."
        )

    # Create FC layer descriptions:
    fc_layers = create_fc_layers(opts, batch_shape, random_gen)
    for name, fc in fc_layers.items():
        logger.info(f"Layer Config: {name}: {type(fc)}")

    # Put placeholders on the CPU host:
    with tf.device("cpu"):
        lr_placeholder = tf.placeholder(dtype, shape=[])

    # Create dataset and IPU feeds:
    def make_generator(features, labels):
        # Return a zero-argument callable, as expected by from_generator.
        return lambda: zip(features, labels)

    # Input pipeline
    def make_dataset(features, labels, is_training: bool):
        """Build a repeated, batched dataset; shuffled and cached when training."""
        dataset = tf.data.Dataset.from_generator(
            generator=make_generator(features, labels),
            output_types=(features.dtype, labels.dtype),
            output_shapes=(features.shape[1:], labels.shape[1:]))

        if is_training:
            dataset = dataset.shuffle(buffer_size=num_train,
                                      seed=opts.seed).cache()

        dataset = dataset.repeat().batch(batch_size, drop_remainder=True)
        return dataset

    train_dataset = make_dataset(features=x_train_flat,
                                 labels=y_train,
                                 is_training=True)

    test_dataset = make_dataset(features=x_test_flat,
                                labels=y_test,
                                is_training=False)

    infeed_train_queue = ipu_infeed_queue.IPUInfeedQueue(train_dataset)
    outfeed_train_queue = ipu_outfeed_queue.IPUOutfeedQueue()
    outfeed_prune_and_grow_queue = ipu_outfeed_queue.IPUOutfeedQueue()
    infeed_test_queue = ipu_infeed_queue.IPUInfeedQueue(test_dataset)
    outfeed_test_queue = ipu_outfeed_queue.IPUOutfeedQueue()

    # Get optimiser
    opt_cls, opt_kws = build_optimizer(opts.optimizer, opts.optimizer_arg)
    logger.info('Optimiser %s, optimiser keywords %s', opt_cls.__name__,
                opt_kws)

    # Get the bound model functions
    bound_model_fn = make_bound_model_pipelining if opts.pipelining else make_bound_model
    (bound_train_loop, bound_test_loop), train_inputs = bound_model_fn(
        fc_layers=fc_layers,
        opts=opts,
        lr_placeholder=lr_placeholder,
        opt_cls=opt_cls,
        opt_kws=opt_kws,
        train_batches_per_step=batches_per_step,
        test_batches_per_step=test_batches,
        train_queues=(outfeed_train_queue, infeed_train_queue),
        test_queues=(outfeed_test_queue, infeed_test_queue),
        png_queue=outfeed_prune_and_grow_queue,
        disable_dense_grad=opts.disable_dense_grad_override)

    # Use the bound builder functions to place the model on the IPU:
    with scopes.ipu_scope("/device:IPU:0"):
        train_loop = ipu_compiler.compile(bound_train_loop,
                                          inputs=train_inputs)
        test_loop = ipu_compiler.compile(bound_test_loop)

    # Placeholders can only be created on cpu after all the slots have registered:
    with tf.device("cpu"):
        for fc in fc_layers.values():
            fc.create_placeholders()

    # Create update op on IPU:
    with scopes.ipu_scope("/device:IPU:0"):
        update_representation = build_update_op(fc_layers)

    # Initialisers should go on the CPU:
    with tf.device("cpu"):
        metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                         scope="metrics")
        metrics_initializer = tf.variables_initializer(var_list=metrics_vars)
        saver = tf.train.Saver()

    # Setup and acquire an IPU device:
    utils.move_variable_initialization_to_cpu()
    config = IPUConfig()
    config.auto_select_ipus = 1
    # Only stochastic rounding (esr) is enabled; the other flags are off.
    config.floating_point_behaviour.inv = False
    config.floating_point_behaviour.div0 = False
    config.floating_point_behaviour.oflo = False
    config.floating_point_behaviour.esr = True
    config.floating_point_behaviour.nanoo = False
    config.configure_ipu_system()

    # These allow us to retrieve the results of IPU feeds:
    dequeue_test_outfeed = outfeed_test_queue.dequeue()
    dequeue_train_outfeed = outfeed_train_queue.dequeue()

    # Add dense gradient outfeed if we have sparse layers
    dequeue_prune_and_grow_outfeed = None
    if not opts.disable_dense_grad_override and any(
            fc.is_sparse() for fc in fc_layers.values()):
        dequeue_prune_and_grow_outfeed = outfeed_prune_and_grow_queue.dequeue()

    logger.info(
        f"Image shape: {image_shape} Training examples: {num_train} Test examples: {num_test}"
    )
    logger.info(
        f"Epochs: {opts.epochs} Batch-size: {batch_size} Steps-per-epoch: {opts.steps_per_epoch} Batches-per-step: {batches_per_step}"
    )
    total_steps = opts.steps_per_epoch * opts.epochs
    logger.info(f"Total steps: {total_steps}")

    if opts.log:
        # Open log and write header fields:
        # NOTE(review): log_file is never closed in this function, including
        # on the early return below - consider a context manager.
        log_file = open(opts.log, 'w')
        d1, d2 = opts.densities
        log_file.write(f"Iteration Density_{d1}_{d2}\n")

    # When restoring, reuse the named checkpoint directory; otherwise create
    # a fresh timestamped one.
    if opts.restore:
        logpath = os.path.join(opts.checkpoint_path, opts.restore)
    else:
        logpath = os.path.join(opts.checkpoint_path,
                               datetime.now().strftime("%Y%m%d-%H%M%S"))
    summary_writer = tf.summary.FileWriter(logpath)

    if opts.records_path:
        # Save the first hidden layer's weight mask for later analysis:
        save_weights(opts, 'fc1', fc_layers['fc1'], 0)

    # Run the model:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(infeed_train_queue.initializer)

        if opts.restore:
            saver.restore(sess, logpath + '/model.ckpt')

        if opts.test_mode in ["all", "training"]:
            logger.info(f"Training...")
            start = opts.start_epoch if opts.restore else 0
            progress = tqdm(
                range(start, opts.epochs),
                bar_format='{desc} Epoch: {n_fmt}/{total_fmt} {bar}')
            for e in progress:
                for i in range(opts.steps_per_epoch):
                    sess.run(metrics_initializer)

                    # Time one IPU step (batches_per_step batches).
                    t1 = time.perf_counter()
                    sess.run(train_loop,
                             feed_dict={lr_placeholder: scheduler(e, opts)})
                    t2 = time.perf_counter()
                    sess_time = t2 - t1
                    batch_time = sess_time / batches_per_step
                    throughput = batch_size / batch_time
                    logger.info(f"Time for sess.run: {sess_time:0.3f} "
                                f"Time per batch: {batch_time:0.6f} "
                                f"Throughput: {throughput}")

                    if opts.single_train_step_only:
                        return

                    train_outputs = sess.run(dequeue_train_outfeed)
                    if opts.pipelining:
                        train_outputs = train_outputs[-1]

                    # Get the last value for all items:
                    for k, v in train_outputs.items():
                        train_outputs[k] = v[-1]
                    logger.debug(f"Train outputs: {train_outputs.keys()}")

                    # Merge prune and grow fetches with last fetches:
                    if dequeue_prune_and_grow_outfeed is not None:
                        png_data = sess.run(dequeue_prune_and_grow_outfeed)
                        for k in png_data:
                            png_data[k] = png_data[k][-1]
                        logger.debug(
                            f"Prune and grow outputs: {png_data.keys()}")

                    steps = 1 + i + e * opts.steps_per_epoch
                    batches_processed = batches_per_step * steps
                    for name, fc in fc_layers.items():
                        if fc.is_sparse():
                            # NOTE(review): png_data is only assigned when
                            # the prune-and-grow outfeed exists, so this read
                            # looks like it fails for sparse layers when
                            # opts.disable_dense_grad_override is set - confirm.
                            var_name = fc.get_values_var().name
                            logger.info(
                                f"Average weights for layer {name}: {np.mean(png_data[var_name])}"
                            )
                            for slot_name in fc.sparse_slots:
                                logger.info(
                                    f"Average {slot_name} for layer {name} : {np.mean(png_data[slot_name])}"
                                )
                            # NOTE(review): this compares against
                            # opts.start_epoch although the loop starts at 0
                            # when not restoring - confirm that is intended.
                            if i == 0 and e == opts.start_epoch:
                                metainfo = sess.run(fc.get_metainfo_var())
                            else:
                                metainfo = None
                            if not opts.disable_pruning:
                                logger.info(
                                    f"Starting prune and grow for layer {name}"
                                )
                                t0 = time.perf_counter()
                                prune_sched = prune_and_grow(name,
                                                             fc,
                                                             png_data,
                                                             random_gen,
                                                             steps,
                                                             total_steps,
                                                             opts,
                                                             metainfo=metainfo)
                                t1 = time.perf_counter()
                                logger.info(
                                    f"Prune and grow for layer {name} complete in {t1-t0:0.3f} seconds"
                                )
                                logger.info(
                                    f"Pruned proportion: {prune_sched}")
                                if opts.use_wandb:
                                    wandb.log({'Prune Schedule': prune_sched},
                                              commit=False)

                    if opts.log:
                        log_file.write(
                            f"{batches_processed} {train_outputs['acc']}\n")
                    if opts.use_wandb:
                        wandb.log(
                            {
                                'Loss': train_outputs['mean_loss'],
                                'Accuracy': train_outputs['acc'],
                                'Throughput': throughput
                            },
                            commit=True)
                    progress.set_description(
                        f"Loss {train_outputs['mean_loss']:.5f} Accuracy {train_outputs['acc']:.5f}"
                    )

                    # Only need to feed an updated sparsity representation if we are running rig-L:
                    if not opts.disable_pruning:
                        # Merge the feeds needed for all layers:
                        sparse_feed = {}
                        for fc in fc_layers.values():
                            if fc.is_sparse():
                                sparse_feed.update(fc.feed_dict())
                        sess.run(update_representation, feed_dict=sparse_feed)

                # Checkpoint at the end of every checkpoint_freq-th epoch.
                if e % opts.checkpoint_freq == 0:
                    logger.info(f"Saving...")
                    saver.save(sess, os.path.join(logpath, 'model.ckpt'))

        # The accepted mode strings here are "all"/"tests", while training
        # above uses "all"/"training".
        if opts.test_mode in ["all", "tests"]:
            logger.info(f"Testing...")
            sess.run(metrics_initializer)
            sess.run(infeed_test_queue.initializer)
            sess.run(test_loop)
            result = sess.run(dequeue_test_outfeed)

            # Use the last dequeued value of each metric.
            test_loss = result['mean_loss'][-1]
            test_acc = result['acc'][-1]
            logger.info(
                f"Test loss: {test_loss:.8f} Test accuracy: {test_acc:.8f} Name: {opts.log}"
            )
            if opts.use_wandb:
                wandb.run.summary["Test Loss"] = test_loss
                wandb.run.summary["Test Accuracy"] = test_acc
예제 #10
0
파일: din_train.py 프로젝트: inejc/examples
                       default=False,
                       action='store_true',
                       help="set small or large embedding size")
    group.add_argument('--use-ipu-model',
                       default=False,
                       action='store_true',
                       help="use IPU model or not.")
    return parser


# Script entry point: parse the command line, seed the random sources when a
# seed was given, set up logging and start training.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="CTR Model Training in Tensorflow")
    # Each helper registers its own arguments and returns the parser.
    parser = add_model_arguments(parser)
    parser = add_dataset_arguments(parser)
    parser = add_training_arguments(parser)
    parser = logger.add_arguments(parser)
    # parse_known_args keeps unrecognised arguments in `unknown` instead of
    # treating them as an error.
    args, unknown = parser.parse_known_args()
    # Work with a plain dict from here on.
    args = vars(args)

    # `seed` becomes a module-level name when this file is run as a script.
    seed = args['seed']
    if seed is not None:
        # The same seed value is applied to all four random sources.
        tf.compat.v1.set_random_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        utils.reset_ipu_seed(seed)
    logger.print_setting(args)
    setup_logger(logging.INFO, tf_log)

    train_process(args)
예제 #11
0
파일: train.py 프로젝트: inejc/examples
def set_seeds(seed):
    """Seed Python, TensorFlow, NumPy and the IPU from one master seed.

    ``random`` is seeded with ``seed``; the other three sources each receive
    a value drawn, in that order, from the freshly seeded generator so that
    they differ from one another while remaining reproducible.

    Args:
        seed: Master seed passed to ``random.seed``.
    """
    random.seed(seed)

    tensorflow_seed = random.randint(0, 2**32 - 1)
    tf.set_random_seed(tensorflow_seed)

    numpy_seed = random.randint(0, 2**32 - 1)
    np.random.seed(numpy_seed)

    ipu_seed = random.randint(-2**16, 2**16 - 1)
    reset_ipu_seed(ipu_seed)
예제 #12
0
def run_mnist(opts):
    """Train and/or evaluate the fully connected MNIST model on one IPU.

    The function loads MNIST through Keras, builds the train and test loops,
    compiles them for the IPU and then, depending on ``opts.test_mode``, runs
    training (``"all"`` or ``"training"``) and/or testing (``"all"`` or
    ``"tests"``).

    Args:
        opts: Options object. Attributes read directly in this function:
            ``seed``, ``batch_size``, ``data_type``, ``epochs``,
            ``steps_per_epoch``, ``optimizer``, ``optimizer_arg``,
            ``droprate``, ``log``, ``densities``, ``checkpoint_path``,
            ``records_path``, ``test_mode`` and ``disable_pruning``.
            The object is also forwarded to helper functions.

    Raises:
        ValueError: If ``opts.steps_per_epoch`` does not divide the number of
            batches per epoch exactly.
    """
    if opts.seed is not None:
        utils.reset_ipu_seed(opts.seed)
    random_gen = np.random.default_rng(seed=opts.seed)

    # Use Keras to get the dataset:
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Sizes/shapes for the dataset:
    image_shape = x_train.shape[1:]
    num_pixels = image_shape[0] * image_shape[1]
    batch_size = opts.batch_size
    batch_shape = [batch_size, num_pixels]
    num_train = y_train.shape[0]
    num_test = y_test.shape[0]
    data_shape = [None, num_pixels]
    dtype = tf.float16 if opts.data_type == 'fp16' else tf.float32

    # Flatten the images and cast the labels:
    x_train_flat = x_train.astype(dtype.as_numpy_dtype()).reshape(-1, num_pixels)
    x_test_flat = x_test.astype(dtype.as_numpy_dtype()).reshape(-1, num_pixels)
    y_train = y_train.astype(np.int32)
    y_test = y_test.astype(np.int32)

    # Decide how to split epochs into loops up front:
    batches_per_epoch = num_train // batch_size
    train_batches = (num_train * opts.epochs) // batch_size
    test_batches = num_test // batch_size
    batches_per_step = batches_per_epoch // opts.steps_per_epoch
    if batches_per_epoch % opts.steps_per_epoch != 0:
        raise ValueError(f"IPU steps per epoch {opts.steps_per_epoch} must divide batches per epoch {batches_per_epoch} exactly.")

    # Create FC layer descriptions:
    fc_layers = create_fc_layers(opts, batch_shape, random_gen)
    for name, fc in fc_layers.items():
        logger.info(f"Layer Config: {name}: {type(fc)}")

    # Put placeholders on the CPU host:
    with tf.device("cpu"):
        place_x = tf.placeholder(dtype=dtype, shape=data_shape, name="input")
        place_y = tf.placeholder(dtype=tf.int32, shape=[None], name="label")
        lr_placeholder = tf.placeholder(dtype, shape=[])

    # Create dataset and IPU feeds. Both infeed queues wrap the same
    # placeholder-backed dataset; which arrays it serves is decided by the
    # feed_dict supplied when each queue's initializer is run below.
    dataset = tf.data.Dataset.from_tensor_slices((place_x, place_y))
    dataset = dataset.shuffle(buffer_size=num_train, seed=opts.seed).cache()
    dataset = dataset.repeat().batch(batch_size, drop_remainder=True)
    infeed_train_queue = ipu_infeed_queue.IPUInfeedQueue(
        dataset, feed_name="train_infeed")
    outfeed_train_queue = ipu_outfeed_queue.IPUOutfeedQueue(
        feed_name="train_outfeed_last_itr")
    infeed_test_queue = ipu_infeed_queue.IPUInfeedQueue(
        dataset, feed_name="test_infeed")
    outfeed_test_queue = ipu_outfeed_queue.IPUOutfeedQueue(
        feed_name="test_outfeed")

    # Get optimiser
    opt_cls, opt_kws = build_optimizer(opts.optimizer, opts.optimizer_arg)
    logger.info('Optimiser %s, optimiser keywords %s', opt_cls.__name__, opt_kws)

    # Use function binding to create all the builder functions that are needed.
    # The train and test models are bound with identical arguments except for
    # the boolean flag (True / False) and the outfeed queue.
    bound_train_model = partial(
        model, fc_layers, opts.droprate, lr_placeholder,
        opt_cls, opt_kws, batches_per_step,
        True, outfeed_train_queue)
    bound_train_loop = partial(
        loop_builder, batches_per_step, bound_train_model, infeed_train_queue)
    bound_test_model = partial(
        model, fc_layers, opts.droprate, lr_placeholder,
        opt_cls, opt_kws, batches_per_step,
        False, outfeed_test_queue)
    bound_test_loop = partial(
        loop_builder, test_batches,
        bound_test_model, infeed_test_queue)

    # Use the bound builder functions to place the model on the IPU:
    with scopes.ipu_scope("/device:IPU:0"):
        train_loop = ipu_compiler.compile(bound_train_loop, inputs=[])
        test_loop = ipu_compiler.compile(bound_test_loop, inputs=[])

    # Placeholders can only be created on cpu after all the slots have registered:
    with tf.device("cpu"):
        for fc in fc_layers.values():
            fc.create_placeholders()

    # Create update op on IPU:
    with scopes.ipu_scope("/device:IPU:0"):
        update_representation = build_update_op(fc_layers)

    # Initialisers should go on the CPU:
    with tf.device("cpu"):
        metrics_vars = tf.get_collection(
            tf.GraphKeys.LOCAL_VARIABLES, scope="metrics")
        metrics_initializer = tf.variables_initializer(var_list=metrics_vars)
        saver = tf.train.Saver()

    # Setup and acquire an IPU device:
    config = utils.create_ipu_config()
    config = utils.auto_select_ipus(config, 1)
    utils.configure_ipu_system(config)

    # These allow us to retrieve the results of IPU feeds:
    dequeue_test_outfeed = outfeed_test_queue.dequeue()
    dequeue_train_outfeed = outfeed_train_queue.dequeue()

    logger.info(f"Image shape: {image_shape} Training examples: {num_train} Test examples: {num_test}")
    logger.info(f"Epochs: {opts.epochs} Batch-size: {batch_size} Steps-per-epoch: {opts.steps_per_epoch} Batches-per-step: {batches_per_step}")
    total_steps = opts.steps_per_epoch * opts.epochs
    logger.info(f"Total steps: {total_steps}")

    # The log file stays open for the whole run, so everything from here on
    # sits inside try/finally to guarantee it is flushed and closed even if
    # training or testing raises.
    log_file = None
    try:
        if opts.log:
            # Open log and write header fields:
            log_file = open(opts.log, 'w')
            d1, d2 = opts.densities
            log_file.write(f"Iteration Density_{d1}_{d2}\n")

        logpath = os.path.join(opts.checkpoint_path, datetime.now().strftime("%Y%m%d-%H%M%S"))
        # NOTE(review): the writer is never written to in this function;
        # constructing it presumably creates `logpath` on disk before the
        # checkpoint is saved - confirm before removing it.
        summary_writer = tf.summary.FileWriter(logpath)

        if opts.records_path:
            # Save the first hidden layer's weight mask for later analysis:
            save_weights(opts, 'fc1', fc_layers['fc1'], 0)

        # Run the model:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(infeed_train_queue.initializer, feed_dict={
                     place_x: x_train_flat, place_y: y_train})

            if opts.test_mode in ["all", "training"]:
                logger.info("Training...")
                progress = tqdm(
                    range(opts.epochs), bar_format='{desc} Epoch: {n_fmt}/{total_fmt} {bar}')
                for e in progress:
                    for i in range(opts.steps_per_epoch):
                        sess.run(metrics_initializer)
                        # Only need to feed an updated sparsity representation if we are running
                        # a prune and grow algorithm:
                        if not opts.disable_pruning:
                            # Merge the feeds needed for all layers:
                            sparse_feed = {}
                            for fc in fc_layers.values():
                                if fc.is_sparse():
                                    sparse_feed.update(fc.feed_dict())
                            sess.run(update_representation, feed_dict=sparse_feed)

                        sess.run(train_loop, feed_dict={lr_placeholder: scheduler(e, opts)})
                        last = sess.run(dequeue_train_outfeed)

                        # 1-based count of IPU steps completed so far across all epochs:
                        steps = 1 + i + e*opts.steps_per_epoch
                        batches_processed = batches_per_step*steps
                        for name, fc in fc_layers.items():
                            if fc.is_sparse():
                                logger.info(f"Average weights for layer {name}: {np.mean(last[name+'_non_zeros'][0])}")
                                for slot_name in fc.sparse_slots:
                                    logger.info(f"Average {slot_name} for layer {name} : {np.mean(last[name+f'_{slot_name}'][0])}")
                                if not opts.disable_pruning:
                                    logger.info(f"Starting prune and grow for layer {name}")
                                    t0 = time.perf_counter()
                                    prune_and_grow(name, fc, last, random_gen, steps, total_steps, opts)
                                    t1 = time.perf_counter()
                                    logger.info(f"Prune and grow for layer {name} complete in {t1-t0:0.3f} seconds")

                        if log_file is not None:
                            log_file.write(f"{batches_processed} {last['acc'][0]}\n")
                        progress.set_description(f"Loss {last['mean_loss'][0]:.5f} Accuracy {last['acc'][0]:.5f}")

                logger.info("Saving...")
                saver.save(sess, os.path.join(logpath, 'model.ckpt'))

            if opts.test_mode in ["all", "tests"]:
                # Unlike the training feed, this merges the feeds of every
                # layer without checking fc.is_sparse().
                test_feed = {}
                for fc in fc_layers.values():
                    test_feed.update(fc.feed_dict())

                logger.info("Testing...")
                sess.run(metrics_initializer)
                sess.run(infeed_test_queue.initializer, feed_dict={
                         place_x: x_test_flat, place_y: y_test})
                sess.run(test_loop, feed_dict=test_feed)
                result = sess.run(dequeue_test_outfeed)

                # The last dequeued entry is reported as the final test metric:
                test_loss = result['mean_loss'][-1]
                test_acc = result['acc'][-1]
                logger.info(f"Test loss: {test_loss:.8f} Test accuracy: {test_acc:.8f}")
    finally:
        if log_file is not None:
            log_file.close()
예제 #13
0
def generic_graph(opts):
    """Build the training graph, configure the IPU system and open a session.

    Args:
        opts: Dict of options. Keys read directly here: ``'seed'``,
            ``'use_synthetic_data'``, ``'replicas'``, ``'batches_per_step'``
            and ``'use_ipu_model'``. The dict is also forwarded to the
            dataset, embedding and model helper functions.

    Returns:
        A 4-tuple ``(graph_ops, uid_embedding, mid_embedding, cat_embedding)``.
        ``graph_ops`` is a ``GraphOps`` built from the session, the variable
        initializer, the averaged ``[loss, aux_loss, accuracy]`` outputs, the
        placeholders dict, the infeed queue, the saver and the dataset feed
        values. The three embeddings are those returned by ``id_embedding``.
    """
    data_type = get_tf_datatype(opts)
    graph = tf.Graph()
    with graph.as_default():
        placeholders = {}
        placeholders["learning_rate"] = tf.placeholder(data_type, shape=[])
        uid_embedding, mid_embedding, cat_embedding = id_embedding(
            opts, True, opts['seed'])
        if opts['use_synthetic_data']:
            dataset = get_synthetic_dataset(opts, return_neg=True)
            # Synthetic data has no tensors to feed at initialisation time.
            feed_dict_values = {}
        else:
            dataset, feed_dict_values = get_dataset_embed_from_tensors(
                opts, data_type)
        infeed = ipu_infeed_queue.IPUInfeedQueue(
            dataset,
            feed_name='DIEN_dataset_infeed',
            replication_factor=opts['replicas'])

        with ipu_scope('/device:IPU:0'):

            def comp_fn():
                """Repeat the training body ``opts['batches_per_step']`` times."""

                def body(total_loss, total_aux_loss, total_accuracy, uids,
                         mids, cats, mid_his, cat_his, mid_mask, target,
                         seqlen, noclk_mids, noclk_cats):
                    """Run one batch and add its metrics to the running totals."""
                    prob, loss, aux_loss, accuracy, grad_op = graph_builder(
                        opts,
                        uid_embedding,
                        mid_embedding,
                        cat_embedding,
                        placeholders['learning_rate'],
                        uids,
                        mids,
                        cats,
                        mid_his,
                        cat_his,
                        mid_mask,
                        target,
                        seqlen,
                        noclk_mids,
                        noclk_cats,
                        use_negsampling=True)
                    # The returned sums are created under a control dependency
                    # on grad_op, so evaluating them also runs grad_op.
                    with tf.control_dependencies([grad_op]):
                        return total_loss + loss, total_aux_loss + aux_loss, total_accuracy + accuracy

                # The three accumulators (loss, aux loss, accuracy) start at zero.
                return loops.repeat(opts['batches_per_step'], body,
                                    [tf.constant(0, data_type)] * 3, infeed)

            outputs_train = ipu_compiler.compile(comp_fn, [])
            # Turn the per-step sums into per-batch averages.
            avg_loss, avg_aux_loss, avg_accuracy = [
                x / opts['batches_per_step'] for x in outputs_train
            ]

        saver = tf.train.Saver()
        utils.move_variable_initialization_to_cpu()
        init = tf.global_variables_initializer()
        if opts['use_ipu_model']:
            # This replaces any value TF_POPLAR_FLAGS already had.
            os.environ["TF_POPLAR_FLAGS"] = "--use_ipu_model"

    ipu_options = utils.create_ipu_config(
        profiling=False,
        profile_execution=False,
        max_cross_replica_sum_buffer_size=10000000,
        max_inter_ipu_copies_buffer_size=10000000)
    ipu_options = utils.set_recomputation_options(ipu_options,
                                                  allow_recompute=True)
    ipu_options = utils.auto_select_ipus(ipu_options, [opts['replicas']])
    utils.configure_ipu_system(ipu_options)
    # Only reseed the IPU when a seed was actually supplied, so a run without
    # a seed does not pass None to reset_ipu_seed.
    if opts['seed'] is not None:
        utils.reset_ipu_seed(opts['seed'])

    graph_outputs = [avg_loss, avg_aux_loss, avg_accuracy]
    sess = tf.Session(graph=graph)

    return GraphOps(
        sess, init, graph_outputs, placeholders, infeed, saver,
        feed_dict_values), uid_embedding, mid_embedding, cat_embedding