Code example #1
def make_and_run_on_device_benchmark(opts, train=True):
    name = "training" if train else "test"
    logging.info(f"Creating the {name} benchmark for running with a device")
    graph = tf.Graph()

    with graph.as_default():
        ds, num_ds, *_ = make_dataset(opts, use_synthetic_data=False, training=train)
        num_ds = num_ds // opts.batch_size
        infeed = ipu_infeed_queue.IPUInfeedQueue(ds)

        def empty_loop():
            def empty_body(data_infeed):
                return tf.no_op()
            return ipu.loops.repeat(opts.repeat_count, empty_body, [], infeed)

        with ipu.scopes.ipu_scope("/device:IPU:0"):
            benchmark_op = ipu.ipu_compiler.compile(empty_loop, inputs=[])

    with tf.Session(graph=graph) as sess:
        # run a first un-monitored epoch to force compile
        sess.run(infeed.initializer)  # the infeed must be initialized before use
        sess.run(benchmark_op)
        times = []
        for _ in range(opts.epochs):
            progress = tqdm.tqdm(range(num_ds // opts.repeat_count))
            for _ in progress:
                t0 = time.perf_counter()
                sess.run(benchmark_op)
                t1 = time.perf_counter()
                times.append(t1 - t0)
        avg_time = np.mean(times)
        token_throughput = opts.source_sequence_length * opts.batch_size * opts.repeat_count / avg_time
        bytes_throughput = token_throughput * 4 / (2**30)  # assumes 4-byte (e.g. int32) tokens

    logging.info(f"On device throughput: {token_throughput:0.2f} tokens/s = {bytes_throughput:0.2f} GB/s")
Code example #2
def main(in_dataset_file, in_model_folder, in_result_file):
    dataset = pd.read_json(in_dataset_file)

    with tf.Session() as sess:
        model, actual_config, vocab, char_vocab, label_vocab = load(
            in_model_folder, sess)
        rev_label_vocab = {
            label_id: label
            for label, label_id in label_vocab.items()  # Python 3: items(), not iteritems()
        }
        print('Done loading')
        X, y = make_dataset(dataset, vocab, label_vocab, actual_config)
        y_pred = predict(model, (X, y), [rev_label_vocab], sess)
    tags_predicted = []
    tag_idx = 0
    for tag_seq in dataset['tags']:
        tags_predicted.append(y_pred[tag_idx:tag_idx + len(tag_seq)])
        tag_idx += len(tag_seq)
    result = pd.DataFrame({
        'utterance': dataset['utterance'],
        'tags_gold': dataset['tags'],
        'tags_predicted': tags_predicted
    })
    result.to_json(in_result_file)
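
A hypothetical entry-point invocation follows; the three paths are placeholders, not files from the original project:

# Hypothetical invocation; all three paths are placeholders.
if __name__ == '__main__':
    main('dataset.json', 'saved_model/', 'predictions.json')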
Code example #3
def make_loaders(opt):
    """makes training/val/test"""
    batch_size = opt.batch_size * opt.world_size
    eval_batch_size = opt.eval_batch_size * opt.world_size
    seq_length = opt.seq_length
    if seq_length < 0:
        seq_length = seq_length * opt.world_size
    eval_seq_length = opt.eval_seq_length
    if opt.eval_seq_length < 0:
        eval_seq_length = eval_seq_length * opt.world_size
    data_loader_args = {
        'num_workers': 0,
        'shuffle': opt.shuffle,
        'batch_size': batch_size,
        'pin_memory': True,
        'transpose': opt.transpose,
        'distributed': opt.world_size > 1,
        'rank': opt.rank,
        'world_size': opt.world_size,
        'drop_last': opt.world_size > 1
    }
    if opt.data_set_type == 'L2R':
        loader_type = data_utils.ShardLoader
        data_loader_args.update({
            'seq_len': seq_length,
            'persist_state': opt.persist_state,
            'samples_per_shard': opt.samples_per_shard
        })
    else:
        loader_type = data_utils.DataLoader
    split = get_split(opt)
    data_set_args = {
        'path': opt.data,
        'seq_length': seq_length,
        'lazy': opt.lazy,
        'delim': opt.delim,
        'text_key': opt.text_key,
        'label_key': opt.label_key,
        'preprocess': opt.preprocess,
        'ds_type': opt.data_set_type,
        'split': split,
        'loose': opt.loose_json,
        'tokenizer_type': opt.tokenizer_type,
        'tokenizer_model_path': opt.tokenizer_path,
        'vocab_size': opt.vocab_size,
        'model_type': opt.tokenizer_model_type,
        'non_binary_cols': opt.non_binary_cols,
        'process_fn': opt.process_fn
    }

    eval_loader_args = copy.copy(data_loader_args)
    eval_set_args = copy.copy(data_set_args)
    eval_set_args['split'] = [1.]
    # if optional eval args were set then replace their equivalent values in the arg dict
    if opt.eval_batch_size != 0:
        eval_loader_args['batch_size'] = eval_batch_size
    if opt.eval_seq_length != 0:
        eval_set_args['seq_length'] = eval_seq_length
        if opt.data_set_type == 'L2R':
            eval_loader_args['seq_len'] = eval_seq_length
    if opt.eval_text_key is not None:
        eval_set_args['text_key'] = opt.eval_text_key
    if opt.eval_label_key is not None:
        eval_set_args['label_key'] = opt.eval_label_key

    train = None
    valid = None
    test = None
    tokenizer = None  # guard: opt.data may be None, leaving tokenizer unset below

    if opt.data is not None:
        train, tokenizer = data_utils.make_dataset(**data_set_args)
        if should_split(split):
            train, valid, test = train
    eval_set_args['tokenizer'] = tokenizer

    if opt.valid is not None:
        eval_set_args['path'] = opt.valid
        valid, _ = data_utils.make_dataset(**eval_set_args)
    if test is None and opt.test is not None:
        eval_set_args['path'] = opt.test
        test, _ = data_utils.make_dataset(**eval_set_args)

    if train is not None and opt.batch_size > 0:
        train = loader_type(train, **data_loader_args)
    if valid is not None:
        valid = loader_type(valid, **eval_loader_args)
    if test is not None:
        test = loader_type(test, **eval_loader_args)
    return (train, valid, test), tokenizer
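
This snippet calls a should_split helper it does not define. A plausible one-liner, consistent with how the split list is used above (an assumption, not the project's actual code):

# Plausible should_split sketch (assumption): only split the dataset when the
# split spec reserves mass for more than one partition.
def should_split(split):
    return max(split) / sum(split) != 1.0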
Code example #4
    logging.info(f"On device throughput: {token_throughput:0.2f} tokens/s = {bytes_throughput:0.2f} GB/s")


if __name__ == "__main__":
    logging.basicConfig(
        level=logging.getLevelName('INFO'),
        format='%(asctime)s %(name)s %(levelname)s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')

    # Parse options
    opts = parse_args()

    if not opts.on_device_only:
        logger.info("Creating training dataset, infeed queue and benchmark.")
        # Create training dataset and infeed queue
        train_set, num_train, *_ = make_dataset(opts, use_synthetic_data=False, training=True)
        num_train = num_train // opts.batch_size

        infeed_train_queue = ipu_infeed_queue.IPUInfeedQueue(train_set)
        # Benchmark it
        infeed_perf_train = dataset_benchmark.infeed_benchmark(
            infeed_queue=infeed_train_queue,
            number_of_epochs=opts.epochs,
            elements_per_epochs=num_train,
            print_stats=False)
        ds_perf_train = dataset_benchmark.dataset_benchmark(
            dataset=train_set,
            number_of_epochs=opts.epochs,
            elements_per_epochs=num_train,
            print_stats=False,
            apply_options=True)
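
The snippet stops before the benchmark ops are evaluated. A hedged sketch of running them follows; the IPU dataset_benchmark utilities report a JSON string of per-epoch statistics, but the initializer step and the result handling below are assumptions:

# Hedged sketch (not from the original file): evaluate the benchmark ops.
import json

with tf.Session() as sess:
    ds_stats = json.loads(sess.run(ds_perf_train))
    sess.run(infeed_train_queue.initializer)  # assumed to be required first
    infeed_stats = json.loads(sess.run(infeed_perf_train))
    logging.info(f"Dataset benchmark: {ds_stats}")
    logging.info(f"Infeed benchmark: {infeed_stats}")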
Code example #5
def make_loaders(opt):
    """makes training/val/test"""
    batch_size = opt.batch_size * opt.world_size
    eval_batch_size = opt.eval_batch_size * opt.world_size
    seq_length = opt.seq_length
    if seq_length < 0:
        seq_length = seq_length * opt.world_size
    eval_seq_length = opt.eval_seq_length
    if opt.eval_seq_length < 0:
        eval_seq_length = eval_seq_length * opt.world_size
    # TODO: fix data race in lazy loader
    # data_loader_args = {'num_workers': 10, 'shuffle': opt.shuffle, 'batch_size': batch_size,
    data_loader_args = {
        'num_workers': 1,
        'shuffle': opt.shuffle,
        'batch_size': batch_size,
        'pin_memory': True,
        'transpose': opt.transpose,
        'distributed': opt.world_size > 1,
        'rank': opt.rank,
        'world_size': opt.world_size,
        'drop_last': opt.world_size > 1
    }
    split = get_split(opt)
    data_set_args = {
        'path': opt.data,
        'seq_length': seq_length,
        'lazy': opt.lazy,
        'text_key': opt.text_key,
        'label_key': opt.label_key,
        'preprocess': opt.preprocess,
        'persist_state': opt.persist_state,
        'delim': opt.delim,
        'num_shards': opt.num_shards,
        'ds_type': opt.data_set_type,
        'split': split,
        'loose': opt.loose_json
    }
    eval_loader_args = copy.copy(data_loader_args)
    eval_set_args = copy.copy(data_set_args)
    eval_set_args['split'] = [1.]
    # if optional eval args were set then replace their equivalent values in the arg dict
    if opt.eval_batch_size != 0:
        eval_loader_args['batch_size'] = eval_batch_size
    if opt.eval_seq_length != 0:
        eval_set_args['seq_length'] = eval_seq_length
    if opt.eval_text_key != 'None':
        eval_set_args['text_key'] = opt.eval_text_key
    if opt.eval_label_key != 'None':
        eval_set_args['label_key'] = opt.eval_label_key

    train = None
    valid = None
    test = None

    if opt.data != 'None':
        train = data_utils.make_dataset(**data_set_args)
        if should_split(split):
            train, valid, test = train

    if opt.valid != 'None':
        eval_set_args['path'] = opt.valid
        valid = data_utils.make_dataset(**eval_set_args)
    if test is None and opt.test != 'None':
        eval_set_args['path'] = opt.test
        test = data_utils.make_dataset(**eval_set_args)

    if train is not None and opt.batch_size > 0:
        train = data_utils.DataLoader(train, **data_loader_args)
    if valid is not None:
        if opt.data_set_type == 'unsupervised':
            if opt.eval_seq_length != 0:
                valid.set_seq_len(eval_seq_length)
            if opt.val_shards != 0:
                valid.set_num_shards(opt.val_shards)
        valid = data_utils.DataLoader(valid, **eval_loader_args)
    if test is not None:
        if opt.data_set_type == 'unsupervised':
            if opt.eval_seq_length != 0:
                test.set_seq_len(eval_seq_length)
            if opt.test_shards != 0:
                test.set_num_shards(opt.test_shards)
        test = data_utils.DataLoader(test, **eval_loader_args)
    return train, valid, test
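
Both make_loaders variants lean on a get_split helper that is not shown. A plausible sketch, assuming a comma-separated opt.split option such as "949,50,1" (both the option name and the normalization are assumptions):

# Plausible get_split sketch (assumption): parse "train,val,test" weights
# into normalized fractions, padding missing entries with zero.
def get_split_sketch(opt):
    weights = [float(s) for s in str(opt.split).split(',')]
    weights += [0.0] * (3 - len(weights))  # pad to train/val/test
    total = sum(weights)
    return [w / total for w in weights]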
Code example #6
def make_loaders(args, tokenizer):
    """makes training/val/test"""

    if args.use_tfrecords:
        return make_tfrecord_loaders(args)
    world_size = torch.distributed.get_world_size(
        group=mpu.get_data_parallel_group())
    if args.loader_scatter is not None:
        assert world_size % args.loader_scatter == 0
    batch_size = args.batch_size * world_size
    eval_batch_size = batch_size
    if args.eval_batch_size is not None:
        eval_batch_size = args.eval_batch_size * world_size
    seq_length = args.seq_length
    if seq_length < 0:
        seq_length = seq_length * world_size
    eval_seq_length = args.eval_seq_length
    if eval_seq_length is not None and eval_seq_length < 0:
        eval_seq_length = eval_seq_length * world_size
    split = get_split(args)
    data_set_args = {
        'path': args.train_data,
        'seq_length': seq_length,
        'mem_length': args.mem_length,
        'delim': args.delim,
        'text_key': args.text_key,
        'label_key': 'label',
        'ds_type': args.data_set_type,
        'split': split,
        'loose': args.loose_json,
        'max_preds_per_seq': args.max_preds_per_seq,
        'presplit_sentences': args.presplit_sentences,
        'sample_one_document': args.sample_one_document,
        'filter_english': args.filter_english,
        'pre_tokenize': not args.no_pre_tokenize,
        'tokenizer': tokenizer,
        'save_splits': args.save_splits,
        'load_splits': args.load_splits,
        'save_test_data': args.save_test_data,
        'no_lazy_loader': args.no_lazy_loader,
        'loader_scatter': args.loader_scatter,
        'data_parallel_rank': mpu.get_data_parallel_rank(),
        "non_sentence_start": args.non_sentence_start,
        "half_lazy_loader": args.half_lazy_loader
    }

    eval_set_args = copy.copy(data_set_args)
    eval_set_args['split'] = [1.]
    # if optional eval args were set then replace their
    # equivalent values in the arg dict
    if eval_seq_length:
        eval_set_args['seq_length'] = eval_seq_length
    if args.eval_max_preds_per_seq:
        eval_set_args['max_preds_per_seq'] = args.eval_max_preds_per_seq
    if args.eval_text_key is not None:
        eval_set_args['text_key'] = args.eval_text_key

    # make datasets splits and tokenizer
    train, valid, test = None, None, None

    if args.train_data is not None:
        train = data_utils.make_dataset(**data_set_args)
        if data_utils.should_split(split):
            train, valid, test = train
        eval_set_args['tokenizer'] = tokenizer

    # make training and val dataset if necessary
    if valid is None and args.valid_data is not None:
        eval_set_args['path'] = args.valid_data
        valid = data_utils.make_dataset(**eval_set_args)
        eval_set_args['tokenizer'] = tokenizer
    if test is None and args.test_data is not None:
        eval_set_args['path'] = args.test_data
        test = data_utils.make_dataset(**eval_set_args)

    # wrap datasets with data loader
    use_block = args.block_lm or args.encoder_decoder

    if train is not None and args.batch_size > 0:
        train = make_data_loader(train,
                                 tokenizer,
                                 batch_size,
                                 args.train_iters,
                                 args,
                                 shuffle=args.shuffle,
                                 block_collate=use_block)
        args.do_train = True
    else:
        args.do_train = False
    eval_batch_size = eval_batch_size if eval_batch_size != 0 else batch_size
    if valid is not None:
        valid = make_data_loader(valid,
                                 tokenizer,
                                 eval_batch_size,
                                 args.train_iters,
                                 args,
                                 shuffle=args.shuffle,
                                 block_collate=use_block)
        args.do_valid = True
    else:
        args.do_valid = False
    if test is not None:
        test = make_data_loader(test,
                                tokenizer,
                                eval_batch_size,
                                len(test) // eval_batch_size + 1,
                                args,
                                shuffle=args.shuffle,
                                block_collate=use_block)
        args.do_test = True
    else:
        args.do_test = False

    return train, valid, test
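
make_data_loader itself is defined elsewhere in this project. A minimal sketch of what such a wrapper could look like, assuming a torch DataLoader plus a distributed sampler; the names and defaults below are assumptions, and the real version also takes the tokenizer and an iteration count:

# Minimal sketch (assumptions throughout) of a make_data_loader-style wrapper.
import torch

def make_data_loader_sketch(dataset, batch_size, shuffle=True, num_workers=1):
    sampler = None
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        sampler = torch.utils.data.distributed.DistributedSampler(
            dataset, shuffle=shuffle)
        shuffle = False  # the sampler takes over shuffling
    return torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler,
        num_workers=num_workers, pin_memory=True,
        drop_last=sampler is not None)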
Code example #7
File: train_sparse.py, Project: WN1695173791/examples
def run_testing(opts, transformer):
    testing_graph = tf.Graph()
    with testing_graph.as_default():
        with tf.device("cpu"):
            logger.info("Creating test dataset")
            dataset, num_test, vocab = data_utils.make_dataset(
                opts,
                use_synthetic_data=opts.use_synthetic_data,
                training=False)

            batch_size = opts.batch_size
            if opts.pipeline:
                batch_size *= opts.gradient_accumulation_count
            batches_per_epoch = num_test // batch_size
            logger.info(f"Effective batch-size (global batch): {batch_size}")

            logger.info("Creating infeed and outfeed queues")
            test_infeed = IPUInfeedQueue(dataset, feed_name="test_infeed")
            test_outfeed = IPUOutfeedQueue(feed_name="test_outfeed")

        # Compile the forward pass for testing
        with scopes.ipu_scope("/device:IPU:0"):
            # Helper function
            def loop_builder(iterations, builder_func, infeed):
                return loops.repeat(iterations, builder_func, [], infeed)

            if opts.pipeline:
                logger.info("Creating pipelined test graph")
                test_loop = partial(forward_pass,
                                    opts,
                                    transformer,
                                    batches_per_epoch,
                                    False,
                                    test_outfeed,
                                    dense_queue=None,
                                    infeed=test_infeed)
            else:
                logger.info("Creating test graph")
                test_loop = partial(forward_pass, opts, transformer,
                                    batches_per_epoch, False, test_outfeed,
                                    None)
                test_loop = partial(loop_builder, batches_per_epoch, test_loop,
                                    test_infeed)
            test_loop = ipu_compiler.compile(test_loop, inputs=[])

        # Metrics
        with tf.device("cpu"):
            metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="metrics")
            metrics_initializer = tf.variables_initializer(
                var_list=metrics_vars)
            saver = tf.train.Saver()

    if opts.restore_epoch is None:
        checkpoint = tf.train.latest_checkpoint(opts.train_checkpoint_path)
    else:
        checkpoint = opts.train_checkpoint_path + "/model_" + str(
            opts.restore_epoch) + ".ckpt"

    with tf.Session(graph=testing_graph) as sess:
        # The sparsity will also be streamed from the checkpoint
        logger.info("Restoring weights")
        saver.restore(sess, checkpoint)
        sess.run(test_infeed.initializer)
        sess.run(metrics_initializer)

        # Run inference (whole dataset in one session call)
        logger.info("Testing...")
        dt = time.perf_counter()
        sess.run(test_loop)
        dt = time.perf_counter() - dt
        session_outputs = sess.run(test_outfeed.dequeue())[-1]

        # Test set performance
        # Log progress
        nll_loss = session_outputs['nll_loss'][-1]
        training_loss = session_outputs['training_loss'][-1]
        perplexity = session_outputs["perplexity"][-1]
        token_accuracy = session_outputs['token_accuracy'][-1]
        desc = (f"\nTraining loss : {training_loss:.4f}"
                f"\nXentropy loss : {nll_loss:.4f}"
                f"\nPerplexity : {perplexity:.3f}"
                f"\nToken accuracy: {token_accuracy:.2f}")
        logger.info(desc)

        if opts.decode and opts.log_level == 'INFO':
            text_pred, text_target = data_utils.decode_prediction(
                prediction=session_outputs['predictions'][-1],
                target=session_outputs['target'][-1],
                vocab=vocab)
            logger.info(f"Target: {text_target}\n"
                        f"Prediction: {text_pred}\n")
        os.sys.stdout.flush()

        logger.info(f"Test complete.")

    return desc
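
As a sanity check on the metrics logged above: assuming the usual definition, perplexity should be the exponential of the cross-entropy (nll) loss:

# Relationship between two of the logged quantities (assumes the standard
# definition of perplexity as exp of the per-token cross-entropy).
import math
nll_loss = 3.21  # illustrative value
print(f"Perplexity : {math.exp(nll_loss):.3f}")  # -> Perplexity : 24.779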
Code example #8
File: train_sparse.py, Project: WN1695173791/examples
def run_training(opts, transformer):
    # Construct the training graph
    training_graph = tf.Graph()
    with training_graph.as_default():
        with tf.device("cpu"):
            dataset, num_train, vocab = data_utils.make_dataset(
                opts,
                use_synthetic_data=opts.use_synthetic_data,
                training=True)

        # Calculate dataset length
        batch_size = opts.batch_size
        if opts.pipeline:
            batch_size *= opts.gradient_accumulation_count
        batches_per_epoch = num_train // batch_size
        io_steps_per_epoch = batches_per_epoch // opts.repeat_count
        total_io_steps = opts.nepochs * io_steps_per_epoch
        total_global_steps = opts.nepochs * io_steps_per_epoch * opts.repeat_count
        logger.info(f"Effective batch-size (global batch): {batch_size}, "
                    f"IO steps per epoch: {io_steps_per_epoch}, "
                    f"Total IO steps: {total_io_steps} "
                    f"Total global steps: {total_global_steps}")

        if opts.prune_ratio is not None and opts.prune_ratio > 0:
            # Compute the pruning ratio when the learning rate will reach a minimum
            lr_decay_steps = opts.cooldown_steps + opts.warmup_steps
            lr_min_epochs = lr_decay_steps / (io_steps_per_epoch *
                                              opts.repeat_count)
            remaining_prune_ratio = opts.prune_ratio * sparse_training.cosine_prune_function(
                lr_decay_steps, total_global_steps, opts.cosine_prune_schedule)
            logger.warn(
                f"\n\nThe learning rate schedule will reach a minimum after {lr_min_epochs:0.2f} epochs, "
                f"at which point the pruning ratio will be {remaining_prune_ratio:0.3f}\n\n"
            )
            logger.info(
                f"Cosine prune schedule options: {opts.cosine_prune_schedule}")

        logger.info("Creating infeed and outfeed queues")
        # Queues for streaming from host to device and back
        train_infeed = IPUInfeedQueue(dataset, feed_name="train_infeed")
        train_outfeed = IPUOutfeedQueue(feed_name="train_outfeed")
        prune_and_grow_outfeed = IPUOutfeedQueue(
            feed_name="prune_and_grow_outfeed")

        # Helper function
        def loop_builder(iterations, builder_func, infeed):
            return loops.repeat(iterations, builder_func, [], infeed)

        # Compile the forward and backward pass for training
        with scopes.ipu_scope("/device:IPU:0"):
            if opts.pipeline:
                logger.info("Creating pipelined training graph")
                train_loop = partial(forward_pass, opts, transformer,
                                     opts.repeat_count, True, train_outfeed,
                                     prune_and_grow_outfeed, train_infeed)
            else:
                logger.info("Creating training graph")
                train_body = partial(forward_pass, opts, transformer,
                                     opts.repeat_count, True, train_outfeed,
                                     prune_and_grow_outfeed)
                train_loop = partial(loop_builder, opts.repeat_count,
                                     train_body, train_infeed)
            train_loop = ipu_compiler.compile(train_loop, inputs=[])
            transformer.buildSparsityUpdateOps()

        # Metrics
        with tf.device("cpu"):
            metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                             scope="metrics")
            metrics_initializer = tf.variables_initializer(
                var_list=metrics_vars)
            saver = tf.train.Saver()

            # These ops are declared here so that the graph can be frozen afterwards
            global_initializer = tf.global_variables_initializer()
            train_outfeed_dequeue = train_outfeed.dequeue()
            if opts.prune_ratio is not None and opts.prune_ratio > 0:
                prune_and_grow_dequeue = prune_and_grow_outfeed.dequeue()
            utils.move_variable_initialization_to_cpu()

            # Tensorboard
            log_name = "logs/" + datetime.now().isoformat()
            summary_writer = tf.summary.FileWriter(logdir=os.path.join(
                opts.train_checkpoint_path, log_name),
                                                   flush_secs=5)

    # Run the model:
    training_graph.finalize()  # no more new ops added from here on out
    with tf.Session(graph=training_graph) as sess:
        logger.info(f"Initializing training session")
        sess.run(global_initializer)
        sess.run(train_infeed.initializer)
        logger.info(f"Training...")
        progress = tqdm(range(opts.nepochs))
        for e in progress:
            sess.run(metrics_initializer)
            for io_step in range(io_steps_per_epoch):
                # Train the model
                step_start_time = time.perf_counter()
                sess.run(train_loop)
                ipu_train_time = time.perf_counter() - step_start_time

                session_outputs = sess.run(train_outfeed_dequeue)[-1]
                logger.debug(f"Train outputs: {session_outputs.keys()}")

                # Calculate avg throughput
                num_tokens = transformer.source_sequence_length * opts.repeat_count * batch_size
                throughput = num_tokens / ipu_train_time

                # Log progress - average stats over the last accumulation step only:
                start_point = -1 if not opts.pipeline else -opts.gradient_accumulation_count
                lr = np.mean(session_outputs["learning_rate"][start_point:])
                training_loss = np.mean(
                    session_outputs['training_loss'][start_point:])
                std_training_loss = np.std(
                    session_outputs['training_loss'][start_point:])
                nll_loss = np.mean(session_outputs['nll_loss'][start_point:])
                perplexity = np.mean(
                    session_outputs["perplexity"][start_point:])
                token_accuracy = np.mean(
                    session_outputs['token_accuracy'][start_point:])
                global_step = session_outputs['global_step'][start_point:][-1]
                logger.info(
                    f"\nEpoch {e}: io_step {io_step+1}/{io_steps_per_epoch}"
                    f"\nGlobal step: {global_step}/{total_global_steps}"
                    f"\nTraining loss : {training_loss:.4f}"
                    f"\nTraining loss standard deviation: {std_training_loss:.4f}"
                    f"\nXentropy loss : {nll_loss:.4f}"
                    f"\nPerplexity : {perplexity:.3f}"
                    f"\nToken accuracy: {token_accuracy:.2f}"
                    f"\nLearning rate: {lr:3.4e}"
                    f"\nThroughput {throughput:.1f} token/s")

                if opts.decode and logger.level <= logging.INFO:
                    try:
                        text_pred, text_target = data_utils.decode_prediction(
                            prediction=session_outputs['predictions'][-1],
                            target=session_outputs['target'][-1],
                            vocab=vocab)
                        logger.info(
                            f"\nTarget: {text_target}\n\nPrediction: {text_pred}\n"
                        )
                    except Exception as ex:
                        logger.warn(f"Decoding failed: {ex}")

                summary_value = [
                    tf.Summary.Value(tag="perplexity",
                                     simple_value=perplexity),
                    tf.Summary.Value(tag="training_loss",
                                     simple_value=training_loss),
                    tf.Summary.Value(tag="stddev_training_loss",
                                     simple_value=std_training_loss),
                    tf.Summary.Value(tag="xentropy_loss",
                                     simple_value=nll_loss),
                    tf.Summary.Value(tag="token_accuracy",
                                     simple_value=token_accuracy),
                    tf.Summary.Value(tag="learning_rate", simple_value=lr),
                    tf.Summary.Value(tag="throughput",
                                     simple_value=throughput),
                    tf.Summary.Value(tag="epoch", simple_value=e)
                ]

                # If we just completed the last io step we do not
                # prune and grow regardless, otherwise check the prune ratio:
                if io_step + 1 < io_steps_per_epoch and transformer.prune_ratio is not None and transformer.prune_ratio > 0:
                    # Retrieve p and g results from the conditional queue:
                    prune_and_grow_data = sess.run(prune_and_grow_dequeue)
                    for k in prune_and_grow_data:
                        prune_and_grow_data[k] = prune_and_grow_data[k][-1]
                    logger.debug(
                        f"Prune and grow outputs: {prune_and_grow_data.keys()}"
                    )

                    prune_and_grow_time, cosine_schedule_factor = transformer.syncPruneAndRegrowOnHost(
                        opts.cosine_prune_schedule, global_step,
                        total_global_steps, prune_and_grow_data)
                    transformer.streamSparsityFromHostToDevice()
                    summary_value.extend([
                        tf.Summary.Value(tag="prune+grow_time",
                                         simple_value=prune_and_grow_time),
                        tf.Summary.Value(tag="cosine_schedule_factor",
                                         simple_value=cosine_schedule_factor)
                    ])

                    for layer_name, sparse_layer in transformer.sparse_layers.items(
                    ):
                        values_var = sparse_layer.get_values_var()
                        grad_w_name = values_var.name.replace(
                            'nz_values:0', 'grad_w')
                        grad_w = np.array(prune_and_grow_data[grad_w_name])
                        if opts.log_histograms:
                            histogram = tf_utils.make_histogram_proto(
                                grad_w, bins_count=opts.bins_count)
                            summary_value.extend([
                                tf.Summary.Value(tag=layer_name +
                                                 "/dense_grad_w",
                                                 histo=histogram)
                            ])

                        summary_value.extend([
                            tf.Summary.Value(tag=layer_name +
                                             "/dense_grad_w_stddev",
                                             simple_value=np.std(grad_w)),
                            tf.Summary.Value(tag=layer_name +
                                             "/dense_grad_w_mean",
                                             simple_value=np.mean(grad_w)),
                            tf.Summary.Value(tag=layer_name +
                                             "/dense_grad_w_min",
                                             simple_value=np.min(grad_w)),
                            tf.Summary.Value(tag=layer_name +
                                             "/dense_grad_w_max",
                                             simple_value=np.max(grad_w))
                        ])

                        for slot_name, slot in sparse_layer.get_slot_var_dict(
                        ).items():
                            slot_val = prune_and_grow_data[
                                slot.tf_variable.name]
                            if opts.log_histograms:
                                histogram = tf_utils.make_histogram_proto(
                                    slot_val, bins_count=opts.bins_count)
                                summary_value.extend([
                                    tf.Summary.Value(tag=slot_name,
                                                     histo=histogram)
                                ])
                            summary_value.extend([
                                tf.Summary.Value(
                                    tag=slot_name + "/stddev",
                                    simple_value=np.std(slot_val)),
                                tf.Summary.Value(
                                    tag=slot_name + "/mean",
                                    simple_value=np.mean(slot_val)),
                                tf.Summary.Value(
                                    tag=slot_name + "/min",
                                    simple_value=np.min(slot_val)),
                                tf.Summary.Value(tag=slot_name + "/max",
                                                 simple_value=np.max(slot_val))
                            ])

                # Log to tensorboard (outside any graph)
                summary = tf.Summary(value=summary_value)
                summary_writer.add_summary(summary, np.mean(global_step))
                if opts.use_wandb:
                    wandb.tensorflow.log(summary.SerializeToString())
                logger.info(
                    f"Total time for step {time.perf_counter() - step_start_time}"
                )
                logger.info(f"IPU train time for step {ipu_train_time}")

            logger.info(f"Saving model after epoch {e}")
            saver.save(
                sess,
                os.path.join(opts.train_checkpoint_path,
                             'model_' + str(e) + '.ckpt'))
            os.sys.stdout.flush()
        logger.info(f"Training complete.")
Code example #9
def make_loaders(args):
    """makes training/val/test"""

    if args.use_tfrecords:
        return make_tfrecord_loaders(args)
    world_size = torch.distributed.get_world_size(
        group=mpu.get_data_parallel_group())
    batch_size = args.batch_size * world_size
    eval_batch_size = batch_size
    if args.eval_batch_size is not None:
        eval_batch_size = args.eval_batch_size * world_size
    seq_length = args.seq_length
    if seq_length < 0:
        seq_length = seq_length * world_size
    eval_seq_length = args.eval_seq_length
    if eval_seq_length is not None and eval_seq_length < 0:
        eval_seq_length = eval_seq_length * world_size
    split = get_split(args)
    data_set_args = {
        'local_rank': args.local_rank,
        'path': args.train_data,
        'seq_length': seq_length,
        'mem_length': args.mem_length,
        'lazy': args.lazy_loader,
        'xl_style': args.transformer_xl,
        'delim': args.delim,
        'text_key': args.text_key,
        'label_key': 'label',
        'non_binary_cols': None,
        'ds_type': args.data_set_type,
        'split': split,
        'loose': args.loose_json,
        'tokenizer_type': args.tokenizer_type,
        'tokenizer_model_path': args.tokenizer_path,
        'vocab_size': args.vocab_size,
        'model_type': args.tokenizer_model_type,
        'cache_dir': args.cache_dir,
        'max_preds_per_seq': args.max_preds_per_seq,
        'presplit_sentences': args.presplit_sentences,
        'sample_one_document': args.sample_one_document,
        'pre_tokenize': not args.not_pre_tokenize
    }

    eval_set_args = copy.copy(data_set_args)
    eval_set_args['split'] = [1.]
    # if optional eval args were set then replace their
    # equivalent values in the arg dict
    if eval_seq_length:
        eval_set_args['seq_length'] = eval_seq_length
    if args.eval_max_preds_per_seq:
        eval_set_args['max_preds_per_seq'] = args.eval_max_preds_per_seq
    if args.eval_text_key is not None:
        eval_set_args['text_key'] = args.eval_text_key

    # make datasets splits and tokenizer
    train = None
    valid = None
    test = None

    if args.train_data is not None:
        train, tokenizer = data_utils.make_dataset(**data_set_args)
        if data_utils.should_split(split):
            train, valid, test = train
        eval_set_args['tokenizer'] = tokenizer

    # make training and val dataset if necessary
    if valid is None and args.valid_data is not None:
        eval_set_args['path'] = args.valid_data
        valid, tokenizer = data_utils.make_dataset(**eval_set_args)
        eval_set_args['tokenizer'] = tokenizer
    if test is None and args.test_data is not None:
        eval_set_args['path'] = args.test_data
        test, tokenizer = data_utils.make_dataset(**eval_set_args)

    # wrap datasets with data loader
    if train is not None and args.batch_size > 0:
        train = make_data_loader(train, batch_size, args)
        args.do_train = True
    else:
        args.do_train = False
    eval_batch_size = eval_batch_size if eval_batch_size != 0 else batch_size
    if valid is not None:
        valid = make_data_loader(valid, eval_batch_size, args)
        args.do_valid = True
    else:
        args.do_valid = False
    if test is not None:
        test = make_data_loader(test, eval_batch_size, args)
        args.do_test = True
    else:
        args.do_test = False

    return (train, valid, test), tokenizer
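
A hypothetical call site, showing the return contract and the do_train/do_valid/do_test flags this function sets on args:

# Hypothetical call site; args comes from the script's argument parser.
(train, valid, test), tokenizer = make_loaders(args)
if args.do_train:
    pass  # hand train (a data loader) and tokenizer to the training loop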
Code example #10
def train(n_epochs=10):
    data_file = '../data/train-stanford-raw.conll'
    # if vocab_file is given (ie for pretrained wordvectors), use x2i and i2x from this file.
    # If not given, create new vocab file in ../data
    vocab_file = None

    log_folder = '../logs'
    model_folder = '../models'
    model_name = 'wsj_3'

    model_file = os.path.join(model_folder, model_name + '_{}.model')
    log_file = open(os.path.join(log_folder, model_name + '.csv'), 'w', 1)
    print('epoch,train_loss,val_loss,arc_acc,lab_acc', file=log_file)

    batch_size = 64
    prints_per_epoch = 10
    n_epochs *= prints_per_epoch

    # load data
    print('loading data...')
    data, x2i, i2x = make_dataset(data_file)

    if not vocab_file:
        with open('../data/vocab_{}.pkl'.format(model_name), 'wb') as f:
            pickle.dump((x2i, i2x), f)

    # make train and val batch loaders
    train_data, val_data = split_train_test(data)
    print('# train sentences', len(train_data))
    print('# val sentences', len(val_data))
    train_loader = batch_loader(train_data, batch_size)
    val_loader = batch_loader(val_data, batch_size, shuffle=False)

    print('creating model...')
    # make model
    model = BiAffineParser(word_vocab_size=len(x2i['word']),
                           word_emb_dim=100,
                           pos_vocab_size=len(x2i['tag']),
                           pos_emb_dim=28,
                           emb_dropout=0.33,
                           lstm_hidden=512,
                           lstm_depth=3,
                           lstm_dropout=.33,
                           arc_hidden=256,
                           arc_depth=1,
                           arc_dropout=.33,
                           arc_activation='ReLU',
                           lab_hidden=128,
                           lab_depth=1,
                           lab_dropout=.33,
                           lab_activation='ReLU',
                           n_labels=len(x2i['label']))
    print(model)
    model.cuda()
    base_params, arc_params, lab_params = model.get_param_groups()

    opt = Adam([
        {
            'params': base_params,
            'lr': 2e-3
        },
        {
            'params': arc_params,
            'lr': 2e-3
        },
        {
            'params': lab_params,
            'lr': 1e-4
        },
    ],
               betas=[.9, .9])
    sched = ReduceLROnPlateau(opt,
                              threshold=1e-3,
                              patience=8,
                              factor=.4,
                              verbose=True)

    n_train_batches = int(len(train_data) / batch_size)
    n_val_batches = int(len(val_data) / batch_size)
    batches_per_epoch = int(n_train_batches / prints_per_epoch)

    for epoch in range(n_epochs):
        t0 = time.time()

        # Training
        train_loss = 0
        model.train()
        for i in range(batches_per_epoch):
            opt.zero_grad()

            # Load batch
            words, tags, arcs, lengths = next(train_loader)
            words = words.cuda()
            tags = tags.cuda()

            # Forward
            S_arc, S_lab = model(words, tags, lengths=lengths)

            # Calculate loss
            arc_loss = get_arc_loss(S_arc, arcs)
            lab_loss = get_label_loss(S_lab, arcs)
            loss = arc_loss + .025 * lab_loss
            train_loss += arc_loss.item() + lab_loss.item()  # .item(): .data[0] is pre-0.4 PyTorch

            # Backward
            loss.backward()
            opt.step()

        train_loss /= batches_per_epoch

        # Evaluation
        val_loss = 0
        arc_acc = 0
        lab_acc = 0
        model.eval()
        for i in range(n_val_batches):
            words, tags, arcs, lengths = next(val_loader)
            words = words.cuda()
            tags = tags.cuda()

            S_arc, S_lab = model(words, tags, lengths=lengths)

            arc_loss = get_arc_loss(S_arc, arcs)
            lab_loss = get_label_loss(S_lab, arcs)
            loss = arc_loss + lab_loss

            val_loss += arc_loss.item() + lab_loss.item()
            arc_acc += get_arc_accuracy(S_arc, arcs)
            lab_acc += get_label_accuracy(S_lab, arcs)

        val_loss /= n_val_batches
        arc_acc /= n_val_batches
        lab_acc /= n_val_batches
        epoch_time = time.time() - t0

        print(
            'epoch {:.1f}\t train_loss {:.3f}\t val_loss {:.3f}\t arc_acc {:.3f}\t lab_acc {:.3f}\t time {:.1f} sec'
            .format(epoch / prints_per_epoch, train_loss, val_loss, arc_acc,
                    lab_acc, epoch_time),
            end="\r")

        print('{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}'.format(
            epoch / prints_per_epoch, train_loss, val_loss, arc_acc, lab_acc),
              file=log_file)

        sched.step(val_loss)

    print('Done!')
    torch.save(model, model_file.format(val_loss))
    log_file.close()
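
The training loop draws batches with next() from batch_loader, which is not shown. A hedged sketch of its cycling and shuffling pattern (the real loader also pads each batch and returns (words, tags, arcs, lengths) tensors; that packing is omitted here):

# Hedged sketch of the batch_loader generator assumed above: an endless
# iterator of raw batches. Padding and tensorizing are deliberately omitted.
import random

def batch_loader_sketch(data, batch_size, shuffle=True):
    while True:  # the training loop calls next() indefinitely, so cycle
        order = list(range(len(data)))
        if shuffle:
            random.shuffle(order)
        for i in range(0, len(order) - batch_size + 1, batch_size):
            yield [data[j] for j in order[i:i + batch_size]]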
Code example #11
def make_loaders(args):
    """makes training/val/test"""

    if args.use_tfrecords:
        return make_tfrecord_loaders(args)
    batch_size = args.batch_size * args.world_size
    eval_batch_size = batch_size
    if args.eval_batch_size is not None:
        eval_batch_size = args.eval_batch_size * args.world_size
    seq_length = args.seq_length
    if seq_length < 0:
        seq_length = seq_length * args.world_size
    eval_seq_length = args.eval_seq_length
    if eval_seq_length is not None and eval_seq_length < 0:
        eval_seq_length = eval_seq_length * args.world_size
    split = get_split(args)
    data_set_args = {
        'path': args.train_data,
        'seq_length': seq_length,
        'lazy': args.lazy_loader,
        'delim': args.delim,
        'text_key': args.text_key,
        'label_key': 'label',
        'non_binary_cols': None,
        'ds_type': args.data_set_type,
        'split': split,
        'loose': args.loose_json,
        'tokenizer_type': args.tokenizer_type,
        'tokenizer_model_path': args.tokenizer_path,
        'vocab_size': args.vocab_size,
        'model_type': args.tokenizer_model_type,
        'cache_dir': args.cache_dir,
        'max_preds_per_seq': args.max_preds_per_seq
    }

    eval_set_args = copy.copy(data_set_args)
    eval_set_args['split'] = [1.]
    # if optional eval args were set then replace their
    # equivalent values in the arg dict
    if eval_seq_length:
        eval_set_args['seq_length'] = eval_seq_length
    if args.eval_max_preds_per_seq:
        eval_set_args['max_preds_per_seq'] = args.eval_max_preds_per_seq
    if args.eval_text_key is not None:
        eval_set_args['text_key'] = args.eval_text_key

    # make datasets splits and tokenizer
    train = None
    valid = None
    test = None
    tokenizer = None  # guard: args.train_data may be None, leaving tokenizer unset below

    if args.train_data is not None:
        train, tokenizer = data_utils.make_dataset(**data_set_args)
        if data_utils.should_split(split):
            train, valid, test = train
    eval_set_args['tokenizer'] = tokenizer

    # make training and val dataset if necessary
    if valid is None and args.valid_data is not None:
        eval_set_args['path'] = args.valid_data
        valid, _ = data_utils.make_dataset(**eval_set_args)
    if test is None and args.test_data is not None:
        eval_set_args['path'] = args.test_data
        test, _ = data_utils.make_dataset(**eval_set_args)

    # wrap datasets with data loader
    if train is not None and args.batch_size > 0:
        train = make_data_loader(train, batch_size, args)
    eval_batch_size = eval_batch_size if eval_batch_size != 0 else batch_size
    if valid is not None:
        valid = make_data_loader(valid, eval_batch_size, args)
    if test is not None:
        test = make_data_loader(test, eval_batch_size, args)

    return (train, valid, test), tokenizer