Example #1
    BUCKETS = 4

    # get_model = get_permute_model
    # get_model = get_basic_model
    get_model = get_selective_model
    # get_model = get_bowv_model

    CONCAT_SENTENCES = True
    if get_model == get_selective_model or get_model == get_bowv_model:
        CONCAT_SENTENCES = False

    from jtr.preprocess.vocab import NeuralVocab
    from jtr.load.embeddings.embeddings import load_embeddings

    if USE_PRETRAINED_EMBEDDINGS:
        embeddings = load_embeddings('./data/TBD/GloVe/glove.6B.100d.txt',
                                     'glove')
        #embeddings = load_embeddings('./jtr/data/word2vec/GoogleNews-vectors-negative300.bin.gz', 'word2vec')
        emb = embeddings.get
    else:
        emb = None
    vocab = Vocab(emb=emb)
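    # emb is a token -> vector lookup (embeddings.get) when pretrained
    # embeddings are used; with emb=None the vocabulary stays purely symbolic
    # and embeddings are typically initialized randomly downstream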

    print('loading corpus..')
    if DEBUG:
        train_corpus = load_corpus("debug", USE_PERMUTATION_INDEX)
        dev_corpus = load_corpus("debug", USE_PERMUTATION_INDEX)
        test_corpus = load_corpus("debug", USE_PERMUTATION_INDEX)
    else:
        train_corpus = load_corpus("train", USE_PERMUTATION_INDEX)
        dev_corpus = load_corpus("dev", USE_PERMUTATION_INDEX)
        test_corpus = load_corpus("test", USE_PERMUTATION_INDEX)
Example #2
    if DEBUG:
        train_data = load("./jtr/data/SNLI/snli_1.0/snli_1.0_train.jsonl",
                          DEBUG_EXAMPLES)
        dev_data = train_data
        test_data = train_data
    else:
        train_data, dev_data, test_data = [load("./jtr/data/SNLI/snli_1.0/snli_1.0_%s.jsonl" % name)\
                                           for name in ["train", "dev", "test"]]

    print(train_data)

    print('loaded train/dev/test data')
    if pretrain:
        if DEBUG:
            emb_file = 'glove.6B.50d.txt'
            embeddings = load_embeddings(
                path.join('jtr', 'data', 'GloVe', emb_file), 'glove')
        else:
            #emb_file = 'GoogleNews-vectors-negative300.bin.gz'
            #embeddings = load_embeddings(path.join('jtr', 'data', 'word2vec', emb_file),'word2vec')
            emb_file = 'glove.840B.300d.zip'
            embeddings = load_embeddings(
                path.join('jtr', 'data', 'GloVe', emb_file), 'glove')
        print('loaded pre-trained embeddings')

    emb = embeddings.get if pretrain else None

    checkpoint()
    print('encode train data')
    train_data, train_vocab, train_target_vocab = pipeline(train_data,
                                                           emb=emb,
                                                           normalize=True)
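    # pipeline() tokenizes and normalizes the raw examples and encodes them as
    # symbol ids, returning the encoded data together with the vocabularies it
    # built (here the input vocab and the target vocab)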
Example #3
def main():
    support_alts = {'none', 'single', 'multiple'}
    question_alts = answer_alts = {'single', 'multiple'}
    candidate_alts = {'open', 'per-instance', 'fixed'}

    train_default = 'tests/test_data/SNLI/train.json'
    dev_default = 'tests/test_data/SNLI/dev.json'
    test_default = 'tests/test_data/SNLI/test.json'

    parser = argparse.ArgumentParser(
        description='Train and Evaluate a Machine Reader',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--debug',
        action='store_true',
        help=
        "Run in debug mode, in which case the training file is also used for testing"
    )

    parser.add_argument(
        '--debug_examples',
        default=10,
        type=int,
        help="If in debug mode, how many examples should be used (default 2000)"
    )
    parser.add_argument('--train',
                        default=train_default,
                        type=str,
                        help="jtr training file")
    parser.add_argument('--dev',
                        default=dev_default,
                        type=str,
                        help="jtr dev file")
    parser.add_argument('--test',
                        default=test_default,
                        type=str,
                        help="jtr test file")
    parser.add_argument(
        '--supports',
        default='single',
        choices=sorted(support_alts),
        help=
        "None, single (default) or multiple supporting statements per instance; multiple_flat reads multiple instances creates a separate instance for every support"
    )
    parser.add_argument(
        '--questions',
        default='single',
        choices=sorted(question_alts),
        help="None, single (default), or multiple questions per instance")
    parser.add_argument(
        '--candidates',
        default='fixed',
        choices=sorted(candidate_alts),
        help="Open, per-instance, or fixed (default) candidates")
    parser.add_argument('--answers',
                        default='single',
                        choices=sorted(answer_alts),
                        help="Single or multiple")
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help="Batch size for training data, default 128")
    parser.add_argument('--dev_batch_size',
                        default=128,
                        type=int,
                        help="Batch size for development data, default 128")
    parser.add_argument(
        '--repr_dim_input',
        default=128,
        type=int,
        help=("Size of the input representation (embeddings),",
              "default 128 (embeddings cut off or extended if not",
              "matched with pretrained embeddings)"))
    parser.add_argument('--repr_dim',
                        default=128,
                        type=int,
                        help="Size of the hidden representations, default 128")

    parser.add_argument(
        '--pretrain',
        action='store_true',
        help=
        "Use pretrained embeddings, by default the initialisation is random")
    parser.add_argument(
        '--with_char_embeddings',
        action='store_true',
        help="Use also character based embeddings in readers which support it."
    )
    parser.add_argument('--vocab_from_embeddings',
                        action='store_true',
                        help="Use fixed vocab of pretrained embeddings")
    parser.add_argument(
        '--train_pretrain',
        action='store_true',
        help=
        "Continue training pretrained embeddings together with model parameters"
    )
    parser.add_argument(
        '--normalize_pretrain',
        action='store_true',
        help=
        "Normalize pretrained embeddings, default True (randomly initialized embeddings have expected unit norm too)"
    )

    parser.add_argument('--embedding_format',
                        default='word2vec',
                        choices=["glove", "word2vec"],
                        help="format of embeddings to be loaded")
    parser.add_argument(
        '--embedding_file',
        default='jtr/data/SG_GoogleNews/GoogleNews-vectors-negative300.bin.gz',
        type=str,
        help="format of embeddings to be loaded")

    parser.add_argument('--vocab_maxsize', default=sys.maxsize, type=int)
    parser.add_argument('--vocab_minfreq', default=2, type=int)
    parser.add_argument(
        '--vocab_sep',
        default=True,
        type=bool,
        help=
        'Whether to use separate vocabularies for questions, supports, candidates and answers. This needs to be set to True for candidate-based methods.'
    )
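    # NOTE: type=bool above is a common argparse pitfall: bool() on any
    # non-empty string (including 'False') returns True, so --vocab_sep False
    # still yields True. A minimal sketch of the usual workaround, assuming a
    # hypothetical str2bool helper not present in this script:
    #   def str2bool(v):
    #       return str(v).lower() in ('true', '1', 'yes')
    #   parser.add_argument('--vocab_sep', default=True, type=str2bool, ...)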
    parser.add_argument('--model',
                        default='snli_reader',
                        choices=sorted(readers.readers.keys()),
                        help="Reading model to use")
    parser.add_argument('--learning_rate',
                        default=0.001,
                        type=float,
                        help="Learning rate, default 0.001")
    parser.add_argument('--learning_rate_decay',
                        default=0.5,
                        type=float,
                        help="Learning rate decay, default 0.5")
    parser.add_argument('--l2',
                        default=0.0,
                        type=float,
                        help="L2 regularization weight, default 0.0")
    parser.add_argument(
        '--clip_value',
        default=0.0,
        type=float,
        help=
        "Gradients clipped between [-clip_value, clip_value] (default 0.0; no clipping)"
    )
    parser.add_argument(
        '--dropout',
        default=0.0,
        type=float,
        help="Probability for dropout on output (set to 0.0 for no dropout)")
    parser.add_argument('--epochs',
                        default=5,
                        type=int,
                        help="Number of epochs to train for, default 5")
    parser.add_argument('--checkpoint',
                        default=None,
                        type=int,
                        help="Number of batches before evaluation on devset.")

    parser.add_argument(
        '--negsamples',
        default=0,
        type=int,
        help="Number of negative samples, default 0 (= use full candidate list)"
    )
    parser.add_argument('--tensorboard_folder',
                        default=None,
                        help='Folder for tensorboard logs')
    parser.add_argument('--write_metrics_to',
                        default=None,
                        type=str,
                        help='Filename to log the metrics of the EvalHooks')
    parser.add_argument(
        '--prune',
        default='False',
        help='Whether the vocabulary should be pruned to the most frequent words.')
    parser.add_argument('--model_dir',
                        default='/tmp/jtreader',
                        type=str,
                        help="Directory to write reader to.")
    parser.add_argument('--out_pred',
                        default='out_pred.txt',
                        type=str,
                        help="File to write predictions to.")
    parser.add_argument('--log_interval',
                        default=100,
                        type=int,
                        help="interval for logging eta, training loss, etc.")
    parser.add_argument('--device',
                        default='/cpu:0',
                        type=str,
                        help='device setting for tensorflow')
    parser.add_argument('--lowercase',
                        action='store_true',
                        help='lowercase texts.')
    parser.add_argument('--seed', default=325, type=int, help="Seed for rngs.")
    parser.add_argument('--answer_size',
                        default=3,
                        type=int,
                        help=("How many answer does the output have. Used for "
                              "classification."))
    parser.add_argument(
        '--max_support_length',
        default=-1,
        type=int,
        help=
        "How large the support should be. Can be used for cutting or filtering QA examples."
    )

    args = parser.parse_args()

    # make everything deterministic
    random.seed(args.seed)
    tf.set_random_seed(args.seed)

    clip_value = None
    if args.clip_value != 0.0:
        clip_value = -abs(args.clip_value), abs(args.clip_value)
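    # clip_value becomes a (min, max) tuple, later passed as clip= to
    # reader.train() together with clip_op=tf.clip_by_value; the default 0.0
    # leaves gradients unclipped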

    logger.info('configuration:')
    for arg in vars(args):
        logger.info('\t{} : {}'.format(str(arg), str(getattr(args, arg))))

    # Get information about available CPUs and GPUs:
    # to set specific device, add CUDA_VISIBLE_DEVICES environment variable, e.g.
    # $ CUDA_VISIBLE_DEVICES=0 ./jtr_script.py

    logger.info('available devices:')
    for device in device_lib.list_local_devices():
        logger.info('device info: ' + str(device).replace("\n", " "))

    if args.debug:
        train_data = load_labelled_data(args.train, args.debug_examples,
                                        **vars(args))

        logger.info(
            'loaded {} samples as debug train/dev/test dataset '.format(
                args.debug_examples))

        dev_data = train_data
        test_data = train_data
        if args.pretrain:
            emb_file = 'glove.6B.50d.txt'
            embeddings = load_embeddings(path.join('data', 'GloVe', emb_file),
                                         'glove')
            logger.info('loaded pre-trained embeddings ({})'.format(emb_file))
            args.repr_dim_input = embeddings.lookup.shape[1]
        else:
            embeddings = Embeddings(None, None)
    else:
        train_data, dev_data = [
            load_labelled_data(name, **vars(args))
            for name in [args.train, args.dev]
        ]
        test_data = load_labelled_data(args.test,
                                       **vars(args)) if args.test else None
        logger.info('loaded train/dev/test data')
        if args.pretrain:
            embeddings = load_embeddings(args.embedding_file,
                                         args.embedding_format)
            logger.info('loaded pre-trained embeddings ({})'.format(
                args.embedding_file))
            args.repr_dim_input = embeddings.lookup.shape[1]
        else:
            embeddings = Embeddings(None, None)

    emb = embeddings

    vocab = Vocab(emb=emb, init_from_embeddings=args.vocab_from_embeddings)

    with tf.device(args.device):
        # build JTReader
        checkpoint()
        reader = readers.readers[args.model](vocab, vars(args))
        checkpoint()

        learning_rate = tf.get_variable("learning_rate",
                                        initializer=args.learning_rate,
                                        dtype=tf.float32,
                                        trainable=False)
        lr_decay_op = learning_rate.assign(args.learning_rate_decay *
                                           learning_rate)
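        # lr_decay_op multiplies the non-trainable learning_rate variable by
        # the decay factor; it is run from side_effect below whenever the
        # preferred dev metric gets worse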
        optim = tf.train.AdamOptimizer(learning_rate)

        if args.tensorboard_folder is not None:
            if os.path.exists(args.tensorboard_folder):
                shutil.rmtree(args.tensorboard_folder)
            sw = tf.summary.FileWriter(args.tensorboard_folder)
        else:
            sw = None

        # Hooks
        iter_interval = 1 if args.debug else args.log_interval
        hooks = [
            LossHook(reader, iter_interval, summary_writer=sw),
            ExamplesPerSecHook(reader, args.batch_size, iter_interval, sw),
            ETAHook(reader, iter_interval,
                    math.ceil(len(train_data) / args.batch_size), args.epochs,
                    args.checkpoint, sw)
        ]

        preferred_metric, best_metric = readers.eval_hooks[
            args.model].preferred_metric_and_best_score()

        def side_effect(metrics, prev_metric):
            """Returns: a state (in this case a metric) that is used as input for the next call"""
            m = metrics[preferred_metric]
            if prev_metric is not None and m < prev_metric:
                reader.sess.run(lr_decay_op)
                logger.info("Decayed learning rate to: %.5f" %
                            reader.sess.run(learning_rate))
            elif m > best_metric[0]:
                best_metric[0] = m
                if prev_metric is None:  # store whole model only at beginning of training
                    reader.store(args.model_dir)
                else:
                    reader.model_module.store(
                        reader.sess,
                        os.path.join(args.model_dir, "model_module"))
                logger.info("Saving model to: %s" % args.model_dir)
            return m

        # this is the standard hook for the model
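        # iter_interval=args.checkpoint evaluates every `checkpoint` batches;
        # when no checkpoint interval is given, epoch_interval=1 evaluates
        # once per epoch instead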
        hooks.append(readers.eval_hooks[args.model](
            reader,
            dev_data,
            summary_writer=sw,
            side_effect=side_effect,
            iter_interval=args.checkpoint,
            epoch_interval=(1 if args.checkpoint is None else None),
            write_metrics_to=args.write_metrics_to))

        # Train
        reader.train(optim,
                     training_set=train_data,
                     max_epochs=args.epochs,
                     hooks=hooks,
                     l2=args.l2,
                     clip=clip_value,
                     clip_op=tf.clip_by_value,
                     device=args.device)

        # Test final model
        if test_data is not None:
            test_eval_hook = readers.eval_hooks[args.model](
                reader,
                test_data,
                summary_writer=sw,
                epoch_interval=1,
                write_metrics_to=args.write_metrics_to)

            reader.load(args.model_dir)
            test_eval_hook.at_test_time(1)
Example #4
def main():
    t0 = time()

    # (1) Defined JTR models
    # Please add new models to models.__models__ when they work
    reader_models = {
        model_name: models.get_function(model_name)
        for model_name in models.__models__
    }

    support_alts = {'none', 'single', 'multiple'}
    question_alts = answer_alts = {'single', 'multiple'}
    candidate_alts = {'open', 'per-instance', 'fixed'}

    train_default = dev_default = test_default = '../tests/test_data/sentihood/overfit.json'

    # (2) Parse the input arguments
    parser = argparse.ArgumentParser(
        description='Train and Evaluate a Machine Reader')

    parser.add_argument(
        '--debug',
        action='store_true',
        help=
        "Run in debug mode, in which case the training file is also used for testing"
    )
    parser.add_argument(
        '--debug_examples',
        default=10,
        type=int,
        help="If in debug mode, how many examples should be used (default 2000)"
    )
    parser.add_argument('--train',
                        default=train_default,
                        type=argparse.FileType('r'),
                        help="jtr training file")
    parser.add_argument('--dev',
                        default=dev_default,
                        type=argparse.FileType('r'),
                        help="jtr dev file")
    parser.add_argument('--test',
                        default=test_default,
                        type=argparse.FileType('r'),
                        help="jtr test file")
    parser.add_argument(
        '--supports',
        default='single',
        choices=sorted(support_alts),
        help=
        "None, single (default) or multiple supporting statements per instance; "
        "multiple_flat reads multiple instances creates a separate instance for every support"
    )
    parser.add_argument(
        '--questions',
        default='single',
        choices=sorted(question_alts),
        help="None, single (default), or multiple questions per instance")
    parser.add_argument(
        '--candidates',
        default='fixed',
        choices=sorted(candidate_alts),
        help="Open, per-instance, or fixed (default) candidates")
    parser.add_argument('--answers',
                        default='single',
                        choices=sorted(answer_alts),
                        help="Single or multiple")
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help="Batch size for training data, default 128")
    parser.add_argument('--dev_batch_size',
                        default=128,
                        type=int,
                        help="Batch size for development data, default 128")
    parser.add_argument(
        '--repr_dim_input',
        default=300,
        type=int,
        help="Size of the input representation (embeddings),"
        "default 100 (embeddings cut off or extended if not matched with pretrained embeddings)"
    )
    parser.add_argument(
        '--repr_dim_input_trf',
        default=100,
        type=int,
        help=
        "Size of the input embeddings after reducing with fully_connected layer (default 100)"
    )
    parser.add_argument('--repr_dim_output',
                        default=100,
                        type=int,
                        help="Size of the output representation, default 100")

    parser.add_argument(
        '--pretrain',
        action='store_true',
        help=
        "Use pretrained embeddings, by default the initialisation is random")
    parser.add_argument(
        '--train_pretrain',
        action='store_true',
        help=
        "Continue training pretrained embeddings together with model parameters"
    )
    parser.add_argument(
        '--normalize_pretrain',
        action='store_true',
        help="Normalize pretrained embeddings, default False "
        "(randomly initialized embeddings have expected unit norm too)")

    parser.add_argument('--vocab_maxsize', default=sys.maxsize, type=int)
    parser.add_argument('--vocab_minfreq', default=2, type=int)
    parser.add_argument(
        '--vocab_sep',
        default=True,
        type=bool,
        help='Whether to use separate vocabularies for questions and supports '
        'vs. candidates and answers. This needs to be set to True for candidate-based methods.'
    )
    parser.add_argument('--model',
                        default='bicond_singlesupport_reader',
                        choices=sorted(reader_models.keys()),
                        help="Reading model to use")
    parser.add_argument('--learning_rate',
                        default=0.001,
                        type=float,
                        help="Learning rate, default 0.001")
    parser.add_argument('--l2',
                        default=0.0,
                        type=float,
                        help="L2 regularization weight, default 0.0")
    parser.add_argument(
        '--clip_value',
        default=None,
        type=float,
        help=
        "Gradients clipped between [-clip_value, clip_value] (default: no clipping)"
    )
    parser.add_argument(
        '--drop_keep_prob',
        default=1.0,
        type=float,
        help=
        "Keep probability for dropout on output (set to 1.0 for no dropout)")
    parser.add_argument('--epochs',
                        default=5,
                        type=int,
                        help="Number of epochs to train for, default 5")

    parser.add_argument('--tokenize',
                        dest='tokenize',
                        action='store_true',
                        help="Tokenize question and support")
    parser.add_argument('--no-tokenize',
                        dest='tokenize',
                        action='store_false',
                        help="Tokenize question and support")
    parser.set_defaults(tokenize=True)
    parser.add_argument('--lowercase',
                        dest='lowercase',
                        action='store_true',
                        help="Lowercase data")

    parser.add_argument(
        '--negsamples',
        default=0,
        type=int,
        help="Number of negative samples, default 0 (= use full candidate list)"
    )
    parser.add_argument('--tensorboard_folder',
                        default='./.tb/',
                        help='Folder for tensorboard logs')
    parser.add_argument('--write_metrics_to',
                        default=None,
                        type=str,
                        help='Filename to log the metrics of the EvalHooks')
    parser.add_argument(
        '--prune',
        default='False',
        help='Whether the vocabulary should be pruned to the most frequent words.')
    parser.add_argument('--seed', default=1337, type=int, help='random seed')
    parser.add_argument('--logfile', default=None, type=str, help='log file')

    args = parser.parse_args()

    clip_range = (-abs(args.clip_value),
                  abs(args.clip_value)) if args.clip_value else None

    if args.logfile:
        fh = logging.FileHandler(args.logfile)
        fh.setLevel(logging.INFO)
        fh.setFormatter(
            logging.Formatter('%(levelname)s:%(name)s:\t%(message)s'))
        logger.addHandler(fh)

    logger.info('Configuration:')
    for arg in vars(args):
        logger.info('\t{} : {}'.format(str(arg), str(getattr(args, arg))))

    # set random seed
    tf.set_random_seed(args.seed)
    DefaultRandomState(args.seed)

    # Get information about available CPUs and GPUs:
    # to set specific device, add CUDA_VISIBLE_DEVICES environment variable, e.g.
    # $ CUDA_VISIBLE_DEVICES=0 ./jtr_script.py

    logger.info('available devices:')
    for device in device_lib.list_local_devices():
        logger.info('device info: ' + str(device).replace("\n", " "))

    # (3) Read the train, dev, and test data (optionally loading pre-trained embeddings)
    embeddings = None
    train_data, dev_data, test_data = None, None, None

    if args.debug:
        train_data = jtr_load(args.train, args.debug_examples, **vars(args))
        dev_data, test_data = train_data, train_data

        logger.info(
            'Loaded {} samples as debug train/dev/test dataset '.format(
                args.debug_examples))

        if args.pretrain:
            emb_file = 'glove.6B.50d.txt'
            embeddings = load_embeddings(
                path.join('jtr', 'data', 'GloVe', emb_file), 'glove')
            logger.info('loaded pre-trained embeddings ({})'.format(emb_file))
    else:
        if args.train:
            train_data = jtr_load(args.train, **vars(args))

        if args.dev:
            dev_data = jtr_load(args.dev, **vars(args))

        if args.test:
            test_data = jtr_load(args.test, **vars(args))

        logger.info('loaded train/dev/test data')
        if args.pretrain:
            emb_file = 'GoogleNews-vectors-negative300.bin.gz'
            embeddings = load_embeddings(
                path.join('jtr', 'data', 'SG_GoogleNews', emb_file),
                'word2vec')
            logger.info('loaded pre-trained embeddings ({})'.format(emb_file))

    emb = embeddings.get if args.pretrain else None

    checkpoint()

    # (4) Preprocess the data (tokenize, normalize, add
    # start and end of sentence tags) via the JTR pipeline method

    if args.vocab_minfreq != 0 and args.vocab_maxsize != 0:
        logger.info('build vocab based on train data')
        _, train_vocab, train_answer_vocab, train_candidate_vocab = pipeline(
            train_data,
            normalize=True,
            sepvocab=args.vocab_sep,
            tokenization=args.tokenize,
            lowercase=args.lowercase,
            emb=emb)
        if args.prune == 'True':
            train_vocab = train_vocab.prune(args.vocab_minfreq,
                                            args.vocab_maxsize)

        logger.info('encode train data')
        train_data, _, _, _ = pipeline(train_data,
                                       train_vocab,
                                       train_answer_vocab,
                                       train_candidate_vocab,
                                       normalize=True,
                                       freeze=True,
                                       sepvocab=args.vocab_sep,
                                       tokenization=args.tokenize,
                                       lowercase=args.lowercase,
                                       negsamples=args.negsamples)
    else:
        train_data, train_vocab, train_answer_vocab, train_candidate_vocab = pipeline(
            train_data,
            emb=emb,
            normalize=True,
            tokenization=args.tokenize,
            lowercase=args.lowercase,
            negsamples=args.negsamples,
            sepvocab=args.vocab_sep)

    N_oov = train_vocab.count_oov()
    N_pre = train_vocab.count_pretrained()
    logger.info(
        'In Training data vocabulary: {} pre-trained, {} out-of-vocab.'.format(
            N_pre, N_oov))

    vocab_size = len(train_vocab)
    answer_size = len(train_answer_vocab)

    # this is a bit of a hack, since args are supposed to be user-defined,
    # but it is cleaner this way for passing args on to the reader models
    parser.add_argument('--vocab_size', default=vocab_size, type=int)
    parser.add_argument('--answer_size', default=answer_size, type=int)
    args = parser.parse_args()

    checkpoint()
    logger.info('encode dev data')
    dev_data, _, _, _ = pipeline(dev_data,
                                 train_vocab,
                                 train_answer_vocab,
                                 train_candidate_vocab,
                                 freeze=True,
                                 tokenization=args.tokenize,
                                 lowercase=args.lowercase,
                                 sepvocab=args.vocab_sep)
    checkpoint()
    logger.info('encode test data')
    test_data, _, _, _ = pipeline(test_data,
                                  train_vocab,
                                  train_answer_vocab,
                                  train_candidate_vocab,
                                  freeze=True,
                                  tokenization=args.tokenize,
                                  lowercase=args.lowercase,
                                  sepvocab=args.vocab_sep)
    checkpoint()

    # (5) Create NeuralVocab
    logger.info('build NeuralVocab')
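    # NeuralVocab wraps the symbolic train_vocab with embedding tensors:
    # input_size is the embedding width, and reduced_input_size adds a fully
    # connected projection down to repr_dim_input_trf (see its help text above)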
    nvocab = NeuralVocab(train_vocab,
                         input_size=args.repr_dim_input,
                         reduced_input_size=args.repr_dim_input_trf,
                         use_pretrained=args.pretrain,
                         train_pretrained=args.train_pretrain,
                         unit_normalize=args.normalize_pretrain)

    with tf.variable_scope("candvocab"):
        candvocab = NeuralVocab(train_candidate_vocab,
                                input_size=args.repr_dim_input,
                                reduced_input_size=args.repr_dim_input_trf,
                                use_pretrained=args.pretrain,
                                train_pretrained=args.train_pretrain,
                                unit_normalize=args.normalize_pretrain)
    checkpoint()

    # (6) Create TensorFlow placeholders and initialize model
    logger.info('create placeholders')
    placeholders = create_placeholders(train_data)
    logger.info('build model {}'.format(args.model))

    # add dropout on the model level
    # todo: more general solution
    options_train = vars(args)
    with tf.name_scope("Train"):
        with tf.variable_scope("Model", reuse=None):
            (logits_train, loss_train,
             predict_train) = reader_models[args.model](placeholders,
                                                        nvocab,
                                                        candvocab=candvocab,
                                                        **options_train)

    options_valid = {k: v for k, v in options_train.items()}
    options_valid["drop_keep_prob"] = 1.0
    with tf.name_scope("Valid_Test"):
        with tf.variable_scope("Model", reuse=True):
            (logits_valid, loss_valid,
             predict_valid) = reader_models[args.model](placeholders,
                                                        nvocab,
                                                        candvocab=candvocab,
                                                        **options_valid)

    # (7) Batch the data via jtr.batch.get_feed_dicts
    if args.supports != "none":
        # composite buckets; first over question, then over support
        bucket_order = ('question', 'support')
        # (4, 4) would give 16 composite buckets, evenly spaced over question
        # and support lengths; (1, 1) keeps everything in a single bucket
        bucket_structure = (1, 1)
    else:
        # question buckets
        bucket_order = ('question', )
        # (4,) would give 4 buckets, evenly spaced over question lengths
        bucket_structure = (1, )

    train_feed_dicts = get_feed_dicts(train_data,
                                      placeholders,
                                      args.batch_size,
                                      bucket_order=bucket_order,
                                      bucket_structure=bucket_structure,
                                      exact_epoch=False)
    dev_feed_dicts = get_feed_dicts(dev_data,
                                    placeholders,
                                    args.dev_batch_size,
                                    exact_epoch=True)

    test_feed_dicts = get_feed_dicts(test_data,
                                     placeholders,
                                     args.dev_batch_size,
                                     exact_epoch=True)

    optim = tf.train.AdamOptimizer(args.learning_rate)

    sw = tf.summary.FileWriter(args.tensorboard_folder)

    answname = "targets" if "cands" in args.model else "answers"
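    # candidate-scoring models (those with 'cands' in their name) are trained
    # against the 'targets' placeholder; all other models use 'answers'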

    # (8) Add hooks
    hooks = [
        # report_loss
        LossHook(1, args.batch_size, summary_writer=sw),
        ExamplesPerSecHook(100, args.batch_size, summary_writer=sw),

        # evaluate on train data after each epoch
        EvalHook(train_feed_dicts,
                 logits_valid,
                 predict_valid,
                 placeholders[answname],
                 at_every_epoch=1,
                 metrics=['Acc', 'macroF1'],
                 print_details=False,
                 write_metrics_to=args.write_metrics_to,
                 info="training",
                 summary_writer=sw),

        # evaluate on dev data after each epoch
        EvalHook(dev_feed_dicts,
                 logits_valid,
                 predict_valid,
                 placeholders[answname],
                 at_every_epoch=1,
                 metrics=['Acc', 'macroF1'],
                 print_details=False,
                 write_metrics_to=args.write_metrics_to,
                 info="development",
                 summary_writer=sw),

        # evaluate on test data after training
        EvalHook(test_feed_dicts,
                 logits_valid,
                 predict_valid,
                 placeholders[answname],
                 at_every_epoch=args.epochs,
                 metrics=['Acc', 'macroP', 'macroR', 'macroF1'],
                 print_details=False,
                 write_metrics_to=args.write_metrics_to,
                 info="test")
    ]

    # (9) Train the model
    train(loss_train,
          optim,
          train_feed_dicts,
          max_epochs=args.epochs,
          l2=args.l2,
          clip=clip_range,
          hooks=hooks)
    logger.info('finished in {0:.3g} hours'.format((time() - t0) / 3600.))