Code example #1
Score: 0
File: train.py — Project: MtMoon/PoemProject
def main():
    """Build and train the three-block poem-generation model.

    Loads the experiment configuration, builds a Theano computation graph for
    three chained ``PoemBlock``s (block *i* consumes the hidden states and
    representations produced by the earlier blocks), initializes parameters,
    optionally applies dropout, attaches monitoring / checkpointing / sampling
    extensions, and runs the Blocks ``MainLoop`` until ``finish_after``
    batches have been processed.

    No parameters; no return value. Side effects: logging, checkpoints
    written to ``config["saveto"]``, and the training run itself.
    """

    # Load hyper-parameters and the training data stream.
    config = getattr(configurations, "get_config_cs2en")()
    logger.info("Model options:\n{}".format(pprint.pformat(config)))
    tr_stream = get_tr_stream(**config)

    # Create Theano variables
    logger.info("Creating theano variables")

    # One (source, target) pair of int64 matrices plus float masks per block.
    source_sentence0 = tensor.lmatrix("source0")
    source_sentence_mask0 = tensor.matrix("source0_mask")
    target_sentence0 = tensor.lmatrix("target0")
    target_sentence_mask0 = tensor.matrix("target0_mask")

    source_sentence1 = tensor.lmatrix("source1")
    source_sentence_mask1 = tensor.matrix("source1_mask")
    target_sentence1 = tensor.lmatrix("target1")
    target_sentence_mask1 = tensor.matrix("target1_mask")

    source_sentence2 = tensor.lmatrix("source2")
    source_sentence_mask2 = tensor.matrix("source2_mask")
    target_sentence2 = tensor.lmatrix("target2")
    target_sentence_mask2 = tensor.matrix("target2_mask")

    # Inputs used only by the sampling (generation) graphs below.
    sampling_input0 = tensor.lmatrix("input0")
    sampling_input1 = tensor.lmatrix("input1")
    sampling_input2 = tensor.lmatrix("input2")

    sampling_hstates0 = tensor.fmatrix("hstates0")
    sampling_hstates1 = tensor.fmatrix("hstates1")
    sampling_hstates2 = tensor.fmatrix("hstates2")

    sampling_lastrep0 = tensor.tensor3("lastrep0")
    sampling_lastrep1 = tensor.tensor3("lastrep1")

    # Initial encoder hidden state for block0: a zero vector of enc_nhids.
    hstates = theano.shared(value=numpy.zeros((config["enc_nhids"]), dtype=theano.config.floatX), name="hstates")

    # Get vocab
    sources = get_attr_rec(tr_stream, "data_stream")
    src_vocab = sources.data_streams[0].dataset.dictionary
    trg_vocab = sources.data_streams[1].dataset.dictionary

    # Construct model
    logger.info("Building PoemModel")

    block0 = PoemBlock(config=config, blockid="block0", name="poemblock0")
    block1 = PoemBlock(config=config, blockid="block1", name="poemblock1")
    block2 = PoemBlock(config=config, blockid="block2", name="poemblock2")

    # NOTE(review): the three mask positional arguments passed to each
    # .cost() call repeat mask0/mask1 in a pattern (mask0, mask1, mask_self)
    # that may or may not be intended — the PoemBlock.cost signature is not
    # visible here. Verify against PoemBlock before changing.
    cost0, hsta0, rep0 = block0.cost(
        source_sentence0,
        source_sentence_mask0,
        source_sentence_mask1,
        source_sentence_mask0,
        target_sentence0,
        target_sentence_mask0,
        hstates,
        lastrep0=None,
        lastrep1=None,
    )

    # block1 is conditioned on block0's final hidden states and representation.
    cost1, hsta1, rep1 = block1.cost(
        source_sentence1,
        source_sentence_mask0,
        source_sentence_mask1,
        source_sentence_mask1,
        target_sentence1,
        target_sentence_mask1,
        hsta0,
        lastrep0=rep0,
        lastrep1=None,
    )

    # block2 sees both earlier representations.
    cost2, hsta2, rep2 = block2.cost(
        source_sentence2,
        source_sentence_mask0,
        source_sentence_mask1,
        source_sentence_mask2,
        target_sentence2,
        target_sentence_mask2,
        hsta1,
        lastrep0=rep0,
        lastrep1=rep1,
    )

    # Train all three blocks jointly on the summed cost.
    cost = cost0 + cost1 + cost2
    cost.name = "total_cost"

    logger.info("Creating computational graph")

    cg = ComputationGraph(cost)

    # Initialize model
    logger.info("Initializing model")
    block0.set_initw(IsotropicGaussian(config["weight_scale"]))
    block0.set_initb(Constant(0))
    block0.push_initialization_config()
    block0.set_specialinit(Orthogonal(), Orthogonal())
    block0.initialize()

    block1.set_initw(IsotropicGaussian(config["weight_scale"]))
    block1.set_initb(Constant(0))
    block1.push_initialization_config()
    block1.set_specialinit(Orthogonal(), Orthogonal())
    block1.initialize()

    block2.set_initw(IsotropicGaussian(config["weight_scale"]))
    block2.set_initb(Constant(0))
    block2.push_initialization_config()
    block2.set_specialinit(Orthogonal(), Orthogonal())
    block2.initialize()

    # apply dropout for regularization
    if config["dropout"] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logger.info("Applying dropout")
        dropout_inputs = [x for x in cg.intermediary_variables if x.name == "maxout_apply_output"]
        cg = apply_dropout(cg, dropout_inputs, config["dropout"])

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        # BUG FIX: a width spec cannot be applied to a tuple directly —
        # "{:15}".format((a, b)) raises TypeError. Convert to str first
        # with the !s conversion, then pad.
        logger.info("    {!s:15}: {}".format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    param_dict = Selector(block0).get_parameters()
    logger.info("Parameter names: ")
    for name, value in param_dict.items():
        # BUG FIX: same tuple-formatting issue as above.
        logger.info("    {!s:15}: {}".format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}".format(len(param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config["finish_after"]),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config["saveto"], every_n_batches=config["save_freq"]),
    ]

    # Set up training algorithm
    logger.info("Initializing training algorithm")
    # SECURITY NOTE: eval() on config["step_rule"] executes arbitrary code
    # from the configuration file. Acceptable only if configs are trusted;
    # a dispatch dict of step rules would be safer.
    algorithm = GradientDescent(
        cost=cost,
        parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(config["step_clipping"]), eval(config["step_rule"])()]),
    )

    # Reload model if necessary
    if config["reload"]:
        extensions.append(LoadNMT(config["saveto"]))

    # Add sampling: build one generation graph per block, each fed the
    # previous blocks' states/representations at sampling time.
    if config["hook_samples"] >= 1:
        logger.info("Building sampler")

        generated0 = block0.mygenerate(sampling_input0, sampling_hstates0)
        search_model0 = Model(generated0)

        generated1 = block1.mygenerate(sampling_input1, sampling_hstates1, sampling_lastrep0)
        search_model1 = Model(generated1)

        generated2 = block2.mygenerate(sampling_input2, sampling_hstates2, sampling_lastrep0, sampling_lastrep1)
        search_model2 = Model(generated2)

        extensions.append(
            Sampler(
                config=config,
                model0=search_model0,
                model1=search_model1,
                model2=search_model2,
                data_stream=tr_stream,
                hook_samples=config["hook_samples"],
                every_n_batches=config["sampling_freq"],
                src_vocab_size=config["src_vocab_size"],
            )
        )

        logger.info("End of building sampler")

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model, algorithm=algorithm, data_stream=tr_stream, extensions=extensions)

    # Train!
    main_loop.run()