Code Example #1
import myio  # I/O helpers from the rcnn rationale code (assumed to be importable here)


def test_basic():
    # code adapted from Tao's `rationale.py`:
    train = 'data/reviews.aspect1.train.txt.gz'
    train_x, train_y = myio.read_annotations(train)
    # train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    dev = 'data/reviews.aspect1.heldout.txt.gz'
    dev_x, dev_y = myio.read_annotations(dev)
    # dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    load_rationale = 'data/annotations.json'
    rationale_data = myio.read_rationales(load_rationale)
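The test above only loads the data and never asserts anything about it. A hedged follow-up sketch, assuming the same `myio` helpers and the field names that the later examples index into (`"x"` and `"y"` on each rationale entry):

def test_loaded_shapes():
    # Sketch only: paths mirror test_basic above; field names mirror how the
    # full scripts access rationale_data (x["x"], u["y"]).
    train_x, train_y = myio.read_annotations('data/reviews.aspect1.train.txt.gz')
    assert len(train_x) == len(train_y)

    rationale_data = myio.read_rationales('data/annotations.json')
    assert all("x" in r and "y" in r for r in rationale_data)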
Code Example #2
File: rationale.py  Project: gagb/rcnn
def main():
    print args

    embedding_layer = None
    if args.embedding:
        assert args.embedding, "Pre-trained word embeddings required."

        embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_words = set([word for x in train_x for word in x])
        embedding_layer = EmbeddingLayer(n_d=args.hidden_dimension,
                                         vocab=["<unk>", "<padding>"] +
                                         list(train_words),
                                         oov="<unk>",
                                         fix_init_embs=False)
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      nclasses=len(train_y[0]))
        model.ready()

        #debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        #theano.printing.debugprint(debug_func2)
        #return

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  #(test_x, test_y),
            rationale_data if args.load_rationale else None)
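Each `main()` in these examples reads a module-level `args` object that the original scripts construct elsewhere. A minimal, hypothetical argparse setup covering just the attributes accessed in this example (the flag defaults are placeholders, and the real projects define many more options):

import argparse

parser = argparse.ArgumentParser(description="rationale model (sketch of assumed options)")
parser.add_argument("--embedding", type=str, default="",
                    help="path to pre-trained word embeddings")
parser.add_argument("--train", type=str, default="",
                    help="path to training annotations")
parser.add_argument("--dev", type=str, default="",
                    help="path to held-out annotations")
parser.add_argument("--load_rationale", type=str, default="",
                    help="path to gold rationale annotations")
parser.add_argument("--max_len", type=int, default=256,
                    help="truncate inputs to this many tokens")
parser.add_argument("--hidden_dimension", type=int, default=200,
                    help="embedding size used when building the EmbeddingLayer")
args = parser.parse_args()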
Code Example #3
File: rationale_dependent.py  Project: taolei87/rcnn
def main():
    print args
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(
                        args.embedding
                    )

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in train_x ]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in dev_x ]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(
                    args = args,
                    embedding_layer = embedding_layer,
                    nclasses = len(train_y[0])
                )
        model.ready()

        #debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        #theano.printing.debugprint(debug_func2)
        #return

        model.train(
                (train_x, train_y),
                (dev_x, dev_y) if args.dev else None,
                None, #(test_x, test_y),
                rationale_data if args.load_rationale else None
            )
Code Example #4
File: rationale_dependent.py  Project: ml-lab/rcnn-1
def main():
    print args
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      nclasses=len(train_y[0]))
        model.ready()

        #debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        #theano.printing.debugprint(debug_func2)
        #return

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  #(test_x, test_y),
            rationale_data if args.load_rationale else None)
Code Example #5
def main():
    print args
    set_default_rng_seed(args.seed)
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        if args.debug:
            len_ = len(train_x) * args.debug
            len_ = int(len_)
            train_x = train_x[:len_]
            train_y = train_y[:len_]
        print 'train size: ', len(train_x)  #, train_x[0], len(train_x[0])
        #exit()
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        if args.debug:
            len_ = len(dev_x) * args.debug
            len_ = int(len_)
            dev_x = dev_x[:len_]
            dev_y = dev_y[:len_]
        print 'dev size: ', len(dev_x)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    rationale_data = None
    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    #print 'in main: ', args.seed
    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      nclasses=len(train_y[0]))
        if args.load_model:
            model.load_model(args.load_model,
                             seed=args.seed,
                             select_all=args.select_all)
            say("model loaded successfully.\n")
        else:
            model.ready()
        #say(" ready time nedded {} \n".format(time.time()-start_ready_time))

        #debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        #theano.printing.debugprint(debug_func2)
        #return

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  #(test_x, test_y),
            rationale_data if args.load_rationale else None,
            trained_max_epochs=args.trained_max_epochs)

    if args.load_model and not args.dev and not args.train:
        model = Model(args=args, embedding_layer=embedding_layer, nclasses=-1)
        model.load_model(args.load_model,
                         seed=args.seed,
                         select_all=args.select_all)
        say("model loaded successfully.\n")

        sample_generator = theano.function(
            inputs=[model.x],
            outputs=model.z,
            #updates = model.generator.sample_updates
        )
        sample_encoder = theano.function(
            inputs=[model.x, model.y, model.z],
            outputs=[
                model.encoder.obj, model.encoder.loss, model.encoder.pred_diff
            ],
            #updates = model.generator.sample_updates
        )
        # compile an evaluation function
        eval_func = theano.function(
            inputs=[model.x, model.y],
            outputs=[
                model.z, model.encoder.obj, model.encoder.loss,
                model.encoder.pred_diff
            ],
            #updates = model.generator.sample_updates
        )
        debug_func_enc = theano.function(
            inputs=[model.x, model.y],
            outputs=[
                model.z, model.encoder.obj, model.encoder.loss,
                model.encoder.pred_diff
            ],
            #updates = model.generator.sample_updates
        )
        debug_func_gen = theano.function(
            inputs=[model.x, model.y],
            outputs=[
                model.z, model.encoder.obj, model.encoder.loss,
                model.encoder.pred_diff
            ],
            #updates = model.generator.sample_updates
        )

        # compile a predictor function
        pred_func = theano.function(
            inputs=[model.x],
            outputs=[model.z, model.encoder.preds],
            #updates = model.generator.sample_updates
        )

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                [u["xids"] for u in rationale_data],
                [u["y"] for u in rationale_data],
                args.batch,
                padding_id,
                sort=False)

        # disable dropout
        model.dropout.set_value(0.0)
        if rationale_data is not None:
            #model.dropout.set_value(0.0)
            start_rational_time = time.time()
            r_mse, r_p1, r_prec1, r_prec2, gen_time, enc_time, prec_cal_time = model.evaluate_rationale(
                rationale_data, valid_batches_x, valid_batches_y,
                sample_generator, sample_encoder, eval_func)
            #valid_batches_y, eval_func)

            #model.dropout.set_value(dropout_prob)
            #say(("\ttest rationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}" +
            #            "  prec2={:.4f} generator time={:.4f} encoder time={:.4f} total test time={:.4f}\n").format(
            #        r_mse,
            #        r_p1,
            #        r_prec1,
            #        r_prec2,
            #        gen_time,
            #        enc_time,
            #        time.time() - start_rational_time
            #))

            data = "\t".join([
                '%.5f' % r_mse,
                '%4.2f' % r_p1,
                '%4.4f' % r_prec1,
                '%4.4f' % r_prec2,
                '%4.2f' % gen_time,
                '%4.2f' % enc_time,
                '%4.2f' % prec_cal_time,
                '%4.2f' % (time.time() - start_rational_time),
                str(args.sparsity),
                str(args.coherent),
                str(args.max_epochs),
                str(args.cur_epoch),
            ])

            with open(args.graph_data_path, 'a') as g_f:
                print 'writing to file: ', data
                g_f.write(data + "\n")
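The `pred_func` compiled above is never actually called in this excerpt. A hypothetical helper (the name `collect_predictions` is made up for illustration) showing how the compiled predictor could be run over padded batches such as those built with `myio.create_batches`:

def collect_predictions(pred_func, batches_x):
    # For each padded batch, pred_func returns the sampled rationale mask z
    # and the encoder's predictions, matching the outputs compiled above.
    all_z, all_preds = [], []
    for bx in batches_x:
        bz, preds = pred_func(bx)
        all_z.append(bz)
        all_preds.append(preds)
    return all_z, all_preds

For example, `collect_predictions(pred_func, valid_batches_x)` inside the branch above would gather the rationales and predictions for the annotated data.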
Code Example #6
def main():
    print(args)
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      nclasses=len(train_y[0]))
        model.ready()

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  #(test_x, test_y),
            rationale_data if args.load_rationale else None)

    if args.load_model and args.dev and not args.train:
        model = Model(args=None, embedding_layer=embedding_layer, nclasses=-1)
        model.load_model(args.load_model)
        say("model loaded successfully.\n")

        # compile an evaluation function
        eval_func = theano.function(
            inputs=[model.x, model.y],
            outputs=[
                model.z, model.generator.obj, model.generator.loss,
                model.encoder.pred_diff
            ],
            givens={model.z: model.generator.z_pred},
        )

        # compile a predictor function
        pred_func = theano.function(
            inputs=[model.x],
            outputs=[model.z, model.encoder.preds],
            givens={model.z: model.generator.z_pred},
        )

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        dev_batches_x, dev_batches_y = myio.create_batches(
            dev_x, dev_y, args.batch, padding_id)

        # disable dropout
        model.dropout.set_value(0.0)
        dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
            dev_batches_x, dev_batches_y, eval_func, sampling=True)
        say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
Code Example #7
File: rationale_dependent.py  Project: Sundayxr/rcnn
def main():
    print args
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(
                        args.embedding
                    )

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in train_x ]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in dev_x ]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(
                    args = args,
                    embedding_layer = embedding_layer,
                    nclasses = len(train_y[0])
                )
        model.ready()

        #debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        #theano.printing.debugprint(debug_func2)
        #return

        model.train(
                (train_x, train_y),
                (dev_x, dev_y) if args.dev else None,
                None, #(test_x, test_y),
                rationale_data if args.load_rationale else None
            )

    if args.load_model and args.dev and not args.train:
        model = Model(
                    args = None,
                    embedding_layer = embedding_layer,
                    nclasses = -1
                )
        model.load_model(args.load_model)
        say("model loaded successfully.\n")

        # compile an evaluation function
        eval_func = theano.function(
                inputs = [ model.x, model.y ],
                outputs = [ model.z, model.encoder.obj, model.encoder.loss,
                                model.encoder.pred_diff ],
                updates = model.generator.sample_updates
            )

        # compile a predictor function
        pred_func = theano.function(
                inputs = [ model.x ],
                outputs = [ model.z, model.encoder.preds ],
                updates = model.generator.sample_updates
            )

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        dev_batches_x, dev_batches_y = myio.create_batches(
                        dev_x, dev_y, args.batch, padding_id
                    )

        # disable dropout
        model.dropout.set_value(0.0)
        dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
                dev_batches_x, dev_batches_y, eval_func, sampling=True)
        say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
Code Example #8
def main():
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)
    embedding_layer_y = myio.create_embedding_layer(args.embedding)

    max_len_x = args.sentence_length * args.max_sentences
    max_len_y = args.sentence_length_hl * args.max_sentences_hl

    if args.train:
        train_x, train_y = myio.read_docs(args.train)
        train_x = [embedding_layer.map_to_ids(x)[:max_len_x] for x in train_x]
        train_y = [
            embedding_layer_y.map_to_ids(y)[:max_len_y] for y in train_y
        ]

    if args.dev:
        dev_x, dev_y = myio.read_docs(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len_x] for x in dev_x]
        dev_y = [embedding_layer_y.map_to_ids(y)[:max_len_y] for y in dev_y]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      embedding_layer_y=embedding_layer_y,
                      nclasses=len(train_y[0]))
        model.ready()

        # debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        # theano.printing.debugprint(debug_func2)
        # return

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  # (test_x, test_y),
            rationale_data if args.load_rationale else None)

    if args.load_model and args.dev and not args.train:
        model = Model(args=None, embedding_layer=embedding_layer, nclasses=-1)
        model.load_model(args.load_model)
        say("model loaded successfully.\n")

        # compile an evaluation function
        eval_func = theano.function(inputs=[model.x, model.y],
                                    outputs=[
                                        model.z, model.encoder.obj,
                                        model.encoder.loss,
                                        model.encoder.pred_diff
                                    ],
                                    updates=model.generator.sample_updates)

        # compile a predictor function
        pred_func = theano.function(inputs=[model.x],
                                    outputs=[model.z, model.encoder.preds],
                                    updates=model.generator.sample_updates)

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        dev_batches_x, dev_batches_y = myio.create_batches(
            dev_x, dev_y, args.batch, padding_id)

        # disable dropout
        model.dropout.set_value(0.0)
        dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
            dev_batches_x, dev_batches_y, eval_func, sampling=True)
        say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))