Example #1
0
def main(args):
    """Train a model and/or evaluate a previously saved one.

    Reads the following attributes from ``args``:

    * ``embedding`` -- passed to ``load_embedding_iterator``; must be truthy.
    * ``hidden_dim`` -- used as ``n_d`` of the embedding layer.
    * ``train`` / ``dev`` / ``test`` -- optional corpus locations handed to
      ``read_corpus``; each corpus is converted to ids with the embedding
      layer.
    * ``load`` -- optional saved model; only used when ``test`` is given
      and ``train`` is not.

    Raises:
        AssertionError: if ``args.embedding`` is falsy (note that asserts
            are skipped when Python runs with ``-O``).
    """
    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3, unlike the print statement.
    print(args)

    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = EmbeddingLayer(
        n_d=args.hidden_dim,
        vocab=["<unk>"],
        embs=load_embedding_iterator(args.embedding),
    )

    if args.train:
        train_x, train_y = read_corpus(args.train)
        train_x = [embedding_layer.map_to_ids(x) for x in train_x]

    if args.dev:
        dev_x, dev_y = read_corpus(args.dev)
        dev_x = [embedding_layer.map_to_ids(x) for x in dev_x]

    if args.test:
        test_x, test_y = read_corpus(args.test)
        test_x = [embedding_layer.map_to_ids(x) for x in test_x]

    if args.train:
        model = Model(
            args=args,
            embedding_layer=embedding_layer,
            # presumably labels are integers starting at 0, so the largest
            # label + 1 is the class count -- verify against read_corpus
            nclasses=max(train_y) + 1,
        )
        model.ready()
        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            (test_x, test_y) if args.test else None,
        )

    if args.load and args.test and not args.train:
        # model.args and model.nclasses will be loaded from file
        model = Model(
            args=None,
            embedding_layer=embedding_layer,
            nclasses=-1,
        )
        model.load_model(args.load)
        accuracy = model.evaluate_set(test_x, test_y)
        print(accuracy)
Example #2
0
def main():
    """Prepare the data sets and train a model.

    Takes no parameters; all configuration is read from the module-level
    ``args`` object: ``embedding``, ``max_len``, ``train``, ``dev``,
    ``load_rationale`` and ``hidden_dimension``.

    Training only happens when ``args.train`` is set. Every training and
    dev example is truncated to ``args.max_len`` ids.
    """
    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3, unlike the print statement.
    print(args)

    embedding_layer = None
    if args.embedding:
        embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_words = set(word for x in train_x for word in x)
        # NOTE(review): this replaces any layer created from args.embedding
        # above with one whose vocabulary is built from the training words
        # -- confirm that discarding the pre-trained layer is intended.
        embedding_layer = EmbeddingLayer(
            n_d=args.hidden_dimension,
            vocab=["<unk>", "<padding>"] + list(train_words),
            oov="<unk>",
            fix_init_embs=False,
        )
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    # NOTE(review): when neither args.embedding nor args.train is given,
    # embedding_layer is still None here and the map_to_ids calls below
    # fail with AttributeError.
    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            # rationale inputs are mapped to ids without max_len truncation
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(
            args=args,
            embedding_layer=embedding_layer,
            nclasses=len(train_y[0]),
        )
        model.ready()

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  # no test set is passed to training
            rationale_data if args.load_rationale else None,
        )
Example #3
0
def main(args):
    """Train a model and/or evaluate a previously saved one.

    Reads the following attributes from ``args``:

    * ``embedding`` -- passed to ``load_embedding_iterator``; must be truthy.
    * ``hidden_dim`` -- used as ``n_d`` of the embedding layer.
    * ``train`` / ``dev`` / ``test`` -- optional corpus locations handed to
      ``read_corpus``; each corpus is converted to ids with the embedding
      layer.
    * ``load`` -- optional saved model; only used when ``test`` is given
      and ``train`` is not.

    Raises:
        AssertionError: if ``args.embedding`` is falsy (note that asserts
            are skipped when Python runs with ``-O``).
    """
    print(args)

    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = EmbeddingLayer(
        n_d=args.hidden_dim,
        vocab=["<unk>"],
        embs=load_embedding_iterator(args.embedding),
    )

    if args.train:
        train_x, train_y = read_corpus(args.train)
        train_x = [embedding_layer.map_to_ids(x) for x in train_x]

    if args.dev:
        dev_x, dev_y = read_corpus(args.dev)
        dev_x = [embedding_layer.map_to_ids(x) for x in dev_x]

    if args.test:
        test_x, test_y = read_corpus(args.test)
        test_x = [embedding_layer.map_to_ids(x) for x in test_x]

    if args.train:
        model = Model(
            args=args,
            embedding_layer=embedding_layer,
            # presumably labels are integers starting at 0, so the largest
            # label + 1 is the class count -- verify against read_corpus
            nclasses=max(train_y) + 1,
        )
        model.ready()
        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            (test_x, test_y) if args.test else None,
        )

    if args.load and args.test and not args.train:
        # model.args and model.nclasses will be loaded from file
        model = Model(
            args=None,
            embedding_layer=embedding_layer,
            nclasses=-1,
        )
        model.load_model(args.load)
        accuracy = model.evaluate_set(test_x, test_y)
        # Use the call form here as well, matching print(args) above; the
        # print statement is a syntax error on Python 3.
        print(accuracy)
Example #4
0
def main(args):
    """Load aspect seeds and corpora, build the model, and train it.

    Reads the following attributes from ``args``:

    * ``train`` / ``dev`` / ``test`` -- corpus locations handed to
      ``load_doc_corpus``; all three must be truthy.
    * ``emb`` -- passed to ``load_embedding_iterator``; must be truthy.
    * ``aspect_seeds`` -- passed to ``load_lis``; must be truthy.
    * ``load`` -- optional saved model; when given it is loaded instead of
      calling ``model.ready()`` before training.

    Raises:
        AssertionError: if any required attribute is falsy (note that
            asserts are skipped when Python runs with ``-O``, which is why
            the truthiness guards further down are kept).
    """
    assert args.train, "Training  set required"
    assert args.dev, "Dev set required"
    assert args.test, "Test set required"
    assert args.emb, "Pre-trained word embeddings required."
    assert args.aspect_seeds, "Aspect seeds required."

    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3, unlike the print statement.
    print(args)

    seeds = load_lis(args.aspect_seeds)
    say("loaded {} aspect seeds\n".format(len(seeds)))

    embedding_layer = EmbeddingLayer(
        n_d=100,
        vocab=["<unk>"],
        pre_embs=load_embedding_iterator(args.emb),
    )

    # A list comprehension (rather than map) so np.array receives a real
    # sequence on Python 3 as well, where map() returns an iterator.
    # assumes every seed maps to the same number of ids so the result is a
    # rectangular int32 array -- TODO confirm
    seeds_id = np.array(
        [
            embedding_layer.map_to_ids(seed.strip().split()).tolist()
            for seed in seeds
        ],
        dtype=np.int32,
    )

    if args.train:
        train_x, train_y = load_doc_corpus(embedding_layer, args.train)

    if args.dev:
        dev_x, dev_y = load_doc_corpus(embedding_layer, args.dev)

    if args.test:
        test_x, test_y = load_doc_corpus(embedding_layer, args.test)

    if args.train:
        model = Model(
            args=args,
            embedding_layer=embedding_layer,
            num_aspects=len(seeds_id),
            query=seeds_id,
        )
        if args.load:
            print('loading model...')
            model.load_model(args.load)
        else:
            model.ready()

        print('training...')
        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            (test_x, test_y) if args.test else None,
        )