def main(args):
    """Train a new model and/or evaluate a saved one, as selected by ``args``.

    Attributes read from ``args``: ``embedding``, ``hidden_dim``, ``train``,
    ``dev``, ``test`` and ``load``.

    * When ``args.train`` is set, a ``Model`` is built and trained on the
      training corpus; the dev and test sets are handed to ``model.train``
      when their paths were given, otherwise ``None`` is passed instead.
    * When ``args.load`` and ``args.test`` are set and ``args.train`` is not,
      the model is restored from ``args.load`` and the value returned by
      ``model.evaluate_set`` on the test set is printed.

    Raises:
        AssertionError: if ``args.embedding`` is falsy.
    """
    # A parenthesised single-argument print produces the same output on
    # Python 2 and is also valid syntax on Python 3.
    print(args)
    model = None

    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = EmbeddingLayer(
        n_d=args.hidden_dim,
        vocab=["<unk>"],
        embs=load_embedding_iterator(args.embedding),
    )

    # Each corpus is read as an (inputs, labels) pair and the inputs are
    # converted to ids through the shared embedding layer.
    if args.train:
        train_x, train_y = read_corpus(args.train)
        train_x = [embedding_layer.map_to_ids(x) for x in train_x]

    if args.dev:
        dev_x, dev_y = read_corpus(args.dev)
        dev_x = [embedding_layer.map_to_ids(x) for x in dev_x]

    if args.test:
        test_x, test_y = read_corpus(args.test)
        test_x = [embedding_layer.map_to_ids(x) for x in test_x]

    if args.train:
        model = Model(
            args=args,
            embedding_layer=embedding_layer,
            # NOTE(review): presumably labels are integers starting at 0,
            # so the largest label + 1 is the class count -- confirm.
            nclasses=max(train_y) + 1,
        )
        model.ready()
        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            (test_x, test_y) if args.test else None,
        )

    if args.load and args.test and not args.train:
        # model.args and model.nclasses will be loaded from file
        model = Model(
            args=None,
            embedding_layer=embedding_layer,
            nclasses=-1,
        )
        model.load_model(args.load)
        accuracy = model.evaluate_set(test_x, test_y)
        print(accuracy)
def main():
    """Train a model on annotated data, configured by the module-level ``args``.

    Attributes read from ``args``: ``embedding``, ``max_len``, ``train``,
    ``hidden_dimension``, ``dev`` and ``load_rationale``.

    An embedding layer is created from ``args.embedding`` when given. If a
    training set is given, that layer is replaced by a fresh
    ``EmbeddingLayer`` whose vocabulary is ``"<unk>"``, ``"<padding>"`` and
    every word seen in the training inputs. Training and dev inputs are
    mapped to ids and truncated to ``max_len`` entries; rationale records get
    an un-truncated ``"xids"`` entry. The model is only built and trained
    when ``args.train`` is set.

    Raises:
        AssertionError: if dev or rationale data must be mapped to ids but
            neither ``args.embedding`` nor ``args.train`` supplied an
            embedding layer.
    """
    # A parenthesised single-argument print produces the same output on
    # Python 2 and is also valid syntax on Python 3.
    print(args)

    embedding_layer = None
    if args.embedding:
        embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_words = {word for x in train_x for word in x}
        # This replaces any layer created from args.embedding above.
        embedding_layer = EmbeddingLayer(
            n_d=args.hidden_dimension,
            vocab=["<unk>", "<padding>"] + list(train_words),
            oov="<unk>",
            fix_init_embs=False,
        )
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    # The original assert lived inside ``if args.embedding:`` and so could
    # never fire. The check is made here instead, where a missing layer would
    # otherwise surface as an AttributeError on None.
    if args.dev or args.load_rationale:
        assert embedding_layer is not None, \
            "Pre-trained word embeddings required."

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(
            args=args,
            embedding_layer=embedding_layer,
            # The class count is taken from the length of the first label.
            nclasses=len(train_y[0]),
        )
        model.ready()

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  # no test set is passed in this script
            rationale_data if args.load_rationale else None,
        )
def main(args):
    """Train a new model and/or evaluate a saved one, as selected by ``args``.

    Attributes read from ``args``: ``embedding``, ``hidden_dim``, ``train``,
    ``dev``, ``test`` and ``load``.

    * When ``args.train`` is set, a ``Model`` is built and trained on the
      training corpus; the dev and test sets are handed to ``model.train``
      when their paths were given, otherwise ``None`` is passed instead.
    * When ``args.load`` and ``args.test`` are set and ``args.train`` is not,
      the model is restored from ``args.load`` and the value returned by
      ``model.evaluate_set`` on the test set is printed.

    Raises:
        AssertionError: if ``args.embedding`` is falsy.
    """
    print(args)
    model = None

    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = EmbeddingLayer(
        n_d=args.hidden_dim,
        vocab=["<unk>"],
        embs=load_embedding_iterator(args.embedding),
    )

    # Each corpus is read as an (inputs, labels) pair and the inputs are
    # converted to ids through the shared embedding layer.
    if args.train:
        train_x, train_y = read_corpus(args.train)
        train_x = [embedding_layer.map_to_ids(x) for x in train_x]

    if args.dev:
        dev_x, dev_y = read_corpus(args.dev)
        dev_x = [embedding_layer.map_to_ids(x) for x in dev_x]

    if args.test:
        test_x, test_y = read_corpus(args.test)
        test_x = [embedding_layer.map_to_ids(x) for x in test_x]

    if args.train:
        model = Model(
            args=args,
            embedding_layer=embedding_layer,
            # NOTE(review): presumably labels are integers starting at 0,
            # so the largest label + 1 is the class count -- confirm.
            nclasses=max(train_y) + 1,
        )
        model.ready()
        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            (test_x, test_y) if args.test else None,
        )

    if args.load and args.test and not args.train:
        # model.args and model.nclasses will be loaded from file
        model = Model(
            args=None,
            embedding_layer=embedding_layer,
            nclasses=-1,
        )
        model.load_model(args.load)
        accuracy = model.evaluate_set(test_x, test_y)
        # This was a leftover Python 2 print statement, while the call at the
        # top of the function already used the function form.
        print(accuracy)
def main(args):
    """Build an aspect model from seed words, optionally load weights, and train it.

    Attributes read from ``args``: ``train``, ``dev``, ``test``, ``emb``,
    ``aspect_seeds`` and ``load``.

    The aspect seeds are loaded with ``load_lis``; each seed is stripped,
    split on whitespace and mapped to ids, and the results are stacked into
    an int32 array that is passed to the model as ``query``. If ``args.load``
    is set, the model is restored from that path via ``model.load_model``;
    otherwise ``model.ready()`` is called. Training then runs on the training
    set with the dev and test sets.

    Raises:
        AssertionError: if any of ``args.train``, ``args.dev``, ``args.test``,
            ``args.emb`` or ``args.aspect_seeds`` is falsy.
    """
    assert args.train, "Training set required"
    assert args.dev, "Dev set required"
    assert args.test, "Test set required"
    assert args.emb, "Pre-trained word embeddings required."
    assert args.aspect_seeds, "Aspect seeds required."

    # A parenthesised single-argument print produces the same output on
    # Python 2 and is also valid syntax on Python 3.
    print(args)

    seeds = load_lis(args.aspect_seeds)
    say("loaded {} aspect seeds\n".format(len(seeds)))

    embedding_layer = EmbeddingLayer(
        n_d=100,
        vocab=["<unk>"],
        pre_embs=load_embedding_iterator(args.emb),
    )

    # A list comprehension instead of map(): on Python 3 map() returns an
    # iterator, which np.array cannot turn into an int32 matrix.
    # NOTE(review): a rectangular array presumably requires every seed to
    # yield the same number of ids -- confirm against the seed file format.
    seeds_id = np.array(
        [
            embedding_layer.map_to_ids(seed.strip().split()).tolist()
            for seed in seeds
        ],
        dtype=np.int32,
    )

    # The conditions below are kept although the asserts above already
    # require these arguments: asserts are stripped when running with -O.
    if args.train:
        train_x, train_y = load_doc_corpus(embedding_layer, args.train)

    if args.dev:
        dev_x, dev_y = load_doc_corpus(embedding_layer, args.dev)

    if args.test:
        test_x, test_y = load_doc_corpus(embedding_layer, args.test)

    if args.train:
        model = Model(
            args=args,
            embedding_layer=embedding_layer,
            num_aspects=len(seeds_id),
            query=seeds_id,
        )
        if args.load:
            print('loading model...')
            model.load_model(args.load)
        else:
            model.ready()

        print('training...')
        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            (test_x, test_y) if args.test else None,
        )