'aet_word_voca': aet_word_voca, 'aet_word_embeddings': aet_word_embeddings, 'dr': args.dropout_rate, 'args': args } if ModelClass == MulRelRanker: config['df'] = args.df config['n_loops'] = args.n_loops config['n_rels'] = args.n_rels config['mulrel_type'] = args.mulrel_type else: raise Exception('unknown model class') pprint(config) ranker = EDRanker(config=config) dev_datasets = [('aida-A', conll.testA), ('aida-B', conll.testB), ('msnbc', conll.msnbc), ('aquaint', conll.aquaint), ('ace2004', conll.ace2004), ('clueweb', conll.clueweb), ('wikipedia', conll.wikipedia)] if args.mode == 'train': print('training...') config = {'lr': args.learning_rate, 'n_epochs': args.n_epochs} pprint(config) ranker.train(conll.train, dev_datasets, config) elif args.mode == 'eval': org_dev_datasets = dev_datasets # + [('aida-train', conll.train)] dev_datasets = []
if args.multi_instance or args.semisup: config['n_negs'] = args.n_negs if ModelClass == MulRelRanker: config['inference'] = args.inference config['df'] = args.df config['n_loops'] = args.n_loops config['ent_top_n'] = args.ent_top_n config['n_rels'] = args.n_rels config['mulrel_type'] = args.mulrel_type else: raise Exception('unknown model class') # pprint(config) ranker = EDRanker(config=config) if args.mode == 'prerank': conll = D.CoNLLDataset(datadir, person_path, conll_path) if args.filelist is not None: if args.multi_instance: conll.train = {} with open(args.filelist, 'r') as flist: for fname in flist: fname = fname.strip() print('load file from', fname) conll_path = fname cands_path = conll_path + '.csv' data = D.CoNLLDataset.load_file(conll_path, cands_path, person_path) print('#docs', len(data))
'entity_embeddings': entity_embeddings, 'snd_word_voca': snd_word_voca, 'snd_word_embeddings': snd_word_embeddings, 'dr': args.dropout_rate, 'args': args} if ModelClass == MulRelRanker: config['df'] = args.df config['n_loops'] = args.n_loops config['n_rels'] = args.n_rels config['mulrel_type'] = args.mulrel_type else: raise Exception('unknown model class') pprint(config) ranker = EDRanker(config=config) dev_datasets = [('test', conll.test), ('dev', conll.dev), ] if args.mode == 'train': print('training...') config = {'lr': args.learning_rate, 'n_epochs': args.n_epochs} pprint(config) ranker.train(conll.train, dev_datasets, config) elif args.mode == 'eval': org_dev_datasets = dev_datasets # + [('aida-train', conll.train)] dev_datasets = [] for dname, data in org_dev_datasets:
'snd_word_voca': snd_word_voca, 'snd_word_embeddings': snd_word_embeddings, 'dr': args.dropout_rate, 'args': args } if ModelClass == MulRelRanker: config['df'] = args.df config['n_loops'] = args.n_loops config['n_rels'] = args.n_rels config['mulrel_type'] = args.mulrel_type else: raise Exception('unknown model class') pprint(config) ranker = EDRanker(config=config) dev_datasets = [ ('aida-A', conll.testA), ('aida-B', conll.testB), ('msnbc', conll.msnbc), ('aquaint', conll.aquaint), ('ace2004', conll.ace2004), ('clueweb', conll.clueweb), ('wikipedia', conll.wikipedia), ('twitter-microposts', conll.twitter_microposts), ('twitter-mena', conll.twitter_mena), ('twitter-brian', conll.twitter_brian), ('twitter-train', conll.twitter_train), ('twitter-val', conll.twitter_val), ('twitter-test', conll.twitter_test),
voca_emb_dir + '/glove/word_embeddings.npy') print('snd word voca size', snd_word_voca.size()) entity_voca, entity_embeddings = utils.load_voca_embs( voca_emb_dir + 'dict.entity', voca_emb_dir + 'entity_embeddings.npy') config = { 'hid_dims': args.hid_dims, 'emb_dims': entity_embeddings.shape[1], 'freeze_embs': True, 'tok_top_n': args.tok_top_n, 'margin': args.margin, 'word_voca': word_voca, 'entity_voca': entity_voca, 'word_embeddings': word_embeddings, 'entity_embeddings': entity_embeddings, 'snd_word_voca': snd_word_voca, 'snd_word_embeddings': snd_word_embeddings, 'dr': args.dropout_rate, 'args': args } if ModelClass == MulRelRanker: config['df'] = args.df config['n_loops'] = args.n_loops config['n_rels'] = args.n_rels config['mulrel_type'] = args.mulrel_type else: raise Exception('unknown model class') ranker = EDRanker(config=config) app.run(host='0.0.0.0', port=5555)