import os

def make_data():
    """Build the corpus, both vocabularies, and the data iterator.

    Relies on a module-level `setting` dict loaded from setting.yaml.
    """
    base_dirs = [setting["parsed_data_path"]["test"],
                 setting["parsed_data_path"]["dev"],
                 setting["parsed_data_path"]["unlabeled"]]
    print("base_dirs are", base_dirs)
    corpus = ParsedCorpus(base_dirs)

    # Load the cached head-word vocabulary if present; otherwise build and save it.
    vocab = HeadWordVocabulary()
    if os.path.exists("./voc.txt"):
        vocab.load()
    else:
        vocab.make_vocabulary(corpus, "headWord")
        vocab.save()
    print("vocab length is", len(vocab.stoi))

    # Same caching scheme for the entity-type vocabulary.
    entity_vocab = HeadWordVocabulary()
    if os.path.exists("./evoc.txt"):
        entity_vocab.load("./evoc.txt")
    else:
        entity_vocab.make_vocabulary(corpus, "entityType")
        entity_vocab.save("./evoc.txt")
    print("entity label vocab length is", len(entity_vocab.stoi))

    data_iterator = DataIterator(corpus, vocab, entity_vocab)
    return data_iterator, vocab, entity_vocab
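# Hedged usage sketch, not part of the original file: load the module-level
# `setting` dict that make_data() expects, then build everything in one call.
# Only attributes already used above (vocab.stoi, entity_vocab.stoi) appear here.
if __name__ == "__main__":
    import yaml

    with open("setting.yaml", "r") as stream:
        setting = yaml.safe_load(stream)
    data_iterator, vocab, entity_vocab = make_data()
    print("head-word vocabulary size:", len(vocab.stoi))
    print("entity-type vocabulary size:", len(entity_vocab.stoi))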
parser.add_argument("--weight-file", type=str, default="data/weights.hdf5") args = parser.parse_args() if __name__ == "__main__": with open("setting.yaml", "r") as stream: setting = yaml.load(stream) base_dirs = [ setting["parsed_data_path"]["test"], setting["parsed_data_path"]["dev"], setting["parsed_data_path"]["unlabeled"] ] print("base_dirs are", base_dirs) corpus = ParsedCorpus(base_dirs) sentences_generator = corpus.get_single("sentences") corefs_generator = corpus.get_single("corefs") # if you are looking for example, please see https://allennlp.org/elmo # options_file = "/path/to/options.json" # weight_file = "path/to/weights.hdf5" options_file = args.options_file weight_file = args.weight_file encoder = Elmo(options_file, weight_file, 1, dropout=0) encoder.eval() encoder.cuda() pbar = tqdm.tqdm(range(len(corpus))) for _ in pbar:
with open("setting.yaml", "r") as stream:
    setting = yaml.safe_load(stream)

# Default to the GPU, but fall back to CPU if none is available.
if not args.nogpu:
    if torch.cuda.device_count() == 0:
        args.nogpu = True

base_dirs = [
    setting["parsed_data_path"]["test"],
    setting["parsed_data_path"]["dev"],
    setting["parsed_data_path"]["unlabeled"]
]
print("base_dirs are", base_dirs)
threshold = 0.5

corpus = ParsedCorpus(base_dirs)
vocab = HeadWordVocabulary()
vocab.load()
entity_vocab = HeadWordVocabulary()
entity_vocab.load("./evoc.txt")

# Reuse the parsed CLI args as the network architecture config; the input
# size must match the head-word vocabulary.
net_arch = args
net_arch.num_input = len(vocab)
model = Extractor(net_arch)
model.load_cpu_model(args.model_path, None)
if not args.nogpu:
    model.cuda()
model.eval()

iterator = DataIterator(corpus, vocab, entity_vocab)
iterator.reset()

# Per-slot log-distribution over head words; dim=1 is assumed here
# (normalize over the word axis of the unnormalized phi matrix).
slot_word_dist = F.log_softmax(torch.FloatTensor(model.get_unnormalized_phi()),
                               dim=1)
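# Hedged usage sketch, not from the original: print the highest-probability
# head words for each induced slot. Assumes the vocabulary exposes an `itos`
# index-to-word list (the counterpart of the `stoi` map used elsewhere).
top_vals, top_idx = torch.topk(slot_word_dist, k=10, dim=1)
for slot, indices in enumerate(top_idx.tolist()):
    words = [vocab.itos[i] for i in indices]
    print("slot %d: %s" % (slot, " ".join(words)))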