Example 1
def make_data():
    base_dirs = [setting["parsed_data_path"]["test"],
                 setting["parsed_data_path"]["dev"],
                 setting["parsed_data_path"]["unlabeled"]]
    print("base_dirs are", base_dirs)
    corpus = ParsedCorpus(base_dirs)

    # Build the head-word vocabulary, reusing the cached copy if present.
    vocab = HeadWordVocabulary()
    if os.path.exists("./voc.txt"):
        vocab.load()
    else:
        vocab.make_vocabulary(corpus, "headWord")
        vocab.save()
    print("vocab length is", len(vocab.stoi))

    # Entity-type vocabulary, cached the same way.
    entity_vocab = HeadWordVocabulary()
    if os.path.exists("./evoc.txt"):
        entity_vocab.load("./evoc.txt")
    else:
        entity_vocab.make_vocabulary(corpus, "entityType")
        entity_vocab.save("./evoc.txt")
    print("entity label vocab length is", len(entity_vocab.stoi))

    data_iterator = DataIterator(corpus, vocab, entity_vocab)
    return data_iterator, vocab, entity_vocab
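
A minimal usage sketch for the function above. The reset() call is taken from Example 3 below; everything else follows directly from the code, and the driver itself is an illustrative assumption:

# Hypothetical driver, assuming the global `setting` dict was loaded
# from setting.yaml as shown in Example 2.
data_iterator, vocab, entity_vocab = make_data()
print("head-word vocab size:", len(vocab.stoi))
print("entity-type vocab size:", len(entity_vocab.stoi))
data_iterator.reset()  # prepare for a fresh pass, as in Example 3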
Example 2
parser.add_argument("--weight-file", type=str, default="data/weights.hdf5")
args = parser.parse_args()

if __name__ == "__main__":

    with open("setting.yaml", "r") as stream:
        setting = yaml.safe_load(stream)  # yaml.load without an explicit Loader is deprecated

    base_dirs = [
        setting["parsed_data_path"]["test"],
        setting["parsed_data_path"]["dev"],
        setting["parsed_data_path"]["unlabeled"]
    ]
    print("base_dirs are", base_dirs)

    corpus = ParsedCorpus(base_dirs)

    sentences_generator = corpus.get_single("sentences")
    corefs_generator = corpus.get_single("corefs")

    # for example option/weight files, see https://allennlp.org/elmo
    # options_file = "/path/to/options.json"
    # weight_file = "path/to/weights.hdf5"
    options_file = args.options_file
    weight_file = args.weight_file
    # Load pretrained ELMo: one output representation, no dropout.
    encoder = Elmo(options_file, weight_file, 1, dropout=0)
    encoder.eval()
    encoder.cuda()

    pbar = tqdm.tqdm(range(len(corpus)))
    for _ in pbar:
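
The loop body is truncated in this example. For reference, here is a sketch of how sentences are typically fed through the encoder: batch_to_ids and the elmo_representations output key are the real allennlp interface, while the sample batch and how the result is used are illustrative assumptions, not the original loop body:

import torch
from allennlp.modules.elmo import batch_to_ids

# Hypothetical batch: each sentence is a list of tokens.
sentences = [["The", "cat", "sat", "."], ["Another", "sentence"]]
character_ids = batch_to_ids(sentences).cuda()

with torch.no_grad():
    output = encoder(character_ids)

# One tensor per requested representation; shape (batch, seq_len, 1024).
embeddings = output["elmo_representations"][0]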
Example 3
    setting = yaml.safe_load(stream)  # yaml.load without an explicit Loader is deprecated

# default to GPU, but fall back to CPU when none is available
if not args.nogpu:
    if torch.cuda.device_count() == 0:
        args.nogpu = True

base_dirs = [
    setting["parsed_data_path"]["test"], setting["parsed_data_path"]["dev"],
    setting["parsed_data_path"]["unlabeled"]
]
print("base_dirs are", base_dirs)

threshold = 0.5

corpus = ParsedCorpus(base_dirs)
vocab = HeadWordVocabulary()
vocab.load()
entity_vocab = HeadWordVocabulary()
entity_vocab.load("./evoc.txt")
# Reuse the parsed CLI arguments as the network hyperparameter namespace.
net_arch = args
net_arch.num_input = len(vocab)
model = Extractor(net_arch)
model.load_cpu_model(args.model_path, None)
model.cuda()
model.eval()

iterator = DataIterator(corpus, vocab, entity_vocab)
iterator.reset()

slot_word_dist = F.log_softmax(torch.FloatTensor(model.get_unnormalized_phi()),
                               dim=1)  # dim=1 (normalize each slot over the vocabulary) is assumed
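
slot_word_dist is then a (num_slots, vocab_size) matrix of per-slot word log-probabilities (phi in topic-model terms). A minimal inspection sketch, assuming the vocabulary exposes an itos index-to-word list to mirror the stoi mapping used above:

top_k = 10
values, indices = slot_word_dist.topk(top_k, dim=1)
for slot_id in range(slot_word_dist.size(0)):
    top_words = [vocab.itos[i] for i in indices[slot_id].tolist()]
    print("slot", slot_id, "->", " ".join(top_words))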