Esempio n. 1
0
def evaluate_model(evalparams):
    """Evaluate a saved relation model on a dataset and print its scores.

    Args:
        evalparams: Namespace-like object. The attributes read here are
            ``seed``, ``cpu``, ``cuda``, ``model_dir``, ``model``,
            ``dataset`` and ``out``. ``cuda`` is overwritten with ``False``
            when ``cpu`` is set.

    Returns:
        A tuple ``(gold, predictions, model)`` where ``gold`` is whatever
        ``batch.gold()`` returns, ``predictions`` is the list of predicted
        labels mapped through ``constant.LABEL_TO_ID`` (inverted), and
        ``model`` is the loaded ``RelationModel``.

    Raises:
        AssertionError: If the vocabulary size stored in the saved config
            differs from the size of the loaded vocabulary.
    """
    torch.manual_seed(evalparams.seed)
    # The Python RNG seed is fixed and does not follow evalparams.seed.
    random.seed(1234)
    if evalparams.cpu:
        evalparams.cuda = False
    elif evalparams.cuda:
        # Seed the CUDA RNG from the same parameters object as the CPU RNG.
        torch.cuda.manual_seed(evalparams.seed)

    # load opt
    print(evalparams.model_dir, evalparams.model)
    # NOTE: the checkpoint path is hard-coded relative to the working
    # directory; evalparams.model_dir / evalparams.model are only printed
    # above and are not used to locate the model file.
    model_file = 'best_model.pt'
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = evalparams.model_dir + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    assert opt['vocab_size'] == vocab.size, \
        "Vocab size must match that in the saved model."

    # load data (the data directory comes from the saved config, not from
    # evalparams)
    data_file = opt['data_dir'] + '/{}.json'.format(evalparams.dataset)
    print("Loading data from {} with batch size {}...".format(
        data_file, opt['batch_size']))
    batch = DataLoader(data_file,
                       opt['batch_size'],
                       opt,
                       vocab,
                       evaluation=True)

    helper.print_config(opt)
    id2label = {v: k for k, v in constant.LABEL_TO_ID.items()}

    predictions = []
    all_probs = []
    for b in batch:
        preds, probs, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    # Called for its verbose report; the returned metrics are not used here.
    scorer.score(batch.gold(), predictions, verbose=True)

    # save probability scores
    if evalparams.out:
        out_dir = os.path.dirname(evalparams.out)
        # A bare filename has no directory component, so there is nothing
        # to create in that case.
        if out_dir:
            helper.ensure_dir(out_dir)
        with open(evalparams.out, 'wb') as outfile:
            pickle.dump(all_probs, outfile)
        print("Prediction scores saved to {}.".format(evalparams.out))

    print("Evaluation ended.")

    return (batch.gold(), predictions, model)
Esempio n. 2
0
def load_data(args):
    """Load a JSON dataset, a saved model, and build a single-example predictor.

    Args:
        args: Namespace-like object providing ``data_dir``, ``dataset``,
            ``model_dir`` and ``model``.

    Returns:
        A tuple ``(data, sim_dict, modelfn)``: the parsed JSON content of
        ``<data_dir>/<dataset>.json``, the result of
        ``load_similarity_dict()``, and a function that runs the loaded
        model on one input example.

    Raises:
        AssertionError: If the saved config's vocabulary size differs from
            the loaded vocabulary's size.
    """
    with open(args.data_dir + '/' + args.dataset + '.json', 'r') as handle:
        data = json.load(handle)

    # Restore the configuration and the model from the same checkpoint file.
    checkpoint_path = args.model_dir + '/' + args.model
    print("Loading model from {}".format(checkpoint_path))
    opt = torch_utils.load_config(checkpoint_path)
    model = RelationModel(opt)
    model.load(checkpoint_path)

    # The vocabulary is stored next to the checkpoint.
    vocab = Vocab(args.model_dir + '/vocab.pkl', load=True)
    assert opt['vocab_size'] == vocab.size, \
        "Vocab size must match that in the saved model."

    # Inverse of constant.LABEL_TO_ID: id -> label.
    id2label = {label_id: label
                for label, label_id in constant.LABEL_TO_ID.items()}

    def modelfn(inp):
        """Return ``(probs_of_first_item, predicted_labels)`` for one example."""
        # The example is wrapped in a one-element list and passed as a JSON
        # string rather than a file path (load_from_file=False).
        loader = DataLoader(json.dumps([inp]),
                            3,
                            opt,
                            vocab,
                            evaluation=True,
                            load_from_file=False)
        label_ids = []
        scores = []
        for minibatch in loader:
            batch_preds, batch_probs, _ = model.predict(minibatch)
            label_ids += batch_preds
            scores += batch_probs
        labels = [id2label[label_id] for label_id in label_ids]
        return scores[0], labels

    sim_dict = load_similarity_dict()

    return data, sim_dict, modelfn
Esempio n. 3
0
def get_biased_model_class_probs(args):
    """Run a saved model over a data file and collect per-batch probabilities.

    Args:
        args: Namespace-like object providing ``model_dir``, ``model``,
            ``data_dir`` and ``data_name``.

    Returns:
        A list with one entry per batch; each entry is the ``probs`` value
        returned by ``model.predict`` for that batch (the entries are
        appended as-is, not flattened).

    Raises:
        AssertionError: If the saved config's vocabulary size differs from
            the loaded vocabulary's size, or if the embedding matrix shape
            does not match ``(vocab.size, opt["emb_dim"])``.
    """
    # load opt
    model_file = args.model_dir + "/" + args.model
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = args.model_dir + "/vocab.pkl"
    vocab = Vocab(vocab_file, load=True)
    assert opt["vocab_size"] == vocab.size, \
        "Vocab size must match that in the saved model."
    opt["vocab_size"] = vocab.size

    # The embedding matrix is loaded only to sanity-check its shape against
    # the vocabulary and the configured embedding dimension.
    emb_file = opt["vocab_dir"] + "/embedding.npy"
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt["emb_dim"]

    # load data (data_name is used verbatim; no extension is appended)
    data_file = args.data_dir + "/{}".format(args.data_name)
    print("Loading data from {} with batch size {}...".format(
        data_file, opt["batch_size"]))
    batch = DataLoader(data_file,
                       opt["batch_size"],
                       opt,
                       vocab,
                       evaluation=True)

    all_probs = []
    for b in batch:
        # Only the probabilities are needed; predictions are discarded.
        _, probs, _ = model.predict(b)
        all_probs.append(probs)
    return all_probs
Esempio n. 4
0
# Flag that forces CPU execution; when given, args.cuda is switched off below.
parser.add_argument('--cpu', action='store_true')
args = parser.parse_args()

# Seed the torch RNG from the command line; the Python RNG seed is fixed at
# 1234 and does not follow args.seed.
torch.manual_seed(args.seed)
random.seed(1234)
if args.cpu:
    args.cuda = False
elif args.cuda:
    # Also seed the CUDA RNG when CUDA is requested and --cpu is not set.
    torch.cuda.manual_seed(args.seed)

# load opt: read the saved config from the checkpoint file, build the model
# from it, then have the model load that same file.
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
model = RelationModel(opt)
model.load(model_file)

# load vocab: stored alongside the checkpoint; its size must agree with the
# size recorded in the saved config.
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

# load data: the directory comes from the saved config (opt['data_dir']),
# only the dataset name comes from the command line.
data_file = opt['data_dir'] + '/{}.json'.format(args.dataset)
print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

helper.print_config(opt)
# Inverse of constant.LABEL_TO_ID: maps label id -> label.
id2label = dict([(v,k) for k,v in constant.LABEL_TO_ID.items()])

# Accumulator for predictions; presumably filled by the code that follows
# this point (not visible in this excerpt).
predictions = []