Example #1
0
def main(option):
    """Evaluate a trained event-embedding model on the transitive
    sentence similarity dataset.

    Loads the dataset (from raw files plus GloVe embeddings, or from a
    cache), rebuilds the model named by ``option.model``, restores its
    weights from ``option.model_file``, scores every sentence pair with
    the negated distance between the two event embeddings, and logs the
    Spearman correlation against the gold similarity judgements.
    Optionally writes per-pair scores to ``option.output_file``.

    Args:
        option: parsed command-line options. Attributes read here:
            dataset_cache, emb_file, dataset_file, vocab_size, emb_dim,
            model, em_k, em_r, model_file, use_gpu, distance_metric,
            output_file.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.DEBUG,
        format=
        '%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'
    )

    dataset = TransitiveSentenceSimilarityDataset()
    if option.dataset_cache is None:
        glove = Glove(option.emb_file)
        logging.info('Embeddings loaded')
        dataset.load(option.dataset_file, glove)
    else:
        dataset.load_cache(option.dataset_cache)
    logging.info('Dataset loaded')

    # padding_idx=1: token id 1 is the PAD slot and stays a zero vector.
    embeddings = nn.Embedding(option.vocab_size, option.emb_dim, padding_idx=1)
    if option.model == 'NTN':
        model = NeuralTensorNetwork(embeddings, option.em_k)
    elif option.model == 'LowRankNTN':
        model = LowRankNeuralTensorNetwork(embeddings, option.em_k,
                                           option.em_r)
    elif option.model == 'RoleFactor':
        model = RoleFactoredTensorModel(embeddings, option.em_k)
    elif option.model == 'Predicate':
        model = PredicateTensorModel(embeddings)
    elif option.model == 'NN':
        model = NN(embeddings, 2 * option.em_k, option.em_k)
    elif option.model == 'EMC':
        model = EMC(embeddings, 2 * option.em_k, option.em_k)
    else:
        logging.info('Unknown model type: ' + option.model)
        exit(1)

    # Checkpoints come in three flavors: a plain dict holding
    # 'event_model_state_dict', one holding 'model_state_dict', or a bare
    # state dict (an OrderedDict, which intentionally falls through to the
    # final branch). Missing keys no longer raise KeyError: an unrecognized
    # dict is passed straight to load_state_dict, which reports the mismatch.
    checkpoint = torch.load(option.model_file, map_location='cpu')
    if isinstance(checkpoint, dict) and 'event_model_state_dict' in checkpoint:
        state_dict = checkpoint['event_model_state_dict']
    elif isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        state_dict = checkpoint['model_state_dict']
    else:
        state_dict = checkpoint
    model.load_state_dict(state_dict)
    logging.info(option.model_file + ' loaded')

    # embeddings = nn.Embedding(option.vocab_size, option.emb_dim, padding_idx=1)
    # embeddings.weight.data = torch.from_numpy(glove.embd).float()
    # model = Averaging(embeddings)

    if option.use_gpu:
        model.cuda()
    model.eval()

    # Score the entire dataset as one batch (batch_size == len(dataset)).
    data_loader = torch.utils.data.DataLoader(
        dataset,
        collate_fn=TransitiveSentenceSimilarityDataset_collate_fn,
        shuffle=False,
        batch_size=len(dataset))
    batch = next(iter(data_loader))
    *inputs, gold = batch
    if option.use_gpu:
        inputs = [tensor.cuda() for tensor in inputs]
    (e1_subj_id, e1_subj_w, e1_verb_id, e1_verb_w, e1_obj_id, e1_obj_w,
     e2_subj_id, e2_subj_w, e2_verb_id, e2_verb_w, e2_obj_id,
     e2_obj_w) = inputs

    # Inference only: skip autograd graph construction for the forward pass.
    with torch.no_grad():
        e1_emb = model(e1_subj_id, e1_subj_w, e1_verb_id, e1_verb_w,
                       e1_obj_id, e1_obj_w)
        e2_emb = model(e2_subj_id, e2_subj_w, e2_verb_id, e2_verb_w,
                       e2_obj_id, e2_obj_w)

    if option.distance_metric == 'cosine':
        distance_func = cosine_distance
    elif option.distance_metric == 'euclid':
        distance_func = euclid_distance
    else:
        # Fail fast; previously an unknown metric hit a NameError on
        # distance_func below.
        logging.info('Unknown distance metric: ' + option.distance_metric)
        exit(1)

    # Higher similarity corresponds to smaller distance, so negate the
    # distance to get a score that correlates positively with gold.
    pred = -distance_func(e1_emb, e2_emb)

    if option.use_gpu:
        pred = pred.cpu()
    pred = pred.detach().numpy()
    gold = gold.numpy()
    spearman_correlation, spearman_p = scipy.stats.spearmanr(pred, gold)

    if option.output_file.strip() != '':
        # One predicted score per line, in dataset order.
        with open(option.output_file, 'w') as output_file:
            for score in pred:
                output_file.write(str(score) + '\n')
        logging.info('Output saved to ' + option.output_file)

    logging.info('Spearman correlation: %.4f' % (spearman_correlation, ))
Example #2
0
        event_model = NeuralTensorNetwork(embeddings, option.em_k)
    elif option.model == 'RoleFactor':
        event_model = RoleFactoredTensorModel(embeddings, option.em_k)
    else:
        logging.info('Unknwon model: ' + option.model)
        exit(1)
    criterion = nn.CrossEntropyLoss()

    # load pretrained embeddings
    embeddings.weight.data.copy_(torch.from_numpy(glove.embd).float())

    if not option.update_embeddings:
        event_model.embeddings.weight.requires_grad = False

    if option.use_gpu:
        event_model.cuda()
        neg_embeddings.cuda()
        criterion.cuda()

    params = [{
        'params': event_model.embeddings.parameters()
    }, {
        'params': neg_embeddings.parameters()
    }]
    if option.model == 'NTN':
        params += [{
            'params': event_model.subj_verb_comp.parameters(),
            'weight_decay': option.weight_decay
        }, {
            'params': event_model.verb_obj_comp.parameters(),
            'weight_decay': option.weight_decay