Example #1
import os

import torch.nn.functional as F

# Imports assumed from the LAMA code base (https://github.com/facebookresearch/LAMA);
# load_prompt_weights and parse_prompt are project-local helpers.
from lama.modules import build_model_by_name
from lama.utils import load_vocab
import lama.evaluation_metrics as evaluation_metrics


def main(args):

    if not args.subject or not args.relation:
        raise ValueError(
            'You need to specify both --subject and --relation to query language models.'
        )

    print('Language Models: {}'.format(args.models_names))

    models = {}
    for lm in args.models_names:
        models[lm] = build_model_by_name(lm, args)

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print('Common vocabulary size: {}'.format(len(common_vocab)))
        vocab_subset = list(common_vocab)

    prompt_file = os.path.join(args.prompts, args.relation + '.jsonl')
    if not os.path.exists(prompt_file):
        raise ValueError('Relation "{}" does not exist.'.format(args.relation))
    prompts, weights = load_prompt_weights(prompt_file)

    for model_name, model in models.items():
        print('\n{}:'.format(model_name))

        index_list = None
        if vocab_subset is not None:
            filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                vocab_subset)

        ensemble_log_probs = 0
        for prompt, weight in zip(prompts, weights):
            prompt = parse_prompt(prompt, args.subject, model.mask_token)
            log_prob, [token_ids], [masked_indices], _, _ = \
                model.get_batch_generation([prompt], try_cuda=True)

            if vocab_subset is not None:
                filtered_log_probs = model.filter_logprobs(
                    log_prob, filter_logprob_indices)
            else:
                filtered_log_probs = log_prob

            # rank over the subset of the vocab (if defined) for the single [MASK] token
            if masked_indices and len(masked_indices) > 0:
                filtered_log_probs = filtered_log_probs[0][masked_indices[0]]
                ensemble_log_probs += filtered_log_probs * weight

        ensemble_log_probs = F.log_softmax(ensemble_log_probs, dim=0)
        evaluation_metrics.get_ranking(ensemble_log_probs,
                                       model.vocab,
                                       label_index=None,
                                       index_list=index_list,
                                       topk=1000,
                                       P_AT=10,
                                       print_generation=True)
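
A minimal, self-contained sketch of the weighted prompt ensembling above, using toy tensors instead of LAMA model outputs (the vocabulary size of 5 and the weights are made up):

import torch
import torch.nn.functional as F

weights = [0.7, 0.3]                           # one weight per prompt
log_probs = [torch.randn(5), torch.randn(5)]   # per-prompt log-probs at the [MASK] position

# weighted sum of per-prompt log-probs, then renormalize into a distribution
ensemble = sum(w * lp for w, lp in zip(weights, log_probs))
ensemble = F.log_softmax(ensemble, dim=0)
print(ensemble)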
Example #2
from pprint import pprint

# Imports assumed from the LAMA code base; print_sentence_predictions lives in
# lama.utils and `metrics` is lama.evaluation_metrics.
from lama.utils import print_sentence_predictions
import lama.evaluation_metrics as metrics


def run_thread(arguments):

    msg = ""

    # 1. compute the ranking metrics on the filtered log_probs tensor
    sample_MRR, sample_P, experiment_result, return_msg = metrics.get_ranking(
        arguments["filtered_log_probs"],
        arguments["masked_indices"],
        arguments["vocab"],
        label_index=arguments["label_index"],
        index_list=arguments["index_list"],
        print_generation=arguments["interactive"],
        topk=10000,
    )
    msg += "\n" + return_msg

    sample_perplexity = 0.0
    if arguments["interactive"]:
        pprint(arguments["sample"])
        # THIS IS OPTIONAL - mainly used for debugging purposes
        # 2. compute perplexity and print predictions for the complete log_probs tensor
        sample_perplexity, return_msg = print_sentence_predictions(
            arguments["original_log_probs"],
            arguments["token_ids"],
            arguments["vocab"],
            masked_indices=arguments["masked_indices"],
            print_generation=arguments["interactive"],
        )
        input("press enter to continue...")
        msg += "\n" + return_msg

    return experiment_result, sample_MRR, sample_P, sample_perplexity, msg
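
run_thread delegates the actual metric computation to metrics.get_ranking. Below is a toy, LAMA-independent illustration of the two ranking metrics it aggregates (MRR and P@k), computed from a log-prob vector and a gold token id (both made up):

import torch

log_probs = torch.log_softmax(torch.randn(100), dim=0)  # fake vocab of 100 tokens
label_index = 42                                        # fake gold token id

# 1-based rank of the gold token under descending log-probability
rank = (log_probs.argsort(descending=True) == label_index).nonzero().item() + 1
mrr = 1.0 / rank                       # mean reciprocal rank (single sample)
p_at_10 = 1.0 if rank <= 10 else 0.0   # precision at 10 (single sample)
print(rank, mrr, p_at_10)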
Example #3
# Assumed import from the LAMA code base:
import lama.evaluation_metrics as evaluation_metrics


def lama(sent, bert):
    data = []
    filtered_log_probs_list, [token_ids], [masked_indices] = \
        bert.get_batch_generation([[sent]], try_cuda=True)
    # rank over the subset of the vocab (if defined) for the single [MASK] token
    if masked_indices and len(masked_indices) > 0:
        MRR, P_AT_X, experiment_result, return_msg = evaluation_metrics.get_ranking(
            filtered_log_probs_list[0],
            masked_indices,
            bert.vocab,
            index_list=None)
        res = experiment_result["topk"]
        for r in res:
            data.append((r["token_word_form"], r["log_prob"]))
    return data
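
Hypothetical usage of the helper above (the model construction and the example sentence are assumptions, not part of the snippet):

# bert = build_model_by_name("bert", args)   # LAMA model wrapper
# top_tokens = lama("The theory of relativity was developed by [MASK].", bert)
# print(top_tokens[:5])                      # [(token_word_form, log_prob), ...]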
Example #4
# Assumed import from the LAMA code base:
import lama.evaluation_metrics as metrics


def run_thread(arguments):

    msg = ""

    # 1. compute the ranking metrics on the filtered log_probs tensor
    sample_MRR, sample_P, experiment_result, return_msg = metrics.get_ranking(
        arguments["filtered_log_probs"],
        arguments["masked_indices"],
        arguments["vocab"],
        label_index=arguments["label_index"],
        index_list=arguments["index_list"],
        print_generation=arguments["interactive"],
        topk=10000,
    )
    msg += "\n" + return_msg

    sample_perplexity = 0.0

    return experiment_result, sample_MRR, sample_P, sample_perplexity, msg
Example #5

# `model`, `evaluation_metrics`, `task12_label`, and `dev_set_file` are
# assumed to be defined earlier in the original script.
import jsonlines
from tqdm import tqdm

tot_lines = 0     # lines counter
tot_line_acc = 0  # total accuracy

with jsonlines.open(dev_set_file) as reader:
    for obj in tqdm(reader):
        tot_lines = tot_lines + 1  # lines counter
        # mask token in the middle of sentence
        text = [
            obj['claim'][:obj['entity']['start_character']] + '[MASK]' +
            obj['claim'][obj['entity']['end_character']:]
        ]

        # from lama/eval_generation.py
        original_log_probs_list, [token_ids], [masked_indices] = \
            model.get_batch_generation([text], try_cuda=True)
        index_list = None
        filtered_log_probs_list = original_log_probs_list
        # rank over the subset of the vocab (if defined) for the single [MASK] token
        if masked_indices and len(masked_indices) > 0:
            MRR, P_AT_X, experiment_result, return_msg = evaluation_metrics.get_ranking(
                filtered_log_probs_list[0],
                masked_indices,
                model.vocab,
                index_list=index_list,
                print_generation=False)

            # accuracy of this line; computed inside the guard so that
            # experiment_result is always defined when task12_label reads it
            line_acc = task12_label(obj, experiment_result)
            tot_line_acc = tot_line_acc + line_acc

print('Task1.2  accuracy: ', tot_line_acc / tot_lines)

# result: Task1.2  accuracy:  0.5757261410788381
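
A self-contained illustration of the character-offset masking used above (the claim and offsets are made up):

obj = {"claim": "Paris is the capital of France.",
       "entity": {"start_character": 0, "end_character": 5}}
text = [
    obj['claim'][:obj['entity']['start_character']] + '[MASK]' +
    obj['claim'][obj['entity']['end_character']:]
]
print(text)  # ["[MASK] is the capital of France."]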
Example #6
# Imports assumed from the LAMA code base:
from lama.modules import build_model_by_name
from lama.utils import print_sentence_predictions, load_vocab
import lama.evaluation_metrics as evaluation_metrics


def main(args):

    if not args.text and not args.interactive:
        msg = "ERROR: either start LAMA eval_generation in interactive mode " \
              "(--i) or pass a piece of text as input (--t)"
        raise ValueError(msg)

    stopping_condition = True

    print("Language Models: {}".format(args.models_names))

    models = {}
    for lm in args.models_names:
        models[lm] = build_model_by_name(lm, args)

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print("common vocabulary size: {}".format(len(common_vocab)))
        vocab_subset = list(common_vocab)

    while stopping_condition:
        if args.text:
            text = args.text
            stopping_condition = False
        else:
            text = input("insert text:")

        if args.split_sentence:
            import spacy
            # use spacy to tokenize input sentence
            nlp = spacy.load(args.spacy_model)
            tokens = nlp(text)
            print(tokens)
            sentences = []
            for s in tokens.sents:
                print(" - {}".format(s))
                sentences.append(s.text)
        else:
            sentences = [text]

        if len(sentences) > 2:
            print(
                "WARNING: only the first two sentences in the text will be considered!"
            )
            sentences = sentences[:2]

        for model_name, model in models.items():
            print("\n{}:".format(model_name))
            original_log_probs_list, [token_ids], [masked_indices] = \
                model.get_batch_generation([sentences], try_cuda=False)

            index_list = None
            if vocab_subset is not None:
                # filter log_probs
                filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = model.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
            else:
                filtered_log_probs_list = original_log_probs_list

            # rank over the subset of the vocab (if defined) for the single [MASK] token
            if masked_indices and len(masked_indices) > 0:
                evaluation_metrics.get_ranking(filtered_log_probs_list[0],
                                               masked_indices,
                                               model.vocab,
                                               index_list=index_list)

            # prediction and perplexity for the whole softmax
            print_sentence_predictions(original_log_probs_list[0],
                                       token_ids,
                                       model.vocab,
                                       masked_indices=masked_indices)
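
A toy, LAMA-independent sketch of the vocabulary filtering performed by init_indices_for_filter_logprobs/filter_logprobs above: restrict the log-probs to a candidate subset and rank within it (the vocabulary and subset are made up):

import torch

vocab = ["paris", "london", "rome", "cat", "dog"]
subset = ["paris", "rome"]
filter_indices = torch.tensor([vocab.index(w) for w in subset])

log_probs = torch.log_softmax(torch.randn(len(vocab)), dim=0)
filtered = log_probs[filter_indices]    # keep only the subset's log-probs
print(subset[int(filtered.argmax())])   # best candidate within the subset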
Example #7
import json
import os

# Imports assumed from the LAMA code base; Args_Stud is a project-local
# argument container.
from lama.modules import build_model_by_name
import lama.evaluation_metrics as evaluation_metrics


def main():
    args_stud = Args_Stud()
    bert = build_model_by_name("bert", args_stud)
    vocab_subset = None
    # read the collected input/output paths from a JSON config
    with open('./LAMA/lama/collected_paths.json') as f:
        path_s = json.load(f)
    sent_path_ = path_s['sent2eval']
    prem_path = path_s['premis2eval']
    res_path_ = path_s["res_file"]
    paths = os.listdir(sent_path_)
    for path in paths:
        sent_path = sent_path_ + path
        stem = path.split(".")[0]
        # assumption: the result directory is named after the last two
        # "_"-separated fields of the file name stem
        res_path = res_path_ + stem.split("_")[-2] + "_" + stem.split("_")[-1] + "/"
        os.makedirs(res_path, exist_ok=True)
        with open(sent_path, "r", encoding="utf8") as sf:
            sentences = [s.rstrip() for s in sf.readlines()]
        print(sentences)
        with open(prem_path, "r") as pf:
            premisses = [p.rstrip() for p in pf.readlines()]
        data = {}
        for s in sentences:
            data[s] = []
            original_log_probs_list, [token_ids], [masked_indices] = \
                bert.get_batch_generation([[s]], try_cuda=True)
            index_list = None
            if vocab_subset is not None:
                # filter log_probs
                filter_logprob_indices, index_list = bert.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = bert.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
            else:
                filtered_log_probs_list = original_log_probs_list

            # rank over the subset of the vocab (if defined) for the single [MASK] token
            if masked_indices and len(masked_indices) > 0:
                MRR, P_AT_X, experiment_result, return_msg = evaluation_metrics.get_ranking(
                    filtered_log_probs_list[0],
                    masked_indices,
                    bert.vocab,
                    index_list=index_list)
                res = experiment_result["topk"]
                for r in res:
                    data[s].append((r["token_word_form"], r["log_prob"]))
        with open(res_path + "NoPrem.json", "w+", encoding="utf-8") as f:
            json.dump(data, f)
        for pre in premisses:
            for s in sentences:
                data[s] = []
                sentence = [str(pre) + "? " + s]
                original_log_probs_list, [token_ids], [masked_indices] = \
                    bert.get_batch_generation([sentence], try_cuda=False)
                index_list = None
                if vocab_subset is not None:
                    # filter log_probs
                    filter_logprob_indices, index_list = bert.init_indices_for_filter_logprobs(
                        vocab_subset)
                    filtered_log_probs_list = bert.filter_logprobs(
                        original_log_probs_list, filter_logprob_indices)
                else:
                    filtered_log_probs_list = original_log_probs_list

                # rank over the subset of the vocab (if defined) for the single [MASK] token
                if masked_indices and len(masked_indices) > 0:
                    MRR, P_AT_X, experiment_result, return_msg = evaluation_metrics.get_ranking(
                        filtered_log_probs_list[0],
                        masked_indices,
                        bert.vocab,
                        index_list=index_list)
                    res = experiment_result["topk"]
                    for r in res:
                        data[s].append((r["token_word_form"], r["log_prob"]))
            with open(res_path + pre + ".json", "w+", encoding="utf-8") as f:
                json.dump(data, f)
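
The script writes one JSON file per premise, mapping each probed sentence to its top-k (token, log_prob) pairs. A sketch of that output layout with made-up values:

import json

data = {"Einstein was born in [MASK].": [("Germany", -1.2), ("Ulm", -2.3)]}
with open("NoPrem.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2)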
Example #8
import pickle

# Imports assumed from the LAMA code base; verb_noun_mapping is a
# project-local dict mapping verbs to noun forms.
from lama.modules import build_model_by_name
from lama.utils import print_sentence_predictions, load_vocab
import lama.evaluation_metrics as evaluation_metrics


def main(args):
    verbose = False
    moral_template = "{} is a [MASK] behavior ."

    data_user_study = pickle.load(
        open('../MoRT/data/parsed_yes_no_BERTBias.p', 'rb'))
    actions = list(data_user_study.keys())
    # the loaded actions are overridden here with a hard-coded subset
    actions = ['help people', 'help white people', 'help black people']
    print("Language Models: {}".format(args.models_names))

    models = {}
    for lm in args.models_names:
        models[lm] = build_model_by_name(lm, args)

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print("common vocabulary size: {}".format(len(common_vocab)))
        vocab_subset = list(common_vocab)

    data_user_study_BERTKnowledge = dict()
    for action in actions:
        action_ = action.split(" ")
        action_[0] = verb_noun_mapping[action_[0]].capitalize()
        action_ = " ".join(action_)
        text = moral_template.format(action_)
        if args.split_sentence:
            import spacy
            # use spacy to tokenize input sentence
            nlp = spacy.load(args.spacy_model)
            tokens = nlp(text)
            print(tokens)
            sentences = []
            for s in tokens.sents:
                print(" - {}".format(s))
                sentences.append(s.text)
        else:
            sentences = [text]

        if len(sentences) > 2:
            print(
                "WARNING: only the first two sentences in the text will be considered!"
            )
            sentences = sentences[:2]

        for model_name, model in models.items():
            if model_name not in list(data_user_study_BERTKnowledge.keys()):
                data_user_study_BERTKnowledge[model_name] = {}
            if verbose:
                print("\n{}:".format(model_name))
            original_log_probs_list, [token_ids], [masked_indices] = \
                model.get_batch_generation([sentences], try_cuda=False)

            index_list = None
            if vocab_subset is not None:
                # filter log_probs
                filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = model.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
            else:
                filtered_log_probs_list = original_log_probs_list
            # rank over the subset of the vocab (if defined) for the single [MASK] token
            if masked_indices and len(masked_indices) > 0:
                _, _, experiment_result, _ = evaluation_metrics.get_ranking(
                    filtered_log_probs_list[0],
                    masked_indices,
                    model.vocab,
                    index_list=index_list,
                    print_generation=verbose)

                # keep only the top-10 predictions for this action; guarded by the
                # check above so experiment_result is always defined when used
                experiment_result_topk = [(r['i'], r['token_word_form'], r['log_prob'])
                                          for r in experiment_result['topk'][:10]]
                data_user_study_BERTKnowledge[model_name][action] = [
                    text, experiment_result_topk
                ]
            # prediction and perplexity for the whole softmax
            if verbose:
                print_sentence_predictions(original_log_probs_list[0],
                                           token_ids,
                                           model.vocab,
                                           masked_indices=masked_indices)

    print(data_user_study_BERTKnowledge)

    pickle.dump(data_user_study_BERTKnowledge,
                open('./parsed_BERTKnowledge_tests.p', 'wb'))
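
The same cloze-style probe can be reproduced without the LAMA wrappers, e.g. with the HuggingFace fill-mask pipeline (a sketch assuming the transformers package and the bert-base-cased checkpoint; not this script's actual code path):

from transformers import pipeline

unmasker = pipeline("fill-mask", model="bert-base-cased")
for pred in unmasker("Helping people is a [MASK] behavior."):
    print(pred["token_str"], pred["score"])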