def run_thread(arguments):

    msg = ""

    # 1. compute the ranking metrics on the filtered log_probs tensor
    sample_MRR, sample_P, experiment_result, return_msg = metrics.get_ranking(
        arguments["mymodel_probs"],
        arguments["masked_indices"],
        arguments["vocab"],
        is_masked_probs=False,
        label_index=arguments["label_index"],
        print_generation=arguments["interactive"],
        topk=10000,
    )
    em, f1, is_error, no_overlap, larger_by_1, larger_by_2, larger_by_3, larger_by_4, larger_by_5_or_more = metrics.calculate_em_f1(
        arguments["target"], arguments["prediction"])

    msg += "\n" + return_msg

    sample_perplexity = 0.0
    if arguments["interactive"]:
        pprint(arguments["sample"])
        # THIS IS OPTIONAL - mainly used for debugging reasons
        # 2. compute perplexity and print predictions for the complete log_probs tensor
        sample_perplexity, return_msg = print_sentence_predictions(
            arguments["original_log_probs"],
            arguments["token_ids"],
            arguments["vocab"],
            masked_indices=arguments["masked_indices"],
            print_generation=arguments["interactive"],
        )
        input("press enter to continue...")
        msg += "\n" + return_msg

    return experiment_result, sample_MRR, sample_P, sample_perplexity, msg, em, f1, is_error, no_overlap, larger_by_1, larger_by_2, larger_by_3, larger_by_4, larger_by_5_or_more
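# Illustrative only: a minimal sketch of the `arguments` dict that run_thread expects,
# assembled from the keys read above. The concrete values come from the surrounding
# evaluation pipeline; the placeholder names on the right-hand side (filtered_log_probs,
# original_log_probs, token_ids, masked_indices, sample, label_index, gold/predicted
# answer strings) are assumptions, not names defined in this file.
#
# example_arguments = {
#     "mymodel_probs": filtered_log_probs,        # log-probs, possibly restricted to a common vocab
#     "original_log_probs": original_log_probs,   # unfiltered log-probs, used for perplexity
#     "token_ids": token_ids,
#     "masked_indices": masked_indices,
#     "vocab": model.vocab,
#     "label_index": label_index,                 # index of the gold object token in the vocab
#     "interactive": False,
#     "sample": sample,                           # raw data point, only printed in interactive mode
#     "target": gold_answer_string,
#     "prediction": predicted_answer_string,
# }
# results_tuple = run_thread(example_arguments)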
def get_results(model, sentence, paragraph):
    # maximum number of consecutive [MASK] tokens to try
    max_tokens = 3
    result_list = []
    for i in range(1, max_tokens + 1):
        if i != 1:
            # add one more [MASK] token to the cloze sentence on each iteration
            sentence = sentence.replace("[MASK]", "[MASK] [MASK]", 1)
        #print(sentence)
        if paragraph == "":
            sentences = [sentence]
        else:
            sentences = [sentence, paragraph]

        original_log_probs_list, [token_ids], [masked_indices] = model.get_batch_generation(
            [sentences], try_cuda=True)

        index_list = None
        filtered_log_probs_list = original_log_probs_list

        ret = {}
        # build the top-k ranking for this number of masks and append it to result_list
        if masked_indices and len(masked_indices) > 0:
            results = evaluation_metrics.get_ranking(
                filtered_log_probs_list[0],
                masked_indices,
                model.vocab,
                index_list=index_list)
            result_list.append(results)
    return result_list
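# Illustrative only: a sketch of how get_results could be called, assuming a LAMA-style
# connector built with build_model_by_name and a cloze claim containing a single [MASK].
# The claim text and the empty evidence paragraph are made-up example inputs.
#
# model = build_model_by_name("bert", args)
# claim = "Albert Einstein was born in [MASK]."
# evidence = ""  # optional paragraph passed as a second sentence
# rankings = get_results(model, claim, evidence)
# # rankings[0] -> ranking with 1 mask, rankings[1] -> 2 masks, rankings[2] -> 3 masks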
def main(args):

    if not args.text and not args.interactive:
        msg = "ERROR: either start LAMA eval_generation with the " \
              "interactive option (--i) or pass a piece of text as input (--t)"
        raise ValueError(msg)

    stopping_condition = True

    #print("Language Models: {}".format(args.models_names))

    models = {}
    for lm in args.models_names:
        # reuse a cached model build if one exists, otherwise build it and cache it with dill
        if os.path.exists("/data/fichtel/lm_builds/model_{}".format(lm)):
            with open("/data/fichtel/lm_builds/model_{}".format(lm), 'rb') as lm_build_file:
                models[lm] = dill.load(lm_build_file)
        else:
            models[lm] = build_model_by_name(lm, args)
            with open("/data/fichtel/lm_builds/model_{}".format(lm), 'wb') as lm_build_file:
                dill.dump(models[lm], lm_build_file)

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print("common vocabulary size: {}".format(len(common_vocab)))
        vocab_subset = [x for x in common_vocab]

    while stopping_condition:

        if args.text:
            text = args.text
            stopping_condition = False
        else:
            text = input("insert text:")

        if args.split_sentence:
            import spacy
            # use spacy to split the input text into sentences
            nlp = spacy.load(args.spacy_model)
            tokens = nlp(text)
            print(tokens)
            sentences = []
            for s in tokens.sents:
                print(" - {}".format(s))
                sentences.append(s.text)
        else:
            sentences = [text]

        if len(sentences) > 2:
            print("WARNING: only the first two sentences in the text will be considered!")
            sentences = sentences[:2]

        for model_name, model in models.items():
            #print("\n{}:".format(model_name))
            original_log_probs_list, [token_ids], [masked_indices] = model.get_batch_generation(
                [sentences], try_cuda=False)

            index_list = None
            if vocab_subset is not None:
                # filter log_probs down to the common vocabulary
                filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = model.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
                print(filtered_log_probs_list)
            else:
                filtered_log_probs_list = original_log_probs_list

            ret = {}
            # rank over the subset of the vocab (if defined) for the SINGLE masked tokens
            if masked_indices and len(masked_indices) > 0:
                ret = evaluation_metrics.get_ranking(
                    filtered_log_probs_list[0],
                    masked_indices,
                    model.vocab,
                    index_list=index_list)

            # prediction and perplexity for the whole softmax
            # print_sentence_predictions(original_log_probs_list[0], token_ids, model.vocab, masked_indices=masked_indices)

            for r in ret:
                print("%s %s" % (r, ret[r]))
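# A plausible command-line entry point for main(), modelled on LAMA's eval_generation
# script; the lama.options helpers used here are an assumption and may differ in this repo.
#
# if __name__ == "__main__":
#     import lama.options as options
#     parser = options.get_eval_generation_parser()
#     args = options.parse_args(parser)
#     main(args)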
def LAMA_results(args, data_point, models):
    # result_task1 will contain the label SUPPORT or REJECT as the result of the prediction
    result_task1 = ""

    # claimWithMask holds the claim with [MASK]; token holds the current entity
    claimWithMask, token = getClaimwithMaks(data_point)
    args.text = claimWithMask

    if not args.text and not args.interactive:
        msg = "ERROR: either start LAMA eval_generation with the " \
              "interactive option (--i) or pass a piece of text as input (--t)"
        raise ValueError(msg)

    stopping_condition = True

    #print("Language Models: {}".format(args.models_names))

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print("common vocabulary size: {}".format(len(common_vocab)))
        vocab_subset = [x for x in common_vocab]

    while stopping_condition:

        if args.text:
            text = args.text
            stopping_condition = False
        else:
            text = input("insert text:")

        if args.split_sentence:
            import spacy
            # use spacy to split the input text into sentences
            nlp = spacy.load(args.spacy_model)
            tokens = nlp(text)
            #print(tokens)
            sentences = []
            for s in tokens.sents:
                #print(" - {}".format(s))
                sentences.append(s.text)
        else:
            sentences = [text]

        if len(sentences) > 2:
            print("WARNING: only the first two sentences in the text will be considered!")
            sentences = sentences[:2]

        for model_name, model in models.items():
            #print("\n{}:".format(model_name))
            original_log_probs_list, [token_ids], [masked_indices] = model.get_batch_generation(
                [sentences], try_cuda=False)

            index_list = None
            if vocab_subset is not None:
                # filter log_probs down to the common vocabulary
                filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = model.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
            else:
                filtered_log_probs_list = original_log_probs_list

            # rank over the subset of the vocab (if defined) for the SINGLE masked tokens
            if masked_indices and len(masked_indices) > 0:
                # we modified evaluation_metrics.get_ranking so that it also returns a dataframe
                # with the same content as the printed msg: the top predictions with their
                # probabilities. We store this dataframe in df_predictions.
                a, b, c, d, df_predictions = evaluation_metrics.get_ranking(
                    filtered_log_probs_list[0],
                    masked_indices,
                    model.vocab,
                    index_list=index_list)
                list_predictions = df_predictions['prediction'].tolist()
                # lowercase all predictions because the entity is lowercased; this avoids mismatches
                list_predictions = [x.lower() for x in list_predictions]
                # if the current entity equals the first prediction, label the claim SUPPORT, otherwise REJECT
                if token == list_predictions[0]:
                    result_task1 = "SUPPORT"
                else:
                    result_task1 = "REJECT"
    return result_task1
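# Illustrative only: a sketch of how LAMA_results could be driven over a dataset to produce
# one SUPPORT/REJECT label per claim. `load_data_points` and the file name are hypothetical
# placeholders; `models` is the name->model dict built as in main() above, and `args` the
# parsed command-line arguments.
#
# labels = []
# for data_point in load_data_points("claims.jsonl"):   # hypothetical loader
#     labels.append(LAMA_results(args, data_point, models))
# print(labels[:5])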