Code Example #1
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    # load vocabulary file for model
    log.info("Loading vocab file:\t{}".format(args.vocab))
    vocab = load_vocab_file(args.vocab)
    log.info("{} tokens loaded".format(len(vocab)))

    # get context as a string (as we might need its length for the sequence reshape)
    paragraphs = get_paragraphs(args.input)
    context = '\n'.join(paragraphs)
    log.info("Size: {} chars".format(len(context)))
    sentences = re.split(sentence_splitter, context)
    preprocessed_sentences = [text_to_tokens(sentence, vocab) for sentence in sentences]
    max_sent_length = max([len(tokens) + 2 for tokens, _ in preprocessed_sentences])

    log.info("Initializing Inference Engine")
    ie = IECore()
    version = ie.get_versions(args.device)[args.device]
    version_str = "{}.{}.{}".format(version.major, version.minor, version.build_number)
    log.info("Plugin version is {}".format(version_str))

    # read IR
    model_xml = args.model
    model_bin = model_xml.with_suffix(".bin")
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    ie_encoder = ie.read_network(model=model_xml, weights=model_bin)

    # check input and output names
    input_names = [i.strip() for i in args.input_names.split(',')]
    if ie_encoder.input_info.keys() != set(input_names):
        log.error("Input names do not match")
        log.error("    The demo expects input names: {}. "
                  "Please use the --input_names to specify the right names "
                  "(see actual values below)".format(input_names))
        log.error("    Actual network input names: {}".format(list(ie_encoder.input_info.keys())))
        raise Exception("Unexpected network input names")
    if len(ie_encoder.outputs) != 1:
        log.error('The demo expects a model with a single output, but {} outputs were provided'.format(len(ie_encoder.outputs)))
        raise Exception('Unexpected number of outputs')
    output_names = list(ie_encoder.outputs)
    # maximum number of tokens that can be processed by the network at once
    max_length = ie_encoder.input_info[input_names[0]].input_data.shape[1]
    if max_sent_length > max_length:
        input_shapes = {
            input_names[0]: [1, max_sent_length],
            input_names[1]: [1, max_sent_length],
            input_names[2]: [1, max_sent_length]
        }
        ie_encoder.reshape(input_shapes)
        max_length = max_sent_length
    # load model to the device
    log.info("Loading model to the {}".format(args.device))
    ie_encoder_exec = ie.load_network(network=ie_encoder, device_name=args.device)
    t0 = time.perf_counter()
    t_count = 0

    def get_score(name):
        out = np.exp(res[name][0])
        return out / out.sum(axis=-1, keepdims=True)

    for sentence, (c_tokens_id, c_token_s_e) in zip(sentences, preprocessed_sentences):
        # form the request
        tok_cls = vocab['[CLS]']
        tok_sep = vocab['[SEP]']
        input_ids = [tok_cls] + c_tokens_id + [tok_sep]
        token_type_ids = [0] * len(input_ids)
        attention_mask = [1] * len(input_ids)

        # pad the rest of the request
        pad_len = max_length - len(input_ids)
        input_ids += [0] * pad_len
        token_type_ids += [0] * pad_len
        attention_mask += [0] * pad_len

        # create numpy inputs for IE
        inputs = {
            input_names[0]: np.array([input_ids], dtype=np.int32),
            input_names[1]: np.array([attention_mask], dtype=np.int32),
            input_names[2]: np.array([token_type_ids], dtype=np.int32),
        }
        if len(input_names) > 3:
            inputs[input_names[3]] = np.arange(len(input_ids), dtype=np.int32)[None, :]

        t_start = time.perf_counter()
        # infer by IE
        res = ie_encoder_exec.infer(inputs=inputs)
        t_end = time.perf_counter()
        t_count += 1
        log.info("Sequence of length {} is processed with {:0.2f} requests/sec ({:0.2} sec per request)".format(
            max_length,
            1 / (t_end - t_start),
            t_end - t_start
        ))


        score = get_score(output_names[0])
        labels_idx = score.argmax(-1)
        filtered_labels_idx = [
            (idx, label_idx)
            for idx, label_idx in enumerate(labels_idx)
            if label_idx != 0 and 0 < idx < max_length - pad_len
        ]

        if not filtered_labels_idx:
            continue

        log.info('Sentence: \n\t{}'.format(sentence))
        visualized = set()
        for idx, label_idx in filtered_labels_idx:
            word_s, word_e = c_token_s_e[idx - 1]
            if (word_s, word_e) in visualized:
                continue
            visualized.add((word_s, word_e))
            word = sentence[word_s:word_e]
            log.info('\n\tWord: {}\n\tConfidence: {}\n\tTag: {}'.format(word, score[idx][label_idx], label_to_tag[label_idx]))

    t1 = time.perf_counter()
    log.info("The performance below is reported only for reference purposes, "
            "please use the benchmark_app tool (part of the OpenVINO samples) for any actual measurements.")
    log.info("{} requests of {} length were processed in {:0.2f}sec ({:0.2}sec per request)".format(
        t_count,
        max_length,
        t1 - t0,
        (t1 - t0) / t_count
    ))
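
The loop above applies a per-token softmax to the single network output and keeps every token whose best label is not the background class. The following minimal sketch (toy logits, not part of the demo) illustrates just that scoring step:

# Minimal sketch with toy data: per-token softmax over NER logits followed by
# argmax, mirroring get_score() and the label filtering in the loop above.
import numpy as np

toy_logits = np.array([        # shape: (seq_len, num_labels)
    [4.0, 0.1, 0.2],           # token 0 -> label 0 (background "O")
    [0.3, 3.5, 0.1],           # token 1 -> label 1
    [0.2, 0.1, 2.9],           # token 2 -> label 2
])

probs = np.exp(toy_logits)
probs /= probs.sum(axis=-1, keepdims=True)   # softmax per token
labels_idx = probs.argmax(-1)                # best label per token

# keep only tokens whose predicted label is not the background class 0
entities = [(i, int(l), float(probs[i, l])) for i, l in enumerate(labels_idx) if l != 0]
print(entities)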
Code Example #2
def encode_txt(txt):
    if do_lower_case:
        txt = txt.lower()
    return tokens_bert.text_to_tokens(txt, vocab)
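
A quick usage note: judging from the other listings, tokens_bert.text_to_tokens returns a pair of token ids and per-token (start, end) character offsets into the input string. A hedged usage sketch, assuming vocab and do_lower_case are already defined by the enclosing script:

# Usage sketch only: assumes vocab = load_vocab_file(...) and do_lower_case
# have been set up by the surrounding script, as in the other examples.
tokens_id, tokens_se = encode_txt("OpenVINO runs BERT demos.")
for tid, (s, e) in zip(tokens_id, tokens_se):
    print(tid, (s, e))   # token id plus its character span in the input text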
Code Example #3
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    if args.colors:
        COLOR_RED = "\033[91m"
        COLOR_RESET = "\033[0m"
    else:
        COLOR_RED = ""
        COLOR_RESET = ""

    # load vocabulary file for model
    log.info("Loading vocab file:\t{}".format(args.vocab))
    vocab = load_vocab_file(args.vocab)
    log.info("{} tokens loaded".format(len(vocab)))

    # get context as a string (as we might need its length for the sequence reshape)
    paragraphs = get_paragraphs(args.input)
    context = '\n'.join(paragraphs)
    log.info("Size: {} chars".format(len(context)))
    log.info("Context: " + COLOR_RED + context + COLOR_RESET)
    # encode context into token ids list
    c_tokens_id, c_tokens_se = text_to_tokens(context.lower(), vocab)

    log.info("Initializing Inference Engine")
    ie = IECore()
    version = ie.get_versions(args.device)[args.device]
    version_str = "{}.{}.{}".format(version.major, version.minor, version.build_number)
    log.info("Plugin version is {}".format(version_str))

    # read IR
    model_xml = args.model
    model_bin = model_xml.with_suffix(".bin")
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    ie_encoder = ie.read_network(model=model_xml, weights=model_bin)

    if args.reshape:
        # reshape the sequence length to the context + maximum question length (in tokens)
        first_input_layer = next(iter(ie_encoder.input_info))
        c = ie_encoder.input_info[first_input_layer].input_data.shape[1]
        # round up to the closest multiple of 64; if that is smaller than the current network's sequence length, use it
        seq = min(c, int(np.ceil((len(c_tokens_id) + args.max_question_token_num) / 64) * 64))
        if seq < c:
            new_shapes = {}
            for input_name, input_info in ie_encoder.input_info.items():
                n, c = input_info.input_data.shape
                new_shapes[input_name] = [n, seq]
                log.info("Reshaped input {} from {} to the {}".format(
                    input_name, input_info.input_data.shape, new_shapes[input_name]))
            log.info("Attempting to reshape the network to the modified inputs...")
            try:
                ie_encoder.reshape(new_shapes)
                log.info("Successful!")
            except RuntimeError:
                log.error("Failed to reshape the network, please retry the demo without '-r' option")
                sys.exit(-1)
        else:
            log.info("Skipping network reshaping,"
                     " as (context length + max question length) exceeds the current (input) network sequence length")

    # check input and output names
    input_names = [i.strip() for i in args.input_names.split(',')]
    output_names = [o.strip() for o in args.output_names.split(',')]
    if ie_encoder.input_info.keys() != set(input_names) or ie_encoder.outputs.keys() != set(output_names):
        log.error("Input or Output names do not match")
        log.error("    The demo expects input->output names: {}->{}. "
                  "Please use the --input_names and --output_names to specify the right names "
                  "(see actual values below)".format(input_names, output_names))
        log.error("    Actual network input->output names: {}->{}".format(list(ie_encoder.input_info.keys()),
                                                                          list(ie_encoder.outputs.keys())))
        raise Exception("Unexpected network input or output names")

    # load model to the device
    log.info("Loading model to the {}".format(args.device))
    ie_encoder_exec = ie.load_network(network=ie_encoder, device_name=args.device)

    if args.questions:
        def questions():
            for question in args.questions:
                log.info("Question: {}".format(question))
                yield question
    else:
        def questions():
            while True:
                yield input('Type question (empty string to exit):')

    # loop on user's or prepared questions
    for question in questions():
        if not question.strip():
            break

        q_tokens_id, _ = text_to_tokens(question.lower(), vocab)

        # maximum number of tokens that can be processed by network at once
        max_length = ie_encoder.input_info[input_names[0]].input_data.shape[1]

        # calculate number of tokens for context in each inference request.
        # reserve 3 positions for special tokens
        # [CLS] q_tokens [SEP] c_tokens [SEP]
        c_wnd_len = max_length - (len(q_tokens_id) + 3)

        # number of tokens between two neighbouring context windows
        # a stride of half the window means the windows overlap by 50%
        c_stride = c_wnd_len // 2

        t0 = time.perf_counter()
        t_count = 0

        # array of answers from each window
        answers = []

        # init a window to iterate over context
        c_s, c_e = 0, min(c_wnd_len, len(c_tokens_id))

        # iterate while context window is not empty
        while c_e > c_s:
            # form the request
            tok_cls = vocab['[CLS]']
            tok_sep = vocab['[SEP]']
            input_ids = [tok_cls] + q_tokens_id + [tok_sep] + c_tokens_id[c_s:c_e] + [tok_sep]
            token_type_ids = [0] + [0] * len(q_tokens_id) + [0] + [1] * (c_e - c_s) + [0]
            attention_mask = [1] * len(input_ids)

            # pad the rest of the request
            pad_len = max_length - len(input_ids)
            input_ids += [0] * pad_len
            token_type_ids += [0] * pad_len
            attention_mask += [0] * pad_len

            # create numpy inputs for IE
            inputs = {
                input_names[0]: np.array([input_ids], dtype=np.int32),
                input_names[1]: np.array([attention_mask], dtype=np.int32),
                input_names[2]: np.array([token_type_ids], dtype=np.int32),
            }
            if len(input_names)>3:
                inputs[input_names[3]] = np.arange(len(input_ids), dtype=np.int32)[None, :]

            t_start = time.perf_counter()
            # infer by IE
            res = ie_encoder_exec.infer(inputs=inputs)
            t_end = time.perf_counter()
            t_count += 1
            log.info("Sequence of length {} is processed with {:0.2f} requests/sec ({:0.2} sec per request)".format(
                max_length,
                1 / (t_end - t_start),
                t_end - t_start
            ))

            # get start-end scores for context
            def get_score(name):
                out = np.exp(res[name].reshape((max_length,)))
                return out / out.sum(axis=-1)

            score_s = get_score(output_names[0])
            score_e = get_score(output_names[1])

            # get 'no-answer' score (not valid if model has been fine-tuned on squad1.x)
            if args.model_squad_ver.split('.')[0] == '1':
                score_na = 0
            else:
                score_na = score_s[0] * score_e[0]

            # find product of all start-end combinations to find the best one
            c_s_idx = len(q_tokens_id) + 2  # index of first context token in tensor
            c_e_idx = max_length - (1 + pad_len)  # index of last+1 context token in tensor
            score_mat = np.matmul(
                score_s[c_s_idx:c_e_idx].reshape((c_e - c_s, 1)),
                score_e[c_s_idx:c_e_idx].reshape((1, c_e - c_s))
            )
            # reset candidates with end before start
            score_mat = np.triu(score_mat)
            # reset long candidates (>max_answer_token_num)
            score_mat = np.tril(score_mat, args.max_answer_token_num - 1)
            # find the best start-end pair
            max_s, max_e = divmod(score_mat.flatten().argmax(), score_mat.shape[1])
            max_score = score_mat[max_s, max_e] * (1 - score_na)

            # convert to context text start-end index
            max_s = c_tokens_se[c_s + max_s][0]
            max_e = c_tokens_se[c_s + max_e][1]

            # check that answers list does not have duplicates (because of context windows overlapping)
            same = [i for i, a in enumerate(answers) if a[1] == max_s and a[2] == max_e]
            if same:
                assert len(same) == 1
                # update existing answer record
                a = answers[same[0]]
                answers[same[0]] = (max(max_score, a[0]), max_s, max_e)
            else:
                # add new record
                answers.append((max_score, max_s, max_e))

            # check that context window reached the end
            if c_e == len(c_tokens_id):
                break

            # move to next window position
            c_s = min(c_s + c_stride, len(c_tokens_id))
            c_e = min(c_s + c_wnd_len, len(c_tokens_id))

        t1 = time.perf_counter()
        log.info("The performance below is reported only for reference purposes, "
                 "please use the benchmark_app tool (part of the OpenVINO samples) for any actual measurements.")
        log.info("{} requests of {} length were processed in {:0.2f}sec ({:0.2}sec per request)".format(
            t_count,
            max_length,
            t1 - t0,
            (t1 - t0) / t_count
        ))

        # print top 3 results
        answers = sorted(answers, key=lambda x: -x[0])
        for score, s, e in answers[:3]:
            log.info("---answer: {:0.2f} {}".format(score, context[s:e]))
            c_s, c_e = find_sentence_range(context, s, e)
            log.info("   " + context[c_s:s] + COLOR_RED + context[s:e] + COLOR_RESET + context[e:c_e])
Code Example #4
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    log.info("Creating Inference Engine")
    ie = IECore()

    #read model to calculate embedding
    model_xml_emb = args.model_emb
    model_bin_emb = model_xml_emb.with_suffix(".bin")

    log.info("Loading embedding network files:\n\t{}\n\t{}".format(model_xml_emb, model_bin_emb))
    ie_encoder_emb = ie.read_network(model=model_xml_emb, weights=model_bin_emb)
    input_names_model_emb = list(ie_encoder_emb.input_info.keys())
    input_names_emb = args.input_names_emb.split(',')
    log.info("Expected embedding input names: {}".format(input_names_emb))
    log.info("Network embedding input names: {}".format(input_names_model_emb))
    # check input names
    if set(input_names_model_emb) != set(input_names_emb):
        log.error("Unexpected embedding network input names")
        raise Exception("Unexpected embedding network input names")

    # check outputs
    output_names_model_emb = list(ie_encoder_emb.outputs.keys())
    if len(output_names_model_emb) > 1:
        log.error("Expected only a single output in the embedding network but {} outputs were detected".format(len(output_names_model_emb)))
        raise Exception("Unexpected number of embedding network outputs")


    #reshape embedding model to infer short questions and long contexts
    ie_encoder_exec_emb_dict = {}
    max_length_c = 384
    max_length_q = 32

    for length in [max_length_q, max_length_c]:
        new_shapes = {}
        for i, input_info in ie_encoder_emb.input_info.items():
            new_shapes[i] = [1, length]
            log.info("Reshaped input {} from {} to the {}".format(
                i,
                input_info.input_data.shape,
                new_shapes[i]))
        log.info("Attempting to reshape the context embedding network to the modified inputs...")

        try:
            ie_encoder_emb.reshape(new_shapes)
            log.info("Successful!")
        except RuntimeError:
            log.error("Failed to reshape the embedding network")
            raise

        # Loading model to the plugin
        log.info("Loading model to the plugin")
        ie_encoder_exec_emb_dict[length] = ie.load_network(network=ie_encoder_emb, device_name=args.device)

    # Read model for final exact qa
    if args.model_qa:
        model_xml = args.model_qa
        model_bin = model_xml.with_suffix(".bin")
        log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))

        ie_encoder_qa = ie.read_network(model=model_xml, weights=model_bin)
        ie_encoder_qa.batch_size = 1

        input_names_qa = args.input_names_qa.split(',')
        output_names_qa = args.output_names_qa.split(',')
        log.info("Expected input->output names: {}->{}".format(input_names_qa, output_names_qa))

        #check input and output names
        input_names_model_qa = list(ie_encoder_qa.input_info.keys())
        output_names_model_qa = list(ie_encoder_qa.outputs.keys())
        log.info("Network input->output names: {}->{}".format(input_names_model_qa, output_names_model_qa))
        if set(input_names_model_qa) != set(input_names_qa) or set(output_names_model_qa) != set(output_names_qa):
            log.error("Unexpected network input or output names")
            raise Exception("Unexpected network input or output names")

        # Loading model to the plugin
        log.info("Loading model to the plugin")
        ie_encoder_qa_exec = ie.load_network(network=ie_encoder_qa, device_name=args.device)

        max_length_qc = ie_encoder_qa.input_info[input_names_qa[0]].input_data.shape[1]

    #load vocabulary file for all models
    log.info("Loading vocab file:\t{}".format(args.vocab))
    vocab = load_vocab_file(args.vocab)
    log.info("{} tokens loaded".format(len(vocab)))

    #define function to infer embedding
    def calc_emb(tokens_id, max_length):
        num = min(max_length - 2, len(tokens_id))

        # forms the request
        pad_len = max_length - num - 2
        tok_cls = [vocab['[CLS]']]
        tok_sep = [vocab['[SEP]']]
        tok_pad = [vocab['[PAD]']]

        dtype = np.int32
        inputs = {
            input_names_emb[0]: np.array([tok_cls + tokens_id[:num] + tok_sep + tok_pad * pad_len], dtype=dtype),
            input_names_emb[1]: np.array([[1]     + [1] * num       + [1]     + [0]     * pad_len], dtype=dtype),
            input_names_emb[2]: np.array([[0]     + [0] * num       + [0]     + tok_pad * pad_len], dtype=dtype),
            input_names_emb[3]: np.arange(max_length, dtype=dtype)[None, :]
        }

        # calc embedding
        ie_encoder_exec_emb = ie_encoder_exec_emb_dict[max_length]

        t_start = time.perf_counter()
        res = ie_encoder_exec_emb.infer(inputs=inputs)
        t_end = time.perf_counter()
        log.info("embedding calculated for sequence of length {} with {:0.2f} requests/sec ({:0.2} sec per request)".format(
            max_length,
            1 / (t_end - t_start),
            t_end - t_start
        ))


        res = res[output_names_model_emb[0]]
        return res.squeeze(0)

    #small class to store context as text and tokens and its embedding vector
    class ContextData:
        def __init__(self, context, c_tokens_id, c_tokens_se):
            self.context = context
            self.c_tokens_id = c_tokens_id
            self.c_tokens_se = c_tokens_se
            self.c_emb = calc_emb(self.c_tokens_id, max_length_c)

    paragraphs = get_paragraphs(args.input)
    contexts_all = []

    log.info("Indexing {} paragraphs...".format(len(paragraphs)))
    for par in paragraphs:
        c_tokens_id, c_tokens_se = text_to_tokens(par.lower(), vocab)
        if not c_tokens_id:
            continue

        # get context as string and then encode it into token id list
        # calculate number of tokens for context in each request.
        # reserve 3 positions for special tokens
        # [CLS] q_tokens [SEP] c_tokens [SEP]
        if args.model_qa:
            # so the context can be passed to model_qa together with the question
            c_wnd_len = max_length_qc - (max_length_q + 3)
        else:
            # so the context can be passed to model_emb on its own
            c_wnd_len = max_length_c - 2

        # number of tokens between two neighbouring context windows
        # a stride of half the window means the windows overlap by 50%
        c_stride = c_wnd_len // 2

        # init scan window
        c_s, c_e = 0, min(c_wnd_len, len(c_tokens_id))

        # iterate while context window is not empty
        while c_e > c_s:
            contexts_all.append(ContextData(par, c_tokens_id[c_s:c_e], c_tokens_se[c_s:c_e]))

            # check whether the context window reached the end
            if c_e == len(c_tokens_id):
                break

            # move to next window position
            c_s, c_e = c_s + c_stride, c_e + c_stride

            shift_left = max(0, c_e - len(c_tokens_id))
            c_s, c_e = c_s - shift_left, c_e - shift_left
            assert c_s >= 0, "c_s can only go negative when the window is longer than the token list, which cannot happen here"

    if args.questions:
        def questions():
            for question in args.questions:
                log.info("Question: {}".format(question))
                yield question
    else:
        def questions():
            while True:
                yield input('Type question (empty string to exit):')

    # loop on user's or prepared questions
    for question in questions():
        if not question.strip():
            break

        log.info("---Stage 1---Calc question embedding and compare with {} context embeddings".format(len(contexts_all)))
        q_tokens_id, _ = text_to_tokens(question.lower(), vocab)

        q_emb = calc_emb(q_tokens_id, max_length_q)
        distances = [(np.linalg.norm(c.c_emb - q_emb, 2), c) for c in contexts_all]
        distances.sort(key=lambda x: x[0])
        keep_num = min(args.best_n, len(distances))
        distances_filtered = distances[:keep_num]

        #print short list
        print("The closest contexts to question:")
        for i, (dist, c_data) in enumerate(distances_filtered):
            print("#{}: embedding distance {} for context '{}'".format(i + 1, dist, c_data.context))

        #run model_qa if available to find exact answer to question in filtered in contexts
        if args.model_qa:

            log.info("---Stage 2---Looking for exact answers in {} contexts filtered in from {}".format(keep_num, len(distances)))
            # array of answers from each context_data
            answers = []

            for dist, c_data in distances_filtered:
                #forms the request
                tok_cls = [vocab['[CLS]']]
                tok_sep = [vocab['[SEP]']]
                tok_pad = [vocab['[PAD]']]
                req_len = len(q_tokens_id) + len(c_data.c_tokens_id) + 3
                pad_len = max_length_qc - req_len
                assert pad_len >= 0

                input_ids = tok_cls + q_tokens_id + tok_sep + c_data.c_tokens_id + tok_sep + tok_pad*pad_len
                token_type_ids = [0] * (len(q_tokens_id)+2) + [1] * (len(c_data.c_tokens_id)+1) + tok_pad * pad_len
                attention_mask = [1] * req_len + [0] * pad_len

                #create numpy inputs for IE
                inputs = {
                    input_names_qa[0]: np.array([input_ids], dtype=np.int32),
                    input_names_qa[1]: np.array([attention_mask], dtype=np.int32),
                    input_names_qa[2]: np.array([token_type_ids], dtype=np.int32),
                }
                if len(input_names_qa) > 3:
                    inputs['position_ids'] = np.arange(max_length_qc, dtype=np.int32)[None, :]

                #infer by IE
                t_start = time.perf_counter()
                res = ie_encoder_qa_exec.infer(inputs=inputs)
                t_end = time.perf_counter()
                log.info(
                    "Exact answer calculated for sequence of length {} with {:0.2f} requests/sec ({:0.2} sec per request)".format(
                        max_length_qc,
                        1 / (t_end - t_start),
                        t_end - t_start
                    ))

                #get start-end scores for context
                def get_score(name):
                    out = np.exp(res[name].reshape((max_length_qc, )))
                    return out / out.sum(axis=-1)
                score_s = get_score(output_names_qa[0])
                score_e = get_score(output_names_qa[1])

                # find product of all start-end combinations to find the best one
                c_s_idx = len(q_tokens_id) + 2 # index of first context token in tensor
                c_e_idx = max_length_qc-(1+pad_len) # index of last+1 context token in tensor
                score_mat = np.matmul(
                    score_s[c_s_idx:c_e_idx].reshape((len(c_data.c_tokens_id), 1)),
                    score_e[c_s_idx:c_e_idx].reshape((1, len(c_data.c_tokens_id)))
                )
                # reset candidates with end before start
                score_mat = np.triu(score_mat)
                # reset long candidates (>max_answer_token_num)
                score_mat = np.tril(score_mat, args.max_answer_token_num - 1)
                # find the best start-end pair
                max_s, max_e = divmod(score_mat.flatten().argmax(), score_mat.shape[1])
                max_score = score_mat[max_s, max_e]

                # convert to context text start-end index
                max_s = c_data.c_tokens_se[max_s][0]
                max_e = c_data.c_tokens_se[max_e][1]

                # check that the answers list does not already contain this answer
                # (duplicates can appear because the context windows overlap)
                same = [i for i, a in enumerate(answers) if a[1] == max_s and a[2]==max_e and a[3] is c_data.context]
                if same:
                    assert len(same) == 1
                    # update the existing answer record
                    a = answers[same[0]]
                    answers[same[0]] = (max(max_score, a[0]), max_s, max_e, c_data.context)
                else:
                    #add new record
                    answers.append((max_score, max_s, max_e, c_data.context))

            def mark(txt):
                return "\033[91m" + txt + "\033[0m" if args.colors else "*" + txt + "*"

            #print top 3 results
            answers.sort(key=lambda x: -x[0])
            log.info("---Stage 3---Find best 3 answers from {} results of Stage 1".format(len(answers)))
            for score, s, e, context in answers[:3]:
                print("Answer (score: {:0.2f}): {}".format(score, mark(context[s:e])))
                print(context[:s] + mark(context[s:e]) + context[e:])
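
Stage 1 above ranks the indexed context windows by the L2 distance between their embedding and the question embedding and keeps the best_n closest ones. A minimal, self-contained sketch of that retrieval step with toy vectors:

# Minimal sketch (toy vectors): rank contexts by L2 distance to the question
# embedding, as in Stage 1 above.
import numpy as np

q_emb = np.array([1.0, 0.0, 0.0])
context_embs = {
    "paragraph A": np.array([0.9, 0.1, 0.0]),
    "paragraph B": np.array([0.0, 1.0, 0.0]),
}

distances = sorted(
    (float(np.linalg.norm(emb - q_emb, 2)), name) for name, emb in context_embs.items()
)
best_n = 1
print(distances[:best_n])   # "paragraph A" is the closest context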
Code Example #5
def setup(url):
    global vocab
    global ie_encoder
    global input_names
    global output_names
    global model
    global c_tokens_id
    global ie_encoder_exec
    global args
    global c_tokens_se
    global context
    global COLOR_RED
    global COLOR_RESET



    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    if args.colors:
        COLOR_RED = "\033[91m"
        COLOR_RESET = "\033[0m"
    else:
        COLOR_RED = ""
        COLOR_RESET = ""

    # load vocabulary file for model
    log.info("Loading vocab file:\t{}".format(args.vocab))

    vocab = load_vocab_file(args.vocab)
    log.info("{} tokens loaded".format(len(vocab)))

    # get context as a string (as we might need its length for the sequence reshape)
    p = url
    paragraphs = get_paragraphs([p])
    context = '\n'.join(paragraphs)
    log.info("Size: {} chars".format(len(context)))
    log.info("Context: " + COLOR_RED + context + COLOR_RESET)
    # encode context into token ids list
    c_tokens_id, c_tokens_se = text_to_tokens(context.lower(), vocab)

    log.info("Initializing Inference Engine")
    ie = IECore()
    version = ie.get_versions(args.device)[args.device]
    version_str = "{}.{}.{}".format(version.major, version.minor, version.build_number)
    log.info("Plugin version is {}".format(version_str))

    # read IR
    model_xml = args.model
    model_bin = model_xml.with_suffix(".bin")
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))

    ie_encoder = ie.read_network(model=model_xml, weights=model_bin)

    if args.reshape:
        # reshape the sequence length to the context + maximum question length (in tokens)
        first_input_layer = next(iter(ie_encoder.inputs))
        c = ie_encoder.inputs[first_input_layer].shape[1]
        # round up to the closest multiple of 64; if that is smaller than the current network's sequence length, use it
        seq = min(c, int(np.ceil((len(c_tokens_id) + args.max_question_token_num) / 64) * 64))
        if seq < c:
            input_info = list(ie_encoder.inputs)
            new_shapes = dict([])
            for i in input_info:
                n, c = ie_encoder.inputs[i].shape
                new_shapes[i] = [n, seq]
                log.info("Reshaped input {} from {} to the {}".format(i, ie_encoder.inputs[i].shape, new_shapes[i]))
            log.info("Attempting to reshape the network to the modified inputs...")
            try:
                ie_encoder.reshape(new_shapes)
                log.info("Successful!")
            except RuntimeError:
                log.error("Failed to reshape the network, please retry the demo without '-r' option")
                sys.exit(-1)
        else:
            log.info("Skipping network reshaping,"
                     " as (context length + max question length) exceeds the current (input) network sequence length")

    # check input and output names
    input_names = list(i.strip() for i in args.input_names.split(','))
    output_names = list(o.strip() for o in args.output_names.split(','))
    if ie_encoder.inputs.keys() != set(input_names) or ie_encoder.outputs.keys() != set(output_names):
        log.error("Input or Output names do not match")
        log.error("    The demo expects input->output names: {}->{}. "
                  "Please use the --input_names and --output_names to specify the right names "
                  "(see actual values below)".format(input_names, output_names))
        log.error("    Actual network input->output names: {}->{}".format(list(ie_encoder.inputs.keys()),
                                                                          list(ie_encoder.outputs.keys())))
        log.error("    Actual network input->output values: {}->{}".format(list(ie_encoder.inputs.values()),
                                                                          list(ie_encoder.outputs.values())))
        raise Exception("Unexpected network input or output names")


    # load model to the device
    log.info("Loading model to the {}".format(args.device))
    ie_encoder_exec = ie.load_network(network=ie_encoder, device_name=args.device)
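
The reshape logic above rounds the required sequence length (context tokens plus the maximum question length) up to a multiple of 64 and only reshapes when that is shorter than the network's current input length. A small arithmetic sketch with made-up numbers:

# Minimal sketch of the reshape arithmetic above (toy values).
import numpy as np

current_seq_len = 384        # network's current sequence length
context_tokens = 100         # stand-in for len(c_tokens_id)
max_question_tokens = 32     # stand-in for args.max_question_token_num

needed = int(np.ceil((context_tokens + max_question_tokens) / 64) * 64)
seq = min(current_seq_len, needed)
print(needed, seq, seq < current_seq_len)   # 192 192 True -> reshape inputs to [1, 192]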
Code Example #6
def update_output_div(n_clicks, input_value):
    # loop on user's or prepared questions
    for question in [input_value]:
        if not question.strip():
            break

        q_tokens_id, _ = text_to_tokens(question.lower(), vocab)

        # maximum number of tokens that can be processed by network at once
        max_length = ie_encoder.inputs[input_names[0]].shape[1]

        # calculate number of tokens for context in each inference request.
        # reserve 3 positions for special tokens
        # [CLS] q_tokens [SEP] c_tokens [SEP]
        c_wnd_len = max_length - (len(q_tokens_id) + 3)

        # number of tokens between two neighbouring context windows
        # a stride of half the window means the windows overlap by 50%
        c_stride = c_wnd_len // 2

        t0 = time.perf_counter()
        t_count = 0

        # array of answers from each window
        answers = []

        # init a window to iterate over context
        c_s, c_e = 0, min(c_wnd_len, len(c_tokens_id))

        # iterate while context window is not empty
        while c_e > c_s:
            # form the request
            tok_cls = vocab['[CLS]']
            tok_sep = vocab['[SEP]']
            input_ids = [tok_cls] + q_tokens_id + [
                tok_sep
            ] + c_tokens_id[c_s:c_e] + [tok_sep]
            token_type_ids = [0] + [0] * len(q_tokens_id) + [
                0
            ] + [1] * (c_e - c_s) + [0]
            attention_mask = [1] * len(input_ids)

            # pad the rest of the request
            pad_len = max_length - len(input_ids)
            input_ids += [0] * pad_len
            token_type_ids += [0] * pad_len
            attention_mask += [0] * pad_len

            # create numpy inputs for IE
            inputs = {
                input_names[0]: np.array([input_ids], dtype=np.int32),
                input_names[1]: np.array([attention_mask], dtype=np.int32),
                input_names[2]: np.array([token_type_ids], dtype=np.int32),
            }
            if len(input_names) > 3:
                inputs[input_names[3]] = np.arange(len(input_ids),
                                                   dtype=np.int32)[None, :]

            t_start = time.perf_counter()
            # infer by IE
            res = ie_encoder_exec.infer(inputs=inputs)
            t_end = time.perf_counter()
            t_count += 1
            log.info(
                "Sequence of length {} is processed with {:0.2f} requests/sec ({:0.2} sec per request)"
                .format(max_length, 1 / (t_end - t_start), t_end - t_start))

            # get start-end scores for context
            def get_score(name):
                out = np.exp(res[name].reshape((max_length, )))
                return out / out.sum(axis=-1)

            score_s = get_score(output_names[0])
            score_e = get_score(output_names[1])

            # get 'no-answer' score (not valid if model has been fine-tuned on squad1.x)
            if args.model_squad_ver.split('.')[0] == '1':
                score_na = 0
            else:
                score_na = score_s[0] * score_e[0]

            # find product of all start-end combinations to find the best one
            c_s_idx = len(
                q_tokens_id) + 2  # index of first context token in tensor
            c_e_idx = max_length - (
                1 + pad_len)  # index of last+1 context token in tensor
            score_mat = np.matmul(
                score_s[c_s_idx:c_e_idx].reshape((c_e - c_s, 1)),
                score_e[c_s_idx:c_e_idx].reshape((1, c_e - c_s)))
            # reset candidates with end before start
            score_mat = np.triu(score_mat)
            # reset long candidates (>max_answer_token_num)
            score_mat = np.tril(score_mat, args.max_answer_token_num - 1)
            # find the best start-end pair
            max_s, max_e = divmod(score_mat.flatten().argmax(),
                                  score_mat.shape[1])
            max_score = score_mat[max_s, max_e] * (1 - score_na)

            # convert to context text start-end index
            max_s = c_tokens_se[c_s + max_s][0]
            max_e = c_tokens_se[c_s + max_e][1]

            # check that answers list does not have duplicates (because of context windows overlapping)
            same = [
                i for i, a in enumerate(answers)
                if a[1] == max_s and a[2] == max_e
            ]
            if same:
                assert len(same) == 1
                # update existing answer record
                a = answers[same[0]]
                answers[same[0]] = (max(max_score, a[0]), max_s, max_e)
            else:
                # add new record
                answers.append((max_score, max_s, max_e))

            # check that context window reached the end
            if c_e == len(c_tokens_id):
                break

            # move to next window position
            c_s = min(c_s + c_stride, len(c_tokens_id))
            c_e = min(c_s + c_wnd_len, len(c_tokens_id))

        t1 = time.perf_counter()
        log.info(
            "The performance below is reported only for reference purposes, "
            "please use the benchmark_app tool (part of the OpenVINO samples) for any actual measurements."
        )
        log.info(
            "{} requests of {} length were processed in {:0.2f}sec ({:0.2}sec per request)"
            .format(t_count, max_length, t1 - t0, (t1 - t0) / t_count))

        # print top 3 results
        answers = sorted(answers, key=lambda x: -x[0])
        for score, s, e in answers[:3]:
            resp = "Answer: {}".format(context[s:e]) + "\n"
            #cont = context[c_s:s] + COLOR_RED + context[s:e] + COLOR_RESET + context[e:c_e]
            cont = "Can I answer any other Questions? "
            log.info("---answer: {:0.2f} {}".format(score, context[s:e]))
            c_s, c_e = find_sentence_range(context, s, e)
            log.info("   " + context[c_s:s] + COLOR_RED + context[s:e] +
                     COLOR_RESET + context[e:c_e])
            return resp
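
The inner while loop above slides a window of c_wnd_len tokens over the context with a stride of half a window, so neighbouring requests overlap by 50%. A minimal sketch of just that window iteration with toy lengths:

# Minimal sketch (toy lengths): the sliding context window used above.
num_context_tokens = 10
c_wnd_len = 4
c_stride = c_wnd_len // 2

c_s, c_e = 0, min(c_wnd_len, num_context_tokens)
while c_e > c_s:
    print((c_s, c_e))                    # (0, 4), (2, 6), (4, 8), (6, 10)
    if c_e == num_context_tokens:        # window reached the end of the context
        break
    c_s = min(c_s + c_stride, num_context_tokens)
    c_e = min(c_s + c_wnd_len, num_context_tokens)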
Code Example #7
if args.colors:
    COLOR_RED = "\033[91m"
    COLOR_RESET = "\033[0m"
else:
    COLOR_RED = ""
    COLOR_RESET = ""

# load vocabulary file for model
log.info("Loading vocab file:\t{}".format(args.vocab))

vocab = load_vocab_file(args.vocab)
log.info("{} tokens loaded".format(len(vocab)))

# get context as a string (as we might need its length for the sequence reshape)
p = url
paragraphs = get_paragraphs([p])
context = '\n'.join(paragraphs)
log.info("Size: {} chars".format(len(context)))
log.info("Context: " + COLOR_RED + context + COLOR_RESET)
# encode context into token ids list
c_tokens_id, c_tokens_se = text_to_tokens(context.lower(), vocab)

log.info("Initializing Inference Engine")
ie = IECore()
version = ie.get_versions(args.device)[args.device]
version_str = "{}.{}.{}".format(version.major, version.minor,
                                version.build_number)
log.info("Plugin version is {}".format(version_str))

# read IR
model_xml = args.model
model_bin = model_xml.with_suffix(".bin")
log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))

ie_encoder = ie.read_network(model=model_xml, weights=model_bin)